Diffstat (limited to 'drivers/edac')
72 files changed, 8886 insertions, 6555 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 4cfdefbd744d..81e40543ffd8 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -23,14 +23,6 @@ menuconfig EDAC if EDAC -config EDAC_LEGACY_SYSFS - bool "EDAC legacy sysfs" - default y - help - Enable the compatibility sysfs nodes. - Use 'Y' if your edac utilities aren't ported to work with the newer - structures. - config EDAC_DEBUG bool "Debugging" select DEBUG_FS @@ -75,9 +67,39 @@ config EDAC_GHES In doubt, say 'Y'. +config EDAC_SCRUB + bool "EDAC scrub feature" + help + The EDAC scrub feature is optional and is designed to control the + memory scrubbers in the system. The common sysfs scrub interface + abstracts the control of various arbitrary scrubbing functionalities + into a unified set of functions. + Say 'y/n' to enable/disable EDAC scrub feature. + +config EDAC_ECS + bool "EDAC ECS (Error Check Scrub) feature" + help + The EDAC ECS feature is optional and is designed to control on-die + error check scrub (e.g., DDR5 ECS) in the system. The common sysfs + ECS interface abstracts the control of various ECS functionalities + into a unified set of functions. + Say 'y/n' to enable/disable EDAC ECS feature. + +config EDAC_MEM_REPAIR + bool "EDAC memory repair feature" + help + The EDAC memory repair feature is optional and is designed to control + the memory devices with repair features, such as Post Package Repair + (PPR), memory sparing etc. The common sysfs memory repair interface + abstracts the control of various memory repair functionalities into + a unified set of functions. + Say 'y/n' to enable/disable EDAC memory repair feature. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" depends on AMD_NB && EDAC_DECODE_MCE + depends on AMD_NODE + imply AMD_ATL help Support for error detection and correction of DRAM ECC errors on the AMD64 families (>= K8) of memory controllers. @@ -166,7 +188,7 @@ config EDAC_I3200 config EDAC_IE31200 tristate "Intel e312xx" - depends on PCI && X86 + depends on PCI && X86 && X86_MCE_INTEL help Support for error detection and correction on the Intel E3-1200 based DRAM controllers. @@ -261,6 +283,18 @@ config EDAC_I10NM system has non-volatile DIMMs you should also manually select CONFIG_ACPI_NFIT. +config EDAC_IMH + tristate "Intel Integrated Memory/IO Hub MC" + depends on X86_64 && X86_MCE_INTEL && ACPI + depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_IMH can't be y + select DMI + select ACPI_ADXL + help + Support for error detection and correction on the Intel + Integrated Memory/IO Hub Memory Controller. This MC IP is + first used on the Diamond Rapids servers but may appear on + others in the future. + config EDAC_PND2 tristate "Intel Pondicherry2" depends on PCI && X86_64 && X86_MCE_INTEL @@ -302,41 +336,6 @@ config EDAC_PASEMI Support for error detection and correction on PA Semi PWRficient. -config EDAC_CELL - tristate "Cell Broadband Engine memory controller" - depends on PPC_CELL_COMMON - help - Support for error detection and correction on the - Cell Broadband Engine internal memory controller - on platform without a hypervisor - -config EDAC_PPC4XX - tristate "PPC4xx IBM DDR2 Memory Controller" - depends on 4xx - help - This enables support for EDAC on the ECC memory used - with the IBM DDR2 memory controller found in various - PowerPC 4xx embedded processors such as the 405EX[r], - 440SP, 440SPe, 460EX, 460GT and 460SX.
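The EDAC_SCRUB, EDAC_ECS and EDAC_MEM_REPAIR entries added above all describe the same idea: device-specific scrub and repair controls exposed through one common sysfs layout. A minimal userspace sketch of driving such an interface follows; the device name, the scrub0 instance and the attribute names are illustrative assumptions, not taken from this patch.

#include <stdio.h>

/* Write one value to a scrub control attribute of an EDAC device. */
static int scrub_set(const char *dev, const char *attr, const char *val)
{
	char path[256];
	FILE *f;

	/* Assumed layout: /sys/bus/edac/devices/<dev>/scrub0/<attr> */
	snprintf(path, sizeof(path), "/sys/bus/edac/devices/%s/scrub0/%s",
		 dev, attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	fclose(f);
	return 0;
}

int main(void)
{
	/* Assumed device name: pick a cycle duration, then enable scrubbing. */
	scrub_set("cxl_mem0", "current_cycle_duration", "36000");
	scrub_set("cxl_mem0", "enable_background", "1");
	return 0;
}

The value of the abstraction is that the same two writes would work regardless of which scrubbing hardware the driver wires up behind the nodes.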
- -config EDAC_AMD8131 - tristate "AMD8131 HyperTransport PCI-X Tunnel" - depends on PCI && PPC_MAPLE - help - Support for error detection and correction on the - AMD8131 HyperTransport PCI-X Tunnel chip. - Note, add more Kconfig dependency if it's adopted - on some machine other than Maple. - -config EDAC_AMD8111 - tristate "AMD8111 HyperTransport I/O Hub" - depends on PCI && PPC_MAPLE - help - Support for error detection and correction on the - AMD8111 HyperTransport I/O Hub chip. - Note, add more Kconfig dependency if it's adopted - on some machine other than Maple. - config EDAC_CPC925 tristate "IBM CPC925 Memory Controller (PPC970FX)" depends on PPC64 @@ -542,4 +541,59 @@ config EDAC_DMC520 Support for error detection and correction on the SoCs with ARM DMC-520 DRAM controller. +config EDAC_ZYNQMP + tristate "Xilinx ZynqMP OCM Controller" + depends on ARCH_ZYNQMP || COMPILE_TEST + help + This driver supports error detection and correction for the + Xilinx ZynqMP OCM (On Chip Memory) controller. It can also be + built as a module. In that case it will be called zynqmp_edac. + +config EDAC_NPCM + tristate "Nuvoton NPCM DDR Memory Controller" + depends on (ARCH_NPCM || COMPILE_TEST) + help + Support for error detection and correction on the Nuvoton NPCM DDR + memory controller. + + The memory controller supports single bit error correction and double + bit error detection (in-line ECC, in which a section equal to 1/8th of + the memory device is reserved for ECC storage rather than data). + +config EDAC_VERSAL + tristate "Xilinx Versal DDR Memory Controller" + depends on ARCH_ZYNQMP || COMPILE_TEST + help + Support for error detection and correction on the Xilinx Versal DDR + memory controller. + + Reports both single bit errors (CE) and double bit errors (UE), and + supports injecting both correctable and uncorrectable errors + for debugging purposes. + +config EDAC_LOONGSON + tristate "Loongson Memory Controller" + depends on LOONGARCH && ACPI + help + Support for error detection and correction on the Loongson + family memory controller. This driver reports single bit + errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000 + are compatible. + +config EDAC_CORTEX_A72 + tristate "ARM Cortex A72" + depends on ARM64 + help + Support for L1/L2 cache error detection on the ARM Cortex A72 processor. + The detected errors are read from the CPU/L2 memory error syndrome + registers and reported through EDAC. + +config EDAC_VERSALNET + tristate "AMD VersalNET DDR Controller" + depends on CDX_CONTROLLER && ARCH_ZYNQMP + help + Support for single bit error correction, double bit error detection, + and reporting of other system errors from various IP subsystems like + RPU, NOCs, HNICX, PL on the AMD Versal NET DDR memory controller.
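One arithmetic note on the in-line ECC scheme in the EDAC_NPCM entry above: protection is paid for in capacity, since the ECC bits are carved out of the DRAM device itself. A short illustrative C sketch of the resulting numbers (the 2 GiB device size is an assumed example):

#include <stdio.h>

int main(void)
{
	/* In-line ECC: 1/8th of the device holds ECC, 7/8ths hold data. */
	unsigned long long total_mib = 2048;		/* assumed 2 GiB device */
	unsigned long long ecc_mib = total_mib / 8;	/* 256 MiB for ECC */
	unsigned long long usable_mib = total_mib - ecc_mib;

	printf("total %llu MiB, ECC %llu MiB, usable %llu MiB\n",
	       total_mib, ecc_mib, usable_mib);	/* 2048, 256, 1792 */
	return 0;
}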
+ endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 2d1641a27a28..8429b1e856bc 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -12,6 +12,9 @@ edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o wq.o edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o +edac_core-$(CONFIG_EDAC_SCRUB) += scrub.o +edac_core-$(CONFIG_EDAC_ECS) += ecs.o +edac_core-$(CONFIG_EDAC_MEM_REPAIR) += mem_repair.o ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o @@ -54,16 +57,16 @@ obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac_mod.o layerscape_edac_mod-y := fsl_ddr_edac.o layerscape_edac.o obj-$(CONFIG_EDAC_LAYERSCAPE) += layerscape_edac_mod.o -skx_edac-y := skx_common.o skx_base.o -obj-$(CONFIG_EDAC_SKX) += skx_edac.o +skx_edac_common-y := skx_common.o -i10nm_edac-y := skx_common.o i10nm_base.o -obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o +skx_edac-y := skx_base.o +obj-$(CONFIG_EDAC_SKX) += skx_edac.o skx_edac_common.o -obj-$(CONFIG_EDAC_CELL) += cell_edac.o -obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o -obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o -obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o +i10nm_edac-y := i10nm_base.o +obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o skx_edac_common.o + +imh_edac-y := imh_base.o +obj-$(CONFIG_EDAC_IMH) += imh_edac.o skx_edac_common.o obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o @@ -84,3 +87,9 @@ obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o +obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o +obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o +obj-$(CONFIG_EDAC_VERSAL) += versal_edac.o +obj-$(CONFIG_EDAC_LOONGSON) += loongson_edac.o +obj-$(CONFIG_EDAC_VERSALNET) += versalnet_edac.o +obj-$(CONFIG_EDAC_CORTEX_A72) += a72_edac.o diff --git a/drivers/edac/a72_edac.c b/drivers/edac/a72_edac.c new file mode 100644 index 000000000000..9262d75c3855 --- /dev/null +++ b/drivers/edac/a72_edac.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cortex A72 EDAC L1 and L2 cache error detection + * + * Copyright (c) 2020 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de> + * Copyright (c) 2025 Microsoft Corporation, <vijayb@linux.microsoft.com> + * + * Based on Code from: + * Copyright (c) 2018, NXP Semiconductor + * Author: York Sun <york.sun@nxp.com> + */ + +#include <linux/module.h> +#include <linux/of.h> +#include <linux/bitfield.h> +#include <asm/smp_plat.h> + +#include "edac_module.h" + +#define DRVNAME "a72-edac" + +#define SYS_CPUMERRSR_EL1 sys_reg(3, 1, 15, 2, 2) +#define SYS_L2MERRSR_EL1 sys_reg(3, 1, 15, 2, 3) + +#define CPUMERRSR_EL1_RAMID GENMASK(30, 24) +#define L2MERRSR_EL1_CPUID_WAY GENMASK(21, 18) + +#define CPUMERRSR_EL1_VALID BIT(31) +#define CPUMERRSR_EL1_FATAL BIT(63) +#define L2MERRSR_EL1_VALID BIT(31) +#define L2MERRSR_EL1_FATAL BIT(63) + +#define L1_I_TAG_RAM 0x00 +#define L1_I_DATA_RAM 0x01 +#define L1_D_TAG_RAM 0x08 +#define L1_D_DATA_RAM 0x09 +#define TLB_RAM 0x18 + +#define MESSAGE_SIZE 64 + +struct mem_err_synd_reg { + u64 cpu_mesr; + u64 l2_mesr; +}; + +static struct cpumask compat_mask; + +static void report_errors(struct edac_device_ctl_info *edac_ctl, int cpu, + struct mem_err_synd_reg *mesr) +{ + u64 cpu_mesr = mesr->cpu_mesr; + u64 l2_mesr = mesr->l2_mesr; + char msg[MESSAGE_SIZE]; + + if (cpu_mesr & CPUMERRSR_EL1_VALID) { + const char *str; + bool fatal = cpu_mesr & CPUMERRSR_EL1_FATAL; + + switch 
(FIELD_GET(CPUMERRSR_EL1_RAMID, cpu_mesr)) { + case L1_I_TAG_RAM: + str = "L1-I Tag RAM"; + break; + case L1_I_DATA_RAM: + str = "L1-I Data RAM"; + break; + case L1_D_TAG_RAM: + str = "L1-D Tag RAM"; + break; + case L1_D_DATA_RAM: + str = "L1-D Data RAM"; + break; + case TLB_RAM: + str = "TLB RAM"; + break; + default: + str = "Unspecified"; + break; + } + + snprintf(msg, MESSAGE_SIZE, "%s %s error(s) on CPU %d", + str, fatal ? "fatal" : "correctable", cpu); + + if (fatal) + edac_device_handle_ue(edac_ctl, cpu, 0, msg); + else + edac_device_handle_ce(edac_ctl, cpu, 0, msg); + } + + if (l2_mesr & L2MERRSR_EL1_VALID) { + bool fatal = l2_mesr & L2MERRSR_EL1_FATAL; + + snprintf(msg, MESSAGE_SIZE, "L2 %s error(s) on CPU %d CPUID/WAY 0x%lx", + fatal ? "fatal" : "correctable", cpu, + FIELD_GET(L2MERRSR_EL1_CPUID_WAY, l2_mesr)); + if (fatal) + edac_device_handle_ue(edac_ctl, cpu, 1, msg); + else + edac_device_handle_ce(edac_ctl, cpu, 1, msg); + } +} + +static void read_errors(void *data) +{ + struct mem_err_synd_reg *mesr = data; + + mesr->cpu_mesr = read_sysreg_s(SYS_CPUMERRSR_EL1); + if (mesr->cpu_mesr & CPUMERRSR_EL1_VALID) { + write_sysreg_s(0, SYS_CPUMERRSR_EL1); + isb(); + } + mesr->l2_mesr = read_sysreg_s(SYS_L2MERRSR_EL1); + if (mesr->l2_mesr & L2MERRSR_EL1_VALID) { + write_sysreg_s(0, SYS_L2MERRSR_EL1); + isb(); + } +} + +static void a72_edac_check(struct edac_device_ctl_info *edac_ctl) +{ + struct mem_err_synd_reg mesr; + int cpu; + + cpus_read_lock(); + for_each_cpu_and(cpu, cpu_online_mask, &compat_mask) { + smp_call_function_single(cpu, read_errors, &mesr, true); + report_errors(edac_ctl, cpu, &mesr); + } + cpus_read_unlock(); +} + +static int a72_edac_probe(struct platform_device *pdev) +{ + struct edac_device_ctl_info *edac_ctl; + struct device *dev = &pdev->dev; + int rc; + + edac_ctl = edac_device_alloc_ctl_info(0, "cpu", + num_possible_cpus(), "L", 2, 1, + edac_device_alloc_index()); + if (!edac_ctl) + return -ENOMEM; + + edac_ctl->edac_check = a72_edac_check; + edac_ctl->dev = dev; + edac_ctl->mod_name = dev_name(dev); + edac_ctl->dev_name = dev_name(dev); + edac_ctl->ctl_name = DRVNAME; + dev_set_drvdata(dev, edac_ctl); + + rc = edac_device_add_device(edac_ctl); + if (rc) + goto out_dev; + + return 0; + +out_dev: + edac_device_free_ctl_info(edac_ctl); + + return rc; +} + +static void a72_edac_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *edac_ctl = dev_get_drvdata(&pdev->dev); + + edac_device_del_device(edac_ctl->dev); + edac_device_free_ctl_info(edac_ctl); +} + +static const struct of_device_id cortex_arm64_edac_of_match[] = { + { .compatible = "arm,cortex-a72" }, + {} +}; +MODULE_DEVICE_TABLE(of, cortex_arm64_edac_of_match); + +static struct platform_driver a72_edac_driver = { + .probe = a72_edac_probe, + .remove = a72_edac_remove, + .driver = { + .name = DRVNAME, + }, +}; + +static struct platform_device *a72_pdev; + +static int __init a72_edac_driver_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu); + if (np) { + if (of_match_node(cortex_arm64_edac_of_match, np) && + of_property_read_bool(np, "edac-enabled")) { + cpumask_set_cpu(cpu, &compat_mask); + } + } else { + pr_warn("failed to find device node for CPU %d\n", cpu); + } + } + + if (cpumask_empty(&compat_mask)) + return 0; + + a72_pdev = platform_device_register_simple(DRVNAME, -1, NULL, 0); + if (IS_ERR(a72_pdev)) { + pr_err("failed to register A72 EDAC device\n"); + return PTR_ERR(a72_pdev); + } + + return 
platform_driver_register(&a72_edac_driver); +} + +static void __exit a72_edac_driver_exit(void) +{ + platform_device_unregister(a72_pdev); + platform_driver_unregister(&a72_edac_driver); +} + +module_init(a72_edac_driver_init); +module_exit(a72_edac_driver_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>"); +MODULE_DESCRIPTION("Cortex A72 L1 and L2 cache EDAC driver"); diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index e7e8e624a436..0c5b94e64ea1 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -22,6 +22,7 @@ #include <linux/of_platform.h> #include <linux/panic_notifier.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/regmap.h> #include <linux/types.h> #include <linux/uaccess.h> @@ -98,7 +99,7 @@ static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) if (status & priv->ecc_stat_ce_mask) { regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset, &err_addr); - if (priv->ecc_uecnt_offset) + if (priv->ecc_cecnt_offset) regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset, &err_count); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, @@ -127,7 +128,6 @@ static ssize_t altr_sdr_mc_err_inject_write(struct file *file, ptemp = dma_alloc_coherent(mci->pdev, 16, &dma_handle, GFP_KERNEL); if (!ptemp) { - dma_free_coherent(mci->pdev, 16, ptemp, dma_handle); edac_printk(KERN_ERR, EDAC_MC, "Inject: Buffer Allocation error\n"); return -ENOMEM; @@ -279,7 +279,6 @@ release: static int altr_sdram_probe(struct platform_device *pdev) { - const struct of_device_id *id; struct edac_mc_layer layers[2]; struct mem_ctl_info *mci; struct altr_sdram_mc_data *drvdata; @@ -290,10 +289,6 @@ static int altr_sdram_probe(struct platform_device *pdev) int irq, irq2, res = 0; unsigned long mem_size, irqflags = 0; - id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev); - if (!id) - return -ENODEV; - /* Grab the register range from the sdr controller in device tree */ mc_vbase = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "altr,sdr-syscon"); @@ -304,8 +299,7 @@ static int altr_sdram_probe(struct platform_device *pdev) } /* Check specific dependencies for the module */ - priv = of_match_node(altr_sdram_ctrl_of_match, - pdev->dev.of_node)->data; + priv = device_get_match_data(&pdev->dev); /* Validate the SDRAM controller has ECC enabled */ if (regmap_read(mc_vbase, priv->ecc_ctrl_offset, &read_reg) || @@ -459,15 +453,13 @@ free: return res; } -static int altr_sdram_remove(struct platform_device *pdev) +static void altr_sdram_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); platform_set_drvdata(pdev, NULL); - - return 0; } /* @@ -744,8 +736,7 @@ static int altr_edac_device_probe(struct platform_device *pdev) } dci = edac_device_alloc_ctl_info(sizeof(*drvdata), ecc_name, - 1, ecc_name, 1, 0, NULL, 0, - dev_instance++); + 1, ecc_name, 1, 0, dev_instance++); if (!dci) { edac_printk(KERN_ERR, EDAC_DEVICE, @@ -812,7 +803,7 @@ fail: return res; } -static int altr_edac_device_remove(struct platform_device *pdev) +static void altr_edac_device_remove(struct platform_device *pdev) { struct edac_device_ctl_info *dci = platform_get_drvdata(pdev); struct altr_edac_device_dev *drvdata = dci->pvt_info; @@ -820,8 +811,6 @@ static int altr_edac_device_remove(struct platform_device *pdev) debugfs_remove_recursive(drvdata->debugfs_dir); edac_device_del_device(&pdev->dev); 
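/*
 * Teardown mirrors the probe path in reverse: the control structure is
 * unregistered from the EDAC core first and only freed below, once
 * nothing can reach it anymore.
 */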
edac_device_free_ctl_info(dci); - - return 0; } static struct platform_driver altr_edac_device_driver = { @@ -1015,9 +1004,6 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, } } - /* Interrupt mode set to every SBERR */ - regmap_write(ecc_mgr_map, ALTR_A10_ECC_INTMODE_OFST, - ALTR_A10_ECC_INTMODE); /* Enable ECC */ ecc_set_bits(ecc_ctrl_en_mask, (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); @@ -1198,10 +1184,22 @@ altr_check_ocram_deps_init(struct altr_edac_device_dev *device) if (ret) return ret; - /* Verify OCRAM has been initialized */ + /* + * Verify that OCRAM has been initialized. + * During a warm reset, OCRAM contents are retained, but the control + * and status registers are reset to their default values. Therefore, + * ECC must be explicitly re-enabled in the control register. + * Error condition: if INITCOMPLETEA is clear and ECC_EN is already set. + */ if (!ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA, - (base + ALTR_A10_ECC_INITSTAT_OFST))) - return -ENODEV; + (base + ALTR_A10_ECC_INITSTAT_OFST))) { + if (!ecc_test_bits(ALTR_A10_ECC_EN, + (base + ALTR_A10_ECC_CTRL_OFST))) + ecc_set_bits(ALTR_A10_ECC_EN, + (base + ALTR_A10_ECC_CTRL_OFST)); + else + return -ENODEV; + } /* Enable IRQ on Single Bit Error */ writel(ALTR_A10_ECC_SERRINTEN, (base + ALTR_A10_ECC_ERRINTENS_OFST)); @@ -1371,7 +1369,7 @@ static const struct edac_device_prv_data a10_enetecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject2_fops, + .inject_fops = &altr_edac_a10_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_ETHERNET */ @@ -1461,7 +1459,7 @@ static const struct edac_device_prv_data a10_usbecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject2_fops, + .inject_fops = &altr_edac_a10_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_USB */ @@ -1523,7 +1521,7 @@ static int altr_portb_setup(struct altr_edac_device_dev *device) /* Create the PortB EDAC device */ edac_idx = edac_device_alloc_index(); dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name, 1, - ecc_name, 1, 0, NULL, 0, edac_idx); + ecc_name, 1, 0, edac_idx); if (!dci) { edac_printk(KERN_ERR, EDAC_DEVICE, "%s: Unable to allocate PortB EDAC device\n", @@ -1759,9 +1757,9 @@ altr_edac_a10_device_trig(struct file *file, const char __user *user_buf, local_irq_save(flags); if (trig_type == ALTR_UE_TRIGGER_CHAR) - writel(priv->ue_set_mask, set_addr); + writew(priv->ue_set_mask, set_addr); else - writel(priv->ce_set_mask, set_addr); + writew(priv->ce_set_mask, set_addr); /* Ensure the interrupt test bits are set */ wmb(); @@ -1791,7 +1789,7 @@ altr_edac_a10_device_trig2(struct file *file, const char __user *user_buf, local_irq_save(flags); if (trig_type == ALTR_UE_TRIGGER_CHAR) { - writel(priv->ue_set_mask, set_addr); + writew(priv->ue_set_mask, set_addr); } else { /* Setup read/write of 4 bytes */ writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST); @@ -1930,8 +1928,7 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, edac_idx = edac_device_alloc_index(); dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name, - 1, ecc_name, 1, 0, NULL, 0, - edac_idx); + 1, ecc_name, 1, 0, edac_idx); if (!dci) { edac_printk(KERN_ERR, EDAC_DEVICE, @@ -2138,21 +2135,23 @@ static int altr_edac_a10_probe(struct platform_device *pdev) return PTR_ERR(edac->ecc_mgr_map); } + /* 
Set irq mask for DDR SBE to avoid any pending irq before registration */ + regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, + (A10_SYSMGR_ECC_INTMASK_SDMMCB | A10_SYSMGR_ECC_INTMASK_DDR0)); + edac->irq_chip.name = pdev->dev.of_node->name; edac->irq_chip.irq_mask = a10_eccmgr_irq_mask; edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask; - edac->domain = irq_domain_add_linear(pdev->dev.of_node, 64, - &a10_eccmgr_ic_ops, edac); + edac->domain = irq_domain_create_linear(dev_fwnode(&pdev->dev), 64, &a10_eccmgr_ic_ops, + edac); if (!edac->domain) { dev_err(&pdev->dev, "Error adding IRQ domain\n"); return -ENOMEM; } edac->sb_irq = platform_get_irq(pdev, 0); - if (edac->sb_irq < 0) { - dev_err(&pdev->dev, "No SBERR IRQ resource\n"); + if (edac->sb_irq < 0) return edac->sb_irq; - } irq_set_chained_handler_and_data(edac->sb_irq, altr_edac_a10_irq_handler, @@ -2184,10 +2183,9 @@ static int altr_edac_a10_probe(struct platform_device *pdev) } #else edac->db_irq = platform_get_irq(pdev, 1); - if (edac->db_irq < 0) { - dev_err(&pdev->dev, "No DBERR IRQ resource\n"); + if (edac->db_irq < 0) return edac->db_irq; - } + irq_set_chained_handler_and_data(edac->db_irq, altr_edac_a10_irq_handler, edac); #endif @@ -2226,6 +2224,5 @@ static struct platform_driver altr_edac_a10_driver = { }; module_platform_driver(altr_edac_a10_driver); -MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Thor Thayer"); MODULE_DESCRIPTION("EDAC Driver for Altera Memories"); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 3727e72c8c2e..7248d24c4908 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -249,6 +249,8 @@ struct altr_sdram_mc_data { #define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 #define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 #define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) +#define A10_SYSMGR_ECC_INTMASK_SDMMCB BIT(16) +#define A10_SYSMGR_ECC_INTMASK_DDR0 BIT(17) #define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C #define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index e3318e5575a3..2391f3469961 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <linux/ras.h> +#include <linux/string_choices.h> #include "amd64_edac.h" -#include <asm/amd_nb.h> +#include <asm/amd/nb.h> +#include <asm/amd/node.h> static struct edac_pci_ctl_info *pci_ctl; @@ -13,15 +16,12 @@ module_param(ecc_enable_override, int, 0644); static struct msr __percpu *msrs; -static struct amd64_family_type *fam_type; - -static inline u32 get_umc_reg(u32 reg) +static inline u32 get_umc_reg(struct amd64_pvt *pvt, u32 reg) { - if (!fam_type->flags.zn_regs_v2) + if (!pvt->flags.zn_regs_v2) return reg; switch (reg) { - case UMCCH_ADDR_CFG: return UMCCH_ADDR_CFG_DDR5; case UMCCH_ADDR_MASK_SEC: return UMCCH_ADDR_MASK_SEC_DDR5; case UMCCH_DIMM_CFG: return UMCCH_DIMM_CFG_DDR5; } @@ -82,7 +82,7 @@ int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, amd64_warn("%s: error reading F%dx%03x.\n", func, PCI_FUNC(pdev->devfn), offset); - return err; + return pcibios_err_to_errno(err); } int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, @@ -95,7 +95,7 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, amd64_warn("%s: error writing to F%dx%03x.\n", func, PCI_FUNC(pdev->devfn), offset); - return err; + return pcibios_err_to_errno(err); } /* @@ -182,21 +182,6 @@ static inline int amd64_read_dct_pci_cfg(struct amd64_pvt *pvt, u8 dct, * other archs, we 
might not have access to the caches directly. */ -static inline void __f17h_set_scrubval(struct amd64_pvt *pvt, u32 scrubval) -{ - /* - * Fam17h supports scrub values between 0x5 and 0x14. Also, the values - * are shifted down by 0x5, so scrubval 0x5 is written to the register - * as 0x0, scrubval 0x6 as 0x1, etc. - */ - if (scrubval >= 0x5 && scrubval <= 0x14) { - scrubval -= 0x5; - pci_write_bits32(pvt->F6, F17H_SCR_LIMIT_ADDR, scrubval, 0xF); - pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 1, 0x1); - } else { - pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 0, 0x1); - } -} /* * Scan the scrub rate mapping table for a close or matching bandwidth value to * issue. If requested is too big, then use last maximum value found. @@ -229,9 +214,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate) scrubval = scrubrates[i].scrubval; - if (pvt->umc) { - __f17h_set_scrubval(pvt, scrubval); - } else if (pvt->fam == 0x15 && pvt->model == 0x60) { + if (pvt->fam == 0x15 && pvt->model == 0x60) { f15h_select_dct(pvt, 0); pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F); f15h_select_dct(pvt, 1); @@ -271,16 +254,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci) int i, retval = -EINVAL; u32 scrubval = 0; - if (pvt->umc) { - amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval); - if (scrubval & BIT(0)) { - amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval); - scrubval &= 0xF; - scrubval += 0x5; - } else { - scrubval = 0; - } - } else if (pvt->fam == 0x15) { + if (pvt->fam == 0x15) { /* Erratum #505 */ if (pvt->model < 0x10) f15h_select_dct(pvt, 0); @@ -463,7 +437,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, for (i = 0; i < pvt->csels[dct].m_cnt; i++) #define for_each_umc(i) \ - for (i = 0; i < fam_type->max_mcs; i++) + for (i = 0; i < pvt->max_mcs; i++) /* * @input_addr is an InputAddr associated with the node given by mci. Return the @@ -1003,321 +977,186 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr) return csrow; } -/* Protect the PCI config register pairs used for DF indirect access. */ -static DEFINE_MUTEX(df_indirect_mutex); - /* - * Data Fabric Indirect Access uses FICAA/FICAD. - * - * Fabric Indirect Configuration Access Address (FICAA): Constructed based - * on the device's Instance Id and the PCI function and register offset of - * the desired register. + * See AMD PPR DF::LclNodeTypeMap * - * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO - * and FICAD HI registers but so far we only need the LO register. + * This register gives information for nodes of the same type within a system. * - * Use Instance Id 0xFF to indicate a broadcast read. + * Reading this register from a GPU node will tell how many GPU nodes are in the + * system and what the lowest AMD Node ID value is for the GPU nodes. Use this + * info to fixup the Linux logical "Node ID" value set in the AMD NB code and EDAC. */ -#define DF_BROADCAST 0xFF -static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) -{ - struct pci_dev *F4; - u32 ficaa; - int err = -ENODEV; - - if (node >= amd_nb_num()) - goto out; - - F4 = node_to_amd_nb(node)->link; - if (!F4) - goto out; - - ficaa = (instance_id == DF_BROADCAST) ? 
0 : 1; - ficaa |= reg & 0x3FC; - ficaa |= (func & 0x7) << 11; - ficaa |= instance_id << 16; - - mutex_lock(&df_indirect_mutex); - - err = pci_write_config_dword(F4, 0x5C, ficaa); - if (err) { - pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); - goto out_unlock; - } +static struct local_node_map { + u16 node_count; + u16 base_node_id; +} gpu_node_map; - err = pci_read_config_dword(F4, 0x98, lo); - if (err) - pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); - -out_unlock: - mutex_unlock(&df_indirect_mutex); - -out: - return err; -} +#define PCI_DEVICE_ID_AMD_MI200_DF_F1 0x14d1 +#define REG_LOCAL_NODE_TYPE_MAP 0x144 -static int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) -{ - return __df_indirect_read(node, func, reg, instance_id, lo); -} +/* Local Node Type Map (LNTM) fields */ +#define LNTM_NODE_COUNT GENMASK(27, 16) +#define LNTM_BASE_NODE_ID GENMASK(11, 0) -static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo) +static int gpu_get_node_map(struct amd64_pvt *pvt) { - return __df_indirect_read(node, func, reg, DF_BROADCAST, lo); -} - -struct addr_ctx { - u64 ret_addr; + struct pci_dev *pdev; + int ret; u32 tmp; - u16 nid; - u8 inst_id; -}; - -static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) -{ - u64 dram_base_addr, dram_limit_addr, dram_hole_base; - - u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; - u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; - u8 intlv_addr_sel, intlv_addr_bit; - u8 num_intlv_bits, hashed_bit; - u8 lgcy_mmio_hole_en, base = 0; - u8 cs_mask, cs_id = 0; - bool hash_enabled = false; - - struct addr_ctx ctx; - - memset(&ctx, 0, sizeof(ctx)); - - /* Start from the normalized address */ - ctx.ret_addr = norm_addr; - - ctx.nid = nid; - ctx.inst_id = umc; - - /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ - if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp)) - goto out_err; - - /* Remove HiAddrOffset from normalized address, if enabled: */ - if (ctx.tmp & BIT(0)) { - u64 hi_addr_offset = (ctx.tmp & GENMASK_ULL(31, 20)) << 8; - - if (norm_addr >= hi_addr_offset) { - ctx.ret_addr -= hi_addr_offset; - base = 1; - } - } - /* Read D18F0x110 (DramBaseAddress). */ - if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &ctx.tmp)) - goto out_err; - - /* Check if address range is valid. */ - if (!(ctx.tmp & BIT(0))) { - pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", - __func__, ctx.tmp); - goto out_err; - } - - lgcy_mmio_hole_en = ctx.tmp & BIT(1); - intlv_num_chan = (ctx.tmp >> 4) & 0xF; - intlv_addr_sel = (ctx.tmp >> 8) & 0x7; - dram_base_addr = (ctx.tmp & GENMASK_ULL(31, 12)) << 16; - - /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ - if (intlv_addr_sel > 3) { - pr_err("%s: Invalid interleave address select %d.\n", - __func__, intlv_addr_sel); - goto out_err; - } - - /* Read D18F0x114 (DramLimitAddress). 
*/ - if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp)) - goto out_err; - - intlv_num_sockets = (ctx.tmp >> 8) & 0x1; - intlv_num_dies = (ctx.tmp >> 10) & 0x3; - dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); - - intlv_addr_bit = intlv_addr_sel + 8; - - /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ - switch (intlv_num_chan) { - case 0: intlv_num_chan = 0; break; - case 1: intlv_num_chan = 1; break; - case 3: intlv_num_chan = 2; break; - case 5: intlv_num_chan = 3; break; - case 7: intlv_num_chan = 4; break; - - case 8: intlv_num_chan = 1; - hash_enabled = true; - break; - default: - pr_err("%s: Invalid number of interleaved channels %d.\n", - __func__, intlv_num_chan); - goto out_err; - } + /* + * Mapping of nodes from hardware-provided AMD Node ID to a + * Linux logical one is applicable for MI200 models. Therefore, + * return early for other heterogeneous systems. + */ + if (pvt->F3->device != PCI_DEVICE_ID_AMD_MI200_DF_F3) + return 0; - num_intlv_bits = intlv_num_chan; + /* + * Node ID 0 is reserved for CPUs. Therefore, a non-zero Node ID + * means the values have been already cached. + */ + if (gpu_node_map.base_node_id) + return 0; - if (intlv_num_dies > 2) { - pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", - __func__, intlv_num_dies); - goto out_err; + pdev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F1, NULL); + if (!pdev) { + ret = -ENODEV; + goto out; } - num_intlv_bits += intlv_num_dies; - - /* Add a bit if sockets are interleaved. */ - num_intlv_bits += intlv_num_sockets; - - /* Assert num_intlv_bits <= 4 */ - if (num_intlv_bits > 4) { - pr_err("%s: Invalid interleave bits %d.\n", - __func__, num_intlv_bits); - goto out_err; + ret = pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp); + if (ret) { + ret = pcibios_err_to_errno(ret); + goto out; } - if (num_intlv_bits > 0) { - u64 temp_addr_x, temp_addr_i, temp_addr_y; - u8 die_id_bit, sock_id_bit, cs_fabric_id; + gpu_node_map.node_count = FIELD_GET(LNTM_NODE_COUNT, tmp); + gpu_node_map.base_node_id = FIELD_GET(LNTM_BASE_NODE_ID, tmp); - /* - * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. - * This is the fabric id for this coherent slave. Use - * umc/channel# as instance id of the coherent slave - * for FICAA. - */ - if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp)) - goto out_err; - - cs_fabric_id = (ctx.tmp >> 8) & 0xFF; - die_id_bit = 0; - - /* If interleaved over more than 1 channel: */ - if (intlv_num_chan) { - die_id_bit = intlv_num_chan; - cs_mask = (1 << die_id_bit) - 1; - cs_id = cs_fabric_id & cs_mask; - } +out: + pci_dev_put(pdev); + return ret; +} - sock_id_bit = die_id_bit; +static int fixup_node_id(int node_id, struct mce *m) +{ + /* MCA_IPID[InstanceIdHi] give the AMD Node ID for the bank. */ + u8 nid = (m->ipid >> 44) & 0xF; - /* Read D18F1x208 (SystemFabricIdMask). */ - if (intlv_num_dies || intlv_num_sockets) - if (df_indirect_read_broadcast(nid, 1, 0x208, &ctx.tmp)) - goto out_err; + if (smca_get_bank_type(m->extcpu, m->bank) != SMCA_UMC_V2) + return node_id; - /* If interleaved over more than 1 die. */ - if (intlv_num_dies) { - sock_id_bit = die_id_bit + intlv_num_dies; - die_id_shift = (ctx.tmp >> 24) & 0xF; - die_id_mask = (ctx.tmp >> 8) & 0xFF; + /* Nodes below the GPU base node are CPU nodes and don't need a fixup. 
*/ + if (nid < gpu_node_map.base_node_id) + return node_id; - cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; - } + /* Convert the hardware-provided AMD Node ID to a Linux logical one. */ + return nid - gpu_node_map.base_node_id + 1; +} - /* If interleaved over more than 1 socket. */ - if (intlv_num_sockets) { - socket_id_shift = (ctx.tmp >> 28) & 0xF; - socket_id_mask = (ctx.tmp >> 16) & 0xFF; +static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); - cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; - } +/* + * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs + * are ECC capable. + */ +static unsigned long dct_determine_edac_cap(struct amd64_pvt *pvt) +{ + unsigned long edac_cap = EDAC_FLAG_NONE; + u8 bit; - /* - * The pre-interleaved address consists of XXXXXXIIIYYYYY - * where III is the ID for this CS, and XXXXXXYYYYY are the - * address bits from the post-interleaved address. - * "num_intlv_bits" has been calculated to tell us how many "I" - * bits there are. "intlv_addr_bit" tells us how many "Y" bits - * there are (where "I" starts). - */ - temp_addr_y = ctx.ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0); - temp_addr_i = (cs_id << intlv_addr_bit); - temp_addr_x = (ctx.ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; - ctx.ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; - } + bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F) + ? 19 + : 17; - /* Add dram base address */ - ctx.ret_addr += dram_base_addr; + if (pvt->dclr0 & BIT(bit)) + edac_cap = EDAC_FLAG_SECDED; - /* If legacy MMIO hole enabled */ - if (lgcy_mmio_hole_en) { - if (df_indirect_read_broadcast(nid, 0, 0x104, &ctx.tmp)) - goto out_err; + return edac_cap; +} - dram_hole_base = ctx.tmp & GENMASK(31, 24); - if (ctx.ret_addr >= dram_hole_base) - ctx.ret_addr += (BIT_ULL(32) - dram_hole_base); - } +static unsigned long umc_determine_edac_cap(struct amd64_pvt *pvt) +{ + u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0; + unsigned long edac_cap = EDAC_FLAG_NONE; - if (hash_enabled) { - /* Save some parentheses and grab ls-bit at the end. */ - hashed_bit = (ctx.ret_addr >> 12) ^ - (ctx.ret_addr >> 18) ^ - (ctx.ret_addr >> 21) ^ - (ctx.ret_addr >> 30) ^ - cs_id; + for_each_umc(i) { + if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT)) + continue; - hashed_bit &= BIT(0); + umc_en_mask |= BIT(i); - if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0))) - ctx.ret_addr ^= BIT(intlv_addr_bit); + /* UMC Configuration bit 12 (DimmEccEn) */ + if (pvt->umc[i].umc_cfg & BIT(12)) + dimm_ecc_en_mask |= BIT(i); } - /* Is calculated system address is above DRAM limit address? */ - if (ctx.ret_addr > dram_limit_addr) - goto out_err; - - *sys_addr = ctx.ret_addr; - return 0; + if (umc_en_mask == dimm_ecc_en_mask) + edac_cap = EDAC_FLAG_SECDED; -out_err: - return -EINVAL; + return edac_cap; } -static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); - /* - * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs - * are ECC capable. + * debug routine to display the memory sizes of all logical DIMMs and its + * CSROWs */ -static unsigned long determine_edac_cap(struct amd64_pvt *pvt) +static void dct_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) { - unsigned long edac_cap = EDAC_FLAG_NONE; - u8 bit; + u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; + u32 dbam = ctrl ? 
pvt->dbam1 : pvt->dbam0; + int dimm, size0, size1; - if (pvt->umc) { - u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0; + if (pvt->fam == 0xf) { + /* K8 families < revF not supported yet */ + if (pvt->ext_model < K8_REV_F) + return; - for_each_umc(i) { - if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT)) - continue; + WARN_ON(ctrl != 0); + } - umc_en_mask |= BIT(i); + if (pvt->fam == 0x10) { + dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1 + : pvt->dbam0; + dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? + pvt->csels[1].csbases : + pvt->csels[0].csbases; + } else if (ctrl) { + dbam = pvt->dbam0; + dcsb = pvt->csels[1].csbases; + } + edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n", + ctrl, dbam); - /* UMC Configuration bit 12 (DimmEccEn) */ - if (pvt->umc[i].umc_cfg & BIT(12)) - dimm_ecc_en_mask |= BIT(i); - } + edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl); - if (umc_en_mask == dimm_ecc_en_mask) - edac_cap = EDAC_FLAG_SECDED; - } else { - bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F) - ? 19 - : 17; + /* Dump memory sizes for DIMM and its CSROWs */ + for (dimm = 0; dimm < 4; dimm++) { + size0 = 0; + if (dcsb[dimm * 2] & DCSB_CS_ENABLE) + /* + * For F15m60h, we need multiplier for LRDIMM cs_size + * calculation. We pass dimm value to the dbam_to_cs + * mapper so we can find the multiplier from the + * corresponding DCSM. + */ + size0 = pvt->ops->dbam_to_cs(pvt, ctrl, + DBAM_DIMM(dimm, dbam), + dimm); - if (pvt->dclr0 & BIT(bit)) - edac_cap = EDAC_FLAG_SECDED; - } + size1 = 0; + if (dcsb[dimm * 2 + 1] & DCSB_CS_ENABLE) + size1 = pvt->ops->dbam_to_cs(pvt, ctrl, + DBAM_DIMM(dimm, dbam), + dimm); - return edac_cap; + amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", + dimm * 2, size0, + dimm * 2 + 1, size1); + } } -static void debug_display_dimm_sizes(struct amd64_pvt *, u8); static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) { @@ -1333,22 +1172,21 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3)); } - edac_dbg(1, "All DIMMs support ECC:%s\n", - (dclr & BIT(19)) ? "yes" : "no"); + edac_dbg(1, "All DIMMs support ECC: %s\n", str_yes_no(dclr & BIT(19))); edac_dbg(1, " PAR/ERR parity: %s\n", - (dclr & BIT(8)) ? "enabled" : "disabled"); + str_enabled_disabled(dclr & BIT(8))); if (pvt->fam == 0x10) edac_dbg(1, " DCT 128bit mode width: %s\n", (dclr & BIT(11)) ? "128b" : "64b"); edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n", - (dclr & BIT(12)) ? "yes" : "no", - (dclr & BIT(13)) ? "yes" : "no", - (dclr & BIT(14)) ? "yes" : "no", - (dclr & BIT(15)) ? "yes" : "no"); + str_yes_no(dclr & BIT(12)), + str_yes_no(dclr & BIT(13)), + str_yes_no(dclr & BIT(14)), + str_yes_no(dclr & BIT(15))); } #define CS_EVEN_PRIMARY BIT(0) @@ -1360,7 +1198,7 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) #define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY) #define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY) -static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) +static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) { u8 base, count = 0; int cs_mode = 0; @@ -1371,7 +1209,9 @@ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) if (csrow_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_PRIMARY; - /* Asymmetric dual-rank DIMM support. 
*/ + if (csrow_sec_enabled(2 * dimm, ctrl, pvt)) + cs_mode |= CS_EVEN_SECONDARY; + if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_SECONDARY; @@ -1392,7 +1232,106 @@ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) return cs_mode; } -static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) +static int calculate_cs_size(u32 mask, unsigned int cs_mode) +{ + int msb, weight, num_zero_bits; + u32 deinterleaved_mask; + + if (!mask) + return 0; + + /* + * The number of zero bits in the mask is equal to the number of bits + * in a full mask minus the number of bits in the current mask. + * + * The MSB is the number of bits in the full mask because BIT[0] is + * always 0. + * + * In the special 3 Rank interleaving case, a single bit is flipped + * without swapping with the most significant bit. This can be handled + * by keeping the MSB where it is and ignoring the single zero bit. + */ + msb = fls(mask) - 1; + weight = hweight_long(mask); + num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); + + /* Take the number of zero bits off from the top of the mask. */ + deinterleaved_mask = GENMASK(msb - num_zero_bits, 1); + edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask); + + return (deinterleaved_mask >> 2) + 1; +} + +static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec, + unsigned int cs_mode, int csrow_nr, int dimm) +{ + int size; + + edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); + edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask); + + /* Register [31:1] = Address [39:9]. Size is in kBs here. */ + size = calculate_cs_size(addr_mask, cs_mode); + + edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec); + size += calculate_cs_size(addr_mask_sec, cs_mode); + + /* Return size in MBs. */ + return size >> 10; +} + +static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, + unsigned int cs_mode, int csrow_nr) +{ + u32 addr_mask = 0, addr_mask_sec = 0; + int cs_mask_nr = csrow_nr; + int dimm, size = 0; + + /* No Chip Selects are enabled. */ + if (!cs_mode) + return size; + + /* Requested size of an even CS but none are enabled. */ + if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1)) + return size; + + /* Requested size of an odd CS but none are enabled. */ + if (!(cs_mode & CS_ODD) && (csrow_nr & 1)) + return size; + + /* + * Family 17h introduced systems with one mask per DIMM, + * and two Chip Selects per DIMM. + * + * CS0 and CS1 -> MASK0 / DIMM0 + * CS2 and CS3 -> MASK1 / DIMM1 + * + * Family 19h Model 10h introduced systems with one mask per Chip Select, + * and two Chip Selects per DIMM. + * + * CS0 -> MASK0 -> DIMM0 + * CS1 -> MASK1 -> DIMM0 + * CS2 -> MASK2 -> DIMM1 + * CS3 -> MASK3 -> DIMM1 + * + * Keep the mask number equal to the Chip Select number for newer systems, + * and shift the mask number for older systems. 
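+ * For example, after the shift below CS3 selects MASK1 on the older + * layout, while zn_regs_v2 systems keep CS3 paired with MASK3.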
+ */ + dimm = csrow_nr >> 1; + + if (!pvt->flags.zn_regs_v2) + cs_mask_nr >>= 1; + + if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY)) + addr_mask = pvt->csels[umc].csmasks[cs_mask_nr]; + + if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY)) + addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr]; + + return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm); +} + +static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) { int dimm, size0, size1, cs0, cs1, cs_mode; @@ -1402,10 +1341,10 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) cs0 = dimm * 2; cs1 = dimm * 2 + 1; - cs_mode = f17_get_cs_mode(dimm, ctrl, pvt); + cs_mode = umc_get_cs_mode(dimm, ctrl, pvt); - size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0); - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1); + size0 = umc_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs0); + size1 = umc_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs1); amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", cs0, size0, @@ -1413,63 +1352,44 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) } } -static void __dump_misc_regs_df(struct amd64_pvt *pvt) +static void umc_dump_misc_regs(struct amd64_pvt *pvt) { struct amd64_umc *umc; - u32 i, tmp, umc_base; + u32 i; for_each_umc(i) { - umc_base = get_umc_base(i); umc = &pvt->umc[i]; edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg); edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg); edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl); edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl); - - amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ECC_BAD_SYMBOL, &tmp); - edac_dbg(1, "UMC%d ECC bad symbol: 0x%x\n", i, tmp); - - amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_UMC_CAP, &tmp); - edac_dbg(1, "UMC%d UMC cap: 0x%x\n", i, tmp); edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi); edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n", - i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no", - (umc->umc_cap_hi & BIT(31)) ? "yes" : "no"); + i, str_yes_no(umc->umc_cap_hi & BIT(30)), + str_yes_no(umc->umc_cap_hi & BIT(31))); edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n", - i, (umc->umc_cfg & BIT(12)) ? "yes" : "no"); + i, str_yes_no(umc->umc_cfg & BIT(12))); edac_dbg(1, "UMC%d x4 DIMMs present: %s\n", - i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no"); + i, str_yes_no(umc->dimm_cfg & BIT(6))); edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", - i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no"); - - if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) { - amd_smn_read(pvt->mc_node_id, - umc_base + get_umc_reg(UMCCH_ADDR_CFG), - &tmp); - edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n", - i, 1 << ((tmp >> 4) & 0x3)); - } + i, str_yes_no(umc->dimm_cfg & BIT(7))); - debug_display_dimm_sizes_df(pvt, i); + umc_debug_display_dimm_sizes(pvt, i); } - - edac_dbg(1, "F0x104 (DRAM Hole Address): 0x%08x, base: 0x%08x\n", - pvt->dhar, dhar_base(pvt)); } -/* Display and decode various NB registers for debug purposes. */ -static void __dump_misc_regs(struct amd64_pvt *pvt) +static void dct_dump_misc_regs(struct amd64_pvt *pvt) { edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap); edac_dbg(1, " NB two channel DRAM capable: %s\n", - (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no"); + str_yes_no(pvt->nbcap & NBCAP_DCT_DUAL)); edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n", - (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no", - (pvt->nbcap & NBCAP_CHIPKILL) ? 
"yes" : "no"); + str_yes_no(pvt->nbcap & NBCAP_SECDED), + str_yes_no(pvt->nbcap & NBCAP_CHIPKILL)); debug_dump_dramcfg_low(pvt, pvt->dclr0, 0); @@ -1480,28 +1400,19 @@ static void __dump_misc_regs(struct amd64_pvt *pvt) (pvt->fam == 0xf) ? k8_dhar_offset(pvt) : f10_dhar_offset(pvt)); - debug_display_dimm_sizes(pvt, 0); + dct_debug_display_dimm_sizes(pvt, 0); /* everything below this point is Fam10h and above */ if (pvt->fam == 0xf) return; - debug_display_dimm_sizes(pvt, 1); + dct_debug_display_dimm_sizes(pvt, 1); /* Only if NOT ganged does dclr1 have valid info */ if (!dct_ganging_enabled(pvt)) debug_dump_dramcfg_low(pvt, pvt->dclr1, 1); -} - -/* Display and decode various NB registers for debug purposes. */ -static void dump_misc_regs(struct amd64_pvt *pvt) -{ - if (pvt->umc) - __dump_misc_regs_df(pvt); - else - __dump_misc_regs(pvt); - edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); + edac_dbg(1, " DramHoleValid: %s\n", str_yes_no(dhar_valid(pvt))); amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); } @@ -1509,7 +1420,7 @@ static void dump_misc_regs(struct amd64_pvt *pvt) /* * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60] */ -static void prep_chip_selects(struct amd64_pvt *pvt) +static void dct_prep_chip_selects(struct amd64_pvt *pvt) { if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; @@ -1517,21 +1428,23 @@ static void prep_chip_selects(struct amd64_pvt *pvt) } else if (pvt->fam == 0x15 && pvt->model == 0x30) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; - } else if (pvt->fam >= 0x17) { - int umc; - - for_each_umc(umc) { - pvt->csels[umc].b_cnt = 4; - pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2; - } - } else { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; } } -static void read_umc_base_mask(struct amd64_pvt *pvt) +static void umc_prep_chip_selects(struct amd64_pvt *pvt) +{ + int umc; + + for_each_umc(umc) { + pvt->csels[umc].b_cnt = 4; + pvt->csels[umc].m_cnt = pvt->flags.zn_regs_v2 ? 
4 : 2; + } +} + +static void umc_read_base_mask(struct amd64_pvt *pvt) { u32 umc_base_reg, umc_base_reg_sec; u32 umc_mask_reg, umc_mask_reg_sec; @@ -1540,6 +1453,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) u32 *base, *base_sec; u32 *mask, *mask_sec; int cs, umc; + u32 tmp; for_each_umc(umc) { umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR; @@ -1552,17 +1466,21 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) base_reg = umc_base_reg + (cs * 4); base_reg_sec = umc_base_reg_sec + (cs * 4); - if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) + if (!amd_smn_read(pvt->mc_node_id, base_reg, &tmp)) { + *base = tmp; edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", umc, cs, *base, base_reg); + } - if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, base_sec)) + if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, &tmp)) { + *base_sec = tmp; edac_dbg(0, " DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n", umc, cs, *base_sec, base_reg_sec); + } } umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; - umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC); + umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(pvt, UMCCH_ADDR_MASK_SEC); for_each_chip_select_mask(cs, umc, pvt) { mask = &pvt->csels[umc].csmasks[cs]; @@ -1571,13 +1489,17 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) mask_reg = umc_mask_reg + (cs * 4); mask_reg_sec = umc_mask_reg_sec + (cs * 4); - if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) + if (!amd_smn_read(pvt->mc_node_id, mask_reg, &tmp)) { + *mask = tmp; edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", umc, cs, *mask, mask_reg); + } - if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, mask_sec)) + if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, &tmp)) { + *mask_sec = tmp; edac_dbg(0, " DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n", umc, cs, *mask_sec, mask_reg_sec); + } } } } @@ -1585,15 +1507,10 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) /* * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers */ -static void read_dct_base_mask(struct amd64_pvt *pvt) +static void dct_read_base_mask(struct amd64_pvt *pvt) { int cs; - prep_chip_selects(pvt); - - if (pvt->umc) - return read_umc_base_mask(pvt); - for_each_chip_select(cs, 0, pvt) { int reg0 = DCSB0 + (cs * 4); int reg1 = DCSB1 + (cs * 4); @@ -1633,7 +1550,7 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) } } -static void determine_memory_type_df(struct amd64_pvt *pvt) +static void umc_determine_memory_type(struct amd64_pvt *pvt) { struct amd64_umc *umc; u32 i; @@ -1650,7 +1567,7 @@ static void determine_memory_type_df(struct amd64_pvt *pvt) * Check if the system supports the "DDR Type" field in UMC Config * and has DDR5 DIMMs in use. */ - if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { + if (pvt->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { if (umc->dimm_cfg & BIT(5)) umc->dram_type = MEM_LRDDR5; else if (umc->dimm_cfg & BIT(4)) @@ -1670,13 +1587,10 @@ static void determine_memory_type_df(struct amd64_pvt *pvt) } } -static void determine_memory_type(struct amd64_pvt *pvt) +static void dct_determine_memory_type(struct amd64_pvt *pvt) { u32 dram_ctrl, dcsm; - if (pvt->umc) - return determine_memory_type_df(pvt); - switch (pvt->fam) { case 0xf: if (pvt->ext_model >= K8_REV_F) @@ -1726,34 +1640,18 @@ static void determine_memory_type(struct amd64_pvt *pvt) WARN(1, KERN_ERR "%s: Family??? 
0x%x\n", __func__, pvt->fam); pvt->dram_type = MEM_EMPTY; } + + edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); return; ddr3: pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; } -/* Get the number of DCT channels the memory controller is using. */ -static int k8_early_channel_count(struct amd64_pvt *pvt) -{ - int flag; - - if (pvt->ext_model >= K8_REV_F) - /* RevF (NPT) and later */ - flag = pvt->dclr0 & WIDTH_128; - else - /* RevE and earlier */ - flag = pvt->dclr0 & REVE_WIDTH_128; - - /* not used */ - pvt->dclr1 = 0; - - return (flag) ? 2 : 1; -} - /* On F10h and later ErrAddr is MC4_ADDR[47:1] */ static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m) { - u16 mce_nid = topology_die_id(m->extcpu); + u16 mce_nid = topology_amd_node_id(m->extcpu); struct mem_ctl_info *mci; u8 start_bit = 1; u8 end_bit = 47; @@ -2001,69 +1899,6 @@ static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, } } -/* - * Get the number of DCT channels in use. - * - * Return: - * number of Memory Channels in operation - * Pass back: - * contents of the DCL0_LOW register - */ -static int f1x_early_channel_count(struct amd64_pvt *pvt) -{ - int i, j, channels = 0; - - /* On F10h, if we are in 128 bit mode, then we are using 2 channels */ - if (pvt->fam == 0x10 && (pvt->dclr0 & WIDTH_128)) - return 2; - - /* - * Need to check if in unganged mode: In such, there are 2 channels, - * but they are not in 128 bit mode and thus the above 'dclr0' status - * bit will be OFF. - * - * Need to check DCT0[0] and DCT1[0] to see if only one of them has - * their CSEnable bit on. If so, then SINGLE DIMM case. - */ - edac_dbg(0, "Data width is not 128 bits - need more decoding\n"); - - /* - * Check DRAM Bank Address Mapping values for each DIMM to see if there - * is more than just one DIMM present in unganged mode. Need to check - * both controllers since DIMMs can be placed in either one. - */ - for (i = 0; i < 2; i++) { - u32 dbam = (i ? pvt->dbam1 : pvt->dbam0); - - for (j = 0; j < 4; j++) { - if (DBAM_DIMM(j, dbam) > 0) { - channels++; - break; - } - } - } - - if (channels > 2) - channels = 2; - - amd64_info("MCT channel count: %d\n", channels); - - return channels; -} - -static int f17_early_channel_count(struct amd64_pvt *pvt) -{ - int i, channels = 0; - - /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */ - for_each_umc(i) - channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); - - amd64_info("MCT channel count: %d\n", channels); - - return channels; -} - static int ddr3_cs_size(unsigned i, bool dct_width) { unsigned shift = 0; @@ -2191,84 +2026,6 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, return ddr3_cs_size(cs_mode, false); } -static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, - unsigned int cs_mode, int csrow_nr) -{ - u32 addr_mask_orig, addr_mask_deinterleaved; - u32 msb, weight, num_zero_bits; - int cs_mask_nr = csrow_nr; - int dimm, size = 0; - - /* No Chip Selects are enabled. */ - if (!cs_mode) - return size; - - /* Requested size of an even CS but none are enabled. */ - if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1)) - return size; - - /* Requested size of an odd CS but none are enabled. */ - if (!(cs_mode & CS_ODD) && (csrow_nr & 1)) - return size; - - /* - * Family 17h introduced systems with one mask per DIMM, - * and two Chip Selects per DIMM. 
- * - * CS0 and CS1 -> MASK0 / DIMM0 - * CS2 and CS3 -> MASK1 / DIMM1 - * - * Family 19h Model 10h introduced systems with one mask per Chip Select, - * and two Chip Selects per DIMM. - * - * CS0 -> MASK0 -> DIMM0 - * CS1 -> MASK1 -> DIMM0 - * CS2 -> MASK2 -> DIMM1 - * CS3 -> MASK3 -> DIMM1 - * - * Keep the mask number equal to the Chip Select number for newer systems, - * and shift the mask number for older systems. - */ - dimm = csrow_nr >> 1; - - if (!fam_type->flags.zn_regs_v2) - cs_mask_nr >>= 1; - - /* Asymmetric dual-rank DIMM support. */ - if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) - addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr]; - else - addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr]; - - /* - * The number of zero bits in the mask is equal to the number of bits - * in a full mask minus the number of bits in the current mask. - * - * The MSB is the number of bits in the full mask because BIT[0] is - * always 0. - * - * In the special 3 Rank interleaving case, a single bit is flipped - * without swapping with the most significant bit. This can be handled - * by keeping the MSB where it is and ignoring the single zero bit. - */ - msb = fls(addr_mask_orig) - 1; - weight = hweight_long(addr_mask_orig); - num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); - - /* Take the number of zero bits off from the top of the mask. */ - addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); - - edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); - edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig); - edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved); - - /* Register [31:1] = Address [39:9]. Size is in kBs here. */ - size = (addr_mask_deinterleaved >> 2) + 1; - - /* Return size in MBs. */ - return size >> 10; -} - static void read_dram_ctl_register(struct amd64_pvt *pvt) { @@ -2284,15 +2041,15 @@ static void read_dram_ctl_register(struct amd64_pvt *pvt) if (!dct_ganging_enabled(pvt)) edac_dbg(0, " Address range split per DCT: %s\n", - (dct_high_range_enabled(pvt) ? "yes" : "no")); + str_yes_no(dct_high_range_enabled(pvt))); edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n", - (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"), - (dct_memory_cleared(pvt) ? "yes" : "no")); + str_enabled_disabled(dct_data_intlv_enabled(pvt)), + str_yes_no(dct_memory_cleared(pvt))); edac_dbg(0, " channel interleave: %s, " "interleave bits selector: 0x%x\n", - (dct_interleave_enabled(pvt) ? "enabled" : "disabled"), + str_enabled_disabled(dct_interleave_enabled(pvt)), dct_sel_interleave_addr(pvt)); } @@ -2792,227 +2549,6 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, } /* - * debug routine to display the memory sizes of all logical DIMMs and its - * CSROWs - */ -static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) -{ - int dimm, size0, size1; - u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; - u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; - - if (pvt->fam == 0xf) { - /* K8 families < revF not supported yet */ - if (pvt->ext_model < K8_REV_F) - return; - else - WARN_ON(ctrl != 0); - } - - if (pvt->fam == 0x10) { - dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1 - : pvt->dbam0; - dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? 
- pvt->csels[1].csbases : - pvt->csels[0].csbases; - } else if (ctrl) { - dbam = pvt->dbam0; - dcsb = pvt->csels[1].csbases; - } - edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n", - ctrl, dbam); - - edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl); - - /* Dump memory sizes for DIMM and its CSROWs */ - for (dimm = 0; dimm < 4; dimm++) { - - size0 = 0; - if (dcsb[dimm*2] & DCSB_CS_ENABLE) - /* - * For F15m60h, we need multiplier for LRDIMM cs_size - * calculation. We pass dimm value to the dbam_to_cs - * mapper so we can find the multiplier from the - * corresponding DCSM. - */ - size0 = pvt->ops->dbam_to_cs(pvt, ctrl, - DBAM_DIMM(dimm, dbam), - dimm); - - size1 = 0; - if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE) - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, - DBAM_DIMM(dimm, dbam), - dimm); - - amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", - dimm * 2, size0, - dimm * 2 + 1, size1); - } -} - -static struct amd64_family_type family_types[] = { - [K8_CPUS] = { - .ctl_name = "K8", - .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, - .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, - .max_mcs = 2, - .ops = { - .early_channel_count = k8_early_channel_count, - .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, - .dbam_to_cs = k8_dbam_to_chip_select, - } - }, - [F10_CPUS] = { - .ctl_name = "F10h", - .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP, - .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM, - .max_mcs = 2, - .ops = { - .early_channel_count = f1x_early_channel_count, - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f10_dbam_to_chip_select, - } - }, - [F15_CPUS] = { - .ctl_name = "F15h", - .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1, - .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2, - .max_mcs = 2, - .ops = { - .early_channel_count = f1x_early_channel_count, - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f15_dbam_to_chip_select, - } - }, - [F15_M30H_CPUS] = { - .ctl_name = "F15h_M30h", - .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1, - .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, - .max_mcs = 2, - .ops = { - .early_channel_count = f1x_early_channel_count, - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f16_dbam_to_chip_select, - } - }, - [F15_M60H_CPUS] = { - .ctl_name = "F15h_M60h", - .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1, - .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2, - .max_mcs = 2, - .ops = { - .early_channel_count = f1x_early_channel_count, - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f15_m60h_dbam_to_chip_select, - } - }, - [F16_CPUS] = { - .ctl_name = "F16h", - .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, - .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2, - .max_mcs = 2, - .ops = { - .early_channel_count = f1x_early_channel_count, - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f16_dbam_to_chip_select, - } - }, - [F16_M30H_CPUS] = { - .ctl_name = "F16h_M30h", - .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1, - .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, - .max_mcs = 2, - .ops = { - .early_channel_count = f1x_early_channel_count, - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f16_dbam_to_chip_select, - } - }, - [F17_CPUS] = { - .ctl_name = "F17h", - .f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0, - .f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6, - .max_mcs = 2, - .ops = { - .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_addr_mask_to_cs_size, - } - }, - [F17_M10H_CPUS] = { - .ctl_name = "F17h_M10h", - .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0, - .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, - .max_mcs = 2, - .ops = { - 
.early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_addr_mask_to_cs_size, - } - }, - [F17_M30H_CPUS] = { - .ctl_name = "F17h_M30h", - .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0, - .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6, - .max_mcs = 8, - .ops = { - .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_addr_mask_to_cs_size, - } - }, - [F17_M60H_CPUS] = { - .ctl_name = "F17h_M60h", - .f0_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F0, - .f6_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F6, - .max_mcs = 2, - .ops = { - .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_addr_mask_to_cs_size, - } - }, - [F17_M70H_CPUS] = { - .ctl_name = "F17h_M70h", - .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0, - .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6, - .max_mcs = 2, - .ops = { - .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_addr_mask_to_cs_size, - } - }, - [F19_CPUS] = { - .ctl_name = "F19h", - .f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0, - .f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6, - .max_mcs = 8, - .ops = { - .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_addr_mask_to_cs_size, - } - }, - [F19_M10H_CPUS] = { - .ctl_name = "F19h_M10h", - .f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0, - .f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6, - .max_mcs = 12, - .flags.zn_regs_v2 = 1, - .ops = { - .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_addr_mask_to_cs_size, - } - }, - [F19_M50H_CPUS] = { - .ctl_name = "F19h_M50h", - .f0_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F0, - .f6_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F6, - .max_mcs = 2, - .ops = { - .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_addr_mask_to_cs_size, - } - }, -}; - -/* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm * uses those to find the symbol in error and thus the DIMM. @@ -3259,19 +2795,26 @@ static inline void decode_bus_error(int node_id, struct mce *m) * Currently, we can derive the channel number by looking at the 6th nibble in * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel * number. + * + * For DRAM ECC errors, the Chip Select number is given in bits [2:0] of + * the MCA_SYND[ErrorInformation] field. 
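[Note: the new umc_get_err_info() below performs exactly this decode. A standalone sketch with hypothetical register values, showing the two extractions:]

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t ipid = 0x00500000ULL; /* hypothetical: channel in the 6th nibble */
            uint64_t synd = 0x3ULL;        /* hypothetical: chip select in bits [2:0] */

            unsigned channel = (ipid & 0xffffffffULL) >> 20; /* -> 5 */
            unsigned csrow   = synd & 0x7;                   /* -> 3 */

            printf("channel=%u csrow=%u\n", channel, csrow);
            return 0;
    }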
*/ -static int find_umc_channel(struct mce *m) +static void umc_get_err_info(struct mce *m, struct err_info *err) { - return (m->ipid & GENMASK(31, 0)) >> 20; + err->channel = (m->ipid & GENMASK(31, 0)) >> 20; + err->csrow = m->synd & 0x7; } static void decode_umc_error(int node_id, struct mce *m) { u8 ecc_type = (m->status >> 45) & 0x3; struct mem_ctl_info *mci; + unsigned long sys_addr; struct amd64_pvt *pvt; + struct atl_err a_err; struct err_info err; - u64 sys_addr; + + node_id = fixup_node_id(node_id, m); mci = edac_mc_find(node_id); if (!mci) @@ -3284,8 +2827,6 @@ static void decode_umc_error(int node_id, struct mce *m) if (m->status & MCI_STATUS_DEFERRED) ecc_type = 3; - err.channel = find_umc_channel(m); - if (!(m->status & MCI_STATUS_SYNDV)) { err.err_code = ERR_SYND; goto log_error; @@ -3300,9 +2841,14 @@ static void decode_umc_error(int node_id, struct mce *m) err.err_code = ERR_CHANNEL; } - err.csrow = m->synd & 0x7; + pvt->ops->get_err_info(m, &err); + + a_err.addr = m->addr; + a_err.ipid = m->ipid; + a_err.cpu = m->extcpu; - if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { + sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err); + if (IS_ERR_VALUE(sys_addr)) { err.err_code = ERR_NORM_ADDR; goto log_error; } @@ -3316,37 +2862,10 @@ log_error: /* * Use pvt->F3 which contains the F3 CPU PCI device to get the related * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error. - * Reserve F0 and F6 on systems with a UMC. */ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2) { - if (pvt->umc) { - pvt->F0 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3); - if (!pvt->F0) { - edac_dbg(1, "F0 not found, device 0x%x\n", pci_id1); - return -ENODEV; - } - - pvt->F6 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3); - if (!pvt->F6) { - pci_dev_put(pvt->F0); - pvt->F0 = NULL; - - edac_dbg(1, "F6 not found: device 0x%x\n", pci_id2); - return -ENODEV; - } - - if (!pci_ctl_dev) - pci_ctl_dev = &pvt->F0->dev; - - edac_dbg(1, "F0: %s\n", pci_name(pvt->F0)); - edac_dbg(1, "F3: %s\n", pci_name(pvt->F3)); - edac_dbg(1, "F6: %s\n", pci_name(pvt->F6)); - - return 0; - } - /* Reserve the ADDRESS MAP Device */ pvt->F1 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3); if (!pvt->F1) { @@ -3374,37 +2893,11 @@ reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2) return 0; } -static void free_mc_sibling_devs(struct amd64_pvt *pvt) -{ - if (pvt->umc) { - pci_dev_put(pvt->F0); - pci_dev_put(pvt->F6); - } else { - pci_dev_put(pvt->F1); - pci_dev_put(pvt->F2); - } -} - static void determine_ecc_sym_sz(struct amd64_pvt *pvt) { pvt->ecc_sym_sz = 4; - if (pvt->umc) { - u8 i; - - for_each_umc(i) { - /* Check enabled channels only: */ - if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { - if (pvt->umc[i].ecc_ctrl & BIT(9)) { - pvt->ecc_sym_sz = 16; - return; - } else if (pvt->umc[i].ecc_ctrl & BIT(7)) { - pvt->ecc_sym_sz = 8; - return; - } - } - } - } else if (pvt->fam >= 0x10) { + if (pvt->fam >= 0x10) { u32 tmp; amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); @@ -3421,11 +2914,11 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt) /* * Retrieve the hardware registers of the memory controller. 
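[Note: two details in umc_read_mc_regs() below. First, each register is now read into a temporary and committed only when amd_smn_read() succeeds, so a failed sideband read leaves the zero-initialized field alone instead of storing stale data. Second, get_umc_reg() remaps a register offset when the zn_regs_v2 flag is set; a sketch of what that remap plausibly looks like, assuming only the DDR5 offsets still visible in amd64_edac.h after this patch:]

    /* Offsets as defined in amd64_edac.h after this patch. */
    #define UMCCH_ADDR_MASK_SEC             0x28
    #define UMCCH_ADDR_MASK_SEC_DDR5        0x30
    #define UMCCH_DIMM_CFG                  0x80
    #define UMCCH_DIMM_CFG_DDR5             0x90

    struct flags { unsigned int zn_regs_v2 : 1; };

    static unsigned int get_umc_reg(const struct flags *f, unsigned int reg)
    {
            if (!f->zn_regs_v2)
                    return reg;

            /* Family 19h Model 10h and later moved these registers. */
            switch (reg) {
            case UMCCH_ADDR_MASK_SEC:       return UMCCH_ADDR_MASK_SEC_DDR5;
            case UMCCH_DIMM_CFG:            return UMCCH_DIMM_CFG_DDR5;
            }
            return reg;
    }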
*/ -static void __read_mc_regs_df(struct amd64_pvt *pvt) +static void umc_read_mc_regs(struct amd64_pvt *pvt) { u8 nid = pvt->mc_node_id; struct amd64_umc *umc; - u32 i, umc_base; + u32 i, tmp, umc_base; /* Read registers from each UMC */ for_each_umc(i) { @@ -3433,11 +2926,20 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) umc_base = get_umc_base(i); umc = &pvt->umc[i]; - amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg); - amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg); - amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl); - amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl); - amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi); + if (!amd_smn_read(nid, umc_base + get_umc_reg(pvt, UMCCH_DIMM_CFG), &tmp)) + umc->dimm_cfg = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &tmp)) + umc->umc_cfg = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &tmp)) + umc->sdp_ctrl = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &tmp)) + umc->ecc_ctrl = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &tmp)) + umc->umc_cap_hi = tmp; } } @@ -3445,7 +2947,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) * Retrieve the hardware registers of the memory controller (this includes the * 'Address Map' and 'Misc' device regs) */ -static void read_mc_regs(struct amd64_pvt *pvt) +static void dct_read_mc_regs(struct amd64_pvt *pvt) { unsigned int range; u64 msr_val; @@ -3454,25 +2956,18 @@ static void read_mc_regs(struct amd64_pvt *pvt) * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since * those are Read-As-Zero. */ - rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem); + rdmsrq(MSR_K8_TOP_MEM1, pvt->top_mem); edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem); /* Check first whether TOP_MEM2 is enabled: */ - rdmsrl(MSR_AMD64_SYSCFG, msr_val); + rdmsrq(MSR_AMD64_SYSCFG, msr_val); if (msr_val & BIT(21)) { - rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2); + rdmsrq(MSR_K8_TOP_MEM2, pvt->top_mem2); edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2); } else { edac_dbg(0, " TOP_MEM2 disabled\n"); } - if (pvt->umc) { - __read_mc_regs_df(pvt); - amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar); - - goto skip; - } - amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap); read_dram_ctl_register(pvt); @@ -3513,14 +3008,6 @@ static void read_mc_regs(struct amd64_pvt *pvt) amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1); } -skip: - read_dct_base_mask(pvt); - - determine_memory_type(pvt); - - if (!pvt->umc) - edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); - determine_ecc_sym_sz(pvt); } @@ -3558,36 +3045,47 @@ skip: * encompasses * */ -static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) +static u32 dct_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) { u32 dbam = dct ? 
pvt->dbam1 : pvt->dbam0; - int csrow_nr = csrow_nr_orig; u32 cs_mode, nr_pages; - if (!pvt->umc) { - csrow_nr >>= 1; - cs_mode = DBAM_DIMM(csrow_nr, dbam); - } else { - cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt); - } + csrow_nr >>= 1; + cs_mode = DBAM_DIMM(csrow_nr, dbam); nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr); nr_pages <<= 20 - PAGE_SHIFT; edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n", - csrow_nr_orig, dct, cs_mode); + csrow_nr, dct, cs_mode); edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); return nr_pages; } -static int init_csrows_df(struct mem_ctl_info *mci) +static u32 umc_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) +{ + int csrow_nr = csrow_nr_orig; + u32 cs_mode, nr_pages; + + cs_mode = umc_get_cs_mode(csrow_nr >> 1, dct, pvt); + + nr_pages = umc_addr_mask_to_cs_size(pvt, dct, cs_mode, csrow_nr); + nr_pages <<= 20 - PAGE_SHIFT; + + edac_dbg(0, "csrow: %d, channel: %d, cs_mode %d\n", + csrow_nr_orig, dct, cs_mode); + edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); + + return nr_pages; +} + +static void umc_init_csrows(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; enum edac_type edac_mode = EDAC_NONE; enum dev_type dev_type = DEV_UNKNOWN; struct dimm_info *dimm; - int empty = 1; u8 umc, cs; if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) { @@ -3608,40 +3106,34 @@ static int init_csrows_df(struct mem_ctl_info *mci) if (!csrow_enabled(cs, umc, pvt)) continue; - empty = 0; dimm = mci->csrows[cs]->channels[umc]->dimm; edac_dbg(1, "MC node: %d, csrow: %d\n", pvt->mc_node_id, cs); - dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); + dimm->nr_pages = umc_get_csrow_nr_pages(pvt, umc, cs); dimm->mtype = pvt->umc[umc].dram_type; dimm->edac_mode = edac_mode; dimm->dtype = dev_type; dimm->grain = 64; } } - - return empty; } /* * Initialize the array of csrow attribute instances, based on the values * from pci config hardware registers. 
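[Note: both *_get_csrow_nr_pages() helpers above convert a chip-select size in megabytes to pages with nr_pages <<= 20 - PAGE_SHIFT. A quick check of that arithmetic, assuming 4 KiB pages:]

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12 /* assume 4 KiB pages */

    int main(void)
    {
            uint32_t size_mb  = 16384;                        /* hypothetical 16 GiB CS */
            uint32_t nr_pages = size_mb << (20 - PAGE_SHIFT); /* 256 pages per MiB */

            printf("%u MB -> %u pages\n", size_mb, nr_pages); /* 16384 -> 4194304 */
            return 0;
    }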
*/ -static int init_csrows(struct mem_ctl_info *mci) +static void dct_init_csrows(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; enum edac_type edac_mode = EDAC_NONE; struct csrow_info *csrow; struct dimm_info *dimm; - int i, j, empty = 1; int nr_pages = 0; + int i, j; u32 val; - if (pvt->umc) - return init_csrows_df(mci); - amd64_read_pci_cfg(pvt->F3, NBCFG, &val); pvt->nbcfg = val; @@ -3664,19 +3156,18 @@ static int init_csrows(struct mem_ctl_info *mci) continue; csrow = mci->csrows[i]; - empty = 0; edac_dbg(1, "MC node: %d, csrow: %d\n", pvt->mc_node_id, i); if (row_dct0) { - nr_pages = get_csrow_nr_pages(pvt, 0, i); + nr_pages = dct_get_csrow_nr_pages(pvt, 0, i); csrow->channels[0]->dimm->nr_pages = nr_pages; } /* K8 has only one DCT */ if (pvt->fam != 0xf && row_dct1) { - int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i); + int row_dct1_pages = dct_get_csrow_nr_pages(pvt, 1, i); csrow->channels[1]->dimm->nr_pages = row_dct1_pages; nr_pages += row_dct1_pages; @@ -3691,15 +3182,13 @@ static int init_csrows(struct mem_ctl_info *mci) : EDAC_SECDED; } - for (j = 0; j < pvt->channel_count; j++) { + for (j = 0; j < pvt->max_mcs; j++) { dimm = csrow->channels[j]->dimm; dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; dimm->grain = 64; } } - - return empty; } /* get all cores on this DCT */ @@ -3708,7 +3197,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) int cpu; for_each_online_cpu(cpu) - if (topology_die_id(cpu) == nid) + if (topology_amd_node_id(cpu) == nid) cpumask_set_cpu(cpu, mask); } @@ -3733,8 +3222,7 @@ static bool nb_mce_bank_enabled_on_node(u16 nid) nbe = reg->l & MSR_MCGCTL_NBE; edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", - cpu, reg->q, - (nbe ? "enabled" : "disabled")); + cpu, reg->q, str_enabled_disabled(nbe)); if (!nbe) goto out; @@ -3862,59 +3350,51 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, amd64_warn("Error restoring NB MCGCTL settings!\n"); } -static bool ecc_enabled(struct amd64_pvt *pvt) +static bool dct_ecc_enabled(struct amd64_pvt *pvt) { u16 nid = pvt->mc_node_id; bool nb_mce_en = false; - u8 ecc_en = 0, i; + u8 ecc_en = 0; u32 value; - if (boot_cpu_data.x86 >= 0x17) { - u8 umc_en_mask = 0, ecc_en_mask = 0; - struct amd64_umc *umc; - - for_each_umc(i) { - umc = &pvt->umc[i]; + amd64_read_pci_cfg(pvt->F3, NBCFG, &value); - /* Only check enabled UMCs. */ - if (!(umc->sdp_ctrl & UMC_SDP_INIT)) - continue; + ecc_en = !!(value & NBCFG_ECC_ENABLE); - umc_en_mask |= BIT(i); + nb_mce_en = nb_mce_bank_enabled_on_node(nid); + if (!nb_mce_en) + edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n", + MSR_IA32_MCG_CTL, nid); - if (umc->umc_cap_hi & UMC_ECC_ENABLED) - ecc_en_mask |= BIT(i); - } + edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, str_enabled_disabled(ecc_en)); - /* Check whether at least one UMC is enabled: */ - if (umc_en_mask) - ecc_en = umc_en_mask == ecc_en_mask; - else - edac_dbg(0, "Node %d: No enabled UMCs.\n", nid); + return ecc_en && nb_mce_en; +} - /* Assume UMC MCA banks are enabled. 
*/ - nb_mce_en = true; - } else { - amd64_read_pci_cfg(pvt->F3, NBCFG, &value); +static bool umc_ecc_enabled(struct amd64_pvt *pvt) +{ + struct amd64_umc *umc; + bool ecc_en = false; + int i; - ecc_en = !!(value & NBCFG_ECC_ENABLE); + /* Check whether at least one UMC is enabled: */ + for_each_umc(i) { + umc = &pvt->umc[i]; - nb_mce_en = nb_mce_bank_enabled_on_node(nid); - if (!nb_mce_en) - edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n", - MSR_IA32_MCG_CTL, nid); + if (umc->sdp_ctrl & UMC_SDP_INIT && + umc->umc_cap_hi & UMC_ECC_ENABLED) { + ecc_en = true; + break; + } } - edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, (ecc_en ? "enabled" : "disabled")); + edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, str_enabled_disabled(ecc_en)); - if (!ecc_en || !nb_mce_en) - return false; - else - return true; + return ecc_en; } static inline void -f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) +umc_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) { u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1; @@ -3944,142 +3424,503 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) } } -static void setup_mci_misc_attrs(struct mem_ctl_info *mci) +static void dct_setup_mci_misc_attrs(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; mci->mtype_cap = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2; mci->edac_ctl_cap = EDAC_FLAG_NONE; - if (pvt->umc) { - f17h_determine_edac_ctl_cap(mci, pvt); - } else { - if (pvt->nbcap & NBCAP_SECDED) - mci->edac_ctl_cap |= EDAC_FLAG_SECDED; + if (pvt->nbcap & NBCAP_SECDED) + mci->edac_ctl_cap |= EDAC_FLAG_SECDED; - if (pvt->nbcap & NBCAP_CHIPKILL) - mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; - } + if (pvt->nbcap & NBCAP_CHIPKILL) + mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; - mci->edac_cap = determine_edac_cap(pvt); + mci->edac_cap = dct_determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = fam_type->ctl_name; + mci->ctl_name = pvt->ctl_name; mci->dev_name = pci_name(pvt->F3); mci->ctl_page_to_phys = NULL; /* memory scrubber interface */ mci->set_sdram_scrub_rate = set_scrub_rate; mci->get_sdram_scrub_rate = get_scrub_rate; + + dct_init_csrows(mci); +} + +static void umc_setup_mci_misc_attrs(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + + mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + + umc_determine_edac_ctl_cap(mci, pvt); + + mci->edac_cap = umc_determine_edac_cap(pvt); + mci->mod_name = EDAC_MOD_STR; + mci->ctl_name = pvt->ctl_name; + mci->dev_name = pci_name(pvt->F3); + mci->ctl_page_to_phys = NULL; + + umc_init_csrows(mci); +} + +static int dct_hw_info_get(struct amd64_pvt *pvt) +{ + int ret = reserve_mc_sibling_devs(pvt, pvt->f1_id, pvt->f2_id); + + if (ret) + return ret; + + dct_prep_chip_selects(pvt); + dct_read_base_mask(pvt); + dct_read_mc_regs(pvt); + dct_determine_memory_type(pvt); + + return 0; +} + +static int umc_hw_info_get(struct amd64_pvt *pvt) +{ + pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); + if (!pvt->umc) + return -ENOMEM; + + umc_prep_chip_selects(pvt); + umc_read_base_mask(pvt); + umc_read_mc_regs(pvt); + umc_determine_memory_type(pvt); + + return 0; } /* - * returns a pointer to the family descriptor on success, NULL otherwise. + * The CPUs have one channel per UMC, so UMC number is equivalent to a + * channel number. The GPUs have 8 channels per UMC, so the UMC number no + * longer works as a channel number. 
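[Note: there is a semantic shift in umc_ecc_enabled() above: the removed path required every initialized UMC to report ECC (umc_en_mask == ecc_en_mask), while the new one reports ECC as enabled if any initialized UMC has it on. A standalone sketch of the new check; UMC_SDP_INIT is BIT(31) per the header, and the UMC_ECC_ENABLED position is an assumption for this sketch:]

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define UMC_SDP_INIT    (1u << 31) /* per amd64_edac.h */
    #define UMC_ECC_ENABLED (1u << 30) /* assumed bit position for the sketch */

    struct umc { uint32_t sdp_ctrl, umc_cap_hi; };

    /* ECC counts as enabled if any initialized UMC reports it on. */
    static bool ecc_enabled(const struct umc *umcs, int n)
    {
            for (int i = 0; i < n; i++)
                    if ((umcs[i].sdp_ctrl & UMC_SDP_INIT) &&
                        (umcs[i].umc_cap_hi & UMC_ECC_ENABLED))
                            return true;
            return false;
    }

    int main(void)
    {
            struct umc u[2] = {
                    { 0, 0 },                          /* unused channel */
                    { UMC_SDP_INIT, UMC_ECC_ENABLED }, /* active, ECC on */
            };
            printf("ecc %s\n", ecc_enabled(u, 2) ? "enabled" : "disabled");
            return 0;
    }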
+ * + * The channel number within a GPU UMC is given in MCA_IPID[15:12]. + * However, the IDs are split such that two UMC values go to one UMC, and + * the channel numbers are split in two groups of four. + * + * Refer to comment on gpu_get_umc_base(). + * + * For example, + * UMC0 CH[3:0] = 0x0005[3:0]000 + * UMC0 CH[7:4] = 0x0015[3:0]000 + * UMC1 CH[3:0] = 0x0025[3:0]000 + * UMC1 CH[7:4] = 0x0035[3:0]000 */ -static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) +static void gpu_get_err_info(struct mce *m, struct err_info *err) +{ + u8 ch = (m->ipid & GENMASK(31, 0)) >> 20; + u8 phy = ((m->ipid >> 12) & 0xf); + + err->channel = ch % 2 ? phy + 4 : phy; + err->csrow = phy; +} + +static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, + unsigned int cs_mode, int csrow_nr) +{ + u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr]; + u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr]; + + return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1); +} + +static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) +{ + int size, cs_mode, cs = 0; + + edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl); + + cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY; + + for_each_chip_select(cs, ctrl, pvt) { + size = gpu_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs); + amd64_info(EDAC_MC ": %d: %5dMB\n", cs, size); + } +} + +static void gpu_dump_misc_regs(struct amd64_pvt *pvt) +{ + struct amd64_umc *umc; + u32 i; + + for_each_umc(i) { + umc = &pvt->umc[i]; + + edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg); + edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl); + edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl); + edac_dbg(1, "UMC%d All HBMs support ECC: yes\n", i); + + gpu_debug_display_dimm_sizes(pvt, i); + } +} + +static u32 gpu_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) +{ + u32 nr_pages; + int cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY; + + nr_pages = gpu_addr_mask_to_cs_size(pvt, dct, cs_mode, csrow_nr); + nr_pages <<= 20 - PAGE_SHIFT; + + edac_dbg(0, "csrow: %d, channel: %d\n", csrow_nr, dct); + edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); + + return nr_pages; +} + +static void gpu_init_csrows(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + struct dimm_info *dimm; + u8 umc, cs; + + for_each_umc(umc) { + for_each_chip_select(cs, umc, pvt) { + if (!csrow_enabled(cs, umc, pvt)) + continue; + + dimm = mci->csrows[umc]->channels[cs]->dimm; + + edac_dbg(1, "MC node: %d, csrow: %d\n", + pvt->mc_node_id, cs); + + dimm->nr_pages = gpu_get_csrow_nr_pages(pvt, umc, cs); + dimm->edac_mode = EDAC_SECDED; + dimm->mtype = pvt->dram_type; + dimm->dtype = DEV_X16; + dimm->grain = 64; + } + } +} + +static void gpu_setup_mci_misc_attrs(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + + mci->mtype_cap = MEM_FLAG_HBM2; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + + mci->edac_cap = EDAC_FLAG_EC; + mci->mod_name = EDAC_MOD_STR; + mci->ctl_name = pvt->ctl_name; + mci->dev_name = pci_name(pvt->F3); + mci->ctl_page_to_phys = NULL; + + gpu_init_csrows(mci); +} + +/* ECC is enabled by default on GPU nodes */ +static bool gpu_ecc_enabled(struct amd64_pvt *pvt) +{ + return true; +} + +static inline u32 gpu_get_umc_base(struct amd64_pvt *pvt, u8 umc, u8 channel) +{ + /* + * On CPUs, there is one channel per UMC, so UMC numbering equals + * channel numbering. On GPUs, there are eight channels per UMC, + * so the channel numbering is different from UMC numbering. 
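[Note: gpu_get_err_info() above splits MCA_IPID into the raw UMC instance and the phy; odd instances carry the upper group of four channels. Worked through with a hypothetical IPID:]

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t ipid = 0x00153000ULL; /* hypothetical: instance 0x1, phy 3 */

            unsigned ch  = (ipid & 0xffffffffULL) >> 20; /* instance: 0x1 (odd) */
            unsigned phy = (ipid >> 12) & 0xf;           /* phy within instance: 3 */
            unsigned channel = (ch % 2) ? phy + 4 : phy; /* -> 7 */

            printf("channel=%u csrow=%u\n", channel, phy);
            return 0;
    }

[Consistent with the mapping table above: IPID 0x0015_3000 is UMC0 channel 7.]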
+ * + * On CPU nodes channels are selected in 6th nibble + * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000; + * + * On GPU nodes channels are selected in 3rd nibble + * HBM chX[3:0]= [Y ]5X[3:0]000; + * HBM chX[7:4]= [Y+1]5X[3:0]000 + * + * On MI300 APU nodes, same as GPU nodes but channels are selected + * in the base address of 0x90000 + */ + umc *= 2; + + if (channel >= 4) + umc++; + + return pvt->gpu_umc_base + (umc << 20) + ((channel % 4) << 12); +} + +static void gpu_read_mc_regs(struct amd64_pvt *pvt) +{ + u8 nid = pvt->mc_node_id; + struct amd64_umc *umc; + u32 i, tmp, umc_base; + + /* Read registers from each UMC */ + for_each_umc(i) { + umc_base = gpu_get_umc_base(pvt, i, 0); + umc = &pvt->umc[i]; + + if (!amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &tmp)) + umc->umc_cfg = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &tmp)) + umc->sdp_ctrl = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &tmp)) + umc->ecc_ctrl = tmp; + } +} + +static void gpu_read_base_mask(struct amd64_pvt *pvt) +{ + u32 base_reg, mask_reg; + u32 *base, *mask; + int umc, cs; + + for_each_umc(umc) { + for_each_chip_select(cs, umc, pvt) { + base_reg = gpu_get_umc_base(pvt, umc, cs) + UMCCH_BASE_ADDR; + base = &pvt->csels[umc].csbases[cs]; + + if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) { + edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base, base_reg); + } + + mask_reg = gpu_get_umc_base(pvt, umc, cs) + UMCCH_ADDR_MASK; + mask = &pvt->csels[umc].csmasks[cs]; + + if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) { + edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask, mask_reg); + } + } + } +} + +static void gpu_prep_chip_selects(struct amd64_pvt *pvt) +{ + int umc; + + for_each_umc(umc) { + pvt->csels[umc].b_cnt = 8; + pvt->csels[umc].m_cnt = 8; + } +} + +static int gpu_hw_info_get(struct amd64_pvt *pvt) +{ + int ret; + + ret = gpu_get_node_map(pvt); + if (ret) + return ret; + + pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); + if (!pvt->umc) + return -ENOMEM; + + gpu_prep_chip_selects(pvt); + gpu_read_base_mask(pvt); + gpu_read_mc_regs(pvt); + + return 0; +} + +static void hw_info_put(struct amd64_pvt *pvt) +{ + pci_dev_put(pvt->F1); + pci_dev_put(pvt->F2); + kfree(pvt->umc); + kfree(pvt->csels); +} + +static struct low_ops umc_ops = { + .hw_info_get = umc_hw_info_get, + .ecc_enabled = umc_ecc_enabled, + .setup_mci_misc_attrs = umc_setup_mci_misc_attrs, + .dump_misc_regs = umc_dump_misc_regs, + .get_err_info = umc_get_err_info, +}; + +static struct low_ops gpu_ops = { + .hw_info_get = gpu_hw_info_get, + .ecc_enabled = gpu_ecc_enabled, + .setup_mci_misc_attrs = gpu_setup_mci_misc_attrs, + .dump_misc_regs = gpu_dump_misc_regs, + .get_err_info = gpu_get_err_info, +}; + +/* Use Family 16h versions for defaults and adjust as needed below. */ +static struct low_ops dct_ops = { + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, + .dbam_to_cs = f16_dbam_to_chip_select, + .hw_info_get = dct_hw_info_get, + .ecc_enabled = dct_ecc_enabled, + .setup_mci_misc_attrs = dct_setup_mci_misc_attrs, + .dump_misc_regs = dct_dump_misc_regs, +}; + +static int per_family_init(struct amd64_pvt *pvt) { pvt->ext_model = boot_cpu_data.x86_model >> 4; pvt->stepping = boot_cpu_data.x86_stepping; pvt->model = boot_cpu_data.x86_model; pvt->fam = boot_cpu_data.x86; + char *tmp_name = NULL; + pvt->max_mcs = 2; + + /* + * Decide on which ops group to use here and do any family/model + * overrides below. 
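[Note: gpu_get_umc_base() above reverses that layout when composing register addresses. Checking it against the same example, with the MI200-style base of 0x50000 that per_family_init() sets below:]

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t gpu_umc_base(uint32_t base, unsigned umc, unsigned channel)
    {
            umc *= 2;          /* two IPID instances per UMC */
            if (channel >= 4)
                    umc++;     /* upper four channels use the odd instance */

            return base + (umc << 20) + ((channel % 4) << 12);
    }

    int main(void)
    {
            /* UMC0 channel 7 -> 0x153000, matching the channel table above. */
            printf("0x%x\n", gpu_umc_base(0x50000, 0, 7));
            return 0;
    }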
+ */ + if (pvt->fam >= 0x17) + pvt->ops = &umc_ops; + else + pvt->ops = &dct_ops; switch (pvt->fam) { case 0xf: - fam_type = &family_types[K8_CPUS]; - pvt->ops = &family_types[K8_CPUS].ops; + tmp_name = (pvt->ext_model >= K8_REV_F) ? + "K8 revF or later" : "K8 revE or earlier"; + pvt->f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP; + pvt->f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL; + pvt->ops->map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow; + pvt->ops->dbam_to_cs = k8_dbam_to_chip_select; break; case 0x10: - fam_type = &family_types[F10_CPUS]; - pvt->ops = &family_types[F10_CPUS].ops; + pvt->f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP; + pvt->f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM; + pvt->ops->dbam_to_cs = f10_dbam_to_chip_select; break; case 0x15: - if (pvt->model == 0x30) { - fam_type = &family_types[F15_M30H_CPUS]; - pvt->ops = &family_types[F15_M30H_CPUS].ops; + switch (pvt->model) { + case 0x30: + pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2; break; - } else if (pvt->model == 0x60) { - fam_type = &family_types[F15_M60H_CPUS]; - pvt->ops = &family_types[F15_M60H_CPUS].ops; + case 0x60: + pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2; + pvt->ops->dbam_to_cs = f15_m60h_dbam_to_chip_select; + break; + case 0x13: + /* Richland is only client */ + return -ENODEV; + default: + pvt->f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2; + pvt->ops->dbam_to_cs = f15_dbam_to_chip_select; break; - /* Richland is only client */ - } else if (pvt->model == 0x13) { - return NULL; - } else { - fam_type = &family_types[F15_CPUS]; - pvt->ops = &family_types[F15_CPUS].ops; } break; case 0x16: - if (pvt->model == 0x30) { - fam_type = &family_types[F16_M30H_CPUS]; - pvt->ops = &family_types[F16_M30H_CPUS].ops; + switch (pvt->model) { + case 0x30: + pvt->f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2; + break; + default: + pvt->f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2; break; } - fam_type = &family_types[F16_CPUS]; - pvt->ops = &family_types[F16_CPUS].ops; break; case 0x17: - if (pvt->model >= 0x10 && pvt->model <= 0x2f) { - fam_type = &family_types[F17_M10H_CPUS]; - pvt->ops = &family_types[F17_M10H_CPUS].ops; + switch (pvt->model) { + case 0x30 ... 0x3f: + pvt->max_mcs = 8; break; - } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) { - fam_type = &family_types[F17_M30H_CPUS]; - pvt->ops = &family_types[F17_M30H_CPUS].ops; - break; - } else if (pvt->model >= 0x60 && pvt->model <= 0x6f) { - fam_type = &family_types[F17_M60H_CPUS]; - pvt->ops = &family_types[F17_M60H_CPUS].ops; - break; - } else if (pvt->model >= 0x70 && pvt->model <= 0x7f) { - fam_type = &family_types[F17_M70H_CPUS]; - pvt->ops = &family_types[F17_M70H_CPUS].ops; + default: break; } - fallthrough; - case 0x18: - fam_type = &family_types[F17_CPUS]; - pvt->ops = &family_types[F17_CPUS].ops; + break; - if (pvt->fam == 0x18) - family_types[F17_CPUS].ctl_name = "F18h"; + case 0x18: break; case 0x19: - if (pvt->model >= 0x10 && pvt->model <= 0x1f) { - fam_type = &family_types[F19_M10H_CPUS]; - pvt->ops = &family_types[F19_M10H_CPUS].ops; + switch (pvt->model) { + case 0x00 ... 0x0f: + pvt->max_mcs = 8; + break; + case 0x10 ... 0x1f: + pvt->max_mcs = 12; + pvt->flags.zn_regs_v2 = 1; + break; + case 0x30 ... 
0x3f: + if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) { + tmp_name = "MI200"; + pvt->max_mcs = 4; + pvt->dram_type = MEM_HBM2; + pvt->gpu_umc_base = 0x50000; + pvt->ops = &gpu_ops; + } else { + pvt->max_mcs = 8; + } + break; + case 0x60 ... 0x6f: + pvt->flags.zn_regs_v2 = 1; + break; + case 0x70 ... 0x7f: + pvt->max_mcs = 4; + pvt->flags.zn_regs_v2 = 1; + break; + case 0x90 ... 0x9f: + pvt->max_mcs = 4; + pvt->dram_type = MEM_HBM3; + pvt->gpu_umc_base = 0x90000; + pvt->ops = &gpu_ops; + break; + case 0xa0 ... 0xaf: + pvt->max_mcs = 12; + pvt->flags.zn_regs_v2 = 1; + break; + } + break; + + case 0x1A: + switch (pvt->model) { + case 0x00 ... 0x1f: + pvt->max_mcs = 12; + pvt->flags.zn_regs_v2 = 1; break; - } else if (pvt->model >= 0x20 && pvt->model <= 0x2f) { - fam_type = &family_types[F17_M70H_CPUS]; - pvt->ops = &family_types[F17_M70H_CPUS].ops; - fam_type->ctl_name = "F19h_M20h"; + case 0x40 ... 0x4f: + pvt->flags.zn_regs_v2 = 1; break; - } else if (pvt->model >= 0x50 && pvt->model <= 0x5f) { - fam_type = &family_types[F19_M50H_CPUS]; - pvt->ops = &family_types[F19_M50H_CPUS].ops; - fam_type->ctl_name = "F19h_M50h"; + case 0x50 ... 0x57: + case 0xc0 ... 0xc7: + pvt->max_mcs = 16; + pvt->flags.zn_regs_v2 = 1; break; - } else if (pvt->model >= 0xa0 && pvt->model <= 0xaf) { - fam_type = &family_types[F19_M10H_CPUS]; - pvt->ops = &family_types[F19_M10H_CPUS].ops; - fam_type->ctl_name = "F19h_MA0h"; + case 0x90 ... 0x9f: + case 0xa0 ... 0xaf: + pvt->max_mcs = 8; + pvt->flags.zn_regs_v2 = 1; break; } - fam_type = &family_types[F19_CPUS]; - pvt->ops = &family_types[F19_CPUS].ops; - family_types[F19_CPUS].ctl_name = "F19h"; break; default: amd64_err("Unsupported family!\n"); - return NULL; + return -ENODEV; } - return fam_type; + if (tmp_name) + scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), tmp_name); + else + scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), "F%02Xh_M%02Xh", + pvt->fam, pvt->model); + + pvt->csels = kcalloc(pvt->max_mcs, sizeof(*pvt->csels), GFP_KERNEL); + if (!pvt->csels) + return -ENOMEM; + + return 0; } static const struct attribute_group *amd64_edac_attr_groups[] = { @@ -4090,67 +3931,33 @@ static const struct attribute_group *amd64_edac_attr_groups[] = { NULL }; -static int hw_info_get(struct amd64_pvt *pvt) -{ - u16 pci_id1, pci_id2; - int ret; - - if (pvt->fam >= 0x17) { - pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); - if (!pvt->umc) - return -ENOMEM; - - pci_id1 = fam_type->f0_id; - pci_id2 = fam_type->f6_id; - } else { - pci_id1 = fam_type->f1_id; - pci_id2 = fam_type->f2_id; - } - - ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2); - if (ret) - return ret; - - read_mc_regs(pvt); - - return 0; -} - -static void hw_info_put(struct amd64_pvt *pvt) +/* + * For heterogeneous and APU models EDAC CHIP_SELECT and CHANNEL layers + * should be swapped to fit into the layers. + */ +static unsigned int get_layer_size(struct amd64_pvt *pvt, u8 layer) { - if (pvt->F0 || pvt->F1) - free_mc_sibling_devs(pvt); + bool is_gpu = (pvt->ops == &gpu_ops); - kfree(pvt->umc); + if (!layer) + return is_gpu ? pvt->max_mcs + : pvt->csels[0].b_cnt; + else + return is_gpu ? pvt->csels[0].b_cnt + : pvt->max_mcs; } static int init_one_instance(struct amd64_pvt *pvt) { struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; - int ret = -EINVAL; - - /* - * We need to determine how many memory channels there are. Then use - * that information for calculating the size of the dynamic instance - * tables in the 'mci' structure. 
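[Note: two details in the hunk above. The fallback control name is formatted as "F%02Xh_M%02Xh", so family 0x19 model 0x10 yields "F19h_M10h" and model 0xA0 yields "F19h_MA0h", matching the removed hard-coded strings. And get_layer_size() swaps the CHIP_SELECT and CHANNEL layer sizes for GPU nodes so the eight chip selects per UMC still fit EDAC's two fixed layers. A standalone sketch of that swap, with hypothetical counts:]

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical node: 4 UMCs (max_mcs), 8 chip selects per UMC (b_cnt). */
    static unsigned layer_size(bool is_gpu, unsigned layer,
                               unsigned max_mcs, unsigned b_cnt)
    {
            if (layer == 0) /* EDAC_MC_LAYER_CHIP_SELECT */
                    return is_gpu ? max_mcs : b_cnt;
            /* EDAC_MC_LAYER_CHANNEL */
            return is_gpu ? b_cnt : max_mcs;
    }

    int main(void)
    {
            printf("cpu: csrows=%u channels=%u\n",
                   layer_size(false, 0, 4, 8), layer_size(false, 1, 4, 8));
            printf("gpu: csrows=%u channels=%u\n",
                   layer_size(true, 0, 4, 8), layer_size(true, 1, 4, 8));
            return 0;
    }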
- */ - pvt->channel_count = pvt->ops->early_channel_count(pvt); - if (pvt->channel_count < 0) - return ret; + int ret = -ENOMEM; - ret = -ENOMEM; layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = pvt->csels[0].b_cnt; + layers[0].size = get_layer_size(pvt, 0); layers[0].is_virt_csrow = true; layers[1].type = EDAC_MC_LAYER_CHANNEL; - - /* - * Always allocate two channels since we can have setups with DIMMs on - * only one channel. Also, this simplifies handling later for the price - * of a couple of KBs tops. - */ - layers[1].size = fam_type->max_mcs; + layers[1].size = get_layer_size(pvt, 1); layers[1].is_virt_csrow = false; mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0); @@ -4160,10 +3967,7 @@ static int init_one_instance(struct amd64_pvt *pvt) mci->pvt_info = pvt; mci->pdev = &pvt->F3->dev; - setup_mci_misc_attrs(mci); - - if (init_csrows(mci)) - mci->edac_cap = EDAC_FLAG_NONE; + pvt->ops->setup_mci_misc_attrs(mci); ret = -ENODEV; if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) { @@ -4180,7 +3984,7 @@ static bool instance_has_memory(struct amd64_pvt *pvt) bool cs_enabled = false; int cs = 0, dct = 0; - for (dct = 0; dct < fam_type->max_mcs; dct++) { + for (dct = 0; dct < pvt->max_mcs; dct++) { for_each_chip_select(cs, dct, pvt) cs_enabled |= csrow_enabled(cs, dct, pvt); } @@ -4209,12 +4013,11 @@ static int probe_one_instance(unsigned int nid) pvt->mc_node_id = nid; pvt->F3 = F3; - ret = -ENODEV; - fam_type = per_family_init(pvt); - if (!fam_type) + ret = per_family_init(pvt); + if (ret < 0) goto err_enable; - ret = hw_info_get(pvt); + ret = pvt->ops->hw_info_get(pvt); if (ret < 0) goto err_enable; @@ -4224,7 +4027,7 @@ static int probe_one_instance(unsigned int nid) goto err_enable; } - if (!ecc_enabled(pvt)) { + if (!pvt->ops->ecc_enabled(pvt)) { ret = -ENODEV; if (!ecc_enable_override) @@ -4250,13 +4053,10 @@ static int probe_one_instance(unsigned int nid) goto err_enable; } - amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name, - (pvt->fam == 0xf ? - (pvt->ext_model >= K8_REV_F ? "revF or later " - : "revE or earlier ") - : ""), pvt->mc_node_id); + amd64_info("%s detected (node %d).\n", pvt->ctl_name, pvt->mc_node_id); - dump_misc_regs(pvt); + /* Display and decode various registers for debug purposes. */ + pvt->ops->dump_misc_regs(pvt); return ret; @@ -4319,6 +4119,7 @@ static const struct x86_cpu_id amd64_cpuids[] = { X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL), X86_MATCH_VENDOR_FAM(HYGON, 0x18, NULL), X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x1A, NULL), { } }; MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); @@ -4370,19 +4171,17 @@ static int __init amd64_edac_init(void) } /* register stuff with EDAC MCE */ - if (boot_cpu_data.x86 >= 0x17) + if (boot_cpu_data.x86 >= 0x17) { amd_register_ecc_decoder(decode_umc_error); - else + } else { amd_register_ecc_decoder(decode_bus_error); - - setup_pci_device(); + setup_pci_device(); + } #ifdef CONFIG_X86_32 amd64_err("%s on 32-bit is unsupported. 
USE AT YOUR OWN RISK!\n", EDAC_MOD_STR); #endif - printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION); - return 0; err_pci: @@ -4427,10 +4226,8 @@ module_init(amd64_edac_init); module_exit(amd64_edac_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, " - "Dave Peterson, Thayne Harbaugh"); -MODULE_DESCRIPTION("MC support for AMD64 memory controllers - " - EDAC_AMD64_VERSION); +MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, Dave Peterson, Thayne Harbaugh; AMD"); +MODULE_DESCRIPTION("MC support for AMD64 memory controllers"); module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 38e5ad95d010..1757c1b99fc8 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -16,6 +16,7 @@ #include <linux/slab.h> #include <linux/mmzone.h> #include <linux/edac.h> +#include <linux/bitfield.h> #include <asm/cpu_device_id.h> #include <asm/msr.h> #include "edac_module.h" @@ -85,7 +86,6 @@ * sections 3.5.4 and 3.5.5 for more information. */ -#define EDAC_AMD64_VERSION "3.5.0" #define EDAC_MOD_STR "amd64_edac" /* Extended Model from CPUID, for CPU Revision numbers */ @@ -96,11 +96,12 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 -#define NUM_CONTROLLERS 12 #define ON true #define OFF false +#define MAX_CTL_NAMELEN 19 + /* * PCI-defined configuration space registers */ @@ -114,22 +115,6 @@ #define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532 #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581 #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582 -#define PCI_DEVICE_ID_AMD_17H_DF_F0 0x1460 -#define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466 -#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8 -#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee -#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490 -#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 -#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F0 0x1448 -#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F6 0x144e -#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440 -#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 -#define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650 -#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656 -#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F0 0x14ad -#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F6 0x14b3 -#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F0 0x166a -#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F6 0x1670 /* * Function 1 - Address Map @@ -215,8 +200,6 @@ #define DCT_SEL_HI 0x114 #define F15H_M60H_SCRCTRL 0x1C8 -#define F17H_SCR_BASE_ADDR 0x48 -#define F17H_SCR_LIMIT_ADDR 0x4C /* * Function 3 - Misc Control @@ -274,15 +257,11 @@ #define UMCCH_ADDR_MASK 0x20 #define UMCCH_ADDR_MASK_SEC 0x28 #define UMCCH_ADDR_MASK_SEC_DDR5 0x30 -#define UMCCH_ADDR_CFG 0x30 -#define UMCCH_ADDR_CFG_DDR5 0x40 #define UMCCH_DIMM_CFG 0x80 #define UMCCH_DIMM_CFG_DDR5 0x90 #define UMCCH_UMC_CFG 0x100 #define UMCCH_SDP_CTRL 0x104 #define UMCCH_ECC_CTRL 0x14C -#define UMCCH_ECC_BAD_SYMBOL 0xD90 -#define UMCCH_UMC_CAP 0xDF0 #define UMCCH_UMC_CAP_HI 0xDF4 /* UMC CH bitfields */ @@ -291,25 +270,6 @@ #define UMC_SDP_INIT BIT(31) -enum amd_families { - K8_CPUS = 0, - F10_CPUS, - F15_CPUS, - F15_M30H_CPUS, - F15_M60H_CPUS, - F16_CPUS, - F16_M30H_CPUS, - F17_CPUS, - F17_M10H_CPUS, - F17_M30H_CPUS, - F17_M60H_CPUS, - F17_M70H_CPUS, - F19_CPUS, - F19_M10H_CPUS, - F19_M50H_CPUS, - NUM_FAMILIES, -}; - /* Error injection control structure */ struct error_injection { u32 section; @@ -352,11 +312,21 @@ struct amd64_umc 
{ enum mem_type dram_type; }; +struct amd64_family_flags { + /* + * Indicates that the system supports the new register offsets, etc. + * first introduced with Family 19h Model 10h. + */ + __u64 zn_regs_v2 : 1, + + __reserved : 63; +}; + struct amd64_pvt { struct low_ops *ops; /* pci_device handles which we utilize */ - struct pci_dev *F0, *F1, *F2, *F3, *F6; + struct pci_dev *F1, *F2, *F3; u16 mc_node_id; /* MC index of this MC node */ u8 fam; /* CPU family */ @@ -364,7 +334,6 @@ struct amd64_pvt { u8 stepping; /* ... stepping */ int ext_model; /* extended model value of this node */ - int channel_count; /* Raw registers */ u32 dclr0; /* DRAM Configuration Low DCT0 reg */ @@ -373,13 +342,12 @@ struct amd64_pvt { u32 dchr1; /* DRAM Configuration High DCT1 reg */ u32 nbcap; /* North Bridge Capabilities */ u32 nbcfg; /* F10 North Bridge Configuration */ - u32 ext_nbcfg; /* Extended F10 North Bridge Configuration */ u32 dhar; /* DRAM Hoist reg */ u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */ u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ /* one for each DCT/UMC */ - struct chip_select csels[NUM_CONTROLLERS]; + struct chip_select *csels; /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */ struct dram_range ranges[DRAM_RANGES]; @@ -390,10 +358,17 @@ struct amd64_pvt { u32 dct_sel_lo; /* DRAM Controller Select Low */ u32 dct_sel_hi; /* DRAM Controller Select High */ u32 online_spare; /* On-Line spare Reg */ + u32 gpu_umc_base; /* Base address used for channel selection on GPUs */ /* x4, x8, or x16 syndromes in use */ u8 ecc_sym_sz; + char ctl_name[MAX_CTL_NAMELEN]; + u16 f1_id, f2_id; + /* Maximum number of memory controllers per die/node. */ + u8 max_mcs; + + struct amd64_family_flags flags; /* place to store error injection parameters prior to issue */ struct error_injection injection; @@ -484,30 +459,15 @@ struct ecc_settings { * functions and per device encoding/decoding logic. */ struct low_ops { - int (*early_channel_count) (struct amd64_pvt *pvt); - void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr, - struct err_info *); - int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode, int cs_mask_nr); -}; - -struct amd64_family_flags { - /* - * Indicates that the system supports the new register offsets, etc. - * first introduced with Family 19h Model 10h. - */ - __u64 zn_regs_v2 : 1, - - __reserved : 63; -}; - -struct amd64_family_type { - const char *ctl_name; - u16 f0_id, f1_id, f2_id, f6_id; - /* Maximum number of memory controllers per die/node. */ - u8 max_mcs; - struct amd64_family_flags flags; - struct low_ops ops; + void (*map_sysaddr_to_csrow)(struct mem_ctl_info *mci, u64 sys_addr, + struct err_info *err); + int (*dbam_to_cs)(struct amd64_pvt *pvt, u8 dct, + unsigned int cs_mode, int cs_mask_nr); + int (*hw_info_get)(struct amd64_pvt *pvt); + bool (*ecc_enabled)(struct amd64_pvt *pvt); + void (*setup_mci_misc_attrs)(struct mem_ctl_info *mci); + void (*dump_misc_regs)(struct amd64_pvt *pvt); + void (*get_err_info)(struct mce *m, struct err_info *err); }; int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c deleted file mode 100644 index 7508aa416ddb..000000000000 --- a/drivers/edac/amd8111_edac.c +++ /dev/null @@ -1,597 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * amd8111_edac.c, AMD8111 Hyper Transport chip EDAC kernel module - * - * Copyright (c) 2008 Wind River Systems, Inc. 
- * - * Authors: Cao Qingtao <qingtao.cao@windriver.com> - * Benjamin Walsh <benjamin.walsh@windriver.com> - * Hu Yongqi <yongqi.hu@windriver.com> - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/bitops.h> -#include <linux/edac.h> -#include <linux/pci_ids.h> -#include <asm/io.h> - -#include "edac_module.h" -#include "amd8111_edac.h" - -#define AMD8111_EDAC_REVISION " Ver: 1.0.0" -#define AMD8111_EDAC_MOD_STR "amd8111_edac" - -#define PCI_DEVICE_ID_AMD_8111_PCI 0x7460 - -enum amd8111_edac_devs { - LPC_BRIDGE = 0, -}; - -enum amd8111_edac_pcis { - PCI_BRIDGE = 0, -}; - -/* Wrapper functions for accessing PCI configuration space */ -static int edac_pci_read_dword(struct pci_dev *dev, int reg, u32 *val32) -{ - int ret; - - ret = pci_read_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Read Error at 0x%x\n", reg); - - return ret; -} - -static void edac_pci_read_byte(struct pci_dev *dev, int reg, u8 *val8) -{ - int ret; - - ret = pci_read_config_byte(dev, reg, val8); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Read Error at 0x%x\n", reg); -} - -static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32) -{ - int ret; - - ret = pci_write_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Write Error at 0x%x\n", reg); -} - -static void edac_pci_write_byte(struct pci_dev *dev, int reg, u8 val8) -{ - int ret; - - ret = pci_write_config_byte(dev, reg, val8); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Write Error at 0x%x\n", reg); -} - -/* - * device-specific methods for amd8111 PCI Bridge Controller - * - * Error Reporting and Handling for amd8111 chipset could be found - * in its datasheet 3.1.2 section, P37 - */ -static void amd8111_pci_bridge_init(struct amd8111_pci_info *pci_info) -{ - u32 val32; - struct pci_dev *dev = pci_info->dev; - - /* First clear error detection flags on the host interface */ - - /* Clear SSE/SMA/STA flags in the global status register*/ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - if (val32 & PCI_STSCMD_CLEAR_MASK) - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - /* Clear CRC and Link Fail flags in HT Link Control reg */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - if (val32 & HT_LINK_CLEAR_MASK) - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - /* Second clear all fault on the secondary interface */ - - /* Clear error flags in the memory-base limit reg. 
*/ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_CLEAR_MASK) - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - /* Clear Discard Timer Expired flag in Interrupt/Bridge Control reg */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - if (val32 & PCI_INTBRG_CTRL_CLEAR_MASK) - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - - /* Last enable error detections */ - if (edac_op_state == EDAC_OPSTATE_POLL) { - /* Enable System Error reporting in global status register */ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - val32 |= PCI_STSCMD_SERREN; - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - /* Enable CRC Sync flood packets to HyperTransport Link */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - val32 |= HT_LINK_CRCFEN; - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - /* Enable SSE reporting etc in Interrupt control reg */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - val32 |= PCI_INTBRG_CTRL_POLL_MASK; - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - } -} - -static void amd8111_pci_bridge_exit(struct amd8111_pci_info *pci_info) -{ - u32 val32; - struct pci_dev *dev = pci_info->dev; - - if (edac_op_state == EDAC_OPSTATE_POLL) { - /* Disable System Error reporting */ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - val32 &= ~PCI_STSCMD_SERREN; - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - /* Disable CRC flood packets */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - val32 &= ~HT_LINK_CRCFEN; - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - /* Disable DTSERREN/MARSP/SERREN in Interrupt Control reg */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - val32 &= ~PCI_INTBRG_CTRL_POLL_MASK; - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - } -} - -static void amd8111_pci_bridge_check(struct edac_pci_ctl_info *edac_dev) -{ - struct amd8111_pci_info *pci_info = edac_dev->pvt_info; - struct pci_dev *dev = pci_info->dev; - u32 val32; - - /* Check out PCI Bridge Status and Command Register */ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - if (val32 & PCI_STSCMD_CLEAR_MASK) { - printk(KERN_INFO "Error(s) in PCI bridge status and command" - "register on device %s\n", pci_info->ctl_name); - printk(KERN_INFO "SSE: %d, RMA: %d, RTA: %d\n", - (val32 & PCI_STSCMD_SSE) != 0, - (val32 & PCI_STSCMD_RMA) != 0, - (val32 & PCI_STSCMD_RTA) != 0); - - val32 |= PCI_STSCMD_CLEAR_MASK; - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check out HyperTransport Link Control Register */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - if (val32 & HT_LINK_LKFAIL) { - printk(KERN_INFO "Error(s) in hypertransport link control" - "register on device %s\n", pci_info->ctl_name); - printk(KERN_INFO "LKFAIL: %d\n", - (val32 & HT_LINK_LKFAIL) != 0); - - val32 |= HT_LINK_LKFAIL; - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check out PCI Interrupt and Bridge Control Register */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - if (val32 & PCI_INTBRG_CTRL_DTSTAT) { - printk(KERN_INFO "Error(s) in PCI interrupt and bridge control" - "register on device %s\n", pci_info->ctl_name); - printk(KERN_INFO "DTSTAT: %d\n", - (val32 & PCI_INTBRG_CTRL_DTSTAT) != 0); - - val32 |= PCI_INTBRG_CTRL_DTSTAT; - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check out PCI Bridge Memory Base-Limit 
Register */ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_CLEAR_MASK) { - printk(KERN_INFO - "Error(s) in mem limit register on %s device\n", - pci_info->ctl_name); - printk(KERN_INFO "DPE: %d, RSE: %d, RMA: %d\n" - "RTA: %d, STA: %d, MDPE: %d\n", - (val32 & MEM_LIMIT_DPE) != 0, - (val32 & MEM_LIMIT_RSE) != 0, - (val32 & MEM_LIMIT_RMA) != 0, - (val32 & MEM_LIMIT_RTA) != 0, - (val32 & MEM_LIMIT_STA) != 0, - (val32 & MEM_LIMIT_MDPE) != 0); - - val32 |= MEM_LIMIT_CLEAR_MASK; - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } -} - -static struct resource *legacy_io_res; -static int at_compat_reg_broken; -#define LEGACY_NR_PORTS 1 - -/* device-specific methods for amd8111 LPC Bridge device */ -static void amd8111_lpc_bridge_init(struct amd8111_dev_info *dev_info) -{ - u8 val8; - struct pci_dev *dev = dev_info->dev; - - /* First clear REG_AT_COMPAT[SERR, IOCHK] if necessary */ - legacy_io_res = request_region(REG_AT_COMPAT, LEGACY_NR_PORTS, - AMD8111_EDAC_MOD_STR); - if (!legacy_io_res) - printk(KERN_INFO "%s: failed to request legacy I/O region " - "start %d, len %d\n", __func__, - REG_AT_COMPAT, LEGACY_NR_PORTS); - else { - val8 = __do_inb(REG_AT_COMPAT); - if (val8 == 0xff) { /* buggy port */ - printk(KERN_INFO "%s: port %d is buggy, not supported" - " by hardware?\n", __func__, REG_AT_COMPAT); - at_compat_reg_broken = 1; - release_region(REG_AT_COMPAT, LEGACY_NR_PORTS); - legacy_io_res = NULL; - } else { - u8 out8 = 0; - if (val8 & AT_COMPAT_SERR) - out8 = AT_COMPAT_CLRSERR; - if (val8 & AT_COMPAT_IOCHK) - out8 |= AT_COMPAT_CLRIOCHK; - if (out8 > 0) - __do_outb(out8, REG_AT_COMPAT); - } - } - - /* Second clear error flags on LPC bridge */ - edac_pci_read_byte(dev, REG_IO_CTRL_1, &val8); - if (val8 & IO_CTRL_1_CLEAR_MASK) - edac_pci_write_byte(dev, REG_IO_CTRL_1, val8); -} - -static void amd8111_lpc_bridge_exit(struct amd8111_dev_info *dev_info) -{ - if (legacy_io_res) - release_region(REG_AT_COMPAT, LEGACY_NR_PORTS); -} - -static void amd8111_lpc_bridge_check(struct edac_device_ctl_info *edac_dev) -{ - struct amd8111_dev_info *dev_info = edac_dev->pvt_info; - struct pci_dev *dev = dev_info->dev; - u8 val8; - - edac_pci_read_byte(dev, REG_IO_CTRL_1, &val8); - if (val8 & IO_CTRL_1_CLEAR_MASK) { - printk(KERN_INFO - "Error(s) in IO control register on %s device\n", - dev_info->ctl_name); - printk(KERN_INFO "LPC ERR: %d, PW2LPC: %d\n", - (val8 & IO_CTRL_1_LPC_ERR) != 0, - (val8 & IO_CTRL_1_PW2LPC) != 0); - - val8 |= IO_CTRL_1_CLEAR_MASK; - edac_pci_write_byte(dev, REG_IO_CTRL_1, val8); - - edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name); - } - - if (at_compat_reg_broken == 0) { - u8 out8 = 0; - val8 = __do_inb(REG_AT_COMPAT); - if (val8 & AT_COMPAT_SERR) - out8 = AT_COMPAT_CLRSERR; - if (val8 & AT_COMPAT_IOCHK) - out8 |= AT_COMPAT_CLRIOCHK; - if (out8 > 0) { - __do_outb(out8, REG_AT_COMPAT); - edac_device_handle_ue(edac_dev, 0, 0, - edac_dev->ctl_name); - } - } -} - -/* General devices represented by edac_device_ctl_info */ -static struct amd8111_dev_info amd8111_devices[] = { - [LPC_BRIDGE] = { - .err_dev = PCI_DEVICE_ID_AMD_8111_LPC, - .ctl_name = "lpc", - .init = amd8111_lpc_bridge_init, - .exit = amd8111_lpc_bridge_exit, - .check = amd8111_lpc_bridge_check, - }, - {0}, -}; - -/* PCI controllers represented by edac_pci_ctl_info */ -static struct amd8111_pci_info amd8111_pcis[] = { - [PCI_BRIDGE] = { - .err_dev = PCI_DEVICE_ID_AMD_8111_PCI, - .ctl_name = "AMD8111_PCI_Controller", - .init = 
amd8111_pci_bridge_init, - .exit = amd8111_pci_bridge_exit, - .check = amd8111_pci_bridge_check, - }, - {0}, -}; - -static int amd8111_dev_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - struct amd8111_dev_info *dev_info = &amd8111_devices[id->driver_data]; - int ret = -ENODEV; - - dev_info->dev = pci_get_device(PCI_VENDOR_ID_AMD, - dev_info->err_dev, NULL); - - if (!dev_info->dev) { - printk(KERN_ERR "EDAC device not found:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, dev_info->err_dev, - dev_info->ctl_name); - goto err; - } - - if (pci_enable_device(dev_info->dev)) { - printk(KERN_ERR "failed to enable:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, dev_info->err_dev, - dev_info->ctl_name); - goto err_dev_put; - } - - /* - * we do not allocate extra private structure for - * edac_device_ctl_info, but make use of existing - * one instead. - */ - dev_info->edac_idx = edac_device_alloc_index(); - dev_info->edac_dev = - edac_device_alloc_ctl_info(0, dev_info->ctl_name, 1, - NULL, 0, 0, - NULL, 0, dev_info->edac_idx); - if (!dev_info->edac_dev) { - ret = -ENOMEM; - goto err_dev_put; - } - - dev_info->edac_dev->pvt_info = dev_info; - dev_info->edac_dev->dev = &dev_info->dev->dev; - dev_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR; - dev_info->edac_dev->ctl_name = dev_info->ctl_name; - dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev); - - if (edac_op_state == EDAC_OPSTATE_POLL) - dev_info->edac_dev->edac_check = dev_info->check; - - if (dev_info->init) - dev_info->init(dev_info); - - if (edac_device_add_device(dev_info->edac_dev) > 0) { - printk(KERN_ERR "failed to add edac_dev for %s\n", - dev_info->ctl_name); - goto err_edac_free_ctl; - } - - printk(KERN_INFO "added one edac_dev on AMD8111 " - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, dev_info->err_dev, - dev_info->ctl_name); - - return 0; - -err_edac_free_ctl: - edac_device_free_ctl_info(dev_info->edac_dev); -err_dev_put: - pci_dev_put(dev_info->dev); -err: - return ret; -} - -static void amd8111_dev_remove(struct pci_dev *dev) -{ - struct amd8111_dev_info *dev_info; - - for (dev_info = amd8111_devices; dev_info->err_dev; dev_info++) - if (dev_info->dev->device == dev->device) - break; - - if (!dev_info->err_dev) /* should never happen */ - return; - - if (dev_info->edac_dev) { - edac_device_del_device(dev_info->edac_dev->dev); - edac_device_free_ctl_info(dev_info->edac_dev); - } - - if (dev_info->exit) - dev_info->exit(dev_info); - - pci_dev_put(dev_info->dev); -} - -static int amd8111_pci_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - struct amd8111_pci_info *pci_info = &amd8111_pcis[id->driver_data]; - int ret = -ENODEV; - - pci_info->dev = pci_get_device(PCI_VENDOR_ID_AMD, - pci_info->err_dev, NULL); - - if (!pci_info->dev) { - printk(KERN_ERR "EDAC device not found:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, pci_info->err_dev, - pci_info->ctl_name); - goto err; - } - - if (pci_enable_device(pci_info->dev)) { - printk(KERN_ERR "failed to enable:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, pci_info->err_dev, - pci_info->ctl_name); - goto err_dev_put; - } - - /* - * we do not allocate extra private structure for - * edac_pci_ctl_info, but make use of existing - * one instead. 
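[Note: one pattern worth observing in the removed probe/remove pairs here: pci_get_device() takes a reference that must be dropped with pci_dev_put() on every exit path, including errors. A minimal kernel-style sketch of that pairing; grab_pci_dev() is a hypothetical helper, not part of the driver:]

    #include <linux/pci.h>

    /* Look up a device by ID and enable it; drop the reference on failure. */
    static struct pci_dev *grab_pci_dev(u16 vendor, u16 device)
    {
            struct pci_dev *pdev = pci_get_device(vendor, device, NULL);

            if (pdev && pci_enable_device(pdev)) {
                    pci_dev_put(pdev);
                    pdev = NULL;
            }
            return pdev;
    }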
- */ - pci_info->edac_idx = edac_pci_alloc_index(); - pci_info->edac_dev = edac_pci_alloc_ctl_info(0, pci_info->ctl_name); - if (!pci_info->edac_dev) { - ret = -ENOMEM; - goto err_dev_put; - } - - pci_info->edac_dev->pvt_info = pci_info; - pci_info->edac_dev->dev = &pci_info->dev->dev; - pci_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR; - pci_info->edac_dev->ctl_name = pci_info->ctl_name; - pci_info->edac_dev->dev_name = dev_name(&pci_info->dev->dev); - - if (edac_op_state == EDAC_OPSTATE_POLL) - pci_info->edac_dev->edac_check = pci_info->check; - - if (pci_info->init) - pci_info->init(pci_info); - - if (edac_pci_add_device(pci_info->edac_dev, pci_info->edac_idx) > 0) { - printk(KERN_ERR "failed to add edac_pci for %s\n", - pci_info->ctl_name); - goto err_edac_free_ctl; - } - - printk(KERN_INFO "added one edac_pci on AMD8111 " - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, pci_info->err_dev, - pci_info->ctl_name); - - return 0; - -err_edac_free_ctl: - edac_pci_free_ctl_info(pci_info->edac_dev); -err_dev_put: - pci_dev_put(pci_info->dev); -err: - return ret; -} - -static void amd8111_pci_remove(struct pci_dev *dev) -{ - struct amd8111_pci_info *pci_info; - - for (pci_info = amd8111_pcis; pci_info->err_dev; pci_info++) - if (pci_info->dev->device == dev->device) - break; - - if (!pci_info->err_dev) /* should never happen */ - return; - - if (pci_info->edac_dev) { - edac_pci_del_device(pci_info->edac_dev->dev); - edac_pci_free_ctl_info(pci_info->edac_dev); - } - - if (pci_info->exit) - pci_info->exit(pci_info); - - pci_dev_put(pci_info->dev); -} - -/* PCI Device ID table for general EDAC device */ -static const struct pci_device_id amd8111_edac_dev_tbl[] = { - { - PCI_VEND_DEV(AMD, 8111_LPC), - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - .driver_data = LPC_BRIDGE, - }, - { - 0, - } /* table is NULL-terminated */ -}; -MODULE_DEVICE_TABLE(pci, amd8111_edac_dev_tbl); - -static struct pci_driver amd8111_edac_dev_driver = { - .name = "AMD8111_EDAC_DEV", - .probe = amd8111_dev_probe, - .remove = amd8111_dev_remove, - .id_table = amd8111_edac_dev_tbl, -}; - -/* PCI Device ID table for EDAC PCI controller */ -static const struct pci_device_id amd8111_edac_pci_tbl[] = { - { - PCI_VEND_DEV(AMD, 8111_PCI), - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - .driver_data = PCI_BRIDGE, - }, - { - 0, - } /* table is NULL-terminated */ -}; -MODULE_DEVICE_TABLE(pci, amd8111_edac_pci_tbl); - -static struct pci_driver amd8111_edac_pci_driver = { - .name = "AMD8111_EDAC_PCI", - .probe = amd8111_pci_probe, - .remove = amd8111_pci_remove, - .id_table = amd8111_edac_pci_tbl, -}; - -static int __init amd8111_edac_init(void) -{ - int val; - - printk(KERN_INFO "AMD8111 EDAC driver " AMD8111_EDAC_REVISION "\n"); - printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc.\n"); - - /* Only POLL mode supported so far */ - edac_op_state = EDAC_OPSTATE_POLL; - - val = pci_register_driver(&amd8111_edac_dev_driver); - val |= pci_register_driver(&amd8111_edac_pci_driver); - - return val; -} - -static void __exit amd8111_edac_exit(void) -{ - pci_unregister_driver(&amd8111_edac_pci_driver); - pci_unregister_driver(&amd8111_edac_dev_driver); -} - - -module_init(amd8111_edac_init); -module_exit(amd8111_edac_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Cao Qingtao <qingtao.cao@windriver.com>\n"); -MODULE_DESCRIPTION("AMD8111 HyperTransport I/O Hub EDAC kernel module"); diff --git a/drivers/edac/amd8111_edac.h b/drivers/edac/amd8111_edac.h
deleted file mode 100644 index 200cab1b3e42..000000000000 --- a/drivers/edac/amd8111_edac.h +++ /dev/null @@ -1,118 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * amd8111_edac.h, EDAC defs for AMD8111 hypertransport chip - * - * Copyright (c) 2008 Wind River Systems, Inc. - * - * Authors: Cao Qingtao <qingtao.cao@windriver.com> - * Benjamin Walsh <benjamin.walsh@windriver.com> - * Hu Yongqi <yongqi.hu@windriver.com> - */ - -#ifndef _AMD8111_EDAC_H_ -#define _AMD8111_EDAC_H_ - -/************************************************************ - * PCI Bridge Status and Command Register, DevA:0x04 - ************************************************************/ -#define REG_PCI_STSCMD 0x04 -enum pci_stscmd_bits { - PCI_STSCMD_SSE = BIT(30), - PCI_STSCMD_RMA = BIT(29), - PCI_STSCMD_RTA = BIT(28), - PCI_STSCMD_SERREN = BIT(8), - PCI_STSCMD_CLEAR_MASK = (PCI_STSCMD_SSE | - PCI_STSCMD_RMA | - PCI_STSCMD_RTA) -}; - -/************************************************************ - * PCI Bridge Memory Base-Limit Register, DevA:0x1c - ************************************************************/ -#define REG_MEM_LIM 0x1c -enum mem_limit_bits { - MEM_LIMIT_DPE = BIT(31), - MEM_LIMIT_RSE = BIT(30), - MEM_LIMIT_RMA = BIT(29), - MEM_LIMIT_RTA = BIT(28), - MEM_LIMIT_STA = BIT(27), - MEM_LIMIT_MDPE = BIT(24), - MEM_LIMIT_CLEAR_MASK = (MEM_LIMIT_DPE | - MEM_LIMIT_RSE | - MEM_LIMIT_RMA | - MEM_LIMIT_RTA | - MEM_LIMIT_STA | - MEM_LIMIT_MDPE) -}; - -/************************************************************ - * HyperTransport Link Control Register, DevA:0xc4 - ************************************************************/ -#define REG_HT_LINK 0xc4 -enum ht_link_bits { - HT_LINK_LKFAIL = BIT(4), - HT_LINK_CRCFEN = BIT(1), - HT_LINK_CLEAR_MASK = (HT_LINK_LKFAIL) -}; - -/************************************************************ - * PCI Bridge Interrupt and Bridge Control, DevA:0x3c - ************************************************************/ -#define REG_PCI_INTBRG_CTRL 0x3c -enum pci_intbrg_ctrl_bits { - PCI_INTBRG_CTRL_DTSERREN = BIT(27), - PCI_INTBRG_CTRL_DTSTAT = BIT(26), - PCI_INTBRG_CTRL_MARSP = BIT(21), - PCI_INTBRG_CTRL_SERREN = BIT(17), - PCI_INTBRG_CTRL_PEREN = BIT(16), - PCI_INTBRG_CTRL_CLEAR_MASK = (PCI_INTBRG_CTRL_DTSTAT), - PCI_INTBRG_CTRL_POLL_MASK = (PCI_INTBRG_CTRL_DTSERREN | - PCI_INTBRG_CTRL_MARSP | - PCI_INTBRG_CTRL_SERREN) -}; - -/************************************************************ - * I/O Control 1 Register, DevB:0x40 - ************************************************************/ -#define REG_IO_CTRL_1 0x40 -enum io_ctrl_1_bits { - IO_CTRL_1_NMIONERR = BIT(7), - IO_CTRL_1_LPC_ERR = BIT(6), - IO_CTRL_1_PW2LPC = BIT(1), - IO_CTRL_1_CLEAR_MASK = (IO_CTRL_1_LPC_ERR | IO_CTRL_1_PW2LPC) -}; - -/************************************************************ - * Legacy I/O Space Registers - ************************************************************/ -#define REG_AT_COMPAT 0x61 -enum at_compat_bits { - AT_COMPAT_SERR = BIT(7), - AT_COMPAT_IOCHK = BIT(6), - AT_COMPAT_CLRIOCHK = BIT(3), - AT_COMPAT_CLRSERR = BIT(2), -}; - -struct amd8111_dev_info { - u16 err_dev; /* PCI Device ID */ - struct pci_dev *dev; - int edac_idx; /* device index */ - char *ctl_name; - struct edac_device_ctl_info *edac_dev; - void (*init)(struct amd8111_dev_info *dev_info); - void (*exit)(struct amd8111_dev_info *dev_info); - void (*check)(struct edac_device_ctl_info *edac_dev); -}; - -struct amd8111_pci_info { - u16 err_dev; /* PCI Device ID */ - struct pci_dev *dev; - int edac_idx; /* pci index */ - 
const char *ctl_name; - struct edac_pci_ctl_info *edac_dev; - void (*init)(struct amd8111_pci_info *dev_info); - void (*exit)(struct amd8111_pci_info *dev_info); - void (*check)(struct edac_pci_ctl_info *edac_dev); -}; - -#endif /* _AMD8111_EDAC_H_ */ diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c deleted file mode 100644 index 169353710982..000000000000 --- a/drivers/edac/amd8131_edac.c +++ /dev/null @@ -1,358 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * amd8131_edac.c, AMD8131 hypertransport chip EDAC kernel module - * - * Copyright (c) 2008 Wind River Systems, Inc. - * - * Authors: Cao Qingtao <qingtao.cao@windriver.com> - * Benjamin Walsh <benjamin.walsh@windriver.com> - * Hu Yongqi <yongqi.hu@windriver.com> - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/bitops.h> -#include <linux/edac.h> -#include <linux/pci_ids.h> - -#include "edac_module.h" -#include "amd8131_edac.h" - -#define AMD8131_EDAC_REVISION " Ver: 1.0.0" -#define AMD8131_EDAC_MOD_STR "amd8131_edac" - -/* Wrapper functions for accessing PCI configuration space */ -static void edac_pci_read_dword(struct pci_dev *dev, int reg, u32 *val32) -{ - int ret; - - ret = pci_read_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8131_EDAC_MOD_STR - " PCI Access Read Error at 0x%x\n", reg); -} - -static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32) -{ - int ret; - - ret = pci_write_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8131_EDAC_MOD_STR - " PCI Access Write Error at 0x%x\n", reg); -} - -/* Support up to two AMD8131 chipsets on a platform */ -static struct amd8131_dev_info amd8131_devices[] = { - { - .inst = NORTH_A, - .devfn = DEVFN_PCIX_BRIDGE_NORTH_A, - .ctl_name = "AMD8131_PCIX_NORTH_A", - }, - { - .inst = NORTH_B, - .devfn = DEVFN_PCIX_BRIDGE_NORTH_B, - .ctl_name = "AMD8131_PCIX_NORTH_B", - }, - { - .inst = SOUTH_A, - .devfn = DEVFN_PCIX_BRIDGE_SOUTH_A, - .ctl_name = "AMD8131_PCIX_SOUTH_A", - }, - { - .inst = SOUTH_B, - .devfn = DEVFN_PCIX_BRIDGE_SOUTH_B, - .ctl_name = "AMD8131_PCIX_SOUTH_B", - }, - {.inst = NO_BRIDGE,}, -}; - -static void amd8131_pcix_init(struct amd8131_dev_info *dev_info) -{ - u32 val32; - struct pci_dev *dev = dev_info->dev; - - /* First clear error detection flags */ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_MASK) - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - /* Clear Discard Timer Timedout flag */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - if (val32 & INT_CTLR_DTS) - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - /* Clear CRC Error flag on link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - if (val32 & LNK_CTRL_CRCERR_A) - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - /* Clear CRC Error flag on link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - if (val32 & LNK_CTRL_CRCERR_B) - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); - - /* - * Then enable all error detections. - * - * Setup Discard Timer Sync Flood Enable, - * System Error Enable and Parity Error Enable. 
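- * (These three enables correspond to the INT_CTLR_DTSE, INT_CTLR_SERR and - * INT_CTLR_PERR bits OR-ed into REG_INT_CTLR just below.)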
- */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - val32 |= INT_CTLR_PERR | INT_CTLR_SERR | INT_CTLR_DTSE; - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - /* Enable overall SERR Error detection */ - edac_pci_read_dword(dev, REG_STS_CMD, &val32); - val32 |= STS_CMD_SERREN; - edac_pci_write_dword(dev, REG_STS_CMD, val32); - - /* Setup CRC Flood Enable for link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - val32 |= LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - /* Setup CRC Flood Enable for link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - val32 |= LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); -} - -static void amd8131_pcix_exit(struct amd8131_dev_info *dev_info) -{ - u32 val32; - struct pci_dev *dev = dev_info->dev; - - /* Disable SERR, PERR and DTSE Error detection */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - val32 &= ~(INT_CTLR_PERR | INT_CTLR_SERR | INT_CTLR_DTSE); - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - /* Disable overall System Error detection */ - edac_pci_read_dword(dev, REG_STS_CMD, &val32); - val32 &= ~STS_CMD_SERREN; - edac_pci_write_dword(dev, REG_STS_CMD, val32); - - /* Disable CRC Sync Flood on link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - val32 &= ~LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - /* Disable CRC Sync Flood on link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - val32 &= ~LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); -} - -static void amd8131_pcix_check(struct edac_pci_ctl_info *edac_dev) -{ - struct amd8131_dev_info *dev_info = edac_dev->pvt_info; - struct pci_dev *dev = dev_info->dev; - u32 val32; - - /* Check PCI-X Bridge Memory Base-Limit Register for errors */ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_MASK) { - printk(KERN_INFO "Error(s) in mem limit register " - "on %s bridge\n", dev_info->ctl_name); - printk(KERN_INFO "DPE: %d, RSE: %d, RMA: %d\n" - "RTA: %d, STA: %d, MDPE: %d\n", - val32 & MEM_LIMIT_DPE, - val32 & MEM_LIMIT_RSE, - val32 & MEM_LIMIT_RMA, - val32 & MEM_LIMIT_RTA, - val32 & MEM_LIMIT_STA, - val32 & MEM_LIMIT_MDPE); - - val32 |= MEM_LIMIT_MASK; - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check if Discard Timer timed out */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - if (val32 & INT_CTLR_DTS) { - printk(KERN_INFO "Error(s) in interrupt and control register " - "on %s bridge\n", dev_info->ctl_name); - printk(KERN_INFO "DTS: %d\n", val32 & INT_CTLR_DTS); - - val32 |= INT_CTLR_DTS; - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check if CRC error happens on link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - if (val32 & LNK_CTRL_CRCERR_A) { - printk(KERN_INFO "Error(s) in link conf and control register " - "on %s bridge\n", dev_info->ctl_name); - printk(KERN_INFO "CRCERR: %d\n", val32 & LNK_CTRL_CRCERR_A); - - val32 |= LNK_CTRL_CRCERR_A; - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check if CRC error happens on link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - if (val32 & LNK_CTRL_CRCERR_B) { - printk(KERN_INFO "Error(s) in link conf and control register " - "on %s bridge\n", dev_info->ctl_name); - printk(KERN_INFO "CRCERR: %d\n", val32 & LNK_CTRL_CRCERR_B); - 
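The clearing sequence repeated through this check routine (and in the amd8111 driver above) is the usual handling of latched, write-one-to-clear PCI status bits: report first, then write the just-read value back with the error flag still set. A minimal generic sketch, assuming hypothetical reg_read()/reg_write()/report_error() helpers and an ERR_BIT status flag:

	/* Acknowledge a latched, write-one-to-clear error flag. */
	static void ack_latched_error(u32 reg)
	{
		u32 sts = reg_read(reg);	/* latched status flags */

		if (sts & ERR_BIT) {
			report_error(sts);	/* log before acknowledging */
			reg_write(reg, sts | ERR_BIT);	/* writing 1 clears the bit */
		}
	}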
- val32 |= LNK_CTRL_CRCERR_B; - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } -} - -static struct amd8131_info amd8131_chipset = { - .err_dev = PCI_DEVICE_ID_AMD_8131_APIC, - .devices = amd8131_devices, - .init = amd8131_pcix_init, - .exit = amd8131_pcix_exit, - .check = amd8131_pcix_check, -}; - -/* - * There are 4 PCIX Bridges on ATCA-6101 that share the same PCI Device ID, - * so amd8131_probe() would be called by kernel 4 times, with different - * address of pci_dev for each of them each time. - */ -static int amd8131_probe(struct pci_dev *dev, const struct pci_device_id *id) -{ - struct amd8131_dev_info *dev_info; - - for (dev_info = amd8131_chipset.devices; dev_info->inst != NO_BRIDGE; - dev_info++) - if (dev_info->devfn == dev->devfn) - break; - - if (dev_info->inst == NO_BRIDGE) /* should never happen */ - return -ENODEV; - - /* - * We can't call pci_get_device() as we are used to do because - * there are 4 of them but pci_dev_get() instead. - */ - dev_info->dev = pci_dev_get(dev); - - if (pci_enable_device(dev_info->dev)) { - pci_dev_put(dev_info->dev); - printk(KERN_ERR "failed to enable:" - "vendor %x, device %x, devfn %x, name %s\n", - PCI_VENDOR_ID_AMD, amd8131_chipset.err_dev, - dev_info->devfn, dev_info->ctl_name); - return -ENODEV; - } - - /* - * we do not allocate extra private structure for - * edac_pci_ctl_info, but make use of existing - * one instead. - */ - dev_info->edac_idx = edac_pci_alloc_index(); - dev_info->edac_dev = edac_pci_alloc_ctl_info(0, dev_info->ctl_name); - if (!dev_info->edac_dev) - return -ENOMEM; - - dev_info->edac_dev->pvt_info = dev_info; - dev_info->edac_dev->dev = &dev_info->dev->dev; - dev_info->edac_dev->mod_name = AMD8131_EDAC_MOD_STR; - dev_info->edac_dev->ctl_name = dev_info->ctl_name; - dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev); - - if (edac_op_state == EDAC_OPSTATE_POLL) - dev_info->edac_dev->edac_check = amd8131_chipset.check; - - if (amd8131_chipset.init) - amd8131_chipset.init(dev_info); - - if (edac_pci_add_device(dev_info->edac_dev, dev_info->edac_idx) > 0) { - printk(KERN_ERR "failed edac_pci_add_device() for %s\n", - dev_info->ctl_name); - edac_pci_free_ctl_info(dev_info->edac_dev); - return -ENODEV; - } - - printk(KERN_INFO "added one device on AMD8131 " - "vendor %x, device %x, devfn %x, name %s\n", - PCI_VENDOR_ID_AMD, amd8131_chipset.err_dev, - dev_info->devfn, dev_info->ctl_name); - - return 0; -} - -static void amd8131_remove(struct pci_dev *dev) -{ - struct amd8131_dev_info *dev_info; - - for (dev_info = amd8131_chipset.devices; dev_info->inst != NO_BRIDGE; - dev_info++) - if (dev_info->devfn == dev->devfn) - break; - - if (dev_info->inst == NO_BRIDGE) /* should never happen */ - return; - - if (dev_info->edac_dev) { - edac_pci_del_device(dev_info->edac_dev->dev); - edac_pci_free_ctl_info(dev_info->edac_dev); - } - - if (amd8131_chipset.exit) - amd8131_chipset.exit(dev_info); - - pci_dev_put(dev_info->dev); -} - -static const struct pci_device_id amd8131_edac_pci_tbl[] = { - { - PCI_VEND_DEV(AMD, 8131_BRIDGE), - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - .driver_data = 0, - }, - { - 0, - } /* table is NULL-terminated */ -}; -MODULE_DEVICE_TABLE(pci, amd8131_edac_pci_tbl); - -static struct pci_driver amd8131_edac_driver = { - .name = AMD8131_EDAC_MOD_STR, - .probe = amd8131_probe, - .remove = amd8131_remove, - .id_table = amd8131_edac_pci_tbl, -}; - -static int __init amd8131_edac_init(void) -{ - 
printk(KERN_INFO "AMD8131 EDAC driver " AMD8131_EDAC_REVISION "\n"); - printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc.\n"); - - /* Only POLL mode supported so far */ - edac_op_state = EDAC_OPSTATE_POLL; - - return pci_register_driver(&amd8131_edac_driver); -} - -static void __exit amd8131_edac_exit(void) -{ - pci_unregister_driver(&amd8131_edac_driver); -} - -module_init(amd8131_edac_init); -module_exit(amd8131_edac_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Cao Qingtao <qingtao.cao@windriver.com>\n"); -MODULE_DESCRIPTION("AMD8131 HyperTransport PCI-X Tunnel EDAC kernel module"); diff --git a/drivers/edac/amd8131_edac.h b/drivers/edac/amd8131_edac.h deleted file mode 100644 index 5f362abdaf12..000000000000 --- a/drivers/edac/amd8131_edac.h +++ /dev/null @@ -1,107 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * amd8131_edac.h, EDAC defs for AMD8131 hypertransport chip - * - * Copyright (c) 2008 Wind River Systems, Inc. - * - * Authors: Cao Qingtao <qingtao.cao@windriver.com> - * Benjamin Walsh <benjamin.walsh@windriver.com> - * Hu Yongqi <yongqi.hu@windriver.com> - */ - -#ifndef _AMD8131_EDAC_H_ -#define _AMD8131_EDAC_H_ - -#define DEVFN_PCIX_BRIDGE_NORTH_A 8 -#define DEVFN_PCIX_BRIDGE_NORTH_B 16 -#define DEVFN_PCIX_BRIDGE_SOUTH_A 24 -#define DEVFN_PCIX_BRIDGE_SOUTH_B 32 - -/************************************************************ - * PCI-X Bridge Status and Command Register, DevA:0x04 - ************************************************************/ -#define REG_STS_CMD 0x04 -enum sts_cmd_bits { - STS_CMD_SSE = BIT(30), - STS_CMD_SERREN = BIT(8) -}; - -/************************************************************ - * PCI-X Bridge Interrupt and Bridge Control Register, - ************************************************************/ -#define REG_INT_CTLR 0x3c -enum int_ctlr_bits { - INT_CTLR_DTSE = BIT(27), - INT_CTLR_DTS = BIT(26), - INT_CTLR_SERR = BIT(17), - INT_CTLR_PERR = BIT(16) -}; - -/************************************************************ - * PCI-X Bridge Memory Base-Limit Register, DevA:0x1C - ************************************************************/ -#define REG_MEM_LIM 0x1c -enum mem_limit_bits { - MEM_LIMIT_DPE = BIT(31), - MEM_LIMIT_RSE = BIT(30), - MEM_LIMIT_RMA = BIT(29), - MEM_LIMIT_RTA = BIT(28), - MEM_LIMIT_STA = BIT(27), - MEM_LIMIT_MDPE = BIT(24), - MEM_LIMIT_MASK = MEM_LIMIT_DPE|MEM_LIMIT_RSE|MEM_LIMIT_RMA| - MEM_LIMIT_RTA|MEM_LIMIT_STA|MEM_LIMIT_MDPE -}; - -/************************************************************ - * Link Configuration And Control Register, side A - ************************************************************/ -#define REG_LNK_CTRL_A 0xc4 - -/************************************************************ - * Link Configuration And Control Register, side B - ************************************************************/ -#define REG_LNK_CTRL_B 0xc8 - -enum lnk_ctrl_bits { - LNK_CTRL_CRCERR_A = BIT(9), - LNK_CTRL_CRCERR_B = BIT(8), - LNK_CTRL_CRCFEN = BIT(1) -}; - -enum pcix_bridge_inst { - NORTH_A = 0, - NORTH_B = 1, - SOUTH_A = 2, - SOUTH_B = 3, - NO_BRIDGE = 4 -}; - -struct amd8131_dev_info { - int devfn; - enum pcix_bridge_inst inst; - struct pci_dev *dev; - int edac_idx; /* pci device index */ - char *ctl_name; - struct edac_pci_ctl_info *edac_dev; -}; - -/* - * AMD8131 chipset has two pairs of PCIX Bridge and related IOAPIC - * Controller, and ATCA-6101 has two AMD8131 chipsets, so there are - * four PCIX Bridges on ATCA-6101 altogether. 
- * - * These PCIX Bridges share the same PCI Device ID and are all of - * Function Zero, they could be discriminated by their pci_dev->devfn. - * They share the same set of init/check/exit methods, and their - * private structures are collected in the devices[] array. - */ -struct amd8131_info { - u16 err_dev; /* PCI Device ID for AMD8131 APIC*/ - struct amd8131_dev_info *devices; - void (*init)(struct amd8131_dev_info *dev_info); - void (*exit)(struct amd8131_dev_info *dev_info); - void (*check)(struct edac_pci_ctl_info *edac_dev); -}; - -#endif /* _AMD8131_EDAC_H_ */ - diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c index c4bd2fb9c46b..d64248fcf4c0 100644 --- a/drivers/edac/armada_xp_edac.c +++ b/drivers/edac/armada_xp_edac.c @@ -5,7 +5,9 @@ #include <linux/kernel.h> #include <linux/edac.h> -#include <linux/of_platform.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> #include <asm/hardware/cache-l2x0.h> #include <asm/hardware/cache-aurora-l2.h> @@ -351,15 +353,13 @@ static int axp_mc_probe(struct platform_device *pdev) return 0; } -static int axp_mc_remove(struct platform_device *pdev) +static void axp_mc_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); platform_set_drvdata(pdev, NULL); - - return 0; } static struct platform_driver axp_mc_driver = { @@ -523,7 +523,7 @@ static int aurora_l2_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "data ECC is not enabled\n"); dci = edac_device_alloc_ctl_info(sizeof(*drvdata), - "cpu", 1, "L", 1, 2, NULL, 0, 0); + "cpu", 1, "L", 1, 2, 0); if (!dci) return -ENOMEM; @@ -564,7 +564,7 @@ static int aurora_l2_probe(struct platform_device *pdev) return 0; } -static int aurora_l2_remove(struct platform_device *pdev) +static void aurora_l2_remove(struct platform_device *pdev) { struct edac_device_ctl_info *dci = platform_get_drvdata(pdev); #ifdef CONFIG_EDAC_DEBUG @@ -575,8 +575,6 @@ static int aurora_l2_remove(struct platform_device *pdev) edac_device_del_device(&pdev->dev); edac_device_free_ctl_info(dci); platform_set_drvdata(pdev, NULL); - - return 0; } static struct platform_driver aurora_l2_driver = { diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c index 6bd5f8815919..dadb8acbee3d 100644 --- a/drivers/edac/aspeed_edac.c +++ b/drivers/edac/aspeed_edac.c @@ -357,7 +357,7 @@ probe_exit02: } -static int aspeed_remove(struct platform_device *pdev) +static void aspeed_remove(struct platform_device *pdev) { struct mem_ctl_info *mci; @@ -369,8 +369,6 @@ static int aspeed_remove(struct platform_device *pdev) mci = edac_mc_del_mc(&pdev->dev); if (mci) edac_mc_free(mci); - - return 0; } diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c index e4736eb37bfb..ae3bb7afa103 100644 --- a/drivers/edac/bluefield_edac.c +++ b/drivers/edac/bluefield_edac.c @@ -47,13 +47,22 @@ #define MLXBF_EDAC_MAX_DIMM_PER_MC 2 #define MLXBF_EDAC_ERROR_GRAIN 8 +#define MLXBF_WRITE_REG_32 (0x82000009) +#define MLXBF_READ_REG_32 (0x8200000A) +#define MLXBF_SIP_SVC_VERSION (0x8200ff03) + +#define MLXBF_SMCCC_ACCESS_VIOLATION (-4) + +#define MLXBF_SVC_REQ_MAJOR 0 +#define MLXBF_SVC_REQ_MINOR 3 + /* - * Request MLNX_SIP_GET_DIMM_INFO + * Request MLXBF_SIP_GET_DIMM_INFO * * Retrieve information about DIMM on a certain slot.
* * Call register usage: - * a0: MLNX_SIP_GET_DIMM_INFO + * a0: MLXBF_SIP_GET_DIMM_INFO * a1: (Memory controller index) << 16 | (Dimm index in memory controller) * a2-7: not used. * @@ -61,7 +70,7 @@ * a0: MLXBF_DIMM_INFO defined below describing the DIMM. * a1-3: not used. */ -#define MLNX_SIP_GET_DIMM_INFO 0x82000008 +#define MLXBF_SIP_GET_DIMM_INFO 0x82000008 /* Format for the SMC response about the memory information */ #define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0) @@ -72,9 +81,15 @@ #define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24) struct bluefield_edac_priv { + /* pointer to device structure */ + struct device *dev; int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC]; void __iomem *emi_base; int dimm_per_mc; + /* access to secure regs supported */ + bool svc_sreg_support; + /* SMC table# for secure regs access */ + u32 sreg_tbl; }; static u64 smc_call1(u64 smc_op, u64 smc_arg) @@ -86,6 +101,71 @@ static u64 smc_call1(u64 smc_op, u64 smc_arg) return res.a0; } +static int secure_readl(void __iomem *addr, u32 *result, u32 sreg_tbl) +{ + struct arm_smccc_res res; + int status; + + arm_smccc_smc(MLXBF_READ_REG_32, sreg_tbl, (uintptr_t)addr, + 0, 0, 0, 0, 0, &res); + + status = res.a0; + + if (status == SMCCC_RET_NOT_SUPPORTED || + status == MLXBF_SMCCC_ACCESS_VIOLATION) + return -1; + + *result = (u32)res.a1; + return 0; +} + +static int secure_writel(void __iomem *addr, u32 data, u32 sreg_tbl) +{ + struct arm_smccc_res res; + int status; + + arm_smccc_smc(MLXBF_WRITE_REG_32, sreg_tbl, data, (uintptr_t)addr, + 0, 0, 0, 0, &res); + + status = res.a0; + + if (status == SMCCC_RET_NOT_SUPPORTED || + status == MLXBF_SMCCC_ACCESS_VIOLATION) + return -1; + else + return 0; +} + +static int bluefield_edac_readl(struct bluefield_edac_priv *priv, u32 offset, u32 *result) +{ + void __iomem *addr; + int err = 0; + + addr = priv->emi_base + offset; + + if (priv->svc_sreg_support) + err = secure_readl(addr, result, priv->sreg_tbl); + else + *result = readl(addr); + + return err; +} + +static int bluefield_edac_writel(struct bluefield_edac_priv *priv, u32 offset, u32 data) +{ + void __iomem *addr; + int err = 0; + + addr = priv->emi_base + offset; + + if (priv->svc_sreg_support) + err = secure_writel(addr, data, priv->sreg_tbl); + else + writel(data, addr); + + return err; +} + /* * Gather the ECC information from the External Memory Interface registers * and report it to the edac handler. @@ -99,7 +179,7 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; enum hw_event_mc_err_type ecc_type; u64 ecc_dimm_addr; - int ecc_dimm; + int ecc_dimm, err; ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : HW_EVENT_ERR_UNCORRECTED; @@ -109,14 +189,21 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, * registers with information about the last ECC error occurrence. */ ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; - writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL); + err = bluefield_edac_writel(priv, MLXBF_ECC_LATCH_SEL, ecc_latch_select); + if (err) + dev_err(priv->dev, "ECC latch select write failed.\n"); /* * Verify that the ECC reported info in the registers is of the * same type as the one asked to report. If not, just report the * error without the detailed information. 
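 * (The SERR and DERR fields extracted from MLXBF_SYNDROM below are what get compared against the error type the caller asked to report.)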
*/ - dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM); + err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom); + if (err) { + dev_err(priv->dev, "DRAM syndrom read failed.\n"); + return; + } + serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); @@ -127,13 +214,27 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, return; } - dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO); + err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info); + if (err) { + dev_err(priv->dev, "DRAM additional info read failed.\n"); + return; + } + err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; - edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0); - edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1); + err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0); + if (err) { + dev_err(priv->dev, "Error addr 0 read failed.\n"); + return; + } + + err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1); + if (err) { + dev_err(priv->dev, "Error addr 1 read failed.\n"); + return; + } ecc_dimm_addr = ((u64)edea1 << 32) | edea0; @@ -147,6 +248,7 @@ static void bluefield_edac_check(struct mem_ctl_info *mci) { struct bluefield_edac_priv *priv = mci->pvt_info; u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; + int err; /* * The memory controller might not be initialized by the firmware @@ -155,7 +257,12 @@ static void bluefield_edac_check(struct mem_ctl_info *mci) if (mci->edac_cap == EDAC_FLAG_NONE) return; - ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT); + err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count); + if (err) { + dev_err(priv->dev, "ECC count read failed.\n"); + return; + } + single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); @@ -172,15 +279,18 @@ static void bluefield_edac_check(struct mem_ctl_info *mci) } /* Write to clear reported errors. */ - if (ecc_count) - writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR); + if (ecc_count) { + err = bluefield_edac_writel(priv, MLXBF_ECC_ERR, ecc_error); + if (err) + dev_err(priv->dev, "ECC Error write failed.\n"); + } } /* Initialize the DIMMs information for the given memory controller. 
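 * Each slot is queried through the MLXBF_SIP_GET_DIMM_INFO SMC; a slot whose reported size is zero is marked MEM_EMPTY.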
*/ static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) { struct bluefield_edac_priv *priv = mci->pvt_info; - int mem_ctrl_idx = mci->mc_idx; + u64 mem_ctrl_idx = mci->mc_idx; struct dimm_info *dimm; u64 smc_info, smc_arg; int is_empty = 1, i; @@ -189,7 +299,7 @@ static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) dimm = mci->dimms[i]; smc_arg = mem_ctrl_idx << 16 | i; - smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg); + smc_info = smc_call1(MLXBF_SIP_GET_DIMM_INFO, smc_arg); if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { dimm->mtype = MEM_EMPTY; @@ -244,6 +354,7 @@ static int bluefield_edac_mc_probe(struct platform_device *pdev) struct bluefield_edac_priv *priv; struct device *dev = &pdev->dev; struct edac_mc_layer layers[1]; + struct arm_smccc_res res; struct mem_ctl_info *mci; struct resource *emi_res; unsigned int mc_idx, dimm_count; @@ -279,13 +390,43 @@ static int bluefield_edac_mc_probe(struct platform_device *pdev) return -ENOMEM; priv = mci->pvt_info; + priv->dev = dev; + + /* + * The "sec_reg_block" property in the ACPI table determines the method + * the driver uses to access the EMI registers: + * a) property is not present - directly access registers via readl/writel + * b) property is present - indirectly access registers via SMC calls + * (assuming required Silicon Provider service version found) + */ + if (device_property_read_u32(dev, "sec_reg_block", &priv->sreg_tbl)) { + priv->svc_sreg_support = false; + } else { + /* + * Check for minimum required Arm Silicon Provider (SiP) service + * version, ensuring support of required SMC function IDs. + */ + arm_smccc_smc(MLXBF_SIP_SVC_VERSION, 0, 0, 0, 0, 0, 0, 0, &res); + if (res.a0 == MLXBF_SVC_REQ_MAJOR && + res.a1 >= MLXBF_SVC_REQ_MINOR) { + priv->svc_sreg_support = true; + } else { + dev_err(dev, "Required SMCs are not supported.\n"); + ret = -EINVAL; + goto err; + } + } priv->dimm_per_mc = dimm_count; - priv->emi_base = devm_ioremap_resource(dev, emi_res); - if (IS_ERR(priv->emi_base)) { - dev_err(dev, "failed to map EMI IO resource\n"); - ret = PTR_ERR(priv->emi_base); - goto err; + if (!priv->svc_sreg_support) { + priv->emi_base = devm_ioremap_resource(dev, emi_res); + if (IS_ERR(priv->emi_base)) { + dev_err(dev, "failed to map EMI IO resource\n"); + ret = PTR_ERR(priv->emi_base); + goto err; + } + } else { + priv->emi_base = (void __iomem *)emi_res->start; } mci->pdev = dev; @@ -320,17 +461,14 @@ err: edac_mc_free(mci); return ret; - } -static int bluefield_edac_mc_remove(struct platform_device *pdev) +static void bluefield_edac_mc_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); - - return 0; } static const struct acpi_device_id bluefield_mc_acpi_ids[] = { diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c deleted file mode 100644 index bc1f3416400e..000000000000 --- a/drivers/edac/cell_edac.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Cell MIC driver for ECC counting - * - * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. - * <benh@kernel.crashing.org> - * - * This file may be distributed under the terms of the - * GNU General Public License. 
- */ -#undef DEBUG - -#include <linux/edac.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/platform_device.h> -#include <linux/stop_machine.h> -#include <linux/io.h> -#include <linux/of_address.h> -#include <asm/machdep.h> -#include <asm/cell-regs.h> - -#include "edac_module.h" - -struct cell_edac_priv -{ - struct cbe_mic_tm_regs __iomem *regs; - int node; - int chanmask; -#ifdef DEBUG - u64 prev_fir; -#endif -}; - -static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar) -{ - struct cell_edac_priv *priv = mci->pvt_info; - struct csrow_info *csrow = mci->csrows[0]; - unsigned long address, pfn, offset, syndrome; - - dev_dbg(mci->pdev, "ECC CE err on node %d, channel %d, ar = 0x%016llx\n", - priv->node, chan, ar); - - /* Address decoding is likely a bit bogus, to dbl check */ - address = (ar & 0xffffffffe0000000ul) >> 29; - if (priv->chanmask == 0x3) - address = (address << 1) | chan; - pfn = address >> PAGE_SHIFT; - offset = address & ~PAGE_MASK; - syndrome = (ar & 0x000000001fe00000ul) >> 21; - - /* TODO: Decoding of the error address */ - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - csrow->first_page + pfn, offset, syndrome, - 0, chan, -1, "", ""); -} - -static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar) -{ - struct cell_edac_priv *priv = mci->pvt_info; - struct csrow_info *csrow = mci->csrows[0]; - unsigned long address, pfn, offset; - - dev_dbg(mci->pdev, "ECC UE err on node %d, channel %d, ar = 0x%016llx\n", - priv->node, chan, ar); - - /* Address decoding is likely a bit bogus, to dbl check */ - address = (ar & 0xffffffffe0000000ul) >> 29; - if (priv->chanmask == 0x3) - address = (address << 1) | chan; - pfn = address >> PAGE_SHIFT; - offset = address & ~PAGE_MASK; - - /* TODO: Decoding of the error address */ - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - csrow->first_page + pfn, offset, 0, - 0, chan, -1, "", ""); -} - -static void cell_edac_check(struct mem_ctl_info *mci) -{ - struct cell_edac_priv *priv = mci->pvt_info; - u64 fir, addreg, clear = 0; - - fir = in_be64(&priv->regs->mic_fir); -#ifdef DEBUG - if (fir != priv->prev_fir) { - dev_dbg(mci->pdev, "fir change : 0x%016lx\n", fir); - priv->prev_fir = fir; - } -#endif - if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_SINGLE_0_ERR)) { - addreg = in_be64(&priv->regs->mic_df_ecc_address_0); - clear |= CBE_MIC_FIR_ECC_SINGLE_0_RESET; - cell_edac_count_ce(mci, 0, addreg); - } - if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_SINGLE_1_ERR)) { - addreg = in_be64(&priv->regs->mic_df_ecc_address_1); - clear |= CBE_MIC_FIR_ECC_SINGLE_1_RESET; - cell_edac_count_ce(mci, 1, addreg); - } - if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_MULTI_0_ERR)) { - addreg = in_be64(&priv->regs->mic_df_ecc_address_0); - clear |= CBE_MIC_FIR_ECC_MULTI_0_RESET; - cell_edac_count_ue(mci, 0, addreg); - } - if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_MULTI_1_ERR)) { - addreg = in_be64(&priv->regs->mic_df_ecc_address_1); - clear |= CBE_MIC_FIR_ECC_MULTI_1_RESET; - cell_edac_count_ue(mci, 1, addreg); - } - - /* The procedure for clearing FIR bits is a bit ... 
weird */ - if (clear) { - fir &= ~(CBE_MIC_FIR_ECC_ERR_MASK | CBE_MIC_FIR_ECC_SET_MASK); - fir |= CBE_MIC_FIR_ECC_RESET_MASK; - fir &= ~clear; - out_be64(&priv->regs->mic_fir, fir); - (void)in_be64(&priv->regs->mic_fir); - - mb(); /* sync up */ -#ifdef DEBUG - fir = in_be64(&priv->regs->mic_fir); - dev_dbg(mci->pdev, "fir clear : 0x%016lx\n", fir); -#endif - } -} - -static void cell_edac_init_csrows(struct mem_ctl_info *mci) -{ - struct csrow_info *csrow = mci->csrows[0]; - struct dimm_info *dimm; - struct cell_edac_priv *priv = mci->pvt_info; - struct device_node *np; - int j; - u32 nr_pages; - - for_each_node_by_name(np, "memory") { - struct resource r; - - /* We "know" that the Cell firmware only creates one entry - * in the "memory" nodes. If that changes, this code will - * need to be adapted. - */ - if (of_address_to_resource(np, 0, &r)) - continue; - if (of_node_to_nid(np) != priv->node) - continue; - csrow->first_page = r.start >> PAGE_SHIFT; - nr_pages = resource_size(&r) >> PAGE_SHIFT; - csrow->last_page = csrow->first_page + nr_pages - 1; - - for (j = 0; j < csrow->nr_channels; j++) { - dimm = csrow->channels[j]->dimm; - dimm->mtype = MEM_XDR; - dimm->edac_mode = EDAC_SECDED; - dimm->nr_pages = nr_pages / csrow->nr_channels; - } - dev_dbg(mci->pdev, - "Initialized on node %d, chanmask=0x%x," - " first_page=0x%lx, nr_pages=0x%x\n", - priv->node, priv->chanmask, - csrow->first_page, nr_pages); - break; - } - of_node_put(np); -} - -static int cell_edac_probe(struct platform_device *pdev) -{ - struct cbe_mic_tm_regs __iomem *regs; - struct mem_ctl_info *mci; - struct edac_mc_layer layers[2]; - struct cell_edac_priv *priv; - u64 reg; - int rc, chanmask, num_chans; - - regs = cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(pdev->id)); - if (regs == NULL) - return -ENODEV; - - edac_op_state = EDAC_OPSTATE_POLL; - - /* Get channel population */ - reg = in_be64(®s->mic_mnt_cfg); - dev_dbg(&pdev->dev, "MIC_MNT_CFG = 0x%016llx\n", reg); - chanmask = 0; - if (reg & CBE_MIC_MNT_CFG_CHAN_0_POP) - chanmask |= 0x1; - if (reg & CBE_MIC_MNT_CFG_CHAN_1_POP) - chanmask |= 0x2; - if (chanmask == 0) { - dev_warn(&pdev->dev, - "Yuck ! No channel populated ? Aborting !\n"); - return -ENODEV; - } - dev_dbg(&pdev->dev, "Initial FIR = 0x%016llx\n", - in_be64(®s->mic_fir)); - - /* Allocate & init EDAC MC data structure */ - num_chans = chanmask == 3 ? 
2 : 1; - - layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = 1; - layers[0].is_virt_csrow = true; - layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = num_chans; - layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers, - sizeof(struct cell_edac_priv)); - if (mci == NULL) - return -ENOMEM; - priv = mci->pvt_info; - priv->regs = regs; - priv->node = pdev->id; - priv->chanmask = chanmask; - mci->pdev = &pdev->dev; - mci->mtype_cap = MEM_FLAG_XDR; - mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; - mci->edac_cap = EDAC_FLAG_EC | EDAC_FLAG_SECDED; - mci->mod_name = "cell_edac"; - mci->ctl_name = "MIC"; - mci->dev_name = dev_name(&pdev->dev); - mci->edac_check = cell_edac_check; - cell_edac_init_csrows(mci); - - /* Register with EDAC core */ - rc = edac_mc_add_mc(mci); - if (rc) { - dev_err(&pdev->dev, "failed to register with EDAC core\n"); - edac_mc_free(mci); - return rc; - } - - return 0; -} - -static int cell_edac_remove(struct platform_device *pdev) -{ - struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); - if (mci) - edac_mc_free(mci); - return 0; -} - -static struct platform_driver cell_edac_driver = { - .driver = { - .name = "cbe-mic", - }, - .probe = cell_edac_probe, - .remove = cell_edac_remove, -}; - -static int __init cell_edac_init(void) -{ - /* Sanity check registers data structure */ - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_df_ecc_address_0) != 0xf8); - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_df_ecc_address_1) != 0x1b8); - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_df_config) != 0x218); - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_fir) != 0x230); - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_mnt_cfg) != 0x210); - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_exc) != 0x208); - - return platform_driver_register(&cell_edac_driver); -} - -static void __exit cell_edac_exit(void) -{ - platform_driver_unregister(&cell_edac_driver); -} - -module_init(cell_edac_init); -module_exit(cell_edac_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>"); -MODULE_DESCRIPTION("ECC counting for Cell MIC"); diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index 9797e6d60dde..9c9e4369c041 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -797,7 +797,7 @@ static void cpc925_add_edac_devices(void __iomem *vbase) dev_info->edac_idx = edac_device_alloc_index(); dev_info->edac_dev = edac_device_alloc_ctl_info(0, dev_info->ctl_name, - 1, NULL, 0, 0, NULL, 0, dev_info->edac_idx); + 1, NULL, 0, 0, dev_info->edac_idx); if (!dev_info->edac_dev) { cpc925_printk(KERN_ERR, "No memory for edac device\n"); goto err1; @@ -1010,7 +1010,7 @@ out: return res; } -static int cpc925_remove(struct platform_device *pdev) +static void cpc925_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); @@ -1023,8 +1023,6 @@ static int cpc925_remove(struct platform_device *pdev) edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); - - return 0; } static struct platform_driver cpc925_edac_driver = { diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c index 4804332d9946..8195fc9c9354 100644 --- a/drivers/edac/debugfs.c +++ b/drivers/edac/debugfs.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only + +#include <linux/string_choices.h> + #include "edac_module.h" static struct dentry *edac_debugfs; @@ -22,7 +25,7 @@ static ssize_t edac_fake_inject_write(struct 
file *file, "Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n", errcount, (type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE", - errcount > 1 ? "s" : "", + str_plural(errcount), mci->fake_inject_layer[0], mci->fake_inject_layer[1], mci->fake_inject_layer[2] diff --git a/drivers/edac/dmc520_edac.c b/drivers/edac/dmc520_edac.c index 1fa5ca57e9ec..64a4d0a07032 100644 --- a/drivers/edac/dmc520_edac.c +++ b/drivers/edac/dmc520_edac.c @@ -480,7 +480,6 @@ static int dmc520_edac_probe(struct platform_device *pdev) struct mem_ctl_info *mci; void __iomem *reg_base; u32 irq_mask_all = 0; - struct resource *res; struct device *dev; int ret, idx, irq; u32 reg_val; @@ -505,8 +504,7 @@ static int dmc520_edac_probe(struct platform_device *pdev) } /* Initialize dmc520 edac */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - reg_base = devm_ioremap_resource(dev, res); + reg_base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(reg_base)) return PTR_ERR(reg_base); @@ -602,7 +600,7 @@ err: return ret; } -static int dmc520_edac_remove(struct platform_device *pdev) +static void dmc520_edac_remove(struct platform_device *pdev) { u32 reg_val, idx, irq_mask_all = 0; struct mem_ctl_info *mci; @@ -626,8 +624,6 @@ static int dmc520_edac_remove(struct platform_device *pdev) edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); - - return 0; } static const struct of_device_id dmc520_edac_driver_id[] = { diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index ac7c9b42d4c7..7221b4bb6df2 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -1462,7 +1462,7 @@ module_init(e752x_init); module_exit(e752x_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman\n"); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman"); MODULE_DESCRIPTION("MC support for Intel e752x/3100 memory controllers"); module_param(force_function_unhide, int, 0444); diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 497e710fca3d..5852b95fa470 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -596,8 +596,7 @@ module_init(e7xxx_init); module_exit(e7xxx_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n" - "Based on.work by Dan Hollis et al"); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al"); MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers"); module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c new file mode 100644 index 000000000000..51c451c7f0f0 --- /dev/null +++ b/drivers/edac/ecs.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic ECS driver is designed to support control of on-die error + * check scrub (e.g., DDR5 ECS). The common sysfs ECS interface abstracts + * the control of various ECS functionalities into a unified set of functions. + * + * Copyright (c) 2024-2025 HiSilicon Limited. 
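+ * For each media FRU an "ecs_fruN" sysfs attribute group is created, exposing + * log_entry_type, mode, reset and threshold controls.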
+ */ + +#include <linux/edac.h> + +#define EDAC_ECS_FRU_NAME "ecs_fru" + +enum edac_ecs_attributes { + ECS_LOG_ENTRY_TYPE, + ECS_MODE, + ECS_RESET, + ECS_THRESHOLD, + ECS_MAX_ATTRS +}; + +struct edac_ecs_dev_attr { + struct device_attribute dev_attr; + int fru_id; +}; + +struct edac_ecs_fru_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_ecs_dev_attr dev_attr[ECS_MAX_ATTRS]; + struct attribute *ecs_attrs[ECS_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +struct edac_ecs_context { + u16 num_media_frus; + struct edac_ecs_fru_context *fru_ctxs; +}; + +#define TO_ECS_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_ecs_dev_attr, dev_attr) + +#define EDAC_ECS_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \ + dev_attr->fru_id, &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +EDAC_ECS_ATTR_SHOW(log_entry_type, get_log_entry_type, u32, "%u\n") +EDAC_ECS_ATTR_SHOW(mode, get_mode, u32, "%u\n") +EDAC_ECS_ATTR_SHOW(threshold, get_threshold, u32, "%u\n") + +#define EDAC_ECS_ATTR_STORE(attrib, cb, type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \ + type data; \ + int ret; \ + \ + ret = conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \ + dev_attr->fru_id, data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +EDAC_ECS_ATTR_STORE(log_entry_type, set_log_entry_type, unsigned long, kstrtoul) +EDAC_ECS_ATTR_STORE(mode, set_mode, unsigned long, kstrtoul) +EDAC_ECS_ATTR_STORE(reset, reset, unsigned long, kstrtoul) +EDAC_ECS_ATTR_STORE(threshold, set_threshold, unsigned long, kstrtoul) + +static umode_t ecs_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; + + switch (attr_id) { + case ECS_LOG_ENTRY_TYPE: + if (ops->get_log_entry_type) { + if (ops->set_log_entry_type) + return a->mode; + else + return 0444; + } + break; + case ECS_MODE: + if (ops->get_mode) { + if (ops->set_mode) + return a->mode; + else + return 0444; + } + break; + case ECS_RESET: + if (ops->reset) + return a->mode; + break; + case ECS_THRESHOLD: + if (ops->get_threshold) { + if (ops->set_threshold) + return a->mode; + else + return 0444; + } + break; + default: + break; + } + + return 0; +} + +#define EDAC_ECS_ATTR_RO(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RO(_name), \ + .fru_id = _fru_id }) + +#define EDAC_ECS_ATTR_WO(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_WO(_name), \ + .fru_id = _fru_id }) + +#define EDAC_ECS_ATTR_RW(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RW(_name), \ + .fru_id = _fru_id }) + +static int ecs_create_desc(struct device *ecs_dev, const struct 
attribute_group **attr_groups, + u16 num_media_frus) +{ + struct edac_ecs_context *ecs_ctx; + u32 fru; + + ecs_ctx = devm_kzalloc(ecs_dev, sizeof(*ecs_ctx), GFP_KERNEL); + if (!ecs_ctx) + return -ENOMEM; + + ecs_ctx->num_media_frus = num_media_frus; + ecs_ctx->fru_ctxs = devm_kcalloc(ecs_dev, num_media_frus, + sizeof(*ecs_ctx->fru_ctxs), + GFP_KERNEL); + if (!ecs_ctx->fru_ctxs) + return -ENOMEM; + + for (fru = 0; fru < num_media_frus; fru++) { + struct edac_ecs_fru_context *fru_ctx = &ecs_ctx->fru_ctxs[fru]; + struct attribute_group *group = &fru_ctx->group; + int i; + + fru_ctx->dev_attr[ECS_LOG_ENTRY_TYPE] = EDAC_ECS_ATTR_RW(log_entry_type, fru); + fru_ctx->dev_attr[ECS_MODE] = EDAC_ECS_ATTR_RW(mode, fru); + fru_ctx->dev_attr[ECS_RESET] = EDAC_ECS_ATTR_WO(reset, fru); + fru_ctx->dev_attr[ECS_THRESHOLD] = EDAC_ECS_ATTR_RW(threshold, fru); + + for (i = 0; i < ECS_MAX_ATTRS; i++) { + sysfs_attr_init(&fru_ctx->dev_attr[i].dev_attr.attr); + fru_ctx->ecs_attrs[i] = &fru_ctx->dev_attr[i].dev_attr.attr; + } + + sprintf(fru_ctx->name, "%s%d", EDAC_ECS_FRU_NAME, fru); + group->name = fru_ctx->name; + group->attrs = fru_ctx->ecs_attrs; + group->is_visible = ecs_attr_visible; + + attr_groups[fru] = group; + } + + return 0; +} + +/** + * edac_ecs_get_desc - get EDAC ECS descriptors + * @ecs_dev: client device, supports ECS feature + * @attr_groups: pointer to attribute group container + * @num_media_frus: number of media FRUs in the device + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. + */ +int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, u16 num_media_frus) +{ + if (!ecs_dev || !attr_groups || !num_media_frus) + return -EINVAL; + + return ecs_create_desc(ecs_dev, attr_groups, num_media_frus); +} diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 0689e1510721..0734909b08a4 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -56,14 +56,12 @@ static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) struct edac_device_ctl_info * edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instances, char *blk_name, unsigned nr_blocks, unsigned off_val, - struct edac_dev_sysfs_block_attribute *attrib_spec, - unsigned nr_attrib, int device_index) + int device_index) { - struct edac_dev_sysfs_block_attribute *dev_attrib, *attrib_p, *attrib; struct edac_device_block *dev_blk, *blk_p, *blk; struct edac_device_instance *dev_inst, *inst; struct edac_device_ctl_info *dev_ctl; - unsigned instance, block, attr; + unsigned instance, block; void *pvt; int err; @@ -85,15 +83,6 @@ edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instance dev_ctl->blocks = dev_blk; - if (nr_attrib) { - dev_attrib = kcalloc(nr_attrib, sizeof(struct edac_dev_sysfs_block_attribute), - GFP_KERNEL); - if (!dev_attrib) - goto free; - - dev_ctl->attribs = dev_attrib; - } - if (pvt_sz) { pvt = kzalloc(pvt_sz, GFP_KERNEL); if (!pvt) @@ -132,44 +121,6 @@ edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instance edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n", instance, inst, block, blk, blk->name); - - /* if there are NO attributes OR no attribute pointer - * then continue on to next block iteration - */ - if ((nr_attrib == 0) || (attrib_spec == NULL)) - continue; - - /* setup the attribute array for this block */ - blk->nr_attribs = nr_attrib; - attrib_p = 
&dev_attrib[block*nr_instances*nr_attrib]; - blk->block_attributes = attrib_p; - - edac_dbg(4, "THIS BLOCK_ATTRIB=%p\n", - blk->block_attributes); - - /* Initialize every user specified attribute in this - * block with the data the caller passed in - * Each block gets its own copy of pointers, - * and its unique 'value' - */ - for (attr = 0; attr < nr_attrib; attr++) { - attrib = &attrib_p[attr]; - - /* populate the unique per attrib - * with the code pointers and info - */ - attrib->attr = attrib_spec[attr].attr; - attrib->show = attrib_spec[attr].show; - attrib->store = attrib_spec[attr].store; - - attrib->block = blk; /* up link */ - - edac_dbg(4, "alloc-attrib=%p attrib_name='%s' attrib-spec=%p spec-name=%s\n", - attrib, attrib->attr.name, - &attrib_spec[attr], - attrib_spec[attr].attr.name - ); - } } } @@ -619,3 +570,188 @@ void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev, block ? block->name : "N/A", count, msg); } EXPORT_SYMBOL_GPL(edac_device_handle_ue_count); + +static void edac_dev_release(struct device *dev) +{ + struct edac_dev_feat_ctx *ctx = container_of(dev, struct edac_dev_feat_ctx, dev); + + kfree(ctx->mem_repair); + kfree(ctx->scrub); + kfree(ctx->dev.groups); + kfree(ctx); +} + +static const struct device_type edac_dev_type = { + .name = "edac_dev", + .release = edac_dev_release, +}; + +static void edac_dev_unreg(void *data) +{ + device_unregister(data); +} + +/** + * edac_dev_register - register device for RAS features with EDAC + * @parent: parent device. + * @name: name for the folder in the /sys/bus/edac/devices/, + * which is derived from the parent device. + * For e.g. /sys/bus/edac/devices/cxl_mem0/ + * @private: parent driver's data to store in the context if any. + * @num_features: number of RAS features to register. + * @ras_features: list of RAS features to register. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. 
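+ * Note: the registered device is removed automatically through a devm action + * tied to @parent, so callers do not need an explicit unregister.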
+ * + */ +int edac_dev_register(struct device *parent, char *name, + void *private, int num_features, + const struct edac_dev_feature *ras_features) +{ + const struct attribute_group **ras_attr_groups; + struct edac_dev_data *dev_data; + struct edac_dev_feat_ctx *ctx; + int mem_repair_cnt = 0; + int attr_gcnt = 0; + int ret = -ENOMEM; + int scrub_cnt = 0; + int feat; + + if (!parent || !name || !num_features || !ras_features) + return -EINVAL; + + /* Double parse to make space for attributes */ + for (feat = 0; feat < num_features; feat++) { + switch (ras_features[feat].ft_type) { + case RAS_FEAT_SCRUB: + attr_gcnt++; + scrub_cnt++; + break; + case RAS_FEAT_ECS: + attr_gcnt += ras_features[feat].ecs_info.num_media_frus; + break; + case RAS_FEAT_MEM_REPAIR: + attr_gcnt++; + mem_repair_cnt++; + break; + default: + return -EINVAL; + } + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ras_attr_groups = kcalloc(attr_gcnt + 1, sizeof(*ras_attr_groups), GFP_KERNEL); + if (!ras_attr_groups) + goto ctx_free; + + if (scrub_cnt) { + ctx->scrub = kcalloc(scrub_cnt, sizeof(*ctx->scrub), GFP_KERNEL); + if (!ctx->scrub) + goto groups_free; + } + + if (mem_repair_cnt) { + ctx->mem_repair = kcalloc(mem_repair_cnt, sizeof(*ctx->mem_repair), GFP_KERNEL); + if (!ctx->mem_repair) + goto data_mem_free; + } + + attr_gcnt = 0; + scrub_cnt = 0; + mem_repair_cnt = 0; + for (feat = 0; feat < num_features; feat++, ras_features++) { + switch (ras_features->ft_type) { + case RAS_FEAT_SCRUB: + if (!ras_features->scrub_ops || scrub_cnt != ras_features->instance) { + ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->scrub[scrub_cnt]; + dev_data->instance = scrub_cnt; + dev_data->scrub_ops = ras_features->scrub_ops; + dev_data->private = ras_features->ctx; + ret = edac_scrub_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->instance); + if (ret) + goto data_mem_free; + + scrub_cnt++; + attr_gcnt++; + break; + case RAS_FEAT_ECS: + if (!ras_features->ecs_ops) { + ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->ecs; + dev_data->ecs_ops = ras_features->ecs_ops; + dev_data->private = ras_features->ctx; + ret = edac_ecs_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->ecs_info.num_media_frus); + if (ret) + goto data_mem_free; + + attr_gcnt += ras_features->ecs_info.num_media_frus; + break; + case RAS_FEAT_MEM_REPAIR: + if (!ras_features->mem_repair_ops || + mem_repair_cnt != ras_features->instance) { + ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->mem_repair[mem_repair_cnt]; + dev_data->instance = mem_repair_cnt; + dev_data->mem_repair_ops = ras_features->mem_repair_ops; + dev_data->private = ras_features->ctx; + ret = edac_mem_repair_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->instance); + if (ret) + goto data_mem_free; + + mem_repair_cnt++; + attr_gcnt++; + break; + default: + ret = -EINVAL; + goto data_mem_free; + } + } + + ctx->dev.parent = parent; + ctx->dev.bus = edac_get_sysfs_subsys(); + ctx->dev.type = &edac_dev_type; + ctx->dev.groups = ras_attr_groups; + ctx->private = private; + dev_set_drvdata(&ctx->dev, ctx); + + ret = dev_set_name(&ctx->dev, "%s", name); + if (ret) + goto data_mem_free; + + ret = device_register(&ctx->dev); + if (ret) { + put_device(&ctx->dev); + return ret; + } + + return devm_add_action_or_reset(parent, edac_dev_unreg, &ctx->dev); + +data_mem_free: + kfree(ctx->mem_repair); + kfree(ctx->scrub); +groups_free: + kfree(ras_attr_groups); +ctx_free: + kfree(ctx); + return ret; +} 
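A minimal usage sketch, not part of this patch: a hypothetical caller with a single scrub instance might register its RAS feature as below. The edac_dev_feature fields follow their use in edac_dev_register() above; my_register_ras(), my_scrub_ops, my_ctx and the "my_dev0" name are illustrative assumptions.

	static int my_register_ras(struct device *parent,
				   const struct edac_scrub_ops *my_scrub_ops,
				   void *my_ctx)
	{
		struct edac_dev_feature feature = {
			.ft_type   = RAS_FEAT_SCRUB,
			.instance  = 0,	/* must match the per-type count */
			.scrub_ops = my_scrub_ops,
			.ctx       = my_ctx,
		};

		/* Creates /sys/bus/edac/devices/my_dev0/; torn down via devm. */
		return edac_dev_register(parent, "my_dev0", NULL, 1, &feature);
	}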
+EXPORT_SYMBOL_GPL(edac_dev_register); diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h index 3f44e6b9d387..034711d71ebf 100644 --- a/drivers/edac/edac_device.h +++ b/drivers/edac/edac_device.h @@ -22,7 +22,6 @@ #ifndef _EDAC_DEVICE_H_ #define _EDAC_DEVICE_H_ -#include <linux/completion.h> #include <linux/device.h> #include <linux/edac.h> #include <linux/kobject.h> @@ -95,22 +94,13 @@ struct edac_dev_sysfs_attribute { * * used in leaf 'block' nodes for adding controls/attributes * - * each block in each instance of the containing control structure - * can have an array of the following. The show and store functions - * will be filled in with the show/store function in the - * low level driver. - * - * The 'value' field will be the actual value field used for - * counting + * each block in each instance of the containing control structure can + * have an array of the following. The show function will be filled in + * with the show function in the low level driver. */ struct edac_dev_sysfs_block_attribute { struct attribute attr; ssize_t (*show)(struct kobject *, struct attribute *, char *); - ssize_t (*store)(struct kobject *, struct attribute *, - const char *, size_t); - struct edac_device_block *block; - - unsigned int value; }; /* device block control structure */ @@ -176,7 +166,7 @@ struct edac_device_ctl_info { struct edac_dev_sysfs_attribute *sysfs_attributes; /* pointer to main 'edac' subsys in sysfs */ - struct bus_type *edac_subsys; + const struct bus_type *edac_subsys; /* the internal state of this controller instance */ int op_state; @@ -200,8 +190,6 @@ struct edac_device_ctl_info { unsigned long start_time; /* edac_device load start time (jiffies) */ - struct completion removal_complete; - /* sysfs top name under 'edac' directory * and instance name: * cpu/cpu0/... 
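With the slimmed, show-only struct edac_dev_sysfs_block_attribute above, a leaf block control reduces to an attribute plus a show() callback. A minimal sketch, assuming a hypothetical "status" attribute; only the struct layout comes from this header:

#include <linux/stat.h>
#include <linux/sysfs.h>
#include "edac_device.h"

static ssize_t my_block_status_show(struct kobject *kobj,
				    struct attribute *attr, char *buf)
{
	/* A real driver would derive its per-block state from kobj here. */
	return sysfs_emit(buf, "ok\n");
}

static struct edac_dev_sysfs_block_attribute my_block_status = {
	.attr = { .name = "status", .mode = S_IRUGO },
	.show = my_block_status_show,
};

The BLOCK_ATTR() macro change in edac_device_sysfs.c below drops the store argument for the built-in ce_count/ue_count attributes in the same way.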
@@ -217,7 +205,6 @@ struct edac_device_ctl_info { u32 nr_instances; struct edac_device_instance *instances; struct edac_device_block *blocks; - struct edac_dev_sysfs_block_attribute *attribs; /* Event counters for the this whole EDAC Device */ struct edac_device_counter counters; @@ -245,8 +232,6 @@ extern struct edac_device_ctl_info *edac_device_alloc_ctl_info( char *edac_device_name, unsigned nr_instances, char *edac_block_name, unsigned nr_blocks, unsigned offset_value, - struct edac_dev_sysfs_block_attribute *block_attributes, - unsigned nr_attribs, int device_index); /* The offset value can be: @@ -356,7 +341,6 @@ static inline void __edac_device_free_ctl_info(struct edac_device_ctl_info *ci) { if (ci) { kfree(ci->pvt_info); - kfree(ci->attribs); kfree(ci->blocks); kfree(ci->instances); kfree(ci); diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index ac678b4a21fc..fcebc4ffea26 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c @@ -228,8 +228,9 @@ static struct kobj_type ktype_device_ctrl = { */ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) { - struct bus_type *edac_subsys; - int err; + struct device *dev_root; + const struct bus_type *edac_subsys; + int err = -ENODEV; edac_dbg(1, "\n"); @@ -247,15 +248,16 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) */ edac_dev->owner = THIS_MODULE; - if (!try_module_get(edac_dev->owner)) { - err = -ENODEV; + if (!try_module_get(edac_dev->owner)) goto err_out; - } /* register */ - err = kobject_init_and_add(&edac_dev->kobj, &ktype_device_ctrl, - &edac_subsys->dev_root->kobj, - "%s", edac_dev->name); + dev_root = bus_get_dev_root(edac_subsys); + if (dev_root) { + err = kobject_init_and_add(&edac_dev->kobj, &ktype_device_ctrl, + &dev_root->kobj, "%s", edac_dev->name); + put_device(dev_root); + } if (err) { edac_dbg(1, "Failed to register '.../edac/%s'\n", edac_dev->name); @@ -455,35 +457,19 @@ static ssize_t edac_dev_block_show(struct kobject *kobj, return -EIO; } -/* Function to 'store' fields into the edac_dev 'block' structure */ -static ssize_t edac_dev_block_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, size_t count) -{ - struct edac_dev_sysfs_block_attribute *block_attr; - - block_attr = to_block_attr(attr); - - if (block_attr->store) - return block_attr->store(kobj, attr, buffer, count); - return -EIO; -} - /* edac_dev file operations for a 'block' */ static const struct sysfs_ops device_block_ops = { .show = edac_dev_block_show, - .store = edac_dev_block_store }; -#define BLOCK_ATTR(_name,_mode,_show,_store) \ +#define BLOCK_ATTR(_name,_mode,_show) \ static struct edac_dev_sysfs_block_attribute attr_block_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ .show = _show, \ - .store = _store, \ }; -BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL); -BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL); +BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show); +BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show); /* list of edac_dev 'block' attributes */ static struct attribute *device_block_attrs[] = { diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 6faeb2ab3960..0959320fe51c 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -166,6 +166,7 @@ const char * const edac_mem_types[] = { [MEM_NVDIMM] = "Non-volatile-RAM", [MEM_WIO2] = "Wide-IO-2", [MEM_HBM2] = "High-bandwidth-memory-Gen2", + [MEM_HBM3] = 
"High-bandwidth-memory-Gen3", }; EXPORT_SYMBOL_GPL(edac_mem_types); @@ -213,7 +214,7 @@ static int edac_mc_alloc_csrows(struct mem_ctl_info *mci) unsigned int row, chn; /* - * Alocate and fill the csrow/channels structs + * Allocate and fill the csrow/channels structs */ mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL); if (!mci->csrows) diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 15f63452a9be..091cc6aae8a9 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -115,378 +115,6 @@ static const char * const edac_caps[] = { [EDAC_S16ECD16ED] = "S16ECD16ED" }; -#ifdef CONFIG_EDAC_LEGACY_SYSFS -/* - * EDAC sysfs CSROW data structures and methods - */ - -#define to_csrow(k) container_of(k, struct csrow_info, dev) - -/* - * We need it to avoid namespace conflicts between the legacy API - * and the per-dimm/per-rank one - */ -#define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \ - static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) - -struct dev_ch_attribute { - struct device_attribute attr; - unsigned int channel; -}; - -#define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \ - static struct dev_ch_attribute dev_attr_legacy_##_name = \ - { __ATTR(_name, _mode, _show, _store), (_var) } - -#define to_channel(k) (container_of(k, struct dev_ch_attribute, attr)->channel) - -/* Set of more default csrow<id> attribute show/store functions */ -static ssize_t csrow_ue_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sprintf(data, "%u\n", csrow->ue_count); -} - -static ssize_t csrow_ce_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sprintf(data, "%u\n", csrow->ce_count); -} - -static ssize_t csrow_size_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - int i; - u32 nr_pages = 0; - - for (i = 0; i < csrow->nr_channels; i++) - nr_pages += csrow->channels[i]->dimm->nr_pages; - return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages)); -} - -static ssize_t csrow_mem_type_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sprintf(data, "%s\n", edac_mem_types[csrow->channels[0]->dimm->mtype]); -} - -static ssize_t csrow_dev_type_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sprintf(data, "%s\n", dev_types[csrow->channels[0]->dimm->dtype]); -} - -static ssize_t csrow_edac_mode_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sprintf(data, "%s\n", edac_caps[csrow->channels[0]->dimm->edac_mode]); -} - -/* show/store functions for DIMM Label attributes */ -static ssize_t channel_dimm_label_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - - /* if field has not been initialized, there is nothing to send */ - if (!rank->dimm->label[0]) - return 0; - - return snprintf(data, sizeof(rank->dimm->label) + 1, "%s\n", - rank->dimm->label); -} - -static ssize_t channel_dimm_label_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t 
count) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - size_t copy_count = count; - - if (count == 0) - return -EINVAL; - - if (data[count - 1] == '\0' || data[count - 1] == '\n') - copy_count -= 1; - - if (copy_count == 0 || copy_count >= sizeof(rank->dimm->label)) - return -EINVAL; - - strncpy(rank->dimm->label, data, copy_count); - rank->dimm->label[copy_count] = '\0'; - - return count; -} - -/* show function for dynamic chX_ce_count attribute */ -static ssize_t channel_ce_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - - return sprintf(data, "%u\n", rank->ce_count); -} - -/* cwrow<id>/attribute files */ -DEVICE_ATTR_LEGACY(size_mb, S_IRUGO, csrow_size_show, NULL); -DEVICE_ATTR_LEGACY(dev_type, S_IRUGO, csrow_dev_type_show, NULL); -DEVICE_ATTR_LEGACY(mem_type, S_IRUGO, csrow_mem_type_show, NULL); -DEVICE_ATTR_LEGACY(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL); -DEVICE_ATTR_LEGACY(ue_count, S_IRUGO, csrow_ue_count_show, NULL); -DEVICE_ATTR_LEGACY(ce_count, S_IRUGO, csrow_ce_count_show, NULL); - -/* default attributes of the CSROW<id> object */ -static struct attribute *csrow_attrs[] = { - &dev_attr_legacy_dev_type.attr, - &dev_attr_legacy_mem_type.attr, - &dev_attr_legacy_edac_mode.attr, - &dev_attr_legacy_size_mb.attr, - &dev_attr_legacy_ue_count.attr, - &dev_attr_legacy_ce_count.attr, - NULL, -}; - -static const struct attribute_group csrow_attr_grp = { - .attrs = csrow_attrs, -}; - -static const struct attribute_group *csrow_attr_groups[] = { - &csrow_attr_grp, - NULL -}; - -static const struct device_type csrow_attr_type = { - .groups = csrow_attr_groups, -}; - -/* - * possible dynamic channel DIMM Label attribute files - * - */ -DEVICE_CHANNEL(ch0_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 0); -DEVICE_CHANNEL(ch1_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 1); -DEVICE_CHANNEL(ch2_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 2); -DEVICE_CHANNEL(ch3_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 3); -DEVICE_CHANNEL(ch4_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 4); -DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 5); -DEVICE_CHANNEL(ch6_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 6); -DEVICE_CHANNEL(ch7_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 7); -DEVICE_CHANNEL(ch8_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 8); -DEVICE_CHANNEL(ch9_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 9); -DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 10); -DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 11); - -/* Total possible dynamic DIMM Label attribute file table */ -static struct attribute *dynamic_csrow_dimm_attr[] = { - &dev_attr_legacy_ch0_dimm_label.attr.attr, - &dev_attr_legacy_ch1_dimm_label.attr.attr, - &dev_attr_legacy_ch2_dimm_label.attr.attr, - &dev_attr_legacy_ch3_dimm_label.attr.attr, - 
&dev_attr_legacy_ch4_dimm_label.attr.attr, - &dev_attr_legacy_ch5_dimm_label.attr.attr, - &dev_attr_legacy_ch6_dimm_label.attr.attr, - &dev_attr_legacy_ch7_dimm_label.attr.attr, - &dev_attr_legacy_ch8_dimm_label.attr.attr, - &dev_attr_legacy_ch9_dimm_label.attr.attr, - &dev_attr_legacy_ch10_dimm_label.attr.attr, - &dev_attr_legacy_ch11_dimm_label.attr.attr, - NULL -}; - -/* possible dynamic channel ce_count attribute files */ -DEVICE_CHANNEL(ch0_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 0); -DEVICE_CHANNEL(ch1_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 1); -DEVICE_CHANNEL(ch2_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 2); -DEVICE_CHANNEL(ch3_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 3); -DEVICE_CHANNEL(ch4_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 4); -DEVICE_CHANNEL(ch5_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 5); -DEVICE_CHANNEL(ch6_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 6); -DEVICE_CHANNEL(ch7_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 7); -DEVICE_CHANNEL(ch8_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 8); -DEVICE_CHANNEL(ch9_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 9); -DEVICE_CHANNEL(ch10_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 10); -DEVICE_CHANNEL(ch11_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 11); - -/* Total possible dynamic ce_count attribute file table */ -static struct attribute *dynamic_csrow_ce_count_attr[] = { - &dev_attr_legacy_ch0_ce_count.attr.attr, - &dev_attr_legacy_ch1_ce_count.attr.attr, - &dev_attr_legacy_ch2_ce_count.attr.attr, - &dev_attr_legacy_ch3_ce_count.attr.attr, - &dev_attr_legacy_ch4_ce_count.attr.attr, - &dev_attr_legacy_ch5_ce_count.attr.attr, - &dev_attr_legacy_ch6_ce_count.attr.attr, - &dev_attr_legacy_ch7_ce_count.attr.attr, - &dev_attr_legacy_ch8_ce_count.attr.attr, - &dev_attr_legacy_ch9_ce_count.attr.attr, - &dev_attr_legacy_ch10_ce_count.attr.attr, - &dev_attr_legacy_ch11_ce_count.attr.attr, - NULL -}; - -static umode_t csrow_dev_is_visible(struct kobject *kobj, - struct attribute *attr, int idx) -{ - struct device *dev = kobj_to_dev(kobj); - struct csrow_info *csrow = container_of(dev, struct csrow_info, dev); - - if (idx >= csrow->nr_channels) - return 0; - - if (idx >= ARRAY_SIZE(dynamic_csrow_ce_count_attr) - 1) { - WARN_ONCE(1, "idx: %d\n", idx); - return 0; - } - - /* Only expose populated DIMMs */ - if (!csrow->channels[idx]->dimm->nr_pages) - return 0; - - return attr->mode; -} - - -static const struct attribute_group csrow_dev_dimm_group = { - .attrs = dynamic_csrow_dimm_attr, - .is_visible = csrow_dev_is_visible, -}; - -static const struct attribute_group csrow_dev_ce_count_group = { - .attrs = dynamic_csrow_ce_count_attr, - .is_visible = csrow_dev_is_visible, -}; - -static const struct attribute_group *csrow_dev_groups[] = { - &csrow_dev_dimm_group, - &csrow_dev_ce_count_group, - NULL -}; - -static void csrow_release(struct device *dev) -{ - /* - * Nothing to do, just unregister sysfs here. The mci - * device owns the data and will also release it. 
- */ -} - -static inline int nr_pages_per_csrow(struct csrow_info *csrow) -{ - int chan, nr_pages = 0; - - for (chan = 0; chan < csrow->nr_channels; chan++) - nr_pages += csrow->channels[chan]->dimm->nr_pages; - - return nr_pages; -} - -/* Create a CSROW object under specifed edac_mc_device */ -static int edac_create_csrow_object(struct mem_ctl_info *mci, - struct csrow_info *csrow, int index) -{ - int err; - - csrow->dev.type = &csrow_attr_type; - csrow->dev.groups = csrow_dev_groups; - csrow->dev.release = csrow_release; - device_initialize(&csrow->dev); - csrow->dev.parent = &mci->dev; - csrow->mci = mci; - dev_set_name(&csrow->dev, "csrow%d", index); - dev_set_drvdata(&csrow->dev, csrow); - - err = device_add(&csrow->dev); - if (err) { - edac_dbg(1, "failure: create device %s\n", dev_name(&csrow->dev)); - put_device(&csrow->dev); - return err; - } - - edac_dbg(0, "device %s created\n", dev_name(&csrow->dev)); - - return 0; -} - -/* Create a CSROW object under specifed edac_mc_device */ -static int edac_create_csrow_objects(struct mem_ctl_info *mci) -{ - int err, i; - struct csrow_info *csrow; - - for (i = 0; i < mci->nr_csrows; i++) { - csrow = mci->csrows[i]; - if (!nr_pages_per_csrow(csrow)) - continue; - err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) - goto error; - } - return 0; - -error: - for (--i; i >= 0; i--) { - if (device_is_registered(&mci->csrows[i]->dev)) - device_unregister(&mci->csrows[i]->dev); - } - - return err; -} - -static void edac_delete_csrow_objects(struct mem_ctl_info *mci) -{ - int i; - - for (i = 0; i < mci->nr_csrows; i++) { - if (device_is_registered(&mci->csrows[i]->dev)) - device_unregister(&mci->csrows[i]->dev); - } -} - -#endif - /* * Per-dimm (or per-rank) devices */ @@ -515,7 +143,7 @@ static ssize_t dimmdev_label_show(struct device *dev, if (!dimm->label[0]) return 0; - return snprintf(data, sizeof(dimm->label) + 1, "%s\n", dimm->label); + return sysfs_emit(data, "%s\n", dimm->label); } static ssize_t dimmdev_label_store(struct device *dev, @@ -535,7 +163,7 @@ static ssize_t dimmdev_label_store(struct device *dev, if (copy_count == 0 || copy_count >= sizeof(dimm->label)) return -EINVAL; - strncpy(dimm->label, data, copy_count); + memcpy(dimm->label, data, copy_count); dimm->label[copy_count] = '\0'; return count; @@ -546,7 +174,7 @@ static ssize_t dimmdev_size_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); - return sprintf(data, "%u\n", PAGES_TO_MiB(dimm->nr_pages)); + return sysfs_emit(data, "%u\n", PAGES_TO_MiB(dimm->nr_pages)); } static ssize_t dimmdev_mem_type_show(struct device *dev, @@ -554,7 +182,7 @@ static ssize_t dimmdev_mem_type_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); - return sprintf(data, "%s\n", edac_mem_types[dimm->mtype]); + return sysfs_emit(data, "%s\n", edac_mem_types[dimm->mtype]); } static ssize_t dimmdev_dev_type_show(struct device *dev, @@ -562,7 +190,7 @@ static ssize_t dimmdev_dev_type_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); - return sprintf(data, "%s\n", dev_types[dimm->dtype]); + return sysfs_emit(data, "%s\n", dev_types[dimm->dtype]); } static ssize_t dimmdev_edac_mode_show(struct device *dev, @@ -571,7 +199,7 @@ static ssize_t dimmdev_edac_mode_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); - return sprintf(data, "%s\n", edac_caps[dimm->edac_mode]); + return sysfs_emit(data, "%s\n", edac_caps[dimm->edac_mode]); } static ssize_t dimmdev_ce_count_show(struct device *dev, @@ -580,7 +208,7 @@ static ssize_t 
dimmdev_ce_count_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); - return sprintf(data, "%u\n", dimm->ce_count); + return sysfs_emit(data, "%u\n", dimm->ce_count); } static ssize_t dimmdev_ue_count_show(struct device *dev, @@ -589,7 +217,7 @@ static ssize_t dimmdev_ue_count_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); - return sprintf(data, "%u\n", dimm->ue_count); + return sysfs_emit(data, "%u\n", dimm->ue_count); } /* dimm/rank attribute files */ @@ -637,7 +265,7 @@ static void dimm_release(struct device *dev) */ } -/* Create a DIMM object under specifed memory controller device */ +/* Create a DIMM object under specified memory controller device */ static int edac_create_dimm_object(struct mem_ctl_info *mci, struct dimm_info *dimm) { @@ -758,7 +386,7 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev, return bandwidth; } - return sprintf(data, "%d\n", bandwidth); + return sysfs_emit(data, "%d\n", bandwidth); } /* default attribute files for the MCI object */ @@ -768,7 +396,7 @@ static ssize_t mci_ue_count_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%u\n", mci->ue_mc); + return sysfs_emit(data, "%u\n", mci->ue_mc); } static ssize_t mci_ce_count_show(struct device *dev, @@ -777,7 +405,7 @@ static ssize_t mci_ce_count_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%u\n", mci->ce_mc); + return sysfs_emit(data, "%u\n", mci->ce_mc); } static ssize_t mci_ce_noinfo_show(struct device *dev, @@ -786,7 +414,7 @@ static ssize_t mci_ce_noinfo_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%u\n", mci->ce_noinfo_count); + return sysfs_emit(data, "%u\n", mci->ce_noinfo_count); } static ssize_t mci_ue_noinfo_show(struct device *dev, @@ -795,7 +423,7 @@ static ssize_t mci_ue_noinfo_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%u\n", mci->ue_noinfo_count); + return sysfs_emit(data, "%u\n", mci->ue_noinfo_count); } static ssize_t mci_seconds_show(struct device *dev, @@ -804,7 +432,7 @@ static ssize_t mci_seconds_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%ld\n", (jiffies - mci->start_time) / HZ); + return sysfs_emit(data, "%ld\n", (jiffies - mci->start_time) / HZ); } static ssize_t mci_ctl_name_show(struct device *dev, @@ -813,7 +441,7 @@ static ssize_t mci_ctl_name_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%s\n", mci->ctl_name); + return sysfs_emit(data, "%s\n", mci->ctl_name); } static ssize_t mci_size_mb_show(struct device *dev, @@ -833,7 +461,7 @@ static ssize_t mci_size_mb_show(struct device *dev, } } - return sprintf(data, "%u\n", PAGES_TO_MiB(total_pages)); + return sysfs_emit(data, "%u\n", PAGES_TO_MiB(total_pages)); } static ssize_t mci_max_location_show(struct device *dev, @@ -966,12 +594,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, goto fail; } -#ifdef CONFIG_EDAC_LEGACY_SYSFS - err = edac_create_csrow_objects(mci); - if (err < 0) - goto fail; -#endif - edac_create_debugfs_nodes(mci); return 0; @@ -996,9 +618,6 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) #ifdef CONFIG_EDAC_DEBUG edac_debugfs_remove_recursive(mci->debugfs); #endif -#ifdef CONFIG_EDAC_LEGACY_SYSFS - edac_delete_csrow_objects(mci); -#endif mci_for_each_dimm(mci, dimm) { if (!device_is_registered(&dimm->dev)) diff --git a/drivers/edac/edac_module.c 
b/drivers/edac/edac_module.c index 32a931d0cb71..1c9f62382666 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -67,7 +67,7 @@ char *edac_op_state_to_string(int opstate) * sysfs object: /sys/devices/system/edac * need to export to other files */ -static struct bus_type edac_subsys = { +static const struct bus_type edac_subsys = { .name = "edac", .dev_name = "edac", }; @@ -90,7 +90,7 @@ static void edac_subsys_exit(void) } /* return pointer to the 'edac' node in sysfs */ -struct bus_type *edac_get_sysfs_subsys(void) +const struct bus_type *edac_get_sysfs_subsys(void) { return &edac_subsys; } diff --git a/drivers/edac/edac_pci.h b/drivers/edac/edac_pci.h index 5175f5724cfa..3f47cd9b2b03 100644 --- a/drivers/edac/edac_pci.h +++ b/drivers/edac/edac_pci.h @@ -22,7 +22,6 @@ #ifndef _EDAC_PCI_H_ #define _EDAC_PCI_H_ -#include <linux/completion.h> #include <linux/device.h> #include <linux/edac.h> #include <linux/kobject.h> @@ -48,8 +47,6 @@ struct edac_pci_ctl_info { int pci_idx; - struct bus_type *edac_subsys; /* pointer to subsystem */ - /* the internal state of this controller instance */ int op_state; /* work struct for this instance */ @@ -72,8 +69,6 @@ struct edac_pci_ctl_info { unsigned long start_time; /* edac_pci load start time (jiffies) */ - struct completion complete; - /* sysfs top name under 'edac' directory * and instance name: * cpu/cpu0/... diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 888d5728ecef..7b44afcf48db 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -337,8 +337,9 @@ static struct kobj_type ktype_edac_pci_main_kobj = { */ static int edac_pci_main_kobj_setup(void) { - int err; - struct bus_type *edac_subsys; + int err = -ENODEV; + const struct bus_type *edac_subsys; + struct device *dev_root; edac_dbg(0, "\n"); @@ -357,7 +358,6 @@ static int edac_pci_main_kobj_setup(void) */ if (!try_module_get(THIS_MODULE)) { edac_dbg(1, "try_module_get() failed\n"); - err = -ENODEV; goto decrement_count_fail; } @@ -369,9 +369,13 @@ static int edac_pci_main_kobj_setup(void) } /* Instanstiate the pci object */ - err = kobject_init_and_add(edac_pci_top_main_kobj, - &ktype_edac_pci_main_kobj, - &edac_subsys->dev_root->kobj, "pci"); + dev_root = bus_get_dev_root(edac_subsys); + if (dev_root) { + err = kobject_init_and_add(edac_pci_top_main_kobj, + &ktype_edac_pci_main_kobj, + &dev_root->kobj, "pci"); + put_device(dev_root); + } if (err) { edac_dbg(1, "Failed to register '.../edac/pci'\n"); goto kobject_init_and_add_fail; @@ -517,7 +521,7 @@ static void edac_pci_dev_parity_clear(struct pci_dev *dev) /* read the device TYPE, looking for bridges */ pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); - if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) + if ((header_type & PCI_HEADER_TYPE_MASK) == PCI_HEADER_TYPE_BRIDGE) get_pci_parity_status(dev, 1); } @@ -579,7 +583,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev) edac_dbg(4, "PCI HEADER TYPE= 0x%02x %s\n", header_type, dev_name(&dev->dev)); - if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { + if ((header_type & PCI_HEADER_TYPE_MASK) == PCI_HEADER_TYPE_BRIDGE) { /* On bridges, need to examine secondary status register */ status = get_pci_parity_status(dev, 1); diff --git a/drivers/edac/fsl_ddr_edac.c b/drivers/edac/fsl_ddr_edac.c index ac2102b25706..e4eaec0aa81d 100644 --- a/drivers/edac/fsl_ddr_edac.c +++ b/drivers/edac/fsl_ddr_edac.c @@ -22,8 +22,7 @@ #include <linux/smp.h> #include <linux/gfp.h> -#include 
<linux/of_platform.h> -#include <linux/of_device.h> +#include <linux/of.h> #include <linux/of_address.h> #include "edac_module.h" #include "fsl_ddr_edac.h" @@ -32,18 +31,30 @@ static int edac_mc_idx; -static u32 orig_ddr_err_disable; -static u32 orig_ddr_err_sbe; -static bool little_endian; +static inline void __iomem *ddr_reg_addr(struct fsl_mc_pdata *pdata, unsigned int off) +{ + if (pdata->flag == TYPE_IMX9 && off >= FSL_MC_DATA_ERR_INJECT_HI && off <= FSL_MC_ERR_SBE) + return pdata->inject_vbase + off - FSL_MC_DATA_ERR_INJECT_HI + + IMX9_MC_DATA_ERR_INJECT_OFF; + + if (pdata->flag == TYPE_IMX9 && off >= IMX9_MC_ERR_EN) + return pdata->inject_vbase + off - IMX9_MC_ERR_EN; -static inline u32 ddr_in32(void __iomem *addr) + return pdata->mc_vbase + off; +} + +static inline u32 ddr_in32(struct fsl_mc_pdata *pdata, unsigned int off) { - return little_endian ? ioread32(addr) : ioread32be(addr); + void __iomem *addr = ddr_reg_addr(pdata, off); + + return pdata->little_endian ? ioread32(addr) : ioread32be(addr); } -static inline void ddr_out32(void __iomem *addr, u32 value) +static inline void ddr_out32(struct fsl_mc_pdata *pdata, unsigned int off, u32 value) { - if (little_endian) + void __iomem *addr = ddr_reg_addr(pdata, off); + + if (pdata->little_endian) iowrite32(value, addr); else iowrite32be(value, addr); @@ -61,7 +72,7 @@ static ssize_t fsl_mc_inject_data_hi_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct fsl_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", - ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI)); + ddr_in32(pdata, FSL_MC_DATA_ERR_INJECT_HI)); } static ssize_t fsl_mc_inject_data_lo_show(struct device *dev, @@ -71,7 +82,7 @@ static ssize_t fsl_mc_inject_data_lo_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct fsl_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", - ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO)); + ddr_in32(pdata, FSL_MC_DATA_ERR_INJECT_LO)); } static ssize_t fsl_mc_inject_ctrl_show(struct device *dev, @@ -81,7 +92,7 @@ static ssize_t fsl_mc_inject_ctrl_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct fsl_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", - ddr_in32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT)); + ddr_in32(pdata, FSL_MC_ECC_ERR_INJECT)); } static ssize_t fsl_mc_inject_data_hi_store(struct device *dev, @@ -98,7 +109,7 @@ static ssize_t fsl_mc_inject_data_hi_store(struct device *dev, if (rc) return rc; - ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI, val); + ddr_out32(pdata, FSL_MC_DATA_ERR_INJECT_HI, val); return count; } return 0; @@ -118,7 +129,7 @@ static ssize_t fsl_mc_inject_data_lo_store(struct device *dev, if (rc) return rc; - ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO, val); + ddr_out32(pdata, FSL_MC_DATA_ERR_INJECT_LO, val); return count; } return 0; @@ -138,7 +149,7 @@ static ssize_t fsl_mc_inject_ctrl_store(struct device *dev, if (rc) return rc; - ddr_out32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT, val); + ddr_out32(pdata, FSL_MC_ECC_ERR_INJECT, val); return count; } return 0; @@ -287,7 +298,7 @@ static void fsl_mc_check(struct mem_ctl_info *mci) int bad_data_bit; int bad_ecc_bit; - err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT); + err_detect = ddr_in32(pdata, FSL_MC_ERR_DETECT); if (!err_detect) return; @@ -296,14 +307,14 @@ static void fsl_mc_check(struct mem_ctl_info *mci) /* no more processing if not ECC bit errors */ if (!(err_detect & (DDR_EDE_SBE | DDR_EDE_MBE))) { - 
ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect); + ddr_out32(pdata, FSL_MC_ERR_DETECT, err_detect); return; } - syndrome = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ECC); + syndrome = ddr_in32(pdata, FSL_MC_CAPTURE_ECC); /* Mask off appropriate bits of syndrome based on bus width */ - bus_width = (ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG) & + bus_width = (ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG) & DSC_DBW_MASK) ? 32 : 64; if (bus_width == 64) syndrome &= 0xff; @@ -311,8 +322,8 @@ static void fsl_mc_check(struct mem_ctl_info *mci) syndrome &= 0xffff; err_addr = make64( - ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_EXT_ADDRESS), - ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ADDRESS)); + ddr_in32(pdata, FSL_MC_CAPTURE_EXT_ADDRESS), + ddr_in32(pdata, FSL_MC_CAPTURE_ADDRESS)); pfn = err_addr >> PAGE_SHIFT; for (row_index = 0; row_index < mci->nr_csrows; row_index++) { @@ -321,29 +332,33 @@ static void fsl_mc_check(struct mem_ctl_info *mci) break; } - cap_high = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_HI); - cap_low = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_LO); + cap_high = ddr_in32(pdata, FSL_MC_CAPTURE_DATA_HI); + cap_low = ddr_in32(pdata, FSL_MC_CAPTURE_DATA_LO); /* * Analyze single-bit errors on 64-bit wide buses * TODO: Add support for 32-bit wide buses */ if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) { + u64 cap = (u64)cap_high << 32 | cap_low; + u32 s = syndrome; + sbe_ecc_decode(cap_high, cap_low, syndrome, &bad_data_bit, &bad_ecc_bit); - if (bad_data_bit != -1) - fsl_mc_printk(mci, KERN_ERR, - "Faulty Data bit: %d\n", bad_data_bit); - if (bad_ecc_bit != -1) - fsl_mc_printk(mci, KERN_ERR, - "Faulty ECC bit: %d\n", bad_ecc_bit); + if (bad_data_bit >= 0) { + fsl_mc_printk(mci, KERN_ERR, "Faulty Data bit: %d\n", bad_data_bit); + cap ^= 1ULL << bad_data_bit; + } + + if (bad_ecc_bit >= 0) { + fsl_mc_printk(mci, KERN_ERR, "Faulty ECC bit: %d\n", bad_ecc_bit); + s ^= 1 << bad_ecc_bit; + } fsl_mc_printk(mci, KERN_ERR, "Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n", - cap_high ^ (1 << (bad_data_bit - 32)), - cap_low ^ (1 << bad_data_bit), - syndrome ^ (1 << bad_ecc_bit)); + upper_32_bits(cap), lower_32_bits(cap), s); } fsl_mc_printk(mci, KERN_ERR, @@ -368,7 +383,7 @@ static void fsl_mc_check(struct mem_ctl_info *mci) row_index, 0, -1, mci->ctl_name, ""); - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect); + ddr_out32(pdata, FSL_MC_ERR_DETECT, err_detect); } static irqreturn_t fsl_mc_isr(int irq, void *dev_id) @@ -377,7 +392,7 @@ static irqreturn_t fsl_mc_isr(int irq, void *dev_id) struct fsl_mc_pdata *pdata = mci->pvt_info; u32 err_detect; - err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT); + err_detect = ddr_in32(pdata, FSL_MC_ERR_DETECT); if (!err_detect) return IRQ_NONE; @@ -397,7 +412,7 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) u32 cs_bnds; int index; - sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG); + sdram_ctl = ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG); sdtype = sdram_ctl & DSC_SDTYPE_MASK; if (sdram_ctl & DSC_RD_EN) { @@ -432,6 +447,9 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) case 0x05000000: mtype = MEM_DDR4; break; + case 0x04000000: + mtype = MEM_LPDDR4; + break; default: mtype = MEM_UNKNOWN; break; @@ -445,7 +463,7 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) csrow = mci->csrows[index]; dimm = csrow->channels[0]->dimm; - cs_bnds = ddr_in32(pdata->mc_vbase + FSL_MC_CS_BNDS_0 + + cs_bnds = ddr_in32(pdata, FSL_MC_CS_BNDS_0 + (index * FSL_MC_CS_BNDS_OFS)); start = (cs_bnds 
& 0xffff0000) >> 16; @@ -465,7 +483,9 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) dimm->grain = 8; dimm->mtype = mtype; dimm->dtype = DEV_UNKNOWN; - if (sdram_ctl & DSC_X32_EN) + if (pdata->flag == TYPE_IMX9) + dimm->dtype = DEV_X16; + else if (sdram_ctl & DSC_X32_EN) dimm->dtype = DEV_X32; dimm->edac_mode = EDAC_SECDED; } @@ -477,6 +497,7 @@ int fsl_mc_err_probe(struct platform_device *op) struct edac_mc_layer layers[2]; struct fsl_mc_pdata *pdata; struct resource r; + u32 ecc_en_mask; u32 sdram_ctl; int res; @@ -504,11 +525,13 @@ int fsl_mc_err_probe(struct platform_device *op) mci->ctl_name = pdata->name; mci->dev_name = pdata->name; + pdata->flag = (unsigned long)device_get_match_data(&op->dev); + /* * Get the endianness of DDR controller registers. * Default is big endian. */ - little_endian = of_property_read_bool(op->dev.of_node, "little-endian"); + pdata->little_endian = of_property_read_bool(op->dev.of_node, "little-endian"); res = of_address_to_resource(op->dev.of_node, 0, &r); if (res) { @@ -532,8 +555,23 @@ int fsl_mc_err_probe(struct platform_device *op) goto err; } - sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG); - if (!(sdram_ctl & DSC_ECC_EN)) { + if (pdata->flag == TYPE_IMX9) { + pdata->inject_vbase = devm_platform_ioremap_resource_byname(op, "inject"); + if (IS_ERR(pdata->inject_vbase)) { + res = -ENOMEM; + goto err; + } + } + + if (pdata->flag == TYPE_IMX9) { + sdram_ctl = ddr_in32(pdata, IMX9_MC_ERR_EN); + ecc_en_mask = ERR_ECC_EN | ERR_INLINE_ECC; + } else { + sdram_ctl = ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG); + ecc_en_mask = DSC_ECC_EN; + } + + if ((sdram_ctl & ecc_en_mask) != ecc_en_mask) { /* no ECC */ pr_warn("%s: No ECC DIMMs discovered\n", __func__); res = -ENODEV; @@ -544,7 +582,8 @@ int fsl_mc_err_probe(struct platform_device *op) mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR | MEM_FLAG_DDR2 | MEM_FLAG_RDDR2 | MEM_FLAG_DDR3 | MEM_FLAG_RDDR3 | - MEM_FLAG_DDR4 | MEM_FLAG_RDDR4; + MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | + MEM_FLAG_LPDDR4; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; @@ -559,11 +598,11 @@ int fsl_mc_err_probe(struct platform_device *op) fsl_ddr_init_csrows(mci); /* store the original error disable bits */ - orig_ddr_err_disable = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DISABLE); - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE, 0); + pdata->orig_ddr_err_disable = ddr_in32(pdata, FSL_MC_ERR_DISABLE); + ddr_out32(pdata, FSL_MC_ERR_DISABLE, 0); /* clear all error bits */ - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, ~0); + ddr_out32(pdata, FSL_MC_ERR_DETECT, ~0); res = edac_mc_add_mc_with_groups(mci, fsl_ddr_dev_groups); if (res) { @@ -572,15 +611,15 @@ int fsl_mc_err_probe(struct platform_device *op) } if (edac_op_state == EDAC_OPSTATE_INT) { - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN, + ddr_out32(pdata, FSL_MC_ERR_INT_EN, DDR_EIE_MBEE | DDR_EIE_SBEE); /* store the original error management threshold */ - orig_ddr_err_sbe = ddr_in32(pdata->mc_vbase + - FSL_MC_ERR_SBE) & 0xff0000; + pdata->orig_ddr_err_sbe = ddr_in32(pdata, + FSL_MC_ERR_SBE) & 0xff0000; /* set threshold to 1 error per interrupt */ - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, 0x10000); + ddr_out32(pdata, FSL_MC_ERR_SBE, 0x10000); /* register interrupts */ pdata->irq = platform_get_irq(op, 0); @@ -613,7 +652,7 @@ err: return res; } -int fsl_mc_err_remove(struct platform_device *op) +void fsl_mc_err_remove(struct platform_device *op) { struct mem_ctl_info *mci = 
dev_get_drvdata(&op->dev); struct fsl_mc_pdata *pdata = mci->pvt_info; @@ -621,14 +660,14 @@ int fsl_mc_err_remove(struct platform_device *op) edac_dbg(0, "\n"); if (edac_op_state == EDAC_OPSTATE_INT) { - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN, 0); + ddr_out32(pdata, FSL_MC_ERR_INT_EN, 0); } - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE, - orig_ddr_err_disable); - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, orig_ddr_err_sbe); + ddr_out32(pdata, FSL_MC_ERR_DISABLE, + pdata->orig_ddr_err_disable); + ddr_out32(pdata, FSL_MC_ERR_SBE, pdata->orig_ddr_err_sbe); + edac_mc_del_mc(&op->dev); edac_mc_free(mci); - return 0; } diff --git a/drivers/edac/fsl_ddr_edac.h b/drivers/edac/fsl_ddr_edac.h index 332439d7b2d9..73618f79e587 100644 --- a/drivers/edac/fsl_ddr_edac.h +++ b/drivers/edac/fsl_ddr_edac.h @@ -39,6 +39,9 @@ #define FSL_MC_CAPTURE_EXT_ADDRESS 0x0e54 #define FSL_MC_ERR_SBE 0x0e58 +#define IMX9_MC_ERR_EN 0x1000 +#define IMX9_MC_DATA_ERR_INJECT_OFF 0x100 + #define DSC_MEM_EN 0x80000000 #define DSC_ECC_EN 0x20000000 #define DSC_RD_EN 0x10000000 @@ -46,6 +49,9 @@ #define DSC_DBW_32 0x00080000 #define DSC_DBW_64 0x00000000 +#define ERR_ECC_EN 0x80000000 +#define ERR_INLINE_ECC 0x40000000 + #define DSC_SDTYPE_MASK 0x07000000 #define DSC_X32_EN 0x00000020 @@ -65,12 +71,19 @@ #define DDR_EDI_SBED 0x4 /* single-bit ECC error disable */ #define DDR_EDI_MBED 0x8 /* multi-bit ECC error disable */ +#define TYPE_IMX9 0x1 /* MC used by iMX9 having registers changed */ + struct fsl_mc_pdata { char *name; int edac_idx; void __iomem *mc_vbase; + void __iomem *inject_vbase; int irq; + u32 orig_ddr_err_disable; + u32 orig_ddr_err_sbe; + bool little_endian; + unsigned long flag; }; int fsl_mc_err_probe(struct platform_device *op); -int fsl_mc_err_remove(struct platform_device *op); +void fsl_mc_err_remove(struct platform_device *op); #endif diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index cf2b618c1ada..d80c88818691 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -15,6 +15,7 @@ #include "edac_module.h" #include <ras/ras_event.h> #include <linux/notifier.h> +#include <linux/string.h> #define OTHER_DETAIL_LEN 400 @@ -332,7 +333,7 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb, p = pvt->msg; p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype)); } else { - strcpy(pvt->msg, "unknown error"); + strscpy(pvt->msg, "unknown error"); } /* Error address */ @@ -357,14 +358,14 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb, dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle); if (dimm) { e->top_layer = dimm->idx; - strcpy(e->label, dimm->label); + strscpy(e->label, dimm->label); } } if (p > e->location) *(p - 1) = '\0'; if (!*e->label) - strcpy(e->label, "unknown memory"); + strscpy(e->label, "unknown memory"); /* All other fields are mapped on e->other_detail */ p = pvt->other_detail; @@ -547,7 +548,7 @@ static int __init ghes_edac_init(void) return -ENODEV; if (list_empty(ghes_devs)) { - pr_info("GHES probing device list is empty"); + pr_info("GHES probing device list is empty\n"); return -ENODEV; } diff --git a/drivers/edac/highbank_l2_edac.c b/drivers/edac/highbank_l2_edac.c index c4549cec788b..24f163ff323f 100644 --- a/drivers/edac/highbank_l2_edac.c +++ b/drivers/edac/highbank_l2_edac.c @@ -7,8 +7,9 @@ #include <linux/ctype.h> #include <linux/edac.h> #include <linux/interrupt.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <linux/platform_device.h> -#include <linux/of_platform.h> 
#include "edac_module.h" @@ -53,7 +54,7 @@ static int highbank_l2_err_probe(struct platform_device *pdev) int res = 0; dci = edac_device_alloc_ctl_info(sizeof(*drvdata), "cpu", - 1, "L", 1, 2, NULL, 0, 0); + 1, "L", 1, 2, 0); if (!dci) return -ENOMEM; @@ -117,13 +118,12 @@ err: return res; } -static int highbank_l2_err_remove(struct platform_device *pdev) +static void highbank_l2_err_remove(struct platform_device *pdev) { struct edac_device_ctl_info *dci = platform_get_drvdata(pdev); edac_device_del_device(&pdev->dev); edac_device_free_ctl_info(dci); - return 0; } static struct platform_driver highbank_l2_edac_driver = { diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c index 19fba258ae10..a8879d72d064 100644 --- a/drivers/edac/highbank_mc_edac.c +++ b/drivers/edac/highbank_mc_edac.c @@ -7,8 +7,9 @@ #include <linux/ctype.h> #include <linux/edac.h> #include <linux/interrupt.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <linux/platform_device.h> -#include <linux/of_platform.h> #include <linux/uaccess.h> #include "edac_module.h" @@ -250,13 +251,12 @@ free: return res; } -static int highbank_mc_remove(struct platform_device *pdev) +static void highbank_mc_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); - return 0; } static struct platform_driver highbank_mc_edac_driver = { diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 65aeea53e2df..89b3e8cc38b1 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -13,7 +13,7 @@ #include "edac_module.h" #include "skx_common.h" -#define I10NM_REVISION "v0.0.5" +#define I10NM_REVISION "v0.0.6" #define EDAC_MOD_STR "i10nm_edac" /* Debug macros */ @@ -22,25 +22,30 @@ #define I10NM_GET_SCK_BAR(d, reg) \ pci_read_config_dword((d)->uracu, 0xd0, &(reg)) -#define I10NM_GET_IMC_BAR(d, i, reg) \ - pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg)) +#define I10NM_GET_IMC_BAR(d, i, reg) \ + pci_read_config_dword((d)->uracu, \ + (res_cfg->type == GNR ? 0xd4 : 0xd8) + (i) * 4, &(reg)) #define I10NM_GET_SAD(d, offset, i, reg)\ - pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg)) + pci_read_config_dword((d)->sad_all, (offset) + (i) * \ + (res_cfg->type == GNR ? 12 : 8), &(reg)) #define I10NM_GET_HBM_IMC_BAR(d, reg) \ pci_read_config_dword((d)->uracu, 0xd4, &(reg)) #define I10NM_GET_CAPID3_CFG(d, reg) \ - pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg)) + pci_read_config_dword((d)->pcu_cr3, \ + res_cfg->type == GNR ? 0x290 : 0x90, &(reg)) +#define I10NM_GET_CAPID5_CFG(d, reg) \ + pci_read_config_dword((d)->pcu_cr3, \ + res_cfg->type == GNR ? 0x298 : 0x98, &(reg)) #define I10NM_GET_DIMMMTR(m, i, j) \ - readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \ + readl((m)->mbase + ((m)->hbm_mc ? 0x80c : \ + (res_cfg->type == GNR ? 0xc0c : 0x2080c)) + \ (i) * (m)->chan_mmio_sz + (j) * 4) #define I10NM_GET_MCDDRTCFG(m, i) \ readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \ (i) * (m)->chan_mmio_sz) #define I10NM_GET_MCMTR(m, i) \ - readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \ - (i) * (m)->chan_mmio_sz) -#define I10NM_GET_AMAP(m, i) \ - readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \ + readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \ + (res_cfg->type == GNR ? 
0xaf8 : 0x20ef8)) + \ (i) * (m)->chan_mmio_sz) #define I10NM_GET_REG32(m, i, offset) \ readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) @@ -56,7 +61,11 @@ #define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \ ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000) +#define I10NM_GNR_IMC_MMIO_OFFSET 0x24c000 +#define I10NM_GNR_D_IMC_MMIO_OFFSET 0x206000 +#define I10NM_GNR_IMC_MMIO_SIZE 0x4000 #define I10NM_HBM_IMC_MMIO_SIZE 0x9000 +#define I10NM_DDR_IMC_CH_CNT(reg) GET_BITFIELD(reg, 21, 24) #define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30) #define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29) @@ -64,12 +73,6 @@ #define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0) #define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5) -#define RETRY_RD_ERR_LOG_UC BIT(1) -#define RETRY_RD_ERR_LOG_NOOVER BIT(14) -#define RETRY_RD_ERR_LOG_EN BIT(15) -#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1)) -#define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0)) - static struct list_head *i10nm_edac_list; static struct res_config *res_cfg; @@ -77,215 +80,319 @@ static int retry_rd_err_log; static int decoding_via_mca; static bool mem_cfg_2lm; -static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; -static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; -static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}; -static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}; -static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}; -static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}; -static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}; -static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; -static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; - -static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, - u32 *offsets_scrub, u32 *offsets_demand, - u32 *offsets_demand2) +static struct reg_rrl icx_reg_rrl_ddr = { + .set_num = 2, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}, + {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}, + }, + .widths = {4, 4, 4, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, + .cecnt_widths = {4, 4, 4, 4}, +}; + +static struct reg_rrl spr_reg_rrl_ddr = { + .set_num = 3, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND, FRE_DEMAND}, + .offsets = { + {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}, + {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}, + {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, + .cecnt_widths = {4, 4, 4, 4}, +}; + +static struct reg_rrl spr_reg_rrl_hbm_pch0 = { + .set_num = 2, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}, + {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask 
= BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x2818, 0x281c, 0x2820, 0x2824}, + .cecnt_widths = {4, 4, 4, 4}, +}; + +static struct reg_rrl spr_reg_rrl_hbm_pch1 = { + .set_num = 2, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}, + {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x2c18, 0x2c1c, 0x2c20, 0x2c24}, + .cecnt_widths = {4, 4, 4, 4}, +}; + +static struct reg_rrl gnr_reg_rrl_ddr = { + .set_num = 4, + .reg_num = 6, + .modes = {FRE_SCRUB, FRE_DEMAND, LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x2f10, 0x2f20, 0x2f30, 0x2f50, 0x2f60, 0xba0}, + {0x2f14, 0x2f24, 0x2f38, 0x2f54, 0x2f64, 0xba8}, + {0x2f18, 0x2f28, 0x2f40, 0x2f58, 0x2f68, 0xbb0}, + {0x2f1c, 0x2f2c, 0x2f48, 0x2f5c, 0x2f6c, 0xbb8}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(14), + .noover_mask = BIT(15), + .en_mask = BIT(12), + + .cecnt_num = 8, + .cecnt_offsets = {0x2c10, 0x2c14, 0x2c18, 0x2c1c, 0x2c20, 0x2c24, 0x2c28, 0x2c2c}, + .cecnt_widths = {4, 4, 4, 4, 4, 4, 4, 4}, +}; + +static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) +{ + switch (width) { + case 4: + return I10NM_GET_REG32(imc, chan, offset); + case 8: + return I10NM_GET_REG64(imc, chan, offset); + default: + i10nm_printk(KERN_ERR, "Invalid read RRL 0x%x width %d\n", offset, width); + return 0; + } +} + +static void write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val) +{ + switch (width) { + case 4: + return I10NM_SET_REG32(imc, chan, offset, (u32)val); + default: + i10nm_printk(KERN_ERR, "Invalid write RRL 0x%x width %d\n", offset, width); + } +} + +static void enable_rrl(struct skx_imc *imc, int chan, struct reg_rrl *rrl, + int rrl_set, bool enable, u32 *rrl_ctl) { - u32 s, d, d2; + enum rrl_mode mode = rrl->modes[rrl_set]; + u32 offset = rrl->offsets[rrl_set][0], v; + u8 width = rrl->widths[0]; + bool first, scrub; + + /* First or last read error. */ + first = (mode == FRE_SCRUB || mode == FRE_DEMAND); + /* Patrol scrub or on-demand read error. */ + scrub = (mode == FRE_SCRUB || mode == LRE_SCRUB); - s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]); - d = I10NM_GET_REG32(imc, chan, offsets_demand[0]); - if (offsets_demand2) - d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]); + v = read_imc_reg(imc, chan, offset, width); if (enable) { - /* Save default configurations */ - imc->chan[chan].retry_rd_err_log_s = s; - imc->chan[chan].retry_rd_err_log_d = d; - if (offsets_demand2) - imc->chan[chan].retry_rd_err_log_d2 = d2; - - s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; - s |= RETRY_RD_ERR_LOG_EN; - d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; - d |= RETRY_RD_ERR_LOG_EN; - - if (offsets_demand2) { - d2 &= ~RETRY_RD_ERR_LOG_UC; - d2 |= RETRY_RD_ERR_LOG_NOOVER; - d2 |= RETRY_RD_ERR_LOG_EN; - } + /* Save default configurations. 
*/ + *rrl_ctl = v; + v &= ~rrl->uc_mask; + + if (first) + v |= rrl->noover_mask; + else + v &= ~rrl->noover_mask; + + if (scrub) + v |= rrl->en_patspr_mask; + else + v &= ~rrl->en_patspr_mask; + + v |= rrl->en_mask; } else { - /* Restore default configurations */ - if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC) - s |= RETRY_RD_ERR_LOG_UC; - if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER) - s |= RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN)) - s &= ~RETRY_RD_ERR_LOG_EN; - if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC) - d |= RETRY_RD_ERR_LOG_UC; - if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER) - d |= RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN)) - d &= ~RETRY_RD_ERR_LOG_EN; - - if (offsets_demand2) { - if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC) - d2 |= RETRY_RD_ERR_LOG_UC; - if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER)) - d2 &= ~RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN)) - d2 &= ~RETRY_RD_ERR_LOG_EN; + /* Restore default configurations. */ + if (*rrl_ctl & rrl->uc_mask) + v |= rrl->uc_mask; + + if (first) { + if (!(*rrl_ctl & rrl->noover_mask)) + v &= ~rrl->noover_mask; + } else { + if (*rrl_ctl & rrl->noover_mask) + v |= rrl->noover_mask; + } + + if (scrub) { + if (!(*rrl_ctl & rrl->en_patspr_mask)) + v &= ~rrl->en_patspr_mask; + } else { + if (*rrl_ctl & rrl->en_patspr_mask) + v |= rrl->en_patspr_mask; } + + if (!(*rrl_ctl & rrl->en_mask)) + v &= ~rrl->en_mask; } - I10NM_SET_REG32(imc, chan, offsets_scrub[0], s); - I10NM_SET_REG32(imc, chan, offsets_demand[0], d); - if (offsets_demand2) - I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2); + write_imc_reg(imc, chan, offset, width, v); +} + +static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl, + bool enable, u32 *rrl_ctl) +{ + for (int i = 0; i < rrl->set_num; i++) + enable_rrl(imc, chan, rrl, i, enable, rrl_ctl + i); +} + +static void enable_rrls_ddr(struct skx_imc *imc, bool enable) +{ + struct reg_rrl *rrl_ddr = res_cfg->reg_rrl_ddr; + int i, chan_num = res_cfg->ddr_chan_num; + struct skx_channel *chan = imc->chan; + + if (!imc->mbase) + return; + + for (i = 0; i < chan_num; i++) + enable_rrls(imc, i, rrl_ddr, enable, chan[i].rrl_ctl[0]); +} + +static void enable_rrls_hbm(struct skx_imc *imc, bool enable) +{ + struct reg_rrl **rrl_hbm = res_cfg->reg_rrl_hbm; + int i, chan_num = res_cfg->hbm_chan_num; + struct skx_channel *chan = imc->chan; + + if (!imc->mbase || !imc->hbm_mc || !rrl_hbm[0] || !rrl_hbm[1]) + return; + + for (i = 0; i < chan_num; i++) { + enable_rrls(imc, i, rrl_hbm[0], enable, chan[i].rrl_ctl[0]); + enable_rrls(imc, i, rrl_hbm[1], enable, chan[i].rrl_ctl[1]); + } } static void enable_retry_rd_err_log(bool enable) { - struct skx_imc *imc; struct skx_dev *d; - int i, j; + int i, imc_num; edac_dbg(2, "\n"); - list_for_each_entry(d, i10nm_edac_list, list) - for (i = 0; i < I10NM_NUM_IMC; i++) { - imc = &d->imc[i]; - if (!imc->mbase) - continue; + list_for_each_entry(d, i10nm_edac_list, list) { + imc_num = res_cfg->ddr_imc_num; + for (i = 0; i < imc_num; i++) + enable_rrls_ddr(&d->imc[i], enable); - for (j = 0; j < I10NM_NUM_CHANNELS; j++) { - if (imc->hbm_mc) { - __enable_retry_rd_err_log(imc, j, enable, - res_cfg->offsets_scrub_hbm0, - res_cfg->offsets_demand_hbm0, - NULL); - __enable_retry_rd_err_log(imc, j, enable, - res_cfg->offsets_scrub_hbm1, - 
res_cfg->offsets_demand_hbm1, - NULL); - } else { - __enable_retry_rd_err_log(imc, j, enable, - res_cfg->offsets_scrub, - res_cfg->offsets_demand, - res_cfg->offsets_demand2); - } - } + imc_num += res_cfg->hbm_imc_num; + for (; i < imc_num; i++) + enable_rrls_hbm(&d->imc[i], enable); } } static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, int len, bool scrub_err) { + int i, j, n, ch = res->channel, pch = res->cs & 1; struct skx_imc *imc = &res->dev->imc[res->imc]; - u32 log0, log1, log2, log3, log4; - u32 corr0, corr1, corr2, corr3; - u32 lxg0, lxg1, lxg3, lxg4; - u32 *xffsets = NULL; - u64 log2a, log5; - u64 lxg2a, lxg5; - u32 *offsets; - int n, pch; + u64 log, corr, status_mask; + struct reg_rrl *rrl; + bool scrub; + u32 offset; + u8 width; if (!imc->mbase) return; - if (imc->hbm_mc) { - pch = res->cs & 1; + rrl = imc->hbm_mc ? res_cfg->reg_rrl_hbm[pch] : res_cfg->reg_rrl_ddr; - if (pch) - offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 : - res_cfg->offsets_demand_hbm1; - else - offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 : - res_cfg->offsets_demand_hbm0; - } else { - if (scrub_err) { - offsets = res_cfg->offsets_scrub; - } else { - offsets = res_cfg->offsets_demand; - xffsets = res_cfg->offsets_demand2; - } - } + if (!rrl) + return; - log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]); - log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]); - log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]); - log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]); - log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]); - - if (xffsets) { - lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]); - lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]); - lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]); - lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]); - lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]); - } + status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask; - if (res_cfg->type == SPR) { - log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]); - n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx", - log0, log1, log2a, log3, log4, log5); + n = scnprintf(msg, len, " retry_rd_err_log["); + for (i = 0; i < rrl->set_num; i++) { + scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB); + if (scrub_err != scrub) + continue; - if (len - n > 0) { - if (xffsets) { - lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]); - n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]", - lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5); - } else { - n += snprintf(msg + n, len - n, "]"); - } - } - } else { - log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]); - n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]", - log0, log1, log2, log3, log4, log5); - } + for (j = 0; j < rrl->reg_num && len - n > 0; j++) { + offset = rrl->offsets[i][j]; + width = rrl->widths[j]; + log = read_imc_reg(imc, ch, offset, width); - if (imc->hbm_mc) { - if (pch) { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24); - } else { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824); + if (width == 4) + n += scnprintf(msg + n, len - n, "%.8llx ", log); + else + n += scnprintf(msg + n, len - n, 
"%.16llx ", log); + + /* Clear RRL status if RRL in Linux control mode. */ + if (retry_rd_err_log == 2 && !j && (log & status_mask)) + write_imc_reg(imc, ch, offset, width, log & ~status_mask); } - } else { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); } - if (len - n > 0) - snprintf(msg + n, len - n, - " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]", - corr0 & 0xffff, corr0 >> 16, - corr1 & 0xffff, corr1 >> 16, - corr2 & 0xffff, corr2 >> 16, - corr3 & 0xffff, corr3 >> 16); - - /* Clear status bits */ - if (retry_rd_err_log == 2) { - if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) { - log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; - I10NM_SET_REG32(imc, res->channel, offsets[0], log0); + /* Move back one space. */ + n--; + n += scnprintf(msg + n, len - n, "]"); + + if (len - n > 0) { + n += scnprintf(msg + n, len - n, " correrrcnt["); + for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) { + offset = rrl->cecnt_offsets[i]; + width = rrl->cecnt_widths[i]; + corr = read_imc_reg(imc, ch, offset, width); + + /* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */ + if (res_cfg->type <= SPR) { + n += scnprintf(msg + n, len - n, "%.4llx %.4llx ", + corr & 0xffff, corr >> 16); + } else { + /* CPUs {GNR} encode one counter per CORRERRCNT register. */ + if (width == 4) + n += scnprintf(msg + n, len - n, "%.8llx ", corr); + else + n += scnprintf(msg + n, len - n, "%.16llx ", corr); + } } - if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { - lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; - I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0); - } + /* Move back one space. */ + n--; + n += scnprintf(msg + n, len - n, "]"); } } @@ -311,6 +418,80 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus, return pdev; } +/** + * i10nm_get_imc_num() - Get the number of present DDR memory controllers. + * + * @cfg : The pointer to the structure of EDAC resource configurations. + * + * For Granite Rapids CPUs, the number of present DDR memory controllers read + * at runtime overwrites the value statically configured in @cfg->ddr_imc_num. + * For other CPUs, the number of present DDR memory controllers is statically + * configured in @cfg->ddr_imc_num. + * + * RETURNS : 0 on success, < 0 on failure. + */ +static int i10nm_get_imc_num(struct res_config *cfg) +{ + int n, imc_num, chan_num = 0; + struct skx_dev *d; + u32 reg; + + list_for_each_entry(d, i10nm_edac_list, list) { + d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->pcu_cr3_bdf.bus], + res_cfg->pcu_cr3_bdf.dev, + res_cfg->pcu_cr3_bdf.fun); + if (!d->pcu_cr3) + continue; + + if (I10NM_GET_CAPID5_CFG(d, reg)) + continue; + + n = I10NM_DDR_IMC_CH_CNT(reg); + + if (!chan_num) { + chan_num = n; + edac_dbg(2, "Get DDR CH number: %d\n", chan_num); + } else if (chan_num != n) { + i10nm_printk(KERN_NOTICE, "Get DDR CH numbers: %d, %d\n", chan_num, n); + } + } + + switch (cfg->type) { + case GNR: + /* + * One channel per DDR memory controller for Granite Rapids CPUs. + */ + imc_num = chan_num; + + if (!imc_num) { + i10nm_printk(KERN_ERR, "Invalid DDR MC number\n"); + return -ENODEV; + } + + if (cfg->ddr_imc_num != imc_num) { + /* + * Update the configuration data to reflect the number of + * present DDR memory controllers. 
+ */ + cfg->ddr_imc_num = imc_num; + edac_dbg(2, "Set DDR MC number: %d", imc_num); + + /* Release and reallocate skx_dev list with the updated number. */ + skx_remove(); + if (skx_get_all_bus_mappings(cfg, &i10nm_edac_list) <= 0) + return -ENODEV; + } + + return 0; + default: + /* + * For other CPUs, the number of present DDR memory controllers + * is statically pre-configured in cfg->ddr_imc_num. + */ + return 0; + } +} + static bool i10nm_check_2lm(struct res_config *cfg) { struct skx_dev *d; @@ -318,9 +499,9 @@ static bool i10nm_check_2lm(struct res_config *cfg) int i; list_for_each_entry(d, i10nm_edac_list, list) { - d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1], - PCI_SLOT(cfg->sad_all_devfn), - PCI_FUNC(cfg->sad_all_devfn)); + d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->sad_all_bdf.bus], + res_cfg->sad_all_bdf.dev, + res_cfg->sad_all_bdf.fun); if (!d->sad_all) continue; @@ -337,20 +518,39 @@ static bool i10nm_check_2lm(struct res_config *cfg) } /* - * Check whether the error comes from DDRT by ICX/Tremont model specific error code. - * Refer to SDM vol3B 16.11.3 Intel IMC MC error codes for IA32_MCi_STATUS. + * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code. + * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS. */ static bool i10nm_mscod_is_ddrt(u32 mscod) { - switch (mscod) { - case 0x0106: case 0x0107: - case 0x0800: case 0x0804: - case 0x0806 ... 0x0808: - case 0x080a ... 0x080e: - case 0x0810: case 0x0811: - case 0x0816: case 0x081e: - case 0x081f: - return true; + switch (res_cfg->type) { + case I10NM: + switch (mscod) { + case 0x0106: case 0x0107: + case 0x0800: case 0x0804: + case 0x0806 ... 0x0808: + case 0x080a ... 0x080e: + case 0x0810: case 0x0811: + case 0x0816: case 0x081e: + case 0x081f: + return true; + } + + break; + case SPR: + switch (mscod) { + case 0x0800: case 0x0804: + case 0x0806 ... 0x0808: + case 0x080a ... 
0x080e: + case 0x0810: case 0x0811: + case 0x0816: case 0x081e: + case 0x081f: + return true; + } + + break; + default: + return false; } return false; @@ -358,6 +558,7 @@ static bool i10nm_mscod_is_ddrt(u32 mscod) static bool i10nm_mc_decode_available(struct mce *mce) { +#define ICX_IMCx_CHy 0x06666000 u8 bank; if (!decoding_via_mca || mem_cfg_2lm) @@ -371,21 +572,26 @@ static bool i10nm_mc_decode_available(struct mce *mce) switch (res_cfg->type) { case I10NM: - if (bank < 13 || bank > 26) + /* Check whether the bank is one of {13,14,17,18,21,22,25,26} */ + if (!(ICX_IMCx_CHy & (1 << bank))) return false; - - /* DDRT errors can't be decoded from MCA bank registers */ - if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) - return false; - - if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) + break; + case SPR: + if (bank < 13 || bank > 20) return false; - - /* Check whether one of {13,14,17,18,21,22,25,26} */ - return ((bank - 13) & BIT(1)) == 0; + break; default: return false; } + + /* DDRT errors can't be decoded from MCA bank registers */ + if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) + return false; + + if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) + return false; + + return true; } static bool i10nm_mc_decode(struct decoded_addr *res) @@ -407,9 +613,29 @@ static bool i10nm_mc_decode(struct decoded_addr *res) switch (res_cfg->type) { case I10NM: - bank = m->bank - 13; - res->imc = bank / 4; - res->channel = bank % 2; + bank = m->bank - 13; + res->imc = bank / 4; + res->channel = bank % 2; + res->column = GET_BITFIELD(m->misc, 9, 18) << 2; + res->row = GET_BITFIELD(m->misc, 19, 39); + res->bank_group = GET_BITFIELD(m->misc, 40, 41); + res->bank_address = GET_BITFIELD(m->misc, 42, 43); + res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; + res->rank = GET_BITFIELD(m->misc, 56, 58); + res->dimm = res->rank >> 2; + res->rank = res->rank % 4; + break; + case SPR: + bank = m->bank - 13; + res->imc = bank / 2; + res->channel = bank % 2; + res->column = GET_BITFIELD(m->misc, 9, 18) << 2; + res->row = GET_BITFIELD(m->misc, 19, 36); + res->bank_group = GET_BITFIELD(m->misc, 37, 38); + res->bank_address = GET_BITFIELD(m->misc, 39, 40); + res->bank_group |= GET_BITFIELD(m->misc, 41, 41) << 2; + res->rank = GET_BITFIELD(m->misc, 57, 57); + res->dimm = GET_BITFIELD(m->misc, 58, 58); break; default: return false; @@ -421,34 +647,165 @@ static bool i10nm_mc_decode(struct decoded_addr *res) return false; } - res->column = GET_BITFIELD(m->misc, 9, 18) << 2; - res->row = GET_BITFIELD(m->misc, 19, 39); - res->bank_group = GET_BITFIELD(m->misc, 40, 41); - res->bank_address = GET_BITFIELD(m->misc, 42, 43); - res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; - res->rank = GET_BITFIELD(m->misc, 56, 58); - res->dimm = res->rank >> 2; - res->rank = res->rank % 4; - return true; } +/** + * get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller. + * + * @d : The pointer to the structure of CPU socket EDAC device. + * @logical_idx : The logical index of the present memory controller (0 ~ max present MC# - 1). + * @physical_idx : To store the corresponding physical index of @logical_idx. + * + * RETURNS : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure. 
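The bank arithmetic in i10nm_mc_decode() above is easy to sanity-check in isolation; a sketch of the two cases (illustrative, mirroring the code):

	#include <stdbool.h>

	/* Sketch: MCA bank number -> (imc, channel) for ICX and SPR. */
	static void bank_to_mc(bool is_spr, int bank, int *imc, int *channel)
	{
		int b = bank - 13;		/* memory-controller banks start at 13 */

		*imc = is_spr ? b / 2 : b / 4;	/* SPR: 2 banks per IMC; ICX: 4 */
		*channel = b % 2;
	}

For ICX this maps the valid banks {13,14,17,18,21,22,25,26} to IMCs 0-3, channels 0-1; for SPR, banks 13-20 map to IMCs 0-3, channels 0-1.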
+ */ +static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *physical_idx) +{ +#define GNR_MAX_IMC_PCI_CNT 28 + + struct pci_dev *mdev; + int i, logical = 0; + + /* + * Detect present memory controllers from { PCI device: 8-5, function 7-1 } + */ + for (i = 0; i < GNR_MAX_IMC_PCI_CNT; i++) { + mdev = pci_get_dev_wrapper(d->seg, + d->bus[res_cfg->ddr_mdev_bdf.bus], + res_cfg->ddr_mdev_bdf.dev + i / 7, + res_cfg->ddr_mdev_bdf.fun + i % 7); + + if (mdev) { + if (logical == logical_idx) { + *physical_idx = i; + return mdev; + } + + pci_dev_put(mdev); + logical++; + } + } + + return NULL; +} + +static u32 get_gnr_imc_mmio_offset(void) +{ + if (boot_cpu_data.x86_vfm == INTEL_GRANITERAPIDS_D) + return I10NM_GNR_D_IMC_MMIO_OFFSET; + + return I10NM_GNR_IMC_MMIO_OFFSET; +} + +/** + * get_ddr_munit() - Get the resource of the i-th DDR memory controller. + * + * @d : The pointer to the structure of CPU socket EDAC device. + * @i : The index of the CPU socket relative DDR memory controller. + * @offset : To store the MMIO offset of the i-th DDR memory controller. + * @size : To store the MMIO size of the i-th DDR memory controller. + * + * RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure. + */ +static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsigned long *size) +{ + struct pci_dev *mdev; + int physical_idx; + u32 reg; + + switch (res_cfg->type) { + case GNR: + if (I10NM_GET_IMC_BAR(d, 0, reg)) { + i10nm_printk(KERN_ERR, "Failed to get mc0 bar\n"); + return NULL; + } + + mdev = get_gnr_mdev(d, i, &physical_idx); + if (!mdev) + return NULL; + + *offset = I10NM_GET_IMC_MMIO_OFFSET(reg) + + get_gnr_imc_mmio_offset() + + physical_idx * I10NM_GNR_IMC_MMIO_SIZE; + *size = I10NM_GNR_IMC_MMIO_SIZE; + + break; + default: + if (I10NM_GET_IMC_BAR(d, i, reg)) { + i10nm_printk(KERN_ERR, "Failed to get mc%d bar\n", i); + return NULL; + } + + mdev = pci_get_dev_wrapper(d->seg, + d->bus[res_cfg->ddr_mdev_bdf.bus], + res_cfg->ddr_mdev_bdf.dev + i, + res_cfg->ddr_mdev_bdf.fun); + if (!mdev) + return NULL; + + *offset = I10NM_GET_IMC_MMIO_OFFSET(reg); + *size = I10NM_GET_IMC_MMIO_SIZE(reg); + } + + return mdev; +} + +/** + * i10nm_imc_absent() - Check whether the memory controller @imc is absent + * + * @imc : The pointer to the structure of memory controller EDAC device. + * + * RETURNS : true if the memory controller EDAC device is absent, false otherwise. + */ +static bool i10nm_imc_absent(struct skx_imc *imc) +{ + u32 mcmtr; + int i; + + switch (res_cfg->type) { + case SPR: + for (i = 0; i < res_cfg->ddr_chan_num; i++) { + mcmtr = I10NM_GET_MCMTR(imc, i); + edac_dbg(1, "ch%d mcmtr reg %x\n", i, mcmtr); + if (mcmtr != ~0) + return false; + } + + /* + * Some workstations' absent memory controllers still + * appear as PCIe devices, misleading the EDAC driver. + * However, the MMIO registers of these absent memory + * controllers consistently hold the value of ~0. + * + * We identify a memory controller as absent by checking + * if its MMIO register "mcmtr" == ~0 in all its channels.
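The get_gnr_mdev() walk above packs up to 28 candidate memory-controller PCI functions into four devices, seven functions each, starting at gnr_cfg's ddr_mdev_bdf = {0, 5, 1}. A self-contained sketch of the index math (illustrative only):

	#include <stdio.h>

	int main(void)
	{
		int dev = 5, fun = 1;	/* GNR ddr_mdev_bdf = {0, 5, 1} */
		int i;

		for (i = 0; i < 28; i++)	/* GNR_MAX_IMC_PCI_CNT */
			printf("physical MC %2d -> device %d function %d\n",
			       i, dev + i / 7, fun + i % 7);
		return 0;
	}

This yields devices 5-8 and functions 1-7, matching the "{ PCI device: 8-5, function 7-1 }" comment in the code.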
+ */ + return true; + default: + return false; + } +} + static int i10nm_get_ddr_munits(void) { struct pci_dev *mdev; void __iomem *mbase; unsigned long size; struct skx_dev *d; - int i, j = 0; + int i, lmc, j = 0; u32 reg, off; u64 base; list_for_each_entry(d, i10nm_edac_list, list) { - d->util_all = pci_get_dev_wrapper(d->seg, d->bus[1], 29, 1); + d->util_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->util_all_bdf.bus], + res_cfg->util_all_bdf.dev, + res_cfg->util_all_bdf.fun); if (!d->util_all) return -ENODEV; - d->uracu = pci_get_dev_wrapper(d->seg, d->bus[0], 0, 1); + d->uracu = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->uracu_bdf.bus], + res_cfg->uracu_bdf.dev, + res_cfg->uracu_bdf.fun); if (!d->uracu) return -ENODEV; @@ -461,9 +818,9 @@ static int i10nm_get_ddr_munits(void) edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n", j++, base, reg); - for (i = 0; i < I10NM_NUM_DDR_IMC; i++) { - mdev = pci_get_dev_wrapper(d->seg, d->bus[0], - 12 + i, 0); + for (lmc = 0, i = 0; i < res_cfg->ddr_imc_num; i++) { + mdev = get_ddr_munit(d, i, &off, &size); + if (i == 0 && !mdev) { i10nm_printk(KERN_ERR, "No IMC found\n"); return -ENODEV; @@ -471,15 +828,6 @@ static int i10nm_get_ddr_munits(void) if (!mdev) continue; - d->imc[i].mdev = mdev; - - if (I10NM_GET_IMC_BAR(d, i, reg)) { - i10nm_printk(KERN_ERR, "Failed to get mc bar\n"); - return -ENODEV; - } - - off = I10NM_GET_IMC_MMIO_OFFSET(reg); - size = I10NM_GET_IMC_MMIO_SIZE(reg); edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n", i, base + off, size, reg); @@ -490,7 +838,19 @@ static int i10nm_get_ddr_munits(void) return -ENODEV; } - d->imc[i].mbase = mbase; + d->imc[lmc].mbase = mbase; + if (i10nm_imc_absent(&d->imc[lmc])) { + pci_dev_put(mdev); + iounmap(mbase); + d->imc[lmc].mbase = NULL; + edac_dbg(2, "Skip absent mc%d\n", i); + continue; + } else { + d->imc[lmc].mdev = mdev; + if (res_cfg->type == SPR) + skx_set_mc_mapping(d, i, lmc); + lmc++; + } } } @@ -519,7 +879,6 @@ static int i10nm_get_hbm_munits(void) u64 base; list_for_each_entry(d, i10nm_edac_list, list) { - d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3); if (!d->pcu_cr3) return -ENODEV; @@ -540,11 +899,13 @@ static int i10nm_get_hbm_munits(void) } base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg); - lmc = I10NM_NUM_DDR_IMC; + lmc = res_cfg->ddr_imc_num; + + for (i = 0; i < res_cfg->hbm_imc_num; i++) { + mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->hbm_mdev_bdf.bus], + res_cfg->hbm_mdev_bdf.dev + i / 4, + res_cfg->hbm_mdev_bdf.fun + i % 4); - for (i = 0; i < I10NM_NUM_HBM_IMC; i++) { - mdev = pci_get_dev_wrapper(d->seg, d->bus[0], - 12 + i / 4, 1 + i % 4); if (i == 0 && !mdev) { i10nm_printk(KERN_ERR, "No hbm mc found\n"); return -ENODEV; @@ -594,49 +955,95 @@ static struct res_config i10nm_cfg0 = { .type = I10NM, .decs_did = 0x3452, .busno_cfg_offset = 0xcc, + .ddr_imc_num = 4, + .ddr_chan_num = 2, + .ddr_dimm_num = 2, .ddr_chan_mmio_sz = 0x4000, - .sad_all_devfn = PCI_DEVFN(29, 0), + .sad_all_bdf = {1, 29, 0}, + .pcu_cr3_bdf = {1, 30, 3}, + .util_all_bdf = {1, 29, 1}, + .uracu_bdf = {0, 0, 1}, + .ddr_mdev_bdf = {0, 12, 0}, + .hbm_mdev_bdf = {0, 12, 1}, .sad_all_offset = 0x108, - .offsets_scrub = offsets_scrub_icx, - .offsets_demand = offsets_demand_icx, + .reg_rrl_ddr = &icx_reg_rrl_ddr, }; static struct res_config i10nm_cfg1 = { .type = I10NM, .decs_did = 0x3452, .busno_cfg_offset = 0xd0, + .ddr_imc_num = 4, + .ddr_chan_num = 2, + .ddr_dimm_num = 2, .ddr_chan_mmio_sz = 0x4000, - .sad_all_devfn = PCI_DEVFN(29, 0), + .sad_all_bdf = {1, 29, 0}, + 
.pcu_cr3_bdf = {1, 30, 3}, + .util_all_bdf = {1, 29, 1}, + .uracu_bdf = {0, 0, 1}, + .ddr_mdev_bdf = {0, 12, 0}, + .hbm_mdev_bdf = {0, 12, 1}, .sad_all_offset = 0x108, - .offsets_scrub = offsets_scrub_icx, - .offsets_demand = offsets_demand_icx, + .reg_rrl_ddr = &icx_reg_rrl_ddr, }; static struct res_config spr_cfg = { .type = SPR, .decs_did = 0x3252, .busno_cfg_offset = 0xd0, + .ddr_imc_num = 4, + .ddr_chan_num = 2, + .ddr_dimm_num = 2, + .hbm_imc_num = 16, + .hbm_chan_num = 2, + .hbm_dimm_num = 1, .ddr_chan_mmio_sz = 0x8000, .hbm_chan_mmio_sz = 0x4000, .support_ddr5 = true, - .sad_all_devfn = PCI_DEVFN(10, 0), + .sad_all_bdf = {1, 10, 0}, + .pcu_cr3_bdf = {1, 30, 3}, + .util_all_bdf = {1, 29, 1}, + .uracu_bdf = {0, 0, 1}, + .ddr_mdev_bdf = {0, 12, 0}, + .hbm_mdev_bdf = {0, 12, 1}, + .sad_all_offset = 0x300, + .reg_rrl_ddr = &spr_reg_rrl_ddr, + .reg_rrl_hbm[0] = &spr_reg_rrl_hbm_pch0, + .reg_rrl_hbm[1] = &spr_reg_rrl_hbm_pch1, +}; + +static struct res_config gnr_cfg = { + .type = GNR, + .decs_did = 0x3252, + .busno_cfg_offset = 0xd0, + .ddr_imc_num = 12, + .ddr_chan_num = 1, + .ddr_dimm_num = 2, + .ddr_chan_mmio_sz = 0x4000, + .support_ddr5 = true, + .sad_all_bdf = {0, 13, 0}, + .pcu_cr3_bdf = {0, 5, 0}, + .util_all_bdf = {0, 13, 1}, + .uracu_bdf = {0, 0, 1}, + .ddr_mdev_bdf = {0, 5, 1}, .sad_all_offset = 0x300, - .offsets_scrub = offsets_scrub_spr, - .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0, - .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1, - .offsets_demand = offsets_demand_spr, - .offsets_demand2 = offsets_demand2_spr, - .offsets_demand_hbm0 = offsets_demand_spr_hbm0, - .offsets_demand_hbm1 = offsets_demand_spr_hbm1, + .reg_rrl_ddr = &gnr_reg_rrl_ddr, }; static const struct x86_cpu_id i10nm_cpuids[] = { - X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0), - X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1), - X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0), - X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1), - X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1), - X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), + X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MIN, 0x3, &i10nm_cfg0), + X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, 0x4, X86_STEP_MAX, &i10nm_cfg1), + X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, X86_STEP_MIN, 0x3, &i10nm_cfg0), + X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, 0x4, X86_STEP_MAX, &i10nm_cfg1), + X86_MATCH_VFM( INTEL_ICELAKE_D, &i10nm_cfg1), + + X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_cfg), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_cfg), + X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_cfg), + X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_cfg), + X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_cfg), + X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_cfg), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); @@ -651,12 +1058,21 @@ static bool i10nm_check_ecc(struct skx_imc *imc, int chan) return !!GET_BITFIELD(mcmtr, 2, 2); } +static bool i10nm_channel_disabled(struct skx_imc *imc, int chan) +{ + u32 mcmtr = I10NM_GET_MCMTR(imc, chan); + + edac_dbg(1, "mc%d ch%d mcmtr reg %x\n", imc->mc, chan, mcmtr); + + return (mcmtr == ~0 || GET_BITFIELD(mcmtr, 18, 18)); +} + static int i10nm_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg) { struct skx_pvt *pvt = mci->pvt_info; struct skx_imc *imc = pvt->imc; - 
u32 mtr, amap, mcddrtcfg; + u32 mtr, mcddrtcfg = 0; struct dimm_info *dimm; int i, j, ndimms; @@ -664,9 +1080,16 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, if (!imc->mbase) continue; + if (i10nm_channel_disabled(imc, i)) { + edac_dbg(1, "mc%d ch%d is disabled.\n", imc->mc, i); + continue; + } + ndimms = 0; - amap = I10NM_GET_AMAP(imc, i); - mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); + + if (res_cfg->type != GNR) + mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); + for (j = 0; j < imc->num_dimms; j++) { dimm = edac_get_dimm(mci, i, j, 0); mtr = I10NM_GET_DIMMMTR(imc, i, j); @@ -674,7 +1097,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, mtr, mcddrtcfg, imc->mc, i, j); if (IS_DIMM_PRESENT(mtr)) - ndimms += skx_get_dimm_info(mtr, 0, amap, dimm, + ndimms += skx_get_dimm_info(mtr, 0, 0, dimm, imc, i, j, cfg); else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) ndimms += skx_get_nvdimm_info(dimm, imc, i, j, @@ -695,63 +1118,16 @@ static struct notifier_block i10nm_mce_dec = { .priority = MCE_PRIO_EDAC, }; -#ifdef CONFIG_EDAC_DEBUG -/* - * Debug feature. - * Exercise the address decode logic by writing an address to - * /sys/kernel/debug/edac/i10nm_test/addr. - */ -static struct dentry *i10nm_test; - -static int debugfs_u64_set(void *data, u64 val) -{ - struct mce m; - - pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); - - memset(&m, 0, sizeof(m)); - /* ADDRV + MemRd + Unknown channel */ - m.status = MCI_STATUS_ADDRV + 0x90; - /* One corrected error */ - m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); - m.addr = val; - skx_mce_check_error(NULL, 0, &m); - - return 0; -} -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); - -static void setup_i10nm_debug(void) -{ - i10nm_test = edac_debugfs_create_dir("i10nm_test"); - if (!i10nm_test) - return; - - if (!edac_debugfs_create_file("addr", 0200, i10nm_test, - NULL, &fops_u64_wo)) { - debugfs_remove(i10nm_test); - i10nm_test = NULL; - } -} - -static void teardown_i10nm_debug(void) -{ - debugfs_remove_recursive(i10nm_test); -} -#else -static inline void setup_i10nm_debug(void) {} -static inline void teardown_i10nm_debug(void) {} -#endif /*CONFIG_EDAC_DEBUG*/ - static int __init i10nm_init(void) { - u8 mc = 0, src_id = 0, node_id = 0; + u8 mc = 0, src_id = 0; const struct x86_cpu_id *id; struct res_config *cfg; const char *owner; struct skx_dev *d; int rc, i, off[3] = {0xd0, 0xc8, 0xcc}; u64 tolm, tohm; + int imc_num; edac_dbg(2, "\n"); @@ -770,6 +1146,7 @@ static int __init i10nm_init(void) return -ENODEV; cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); res_cfg = cfg; rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm); @@ -784,6 +1161,10 @@ static int __init i10nm_init(void) return -ENODEV; } + rc = i10nm_get_imc_num(cfg); + if (rc < 0) + goto fail; + mem_cfg_2lm = i10nm_check_2lm(cfg); skx_set_mem_cfg(mem_cfg_2lm); @@ -792,35 +1173,33 @@ static int __init i10nm_init(void) if (i10nm_get_hbm_munits() && rc) goto fail; + imc_num = res_cfg->ddr_imc_num + res_cfg->hbm_imc_num; + list_for_each_entry(d, i10nm_edac_list, list) { rc = skx_get_src_id(d, 0xf8, &src_id); if (rc < 0) goto fail; - rc = skx_get_node_id(d, &node_id); - if (rc < 0) - goto fail; - - edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id); - for (i = 0; i < I10NM_NUM_IMC; i++) { + edac_dbg(2, "src_id = %d\n", src_id); + for (i = 0; i < imc_num; i++) { if (!d->imc[i].mdev) continue; d->imc[i].mc = mc++; d->imc[i].lmc = i; - d->imc[i].src_id = src_id; - d->imc[i].node_id = node_id; + d->imc[i].src_id = src_id; if 
(d->imc[i].hbm_mc) { d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz; - d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS; - d->imc[i].num_dimms = I10NM_NUM_HBM_DIMMS; + d->imc[i].num_channels = cfg->hbm_chan_num; + d->imc[i].num_dimms = cfg->hbm_dimm_num; } else { d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; - d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS; - d->imc[i].num_dimms = I10NM_NUM_DDR_DIMMS; + d->imc[i].num_channels = cfg->ddr_chan_num; + d->imc[i].num_dimms = cfg->ddr_dimm_num; } - rc = skx_register_mci(&d->imc[i], d->imc[i].mdev, + rc = skx_register_mci(&d->imc[i], &d->imc[i].mdev->dev, + pci_name(d->imc[i].mdev), "Intel_10nm Socket", EDAC_MOD_STR, i10nm_get_dimm_config, cfg); if (rc < 0) @@ -834,9 +1213,9 @@ static int __init i10nm_init(void) opstate_init(); mce_register_decode_chain(&i10nm_mce_dec); - setup_i10nm_debug(); + skx_setup_debug("i10nm_test"); - if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + if (retry_rd_err_log && res_cfg->reg_rrl_ddr) { skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log); if (retry_rd_err_log == 2) enable_retry_rd_err_log(true); @@ -856,13 +1235,13 @@ static void __exit i10nm_exit(void) { edac_dbg(2, "\n"); - if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + if (retry_rd_err_log && res_cfg->reg_rrl_ddr) { skx_set_decode(NULL, NULL); if (retry_rd_err_log == 2) enable_retry_rd_err_log(false); } - teardown_i10nm_debug(); + skx_teardown_debug(); mce_unregister_decode_chain(&i10nm_mce_dec); skx_adxl_put(); skx_remove(); diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index ba46057d4220..4a1bebc1ff14 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -338,11 +338,11 @@ struct i5000_pvt { u16 mir0, mir1, mir2; - u16 b0_mtr[NUM_MTRS]; /* Memory Technlogy Reg */ + u16 b0_mtr[NUM_MTRS]; /* Memory Technology Reg */ u16 b0_ambpresent0; /* Branch 0, Channel 0 */ - u16 b0_ambpresent1; /* Brnach 0, Channel 1 */ + u16 b0_ambpresent1; /* Branch 0, Channel 1 */ - u16 b1_mtr[NUM_MTRS]; /* Memory Technlogy Reg */ + u16 b1_mtr[NUM_MTRS]; /* Memory Technology Reg */ u16 b1_ambpresent0; /* Branch 1, Channel 8 */ u16 b1_ambpresent1; /* Branch 1, Channel 1 */ @@ -1210,7 +1210,7 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci) &pvt->b0_ambpresent1); edac_dbg(2, "\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1); - /* Only if we have 2 branchs (4 channels) */ + /* Only if we have 2 branches (4 channels) */ if (pvt->maxch < CHANNELS_PER_BRANCH) { pvt->b1_ambpresent0 = 0; pvt->b1_ambpresent1 = 0; @@ -1573,13 +1573,10 @@ module_init(i5000_init); module_exit(i5000_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR - ("Linux Networx (http://lnxi.com) Doug Thompson <norsk5@xmission.com>"); -MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " - I5000_REVISION); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Doug Thompson <norsk5@xmission.com>"); +MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " I5000_REVISION); module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); module_param(misc_messages, int, 0444); MODULE_PARM_DESC(misc_messages, "Log miscellaneous non fatal messages"); - diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index f5d82518c15e..d470afe65001 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -909,7 +909,7 @@ static void i5100_do_inject(struct mem_ctl_info *mci) * * The injection code don't work without 
setting this register. * The register needs to be flipped off then on else the hardware - * will only preform the first injection. + * will only perform the first injection. * * Stop condition bits 7:4 * 1010 - Stop after one injection @@ -1220,6 +1220,5 @@ module_init(i5100_init); module_exit(i5100_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR - ("Arthur Jones <ajones@riverbed.com>"); +MODULE_AUTHOR("Arthur Jones <ajones@riverbed.com>"); MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers"); diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 49b4499269fb..b5cf25905b05 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -31,6 +31,7 @@ #include <linux/slab.h> #include <linux/edac.h> #include <linux/mmzone.h> +#include <linux/string_choices.h> #include "edac_module.h" @@ -899,7 +900,7 @@ static void decode_mtr(int slot_row, u16 mtr) edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr))); edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); edac_dbg(2, "\t\tNUMRANK: %s\n", diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 61adaa872ba7..69068f8d0cad 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/edac.h> #include <linux/mmzone.h> +#include <linux/string_choices.h> #include "edac_module.h" @@ -620,7 +621,7 @@ static int decode_mtr(struct i7300_pvt *pvt, edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr))); edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); edac_dbg(2, "\t\tNUMRANK: %s\n", @@ -871,9 +872,9 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) IS_MIRRORED(pvt->mc_settings) ? "" : "non-"); edac_dbg(0, "Error detection is %s\n", - IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + str_enabled_disabled(IS_ECC_ENABLED(pvt->mc_settings))); edac_dbg(0, "Retry is %s\n", - IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + str_enabled_disabled(IS_RETRY_ENABLED(pvt->mc_settings))); /* Get Memory Interleave Range registers */ pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0, diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 23d25724bae4..91e0a88ef904 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -376,7 +376,7 @@ static const struct pci_id_table pci_dev_table[] = { PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem), PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield), PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere), - {0,} /* 0 terminated list. */ + { NULL, } }; /* @@ -385,7 +385,7 @@ static const struct pci_id_table pci_dev_table[] = { static const struct pci_device_id i7core_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)}, - {0,} /* 0 terminated list. 
*/ + { 0, } }; /**************************************************************************** diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index fbec90d00f1e..b8a497f0de28 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -355,8 +355,7 @@ module_init(i82860_init); module_exit(i82860_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) " - "Ben Woodard <woodard@redhat.com>"); +MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) Ben Woodard <woodard@redhat.com>"); MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers"); module_param(edac_op_state, int, 0444); diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 9ef13570f2e5..eaab6af143e1 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -19,7 +19,8 @@ * 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller * 0c08: Xeon E3-1200 v3 Processor DRAM Controller * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers - * 5918: Xeon E3-1200 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers + * 590f: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers + * 5918: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers * 190f: 6th Gen Core Dual-Core Processor Host Bridge/DRAM Registers * 191f: 6th Gen Core Quad-Core Processor Host Bridge/DRAM Registers * 3e..: 8th/9th Gen Core Processor Host Bridge/DRAM Registers @@ -43,6 +44,7 @@ * but lo_hi_readq() ensures that we are safe across all e3-1200 processors. */ +#include <linux/bitfield.h> #include <linux/module.h> #include <linux/init.h> #include <linux/pci.h> @@ -50,6 +52,8 @@ #include <linux/edac.h> #include <linux/io-64-nonatomic-lo-hi.h> +#include <asm/mce.h> +#include <asm/msr.h> #include "edac_module.h" #define EDAC_MOD_STR "ie31200_edac" @@ -67,7 +71,8 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x190F #define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x1918 #define PCI_DEVICE_ID_INTEL_IE31200_HB_10 0x191F -#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x5918 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x590f +#define PCI_DEVICE_ID_INTEL_IE31200_HB_12 0x5918 /* Coffee Lake-S */ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK 0x3e00 @@ -82,43 +87,46 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6 #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca -/* Test if HB is for Skylake or later. */ -#define DEVICE_ID_SKYLAKE_OR_LATER(did) \ - (((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \ - (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \ - PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK)) - -#define IE31200_DIMMS 4 -#define IE31200_RANKS 8 -#define IE31200_RANKS_PER_CHANNEL 4 +/* Raptor Lake-S */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703 /* 8P+8E, e.g. i7-13700 */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640 /* 6P+8E, e.g. i5-13500, i5-13600, i5-14500 */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630 /* 4P+0E, e.g. i3-13100E */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4 0xa700 /* 8P+16E, e.g. i9-13900, i9-14900 */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_5 0xa740 /* 8P+12E, e.g. i7-14700 */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_6 0xa704 /* 6P+8E, e.g. i5-14600 */ + +/* Raptor Lake-HX */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_HX_1 0xa702 /* 8P+16E, e.g. 
i9-13950HX */ + +/* Alder Lake-S */ +#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1 0x4660 +#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2 0x4668 /* 8P+4E, e.g. i7-12700K */ +#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3 0x4648 /* 6P+4E, e.g. i5-12600K */ + +/* Bartlett Lake-S */ +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1 0x4639 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_2 0x463c +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_3 0x4642 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_4 0x4643 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_5 0xa731 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_6 0xa732 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_7 0xa733 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_8 0xa741 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_9 0xa744 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_10 0xa745 + +#define IE31200_RANKS_PER_CHANNEL 8 #define IE31200_DIMMS_PER_CHANNEL 2 #define IE31200_CHANNELS 2 +#define IE31200_IMC_NUM 2 /* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */ #define IE31200_MCHBAR_LOW 0x48 #define IE31200_MCHBAR_HIGH 0x4c -#define IE31200_MCHBAR_MASK GENMASK_ULL(38, 15) -#define IE31200_MMR_WINDOW_SIZE BIT(15) /* * Error Status Register (16b) * - * 15 reserved - * 14 Isochronous TBWRR Run Behind FIFO Full - * (ITCV) - * 13 Isochronous TBWRR Run Behind FIFO Put - * (ITSTV) - * 12 reserved - * 11 MCH Thermal Sensor Event - * for SMI/SCI/SERR (GTSE) - * 10 reserved - * 9 LOCK to non-DRAM Memory Flag (LCKF) - * 8 reserved - * 7 DRAM Throttle Flag (DTF) - * 6:2 reserved * 1 Multi-bit DRAM ECC Error Flag (DMERR) * 0 Single-bit DRAM ECC Error Flag (DSERR) */ @@ -127,68 +135,57 @@ #define IE31200_ERRSTS_CE BIT(0) #define IE31200_ERRSTS_BITS (IE31200_ERRSTS_UE | IE31200_ERRSTS_CE) -/* - * Channel 0 ECC Error Log (64b) - * - * 63:48 Error Column Address (ERRCOL) - * 47:32 Error Row Address (ERRROW) - * 31:29 Error Bank Address (ERRBANK) - * 28:27 Error Rank Address (ERRRANK) - * 26:24 reserved - * 23:16 Error Syndrome (ERRSYND) - * 15: 2 reserved - * 1 Multiple Bit Error Status (MERRSTS) - * 0 Correctable Error Status (CERRSTS) - */ - -#define IE31200_C0ECCERRLOG 0x40c8 -#define IE31200_C1ECCERRLOG 0x44c8 -#define IE31200_C0ECCERRLOG_SKL 0x4048 -#define IE31200_C1ECCERRLOG_SKL 0x4448 -#define IE31200_ECCERRLOG_CE BIT(0) -#define IE31200_ECCERRLOG_UE BIT(1) -#define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27) -#define IE31200_ECCERRLOG_RANK_SHIFT 27 -#define IE31200_ECCERRLOG_SYNDROME_BITS GENMASK_ULL(23, 16) -#define IE31200_ECCERRLOG_SYNDROME_SHIFT 16 - -#define IE31200_ECCERRLOG_SYNDROME(log) \ - ((log & IE31200_ECCERRLOG_SYNDROME_BITS) >> \ - IE31200_ECCERRLOG_SYNDROME_SHIFT) - #define IE31200_CAPID0 0xe4 #define IE31200_CAPID0_PDCD BIT(4) #define IE31200_CAPID0_DDPCD BIT(6) #define IE31200_CAPID0_ECC BIT(1) -#define IE31200_MAD_DIMM_0_OFFSET 0x5004 -#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C -#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0) -#define IE31200_MAD_DIMM_A_RANK BIT(17) -#define IE31200_MAD_DIMM_A_RANK_SHIFT 17 -#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10) -#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10 -#define IE31200_MAD_DIMM_A_WIDTH BIT(19) -#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19 -#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8) -#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8 - -/* Skylake reports 1GB increments, everything else is 256MB */ -#define IE31200_PAGES(n, skl) \ - (n << (28 + (2 * skl) - PAGE_SHIFT)) - static int nr_channels; static struct pci_dev *mci_pdev; static int ie31200_registered = 1; +struct res_config { + enum mem_type 
mtype; + bool cmci; + int imc_num; + /* Host MMIO configuration register */ + u64 reg_mchbar_mask; + u64 reg_mchbar_window_size; + /* ECC error log register */ + u64 reg_eccerrlog_offset[IE31200_CHANNELS]; + u64 reg_eccerrlog_ce_mask; + u64 reg_eccerrlog_ce_ovfl_mask; + u64 reg_eccerrlog_ue_mask; + u64 reg_eccerrlog_ue_ovfl_mask; + u64 reg_eccerrlog_rank_mask; + u64 reg_eccerrlog_syndrome_mask; + /* MSR to clear ECC error log register */ + u32 msr_clear_eccerrlog_offset; + /* DIMM characteristics register */ + u64 reg_mad_dimm_size_granularity; + u64 reg_mad_dimm_offset[IE31200_CHANNELS]; + u32 reg_mad_dimm_size_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_rank_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_width_mask[IE31200_DIMMS_PER_CHANNEL]; +}; + struct ie31200_priv { void __iomem *window; void __iomem *c0errlog; void __iomem *c1errlog; + struct res_config *cfg; + struct mem_ctl_info *mci; + struct pci_dev *pdev; + struct device dev; }; +static struct ie31200_pvt { + struct ie31200_priv *priv[IE31200_IMC_NUM]; +} ie31200_pvt; + enum ie31200_chips { IE31200 = 0, + IE31200_1 = 1, }; struct ie31200_dev_info { @@ -199,18 +196,22 @@ struct ie31200_error_info { u16 errsts; u16 errsts2; u64 eccerrlog[IE31200_CHANNELS]; + u64 erraddr; }; static const struct ie31200_dev_info ie31200_devs[] = { [IE31200] = { .ctl_name = "IE31200" }, + [IE31200_1] = { + .ctl_name = "IE31200_1" + }, }; struct dimm_data { - u8 size; /* in multiples of 256MB, except Skylake is 1GB */ - u8 dual_rank : 1, - x16_width : 2; /* 0 means x8 width */ + u64 size; /* in bytes */ + u8 ranks; + enum dev_type dtype; }; static int how_many_channels(struct pci_dev *pdev) @@ -248,29 +249,54 @@ static bool ecc_capable(struct pci_dev *pdev) return true; } -static int eccerrlog_row(u64 log) -{ - return ((log & IE31200_ECCERRLOG_RANK_BITS) >> - IE31200_ECCERRLOG_RANK_SHIFT); -} +#define mci_to_pci_dev(mci) (((struct ie31200_priv *)(mci)->pvt_info)->pdev) static void ie31200_clear_error_info(struct mem_ctl_info *mci) { + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; + + /* + * The PCI ERRSTS register is deprecated. Write the MSR to clear + * the ECC error log registers in all memory controllers. + */ + if (cfg->msr_clear_eccerrlog_offset) { + if (wrmsr_safe(cfg->msr_clear_eccerrlog_offset, + cfg->reg_eccerrlog_ce_mask | + cfg->reg_eccerrlog_ce_ovfl_mask | + cfg->reg_eccerrlog_ue_mask | + cfg->reg_eccerrlog_ue_ovfl_mask, 0) < 0) + ie31200_printk(KERN_ERR, "Failed to wrmsr.\n"); + + return; + } + /* * Clear any error bits. * (Yes, we really clear bits by writing 1 to them.) */ - pci_write_bits16(to_pci_dev(mci->pdev), IE31200_ERRSTS, + pci_write_bits16(mci_to_pci_dev(mci), IE31200_ERRSTS, IE31200_ERRSTS_BITS, IE31200_ERRSTS_BITS); } static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, struct ie31200_error_info *info) { - struct pci_dev *pdev; + struct pci_dev *pdev = mci_to_pci_dev(mci); struct ie31200_priv *priv = mci->pvt_info; - pdev = to_pci_dev(mci->pdev); + /* + * The PCI ERRSTS register is deprecated; directly read the + * MMIO-mapped ECC error log registers. 
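The mask-style register description in struct res_config above lets a single decoder serve every supported generation: each SoC config only supplies offsets and bit masks. A minimal sketch of the runtime field extraction (mask_field_get() is a hypothetical stand-in for the driver's field_get() helper; __ffs64() comes from <linux/bitops.h>):

	/* Hypothetical helper: shift the masked field down to bit 0. */
	static inline u64 mask_field_get(u64 mask, u64 reg)
	{
		return (reg & mask) >> __ffs64(mask);
	}

	/* e.g. with the Skylake config, the rank lives in eccerrlog bits 28:27: */
	/*	rank = mask_field_get(GENMASK_ULL(28, 27), log);		  */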
+ */ + if (priv->cfg->msr_clear_eccerrlog_offset) { + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); + if (nr_channels == 2) + info->eccerrlog[1] = lo_hi_readq(priv->c1errlog); + + ie31200_clear_error_info(mci); + return; + } /* * This is a mess because there is no atomic way to read all the @@ -306,46 +332,56 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, static void ie31200_process_error_info(struct mem_ctl_info *mci, struct ie31200_error_info *info) { + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; int channel; u64 log; - if (!(info->errsts & IE31200_ERRSTS_BITS)) - return; + if (!cfg->msr_clear_eccerrlog_offset) { + if (!(info->errsts & IE31200_ERRSTS_BITS)) + return; - if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, - -1, -1, -1, "UE overwrote CE", ""); - info->errsts = info->errsts2; + if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, "UE overwrote CE", ""); + info->errsts = info->errsts2; + } } for (channel = 0; channel < nr_channels; channel++) { log = info->eccerrlog[channel]; - if (log & IE31200_ECCERRLOG_UE) { + if (log & cfg->reg_eccerrlog_ue_mask) { edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - 0, 0, 0, - eccerrlog_row(log), + info->erraddr >> PAGE_SHIFT, 0, 0, + field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 UE", ""); - } else if (log & IE31200_ECCERRLOG_CE) { + } else if (log & cfg->reg_eccerrlog_ce_mask) { edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - 0, 0, - IE31200_ECCERRLOG_SYNDROME(log), - eccerrlog_row(log), + info->erraddr >> PAGE_SHIFT, 0, + field_get(cfg->reg_eccerrlog_syndrome_mask, log), + field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 CE", ""); } } } -static void ie31200_check(struct mem_ctl_info *mci) +static void __ie31200_check(struct mem_ctl_info *mci, struct mce *mce) { struct ie31200_error_info info; + info.erraddr = mce ? 
mce->addr : 0; ie31200_get_and_clear_error_info(mci, &info); ie31200_process_error_info(mci, &info); } -static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) +static void ie31200_check(struct mem_ctl_info *mci) +{ + __ie31200_check(mci, NULL); +} + +static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc) { union { u64 mchbar; @@ -358,7 +394,8 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low); pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high); - u.mchbar &= IE31200_MCHBAR_MASK; + u.mchbar &= cfg->reg_mchbar_mask; + u.mchbar += cfg->reg_mchbar_window_size * mc; if (u.mchbar != (resource_size_t)u.mchbar) { ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n", @@ -366,7 +403,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return NULL; } - window = ioremap(u.mchbar, IE31200_MMR_WINDOW_SIZE); + window = ioremap(u.mchbar, cfg->reg_mchbar_window_size); if (!window) ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n", (unsigned long long)u.mchbar); @@ -374,155 +411,108 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return window; } -static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int chan) +static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, + struct res_config *cfg) { - dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0; - dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >> - (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4))); + dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode) * cfg->reg_mad_dimm_size_granularity; + dd->ranks = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode) + 1; + dd->dtype = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode) + DEV_X8; } -static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int chan) +static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window, + struct res_config *cfg, int mc) { - dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0; - dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 
1 : 0; -} + struct dimm_data dimm_info; + struct dimm_info *dimm; + unsigned long nr_pages; + u32 addr_decode; + int i, j, k; -static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan, - bool skl) -{ - if (skl) - __skl_populate_dimm_info(dd, addr_decode, chan); - else - __populate_dimm_info(dd, addr_decode, chan); -} + for (i = 0; i < IE31200_CHANNELS; i++) { + addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]); + edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); + + for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { + populate_dimm_info(&dimm_info, addr_decode, j, cfg); + edac_dbg(0, "mc: %d, channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", + mc, i, j, dimm_info.size >> 20, + dimm_info.ranks, + dimm_info.dtype); + + nr_pages = MiB_TO_PAGES(dimm_info.size >> 20); + if (nr_pages == 0) + continue; + nr_pages = nr_pages / dimm_info.ranks; + for (k = 0; k < dimm_info.ranks; k++) { + dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0); + dimm->nr_pages = nr_pages; + edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); + dimm->grain = 8; /* just a guess */ + dimm->mtype = cfg->mtype; + dimm->dtype = dimm_info.dtype; + dimm->edac_mode = EDAC_UNKNOWN; + } + } + } +} -static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) +static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, int mc) { - int i, j, ret; - struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; - struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL]; - void __iomem *window; struct ie31200_priv *priv; - u32 addr_decode, mad_offset; - - /* - * Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit - * this logic when adding new CPU support. - */ - bool skl = DEVICE_ID_SKYLAKE_OR_LATER(pdev->device); - - edac_dbg(0, "MC:\n"); - - if (!ecc_capable(pdev)) { - ie31200_printk(KERN_INFO, "No ECC support\n"); - return -ENODEV; - } + struct mem_ctl_info *mci; + void __iomem *window; + int ret; nr_channels = how_many_channels(pdev); layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = IE31200_DIMMS; + layers[0].size = IE31200_RANKS_PER_CHANNEL; layers[0].is_virt_csrow = true; layers[1].type = EDAC_MC_LAYER_CHANNEL; layers[1].size = nr_channels; layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct ie31200_priv)); if (!mci) return -ENOMEM; - window = ie31200_map_mchbar(pdev); + window = ie31200_map_mchbar(pdev, cfg, mc); if (!window) { ret = -ENODEV; goto fail_free; } edac_dbg(3, "MC: init mci\n"); - mci->pdev = &pdev->dev; - if (skl) - mci->mtype_cap = MEM_FLAG_DDR4; - else - mci->mtype_cap = MEM_FLAG_DDR3; + mci->mtype_cap = BIT(cfg->mtype); mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = ie31200_devs[dev_idx].ctl_name; + mci->ctl_name = ie31200_devs[mc].ctl_name; mci->dev_name = pci_name(pdev); - mci->edac_check = ie31200_check; + mci->edac_check = cfg->cmci ? 
NULL : ie31200_check; mci->ctl_page_to_phys = NULL; priv = mci->pvt_info; priv->window = window; - if (skl) { - priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL; - priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL; - mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL; - } else { - priv->c0errlog = window + IE31200_C0ECCERRLOG; - priv->c1errlog = window + IE31200_C1ECCERRLOG; - mad_offset = IE31200_MAD_DIMM_0_OFFSET; - } - - /* populate DIMM info */ - for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode = readl(window + mad_offset + - (i * 4)); - edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); - for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { - populate_dimm_info(&dimm_info[i][j], addr_decode, j, - skl); - edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", - dimm_info[i][j].size, - dimm_info[i][j].dual_rank, - dimm_info[i][j].x16_width); - } - } - + priv->c0errlog = window + cfg->reg_eccerrlog_offset[0]; + priv->c1errlog = window + cfg->reg_eccerrlog_offset[1]; + priv->cfg = cfg; + priv->mci = mci; + priv->pdev = pdev; + device_initialize(&priv->dev); /* - * The dram rank boundary (DRB) reg values are boundary addresses - * for each DRAM rank with a granularity of 64MB. DRB regs are - * cumulative; the last one will contain the total memory - * contained in all ranks. + * The EDAC core uses mci->pdev (pointer to the structure device) + * as the memory controller ID. The SoCs attach one or more memory + * controllers to a single pci_dev (a single pci_dev->dev can + * correspond to multiple memory controllers). + * + * To make mci->pdev unique, assign pci_dev->dev to mci->pdev + * for the first memory controller and assign a unique priv->dev + * to mci->pdev for each additional memory controller. */ - for (i = 0; i < IE31200_DIMMS_PER_CHANNEL; i++) { - for (j = 0; j < IE31200_CHANNELS; j++) { - struct dimm_info *dimm; - unsigned long nr_pages; - - nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl); - if (nr_pages == 0) - continue; - - if (dimm_info[j][i].dual_rank) { - nr_pages = nr_pages / 2; - dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0); - dimm->nr_pages = nr_pages; - edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); - dimm->grain = 8; /* just a guess */ - if (skl) - dimm->mtype = MEM_DDR4; - else - dimm->mtype = MEM_DDR3; - dimm->dtype = DEV_UNKNOWN; - dimm->edac_mode = EDAC_UNKNOWN; - } - dimm = edac_get_dimm(mci, i * 2, j, 0); - dimm->nr_pages = nr_pages; - edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); - dimm->grain = 8; /* same guess */ - if (skl) - dimm->mtype = MEM_DDR4; - else - dimm->mtype = MEM_DDR3; - dimm->dtype = DEV_UNKNOWN; - dimm->edac_mode = EDAC_UNKNOWN; - } - } + mci->pdev = mc ? 
&priv->dev : &pdev->dev; + ie31200_get_dimm_config(mci, window, cfg, mc); ie31200_clear_error_info(mci); if (edac_mc_add_mc(mci)) { @@ -531,16 +521,117 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) goto fail_unmap; } - /* get this far and it's successful */ - edac_dbg(3, "MC: success\n"); + ie31200_pvt.priv[mc] = priv; return 0; - fail_unmap: + put_device(&priv->dev); iounmap(window); - fail_free: edac_mc_free(mci); + return ret; +} + +static void mce_check(struct mce *mce) +{ + struct ie31200_priv *priv; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + __ie31200_check(priv->mci, mce); + } +} + +static int mce_handler(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *mce = (struct mce *)data; + char *type; + + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + + /* + * Ignore unless this is a memory related error. + * Don't check MCI_STATUS_ADDRV since it's not set on some CPUs. + */ + if ((mce->status & 0xefff) >> 7 != 1) + return NOTIFY_DONE; + + type = mce->mcgstatus & MCG_STATUS_MCIP ? "Exception" : "Event"; + + edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n", + mce->extcpu, type, mce->mcgstatus, + mce->bank, mce->status); + edac_dbg(0, "TSC 0x%llx\n", mce->tsc); + edac_dbg(0, "ADDR 0x%llx\n", mce->addr); + edac_dbg(0, "MISC 0x%llx\n", mce->misc); + edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n", + mce->cpuvendor, mce->cpuid, mce->time, + mce->socketid, mce->apicid); + + mce_check(mce); + mce->kflags |= MCE_HANDLED_EDAC; + + return NOTIFY_DONE; +} + +static struct notifier_block ie31200_mce_dec = { + .notifier_call = mce_handler, + .priority = MCE_PRIO_EDAC, +}; + +static void ie31200_unregister_mcis(void) +{ + struct ie31200_priv *priv; + struct mem_ctl_info *mci; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + + mci = priv->mci; + edac_mc_del_mc(mci->pdev); + iounmap(priv->window); + put_device(&priv->dev); + edac_mc_free(mci); + } +} + +static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) +{ + int i, ret; + + edac_dbg(0, "MC:\n"); + + if (!ecc_capable(pdev)) { + ie31200_printk(KERN_INFO, "No ECC support\n"); + return -ENODEV; + } + + for (i = 0; i < cfg->imc_num; i++) { + ret = ie31200_register_mci(pdev, cfg, i); + if (ret) + goto fail_register; + } + + if (cfg->cmci) { + mce_register_decode_chain(&ie31200_mce_dec); + edac_op_state = EDAC_OPSTATE_INT; + } else { + edac_op_state = EDAC_OPSTATE_POLL; + } + + /* get this far and it's successful. 
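The "(mce->status & 0xefff) >> 7 != 1" test in mce_handler() above is terse; it matches Intel's compound MCA error code for memory-controller errors, which takes the form 0000_0001_MMMM_CCCC in MCi_STATUS[15:0], and the 0xefff mask drops bit 12, which may carry the corrected-error filter flag. A sketch of the same predicate (illustrative; u64 as in <linux/types.h>):

	/* Sketch: does MCACOD carry the 0000_0001_MMMM_CCCC memory pattern? */
	static inline bool status_is_mem_err(u64 status)
	{
		return ((status & 0xefff) >> 7) == 1;
	}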
*/ + edac_dbg(3, "MC: success\n"); + return 0; + +fail_register: + ie31200_unregister_mcis(); return ret; } @@ -552,7 +643,7 @@ static int ie31200_init_one(struct pci_dev *pdev, edac_dbg(0, "MC:\n"); if (pci_enable_device(pdev) < 0) return -EIO; - rc = ie31200_probe1(pdev, ent->driver_data); + rc = ie31200_probe1(pdev, (struct res_config *)ent->driver_data); if (rc == 0 && !mci_pdev) mci_pdev = pci_dev_get(pdev); @@ -561,42 +652,129 @@ static int ie31200_init_one(struct pci_dev *pdev, static void ie31200_remove_one(struct pci_dev *pdev) { - struct mem_ctl_info *mci; - struct ie31200_priv *priv; + struct ie31200_priv *priv = ie31200_pvt.priv[0]; edac_dbg(0, "\n"); pci_dev_put(mci_pdev); mci_pdev = NULL; - mci = edac_mc_del_mc(&pdev->dev); - if (!mci) - return; - priv = mci->pvt_info; - iounmap(priv->window); - edac_mc_free(mci); + if (priv->cfg->cmci) + mce_unregister_decode_chain(&ie31200_mce_dec); + ie31200_unregister_mcis(); } +static struct res_config snb_cfg = { + .mtype = MEM_DDR3, + .imc_num = 1, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x40c8, + .reg_eccerrlog_offset[1] = 0x44c8, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity = BIT_ULL(28), + .reg_mad_dimm_offset[0] = 0x5004, + .reg_mad_dimm_offset[1] = 0x5008, + .reg_mad_dimm_size_mask[0] = GENMASK(7, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(15, 8), + .reg_mad_dimm_rank_mask[0] = BIT(17), + .reg_mad_dimm_rank_mask[1] = BIT(18), + .reg_mad_dimm_width_mask[0] = BIT(19), + .reg_mad_dimm_width_mask[1] = BIT(20), +}; + +static struct res_config skl_cfg = { + .mtype = MEM_DDR4, + .imc_num = 1, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x4048, + .reg_eccerrlog_offset[1] = 0x4448, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity = BIT_ULL(30), + .reg_mad_dimm_offset[0] = 0x500c, + .reg_mad_dimm_offset[1] = 0x5010, + .reg_mad_dimm_size_mask[0] = GENMASK(5, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(21, 16), + .reg_mad_dimm_rank_mask[0] = BIT(10), + .reg_mad_dimm_rank_mask[1] = BIT(26), + .reg_mad_dimm_width_mask[0] = GENMASK(9, 8), + .reg_mad_dimm_width_mask[1] = GENMASK(25, 24), +}; + +struct res_config rpl_s_cfg = { + .mtype = MEM_DDR5, + .cmci = true, + .imc_num = 2, + .reg_mchbar_mask = GENMASK_ULL(41, 17), + .reg_mchbar_window_size = BIT_ULL(16), + .reg_eccerrlog_offset[0] = 0xe048, + .reg_eccerrlog_offset[1] = 0xe848, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ce_ovfl_mask = BIT_ULL(1), + .reg_eccerrlog_ue_mask = BIT_ULL(2), + .reg_eccerrlog_ue_ovfl_mask = BIT_ULL(3), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .msr_clear_eccerrlog_offset = 0x791, + .reg_mad_dimm_offset[0] = 0xd80c, + .reg_mad_dimm_offset[1] = 0xd810, + .reg_mad_dimm_size_granularity = BIT_ULL(29), + .reg_mad_dimm_size_mask[0] = GENMASK(6, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(22, 16), + .reg_mad_dimm_rank_mask[0] = GENMASK(10, 9), + .reg_mad_dimm_rank_mask[1] = GENMASK(27, 26), + .reg_mad_dimm_width_mask[0] = GENMASK(8, 7), + .reg_mad_dimm_width_mask[1] = GENMASK(25, 24), +}; + static const struct pci_device_id 
ie31200_pci_tbl[] = { - { PCI_VEND_DEV(INTEL, IE31200_HB_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, 
PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_5), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_6), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_HX_1), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_2), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_3), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_4), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_5), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_6), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_7), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_8), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_9), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_10), (kernel_ulong_t)&rpl_s_cfg}, { 0, } /* 0 terminated list. 
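A minimal sketch of the driver_data round-trip used by this table: each entry stashes a pointer to its res_config as a kernel_ulong_t, and the probe path casts it back (ie31200_probe1() above does exactly this). The example_* names and the 0x1234 device ID below are invented for illustration, not part of the driver.

static struct res_config example_cfg = { .imc_num = 1 };

static const struct pci_device_id example_tbl[] = {
	{ PCI_VDEVICE(INTEL, 0x1234), (kernel_ulong_t)&example_cfg },
	{ 0, }
};

static int example_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	/* Recover the per-platform config stashed in driver_data. */
	struct res_config *cfg = (struct res_config *)ent->driver_data;

	return cfg->imc_num ? 0 : -ENODEV;
}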
*/ }; MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl); @@ -613,12 +791,10 @@ static int __init ie31200_init(void) int pci_rc, i; edac_dbg(3, "MC:\n"); - /* Ensure that the OPSTATE is set correctly for POLL or NMI */ - opstate_init(); pci_rc = pci_register_driver(&ie31200_driver); if (pci_rc < 0) - goto fail0; + return pci_rc; if (!mci_pdev) { ie31200_registered = 0; @@ -629,11 +805,13 @@ static int __init ie31200_init(void) if (mci_pdev) break; } + if (!mci_pdev) { edac_dbg(0, "ie31200 pci_get_device fail\n"); pci_rc = -ENODEV; - goto fail1; + goto fail0; } + pci_rc = ie31200_init_one(mci_pdev, &ie31200_pci_tbl[i]); if (pci_rc < 0) { edac_dbg(0, "ie31200 init fail\n"); @@ -641,12 +819,12 @@ static int __init ie31200_init(void) goto fail1; } } - return 0; + return 0; fail1: - pci_unregister_driver(&ie31200_driver); -fail0: pci_dev_put(mci_pdev); +fail0: + pci_unregister_driver(&ie31200_driver); return pci_rc; } diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 544dd19072ea..553c31a2d922 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -27,7 +27,7 @@ #include "edac_mc.h" #include "edac_module.h" -#define IGEN6_REVISION "v2.5" +#define IGEN6_REVISION "v2.5.1" #define EDAC_MOD_STR "igen6_edac" #define IGEN6_NMI_NAME "igen6_ibecc" @@ -58,6 +58,7 @@ /* Capability register E */ #define CAPID_E_OFFSET 0xf0 #define CAPID_E_IBECC BIT(12) +#define CAPID_E_IBECC_BIT18 BIT(18) /* Error Status */ #define ERRSTS_OFFSET 0xc8 @@ -80,6 +81,7 @@ #define ECC_ERROR_LOG_UE BIT_ULL(63) #define ECC_ERROR_LOG_ADDR_SHIFT 5 #define ECC_ERROR_LOG_ADDR(v) GET_BITFIELD(v, 5, 38) +#define ECC_ERROR_LOG_ADDR45(v) GET_BITFIELD(v, 5, 45) #define ECC_ERROR_LOG_SYND(v) GET_BITFIELD(v, 46, 61) /* Host MMIO base address */ @@ -125,6 +127,7 @@ static struct res_config { bool machine_check; + /* The number of present memory controllers. 
*/ int num_imc; u32 imc_base; u32 cmf_base; @@ -133,6 +136,8 @@ static struct res_config { u32 ibecc_base; u32 ibecc_error_log_offset; bool (*ibecc_available)(struct pci_dev *pdev); + /* Extract error address logged in IBECC */ + u64 (*err_addr)(u64 ecclog); /* Convert error address logged in IBECC to system physical address */ u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc); /* Convert error address logged in IBECC to integrated memory controller address */ @@ -222,6 +227,87 @@ static struct work_struct ecclog_work; #define DID_ADL_SKU3 0x4621 #define DID_ADL_SKU4 0x4641 +/* Compute die IDs for Alder Lake-N with IBECC */ +#define DID_ADL_N_SKU1 0x4614 +#define DID_ADL_N_SKU2 0x4617 +#define DID_ADL_N_SKU3 0x461b +#define DID_ADL_N_SKU4 0x461c +#define DID_ADL_N_SKU5 0x4673 +#define DID_ADL_N_SKU6 0x4674 +#define DID_ADL_N_SKU7 0x4675 +#define DID_ADL_N_SKU8 0x4677 +#define DID_ADL_N_SKU9 0x4678 +#define DID_ADL_N_SKU10 0x4679 +#define DID_ADL_N_SKU11 0x467c +#define DID_ADL_N_SKU12 0x4632 + +/* Compute die IDs for Arizona Beach with IBECC */ +#define DID_AZB_SKU1 0x4676 + +/* Compute die IDs for Amston Lake with IBECC */ +#define DID_ASL_SKU1 0x464a + +/* Compute die IDs for Raptor Lake-P with IBECC */ +#define DID_RPL_P_SKU1 0xa706 +#define DID_RPL_P_SKU2 0xa707 +#define DID_RPL_P_SKU3 0xa708 +#define DID_RPL_P_SKU4 0xa716 +#define DID_RPL_P_SKU5 0xa718 + +/* Compute die IDs for Meteor Lake-PS with IBECC */ +#define DID_MTL_PS_SKU1 0x7d21 +#define DID_MTL_PS_SKU2 0x7d22 +#define DID_MTL_PS_SKU3 0x7d23 +#define DID_MTL_PS_SKU4 0x7d24 + +/* Compute die IDs for Meteor Lake-P with IBECC */ +#define DID_MTL_P_SKU1 0x7d01 +#define DID_MTL_P_SKU2 0x7d02 +#define DID_MTL_P_SKU3 0x7d14 + +/* Compute die IDs for Arrow Lake-UH with IBECC */ +#define DID_ARL_UH_SKU1 0x7d06 +#define DID_ARL_UH_SKU2 0x7d20 +#define DID_ARL_UH_SKU3 0x7d30 + +/* Compute die IDs for Panther Lake-H with IBECC */ +#define DID_PTL_H_SKU1 0xb000 +#define DID_PTL_H_SKU2 0xb001 +#define DID_PTL_H_SKU3 0xb002 + +/* Compute die IDs for Wildcat Lake with IBECC */ +#define DID_WCL_SKU1 0xfd00 + +static int get_mchbar(struct pci_dev *pdev, u64 *mchbar) +{ + union { + u64 v; + struct { + u32 v_lo; + u32 v_hi; + }; + } u; + + if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) { + igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n"); + return -ENODEV; + } + + if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) { + igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n"); + return -ENODEV; + } + + if (!(u.v & MCHBAR_EN)) { + igen6_printk(KERN_ERR, "MCHBAR is disabled\n"); + return -ENODEV; + } + + *mchbar = MCHBAR_BASE(u.v); + + return 0; +} + static bool ehl_ibecc_available(struct pci_dev *pdev) { u32 v; @@ -245,7 +331,7 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc) if (igen6_tom <= _4GB) return eaddr + igen6_tolud - _4GB; - if (eaddr < _4GB) + if (eaddr >= igen6_tom) return eaddr + igen6_tolud - igen6_tom; return eaddr; @@ -272,6 +358,39 @@ static bool tgl_ibecc_available(struct pci_dev *pdev) return !(CAPID_E_IBECC & v); } +static bool mtl_p_ibecc_available(struct pci_dev *pdev) +{ + u32 v; + + if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v)) + return false; + + return !(CAPID_E_IBECC_BIT18 & v); +} + +static bool mtl_ps_ibecc_available(struct pci_dev *pdev) +{ +#define MCHBAR_MEMSS_IBECCDIS 0x13c00 + void __iomem *window; + u64 mchbar; + u32 val; + + if (get_mchbar(pdev, &mchbar)) + return false; + + window = ioremap(mchbar, MCHBAR_SIZE * 2); + if (!window) { + igen6_printk(KERN_ERR, "Failed to 
ioremap 0x%llx\n", mchbar); + return false; + } + + val = readl(window + MCHBAR_MEMSS_IBECCDIS); + iounmap(window); + + /* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */ + return !GET_BITFIELD(val, 6, 6); +} + static u64 mem_addr_to_sys_addr(u64 maddr) { if (maddr < igen6_tolud) @@ -358,6 +477,11 @@ static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc) return imc_addr; } +static u64 rpl_p_err_addr(u64 ecclog) +{ + return ECC_ERROR_LOG_ADDR45(ecclog); +} + static struct res_config ehl_cfg = { .num_imc = 1, .imc_base = 0x5000, @@ -403,7 +527,63 @@ static struct res_config adl_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct pci_device_id igen6_pci_tbl[] = { +static struct res_config adl_n_cfg = { + .machine_check = true, + .num_imc = 1, + .imc_base = 0xd800, + .ibecc_base = 0xd400, + .ibecc_error_log_offset = 0x68, + .ibecc_available = tgl_ibecc_available, + .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, + .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, +}; + +static struct res_config rpl_p_cfg = { + .machine_check = true, + .num_imc = 2, + .imc_base = 0xd800, + .ibecc_base = 0xd400, + .ibecc_error_log_offset = 0x68, + .ibecc_available = tgl_ibecc_available, + .err_addr = rpl_p_err_addr, + .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, + .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, +}; + +static struct res_config mtl_ps_cfg = { + .machine_check = true, + .num_imc = 2, + .imc_base = 0xd800, + .ibecc_base = 0xd400, + .ibecc_error_log_offset = 0x170, + .ibecc_available = mtl_ps_ibecc_available, + .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, + .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, +}; + +static struct res_config mtl_p_cfg = { + .machine_check = true, + .num_imc = 2, + .imc_base = 0xd800, + .ibecc_base = 0xd400, + .ibecc_error_log_offset = 0x170, + .ibecc_available = mtl_p_ibecc_available, + .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, + .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, +}; + +static struct res_config wcl_cfg = { + .machine_check = true, + .num_imc = 1, + .imc_base = 0xd800, + .ibecc_base = 0xd400, + .ibecc_error_log_offset = 0x170, + .ibecc_available = mtl_p_ibecc_available, + .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, + .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, +}; + +static struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg }, @@ -424,6 +604,39 @@ static const struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg }, { PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg }, { PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, 
DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_WCL_SKU1), (kernel_ulong_t)&wcl_cfg }, { }, }; MODULE_DEVICE_TABLE(pci, igen6_pci_tbl); @@ -596,13 +809,22 @@ static u64 ecclog_read_and_clear(struct igen6_imc *imc) { u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET); - if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) { - /* Clear CE/UE bits by writing 1s */ - writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); - return ecclog; - } + /* + * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain + * the invalid value ~0. This will result in a flood of invalid + * error reports in polling mode. Skip it. + */ + if (ecclog == ~0) + return 0; - return 0; + /* Neither a CE nor a UE. 
Skip it.*/ + if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE))) + return 0; + + /* Clear CE/UE bits by writing 1s */ + writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); + + return ecclog; } static void errsts_clear(struct igen6_imc *imc) @@ -627,7 +849,7 @@ static int errcmd_enable_error_reporting(bool enable) rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd); if (rc) - return rc; + return pcibios_err_to_errno(rc); if (enable) errcmd |= ERRCMD_CE | ERRSTS_UE; @@ -636,7 +858,7 @@ static int errcmd_enable_error_reporting(bool enable) rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd); if (rc) - return rc; + return pcibios_err_to_errno(rc); return 0; } @@ -679,8 +901,11 @@ static void ecclog_work_cb(struct work_struct *work) llist_for_each_entry_safe(node, tmp, head, llnode) { memset(&res, 0, sizeof(res)); - eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) << - ECC_ERROR_LOG_ADDR_SHIFT; + if (res_cfg->err_addr) + eaddr = res_cfg->err_addr(node->ecclog); + else + eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) << + ECC_ERROR_LOG_ADDR_SHIFT; res.mc = node->mc; res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc); res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc); @@ -969,22 +1194,8 @@ static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar) igen6_tom = u.v & GENMASK_ULL(38, 20); - if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) { - igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n"); - goto fail; - } - - if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) { - igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n"); + if (get_mchbar(pdev, mchbar)) goto fail; - } - - if (!(u.v & MCHBAR_EN)) { - igen6_printk(KERN_ERR, "MCHBAR is disabled\n"); - goto fail; - } - - *mchbar = MCHBAR_BASE(u.v); #ifdef CONFIG_EDAC_DEBUG if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo)) @@ -1000,23 +1211,35 @@ fail: return -ENODEV; } -static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) +static void igen6_check(struct mem_ctl_info *mci) +{ + struct igen6_imc *imc = mci->pvt_info; + u64 ecclog; + + /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ + ecclog = ecclog_read_and_clear(imc); + if (!ecclog) + return; + + if (!ecclog_gen_pool_add(imc->mc, ecclog)) + irq_work_queue(&ecclog_irq_work); +} + +/* Check whether the memory controller is absent. 
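The error-address extraction above is dispatched per platform: SoCs that log a wider address (Raptor Lake-P's bits [45:5]) install ->err_addr, and everything else uses the common bits [38:5] decode. A hedged sketch of that fallback, with the helper name ecclog_to_eaddr invented for illustration:

static u64 ecclog_to_eaddr(const struct res_config *cfg, u64 ecclog)
{
	/* Platform hook first (e.g. rpl_p_err_addr() above). */
	if (cfg->err_addr)
		return cfg->err_addr(ecclog);

	/* Common case: address field in bits [38:5] of the log register. */
	return ECC_ERROR_LOG_ADDR(ecclog) << ECC_ERROR_LOG_ADDR_SHIFT;
}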
*/ +static bool igen6_imc_absent(void __iomem *window) +{ + return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0; +} + +static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev) { struct edac_mc_layer layers[2]; struct mem_ctl_info *mci; struct igen6_imc *imc; - void __iomem *window; int rc; edac_dbg(2, "\n"); - mchbar += mc * MCHBAR_SIZE; - window = ioremap(mchbar, MCHBAR_SIZE); - if (!window) { - igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar); - return -ENODEV; - } - layers[0].type = EDAC_MC_LAYER_CHANNEL; layers[0].size = NUM_CHANNELS; layers[0].is_virt_csrow = false; @@ -1041,6 +1264,8 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; mci->dev_name = pci_name(pdev); + if (edac_op_state == EDAC_OPSTATE_POLL) + mci->edac_check = igen6_check; mci->pvt_info = &igen6_pvt->imc[mc]; imc = mci->pvt_info; @@ -1075,11 +1300,12 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) imc->mci = mci; return 0; fail3: + put_device(&imc->dev); + mci->pvt_info = NULL; kfree(mci->ctl_name); fail2: edac_mc_free(mci); fail: - iounmap(window); return rc; } @@ -1099,11 +1325,65 @@ static void igen6_unregister_mcis(void) edac_mc_del_mc(mci->pdev); kfree(mci->ctl_name); + mci->pvt_info = NULL; edac_mc_free(mci); + put_device(&imc->dev); iounmap(imc->window); } } +static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar) +{ + void __iomem *window; + int lmc, pmc, rc; + u64 base; + + for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) { + base = mchbar + pmc * MCHBAR_SIZE; + window = ioremap(base, MCHBAR_SIZE); + if (!window) { + igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc); + rc = -ENOMEM; + goto out_unregister_mcis; + } + + if (igen6_imc_absent(window)) { + iounmap(window); + edac_dbg(2, "Skip absent mc%d\n", pmc); + continue; + } + + rc = igen6_register_mci(lmc, window, pdev); + if (rc) + goto out_iounmap; + + /* Done, if all present MCs are detected and registered. */ + if (++lmc >= res_cfg->num_imc) + break; + } + + if (!lmc) { + igen6_printk(KERN_ERR, "No mc found.\n"); + return -ENODEV; + } + + if (lmc < res_cfg->num_imc) { + igen6_printk(KERN_DEBUG, "Expected %d mcs, but only %d detected.", + res_cfg->num_imc, lmc); + res_cfg->num_imc = lmc; + } + + return 0; + +out_iounmap: + iounmap(window); + +out_unregister_mcis: + igen6_unregister_mcis(); + + return rc; +} + static int igen6_mem_slice_setup(u64 mchbar) { struct igen6_imc *imc = &igen6_pvt->imc[0]; @@ -1178,10 +1458,29 @@ static void unregister_err_handler(void) unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); } +static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent) +{ + /* + * Quirk: Certain SoCs' error reporting interrupts don't work. + * Force polling mode for them to ensure that memory error + * events can be handled. + */ + if (ent->device == DID_ADL_N_SKU4) { + edac_op_state = EDAC_OPSTATE_POLL; + return; + } + + /* Set the mode according to the configuration data. 
*/ + if (cfg->machine_check) + edac_op_state = EDAC_OPSTATE_INT; + else + edac_op_state = EDAC_OPSTATE_NMI; +} + static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { u64 mchbar; - int i, rc; + int rc; edac_dbg(2, "\n"); @@ -1195,11 +1494,11 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto fail; - for (i = 0; i < res_cfg->num_imc; i++) { - rc = igen6_register_mci(i, mchbar, pdev); - if (rc) - goto fail2; - } + opstate_set(res_cfg, ent); + + rc = igen6_register_mcis(pdev, mchbar); + if (rc) + goto fail; if (res_cfg->num_imc > 1) { rc = igen6_mem_slice_setup(mchbar); @@ -1216,9 +1515,6 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&ecclog_work, ecclog_work_cb); init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb); - /* Check if any pending errors before registering the NMI handler */ - ecclog_handler(); - rc = register_err_handler(); if (rc) goto fail3; @@ -1230,6 +1526,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto fail4; } + /* Check if any pending errors before/during the registration of the error handler */ + ecclog_handler(); + igen6_debug_setup(); return 0; fail4: @@ -1278,8 +1577,6 @@ static int __init igen6_init(void) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) return -EBUSY; - edac_op_state = EDAC_OPSTATE_NMI; - rc = pci_register_driver(&igen6_driver); if (rc) return rc; diff --git a/drivers/edac/imh_base.c b/drivers/edac/imh_base.c new file mode 100644 index 000000000000..4348b3883b45 --- /dev/null +++ b/drivers/edac/imh_base.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for Intel(R) servers with Integrated Memory/IO Hub-based memory controller. + * Copyright (c) 2025, Intel Corporation. + */ + +#include <linux/kernel.h> +#include <linux/io.h> +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> +#include <asm/mce.h> +#include <asm/cpu.h> +#include "edac_module.h" +#include "skx_common.h" + +#define IMH_REVISION "v0.0.1" +#define EDAC_MOD_STR "imh_edac" + +/* Debug macros */ +#define imh_printk(level, fmt, arg...) \ + edac_printk(level, "imh", fmt, ##arg) + +/* Configuration Agent(Ubox) */ +#define MMIO_BASE_H(reg) (((u64)GET_BITFIELD(reg, 0, 29)) << 23) +#define SOCKET_ID(reg) GET_BITFIELD(reg, 0, 3) + +/* PUNIT */ +#define DDR_IMC_BITMAP(reg) GET_BITFIELD(reg, 23, 30) + +/* Memory Controller */ +#define ECC_ENABLED(reg) GET_BITFIELD(reg, 2, 2) +#define DIMM_POPULATED(reg) GET_BITFIELD(reg, 15, 15) + +/* System Cache Agent(SCA) */ +#define TOLM(reg) (((u64)GET_BITFIELD(reg, 16, 31)) << 16) +#define TOHM(reg) (((u64)GET_BITFIELD(reg, 16, 51)) << 16) + +/* Home Agent (HA) */ +#define NMCACHING(reg) GET_BITFIELD(reg, 8, 8) + +/** + * struct local_reg - A register as described in the local package view. + * + * @pkg: (input) The package where the register is located. + * @pbase: (input) The IP MMIO base physical address in the local package view. + * @size: (input) The IP MMIO size. + * @offset: (input) The register offset from the IP MMIO base @pbase. + * @width: (input) The register width in byte. + * @vbase: (internal) The IP MMIO base virtual address. + * @val: (output) The register value. + */ +struct local_reg { + int pkg; + u64 pbase; + u32 size; + u32 offset; + u8 width; + void __iomem *vbase; + u64 val; +}; + +#define DEFINE_LOCAL_REG(name, cfg, package, north, ip_name, ip_idx, reg_name) \ + struct local_reg name = { \ + .pkg = package, \ + .pbase = (north ? 
(cfg)->mmio_base_l_north : \ + (cfg)->mmio_base_l_south) + \ + (cfg)->ip_name##_base + \ + (cfg)->ip_name##_size * (ip_idx), \ + .size = (cfg)->ip_name##_size, \ + .offset = (cfg)->ip_name##_reg_##reg_name##_offset, \ + .width = (cfg)->ip_name##_reg_##reg_name##_width, \ + } + +static u64 readx(void __iomem *addr, u8 width) +{ + switch (width) { + case 1: + return readb(addr); + case 2: + return readw(addr); + case 4: + return readl(addr); + case 8: + return readq(addr); + default: + imh_printk(KERN_ERR, "Invalid reg 0x%p width %d\n", addr, width); + return 0; + } +} + +static void __read_local_reg(void *reg) +{ + struct local_reg *r = (struct local_reg *)reg; + + r->val = readx(r->vbase + r->offset, r->width); +} + +/* Read a local-view register. */ +static bool read_local_reg(struct local_reg *reg) +{ + int cpu; + + /* Get the target CPU in the package @reg->pkg. */ + for_each_online_cpu(cpu) { + if (reg->pkg == topology_physical_package_id(cpu)) + break; + } + + if (cpu >= nr_cpu_ids) + return false; + + reg->vbase = ioremap(reg->pbase, reg->size); + if (!reg->vbase) { + imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", reg->pbase); + return false; + } + + /* Get the target CPU to read the register. */ + smp_call_function_single(cpu, __read_local_reg, reg, 1); + iounmap(reg->vbase); + + return true; +} + +/* Get the bitmap of memory controller instances in package @pkg. */ +static u32 get_imc_bitmap(struct res_config *cfg, int pkg, bool north) +{ + DEFINE_LOCAL_REG(reg, cfg, pkg, north, pcu, 0, capid3); + + if (!read_local_reg(®)) + return 0; + + edac_dbg(2, "Pkg%d %s mc instances bitmap 0x%llx (reg 0x%llx)\n", + pkg, north ? "north" : "south", + DDR_IMC_BITMAP(reg.val), reg.val); + + return DDR_IMC_BITMAP(reg.val); +} + +static void imc_release(struct device *dev) +{ + edac_dbg(2, "imc device %s released\n", dev_name(dev)); + kfree(dev); +} + +static int __get_ddr_munits(struct res_config *cfg, struct skx_dev *d, + bool north, int lmc) +{ + unsigned long size = cfg->ddr_chan_mmio_sz * cfg->ddr_chan_num; + unsigned long bitmap = get_imc_bitmap(cfg, d->pkg, north); + void __iomem *mbase; + struct device *dev; + int i, rc, pmc; + u64 base; + + for_each_set_bit(i, &bitmap, sizeof(bitmap) * 8) { + base = north ? d->mmio_base_h_north : d->mmio_base_h_south; + base += cfg->ddr_imc_base + size * i; + + edac_dbg(2, "Pkg%d mc%d mmio base 0x%llx size 0x%lx\n", + d->pkg, lmc, base, size); + + /* Set up the imc MMIO. */ + mbase = ioremap(base, size); + if (!mbase) { + imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", base); + return -ENOMEM; + } + + d->imc[lmc].mbase = mbase; + d->imc[lmc].lmc = lmc; + + /* Create the imc device instance. */ + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->release = imc_release; + device_initialize(dev); + rc = dev_set_name(dev, "0x%llx", base); + if (rc) { + imh_printk(KERN_ERR, "Failed to set dev name\n"); + put_device(dev); + return rc; + } + + d->imc[lmc].dev = dev; + + /* Set up the imc index mapping. */ + pmc = north ? 
i : 8 + i; + skx_set_mc_mapping(d, pmc, lmc); + + lmc++; + } + + return lmc; +} + +static bool get_ddr_munits(struct res_config *cfg, struct skx_dev *d) +{ + int lmc = __get_ddr_munits(cfg, d, true, 0); + + if (lmc < 0) + return false; + + lmc = __get_ddr_munits(cfg, d, false, lmc); + if (lmc <= 0) + return false; + + return true; +} + +static bool get_socket_id(struct res_config *cfg, struct skx_dev *d) +{ + DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ubox, 0, socket_id); + u8 src_id; + int i; + + if (!read_local_reg(®)) + return false; + + src_id = SOCKET_ID(reg.val); + edac_dbg(2, "socket id 0x%x (reg 0x%llx)\n", src_id, reg.val); + + for (i = 0; i < cfg->ddr_imc_num; i++) + d->imc[i].src_id = src_id; + + return true; +} + +/* Get TOLM (Top Of Low Memory) and TOHM (Top Of High Memory) parameters. */ +static bool imh_get_tolm_tohm(struct res_config *cfg, u64 *tolm, u64 *tohm) +{ + DEFINE_LOCAL_REG(reg, cfg, 0, true, sca, 0, tolm); + + if (!read_local_reg(®)) + return false; + + *tolm = TOLM(reg.val); + edac_dbg(2, "tolm 0x%llx (reg 0x%llx)\n", *tolm, reg.val); + + DEFINE_LOCAL_REG(reg2, cfg, 0, true, sca, 0, tohm); + + if (!read_local_reg(®2)) + return false; + + *tohm = TOHM(reg2.val); + edac_dbg(2, "tohm 0x%llx (reg 0x%llx)\n", *tohm, reg2.val); + + return true; +} + +/* Get the system-view MMIO_BASE_H for {north,south}-IMH. */ +static int imh_get_all_mmio_base_h(struct res_config *cfg, struct list_head *edac_list) +{ + int i, n = topology_max_packages(), imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num; + struct skx_dev *d; + + for (i = 0; i < n; i++) { + d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL); + if (!d) + return -ENOMEM; + + DEFINE_LOCAL_REG(reg, cfg, i, true, ubox, 0, mmio_base); + + /* Get MMIO_BASE_H for the north-IMH. */ + if (!read_local_reg(®) || !reg.val) { + kfree(d); + imh_printk(KERN_ERR, "Pkg%d has no north mmio_base_h\n", i); + return -ENODEV; + } + + d->mmio_base_h_north = MMIO_BASE_H(reg.val); + edac_dbg(2, "Pkg%d north mmio_base_h 0x%llx (reg 0x%llx)\n", + i, d->mmio_base_h_north, reg.val); + + /* Get MMIO_BASE_H for the south-IMH (optional). */ + DEFINE_LOCAL_REG(reg2, cfg, i, false, ubox, 0, mmio_base); + + if (read_local_reg(®2)) { + d->mmio_base_h_south = MMIO_BASE_H(reg2.val); + edac_dbg(2, "Pkg%d south mmio_base_h 0x%llx (reg 0x%llx)\n", + i, d->mmio_base_h_south, reg2.val); + } + + d->pkg = i; + d->num_imc = imc_num; + skx_init_mc_mapping(d); + list_add_tail(&d->list, edac_list); + } + + return 0; +} + +/* Get the number of per-package memory controllers. */ +static int imh_get_imc_num(struct res_config *cfg) +{ + int imc_num = hweight32(get_imc_bitmap(cfg, 0, true)) + + hweight32(get_imc_bitmap(cfg, 0, false)); + + if (!imc_num) { + imh_printk(KERN_ERR, "Invalid mc number\n"); + return -ENODEV; + } + + if (cfg->ddr_imc_num != imc_num) { + /* + * Update the configuration data to reflect the number of + * present DDR memory controllers. + */ + cfg->ddr_imc_num = imc_num; + edac_dbg(2, "Set ddr mc number %d\n", imc_num); + } + + return 0; +} + +/* Get all memory controllers' parameters. 
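The local_reg machinery above boils down to: pick any online CPU in the target package, map the package-local MMIO window, and let that CPU perform the access so the read resolves in its own package's view. A compact usage sketch (read_pkg_reg is an invented helper; error handling trimmed, field widths assumed):

static u64 read_pkg_reg(struct res_config *cfg, int pkg, u32 offset)
{
	/* A 64-bit register in the north ubox of package @pkg. */
	struct local_reg r = {
		.pkg	= pkg,
		.pbase	= cfg->mmio_base_l_north + cfg->ubox_base,
		.size	= cfg->ubox_size,
		.offset	= offset,
		.width	= 8,
	};

	return read_local_reg(&r) ? r.val : 0;
}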
*/ +static int imh_get_munits(struct res_config *cfg, struct list_head *edac_list) +{ + struct skx_imc *imc; + struct skx_dev *d; + u8 mc = 0; + int i; + + list_for_each_entry(d, edac_list, list) { + if (!get_ddr_munits(cfg, d)) { + imh_printk(KERN_ERR, "No mc found\n"); + return -ENODEV; + } + + if (!get_socket_id(cfg, d)) { + imh_printk(KERN_ERR, "Failed to get socket id\n"); + return -ENODEV; + } + + for (i = 0; i < cfg->ddr_imc_num; i++) { + imc = &d->imc[i]; + if (!imc->mbase) + continue; + + imc->chan_mmio_sz = cfg->ddr_chan_mmio_sz; + imc->num_channels = cfg->ddr_chan_num; + imc->num_dimms = cfg->ddr_dimm_num; + imc->mc = mc++; + } + } + + return 0; +} + +static bool check_2lm_enabled(struct res_config *cfg, struct skx_dev *d, int ha_idx) +{ + DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ha, ha_idx, mode); + + if (!read_local_reg(®)) + return false; + + if (!NMCACHING(reg.val)) + return false; + + edac_dbg(2, "2-level memory configuration (reg 0x%llx, ha idx %d)\n", reg.val, ha_idx); + return true; +} + +/* Check whether the system has a 2-level memory configuration. */ +static bool imh_2lm_enabled(struct res_config *cfg, struct list_head *head) +{ + struct skx_dev *d; + int i; + + list_for_each_entry(d, head, list) { + for (i = 0; i < cfg->ddr_imc_num; i++) + if (check_2lm_enabled(cfg, d, i)) + return true; + } + + return false; +} + +/* Helpers to read memory controller registers */ +static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) +{ + return readx(imc->mbase + imc->chan_mmio_sz * chan + offset, width); +} + +static u32 read_imc_mcmtr(struct res_config *cfg, struct skx_imc *imc, int chan) +{ + return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_mcmtr_offset, cfg->ddr_reg_mcmtr_width); +} + +static u32 read_imc_dimmmtr(struct res_config *cfg, struct skx_imc *imc, int chan, int dimm) +{ + return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_dimmmtr_offset + + cfg->ddr_reg_dimmmtr_width * dimm, + cfg->ddr_reg_dimmmtr_width); +} + +static bool ecc_enabled(u32 mcmtr) +{ + return (bool)ECC_ENABLED(mcmtr); +} + +static bool dimm_populated(u32 dimmmtr) +{ + return (bool)DIMM_POPULATED(dimmmtr); +} + +/* Get each DIMM's configurations of the memory controller @mci. */ +static int imh_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg) +{ + struct skx_pvt *pvt = mci->pvt_info; + struct skx_imc *imc = pvt->imc; + struct dimm_info *dimm; + u32 mcmtr, dimmmtr; + int i, j, ndimms; + + for (i = 0; i < imc->num_channels; i++) { + if (!imc->mbase) + continue; + + mcmtr = read_imc_mcmtr(cfg, imc, i); + + for (ndimms = 0, j = 0; j < imc->num_dimms; j++) { + dimmmtr = read_imc_dimmmtr(cfg, imc, i, j); + edac_dbg(1, "mcmtr 0x%x dimmmtr 0x%x (mc%d ch%d dimm%d)\n", + mcmtr, dimmmtr, imc->mc, i, j); + + if (!dimm_populated(dimmmtr)) + continue; + + dimm = edac_get_dimm(mci, i, j, 0); + ndimms += skx_get_dimm_info(dimmmtr, 0, 0, dimm, + imc, i, j, cfg); + } + + if (ndimms && !ecc_enabled(mcmtr)) { + imh_printk(KERN_ERR, "ECC is disabled on mc%d ch%d\n", + imc->mc, i); + return -ENODEV; + } + } + + return 0; +} + +/* Register all memory controllers to the EDAC core. 
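Built on the readx()/read_imc_reg() helpers, per-channel scans stay short. For instance, a hypothetical counter of populated DIMMs on one channel (count_ch_dimms is illustrative, not part of the driver):

static int count_ch_dimms(struct res_config *cfg, struct skx_imc *imc, int chan)
{
	int j, n = 0;

	for (j = 0; j < imc->num_dimms; j++)
		if (dimm_populated(read_imc_dimmmtr(cfg, imc, chan, j)))
			n++;

	return n;
}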
*/ +static int imh_register_mci(struct res_config *cfg, struct list_head *edac_list) +{ + struct skx_imc *imc; + struct skx_dev *d; + int i, rc; + + list_for_each_entry(d, edac_list, list) { + for (i = 0; i < cfg->ddr_imc_num; i++) { + imc = &d->imc[i]; + if (!imc->mbase) + continue; + + rc = skx_register_mci(imc, imc->dev, + dev_name(imc->dev), + "Intel IMH-based Socket", + EDAC_MOD_STR, + imh_get_dimm_config, cfg); + if (rc) + return rc; + } + } + + return 0; +} + +static struct res_config dmr_cfg = { + .type = DMR, + .support_ddr5 = true, + .mmio_base_l_north = 0xf6800000, + .mmio_base_l_south = 0xf6000000, + .ddr_chan_num = 1, + .ddr_dimm_num = 2, + .ddr_imc_base = 0x39b000, + .ddr_chan_mmio_sz = 0x8000, + .ddr_reg_mcmtr_offset = 0x360, + .ddr_reg_mcmtr_width = 4, + .ddr_reg_dimmmtr_offset = 0x370, + .ddr_reg_dimmmtr_width = 4, + .ubox_base = 0x0, + .ubox_size = 0x2000, + .ubox_reg_mmio_base_offset = 0x580, + .ubox_reg_mmio_base_width = 4, + .ubox_reg_socket_id_offset = 0x1080, + .ubox_reg_socket_id_width = 4, + .pcu_base = 0x3000, + .pcu_size = 0x10000, + .pcu_reg_capid3_offset = 0x290, + .pcu_reg_capid3_width = 4, + .sca_base = 0x24c000, + .sca_size = 0x2500, + .sca_reg_tolm_offset = 0x2100, + .sca_reg_tolm_width = 8, + .sca_reg_tohm_offset = 0x2108, + .sca_reg_tohm_width = 8, + .ha_base = 0x3eb000, + .ha_size = 0x1000, + .ha_reg_mode_offset = 0x4a0, + .ha_reg_mode_width = 4, +}; + +static const struct x86_cpu_id imh_cpuids[] = { + X86_MATCH_VFM(INTEL_DIAMONDRAPIDS_X, &dmr_cfg), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, imh_cpuids); + +static struct notifier_block imh_mce_dec = { + .notifier_call = skx_mce_check_error, + .priority = MCE_PRIO_EDAC, +}; + +static int __init imh_init(void) +{ + const struct x86_cpu_id *id; + struct list_head *edac_list; + struct res_config *cfg; + const char *owner; + u64 tolm, tohm; + int rc; + + edac_dbg(2, "\n"); + + if (ghes_get_devices()) + return -EBUSY; + + owner = edac_get_owner(); + if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) + return -EBUSY; + + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + id = x86_match_cpu(imh_cpuids); + if (!id) + return -ENODEV; + cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); + + if (!imh_get_tolm_tohm(cfg, &tolm, &tohm)) + return -ENODEV; + + skx_set_hi_lo(tolm, tohm); + + rc = imh_get_imc_num(cfg); + if (rc < 0) + goto fail; + + edac_list = skx_get_edac_list(); + + rc = imh_get_all_mmio_base_h(cfg, edac_list); + if (rc) + goto fail; + + rc = imh_get_munits(cfg, edac_list); + if (rc) + goto fail; + + skx_set_mem_cfg(imh_2lm_enabled(cfg, edac_list)); + + rc = imh_register_mci(cfg, edac_list); + if (rc) + goto fail; + + rc = skx_adxl_get(); + if (rc) + goto fail; + + opstate_init(); + mce_register_decode_chain(&imh_mce_dec); + skx_setup_debug("imh_test"); + + imh_printk(KERN_INFO, "%s\n", IMH_REVISION); + + return 0; +fail: + skx_remove(); + return rc; +} + +static void __exit imh_exit(void) +{ + edac_dbg(2, "\n"); + + skx_teardown_debug(); + mce_unregister_decode_chain(&imh_mce_dec); + skx_adxl_put(); + skx_remove(); +} + +module_init(imh_init); +module_exit(imh_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Qiuxu Zhuo"); +MODULE_DESCRIPTION("MC Driver for Intel servers using IMH-based memory controller"); diff --git a/drivers/edac/layerscape_edac.c b/drivers/edac/layerscape_edac.c index 35ceaca578e1..a2caa7fc5412 100644 --- a/drivers/edac/layerscape_edac.c +++ b/drivers/edac/layerscape_edac.c @@ -21,6 +21,7 @@ static const struct of_device_id 
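imh_init() above resolves its per-generation res_config through the same driver_data idiom, only keyed on the CPU model via x86_match_cpu(). A sketch (imh_find_cfg is an invented name):

static struct res_config *imh_find_cfg(void)
{
	/* NULL when the running CPU is absent from imh_cpuids. */
	const struct x86_cpu_id *id = x86_match_cpu(imh_cpuids);

	return id ? (struct res_config *)id->driver_data : NULL;
}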
fsl_ddr_mc_err_of_match[] = { { .compatible = "fsl,qoriq-memory-controller", }, + { .compatible = "nxp,imx9-memory-controller", .data = (void *)TYPE_IMX9, }, {}, }; MODULE_DEVICE_TABLE(of, fsl_ddr_mc_err_of_match); @@ -69,8 +70,8 @@ static void __exit fsl_ddr_mc_exit(void) module_exit(fsl_ddr_mc_exit); +MODULE_DESCRIPTION("Freescale Layerscape EDAC driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("NXP Semiconductor"); module_param(edac_op_state, int, 0444); -MODULE_PARM_DESC(edac_op_state, - "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c new file mode 100644 index 000000000000..38745800ed01 --- /dev/null +++ b/drivers/edac/loongson_edac.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited. + */ + +#include <linux/acpi.h> +#include <linux/edac.h> +#include <linux/init.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include "edac_module.h" + +#define ECC_CS_COUNT_REG 0x18 + +struct loongson_edac_pvt { + void __iomem *ecc_base; + + /* + * The ECC register in this controller records the number of errors + * encountered since reset and cannot be zeroed so in order to be able + * to report the error count at each check, this records the previous + * register state. + */ + int last_ce_count; +}; + +static int read_ecc(struct mem_ctl_info *mci) +{ + struct loongson_edac_pvt *pvt = mci->pvt_info; + u64 ecc; + int cs; + + ecc = readq(pvt->ecc_base + ECC_CS_COUNT_REG); + /* cs0 -- cs3 */ + cs = ecc & 0xff; + cs += (ecc >> 8) & 0xff; + cs += (ecc >> 16) & 0xff; + cs += (ecc >> 24) & 0xff; + + return cs; +} + +static void edac_check(struct mem_ctl_info *mci) +{ + struct loongson_edac_pvt *pvt = mci->pvt_info; + int new, add; + + new = read_ecc(mci); + add = new - pvt->last_ce_count; + pvt->last_ce_count = new; + if (add <= 0) + return; + + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add, + 0, 0, 0, 0, 0, -1, "error", ""); +} + +static void dimm_config_init(struct mem_ctl_info *mci) +{ + struct dimm_info *dimm; + u32 size, npages; + + /* size not used */ + size = -1; + npages = MiB_TO_PAGES(size); + + dimm = edac_get_dimm(mci, 0, 0, 0); + dimm->nr_pages = npages; + snprintf(dimm->label, sizeof(dimm->label), + "MC#%uChannel#%u_DIMM#%u", mci->mc_idx, 0, 0); + dimm->grain = 8; +} + +static void pvt_init(struct mem_ctl_info *mci, void __iomem *vbase) +{ + struct loongson_edac_pvt *pvt = mci->pvt_info; + + pvt->ecc_base = vbase; + pvt->last_ce_count = read_ecc(mci); +} + +static int edac_probe(struct platform_device *pdev) +{ + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + void __iomem *vbase; + int ret; + + vbase = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(vbase)) + return PTR_ERR(vbase); + + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = 1; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = 1; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + sizeof(struct loongson_edac_pvt)); + if (mci == NULL) + return -ENOMEM; + + mci->mc_idx = edac_device_alloc_index(); + mci->mtype_cap = MEM_FLAG_RDDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "loongson_edac.c"; + mci->ctl_name = "loongson_edac_ctl"; + mci->dev_name = "loongson_edac_dev"; + 
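	/*
	 * Polling-mode wiring: the EDAC core invokes edac_check() on each
	 * poll, which reports only the CE delta accumulated since the
	 * previous read (tracked in last_ce_count, seeded by pvt_init()).
	 */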
mci->ctl_page_to_phys = NULL; + mci->pdev = &pdev->dev; + mci->error_desc.grain = 8; + mci->edac_check = edac_check; + + pvt_init(mci, vbase); + dimm_config_init(mci); + + ret = edac_mc_add_mc(mci); + if (ret) { + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); + edac_mc_free(mci); + return ret; + } + edac_op_state = EDAC_OPSTATE_POLL; + + return 0; +} + +static void edac_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); + + if (mci) + edac_mc_free(mci); +} + +static const struct acpi_device_id loongson_edac_acpi_match[] = { + {"LOON0010", 0}, + {} +}; +MODULE_DEVICE_TABLE(acpi, loongson_edac_acpi_match); + +static struct platform_driver loongson_edac_driver = { + .probe = edac_probe, + .remove = edac_remove, + .driver = { + .name = "loongson-mc-edac", + .acpi_match_table = loongson_edac_acpi_match, + }, +}; +module_platform_driver(loongson_edac_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>"); +MODULE_DESCRIPTION("EDAC driver for loongson memory controller"); diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index cc5c63feb26a..af3c12284a1e 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -3,6 +3,7 @@ #include <linux/slab.h> #include <asm/cpu.h> +#include <asm/msr.h> #include "mce_amd.h" @@ -143,482 +144,6 @@ static const char * const mc6_mce_desc[] = { "Status Register File", }; -/* Scalable MCA error strings */ -static const char * const smca_ls_mce_desc[] = { - "Load queue parity error", - "Store queue parity error", - "Miss address buffer payload parity error", - "Level 1 TLB parity error", - "DC Tag error type 5", - "DC Tag error type 6", - "DC Tag error type 1", - "Internal error type 1", - "Internal error type 2", - "System Read Data Error Thread 0", - "System Read Data Error Thread 1", - "DC Tag error type 2", - "DC Data error type 1 and poison consumption", - "DC Data error type 2", - "DC Data error type 3", - "DC Tag error type 4", - "Level 2 TLB parity error", - "PDC parity error", - "DC Tag error type 3", - "DC Tag error type 5", - "L2 Fill Data error", -}; - -static const char * const smca_ls2_mce_desc[] = { - "An ECC error was detected on a data cache read by a probe or victimization", - "An ECC error or L2 poison was detected on a data cache read by a load", - "An ECC error was detected on a data cache read-modify-write by a store", - "An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization", - "An ECC error or poison bit mismatch was detected on a tag read by a load", - "An ECC error or poison bit mismatch was detected on a tag read by a store", - "An ECC error was detected on an EMEM read by a load", - "An ECC error was detected on an EMEM read-modify-write by a store", - "A parity error was detected in an L1 TLB entry by any access", - "A parity error was detected in an L2 TLB entry by any access", - "A parity error was detected in a PWC entry by any access", - "A parity error was detected in an STQ entry by any access", - "A parity error was detected in an LDQ entry by any access", - "A parity error was detected in a MAB entry by any access", - "A parity error was detected in an SCB entry state field by any access", - "A parity error was detected in an SCB entry address field by any access", - "A parity error was detected in an SCB entry data field by any access", - "A parity error was detected in a WCB entry by any access", - "A poisoned line was detected in an SCB entry by any access", - "A SystemReadDataError error was 
reported on read data returned from L2 for a load", - "A SystemReadDataError error was reported on read data returned from L2 for an SCB store", - "A SystemReadDataError error was reported on read data returned from L2 for a WCB store", - "A hardware assertion error was reported", - "A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access", -}; - -static const char * const smca_if_mce_desc[] = { - "Op Cache Microtag Probe Port Parity Error", - "IC Microtag or Full Tag Multi-hit Error", - "IC Full Tag Parity Error", - "IC Data Array Parity Error", - "Decoupling Queue PhysAddr Parity Error", - "L0 ITLB Parity Error", - "L1 ITLB Parity Error", - "L2 ITLB Parity Error", - "BPQ Thread 0 Snoop Parity Error", - "BPQ Thread 1 Snoop Parity Error", - "L1 BTB Multi-Match Error", - "L2 BTB Multi-Match Error", - "L2 Cache Response Poison Error", - "System Read Data Error", - "Hardware Assertion Error", - "L1-TLB Multi-Hit", - "L2-TLB Multi-Hit", - "BSR Parity Error", - "CT MCE", -}; - -static const char * const smca_l2_mce_desc[] = { - "L2M Tag Multiple-Way-Hit error", - "L2M Tag or State Array ECC Error", - "L2M Data Array ECC Error", - "Hardware Assert Error", -}; - -static const char * const smca_de_mce_desc[] = { - "Micro-op cache tag parity error", - "Micro-op cache data parity error", - "Instruction buffer parity error", - "Micro-op queue parity error", - "Instruction dispatch queue parity error", - "Fetch address FIFO parity error", - "Patch RAM data parity error", - "Patch RAM sequencer parity error", - "Micro-op buffer parity error", - "Hardware Assertion MCA Error", -}; - -static const char * const smca_ex_mce_desc[] = { - "Watchdog Timeout error", - "Physical register file parity error", - "Flag register file parity error", - "Immediate displacement register file parity error", - "Address generator payload parity error", - "EX payload parity error", - "Checkpoint queue parity error", - "Retire dispatch queue parity error", - "Retire status queue parity error", - "Scheduling queue parity error", - "Branch buffer queue parity error", - "Hardware Assertion error", - "Spec Map parity error", - "Retire Map parity error", -}; - -static const char * const smca_fp_mce_desc[] = { - "Physical register file (PRF) parity error", - "Freelist (FL) parity error", - "Schedule queue parity error", - "NSQ parity error", - "Retire queue (RQ) parity error", - "Status register file (SRF) parity error", - "Hardware assertion", -}; - -static const char * const smca_l3_mce_desc[] = { - "Shadow Tag Macro ECC Error", - "Shadow Tag Macro Multi-way-hit Error", - "L3M Tag ECC Error", - "L3M Tag Multi-way-hit Error", - "L3M Data ECC Error", - "SDP Parity Error or SystemReadDataError from XI", - "L3 Victim Queue Parity Error", - "L3 Hardware Assertion", -}; - -static const char * const smca_cs_mce_desc[] = { - "Illegal Request", - "Address Violation", - "Security Violation", - "Illegal Response", - "Unexpected Response", - "Request or Probe Parity Error", - "Read Response Parity Error", - "Atomic Request Parity Error", - "Probe Filter ECC Error", -}; - -static const char * const smca_cs2_mce_desc[] = { - "Illegal Request", - "Address Violation", - "Security Violation", - "Illegal Response", - "Unexpected Response", - "Request or Probe Parity Error", - "Read Response Parity Error", - "Atomic Request Parity Error", - "SDP read response had no match in the CS queue", - "Probe Filter Protocol Error", - "Probe Filter ECC Error", - "SDP read response had an unexpected RETRY error", - "Counter 
overflow error", - "Counter underflow error", -}; - -static const char * const smca_pie_mce_desc[] = { - "Hardware Assert", - "Register security violation", - "Link Error", - "Poison data consumption", - "A deferred error was detected in the DF" -}; - -static const char * const smca_umc_mce_desc[] = { - "DRAM ECC error", - "Data poison error", - "SDP parity error", - "Advanced peripheral bus error", - "Address/Command parity error", - "Write data CRC error", - "DCQ SRAM ECC error", - "AES SRAM ECC error", -}; - -static const char * const smca_umc2_mce_desc[] = { - "DRAM ECC error", - "Data poison error", - "SDP parity error", - "Reserved", - "Address/Command parity error", - "Write data parity error", - "DCQ SRAM ECC error", - "Reserved", - "Read data parity error", - "Rdb SRAM ECC error", - "RdRsp SRAM ECC error", - "LM32 MP errors", -}; - -static const char * const smca_pb_mce_desc[] = { - "An ECC error in the Parameter Block RAM array", -}; - -static const char * const smca_psp_mce_desc[] = { - "An ECC or parity error in a PSP RAM instance", -}; - -static const char * const smca_psp2_mce_desc[] = { - "High SRAM ECC or parity error", - "Low SRAM ECC or parity error", - "Instruction Cache Bank 0 ECC or parity error", - "Instruction Cache Bank 1 ECC or parity error", - "Instruction Tag Ram 0 parity error", - "Instruction Tag Ram 1 parity error", - "Data Cache Bank 0 ECC or parity error", - "Data Cache Bank 1 ECC or parity error", - "Data Cache Bank 2 ECC or parity error", - "Data Cache Bank 3 ECC or parity error", - "Data Tag Bank 0 parity error", - "Data Tag Bank 1 parity error", - "Data Tag Bank 2 parity error", - "Data Tag Bank 3 parity error", - "Dirty Data Ram parity error", - "TLB Bank 0 parity error", - "TLB Bank 1 parity error", - "System Hub Read Buffer ECC or parity error", -}; - -static const char * const smca_smu_mce_desc[] = { - "An ECC or parity error in an SMU RAM instance", -}; - -static const char * const smca_smu2_mce_desc[] = { - "High SRAM ECC or parity error", - "Low SRAM ECC or parity error", - "Data Cache Bank A ECC or parity error", - "Data Cache Bank B ECC or parity error", - "Data Tag Cache Bank A ECC or parity error", - "Data Tag Cache Bank B ECC or parity error", - "Instruction Cache Bank A ECC or parity error", - "Instruction Cache Bank B ECC or parity error", - "Instruction Tag Cache Bank A ECC or parity error", - "Instruction Tag Cache Bank B ECC or parity error", - "System Hub Read Buffer ECC or parity error", - "PHY RAM ECC error", -}; - -static const char * const smca_mp5_mce_desc[] = { - "High SRAM ECC or parity error", - "Low SRAM ECC or parity error", - "Data Cache Bank A ECC or parity error", - "Data Cache Bank B ECC or parity error", - "Data Tag Cache Bank A ECC or parity error", - "Data Tag Cache Bank B ECC or parity error", - "Instruction Cache Bank A ECC or parity error", - "Instruction Cache Bank B ECC or parity error", - "Instruction Tag Cache Bank A ECC or parity error", - "Instruction Tag Cache Bank B ECC or parity error", -}; - -static const char * const smca_mpdma_mce_desc[] = { - "Main SRAM [31:0] bank ECC or parity error", - "Main SRAM [63:32] bank ECC or parity error", - "Main SRAM [95:64] bank ECC or parity error", - "Main SRAM [127:96] bank ECC or parity error", - "Data Cache Bank A ECC or parity error", - "Data Cache Bank B ECC or parity error", - "Data Tag Cache Bank A ECC or parity error", - "Data Tag Cache Bank B ECC or parity error", - "Instruction Cache Bank A ECC or parity error", - "Instruction Cache Bank B ECC or parity error", 
- "Instruction Tag Cache Bank A ECC or parity error", - "Instruction Tag Cache Bank B ECC or parity error", - "Data Cache Bank A ECC or parity error", - "Data Cache Bank B ECC or parity error", - "Data Tag Cache Bank A ECC or parity error", - "Data Tag Cache Bank B ECC or parity error", - "Instruction Cache Bank A ECC or parity error", - "Instruction Cache Bank B ECC or parity error", - "Instruction Tag Cache Bank A ECC or parity error", - "Instruction Tag Cache Bank B ECC or parity error", - "Data Cache Bank A ECC or parity error", - "Data Cache Bank B ECC or parity error", - "Data Tag Cache Bank A ECC or parity error", - "Data Tag Cache Bank B ECC or parity error", - "Instruction Cache Bank A ECC or parity error", - "Instruction Cache Bank B ECC or parity error", - "Instruction Tag Cache Bank A ECC or parity error", - "Instruction Tag Cache Bank B ECC or parity error", - "System Hub Read Buffer ECC or parity error", - "MPDMA TVF DVSEC Memory ECC or parity error", - "MPDMA TVF MMIO Mailbox0 ECC or parity error", - "MPDMA TVF MMIO Mailbox1 ECC or parity error", - "MPDMA TVF Doorbell Memory ECC or parity error", - "MPDMA TVF SDP Slave Memory 0 ECC or parity error", - "MPDMA TVF SDP Slave Memory 1 ECC or parity error", - "MPDMA TVF SDP Slave Memory 2 ECC or parity error", - "MPDMA TVF SDP Master Memory 0 ECC or parity error", - "MPDMA TVF SDP Master Memory 1 ECC or parity error", - "MPDMA TVF SDP Master Memory 2 ECC or parity error", - "MPDMA TVF SDP Master Memory 3 ECC or parity error", - "MPDMA TVF SDP Master Memory 4 ECC or parity error", - "MPDMA TVF SDP Master Memory 5 ECC or parity error", - "MPDMA TVF SDP Master Memory 6 ECC or parity error", - "MPDMA PTE Command FIFO ECC or parity error", - "MPDMA PTE Hub Data FIFO ECC or parity error", - "MPDMA PTE Internal Data FIFO ECC or parity error", - "MPDMA PTE Command Memory DMA ECC or parity error", - "MPDMA PTE Command Memory Internal ECC or parity error", - "MPDMA PTE DMA Completion FIFO ECC or parity error", - "MPDMA PTE Tablewalk Completion FIFO ECC or parity error", - "MPDMA PTE Descriptor Completion FIFO ECC or parity error", - "MPDMA PTE ReadOnly Completion FIFO ECC or parity error", - "MPDMA PTE DirectWrite Completion FIFO ECC or parity error", - "SDP Watchdog Timer expired", -}; - -static const char * const smca_nbio_mce_desc[] = { - "ECC or Parity error", - "PCIE error", - "SDP ErrEvent error", - "SDP Egress Poison Error", - "IOHC Internal Poison Error", -}; - -static const char * const smca_pcie_mce_desc[] = { - "CCIX PER Message logging", - "CCIX Read Response with Status: Non-Data Error", - "CCIX Write Response with Status: Non-Data Error", - "CCIX Read Response with Status: Data Error", - "CCIX Non-okay write response with data error", -}; - -static const char * const smca_pcie2_mce_desc[] = { - "SDP Parity Error logging", -}; - -static const char * const smca_xgmipcs_mce_desc[] = { - "Data Loss Error", - "Training Error", - "Flow Control Acknowledge Error", - "Rx Fifo Underflow Error", - "Rx Fifo Overflow Error", - "CRC Error", - "BER Exceeded Error", - "Tx Vcid Data Error", - "Replay Buffer Parity Error", - "Data Parity Error", - "Replay Fifo Overflow Error", - "Replay Fifo Underflow Error", - "Elastic Fifo Overflow Error", - "Deskew Error", - "Flow Control CRC Error", - "Data Startup Limit Error", - "FC Init Timeout Error", - "Recovery Timeout Error", - "Ready Serial Timeout Error", - "Ready Serial Attempt Error", - "Recovery Attempt Error", - "Recovery Relock Attempt Error", - "Replay Attempt Error", - "Sync Header Error", 
- "Tx Replay Timeout Error", - "Rx Replay Timeout Error", - "LinkSub Tx Timeout Error", - "LinkSub Rx Timeout Error", - "Rx CMD Packet Error", -}; - -static const char * const smca_xgmiphy_mce_desc[] = { - "RAM ECC Error", - "ARC instruction buffer parity error", - "ARC data buffer parity error", - "PHY APB error", -}; - -static const char * const smca_nbif_mce_desc[] = { - "Timeout error from GMI", - "SRAM ECC error", - "NTB Error Event", - "SDP Parity error", -}; - -static const char * const smca_sata_mce_desc[] = { - "Parity error for port 0", - "Parity error for port 1", - "Parity error for port 2", - "Parity error for port 3", - "Parity error for port 4", - "Parity error for port 5", - "Parity error for port 6", - "Parity error for port 7", -}; - -static const char * const smca_usb_mce_desc[] = { - "Parity error or ECC error for S0 RAM0", - "Parity error or ECC error for S0 RAM1", - "Parity error or ECC error for S0 RAM2", - "Parity error for PHY RAM0", - "Parity error for PHY RAM1", - "AXI Slave Response error", -}; - -static const char * const smca_gmipcs_mce_desc[] = { - "Data Loss Error", - "Training Error", - "Replay Parity Error", - "Rx Fifo Underflow Error", - "Rx Fifo Overflow Error", - "CRC Error", - "BER Exceeded Error", - "Tx Fifo Underflow Error", - "Replay Buffer Parity Error", - "Tx Overflow Error", - "Replay Fifo Overflow Error", - "Replay Fifo Underflow Error", - "Elastic Fifo Overflow Error", - "Deskew Error", - "Offline Error", - "Data Startup Limit Error", - "FC Init Timeout Error", - "Recovery Timeout Error", - "Ready Serial Timeout Error", - "Ready Serial Attempt Error", - "Recovery Attempt Error", - "Recovery Relock Attempt Error", - "Deskew Abort Error", - "Rx Buffer Error", - "Rx LFDS Fifo Overflow Error", - "Rx LFDS Fifo Underflow Error", - "LinkSub Tx Timeout Error", - "LinkSub Rx Timeout Error", - "Rx CMD Packet Error", - "LFDS Training Timeout Error", - "LFDS FC Init Timeout Error", - "Data Loss Error", -}; - -struct smca_mce_desc { - const char * const *descs; - unsigned int num_descs; -}; - -static struct smca_mce_desc smca_mce_descs[] = { - [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) }, - [SMCA_LS_V2] = { smca_ls2_mce_desc, ARRAY_SIZE(smca_ls2_mce_desc) }, - [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) }, - [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) }, - [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) }, - [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) }, - [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) }, - [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) }, - [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) }, - [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, - [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, - [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, - [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) }, - [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, - [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, - [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) }, - [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, - [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) }, - [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, - [SMCA_MPDMA] = { smca_mpdma_mce_desc, ARRAY_SIZE(smca_mpdma_mce_desc) }, - [SMCA_NBIO] = { smca_nbio_mce_desc, 
ARRAY_SIZE(smca_nbio_mce_desc) }, - [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) }, - [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) }, - [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) }, - /* NBIF and SHUB have the same error descriptions, for now. */ - [SMCA_NBIF] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) }, - [SMCA_SHUB] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) }, - [SMCA_SATA] = { smca_sata_mce_desc, ARRAY_SIZE(smca_sata_mce_desc) }, - [SMCA_USB] = { smca_usb_mce_desc, ARRAY_SIZE(smca_usb_mce_desc) }, - [SMCA_GMI_PCS] = { smca_gmipcs_mce_desc, ARRAY_SIZE(smca_gmipcs_mce_desc) }, - /* All the PHY bank types have the same error descriptions, for now. */ - [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, - [SMCA_WAFL_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, - [SMCA_GMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, -}; - static bool f12h_mc0_mce(u16 ec, u8 xec) { bool ret = false; @@ -1060,7 +585,7 @@ static void decode_mc3_mce(struct mce *m) static void decode_mc4_mce(struct mce *m) { unsigned int fam = x86_family(m->cpuid); - int node_id = topology_die_id(m->extcpu); + int node_id = topology_amd_node_id(m->extcpu); u16 ec = EC(m->status); u8 xec = XEC(m->status, 0x1f); u8 offset = 0; @@ -1163,11 +688,51 @@ static void decode_mc6_mce(struct mce *m) pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n"); } +static const char * const smca_long_names[] = { + [SMCA_LS ... SMCA_LS_V2] = "Load Store Unit", + [SMCA_IF] = "Instruction Fetch Unit", + [SMCA_L2_CACHE] = "L2 Cache", + [SMCA_DE] = "Decode Unit", + [SMCA_RESERVED] = "Reserved", + [SMCA_EX] = "Execution Unit", + [SMCA_FP] = "Floating Point Unit", + [SMCA_L3_CACHE] = "L3 Cache", + [SMCA_CS ... SMCA_CS_V2] = "Coherent Slave", + [SMCA_PIE] = "Power, Interrupts, etc.", + + /* UMC v2 is separate because both of them can exist in a single system. */ + [SMCA_UMC] = "Unified Memory Controller", + [SMCA_UMC_V2] = "Unified Memory Controller v2", + [SMCA_PB] = "Parameter Block", + [SMCA_PSP ... SMCA_PSP_V2] = "Platform Security Processor", + [SMCA_SMU ... SMCA_SMU_V2] = "System Management Unit", + [SMCA_MP5] = "Microprocessor 5 Unit", + [SMCA_MPDMA] = "MPDMA Unit", + [SMCA_NBIO] = "Northbridge IO Unit", + [SMCA_PCIE ... SMCA_PCIE_V2] = "PCI Express Unit", + [SMCA_XGMI_PCS] = "Ext Global Memory Interconnect PCS Unit", + [SMCA_NBIF] = "NBIF Unit", + [SMCA_SHUB] = "System Hub Unit", + [SMCA_SATA] = "SATA Unit", + [SMCA_USB] = "USB Unit", + [SMCA_GMI_PCS] = "Global Memory Interconnect PCS Unit", + [SMCA_XGMI_PHY] = "Ext Global Memory Interconnect PHY Unit", + [SMCA_WAFL_PHY] = "WAFL PHY Unit", + [SMCA_GMI_PHY] = "Global Memory Interconnect PHY Unit", +}; + +static const char *smca_get_long_name(enum smca_bank_types t) +{ + if (t >= N_SMCA_BANK_TYPES) + return NULL; + + return smca_long_names[t]; +} + /* Decode errors according to Scalable MCA specification */ static void decode_smca_error(struct mce *m) { enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank); - const char *ip_name; u8 xec = XEC(m->status, xec_mask); if (bank_type >= N_SMCA_BANK_TYPES) @@ -1178,16 +743,11 @@ static void decode_smca_error(struct mce *m) return; } - ip_name = smca_get_long_name(bank_type); - - pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec); + pr_emerg(HW_ERR "%s Ext. 
Error Code: %d", smca_get_long_name(bank_type), xec); - /* Only print the decode of valid error codes */ - if (xec < smca_mce_descs[bank_type].num_descs) - pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]); - - if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc) - decode_dram_ecc(topology_die_id(m->extcpu), m); + if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && + xec == 0 && decode_dram_ecc) + decode_dram_ecc(topology_amd_node_id(m->extcpu), m); } static inline void amd_decode_err_code(u16 ec) @@ -1234,7 +794,9 @@ static int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) { struct mce *m = (struct mce *)data; + struct mce_hw_err *err = to_mce_hw_err(m); unsigned int fam = x86_family(m->cpuid); + u32 mca_config_lo = 0, dummy; int ecc; if (m->kflags & MCE_HANDLED_CEC) @@ -1254,11 +816,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) ((m->status & MCI_STATUS_PCC) ? "PCC" : "-")); if (boot_cpu_has(X86_FEATURE_SMCA)) { - u32 low, high; - u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); + rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(m->bank), &mca_config_lo, &dummy); - if (!rdmsr_safe(addr, &low, &high) && - (low & MCI_CONFIG_MCAX)) + if (mca_config_lo & MCI_CONFIG_MCAX) pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-")); pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-")); @@ -1291,8 +851,18 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) if (boot_cpu_has(X86_FEATURE_SMCA)) { pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid); - if (m->status & MCI_STATUS_SYNDV) - pr_cont(", Syndrome: 0x%016llx", m->synd); + if (m->status & MCI_STATUS_SYNDV) { + pr_cont(", Syndrome: 0x%016llx\n", m->synd); + if (mca_config_lo & MCI_CONFIG_FRUTEXT) { + char frutext[17]; + + frutext[16] = '\0'; + memcpy(&frutext[0], &err->vendor.amd.synd1, 8); + memcpy(&frutext[8], &err->vendor.amd.synd2, 8); + + pr_emerg(HW_ERR "FRU Text: %s", frutext); + } + } pr_cont("\n"); diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c new file mode 100644 index 000000000000..108d69209146 --- /dev/null +++ b/drivers/edac/mem_repair.c @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic EDAC memory repair driver is designed to control the memory + * devices with memory repair features, such as Post Package Repair (PPR), + * memory sparing etc. The common sysfs memory repair interface abstracts + * the control of various arbitrary memory repair functionalities into a + * unified set of functions. + * + * Copyright (c) 2024-2025 HiSilicon Limited. 
+ */ + +#include <linux/edac.h> + +enum edac_mem_repair_attributes { + MR_TYPE, + MR_PERSIST_MODE, + MR_SAFE_IN_USE, + MR_HPA, + MR_MIN_HPA, + MR_MAX_HPA, + MR_DPA, + MR_MIN_DPA, + MR_MAX_DPA, + MR_NIBBLE_MASK, + MR_BANK_GROUP, + MR_BANK, + MR_RANK, + MR_ROW, + MR_COLUMN, + MR_CHANNEL, + MR_SUB_CHANNEL, + MEM_DO_REPAIR, + MR_MAX_ATTRS +}; + +struct edac_mem_repair_dev_attr { + struct device_attribute dev_attr; + u8 instance; +}; + +struct edac_mem_repair_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_mem_repair_dev_attr mem_repair_dev_attr[MR_MAX_ATTRS]; + struct attribute *mem_repair_attrs[MR_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +const char * const edac_repair_type[] = { + [EDAC_REPAIR_PPR] = "ppr", + [EDAC_REPAIR_CACHELINE_SPARING] = "cacheline-sparing", + [EDAC_REPAIR_ROW_SPARING] = "row-sparing", + [EDAC_REPAIR_BANK_SPARING] = "bank-sparing", + [EDAC_REPAIR_RANK_SPARING] = "rank-sparing", +}; +EXPORT_SYMBOL_GPL(edac_repair_type); + +#define TO_MR_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_mem_repair_dev_attr, dev_attr) + +#define MR_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + u8 inst = TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = \ + ctx->mem_repair[inst].mem_repair_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \ + &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +MR_ATTR_SHOW(repair_type, get_repair_type, const char *, "%s\n") +MR_ATTR_SHOW(persist_mode, get_persist_mode, bool, "%u\n") +MR_ATTR_SHOW(repair_safe_when_in_use, get_repair_safe_when_in_use, bool, "%u\n") +MR_ATTR_SHOW(hpa, get_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(min_hpa, get_min_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(max_hpa, get_max_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(dpa, get_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(min_dpa, get_min_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(max_dpa, get_max_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(nibble_mask, get_nibble_mask, u32, "0x%x\n") +MR_ATTR_SHOW(bank_group, get_bank_group, u32, "%u\n") +MR_ATTR_SHOW(bank, get_bank, u32, "%u\n") +MR_ATTR_SHOW(rank, get_rank, u32, "%u\n") +MR_ATTR_SHOW(row, get_row, u32, "0x%x\n") +MR_ATTR_SHOW(column, get_column, u32, "%u\n") +MR_ATTR_SHOW(channel, get_channel, u32, "%u\n") +MR_ATTR_SHOW(sub_channel, get_sub_channel, u32, "%u\n") + +#define MR_ATTR_STORE(attrib, cb, type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = \ + ctx->mem_repair[inst].mem_repair_ops; \ + type data; \ + int ret; \ + \ + ret = conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \ + data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +MR_ATTR_STORE(persist_mode, set_persist_mode, unsigned long, kstrtoul) +MR_ATTR_STORE(hpa, set_hpa, u64, kstrtou64) +MR_ATTR_STORE(dpa, set_dpa, u64, kstrtou64) +MR_ATTR_STORE(nibble_mask, set_nibble_mask, unsigned long, kstrtoul) +MR_ATTR_STORE(bank_group, set_bank_group, unsigned long, kstrtoul) +MR_ATTR_STORE(bank, set_bank, unsigned long, kstrtoul) +MR_ATTR_STORE(rank, 
set_rank, unsigned long, kstrtoul) +MR_ATTR_STORE(row, set_row, unsigned long, kstrtoul) +MR_ATTR_STORE(column, set_column, unsigned long, kstrtoul) +MR_ATTR_STORE(channel, set_channel, unsigned long, kstrtoul) +MR_ATTR_STORE(sub_channel, set_sub_channel, unsigned long, kstrtoul) + +#define MR_DO_OP(attrib, cb) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = ctx->mem_repair[inst].mem_repair_ops; \ + unsigned long data; \ + int ret; \ + \ + ret = kstrtoul(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +MR_DO_OP(repair, do_repair) + +static umode_t mem_repair_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct device_attribute *dev_attr = container_of(a, struct device_attribute, attr); + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + u8 inst = TO_MR_DEV_ATTR(dev_attr)->instance; + const struct edac_mem_repair_ops *ops = ctx->mem_repair[inst].mem_repair_ops; + + switch (attr_id) { + case MR_TYPE: + if (ops->get_repair_type) + return a->mode; + break; + case MR_PERSIST_MODE: + if (ops->get_persist_mode) { + if (ops->set_persist_mode) + return a->mode; + else + return 0444; + } + break; + case MR_SAFE_IN_USE: + if (ops->get_repair_safe_when_in_use) + return a->mode; + break; + case MR_HPA: + if (ops->get_hpa) { + if (ops->set_hpa) + return a->mode; + else + return 0444; + } + break; + case MR_MIN_HPA: + if (ops->get_min_hpa) + return a->mode; + break; + case MR_MAX_HPA: + if (ops->get_max_hpa) + return a->mode; + break; + case MR_DPA: + if (ops->get_dpa) { + if (ops->set_dpa) + return a->mode; + else + return 0444; + } + break; + case MR_MIN_DPA: + if (ops->get_min_dpa) + return a->mode; + break; + case MR_MAX_DPA: + if (ops->get_max_dpa) + return a->mode; + break; + case MR_NIBBLE_MASK: + if (ops->get_nibble_mask) { + if (ops->set_nibble_mask) + return a->mode; + else + return 0444; + } + break; + case MR_BANK_GROUP: + if (ops->get_bank_group) { + if (ops->set_bank_group) + return a->mode; + else + return 0444; + } + break; + case MR_BANK: + if (ops->get_bank) { + if (ops->set_bank) + return a->mode; + else + return 0444; + } + break; + case MR_RANK: + if (ops->get_rank) { + if (ops->set_rank) + return a->mode; + else + return 0444; + } + break; + case MR_ROW: + if (ops->get_row) { + if (ops->set_row) + return a->mode; + else + return 0444; + } + break; + case MR_COLUMN: + if (ops->get_column) { + if (ops->set_column) + return a->mode; + else + return 0444; + } + break; + case MR_CHANNEL: + if (ops->get_channel) { + if (ops->set_channel) + return a->mode; + else + return 0444; + } + break; + case MR_SUB_CHANNEL: + if (ops->get_sub_channel) { + if (ops->set_sub_channel) + return a->mode; + else + return 0444; + } + break; + case MEM_DO_REPAIR: + if (ops->do_repair) + return a->mode; + break; + default: + break; + } + + return 0; +} + +static const struct device_attribute mem_repair_dev_attr[] = { + [MR_TYPE] = __ATTR_RO(repair_type), + [MR_PERSIST_MODE] = __ATTR_RW(persist_mode), + [MR_SAFE_IN_USE] = __ATTR_RO(repair_safe_when_in_use), + [MR_HPA] = __ATTR_RW(hpa), + [MR_MIN_HPA] = __ATTR_RO(min_hpa), + [MR_MAX_HPA] = 
__ATTR_RO(max_hpa), + [MR_DPA] = __ATTR_RW(dpa), + [MR_MIN_DPA] = __ATTR_RO(min_dpa), + [MR_MAX_DPA] = __ATTR_RO(max_dpa), + [MR_NIBBLE_MASK] = __ATTR_RW(nibble_mask), + [MR_BANK_GROUP] = __ATTR_RW(bank_group), + [MR_BANK] = __ATTR_RW(bank), + [MR_RANK] = __ATTR_RW(rank), + [MR_ROW] = __ATTR_RW(row), + [MR_COLUMN] = __ATTR_RW(column), + [MR_CHANNEL] = __ATTR_RW(channel), + [MR_SUB_CHANNEL] = __ATTR_RW(sub_channel), + [MEM_DO_REPAIR] = __ATTR_WO(repair) +}; + +static int mem_repair_create_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance) +{ + struct edac_mem_repair_context *ctx; + struct attribute_group *group; + int i; + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + for (i = 0; i < MR_MAX_ATTRS; i++) { + ctx->mem_repair_dev_attr[i].dev_attr = mem_repair_dev_attr[i]; + ctx->mem_repair_dev_attr[i].instance = instance; + sysfs_attr_init(&ctx->mem_repair_dev_attr[i].dev_attr.attr); + ctx->mem_repair_attrs[i] = + &ctx->mem_repair_dev_attr[i].dev_attr.attr; + } + + sprintf(ctx->name, "%s%d", "mem_repair", instance); + group = &ctx->group; + group->name = ctx->name; + group->attrs = ctx->mem_repair_attrs; + group->is_visible = mem_repair_attr_visible; + attr_groups[0] = group; + + return 0; +} + +/** + * edac_mem_repair_get_desc - get EDAC memory repair descriptors + * @dev: client device with memory repair feature + * @attr_groups: pointer to attribute group container + * @instance: device's memory repair instance number. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. + */ +int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, u8 instance) +{ + if (!dev || !attr_groups) + return -EINVAL; + + return mem_repair_create_desc(dev, attr_groups, instance); +} diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index e50d7928bf8f..a45dc6b35ede 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -22,8 +22,7 @@ #include <linux/gfp.h> #include <linux/fsl/edac.h> -#include <linux/of_platform.h> -#include <linux/of_device.h> +#include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> #include "edac_module.h" @@ -301,7 +300,7 @@ err: return res; } -static int mpc85xx_pci_err_remove(struct platform_device *op) +static void mpc85xx_pci_err_remove(struct platform_device *op) { struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev); struct mpc85xx_pci_pdata *pdata = pci->pvt_info; @@ -313,8 +312,6 @@ static int mpc85xx_pci_err_remove(struct platform_device *op) edac_pci_del_device(&op->dev); edac_pci_free_ctl_info(pci); - - return 0; } static const struct platform_device_id mpc85xx_pci_err_match[] = { @@ -499,7 +496,7 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) return -ENOMEM; edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata), - "cpu", 1, "L", 1, 2, NULL, 0, + "cpu", 1, "L", 1, 2, edac_dev_idx); if (!edac_dev) { devres_release_group(&op->dev, mpc85xx_l2_err_probe); @@ -592,7 +589,7 @@ err: return res; } -static int mpc85xx_l2_err_remove(struct platform_device *op) +static void mpc85xx_l2_err_remove(struct platform_device *op) { struct edac_device_ctl_info *edac_dev = dev_get_drvdata(&op->dev); struct mpc85xx_l2_pdata *pdata = edac_dev->pvt_info; @@ -607,7 +604,6 @@ static int mpc85xx_l2_err_remove(struct platform_device *op) out_be32(pdata->l2_vbase + MPC85XX_L2_ERRDIS, orig_l2_err_disable); edac_device_del_device(&op->dev); 
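 	/* The ctl_info may only be freed once the device has been deleted
 	 * from the EDAC core; the remove() paths below follow the same order. */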
edac_device_free_ctl_info(edac_dev); - return 0; } static const struct of_device_id mpc85xx_l2_err_of_match[] = { @@ -708,8 +704,8 @@ static void __exit mpc85xx_mc_exit(void) module_exit(mpc85xx_mc_exit); +MODULE_DESCRIPTION("Freescale MPC85xx Memory Controller EDAC driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Montavista Software, Inc."); module_param(edac_op_state, int, 0444); -MODULE_PARM_DESC(edac_op_state, - "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); diff --git a/drivers/edac/npcm_edac.c b/drivers/edac/npcm_edac.c new file mode 100644 index 000000000000..e60a99eb8cfb --- /dev/null +++ b/drivers/edac/npcm_edac.c @@ -0,0 +1,542 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2022 Nuvoton Technology Corporation + +#include <linux/debugfs.h> +#include <linux/iopoll.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include "edac_module.h" + +#define EDAC_MOD_NAME "npcm-edac" +#define EDAC_MSG_SIZE 256 + +/* chip serials */ +#define NPCM7XX_CHIP BIT(0) +#define NPCM8XX_CHIP BIT(1) + +/* syndrome values */ +#define UE_SYNDROME 0x03 + +/* error injection */ +#define ERROR_TYPE_CORRECTABLE 0 +#define ERROR_TYPE_UNCORRECTABLE 1 +#define ERROR_LOCATION_DATA 0 +#define ERROR_LOCATION_CHECKCODE 1 +#define ERROR_BIT_DATA_MAX 63 +#define ERROR_BIT_CHECKCODE_MAX 7 + +static char data_synd[] = { + 0xf4, 0xf1, 0xec, 0xea, 0xe9, 0xe6, 0xe5, 0xe3, + 0xdc, 0xda, 0xd9, 0xd6, 0xd5, 0xd3, 0xce, 0xcb, + 0xb5, 0xb0, 0xad, 0xab, 0xa8, 0xa7, 0xa4, 0xa2, + 0x9d, 0x9b, 0x98, 0x97, 0x94, 0x92, 0x8f, 0x8a, + 0x75, 0x70, 0x6d, 0x6b, 0x68, 0x67, 0x64, 0x62, + 0x5e, 0x5b, 0x58, 0x57, 0x54, 0x52, 0x4f, 0x4a, + 0x34, 0x31, 0x2c, 0x2a, 0x29, 0x26, 0x25, 0x23, + 0x1c, 0x1a, 0x19, 0x16, 0x15, 0x13, 0x0e, 0x0b +}; + +static struct regmap *npcm_regmap; + +struct npcm_platform_data { + /* chip serials */ + int chip; + + /* memory controller registers */ + u32 ctl_ecc_en; + u32 ctl_int_status; + u32 ctl_int_ack; + u32 ctl_int_mask_master; + u32 ctl_int_mask_ecc; + u32 ctl_ce_addr_l; + u32 ctl_ce_addr_h; + u32 ctl_ce_data_l; + u32 ctl_ce_data_h; + u32 ctl_ce_synd; + u32 ctl_ue_addr_l; + u32 ctl_ue_addr_h; + u32 ctl_ue_data_l; + u32 ctl_ue_data_h; + u32 ctl_ue_synd; + u32 ctl_source_id; + u32 ctl_controller_busy; + u32 ctl_xor_check_bits; + + /* masks and shifts */ + u32 ecc_en_mask; + u32 int_status_ce_mask; + u32 int_status_ue_mask; + u32 int_ack_ce_mask; + u32 int_ack_ue_mask; + u32 int_mask_master_non_ecc_mask; + u32 int_mask_master_global_mask; + u32 int_mask_ecc_non_event_mask; + u32 ce_addr_h_mask; + u32 ce_synd_mask; + u32 ce_synd_shift; + u32 ue_addr_h_mask; + u32 ue_synd_mask; + u32 ue_synd_shift; + u32 source_id_ce_mask; + u32 source_id_ce_shift; + u32 source_id_ue_mask; + u32 source_id_ue_shift; + u32 controller_busy_mask; + u32 xor_check_bits_mask; + u32 xor_check_bits_shift; + u32 writeback_en_mask; + u32 fwc_mask; +}; + +struct priv_data { + void __iomem *reg; + char message[EDAC_MSG_SIZE]; + const struct npcm_platform_data *pdata; + + /* error injection */ + struct dentry *debugfs; + u8 error_type; + u8 location; + u8 bit; +}; + +static void handle_ce(struct mem_ctl_info *mci) +{ + struct priv_data *priv = mci->pvt_info; + const struct npcm_platform_data *pdata; + u32 val_h = 0, val_l, id, synd; + u64 addr = 0, data = 0; + + pdata = priv->pdata; + regmap_read(npcm_regmap, pdata->ctl_ce_addr_l, &val_l); + if (pdata->chip == NPCM8XX_CHIP) { + regmap_read(npcm_regmap, 
pdata->ctl_ce_addr_h, &val_h); + val_h &= pdata->ce_addr_h_mask; + } + addr = ((addr | val_h) << 32) | val_l; + + regmap_read(npcm_regmap, pdata->ctl_ce_data_l, &val_l); + if (pdata->chip == NPCM8XX_CHIP) + regmap_read(npcm_regmap, pdata->ctl_ce_data_h, &val_h); + data = ((data | val_h) << 32) | val_l; + + regmap_read(npcm_regmap, pdata->ctl_source_id, &id); + id = (id & pdata->source_id_ce_mask) >> pdata->source_id_ce_shift; + + regmap_read(npcm_regmap, pdata->ctl_ce_synd, &synd); + synd = (synd & pdata->ce_synd_mask) >> pdata->ce_synd_shift; + + snprintf(priv->message, EDAC_MSG_SIZE, + "addr = 0x%llx, data = 0x%llx, id = 0x%x", addr, data, id); + + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, addr >> PAGE_SHIFT, + addr & ~PAGE_MASK, synd, 0, 0, -1, priv->message, ""); +} + +static void handle_ue(struct mem_ctl_info *mci) +{ + struct priv_data *priv = mci->pvt_info; + const struct npcm_platform_data *pdata; + u32 val_h = 0, val_l, id, synd; + u64 addr = 0, data = 0; + + pdata = priv->pdata; + regmap_read(npcm_regmap, pdata->ctl_ue_addr_l, &val_l); + if (pdata->chip == NPCM8XX_CHIP) { + regmap_read(npcm_regmap, pdata->ctl_ue_addr_h, &val_h); + val_h &= pdata->ue_addr_h_mask; + } + addr = ((addr | val_h) << 32) | val_l; + + regmap_read(npcm_regmap, pdata->ctl_ue_data_l, &val_l); + if (pdata->chip == NPCM8XX_CHIP) + regmap_read(npcm_regmap, pdata->ctl_ue_data_h, &val_h); + data = ((data | val_h) << 32) | val_l; + + regmap_read(npcm_regmap, pdata->ctl_source_id, &id); + id = (id & pdata->source_id_ue_mask) >> pdata->source_id_ue_shift; + + regmap_read(npcm_regmap, pdata->ctl_ue_synd, &synd); + synd = (synd & pdata->ue_synd_mask) >> pdata->ue_synd_shift; + + snprintf(priv->message, EDAC_MSG_SIZE, + "addr = 0x%llx, data = 0x%llx, id = 0x%x", addr, data, id); + + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, addr >> PAGE_SHIFT, + addr & ~PAGE_MASK, synd, 0, 0, -1, priv->message, ""); +} + +static irqreturn_t edac_ecc_isr(int irq, void *dev_id) +{ + const struct npcm_platform_data *pdata; + struct mem_ctl_info *mci = dev_id; + u32 status; + + pdata = ((struct priv_data *)mci->pvt_info)->pdata; + regmap_read(npcm_regmap, pdata->ctl_int_status, &status); + if (status & pdata->int_status_ce_mask) { + handle_ce(mci); + + /* acknowledge the CE interrupt */ + regmap_write(npcm_regmap, pdata->ctl_int_ack, + pdata->int_ack_ce_mask); + return IRQ_HANDLED; + } else if (status & pdata->int_status_ue_mask) { + handle_ue(mci); + + /* acknowledge the UE interrupt */ + regmap_write(npcm_regmap, pdata->ctl_int_ack, + pdata->int_ack_ue_mask); + return IRQ_HANDLED; + } + + WARN_ON_ONCE(1); + return IRQ_NONE; +} + +static ssize_t force_ecc_error(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + struct priv_data *priv = mci->pvt_info; + const struct npcm_platform_data *pdata; + u32 val, syndrome; + int ret; + + pdata = priv->pdata; + edac_printk(KERN_INFO, EDAC_MOD_NAME, + "force an ECC error, type = %d, location = %d, bit = %d\n", + priv->error_type, priv->location, priv->bit); + + /* ensure no pending writes */ + ret = regmap_read_poll_timeout(npcm_regmap, pdata->ctl_controller_busy, + val, !(val & pdata->controller_busy_mask), + 1000, 10000); + if (ret) { + edac_printk(KERN_INFO, EDAC_MOD_NAME, + "wait pending writes timeout\n"); + return count; + } + + regmap_read(npcm_regmap, pdata->ctl_xor_check_bits, &val); + val &= ~pdata->xor_check_bits_mask; + + /* write syndrome to 
XOR_CHECK_BITS */ + if (priv->error_type == ERROR_TYPE_CORRECTABLE) { + if (priv->location == ERROR_LOCATION_DATA && + priv->bit > ERROR_BIT_DATA_MAX) { + edac_printk(KERN_INFO, EDAC_MOD_NAME, + "data bit should not exceed %d (%d)\n", + ERROR_BIT_DATA_MAX, priv->bit); + return count; + } + + if (priv->location == ERROR_LOCATION_CHECKCODE && + priv->bit > ERROR_BIT_CHECKCODE_MAX) { + edac_printk(KERN_INFO, EDAC_MOD_NAME, + "checkcode bit should not exceed %d (%d)\n", + ERROR_BIT_CHECKCODE_MAX, priv->bit); + return count; + } + + syndrome = priv->location ? 1 << priv->bit + : data_synd[priv->bit]; + + regmap_write(npcm_regmap, pdata->ctl_xor_check_bits, + val | (syndrome << pdata->xor_check_bits_shift) | + pdata->writeback_en_mask); + } else if (priv->error_type == ERROR_TYPE_UNCORRECTABLE) { + regmap_write(npcm_regmap, pdata->ctl_xor_check_bits, + val | (UE_SYNDROME << pdata->xor_check_bits_shift)); + } + + /* force write check */ + regmap_update_bits(npcm_regmap, pdata->ctl_xor_check_bits, + pdata->fwc_mask, pdata->fwc_mask); + + return count; +} + +static const struct file_operations force_ecc_error_fops = { + .open = simple_open, + .write = force_ecc_error, + .llseek = generic_file_llseek, +}; + +/* + * Setup debugfs for error injection. + * + * Nodes: + * error_type - 0: CE, 1: UE + * location - 0: data, 1: checkcode + * bit - 0 ~ 63 for data and 0 ~ 7 for checkcode + * force_ecc_error - trigger + * + * Examples: + * 1. Inject a correctable error (CE) at checkcode bit 7. + * ~# echo 0 > /sys/kernel/debug/edac/npcm-edac/error_type + * ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/location + * ~# echo 7 > /sys/kernel/debug/edac/npcm-edac/bit + * ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/force_ecc_error + * + * 2. Inject an uncorrectable error (UE). 
+ * ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/error_type + * ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/force_ecc_error + */ +static void setup_debugfs(struct mem_ctl_info *mci) +{ + struct priv_data *priv = mci->pvt_info; + + priv->debugfs = edac_debugfs_create_dir(mci->mod_name); + if (!priv->debugfs) + return; + + edac_debugfs_create_x8("error_type", 0644, priv->debugfs, &priv->error_type); + edac_debugfs_create_x8("location", 0644, priv->debugfs, &priv->location); + edac_debugfs_create_x8("bit", 0644, priv->debugfs, &priv->bit); + edac_debugfs_create_file("force_ecc_error", 0200, priv->debugfs, + &mci->dev, &force_ecc_error_fops); +} + +static int setup_irq(struct mem_ctl_info *mci, struct platform_device *pdev) +{ + const struct npcm_platform_data *pdata; + int ret, irq; + + pdata = ((struct priv_data *)mci->pvt_info)->pdata; + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, "IRQ not defined in DTS\n"); + return irq; + } + + ret = devm_request_irq(&pdev->dev, irq, edac_ecc_isr, 0, + dev_name(&pdev->dev), mci); + if (ret < 0) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, "failed to request IRQ\n"); + return ret; + } + + /* enable the functional group of ECC and mask the others */ + regmap_write(npcm_regmap, pdata->ctl_int_mask_master, + pdata->int_mask_master_non_ecc_mask); + + if (pdata->chip == NPCM8XX_CHIP) + regmap_write(npcm_regmap, pdata->ctl_int_mask_ecc, + pdata->int_mask_ecc_non_event_mask); + + return 0; +} + +static const struct regmap_config npcm_regmap_cfg = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, +}; + +static int edac_probe(struct platform_device *pdev) +{ + const struct npcm_platform_data *pdata; + struct device *dev = &pdev->dev; + struct edac_mc_layer layers[1]; + struct mem_ctl_info *mci; + struct priv_data *priv; + void __iomem *reg; + u32 val; + int rc; + + reg = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + npcm_regmap = devm_regmap_init_mmio(dev, reg, &npcm_regmap_cfg); + if (IS_ERR(npcm_regmap)) + return PTR_ERR(npcm_regmap); + + pdata = of_device_get_match_data(dev); + if (!pdata) + return -EINVAL; + + /* bail out if ECC is not enabled */ + regmap_read(npcm_regmap, pdata->ctl_ecc_en, &val); + if (!(val & pdata->ecc_en_mask)) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, "ECC is not enabled\n"); + return -EPERM; + } + + edac_op_state = EDAC_OPSTATE_INT; + + layers[0].type = EDAC_MC_LAYER_ALL_MEM; + layers[0].size = 1; + + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + sizeof(struct priv_data)); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + priv = mci->pvt_info; + priv->reg = reg; + priv->pdata = pdata; + platform_set_drvdata(pdev, mci); + + mci->mtype_cap = MEM_FLAG_DDR4; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->scrub_cap = SCRUB_FLAG_HW_SRC; + mci->scrub_mode = SCRUB_HW_SRC; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->ctl_name = "npcm_ddr_controller"; + mci->dev_name = dev_name(&pdev->dev); + mci->mod_name = EDAC_MOD_NAME; + mci->ctl_page_to_phys = NULL; + + rc = setup_irq(mci, pdev); + if (rc) + goto free_edac_mc; + + rc = edac_mc_add_mc(mci); + if (rc) + goto free_edac_mc; + + if (IS_ENABLED(CONFIG_EDAC_DEBUG) && pdata->chip == NPCM8XX_CHIP) + setup_debugfs(mci); + + return rc; + +free_edac_mc: + edac_mc_free(mci); + return rc; +} + +static void edac_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + struct priv_data *priv = mci->pvt_info; + const struct npcm_platform_data *pdata; + + pdata = 
priv->pdata; + if (IS_ENABLED(CONFIG_EDAC_DEBUG) && pdata->chip == NPCM8XX_CHIP) + edac_debugfs_remove_recursive(priv->debugfs); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + + regmap_write(npcm_regmap, pdata->ctl_int_mask_master, + pdata->int_mask_master_global_mask); + regmap_update_bits(npcm_regmap, pdata->ctl_ecc_en, pdata->ecc_en_mask, 0); +} + +static const struct npcm_platform_data npcm750_edac = { + .chip = NPCM7XX_CHIP, + + /* memory controller registers */ + .ctl_ecc_en = 0x174, + .ctl_int_status = 0x1d0, + .ctl_int_ack = 0x1d4, + .ctl_int_mask_master = 0x1d8, + .ctl_ce_addr_l = 0x188, + .ctl_ce_data_l = 0x190, + .ctl_ce_synd = 0x18c, + .ctl_ue_addr_l = 0x17c, + .ctl_ue_data_l = 0x184, + .ctl_ue_synd = 0x180, + .ctl_source_id = 0x194, + + /* masks and shifts */ + .ecc_en_mask = BIT(24), + .int_status_ce_mask = GENMASK(4, 3), + .int_status_ue_mask = GENMASK(6, 5), + .int_ack_ce_mask = GENMASK(4, 3), + .int_ack_ue_mask = GENMASK(6, 5), + .int_mask_master_non_ecc_mask = GENMASK(30, 7) | GENMASK(2, 0), + .int_mask_master_global_mask = BIT(31), + .ce_synd_mask = GENMASK(6, 0), + .ce_synd_shift = 0, + .ue_synd_mask = GENMASK(6, 0), + .ue_synd_shift = 0, + .source_id_ce_mask = GENMASK(29, 16), + .source_id_ce_shift = 16, + .source_id_ue_mask = GENMASK(13, 0), + .source_id_ue_shift = 0, +}; + +static const struct npcm_platform_data npcm845_edac = { + .chip = NPCM8XX_CHIP, + + /* memory controller registers */ + .ctl_ecc_en = 0x16c, + .ctl_int_status = 0x228, + .ctl_int_ack = 0x244, + .ctl_int_mask_master = 0x220, + .ctl_int_mask_ecc = 0x260, + .ctl_ce_addr_l = 0x18c, + .ctl_ce_addr_h = 0x190, + .ctl_ce_data_l = 0x194, + .ctl_ce_data_h = 0x198, + .ctl_ce_synd = 0x190, + .ctl_ue_addr_l = 0x17c, + .ctl_ue_addr_h = 0x180, + .ctl_ue_data_l = 0x184, + .ctl_ue_data_h = 0x188, + .ctl_ue_synd = 0x180, + .ctl_source_id = 0x19c, + .ctl_controller_busy = 0x20c, + .ctl_xor_check_bits = 0x174, + + /* masks and shifts */ + .ecc_en_mask = GENMASK(17, 16), + .int_status_ce_mask = GENMASK(1, 0), + .int_status_ue_mask = GENMASK(3, 2), + .int_ack_ce_mask = GENMASK(1, 0), + .int_ack_ue_mask = GENMASK(3, 2), + .int_mask_master_non_ecc_mask = GENMASK(30, 3) | GENMASK(1, 0), + .int_mask_master_global_mask = BIT(31), + .int_mask_ecc_non_event_mask = GENMASK(8, 4), + .ce_addr_h_mask = GENMASK(1, 0), + .ce_synd_mask = GENMASK(15, 8), + .ce_synd_shift = 8, + .ue_addr_h_mask = GENMASK(1, 0), + .ue_synd_mask = GENMASK(15, 8), + .ue_synd_shift = 8, + .source_id_ce_mask = GENMASK(29, 16), + .source_id_ce_shift = 16, + .source_id_ue_mask = GENMASK(13, 0), + .source_id_ue_shift = 0, + .controller_busy_mask = BIT(0), + .xor_check_bits_mask = GENMASK(23, 16), + .xor_check_bits_shift = 16, + .writeback_en_mask = BIT(24), + .fwc_mask = BIT(8), +}; + +static const struct of_device_id npcm_edac_of_match[] = { + { + .compatible = "nuvoton,npcm750-memory-controller", + .data = &npcm750_edac + }, + { + .compatible = "nuvoton,npcm845-memory-controller", + .data = &npcm845_edac + }, + {}, +}; + +MODULE_DEVICE_TABLE(of, npcm_edac_of_match); + +static struct platform_driver npcm_edac_driver = { + .driver = { + .name = "npcm-edac", + .of_match_table = npcm_edac_of_match, + }, + .probe = edac_probe, + .remove = edac_remove, +}; + +module_platform_driver(npcm_edac_driver); + +MODULE_AUTHOR("Medad CChien <medadyoung@gmail.com>"); +MODULE_AUTHOR("Marvin Lin <kflin@nuvoton.com>"); +MODULE_DESCRIPTION("Nuvoton NPCM EDAC Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c 
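The Octeon hunks below repeat the conversion already applied to mpc85xx_edac.c above: platform-driver .remove() callbacks now return void, because the driver core never acted on a returned error anyway. As a minimal sketch of the resulting pattern (the foo_* names are placeholders, not from this patch):

static void foo_remove(struct platform_device *pdev)
{
	struct edac_device_ctl_info *ci = platform_get_drvdata(pdev);

	edac_device_del_device(&pdev->dev);
	edac_device_free_ctl_info(ci);
	/* no return value: the core cannot act on remove() failures */
}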
index c33059e9b0be..e6b1595a3cb5 100644 --- a/drivers/edac/octeon_edac-l2c.c +++ b/drivers/edac/octeon_edac-l2c.c @@ -138,7 +138,7 @@ static int octeon_l2c_probe(struct platform_device *pdev) /* 'Tags' are block 0, 'Data' is block 1*/ l2c = edac_device_alloc_ctl_info(0, "l2c", num_tads, "l2c", 2, 0, - NULL, 0, edac_device_alloc_index()); + edac_device_alloc_index()); if (!l2c) return -ENOMEM; @@ -184,14 +184,12 @@ err: return -ENXIO; } -static int octeon_l2c_remove(struct platform_device *pdev) +static void octeon_l2c_remove(struct platform_device *pdev) { struct edac_device_ctl_info *l2c = platform_get_drvdata(pdev); edac_device_del_device(&pdev->dev); edac_device_free_ctl_info(l2c); - - return 0; } static struct platform_driver octeon_l2c_driver = { @@ -203,5 +201,6 @@ static struct platform_driver octeon_l2c_driver = { }; module_platform_driver(octeon_l2c_driver); +MODULE_DESCRIPTION("Cavium Octeon Secondary Caches (L2C) EDAC driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index aeb222ca3ed1..f7176b95b4fe 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -302,13 +302,12 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) return 0; } -static int octeon_lmc_edac_remove(struct platform_device *pdev) +static void octeon_lmc_edac_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); - return 0; } static struct platform_driver octeon_lmc_edac_driver = { @@ -320,5 +319,6 @@ static struct platform_driver octeon_lmc_edac_driver = { }; module_platform_driver(octeon_lmc_edac_driver); +MODULE_DESCRIPTION("Cavium Octeon DRAM Memory Controller (LMC) EDAC driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c index 754eced59c32..aa1219db0b17 100644 --- a/drivers/edac/octeon_edac-pc.c +++ b/drivers/edac/octeon_edac-pc.c @@ -92,7 +92,7 @@ static int co_cache_error_probe(struct platform_device *pdev) platform_set_drvdata(pdev, p); p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(), - "cache", 2, 0, NULL, 0, + "cache", 2, 0, edac_device_alloc_index()); if (!p->ed) goto err; @@ -119,14 +119,13 @@ err: return -ENXIO; } -static int co_cache_error_remove(struct platform_device *pdev) +static void co_cache_error_remove(struct platform_device *pdev) { struct co_cache_error *p = platform_get_drvdata(pdev); unregister_co_cache_error_notifier(&p->notifier); edac_device_del_device(&pdev->dev); edac_device_free_ctl_info(p->ed); - return 0; } static struct platform_driver co_cache_error_driver = { @@ -138,5 +137,6 @@ static struct platform_driver co_cache_error_driver = { }; module_platform_driver(co_cache_error_driver); +MODULE_DESCRIPTION("Cavium Octeon Primary Caches EDAC driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-pci.c b/drivers/edac/octeon_edac-pci.c index 28b238eecefc..c4f3bc33a971 100644 --- a/drivers/edac/octeon_edac-pci.c +++ b/drivers/edac/octeon_edac-pci.c @@ -87,14 +87,12 @@ err: return res; } -static int octeon_pci_remove(struct platform_device *pdev) +static void octeon_pci_remove(struct platform_device *pdev) { struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev); edac_pci_del_device(&pdev->dev); edac_pci_free_ctl_info(pci); - - return 0; } static struct 
platform_driver octeon_pci_driver = { @@ -106,5 +104,6 @@ static struct platform_driver octeon_pci_driver = { }; module_platform_driver(octeon_pci_driver); +MODULE_DESCRIPTION("Cavium Octeon PCI Controller EDAC driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 2b306f2cc605..af14c8a3279f 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -16,18 +16,20 @@ * rank, bank, row and column using the appropriate "dunit_ops" functions/parameters. */ -#include <linux/module.h> +#include <linux/bitmap.h> +#include <linux/delay.h> +#include <linux/edac.h> #include <linux/init.h> +#include <linux/math64.h> +#include <linux/mmzone.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> #include <linux/pci.h> #include <linux/pci_ids.h> +#include <linux/sizes.h> #include <linux/slab.h> -#include <linux/delay.h> -#include <linux/edac.h> -#include <linux/mmzone.h> #include <linux/smp.h> -#include <linux/bitmap.h> -#include <linux/math64.h> -#include <linux/mod_devicetable.h> + #include <linux/platform_data/x86/p2sb.h> #include <asm/cpu_device_id.h> @@ -109,7 +111,6 @@ static struct mem_ctl_info *pnd2_mci; #define MOT_CHAN_INTLV_BIT_1SLC_2CH 12 #define MOT_CHAN_INTLV_BIT_2SLC_2CH 13 #define SELECTOR_DISABLED (-1) -#define _4GB (1ul << 32) #define PMI_ADDRESS_WIDTH 31 #define PND_MAX_PHYS_BIT 39 @@ -183,7 +184,7 @@ static int _apl_rd_reg(int port, int off, int op, u32 *data) } P2SB_READ(dword, P2SB_DATA_OFF, data); - ret = (status >> 1) & 0x3; + ret = (status >> 1) & GENMASK(1, 0); out: /* Hide the P2SB device, if it was hidden before */ if (hidden) @@ -307,7 +308,7 @@ static bool two_channels; /* Both PMI channels in one slice enabled */ static u8 sym_chan_mask; static u8 asym_chan_mask; -static u8 chan_mask; +static unsigned long chan_mask; static int slice_selector = -1; static int chan_selector = -1; @@ -329,7 +330,7 @@ static void mk_region_mask(char *name, struct region *rp, u64 base, u64 mask) return; } if (mask != GENMASK_ULL(PND_MAX_PHYS_BIT, __ffs(mask))) { - pr_info(FW_BUG "MOT mask not power of two\n"); + pr_info(FW_BUG "MOT mask is invalid\n"); return; } if (base & ~mask) { @@ -371,7 +372,7 @@ static int gen_asym_mask(struct b_cr_slice_channel_hash *p, struct b_cr_asym_mem_region1_mchbar *as1, struct b_cr_asym_2way_mem_region_mchbar *as2way) { - const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; + static const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; int mask = 0; if (as2way->asym_2way_interleave_enable) @@ -488,7 +489,7 @@ static int dnv_get_registers(void) */ static int get_registers(void) { - const int intlv[] = { 10, 11, 12, 12 }; + static const int intlv[] = { 10, 11, 12, 12 }; if (RD_REG(&tolud, b_cr_tolud_pci) || RD_REG(&touud_lo, b_cr_touud_lo_pci) || @@ -587,7 +588,7 @@ static int get_registers(void) /* Get a contiguous memory address (remove the MMIO gap) */ static u64 remove_mmio_gap(u64 sys) { - return (sys < _4GB) ? sys : sys - (_4GB - top_lm); + return (sys < SZ_4G) ? 
sys : sys - (SZ_4G - top_lm); } /* Squeeze out one address bit, shift upper part down to fill gap */ @@ -598,7 +599,7 @@ static void remove_addr_bit(u64 *addr, int bitidx) if (bitidx == -1) return; - mask = (1ull << bitidx) - 1; + mask = BIT_ULL(bitidx) - 1; *addr = ((*addr >> 1) & ~mask) | (*addr & mask); } @@ -642,8 +643,8 @@ static int sys2pmi(const u64 addr, u32 *pmiidx, u64 *pmiaddr, char *msg) int sym_chan_shift = sym_channels >> 1; /* Give up if address is out of range, or in MMIO gap */ - if (addr >= (1ul << PND_MAX_PHYS_BIT) || - (addr >= top_lm && addr < _4GB) || addr >= top_hm) { + if (addr >= BIT(PND_MAX_PHYS_BIT) || + (addr >= top_lm && addr < SZ_4G) || addr >= top_hm) { snprintf(msg, PND2_MSG_SIZE, "Error address 0x%llx is not DRAM", addr); return -EINVAL; } @@ -727,10 +728,10 @@ static int sys2pmi(const u64 addr, u32 *pmiidx, u64 *pmiaddr, char *msg) } /* Translate PMI address to memory (rank, row, bank, column) */ -#define C(n) (0x10 | (n)) /* column */ -#define B(n) (0x20 | (n)) /* bank */ -#define R(n) (0x40 | (n)) /* row */ -#define RS (0x80) /* rank */ +#define C(n) (BIT(4) | (n)) /* column */ +#define B(n) (BIT(5) | (n)) /* bank */ +#define R(n) (BIT(6) | (n)) /* row */ +#define RS (BIT(7)) /* rank */ /* addrdec values */ #define AMAP_1KB 0 @@ -1064,9 +1065,9 @@ static int apl_check_ecc_active(void) int i, ret = 0; /* Check dramtype and ECC mode for each present DIMM */ - for (i = 0; i < APL_NUM_CHANNELS; i++) - if (chan_mask & BIT(i)) - ret += check_channel(i); + for_each_set_bit(i, &chan_mask, APL_NUM_CHANNELS) + ret += check_channel(i); + return ret ? -EINVAL : 0; } @@ -1205,10 +1206,7 @@ static void apl_get_dimm_config(struct mem_ctl_info *mci) u64 capacity; int i, g; - for (i = 0; i < APL_NUM_CHANNELS; i++) { - if (!(chan_mask & BIT(i))) - continue; - + for_each_set_bit(i, &chan_mask, APL_NUM_CHANNELS) { dimm = edac_get_dimm(mci, i, 0, 0); if (!dimm) { edac_dbg(0, "No allocated DIMM for channel %d\n", i); @@ -1228,8 +1226,7 @@ static void apl_get_dimm_config(struct mem_ctl_info *mci) } pvt->dimm_geom[i] = g; - capacity = (d->rken0 + d->rken1) * 8 * (1ul << dimms[g].rowbits) * - (1ul << dimms[g].colbits); + capacity = (d->rken0 + d->rken1) * 8 * BIT(dimms[g].rowbits + dimms[g].colbits); edac_dbg(0, "Channel %d: %lld MByte DIMM\n", i, capacity >> (20 - 3)); dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3)); dimm->grain = 32; @@ -1295,7 +1292,7 @@ static void dnv_get_dimm_config(struct mem_ctl_info *mci) continue; } - capacity = ranks_of_dimm[j] * banks * (1ul << rowbits) * (1ul << colbits); + capacity = ranks_of_dimm[j] * banks * BIT(rowbits + colbits); edac_dbg(0, "Channel %d DIMM %d: %lld MByte DIMM\n", i, j, capacity >> (20 - 3)); dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3)); dimm->grain = 32; @@ -1514,8 +1511,8 @@ static struct dunit_ops dnv_ops = { }; static const struct x86_cpu_id pnd2_cpuids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &apl_ops), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &dnv_ops), + X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &apl_ops), + X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &dnv_ops), { } }; MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids); diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c deleted file mode 100644 index 046969b4e82e..000000000000 --- a/drivers/edac/ppc4xx_edac.c +++ /dev/null @@ -1,1428 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2008 Nuovation System Designs, LLC - * Grant Erickson <gerickson@nuovations.com> - */ - -#include <linux/edac.h> -#include <linux/interrupt.h> 
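One change in the pnd2 hunks above deserves a worked example: remove_addr_bit() deletes a single bit from an address by keeping the bits below bitidx and shifting everything above it down by one. A standalone check of that identity (the values here are hypothetical):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t addr = 0x12345678;
	int bitidx = 12;
	uint64_t mask = (1ULL << bitidx) - 1;	/* bits [11:0] pass through */

	/* drop bit 12 and slide bits [63:13] down into its place */
	addr = ((addr >> 1) & ~mask) | (addr & mask);

	printf("0x%llx\n", (unsigned long long)addr);	/* prints 0x91a2678 */
	return 0;
}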
-#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/of_device.h> -#include <linux/of_irq.h> -#include <linux/of_platform.h> -#include <linux/types.h> - -#include <asm/dcr.h> - -#include "edac_module.h" -#include "ppc4xx_edac.h" - -/* - * This file implements a driver for monitoring and handling events - * associated with the IMB DDR2 ECC controller found in the AMCC/IBM - * 405EX[r], 440SP, 440SPe, 460EX, 460GT and 460SX. - * - * As realized in the 405EX[r], this controller features: - * - * - Support for registered- and non-registered DDR1 and DDR2 memory. - * - 32-bit or 16-bit memory interface with optional ECC. - * - * o ECC support includes: - * - * - 4-bit SEC/DED - * - Aligned-nibble error detect - * - Bypass mode - * - * - Two (2) memory banks/ranks. - * - Up to 1 GiB per bank/rank in 32-bit mode and up to 512 MiB per - * bank/rank in 16-bit mode. - * - * As realized in the 440SP and 440SPe, this controller changes/adds: - * - * - 64-bit or 32-bit memory interface with optional ECC. - * - * o ECC support includes: - * - * - 8-bit SEC/DED - * - Aligned-nibble error detect - * - Bypass mode - * - * - Up to 4 GiB per bank/rank in 64-bit mode and up to 2 GiB - * per bank/rank in 32-bit mode. - * - * As realized in the 460EX and 460GT, this controller changes/adds: - * - * - 64-bit or 32-bit memory interface with optional ECC. - * - * o ECC support includes: - * - * - 8-bit SEC/DED - * - Aligned-nibble error detect - * - Bypass mode - * - * - Four (4) memory banks/ranks. - * - Up to 16 GiB per bank/rank in 64-bit mode and up to 8 GiB - * per bank/rank in 32-bit mode. - * - * At present, this driver has ONLY been tested against the controller - * realization in the 405EX[r] on the AMCC Kilauea and Haleakala - * boards (256 MiB w/o ECC memory soldered onto the board) and a - * proprietary board based on those designs (128 MiB ECC memory, also - * soldered onto the board). - * - * Dynamic feature detection and handling needs to be added for the - * other realizations of this controller listed above. - * - * Eventually, this driver will likely be adapted to the above variant - * realizations of this controller as well as broken apart to handle - * the other known ECC-capable controllers prevalent in other 4xx - * processors: - * - * - IBM SDRAM (405GP, 405CR and 405EP) "ibm,sdram-4xx" - * - IBM DDR1 (440GP, 440GX, 440EP and 440GR) "ibm,sdram-4xx-ddr" - * - Denali DDR1/DDR2 (440EPX and 440GRX) "denali,sdram-4xx-ddr2" - * - * For this controller, unfortunately, correctable errors report - * nothing more than the beat/cycle and byte/lane the correction - * occurred on and the check bit group that covered the error. - * - * In contrast, uncorrectable errors also report the failing address, - * the bus master and the transaction direction (i.e. read or write) - * - * Regardless of whether the error is a CE or a UE, we report the - * following pieces of information in the driver-unique message to the - * EDAC subsystem: - * - * - Device tree path - * - Bank(s) - * - Check bit error group - * - Beat(s)/lane(s) - */ - -/* Preprocessor Definitions */ - -#define EDAC_OPSTATE_INT_STR "interrupt" -#define EDAC_OPSTATE_POLL_STR "polled" -#define EDAC_OPSTATE_UNKNOWN_STR "unknown" - -#define PPC4XX_EDAC_MODULE_NAME "ppc4xx_edac" -#define PPC4XX_EDAC_MODULE_REVISION "v1.0.0" - -#define PPC4XX_EDAC_MESSAGE_SIZE 256 - -/* - * Kernel logging without an EDAC instance - */ -#define ppc4xx_edac_printk(level, fmt, arg...) 
\ - edac_printk(level, "PPC4xx MC", fmt, ##arg) - -/* - * Kernel logging with an EDAC instance - */ -#define ppc4xx_edac_mc_printk(level, mci, fmt, arg...) \ - edac_mc_chipset_printk(mci, level, "PPC4xx", fmt, ##arg) - -/* - * Macros to convert bank configuration size enumerations into MiB and - * page values. - */ -#define SDRAM_MBCF_SZ_MiB_MIN 4 -#define SDRAM_MBCF_SZ_TO_MiB(n) (SDRAM_MBCF_SZ_MiB_MIN \ - << (SDRAM_MBCF_SZ_DECODE(n))) -#define SDRAM_MBCF_SZ_TO_PAGES(n) (SDRAM_MBCF_SZ_MiB_MIN \ - << (20 - PAGE_SHIFT + \ - SDRAM_MBCF_SZ_DECODE(n))) - -/* - * The ibm,sdram-4xx-ddr2 Device Control Registers (DCRs) are - * indirectly accessed and have a base and length defined by the - * device tree. The base can be anything; however, we expect the - * length to be precisely two registers, the first for the address - * window and the second for the data window. - */ -#define SDRAM_DCR_RESOURCE_LEN 2 -#define SDRAM_DCR_ADDR_OFFSET 0 -#define SDRAM_DCR_DATA_OFFSET 1 - -/* - * Device tree interrupt indices - */ -#define INTMAP_ECCDED_INDEX 0 /* Double-bit Error Detect */ -#define INTMAP_ECCSEC_INDEX 1 /* Single-bit Error Correct */ - -/* Type Definitions */ - -/* - * PPC4xx SDRAM memory controller private instance data - */ -struct ppc4xx_edac_pdata { - dcr_host_t dcr_host; /* Indirect DCR address/data window mapping */ - struct { - int sec; /* Single-bit correctable error IRQ assigned */ - int ded; /* Double-bit detectable error IRQ assigned */ - } irqs; -}; - -/* - * Various status data gathered and manipulated when checking and - * reporting ECC status. - */ -struct ppc4xx_ecc_status { - u32 ecces; - u32 besr; - u32 bearh; - u32 bearl; - u32 wmirq; -}; - -/* Global Variables */ - -/* - * Device tree node type and compatible tuples this driver can match - * on. - */ -static const struct of_device_id ppc4xx_edac_match[] = { - { - .compatible = "ibm,sdram-4xx-ddr2" - }, - { } -}; -MODULE_DEVICE_TABLE(of, ppc4xx_edac_match); - -/* - * TODO: The row and channel parameters likely need to be dynamically - * set based on the aforementioned variant controller realizations. - */ -static const unsigned ppc4xx_edac_nr_csrows = 2; -static const unsigned ppc4xx_edac_nr_chans = 1; - -/* - * Strings associated with PLB master IDs capable of being posted in - * SDRAM_BESR or SDRAM_WMIRQ on uncorrectable ECC errors. - */ -static const char * const ppc4xx_plb_masters[9] = { - [SDRAM_PLB_M0ID_ICU] = "ICU", - [SDRAM_PLB_M0ID_PCIE0] = "PCI-E 0", - [SDRAM_PLB_M0ID_PCIE1] = "PCI-E 1", - [SDRAM_PLB_M0ID_DMA] = "DMA", - [SDRAM_PLB_M0ID_DCU] = "DCU", - [SDRAM_PLB_M0ID_OPB] = "OPB", - [SDRAM_PLB_M0ID_MAL] = "MAL", - [SDRAM_PLB_M0ID_SEC] = "SEC", - [SDRAM_PLB_M0ID_AHB] = "AHB" -}; - -/** - * mfsdram - read and return controller register data - * @dcr_host: A pointer to the DCR mapping. - * @idcr_n: The indirect DCR register to read. - * - * This routine reads and returns the data associated with the - * controller's specified indirect DCR register. - * - * Returns the read data. - */ -static inline u32 -mfsdram(const dcr_host_t *dcr_host, unsigned int idcr_n) -{ - return __mfdcri(dcr_host->base + SDRAM_DCR_ADDR_OFFSET, - dcr_host->base + SDRAM_DCR_DATA_OFFSET, - idcr_n); -} - -/** - * mtsdram - write controller register data - * @dcr_host: A pointer to the DCR mapping. - * @idcr_n: The indirect DCR register to write. - * @value: The data to write. - * - * This routine writes the provided data to the controller's specified - * indirect DCR register. 
- */ -static inline void -mtsdram(const dcr_host_t *dcr_host, unsigned int idcr_n, u32 value) -{ - return __mtdcri(dcr_host->base + SDRAM_DCR_ADDR_OFFSET, - dcr_host->base + SDRAM_DCR_DATA_OFFSET, - idcr_n, - value); -} - -/** - * ppc4xx_edac_check_bank_error - check a bank for an ECC bank error - * @status: A pointer to the ECC status structure to check for an - * ECC bank error. - * @bank: The bank to check for an ECC error. - * - * This routine determines whether the specified bank has an ECC - * error. - * - * Returns true if the specified bank has an ECC error; otherwise, - * false. - */ -static bool -ppc4xx_edac_check_bank_error(const struct ppc4xx_ecc_status *status, - unsigned int bank) -{ - switch (bank) { - case 0: - return status->ecces & SDRAM_ECCES_BK0ER; - case 1: - return status->ecces & SDRAM_ECCES_BK1ER; - default: - return false; - } -} - -/** - * ppc4xx_edac_generate_bank_message - generate interpretted bank status message - * @mci: A pointer to the EDAC memory controller instance associated - * with the bank message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the portion of the - * driver-unique report message associated with the ECCESS[BKNER] - * field of the specified ECC status. - * - * Returns the number of characters generated on success; otherwise, < - * 0 on error. - */ -static int -ppc4xx_edac_generate_bank_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - int n, total = 0; - unsigned int row, rows; - - n = snprintf(buffer, size, "%s: Banks: ", mci->dev_name); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - for (rows = 0, row = 0; row < mci->nr_csrows; row++) { - if (ppc4xx_edac_check_bank_error(status, row)) { - n = snprintf(buffer, size, "%s%u", - (rows++ ? ", " : ""), row); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - } - } - - n = snprintf(buffer, size, "%s; ", rows ? "" : "None"); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - fail: - return total; -} - -/** - * ppc4xx_edac_generate_checkbit_message - generate interpretted checkbit message - * @mci: A pointer to the EDAC memory controller instance associated - * with the checkbit message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the portion of the - * driver-unique report message associated with the ECCESS[CKBER] - * field of the specified ECC status. - * - * Returns the number of characters generated on success; otherwise, < - * 0 on error. 
- */ -static int -ppc4xx_edac_generate_checkbit_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - const struct ppc4xx_edac_pdata *pdata = mci->pvt_info; - const char *ckber = NULL; - - switch (status->ecces & SDRAM_ECCES_CKBER_MASK) { - case SDRAM_ECCES_CKBER_NONE: - ckber = "None"; - break; - case SDRAM_ECCES_CKBER_32_ECC_0_3: - ckber = "ECC0:3"; - break; - case SDRAM_ECCES_CKBER_32_ECC_4_8: - switch (mfsdram(&pdata->dcr_host, SDRAM_MCOPT1) & - SDRAM_MCOPT1_WDTH_MASK) { - case SDRAM_MCOPT1_WDTH_16: - ckber = "ECC0:3"; - break; - case SDRAM_MCOPT1_WDTH_32: - ckber = "ECC4:8"; - break; - default: - ckber = "Unknown"; - break; - } - break; - case SDRAM_ECCES_CKBER_32_ECC_0_8: - ckber = "ECC0:8"; - break; - default: - ckber = "Unknown"; - break; - } - - return snprintf(buffer, size, "Checkbit Error: %s", ckber); -} - -/** - * ppc4xx_edac_generate_lane_message - generate interpretted byte lane message - * @mci: A pointer to the EDAC memory controller instance associated - * with the byte lane message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the portion of the - * driver-unique report message associated with the ECCESS[BNCE] - * field of the specified ECC status. - * - * Returns the number of characters generated on success; otherwise, < - * 0 on error. - */ -static int -ppc4xx_edac_generate_lane_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - int n, total = 0; - unsigned int lane, lanes; - const unsigned int first_lane = 0; - const unsigned int lane_count = 16; - - n = snprintf(buffer, size, "; Byte Lane Errors: "); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - for (lanes = 0, lane = first_lane; lane < lane_count; lane++) { - if ((status->ecces & SDRAM_ECCES_BNCE_ENCODE(lane)) != 0) { - n = snprintf(buffer, size, - "%s%u", - (lanes++ ? ", " : ""), lane); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - } - } - - n = snprintf(buffer, size, "%s; ", lanes ? "" : "None"); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - fail: - return total; -} - -/** - * ppc4xx_edac_generate_ecc_message - generate interpretted ECC status message - * @mci: A pointer to the EDAC memory controller instance associated - * with the ECCES message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the portion of the - * driver-unique report message associated with the ECCESS register of - * the specified ECC status. - * - * Returns the number of characters generated on success; otherwise, < - * 0 on error. 
- */ -static int -ppc4xx_edac_generate_ecc_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - int n, total = 0; - - n = ppc4xx_edac_generate_bank_message(mci, status, buffer, size); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - n = ppc4xx_edac_generate_checkbit_message(mci, status, buffer, size); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - n = ppc4xx_edac_generate_lane_message(mci, status, buffer, size); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - fail: - return total; -} - -/** - * ppc4xx_edac_generate_plb_message - generate interpretted PLB status message - * @mci: A pointer to the EDAC memory controller instance associated - * with the PLB message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the portion of the - * driver-unique report message associated with the PLB-related BESR - * and/or WMIRQ registers of the specified ECC status. - * - * Returns the number of characters generated on success; otherwise, < - * 0 on error. - */ -static int -ppc4xx_edac_generate_plb_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - unsigned int master; - bool read; - - if ((status->besr & SDRAM_BESR_MASK) == 0) - return 0; - - if ((status->besr & SDRAM_BESR_M0ET_MASK) == SDRAM_BESR_M0ET_NONE) - return 0; - - read = ((status->besr & SDRAM_BESR_M0RW_MASK) == SDRAM_BESR_M0RW_READ); - - master = SDRAM_BESR_M0ID_DECODE(status->besr); - - return snprintf(buffer, size, - "%s error w/ PLB master %u \"%s\"; ", - (read ? "Read" : "Write"), - master, - (((master >= SDRAM_PLB_M0ID_FIRST) && - (master <= SDRAM_PLB_M0ID_LAST)) ? - ppc4xx_plb_masters[master] : "UNKNOWN")); -} - -/** - * ppc4xx_edac_generate_message - generate interpretted status message - * @mci: A pointer to the EDAC memory controller instance associated - * with the driver-unique message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the driver-unique - * EDAC report message from the specified ECC status. - */ -static void -ppc4xx_edac_generate_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - int n; - - if (buffer == NULL || size == 0) - return; - - n = ppc4xx_edac_generate_ecc_message(mci, status, buffer, size); - - if (n < 0 || n >= size) - return; - - buffer += n; - size -= n; - - ppc4xx_edac_generate_plb_message(mci, status, buffer, size); -} - -#ifdef DEBUG -/** - * ppc4xx_ecc_dump_status - dump controller ECC status registers - * @mci: A pointer to the EDAC memory controller instance - * associated with the status being dumped. - * @status: A pointer to the ECC status structure to generate the - * dump from. - * - * This routine dumps to the kernel log buffer the raw and - * interpretted specified ECC status. 
- */ -static void -ppc4xx_ecc_dump_status(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status) -{ - char message[PPC4XX_EDAC_MESSAGE_SIZE]; - - ppc4xx_edac_generate_message(mci, status, message, sizeof(message)); - - ppc4xx_edac_mc_printk(KERN_INFO, mci, - "\n" - "\tECCES: 0x%08x\n" - "\tWMIRQ: 0x%08x\n" - "\tBESR: 0x%08x\n" - "\tBEAR: 0x%08x%08x\n" - "\t%s\n", - status->ecces, - status->wmirq, - status->besr, - status->bearh, - status->bearl, - message); -} -#endif /* DEBUG */ - -/** - * ppc4xx_ecc_get_status - get controller ECC status - * @mci: A pointer to the EDAC memory controller instance - * associated with the status being retrieved. - * @status: A pointer to the ECC status structure to populate the - * ECC status with. - * - * This routine reads and masks, as appropriate, all the relevant - * status registers that deal with ibm,sdram-4xx-ddr2 ECC errors. - * While we read all of them, for correctable errors, we only expect - * to deal with ECCES. For uncorrectable errors, we expect to deal - * with all of them. - */ -static void -ppc4xx_ecc_get_status(const struct mem_ctl_info *mci, - struct ppc4xx_ecc_status *status) -{ - const struct ppc4xx_edac_pdata *pdata = mci->pvt_info; - const dcr_host_t *dcr_host = &pdata->dcr_host; - - status->ecces = mfsdram(dcr_host, SDRAM_ECCES) & SDRAM_ECCES_MASK; - status->wmirq = mfsdram(dcr_host, SDRAM_WMIRQ) & SDRAM_WMIRQ_MASK; - status->besr = mfsdram(dcr_host, SDRAM_BESR) & SDRAM_BESR_MASK; - status->bearl = mfsdram(dcr_host, SDRAM_BEARL); - status->bearh = mfsdram(dcr_host, SDRAM_BEARH); -} - -/** - * ppc4xx_ecc_clear_status - clear controller ECC status - * @mci: A pointer to the EDAC memory controller instance - * associated with the status being cleared. - * @status: A pointer to the ECC status structure containing the - * values to write to clear the ECC status. - * - * This routine clears--by writing the masked (as appropriate) status - * values back to--the status registers that deal with - * ibm,sdram-4xx-ddr2 ECC errors. - */ -static void -ppc4xx_ecc_clear_status(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status) -{ - const struct ppc4xx_edac_pdata *pdata = mci->pvt_info; - const dcr_host_t *dcr_host = &pdata->dcr_host; - - mtsdram(dcr_host, SDRAM_ECCES, status->ecces & SDRAM_ECCES_MASK); - mtsdram(dcr_host, SDRAM_WMIRQ, status->wmirq & SDRAM_WMIRQ_MASK); - mtsdram(dcr_host, SDRAM_BESR, status->besr & SDRAM_BESR_MASK); - mtsdram(dcr_host, SDRAM_BEARL, 0); - mtsdram(dcr_host, SDRAM_BEARH, 0); -} - -/** - * ppc4xx_edac_handle_ce - handle controller correctable ECC error (CE) - * @mci: A pointer to the EDAC memory controller instance - * associated with the correctable error being handled and reported. - * @status: A pointer to the ECC status structure associated with - * the correctable error being handled and reported. - * - * This routine handles an ibm,sdram-4xx-ddr2 controller ECC - * correctable error. Per the aforementioned discussion, there's not - * enough status available to use the full EDAC correctable error - * interface, so we just pass driver-unique message to the "no info" - * interface. 
- */ -static void -ppc4xx_edac_handle_ce(struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status) -{ - int row; - char message[PPC4XX_EDAC_MESSAGE_SIZE]; - - ppc4xx_edac_generate_message(mci, status, message, sizeof(message)); - - for (row = 0; row < mci->nr_csrows; row++) - if (ppc4xx_edac_check_bank_error(status, row)) - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - 0, 0, 0, - row, 0, -1, - message, ""); -} - -/** - * ppc4xx_edac_handle_ue - handle controller uncorrectable ECC error (UE) - * @mci: A pointer to the EDAC memory controller instance - * associated with the uncorrectable error being handled and - * reported. - * @status: A pointer to the ECC status structure associated with - * the uncorrectable error being handled and reported. - * - * This routine handles an ibm,sdram-4xx-ddr2 controller ECC - * uncorrectable error. - */ -static void -ppc4xx_edac_handle_ue(struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status) -{ - const u64 bear = ((u64)status->bearh << 32 | status->bearl); - const unsigned long page = bear >> PAGE_SHIFT; - const unsigned long offset = bear & ~PAGE_MASK; - int row; - char message[PPC4XX_EDAC_MESSAGE_SIZE]; - - ppc4xx_edac_generate_message(mci, status, message, sizeof(message)); - - for (row = 0; row < mci->nr_csrows; row++) - if (ppc4xx_edac_check_bank_error(status, row)) - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - page, offset, 0, - row, 0, -1, - message, ""); -} - -/** - * ppc4xx_edac_check - check controller for ECC errors - * @mci: A pointer to the EDAC memory controller instance - * associated with the ibm,sdram-4xx-ddr2 controller being - * checked. - * - * This routine is used to check and post ECC errors and is called by - * both the EDAC polling thread and this driver's CE and UE interrupt - * handler. - */ -static void -ppc4xx_edac_check(struct mem_ctl_info *mci) -{ -#ifdef DEBUG - static unsigned int count; -#endif - struct ppc4xx_ecc_status status; - - ppc4xx_ecc_get_status(mci, &status); - -#ifdef DEBUG - if (count++ % 30 == 0) - ppc4xx_ecc_dump_status(mci, &status); -#endif - - if (status.ecces & SDRAM_ECCES_UE) - ppc4xx_edac_handle_ue(mci, &status); - - if (status.ecces & SDRAM_ECCES_CE) - ppc4xx_edac_handle_ce(mci, &status); - - ppc4xx_ecc_clear_status(mci, &status); -} - -/** - * ppc4xx_edac_isr - SEC (CE) and DED (UE) interrupt service routine - * @irq: The virtual interrupt number being serviced. - * @dev_id: A pointer to the EDAC memory controller instance - * associated with the interrupt being handled. - * - * This routine implements the interrupt handler for both correctable - * (CE) and uncorrectable (UE) ECC errors for the ibm,sdram-4xx-ddr2 - * controller. It simply calls through to the same routine used during - * polling to check, report and clear the ECC status. - * - * Unconditionally returns IRQ_HANDLED. - */ -static irqreturn_t -ppc4xx_edac_isr(int irq, void *dev_id) -{ - struct mem_ctl_info *mci = dev_id; - - ppc4xx_edac_check(mci); - - return IRQ_HANDLED; -} - -/** - * ppc4xx_edac_get_dtype - return the controller memory width - * @mcopt1: The 32-bit Memory Controller Option 1 register value - * currently set for the controller, from which the width - * is derived. - * - * This routine returns the EDAC device type width appropriate for the - * current controller configuration. 
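The UE path above reassembles the 64-bit bus error address from BEARH/BEARL and splits it into the page/offset pair that edac_mc_handle_error() takes. The arithmetic in isolation, assuming 4 KiB pages:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12                   /* 4 KiB pages assumed */
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    int main(void)
    {
            uint32_t bearh = 0x00000001, bearl = 0x2345f00d; /* example values */
            uint64_t bear = ((uint64_t)bearh << 32) | bearl;
            unsigned long page = bear >> PAGE_SHIFT;    /* 0x12345f */
            unsigned long offset = bear & ~PAGE_MASK;   /* 0x00d */

            printf("page=0x%lx offset=0x%lx\n", page, offset);
            return 0;
    }

The CE path deliberately reports page and offset as zero: as the status-gathering comment notes, only ECCES is latched for correctable errors, so no address is available and the driver-unique message carries the detail instead.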
- *
- * TODO: This needs to be conditioned dynamically through feature
- * flags or some such when other controller variants are supported as
- * the 405EX[r] is 16-/32-bit and the others are 32-/64-bit with the
- * 16- and 64-bit field definition/value/enumeration (b1) overloaded
- * among them.
- *
- * Returns a device type width enumeration.
- */
-static enum dev_type ppc4xx_edac_get_dtype(u32 mcopt1)
-{
-        switch (mcopt1 & SDRAM_MCOPT1_WDTH_MASK) {
-        case SDRAM_MCOPT1_WDTH_16:
-                return DEV_X2;
-        case SDRAM_MCOPT1_WDTH_32:
-                return DEV_X4;
-        default:
-                return DEV_UNKNOWN;
-        }
-}
-
-/**
- * ppc4xx_edac_get_mtype - return controller memory type
- * @mcopt1: The 32-bit Memory Controller Option 1 register value
- *          currently set for the controller, from which the memory type
- *          is derived.
- *
- * This routine returns the EDAC memory type appropriate for the
- * current controller configuration.
- *
- * Returns a memory type enumeration.
- */
-static enum mem_type ppc4xx_edac_get_mtype(u32 mcopt1)
-{
-        bool rden = ((mcopt1 & SDRAM_MCOPT1_RDEN_MASK) == SDRAM_MCOPT1_RDEN);
-
-        switch (mcopt1 & SDRAM_MCOPT1_DDR_TYPE_MASK) {
-        case SDRAM_MCOPT1_DDR2_TYPE:
-                return rden ? MEM_RDDR2 : MEM_DDR2;
-        case SDRAM_MCOPT1_DDR1_TYPE:
-                return rden ? MEM_RDDR : MEM_DDR;
-        default:
-                return MEM_UNKNOWN;
-        }
-}
-
-/**
- * ppc4xx_edac_init_csrows - initialize driver instance rows
- * @mci: A pointer to the EDAC memory controller instance
- *       associated with the ibm,sdram-4xx-ddr2 controller for which
- *       the csrows (i.e. banks/ranks) are being initialized.
- * @mcopt1: The 32-bit Memory Controller Option 1 register value
- *          currently set for the controller, from which bank width
- *          and memory type information is derived.
- *
- * This routine initializes the virtual "chip select rows" associated
- * with the EDAC memory controller instance. An ibm,sdram-4xx-ddr2
- * controller bank/rank is mapped to a row.
- *
- * Returns 0 if OK; otherwise, -EINVAL if the memory bank size
- * configuration cannot be determined.
- */
-static int ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
-{
-        const struct ppc4xx_edac_pdata *pdata = mci->pvt_info;
-        int status = 0;
-        enum mem_type mtype;
-        enum dev_type dtype;
-        enum edac_type edac_mode;
-        int row, j;
-        u32 mbxcf, size, nr_pages;
-
-        /* Establish the memory type and width */
-
-        mtype = ppc4xx_edac_get_mtype(mcopt1);
-        dtype = ppc4xx_edac_get_dtype(mcopt1);
-
-        /* Establish EDAC mode */
-
-        if (mci->edac_cap & EDAC_FLAG_SECDED)
-                edac_mode = EDAC_SECDED;
-        else if (mci->edac_cap & EDAC_FLAG_EC)
-                edac_mode = EDAC_EC;
-        else
-                edac_mode = EDAC_NONE;
-
-        /*
-         * Initialize each chip select row structure which corresponds
-         * 1:1 with a controller bank/rank.
-         */
-
-        for (row = 0; row < mci->nr_csrows; row++) {
-                struct csrow_info *csi = mci->csrows[row];
-
-                /*
-                 * Get the configuration settings for this
-                 * row/bank/rank and skip disabled banks.
-                 */
-
-                mbxcf = mfsdram(&pdata->dcr_host, SDRAM_MBXCF(row));
-
-                if ((mbxcf & SDRAM_MBCF_BE_MASK) != SDRAM_MBCF_BE_ENABLE)
-                        continue;
-
-                /* Map the bank configuration size setting to pages. */
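SDRAM_MBCF_SZ_TO_PAGES() itself is not visible in this hunk. Given the encodings handled below (field value n selecting a bank of 4 MB << n bytes, from 4 MB up to 8 GB), a plausible shape for it is sketched here; the macro name is real, the body is a guess, and the real one presumably decodes the raw SZ field first via SDRAM_MBCF_SZ_DECODE():

    #define PAGE_SHIFT 12   /* assuming 4 KiB pages */

    /*
     * Hypothetical reconstruction: with n the decoded SZ field
     * (0 = 4 MB, 0xB = 8 GB), a bank of (4 MB << n) bytes holds
     * this many pages:
     */
    #define SDRAM_MBCF_SZ_TO_PAGES(n) \
            (((4UL << 20) >> PAGE_SHIFT) << (n))

    /* n = 0: 1024 pages (4 MB); n = 0xB: 2097152 pages (8 GB). */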
-
-                size = mbxcf & SDRAM_MBCF_SZ_MASK;
-
-                switch (size) {
-                case SDRAM_MBCF_SZ_4MB:
-                case SDRAM_MBCF_SZ_8MB:
-                case SDRAM_MBCF_SZ_16MB:
-                case SDRAM_MBCF_SZ_32MB:
-                case SDRAM_MBCF_SZ_64MB:
-                case SDRAM_MBCF_SZ_128MB:
-                case SDRAM_MBCF_SZ_256MB:
-                case SDRAM_MBCF_SZ_512MB:
-                case SDRAM_MBCF_SZ_1GB:
-                case SDRAM_MBCF_SZ_2GB:
-                case SDRAM_MBCF_SZ_4GB:
-                case SDRAM_MBCF_SZ_8GB:
-                        nr_pages = SDRAM_MBCF_SZ_TO_PAGES(size);
-                        break;
-                default:
-                        ppc4xx_edac_mc_printk(KERN_ERR, mci,
-                                              "Unrecognized memory bank %d "
-                                              "size 0x%08x\n",
-                                              row, SDRAM_MBCF_SZ_DECODE(size));
-                        status = -EINVAL;
-                        goto done;
-                }
-
-                /*
-                 * It's unclear exactly what grain should be set to
-                 * here. The SDRAM_ECCES register allows resolution of
-                 * an error down to a nibble which would potentially
-                 * argue for a grain of '1' byte, even though we only
-                 * know the associated address for uncorrectable
-                 * errors. This value is not used at present for
-                 * anything other than error reporting so getting it
-                 * wrong should be of little consequence. Other
-                 * possible values would be the PLB width (16), the
-                 * page size (PAGE_SIZE) or the memory width (2 or 4).
-                 */
-                for (j = 0; j < csi->nr_channels; j++) {
-                        struct dimm_info *dimm = csi->channels[j]->dimm;
-
-                        dimm->nr_pages = nr_pages / csi->nr_channels;
-                        dimm->grain = 1;
-
-                        dimm->mtype = mtype;
-                        dimm->dtype = dtype;
-
-                        dimm->edac_mode = edac_mode;
-                }
-        }
-
- done:
-        return status;
-}
-
-/**
- * ppc4xx_edac_mc_init - initialize driver instance
- * @mci: A pointer to the EDAC memory controller instance being
- *       initialized.
- * @op: A pointer to the OpenFirmware device tree node associated
- *      with the controller this EDAC instance is bound to.
- * @dcr_host: A pointer to the DCR data containing the DCR mapping
- *            for this controller instance.
- * @mcopt1: The 32-bit Memory Controller Option 1 register value
- *          currently set for the controller, from which ECC capabilities
- *          and scrub mode are derived.
- *
- * This routine performs initialization of the EDAC memory controller
- * instance and related driver-private data associated with the
- * ibm,sdram-4xx-ddr2 memory controller the instance is bound to.
- *
- * Returns 0 if OK; otherwise, < 0 on error.
- */
-static int ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
-                               struct platform_device *op,
-                               const dcr_host_t *dcr_host, u32 mcopt1)
-{
-        int status = 0;
-        const u32 memcheck = (mcopt1 & SDRAM_MCOPT1_MCHK_MASK);
-        struct ppc4xx_edac_pdata *pdata = NULL;
-        const struct device_node *np = op->dev.of_node;
-
-        if (of_match_device(ppc4xx_edac_match, &op->dev) == NULL)
-                return -EINVAL;
-
-        /* Initial driver pointers and private data */
-
-        mci->pdev = &op->dev;
-
-        dev_set_drvdata(mci->pdev, mci);
-
-        pdata = mci->pvt_info;
-
-        pdata->dcr_host = *dcr_host;
-
-        /* Initialize controller capabilities and configuration */
-
-        mci->mtype_cap = (MEM_FLAG_DDR | MEM_FLAG_RDDR |
-                          MEM_FLAG_DDR2 | MEM_FLAG_RDDR2);
-
-        mci->edac_ctl_cap = (EDAC_FLAG_NONE |
-                             EDAC_FLAG_EC |
-                             EDAC_FLAG_SECDED);
-
-        mci->scrub_cap = SCRUB_NONE;
-        mci->scrub_mode = SCRUB_NONE;
-
-        /*
-         * Update the actual capabilities based on the MCOPT1[MCHK]
-         * settings. Scrubbing is only useful if reporting is enabled.
- */ - - switch (memcheck) { - case SDRAM_MCOPT1_MCHK_CHK: - mci->edac_cap = EDAC_FLAG_EC; - break; - case SDRAM_MCOPT1_MCHK_CHK_REP: - mci->edac_cap = (EDAC_FLAG_EC | EDAC_FLAG_SECDED); - mci->scrub_mode = SCRUB_SW_SRC; - break; - default: - mci->edac_cap = EDAC_FLAG_NONE; - break; - } - - /* Initialize strings */ - - mci->mod_name = PPC4XX_EDAC_MODULE_NAME; - mci->ctl_name = ppc4xx_edac_match->compatible; - mci->dev_name = np->full_name; - - /* Initialize callbacks */ - - mci->edac_check = ppc4xx_edac_check; - mci->ctl_page_to_phys = NULL; - - /* Initialize chip select rows */ - - status = ppc4xx_edac_init_csrows(mci, mcopt1); - - if (status) - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Failed to initialize rows!\n"); - - return status; -} - -/** - * ppc4xx_edac_register_irq - setup and register controller interrupts - * @op: A pointer to the OpenFirmware device tree node associated - * with the controller this EDAC instance is bound to. - * @mci: A pointer to the EDAC memory controller instance - * associated with the ibm,sdram-4xx-ddr2 controller for which - * interrupts are being registered. - * - * This routine parses the correctable (CE) and uncorrectable error (UE) - * interrupts from the device tree node and maps and assigns them to - * the associated EDAC memory controller instance. - * - * Returns 0 if OK; otherwise, -ENODEV if the interrupts could not be - * mapped and assigned. - */ -static int ppc4xx_edac_register_irq(struct platform_device *op, - struct mem_ctl_info *mci) -{ - int status = 0; - int ded_irq, sec_irq; - struct ppc4xx_edac_pdata *pdata = mci->pvt_info; - struct device_node *np = op->dev.of_node; - - ded_irq = irq_of_parse_and_map(np, INTMAP_ECCDED_INDEX); - sec_irq = irq_of_parse_and_map(np, INTMAP_ECCSEC_INDEX); - - if (!ded_irq || !sec_irq) { - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Unable to map interrupts.\n"); - status = -ENODEV; - goto fail; - } - - status = request_irq(ded_irq, - ppc4xx_edac_isr, - 0, - "[EDAC] MC ECCDED", - mci); - - if (status < 0) { - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Unable to request irq %d for ECC DED", - ded_irq); - status = -ENODEV; - goto fail1; - } - - status = request_irq(sec_irq, - ppc4xx_edac_isr, - 0, - "[EDAC] MC ECCSEC", - mci); - - if (status < 0) { - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Unable to request irq %d for ECC SEC", - sec_irq); - status = -ENODEV; - goto fail2; - } - - ppc4xx_edac_mc_printk(KERN_INFO, mci, "ECCDED irq is %d\n", ded_irq); - ppc4xx_edac_mc_printk(KERN_INFO, mci, "ECCSEC irq is %d\n", sec_irq); - - pdata->irqs.ded = ded_irq; - pdata->irqs.sec = sec_irq; - - return 0; - - fail2: - free_irq(sec_irq, mci); - - fail1: - free_irq(ded_irq, mci); - - fail: - return status; -} - -/** - * ppc4xx_edac_map_dcrs - locate and map controller registers - * @np: A pointer to the device tree node containing the DCR - * resources to map. - * @dcr_host: A pointer to the DCR data to populate with the - * DCR mapping. - * - * This routine attempts to locate in the device tree and map the DCR - * register resources associated with the controller's indirect DCR - * address and data windows. - * - * Returns 0 if the DCRs were successfully mapped; otherwise, < 0 on - * error. - */ -static int ppc4xx_edac_map_dcrs(const struct device_node *np, - dcr_host_t *dcr_host) -{ - unsigned int dcr_base, dcr_len; - - if (np == NULL || dcr_host == NULL) - return -EINVAL; - - /* Get the DCR resource extent and sanity check the values. 
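mfsdram() and mtsdram(), used throughout, are not shown in this diff. The mechanism being wrapped is assumed here to be the usual 4xx indirect window, where a two-register DCR pair first selects and then transfers the SDRAM core register; a sketch under that assumption, using the generic DCR accessors:

    #include <asm/dcr.h>

    /* Assumed layout: address select at DCR offset 0, data at offset 1. */
    static inline u32 sdram_read(const dcr_host_t *host, unsigned int reg)
    {
            dcr_write(*host, 0, reg);      /* select the SDRAM register */
            return dcr_read(*host, 1);     /* fetch it via the data window */
    }

    static inline void sdram_write(const dcr_host_t *host,
                                   unsigned int reg, u32 val)
    {
            dcr_write(*host, 0, reg);
            dcr_write(*host, 1, val);
    }

This would also explain the SDRAM_DCR_RESOURCE_LEN check further down: the driver insists on a fixed-size DCR extent, presumably exactly that address/data pair.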
*/ - - dcr_base = dcr_resource_start(np, 0); - dcr_len = dcr_resource_len(np, 0); - - if (dcr_base == 0 || dcr_len == 0) { - ppc4xx_edac_printk(KERN_ERR, - "Failed to obtain DCR property.\n"); - return -ENODEV; - } - - if (dcr_len != SDRAM_DCR_RESOURCE_LEN) { - ppc4xx_edac_printk(KERN_ERR, - "Unexpected DCR length %d, expected %d.\n", - dcr_len, SDRAM_DCR_RESOURCE_LEN); - return -ENODEV; - } - - /* Attempt to map the DCR extent. */ - - *dcr_host = dcr_map(np, dcr_base, dcr_len); - - if (!DCR_MAP_OK(*dcr_host)) { - ppc4xx_edac_printk(KERN_INFO, "Failed to map DCRs.\n"); - return -ENODEV; - } - - return 0; -} - -/** - * ppc4xx_edac_probe - check controller and bind driver - * @op: A pointer to the OpenFirmware device tree node associated - * with the controller being probed for driver binding. - * - * This routine probes a specific ibm,sdram-4xx-ddr2 controller - * instance for binding with the driver. - * - * Returns 0 if the controller instance was successfully bound to the - * driver; otherwise, < 0 on error. - */ -static int ppc4xx_edac_probe(struct platform_device *op) -{ - int status = 0; - u32 mcopt1, memcheck; - dcr_host_t dcr_host; - const struct device_node *np = op->dev.of_node; - struct mem_ctl_info *mci = NULL; - struct edac_mc_layer layers[2]; - static int ppc4xx_edac_instance; - - /* - * At this point, we only support the controller realized on - * the AMCC PPC 405EX[r]. Reject anything else. - */ - - if (!of_device_is_compatible(np, "ibm,sdram-405ex") && - !of_device_is_compatible(np, "ibm,sdram-405exr")) { - ppc4xx_edac_printk(KERN_NOTICE, - "Only the PPC405EX[r] is supported.\n"); - return -ENODEV; - } - - /* - * Next, get the DCR property and attempt to map it so that we - * can probe the controller. - */ - - status = ppc4xx_edac_map_dcrs(np, &dcr_host); - - if (status) - return status; - - /* - * First determine whether ECC is enabled at all. If not, - * there is no useful checking or monitoring that can be done - * for this controller. - */ - - mcopt1 = mfsdram(&dcr_host, SDRAM_MCOPT1); - memcheck = (mcopt1 & SDRAM_MCOPT1_MCHK_MASK); - - if (memcheck == SDRAM_MCOPT1_MCHK_NON) { - ppc4xx_edac_printk(KERN_INFO, "%pOF: No ECC memory detected or " - "ECC is disabled.\n", np); - status = -ENODEV; - goto done; - } - - /* - * At this point, we know ECC is enabled, allocate an EDAC - * controller instance and perform the appropriate - * initialization. - */ - layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = ppc4xx_edac_nr_csrows; - layers[0].is_virt_csrow = true; - layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = ppc4xx_edac_nr_chans; - layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(ppc4xx_edac_instance, ARRAY_SIZE(layers), layers, - sizeof(struct ppc4xx_edac_pdata)); - if (mci == NULL) { - ppc4xx_edac_printk(KERN_ERR, "%pOF: " - "Failed to allocate EDAC MC instance!\n", - np); - status = -ENOMEM; - goto done; - } - - status = ppc4xx_edac_mc_init(mci, op, &dcr_host, mcopt1); - - if (status) { - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Failed to initialize instance!\n"); - goto fail; - } - - /* - * We have a valid, initialized EDAC instance bound to the - * controller. Attempt to register it with the EDAC subsystem - * and, if necessary, register interrupts. 
- */ - - if (edac_mc_add_mc(mci)) { - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Failed to add instance!\n"); - status = -ENODEV; - goto fail; - } - - if (edac_op_state == EDAC_OPSTATE_INT) { - status = ppc4xx_edac_register_irq(op, mci); - - if (status) - goto fail1; - } - - ppc4xx_edac_instance++; - - return 0; - - fail1: - edac_mc_del_mc(mci->pdev); - - fail: - edac_mc_free(mci); - - done: - return status; -} - -/** - * ppc4xx_edac_remove - unbind driver from controller - * @op: A pointer to the OpenFirmware device tree node associated - * with the controller this EDAC instance is to be unbound/removed - * from. - * - * This routine unbinds the EDAC memory controller instance associated - * with the specified ibm,sdram-4xx-ddr2 controller described by the - * OpenFirmware device tree node passed as a parameter. - * - * Unconditionally returns 0. - */ -static int -ppc4xx_edac_remove(struct platform_device *op) -{ - struct mem_ctl_info *mci = dev_get_drvdata(&op->dev); - struct ppc4xx_edac_pdata *pdata = mci->pvt_info; - - if (edac_op_state == EDAC_OPSTATE_INT) { - free_irq(pdata->irqs.sec, mci); - free_irq(pdata->irqs.ded, mci); - } - - dcr_unmap(pdata->dcr_host, SDRAM_DCR_RESOURCE_LEN); - - edac_mc_del_mc(mci->pdev); - edac_mc_free(mci); - - return 0; -} - -/** - * ppc4xx_edac_opstate_init - initialize EDAC reporting method - * - * This routine ensures that the EDAC memory controller reporting - * method is mapped to a sane value as the EDAC core defines the value - * to EDAC_OPSTATE_INVAL by default. We don't call the global - * opstate_init as that defaults to polling and we want interrupt as - * the default. - */ -static inline void __init -ppc4xx_edac_opstate_init(void) -{ - switch (edac_op_state) { - case EDAC_OPSTATE_POLL: - case EDAC_OPSTATE_INT: - break; - default: - edac_op_state = EDAC_OPSTATE_INT; - break; - } - - ppc4xx_edac_printk(KERN_INFO, "Reporting type: %s\n", - ((edac_op_state == EDAC_OPSTATE_POLL) ? - EDAC_OPSTATE_POLL_STR : - ((edac_op_state == EDAC_OPSTATE_INT) ? - EDAC_OPSTATE_INT_STR : - EDAC_OPSTATE_UNKNOWN_STR))); -} - -static struct platform_driver ppc4xx_edac_driver = { - .probe = ppc4xx_edac_probe, - .remove = ppc4xx_edac_remove, - .driver = { - .name = PPC4XX_EDAC_MODULE_NAME, - .of_match_table = ppc4xx_edac_match, - }, -}; - -/** - * ppc4xx_edac_init - driver/module insertion entry point - * - * This routine is the driver/module insertion entry point. It - * initializes the EDAC memory controller reporting state and - * registers the driver as an OpenFirmware device tree platform - * driver. - */ -static int __init -ppc4xx_edac_init(void) -{ - ppc4xx_edac_printk(KERN_INFO, PPC4XX_EDAC_MODULE_REVISION "\n"); - - ppc4xx_edac_opstate_init(); - - return platform_driver_register(&ppc4xx_edac_driver); -} - -/** - * ppc4xx_edac_exit - driver/module removal entry point - * - * This routine is the driver/module removal entry point. It - * unregisters the driver as an OpenFirmware device tree platform - * driver. 
- */
-static void __exit
-ppc4xx_edac_exit(void)
-{
-        platform_driver_unregister(&ppc4xx_edac_driver);
-}
-
-module_init(ppc4xx_edac_init);
-module_exit(ppc4xx_edac_exit);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Grant Erickson <gerickson@nuovations.com>");
-MODULE_DESCRIPTION("EDAC MC Driver for the PPC4xx IBM DDR2 Memory Controller");
-module_param(edac_op_state, int, 0444);
-MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting State: "
-                 "0=" EDAC_OPSTATE_POLL_STR ", 2=" EDAC_OPSTATE_INT_STR);
diff --git a/drivers/edac/ppc4xx_edac.h b/drivers/edac/ppc4xx_edac.h
deleted file mode 100644
index b38459aa58ee..000000000000
--- a/drivers/edac/ppc4xx_edac.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (c) 2008 Nuovation System Designs, LLC
- *   Grant Erickson <gerickson@nuovations.com>
- *
- * This file defines processor mnemonics for accessing and managing
- * the IBM DDR1/DDR2 ECC controller found in the 405EX[r], 440SP,
- * 440SPe, 460EX, 460GT and 460SX.
- */
-
-#ifndef __PPC4XX_EDAC_H
-#define __PPC4XX_EDAC_H
-
-#include <linux/types.h>
-
-/*
- * Macro for generating register field mnemonics
- */
-#define PPC_REG_BITS                32
-#define PPC_REG_VAL(bit, val)       ((val) << ((PPC_REG_BITS - 1) - (bit)))
-#define PPC_REG_DECODE(bit, val)    ((val) >> ((PPC_REG_BITS - 1) - (bit)))
-
-/*
- * IBM 4xx DDR1/DDR2 SDRAM memory controller registers (at least those
- * relevant to ECC)
- */
-#define SDRAM_BESR        0x00    /* Error status (read/clear) */
-#define SDRAM_BESRT       0x01    /* Error status (test/set) */
-#define SDRAM_BEARL       0x02    /* Error address low */
-#define SDRAM_BEARH       0x03    /* Error address high */
-#define SDRAM_WMIRQ       0x06    /* Write master (read/clear) */
-#define SDRAM_WMIRQT      0x07    /* Write master (test/set) */
-#define SDRAM_MCOPT1      0x20    /* Controller options 1 */
-#define SDRAM_MBXCF_BASE  0x40    /* Bank n configuration base */
-#define SDRAM_MBXCF(n)    (SDRAM_MBXCF_BASE + (4 * (n)))
-#define SDRAM_MB0CF       SDRAM_MBXCF(0)
-#define SDRAM_MB1CF       SDRAM_MBXCF(1)
-#define SDRAM_MB2CF       SDRAM_MBXCF(2)
-#define SDRAM_MB3CF       SDRAM_MBXCF(3)
-#define SDRAM_ECCCR       0x98    /* ECC error status */
-#define SDRAM_ECCES       SDRAM_ECCCR
-
-/*
- * PLB Master IDs
- */
-#define SDRAM_PLB_M0ID_FIRST    0
-#define SDRAM_PLB_M0ID_ICU      SDRAM_PLB_M0ID_FIRST
-#define SDRAM_PLB_M0ID_PCIE0    1
-#define SDRAM_PLB_M0ID_PCIE1    2
-#define SDRAM_PLB_M0ID_DMA      3
-#define SDRAM_PLB_M0ID_DCU      4
-#define SDRAM_PLB_M0ID_OPB      5
-#define SDRAM_PLB_M0ID_MAL      6
-#define SDRAM_PLB_M0ID_SEC      7
-#define SDRAM_PLB_M0ID_AHB      8
-#define SDRAM_PLB_M0ID_LAST     SDRAM_PLB_M0ID_AHB
-#define SDRAM_PLB_M0ID_COUNT    (SDRAM_PLB_M0ID_LAST - \
-                                 SDRAM_PLB_M0ID_FIRST + 1)
-
-/*
- * Memory Controller Bus Error Status Register
- */
-#define SDRAM_BESR_MASK             PPC_REG_VAL(7, 0xFF)
-#define SDRAM_BESR_M0ID_MASK        PPC_REG_VAL(3, 0xF)
-#define SDRAM_BESR_M0ID_DECODE(n)   PPC_REG_DECODE(3, n)
-#define SDRAM_BESR_M0ID_ICU         PPC_REG_VAL(3, SDRAM_PLB_M0ID_ICU)
-#define SDRAM_BESR_M0ID_PCIE0       PPC_REG_VAL(3, SDRAM_PLB_M0ID_PCIE0)
-#define SDRAM_BESR_M0ID_PCIE1       PPC_REG_VAL(3, SDRAM_PLB_M0ID_PCIE1)
-#define SDRAM_BESR_M0ID_DMA         PPC_REG_VAL(3, SDRAM_PLB_M0ID_DMA)
-#define SDRAM_BESR_M0ID_DCU         PPC_REG_VAL(3, SDRAM_PLB_M0ID_DCU)
-#define SDRAM_BESR_M0ID_OPB         PPC_REG_VAL(3, SDRAM_PLB_M0ID_OPB)
-#define SDRAM_BESR_M0ID_MAL         PPC_REG_VAL(3, SDRAM_PLB_M0ID_MAL)
-#define SDRAM_BESR_M0ID_SEC         PPC_REG_VAL(3, SDRAM_PLB_M0ID_SEC)
-#define SDRAM_BESR_M0ID_AHB         PPC_REG_VAL(3, SDRAM_PLB_M0ID_AHB)
-#define SDRAM_BESR_M0ET_MASK        PPC_REG_VAL(6, 0x7)
-#define SDRAM_BESR_M0ET_NONE        PPC_REG_VAL(6, 0)
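PPC_REG_VAL() numbers bits in IBM's big-endian convention: bit 0 is the most significant bit of the 32-bit register, so a field value is shifted up toward the MSB. A quick self-check of that arithmetic against the MCOPT1[MCHK] definitions that use it:

    #include <assert.h>

    #define PPC_REG_BITS 32
    #define PPC_REG_VAL(bit, val)    ((val) << ((PPC_REG_BITS - 1) - (bit)))
    #define PPC_REG_DECODE(bit, val) ((val) >> ((PPC_REG_BITS - 1) - (bit)))

    int main(void)
    {
            /* MCHK sits at IBM bits 2..3, i.e. mask 0x30000000. */
            assert(PPC_REG_VAL(3, 0x3) == 0x30000000);
            /* Decoding undoes it, matching SDRAM_MCOPT1_MCHK_DECODE(). */
            assert(PPC_REG_DECODE(3, 0x30000000) == 0x3);
            return 0;
    }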
-#define SDRAM_BESR_M0ET_ECC PPC_REG_VAL(6, 1) -#define SDRAM_BESR_M0RW_MASK PPC_REG_VAL(7, 1) -#define SDRAM_BESR_M0RW_WRITE PPC_REG_VAL(7, 0) -#define SDRAM_BESR_M0RW_READ PPC_REG_VAL(7, 1) - -/* - * Memory Controller PLB Write Master Interrupt Register - */ -#define SDRAM_WMIRQ_MASK PPC_REG_VAL(8, 0x1FF) -#define SDRAM_WMIRQ_ENCODE(id) PPC_REG_VAL((id % \ - SDRAM_PLB_M0ID_COUNT), 1) -#define SDRAM_WMIRQ_ICU PPC_REG_VAL(SDRAM_PLB_M0ID_ICU, 1) -#define SDRAM_WMIRQ_PCIE0 PPC_REG_VAL(SDRAM_PLB_M0ID_PCIE0, 1) -#define SDRAM_WMIRQ_PCIE1 PPC_REG_VAL(SDRAM_PLB_M0ID_PCIE1, 1) -#define SDRAM_WMIRQ_DMA PPC_REG_VAL(SDRAM_PLB_M0ID_DMA, 1) -#define SDRAM_WMIRQ_DCU PPC_REG_VAL(SDRAM_PLB_M0ID_DCU, 1) -#define SDRAM_WMIRQ_OPB PPC_REG_VAL(SDRAM_PLB_M0ID_OPB, 1) -#define SDRAM_WMIRQ_MAL PPC_REG_VAL(SDRAM_PLB_M0ID_MAL, 1) -#define SDRAM_WMIRQ_SEC PPC_REG_VAL(SDRAM_PLB_M0ID_SEC, 1) -#define SDRAM_WMIRQ_AHB PPC_REG_VAL(SDRAM_PLB_M0ID_AHB, 1) - -/* - * Memory Controller Options 1 Register - */ -#define SDRAM_MCOPT1_MCHK_MASK PPC_REG_VAL(3, 0x3) /* ECC mask */ -#define SDRAM_MCOPT1_MCHK_NON PPC_REG_VAL(3, 0x0) /* No ECC gen */ -#define SDRAM_MCOPT1_MCHK_GEN PPC_REG_VAL(3, 0x2) /* ECC gen */ -#define SDRAM_MCOPT1_MCHK_CHK PPC_REG_VAL(3, 0x1) /* ECC gen and chk */ -#define SDRAM_MCOPT1_MCHK_CHK_REP PPC_REG_VAL(3, 0x3) /* ECC gen/chk/rpt */ -#define SDRAM_MCOPT1_MCHK_DECODE(n) ((((u32)(n)) >> 28) & 0x3) -#define SDRAM_MCOPT1_RDEN_MASK PPC_REG_VAL(4, 0x1) /* Rgstrd DIMM mask */ -#define SDRAM_MCOPT1_RDEN PPC_REG_VAL(4, 0x1) /* Rgstrd DIMM enbl */ -#define SDRAM_MCOPT1_WDTH_MASK PPC_REG_VAL(7, 0x1) /* Width mask */ -#define SDRAM_MCOPT1_WDTH_32 PPC_REG_VAL(7, 0x0) /* 32 bits */ -#define SDRAM_MCOPT1_WDTH_16 PPC_REG_VAL(7, 0x1) /* 16 bits */ -#define SDRAM_MCOPT1_DDR_TYPE_MASK PPC_REG_VAL(11, 0x1) /* DDR type mask */ -#define SDRAM_MCOPT1_DDR1_TYPE PPC_REG_VAL(11, 0x0) /* DDR1 type */ -#define SDRAM_MCOPT1_DDR2_TYPE PPC_REG_VAL(11, 0x1) /* DDR2 type */ - -/* - * Memory Bank 0 - n Configuration Register - */ -#define SDRAM_MBCF_BA_MASK PPC_REG_VAL(12, 0x1FFF) -#define SDRAM_MBCF_SZ_MASK PPC_REG_VAL(19, 0xF) -#define SDRAM_MBCF_SZ_DECODE(mbxcf) PPC_REG_DECODE(19, mbxcf) -#define SDRAM_MBCF_SZ_4MB PPC_REG_VAL(19, 0x0) -#define SDRAM_MBCF_SZ_8MB PPC_REG_VAL(19, 0x1) -#define SDRAM_MBCF_SZ_16MB PPC_REG_VAL(19, 0x2) -#define SDRAM_MBCF_SZ_32MB PPC_REG_VAL(19, 0x3) -#define SDRAM_MBCF_SZ_64MB PPC_REG_VAL(19, 0x4) -#define SDRAM_MBCF_SZ_128MB PPC_REG_VAL(19, 0x5) -#define SDRAM_MBCF_SZ_256MB PPC_REG_VAL(19, 0x6) -#define SDRAM_MBCF_SZ_512MB PPC_REG_VAL(19, 0x7) -#define SDRAM_MBCF_SZ_1GB PPC_REG_VAL(19, 0x8) -#define SDRAM_MBCF_SZ_2GB PPC_REG_VAL(19, 0x9) -#define SDRAM_MBCF_SZ_4GB PPC_REG_VAL(19, 0xA) -#define SDRAM_MBCF_SZ_8GB PPC_REG_VAL(19, 0xB) -#define SDRAM_MBCF_AM_MASK PPC_REG_VAL(23, 0xF) -#define SDRAM_MBCF_AM_MODE0 PPC_REG_VAL(23, 0x0) -#define SDRAM_MBCF_AM_MODE1 PPC_REG_VAL(23, 0x1) -#define SDRAM_MBCF_AM_MODE2 PPC_REG_VAL(23, 0x2) -#define SDRAM_MBCF_AM_MODE3 PPC_REG_VAL(23, 0x3) -#define SDRAM_MBCF_AM_MODE4 PPC_REG_VAL(23, 0x4) -#define SDRAM_MBCF_AM_MODE5 PPC_REG_VAL(23, 0x5) -#define SDRAM_MBCF_AM_MODE6 PPC_REG_VAL(23, 0x6) -#define SDRAM_MBCF_AM_MODE7 PPC_REG_VAL(23, 0x7) -#define SDRAM_MBCF_AM_MODE8 PPC_REG_VAL(23, 0x8) -#define SDRAM_MBCF_AM_MODE9 PPC_REG_VAL(23, 0x9) -#define SDRAM_MBCF_BE_MASK PPC_REG_VAL(31, 0x1) -#define SDRAM_MBCF_BE_DISABLE PPC_REG_VAL(31, 0x0) -#define SDRAM_MBCF_BE_ENABLE PPC_REG_VAL(31, 0x1) - -/* - * ECC Error Status - */ -#define SDRAM_ECCES_MASK PPC_REG_VAL(21, 0x3FFFFF) 
-#define SDRAM_ECCES_BNCE_MASK PPC_REG_VAL(15, 0xFFFF) -#define SDRAM_ECCES_BNCE_ENCODE(lane) PPC_REG_VAL(((lane) & 0xF), 1) -#define SDRAM_ECCES_CKBER_MASK PPC_REG_VAL(17, 0x3) -#define SDRAM_ECCES_CKBER_NONE PPC_REG_VAL(17, 0) -#define SDRAM_ECCES_CKBER_16_ECC_0_3 PPC_REG_VAL(17, 2) -#define SDRAM_ECCES_CKBER_32_ECC_0_3 PPC_REG_VAL(17, 1) -#define SDRAM_ECCES_CKBER_32_ECC_4_8 PPC_REG_VAL(17, 2) -#define SDRAM_ECCES_CKBER_32_ECC_0_8 PPC_REG_VAL(17, 3) -#define SDRAM_ECCES_CE PPC_REG_VAL(18, 1) -#define SDRAM_ECCES_UE PPC_REG_VAL(19, 1) -#define SDRAM_ECCES_BKNER_MASK PPC_REG_VAL(21, 0x3) -#define SDRAM_ECCES_BK0ER PPC_REG_VAL(20, 1) -#define SDRAM_ECCES_BK1ER PPC_REG_VAL(21, 1) - -#endif /* __PPC4XX_EDAC_H */ diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c index c45519f59dc1..f3da9385ca0d 100644 --- a/drivers/edac/qcom_edac.c +++ b/drivers/edac/qcom_edac.c @@ -21,30 +21,9 @@ #define TRP_SYN_REG_CNT 6 #define DRP_SYN_REG_CNT 8 -#define LLCC_COMMON_STATUS0 0x0003000c #define LLCC_LB_CNT_MASK GENMASK(31, 28) #define LLCC_LB_CNT_SHIFT 28 -/* Single & double bit syndrome register offsets */ -#define TRP_ECC_SB_ERR_SYN0 0x0002304c -#define TRP_ECC_DB_ERR_SYN0 0x00020370 -#define DRP_ECC_SB_ERR_SYN0 0x0004204c -#define DRP_ECC_DB_ERR_SYN0 0x00042070 - -/* Error register offsets */ -#define TRP_ECC_ERROR_STATUS1 0x00020348 -#define TRP_ECC_ERROR_STATUS0 0x00020344 -#define DRP_ECC_ERROR_STATUS1 0x00042048 -#define DRP_ECC_ERROR_STATUS0 0x00042044 - -/* TRP, DRP interrupt register offsets */ -#define DRP_INTERRUPT_STATUS 0x00041000 -#define TRP_INTERRUPT_0_STATUS 0x00020480 -#define DRP_INTERRUPT_CLEAR 0x00041008 -#define DRP_ECC_ERROR_CNTR_CLEAR 0x00040004 -#define TRP_INTERRUPT_0_CLEAR 0x00020484 -#define TRP_ECC_ERROR_CNTR_CLEAR 0x00020440 - /* Mask and shift macros */ #define ECC_DB_ERR_COUNT_MASK GENMASK(4, 0) #define ECC_DB_ERR_WAYS_MASK GENMASK(31, 16) @@ -60,15 +39,6 @@ #define DRP_TRP_INT_CLEAR GENMASK(1, 0) #define DRP_TRP_CNT_CLEAR GENMASK(1, 0) -/* Config registers offsets*/ -#define DRP_ECC_ERROR_CFG 0x00040000 - -/* Tag RAM, Data RAM interrupt register offsets */ -#define CMN_INTERRUPT_0_ENABLE 0x0003001c -#define CMN_INTERRUPT_2_ENABLE 0x0003003c -#define TRP_INTERRUPT_0_ENABLE 0x00020488 -#define DRP_INTERRUPT_ENABLE 0x0004100c - #define SB_ERROR_THRESHOLD 0x1 #define SB_ERROR_THRESHOLD_SHIFT 24 #define SB_DB_TRP_INTERRUPT_ENABLE 0x3 @@ -76,6 +46,8 @@ #define DRP0_INTERRUPT_ENABLE BIT(6) #define SB_DB_DRP_INTERRUPT_ENABLE 0x3 +#define ECC_POLL_MSEC 5000 + enum { LLCC_DRAM_CE = 0, LLCC_DRAM_UE, @@ -86,9 +58,6 @@ enum { static const struct llcc_edac_reg_data edac_reg_data[] = { [LLCC_DRAM_CE] = { .name = "DRAM Single-bit", - .synd_reg = DRP_ECC_SB_ERR_SYN0, - .count_status_reg = DRP_ECC_ERROR_STATUS1, - .ways_status_reg = DRP_ECC_ERROR_STATUS0, .reg_cnt = DRP_SYN_REG_CNT, .count_mask = ECC_SB_ERR_COUNT_MASK, .ways_mask = ECC_SB_ERR_WAYS_MASK, @@ -96,9 +65,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = { }, [LLCC_DRAM_UE] = { .name = "DRAM Double-bit", - .synd_reg = DRP_ECC_DB_ERR_SYN0, - .count_status_reg = DRP_ECC_ERROR_STATUS1, - .ways_status_reg = DRP_ECC_ERROR_STATUS0, .reg_cnt = DRP_SYN_REG_CNT, .count_mask = ECC_DB_ERR_COUNT_MASK, .ways_mask = ECC_DB_ERR_WAYS_MASK, @@ -106,9 +72,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = { }, [LLCC_TRAM_CE] = { .name = "TRAM Single-bit", - .synd_reg = TRP_ECC_SB_ERR_SYN0, - .count_status_reg = TRP_ECC_ERROR_STATUS1, - .ways_status_reg = TRP_ECC_ERROR_STATUS0, .reg_cnt = TRP_SYN_REG_CNT, 
.count_mask = ECC_SB_ERR_COUNT_MASK, .ways_mask = ECC_SB_ERR_WAYS_MASK, @@ -116,9 +79,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = { }, [LLCC_TRAM_UE] = { .name = "TRAM Double-bit", - .synd_reg = TRP_ECC_DB_ERR_SYN0, - .count_status_reg = TRP_ECC_ERROR_STATUS1, - .ways_status_reg = TRP_ECC_ERROR_STATUS0, .reg_cnt = TRP_SYN_REG_CNT, .count_mask = ECC_DB_ERR_COUNT_MASK, .ways_mask = ECC_DB_ERR_WAYS_MASK, @@ -126,7 +86,7 @@ static const struct llcc_edac_reg_data edac_reg_data[] = { }, }; -static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap) +static int qcom_llcc_core_setup(struct llcc_drv_data *drv, struct regmap *llcc_bcast_regmap) { u32 sb_err_threshold; int ret; @@ -135,31 +95,31 @@ static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap) * Configure interrupt enable registers such that Tag, Data RAM related * interrupts are propagated to interrupt controller for servicing */ - ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE, + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_0_enable, TRP0_INTERRUPT_ENABLE, TRP0_INTERRUPT_ENABLE); if (ret) return ret; - ret = regmap_update_bits(llcc_bcast_regmap, TRP_INTERRUPT_0_ENABLE, + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->trp_interrupt_0_enable, SB_DB_TRP_INTERRUPT_ENABLE, SB_DB_TRP_INTERRUPT_ENABLE); if (ret) return ret; sb_err_threshold = (SB_ERROR_THRESHOLD << SB_ERROR_THRESHOLD_SHIFT); - ret = regmap_write(llcc_bcast_regmap, DRP_ECC_ERROR_CFG, + ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_ecc_error_cfg, sb_err_threshold); if (ret) return ret; - ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE, + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_0_enable, DRP0_INTERRUPT_ENABLE, DRP0_INTERRUPT_ENABLE); if (ret) return ret; - ret = regmap_write(llcc_bcast_regmap, DRP_INTERRUPT_ENABLE, + ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_interrupt_enable, SB_DB_DRP_INTERRUPT_ENABLE); return ret; } @@ -168,29 +128,33 @@ static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap) static int qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv) { - int ret = 0; + int ret; switch (err_type) { case LLCC_DRAM_CE: case LLCC_DRAM_UE: - ret = regmap_write(drv->bcast_regmap, DRP_INTERRUPT_CLEAR, + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->drp_interrupt_clear, DRP_TRP_INT_CLEAR); if (ret) return ret; - ret = regmap_write(drv->bcast_regmap, DRP_ECC_ERROR_CNTR_CLEAR, + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->drp_ecc_error_cntr_clear, DRP_TRP_CNT_CLEAR); if (ret) return ret; break; case LLCC_TRAM_CE: case LLCC_TRAM_UE: - ret = regmap_write(drv->bcast_regmap, TRP_INTERRUPT_0_CLEAR, + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->trp_interrupt_0_clear, DRP_TRP_INT_CLEAR); if (ret) return ret; - ret = regmap_write(drv->bcast_regmap, TRP_ECC_ERROR_CNTR_CLEAR, + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->trp_ecc_error_cntr_clear, DRP_TRP_CNT_CLEAR); if (ret) return ret; @@ -203,17 +167,55 @@ qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv) return ret; } +struct qcom_llcc_syn_regs { + u32 synd_reg; + u32 count_status_reg; + u32 ways_status_reg; +}; + +static void get_reg_offsets(struct llcc_drv_data *drv, int err_type, + struct qcom_llcc_syn_regs *syn_regs) +{ + const struct llcc_edac_reg_offset *edac_reg_offset = drv->edac_reg_offset; + + switch (err_type) { 
+ case LLCC_DRAM_CE: + syn_regs->synd_reg = edac_reg_offset->drp_ecc_sb_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0; + break; + case LLCC_DRAM_UE: + syn_regs->synd_reg = edac_reg_offset->drp_ecc_db_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0; + break; + case LLCC_TRAM_CE: + syn_regs->synd_reg = edac_reg_offset->trp_ecc_sb_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0; + break; + case LLCC_TRAM_UE: + syn_regs->synd_reg = edac_reg_offset->trp_ecc_db_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0; + break; + } +} + /* Dump Syndrome registers data for Tag RAM, Data RAM bit errors*/ static int dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type) { struct llcc_edac_reg_data reg_data = edac_reg_data[err_type]; + struct qcom_llcc_syn_regs regs = { }; int err_cnt, err_ways, ret, i; u32 synd_reg, synd_val; + get_reg_offsets(drv, err_type, ®s); + for (i = 0; i < reg_data.reg_cnt; i++) { - synd_reg = reg_data.synd_reg + (i * 4); - ret = regmap_read(drv->regmap, drv->offsets[bank] + synd_reg, + synd_reg = regs.synd_reg + (i * 4); + ret = regmap_read(drv->regmaps[bank], synd_reg, &synd_val); if (ret) goto clear; @@ -222,8 +224,7 @@ dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type) reg_data.name, i, synd_val); } - ret = regmap_read(drv->regmap, - drv->offsets[bank] + reg_data.count_status_reg, + ret = regmap_read(drv->regmaps[bank], regs.count_status_reg, &err_cnt); if (ret) goto clear; @@ -233,8 +234,7 @@ dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type) edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error count: 0x%4x\n", reg_data.name, err_cnt); - ret = regmap_read(drv->regmap, - drv->offsets[bank] + reg_data.ways_status_reg, + ret = regmap_read(drv->regmaps[bank], regs.ways_status_reg, &err_ways); if (ret) goto clear; @@ -285,8 +285,7 @@ dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank) return ret; } -static irqreturn_t -llcc_ecc_irq_handler(int irq, void *edev_ctl) +static irqreturn_t llcc_ecc_irq_handler(int irq, void *edev_ctl) { struct edac_device_ctl_info *edac_dev_ctl = edev_ctl; struct llcc_drv_data *drv = edac_dev_ctl->dev->platform_data; @@ -296,8 +295,7 @@ llcc_ecc_irq_handler(int irq, void *edev_ctl) /* Iterate over the banks and look for Tag RAM or Data RAM errors */ for (i = 0; i < drv->num_banks; i++) { - ret = regmap_read(drv->regmap, - drv->offsets[i] + DRP_INTERRUPT_STATUS, + ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->drp_interrupt_status, &drp_error); if (!ret && (drp_error & SB_ECC_ERROR)) { @@ -312,8 +310,7 @@ llcc_ecc_irq_handler(int irq, void *edev_ctl) if (!ret) irq_rc = IRQ_HANDLED; - ret = regmap_read(drv->regmap, - drv->offsets[i] + TRP_INTERRUPT_0_STATUS, + ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->trp_interrupt_0_status, &trp_error); if (!ret && (trp_error & SB_ECC_ERROR)) { @@ -332,6 +329,11 @@ llcc_ecc_irq_handler(int irq, void *edev_ctl) return irq_rc; } +static void llcc_ecc_check(struct edac_device_ctl_info *edev_ctl) +{ + llcc_ecc_irq_handler(0, edev_ctl); +} + static int qcom_llcc_edac_probe(struct platform_device *pdev) { struct llcc_drv_data 
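With the fixed #defines deleted from this file, the register layout now arrives through drv->edac_reg_offset, so one EDAC driver can serve LLCC generations whose TRP/DRP blocks sit at different offsets. As an illustration only, a per-SoC table filled in with the legacy values removed above (struct llcc_edac_reg_offset is expected to be defined by the Qualcomm LLCC driver headers; the field names follow the accessors used in this patch):

    static const struct llcc_edac_reg_offset llcc_v1_edac_reg_offset = {
            /* Tag RAM block */
            .trp_ecc_error_status0    = 0x20344,
            .trp_ecc_error_status1    = 0x20348,
            .trp_ecc_sb_err_syn0      = 0x2304c,
            .trp_ecc_db_err_syn0      = 0x20370,
            .trp_ecc_error_cntr_clear = 0x20440,
            .trp_interrupt_0_status   = 0x20480,
            .trp_interrupt_0_clear    = 0x20484,
            .trp_interrupt_0_enable   = 0x20488,

            /* Common block */
            .cmn_interrupt_0_enable   = 0x3001c,

            /* Data RAM block */
            .drp_ecc_error_cfg        = 0x40000,
            .drp_ecc_error_cntr_clear = 0x40004,
            .drp_interrupt_status     = 0x41000,
            .drp_interrupt_clear      = 0x41008,
            .drp_interrupt_enable     = 0x4100c,
            .drp_ecc_error_status0    = 0x42044,
            .drp_ecc_error_status1    = 0x42048,
            .drp_ecc_sb_err_syn0      = 0x4204c,
            .drp_ecc_db_err_syn0      = 0x42070,
    };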
*llcc_driv_data = pdev->dev.platform_data; @@ -340,14 +342,15 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev) int ecc_irq; int rc; - rc = qcom_llcc_core_setup(llcc_driv_data->bcast_regmap); - if (rc) - return rc; + if (!llcc_driv_data->ecc_irq_configured) { + rc = qcom_llcc_core_setup(llcc_driv_data, llcc_driv_data->bcast_regmap); + if (rc) + return rc; + } /* Allocate edac control info */ edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank", llcc_driv_data->num_banks, 1, - NULL, 0, edac_device_alloc_index()); if (!edev_ctl) @@ -359,49 +362,56 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev) edev_ctl->ctl_name = "llcc"; edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE; - rc = edac_device_add_device(edev_ctl); - if (rc) - goto out_mem; - - platform_set_drvdata(pdev, edev_ctl); - - /* Request for ecc irq */ + /* Check if LLCC driver has passed ECC IRQ */ ecc_irq = llcc_driv_data->ecc_irq; - if (ecc_irq < 0) { - rc = -ENODEV; - goto out_dev; - } - rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler, + if (ecc_irq > 0) { + /* Use interrupt mode if IRQ is available */ + rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler, IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl); - if (rc) - goto out_dev; + if (!rc) { + edac_op_state = EDAC_OPSTATE_INT; + goto irq_done; + } + } - return rc; + /* Fall back to polling mode otherwise */ + edev_ctl->poll_msec = ECC_POLL_MSEC; + edev_ctl->edac_check = llcc_ecc_check; + edac_op_state = EDAC_OPSTATE_POLL; -out_dev: - edac_device_del_device(edev_ctl->dev); -out_mem: - edac_device_free_ctl_info(edev_ctl); +irq_done: + rc = edac_device_add_device(edev_ctl); + if (rc) { + edac_device_free_ctl_info(edev_ctl); + return rc; + } + + platform_set_drvdata(pdev, edev_ctl); return rc; } -static int qcom_llcc_edac_remove(struct platform_device *pdev) +static void qcom_llcc_edac_remove(struct platform_device *pdev) { struct edac_device_ctl_info *edev_ctl = dev_get_drvdata(&pdev->dev); edac_device_del_device(edev_ctl->dev); edac_device_free_ctl_info(edev_ctl); - - return 0; } +static const struct platform_device_id qcom_llcc_edac_id_table[] = { + { .name = "qcom_llcc_edac" }, + {} +}; +MODULE_DEVICE_TABLE(platform, qcom_llcc_edac_id_table); + static struct platform_driver qcom_llcc_edac_driver = { .probe = qcom_llcc_edac_probe, .remove = qcom_llcc_edac_remove, .driver = { .name = "qcom_llcc_edac", }, + .id_table = qcom_llcc_edac_id_table, }; module_platform_driver(qcom_llcc_edac_driver); diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index d0aef83dca2a..61e979d5437a 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -415,8 +415,7 @@ module_init(r82600_init); module_exit(r82600_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD Ltd. " - "on behalf of EADS Astrium"); +MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD Ltd. 
on behalf of EADS Astrium"); MODULE_DESCRIPTION("MC support for Radisys 82600 memory controllers"); module_param(disable_hardware_scrub, bool, 0644); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 0c779a0326b6..d5f12219598a 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -29,6 +29,8 @@ /* Static vars */ static LIST_HEAD(sbridge_edac_list); +static char sb_msg[256]; +static char sb_msg_full[512]; /* * Alter this version for the module when modifications are made @@ -109,8 +111,8 @@ static const u32 knl_interleave_list[] = { 0x104, 0x10c, 0x114, 0x11c, /* 20-23 */ }; #define MAX_INTERLEAVE \ - (max_t(unsigned int, ARRAY_SIZE(sbridge_interleave_list), \ - max_t(unsigned int, ARRAY_SIZE(ibridge_interleave_list), \ + (MAX_T(unsigned int, ARRAY_SIZE(sbridge_interleave_list), \ + MAX_T(unsigned int, ARRAY_SIZE(ibridge_interleave_list), \ ARRAY_SIZE(knl_interleave_list)))) struct interleave_pkg { @@ -439,7 +441,7 @@ static const struct pci_id_descr pci_dev_descr_sbridge[] = { static const struct pci_id_table pci_dev_descr_sbridge_table[] = { PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, ARRAY_SIZE(pci_dev_descr_sbridge), 1, SANDY_BRIDGE), - {0,} /* 0 terminated list. */ + { NULL, } }; /* This changes depending if 1HA or 2HA: @@ -505,7 +507,7 @@ static const struct pci_id_descr pci_dev_descr_ibridge[] = { static const struct pci_id_table pci_dev_descr_ibridge_table[] = { PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, 12, 2, IVY_BRIDGE), - {0,} /* 0 terminated list. */ + { NULL, } }; /* Haswell support */ @@ -576,7 +578,7 @@ static const struct pci_id_descr pci_dev_descr_haswell[] = { static const struct pci_id_table pci_dev_descr_haswell_table[] = { PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, 13, 2, HASWELL), - {0,} /* 0 terminated list. */ + { NULL, } }; /* Knight's Landing Support */ @@ -620,7 +622,7 @@ static const struct pci_id_descr pci_dev_descr_knl[] = { static const struct pci_id_table pci_dev_descr_knl_table[] = { PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, ARRAY_SIZE(pci_dev_descr_knl), 1, KNIGHTS_LANDING), - {0,} + { NULL, } }; /* @@ -686,7 +688,7 @@ static const struct pci_id_descr pci_dev_descr_broadwell[] = { static const struct pci_id_table pci_dev_descr_broadwell_table[] = { PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, 10, 2, BROADWELL), - {0,} /* 0 terminated list. */ + { NULL, } }; @@ -3079,7 +3081,6 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; enum hw_event_mc_err_type tp_event; - char *optype, msg[256], msg_full[512]; bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); @@ -3095,10 +3096,10 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, * aligned address reported by patrol scrubber. */ u32 lsb = GET_BITFIELD(m->misc, 0, 5); + char *optype, *area_type = "DRAM"; long channel_mask, first_channel; u8 rank = 0xff, socket, ha; int rc, dimm; - char *area_type = "DRAM"; if (pvt->info.type != SANDY_BRIDGE) recoverable = true; @@ -3168,32 +3169,32 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, channel = knl_channel_remap(m->bank == 16, channel); channel_mask = 1 << channel; - snprintf(msg, sizeof(msg), - "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)", - overflow ? " OVERFLOW" : "", - (uncorrected_error && recoverable) - ? 
" recoverable" : " ", - mscod, errcode, channel, A + channel); + snprintf(sb_msg, sizeof(sb_msg), + "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) + ? " recoverable" : " ", + mscod, errcode, channel, A + channel); edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, channel, 0, -1, - optype, msg); + optype, sb_msg); } return; } else if (lsb < 12) { rc = get_memory_error_data(mci, m->addr, &socket, &ha, &channel_mask, &rank, - &area_type, msg); + &area_type, sb_msg); } else { rc = get_memory_error_data_from_mce(mci, m, &socket, &ha, - &channel_mask, msg); + &channel_mask, sb_msg); } if (rc < 0) goto err_parsing; new_mci = get_mci_for_node_id(socket, ha); if (!new_mci) { - strcpy(msg, "Error: socket got corrupted!"); + strscpy(sb_msg, "Error: socket got corrupted!"); goto err_parsing; } mci = new_mci; @@ -3218,7 +3219,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, */ if (!pvt->is_lockstep && !pvt->is_cur_addr_mirrored && !pvt->is_close_pg) channel = first_channel; - snprintf(msg_full, sizeof(msg_full), + snprintf(sb_msg_full, sizeof(sb_msg_full), "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", @@ -3226,9 +3227,9 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, mscod, errcode, socket, ha, channel_mask, - rank, msg); + rank, sb_msg); - edac_dbg(0, "%s\n", msg_full); + edac_dbg(0, "%s\n", sb_msg_full); /* FIXME: need support for channel mask */ @@ -3239,12 +3240,12 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, channel, dimm, -1, - optype, msg_full); + optype, sb_msg_full); return; err_parsing: edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, -1, -1, -1, - msg, ""); + sb_msg, ""); } @@ -3546,13 +3547,13 @@ fail0: } static const struct x86_cpu_id sbridge_cpuids[] = { - X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &pci_dev_descr_sbridge_table), - X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &pci_dev_descr_ibridge_table), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &pci_dev_descr_haswell_table), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &pci_dev_descr_broadwell_table), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &pci_dev_descr_broadwell_table), - X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &pci_dev_descr_knl_table), - X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &pci_dev_descr_knl_table), + X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &pci_dev_descr_sbridge_table), + X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &pci_dev_descr_ibridge_table), + X86_MATCH_VFM(INTEL_HASWELL_X, &pci_dev_descr_haswell_table), + X86_MATCH_VFM(INTEL_BROADWELL_X, &pci_dev_descr_broadwell_table), + X86_MATCH_VFM(INTEL_BROADWELL_D, &pci_dev_descr_broadwell_table), + X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &pci_dev_descr_knl_table), + X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &pci_dev_descr_knl_table), { } }; MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids); diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c new file mode 100644 index 000000000000..f9d02af2fc3a --- /dev/null +++ b/drivers/edac/scrub.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic EDAC scrub driver controls the memory scrubbers in the + * system. The common sysfs scrub interface abstracts the control of + * various arbitrary scrubbing functionalities into a unified set of + * functions. 
+ * + * Copyright (c) 2024-2025 HiSilicon Limited. + */ + +#include <linux/edac.h> + +enum edac_scrub_attributes { + SCRUB_ADDRESS, + SCRUB_SIZE, + SCRUB_ENABLE_BACKGROUND, + SCRUB_MIN_CYCLE_DURATION, + SCRUB_MAX_CYCLE_DURATION, + SCRUB_CUR_CYCLE_DURATION, + SCRUB_MAX_ATTRS +}; + +struct edac_scrub_dev_attr { + struct device_attribute dev_attr; + u8 instance; +}; + +struct edac_scrub_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_scrub_dev_attr scrub_dev_attr[SCRUB_MAX_ATTRS]; + struct attribute *scrub_attrs[SCRUB_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +#define TO_SCRUB_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_scrub_dev_attr, dev_attr) + +#define EDAC_SCRUB_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +EDAC_SCRUB_ATTR_SHOW(addr, read_addr, u64, "0x%llx\n") +EDAC_SCRUB_ATTR_SHOW(size, read_size, u64, "0x%llx\n") +EDAC_SCRUB_ATTR_SHOW(enable_background, get_enabled_bg, bool, "%u\n") +EDAC_SCRUB_ATTR_SHOW(min_cycle_duration, get_min_cycle, u32, "%u\n") +EDAC_SCRUB_ATTR_SHOW(max_cycle_duration, get_max_cycle, u32, "%u\n") +EDAC_SCRUB_ATTR_SHOW(current_cycle_duration, get_cycle_duration, u32, "%u\n") + +#define EDAC_SCRUB_ATTR_STORE(attrib, cb, type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \ + type data; \ + int ret; \ + \ + ret = conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +EDAC_SCRUB_ATTR_STORE(addr, write_addr, u64, kstrtou64) +EDAC_SCRUB_ATTR_STORE(size, write_size, u64, kstrtou64) +EDAC_SCRUB_ATTR_STORE(enable_background, set_enabled_bg, unsigned long, kstrtoul) +EDAC_SCRUB_ATTR_STORE(current_cycle_duration, set_cycle_duration, unsigned long, kstrtoul) + +static umode_t scrub_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct device_attribute *dev_attr = container_of(a, struct device_attribute, attr); + u8 inst = TO_SCRUB_DEV_ATTR(dev_attr)->instance; + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; + + switch (attr_id) { + case SCRUB_ADDRESS: + if (ops->read_addr) { + if (ops->write_addr) + return a->mode; + else + return 0444; + } + break; + case SCRUB_SIZE: + if (ops->read_size) { + if (ops->write_size) + return a->mode; + else + return 0444; + } + break; + case SCRUB_ENABLE_BACKGROUND: + if (ops->get_enabled_bg) { + if (ops->set_enabled_bg) + return a->mode; + else + return 0444; + } + break; + case SCRUB_MIN_CYCLE_DURATION: + if (ops->get_min_cycle) + return a->mode; + break; + case SCRUB_MAX_CYCLE_DURATION: + if (ops->get_max_cycle) + return a->mode; + break; + case 
SCRUB_CUR_CYCLE_DURATION: + if (ops->get_cycle_duration) { + if (ops->set_cycle_duration) + return a->mode; + else + return 0444; + } + break; + default: + break; + } + + return 0; +} + +#define EDAC_SCRUB_ATTR_RO(_name, _instance) \ + ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RO(_name), \ + .instance = _instance }) + +#define EDAC_SCRUB_ATTR_WO(_name, _instance) \ + ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_WO(_name), \ + .instance = _instance }) + +#define EDAC_SCRUB_ATTR_RW(_name, _instance) \ + ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RW(_name), \ + .instance = _instance }) + +static int scrub_create_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, u8 instance) +{ + struct edac_scrub_context *scrub_ctx; + struct attribute_group *group; + int i; + struct edac_scrub_dev_attr dev_attr[] = { + [SCRUB_ADDRESS] = EDAC_SCRUB_ATTR_RW(addr, instance), + [SCRUB_SIZE] = EDAC_SCRUB_ATTR_RW(size, instance), + [SCRUB_ENABLE_BACKGROUND] = EDAC_SCRUB_ATTR_RW(enable_background, instance), + [SCRUB_MIN_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(min_cycle_duration, instance), + [SCRUB_MAX_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(max_cycle_duration, instance), + [SCRUB_CUR_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RW(current_cycle_duration, instance) + }; + + scrub_ctx = devm_kzalloc(scrub_dev, sizeof(*scrub_ctx), GFP_KERNEL); + if (!scrub_ctx) + return -ENOMEM; + + group = &scrub_ctx->group; + for (i = 0; i < SCRUB_MAX_ATTRS; i++) { + memcpy(&scrub_ctx->scrub_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i])); + sysfs_attr_init(&scrub_ctx->scrub_dev_attr[i].dev_attr.attr); + scrub_ctx->scrub_attrs[i] = &scrub_ctx->scrub_dev_attr[i].dev_attr.attr; + } + sprintf(scrub_ctx->name, "%s%d", "scrub", instance); + group->name = scrub_ctx->name; + group->attrs = scrub_ctx->scrub_attrs; + group->is_visible = scrub_attr_visible; + + attr_groups[0] = group; + + return 0; +} + +/** + * edac_scrub_get_desc - get EDAC scrub descriptors + * @scrub_dev: client device, with scrub support + * @attr_groups: pointer to attribute group container + * @instance: device's scrub instance number. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. 
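The caller passes in a group array for edac_scrub_get_desc() to fill; attr_groups[0] receives a group named scrub<instance> whose visible files depend on which ops are populated. A minimal usage sketch (my_ras_add_scrub_group is a hypothetical wrapper):

    #include <linux/edac.h>

    static int my_ras_add_scrub_group(struct device *ras_dev,
                                      const struct attribute_group **groups)
    {
            int ret;

            /* Builds a "scrub0" group for instance 0 of this device. */
            ret = edac_scrub_get_desc(ras_dev, groups, 0);
            if (ret)
                    return ret;     /* -EINVAL or -ENOMEM, per the kernel-doc */

            return 0;
    }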
+ */ +int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, u8 instance) +{ + if (!scrub_dev || !attr_groups) + return -EINVAL; + + return scrub_create_desc(scrub_dev, attr_groups, instance); +} diff --git a/drivers/edac/sifive_edac.c b/drivers/edac/sifive_edac.c index b844e2626fd5..a2b193dc6604 100644 --- a/drivers/edac/sifive_edac.c +++ b/drivers/edac/sifive_edac.c @@ -52,8 +52,7 @@ static int ecc_register(struct platform_device *pdev) platform_set_drvdata(pdev, p); p->dci = edac_device_alloc_ctl_info(0, "sifive_ecc", 1, "sifive_ecc", - 1, 1, NULL, 0, - edac_device_alloc_index()); + 1, 1, edac_device_alloc_index()); if (!p->dci) return -ENOMEM; diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 9397abb42c49..aa6593ccda2d 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -33,6 +33,15 @@ static unsigned int nvdimm_count; #define MASK26 0x3FFFFFF /* Mask for 2^26 */ #define MASK29 0x1FFFFFFF /* Mask for 2^29 */ +static struct res_config skx_cfg = { + .type = SKX, + .decs_did = 0x2016, + .busno_cfg_offset = 0xcc, + .ddr_imc_num = 2, + .ddr_chan_num = 3, + .ddr_dimm_num = 2, +}; + static struct skx_dev *get_skx_dev(struct pci_bus *bus, u8 idx) { struct skx_dev *d; @@ -52,7 +61,7 @@ enum munittype { struct munit { u16 did; - u16 devfn[SKX_NUM_IMC]; + u16 devfn[2]; u8 busidx; u8 per_socket; enum munittype mtype; @@ -89,11 +98,11 @@ static int get_all_munits(const struct munit *m) if (!pdev) break; ndev++; - if (m->per_socket == SKX_NUM_IMC) { - for (i = 0; i < SKX_NUM_IMC; i++) + if (m->per_socket == skx_cfg.ddr_imc_num) { + for (i = 0; i < skx_cfg.ddr_imc_num; i++) if (m->devfn[i] == pdev->devfn) break; - if (i == SKX_NUM_IMC) + if (i == skx_cfg.ddr_imc_num) goto fail; } d = get_skx_dev(pdev->bus, m->busidx); @@ -157,14 +166,8 @@ fail: return -ENODEV; } -static struct res_config skx_cfg = { - .type = SKX, - .decs_did = 0x2016, - .busno_cfg_offset = 0xcc, -}; - static const struct x86_cpu_id skx_cpuids[] = { - X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x0, 0xf), &skx_cfg), + X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_cfg), { } }; MODULE_DEVICE_TABLE(x86cpu, skx_cpuids); @@ -186,11 +189,11 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg) /* Only the mcmtr on the first channel is effective */ pci_read_config_dword(imc->chan[0].cdev, 0x87c, &mcmtr); - for (i = 0; i < SKX_NUM_CHANNELS; i++) { + for (i = 0; i < cfg->ddr_chan_num; i++) { ndimms = 0; pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap); pci_read_config_dword(imc->chan[i].cdev, 0x400, &mcddrtcfg); - for (j = 0; j < SKX_NUM_DIMMS; j++) { + for (j = 0; j < cfg->ddr_dimm_num; j++) { dimm = edac_get_dimm(mci, i, j, 0); pci_read_config_dword(imc->chan[i].cdev, 0x80 + 4 * j, &mtr); @@ -510,7 +513,7 @@ rir_found: } static u8 skx_close_row[] = { - 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33 + 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33, 34 }; static u8 skx_close_column[] = { @@ -518,7 +521,7 @@ static u8 skx_close_column[] = { }; static u8 skx_open_row[] = { - 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33 + 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34 }; static u8 skx_open_column[] = { @@ -587,54 +590,6 @@ static struct notifier_block skx_mce_dec = { .priority = MCE_PRIO_EDAC, }; -#ifdef CONFIG_EDAC_DEBUG -/* - * Debug feature. 
- * Exercise the address decode logic by writing an address to - * /sys/kernel/debug/edac/skx_test/addr. - */ -static struct dentry *skx_test; - -static int debugfs_u64_set(void *data, u64 val) -{ - struct mce m; - - pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); - - memset(&m, 0, sizeof(m)); - /* ADDRV + MemRd + Unknown channel */ - m.status = MCI_STATUS_ADDRV + 0x90; - /* One corrected error */ - m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); - m.addr = val; - skx_mce_check_error(NULL, 0, &m); - - return 0; -} -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); - -static void setup_skx_debug(void) -{ - skx_test = edac_debugfs_create_dir("skx_test"); - if (!skx_test) - return; - - if (!edac_debugfs_create_file("addr", 0200, skx_test, - NULL, &fops_u64_wo)) { - debugfs_remove(skx_test); - skx_test = NULL; - } -} - -static void teardown_skx_debug(void) -{ - debugfs_remove_recursive(skx_test); -} -#else -static inline void setup_skx_debug(void) {} -static inline void teardown_skx_debug(void) {} -#endif /*CONFIG_EDAC_DEBUG*/ - /* * skx_init: * make sure we are running on the correct cpu model @@ -648,7 +603,7 @@ static int __init skx_init(void) const struct munit *m; const char *owner; int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8}; - u8 mc = 0, src_id, node_id; + u8 mc = 0, src_id; struct skx_dev *d; edac_dbg(2, "\n"); @@ -668,6 +623,7 @@ static int __init skx_init(void) return -ENODEV; cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); rc = skx_get_hi_lo(0x2034, off, &skx_tolm, &skx_tohm); if (rc) @@ -698,16 +654,16 @@ static int __init skx_init(void) rc = skx_get_src_id(d, 0xf0, &src_id); if (rc < 0) goto fail; - rc = skx_get_node_id(d, &node_id); - if (rc < 0) - goto fail; - edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id); - for (i = 0; i < SKX_NUM_IMC; i++) { + + edac_dbg(2, "src_id = %d\n", src_id); + for (i = 0; i < cfg->ddr_imc_num; i++) { d->imc[i].mc = mc++; d->imc[i].lmc = i; d->imc[i].src_id = src_id; - d->imc[i].node_id = node_id; - rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev, + d->imc[i].num_channels = cfg->ddr_chan_num; + d->imc[i].num_dimms = cfg->ddr_dimm_num; + rc = skx_register_mci(&d->imc[i], &d->imc[i].chan[0].cdev->dev, + pci_name(d->imc[i].chan[0].cdev), "Skylake Socket", EDAC_MOD_STR, skx_get_dimm_config, cfg); if (rc < 0) @@ -728,7 +684,7 @@ static int __init skx_init(void) /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); - setup_skx_debug(); + skx_setup_debug("skx_test"); mce_register_decode_chain(&skx_mce_dec); @@ -742,7 +698,7 @@ static void __exit skx_exit(void) { edac_dbg(2, "\n"); mce_unregister_decode_chain(&skx_mce_dec); - teardown_skx_debug(); + skx_teardown_debug(); if (nvdimm_count) skx_adxl_put(); skx_remove(); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index f0f8e98f6efb..3276afe43922 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -14,11 +14,14 @@ * Copyright (c) 2018, Intel Corporation. 
*/ +#include <linux/topology.h> #include <linux/acpi.h> #include <linux/dmi.h> #include <linux/adxl.h> +#include <linux/overflow.h> #include <acpi/nfit.h> #include <asm/mce.h> +#include <asm/uv/uv.h> #include "edac_module.h" #include "skx_common.h" @@ -47,8 +50,9 @@ static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); static bool skx_mem_cfg_2lm; +static struct res_config *skx_res_cfg; -int __init skx_adxl_get(void) +int skx_adxl_get(void) { const char * const *names; int i, j; @@ -110,17 +114,56 @@ err: return -ENODEV; } +EXPORT_SYMBOL_GPL(skx_adxl_get); -void __exit skx_adxl_put(void) +void skx_adxl_put(void) { + adxl_component_count = 0; kfree(adxl_values); kfree(adxl_msg); } +EXPORT_SYMBOL_GPL(skx_adxl_put); -static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem) +void skx_init_mc_mapping(struct skx_dev *d) { + /* + * By default, the BIOS presents all memory controllers within each + * socket to the EDAC driver. The physical indices are the same as + * the logical indices of the memory controllers enumerated by the + * EDAC driver. + */ + for (int i = 0; i < d->num_imc; i++) + d->imc[i].mc_mapping = i; +} +EXPORT_SYMBOL_GPL(skx_init_mc_mapping); + +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc) +{ + edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, lmc); + + d->imc[lmc].mc_mapping = pmc; +} +EXPORT_SYMBOL_GPL(skx_set_mc_mapping); + +static int skx_get_mc_mapping(struct skx_dev *d, u8 pmc) +{ + for (int lmc = 0; lmc < d->num_imc; lmc++) { + if (d->imc[lmc].mc_mapping == pmc) { + edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, lmc); + + return lmc; + } + } + + return -1; +} + +static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) +{ + int i, lmc, len = 0; struct skx_dev *d; - int i, len = 0; if (res->addr >= skx_tohm || (res->addr >= skx_tolm && res->addr < BIT_ULL(32))) { @@ -133,8 +176,24 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me return false; } + /* + * GNR with a Flat2LM memory configuration may mistakenly classify + * a near-memory error(DDR5) as a far-memory error(CXL), resulting + * in the incorrect selection of decoded ADXL components. + * To address this, prefetch the decoded far-memory controller ID + * and adjust the error source to near-memory if the far-memory + * controller ID is invalid. + */ + if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) { + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; + if (res->imc == -1) { + err_src = ERR_SRC_2LM_NM; + edac_dbg(0, "Adjust the error source to near-memory.\n"); + } + } + res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; - if (error_in_1st_level_mem) { + if (err_src == ERR_SRC_2LM_NM) { res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1; res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ? 
@@ -150,7 +209,7 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me res->cs = (int)adxl_values[component_indices[INDEX_CS]]; } - if (res->imc > NUM_IMC - 1 || res->imc < 0) { + if (res->imc < 0) { skx_printk(KERN_ERR, "Bad imc %d\n", res->imc); return false; } @@ -168,6 +227,14 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me return false; } + lmc = skx_get_mc_mapping(d, res->imc); + if (lmc < 0) { + skx_printk(KERN_ERR, "No lmc for imc %d\n", res->imc); + return false; + } + + res->imc = lmc; + for (i = 0; i < adxl_component_count; i++) { if (adxl_values[i] == ~0x0ull) continue; @@ -187,38 +254,66 @@ void skx_set_mem_cfg(bool mem_cfg_2lm) { skx_mem_cfg_2lm = mem_cfg_2lm; } +EXPORT_SYMBOL_GPL(skx_set_mem_cfg); + +void skx_set_res_cfg(struct res_config *cfg) +{ + skx_res_cfg = cfg; +} +EXPORT_SYMBOL_GPL(skx_set_res_cfg); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) { driver_decode = decode; skx_show_retry_rd_err_log = show_retry_log; } +EXPORT_SYMBOL_GPL(skx_set_decode); -int skx_get_src_id(struct skx_dev *d, int off, u8 *id) +static int skx_get_pkg_id(struct skx_dev *d, u8 *id) { - u32 reg; + int node; + int cpu; - if (pci_read_config_dword(d->util_all, off, ®)) { - skx_printk(KERN_ERR, "Failed to read src id\n"); - return -ENODEV; + node = pcibus_to_node(d->util_all->bus); + if (numa_valid_node(node)) { + for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) { + *id = topology_physical_package_id(cpu); + return 0; + } + } } - *id = GET_BITFIELD(reg, 12, 14); - return 0; + skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n"); + return -ENODEV; } -int skx_get_node_id(struct skx_dev *d, u8 *id) +int skx_get_src_id(struct skx_dev *d, int off, u8 *id) { u32 reg; - if (pci_read_config_dword(d->util_all, 0xf4, ®)) { - skx_printk(KERN_ERR, "Failed to read node id\n"); + /* + * The 3-bit source IDs in PCI configuration space registers are limited + * to 8 unique IDs, and each ID is local to a UPI/QPI domain. + * + * Source IDs cannot be used to map devices to sockets on UV systems + * because they can exceed 8 sockets and have multiple UPI/QPI domains + * with identical, repeating source IDs. 
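+ *
+ * For example (illustrative), a 16-socket UV system built from two
+ * 8-socket UPI domains reports source IDs 0-7 twice, so source ID 3
+ * alone cannot identify a socket. On such systems the package ID is
+ * instead derived from the NUMA topology via skx_get_pkg_id() above.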
+ */ + if (is_uv_system()) + return skx_get_pkg_id(d, id); + + if (pci_read_config_dword(d->util_all, off, ®)) { + skx_printk(KERN_ERR, "Failed to read src id\n"); return -ENODEV; } - *id = GET_BITFIELD(reg, 0, 2); + *id = GET_BITFIELD(reg, 12, 14); return 0; } +EXPORT_SYMBOL_GPL(skx_get_src_id); static int get_width(u32 mtr) { @@ -240,10 +335,10 @@ static int get_width(u32 mtr) */ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) { + int ndev = 0, imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num; struct pci_dev *pdev, *prev; struct skx_dev *d; u32 reg; - int ndev = 0; prev = NULL; for (;;) { @@ -251,7 +346,7 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) if (!pdev) break; ndev++; - d = kzalloc(sizeof(*d), GFP_KERNEL); + d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL); if (!d) { pci_dev_put(pdev); return -ENOMEM; @@ -274,16 +369,27 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) d->seg = GET_BITFIELD(reg, 16, 23); } - edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n", - d->bus[0], d->bus[1], d->bus[2], d->bus[3]); + d->num_imc = imc_num; + + edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x, imcs %d\n", + d->bus[0], d->bus[1], d->bus[2], d->bus[3], imc_num); list_add_tail(&d->list, &dev_edac_list); prev = pdev; + + skx_init_mc_mapping(d); } if (list) *list = &dev_edac_list; return ndev; } +EXPORT_SYMBOL_GPL(skx_get_all_bus_mappings); + +struct list_head *skx_get_edac_list(void) +{ + return &dev_edac_list; +} +EXPORT_SYMBOL_GPL(skx_get_edac_list); int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm) { @@ -323,6 +429,14 @@ fail: pci_dev_put(pdev); return -ENODEV; } +EXPORT_SYMBOL_GPL(skx_get_hi_lo); + +void skx_set_hi_lo(u64 tolm, u64 tohm) +{ + skx_tolm = tolm; + skx_tohm = tohm; +} +EXPORT_SYMBOL_GPL(skx_set_hi_lo); static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval, int maxval, const char *name) @@ -337,7 +451,7 @@ static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, } #define numrank(reg) skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks") -#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows") +#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 7, "rows") #define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols") int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, @@ -355,7 +469,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, if (imc->hbm_mc) { banks = 32; mtype = MEM_HBM2; - } else if (cfg->support_ddr5 && (amap & 0x8)) { + } else if (cfg->support_ddr5) { banks = 32; mtype = MEM_DDR5; } else { @@ -394,6 +508,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, return 1; } +EXPORT_SYMBOL_GPL(skx_get_dimm_info); int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno, const char *mod_str) @@ -442,10 +557,11 @@ unknown_size: return (size == 0 || size == ~0ull) ? 
0 : 1; } +EXPORT_SYMBOL_GPL(skx_get_nvdimm_info); -int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, - const char *ctl_name, const char *mod_str, - get_dimm_config_f get_dimm_config, +int skx_register_mci(struct skx_imc *imc, struct device *dev, + const char *dev_name, const char *ctl_name, + const char *mod_str, get_dimm_config_f get_dimm_config, struct res_config *cfg) { struct mem_ctl_info *mci; @@ -455,10 +571,10 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, /* Allocate a new MC control structure */ layers[0].type = EDAC_MC_LAYER_CHANNEL; - layers[0].size = NUM_CHANNELS; + layers[0].size = imc->num_channels; layers[0].is_virt_csrow = false; layers[1].type = EDAC_MC_LAYER_SLOT; - layers[1].size = NUM_DIMMS; + layers[1].size = imc->num_dimms; layers[1].is_virt_csrow = true; mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers, sizeof(struct skx_pvt)); @@ -474,7 +590,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, pvt->imc = imc; mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name, - imc->node_id, imc->lmc); + imc->src_id, imc->lmc); if (!mci->ctl_name) { rc = -ENOMEM; goto fail0; @@ -486,7 +602,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = mod_str; - mci->dev_name = pci_name(pdev); + mci->dev_name = dev_name; mci->ctl_page_to_phys = NULL; rc = get_dimm_config(mci, cfg); @@ -494,7 +610,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, goto fail; /* Record ptr to the generic device */ - mci->pdev = &pdev->dev; + mci->pdev = dev; /* Add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { @@ -512,6 +628,7 @@ fail0: imc->mci = NULL; return rc; } +EXPORT_SYMBOL_GPL(skx_register_mci); static void skx_unregister_mci(struct skx_imc *imc) { @@ -560,51 +677,35 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, tp_event = HW_EVENT_ERR_CORRECTED; } - /* - * According to Intel Architecture spec vol 3B, - * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" - * memory errors should fit one of these masks: - * 000f 0000 1mmm cccc (binary) - * 000f 0010 1mmm cccc (binary) [RAM used as cache] - * where: - * f = Correction Report Filtering Bit. If 1, subsequent errors - * won't be shown - * mmm = error type - * cccc = channel - * If the mask doesn't match, report an error to the parsing logic - */ - if (!((errcode & 0xef80) == 0x80 || (errcode & 0xef80) == 0x280)) { - optype = "Can't parse: it is not a mem"; - } else { - switch (optypenum) { - case 0: - optype = "generic undef request error"; - break; - case 1: - optype = "memory read error"; - break; - case 2: - optype = "memory write error"; - break; - case 3: - optype = "addr/cmd error"; - break; - case 4: - optype = "memory scrubbing error"; - scrub_err = true; - break; - default: - optype = "reserved"; - break; - } + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + scrub_err = true; + break; + default: + optype = "reserved"; + break; } + if (res->decoded_by_adxl) { - len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", + len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", overflow ? 
" OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", mscod, errcode, adxl_msg); } else { - len = snprintf(skx_msg, MSG_SIZE, + len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", @@ -625,25 +726,27 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, optype, skx_msg); } -static bool skx_error_in_1st_level_mem(const struct mce *m) +static enum error_source skx_error_source(const struct mce *m) { - u32 errcode; + u32 errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; - if (!skx_mem_cfg_2lm) - return false; + if (errcode != MCACOD_MEM_CTL_ERR && errcode != MCACOD_EXT_MEM_ERR) + return ERR_SRC_NOT_MEMORY; - errcode = GET_BITFIELD(m->status, 0, 15); + if (!skx_mem_cfg_2lm) + return ERR_SRC_1LM; - if ((errcode & 0xef80) != 0x280) - return false; + if (errcode == MCACOD_EXT_MEM_ERR) + return ERR_SRC_2LM_NM; - return true; + return ERR_SRC_2LM_FM; } int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) { struct mce *mce = (struct mce *)data; + enum error_source err_src; struct decoded_addr res; struct mem_ctl_info *mci; char *type; @@ -651,18 +754,24 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; - /* ignore unless this is memory related with an address */ - if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) + err_src = skx_error_source(mce); + + /* Ignore unless this is memory related with an address */ + if (err_src == ERR_SRC_NOT_MEMORY || !(mce->status & MCI_STATUS_ADDRV)) return NOTIFY_DONE; memset(&res, 0, sizeof(res)); res.mce = mce; - res.addr = mce->addr; + res.addr = mce->addr & MCI_ADDR_PHYSADDR; + if (!pfn_to_online_page(res.addr >> PAGE_SHIFT) && !arch_is_platform_page(res.addr)) { + pr_err("Invalid address 0x%llx in IA32_MC%d_ADDR\n", mce->addr, mce->bank); + return NOTIFY_DONE; + } /* Try driver decoder first */ if (!(driver_decode && driver_decode(&res))) { /* Then try firmware decoder (ACPI DSM methods) */ - if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))) + if (!(adxl_component_count && skx_adxl_decode(&res, err_src))) return NOTIFY_DONE; } @@ -694,6 +803,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, mce->kflags |= MCE_HANDLED_EDAC; return NOTIFY_DONE; } +EXPORT_SYMBOL_GPL(skx_mce_check_error); void skx_remove(void) { @@ -704,7 +814,7 @@ void skx_remove(void) list_for_each_entry_safe(d, tmp, &dev_edac_list, list) { list_del(&d->list); - for (i = 0; i < NUM_IMC; i++) { + for (i = 0; i < d->num_imc; i++) { if (d->imc[i].mci) skx_unregister_mci(&d->imc[i]); @@ -714,7 +824,10 @@ void skx_remove(void) if (d->imc[i].mbase) iounmap(d->imc[i].mbase); - for (j = 0; j < NUM_CHANNELS; j++) { + if (d->imc[i].dev) + put_device(d->imc[i].dev); + + for (j = 0; j < d->imc[i].num_channels; j++) { if (d->imc[i].chan[j].cdev) pci_dev_put(d->imc[i].chan[j].cdev); } @@ -731,3 +844,55 @@ void skx_remove(void) kfree(d); } } +EXPORT_SYMBOL_GPL(skx_remove); + +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. + * Exercise the address decode logic by writing an address to + * /sys/kernel/debug/edac/{skx,i10nm,imh}_test/addr. 
+ */ +static struct dentry *skx_test; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct mce m; + + pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); + + memset(&m, 0, sizeof(m)); + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x90; + /* One corrected error */ + m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); + m.addr = val; + skx_mce_check_error(NULL, 0, &m); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +void skx_setup_debug(const char *name) +{ + skx_test = edac_debugfs_create_dir(name); + if (!skx_test) + return; + + if (!edac_debugfs_create_file("addr", 0200, skx_test, + NULL, &fops_u64_wo)) { + debugfs_remove(skx_test); + skx_test = NULL; + } +} +EXPORT_SYMBOL_GPL(skx_setup_debug); + +void skx_teardown_debug(void) +{ + debugfs_remove_recursive(skx_test); +} +EXPORT_SYMBOL_GPL(skx_teardown_debug); +#endif /*CONFIG_EDAC_DEBUG*/ + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tony Luck"); +MODULE_DESCRIPTION("MC Driver for Intel server processors"); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 0cbadd3d2cd3..f88038e5b18c 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -29,24 +29,18 @@ #define GET_BITFIELD(v, lo, hi) \ (((v) & GENMASK_ULL((hi), (lo))) >> (lo)) -#define SKX_NUM_IMC 2 /* Memory controllers per socket */ #define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ #define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ -#define I10NM_NUM_DDR_IMC 4 #define I10NM_NUM_DDR_CHANNELS 2 #define I10NM_NUM_DDR_DIMMS 2 -#define I10NM_NUM_HBM_IMC 16 #define I10NM_NUM_HBM_CHANNELS 2 #define I10NM_NUM_HBM_DIMMS 1 -#define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC) #define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) #define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) #define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS) #define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS) @@ -57,36 +51,127 @@ #define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ /* + * According to Intel Architecture spec vol 3B, + * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" + * memory errors should fit one of these masks: + * 000f 0000 1mmm cccc (binary) + * 000f 0010 1mmm cccc (binary) [RAM used as cache] + * where: + * f = Correction Report Filtering Bit. If 1, subsequent errors + * won't be shown + * mmm = error type + * cccc = channel + */ +#define MCACOD_MEM_ERR_MASK 0xef80 +/* + * Errors from either the memory of the 1-level memory system or the + * 2nd level memory (the slow "far" memory) of the 2-level memory system. + */ +#define MCACOD_MEM_CTL_ERR 0x80 +/* + * Errors from the 1st level memory (the fast "near" memory as cache) + * of the 2-level memory system. + */ +#define MCACOD_EXT_MEM_ERR 0x280 + +/* Max RRL register sets per {,sub-,pseudo-}channel. */ +#define NUM_RRL_SET 4 +/* Max RRL registers per set. */ +#define NUM_RRL_REG 6 +/* Max correctable error count registers. */ +#define NUM_CECNT_REG 8 + +/* Modes of RRL register set. */ +enum rrl_mode { + /* Last read error from patrol scrub. */ + LRE_SCRUB, + /* Last read error from demand. */ + LRE_DEMAND, + /* First read error from patrol scrub. */ + FRE_SCRUB, + /* First read error from demand. */ + FRE_DEMAND, +}; + +/* RRL registers per {,sub-,pseudo-}channel. */ +struct reg_rrl { + /* RRL register parts. 
*/
+	int set_num, reg_num;
+	enum rrl_mode modes[NUM_RRL_SET];
+	u32 offsets[NUM_RRL_SET][NUM_RRL_REG];
+	/* RRL register widths in bytes per set. */
+	u8 widths[NUM_RRL_REG];
+	/* RRL control bits of the first register per set. */
+	u32 v_mask;
+	u32 uc_mask;
+	u32 over_mask;
+	u32 en_patspr_mask;
+	u32 noover_mask;
+	u32 en_mask;
+
+	/* CORRERRCNT register parts. */
+	int cecnt_num;
+	u32 cecnt_offsets[NUM_CECNT_REG];
+	u8 cecnt_widths[NUM_CECNT_REG];
+};
+
+/*
 * Each cpu socket contains some pci devices that provide global
 * information, and also some that are local to each of the two
 * memory controllers on the die.
 */
 struct skx_dev {
-	struct list_head list;
+	/* {skx,i10nm}_edac */
	u8 bus[4];
	int seg;
	struct pci_dev *sad_all;
	struct pci_dev *util_all;
-	struct pci_dev *uracu; /* for i10nm CPU */
-	struct pci_dev *pcu_cr3; /* for HBM memory detection */
+	struct pci_dev *uracu;
+	struct pci_dev *pcu_cr3;
	u32 mcroute;
+
+	/* imh_edac */
+	/* System-view MMIO base physical addresses. */
+	u64 mmio_base_h_north;
+	u64 mmio_base_h_south;
+	int pkg;
+
+	int num_imc;
+	struct list_head list;
	struct skx_imc {
+		/* i10nm_edac */
+		struct pci_dev *mdev;
+
+		/* imh_edac */
+		struct device *dev;
+
		struct mem_ctl_info *mci;
-		struct pci_dev *mdev; /* for i10nm CPU */
-		void __iomem *mbase; /* for i10nm CPU */
-		int chan_mmio_sz; /* for i10nm CPU */
+		void __iomem *mbase;
+		int chan_mmio_sz;
		int num_channels; /* channels per memory controller */
		int num_dimms; /* dimms per channel */
		bool hbm_mc;
		u8 mc;	/* system wide mc# */
		u8 lmc;	/* socket relative mc# */
-		u8 src_id, node_id;
+		u8 src_id;
+		/*
+		 * Some server BIOS may hide certain memory controllers, and the
+		 * EDAC driver skips those hidden memory controllers. However, the
+		 * ADXL still decodes memory error addresses using physical memory
+		 * controller indices. The mapping table is used to convert the
+		 * physical indices (reported by ADXL) to the logical indices
+		 * (used by the EDAC driver) of present memory controllers during
+		 * the error handling process.
+		 */
+		u8 mc_mapping;
		struct skx_channel {
			struct pci_dev *cdev;
			struct pci_dev *edev;
-			u32 retry_rd_err_log_s;
-			u32 retry_rd_err_log_d;
-			u32 retry_rd_err_log_d2;
+			/*
+			 * Two groups of RRL control registers per channel to save default RRL
+			 * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
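+			 * For example (illustrative), rrl_ctl[1][0] saves the
+			 * control bits of the first RRL register set of the
+			 * second {sub-,pseudo-}channel.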
+ */ + u32 rrl_ctl[2][NUM_RRL_SET]; struct skx_dimm { u8 close_pg; u8 bank_xor_enable; @@ -95,7 +180,7 @@ struct skx_dev { u8 colbits; } dimms[NUM_DIMMS]; } chan[NUM_CHANNELS]; - } imc[NUM_IMC]; + } imc[]; }; struct skx_pvt { @@ -105,7 +190,9 @@ struct skx_pvt { enum type { SKX, I10NM, - SPR + SPR, + GNR, + DMR, }; enum { @@ -122,6 +209,13 @@ enum { INDEX_MAX }; +enum error_source { + ERR_SRC_1LM, + ERR_SRC_2LM_NM, + ERR_SRC_2LM_FM, + ERR_SRC_NOT_MEMORY, +}; + #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) @@ -149,28 +243,82 @@ struct decoded_addr { bool decoded_by_adxl; }; +struct pci_bdf { + u32 bus : 8; + u32 dev : 5; + u32 fun : 3; +}; + struct res_config { enum type type; - /* Configuration agent device ID */ - unsigned int decs_did; - /* Default bus number configuration register offset */ - int busno_cfg_offset; + /* DDR memory controllers per socket */ + int ddr_imc_num; + /* DDR channels per DDR memory controller */ + int ddr_chan_num; + /* DDR DIMMs per DDR memory channel */ + int ddr_dimm_num; /* Per DDR channel memory-mapped I/O size */ int ddr_chan_mmio_sz; + /* HBM memory controllers per socket */ + int hbm_imc_num; + /* HBM channels per HBM memory controller */ + int hbm_chan_num; + /* HBM DIMMs per HBM memory channel */ + int hbm_dimm_num; /* Per HBM channel memory-mapped I/O size */ int hbm_chan_mmio_sz; bool support_ddr5; - /* SAD device number and function number */ - unsigned int sad_all_devfn; - int sad_all_offset; - /* Offsets of retry_rd_err_log registers */ - u32 *offsets_scrub; - u32 *offsets_scrub_hbm0; - u32 *offsets_scrub_hbm1; - u32 *offsets_demand; - u32 *offsets_demand2; - u32 *offsets_demand_hbm0; - u32 *offsets_demand_hbm1; + /* RRL register sets per DDR channel */ + struct reg_rrl *reg_rrl_ddr; + /* RRL register sets per HBM channel */ + struct reg_rrl *reg_rrl_hbm[2]; + union { + /* {skx,i10nm}_edac */ + struct { + /* Configuration agent device ID */ + unsigned int decs_did; + /* Default bus number configuration register offset */ + int busno_cfg_offset; + struct pci_bdf sad_all_bdf; + struct pci_bdf pcu_cr3_bdf; + struct pci_bdf util_all_bdf; + struct pci_bdf uracu_bdf; + struct pci_bdf ddr_mdev_bdf; + struct pci_bdf hbm_mdev_bdf; + int sad_all_offset; + }; + /* imh_edac */ + struct { + /* MMIO base physical address in local package view */ + u64 mmio_base_l_north; + u64 mmio_base_l_south; + u64 ddr_imc_base; + u64 ddr_reg_mcmtr_offset; + u8 ddr_reg_mcmtr_width; + u64 ddr_reg_dimmmtr_offset; + u8 ddr_reg_dimmmtr_width; + u64 ubox_base; + u32 ubox_size; + u32 ubox_reg_mmio_base_offset; + u8 ubox_reg_mmio_base_width; + u32 ubox_reg_socket_id_offset; + u8 ubox_reg_socket_id_width; + u64 pcu_base; + u32 pcu_size; + u32 pcu_reg_capid3_offset; + u8 pcu_reg_capid3_width; + u64 sca_base; + u32 sca_size; + u32 sca_reg_tolm_offset; + u8 sca_reg_tolm_width; + u32 sca_reg_tohm_offset; + u8 sca_reg_tohm_width; + u64 ha_base; + u32 ha_size; + u32 ha_reg_mode_offset; + u8 ha_reg_mode_width; + }; + }; }; typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, @@ -178,17 +326,22 @@ typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, typedef bool (*skx_decode_f)(struct decoded_addr *res); typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err); -int __init skx_adxl_get(void); -void __exit skx_adxl_put(void); +int skx_adxl_get(void); +void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f 
show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); +void skx_set_res_cfg(struct res_config *cfg); +void skx_init_mc_mapping(struct skx_dev *d); +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); -int skx_get_node_id(struct skx_dev *d, u8 *id); int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); +struct list_head *skx_get_edac_list(void); + int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); +void skx_set_hi_lo(u64 tolm, u64 tohm); int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno, @@ -197,7 +350,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno, const char *mod_str); -int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, +int skx_register_mci(struct skx_imc *imc, struct device *dev, const char *dev_name, const char *ctl_name, const char *mod_str, get_dimm_config_f get_dimm_config, struct res_config *cfg); @@ -207,4 +360,12 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void skx_remove(void); +#ifdef CONFIG_EDAC_DEBUG +void skx_setup_debug(const char *name); +void skx_teardown_debug(void); +#else +static inline void skx_setup_debug(const char *name) {} +static inline void skx_teardown_debug(void) {} +#endif + #endif /* _SKX_COMM_EDAC_H */ diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index f7d37c282819..51143b3257de 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -9,9 +9,10 @@ #include <linux/edac.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/sizes.h> #include <linux/interrupt.h> #include <linux/of.h> -#include <linux/of_device.h> #include "edac_module.h" @@ -300,6 +301,7 @@ struct synps_ecc_status { /** * struct synps_edac_priv - DDR memory controller private instance data. * @baseaddr: Base address of the DDR controller. + * @reglock: Concurrent CSRs access lock. * @message: Buffer for framing the event specific info. * @stat: ECC status information. * @p_data: Platform data. @@ -314,6 +316,7 @@ struct synps_ecc_status { */ struct synps_edac_priv { void __iomem *baseaddr; + spinlock_t reglock; char message[SYNPS_EDAC_MSG_SIZE]; struct synps_ecc_status stat; const struct synps_platform_data *p_data; @@ -329,19 +332,29 @@ struct synps_edac_priv { #endif }; +enum synps_platform_type { + ZYNQ, + ZYNQMP, + SYNPS, +}; + /** * struct synps_platform_data - synps platform data structure. + * @platform: Identifies the target hardware platform * @get_error_info: Get EDAC error info. * @get_mtype: Get mtype. * @get_dtype: Get dtype. - * @get_ecc_state: Get ECC state. + * @get_mem_info: Get EDAC memory info * @quirks: To differentiate IPs. */ struct synps_platform_data { + enum synps_platform_type platform; int (*get_error_info)(struct synps_edac_priv *priv); enum mem_type (*get_mtype)(const void __iomem *base); enum dev_type (*get_dtype)(const void __iomem *base); - bool (*get_ecc_state)(void __iomem *base); +#ifdef CONFIG_EDAC_DEBUG + u64 (*get_mem_info)(struct synps_edac_priv *priv); +#endif int quirks; }; @@ -400,6 +413,25 @@ out: return 0; } +#ifdef CONFIG_EDAC_DEBUG +/** + * zynqmp_get_mem_info - Get the current memory info. + * @priv: DDR memory controller private instance data. + * + * Return: host interface address. 
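+ *
+ * Worked example (illustrative): a poison address of 0x800000000 (the
+ * 32 GiB boundary) is at or above SZ_32G, so it is first remapped to
+ * 0x800000000 - SZ_32G + SZ_2G = 0x80000000, and the returned host
+ * interface address is 0x80000000 >> 3 = 0x10000000.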
+ */ +static u64 zynqmp_get_mem_info(struct synps_edac_priv *priv) +{ + u64 hif_addr = 0, linear_addr; + + linear_addr = priv->poison_addr; + if (linear_addr >= SZ_32G) + linear_addr = linear_addr - SZ_32G + SZ_2G; + hif_addr = linear_addr >> 3; + return hif_addr; +} +#endif + /** * zynqmp_get_error_info - Get the current ECC error info. * @priv: DDR memory controller private instance data. @@ -409,7 +441,8 @@ out: static int zynqmp_get_error_info(struct synps_edac_priv *priv) { struct synps_ecc_status *p; - u32 regval, clearval = 0; + u32 regval, clearval; + unsigned long flags; void __iomem *base; base = priv->baseaddr; @@ -453,10 +486,14 @@ ue_err: p->ueinfo.blknr = (regval & ECC_CEADDR1_BLKNR_MASK); p->ueinfo.data = readl(base + ECC_UESYND0_OFST); out: - clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT; - clearval |= ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT; + spin_lock_irqsave(&priv->reglock, flags); + + clearval = readl(base + ECC_CLR_OFST) | + ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT | + ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT; writel(clearval, base + ECC_CLR_OFST); - writel(0x0, base + ECC_CLR_OFST); + + spin_unlock_irqrestore(&priv->reglock, flags); return 0; } @@ -516,24 +553,41 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p) static void enable_intr(struct synps_edac_priv *priv) { + unsigned long flags; + /* Enable UE/CE Interrupts */ - if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR) - writel(DDR_UE_MASK | DDR_CE_MASK, - priv->baseaddr + ECC_CLR_OFST); - else + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) { writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, priv->baseaddr + DDR_QOS_IRQ_EN_OFST); + return; + } + + spin_lock_irqsave(&priv->reglock, flags); + + writel(DDR_UE_MASK | DDR_CE_MASK, + priv->baseaddr + ECC_CLR_OFST); + + spin_unlock_irqrestore(&priv->reglock, flags); } static void disable_intr(struct synps_edac_priv *priv) { + unsigned long flags; + /* Disable UE/CE Interrupts */ - if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR) - writel(0x0, priv->baseaddr + ECC_CLR_OFST); - else + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) { writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, priv->baseaddr + DDR_QOS_IRQ_DB_OFST); + + return; + } + + spin_lock_irqsave(&priv->reglock, flags); + + writel(0, priv->baseaddr + ECC_CLR_OFST); + + spin_unlock_irqrestore(&priv->reglock, flags); } /** @@ -577,8 +631,6 @@ static irqreturn_t intr_handler(int irq, void *dev_id) /* v3.0 of the controller does not have this register */ if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST); - else - enable_intr(priv); return IRQ_HANDLED; } @@ -674,51 +726,38 @@ static enum dev_type zynqmp_get_dtype(const void __iomem *base) return dt; } -/** - * zynq_get_ecc_state - Return the controller ECC enable/disable status. - * @base: DDR memory controller base address. - * - * Get the ECC enable/disable status of the controller. - * - * Return: true if enabled, otherwise false. - */ -static bool zynq_get_ecc_state(void __iomem *base) -{ - enum dev_type dt; - u32 ecctype; - - dt = zynq_get_dtype(base); - if (dt == DEV_UNKNOWN) - return false; - - ecctype = readl(base + SCRUB_OFST) & SCRUB_MODE_MASK; - if ((ecctype == SCRUB_MODE_SECDED) && (dt == DEV_X2)) - return true; - - return false; -} - -/** - * zynqmp_get_ecc_state - Return the controller ECC enable/disable status. - * @base: DDR memory controller base address. - * - * Get the ECC enable/disable status for the controller. 
- * - * Return: a ECC status boolean i.e true/false - enabled/disabled. - */ -static bool zynqmp_get_ecc_state(void __iomem *base) +static bool get_ecc_state(struct synps_edac_priv *priv) { + u32 ecctype, clearval; enum dev_type dt; - u32 ecctype; - - dt = zynqmp_get_dtype(base); - if (dt == DEV_UNKNOWN) - return false; - ecctype = readl(base + ECC_CFG0_OFST) & SCRUB_MODE_MASK; - if ((ecctype == SCRUB_MODE_SECDED) && - ((dt == DEV_X2) || (dt == DEV_X4) || (dt == DEV_X8))) - return true; + if (priv->p_data->platform == ZYNQ) { + dt = zynq_get_dtype(priv->baseaddr); + if (dt == DEV_UNKNOWN) + return false; + + ecctype = readl(priv->baseaddr + SCRUB_OFST) & SCRUB_MODE_MASK; + if (ecctype == SCRUB_MODE_SECDED && dt == DEV_X2) { + clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_UE_ERR; + writel(clearval, priv->baseaddr + ECC_CTRL_OFST); + writel(0x0, priv->baseaddr + ECC_CTRL_OFST); + return true; + } + } else { + dt = zynqmp_get_dtype(priv->baseaddr); + if (dt == DEV_UNKNOWN) + return false; + + ecctype = readl(priv->baseaddr + ECC_CFG0_OFST) & SCRUB_MODE_MASK; + if (ecctype == SCRUB_MODE_SECDED && + (dt == DEV_X2 || dt == DEV_X4 || dt == DEV_X8)) { + clearval = readl(priv->baseaddr + ECC_CLR_OFST) | + ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT | + ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT; + writel(clearval, priv->baseaddr + ECC_CLR_OFST); + return true; + } + } return false; } @@ -888,18 +927,21 @@ static int setup_irq(struct mem_ctl_info *mci, } static const struct synps_platform_data zynq_edac_def = { + .platform = ZYNQ, .get_error_info = zynq_get_error_info, .get_mtype = zynq_get_mtype, .get_dtype = zynq_get_dtype, - .get_ecc_state = zynq_get_ecc_state, .quirks = 0, }; static const struct synps_platform_data zynqmp_edac_def = { + .platform = ZYNQMP, .get_error_info = zynqmp_get_error_info, .get_mtype = zynqmp_get_mtype, .get_dtype = zynqmp_get_dtype, - .get_ecc_state = zynqmp_get_ecc_state, +#ifdef CONFIG_EDAC_DEBUG + .get_mem_info = zynqmp_get_mem_info, +#endif .quirks = (DDR_ECC_INTR_SUPPORT #ifdef CONFIG_EDAC_DEBUG | DDR_ECC_DATA_POISON_SUPPORT @@ -908,10 +950,10 @@ static const struct synps_platform_data zynqmp_edac_def = { }; static const struct synps_platform_data synopsys_edac_def = { + .platform = SYNPS, .get_error_info = zynqmp_get_error_info, .get_mtype = zynqmp_get_mtype, .get_dtype = zynqmp_get_dtype, - .get_ecc_state = zynqmp_get_ecc_state, .quirks = (DDR_ECC_INTR_SUPPORT | DDR_ECC_INTR_SELF_CLEAR #ifdef CONFIG_EDAC_DEBUG | DDR_ECC_DATA_POISON_SUPPORT @@ -953,10 +995,16 @@ MODULE_DEVICE_TABLE(of, synps_edac_match); static void ddr_poison_setup(struct synps_edac_priv *priv) { int col = 0, row = 0, bank = 0, bankgrp = 0, rank = 0, regval; + const struct synps_platform_data *p_data; int index; ulong hif_addr = 0; - hif_addr = priv->poison_addr >> 3; + p_data = priv->p_data; + + if (p_data->get_mem_info) + hif_addr = p_data->get_mem_info(priv); + else + hif_addr = priv->poison_addr >> 3; for (index = 0; index < DDR_MAX_ROW_SHIFT; index++) { if (priv->row_shift[index]) @@ -1325,11 +1373,9 @@ static int mc_probe(struct platform_device *pdev) struct synps_edac_priv *priv; struct mem_ctl_info *mci; void __iomem *baseaddr; - struct resource *res; int rc; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - baseaddr = devm_ioremap_resource(&pdev->dev, res); + baseaddr = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(baseaddr)) return PTR_ERR(baseaddr); @@ -1337,10 +1383,6 @@ static int mc_probe(struct platform_device *pdev) if (!p_data) return -ENODEV; - if 
(!p_data->get_ecc_state(baseaddr)) { - edac_printk(KERN_INFO, EDAC_MC, "ECC not enabled\n"); - return -ENXIO; - } layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; layers[0].size = SYNPS_EDAC_NR_CSROWS; @@ -1360,6 +1402,13 @@ static int mc_probe(struct platform_device *pdev) priv = mci->pvt_info; priv->baseaddr = baseaddr; priv->p_data = p_data; + if (!get_ecc_state(priv)) { + edac_printk(KERN_INFO, EDAC_MC, "ECC not enabled\n"); + rc = -ENODEV; + goto free_edac_mc; + } + + spin_lock_init(&priv->reglock); mc_init(mci, pdev); @@ -1411,7 +1460,7 @@ free_edac_mc: * * Return: Unconditionally 0 */ -static int mc_remove(struct platform_device *pdev) +static void mc_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); struct synps_edac_priv *priv = mci->pvt_info; @@ -1426,8 +1475,6 @@ static int mc_remove(struct platform_device *pdev) edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); - - return 0; } static struct platform_driver synps_edac_mc_driver = { diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c index 0bcd9f02c84a..75c04dfc3962 100644 --- a/drivers/edac/thunderx_edac.c +++ b/drivers/edac/thunderx_edac.c @@ -35,12 +35,6 @@ enum { ERR_UNKNOWN = 3, }; -#define MAX_SYNDROME_REGS 4 - -struct error_syndrome { - u64 reg[MAX_SYNDROME_REGS]; -}; - struct error_descr { int type; u64 mask; @@ -481,7 +475,7 @@ static int thunderx_create_debugfs_nodes(struct dentry *parent, ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode, parent, data, &attrs[i]->fops); - if (!ent) + if (IS_ERR(ent)) break; } @@ -1133,7 +1127,7 @@ static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id) decode_register(other, OCX_OTHER_SIZE, ocx_com_errors, ctx->reg_com_int); - strncat(msg, other, OCX_MESSAGE_SIZE); + strlcat(msg, other, OCX_MESSAGE_SIZE); for (lane = 0; lane < OCX_RX_LANES; lane++) if (ctx->reg_com_int & BIT(lane)) { @@ -1142,12 +1136,12 @@ static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id) lane, ctx->reg_lane_int[lane], lane, ctx->reg_lane_stat11[lane]); - strncat(msg, other, OCX_MESSAGE_SIZE); + strlcat(msg, other, OCX_MESSAGE_SIZE); decode_register(other, OCX_OTHER_SIZE, ocx_lane_errors, ctx->reg_lane_int[lane]); - strncat(msg, other, OCX_MESSAGE_SIZE); + strlcat(msg, other, OCX_MESSAGE_SIZE); } if (ctx->reg_com_int & OCX_COM_INT_CE) @@ -1217,7 +1211,7 @@ static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id) decode_register(other, OCX_OTHER_SIZE, ocx_com_link_errors, ctx->reg_com_link_int); - strncat(msg, other, OCX_MESSAGE_SIZE); + strlcat(msg, other, OCX_MESSAGE_SIZE); if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE) edac_device_handle_ue(ocx->edac_dev, 0, 0, msg); @@ -1365,8 +1359,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev, idx = edac_device_alloc_index(); snprintf(name, sizeof(name), "OCX%d", idx); edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx), - name, 1, "CCPI", 1, - 0, NULL, 0, idx); + name, 1, "CCPI", 1, 0, idx); if (!edac_dev) { dev_err(&pdev->dev, "Cannot allocate EDAC device\n"); return -ENOMEM; @@ -1896,7 +1889,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int); - strncat(msg, other, L2C_MESSAGE_SIZE); + strlcat(msg, other, L2C_MESSAGE_SIZE); if (ctx->reg_int & mask_ue) edac_device_handle_ue(l2c->edac_dev, 0, 0, msg); @@ -2004,8 +1997,7 @@ static int thunderx_l2c_probe(struct pci_dev *pdev, snprintf(name, sizeof(name), fmt, idx); edac_dev = 
edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c),
-					     name, 1, "L2C", 1, 0,
-					     NULL, 0, idx);
+					     name, 1, "L2C", 1, 0, idx);
 	if (!edac_dev) {
 		dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
 		return -ENOMEM;
diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c
index 6971ded598de..39cc2ef9cac4 100644
--- a/drivers/edac/ti_edac.c
+++ b/drivers/edac/ti_edac.c
@@ -312,14 +312,12 @@ err:
 	return ret;
 }
 
-static int ti_edac_remove(struct platform_device *pdev)
+static void ti_edac_remove(struct platform_device *pdev)
 {
 	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
 
 	edac_mc_del_mc(&pdev->dev);
 	edac_mc_free(mci);
-
-	return 0;
 }
 
 static struct platform_driver ti_edac_driver = {
diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c
new file mode 100644
index 000000000000..5a43b5d43ca2
--- /dev/null
+++ b/drivers/edac/versal_edac.c
@@ -0,0 +1,1196 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx Versal memory controller driver
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ */
+#include <linux/bitfield.h>
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/sizes.h>
+#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/firmware/xlnx-event-manager.h>
+
+#include "edac_module.h"
+
+/* Granularity of reported error in bytes */
+#define XDDR_EDAC_ERR_GRAIN		1
+
+#define XDDR_EDAC_MSG_SIZE		256
+#define EVENT				2
+
+#define XDDR_PCSR_OFFSET		0xC
+#define XDDR_ISR_OFFSET			0x14
+#define XDDR_IRQ_EN_OFFSET		0x20
+#define XDDR_IRQ1_EN_OFFSET		0x2C
+#define XDDR_IRQ_DIS_OFFSET		0x24
+#define XDDR_IRQ_CE_MASK		GENMASK(18, 15)
+#define XDDR_IRQ_UE_MASK		GENMASK(14, 11)
+
+#define XDDR_REG_CONFIG0_OFFSET		0x258
+#define XDDR_REG_CONFIG0_BUS_WIDTH_MASK	GENMASK(19, 18)
+#define XDDR_REG_CONFIG0_NUM_CHANS_MASK	BIT(17)
+#define XDDR_REG_CONFIG0_NUM_RANKS_MASK	GENMASK(15, 14)
+#define XDDR_REG_CONFIG0_SIZE_MASK	GENMASK(10, 8)
+
+#define XDDR_REG_PINOUT_OFFSET		0x25C
+#define XDDR_REG_PINOUT_ECC_EN_MASK	GENMASK(7, 5)
+
+#define ECCW0_FLIP_CTRL			0x109C
+#define ECCW0_FLIP0_OFFSET		0x10A0
+#define ECCW0_FLIP0_BITS		31
+#define ECCW0_FLIP1_OFFSET		0x10A4
+#define ECCW1_FLIP_CTRL			0x10AC
+#define ECCW1_FLIP0_OFFSET		0x10B0
+#define ECCW1_FLIP1_OFFSET		0x10B4
+#define ECCR0_CERR_STAT_OFFSET		0x10BC
+#define ECCR0_CE_ADDR_LO_OFFSET		0x10C0
+#define ECCR0_CE_ADDR_HI_OFFSET		0x10C4
+#define ECCR0_CE_DATA_LO_OFFSET		0x10C8
+#define ECCR0_CE_DATA_HI_OFFSET		0x10CC
+#define ECCR0_CE_DATA_PAR_OFFSET	0x10D0
+
+#define ECCR0_UERR_STAT_OFFSET		0x10D4
+#define ECCR0_UE_ADDR_LO_OFFSET		0x10D8
+#define ECCR0_UE_ADDR_HI_OFFSET		0x10DC
+#define ECCR0_UE_DATA_LO_OFFSET		0x10E0
+#define ECCR0_UE_DATA_HI_OFFSET		0x10E4
+#define ECCR0_UE_DATA_PAR_OFFSET	0x10E8
+
+#define ECCR1_CERR_STAT_OFFSET		0x10F4
+#define ECCR1_CE_ADDR_LO_OFFSET		0x10F8
+#define ECCR1_CE_ADDR_HI_OFFSET		0x10FC
+#define ECCR1_CE_DATA_LO_OFFSET		0x1100
+#define ECCR1_CE_DATA_HI_OFFSET		0x1104
+#define ECCR1_CE_DATA_PAR_OFFSET	0x1108
+
+#define ECCR1_UERR_STAT_OFFSET		0x110C
+#define ECCR1_UE_ADDR_LO_OFFSET		0x1110
+#define ECCR1_UE_ADDR_HI_OFFSET		0x1114
+#define ECCR1_UE_DATA_LO_OFFSET		0x1118
+#define ECCR1_UE_DATA_HI_OFFSET		0x111C
+#define ECCR1_UE_DATA_PAR_OFFSET	0x1120
+
+#define XDDR_NOC_REG_ADEC4_OFFSET	0x44
+#define RANK_1_MASK			GENMASK(11, 6)
+#define LRANK_0_MASK			GENMASK(17, 12)
+#define LRANK_1_MASK			GENMASK(23, 18)
+#define MASK_24				GENMASK(29, 24)
+
+#define XDDR_NOC_REG_ADEC5_OFFSET	0x48
+#define XDDR_NOC_REG_ADEC6_OFFSET	0x4C
+#define XDDR_NOC_REG_ADEC7_OFFSET	0x50
+#define XDDR_NOC_REG_ADEC8_OFFSET	0x54
+#define XDDR_NOC_REG_ADEC9_OFFSET	0x58
+#define XDDR_NOC_REG_ADEC10_OFFSET	0x5C
+
+#define XDDR_NOC_REG_ADEC11_OFFSET	0x60
+#define MASK_0				GENMASK(5, 0)
+#define GRP_0_MASK			GENMASK(11, 6)
+#define GRP_1_MASK			GENMASK(17, 12)
+#define CH_0_MASK			GENMASK(23, 18)
+
+#define XDDR_NOC_REG_ADEC12_OFFSET	0x71C
+#define XDDR_NOC_REG_ADEC13_OFFSET	0x720
+
+#define XDDR_NOC_REG_ADEC14_OFFSET	0x724
+#define XDDR_NOC_ROW_MATCH_MASK		GENMASK(17, 0)
+#define XDDR_NOC_COL_MATCH_MASK		GENMASK(27, 18)
+#define XDDR_NOC_BANK_MATCH_MASK	GENMASK(29, 28)
+#define XDDR_NOC_GRP_MATCH_MASK		GENMASK(31, 30)
+
+#define XDDR_NOC_REG_ADEC15_OFFSET	0x728
+#define XDDR_NOC_RANK_MATCH_MASK	GENMASK(1, 0)
+#define XDDR_NOC_LRANK_MATCH_MASK	GENMASK(4, 2)
+#define XDDR_NOC_CH_MATCH_MASK		BIT(5)
+#define XDDR_NOC_MOD_SEL_MASK		BIT(6)
+#define XDDR_NOC_MATCH_EN_MASK		BIT(8)
+
+#define ECCR_UE_CE_ADDR_HI_ROW_MASK	GENMASK(7, 0)
+
+#define XDDR_EDAC_NR_CSROWS		1
+#define XDDR_EDAC_NR_CHANS		1
+
+#define XDDR_BUS_WIDTH_64		0
+#define XDDR_BUS_WIDTH_32		1
+#define XDDR_BUS_WIDTH_16		2
+
+#define XDDR_MAX_ROW_CNT		18
+#define XDDR_MAX_COL_CNT		10
+#define XDDR_MAX_RANK_CNT		2
+#define XDDR_MAX_LRANK_CNT		3
+#define XDDR_MAX_BANK_CNT		2
+#define XDDR_MAX_GRP_CNT		2
+
+/*
+ * Config and system registers are usually locked. This is the
+ * code which unlocks them in order to accept writes. See
+ *
+ * https://docs.xilinx.com/r/en-US/am012-versal-register-reference/PCSR_LOCK-XRAM_SLCR-Register
+ */
+#define PCSR_UNLOCK_VAL			0xF9E8D7C6
+#define PCSR_LOCK_VAL			1
+#define XDDR_ERR_TYPE_CE		0
+#define XDDR_ERR_TYPE_UE		1
+
+#define XILINX_DRAM_SIZE_4G		0
+#define XILINX_DRAM_SIZE_6G		1
+#define XILINX_DRAM_SIZE_8G		2
+#define XILINX_DRAM_SIZE_12G		3
+#define XILINX_DRAM_SIZE_16G		4
+#define XILINX_DRAM_SIZE_32G		5
+#define NUM_UE_BITPOS			2
+
+/**
+ * union ecc_error_info - ECC error log information.
+ * @burstpos:	Burst position.
+ * @lrank:	Logical Rank number.
+ * @rank:	Rank number.
+ * @group:	Group number.
+ * @bank:	Bank number.
+ * @col:	Column number.
+ * @row:	Row number.
+ * @rowhi:	Row number higher bits.
+ * @i:		ECC error info.
+ */
+union ecc_error_info {
+	struct {
+		u32 burstpos:3;
+		u32 lrank:3;
+		u32 rank:2;
+		u32 group:2;
+		u32 bank:2;
+		u32 col:10;
+		u32 row:10;
+		u32 rowhi;
+	};
+	u64 i;
+} __packed;
+
+union edac_info {
+	struct {
+		u32 row0:6;
+		u32 row1:6;
+		u32 row2:6;
+		u32 row3:6;
+		u32 row4:6;
+		u32 reserved:2;
+	};
+	struct {
+		u32 col1:6;
+		u32 col2:6;
+		u32 col3:6;
+		u32 col4:6;
+		u32 col5:6;
+		u32 reservedcol:2;
+	};
+	u32 i;
+} __packed;
+
+/**
+ * struct ecc_status - ECC status information to report.
+ * @ceinfo:	Correctable error log information.
+ * @ueinfo:	Uncorrectable error log information.
+ * @channel:	Channel number.
+ * @error_type:	Error type information.
+ */
+struct ecc_status {
+	union ecc_error_info ceinfo[2];
+	union ecc_error_info ueinfo[2];
+	u8 channel;
+	u8 error_type;
+};
+
+/**
+ * struct edac_priv - DDR memory controller private instance data.
+ * @ddrmc_baseaddr:	Base address of the DDR controller.
+ * @ddrmc_noc_baseaddr:	Base address of the DDRMC NOC.
+ * @message:	Buffer for framing the event specific info.
+ * @mc_id:	Memory controller ID.
+ * @ce_cnt:	Correctable error count.
+ * @ue_cnt:	Uncorrectable error count.
+ * @stat:	ECC status information.
+ * @lrank_bit:	Bit shifts for lrank bit.
+ * @rank_bit:	Bit shifts for rank bit.
+ * @row_bit:	Bit shifts for row bit.
+ * @col_bit:	Bit shifts for column bit.
+ * @bank_bit:	Bit shifts for bank bit.
+ * @grp_bit:	Bit shifts for group bit.
+ * @ch_bit:	Bit shifts for channel bit.
+ * @err_inject_addr:	Data poison address.
+ * @debugfs:	Debugfs handle.
+ */
+struct edac_priv {
+	void __iomem *ddrmc_baseaddr;
+	void __iomem *ddrmc_noc_baseaddr;
+	char message[XDDR_EDAC_MSG_SIZE];
+	u32 mc_id;
+	u32 ce_cnt;
+	u32 ue_cnt;
+	struct ecc_status stat;
+	u32 lrank_bit[3];
+	u32 rank_bit[2];
+	u32 row_bit[18];
+	u32 col_bit[10];
+	u32 bank_bit[2];
+	u32 grp_bit[2];
+	u32 ch_bit;
+#ifdef CONFIG_EDAC_DEBUG
+	u64 err_inject_addr;
+	struct dentry *debugfs;
+#endif
+};
+
+static void get_ce_error_info(struct edac_priv *priv)
+{
+	void __iomem *ddrmc_base;
+	struct ecc_status *p;
+	u32 regval;
+	u64 reghi;
+
+	ddrmc_base = priv->ddrmc_baseaddr;
+	p = &priv->stat;
+
+	p->error_type = XDDR_ERR_TYPE_CE;
+	/* Bits 7:0 of the ADDR_HI register hold row address bits 17:10. */
+	regval = readl(ddrmc_base + ECCR0_CE_ADDR_LO_OFFSET);
+	reghi = readl(ddrmc_base + ECCR0_CE_ADDR_HI_OFFSET) & ECCR_UE_CE_ADDR_HI_ROW_MASK;
+	p->ceinfo[0].i = regval | reghi << 32;
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(ddrmc_base + ECCR0_CE_DATA_LO_OFFSET),
+		 readl(ddrmc_base + ECCR0_CE_DATA_HI_OFFSET),
+		 readl(ddrmc_base + ECCR0_CE_DATA_PAR_OFFSET));
+
+	regval = readl(ddrmc_base + ECCR1_CE_ADDR_LO_OFFSET);
+	reghi = readl(ddrmc_base + ECCR1_CE_ADDR_HI_OFFSET) & ECCR_UE_CE_ADDR_HI_ROW_MASK;
+	p->ceinfo[1].i = regval | reghi << 32;
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(ddrmc_base + ECCR1_CE_DATA_LO_OFFSET),
+		 readl(ddrmc_base + ECCR1_CE_DATA_HI_OFFSET),
+		 readl(ddrmc_base + ECCR1_CE_DATA_PAR_OFFSET));
+}
+
+static void get_ue_error_info(struct edac_priv *priv)
+{
+	void __iomem *ddrmc_base;
+	struct ecc_status *p;
+	u32 regval;
+	u64 reghi;
+
+	ddrmc_base = priv->ddrmc_baseaddr;
+	p = &priv->stat;
+
+	p->error_type = XDDR_ERR_TYPE_UE;
+	regval = readl(ddrmc_base + ECCR0_UE_ADDR_LO_OFFSET);
+	reghi = readl(ddrmc_base + ECCR0_UE_ADDR_HI_OFFSET) & ECCR_UE_CE_ADDR_HI_ROW_MASK;
+	p->ueinfo[0].i = regval | reghi << 32;
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(ddrmc_base + ECCR0_UE_DATA_LO_OFFSET),
+		 readl(ddrmc_base + ECCR0_UE_DATA_HI_OFFSET),
+		 readl(ddrmc_base + ECCR0_UE_DATA_PAR_OFFSET));
+
+	regval = readl(ddrmc_base + ECCR1_UE_ADDR_LO_OFFSET);
+	reghi = readl(ddrmc_base + ECCR1_UE_ADDR_HI_OFFSET) & ECCR_UE_CE_ADDR_HI_ROW_MASK;
+	p->ueinfo[1].i = regval | reghi << 32;
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(ddrmc_base + ECCR1_UE_DATA_LO_OFFSET),
+		 readl(ddrmc_base + ECCR1_UE_DATA_HI_OFFSET),
+		 readl(ddrmc_base + ECCR1_UE_DATA_PAR_OFFSET));
+}
+
+static bool get_error_info(struct edac_priv *priv)
+{
+	u32 eccr0_ceval, eccr1_ceval, eccr0_ueval, eccr1_ueval;
+	void __iomem *ddrmc_base;
+	struct ecc_status *p;
+
+	ddrmc_base = priv->ddrmc_baseaddr;
+	p = &priv->stat;
+
+	eccr0_ceval = readl(ddrmc_base + ECCR0_CERR_STAT_OFFSET);
+	eccr1_ceval = readl(ddrmc_base + ECCR1_CERR_STAT_OFFSET);
+	eccr0_ueval = readl(ddrmc_base + ECCR0_UERR_STAT_OFFSET);
+	eccr1_ueval = readl(ddrmc_base + ECCR1_UERR_STAT_OFFSET);
+
+	if (!eccr0_ceval && !eccr1_ceval && !eccr0_ueval && !eccr1_ueval)
+		return 1;
+	if (!eccr0_ceval)
+		p->channel = 1;
+	else
+		p->channel = 0;
+
+	if (eccr0_ceval || eccr1_ceval)
+		get_ce_error_info(priv);
+
+	if (eccr0_ueval || eccr1_ueval) {
+		if (!eccr0_ueval)
+			p->channel = 1;
+		else
+			p->channel = 0;
+		get_ue_error_info(priv);
+	}
+
+	/* Unlock
the PCSR registers */ + writel(PCSR_UNLOCK_VAL, ddrmc_base + XDDR_PCSR_OFFSET); + + writel(0, ddrmc_base + ECCR0_CERR_STAT_OFFSET); + writel(0, ddrmc_base + ECCR1_CERR_STAT_OFFSET); + writel(0, ddrmc_base + ECCR0_UERR_STAT_OFFSET); + writel(0, ddrmc_base + ECCR1_UERR_STAT_OFFSET); + + /* Lock the PCSR registers */ + writel(1, ddrmc_base + XDDR_PCSR_OFFSET); + + return 0; +} + +/** + * convert_to_physical - Convert to physical address. + * @priv: DDR memory controller private instance data. + * @pinf: ECC error info structure. + * + * Return: Physical address of the DDR memory. + */ +static unsigned long convert_to_physical(struct edac_priv *priv, union ecc_error_info pinf) +{ + unsigned long err_addr = 0; + u32 index; + u32 row; + + row = pinf.rowhi << 10 | pinf.row; + for (index = 0; index < XDDR_MAX_ROW_CNT; index++) { + err_addr |= (row & BIT(0)) << priv->row_bit[index]; + row >>= 1; + } + + for (index = 0; index < XDDR_MAX_COL_CNT; index++) { + err_addr |= (pinf.col & BIT(0)) << priv->col_bit[index]; + pinf.col >>= 1; + } + + for (index = 0; index < XDDR_MAX_BANK_CNT; index++) { + err_addr |= (pinf.bank & BIT(0)) << priv->bank_bit[index]; + pinf.bank >>= 1; + } + + for (index = 0; index < XDDR_MAX_GRP_CNT; index++) { + err_addr |= (pinf.group & BIT(0)) << priv->grp_bit[index]; + pinf.group >>= 1; + } + + for (index = 0; index < XDDR_MAX_RANK_CNT; index++) { + err_addr |= (pinf.rank & BIT(0)) << priv->rank_bit[index]; + pinf.rank >>= 1; + } + + for (index = 0; index < XDDR_MAX_LRANK_CNT; index++) { + err_addr |= (pinf.lrank & BIT(0)) << priv->lrank_bit[index]; + pinf.lrank >>= 1; + } + + err_addr |= (priv->stat.channel & BIT(0)) << priv->ch_bit; + + return err_addr; +} + +/** + * handle_error - Handle Correctable and Uncorrectable errors. + * @mci: EDAC memory controller instance. + * @stat: ECC status structure. + * + * Handles ECC correctable and uncorrectable errors. + */ +static void handle_error(struct mem_ctl_info *mci, struct ecc_status *stat) +{ + struct edac_priv *priv = mci->pvt_info; + union ecc_error_info pinf; + + if (stat->error_type == XDDR_ERR_TYPE_CE) { + priv->ce_cnt++; + pinf = stat->ceinfo[stat->channel]; + snprintf(priv->message, XDDR_EDAC_MSG_SIZE, + "Error type:%s MC ID: %d Addr at %lx Burst Pos: %d\n", + "CE", priv->mc_id, + convert_to_physical(priv, pinf), pinf.burstpos); + + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + 1, 0, 0, 0, 0, 0, -1, + priv->message, ""); + } + + if (stat->error_type == XDDR_ERR_TYPE_UE) { + priv->ue_cnt++; + pinf = stat->ueinfo[stat->channel]; + snprintf(priv->message, XDDR_EDAC_MSG_SIZE, + "Error type:%s MC ID: %d Addr at %lx Burst Pos: %d\n", + "UE", priv->mc_id, + convert_to_physical(priv, pinf), pinf.burstpos); + + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, + 1, 0, 0, 0, 0, 0, -1, + priv->message, ""); + } + + memset(stat, 0, sizeof(*stat)); +} + +/** + * err_callback - Handle Correctable and Uncorrectable errors. + * @payload: payload data. + * @data: mci controller data. + * + * Handles ECC correctable and uncorrectable errors. 
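+ *
+ * payload[EVENT] (word 2 of the firmware event payload) selects the
+ * type: XPM_EVENT_ERROR_MASK_DDRMC_CR marks a correctable error and
+ * XPM_EVENT_ERROR_MASK_DDRMC_NCR a non-correctable one.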
+ */ +static void err_callback(const u32 *payload, void *data) +{ + struct mem_ctl_info *mci = (struct mem_ctl_info *)data; + struct edac_priv *priv; + struct ecc_status *p; + int regval; + + priv = mci->pvt_info; + p = &priv->stat; + + regval = readl(priv->ddrmc_baseaddr + XDDR_ISR_OFFSET); + + if (payload[EVENT] == XPM_EVENT_ERROR_MASK_DDRMC_CR) + p->error_type = XDDR_ERR_TYPE_CE; + if (payload[EVENT] == XPM_EVENT_ERROR_MASK_DDRMC_NCR) + p->error_type = XDDR_ERR_TYPE_UE; + + if (get_error_info(priv)) + return; + + handle_error(mci, &priv->stat); + + /* Unlock the PCSR registers */ + writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + + /* Clear the ISR */ + writel(regval, priv->ddrmc_baseaddr + XDDR_ISR_OFFSET); + + /* Lock the PCSR registers */ + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + edac_dbg(3, "Total error count CE %d UE %d\n", + priv->ce_cnt, priv->ue_cnt); +} + +/** + * get_dwidth - Return the controller memory width. + * @base: DDR memory controller base address. + * + * Get the EDAC device type width appropriate for the controller + * configuration. + * + * Return: a device type width enumeration. + */ +static enum dev_type get_dwidth(const void __iomem *base) +{ + enum dev_type dt; + u32 regval; + u32 width; + + regval = readl(base + XDDR_REG_CONFIG0_OFFSET); + width = FIELD_GET(XDDR_REG_CONFIG0_BUS_WIDTH_MASK, regval); + + switch (width) { + case XDDR_BUS_WIDTH_16: + dt = DEV_X2; + break; + case XDDR_BUS_WIDTH_32: + dt = DEV_X4; + break; + case XDDR_BUS_WIDTH_64: + dt = DEV_X8; + break; + default: + dt = DEV_UNKNOWN; + } + + return dt; +} + +/** + * get_ecc_state - Return the controller ECC enable/disable status. + * @base: DDR memory controller base address. + * + * Get the ECC enable/disable status for the controller. + * + * Return: a ECC status boolean i.e true/false - enabled/disabled. + */ +static bool get_ecc_state(void __iomem *base) +{ + enum dev_type dt; + u32 ecctype; + + dt = get_dwidth(base); + if (dt == DEV_UNKNOWN) + return false; + + ecctype = readl(base + XDDR_REG_PINOUT_OFFSET); + ecctype &= XDDR_REG_PINOUT_ECC_EN_MASK; + + return !!ecctype; +} + +/** + * get_memsize - Get the size of the attached memory device. + * @priv: DDR memory controller private instance data. + * + * Return: the memory size in bytes. + */ +static u64 get_memsize(struct edac_priv *priv) +{ + u32 regval; + u64 size; + + regval = readl(priv->ddrmc_baseaddr + XDDR_REG_CONFIG0_OFFSET); + regval = FIELD_GET(XDDR_REG_CONFIG0_SIZE_MASK, regval); + + switch (regval) { + case XILINX_DRAM_SIZE_4G: + size = 4U; break; + case XILINX_DRAM_SIZE_6G: + size = 6U; break; + case XILINX_DRAM_SIZE_8G: + size = 8U; break; + case XILINX_DRAM_SIZE_12G: + size = 12U; break; + case XILINX_DRAM_SIZE_16G: + size = 16U; break; + case XILINX_DRAM_SIZE_32G: + size = 32U; break; + /* Invalid configuration */ + default: + size = 0; break; + } + + size *= SZ_1G; + return size; +} + +/** + * init_csrows - Initialize the csrow data. + * @mci: EDAC memory controller instance. + * + * Initialize the chip select rows associated with the EDAC memory + * controller instance. 
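+ *
+ * For example (illustrative), with a single 8 GiB device, one csrow
+ * and one channel, each DIMM gets nr_pages = (SZ_8G >> PAGE_SHIFT) / 1
+ * = 2097152 pages with 4 KiB pages.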
+ */ +static void init_csrows(struct mem_ctl_info *mci) +{ + struct edac_priv *priv = mci->pvt_info; + struct csrow_info *csi; + struct dimm_info *dimm; + unsigned long size; + u32 row; + int ch; + + size = get_memsize(priv); + for (row = 0; row < mci->nr_csrows; row++) { + csi = mci->csrows[row]; + for (ch = 0; ch < csi->nr_channels; ch++) { + dimm = csi->channels[ch]->dimm; + dimm->edac_mode = EDAC_SECDED; + dimm->mtype = MEM_DDR4; + dimm->nr_pages = (size >> PAGE_SHIFT) / csi->nr_channels; + dimm->grain = XDDR_EDAC_ERR_GRAIN; + dimm->dtype = get_dwidth(priv->ddrmc_baseaddr); + } + } +} + +/** + * mc_init - Initialize one driver instance. + * @mci: EDAC memory controller instance. + * @pdev: platform device. + * + * Perform initialization of the EDAC memory controller instance and + * related driver-private data associated with the memory controller the + * instance is bound to. + */ +static void mc_init(struct mem_ctl_info *mci, struct platform_device *pdev) +{ + mci->pdev = &pdev->dev; + platform_set_drvdata(pdev, mci); + + /* Initialize controller capabilities and configuration */ + mci->mtype_cap = MEM_FLAG_DDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->scrub_cap = SCRUB_HW_SRC; + mci->scrub_mode = SCRUB_NONE; + + mci->edac_cap = EDAC_FLAG_SECDED; + mci->ctl_name = "xlnx_ddr_controller"; + mci->dev_name = dev_name(&pdev->dev); + mci->mod_name = "xlnx_edac"; + + edac_op_state = EDAC_OPSTATE_INT; + + init_csrows(mci); +} + +static void enable_intr(struct edac_priv *priv) +{ + /* Unlock the PCSR registers */ + writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + + /* Enable UE and CE Interrupts to support the interrupt case */ + writel(XDDR_IRQ_CE_MASK | XDDR_IRQ_UE_MASK, + priv->ddrmc_baseaddr + XDDR_IRQ_EN_OFFSET); + + writel(XDDR_IRQ_UE_MASK, + priv->ddrmc_baseaddr + XDDR_IRQ1_EN_OFFSET); + /* Lock the PCSR registers */ + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); +} + +static void disable_intr(struct edac_priv *priv) +{ + /* Unlock the PCSR registers */ + writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + + /* Disable UE/CE Interrupts */ + writel(XDDR_IRQ_CE_MASK | XDDR_IRQ_UE_MASK, + priv->ddrmc_baseaddr + XDDR_IRQ_DIS_OFFSET); + + /* Lock the PCSR registers */ + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +#ifdef CONFIG_EDAC_DEBUG +/** + * poison_setup - Update poison registers. + * @priv: DDR memory controller private instance data. + * + * Update poison registers as per DDR mapping upon write of the address + * location the fault is injected. + * Return: none. 
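+ *
+ * For example (illustrative), err_inject_addr == 0 decodes to row,
+ * column, bank, group and rank 0 on channel 0, so all ADEC14/ADEC15
+ * match fields are zero, only XDDR_NOC_MOD_SEL_MASK and
+ * XDDR_NOC_MATCH_EN_MASK get set, and the ECCW0 flip control is armed.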
+ */ +static void poison_setup(struct edac_priv *priv) +{ + u32 col = 0, row = 0, bank = 0, grp = 0, rank = 0, lrank = 0, ch = 0; + u32 index, regval; + + for (index = 0; index < XDDR_MAX_ROW_CNT; index++) { + row |= (((priv->err_inject_addr >> priv->row_bit[index]) & + BIT(0)) << index); + } + + for (index = 0; index < XDDR_MAX_COL_CNT; index++) { + col |= (((priv->err_inject_addr >> priv->col_bit[index]) & + BIT(0)) << index); + } + + for (index = 0; index < XDDR_MAX_BANK_CNT; index++) { + bank |= (((priv->err_inject_addr >> priv->bank_bit[index]) & + BIT(0)) << index); + } + + for (index = 0; index < XDDR_MAX_GRP_CNT; index++) { + grp |= (((priv->err_inject_addr >> priv->grp_bit[index]) & + BIT(0)) << index); + } + + for (index = 0; index < XDDR_MAX_RANK_CNT; index++) { + rank |= (((priv->err_inject_addr >> priv->rank_bit[index]) & + BIT(0)) << index); + } + + for (index = 0; index < XDDR_MAX_LRANK_CNT; index++) { + lrank |= (((priv->err_inject_addr >> priv->lrank_bit[index]) & + BIT(0)) << index); + } + + ch = (priv->err_inject_addr >> priv->ch_bit) & BIT(0); + if (ch) + writel(0xFF, priv->ddrmc_baseaddr + ECCW1_FLIP_CTRL); + else + writel(0xFF, priv->ddrmc_baseaddr + ECCW0_FLIP_CTRL); + + writel(0, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC12_OFFSET); + writel(0, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC13_OFFSET); + + regval = row & XDDR_NOC_ROW_MATCH_MASK; + regval |= FIELD_PREP(XDDR_NOC_COL_MATCH_MASK, col); + regval |= FIELD_PREP(XDDR_NOC_BANK_MATCH_MASK, bank); + regval |= FIELD_PREP(XDDR_NOC_GRP_MATCH_MASK, grp); + writel(regval, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC14_OFFSET); + + regval = rank & XDDR_NOC_RANK_MATCH_MASK; + regval |= FIELD_PREP(XDDR_NOC_LRANK_MATCH_MASK, lrank); + regval |= FIELD_PREP(XDDR_NOC_CH_MATCH_MASK, ch); + regval |= (XDDR_NOC_MOD_SEL_MASK | XDDR_NOC_MATCH_EN_MASK); + writel(regval, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC15_OFFSET); +} + +static void xddr_inject_data_ce_store(struct mem_ctl_info *mci, u8 ce_bitpos) +{ + u32 ecc0_flip0, ecc1_flip0, ecc0_flip1, ecc1_flip1; + struct edac_priv *priv = mci->pvt_info; + + if (ce_bitpos < ECCW0_FLIP0_BITS) { + ecc0_flip0 = BIT(ce_bitpos); + ecc1_flip0 = BIT(ce_bitpos); + ecc0_flip1 = 0; + ecc1_flip1 = 0; + } else { + ce_bitpos = ce_bitpos - ECCW0_FLIP0_BITS; + ecc0_flip1 = BIT(ce_bitpos); + ecc1_flip1 = BIT(ce_bitpos); + ecc0_flip0 = 0; + ecc1_flip0 = 0; + } + + writel(ecc0_flip0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET); + writel(ecc1_flip0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET); + writel(ecc0_flip1, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET); + writel(ecc1_flip1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET); +} + +/* + * To inject a correctable error, the following steps are needed: + * + * - Write the correctable error bit position value: + * echo <bit_pos val> > /sys/kernel/debug/edac/<controller instance>/inject_ce + * + * poison_setup() derives the row, column, bank, group and rank and + * writes to the ADEC registers based on the address given by the user. + * + * The ADEC12 and ADEC13 are mask registers; write 0 to make sure default + * configuration is there and no addresses are masked. + * + * The row, column, bank, group and rank registers are written to the + * match ADEC bit to generate errors at the particular address. ADEC14 + * and ADEC15 have the match bits. + * + * xddr_inject_data_ce_store() updates the ECC FLIP registers with the + * bits to be corrupted based on the bit position given by the user. + * + * Upon doing a read to the address the errors are injected. 
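[Editor's note: from user space, the procedure just described reduces to two debugfs writes plus a read of the target address. A minimal sketch; the address and inject_ce nodes are created by create_debugfs_attributes() further down, and the instance directory name is target-specific, so the path below is a placeholder.]

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Placeholder: the directory is named after the controller instance;
 * substitute the dev_name of the DDRMC on the target system. */
#define CTL_DIR "/sys/kernel/debug/edac/CONTROLLER"

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* 1. Program the fault address consumed by poison_setup(). */
	write_str(CTL_DIR "/address", "0x10000000");
	/* 2. Pick the data bit to flip; this triggers the ADEC setup. */
	write_str(CTL_DIR "/inject_ce", "2");
	/* 3. A subsequent read of the poisoned address reports the CE. */
	return 0;
}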
+ */
+static ssize_t inject_data_ce_store(struct file *file, const char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct device *dev = file->private_data;
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct edac_priv *priv = mci->pvt_info;
+ u8 ce_bitpos;
+ int ret;
+
+ ret = kstrtou8_from_user(data, count, 0, &ce_bitpos);
+ if (ret)
+ return ret;
+
+ /* Unlock the PCSR registers */
+ writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+ writel(PCSR_UNLOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+
+ poison_setup(priv);
+
+ xddr_inject_data_ce_store(mci, ce_bitpos);
+ ret = count;
+
+ /* Lock the PCSR registers */
+ writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+ writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+
+ return ret;
+}
+
+static const struct file_operations xddr_inject_ce_fops = {
+ .open = simple_open,
+ .write = inject_data_ce_store,
+ .llseek = generic_file_llseek,
+};
+
+static void xddr_inject_data_ue_store(struct mem_ctl_info *mci, u32 val0, u32 val1)
+{
+ struct edac_priv *priv = mci->pvt_info;
+
+ writel(val0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET);
+ writel(val0, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET);
+ writel(val1, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET);
+ writel(val1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET);
+}
+
+/*
+ * To inject an uncorrectable error, the following steps are needed:
+ * echo <bit_pos val> > /sys/kernel/debug/edac/<controller instance>/inject_ue
+ *
+ * poison_setup() derives the row, column, bank, group and rank and
+ * writes to the ADEC registers based on the address given by the user.
+ *
+ * The ADEC12 and ADEC13 are mask registers; write 0 so that none of the
+ * addresses are masked. The row, column, bank, group and rank registers
+ * are written to the match ADEC bit to generate errors at the
+ * particular address. ADEC14 and ADEC15 have the match bits.
+ *
+ * xddr_inject_data_ue_store() updates the ECC FLIP registers with the
+ * bits to be corrupted based on the bit positions given by the user.
+ * For uncorrectable errors, 2-bit errors are injected.
+ *
+ * Upon doing a read to the address the errors are injected.
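[Editor's note: the UE path takes two comma-separated bit positions and splits them across the FLIP0/FLIP1 words, as inject_data_ue_store() below shows. A user-space model of that split; the 32-bit word width is an assumption here, since the real ECCW0_FLIP0_BITS definition sits earlier in the file and is not part of this hunk.]

#include <stdint.h>
#include <stdio.h>

#define FLIP0_BITS 32	/* stand-in for ECCW0_FLIP0_BITS, defined earlier */

/* Positions below FLIP0_BITS land in the FLIP0 word; higher positions
 * are rebased into the FLIP1 word, mirroring the kernel logic. */
static void split_ue_bits(uint8_t ue0, uint8_t ue1, uint32_t *val0, uint32_t *val1)
{
	*val0 = 0;
	*val1 = 0;

	if (ue0 < FLIP0_BITS)
		*val0 |= 1u << ue0;
	else
		*val1 |= 1u << (ue0 - FLIP0_BITS);

	if (ue1 < FLIP0_BITS)
		*val0 |= 1u << ue1;
	else
		*val1 |= 1u << (ue1 - FLIP0_BITS);
}

int main(void)
{
	uint32_t v0, v1;

	split_ue_bits(3, 40, &v0, &v1);	/* e.g. "echo 3,40 > inject_ue" */
	printf("flip0=0x%08x flip1=0x%08x\n", v0, v1);
	return 0;
}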
+ */ +static ssize_t inject_data_ue_store(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + struct edac_priv *priv = mci->pvt_info; + char buf[6], *pbuf, *token[2]; + u32 val0 = 0, val1 = 0; + u8 len, ue0, ue1; + int i, ret; + + len = min_t(size_t, count, sizeof(buf)); + if (copy_from_user(buf, data, len)) + return -EFAULT; + + buf[len] = '\0'; + pbuf = &buf[0]; + for (i = 0; i < NUM_UE_BITPOS; i++) + token[i] = strsep(&pbuf, ","); + + if (!token[0] || !token[1]) + return -EFAULT; + + ret = kstrtou8(token[0], 0, &ue0); + if (ret) + return ret; + + ret = kstrtou8(token[1], 0, &ue1); + if (ret) + return ret; + + if (ue0 < ECCW0_FLIP0_BITS) { + val0 = BIT(ue0); + } else { + ue0 = ue0 - ECCW0_FLIP0_BITS; + val1 = BIT(ue0); + } + + if (ue1 < ECCW0_FLIP0_BITS) { + val0 |= BIT(ue1); + } else { + ue1 = ue1 - ECCW0_FLIP0_BITS; + val1 |= BIT(ue1); + } + + /* Unlock the PCSR registers */ + writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + writel(PCSR_UNLOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET); + + poison_setup(priv); + + xddr_inject_data_ue_store(mci, val0, val1); + + /* Lock the PCSR registers */ + writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET); + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + return count; +} + +static const struct file_operations xddr_inject_ue_fops = { + .open = simple_open, + .write = inject_data_ue_store, + .llseek = generic_file_llseek, +}; + +static void create_debugfs_attributes(struct mem_ctl_info *mci) +{ + struct edac_priv *priv = mci->pvt_info; + + priv->debugfs = edac_debugfs_create_dir(mci->dev_name); + if (!priv->debugfs) + return; + + if (!edac_debugfs_create_file("inject_ce", 0200, priv->debugfs, + &mci->dev, &xddr_inject_ce_fops)) { + debugfs_remove_recursive(priv->debugfs); + return; + } + + if (!edac_debugfs_create_file("inject_ue", 0200, priv->debugfs, + &mci->dev, &xddr_inject_ue_fops)) { + debugfs_remove_recursive(priv->debugfs); + return; + } + debugfs_create_x64("address", 0600, priv->debugfs, + &priv->err_inject_addr); + mci->debugfs = priv->debugfs; +} + +static inline void process_bit(struct edac_priv *priv, unsigned int start, u32 regval) +{ + union edac_info rows; + + rows.i = regval; + priv->row_bit[start] = rows.row0; + priv->row_bit[start + 1] = rows.row1; + priv->row_bit[start + 2] = rows.row2; + priv->row_bit[start + 3] = rows.row3; + priv->row_bit[start + 4] = rows.row4; +} + +static void setup_row_address_map(struct edac_priv *priv) +{ + u32 regval; + union edac_info rows; + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC5_OFFSET); + process_bit(priv, 0, regval); + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC6_OFFSET); + process_bit(priv, 5, regval); + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC7_OFFSET); + process_bit(priv, 10, regval); + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC8_OFFSET); + rows.i = regval; + + priv->row_bit[15] = rows.row0; + priv->row_bit[16] = rows.row1; + priv->row_bit[17] = rows.row2; +} + +static void setup_column_address_map(struct edac_priv *priv) +{ + u32 regval; + union edac_info cols; + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC8_OFFSET); + priv->col_bit[0] = FIELD_GET(MASK_24, regval); + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC9_OFFSET); + cols.i = regval; + priv->col_bit[1] = cols.col1; + priv->col_bit[2] = cols.col2; + 
priv->col_bit[3] = cols.col3; + priv->col_bit[4] = cols.col4; + priv->col_bit[5] = cols.col5; + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC10_OFFSET); + cols.i = regval; + priv->col_bit[6] = cols.col1; + priv->col_bit[7] = cols.col2; + priv->col_bit[8] = cols.col3; + priv->col_bit[9] = cols.col4; +} + +static void setup_bank_grp_ch_address_map(struct edac_priv *priv) +{ + u32 regval; + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC10_OFFSET); + priv->bank_bit[0] = FIELD_GET(MASK_24, regval); + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC11_OFFSET); + priv->bank_bit[1] = (regval & MASK_0); + priv->grp_bit[0] = FIELD_GET(GRP_0_MASK, regval); + priv->grp_bit[1] = FIELD_GET(GRP_1_MASK, regval); + priv->ch_bit = FIELD_GET(CH_0_MASK, regval); +} + +static void setup_rank_lrank_address_map(struct edac_priv *priv) +{ + u32 regval; + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC4_OFFSET); + priv->rank_bit[0] = (regval & MASK_0); + priv->rank_bit[1] = FIELD_GET(RANK_1_MASK, regval); + priv->lrank_bit[0] = FIELD_GET(LRANK_0_MASK, regval); + priv->lrank_bit[1] = FIELD_GET(LRANK_1_MASK, regval); + priv->lrank_bit[2] = FIELD_GET(MASK_24, regval); +} + +/** + * setup_address_map - Set Address Map by querying ADDRMAP registers. + * @priv: DDR memory controller private instance data. + * + * Set Address Map by querying ADDRMAP registers. + * + * Return: none. + */ +static void setup_address_map(struct edac_priv *priv) +{ + setup_row_address_map(priv); + + setup_column_address_map(priv); + + setup_bank_grp_ch_address_map(priv); + + setup_rank_lrank_address_map(priv); +} +#endif /* CONFIG_EDAC_DEBUG */ + +static const struct of_device_id xlnx_edac_match[] = { + { .compatible = "xlnx,versal-ddrmc", }, + { + /* end of table */ + } +}; + +MODULE_DEVICE_TABLE(of, xlnx_edac_match); +static u32 emif_get_id(struct device_node *node) +{ + u32 addr, my_addr, my_id = 0; + struct device_node *np; + const __be32 *addrp; + + addrp = of_get_address(node, 0, NULL, NULL); + my_addr = (u32)of_translate_address(node, addrp); + + for_each_matching_node(np, xlnx_edac_match) { + if (np == node) + continue; + + addrp = of_get_address(np, 0, NULL, NULL); + addr = (u32)of_translate_address(np, addrp); + + edac_printk(KERN_INFO, EDAC_MC, + "addr=%x, my_addr=%x\n", + addr, my_addr); + + if (addr < my_addr) + my_id++; + } + + return my_id; +} + +static int mc_probe(struct platform_device *pdev) +{ + void __iomem *ddrmc_baseaddr, *ddrmc_noc_baseaddr; + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + u8 num_chans, num_csrows; + struct edac_priv *priv; + u32 edac_mc_id, regval; + int rc; + + ddrmc_baseaddr = devm_platform_ioremap_resource_byname(pdev, "base"); + if (IS_ERR(ddrmc_baseaddr)) + return PTR_ERR(ddrmc_baseaddr); + + ddrmc_noc_baseaddr = devm_platform_ioremap_resource_byname(pdev, "noc"); + if (IS_ERR(ddrmc_noc_baseaddr)) + return PTR_ERR(ddrmc_noc_baseaddr); + + if (!get_ecc_state(ddrmc_baseaddr)) + return -ENXIO; + + /* Allocate ID number for the EMIF controller */ + edac_mc_id = emif_get_id(pdev->dev.of_node); + + regval = readl(ddrmc_baseaddr + XDDR_REG_CONFIG0_OFFSET); + num_chans = FIELD_GET(XDDR_REG_CONFIG0_NUM_CHANS_MASK, regval); + num_chans++; + + num_csrows = FIELD_GET(XDDR_REG_CONFIG0_NUM_RANKS_MASK, regval); + num_csrows *= 2; + if (!num_csrows) + num_csrows = 1; + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = num_csrows; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 
num_chans; + layers[1].is_virt_csrow = false; + + mci = edac_mc_alloc(edac_mc_id, ARRAY_SIZE(layers), layers, + sizeof(struct edac_priv)); + if (!mci) { + edac_printk(KERN_ERR, EDAC_MC, + "Failed memory allocation for mc instance\n"); + return -ENOMEM; + } + + priv = mci->pvt_info; + priv->ddrmc_baseaddr = ddrmc_baseaddr; + priv->ddrmc_noc_baseaddr = ddrmc_noc_baseaddr; + priv->ce_cnt = 0; + priv->ue_cnt = 0; + priv->mc_id = edac_mc_id; + + mc_init(mci, pdev); + + rc = edac_mc_add_mc(mci); + if (rc) { + edac_printk(KERN_ERR, EDAC_MC, + "Failed to register with EDAC core\n"); + goto free_edac_mc; + } + + rc = xlnx_register_event(PM_NOTIFY_CB, VERSAL_EVENT_ERROR_PMC_ERR1, + XPM_EVENT_ERROR_MASK_DDRMC_CR | XPM_EVENT_ERROR_MASK_DDRMC_NCR, + false, err_callback, mci); + if (rc) { + if (rc == -EACCES) + rc = -EPROBE_DEFER; + + goto del_mc; + } + +#ifdef CONFIG_EDAC_DEBUG + create_debugfs_attributes(mci); + setup_address_map(priv); +#endif + enable_intr(priv); + return rc; + +del_mc: + edac_mc_del_mc(&pdev->dev); +free_edac_mc: + edac_mc_free(mci); + + return rc; +} + +static void mc_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + struct edac_priv *priv = mci->pvt_info; + + disable_intr(priv); + +#ifdef CONFIG_EDAC_DEBUG + debugfs_remove_recursive(priv->debugfs); +#endif + + xlnx_unregister_event(PM_NOTIFY_CB, VERSAL_EVENT_ERROR_PMC_ERR1, + XPM_EVENT_ERROR_MASK_DDRMC_CR | + XPM_EVENT_ERROR_MASK_DDRMC_NCR, err_callback, mci); + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); +} + +static struct platform_driver xilinx_ddr_edac_mc_driver = { + .driver = { + .name = "xilinx-ddrmc-edac", + .of_match_table = xlnx_edac_match, + }, + .probe = mc_probe, + .remove = mc_remove, +}; + +module_platform_driver(xilinx_ddr_edac_mc_driver); + +MODULE_AUTHOR("AMD Inc"); +MODULE_DESCRIPTION("Xilinx DDRMC ECC driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c new file mode 100644 index 000000000000..1a1092793092 --- /dev/null +++ b/drivers/edac/versalnet_edac.c @@ -0,0 +1,962 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD Versal NET memory controller driver + * Copyright (C) 2025 Advanced Micro Devices, Inc. 
+ */ + +#include <linux/cdx/edac_cdx_pcol.h> +#include <linux/edac.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/ras.h> +#include <linux/remoteproc.h> +#include <linux/rpmsg.h> +#include <linux/sizes.h> +#include <ras/ras_event.h> + +#include "edac_module.h" + +/* Granularity of reported error in bytes */ +#define MC5_ERR_GRAIN 1 +#define MC_GET_DDR_CONFIG_IN_LEN 4 + +#define MC5_IRQ_CE_MASK GENMASK(18, 15) +#define MC5_IRQ_UE_MASK GENMASK(14, 11) + +#define MC5_RANK_1_MASK GENMASK(11, 6) +#define MASK_24 GENMASK(29, 24) +#define MASK_0 GENMASK(5, 0) + +#define MC5_LRANK_1_MASK GENMASK(11, 6) +#define MC5_LRANK_2_MASK GENMASK(17, 12) +#define MC5_BANK1_MASK GENMASK(11, 6) +#define MC5_GRP_0_MASK GENMASK(17, 12) +#define MC5_GRP_1_MASK GENMASK(23, 18) + +#define MC5_REGHI_ROW 7 +#define MC5_EACHBIT 1 +#define MC5_ERR_TYPE_CE 0 +#define MC5_ERR_TYPE_UE 1 +#define MC5_HIGH_MEM_EN BIT(20) +#define MC5_MEM_MASK GENMASK(19, 0) +#define MC5_X16_BASE 256 +#define MC5_X16_ECC 32 +#define MC5_X16_SIZE (MC5_X16_BASE + MC5_X16_ECC) +#define MC5_X32_SIZE 576 +#define MC5_HIMEM_BASE (256 * SZ_1M) +#define MC5_ILC_HIMEM_EN BIT(28) +#define MC5_ILC_MEM GENMASK(27, 0) +#define MC5_INTERLEAVE_SEL GENMASK(3, 0) +#define MC5_BUS_WIDTH_MASK GENMASK(19, 18) +#define MC5_NUM_CHANS_MASK BIT(17) +#define MC5_RANK_MASK GENMASK(15, 14) + +#define ERROR_LEVEL 2 +#define ERROR_ID 3 +#define TOTAL_ERR_LENGTH 5 +#define MSG_ERR_OFFSET 8 +#define MSG_ERR_LENGTH 9 +#define ERROR_DATA 10 +#define MCDI_RESPONSE 0xFF + +#define REG_MAX 152 +#define ADEC_MAX 152 +#define NUM_CONTROLLERS 8 +#define REGS_PER_CONTROLLER 19 +#define ADEC_NUM 19 +#define BUFFER_SZ 80 + +#define XDDR5_BUS_WIDTH_64 0 +#define XDDR5_BUS_WIDTH_32 1 +#define XDDR5_BUS_WIDTH_16 2 + +/** + * struct ecc_error_info - ECC error log information. + * @burstpos: Burst position. + * @lrank: Logical Rank number. + * @rank: Rank number. + * @group: Group number. + * @bank: Bank number. + * @col: Column number. + * @row: Row number. + * @rowhi: Row number higher bits. + * @i: Combined ECC error vector containing encoded values of burst position, + * rank, bank, column, and row information. + */ +union ecc_error_info { + struct { + u32 burstpos:3; + u32 lrank:4; + u32 rank:2; + u32 group:3; + u32 bank:2; + u32 col:11; + u32 row:7; + u32 rowhi; + }; + u64 i; +} __packed; + +/* Row and column bit positions in the address decoder (ADEC) registers. */ +union row_col_mapping { + struct { + u32 row0:6; + u32 row1:6; + u32 row2:6; + u32 row3:6; + u32 row4:6; + u32 reserved:2; + }; + struct { + u32 col1:6; + u32 col2:6; + u32 col3:6; + u32 col4:6; + u32 col5:6; + u32 reservedcol:2; + }; + u32 i; +} __packed; + +/** + * struct ecc_status - ECC status information to report. + * @ceinfo: Correctable errors. + * @ueinfo: Uncorrected errors. + * @channel: Channel number. + * @error_type: Error type. + */ +struct ecc_status { + union ecc_error_info ceinfo[2]; + union ecc_error_info ueinfo[2]; + u8 channel; + u8 error_type; +}; + +/** + * struct mc_priv - DDR memory controller private instance data. + * @message: Buffer for framing the event specific info. + * @stat: ECC status information. + * @error_id: The error id. + * @error_level: The error level. + * @dwidth: Width of data bus excluding ECC bits. + * @part_len: The support of the message received. + * @regs: The registers sent on the rpmsg. + * @adec: Address decode registers. + * @mci: Memory controller interface. + * @ept: rpmsg endpoint. + * @mcdi: The mcdi handle. 
+ */ +struct mc_priv { + char message[256]; + struct ecc_status stat; + u32 error_id; + u32 error_level; + u32 dwidth; + u32 part_len; + u32 regs[REG_MAX]; + u32 adec[ADEC_MAX]; + struct mem_ctl_info *mci[NUM_CONTROLLERS]; + struct rpmsg_endpoint *ept; + struct cdx_mcdi *mcdi; +}; + +/* + * Address decoder (ADEC) registers to match the order in which the register + * information is received from the firmware. + */ +enum adec_info { + CONF = 0, + ADEC0, + ADEC1, + ADEC2, + ADEC3, + ADEC4, + ADEC5, + ADEC6, + ADEC7, + ADEC8, + ADEC9, + ADEC10, + ADEC11, + ADEC12, + ADEC13, + ADEC14, + ADEC15, + ADEC16, + ADECILC, +}; + +enum reg_info { + ISR = 0, + IMR, + ECCR0_ERR_STATUS, + ECCR0_ADDR_LO, + ECCR0_ADDR_HI, + ECCR0_DATA_LO, + ECCR0_DATA_HI, + ECCR0_PAR, + ECCR1_ERR_STATUS, + ECCR1_ADDR_LO, + ECCR1_ADDR_HI, + ECCR1_DATA_LO, + ECCR1_DATA_HI, + ECCR1_PAR, + XMPU_ERR, + XMPU_ERR_ADDR_L0, + XMPU_ERR_ADDR_HI, + XMPU_ERR_AXI_ID, + ADEC_CHK_ERR_LOG, +}; + +static bool get_ddr_info(u32 *error_data, struct mc_priv *priv) +{ + u32 reglo, reghi, parity, eccr0_val, eccr1_val, isr; + struct ecc_status *p; + + isr = error_data[ISR]; + + if (!(isr & (MC5_IRQ_UE_MASK | MC5_IRQ_CE_MASK))) + return false; + + eccr0_val = error_data[ECCR0_ERR_STATUS]; + eccr1_val = error_data[ECCR1_ERR_STATUS]; + + if (!eccr0_val && !eccr1_val) + return false; + + p = &priv->stat; + + if (!eccr0_val) + p->channel = 1; + else + p->channel = 0; + + reglo = error_data[ECCR0_ADDR_LO]; + reghi = error_data[ECCR0_ADDR_HI]; + if (isr & MC5_IRQ_CE_MASK) + p->ceinfo[0].i = reglo | (u64)reghi << 32; + else if (isr & MC5_IRQ_UE_MASK) + p->ueinfo[0].i = reglo | (u64)reghi << 32; + + parity = error_data[ECCR0_PAR]; + edac_dbg(2, "ERR DATA: 0x%08X%08X PARITY: 0x%08X\n", + reghi, reglo, parity); + + reglo = error_data[ECCR1_ADDR_LO]; + reghi = error_data[ECCR1_ADDR_HI]; + if (isr & MC5_IRQ_CE_MASK) + p->ceinfo[1].i = reglo | (u64)reghi << 32; + else if (isr & MC5_IRQ_UE_MASK) + p->ueinfo[1].i = reglo | (u64)reghi << 32; + + parity = error_data[ECCR1_PAR]; + edac_dbg(2, "ERR DATA: 0x%08X%08X PARITY: 0x%08X\n", + reghi, reglo, parity); + + return true; +} + +/** + * convert_to_physical - Convert @error_data to a physical address. + * @priv: DDR memory controller private instance data. + * @pinf: ECC error info structure. + * @controller: Controller number of the MC5 + * @error_data: the DDRMC5 ADEC address decoder register data + * + * Return: physical address of the DDR memory. 
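[Editor's note: convert_to_physical() below is long but regular; every unrolled pair of statements places one bit of a row/column/bank/rank field at a position taken from the ADEC registers. A compact equivalent of that primitive, with a hypothetical mapping; it is the inverse of the gather operation used by the Versal injection path.]

#include <stdint.h>
#include <stdio.h>

/* Place bit i of 'field' at position bitpos[i] of the rebuilt address;
 * each "err_addr |= (x & BIT(0)) << ...; x >>= MC5_EACHBIT;" pair in
 * the function below is one iteration of this loop. */
static uint64_t scatter_bits(uint32_t field, const uint8_t *bitpos, int nbits)
{
	uint64_t addr = 0;

	for (int i = 0; i < nbits; i++) {
		addr |= (uint64_t)(field & 1) << bitpos[i];
		field >>= 1;
	}
	return addr;
}

int main(void)
{
	/* Hypothetical ADEC mapping: row bits 0..4 at address bits 6..10 */
	const uint8_t row_map[] = { 6, 7, 8, 9, 10 };

	printf("0x%llx\n",
	       (unsigned long long)scatter_bits(0x1b, row_map, 5)); /* 0x6c0 */
	return 0;
}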
+ */ +static unsigned long convert_to_physical(struct mc_priv *priv, + union ecc_error_info pinf, + int controller, int *error_data) +{ + u32 row, blk, rsh_req_addr, interleave, ilc_base_ctrl_add, ilc_himem_en, reg, offset; + u64 high_mem_base, high_mem_offset, low_mem_offset, ilcmem_base; + unsigned long err_addr = 0, addr; + union row_col_mapping cols; + union row_col_mapping rows; + u32 col_bit_0; + + row = pinf.rowhi << MC5_REGHI_ROW | pinf.row; + offset = controller * ADEC_NUM; + + reg = error_data[ADEC6]; + rows.i = reg; + err_addr |= (row & BIT(0)) << rows.row0; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row1; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row2; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row3; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row4; + row >>= MC5_EACHBIT; + + reg = error_data[ADEC7]; + rows.i = reg; + err_addr |= (row & BIT(0)) << rows.row0; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row1; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row2; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row3; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row4; + row >>= MC5_EACHBIT; + + reg = error_data[ADEC8]; + rows.i = reg; + err_addr |= (row & BIT(0)) << rows.row0; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row1; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row2; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row3; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row4; + + reg = error_data[ADEC9]; + rows.i = reg; + + err_addr |= (row & BIT(0)) << rows.row0; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row1; + row >>= MC5_EACHBIT; + err_addr |= (row & BIT(0)) << rows.row2; + row >>= MC5_EACHBIT; + + col_bit_0 = FIELD_GET(MASK_24, error_data[ADEC9]); + pinf.col >>= 1; + err_addr |= (pinf.col & 1) << col_bit_0; + + cols.i = error_data[ADEC10]; + err_addr |= (pinf.col & 1) << cols.col1; + pinf.col >>= 1; + err_addr |= (pinf.col & 1) << cols.col2; + pinf.col >>= 1; + err_addr |= (pinf.col & 1) << cols.col3; + pinf.col >>= 1; + err_addr |= (pinf.col & 1) << cols.col4; + pinf.col >>= 1; + err_addr |= (pinf.col & 1) << cols.col5; + pinf.col >>= 1; + + cols.i = error_data[ADEC11]; + err_addr |= (pinf.col & 1) << cols.col1; + pinf.col >>= 1; + err_addr |= (pinf.col & 1) << cols.col2; + pinf.col >>= 1; + err_addr |= (pinf.col & 1) << cols.col3; + pinf.col >>= 1; + err_addr |= (pinf.col & 1) << cols.col4; + pinf.col >>= 1; + err_addr |= (pinf.col & 1) << cols.col5; + pinf.col >>= 1; + + reg = error_data[ADEC12]; + err_addr |= (pinf.bank & BIT(0)) << (reg & MASK_0); + pinf.bank >>= MC5_EACHBIT; + err_addr |= (pinf.bank & BIT(0)) << FIELD_GET(MC5_BANK1_MASK, reg); + pinf.bank >>= MC5_EACHBIT; + + err_addr |= (pinf.bank & BIT(0)) << FIELD_GET(MC5_GRP_0_MASK, reg); + pinf.group >>= MC5_EACHBIT; + err_addr |= (pinf.bank & BIT(0)) << FIELD_GET(MC5_GRP_1_MASK, reg); + pinf.group >>= MC5_EACHBIT; + err_addr |= (pinf.bank & BIT(0)) << FIELD_GET(MASK_24, reg); + pinf.group >>= MC5_EACHBIT; + + reg = error_data[ADEC4]; + err_addr |= (pinf.rank & BIT(0)) << (reg & MASK_0); + pinf.rank >>= MC5_EACHBIT; + err_addr |= (pinf.rank & BIT(0)) << FIELD_GET(MC5_RANK_1_MASK, reg); + pinf.rank >>= MC5_EACHBIT; + + reg = error_data[ADEC5]; + err_addr |= (pinf.lrank & BIT(0)) << (reg & MASK_0); + pinf.lrank >>= MC5_EACHBIT; + err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MC5_LRANK_1_MASK, reg); + pinf.lrank >>= 
MC5_EACHBIT; + err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MC5_LRANK_2_MASK, reg); + pinf.lrank >>= MC5_EACHBIT; + err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MASK_24, reg); + pinf.lrank >>= MC5_EACHBIT; + + high_mem_base = (priv->adec[ADEC2 + offset] & MC5_MEM_MASK) * MC5_HIMEM_BASE; + interleave = priv->adec[ADEC13 + offset] & MC5_INTERLEAVE_SEL; + + high_mem_offset = priv->adec[ADEC3 + offset] & MC5_MEM_MASK; + low_mem_offset = priv->adec[ADEC1 + offset] & MC5_MEM_MASK; + reg = priv->adec[ADEC14 + offset]; + ilc_himem_en = !!(reg & MC5_ILC_HIMEM_EN); + ilcmem_base = (reg & MC5_ILC_MEM) * SZ_1M; + if (ilc_himem_en) + ilc_base_ctrl_add = ilcmem_base - high_mem_offset; + else + ilc_base_ctrl_add = ilcmem_base - low_mem_offset; + + if (priv->dwidth == DEV_X16) { + blk = err_addr / MC5_X16_SIZE; + rsh_req_addr = (blk << 8) + ilc_base_ctrl_add; + err_addr = rsh_req_addr * interleave * 2; + } else { + blk = err_addr / MC5_X32_SIZE; + rsh_req_addr = (blk << 9) + ilc_base_ctrl_add; + err_addr = rsh_req_addr * interleave * 2; + } + + if ((priv->adec[ADEC2 + offset] & MC5_HIGH_MEM_EN) && err_addr >= high_mem_base) + addr = err_addr - high_mem_offset; + else + addr = err_addr - low_mem_offset; + + return addr; +} + +/** + * handle_error - Handle errors. + * @priv: DDR memory controller private instance data. + * @stat: ECC status structure. + * @ctl_num: Controller number of the MC5 + * @error_data: the MC5 ADEC address decoder register data + * + * Handles ECC correctable and uncorrectable errors. + */ +static void handle_error(struct mc_priv *priv, struct ecc_status *stat, + int ctl_num, int *error_data) +{ + union ecc_error_info pinf; + struct mem_ctl_info *mci; + unsigned long pa; + phys_addr_t pfn; + int err; + + if (WARN_ON_ONCE(ctl_num >= NUM_CONTROLLERS)) + return; + + mci = priv->mci[ctl_num]; + + if (stat->error_type == MC5_ERR_TYPE_CE) { + pinf = stat->ceinfo[stat->channel]; + snprintf(priv->message, sizeof(priv->message), + "Error type:%s Controller %d Addr at %lx\n", + "CE", ctl_num, convert_to_physical(priv, pinf, ctl_num, error_data)); + + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + 1, 0, 0, 0, 0, 0, -1, + priv->message, ""); + } + + if (stat->error_type == MC5_ERR_TYPE_UE) { + pinf = stat->ueinfo[stat->channel]; + snprintf(priv->message, sizeof(priv->message), + "Error type:%s controller %d Addr at %lx\n", + "UE", ctl_num, convert_to_physical(priv, pinf, ctl_num, error_data)); + + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, + 1, 0, 0, 0, 0, 0, -1, + priv->message, ""); + pa = convert_to_physical(priv, pinf, ctl_num, error_data); + pfn = PHYS_PFN(pa); + + if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) { + err = memory_failure(pfn, MF_ACTION_REQUIRED); + if (err) + edac_dbg(2, "memory_failure() error: %d", err); + else + edac_dbg(2, "Poison page at PA 0x%lx\n", pa); + } + } +} + +static void mc_init(struct mem_ctl_info *mci, struct device *dev) +{ + struct mc_priv *priv = mci->pvt_info; + struct csrow_info *csi; + struct dimm_info *dimm; + u32 row; + int ch; + + /* Initialize controller capabilities and configuration */ + mci->mtype_cap = MEM_FLAG_DDR5; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->scrub_cap = SCRUB_HW_SRC; + mci->scrub_mode = SCRUB_NONE; + + mci->edac_cap = EDAC_FLAG_SECDED; + mci->ctl_name = "VersalNET DDR5"; + mci->dev_name = dev_name(dev); + mci->mod_name = "versalnet_edac"; + + edac_op_state = EDAC_OPSTATE_INT; + + for (row = 0; row < mci->nr_csrows; row++) { + csi = mci->csrows[row]; + for (ch = 0; ch < csi->nr_channels; ch++) { + 
dimm = csi->channels[ch]->dimm; + dimm->edac_mode = EDAC_SECDED; + dimm->mtype = MEM_DDR5; + dimm->grain = MC5_ERR_GRAIN; + dimm->dtype = priv->dwidth; + } + } +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +static unsigned int mcdi_rpc_timeout(struct cdx_mcdi *cdx, unsigned int cmd) +{ + return MCDI_RPC_TIMEOUT; +} + +static void mcdi_request(struct cdx_mcdi *cdx, + const struct cdx_dword *hdr, size_t hdr_len, + const struct cdx_dword *sdu, size_t sdu_len) +{ + void *send_buf; + int ret; + + send_buf = kzalloc(hdr_len + sdu_len, GFP_KERNEL); + if (!send_buf) + return; + + memcpy(send_buf, hdr, hdr_len); + memcpy(send_buf + hdr_len, sdu, sdu_len); + + ret = rpmsg_send(cdx->ept, send_buf, hdr_len + sdu_len); + if (ret) + dev_err(&cdx->rpdev->dev, "Failed to send rpmsg data: %d\n", ret); + + kfree(send_buf); +} + +static const struct cdx_mcdi_ops mcdi_ops = { + .mcdi_rpc_timeout = mcdi_rpc_timeout, + .mcdi_request = mcdi_request, +}; + +static void get_ddr_config(u32 index, u32 *buffer, struct cdx_mcdi *amd_mcdi) +{ + size_t outlen; + int ret; + + MCDI_DECLARE_BUF(inbuf, MC_GET_DDR_CONFIG_IN_LEN); + MCDI_DECLARE_BUF(outbuf, BUFFER_SZ); + + MCDI_SET_DWORD(inbuf, EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX, index); + + ret = cdx_mcdi_rpc(amd_mcdi, MC_CMD_EDAC_GET_DDR_CONFIG, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (!ret) + memcpy(buffer, MCDI_PTR(outbuf, GET_DDR_CONFIG), + (ADEC_NUM * 4)); +} + +static int setup_mcdi(struct mc_priv *mc_priv) +{ + struct cdx_mcdi *amd_mcdi; + int ret, i; + + amd_mcdi = kzalloc(sizeof(*amd_mcdi), GFP_KERNEL); + if (!amd_mcdi) + return -ENOMEM; + + amd_mcdi->mcdi_ops = &mcdi_ops; + ret = cdx_mcdi_init(amd_mcdi); + if (ret) { + kfree(amd_mcdi); + return ret; + } + + amd_mcdi->ept = mc_priv->ept; + mc_priv->mcdi = amd_mcdi; + + for (i = 0; i < NUM_CONTROLLERS; i++) + get_ddr_config(i, &mc_priv->adec[ADEC_NUM * i], amd_mcdi); + + return 0; +} + +static const guid_t amd_versalnet_guid = GUID_INIT(0x82678888, 0xa556, 0x44f2, + 0xb8, 0xb4, 0x45, 0x56, 0x2e, + 0x8c, 0x5b, 0xec); + +static int rpmsg_cb(struct rpmsg_device *rpdev, void *data, + int len, void *priv, u32 src) +{ + struct mc_priv *mc_priv = dev_get_drvdata(&rpdev->dev); + const guid_t *sec_type = &guid_null; + u32 length, offset, error_id; + u32 *result = (u32 *)data; + struct ecc_status *p; + int i, j, k, sec_sev; + const char *err_str; + u32 *adec_data; + + if (*(u8 *)data == MCDI_RESPONSE) { + cdx_mcdi_process_cmd(mc_priv->mcdi, (struct cdx_dword *)data, len); + return 0; + } + + sec_sev = result[ERROR_LEVEL]; + error_id = result[ERROR_ID]; + length = result[MSG_ERR_LENGTH]; + offset = result[MSG_ERR_OFFSET]; + + /* + * The data can come in two stretches. Construct the regs from two + * messages. The offset indicates the offset from which the data is to + * be taken. 
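[Editor's note: a standalone model of that offset-based reassembly may make the part_len bookkeeping in the callback easier to follow. The buffer size matches the driver; the message sizes in main() are placeholders.]

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define REG_MAX 152			/* matches the driver's buffer */

struct reassembly {
	uint32_t regs[REG_MAX];
	uint32_t part_len;		/* words accumulated so far */
};

/* Copy one part into the register image at the offset supplied by the
 * firmware; report completion once total_len words have arrived. */
static int push_part(struct reassembly *r, const uint32_t *data,
		     uint32_t offset, uint32_t len, uint32_t total_len)
{
	memcpy(&r->regs[offset], data, len * sizeof(*data));

	if (total_len > len) {
		r->part_len += len;
		if (r->part_len < total_len)
			return 0;	/* wait for the next part */
		r->part_len = 0;
	}
	return 1;			/* register image complete */
}

int main(void)
{
	struct reassembly r = { { 0 }, 0 };
	uint32_t part1[100] = { 0 }, part2[52] = { 0 };

	printf("%d\n", push_part(&r, part1, 0, 100, 152));	/* prints 0 */
	printf("%d\n", push_part(&r, part2, 100, 52, 152));	/* prints 1 */
	return 0;
}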
+ */ + for (i = 0 ; i < length; i++) { + k = offset + i; + j = ERROR_DATA + i; + mc_priv->regs[k] = result[j]; + } + + if (result[TOTAL_ERR_LENGTH] > length) { + if (!mc_priv->part_len) + mc_priv->part_len = length; + else + mc_priv->part_len += length; + + if (mc_priv->part_len < result[TOTAL_ERR_LENGTH]) + return 0; + mc_priv->part_len = 0; + } + + mc_priv->error_id = error_id; + mc_priv->error_level = result[ERROR_LEVEL]; + + switch (error_id) { + case 5: err_str = "General Software Non-Correctable error"; break; + case 6: err_str = "CFU error"; break; + case 7: err_str = "CFRAME error"; break; + case 10: err_str = "DDRMC Microblaze Correctable ECC error"; break; + case 11: err_str = "DDRMC Microblaze Non-Correctable ECC error"; break; + case 15: err_str = "MMCM error"; break; + case 16: err_str = "HNICX Correctable error"; break; + case 17: err_str = "HNICX Non-Correctable error"; break; + + case 18: + p = &mc_priv->stat; + memset(p, 0, sizeof(struct ecc_status)); + p->error_type = MC5_ERR_TYPE_CE; + for (i = 0 ; i < NUM_CONTROLLERS; i++) { + if (get_ddr_info(&mc_priv->regs[i * REGS_PER_CONTROLLER], mc_priv)) { + adec_data = mc_priv->adec + ADEC_NUM * i; + handle_error(mc_priv, &mc_priv->stat, i, adec_data); + } + } + return 0; + case 19: + p = &mc_priv->stat; + memset(p, 0, sizeof(struct ecc_status)); + p->error_type = MC5_ERR_TYPE_UE; + for (i = 0 ; i < NUM_CONTROLLERS; i++) { + if (get_ddr_info(&mc_priv->regs[i * REGS_PER_CONTROLLER], mc_priv)) { + adec_data = mc_priv->adec + ADEC_NUM * i; + handle_error(mc_priv, &mc_priv->stat, i, adec_data); + } + } + return 0; + + case 21: err_str = "GT Non-Correctable error"; break; + case 22: err_str = "PL Sysmon Correctable error"; break; + case 23: err_str = "PL Sysmon Non-Correctable error"; break; + case 111: err_str = "LPX unexpected dfx activation error"; break; + case 114: err_str = "INT_LPD Non-Correctable error"; break; + case 116: err_str = "INT_OCM Non-Correctable error"; break; + case 117: err_str = "INT_FPD Correctable error"; break; + case 118: err_str = "INT_FPD Non-Correctable error"; break; + case 120: err_str = "INT_IOU Non-Correctable error"; break; + case 123: err_str = "err_int_irq from APU GIC Distributor"; break; + case 124: err_str = "fault_int_irq from APU GIC Distribute"; break; + case 132 ... 139: err_str = "FPX SPLITTER error"; break; + case 140: err_str = "APU Cluster 0 error"; break; + case 141: err_str = "APU Cluster 1 error"; break; + case 142: err_str = "APU Cluster 2 error"; break; + case 143: err_str = "APU Cluster 3 error"; break; + case 145: err_str = "WWDT1 LPX error"; break; + case 147: err_str = "IPI error"; break; + case 152 ... 153: err_str = "AFIFS error"; break; + case 154 ... 155: err_str = "LPX glitch error"; break; + case 185 ... 186: err_str = "FPX AFIFS error"; break; + case 195 ... 199: err_str = "AFIFM error"; break; + case 108: err_str = "PSM Correctable error"; break; + case 59: err_str = "PMC correctable error"; break; + case 60: err_str = "PMC Un correctable error"; break; + case 43 ... 47: err_str = "PMC Sysmon error"; break; + case 163 ... 
184: err_str = "RPU error"; break; + case 148: err_str = "OCM0 correctable error"; break; + case 149: err_str = "OCM1 correctable error"; break; + case 150: err_str = "OCM0 Un-correctable error"; break; + case 151: err_str = "OCM1 Un-correctable error"; break; + case 189: err_str = "PSX_CMN_3 PD block consolidated error"; break; + case 191: err_str = "FPD_INT_WRAP PD block consolidated error"; break; + case 232: err_str = "CRAM Un-Correctable error"; break; + default: err_str = "VERSAL_EDAC_ERR_ID: %d"; break; + } + + snprintf(mc_priv->message, + sizeof(mc_priv->message), + "[VERSAL_EDAC_ERR_ID: %d] Error type: %s", error_id, err_str); + + /* Convert to bytes */ + length = result[TOTAL_ERR_LENGTH] * 4; + log_non_standard_event(sec_type, &amd_versalnet_guid, mc_priv->message, + sec_sev, (void *)&mc_priv->regs, length); + + return 0; +} + +static struct rpmsg_device_id amd_rpmsg_id_table[] = { + { .name = "error_ipc" }, + { }, +}; +MODULE_DEVICE_TABLE(rpmsg, amd_rpmsg_id_table); + +static int rpmsg_probe(struct rpmsg_device *rpdev) +{ + struct rpmsg_channel_info chinfo; + struct mc_priv *pg; + + pg = (struct mc_priv *)amd_rpmsg_id_table[0].driver_data; + chinfo.src = RPMSG_ADDR_ANY; + chinfo.dst = rpdev->dst; + strscpy(chinfo.name, amd_rpmsg_id_table[0].name, + strlen(amd_rpmsg_id_table[0].name)); + + pg->ept = rpmsg_create_ept(rpdev, rpmsg_cb, NULL, chinfo); + if (!pg->ept) + return dev_err_probe(&rpdev->dev, -ENXIO, "Failed to create ept for channel %s\n", + chinfo.name); + + dev_set_drvdata(&rpdev->dev, pg); + + return 0; +} + +static void rpmsg_remove(struct rpmsg_device *rpdev) +{ + struct mc_priv *mc_priv = dev_get_drvdata(&rpdev->dev); + + rpmsg_destroy_ept(mc_priv->ept); + dev_set_drvdata(&rpdev->dev, NULL); +} + +static struct rpmsg_driver amd_rpmsg_driver = { + .drv.name = KBUILD_MODNAME, + .probe = rpmsg_probe, + .remove = rpmsg_remove, + .callback = rpmsg_cb, + .id_table = amd_rpmsg_id_table, +}; + +static void versal_edac_release(struct device *dev) +{ + kfree(dev); +} + +static int init_versalnet(struct mc_priv *priv, struct platform_device *pdev) +{ + u32 num_chans, rank, dwidth, config; + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + struct device *dev; + enum dev_type dt; + char *name; + int rc, i; + + for (i = 0; i < NUM_CONTROLLERS; i++) { + config = priv->adec[CONF + i * ADEC_NUM]; + num_chans = FIELD_GET(MC5_NUM_CHANS_MASK, config); + rank = 1 << FIELD_GET(MC5_RANK_MASK, config); + dwidth = FIELD_GET(MC5_BUS_WIDTH_MASK, config); + + switch (dwidth) { + case XDDR5_BUS_WIDTH_16: + dt = DEV_X16; + break; + case XDDR5_BUS_WIDTH_32: + dt = DEV_X32; + break; + case XDDR5_BUS_WIDTH_64: + dt = DEV_X64; + break; + default: + dt = DEV_UNKNOWN; + } + + if (dt == DEV_UNKNOWN) + continue; + + /* Find the first enabled device and register that one. 
*/ + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = rank; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = num_chans; + layers[1].is_virt_csrow = false; + + rc = -ENOMEM; + mci = edac_mc_alloc(i, ARRAY_SIZE(layers), layers, + sizeof(struct mc_priv)); + if (!mci) { + edac_printk(KERN_ERR, EDAC_MC, "Failed memory allocation for MC%d\n", i); + goto err_alloc; + } + + priv->mci[i] = mci; + priv->dwidth = dt; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + dev->release = versal_edac_release; + name = kmalloc(32, GFP_KERNEL); + sprintf(name, "versal-net-ddrmc5-edac-%d", i); + dev->init_name = name; + rc = device_register(dev); + if (rc) + goto err_alloc; + + mci->pdev = dev; + + platform_set_drvdata(pdev, priv); + + mc_init(mci, dev); + rc = edac_mc_add_mc(mci); + if (rc) { + edac_printk(KERN_ERR, EDAC_MC, "Failed to register MC%d with EDAC core\n", i); + goto err_alloc; + } + } + return 0; + +err_alloc: + while (i--) { + mci = priv->mci[i]; + if (!mci) + continue; + + if (mci->pdev) { + device_unregister(mci->pdev); + edac_mc_del_mc(mci->pdev); + } + + edac_mc_free(mci); + } + + return rc; +} + +static void remove_versalnet(struct mc_priv *priv) +{ + struct mem_ctl_info *mci; + int i; + + for (i = 0; i < NUM_CONTROLLERS; i++) { + device_unregister(priv->mci[i]->pdev); + mci = edac_mc_del_mc(priv->mci[i]->pdev); + if (!mci) + return; + + edac_mc_free(mci); + } +} + +static int mc_probe(struct platform_device *pdev) +{ + struct device_node *r5_core_node; + struct mc_priv *priv; + struct rproc *rp; + int rc; + + r5_core_node = of_parse_phandle(pdev->dev.of_node, "amd,rproc", 0); + if (!r5_core_node) { + dev_err(&pdev->dev, "amd,rproc: invalid phandle\n"); + return -EINVAL; + } + + rp = rproc_get_by_phandle(r5_core_node->phandle); + if (!rp) + return -EPROBE_DEFER; + + rc = rproc_boot(rp); + if (rc) { + dev_err(&pdev->dev, "Failed to attach to remote processor\n"); + goto err_rproc_boot; + } + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) { + rc = -ENOMEM; + goto err_alloc; + } + + amd_rpmsg_id_table[0].driver_data = (kernel_ulong_t)priv; + + rc = register_rpmsg_driver(&amd_rpmsg_driver); + if (rc) { + edac_printk(KERN_ERR, EDAC_MC, "Failed to register RPMsg driver: %d\n", rc); + goto err_alloc; + } + + rc = setup_mcdi(priv); + if (rc) + goto err_unreg; + + priv->mcdi->r5_rproc = rp; + + rc = init_versalnet(priv, pdev); + if (rc) + goto err_init; + + return 0; + +err_init: + cdx_mcdi_finish(priv->mcdi); + +err_unreg: + unregister_rpmsg_driver(&amd_rpmsg_driver); + +err_alloc: + rproc_shutdown(rp); + +err_rproc_boot: + rproc_put(rp); + + return rc; +} + +static void mc_remove(struct platform_device *pdev) +{ + struct mc_priv *priv = platform_get_drvdata(pdev); + + unregister_rpmsg_driver(&amd_rpmsg_driver); + remove_versalnet(priv); + rproc_shutdown(priv->mcdi->r5_rproc); + cdx_mcdi_finish(priv->mcdi); +} + +static const struct of_device_id amd_edac_match[] = { + { .compatible = "xlnx,versal-net-ddrmc5", }, + {} +}; +MODULE_DEVICE_TABLE(of, amd_edac_match); + +static struct platform_driver amd_ddr_edac_mc_driver = { + .driver = { + .name = "versal-net-edac", + .of_match_table = amd_edac_match, + }, + .probe = mc_probe, + .remove = mc_remove, +}; + +module_platform_driver(amd_ddr_edac_mc_driver); + +MODULE_AUTHOR("AMD Inc"); +MODULE_DESCRIPTION("Versal NET EDAC driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index c52b9dd9154c..9955396c9a52 100644 --- 
a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/regmap.h> +#include <linux/string_choices.h> #include "edac_module.h" @@ -913,8 +914,8 @@ static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np, snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd); edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx), - edac_name, 1, "l2c", 1, 2, NULL, - 0, edac_device_alloc_index()); + edac_name, 1, "l2c", 1, 2, + edac_device_alloc_index()); if (!edac_dev) { rc = -ENOMEM; goto err_group; @@ -1208,8 +1209,7 @@ static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np, edac_idx = edac_device_alloc_index(); edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx), - "l3c", 1, "l3c", 1, 0, NULL, 0, - edac_idx); + "l3c", 1, "l3c", 1, 0, edac_idx); if (!edac_dev) { rc = -ENOMEM; goto err_release_group; @@ -1408,7 +1408,7 @@ static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev) dev_err(edac_dev->dev, "Multiple XGIC write size error\n"); info = readl(ctx->dev_csr + XGICTRANSERRREQINFO); dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n", - info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info), + str_read_write(info & REQTYPE_MASK), ERRADDR_RD(info), info); writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS); @@ -1490,19 +1490,19 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev) if (reg & AGENT_OFFLINE_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s access to offline agent error\n", - write ? "write" : "read"); + str_write_read(write)); if (reg & UNIMPL_RBPAGE_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s access to unimplemented page error\n", - write ? "write" : "read"); + str_write_read(write)); if (reg & WORD_ALIGNED_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s word aligned access error\n", - write ? "write" : "read"); + str_write_read(write)); if (reg & PAGE_ACCESS_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s to page out of range access error\n", - write ? "write" : "read"); + str_write_read(write)); if (regmap_write(ctx->edac->rb_map, RBEIR, 0)) return; if (regmap_write(ctx->edac->rb_map, RBCSR, 0)) @@ -1561,7 +1561,7 @@ rb_skip: err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL); err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH); dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n", - REQTYPE_F2_RD(err_addr_hi) ? "read" : "write", + str_read_write(REQTYPE_F2_RD(err_addr_hi)), ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi); if (reg & WRERR_RESP_MASK) dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n", @@ -1612,7 +1612,7 @@ chk_iob_axi0: dev_err(edac_dev->dev, "%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n", reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "", - REQTYPE_RD(err_addr_hi) ? "read" : "write", + str_read_write(REQTYPE_RD(err_addr_hi)), ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi); writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS); @@ -1626,7 +1626,7 @@ chk_iob_axi1: dev_err(edac_dev->dev, "%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n", reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "", - REQTYPE_RD(err_addr_hi) ? 
"read" : "write", + str_read_write(REQTYPE_RD(err_addr_hi)), ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi); writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS); } @@ -1748,8 +1748,7 @@ static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np, edac_idx = edac_device_alloc_index(); edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx), - "SOC", 1, "SOC", 1, 2, NULL, 0, - edac_idx); + "SOC", 1, "SOC", 1, 2, edac_idx); if (!edac_dev) { rc = -ENOMEM; goto err_release_group; @@ -1960,7 +1959,7 @@ out_err: return rc; } -static int xgene_edac_remove(struct platform_device *pdev) +static void xgene_edac_remove(struct platform_device *pdev) { struct xgene_edac *edac = dev_get_drvdata(&pdev->dev); struct xgene_edac_mc_ctx *mcu; @@ -1981,8 +1980,6 @@ static int xgene_edac_remove(struct platform_device *pdev) list_for_each_entry_safe(node, temp_node, &edac->socs, next) xgene_edac_soc_remove(node); - - return 0; } static const struct of_device_id xgene_edac_of_match[] = { diff --git a/drivers/edac/zynqmp_edac.c b/drivers/edac/zynqmp_edac.c new file mode 100644 index 000000000000..cdffc9e4194d --- /dev/null +++ b/drivers/edac/zynqmp_edac.c @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Xilinx ZynqMP OCM ECC Driver + * + * Copyright (C) 2022 Advanced Micro Devices, Inc. + */ + +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> + +#include "edac_module.h" + +#define ZYNQMP_OCM_EDAC_MSG_SIZE 256 + +#define ZYNQMP_OCM_EDAC_STRING "zynqmp_ocm" + +/* Error/Interrupt registers */ +#define ERR_CTRL_OFST 0x0 +#define OCM_ISR_OFST 0x04 +#define OCM_IMR_OFST 0x08 +#define OCM_IEN_OFST 0x0C +#define OCM_IDS_OFST 0x10 + +/* ECC control register */ +#define ECC_CTRL_OFST 0x14 + +/* Correctable error info registers */ +#define CE_FFA_OFST 0x1C +#define CE_FFD0_OFST 0x20 +#define CE_FFD1_OFST 0x24 +#define CE_FFD2_OFST 0x28 +#define CE_FFD3_OFST 0x2C +#define CE_FFE_OFST 0x30 + +/* Uncorrectable error info registers */ +#define UE_FFA_OFST 0x34 +#define UE_FFD0_OFST 0x38 +#define UE_FFD1_OFST 0x3C +#define UE_FFD2_OFST 0x40 +#define UE_FFD3_OFST 0x44 +#define UE_FFE_OFST 0x48 + +/* ECC control register bit field definitions */ +#define ECC_CTRL_CLR_CE_ERR 0x40 +#define ECC_CTRL_CLR_UE_ERR 0x80 + +/* Fault injection data and count registers */ +#define OCM_FID0_OFST 0x4C +#define OCM_FID1_OFST 0x50 +#define OCM_FID2_OFST 0x54 +#define OCM_FID3_OFST 0x58 +#define OCM_FIC_OFST 0x74 + +#define UE_MAX_BITPOS_LOWER 31 +#define UE_MIN_BITPOS_UPPER 32 +#define UE_MAX_BITPOS_UPPER 63 + +/* Interrupt masks */ +#define OCM_CEINTR_MASK BIT(6) +#define OCM_UEINTR_MASK BIT(7) +#define OCM_ECC_ENABLE_MASK BIT(0) + +#define OCM_FICOUNT_MASK GENMASK(23, 0) +#define OCM_NUM_UE_BITPOS 2 +#define OCM_BASEVAL 0xFFFC0000 +#define EDAC_DEVICE "ZynqMP-OCM" + +/** + * struct ecc_error_info - ECC error log information + * @addr: Fault generated at this address + * @fault_lo: Generated fault data (lower 32-bit) + * @fault_hi: Generated fault data (upper 32-bit) + */ +struct ecc_error_info { + u32 addr; + u32 fault_lo; + u32 fault_hi; +}; + +/** + * struct ecc_status - ECC status information to report + * @ce_cnt: Correctable error count + * @ue_cnt: Uncorrectable error count + * @ceinfo: Correctable error log information + * @ueinfo: Uncorrectable error log information + */ +struct ecc_status { + u32 ce_cnt; + u32 ue_cnt; + struct ecc_error_info ceinfo; + struct ecc_error_info ueinfo; +}; 
+
+/**
+ * struct edac_priv - OCM private instance data
+ * @baseaddr: Base address of the OCM
+ * @message: Buffer for framing the event specific info
+ * @stat: ECC status information
+ * @ce_cnt: Correctable Error count
+ * @ue_cnt: Uncorrectable Error count
+ * @debugfs_dir: Directory entry for debugfs
+ * @ce_bitpos: Bit position for Correctable Error
+ * @ue_bitpos: Array to store Uncorrectable Error bit positions
+ * @fault_injection_cnt: Fault Injection Counter value
+ */
+struct edac_priv {
+ void __iomem *baseaddr;
+ char message[ZYNQMP_OCM_EDAC_MSG_SIZE];
+ struct ecc_status stat;
+ u32 ce_cnt;
+ u32 ue_cnt;
+#ifdef CONFIG_EDAC_DEBUG
+ struct dentry *debugfs_dir;
+ u8 ce_bitpos;
+ u8 ue_bitpos[OCM_NUM_UE_BITPOS];
+ u32 fault_injection_cnt;
+#endif
+};
+
+/**
+ * get_error_info - Get the current ECC error info
+ * @base: Pointer to the base address of the OCM
+ * @p: Pointer to the OCM ECC status structure
+ * @mask: Status register mask value
+ *
+ * Determine whether any ECC error has occurred, and latch the error
+ * details if so.
+ */
+static void get_error_info(void __iomem *base, struct ecc_status *p, int mask)
+{
+ if (mask & OCM_CEINTR_MASK) {
+ p->ce_cnt++;
+ p->ceinfo.fault_lo = readl(base + CE_FFD0_OFST);
+ p->ceinfo.fault_hi = readl(base + CE_FFD1_OFST);
+ p->ceinfo.addr = (OCM_BASEVAL | readl(base + CE_FFA_OFST));
+ writel(ECC_CTRL_CLR_CE_ERR, base + OCM_ISR_OFST);
+ } else if (mask & OCM_UEINTR_MASK) {
+ p->ue_cnt++;
+ p->ueinfo.fault_lo = readl(base + UE_FFD0_OFST);
+ p->ueinfo.fault_hi = readl(base + UE_FFD1_OFST);
+ p->ueinfo.addr = (OCM_BASEVAL | readl(base + UE_FFA_OFST));
+ writel(ECC_CTRL_CLR_UE_ERR, base + OCM_ISR_OFST);
+ }
+}
+
+/**
+ * handle_error - Handle error types CE and UE
+ * @dci: Pointer to the EDAC device instance
+ * @p: Pointer to the OCM ECC status structure
+ *
+ * Handles correctable and uncorrectable errors.
+ */
+static void handle_error(struct edac_device_ctl_info *dci, struct ecc_status *p)
+{
+ struct edac_priv *priv = dci->pvt_info;
+ struct ecc_error_info *pinf;
+
+ if (p->ce_cnt) {
+ pinf = &p->ceinfo;
+ snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE,
+ "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]",
+ "CE", pinf->addr, pinf->fault_hi, pinf->fault_lo);
+ edac_device_handle_ce(dci, 0, 0, priv->message);
+ }
+
+ if (p->ue_cnt) {
+ pinf = &p->ueinfo;
+ snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE,
+ "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]",
+ "UE", pinf->addr, pinf->fault_hi, pinf->fault_lo);
+ edac_device_handle_ue(dci, 0, 0, priv->message);
+ }
+
+ memset(p, 0, sizeof(*p));
+}
+
+/**
+ * intr_handler - ISR routine
+ * @irq: irq number
+ * @dev_id: device id pointer
+ *
+ * Return: IRQ_NONE if the CE/UE interrupt is not set, IRQ_HANDLED otherwise
+ */
+static irqreturn_t intr_handler(int irq, void *dev_id)
+{
+ struct edac_device_ctl_info *dci = dev_id;
+ struct edac_priv *priv = dci->pvt_info;
+ int regval;
+
+ regval = readl(priv->baseaddr + OCM_ISR_OFST);
+ if (!(regval & (OCM_CEINTR_MASK | OCM_UEINTR_MASK))) {
+ WARN_ONCE(1, "Unhandled IRQ%d, ISR: 0x%x", irq, regval);
+ return IRQ_NONE;
+ }
+
+ get_error_info(priv->baseaddr, &priv->stat, regval);
+
+ priv->ce_cnt += priv->stat.ce_cnt;
+ priv->ue_cnt += priv->stat.ue_cnt;
+ handle_error(dci, &priv->stat);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * get_eccstate - Return the ECC status
+ * @base: Pointer to the OCM base address
+ *
+ * Get the ECC enable/disable status.
+ *
+ * Return: true if ECC is enabled, false otherwise.
+ */ +static bool get_eccstate(void __iomem *base) +{ + return readl(base + ECC_CTRL_OFST) & OCM_ECC_ENABLE_MASK; +} + +#ifdef CONFIG_EDAC_DEBUG +/** + * write_fault_count - write fault injection count + * @priv: Pointer to the EDAC private struct + * + * Update the fault injection count register, once the counter reaches + * zero, it injects errors + */ +static void write_fault_count(struct edac_priv *priv) +{ + u32 ficount = priv->fault_injection_cnt; + + if (ficount & ~OCM_FICOUNT_MASK) { + ficount &= OCM_FICOUNT_MASK; + edac_printk(KERN_INFO, EDAC_DEVICE, + "Fault injection count value truncated to %d\n", ficount); + } + + writel(ficount, priv->baseaddr + OCM_FIC_OFST); +} + +/* + * To get the Correctable Error injected, the following steps are needed: + * - Setup the optional Fault Injection Count: + * echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count + * - Write the Correctable Error bit position value: + * echo <bit_pos val> > /sys/kernel/debug/edac/ocm/inject_ce_bitpos + */ +static ssize_t inject_ce_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dev = file->private_data; + struct edac_priv *priv = edac_dev->pvt_info; + int ret; + + if (!data) + return -EFAULT; + + ret = kstrtou8_from_user(data, count, 0, &priv->ce_bitpos); + if (ret) + return ret; + + if (priv->ce_bitpos > UE_MAX_BITPOS_UPPER) + return -EINVAL; + + if (priv->ce_bitpos <= UE_MAX_BITPOS_LOWER) { + writel(BIT(priv->ce_bitpos), priv->baseaddr + OCM_FID0_OFST); + writel(0, priv->baseaddr + OCM_FID1_OFST); + } else { + writel(BIT(priv->ce_bitpos - UE_MIN_BITPOS_UPPER), + priv->baseaddr + OCM_FID1_OFST); + writel(0, priv->baseaddr + OCM_FID0_OFST); + } + + write_fault_count(priv); + + return count; +} + +static const struct file_operations inject_ce_fops = { + .open = simple_open, + .write = inject_ce_write, + .llseek = generic_file_llseek, +}; + +/* + * To get the Uncorrectable Error injected, the following steps are needed: + * - Setup the optional Fault Injection Count: + * echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count + * - Write the Uncorrectable Error bit position values: + * echo <bit_pos0 val>,<bit_pos1 val> > /sys/kernel/debug/edac/ocm/inject_ue_bitpos + */ +static ssize_t inject_ue_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dev = file->private_data; + struct edac_priv *priv = edac_dev->pvt_info; + char buf[6], *pbuf, *token[2]; + u64 ue_bitpos; + int i, ret; + u8 len; + + if (!data) + return -EFAULT; + + len = min_t(size_t, count, sizeof(buf)); + if (copy_from_user(buf, data, len)) + return -EFAULT; + + buf[len] = '\0'; + pbuf = &buf[0]; + for (i = 0; i < OCM_NUM_UE_BITPOS; i++) + token[i] = strsep(&pbuf, ","); + + ret = kstrtou8(token[0], 0, &priv->ue_bitpos[0]); + if (ret) + return ret; + + ret = kstrtou8(token[1], 0, &priv->ue_bitpos[1]); + if (ret) + return ret; + + if (priv->ue_bitpos[0] > UE_MAX_BITPOS_UPPER || + priv->ue_bitpos[1] > UE_MAX_BITPOS_UPPER) + return -EINVAL; + + if (priv->ue_bitpos[0] == priv->ue_bitpos[1]) { + edac_printk(KERN_ERR, EDAC_DEVICE, "Bit positions should not be equal\n"); + return -EINVAL; + } + + ue_bitpos = BIT(priv->ue_bitpos[0]) | BIT(priv->ue_bitpos[1]); + + writel((u32)ue_bitpos, priv->baseaddr + OCM_FID0_OFST); + writel((u32)(ue_bitpos >> 32), priv->baseaddr + OCM_FID1_OFST); + + write_fault_count(priv); + + return count; +} + +static const struct file_operations inject_ue_fops = { 
+ .open = simple_open,
+ .write = inject_ue_write,
+ .llseek = generic_file_llseek,
+};
+
+static void setup_debugfs(struct edac_device_ctl_info *edac_dev)
+{
+ struct edac_priv *priv = edac_dev->pvt_info;
+
+ priv->debugfs_dir = edac_debugfs_create_dir("ocm");
+ if (!priv->debugfs_dir)
+ return;
+
+ edac_debugfs_create_x32("inject_fault_count", 0644, priv->debugfs_dir,
+ &priv->fault_injection_cnt);
+ edac_debugfs_create_file("inject_ue_bitpos", 0644, priv->debugfs_dir,
+ edac_dev, &inject_ue_fops);
+ edac_debugfs_create_file("inject_ce_bitpos", 0644, priv->debugfs_dir,
+ edac_dev, &inject_ce_fops);
+}
+#endif
+
+static int edac_probe(struct platform_device *pdev)
+{
+ struct edac_device_ctl_info *dci;
+ struct edac_priv *priv;
+ void __iomem *baseaddr;
+ struct resource *res;
+ int irq, ret;
+
+ baseaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(baseaddr))
+ return PTR_ERR(baseaddr);
+
+ if (!get_eccstate(baseaddr)) {
+ edac_printk(KERN_INFO, EDAC_DEVICE, "ECC not enabled\n");
+ return -ENXIO;
+ }
+
+ dci = edac_device_alloc_ctl_info(sizeof(*priv), ZYNQMP_OCM_EDAC_STRING,
+ 1, ZYNQMP_OCM_EDAC_STRING, 1, 0,
+ edac_device_alloc_index());
+ if (!dci)
+ return -ENOMEM;
+
+ priv = dci->pvt_info;
+ platform_set_drvdata(pdev, dci);
+ dci->dev = &pdev->dev;
+ priv->baseaddr = baseaddr;
+ dci->mod_name = pdev->dev.driver->name;
+ dci->ctl_name = ZYNQMP_OCM_EDAC_STRING;
+ dci->dev_name = dev_name(&pdev->dev);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto free_dev_ctl;
+ }
+
+ ret = devm_request_irq(&pdev->dev, irq, intr_handler, 0,
+ dev_name(&pdev->dev), dci);
+ if (ret) {
+ edac_printk(KERN_ERR, EDAC_DEVICE, "Failed to request IRQ\n");
+ goto free_dev_ctl;
+ }
+
+ /* Enable UE, CE interrupts */
+ writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IEN_OFST);
+
+#ifdef CONFIG_EDAC_DEBUG
+ setup_debugfs(dci);
+#endif
+
+ ret = edac_device_add_device(dci);
+ if (ret)
+ goto free_dev_ctl;
+
+ return 0;
+
+free_dev_ctl:
+ edac_device_free_ctl_info(dci);
+
+ return ret;
+}
+
+static void edac_remove(struct platform_device *pdev)
+{
+ struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
+ struct edac_priv *priv = dci->pvt_info;
+
+ /* Disable UE, CE interrupts */
+ writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IDS_OFST);
+
+#ifdef CONFIG_EDAC_DEBUG
+ debugfs_remove_recursive(priv->debugfs_dir);
+#endif
+
+ edac_device_del_device(&pdev->dev);
+ edac_device_free_ctl_info(dci);
+}
+
+static const struct of_device_id zynqmp_ocm_edac_match[] = {
+ { .compatible = "xlnx,zynqmp-ocmc-1.0"},
+ { /* end of table */ }
+};
+
+MODULE_DEVICE_TABLE(of, zynqmp_ocm_edac_match);
+
+static struct platform_driver zynqmp_ocm_edac_driver = {
+ .driver = {
+ .name = "zynqmp-ocm-edac",
+ .of_match_table = zynqmp_ocm_edac_match,
+ },
+ .probe = edac_probe,
+ .remove = edac_remove,
+};
+
+module_platform_driver(zynqmp_ocm_edac_driver);
+
+MODULE_AUTHOR("Advanced Micro Devices, Inc");
+MODULE_DESCRIPTION("Xilinx ZynqMP OCM ECC driver");
+MODULE_LICENSE("GPL");
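[Editor's note: putting the OCM injection interface documented above to use from user space. A minimal sketch, assuming debugfs is mounted at /sys/kernel/debug and the kernel is built with CONFIG_EDAC_DEBUG; the injected values are examples.]

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	/* Optional: inject after 8 more accesses, see write_fault_count(). */
	write_str("/sys/kernel/debug/edac/ocm/inject_fault_count", "8");
	/* Single-bit (correctable) fault at data bit 4. */
	write_str("/sys/kernel/debug/edac/ocm/inject_ce_bitpos", "4");
	/* Or a double-bit (uncorrectable) fault at bits 3 and 40. */
	write_str("/sys/kernel/debug/edac/ocm/inject_ue_bitpos", "3,40");
	return 0;
}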
