Diffstat (limited to 'drivers/edac')
96 files changed, 33970 insertions, 11110 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 878f09005fad..81e40543ffd8 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -2,46 +2,30 @@ # EDAC Kconfig # Copyright (c) 2008 Doug Thompson www.softwarebitmaker.com # Licensed and distributed under the GPL -# + +config EDAC_ATOMIC_SCRUB + bool config EDAC_SUPPORT bool menuconfig EDAC - bool "EDAC (Error Detection And Correction) reporting" - depends on HAS_IOMEM - depends on X86 || PPC || TILE || ARM || EDAC_SUPPORT + tristate "EDAC (Error Detection And Correction) reporting" + depends on HAS_IOMEM && EDAC_SUPPORT && RAS help - EDAC is designed to report errors in the core system. - These are low-level errors that are reported in the CPU or - supporting chipset or other subsystems: + EDAC is a subsystem along with hardware-specific drivers designed to + report hardware errors. These are low-level errors that are reported + in the CPU or supporting chipset or other subsystems: memory errors, cache errors, PCI errors, thermal throttling, etc.. If unsure, select 'Y'. - If this code is reporting problems on your system, please - see the EDAC project web pages for more information at: - - <http://bluesmoke.sourceforge.net/> - - and: - - <http://buttersideup.com/edacwiki> - - There is also a mailing list for the EDAC project, which can - be found via the sourceforge page. + The mailing list for the EDAC project is linux-edac@vger.kernel.org. if EDAC -config EDAC_LEGACY_SYSFS - bool "EDAC legacy sysfs" - default y - help - Enable the compatibility sysfs nodes. - Use 'Y' if your edac utilities aren't ported to work with the newer - structures. - config EDAC_DEBUG bool "Debugging" + select DEBUG_FS help This turns on debugging information for the entire EDAC subsystem. You do so by inserting edac_module with "edac_debug_level=x." Valid @@ -52,7 +36,7 @@ config EDAC_DECODE_MCE tristate "Decode MCEs in human-readable form (only on AMD for now)" depends on CPU_SUP_AMD && X86_MCE_AMD default y - ---help--- + help Enable this option if you want to decode Machine Check Exceptions occurring on your machine in human-readable form. @@ -60,30 +44,10 @@ config EDAC_DECODE_MCE which occur really early upon boot, before the module infrastructure has been initialized. -config EDAC_MCE_INJ - tristate "Simple MCE injection interface over /sysfs" - depends on EDAC_DECODE_MCE - default n - help - This is a simple interface to inject MCEs over /sysfs and test - the MCE decoding code in EDAC. - - This is currently AMD-only. - -config EDAC_MM_EDAC - tristate "Main Memory EDAC (Error Detection And Correction) reporting" - help - Some systems are able to detect and correct errors in main - memory. EDAC can report statistics on memory error - detection and correction (EDAC - or commonly referred to ECC - errors). EDAC will also try to decode where these errors - occurred so that a particular failing memory module can be - replaced. If unsure, select 'Y'. - config EDAC_GHES - bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" - depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) - default y + tristate "Output ACPI APEI/GHES BIOS detected errors via EDAC" + depends on ACPI_APEI_GHES + select UEFI_CPER help Not all machines support hardware-driven error report. Some of those provide a BIOS-driven error report mechanism via ACPI, using the @@ -103,21 +67,50 @@ config EDAC_GHES In doubt, say 'Y'. 
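As a note on the EDAC_DEBUG option above: the debug level it enables gates the edac_dbg() macro from edac_module.h, which compiles away entirely unless CONFIG_EDAC_DEBUG is set and, at run time, emits only messages whose level is at or below the edac_debug_level module parameter (lower levels are less verbose). A minimal sketch of how a driver uses it; the probe function here is hypothetical, not part of this patch:

#include "edac_module.h"

/* Hypothetical driver hook: a message is printed only when its level
 * argument is <= the edac_debug_level module parameter.
 */
static int example_probe(void)
{
	edac_dbg(0, "essential debug output\n");
	edac_dbg(3, "verbose output, e.g. per-register traces\n");
	return 0;
}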
+config EDAC_SCRUB + bool "EDAC scrub feature" + help + The EDAC scrub feature is optional and is designed to control the + memory scrubbers in the system. The common sysfs scrub interface + abstracts the control of various arbitrary scrubbing functionalities + into a unified set of functions. + Say 'y/n' to enable/disable EDAC scrub feature. + +config EDAC_ECS + bool "EDAC ECS (Error Check Scrub) feature" + help + The EDAC ECS feature is optional and is designed to control on-die + error check scrub (e.g., DDR5 ECS) in the system. The common sysfs + ECS interface abstracts the control of various ECS functionalities + into a unified set of functions. + Say 'y/n' to enable/disable EDAC ECS feature. + +config EDAC_MEM_REPAIR + bool "EDAC memory repair feature" + help + The EDAC memory repair feature is optional and is designed to control + the memory devices with repair features, such as Post Package Repair + (PPR), memory sparing etc. The common sysfs memory repair interface + abstracts the control of various memory repair functionalities into + a unified set of functions. + Say 'y/n' to enable/disable EDAC memory repair feature. + config EDAC_AMD64 - tristate "AMD64 (Opteron, Athlon64) K8, F10h" - depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE + tristate "AMD64 (Opteron, Athlon64)" + depends on AMD_NB && EDAC_DECODE_MCE + depends on AMD_NODE + imply AMD_ATL help Support for error detection and correction of DRAM ECC errors on - the AMD64 families of memory controllers (K8 and F10h) + the AMD64 families (>= K8) of memory controllers. -config EDAC_AMD64_ERROR_INJECTION - bool "Sysfs HW Error injection facilities" - depends on EDAC_AMD64 - help - Recent Opterons (Family 10h and later) provide for Memory Error - Injection into the ECC detection circuits. The amd64_edac module - allows the operator/user to inject Uncorrectable and Correctable - errors into DRAM. + When EDAC_DEBUG is enabled, hardware error injection facilities + through sysfs are available: + + AMD CPUs up to and excluding family 0x17 provide for Memory + Error Injection into the ECC detection circuits. The amd64_edac + module allows the operator/user to inject Uncorrectable and + Correctable errors into DRAM. When enabled, in each of the respective memory controller directories (/sys/devices/system/edac/mc/mcX), there are 3 input files: @@ -129,30 +122,37 @@ config EDAC_AMD64_ERROR_INJECTION In addition, there are two control files, inject_read and inject_write, which trigger the DRAM ECC Read and Write respectively. +config EDAC_AL_MC + tristate "Amazon's Annapurna Lab Memory Controller" + depends on (ARCH_ALPINE || COMPILE_TEST) + help + Support for error detection and correction for Amazon's Annapurna + Labs Alpine chips which allow 1 bit correction and 2 bits detection. + config EDAC_AMD76X tristate "AMD 76x (760, 762, 768)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the AMD 76x series of chipsets used with the Athlon processor. config EDAC_E7XXX tristate "Intel e7xxx (e7205, e7500, e7501, e7505)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel E7205, E7500, E7501 and E7505 server chipsets. config EDAC_E752X tristate "Intel e752x (e7520, e7525, e7320) and 3100" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel E7520, E7525, E7320 server chipsets. 
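To make the error injection interface described under EDAC_AMD64 above concrete, here is a hedged user-space sketch in C (error handling elided). It assumes a single controller at mc0, EDAC_DEBUG enabled, and that the three injection input files have already been configured; per the help text, writing the inject_write control file then triggers the DRAM ECC write cycle that plants the error:

#include <fcntl.h>
#include <unistd.h>

/* Assumes amd64_edac is loaded with EDAC_DEBUG enabled and the three
 * injection input files under mc0 have already been set up.
 */
int main(void)
{
	int fd = open("/sys/devices/system/edac/mc/mc0/inject_write", O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, "1", 1);	/* trigger the DRAM ECC write */
	close(fd);
	return 0;
}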
config EDAC_I82443BXGX tristate "Intel 82443BX/GX (440BX/GX)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 depends on BROKEN help Support for error detection and correction on the Intel @@ -160,49 +160,56 @@ config EDAC_I82443BXGX config EDAC_I82875P tristate "Intel 82875p (D82875P, E7210)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel DP82785P and E7210 server chipsets. config EDAC_I82975X tristate "Intel 82975x (D82975x)" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel DP82975x server chipsets. config EDAC_I3000 tristate "Intel 3000/3010" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel 3000 and 3010 server chipsets. config EDAC_I3200 tristate "Intel 3200" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel 3200 and 3210 server chipsets. +config EDAC_IE31200 + tristate "Intel e312xx" + depends on PCI && X86 && X86_MCE_INTEL + help + Support for error detection and correction on the Intel + E3-1200 based DRAM controllers. + config EDAC_X38 tristate "Intel X38" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel X38 server chipsets. config EDAC_I5400 tristate "Intel 5400 (Seaburg) chipsets" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel i5400 MCH chipset (Seaburg). config EDAC_I7CORE tristate "Intel i7 Core (Nehalem) processors" - depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL + depends on PCI && X86 && X86_MCE_INTEL help Support for error detection and correction on the Intel i7 Core (Nehalem) Integrated Memory Controller that exists on @@ -211,161 +218,382 @@ config EDAC_I7CORE config EDAC_I82860 tristate "Intel 82860" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel 82860 chipset. config EDAC_R82600 tristate "Radisys 82600 embedded chipset" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Radisys 82600 embedded chipset. config EDAC_I5000 tristate "Intel Greencreek/Blackford chipset" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI + depends on BROKEN help Support for error detection and correction on the Intel Greencreek/Blackford chipsets. config EDAC_I5100 tristate "Intel San Clemente MCH" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction on the Intel San Clemente MCH. config EDAC_I7300 tristate "Intel Clarksboro MCH" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction on the Intel Clarksboro MCH (Intel 7300 chipset). config EDAC_SBRIDGE - tristate "Intel Sandy-Bridge Integrated MC" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on PCI_MMCONFIG + tristate "Intel Sandy-Bridge/Ivy-Bridge/Haswell Integrated MC" + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG help Support for error detection and correction on the Intel - Sandy Bridge Integrated Memory Controller. + Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers.
-config EDAC_MPC85XX - tristate "Freescale MPC83xx / MPC85xx" - depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) +config EDAC_SKX + tristate "Intel Skylake server Integrated MC" + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG && ACPI + depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y + select DMI + select ACPI_ADXL help - Support for error detection and correction on the Freescale - MPC8349, MPC8560, MPC8540, MPC8548 + Support for error detection and correction on the Intel + Skylake server Integrated Memory Controllers. If your + system has non-volatile DIMMs you should also manually + select CONFIG_ACPI_NFIT. -config EDAC_MV64X60 - tristate "Marvell MV64x60" - depends on EDAC_MM_EDAC && MV64X60 +config EDAC_I10NM + tristate "Intel 10nm server Integrated MC" + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG && ACPI + depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_I10NM can't be y + select DMI + select ACPI_ADXL help - Support for error detection and correction on the Marvell - MV64360 and MV64460 chipsets. + Support for error detection and correction on the Intel + 10nm server Integrated Memory Controllers. If your + system has non-volatile DIMMs you should also manually + select CONFIG_ACPI_NFIT. -config EDAC_PASEMI - tristate "PA Semi PWRficient" - depends on EDAC_MM_EDAC && PCI - depends on PPC_PASEMI +config EDAC_IMH + tristate "Intel Integrated Memory/IO Hub MC" + depends on X86_64 && X86_MCE_INTEL && ACPI + depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_IMH can't be y + select DMI + select ACPI_ADXL help - Support for error detection and correction on PA Semi - PWRficient. + Support for error detection and correction on the Intel + Integrated Memory/IO Hub Memory Controller. This MC IP is + first used on the Diamond Rapids servers but may appear on + others in the future. -config EDAC_CELL - tristate "Cell Broadband Engine memory controller" - depends on EDAC_MM_EDAC && PPC_CELL_COMMON +config EDAC_PND2 + tristate "Intel Pondicherry2" + depends on PCI && X86_64 && X86_MCE_INTEL + select P2SB if X86 help - Support for error detection and correction on the - Cell Broadband Engine internal memory controller - on platform without a hypervisor + Support for error detection and correction on the Intel + Pondicherry2 Integrated Memory Controller. This SoC IP is + first used on the Apollo Lake platform and Denverton + micro-server but may appear on others in the future. -config EDAC_PPC4XX - tristate "PPC4xx IBM DDR2 Memory Controller" - depends on EDAC_MM_EDAC && 4xx +config EDAC_IGEN6 + tristate "Intel client SoC Integrated MC" + depends on PCI && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG + depends on X86_64 && X86_MCE_INTEL help - This enables support for EDAC on the ECC memory used - with the IBM DDR2 memory controller found in various - PowerPC 4xx embedded processors such as the 405EX[r], - 440SP, 440SPe, 460EX, 460GT and 460SX. + Support for error detection and correction on the Intel + client SoC Integrated Memory Controller using In-Band ECC IP. + This In-Band ECC is first used on the Elkhart Lake SoC but + may appear on others in the future. -config EDAC_AMD8131 - tristate "AMD8131 HyperTransport PCI-X Tunnel" - depends on EDAC_MM_EDAC && PCI && PPC_MAPLE +config EDAC_MPC85XX + bool "Freescale MPC83xx / MPC85xx" + depends on FSL_SOC && EDAC=y help - Support for error detection and correction on the - AMD8131 HyperTransport PCI-X Tunnel chip.
- Note, add more Kconfig dependency if it's adopted - on some machine other than Maple. + Support for error detection and correction on the Freescale + MPC8349, MPC8560, MPC8540, MPC8548, T4240 -config EDAC_AMD8111 - tristate "AMD8111 HyperTransport I/O Hub" - depends on EDAC_MM_EDAC && PCI && PPC_MAPLE +config EDAC_LAYERSCAPE + tristate "Freescale Layerscape DDR" + depends on ARCH_LAYERSCAPE || SOC_LS1021A help - Support for error detection and correction on the - AMD8111 HyperTransport I/O Hub chip. - Note, add more Kconfig dependency if it's adopted - on some machine other than Maple. + Support for error detection and correction on Freescale memory + controllers on Layerscape SoCs. + +config EDAC_PASEMI + tristate "PA Semi PWRficient" + depends on PPC_PASEMI && PCI + help + Support for error detection and correction on PA Semi + PWRficient. config EDAC_CPC925 tristate "IBM CPC925 Memory Controller (PPC970FX)" - depends on EDAC_MM_EDAC && PPC64 + depends on PPC64 help Support for error detection and correction on the IBM CPC925 Bridge and Memory Controller, which is a companion chip to the PowerPC 970 family of processors. -config EDAC_TILE - tristate "Tilera Memory Controller" - depends on EDAC_MM_EDAC && TILE - default y - help - Support for error detection and correction on the - Tilera memory controller. - config EDAC_HIGHBANK_MC tristate "Highbank Memory Controller" - depends on EDAC_MM_EDAC && ARCH_HIGHBANK + depends on ARCH_HIGHBANK help Support for error detection and correction on the Calxeda Highbank memory controller. config EDAC_HIGHBANK_L2 tristate "Highbank L2 Cache" - depends on EDAC_MM_EDAC && ARCH_HIGHBANK + depends on ARCH_HIGHBANK help Support for error detection and correction on the Calxeda Highbank L2 cache. config EDAC_OCTEON_PC tristate "Cavium Octeon Primary Caches" - depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + depends on CPU_CAVIUM_OCTEON help Support for error detection and correction on the primary caches of the cnMIPS cores of Cavium Octeon family SOCs. config EDAC_OCTEON_L2C tristate "Cavium Octeon Secondary Caches (L2C)" - depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + depends on CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_LMC tristate "Cavium Octeon DRAM Memory Controller (LMC)" - depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + depends on CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_PCI tristate "Cavium Octeon PCI Controller" - depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC + depends on PCI && CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. +config EDAC_THUNDERX + tristate "Cavium ThunderX EDAC" + depends on ARM64 + depends on PCI + help + Support for error detection and correction on the + Cavium ThunderX memory controllers (LMC), Cache + Coherent Processor Interconnect (CCPI) and L2 cache + blocks (TAD, CBC, MCI). + +config EDAC_ALTERA + bool "Altera SOCFPGA ECC" + depends on EDAC=y && ARCH_INTEL_SOCFPGA + help + Support for error detection and correction on the + Altera SOCs. This is the global enable for the + various Altera peripherals. + +config EDAC_ALTERA_SDRAM + bool "Altera SDRAM ECC" + depends on EDAC_ALTERA=y + help + Support for error detection and correction on the + Altera SDRAM Memory for Altera SoCs. Note that the + preloader must initialize the SDRAM before loading + the kernel.
+ +config EDAC_ALTERA_L2C + bool "Altera L2 Cache ECC" + depends on EDAC_ALTERA=y && CACHE_L2X0 + help + Support for error detection and correction on the + Altera L2 cache Memory for Altera SoCs. This option + requires L2 cache. + +config EDAC_ALTERA_OCRAM + bool "Altera On-Chip RAM ECC" + depends on EDAC_ALTERA=y && SRAM && GENERIC_ALLOCATOR + help + Support for error detection and correction on the + Altera On-Chip RAM Memory for Altera SoCs. + +config EDAC_ALTERA_ETHERNET + bool "Altera Ethernet FIFO ECC" + depends on EDAC_ALTERA=y + help + Support for error detection and correction on the + Altera Ethernet FIFO Memory for Altera SoCs. + +config EDAC_ALTERA_NAND + bool "Altera NAND FIFO ECC" + depends on EDAC_ALTERA=y && MTD_NAND_DENALI + help + Support for error detection and correction on the + Altera NAND FIFO Memory for Altera SoCs. + +config EDAC_ALTERA_DMA + bool "Altera DMA FIFO ECC" + depends on EDAC_ALTERA=y && PL330_DMA=y + help + Support for error detection and correction on the + Altera DMA FIFO Memory for Altera SoCs. + +config EDAC_ALTERA_USB + bool "Altera USB FIFO ECC" + depends on EDAC_ALTERA=y && USB_DWC2 + help + Support for error detection and correction on the + Altera USB FIFO Memory for Altera SoCs. + +config EDAC_ALTERA_QSPI + bool "Altera QSPI FIFO ECC" + depends on EDAC_ALTERA=y && SPI_CADENCE_QUADSPI + help + Support for error detection and correction on the + Altera QSPI FIFO Memory for Altera SoCs. + +config EDAC_ALTERA_SDMMC + bool "Altera SDMMC FIFO ECC" + depends on EDAC_ALTERA=y && MMC_DW + help + Support for error detection and correction on the + Altera SDMMC FIFO Memory for Altera SoCs. + +config EDAC_SIFIVE + bool "Sifive platform EDAC driver" + depends on EDAC=y && SIFIVE_CCACHE + help + Support for error detection and correction on the SiFive SoCs. + +config EDAC_ARMADA_XP + bool "Marvell Armada XP DDR and L2 Cache ECC" + depends on MACH_MVEBU_V7 + help + Support for error correction and detection on the Marvell Armada XP + DDR RAM and L2 cache controllers. + +config EDAC_SYNOPSYS + tristate "Synopsys DDR Memory Controller" + depends on ARCH_ZYNQ || ARCH_ZYNQMP || ARCH_INTEL_SOCFPGA || ARCH_MXC + help + Support for error detection and correction on the Synopsys DDR + memory controller. + +config EDAC_XGENE + tristate "APM X-Gene SoC" + depends on (ARM64 || COMPILE_TEST) + help + Support for error detection and correction on the + APM X-Gene family of SOCs. + +config EDAC_TI + tristate "Texas Instruments DDR3 ECC Controller" + depends on ARCH_KEYSTONE || SOC_DRA7XX + help + Support for error detection and correction on the TI SoCs. + +config EDAC_QCOM + tristate "QCOM EDAC Controller" + depends on ARCH_QCOM && QCOM_LLCC + help + Support for error detection and correction on the + Qualcomm Technologies, Inc. SoCs. + + This driver reports Single Bit Errors (SBEs) and Double Bit Errors (DBEs). + As of now, it supports error reporting for Last Level Cache Controller (LLCC) + of Tag RAM and Data RAM. + + For debugging issues having to do with stability and overall system + health, you should probably say 'Y' here. + +config EDAC_ASPEED + tristate "Aspeed AST BMC SoC" + depends on ARCH_ASPEED + help + Support for error detection and correction on the Aspeed AST BMC SoC. + + First, ECC must be configured in the bootloader. Then, this driver + will expose error counters via the EDAC kernel framework.
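The counters that the ASPEED help text mentions are the standard ones the EDAC core exposes in sysfs for every registered memory controller. A small user-space sketch in C that reads the aggregate counts (the mc0 path assumes a single controller):

#include <stdio.h>

/* Read the aggregate correctable (CE) and uncorrectable (UE) error
 * counts that the EDAC core publishes for a memory controller.
 */
static unsigned long read_count(const char *path)
{
	unsigned long val = 0;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%lu", &val) != 1)
			val = 0;
		fclose(f);
	}
	return val;
}

int main(void)
{
	printf("CE=%lu UE=%lu\n",
	       read_count("/sys/devices/system/edac/mc/mc0/ce_count"),
	       read_count("/sys/devices/system/edac/mc/mc0/ue_count"));
	return 0;
}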
+ +config EDAC_BLUEFIELD + tristate "Mellanox BlueField Memory ECC" + depends on ARM64 && ((MELLANOX_PLATFORM && ACPI) || COMPILE_TEST) + help + Support for error detection and correction on the + Mellanox BlueField SoCs. + +config EDAC_DMC520 + tristate "ARM DMC-520 ECC" + depends on ARM64 + help + Support for error detection and correction on the + SoCs with ARM DMC-520 DRAM controller. + +config EDAC_ZYNQMP + tristate "Xilinx ZynqMP OCM Controller" + depends on ARCH_ZYNQMP || COMPILE_TEST + help + This driver supports error detection and correction for the + Xilinx ZynqMP OCM (On Chip Memory) controller. It can also be + built as a module. In that case it will be called zynqmp_edac. + +config EDAC_NPCM + tristate "Nuvoton NPCM DDR Memory Controller" + depends on (ARCH_NPCM || COMPILE_TEST) + help + Support for error detection and correction on the Nuvoton NPCM DDR + memory controller. + + The memory controller supports single bit error correction, double bit + error detection (in-line ECC, in which a section equal to 1/8th of the + memory used to store data is reserved for ECC storage). + +config EDAC_VERSAL + tristate "Xilinx Versal DDR Memory Controller" + depends on ARCH_ZYNQMP || COMPILE_TEST + help + Support for error detection and correction on the Xilinx Versal DDR + memory controller. + + Report both single bit errors (CE) and double bit errors (UE). + Support injecting both correctable and uncorrectable errors + for debugging purposes. + +config EDAC_LOONGSON + tristate "Loongson Memory Controller" + depends on LOONGARCH && ACPI + help + Support for error detection and correction on the Loongson + family memory controller. This driver reports single bit + errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000 + are compatible. + +config EDAC_CORTEX_A72 + tristate "ARM Cortex A72" + depends on ARM64 + help + Support for L1/L2 cache error detection for the ARM Cortex A72 + processor. Errors are detected and reported by reading the CPU/L2 + memory error syndrome registers. + +config EDAC_VERSALNET + tristate "AMD VersalNET DDR Controller" + depends on CDX_CONTROLLER && ARCH_ZYNQMP + help + Support for single bit error correction, double bit error detection + and reporting of other system errors from various IP subsystems like + RPU, NOCs, HNICX and PL on the AMD Versal NET DDR memory controller. + endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 4154ed6a02c6..8429b1e856bc 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -6,22 +6,26 @@ # GNU General Public License.
# -obj-$(CONFIG_EDAC) := edac_stub.o -obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o +obj-$(CONFIG_EDAC) := edac_core.o -edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o -edac_core-y += edac_module.o edac_device_sysfs.o +edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o +edac_core-y += edac_module.o edac_device_sysfs.o wq.o + +edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o +edac_core-$(CONFIG_EDAC_SCRUB) += scrub.o +edac_core-$(CONFIG_EDAC_ECS) += ecs.o +edac_core-$(CONFIG_EDAC_MEM_REPAIR) += mem_repair.o ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o endif obj-$(CONFIG_EDAC_GHES) += ghes_edac.o -obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o edac_mce_amd-y := mce_amd.o obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o +obj-$(CONFIG_EDAC_AL_MC) += al_mc_edac.o obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o obj-$(CONFIG_EDAC_I5000) += i5000_edac.o @@ -30,6 +34,8 @@ obj-$(CONFIG_EDAC_I5400) += i5400_edac.o obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o +obj-$(CONFIG_EDAC_PND2) += pnd2_edac.o +obj-$(CONFIG_EDAC_IGEN6) += igen6_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o @@ -37,30 +43,53 @@ obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o obj-$(CONFIG_EDAC_I3000) += i3000_edac.o obj-$(CONFIG_EDAC_I3200) += i3200_edac.o +obj-$(CONFIG_EDAC_IE31200) += ie31200_edac.o obj-$(CONFIG_EDAC_X38) += x38_edac.o obj-$(CONFIG_EDAC_I82860) += i82860_edac.o obj-$(CONFIG_EDAC_R82600) += r82600_edac.o +obj-$(CONFIG_EDAC_AMD64) += amd64_edac.o + +obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o -amd64_edac_mod-y := amd64_edac.o -amd64_edac_mod-$(CONFIG_EDAC_DEBUG) += amd64_edac_dbg.o -amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o +mpc85xx_edac_mod-y := fsl_ddr_edac.o mpc85xx_edac.o +obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac_mod.o -obj-$(CONFIG_EDAC_AMD64) += amd64_edac_mod.o +layerscape_edac_mod-y := fsl_ddr_edac.o layerscape_edac.o +obj-$(CONFIG_EDAC_LAYERSCAPE) += layerscape_edac_mod.o -obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o -obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac.o -obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o -obj-$(CONFIG_EDAC_CELL) += cell_edac.o -obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o -obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o -obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o +skx_edac_common-y := skx_common.o -obj-$(CONFIG_EDAC_TILE) += tile_edac.o +skx_edac-y := skx_base.o +obj-$(CONFIG_EDAC_SKX) += skx_edac.o skx_edac_common.o -obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o -obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o +i10nm_edac-y := i10nm_base.o +obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o skx_edac_common.o + +imh_edac-y := imh_base.o +obj-$(CONFIG_EDAC_IMH) += imh_edac.o skx_edac_common.o + +obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o +obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o +obj-$(CONFIG_EDAC_THUNDERX) += thunderx_edac.o + +obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o +obj-$(CONFIG_EDAC_SIFIVE) += sifive_edac.o +obj-$(CONFIG_EDAC_ARMADA_XP) += armada_xp_edac.o +obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o +obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o 
+obj-$(CONFIG_EDAC_TI) += ti_edac.o +obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o +obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o +obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o +obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o +obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o +obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o +obj-$(CONFIG_EDAC_VERSAL) += versal_edac.o +obj-$(CONFIG_EDAC_LOONGSON) += loongson_edac.o +obj-$(CONFIG_EDAC_VERSALNET) += versalnet_edac.o +obj-$(CONFIG_EDAC_CORTEX_A72) += a72_edac.o diff --git a/drivers/edac/a72_edac.c b/drivers/edac/a72_edac.c new file mode 100644 index 000000000000..9262d75c3855 --- /dev/null +++ b/drivers/edac/a72_edac.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cortex A72 EDAC L1 and L2 cache error detection + * + * Copyright (c) 2020 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de> + * Copyright (c) 2025 Microsoft Corporation, <vijayb@linux.microsoft.com> + * + * Based on Code from: + * Copyright (c) 2018, NXP Semiconductor + * Author: York Sun <york.sun@nxp.com> + */ + +#include <linux/module.h> +#include <linux/of.h> +#include <linux/bitfield.h> +#include <asm/smp_plat.h> + +#include "edac_module.h" + +#define DRVNAME "a72-edac" + +#define SYS_CPUMERRSR_EL1 sys_reg(3, 1, 15, 2, 2) +#define SYS_L2MERRSR_EL1 sys_reg(3, 1, 15, 2, 3) + +#define CPUMERRSR_EL1_RAMID GENMASK(30, 24) +#define L2MERRSR_EL1_CPUID_WAY GENMASK(21, 18) + +#define CPUMERRSR_EL1_VALID BIT(31) +#define CPUMERRSR_EL1_FATAL BIT(63) +#define L2MERRSR_EL1_VALID BIT(31) +#define L2MERRSR_EL1_FATAL BIT(63) + +#define L1_I_TAG_RAM 0x00 +#define L1_I_DATA_RAM 0x01 +#define L1_D_TAG_RAM 0x08 +#define L1_D_DATA_RAM 0x09 +#define TLB_RAM 0x18 + +#define MESSAGE_SIZE 64 + +struct mem_err_synd_reg { + u64 cpu_mesr; + u64 l2_mesr; +}; + +static struct cpumask compat_mask; + +static void report_errors(struct edac_device_ctl_info *edac_ctl, int cpu, + struct mem_err_synd_reg *mesr) +{ + u64 cpu_mesr = mesr->cpu_mesr; + u64 l2_mesr = mesr->l2_mesr; + char msg[MESSAGE_SIZE]; + + if (cpu_mesr & CPUMERRSR_EL1_VALID) { + const char *str; + bool fatal = cpu_mesr & CPUMERRSR_EL1_FATAL; + + switch (FIELD_GET(CPUMERRSR_EL1_RAMID, cpu_mesr)) { + case L1_I_TAG_RAM: + str = "L1-I Tag RAM"; + break; + case L1_I_DATA_RAM: + str = "L1-I Data RAM"; + break; + case L1_D_TAG_RAM: + str = "L1-D Tag RAM"; + break; + case L1_D_DATA_RAM: + str = "L1-D Data RAM"; + break; + case TLB_RAM: + str = "TLB RAM"; + break; + default: + str = "Unspecified"; + break; + } + + snprintf(msg, MESSAGE_SIZE, "%s %s error(s) on CPU %d", + str, fatal ? "fatal" : "correctable", cpu); + + if (fatal) + edac_device_handle_ue(edac_ctl, cpu, 0, msg); + else + edac_device_handle_ce(edac_ctl, cpu, 0, msg); + } + + if (l2_mesr & L2MERRSR_EL1_VALID) { + bool fatal = l2_mesr & L2MERRSR_EL1_FATAL; + + snprintf(msg, MESSAGE_SIZE, "L2 %s error(s) on CPU %d CPUID/WAY 0x%lx", + fatal ? 
"fatal" : "correctable", cpu, + FIELD_GET(L2MERRSR_EL1_CPUID_WAY, l2_mesr)); + if (fatal) + edac_device_handle_ue(edac_ctl, cpu, 1, msg); + else + edac_device_handle_ce(edac_ctl, cpu, 1, msg); + } +} + +static void read_errors(void *data) +{ + struct mem_err_synd_reg *mesr = data; + + mesr->cpu_mesr = read_sysreg_s(SYS_CPUMERRSR_EL1); + if (mesr->cpu_mesr & CPUMERRSR_EL1_VALID) { + write_sysreg_s(0, SYS_CPUMERRSR_EL1); + isb(); + } + mesr->l2_mesr = read_sysreg_s(SYS_L2MERRSR_EL1); + if (mesr->l2_mesr & L2MERRSR_EL1_VALID) { + write_sysreg_s(0, SYS_L2MERRSR_EL1); + isb(); + } +} + +static void a72_edac_check(struct edac_device_ctl_info *edac_ctl) +{ + struct mem_err_synd_reg mesr; + int cpu; + + cpus_read_lock(); + for_each_cpu_and(cpu, cpu_online_mask, &compat_mask) { + smp_call_function_single(cpu, read_errors, &mesr, true); + report_errors(edac_ctl, cpu, &mesr); + } + cpus_read_unlock(); +} + +static int a72_edac_probe(struct platform_device *pdev) +{ + struct edac_device_ctl_info *edac_ctl; + struct device *dev = &pdev->dev; + int rc; + + edac_ctl = edac_device_alloc_ctl_info(0, "cpu", + num_possible_cpus(), "L", 2, 1, + edac_device_alloc_index()); + if (!edac_ctl) + return -ENOMEM; + + edac_ctl->edac_check = a72_edac_check; + edac_ctl->dev = dev; + edac_ctl->mod_name = dev_name(dev); + edac_ctl->dev_name = dev_name(dev); + edac_ctl->ctl_name = DRVNAME; + dev_set_drvdata(dev, edac_ctl); + + rc = edac_device_add_device(edac_ctl); + if (rc) + goto out_dev; + + return 0; + +out_dev: + edac_device_free_ctl_info(edac_ctl); + + return rc; +} + +static void a72_edac_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *edac_ctl = dev_get_drvdata(&pdev->dev); + + edac_device_del_device(edac_ctl->dev); + edac_device_free_ctl_info(edac_ctl); +} + +static const struct of_device_id cortex_arm64_edac_of_match[] = { + { .compatible = "arm,cortex-a72" }, + {} +}; +MODULE_DEVICE_TABLE(of, cortex_arm64_edac_of_match); + +static struct platform_driver a72_edac_driver = { + .probe = a72_edac_probe, + .remove = a72_edac_remove, + .driver = { + .name = DRVNAME, + }, +}; + +static struct platform_device *a72_pdev; + +static int __init a72_edac_driver_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu); + if (np) { + if (of_match_node(cortex_arm64_edac_of_match, np) && + of_property_read_bool(np, "edac-enabled")) { + cpumask_set_cpu(cpu, &compat_mask); + } + } else { + pr_warn("failed to find device node for CPU %d\n", cpu); + } + } + + if (cpumask_empty(&compat_mask)) + return 0; + + a72_pdev = platform_device_register_simple(DRVNAME, -1, NULL, 0); + if (IS_ERR(a72_pdev)) { + pr_err("failed to register A72 EDAC device\n"); + return PTR_ERR(a72_pdev); + } + + return platform_driver_register(&a72_edac_driver); +} + +static void __exit a72_edac_driver_exit(void) +{ + platform_device_unregister(a72_pdev); + platform_driver_unregister(&a72_edac_driver); +} + +module_init(a72_edac_driver_init); +module_exit(a72_edac_driver_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>"); +MODULE_DESCRIPTION("Cortex A72 L1 and L2 cache EDAC driver"); diff --git a/drivers/edac/al_mc_edac.c b/drivers/edac/al_mc_edac.c new file mode 100644 index 000000000000..178b9e581a72 --- /dev/null +++ b/drivers/edac/al_mc_edac.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ */ +#include <linux/bitfield.h> +#include <linux/bitops.h> +#include <linux/edac.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include "edac_module.h" + +/* Registers Offset */ +#define AL_MC_ECC_CFG 0x70 +#define AL_MC_ECC_CLEAR 0x7c +#define AL_MC_ECC_ERR_COUNT 0x80 +#define AL_MC_ECC_CE_ADDR0 0x84 +#define AL_MC_ECC_CE_ADDR1 0x88 +#define AL_MC_ECC_UE_ADDR0 0xa4 +#define AL_MC_ECC_UE_ADDR1 0xa8 +#define AL_MC_ECC_CE_SYND0 0x8c +#define AL_MC_ECC_CE_SYND1 0x90 +#define AL_MC_ECC_CE_SYND2 0x94 +#define AL_MC_ECC_UE_SYND0 0xac +#define AL_MC_ECC_UE_SYND1 0xb0 +#define AL_MC_ECC_UE_SYND2 0xb4 + +/* Registers Fields */ +#define AL_MC_ECC_CFG_SCRUB_DISABLED BIT(4) + +#define AL_MC_ECC_CLEAR_UE_COUNT BIT(3) +#define AL_MC_ECC_CLEAR_CE_COUNT BIT(2) +#define AL_MC_ECC_CLEAR_UE_ERR BIT(1) +#define AL_MC_ECC_CLEAR_CE_ERR BIT(0) + +#define AL_MC_ECC_ERR_COUNT_UE GENMASK(31, 16) +#define AL_MC_ECC_ERR_COUNT_CE GENMASK(15, 0) + +#define AL_MC_ECC_CE_ADDR0_RANK GENMASK(25, 24) +#define AL_MC_ECC_CE_ADDR0_ROW GENMASK(17, 0) + +#define AL_MC_ECC_CE_ADDR1_BG GENMASK(25, 24) +#define AL_MC_ECC_CE_ADDR1_BANK GENMASK(18, 16) +#define AL_MC_ECC_CE_ADDR1_COLUMN GENMASK(11, 0) + +#define AL_MC_ECC_UE_ADDR0_RANK GENMASK(25, 24) +#define AL_MC_ECC_UE_ADDR0_ROW GENMASK(17, 0) + +#define AL_MC_ECC_UE_ADDR1_BG GENMASK(25, 24) +#define AL_MC_ECC_UE_ADDR1_BANK GENMASK(18, 16) +#define AL_MC_ECC_UE_ADDR1_COLUMN GENMASK(11, 0) + +#define DRV_NAME "al_mc_edac" +#define AL_MC_EDAC_MSG_MAX 256 + +struct al_mc_edac { + void __iomem *mmio_base; + spinlock_t lock; + int irq_ce; + int irq_ue; +}; + +static void prepare_msg(char *message, size_t buffer_size, + enum hw_event_mc_err_type type, + u8 rank, u32 row, u8 bg, u8 bank, u16 column, + u32 syn0, u32 syn1, u32 syn2) +{ + snprintf(message, buffer_size, + "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x", + type == HW_EVENT_ERR_UNCORRECTED ? 
"UE" : "CE", + rank, row, bg, bank, column, syn0, syn1, syn2); +} + +static int handle_ce(struct mem_ctl_info *mci) +{ + u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row; + struct al_mc_edac *al_mc = mci->pvt_info; + char msg[AL_MC_EDAC_MSG_MAX]; + u16 ce_count, column; + unsigned long flags; + u8 rank, bg, bank; + + eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT); + ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt); + if (!ce_count) + return 0; + + ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0); + ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1); + ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0); + ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1); + ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2); + + writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR, + al_mc->mmio_base + AL_MC_ECC_CLEAR); + + dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n", + ecccaddr0, ecccaddr1); + + rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0); + row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0); + + bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1); + bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1); + column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1); + + prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_CORRECTED, + rank, row, bg, bank, column, + ecccsyn0, ecccsyn1, ecccsyn2); + + spin_lock_irqsave(&al_mc->lock, flags); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg); + spin_unlock_irqrestore(&al_mc->lock, flags); + + return ce_count; +} + +static int handle_ue(struct mem_ctl_info *mci) +{ + u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row; + struct al_mc_edac *al_mc = mci->pvt_info; + char msg[AL_MC_EDAC_MSG_MAX]; + u16 ue_count, column; + unsigned long flags; + u8 rank, bg, bank; + + eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT); + ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt); + if (!ue_count) + return 0; + + eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0); + eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1); + eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0); + eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1); + eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2); + + writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR, + al_mc->mmio_base + AL_MC_ECC_CLEAR); + + dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n", + eccuaddr0, eccuaddr1); + + rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0); + row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0); + + bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1); + bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1); + column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1); + + prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED, + rank, row, bg, bank, column, + eccusyn0, eccusyn1, eccusyn2); + + spin_lock_irqsave(&al_mc->lock, flags); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, + ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg); + spin_unlock_irqrestore(&al_mc->lock, flags); + + return ue_count; +} + +static void al_mc_edac_check(struct mem_ctl_info *mci) +{ + struct al_mc_edac *al_mc = mci->pvt_info; + + if (al_mc->irq_ue <= 0) + handle_ue(mci); + + if (al_mc->irq_ce <= 0) + handle_ce(mci); +} + +static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info) +{ + struct platform_device *pdev = 
info; + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + if (handle_ue(mci)) + return IRQ_HANDLED; + return IRQ_NONE; +} + +static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info) +{ + struct platform_device *pdev = info; + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + if (handle_ce(mci)) + return IRQ_HANDLED; + return IRQ_NONE; +} + +static enum scrub_type get_scrub_mode(void __iomem *mmio_base) +{ + u32 ecccfg0; + + ecccfg0 = readl(mmio_base + AL_MC_ECC_CFG); + + if (FIELD_GET(AL_MC_ECC_CFG_SCRUB_DISABLED, ecccfg0)) + return SCRUB_NONE; + else + return SCRUB_HW_SRC; +} + +static void devm_al_mc_edac_free(void *data) +{ + edac_mc_free(data); +} + +static void devm_al_mc_edac_del(void *data) +{ + edac_mc_del_mc(data); +} + +static int al_mc_edac_probe(struct platform_device *pdev) +{ + struct edac_mc_layer layers[1]; + struct mem_ctl_info *mci; + struct al_mc_edac *al_mc; + void __iomem *mmio_base; + struct dimm_info *dimm; + int ret; + + mmio_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mmio_base)) { + dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n", + PTR_ERR(mmio_base)); + return PTR_ERR(mmio_base); + } + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 1; + layers[0].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + sizeof(struct al_mc_edac)); + if (!mci) + return -ENOMEM; + + ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_free, mci); + if (ret) + return ret; + + platform_set_drvdata(pdev, mci); + al_mc = mci->pvt_info; + + al_mc->mmio_base = mmio_base; + + al_mc->irq_ue = of_irq_get_byname(pdev->dev.of_node, "ue"); + if (al_mc->irq_ue <= 0) + dev_dbg(&pdev->dev, + "no IRQ defined for UE - falling back to polling\n"); + + al_mc->irq_ce = of_irq_get_byname(pdev->dev.of_node, "ce"); + if (al_mc->irq_ce <= 0) + dev_dbg(&pdev->dev, + "no IRQ defined for CE - falling back to polling\n"); + + /* + * In case both interrupts (ue/ce) are found, use interrupt mode. + * In case none of the interrupts are found, use polling mode. + * In case only one interrupt is found, use interrupt mode for it but + * keep polling mode enabled for the other.
+ */ + if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) { + edac_op_state = EDAC_OPSTATE_POLL; + mci->edac_check = al_mc_edac_check; + } else { + edac_op_state = EDAC_OPSTATE_INT; + } + + spin_lock_init(&al_mc->lock); + + mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = DRV_NAME; + mci->ctl_name = "al_mc"; + mci->pdev = &pdev->dev; + mci->scrub_mode = get_scrub_mode(mmio_base); + + dimm = *mci->dimms; + dimm->grain = 1; + + ret = edac_mc_add_mc(mci); + if (ret < 0) { + dev_err(&pdev->dev, + "fail to add memory controller device (%d)\n", + ret); + return ret; + } + + ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_del, &pdev->dev); + if (ret) + return ret; + + if (al_mc->irq_ue > 0) { + ret = devm_request_irq(&pdev->dev, + al_mc->irq_ue, + al_mc_edac_irq_handler_ue, + IRQF_SHARED, + pdev->name, + pdev); + if (ret != 0) { + dev_err(&pdev->dev, + "failed to request UE IRQ %d (%d)\n", + al_mc->irq_ue, ret); + return ret; + } + } + + if (al_mc->irq_ce > 0) { + ret = devm_request_irq(&pdev->dev, + al_mc->irq_ce, + al_mc_edac_irq_handler_ce, + IRQF_SHARED, + pdev->name, + pdev); + if (ret != 0) { + dev_err(&pdev->dev, + "failed to request CE IRQ %d (%d)\n", + al_mc->irq_ce, ret); + return ret; + } + } + + return 0; +} + +static const struct of_device_id al_mc_edac_of_match[] = { + { .compatible = "amazon,al-mc-edac", }, + {}, +}; + +MODULE_DEVICE_TABLE(of, al_mc_edac_of_match); + +static struct platform_driver al_mc_edac_driver = { + .probe = al_mc_edac_probe, + .driver = { + .name = DRV_NAME, + .of_match_table = al_mc_edac_of_match, + }, +}; + +module_platform_driver(al_mc_edac_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Talel Shenhar"); +MODULE_DESCRIPTION("Amazon's Annapurna Lab's Memory Controller EDAC Driver"); diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c new file mode 100644 index 000000000000..0c5b94e64ea1 --- /dev/null +++ b/drivers/edac/altera_edac.c @@ -0,0 +1,2228 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017-2018, Intel Corporation. All rights reserved + * Copyright Altera Corporation (C) 2014-2016. All rights reserved. + * Copyright 2011-2012 Calxeda, Inc. 
+ */ + +#include <asm/cacheflush.h> +#include <linux/ctype.h> +#include <linux/delay.h> +#include <linux/edac.h> +#include <linux/firmware/intel/stratix10-smc.h> +#include <linux/genalloc.h> +#include <linux/interrupt.h> +#include <linux/irqchip/chained_irq.h> +#include <linux/kernel.h> +#include <linux/mfd/altera-sysmgr.h> +#include <linux/mfd/syscon.h> +#include <linux/notifier.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/panic_notifier.h> +#include <linux/platform_device.h> +#include <linux/property.h> +#include <linux/regmap.h> +#include <linux/types.h> +#include <linux/uaccess.h> + +#include "altera_edac.h" +#include "edac_module.h" + +#define EDAC_MOD_STR "altera_edac" +#define EDAC_DEVICE "Altera" + +#ifdef CONFIG_EDAC_ALTERA_SDRAM +static const struct altr_sdram_prv_data c5_data = { + .ecc_ctrl_offset = CV_CTLCFG_OFST, + .ecc_ctl_en_mask = CV_CTLCFG_ECC_AUTO_EN, + .ecc_stat_offset = CV_DRAMSTS_OFST, + .ecc_stat_ce_mask = CV_DRAMSTS_SBEERR, + .ecc_stat_ue_mask = CV_DRAMSTS_DBEERR, + .ecc_saddr_offset = CV_ERRADDR_OFST, + .ecc_daddr_offset = CV_ERRADDR_OFST, + .ecc_cecnt_offset = CV_SBECOUNT_OFST, + .ecc_uecnt_offset = CV_DBECOUNT_OFST, + .ecc_irq_en_offset = CV_DRAMINTR_OFST, + .ecc_irq_en_mask = CV_DRAMINTR_INTREN, + .ecc_irq_clr_offset = CV_DRAMINTR_OFST, + .ecc_irq_clr_mask = (CV_DRAMINTR_INTRCLR | CV_DRAMINTR_INTREN), + .ecc_cnt_rst_offset = CV_DRAMINTR_OFST, + .ecc_cnt_rst_mask = CV_DRAMINTR_INTRCLR, + .ce_ue_trgr_offset = CV_CTLCFG_OFST, + .ce_set_mask = CV_CTLCFG_GEN_SB_ERR, + .ue_set_mask = CV_CTLCFG_GEN_DB_ERR, +}; + +static const struct altr_sdram_prv_data a10_data = { + .ecc_ctrl_offset = A10_ECCCTRL1_OFST, + .ecc_ctl_en_mask = A10_ECCCTRL1_ECC_EN, + .ecc_stat_offset = A10_INTSTAT_OFST, + .ecc_stat_ce_mask = A10_INTSTAT_SBEERR, + .ecc_stat_ue_mask = A10_INTSTAT_DBEERR, + .ecc_saddr_offset = A10_SERRADDR_OFST, + .ecc_daddr_offset = A10_DERRADDR_OFST, + .ecc_irq_en_offset = A10_ERRINTEN_OFST, + .ecc_irq_en_mask = A10_ECC_IRQ_EN_MASK, + .ecc_irq_clr_offset = A10_INTSTAT_OFST, + .ecc_irq_clr_mask = (A10_INTSTAT_SBEERR | A10_INTSTAT_DBEERR), + .ecc_cnt_rst_offset = A10_ECCCTRL1_OFST, + .ecc_cnt_rst_mask = A10_ECC_CNT_RESET_MASK, + .ce_ue_trgr_offset = A10_DIAGINTTEST_OFST, + .ce_set_mask = A10_DIAGINT_TSERRA_MASK, + .ue_set_mask = A10_DIAGINT_TDERRA_MASK, +}; + +/*********************** EDAC Memory Controller Functions ****************/ + +/* The SDRAM controller uses the EDAC Memory Controller framework. 
*/ + +static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct altr_sdram_mc_data *drvdata = mci->pvt_info; + const struct altr_sdram_prv_data *priv = drvdata->data; + u32 status, err_count = 1, err_addr; + + regmap_read(drvdata->mc_vbase, priv->ecc_stat_offset, &status); + + if (status & priv->ecc_stat_ue_mask) { + regmap_read(drvdata->mc_vbase, priv->ecc_daddr_offset, + &err_addr); + if (priv->ecc_uecnt_offset) + regmap_read(drvdata->mc_vbase, priv->ecc_uecnt_offset, + &err_count); + panic("\nEDAC: [%d Uncorrectable errors @ 0x%08X]\n", + err_count, err_addr); + } + if (status & priv->ecc_stat_ce_mask) { + regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset, + &err_addr); + if (priv->ecc_cecnt_offset) + regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset, + &err_count); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, + err_addr >> PAGE_SHIFT, + err_addr & ~PAGE_MASK, 0, + 0, 0, -1, mci->ctl_name, ""); + /* Clear IRQ to resume */ + regmap_write(drvdata->mc_vbase, priv->ecc_irq_clr_offset, + priv->ecc_irq_clr_mask); + + return IRQ_HANDLED; + } + return IRQ_NONE; +} + +static ssize_t altr_sdr_mc_err_inject_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct mem_ctl_info *mci = file->private_data; + struct altr_sdram_mc_data *drvdata = mci->pvt_info; + const struct altr_sdram_prv_data *priv = drvdata->data; + u32 *ptemp; + dma_addr_t dma_handle; + u32 reg, read_reg; + + ptemp = dma_alloc_coherent(mci->pdev, 16, &dma_handle, GFP_KERNEL); + if (!ptemp) { + edac_printk(KERN_ERR, EDAC_MC, + "Inject: Buffer Allocation error\n"); + return -ENOMEM; + } + + regmap_read(drvdata->mc_vbase, priv->ce_ue_trgr_offset, + &read_reg); + read_reg &= ~(priv->ce_set_mask | priv->ue_set_mask); + + /* Errors are injected by writing a word while the SBE or DBE + * bit in the CTLCFG register is set. Reading the word will + * trigger the SBE or DBE error and the corresponding IRQ. + */ + if (count == 3) { + edac_printk(KERN_ALERT, EDAC_MC, + "Inject Double bit error\n"); + local_irq_disable(); + regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset, + (read_reg | priv->ue_set_mask)); + local_irq_enable(); + } else { + edac_printk(KERN_ALERT, EDAC_MC, + "Inject Single bit error\n"); + local_irq_disable(); + regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset, + (read_reg | priv->ce_set_mask)); + local_irq_enable(); + } + + ptemp[0] = 0x5A5A5A5A; + ptemp[1] = 0xA5A5A5A5; + + /* Clear the error injection bits */ + regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset, read_reg); + /* Ensure it has been written out */ + wmb(); + + /* + * To trigger the error, we need to read the data back + * (the data was written with errors above). + * The READ_ONCE macros and printk are used to prevent the + * compiler optimizing these reads out.
+ */ + reg = READ_ONCE(ptemp[0]); + read_reg = READ_ONCE(ptemp[1]); + /* Force Read */ + rmb(); + + edac_printk(KERN_ALERT, EDAC_MC, "Read Data [0x%X, 0x%X]\n", + reg, read_reg); + + dma_free_coherent(mci->pdev, 16, ptemp, dma_handle); + + return count; +} + +static const struct file_operations altr_sdr_mc_debug_inject_fops = { + .open = simple_open, + .write = altr_sdr_mc_err_inject_write, + .llseek = generic_file_llseek, +}; + +static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci) +{ + if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) + return; + + if (!mci->debugfs) + return; + + edac_debugfs_create_file("altr_trigger", S_IWUSR, mci->debugfs, mci, + &altr_sdr_mc_debug_inject_fops); +} + +/* Get total memory size from Open Firmware DTB */ +static unsigned long get_total_mem(void) +{ + struct device_node *np = NULL; + struct resource res; + int ret; + unsigned long total_mem = 0; + + for_each_node_by_type(np, "memory") { + ret = of_address_to_resource(np, 0, &res); + if (ret) + continue; + + total_mem += resource_size(&res); + } + edac_dbg(0, "total_mem 0x%lx\n", total_mem); + return total_mem; +} + +static const struct of_device_id altr_sdram_ctrl_of_match[] = { + { .compatible = "altr,sdram-edac", .data = &c5_data}, + { .compatible = "altr,sdram-edac-a10", .data = &a10_data}, + {}, +}; +MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match); + +static int a10_init(struct regmap *mc_vbase) +{ + if (regmap_update_bits(mc_vbase, A10_INTMODE_OFST, + A10_INTMODE_SB_INT, A10_INTMODE_SB_INT)) { + edac_printk(KERN_ERR, EDAC_MC, + "Error setting SB IRQ mode\n"); + return -ENODEV; + } + + if (regmap_write(mc_vbase, A10_SERRCNTREG_OFST, 1)) { + edac_printk(KERN_ERR, EDAC_MC, + "Error setting trigger count\n"); + return -ENODEV; + } + + return 0; +} + +static int a10_unmask_irq(struct platform_device *pdev, u32 mask) +{ + void __iomem *sm_base; + int ret = 0; + + if (!request_mem_region(A10_SYMAN_INTMASK_CLR, sizeof(u32), + dev_name(&pdev->dev))) { + edac_printk(KERN_ERR, EDAC_MC, + "Unable to request mem region\n"); + return -EBUSY; + } + + sm_base = ioremap(A10_SYMAN_INTMASK_CLR, sizeof(u32)); + if (!sm_base) { + edac_printk(KERN_ERR, EDAC_MC, + "Unable to ioremap device\n"); + + ret = -ENOMEM; + goto release; + } + + iowrite32(mask, sm_base); + + iounmap(sm_base); + +release: + release_mem_region(A10_SYMAN_INTMASK_CLR, sizeof(u32)); + + return ret; +} + +static int altr_sdram_probe(struct platform_device *pdev) +{ + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + struct altr_sdram_mc_data *drvdata; + const struct altr_sdram_prv_data *priv; + struct regmap *mc_vbase; + struct dimm_info *dimm; + u32 read_reg; + int irq, irq2, res = 0; + unsigned long mem_size, irqflags = 0; + + /* Grab the register range from the sdr controller in device tree */ + mc_vbase = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "altr,sdr-syscon"); + if (IS_ERR(mc_vbase)) { + edac_printk(KERN_ERR, EDAC_MC, + "regmap for altr,sdr-syscon lookup failed.\n"); + return -ENODEV; + } + + /* Check specific dependencies for the module */ + priv = device_get_match_data(&pdev->dev); + + /* Validate the SDRAM controller has ECC enabled */ + if (regmap_read(mc_vbase, priv->ecc_ctrl_offset, &read_reg) || + ((read_reg & priv->ecc_ctl_en_mask) != priv->ecc_ctl_en_mask)) { + edac_printk(KERN_ERR, EDAC_MC, + "No ECC/ECC disabled [0x%08X]\n", read_reg); + return -ENODEV; + } + + /* Grab memory size from device tree. 
*/ + mem_size = get_total_mem(); + if (!mem_size) { + edac_printk(KERN_ERR, EDAC_MC, "Unable to calculate memory size\n"); + return -ENODEV; + } + + /* Ensure the SDRAM Interrupt is disabled */ + if (regmap_update_bits(mc_vbase, priv->ecc_irq_en_offset, + priv->ecc_irq_en_mask, 0)) { + edac_printk(KERN_ERR, EDAC_MC, + "Error disabling SDRAM ECC IRQ\n"); + return -ENODEV; + } + + /* Toggle to clear the SDRAM Error count */ + if (regmap_update_bits(mc_vbase, priv->ecc_cnt_rst_offset, + priv->ecc_cnt_rst_mask, + priv->ecc_cnt_rst_mask)) { + edac_printk(KERN_ERR, EDAC_MC, + "Error clearing SDRAM ECC count\n"); + return -ENODEV; + } + + if (regmap_update_bits(mc_vbase, priv->ecc_cnt_rst_offset, + priv->ecc_cnt_rst_mask, 0)) { + edac_printk(KERN_ERR, EDAC_MC, + "Error clearing SDRAM ECC count\n"); + return -ENODEV; + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + edac_printk(KERN_ERR, EDAC_MC, + "No irq %d in DT\n", irq); + return irq; + } + + /* Arria10 has a 2nd IRQ */ + irq2 = platform_get_irq(pdev, 1); + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 1; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 1; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + sizeof(struct altr_sdram_mc_data)); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + drvdata = mci->pvt_info; + drvdata->mc_vbase = mc_vbase; + drvdata->data = priv; + platform_set_drvdata(pdev, mci); + + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + edac_printk(KERN_ERR, EDAC_MC, + "Unable to get managed device resource\n"); + res = -ENOMEM; + goto free; + } + + mci->mtype_cap = MEM_FLAG_DDR3; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = EDAC_MOD_STR; + mci->ctl_name = dev_name(&pdev->dev); + mci->scrub_mode = SCRUB_SW_SRC; + mci->dev_name = dev_name(&pdev->dev); + + dimm = *mci->dimms; + dimm->nr_pages = ((mem_size - 1) >> PAGE_SHIFT) + 1; + dimm->grain = 8; + dimm->dtype = DEV_X8; + dimm->mtype = MEM_DDR3; + dimm->edac_mode = EDAC_SECDED; + + res = edac_mc_add_mc(mci); + if (res < 0) + goto err; + + /* Only the Arria10 has separate IRQs */ + if (of_machine_is_compatible("altr,socfpga-arria10")) { + /* Arria10 specific initialization */ + res = a10_init(mc_vbase); + if (res < 0) + goto err2; + + res = devm_request_irq(&pdev->dev, irq2, + altr_sdram_mc_err_handler, + IRQF_SHARED, dev_name(&pdev->dev), mci); + if (res < 0) { + edac_mc_printk(mci, KERN_ERR, + "Unable to request irq %d\n", irq2); + res = -ENODEV; + goto err2; + } + + res = a10_unmask_irq(pdev, A10_DDR0_IRQ_MASK); + if (res < 0) + goto err2; + + irqflags = IRQF_SHARED; + } + + res = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler, + irqflags, dev_name(&pdev->dev), mci); + if (res < 0) { + edac_mc_printk(mci, KERN_ERR, + "Unable to request irq %d\n", irq); + res = -ENODEV; + goto err2; + } + + /* Infrastructure ready - enable the IRQ */ + if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset, + priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) { + edac_mc_printk(mci, KERN_ERR, + "Error enabling SDRAM ECC IRQ\n"); + res = -ENODEV; + goto err2; + } + + altr_sdr_mc_create_debugfs_nodes(mci); + + devres_close_group(&pdev->dev, NULL); + + return 0; + +err2: + edac_mc_del_mc(&pdev->dev); +err: + devres_release_group(&pdev->dev, NULL); +free: + edac_mc_free(mci); + edac_printk(KERN_ERR, EDAC_MC, + "EDAC Probe Failed; Error %d\n", res); + + return res; +} + +static 
void altr_sdram_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + platform_set_drvdata(pdev, NULL); +} + +/* + * If you want to suspend, you need to disable EDAC by removing it + * from the device tree or defconfig. + */ +#ifdef CONFIG_PM +static int altr_sdram_prepare(struct device *dev) +{ + pr_err("Suspend not allowed when EDAC is enabled.\n"); + + return -EPERM; +} + +static const struct dev_pm_ops altr_sdram_pm_ops = { + .prepare = altr_sdram_prepare, +}; +#endif + +static struct platform_driver altr_sdram_edac_driver = { + .probe = altr_sdram_probe, + .remove = altr_sdram_remove, + .driver = { + .name = "altr_sdram_edac", +#ifdef CONFIG_PM + .pm = &altr_sdram_pm_ops, +#endif + .of_match_table = altr_sdram_ctrl_of_match, + }, +}; + +module_platform_driver(altr_sdram_edac_driver); + +#endif /* CONFIG_EDAC_ALTERA_SDRAM */ + +/************************* EDAC Parent Probe *************************/ + +static const struct of_device_id altr_edac_device_of_match[]; + +static const struct of_device_id altr_edac_of_match[] = { + { .compatible = "altr,socfpga-ecc-manager" }, + {}, +}; +MODULE_DEVICE_TABLE(of, altr_edac_of_match); + +static int altr_edac_probe(struct platform_device *pdev) +{ + of_platform_populate(pdev->dev.of_node, altr_edac_device_of_match, + NULL, &pdev->dev); + return 0; +} + +static struct platform_driver altr_edac_driver = { + .probe = altr_edac_probe, + .driver = { + .name = "socfpga_ecc_manager", + .of_match_table = altr_edac_of_match, + }, +}; +module_platform_driver(altr_edac_driver); + +/************************* EDAC Device Functions *************************/ + +/* + * EDAC Device Functions (shared between various IPs). + * The discrete memories use the EDAC Device framework. The probe + * and error handling functions are very similar between memories + * so they are shared. The memory allocation and freeing for EDAC + * trigger testing are different for each memory.
+ */ + +#ifdef CONFIG_EDAC_ALTERA_OCRAM +static const struct edac_device_prv_data ocramecc_data; +static const struct edac_device_prv_data a10_ocramecc_data; +#endif +#ifdef CONFIG_EDAC_ALTERA_L2C +static const struct edac_device_prv_data l2ecc_data; +static const struct edac_device_prv_data a10_l2ecc_data; +#endif + +static irqreturn_t altr_edac_device_handler(int irq, void *dev_id) +{ + irqreturn_t ret_value = IRQ_NONE; + struct edac_device_ctl_info *dci = dev_id; + struct altr_edac_device_dev *drvdata = dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + + if (irq == drvdata->sb_irq) { + if (priv->ce_clear_mask) + writel(priv->ce_clear_mask, drvdata->base); + edac_device_handle_ce(dci, 0, 0, drvdata->edac_dev_name); + ret_value = IRQ_HANDLED; + } else if (irq == drvdata->db_irq) { + if (priv->ue_clear_mask) + writel(priv->ue_clear_mask, drvdata->base); + edac_device_handle_ue(dci, 0, 0, drvdata->edac_dev_name); + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + ret_value = IRQ_HANDLED; + } else { + WARN_ON(1); + } + + return ret_value; +} + +static ssize_t __maybe_unused +altr_edac_device_trig(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + u32 *ptemp, i, error_mask; + int result = 0; + u8 trig_type; + unsigned long flags; + struct edac_device_ctl_info *edac_dci = file->private_data; + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + void *generic_ptr = edac_dci->dev; + + if (!user_buf || get_user(trig_type, user_buf)) + return -EFAULT; + + if (!priv->alloc_mem) + return -ENOMEM; + + /* + * Note that generic_ptr is initialized to the device pointer, but in + * some alloc_mem functions it is overridden to return allocator data. + */ + ptemp = priv->alloc_mem(priv->trig_alloc_sz, &generic_ptr); + if (!ptemp) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Inject: Buffer Allocation error\n"); + return -ENOMEM; + } + + if (trig_type == ALTR_UE_TRIGGER_CHAR) + error_mask = priv->ue_set_mask; + else + error_mask = priv->ce_set_mask; + + edac_printk(KERN_ALERT, EDAC_DEVICE, + "Trigger Error Mask (0x%X)\n", error_mask); + + local_irq_save(flags); + /* Write ECC-corrupted data out. */ + for (i = 0; i < (priv->trig_alloc_sz / sizeof(*ptemp)); i++) { + /* Read data so we're in the correct state */ + rmb(); + if (READ_ONCE(ptemp[i])) + result = -1; + /* Toggle Error bit (it is latched), leave ECC enabled */ + writel(error_mask, (drvdata->base + priv->set_err_ofst)); + writel(priv->ecc_enable_mask, (drvdata->base + + priv->set_err_ofst)); + ptemp[i] = i; + } + /* Ensure it has been written out */ + wmb(); + local_irq_restore(flags); + + if (result) + edac_printk(KERN_ERR, EDAC_DEVICE, "Mem Not Cleared\n"); + + /* Read out written data. 
ECC error caused here */ + for (i = 0; i < ALTR_TRIGGER_READ_WRD_CNT; i++) + if (READ_ONCE(ptemp[i]) != i) + edac_printk(KERN_ERR, EDAC_DEVICE, + "Read doesn't match written data\n"); + + if (priv->free_mem) + priv->free_mem(ptemp, priv->trig_alloc_sz, generic_ptr); + + return count; +} + +static const struct file_operations altr_edac_device_inject_fops __maybe_unused = { + .open = simple_open, + .write = altr_edac_device_trig, + .llseek = generic_file_llseek, +}; + +static ssize_t __maybe_unused +altr_edac_a10_device_trig(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos); + +static const struct file_operations altr_edac_a10_device_inject_fops __maybe_unused = { + .open = simple_open, + .write = altr_edac_a10_device_trig, + .llseek = generic_file_llseek, +}; + +static ssize_t __maybe_unused +altr_edac_a10_device_trig2(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos); + +static const struct file_operations altr_edac_a10_device_inject2_fops __maybe_unused = { + .open = simple_open, + .write = altr_edac_a10_device_trig2, + .llseek = generic_file_llseek, +}; + +static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, + const struct edac_device_prv_data *priv) +{ + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + + if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) + return; + + drvdata->debugfs_dir = edac_debugfs_create_dir(drvdata->edac_dev_name); + if (!drvdata->debugfs_dir) + return; + + if (!edac_debugfs_create_file("altr_trigger", S_IWUSR, + drvdata->debugfs_dir, edac_dci, + priv->inject_fops)) + debugfs_remove_recursive(drvdata->debugfs_dir); +} + +static const struct of_device_id altr_edac_device_of_match[] = { +#ifdef CONFIG_EDAC_ALTERA_L2C + { .compatible = "altr,socfpga-l2-ecc", .data = &l2ecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_OCRAM + { .compatible = "altr,socfpga-ocram-ecc", .data = &ocramecc_data }, +#endif + {}, +}; +MODULE_DEVICE_TABLE(of, altr_edac_device_of_match); + +/* + * altr_edac_device_probe() + * This is a generic EDAC device driver that will support + * various Altera memory devices such as the L2 cache ECC and + * OCRAM ECC as well as the memories for other peripherals. + * Module specific initialization is done by passing the + * function index in the device tree. 
+ */ +static int altr_edac_device_probe(struct platform_device *pdev) +{ + struct edac_device_ctl_info *dci; + struct altr_edac_device_dev *drvdata; + struct resource *r; + int res = 0; + struct device_node *np = pdev->dev.of_node; + char *ecc_name = (char *)np->name; + static int dev_instance; + + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to open devres group\n"); + return -ENOMEM; + } + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to get mem resource\n"); + res = -ENODEV; + goto fail; + } + + if (!devm_request_mem_region(&pdev->dev, r->start, resource_size(r), + dev_name(&pdev->dev))) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s:Error requesting mem region\n", ecc_name); + res = -EBUSY; + goto fail; + } + + dci = edac_device_alloc_ctl_info(sizeof(*drvdata), ecc_name, + 1, ecc_name, 1, 0, dev_instance++); + + if (!dci) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: Unable to allocate EDAC device\n", ecc_name); + res = -ENOMEM; + goto fail; + } + + drvdata = dci->pvt_info; + dci->dev = &pdev->dev; + platform_set_drvdata(pdev, dci); + drvdata->edac_dev_name = ecc_name; + + drvdata->base = devm_ioremap(&pdev->dev, r->start, resource_size(r)); + if (!drvdata->base) { + res = -ENOMEM; + goto fail1; + } + + /* Get driver specific data for this EDAC device */ + drvdata->data = of_match_node(altr_edac_device_of_match, np)->data; + + /* Check specific dependencies for the module */ + if (drvdata->data->setup) { + res = drvdata->data->setup(drvdata); + if (res) + goto fail1; + } + + drvdata->sb_irq = platform_get_irq(pdev, 0); + if (drvdata->sb_irq < 0) { + res = drvdata->sb_irq; + goto fail1; + } + res = devm_request_irq(&pdev->dev, drvdata->sb_irq, + altr_edac_device_handler, + 0, dev_name(&pdev->dev), dci); + if (res) + goto fail1; + + drvdata->db_irq = platform_get_irq(pdev, 1); + if (drvdata->db_irq < 0) { + res = drvdata->db_irq; + goto fail1; + } + res = devm_request_irq(&pdev->dev, drvdata->db_irq, + altr_edac_device_handler, + 0, dev_name(&pdev->dev), dci); + if (res) + goto fail1; + + dci->mod_name = "Altera ECC Manager"; + dci->dev_name = drvdata->edac_dev_name; + + res = edac_device_add_device(dci); + if (res) + goto fail1; + + altr_create_edacdev_dbgfs(dci, drvdata->data); + + devres_close_group(&pdev->dev, NULL); + + return 0; + +fail1: + edac_device_free_ctl_info(dci); +fail: + devres_release_group(&pdev->dev, NULL); + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s:Error setting up EDAC device: %d\n", ecc_name, res); + + return res; +} + +static void altr_edac_device_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *dci = platform_get_drvdata(pdev); + struct altr_edac_device_dev *drvdata = dci->pvt_info; + + debugfs_remove_recursive(drvdata->debugfs_dir); + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(dci); +} + +static struct platform_driver altr_edac_device_driver = { + .probe = altr_edac_device_probe, + .remove = altr_edac_device_remove, + .driver = { + .name = "altr_edac_device", + .of_match_table = altr_edac_device_of_match, + }, +}; +module_platform_driver(altr_edac_device_driver); + +/******************* Arria10 Device ECC Shared Functions *****************/ + +/* + * Test for memory's ECC dependencies upon entry because platform specific + * startup should have initialized the memory and enabled the ECC. + * Can't turn on ECC here because accessing uninitialized memory will + * cause CE/UE errors, possibly causing an ABORT. 
+ */ +static int __maybe_unused +altr_check_ecc_deps(struct altr_edac_device_dev *device) +{ + void __iomem *base = device->base; + const struct edac_device_prv_data *prv = device->data; + + if (readl(base + prv->ecc_en_ofst) & prv->ecc_enable_mask) + return 0; + + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: No ECC present or ECC disabled.\n", + device->edac_dev_name); + return -ENODEV; +} + +static irqreturn_t __maybe_unused altr_edac_a10_ecc_irq(int irq, void *dev_id) +{ + struct altr_edac_device_dev *dci = dev_id; + void __iomem *base = dci->base; + + if (irq == dci->sb_irq) { + writel(ALTR_A10_ECC_SERRPENA, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name); + + return IRQ_HANDLED; + } else if (irq == dci->db_irq) { + writel(ALTR_A10_ECC_DERRPENA, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name); + if (dci->data->panic) + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + + return IRQ_HANDLED; + } + + WARN_ON(1); + + return IRQ_NONE; +} + +/******************* Arria10 Memory Buffer Functions *********************/ + +static inline int a10_get_irq_mask(struct device_node *np) +{ + int irq; + const u32 *handle = of_get_property(np, "interrupts", NULL); + + if (!handle) + return -ENODEV; + irq = be32_to_cpup(handle); + return irq; +} + +static inline void ecc_set_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + value |= bit_mask; + writel(value, ioaddr); +} + +static inline void ecc_clear_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + value &= ~bit_mask; + writel(value, ioaddr); +} + +static inline int ecc_test_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + return (value & bit_mask) ? 1 : 0; +} + +/* + * This function uses the memory initialization block in the Arria10 ECC + * controller to initialize/clear the entire memory data and ECC data. 
+ */ +static int __maybe_unused altr_init_memory_port(void __iomem *ioaddr, int port) +{ + int limit = ALTR_A10_ECC_INIT_WATCHDOG_10US; + u32 init_mask, stat_mask, clear_mask; + int ret = 0; + + if (port) { + init_mask = ALTR_A10_ECC_INITB; + stat_mask = ALTR_A10_ECC_INITCOMPLETEB; + clear_mask = ALTR_A10_ECC_ERRPENB_MASK; + } else { + init_mask = ALTR_A10_ECC_INITA; + stat_mask = ALTR_A10_ECC_INITCOMPLETEA; + clear_mask = ALTR_A10_ECC_ERRPENA_MASK; + } + + ecc_set_bits(init_mask, (ioaddr + ALTR_A10_ECC_CTRL_OFST)); + while (limit--) { + if (ecc_test_bits(stat_mask, + (ioaddr + ALTR_A10_ECC_INITSTAT_OFST))) + break; + udelay(1); + } + if (limit < 0) + ret = -EBUSY; + + /* Clear any pending ECC interrupts */ + writel(clear_mask, (ioaddr + ALTR_A10_ECC_INTSTAT_OFST)); + + return ret; +} + +static __init int __maybe_unused +altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, + u32 ecc_ctrl_en_mask, bool dual_port) +{ + int ret = 0; + void __iomem *ecc_block_base; + struct regmap *ecc_mgr_map; + char *ecc_name; + struct device_node *np_eccmgr; + + ecc_name = (char *)np->name; + + /* Get the ECC Manager - parent of the device EDACs */ + np_eccmgr = of_get_parent(np); + + ecc_mgr_map = + altr_sysmgr_regmap_lookup_by_phandle(np_eccmgr, + "altr,sysmgr-syscon"); + + of_node_put(np_eccmgr); + if (IS_ERR(ecc_mgr_map)) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to get syscon altr,sysmgr-syscon\n"); + return -ENODEV; + } + + /* Map the ECC Block */ + ecc_block_base = of_iomap(np, 0); + if (!ecc_block_base) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to map %s ECC block\n", ecc_name); + return -ENODEV; + } + + /* Disable ECC */ + regmap_write(ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, irq_mask); + writel(ALTR_A10_ECC_SERRINTEN, + (ecc_block_base + ALTR_A10_ECC_ERRINTENR_OFST)); + ecc_clear_bits(ecc_ctrl_en_mask, + (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); + /* Ensure all writes complete */ + wmb(); + /* Use HW initialization block to initialize memory for ECC */ + ret = altr_init_memory_port(ecc_block_base, 0); + if (ret) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "ECC: cannot init %s PORTA memory\n", ecc_name); + goto out; + } + + if (dual_port) { + ret = altr_init_memory_port(ecc_block_base, 1); + if (ret) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "ECC: cannot init %s PORTB memory\n", + ecc_name); + goto out; + } + } + + /* Enable ECC */ + ecc_set_bits(ecc_ctrl_en_mask, (ecc_block_base + + ALTR_A10_ECC_CTRL_OFST)); + writel(ALTR_A10_ECC_SERRINTEN, + (ecc_block_base + ALTR_A10_ECC_ERRINTENS_OFST)); + regmap_write(ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_CLR_OFST, irq_mask); + /* Ensure all writes complete */ + wmb(); +out: + iounmap(ecc_block_base); + return ret; +} + +static int validate_parent_available(struct device_node *np); +static const struct of_device_id altr_edac_a10_device_of_match[]; +static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) +{ + int irq; + struct device_node *child, *np; + + np = of_find_compatible_node(NULL, NULL, + "altr,socfpga-a10-ecc-manager"); + if (!np) { + edac_printk(KERN_ERR, EDAC_DEVICE, "ECC Manager not found\n"); + return -ENODEV; + } + + for_each_child_of_node(np, child) { + const struct of_device_id *pdev_id; + const struct edac_device_prv_data *prv; + + if (!of_device_is_available(child)) + continue; + if (!of_device_is_compatible(child, compat)) + continue; + + if (validate_parent_available(child)) + continue; + + irq = a10_get_irq_mask(child); + if (irq < 0) + continue; + + /* Get matching node and check for valid 
result */ + pdev_id = of_match_node(altr_edac_a10_device_of_match, child); + if (IS_ERR_OR_NULL(pdev_id)) + continue; + + /* Validate private data pointer before dereferencing */ + prv = pdev_id->data; + if (!prv) + continue; + + altr_init_a10_ecc_block(child, BIT(irq), + prv->ecc_enable_mask, 0); + } + + of_node_put(np); + return 0; +} + +/*********************** SDRAM EDAC Device Functions *********************/ + +#ifdef CONFIG_EDAC_ALTERA_SDRAM + +/* + * A legacy U-Boot bug only enabled memory mapped access to the ECC Enable + * register if ECC is enabled. Linux checks the ECC Enable register to + * determine ECC status. + * Use an SMC call (which always works) to determine ECC enablement. + */ +static int altr_s10_sdram_check_ecc_deps(struct altr_edac_device_dev *device) +{ + const struct edac_device_prv_data *prv = device->data; + unsigned long sdram_ecc_addr; + struct arm_smccc_res result; + struct device_node *np; + phys_addr_t sdram_addr; + u32 read_reg; + int ret; + + np = of_find_compatible_node(NULL, NULL, "altr,sdr-ctl"); + if (!np) + goto sdram_err; + + sdram_addr = of_translate_address(np, of_get_address(np, 0, + NULL, NULL)); + of_node_put(np); + sdram_ecc_addr = (unsigned long)sdram_addr + prv->ecc_en_ofst; + arm_smccc_smc(INTEL_SIP_SMC_REG_READ, sdram_ecc_addr, + 0, 0, 0, 0, 0, 0, &result); + read_reg = (unsigned int)result.a1; + ret = (int)result.a0; + if (!ret && (read_reg & prv->ecc_enable_mask)) + return 0; + +sdram_err: + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: No ECC present or ECC disabled.\n", + device->edac_dev_name); + return -ENODEV; +} + +static const struct edac_device_prv_data s10_sdramecc_data = { + .setup = altr_s10_sdram_check_ecc_deps, + .ce_clear_mask = ALTR_S10_ECC_SERRPENA, + .ue_clear_mask = ALTR_S10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_S10_ECC_EN, + .ecc_en_ofst = ALTR_S10_ECC_CTRL_SDRAM_OFST, + .ce_set_mask = ALTR_S10_ECC_TSERRA, + .ue_set_mask = ALTR_S10_ECC_TDERRA, + .set_err_ofst = ALTR_S10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; +#endif /* CONFIG_EDAC_ALTERA_SDRAM */ + +/*********************** OCRAM EDAC Device Functions *********************/ + +#ifdef CONFIG_EDAC_ALTERA_OCRAM + +static void *ocram_alloc_mem(size_t size, void **other) +{ + struct device_node *np; + struct gen_pool *gp; + void *sram_addr; + + np = of_find_compatible_node(NULL, NULL, "altr,socfpga-ocram-ecc"); + if (!np) + return NULL; + + gp = of_gen_pool_get(np, "iram", 0); + of_node_put(np); + if (!gp) + return NULL; + + sram_addr = (void *)gen_pool_alloc(gp, size); + if (!sram_addr) + return NULL; + + memset(sram_addr, 0, size); + /* Ensure data is written out */ + wmb(); + + /* Remember this handle for freeing later */ + *other = gp; + + return sram_addr; +} + +static void ocram_free_mem(void *p, size_t size, void *other) +{ + gen_pool_free((struct gen_pool *)other, (unsigned long)p, size); +} + +static const struct edac_device_prv_data ocramecc_data = { + .setup = altr_check_ecc_deps, + .ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR), + .ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR), + .alloc_mem = ocram_alloc_mem, + .free_mem = ocram_free_mem, + .ecc_enable_mask = ALTR_OCR_ECC_EN, + .ecc_en_ofst = ALTR_OCR_ECC_REG_OFFSET, + .ce_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJS), + .ue_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJD), + .set_err_ofst = ALTR_OCR_ECC_REG_OFFSET, + .trig_alloc_sz = ALTR_TRIG_OCRAM_BYTE_SIZE, + .inject_fops = &altr_edac_device_inject_fops, +}; + 
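+/* + * Illustrative usage sketch (not part of the original patch): with + * CONFIG_EDAC_DEBUG enabled, the injection paths above are exercised from + * userspace through the debugfs node created by altr_create_edacdev_dbgfs(), + * e.g.: + * + * echo C > /sys/kernel/debug/edac/<device>/altr_trigger (correctable) + * echo U > /sys/kernel/debug/edac/<device>/altr_trigger (uncorrectable) + * + * Only the first character is read; it is compared against + * ALTR_UE_TRIGGER_CHAR ('U') to select ue_set_mask, and any other character + * selects ce_set_mask. The <device> path component is the EDAC device name + * and is given here only as an assumed example. + */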
+static int __maybe_unused +altr_check_ocram_deps_init(struct altr_edac_device_dev *device) +{ + void __iomem *base = device->base; + int ret; + + ret = altr_check_ecc_deps(device); + if (ret) + return ret; + + /* + * Verify that OCRAM has been initialized. + * During a warm reset, OCRAM contents are retained, but the control + * and status registers are reset to their default values. Therefore, + * ECC must be explicitly re-enabled in the control register. + * Error condition: if INITCOMPLETEA is clear and ECC_EN is already set. + */ + if (!ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA, + (base + ALTR_A10_ECC_INITSTAT_OFST))) { + if (!ecc_test_bits(ALTR_A10_ECC_EN, + (base + ALTR_A10_ECC_CTRL_OFST))) + ecc_set_bits(ALTR_A10_ECC_EN, + (base + ALTR_A10_ECC_CTRL_OFST)); + else + return -ENODEV; + } + + /* Enable IRQ on Single Bit Error */ + writel(ALTR_A10_ECC_SERRINTEN, (base + ALTR_A10_ECC_ERRINTENS_OFST)); + /* Ensure all writes complete */ + wmb(); + + return 0; +} + +static const struct edac_device_prv_data a10_ocramecc_data = { + .setup = altr_check_ocram_deps_init, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_OCRAM, + .ecc_enable_mask = ALTR_A10_OCRAM_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject2_fops, + /* + * OCRAM panic on uncorrectable error because sleep/resume + * functions and FPGA contents are stored in OCRAM. Prefer + * a kernel panic over executing/loading corrupted data. + */ + .panic = true, +}; + +#endif /* CONFIG_EDAC_ALTERA_OCRAM */ + +/********************* L2 Cache EDAC Device Functions ********************/ + +#ifdef CONFIG_EDAC_ALTERA_L2C + +static void *l2_alloc_mem(size_t size, void **other) +{ + struct device *dev = *other; + void *ptemp = devm_kzalloc(dev, size, GFP_KERNEL); + + if (!ptemp) + return NULL; + + /* Make sure everything is written out */ + wmb(); + + /* + * Clean all cache levels up to LoC (includes L2) + * This ensures the corrupted data is written into + * L2 cache for readback test (which causes ECC error). + */ + flush_cache_all(); + + return ptemp; +} + +static void l2_free_mem(void *p, size_t size, void *other) +{ + struct device *dev = other; + + if (dev && p) + devm_kfree(dev, p); +} + +/* + * altr_l2_check_deps() + * Test for L2 cache ECC dependencies upon entry because + * platform specific startup should have initialized the L2 + * memory and enabled the ECC. + * Bail if ECC is not enabled. + * Note that L2 Cache Enable is forced at build time. 
+ */ +static int altr_l2_check_deps(struct altr_edac_device_dev *device) +{ + void __iomem *base = device->base; + const struct edac_device_prv_data *prv = device->data; + + if ((readl(base) & prv->ecc_enable_mask) == + prv->ecc_enable_mask) + return 0; + + edac_printk(KERN_ERR, EDAC_DEVICE, + "L2: No ECC present, or ECC disabled\n"); + return -ENODEV; +} + +static irqreturn_t altr_edac_a10_l2_irq(int irq, void *dev_id) +{ + struct altr_edac_device_dev *dci = dev_id; + + if (irq == dci->sb_irq) { + regmap_write(dci->edac->ecc_mgr_map, + A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST, + A10_SYSGMR_MPU_CLEAR_L2_ECC_SB); + edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name); + + return IRQ_HANDLED; + } else if (irq == dci->db_irq) { + regmap_write(dci->edac->ecc_mgr_map, + A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST, + A10_SYSGMR_MPU_CLEAR_L2_ECC_MB); + edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name); + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + + return IRQ_HANDLED; + } + + WARN_ON(1); + + return IRQ_NONE; +} + +static const struct edac_device_prv_data l2ecc_data = { + .setup = altr_l2_check_deps, + .ce_clear_mask = 0, + .ue_clear_mask = 0, + .alloc_mem = l2_alloc_mem, + .free_mem = l2_free_mem, + .ecc_enable_mask = ALTR_L2_ECC_EN, + .ce_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJS), + .ue_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJD), + .set_err_ofst = ALTR_L2_ECC_REG_OFFSET, + .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE, + .inject_fops = &altr_edac_device_inject_fops, +}; + +static const struct edac_device_prv_data a10_l2ecc_data = { + .setup = altr_l2_check_deps, + .ce_clear_mask = ALTR_A10_L2_ECC_SERR_CLR, + .ue_clear_mask = ALTR_A10_L2_ECC_MERR_CLR, + .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_L2, + .alloc_mem = l2_alloc_mem, + .free_mem = l2_free_mem, + .ecc_enable_mask = ALTR_A10_L2_ECC_EN_CTL, + .ce_set_mask = ALTR_A10_L2_ECC_CE_INJ_MASK, + .ue_set_mask = ALTR_A10_L2_ECC_UE_INJ_MASK, + .set_err_ofst = ALTR_A10_L2_ECC_INJ_OFST, + .ecc_irq_handler = altr_edac_a10_l2_irq, + .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE, + .inject_fops = &altr_edac_device_inject_fops, +}; + +#endif /* CONFIG_EDAC_ALTERA_L2C */ + +/********************* Ethernet Device Functions ********************/ + +#ifdef CONFIG_EDAC_ALTERA_ETHERNET + +static int __init socfpga_init_ethernet_ecc(struct altr_edac_device_dev *dev) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-eth-mac-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(dev); +} + +static const struct edac_device_prv_data a10_enetecc_data = { + .setup = socfpga_init_ethernet_ecc, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +#endif /* CONFIG_EDAC_ALTERA_ETHERNET */ + +/********************** NAND Device Functions **********************/ + +#ifdef CONFIG_EDAC_ALTERA_NAND + +static int __init socfpga_init_nand_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-nand-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + +static const struct edac_device_prv_data a10_nandecc_data = { + .setup = socfpga_init_nand_ecc, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = 
ALTR_A10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +#endif /* CONFIG_EDAC_ALTERA_NAND */ + +/********************** DMA Device Functions **********************/ + +#ifdef CONFIG_EDAC_ALTERA_DMA + +static int __init socfpga_init_dma_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-dma-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + +static const struct edac_device_prv_data a10_dmaecc_data = { + .setup = socfpga_init_dma_ecc, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +#endif /* CONFIG_EDAC_ALTERA_DMA */ + +/********************** USB Device Functions **********************/ + +#ifdef CONFIG_EDAC_ALTERA_USB + +static int __init socfpga_init_usb_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-usb-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + +static const struct edac_device_prv_data a10_usbecc_data = { + .setup = socfpga_init_usb_ecc, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +#endif /* CONFIG_EDAC_ALTERA_USB */ + +/********************** QSPI Device Functions **********************/ + +#ifdef CONFIG_EDAC_ALTERA_QSPI + +static int __init socfpga_init_qspi_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-qspi-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + +static const struct edac_device_prv_data a10_qspiecc_data = { + .setup = socfpga_init_qspi_ecc, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +#endif /* CONFIG_EDAC_ALTERA_QSPI */ + +/********************* SDMMC Device Functions **********************/ + +#ifdef CONFIG_EDAC_ALTERA_SDMMC + +static const struct edac_device_prv_data a10_sdmmceccb_data; +static int altr_portb_setup(struct altr_edac_device_dev *device) +{ + struct edac_device_ctl_info *dci; + struct altr_edac_device_dev *altdev; + char *ecc_name = "sdmmcb-ecc"; + int edac_idx, rc; + struct device_node *np; + const struct edac_device_prv_data *prv = &a10_sdmmceccb_data; + + rc = altr_check_ecc_deps(device); + if (rc) + return rc; + + np = of_find_compatible_node(NULL, NULL, 
"altr,socfpga-sdmmc-ecc"); + if (!np) { + edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n"); + return -ENODEV; + } + + /* Create the PortB EDAC device */ + edac_idx = edac_device_alloc_index(); + dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name, 1, + ecc_name, 1, 0, edac_idx); + if (!dci) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: Unable to allocate PortB EDAC device\n", + ecc_name); + return -ENOMEM; + } + + /* Initialize the PortB EDAC device structure from PortA structure */ + altdev = dci->pvt_info; + *altdev = *device; + + if (!devres_open_group(&altdev->ddev, altr_portb_setup, GFP_KERNEL)) + return -ENOMEM; + + /* Update PortB specific values */ + altdev->edac_dev_name = ecc_name; + altdev->edac_idx = edac_idx; + altdev->edac_dev = dci; + altdev->data = prv; + dci->dev = &altdev->ddev; + dci->ctl_name = "Altera ECC Manager"; + dci->mod_name = ecc_name; + dci->dev_name = ecc_name; + + /* + * Update the PortB IRQs - A10 has 4, S10 has 2, Index accordingly + * + * FIXME: Instead of ifdefs with different architectures the driver + * should properly use compatibles. + */ +#ifdef CONFIG_64BIT + altdev->sb_irq = irq_of_parse_and_map(np, 1); +#else + altdev->sb_irq = irq_of_parse_and_map(np, 2); +#endif + if (!altdev->sb_irq) { + edac_printk(KERN_ERR, EDAC_DEVICE, "Error PortB SBIRQ alloc\n"); + rc = -ENODEV; + goto err_release_group_1; + } + rc = devm_request_irq(&altdev->ddev, altdev->sb_irq, + prv->ecc_irq_handler, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, + ecc_name, altdev); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "PortB SBERR IRQ error\n"); + goto err_release_group_1; + } + +#ifdef CONFIG_64BIT + /* Use IRQ to determine SError origin instead of assigning IRQ */ + rc = of_property_read_u32_index(np, "interrupts", 1, &altdev->db_irq); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Error PortB DBIRQ alloc\n"); + goto err_release_group_1; + } +#else + altdev->db_irq = irq_of_parse_and_map(np, 3); + if (!altdev->db_irq) { + edac_printk(KERN_ERR, EDAC_DEVICE, "Error PortB DBIRQ alloc\n"); + rc = -ENODEV; + goto err_release_group_1; + } + rc = devm_request_irq(&altdev->ddev, altdev->db_irq, + prv->ecc_irq_handler, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, + ecc_name, altdev); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "PortB DBERR IRQ error\n"); + goto err_release_group_1; + } +#endif + + rc = edac_device_add_device(dci); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "edac_device_add_device portB failed\n"); + rc = -ENOMEM; + goto err_release_group_1; + } + altr_create_edacdev_dbgfs(dci, prv); + + list_add(&altdev->next, &altdev->edac->a10_ecc_devices); + + devres_remove_group(&altdev->ddev, altr_portb_setup); + + return 0; + +err_release_group_1: + edac_device_free_ctl_info(dci); + devres_release_group(&altdev->ddev, altr_portb_setup); + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s:Error setting up EDAC device: %d\n", ecc_name, rc); + return rc; +} + +static int __init socfpga_init_sdmmc_ecc(struct altr_edac_device_dev *device) +{ + int rc = -ENODEV; + struct device_node *child; + + child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); + if (!child) + return -ENODEV; + + if (!of_device_is_available(child)) + goto exit; + + if (validate_parent_available(child)) + goto exit; + + /* Init portB */ + rc = altr_init_a10_ecc_block(child, ALTR_A10_SDMMC_IRQ_MASK, + a10_sdmmceccb_data.ecc_enable_mask, 1); + if (rc) + goto exit; + + /* Setup portB */ + return altr_portb_setup(device); + +exit: + of_node_put(child); + return rc; +} + +static 
irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id) +{ + struct altr_edac_device_dev *ad = dev_id; + void __iomem *base = ad->base; + const struct edac_device_prv_data *priv = ad->data; + + if (irq == ad->sb_irq) { + writel(priv->ce_clear_mask, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ce(ad->edac_dev, 0, 0, ad->edac_dev_name); + return IRQ_HANDLED; + } else if (irq == ad->db_irq) { + writel(priv->ue_clear_mask, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ue(ad->edac_dev, 0, 0, ad->edac_dev_name); + return IRQ_HANDLED; + } + + WARN_ONCE(1, "Unhandled IRQ %d on Port B.", irq); + + return IRQ_NONE; +} + +static const struct edac_device_prv_data a10_sdmmcecca_data = { + .setup = socfpga_init_sdmmc_ecc, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_SERRPENA, + .ue_set_mask = ALTR_A10_ECC_DERRPENA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +static const struct edac_device_prv_data a10_sdmmceccb_data = { + .setup = socfpga_init_sdmmc_ecc, + .ce_clear_mask = ALTR_A10_ECC_SERRPENB, + .ue_clear_mask = ALTR_A10_ECC_DERRPENB, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRB, + .ue_set_mask = ALTR_A10_ECC_TDERRB, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq_portb, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +#endif /* CONFIG_EDAC_ALTERA_SDMMC */ + +/********************* Arria10 EDAC Device Functions *************************/ +static const struct of_device_id altr_edac_a10_device_of_match[] = { +#ifdef CONFIG_EDAC_ALTERA_L2C + { .compatible = "altr,socfpga-a10-l2-ecc", .data = &a10_l2ecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_OCRAM + { .compatible = "altr,socfpga-a10-ocram-ecc", + .data = &a10_ocramecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_ETHERNET + { .compatible = "altr,socfpga-eth-mac-ecc", + .data = &a10_enetecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_NAND + { .compatible = "altr,socfpga-nand-ecc", .data = &a10_nandecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_DMA + { .compatible = "altr,socfpga-dma-ecc", .data = &a10_dmaecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_USB + { .compatible = "altr,socfpga-usb-ecc", .data = &a10_usbecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_QSPI + { .compatible = "altr,socfpga-qspi-ecc", .data = &a10_qspiecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_SDMMC + { .compatible = "altr,socfpga-sdmmc-ecc", .data = &a10_sdmmcecca_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_SDRAM + { .compatible = "altr,sdram-edac-s10", .data = &s10_sdramecc_data }, +#endif + {}, +}; +MODULE_DEVICE_TABLE(of, altr_edac_a10_device_of_match); + +/* + * The Arria10 EDAC Device Functions differ from the Cyclone5/Arria5 + * because 2 IRQs are shared among all the ECC peripherals. The ECC + * manager manages the IRQs and the children. + * Based on xgene_edac.c peripheral code. 
+ */ + +static ssize_t __maybe_unused +altr_edac_a10_device_trig(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dci = file->private_data; + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + void __iomem *set_addr = (drvdata->base + priv->set_err_ofst); + unsigned long flags; + u8 trig_type; + + if (!user_buf || get_user(trig_type, user_buf)) + return -EFAULT; + + local_irq_save(flags); + if (trig_type == ALTR_UE_TRIGGER_CHAR) + writew(priv->ue_set_mask, set_addr); + else + writew(priv->ce_set_mask, set_addr); + + /* Ensure the interrupt test bits are set */ + wmb(); + local_irq_restore(flags); + + return count; +} + +/* + * The Stratix10 EDAC Error Injection Functions differ slightly from + * Arria10. A few Arria10 peripherals can use this injection function. + * Inject the error into the memory and then read it back to trigger the IRQ. + */ +static ssize_t __maybe_unused +altr_edac_a10_device_trig2(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dci = file->private_data; + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + void __iomem *set_addr = (drvdata->base + priv->set_err_ofst); + unsigned long flags; + u8 trig_type; + + if (!user_buf || get_user(trig_type, user_buf)) + return -EFAULT; + + local_irq_save(flags); + if (trig_type == ALTR_UE_TRIGGER_CHAR) { + writew(priv->ue_set_mask, set_addr); + } else { + /* Set up read/write of 4 bytes */ + writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST); + /* Set up address to 0 */ + writel(0, drvdata->base + ECC_BLK_ADDRESS_OFST); + /* Set up accctrl to read & ecc & data override */ + writel(ECC_READ_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + /* Set up write for single bit change */ + writel(readl(drvdata->base + ECC_BLK_RDATA0_OFST) ^ 0x1, + drvdata->base + ECC_BLK_WDATA0_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA1_OFST), + drvdata->base + ECC_BLK_WDATA1_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA2_OFST), + drvdata->base + ECC_BLK_WDATA2_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA3_OFST), + drvdata->base + ECC_BLK_WDATA3_OFST); + + /* Copy Read ECC to Write ECC */ + writel(readl(drvdata->base + ECC_BLK_RECC0_OFST), + drvdata->base + ECC_BLK_WECC0_OFST); + writel(readl(drvdata->base + ECC_BLK_RECC1_OFST), + drvdata->base + ECC_BLK_WECC1_OFST); + /* Set up accctrl to write & ecc override & data override */ + writel(ECC_WRITE_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + /* Set up accctrl to read & ecc override & data override */ + writel(ECC_READ_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + } + + /* Ensure the interrupt test bits are set */ + wmb(); + local_irq_restore(flags); + + return count; +} + +static void altr_edac_a10_irq_handler(struct irq_desc *desc) +{ + int dberr, bit, sm_offset, irq_status; + struct altr_arria10_edac *edac = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); + int irq = irq_desc_get_irq(desc); + unsigned long bits; + + dberr = (irq == edac->db_irq) ? 1 : 0; + sm_offset = dberr ? 
A10_SYSMGR_ECC_INTSTAT_DERR_OFST : + A10_SYSMGR_ECC_INTSTAT_SERR_OFST; + + chained_irq_enter(chip, desc); + + regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status); + + bits = irq_status; + for_each_set_bit(bit, &bits, 32) + generic_handle_domain_irq(edac->domain, dberr * 32 + bit); + + chained_irq_exit(chip, desc); +} + +static int validate_parent_available(struct device_node *np) +{ + struct device_node *parent; + int ret = 0; + + /* SDRAM must be present for Linux (implied parent) */ + if (of_device_is_compatible(np, "altr,sdram-edac-s10")) + return 0; + + /* Ensure parent device is enabled if parent node exists */ + parent = of_parse_phandle(np, "altr,ecc-parent", 0); + if (parent && !of_device_is_available(parent)) + ret = -ENODEV; + + of_node_put(parent); + return ret; +} + +static int get_s10_sdram_edac_resource(struct device_node *np, + struct resource *res) +{ + struct device_node *parent; + int ret; + + parent = of_parse_phandle(np, "altr,sdr-syscon", 0); + if (!parent) + return -ENODEV; + + ret = of_address_to_resource(parent, 0, res); + of_node_put(parent); + + return ret; +} + +static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, + struct device_node *np) +{ + struct edac_device_ctl_info *dci; + struct altr_edac_device_dev *altdev; + char *ecc_name = (char *)np->name; + struct resource res; + int edac_idx; + int rc = 0; + const struct edac_device_prv_data *prv; + /* Get matching node and check for valid result */ + const struct of_device_id *pdev_id = + of_match_node(altr_edac_a10_device_of_match, np); + if (IS_ERR_OR_NULL(pdev_id)) + return -ENODEV; + + /* Get driver specific data for this EDAC device */ + prv = pdev_id->data; + if (IS_ERR_OR_NULL(prv)) + return -ENODEV; + + if (validate_parent_available(np)) + return -ENODEV; + + if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL)) + return -ENOMEM; + + if (of_device_is_compatible(np, "altr,sdram-edac-s10")) + rc = get_s10_sdram_edac_resource(np, &res); + else + rc = of_address_to_resource(np, 0, &res); + + if (rc < 0) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: no resource address\n", ecc_name); + goto err_release_group; + } + + edac_idx = edac_device_alloc_index(); + dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name, + 1, ecc_name, 1, 0, edac_idx); + + if (!dci) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: Unable to allocate EDAC device\n", ecc_name); + rc = -ENOMEM; + goto err_release_group; + } + + altdev = dci->pvt_info; + dci->dev = edac->dev; + altdev->edac_dev_name = ecc_name; + altdev->edac_idx = edac_idx; + altdev->edac = edac; + altdev->edac_dev = dci; + altdev->data = prv; + altdev->ddev = *edac->dev; + dci->dev = &altdev->ddev; + dci->ctl_name = "Altera ECC Manager"; + dci->mod_name = ecc_name; + dci->dev_name = ecc_name; + + altdev->base = devm_ioremap_resource(edac->dev, &res); + if (IS_ERR(altdev->base)) { + rc = PTR_ERR(altdev->base); + goto err_release_group1; + } + + /* Check specific dependencies for the module */ + if (altdev->data->setup) { + rc = altdev->data->setup(altdev); + if (rc) + goto err_release_group1; + } + + altdev->sb_irq = irq_of_parse_and_map(np, 0); + if (!altdev->sb_irq) { + edac_printk(KERN_ERR, EDAC_DEVICE, "Error allocating SBIRQ\n"); + rc = -ENODEV; + goto err_release_group1; + } + rc = devm_request_irq(edac->dev, altdev->sb_irq, prv->ecc_irq_handler, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, + ecc_name, altdev); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n"); + goto err_release_group1; + } + +#ifdef CONFIG_64BIT 
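+ /* + * On 64-bit (Stratix10) parts the double-bit error is delivered as a + * fatal SError handled by the panic notifier below rather than as a + * maskable IRQ, so the raw "interrupts" cell is stored only so that + * s10_edac_dberr_handler() can match BIT(db_irq) against the system + * manager DERR status. + */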
+ /* Use IRQ to determine SError origin instead of assigning IRQ */ + rc = of_property_read_u32_index(np, "interrupts", 0, &altdev->db_irq); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to parse DB IRQ index\n"); + goto err_release_group1; + } +#else + altdev->db_irq = irq_of_parse_and_map(np, 1); + if (!altdev->db_irq) { + edac_printk(KERN_ERR, EDAC_DEVICE, "Error allocating DBIRQ\n"); + rc = -ENODEV; + goto err_release_group1; + } + rc = devm_request_irq(edac->dev, altdev->db_irq, prv->ecc_irq_handler, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, + ecc_name, altdev); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n"); + goto err_release_group1; + } +#endif + + rc = edac_device_add_device(dci); + if (rc) { + dev_err(edac->dev, "edac_device_add_device failed\n"); + rc = -ENOMEM; + goto err_release_group1; + } + + altr_create_edacdev_dbgfs(dci, prv); + + list_add(&altdev->next, &edac->a10_ecc_devices); + + devres_remove_group(edac->dev, altr_edac_a10_device_add); + + return 0; + +err_release_group1: + edac_device_free_ctl_info(dci); +err_release_group: + devres_release_group(edac->dev, NULL); + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s:Error setting up EDAC device: %d\n", ecc_name, rc); + + return rc; +} + +static void a10_eccmgr_irq_mask(struct irq_data *d) +{ + struct altr_arria10_edac *edac = irq_data_get_irq_chip_data(d); + + regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, + BIT(d->hwirq)); +} + +static void a10_eccmgr_irq_unmask(struct irq_data *d) +{ + struct altr_arria10_edac *edac = irq_data_get_irq_chip_data(d); + + regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_CLR_OFST, + BIT(d->hwirq)); +} + +static int a10_eccmgr_irqdomain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + struct altr_arria10_edac *edac = d->host_data; + + irq_set_chip_and_handler(irq, &edac->irq_chip, handle_simple_irq); + irq_set_chip_data(irq, edac); + irq_set_noprobe(irq); + + return 0; +} + +static const struct irq_domain_ops a10_eccmgr_ic_ops = { + .map = a10_eccmgr_irqdomain_map, + .xlate = irq_domain_xlate_twocell, +}; + +/************** Stratix 10 EDAC Double Bit Error Handler ************/ +#define to_a10edac(p, m) container_of(p, struct altr_arria10_edac, m) + +#ifdef CONFIG_64BIT +/* panic routine issues reboot on non-zero panic_timeout */ +extern int panic_timeout; + +/* + * The double bit error is handled through SError which is fatal. This is + * called as a panic notifier to printout ECC error info as part of the panic. 
+ */ +static int s10_edac_dberr_handler(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct altr_arria10_edac *edac = to_a10edac(this, panic_notifier); + int err_addr, dberror; + + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_ECC_INTSTAT_DERR_OFST, + &dberror); + regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror); + if (dberror & S10_DBE_IRQ_MASK) { + struct list_head *position; + struct altr_edac_device_dev *ed; + struct arm_smccc_res result; + + /* Find the matching DBE in the list of devices */ + list_for_each(position, &edac->a10_ecc_devices) { + ed = list_entry(position, struct altr_edac_device_dev, + next); + if (!(BIT(ed->db_irq) & dberror)) + continue; + + writel(ALTR_A10_ECC_DERRPENA, + ed->base + ALTR_A10_ECC_INTSTAT_OFST); + err_addr = readl(ed->base + ALTR_S10_DERR_ADDRA_OFST); + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_ADDR_OFST, err_addr); + edac_printk(KERN_ERR, EDAC_DEVICE, + "EDAC: [Fatal DBE on %s @ 0x%08X]\n", + ed->edac_dev_name, err_addr); + break; + } + /* Notify the System through SMC. Reboot delay = 1 second */ + panic_timeout = 1; + arm_smccc_smc(INTEL_SIP_SMC_ECC_DBE, dberror, 0, 0, 0, 0, + 0, 0, &result); + } + + return NOTIFY_DONE; +} +#endif + +/****************** Arria 10 EDAC Probe Function *********************/ +static int altr_edac_a10_probe(struct platform_device *pdev) +{ + struct altr_arria10_edac *edac; + struct device_node *child; + + edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); + if (!edac) + return -ENOMEM; + + edac->dev = &pdev->dev; + platform_set_drvdata(pdev, edac); + INIT_LIST_HEAD(&edac->a10_ecc_devices); + + edac->ecc_mgr_map = + altr_sysmgr_regmap_lookup_by_phandle(pdev->dev.of_node, + "altr,sysmgr-syscon"); + + if (IS_ERR(edac->ecc_mgr_map)) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to get syscon altr,sysmgr-syscon\n"); + return PTR_ERR(edac->ecc_mgr_map); + } + + /* Set irq mask for DDR SBE to avoid any pending irq before registration */ + regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, + (A10_SYSMGR_ECC_INTMASK_SDMMCB | A10_SYSMGR_ECC_INTMASK_DDR0)); + + edac->irq_chip.name = pdev->dev.of_node->name; + edac->irq_chip.irq_mask = a10_eccmgr_irq_mask; + edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask; + edac->domain = irq_domain_create_linear(dev_fwnode(&pdev->dev), 64, &a10_eccmgr_ic_ops, + edac); + if (!edac->domain) { + dev_err(&pdev->dev, "Error adding IRQ domain\n"); + return -ENOMEM; + } + + edac->sb_irq = platform_get_irq(pdev, 0); + if (edac->sb_irq < 0) + return edac->sb_irq; + + irq_set_chained_handler_and_data(edac->sb_irq, + altr_edac_a10_irq_handler, + edac); + +#ifdef CONFIG_64BIT + { + int dberror, err_addr; + + edac->panic_notifier.notifier_call = s10_edac_dberr_handler; + atomic_notifier_chain_register(&panic_notifier_list, + &edac->panic_notifier); + + /* Print a message if an uncorrectable error occurred previously. 
*/ + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, + &dberror); + if (dberror) { + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST, + &err_addr); + edac_printk(KERN_ERR, EDAC_DEVICE, + "Previous Boot UE detected[0x%X] @ 0x%X\n", + dberror, err_addr); + /* Reset the sticky registers */ + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_VAL_OFST, 0); + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_ADDR_OFST, 0); + } + } +#else + edac->db_irq = platform_get_irq(pdev, 1); + if (edac->db_irq < 0) + return edac->db_irq; + + irq_set_chained_handler_and_data(edac->db_irq, + altr_edac_a10_irq_handler, edac); +#endif + + for_each_child_of_node(pdev->dev.of_node, child) { + if (!of_device_is_available(child)) + continue; + + if (of_match_node(altr_edac_a10_device_of_match, child)) + altr_edac_a10_device_add(edac, child); + +#ifdef CONFIG_EDAC_ALTERA_SDRAM + else if (of_device_is_compatible(child, "altr,sdram-edac-a10")) + of_platform_populate(pdev->dev.of_node, + altr_sdram_ctrl_of_match, + NULL, &pdev->dev); +#endif + } + + return 0; +} + +static const struct of_device_id altr_edac_a10_of_match[] = { + { .compatible = "altr,socfpga-a10-ecc-manager" }, + { .compatible = "altr,socfpga-s10-ecc-manager" }, + {}, +}; +MODULE_DEVICE_TABLE(of, altr_edac_a10_of_match); + +static struct platform_driver altr_edac_a10_driver = { + .probe = altr_edac_a10_probe, + .driver = { + .name = "socfpga_a10_ecc_manager", + .of_match_table = altr_edac_a10_of_match, + }, +}; +module_platform_driver(altr_edac_a10_driver); + +MODULE_AUTHOR("Thor Thayer"); +MODULE_DESCRIPTION("EDAC Driver for Altera Memories"); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h new file mode 100644 index 000000000000..7248d24c4908 --- /dev/null +++ b/drivers/edac/altera_edac.h @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2017-2018, Intel Corporation + * Copyright (C) 2015 Altera Corporation + */ + +#ifndef _ALTERA_EDAC_H +#define _ALTERA_EDAC_H + +#include <linux/arm-smccc.h> +#include <linux/edac.h> +#include <linux/types.h> + +/* SDRAM Controller CtrlCfg Register */ +#define CV_CTLCFG_OFST 0x00 + +/* SDRAM Controller CtrlCfg Register Bit Masks */ +#define CV_CTLCFG_ECC_EN 0x400 +#define CV_CTLCFG_ECC_CORR_EN 0x800 +#define CV_CTLCFG_GEN_SB_ERR 0x2000 +#define CV_CTLCFG_GEN_DB_ERR 0x4000 + +#define CV_CTLCFG_ECC_AUTO_EN (CV_CTLCFG_ECC_EN) + +/* SDRAM Controller Address Width Register */ +#define CV_DRAMADDRW_OFST 0x2C + +/* SDRAM Controller Address Widths Field Register */ +#define DRAMADDRW_COLBIT_MASK 0x001F +#define DRAMADDRW_COLBIT_SHIFT 0 +#define DRAMADDRW_ROWBIT_MASK 0x03E0 +#define DRAMADDRW_ROWBIT_SHIFT 5 +#define CV_DRAMADDRW_BANKBIT_MASK 0x1C00 +#define CV_DRAMADDRW_BANKBIT_SHIFT 10 +#define CV_DRAMADDRW_CSBIT_MASK 0xE000 +#define CV_DRAMADDRW_CSBIT_SHIFT 13 + +/* SDRAM Controller Interface Data Width Register */ +#define CV_DRAMIFWIDTH_OFST 0x30 + +/* SDRAM Controller Interface Data Width Defines */ +#define CV_DRAMIFWIDTH_16B_ECC 24 +#define CV_DRAMIFWIDTH_32B_ECC 40 + +/* SDRAM Controller DRAM Status Register */ +#define CV_DRAMSTS_OFST 0x38 + +/* SDRAM Controller DRAM Status Register Bit Masks */ +#define CV_DRAMSTS_SBEERR 0x04 +#define CV_DRAMSTS_DBEERR 0x08 +#define CV_DRAMSTS_CORR_DROP 0x10 + +/* SDRAM Controller DRAM IRQ Register */ +#define CV_DRAMINTR_OFST 0x3C + +/* SDRAM Controller DRAM IRQ Register Bit Masks */ +#define CV_DRAMINTR_INTREN 0x01 +#define CV_DRAMINTR_SBEMASK 0x02 +#define CV_DRAMINTR_DBEMASK 0x04 +#define CV_DRAMINTR_CORRDROPMASK 0x08 
+#define CV_DRAMINTR_INTRCLR 0x10 + +/* SDRAM Controller Single Bit Error Count Register */ +#define CV_SBECOUNT_OFST 0x40 + +/* SDRAM Controller Double Bit Error Count Register */ +#define CV_DBECOUNT_OFST 0x44 + +/* SDRAM Controller ECC Error Address Register */ +#define CV_ERRADDR_OFST 0x48 + +/*-----------------------------------------*/ + +/* SDRAM Controller EccCtrl Register */ +#define A10_ECCCTRL1_OFST 0x00 + +/* SDRAM Controller EccCtrl Register Bit Masks */ +#define A10_ECCCTRL1_ECC_EN 0x001 +#define A10_ECCCTRL1_CNT_RST 0x010 +#define A10_ECCCTRL1_AWB_CNT_RST 0x100 +#define A10_ECC_CNT_RESET_MASK (A10_ECCCTRL1_CNT_RST | \ + A10_ECCCTRL1_AWB_CNT_RST) + +/* SDRAM Controller Address Width Register */ +#define CV_DRAMADDRW 0xFFC2502C +#define A10_DRAMADDRW 0xFFCFA0A8 +#define S10_DRAMADDRW 0xF80110E0 + +/* SDRAM Controller Address Widths Field Register */ +#define DRAMADDRW_COLBIT_MASK 0x001F +#define DRAMADDRW_COLBIT_SHIFT 0 +#define DRAMADDRW_ROWBIT_MASK 0x03E0 +#define DRAMADDRW_ROWBIT_SHIFT 5 +#define CV_DRAMADDRW_BANKBIT_MASK 0x1C00 +#define CV_DRAMADDRW_BANKBIT_SHIFT 10 +#define CV_DRAMADDRW_CSBIT_MASK 0xE000 +#define CV_DRAMADDRW_CSBIT_SHIFT 13 + +#define A10_DRAMADDRW_BANKBIT_MASK 0x3C00 +#define A10_DRAMADDRW_BANKBIT_SHIFT 10 +#define A10_DRAMADDRW_GRPBIT_MASK 0xC000 +#define A10_DRAMADDRW_GRPBIT_SHIFT 14 +#define A10_DRAMADDRW_CSBIT_MASK 0x70000 +#define A10_DRAMADDRW_CSBIT_SHIFT 16 + +/* SDRAM Controller Interface Data Width Register */ +#define CV_DRAMIFWIDTH 0xFFC25030 +#define A10_DRAMIFWIDTH 0xFFCFB008 +#define S10_DRAMIFWIDTH 0xF8011008 + +/* SDRAM Controller Interface Data Width Defines */ +#define CV_DRAMIFWIDTH_16B_ECC 24 +#define CV_DRAMIFWIDTH_32B_ECC 40 + +#define A10_DRAMIFWIDTH_16B 0x0 +#define A10_DRAMIFWIDTH_32B 0x1 +#define A10_DRAMIFWIDTH_64B 0x2 + +/* SDRAM Controller DRAM IRQ Register */ +#define A10_ERRINTEN_OFST 0x10 + +/* SDRAM Controller DRAM IRQ Register Bit Masks */ +#define A10_ERRINTEN_SERRINTEN 0x01 +#define A10_ERRINTEN_DERRINTEN 0x02 +#define A10_ECC_IRQ_EN_MASK (A10_ERRINTEN_SERRINTEN | \ + A10_ERRINTEN_DERRINTEN) + +/* SDRAM Interrupt Mode Register */ +#define A10_INTMODE_OFST 0x1C +#define A10_INTMODE_SB_INT 1 + +/* SDRAM Controller Error Status Register */ +#define A10_INTSTAT_OFST 0x20 + +/* SDRAM Controller Error Status Register Bit Masks */ +#define A10_INTSTAT_SBEERR 0x01 +#define A10_INTSTAT_DBEERR 0x02 + +/* SDRAM Controller ECC Error Address Register */ +#define A10_DERRADDR_OFST 0x2C +#define A10_SERRADDR_OFST 0x30 + +/* SDRAM Controller ECC Diagnostic Register */ +#define A10_DIAGINTTEST_OFST 0x24 + +#define A10_DIAGINT_TSERRA_MASK 0x0001 +#define A10_DIAGINT_TDERRA_MASK 0x0100 + +#define A10_SBERR_IRQ 34 +#define A10_DBERR_IRQ 32 + +/* SDRAM Single Bit Error Count Compare Set Register */ +#define A10_SERRCNTREG_OFST 0x3C + +#define A10_SYMAN_INTMASK_CLR 0xFFD06098 +#define A10_INTMASK_CLR_OFST 0x10 +#define A10_DDR0_IRQ_MASK BIT(17) + +struct altr_sdram_prv_data { + int ecc_ctrl_offset; + int ecc_ctl_en_mask; + int ecc_cecnt_offset; + int ecc_uecnt_offset; + int ecc_stat_offset; + int ecc_stat_ce_mask; + int ecc_stat_ue_mask; + int ecc_saddr_offset; + int ecc_daddr_offset; + int ecc_irq_en_offset; + int ecc_irq_en_mask; + int ecc_irq_clr_offset; + int ecc_irq_clr_mask; + int ecc_cnt_rst_offset; + int ecc_cnt_rst_mask; + struct edac_dev_sysfs_attribute *eccmgr_sysfs_attr; + int ecc_enable_mask; + int ce_set_mask; + int ue_set_mask; + int ce_ue_trgr_offset; +}; + +/* Altera SDRAM Memory Controller data */ +struct 
altr_sdram_mc_data { + struct regmap *mc_vbase; + int sb_irq; + int db_irq; + const struct altr_sdram_prv_data *data; +}; + +/************************** EDAC Device Defines **************************/ +/***** General Device Trigger Defines *****/ +#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */ +#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */ +#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */ +#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */ + +/******* Cyclone5 and Arria5 Defines *******/ +/* OCRAM ECC Management Group Defines */ +#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04 +#define ALTR_OCR_ECC_REG_OFFSET 0x00 +#define ALTR_OCR_ECC_EN BIT(0) +#define ALTR_OCR_ECC_INJS BIT(1) +#define ALTR_OCR_ECC_INJD BIT(2) +#define ALTR_OCR_ECC_SERR BIT(3) +#define ALTR_OCR_ECC_DERR BIT(4) + +/* L2 ECC Management Group Defines */ +#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00 +#define ALTR_L2_ECC_REG_OFFSET 0x00 +#define ALTR_L2_ECC_EN BIT(0) +#define ALTR_L2_ECC_INJS BIT(1) +#define ALTR_L2_ECC_INJD BIT(2) + +/* Arria10 General ECC Block Module Defines */ +#define ALTR_A10_ECC_CTRL_OFST 0x08 +#define ALTR_A10_ECC_EN BIT(0) +#define ALTR_A10_ECC_INITA BIT(16) +#define ALTR_A10_ECC_INITB BIT(24) + +#define ALTR_A10_ECC_INITSTAT_OFST 0x0C +#define ALTR_A10_ECC_INITCOMPLETEA BIT(0) +#define ALTR_A10_ECC_INITCOMPLETEB BIT(8) + +#define ALTR_A10_ECC_ERRINTEN_OFST 0x10 +#define ALTR_A10_ECC_ERRINTENS_OFST 0x14 +#define ALTR_A10_ECC_ERRINTENR_OFST 0x18 +#define ALTR_A10_ECC_SERRINTEN BIT(0) + +#define ALTR_A10_ECC_INTMODE_OFST 0x1C +#define ALTR_A10_ECC_INTMODE BIT(0) + +#define ALTR_A10_ECC_INTSTAT_OFST 0x20 +#define ALTR_A10_ECC_SERRPENA BIT(0) +#define ALTR_A10_ECC_DERRPENA BIT(8) +#define ALTR_A10_ECC_ERRPENA_MASK (ALTR_A10_ECC_SERRPENA | \ + ALTR_A10_ECC_DERRPENA) +#define ALTR_A10_ECC_SERRPENB BIT(16) +#define ALTR_A10_ECC_DERRPENB BIT(24) +#define ALTR_A10_ECC_ERRPENB_MASK (ALTR_A10_ECC_SERRPENB | \ + ALTR_A10_ECC_DERRPENB) + +#define ALTR_A10_ECC_INTTEST_OFST 0x24 +#define ALTR_A10_ECC_TSERRA BIT(0) +#define ALTR_A10_ECC_TDERRA BIT(8) +#define ALTR_A10_ECC_TSERRB BIT(16) +#define ALTR_A10_ECC_TDERRB BIT(24) + +/* ECC Manager Defines */ +#define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 +#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) +#define A10_SYSMGR_ECC_INTMASK_SDMMCB BIT(16) +#define A10_SYSMGR_ECC_INTMASK_DDR0 BIT(17) + +#define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C +#define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 +#define A10_SYSMGR_ECC_INTSTAT_L2 BIT(0) +#define A10_SYSMGR_ECC_INTSTAT_OCRAM BIT(1) + +#define A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST 0xA8 +#define A10_SYSGMR_MPU_CLEAR_L2_ECC_SB BIT(15) +#define A10_SYSGMR_MPU_CLEAR_L2_ECC_MB BIT(31) + +/* Arria 10 L2 ECC Management Group Defines */ +#define ALTR_A10_L2_ECC_CTL_OFST 0x0 +#define ALTR_A10_L2_ECC_EN_CTL BIT(0) + +#define ALTR_A10_L2_ECC_STATUS 0xFFD060A4 +#define ALTR_A10_L2_ECC_STAT_OFST 0xA4 +#define ALTR_A10_L2_ECC_SERR_PEND BIT(0) +#define ALTR_A10_L2_ECC_MERR_PEND BIT(0) + +#define ALTR_A10_L2_ECC_CLR_OFST 0x4 +#define ALTR_A10_L2_ECC_SERR_CLR BIT(15) +#define ALTR_A10_L2_ECC_MERR_CLR BIT(31) + +#define ALTR_A10_L2_ECC_INJ_OFST ALTR_A10_L2_ECC_CTL_OFST +#define ALTR_A10_L2_ECC_CE_INJ_MASK 0x00000101 +#define ALTR_A10_L2_ECC_UE_INJ_MASK 0x00010101 + +/* Arria 10 OCRAM ECC Management Group Defines */ +#define ALTR_A10_OCRAM_ECC_EN_CTL (BIT(1) | BIT(0)) + +/* Arria 10 Ethernet ECC Management Group Defines */ +#define ALTR_A10_COMMON_ECC_EN_CTL BIT(0) + +/* Arria 10 SDMMC ECC 
Management Group Defines */ +#define ALTR_A10_SDMMC_IRQ_MASK (BIT(16) | BIT(15)) + +/* A10 ECC Controller memory initialization timeout */ +#define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 + +/************* Stratix10 Defines **************/ +#define ALTR_S10_ECC_CTRL_SDRAM_OFST 0x00 +#define ALTR_S10_ECC_EN BIT(0) + +#define ALTR_S10_ECC_ERRINTEN_OFST 0x10 +#define ALTR_S10_ECC_ERRINTENS_OFST 0x14 +#define ALTR_S10_ECC_ERRINTENR_OFST 0x18 +#define ALTR_S10_ECC_SERRINTEN BIT(0) + +#define ALTR_S10_ECC_INTMODE_OFST 0x1C +#define ALTR_S10_ECC_INTMODE BIT(0) + +#define ALTR_S10_ECC_INTSTAT_OFST 0x20 +#define ALTR_S10_ECC_SERRPENA BIT(0) +#define ALTR_S10_ECC_DERRPENA BIT(8) +#define ALTR_S10_ECC_ERRPENA_MASK (ALTR_S10_ECC_SERRPENA | \ + ALTR_S10_ECC_DERRPENA) + +#define ALTR_S10_ECC_INTTEST_OFST 0x24 +#define ALTR_S10_ECC_TSERRA BIT(0) +#define ALTR_S10_ECC_TDERRA BIT(8) +#define ALTR_S10_ECC_TSERRB BIT(16) +#define ALTR_S10_ECC_TDERRB BIT(24) + +#define ALTR_S10_DERR_ADDRA_OFST 0x2C + +/* Stratix10 ECC Manager Defines */ +#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 + +/* Sticky registers for Uncorrected Errors */ +#define S10_SYSMGR_UE_VAL_OFST 0x220 +#define S10_SYSMGR_UE_ADDR_OFST 0x224 + +#define S10_DDR0_IRQ_MASK BIT(16) +#define S10_DBE_IRQ_MASK 0x3FFFE + +/* Define ECC Block Offsets for peripherals */ +#define ECC_BLK_ADDRESS_OFST 0x40 +#define ECC_BLK_RDATA0_OFST 0x44 +#define ECC_BLK_RDATA1_OFST 0x48 +#define ECC_BLK_RDATA2_OFST 0x4C +#define ECC_BLK_RDATA3_OFST 0x50 +#define ECC_BLK_WDATA0_OFST 0x54 +#define ECC_BLK_WDATA1_OFST 0x58 +#define ECC_BLK_WDATA2_OFST 0x5C +#define ECC_BLK_WDATA3_OFST 0x60 +#define ECC_BLK_RECC0_OFST 0x64 +#define ECC_BLK_RECC1_OFST 0x68 +#define ECC_BLK_WECC0_OFST 0x6C +#define ECC_BLK_WECC1_OFST 0x70 +#define ECC_BLK_DBYTECTRL_OFST 0x74 +#define ECC_BLK_ACCCTRL_OFST 0x78 +#define ECC_BLK_STARTACC_OFST 0x7C + +#define ECC_XACT_KICK 0x10000 +#define ECC_WORD_WRITE 0xFF +#define ECC_WRITE_DOVR 0x101 +#define ECC_WRITE_EDOVR 0x103 +#define ECC_READ_EOVR 0x2 +#define ECC_READ_EDOVR 0x3 + +struct altr_edac_device_dev; + +struct edac_device_prv_data { + int (*setup)(struct altr_edac_device_dev *device); + int ce_clear_mask; + int ue_clear_mask; + int irq_status_mask; + void * (*alloc_mem)(size_t size, void **other); + void (*free_mem)(void *p, size_t size, void *other); + int ecc_enable_mask; + int ecc_en_ofst; + int ce_set_mask; + int ue_set_mask; + int set_err_ofst; + irqreturn_t (*ecc_irq_handler)(int irq, void *dev_id); + int trig_alloc_sz; + const struct file_operations *inject_fops; + bool panic; +}; + +struct altr_edac_device_dev { + struct list_head next; + void __iomem *base; + int sb_irq; + int db_irq; + const struct edac_device_prv_data *data; + struct dentry *debugfs_dir; + char *edac_dev_name; + struct altr_arria10_edac *edac; + struct edac_device_ctl_info *edac_dev; + struct device ddev; + int edac_idx; +}; + +struct altr_arria10_edac { + struct device *dev; + struct regmap *ecc_mgr_map; + int sb_irq; + int db_irq; + struct irq_domain *domain; + struct irq_chip irq_chip; + struct list_head a10_ecc_devices; + struct notifier_block panic_notifier; +}; + +#endif /* #ifndef _ALTERA_EDAC_H */ diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 8b6a0343c220..2391f3469961 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/ras.h> +#include <linux/string_choices.h> #include "amd64_edac.h" 
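Looking back at the altera_edac.h hunk just above: the DRAMADDRW masks and shifts pack the column, row, bank and chip-select address widths into a single register. A minimal sketch of how those fields could be decoded, purely illustrative (cv_sdram_num_locations() is a hypothetical helper, not part of this patch):

static u64 cv_sdram_num_locations(u32 addrw)
{
	/* Sum the per-field address widths defined in altera_edac.h above. */
	u32 bits = (addrw & DRAMADDRW_COLBIT_MASK) >> DRAMADDRW_COLBIT_SHIFT;

	bits += (addrw & DRAMADDRW_ROWBIT_MASK) >> DRAMADDRW_ROWBIT_SHIFT;
	bits += (addrw & CV_DRAMADDRW_BANKBIT_MASK) >> CV_DRAMADDRW_BANKBIT_SHIFT;
	bits += (addrw & CV_DRAMADDRW_CSBIT_MASK) >> CV_DRAMADDRW_CSBIT_SHIFT;

	/* Addressable locations; scale by the interface data width for bytes. */
	return 1ULL << bits;
}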
-#include <asm/amd_nb.h> +#include <asm/amd/nb.h> +#include <asm/amd/node.h> -static struct edac_pci_ctl_info *amd64_ctl_pci; - -static int report_gart_errors; -module_param(report_gart_errors, int, 0644); +static struct edac_pci_ctl_info *pci_ctl; /* * Set by command line parameter. If BIOS has enabled the ECC, this override is @@ -15,15 +16,26 @@ module_param(ecc_enable_override, int, 0644); static struct msr __percpu *msrs; -/* - * count successfully initialized driver instances for setup_pci_device() - */ -static atomic_t drv_instances = ATOMIC_INIT(0); +static inline u32 get_umc_reg(struct amd64_pvt *pvt, u32 reg) +{ + if (!pvt->flags.zn_regs_v2) + return reg; + + switch (reg) { + case UMCCH_ADDR_MASK_SEC: return UMCCH_ADDR_MASK_SEC_DDR5; + case UMCCH_DIMM_CFG: return UMCCH_DIMM_CFG_DDR5; + } + + WARN_ONCE(1, "%s: unknown register 0x%x", __func__, reg); + return 0; +} -/* Per-node driver instances */ -static struct mem_ctl_info **mcis; +/* Per-node stuff */ static struct ecc_settings **ecc_stngs; +/* Device for the PCI component */ +static struct device *pci_ctl_dev; + /* * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching- @@ -70,7 +82,7 @@ int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, amd64_warn("%s: error reading F%dx%03x.\n", func, PCI_FUNC(pdev->devfn), offset); - return err; + return pcibios_err_to_errno(err); } int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, @@ -83,64 +95,77 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, amd64_warn("%s: error writing to F%dx%03x.\n", func, PCI_FUNC(pdev->devfn), offset); - return err; + return pcibios_err_to_errno(err); +} + +/* + * Select DCT to which PCI cfg accesses are routed + */ +static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct) +{ + u32 reg = 0; + + amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg); + reg &= (pvt->model == 0x30) ? ~3 : ~1; + reg |= dct; + amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg); } /* * * Depending on the family, F2 DCT reads need special handling: * - * K8: has a single DCT only + * K8: has a single DCT only and no address offsets >= 0x100 * * F10h: each DCT has its own set of regs * DCT0 -> F2x040.. * DCT1 -> F2x140.. * - * F15h: we select which DCT we access using F1x10C[DctCfgSel] - * * F16h: has only 1 DCT + * + * F15h: we select which DCT we access using F1x10C[DctCfgSel] */ -static int k8_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, - const char *func) +static inline int amd64_read_dct_pci_cfg(struct amd64_pvt *pvt, u8 dct, + int offset, u32 *val) { - if (addr >= 0x100) - return -EINVAL; - - return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func); -} + switch (pvt->fam) { + case 0xf: + if (dct || offset >= 0x100) + return -EINVAL; + break; -static int f10_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, - const char *func) -{ - return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func); -} + case 0x10: + if (dct) { + /* + * Note: If ganging is enabled, barring the regs + * F2x[1,0]98 and F2x[1,0]9C; reads to F2x1xx + * return 0. (cf. 
Section 2.8.1 F10h BKDG) + */ + if (dct_ganging_enabled(pvt)) + return 0; -/* - * Select DCT to which PCI cfg accesses are routed - */ -static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct) -{ - u32 reg = 0; + offset += 0x100; + } + break; - amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg); - reg &= 0xfffffffe; - reg |= dct; - amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg); -} + case 0x15: + /* + * F15h: F2x1xx addresses do not map explicitly to DCT1. + * We should select which DCT we access using F1x10C[DctCfgSel] + */ + dct = (dct && pvt->model == 0x30) ? 3 : dct; + f15h_select_dct(pvt, dct); + break; -static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, - const char *func) -{ - u8 dct = 0; + case 0x16: + if (dct) + return -EINVAL; + break; - if (addr >= 0x140 && addr <= 0x1a0) { - dct = 1; - addr -= 0x100; + default: + break; } - - f15h_select_dct(pvt, dct); - - return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func); + return amd64_read_pci_cfg(pvt->F2, offset, val); } /* @@ -158,10 +183,10 @@ static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, */ /* - * scan the scrub rate mapping table for a close or matching bandwidth value to + * Scan the scrub rate mapping table for a close or matching bandwidth value to * issue. If requested is too big, then use last maximum value found. */ -static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) +static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate) { u32 scrubval; int i; @@ -189,7 +214,14 @@ static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) scrubval = scrubrates[i].scrubval; - pci_write_bits32(ctl, SCRCTRL, scrubval, 0x001F); + if (pvt->fam == 0x15 && pvt->model == 0x60) { + f15h_select_dct(pvt, 0); + pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F); + f15h_select_dct(pvt, 1); + pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F); + } else { + pci_write_bits32(pvt->F3, SCRCTRL, scrubval, 0x001F); + } if (scrubval) return scrubrates[i].bandwidth; @@ -197,32 +229,43 @@ static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) return 0; } -static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bw) +static int set_scrub_rate(struct mem_ctl_info *mci, u32 bw) { struct amd64_pvt *pvt = mci->pvt_info; u32 min_scrubrate = 0x5; - if (boot_cpu_data.x86 == 0xf) + if (pvt->fam == 0xf) min_scrubrate = 0x0; - /* F15h Erratum #505 */ - if (boot_cpu_data.x86 == 0x15) - f15h_select_dct(pvt, 0); + if (pvt->fam == 0x15) { + /* Erratum #505 */ + if (pvt->model < 0x10) + f15h_select_dct(pvt, 0); - return __amd64_set_scrub_rate(pvt->F3, bw, min_scrubrate); + if (pvt->model == 0x60) + min_scrubrate = 0x6; + } + return __set_scrub_rate(pvt, bw, min_scrubrate); } -static int amd64_get_scrub_rate(struct mem_ctl_info *mci) +static int get_scrub_rate(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; - u32 scrubval = 0; int i, retval = -EINVAL; + u32 scrubval = 0; - /* F15h Erratum #505 */ - if (boot_cpu_data.x86 == 0x15) - f15h_select_dct(pvt, 0); + if (pvt->fam == 0x15) { + /* Erratum #505 */ + if (pvt->model < 0x10) + f15h_select_dct(pvt, 0); - amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); + if (pvt->model == 0x60) + amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval); + else + amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); + } else { + amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); + } scrubval = scrubval & 0x001F; @@ -239,8 +282,7 @@ static int 
amd64_get_scrub_rate(struct mem_ctl_info *mci) * returns true if the SysAddr given by sys_addr matches the * DRAM base/limit associated with node_id */ -static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, - u8 nid) +static bool base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, u8 nid) { u64 addr; @@ -284,7 +326,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, if (intlv_en == 0) { for (node_id = 0; node_id < DRAM_RANGES; node_id++) { - if (amd64_base_limit_match(pvt, sys_addr, node_id)) + if (base_limit_match(pvt, sys_addr, node_id)) goto found; } goto err_no_match; @@ -308,7 +350,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, } /* sanity test for sys_addr */ - if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) { + if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) { amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address" "range for node %d with node interleaving enabled.\n", __func__, sys_addr, node_id); @@ -335,31 +377,32 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, u64 csbase, csmask, base_bits, mask_bits; u8 addr_shift; - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) { + if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) { csbase = pvt->csels[dct].csbases[csrow]; csmask = pvt->csels[dct].csmasks[csrow]; - base_bits = GENMASK(21, 31) | GENMASK(9, 15); - mask_bits = GENMASK(21, 29) | GENMASK(9, 15); + base_bits = GENMASK_ULL(31, 21) | GENMASK_ULL(15, 9); + mask_bits = GENMASK_ULL(29, 21) | GENMASK_ULL(15, 9); addr_shift = 4; /* - * F16h needs two addr_shift values: 8 for high and 6 for low - * (cf. F16h BKDG). - */ - } else if (boot_cpu_data.x86 == 0x16) { + * F16h and F15h, models 30h and later need two addr_shift values: + * 8 for high and 6 for low (cf. F16h BKDG). + */ + } else if (pvt->fam == 0x16 || + (pvt->fam == 0x15 && pvt->model >= 0x30)) { csbase = pvt->csels[dct].csbases[csrow]; csmask = pvt->csels[dct].csmasks[csrow >> 1]; - *base = (csbase & GENMASK(5, 15)) << 6; - *base |= (csbase & GENMASK(19, 30)) << 8; + *base = (csbase & GENMASK_ULL(15, 5)) << 6; + *base |= (csbase & GENMASK_ULL(30, 19)) << 8; *mask = ~0ULL; /* poke holes for the csmask */ - *mask &= ~((GENMASK(5, 15) << 6) | - (GENMASK(19, 30) << 8)); + *mask &= ~((GENMASK_ULL(15, 5) << 6) | + (GENMASK_ULL(30, 19) << 8)); - *mask |= (csmask & GENMASK(5, 15)) << 6; - *mask |= (csmask & GENMASK(19, 30)) << 8; + *mask |= (csmask & GENMASK_ULL(15, 5)) << 6; + *mask |= (csmask & GENMASK_ULL(30, 19)) << 8; return; } else { @@ -367,10 +410,12 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, csmask = pvt->csels[dct].csmasks[csrow >> 1]; addr_shift = 8; - if (boot_cpu_data.x86 == 0x15) - base_bits = mask_bits = GENMASK(19,30) | GENMASK(5,13); + if (pvt->fam == 0x15) + base_bits = mask_bits = + GENMASK_ULL(30,19) | GENMASK_ULL(13,5); else - base_bits = mask_bits = GENMASK(19,28) | GENMASK(5,13); + base_bits = mask_bits = + GENMASK_ULL(28,19) | GENMASK_ULL(13,5); } *base = (csbase & base_bits) << addr_shift; @@ -391,6 +436,9 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, #define for_each_chip_select_mask(i, dct, pvt) \ for (i = 0; i < pvt->csels[dct].m_cnt; i++) +#define for_each_umc(i) \ + for (i = 0; i < pvt->max_mcs; i++) + /* * @input_addr is an InputAddr associated with the node given by mci. Return the * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr). 
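A pattern that recurs throughout these hunks: the old (lo, hi)-style GENMASK this driver used is replaced by the generic GENMASK_ULL(hi, lo) from <linux/bits.h>, so the argument order flips while the selected bit range stays identical. A compile-time sanity check of two masks from get_cs_base_and_mask() above (a sketch; it assumes only that GENMASK_ULL(h, l) sets bits l through h):

#include <linux/bits.h>
#include <linux/build_bug.h>

static inline void genmask_order_check(void)
{
	/* DCSB bits [15:9] and [31:21], the same ranges as before the rename. */
	BUILD_BUG_ON(GENMASK_ULL(15, 9) != 0xfe00ULL);
	BUILD_BUG_ON(GENMASK_ULL(31, 21) != 0xffe00000ULL);
}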
@@ -441,20 +489,20 @@ static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr) * complete 32-bit values despite the fact that the bitfields in the DHAR * only represent bits 31-24 of the base and offset values. */ -int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, - u64 *hole_offset, u64 *hole_size) +static int get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, + u64 *hole_offset, u64 *hole_size) { struct amd64_pvt *pvt = mci->pvt_info; /* only revE and later have the DRAM Hole Address Register */ - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) { + if (pvt->fam == 0xf && pvt->ext_model < K8_REV_E) { edac_dbg(1, " revision %d for node %d does not support DHAR\n", pvt->ext_model, pvt->mc_node_id); return 1; } /* valid for Fam10h and above */ - if (boot_cpu_data.x86 >= 0x10 && !dhar_mem_hoist_valid(pvt)) { + if (pvt->fam >= 0x10 && !dhar_mem_hoist_valid(pvt)) { edac_dbg(1, " Dram Memory Hoisting is DISABLED on this system\n"); return 1; } @@ -486,10 +534,8 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, *hole_base = dhar_base(pvt); *hole_size = (1ULL << 32) - *hole_base; - if (boot_cpu_data.x86 > 0xf) - *hole_offset = f10_dhar_offset(pvt); - else - *hole_offset = k8_dhar_offset(pvt); + *hole_offset = (pvt->fam > 0xf) ? f10_dhar_offset(pvt) + : k8_dhar_offset(pvt); edac_dbg(1, " DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n", pvt->mc_node_id, (unsigned long)*hole_base, @@ -497,7 +543,287 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, return 0; } -EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info); + +#ifdef CONFIG_EDAC_DEBUG +#define EDAC_DCT_ATTR_SHOW(reg) \ +static ssize_t reg##_show(struct device *dev, \ + struct device_attribute *mattr, char *data) \ +{ \ + struct mem_ctl_info *mci = to_mci(dev); \ + struct amd64_pvt *pvt = mci->pvt_info; \ + \ + return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \ +} + +EDAC_DCT_ATTR_SHOW(dhar); +EDAC_DCT_ATTR_SHOW(dbam0); +EDAC_DCT_ATTR_SHOW(top_mem); +EDAC_DCT_ATTR_SHOW(top_mem2); + +static ssize_t dram_hole_show(struct device *dev, struct device_attribute *mattr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + + u64 hole_base = 0; + u64 hole_offset = 0; + u64 hole_size = 0; + + get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size); + + return sprintf(data, "%llx %llx %llx\n", hole_base, hole_offset, + hole_size); +} + +/* + * update NUM_DBG_ATTRS in case you add new members + */ +static DEVICE_ATTR(dhar, S_IRUGO, dhar_show, NULL); +static DEVICE_ATTR(dbam, S_IRUGO, dbam0_show, NULL); +static DEVICE_ATTR(topmem, S_IRUGO, top_mem_show, NULL); +static DEVICE_ATTR(topmem2, S_IRUGO, top_mem2_show, NULL); +static DEVICE_ATTR_RO(dram_hole); + +static struct attribute *dbg_attrs[] = { + &dev_attr_dhar.attr, + &dev_attr_dbam.attr, + &dev_attr_topmem.attr, + &dev_attr_topmem2.attr, + &dev_attr_dram_hole.attr, + NULL +}; + +static const struct attribute_group dbg_group = { + .attrs = dbg_attrs, +}; + +static ssize_t inject_section_show(struct device *dev, + struct device_attribute *mattr, char *buf) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct amd64_pvt *pvt = mci->pvt_info; + return sprintf(buf, "0x%x\n", pvt->injection.section); +} + +/* + * store error injection section value which refers to one of 4 16-byte sections + * within a 64-byte cacheline + * + * range: 0..3 + */ +static ssize_t inject_section_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct 
mem_ctl_info *mci = to_mci(dev); + struct amd64_pvt *pvt = mci->pvt_info; + unsigned long value; + int ret; + + ret = kstrtoul(data, 10, &value); + if (ret < 0) + return ret; + + if (value > 3) { + amd64_warn("%s: invalid section 0x%lx\n", __func__, value); + return -EINVAL; + } + + pvt->injection.section = (u32) value; + return count; +} + +static ssize_t inject_word_show(struct device *dev, + struct device_attribute *mattr, char *buf) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct amd64_pvt *pvt = mci->pvt_info; + return sprintf(buf, "0x%x\n", pvt->injection.word); +} + +/* + * store error injection word value which refers to one of 9 16-bit words of the + * 16-byte (128-bit + ECC bits) section + * + * range: 0..8 + */ +static ssize_t inject_word_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct amd64_pvt *pvt = mci->pvt_info; + unsigned long value; + int ret; + + ret = kstrtoul(data, 10, &value); + if (ret < 0) + return ret; + + if (value > 8) { + amd64_warn("%s: invalid word 0x%lx\n", __func__, value); + return -EINVAL; + } + + pvt->injection.word = (u32) value; + return count; +} + +static ssize_t inject_ecc_vector_show(struct device *dev, + struct device_attribute *mattr, + char *buf) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct amd64_pvt *pvt = mci->pvt_info; + return sprintf(buf, "0x%x\n", pvt->injection.bit_map); +} + +/* + * store 16 bit error injection vector which enables injecting errors to the + * corresponding bit within the error injection word above. When used during a + * DRAM ECC read, it holds the contents of the DRAM ECC bits. + */ +static ssize_t inject_ecc_vector_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct amd64_pvt *pvt = mci->pvt_info; + unsigned long value; + int ret; + + ret = kstrtoul(data, 16, &value); + if (ret < 0) + return ret; + + if (value & 0xFFFF0000) { + amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value); + return -EINVAL; + } + + pvt->injection.bit_map = (u32) value; + return count; +} + +/* + * Do a DRAM ECC read. Assemble staged values in the pvt area, format into + * fields needed by the injection registers and read the NB Array Data Port. + */ +static ssize_t inject_read_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct amd64_pvt *pvt = mci->pvt_info; + unsigned long value; + u32 section, word_bits; + int ret; + + ret = kstrtoul(data, 10, &value); + if (ret < 0) + return ret; + + /* Form value to choose 16-byte section of cacheline */ + section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); + + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); + + word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection); + + /* Issue 'word' and 'bit' along with the READ request */ + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); + + edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); + + return count; +} + +/* + * Do a DRAM ECC write. Assemble staged values in the pvt area and format into + * fields needed by the injection registers. 
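To make the staged flow concrete: a typical (hypothetical) session writes 1 to inject_section, 0 to inject_word and 0x4 to inject_ecc_vector, so that a subsequent write to inject_write flips bit 2 of word 0 in section 1 during the DRAM ECC write; writing to inject_read afterwards performs the corresponding DRAM ECC read with the same staged values.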
+ */ +static ssize_t inject_write_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct amd64_pvt *pvt = mci->pvt_info; + u32 section, word_bits, tmp; + unsigned long value; + int ret; + + ret = kstrtoul(data, 10, &value); + if (ret < 0) + return ret; + + /* Form value to choose 16-byte section of cacheline */ + section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); + + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); + + word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection); + + pr_notice_once("Don't forget to decrease MCE polling interval in\n" + "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n" + "so that you can get the error report faster.\n"); + + on_each_cpu(disable_caches, NULL, 1); + + /* Issue 'word' and 'bit' along with the READ request */ + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); + + retry: + /* wait until injection happens */ + amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp); + if (tmp & F10_NB_ARR_ECC_WR_REQ) { + cpu_relax(); + goto retry; + } + + on_each_cpu(enable_caches, NULL, 1); + + edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); + + return count; +} + +/* + * update NUM_INJ_ATTRS in case you add new members + */ + +static DEVICE_ATTR_RW(inject_section); +static DEVICE_ATTR_RW(inject_word); +static DEVICE_ATTR_RW(inject_ecc_vector); +static DEVICE_ATTR_WO(inject_write); +static DEVICE_ATTR_WO(inject_read); + +static struct attribute *inj_attrs[] = { + &dev_attr_inject_section.attr, + &dev_attr_inject_word.attr, + &dev_attr_inject_ecc_vector.attr, + &dev_attr_inject_write.attr, + &dev_attr_inject_read.attr, + NULL +}; + +static umode_t inj_is_visible(struct kobject *kobj, struct attribute *attr, int idx) +{ + struct device *dev = kobj_to_dev(kobj); + struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev); + struct amd64_pvt *pvt = mci->pvt_info; + + /* Families which have that injection hw */ + if (pvt->fam >= 0x10 && pvt->fam <= 0x16) + return attr->mode; + + return 0; +} + +static const struct attribute_group inj_group = { + .attrs = inj_attrs, + .is_visible = inj_is_visible, +}; +#endif /* CONFIG_EDAC_DEBUG */ /* * Return the DramAddr that the SysAddr given by @sys_addr maps to. It is @@ -536,8 +862,7 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr) dram_base = get_dram_base(pvt, pvt->mc_node_id); - ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, - &hole_size); + ret = get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size); if (!ret) { if ((sys_addr >= (1ULL << 32)) && (sys_addr < ((1ULL << 32) + hole_size))) { @@ -561,7 +886,7 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr) * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture * Programmer's Manual Volume 1 Application Programming. */ - dram_addr = (sys_addr & GENMASK(0, 39)) - dram_base; + dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base; edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n", (unsigned long)sys_addr, (unsigned long)dram_addr); @@ -597,7 +922,7 @@ static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr) * concerning translating a DramAddr to an InputAddr. 
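As a concrete check of the interleave math described here: with two nodes interleaved (intlv_shift == 1), a dram_addr of 0x12345678 keeps its low 12 bits and drops one interleave bit above them, so input_addr = ((0x12345678 >> 1) & GENMASK_ULL(35, 12)) + 0x678 = 0x091a2678.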
*/ intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0)); - input_addr = ((dram_addr >> intlv_shift) & GENMASK(12, 35)) + + input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) + (dram_addr & 0xfff); edac_dbg(2, " Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n", @@ -618,7 +943,7 @@ static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr) input_addr = dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr)); - edac_dbg(2, "SysAdddr 0x%lx translates to InputAddr 0x%lx\n", + edac_dbg(2, "SysAddr 0x%lx translates to InputAddr 0x%lx\n", (unsigned long)sys_addr, (unsigned long)input_addr); return input_addr; @@ -652,18 +977,96 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr) return csrow; } +/* + * See AMD PPR DF::LclNodeTypeMap + * + * This register gives information for nodes of the same type within a system. + * + * Reading this register from a GPU node will tell how many GPU nodes are in the + * system and what the lowest AMD Node ID value is for the GPU nodes. Use this + * info to fixup the Linux logical "Node ID" value set in the AMD NB code and EDAC. + */ +static struct local_node_map { + u16 node_count; + u16 base_node_id; +} gpu_node_map; + +#define PCI_DEVICE_ID_AMD_MI200_DF_F1 0x14d1 +#define REG_LOCAL_NODE_TYPE_MAP 0x144 + +/* Local Node Type Map (LNTM) fields */ +#define LNTM_NODE_COUNT GENMASK(27, 16) +#define LNTM_BASE_NODE_ID GENMASK(11, 0) + +static int gpu_get_node_map(struct amd64_pvt *pvt) +{ + struct pci_dev *pdev; + int ret; + u32 tmp; + + /* + * Mapping of nodes from hardware-provided AMD Node ID to a + * Linux logical one is applicable for MI200 models. Therefore, + * return early for other heterogeneous systems. + */ + if (pvt->F3->device != PCI_DEVICE_ID_AMD_MI200_DF_F3) + return 0; + + /* + * Node ID 0 is reserved for CPUs. Therefore, a non-zero Node ID + * means the values have been already cached. + */ + if (gpu_node_map.base_node_id) + return 0; + + pdev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F1, NULL); + if (!pdev) { + ret = -ENODEV; + goto out; + } + + ret = pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp); + if (ret) { + ret = pcibios_err_to_errno(ret); + goto out; + } + + gpu_node_map.node_count = FIELD_GET(LNTM_NODE_COUNT, tmp); + gpu_node_map.base_node_id = FIELD_GET(LNTM_BASE_NODE_ID, tmp); + +out: + pci_dev_put(pdev); + return ret; +} + +static int fixup_node_id(int node_id, struct mce *m) +{ + /* MCA_IPID[InstanceIdHi] give the AMD Node ID for the bank. */ + u8 nid = (m->ipid >> 44) & 0xF; + + if (smca_get_bank_type(m->extcpu, m->bank) != SMCA_UMC_V2) + return node_id; + + /* Nodes below the GPU base node are CPU nodes and don't need a fixup. */ + if (nid < gpu_node_map.base_node_id) + return node_id; + + /* Convert the hardware-provided AMD Node ID to a Linux logical one. */ + return nid - gpu_node_map.base_node_id + 1; +} + static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); /* * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs * are ECC capable. */ -static unsigned long amd64_determine_edac_cap(struct amd64_pvt *pvt) +static unsigned long dct_determine_edac_cap(struct amd64_pvt *pvt) { - u8 bit; unsigned long edac_cap = EDAC_FLAG_NONE; + u8 bit; - bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F) + bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F) ? 
19 : 17; @@ -673,107 +1076,458 @@ static unsigned long amd64_determine_edac_cap(struct amd64_pvt *pvt) return edac_cap; } -static void amd64_debug_display_dimm_sizes(struct amd64_pvt *, u8); +static unsigned long umc_determine_edac_cap(struct amd64_pvt *pvt) +{ + u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0; + unsigned long edac_cap = EDAC_FLAG_NONE; + + for_each_umc(i) { + if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT)) + continue; + + umc_en_mask |= BIT(i); + + /* UMC Configuration bit 12 (DimmEccEn) */ + if (pvt->umc[i].umc_cfg & BIT(12)) + dimm_ecc_en_mask |= BIT(i); + } + + if (umc_en_mask == dimm_ecc_en_mask) + edac_cap = EDAC_FLAG_SECDED; + + return edac_cap; +} + +/* + * debug routine to display the memory sizes of all logical DIMMs and its + * CSROWs + */ +static void dct_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) +{ + u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; + u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; + int dimm, size0, size1; + + if (pvt->fam == 0xf) { + /* K8 families < revF not supported yet */ + if (pvt->ext_model < K8_REV_F) + return; + + WARN_ON(ctrl != 0); + } + + if (pvt->fam == 0x10) { + dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1 + : pvt->dbam0; + dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? + pvt->csels[1].csbases : + pvt->csels[0].csbases; + } else if (ctrl) { + dbam = pvt->dbam0; + dcsb = pvt->csels[1].csbases; + } + edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n", + ctrl, dbam); + + edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl); + + /* Dump memory sizes for DIMM and its CSROWs */ + for (dimm = 0; dimm < 4; dimm++) { + size0 = 0; + if (dcsb[dimm * 2] & DCSB_CS_ENABLE) + /* + * For F15m60h, we need multiplier for LRDIMM cs_size + * calculation. We pass dimm value to the dbam_to_cs + * mapper so we can find the multiplier from the + * corresponding DCSM. + */ + size0 = pvt->ops->dbam_to_cs(pvt, ctrl, + DBAM_DIMM(dimm, dbam), + dimm); + + size1 = 0; + if (dcsb[dimm * 2 + 1] & DCSB_CS_ENABLE) + size1 = pvt->ops->dbam_to_cs(pvt, ctrl, + DBAM_DIMM(dimm, dbam), + dimm); + + amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", + dimm * 2, size0, + dimm * 2 + 1, size1); + } +} + -static void amd64_dump_dramcfg_low(u32 dclr, int chan) +static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) { edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr); - edac_dbg(1, " DIMM type: %sbuffered; all DIMMs support ECC: %s\n", - (dclr & BIT(16)) ? "un" : "", - (dclr & BIT(19)) ? "yes" : "no"); + if (pvt->dram_type == MEM_LRDDR3) { + u32 dcsm = pvt->csels[chan].csmasks[0]; + /* + * It's assumed all LRDIMMs in a DCT are going to be of + * same 'type' until proven otherwise. So, use a cs + * value of '0' here to get dcsm value. + */ + edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3)); + } + + edac_dbg(1, "All DIMMs support ECC: %s\n", str_yes_no(dclr & BIT(19))); + edac_dbg(1, " PAR/ERR parity: %s\n", - (dclr & BIT(8)) ? "enabled" : "disabled"); + str_enabled_disabled(dclr & BIT(8))); - if (boot_cpu_data.x86 == 0x10) + if (pvt->fam == 0x10) edac_dbg(1, " DCT 128bit mode width: %s\n", (dclr & BIT(11)) ? "128b" : "64b"); edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n", - (dclr & BIT(12)) ? "yes" : "no", - (dclr & BIT(13)) ? "yes" : "no", - (dclr & BIT(14)) ? "yes" : "no", - (dclr & BIT(15)) ? 
"yes" : "no"); + str_yes_no(dclr & BIT(12)), + str_yes_no(dclr & BIT(13)), + str_yes_no(dclr & BIT(14)), + str_yes_no(dclr & BIT(15))); } -/* Display and decode various NB registers for debug purposes. */ -static void dump_misc_regs(struct amd64_pvt *pvt) +#define CS_EVEN_PRIMARY BIT(0) +#define CS_ODD_PRIMARY BIT(1) +#define CS_EVEN_SECONDARY BIT(2) +#define CS_ODD_SECONDARY BIT(3) +#define CS_3R_INTERLEAVE BIT(4) + +#define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY) +#define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY) + +static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) +{ + u8 base, count = 0; + int cs_mode = 0; + + if (csrow_enabled(2 * dimm, ctrl, pvt)) + cs_mode |= CS_EVEN_PRIMARY; + + if (csrow_enabled(2 * dimm + 1, ctrl, pvt)) + cs_mode |= CS_ODD_PRIMARY; + + if (csrow_sec_enabled(2 * dimm, ctrl, pvt)) + cs_mode |= CS_EVEN_SECONDARY; + + if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) + cs_mode |= CS_ODD_SECONDARY; + + /* + * 3 Rank inteleaving support. + * There should be only three bases enabled and their two masks should + * be equal. + */ + for_each_chip_select(base, ctrl, pvt) + count += csrow_enabled(base, ctrl, pvt); + + if (count == 3 && + pvt->csels[ctrl].csmasks[0] == pvt->csels[ctrl].csmasks[1]) { + edac_dbg(1, "3R interleaving in use.\n"); + cs_mode |= CS_3R_INTERLEAVE; + } + + return cs_mode; +} + +static int calculate_cs_size(u32 mask, unsigned int cs_mode) +{ + int msb, weight, num_zero_bits; + u32 deinterleaved_mask; + + if (!mask) + return 0; + + /* + * The number of zero bits in the mask is equal to the number of bits + * in a full mask minus the number of bits in the current mask. + * + * The MSB is the number of bits in the full mask because BIT[0] is + * always 0. + * + * In the special 3 Rank interleaving case, a single bit is flipped + * without swapping with the most significant bit. This can be handled + * by keeping the MSB where it is and ignoring the single zero bit. + */ + msb = fls(mask) - 1; + weight = hweight_long(mask); + num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); + + /* Take the number of zero bits off from the top of the mask. */ + deinterleaved_mask = GENMASK(msb - num_zero_bits, 1); + edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask); + + return (deinterleaved_mask >> 2) + 1; +} + +static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec, + unsigned int cs_mode, int csrow_nr, int dimm) +{ + int size; + + edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); + edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask); + + /* Register [31:1] = Address [39:9]. Size is in kBs here. */ + size = calculate_cs_size(addr_mask, cs_mode); + + edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec); + size += calculate_cs_size(addr_mask_sec, cs_mode); + + /* Return size in MBs. */ + return size >> 10; +} + +static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, + unsigned int cs_mode, int csrow_nr) +{ + u32 addr_mask = 0, addr_mask_sec = 0; + int cs_mask_nr = csrow_nr; + int dimm, size = 0; + + /* No Chip Selects are enabled. */ + if (!cs_mode) + return size; + + /* Requested size of an even CS but none are enabled. */ + if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1)) + return size; + + /* Requested size of an odd CS but none are enabled. */ + if (!(cs_mode & CS_ODD) && (csrow_nr & 1)) + return size; + + /* + * Family 17h introduced systems with one mask per DIMM, + * and two Chip Selects per DIMM. 
+ * + * CS0 and CS1 -> MASK0 / DIMM0 + * CS2 and CS3 -> MASK1 / DIMM1 + * + * Family 19h Model 10h introduced systems with one mask per Chip Select, + * and two Chip Selects per DIMM. + * + * CS0 -> MASK0 -> DIMM0 + * CS1 -> MASK1 -> DIMM0 + * CS2 -> MASK2 -> DIMM1 + * CS3 -> MASK3 -> DIMM1 + * + * Keep the mask number equal to the Chip Select number for newer systems, + * and shift the mask number for older systems. + */ + dimm = csrow_nr >> 1; + + if (!pvt->flags.zn_regs_v2) + cs_mask_nr >>= 1; + + if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY)) + addr_mask = pvt->csels[umc].csmasks[cs_mask_nr]; + + if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY)) + addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr]; + + return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm); +} + +static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) +{ + int dimm, size0, size1, cs0, cs1, cs_mode; + + edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl); + + for (dimm = 0; dimm < 2; dimm++) { + cs0 = dimm * 2; + cs1 = dimm * 2 + 1; + + cs_mode = umc_get_cs_mode(dimm, ctrl, pvt); + + size0 = umc_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs0); + size1 = umc_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs1); + + amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", + cs0, size0, + cs1, size1); + } +} + +static void umc_dump_misc_regs(struct amd64_pvt *pvt) +{ + struct amd64_umc *umc; + u32 i; + + for_each_umc(i) { + umc = &pvt->umc[i]; + + edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg); + edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg); + edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl); + edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl); + edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi); + + edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n", + i, str_yes_no(umc->umc_cap_hi & BIT(30)), + str_yes_no(umc->umc_cap_hi & BIT(31))); + edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n", + i, str_yes_no(umc->umc_cfg & BIT(12))); + edac_dbg(1, "UMC%d x4 DIMMs present: %s\n", + i, str_yes_no(umc->dimm_cfg & BIT(6))); + edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", + i, str_yes_no(umc->dimm_cfg & BIT(7))); + + umc_debug_display_dimm_sizes(pvt, i); + } +} + +static void dct_dump_misc_regs(struct amd64_pvt *pvt) { edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap); edac_dbg(1, " NB two channel DRAM capable: %s\n", - (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no"); + str_yes_no(pvt->nbcap & NBCAP_DCT_DUAL)); edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n", - (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no", - (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no"); + str_yes_no(pvt->nbcap & NBCAP_SECDED), + str_yes_no(pvt->nbcap & NBCAP_CHIPKILL)); - amd64_dump_dramcfg_low(pvt->dclr0, 0); + debug_dump_dramcfg_low(pvt, pvt->dclr0, 0); edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare); edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n", pvt->dhar, dhar_base(pvt), - (boot_cpu_data.x86 == 0xf) ? k8_dhar_offset(pvt) - : f10_dhar_offset(pvt)); - - edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); + (pvt->fam == 0xf) ? k8_dhar_offset(pvt) + : f10_dhar_offset(pvt)); - amd64_debug_display_dimm_sizes(pvt, 0); + dct_debug_display_dimm_sizes(pvt, 0); /* everything below this point is Fam10h and above */ - if (boot_cpu_data.x86 == 0xf) + if (pvt->fam == 0xf) return; - amd64_debug_display_dimm_sizes(pvt, 1); - - amd64_info("using %s syndromes.\n", ((pvt->ecc_sym_sz == 8) ? 
"x8" : "x4")); + dct_debug_display_dimm_sizes(pvt, 1); /* Only if NOT ganged does dclr1 have valid info */ if (!dct_ganging_enabled(pvt)) - amd64_dump_dramcfg_low(pvt->dclr1, 1); + debug_dump_dramcfg_low(pvt, pvt->dclr1, 1); + + edac_dbg(1, " DramHoleValid: %s\n", str_yes_no(dhar_valid(pvt))); + + amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); } /* - * see BKDG, F2x[1,0][5C:40], F2[1,0][6C:60] + * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60] */ -static void prep_chip_selects(struct amd64_pvt *pvt) +static void dct_prep_chip_selects(struct amd64_pvt *pvt) { - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) { + if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8; + } else if (pvt->fam == 0x15 && pvt->model == 0x30) { + pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; + pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; } else { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; } } +static void umc_prep_chip_selects(struct amd64_pvt *pvt) +{ + int umc; + + for_each_umc(umc) { + pvt->csels[umc].b_cnt = 4; + pvt->csels[umc].m_cnt = pvt->flags.zn_regs_v2 ? 4 : 2; + } +} + +static void umc_read_base_mask(struct amd64_pvt *pvt) +{ + u32 umc_base_reg, umc_base_reg_sec; + u32 umc_mask_reg, umc_mask_reg_sec; + u32 base_reg, base_reg_sec; + u32 mask_reg, mask_reg_sec; + u32 *base, *base_sec; + u32 *mask, *mask_sec; + int cs, umc; + u32 tmp; + + for_each_umc(umc) { + umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR; + umc_base_reg_sec = get_umc_base(umc) + UMCCH_BASE_ADDR_SEC; + + for_each_chip_select(cs, umc, pvt) { + base = &pvt->csels[umc].csbases[cs]; + base_sec = &pvt->csels[umc].csbases_sec[cs]; + + base_reg = umc_base_reg + (cs * 4); + base_reg_sec = umc_base_reg_sec + (cs * 4); + + if (!amd_smn_read(pvt->mc_node_id, base_reg, &tmp)) { + *base = tmp; + edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base, base_reg); + } + + if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, &tmp)) { + *base_sec = tmp; + edac_dbg(0, " DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base_sec, base_reg_sec); + } + } + + umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; + umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(pvt, UMCCH_ADDR_MASK_SEC); + + for_each_chip_select_mask(cs, umc, pvt) { + mask = &pvt->csels[umc].csmasks[cs]; + mask_sec = &pvt->csels[umc].csmasks_sec[cs]; + + mask_reg = umc_mask_reg + (cs * 4); + mask_reg_sec = umc_mask_reg_sec + (cs * 4); + + if (!amd_smn_read(pvt->mc_node_id, mask_reg, &tmp)) { + *mask = tmp; + edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask, mask_reg); + } + + if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, &tmp)) { + *mask_sec = tmp; + edac_dbg(0, " DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask_sec, mask_reg_sec); + } + } + } +} + /* * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers */ -static void read_dct_base_mask(struct amd64_pvt *pvt) +static void dct_read_base_mask(struct amd64_pvt *pvt) { int cs; - prep_chip_selects(pvt); - for_each_chip_select(cs, 0, pvt) { int reg0 = DCSB0 + (cs * 4); int reg1 = DCSB1 + (cs * 4); u32 *base0 = &pvt->csels[0].csbases[cs]; u32 *base1 = &pvt->csels[1].csbases[cs]; - if (!amd64_read_dct_pci_cfg(pvt, reg0, base0)) + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", cs, *base0, reg0); - if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt)) + if (pvt->fam == 0xf) continue; - if 
(!amd64_read_dct_pci_cfg(pvt, reg1, base1)) + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", - cs, *base1, reg1); + cs, *base1, (pvt->fam == 0x10) ? reg1 + : reg0); } for_each_chip_select_mask(cs, 0, pvt) { @@ -782,94 +1536,157 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) u32 *mask0 = &pvt->csels[0].csmasks[cs]; u32 *mask1 = &pvt->csels[1].csmasks[cs]; - if (!amd64_read_dct_pci_cfg(pvt, reg0, mask0)) + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", cs, *mask0, reg0); - if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt)) + if (pvt->fam == 0xf) continue; - if (!amd64_read_dct_pci_cfg(pvt, reg1, mask1)) + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", - cs, *mask1, reg1); + cs, *mask1, (pvt->fam == 0x10) ? reg1 + : reg0); } } -static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt, int cs) +static void umc_determine_memory_type(struct amd64_pvt *pvt) { - enum mem_type type; + struct amd64_umc *umc; + u32 i; - /* F15h supports only DDR3 */ - if (boot_cpu_data.x86 >= 0x15) - type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; - else if (boot_cpu_data.x86 == 0x10 || pvt->ext_model >= K8_REV_F) { - if (pvt->dchr0 & DDR3_MODE) - type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; - else - type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2; - } else { - type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR; - } + for_each_umc(i) { + umc = &pvt->umc[i]; + + if (!(umc->sdp_ctrl & UMC_SDP_INIT)) { + umc->dram_type = MEM_EMPTY; + continue; + } - amd64_info("CS%d: %s\n", cs, edac_mem_types[type]); + /* + * Check if the system supports the "DDR Type" field in UMC Config + * and has DDR5 DIMMs in use. + */ + if (pvt->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR5; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR5; + else + umc->dram_type = MEM_DDR5; + } else { + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR4; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR4; + else + umc->dram_type = MEM_DDR4; + } - return type; + edac_dbg(1, " UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]); + } } -/* Get the number of DCT channels the memory controller is using. */ -static int k8_early_channel_count(struct amd64_pvt *pvt) +static void dct_determine_memory_type(struct amd64_pvt *pvt) { - int flag; + u32 dram_ctrl, dcsm; - if (pvt->ext_model >= K8_REV_F) - /* RevF (NPT) and later */ - flag = pvt->dclr0 & WIDTH_128; - else - /* RevE and earlier */ - flag = pvt->dclr0 & REVE_WIDTH_128; + switch (pvt->fam) { + case 0xf: + if (pvt->ext_model >= K8_REV_F) + goto ddr3; + + pvt->dram_type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR; + return; + + case 0x10: + if (pvt->dchr0 & DDR3_MODE) + goto ddr3; + + pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2; + return; + + case 0x15: + if (pvt->model < 0x60) + goto ddr3; + + /* + * Model 0x60h needs special handling: + * + * We use a Chip Select value of '0' to obtain dcsm. + * Theoretically, it is possible to populate LRDIMMs of different + * 'Rank' value on a DCT. But this is not the common case. So, + * it's reasonable to assume all DIMMs are going to be of same + * 'type' until proven otherwise. 
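In plain terms, the model 0x60h decode that follows keys on three fields: DramControl[10:8] == 0x2 selects DDR4; failing that, DCLR0 bit 16 set means unbuffered DDR3, a nonzero rank multiplier in DCSM[1:0] means load-reduced DDR3, and registered DDR3 is the fallback.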
+ */ + amd64_read_dct_pci_cfg(pvt, 0, DRAM_CONTROL, &dram_ctrl); + dcsm = pvt->csels[0].csmasks[0]; + + if (((dram_ctrl >> 8) & 0x7) == 0x2) + pvt->dram_type = MEM_DDR4; + else if (pvt->dclr0 & BIT(16)) + pvt->dram_type = MEM_DDR3; + else if (dcsm & 0x3) + pvt->dram_type = MEM_LRDDR3; + else + pvt->dram_type = MEM_RDDR3; - /* not used */ - pvt->dclr1 = 0; + return; + + case 0x16: + goto ddr3; + + default: + WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam); + pvt->dram_type = MEM_EMPTY; + } - return (flag) ? 2 : 1; + edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); + return; + +ddr3: + pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; } /* On F10h and later ErrAddr is MC4_ADDR[47:1] */ -static u64 get_error_address(struct mce *m) +static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m) { - struct cpuinfo_x86 *c = &boot_cpu_data; - u64 addr; + u16 mce_nid = topology_amd_node_id(m->extcpu); + struct mem_ctl_info *mci; u8 start_bit = 1; u8 end_bit = 47; + u64 addr; + + mci = edac_mc_find(mce_nid); + if (!mci) + return 0; + + pvt = mci->pvt_info; - if (c->x86 == 0xf) { + if (pvt->fam == 0xf) { start_bit = 3; end_bit = 39; } - addr = m->addr & GENMASK(start_bit, end_bit); + addr = m->addr & GENMASK_ULL(end_bit, start_bit); /* * Erratum 637 workaround */ - if (c->x86 == 0x15) { - struct amd64_pvt *pvt; + if (pvt->fam == 0x15) { u64 cc6_base, tmp_addr; u32 tmp; - u16 mce_nid; u8 intlv_en; - if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) + if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7) return addr; - mce_nid = amd_get_nb_id(m->extcpu); - pvt = mcis[mce_nid]->pvt_info; amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp); intlv_en = tmp >> 21 & 0x7; /* add [47:27] + 3 trailing bits */ - cc6_base = (tmp & GENMASK(0, 20)) << 3; + cc6_base = (tmp & GENMASK_ULL(20, 0)) << 3; /* reverse and add DramIntlvEn */ cc6_base |= intlv_en ^ 0x7; @@ -878,18 +1695,18 @@ static u64 get_error_address(struct mce *m) cc6_base <<= 24; if (!intlv_en) - return cc6_base | (addr & GENMASK(0, 23)); + return cc6_base | (addr & GENMASK_ULL(23, 0)); amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp); /* faster log2 */ - tmp_addr = (addr & GENMASK(12, 23)) << __fls(intlv_en + 1); + tmp_addr = (addr & GENMASK_ULL(23, 12)) << __fls(intlv_en + 1); /* OR DramIntlvSel into bits [14:12] */ - tmp_addr |= (tmp & GENMASK(21, 23)) >> 9; + tmp_addr |= (tmp & GENMASK_ULL(23, 21)) >> 9; /* add remaining [11:0] bits from original MC4_ADDR */ - tmp_addr |= addr & GENMASK(0, 11); + tmp_addr |= addr & GENMASK_ULL(11, 0); return cc6_base | tmp_addr; } @@ -916,15 +1733,15 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor, static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) { struct amd_northbridge *nb; - struct pci_dev *misc, *f1 = NULL; - struct cpuinfo_x86 *c = &boot_cpu_data; + struct pci_dev *f1 = NULL; + unsigned int pci_func; int off = range << 3; u32 llim; amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off, &pvt->ranges[range].base.lo); amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo); - if (c->x86 == 0xf) + if (pvt->fam == 0xf) return; if (!dram_rw(pvt, range)) @@ -934,26 +1751,32 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi); /* F15h: factor in CC6 save area by reading dst node's limit reg */ - if (c->x86 != 0x15) + if (pvt->fam != 0x15) return; nb = node_to_amd_nb(dram_dst_node(pvt, range)); if 
(WARN_ON(!nb)) return; - misc = nb->misc; - f1 = pci_get_related_function(misc->vendor, PCI_DEVICE_ID_AMD_15H_NB_F1, misc); + if (pvt->model == 0x60) + pci_func = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1; + else if (pvt->model == 0x30) + pci_func = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1; + else + pci_func = PCI_DEVICE_ID_AMD_15H_NB_F1; + + f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc); if (WARN_ON(!f1)) return; amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim); - pvt->ranges[range].lim.lo &= GENMASK(0, 15); + pvt->ranges[range].lim.lo &= GENMASK_ULL(15, 0); /* {[39:27],111b} */ pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16; - pvt->ranges[range].lim.hi &= GENMASK(0, 7); + pvt->ranges[range].lim.hi &= GENMASK_ULL(7, 0); /* [47:40] */ pvt->ranges[range].lim.hi |= llim >> 13; @@ -1030,7 +1853,7 @@ static int ddr2_cs_size(unsigned i, bool dct_width) } static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { u32 dclr = dct ? pvt->dclr1 : pvt->dclr0; @@ -1076,65 +1899,35 @@ static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, } } -/* - * Get the number of DCT channels in use. - * - * Return: - * number of Memory Channels in operation - * Pass back: - * contents of the DCL0_LOW register - */ -static int f1x_early_channel_count(struct amd64_pvt *pvt) +static int ddr3_cs_size(unsigned i, bool dct_width) { - int i, j, channels = 0; - - /* On F10h, if we are in 128 bit mode, then we are using 2 channels */ - if (boot_cpu_data.x86 == 0x10 && (pvt->dclr0 & WIDTH_128)) - return 2; - - /* - * Need to check if in unganged mode: In such, there are 2 channels, - * but they are not in 128 bit mode and thus the above 'dclr0' status - * bit will be OFF. - * - * Need to check DCT0[0] and DCT1[0] to see if only one of them has - * their CSEnable bit on. If so, then SINGLE DIMM case. - */ - edac_dbg(0, "Data width is not 128 bits - need more decoding\n"); - - /* - * Check DRAM Bank Address Mapping values for each DIMM to see if there - * is more than just one DIMM present in unganged mode. Need to check - * both controllers since DIMMs can be placed in either one. - */ - for (i = 0; i < 2; i++) { - u32 dbam = (i ? 
pvt->dbam1 : pvt->dbam0); - - for (j = 0; j < 4; j++) { - if (DBAM_DIMM(j, dbam) > 0) { - channels++; - break; - } - } - } + unsigned shift = 0; + int cs_size = 0; - if (channels > 2) - channels = 2; + if (i == 0 || i == 3 || i == 4) + cs_size = -1; + else if (i <= 2) + shift = i; + else if (i == 12) + shift = 7; + else if (!(i & 0x1)) + shift = i >> 1; + else + shift = (i + 1) >> 1; - amd64_info("MCT channel count: %d\n", channels); + if (cs_size != -1) + cs_size = (128 * (1 << !!dct_width)) << shift; - return channels; + return cs_size; } -static int ddr3_cs_size(unsigned i, bool dct_width) +static int ddr3_lrdimm_cs_size(unsigned i, unsigned rank_multiply) { unsigned shift = 0; int cs_size = 0; - if (i == 0 || i == 3 || i == 4) + if (i < 4 || i == 6) cs_size = -1; - else if (i <= 2) - shift = i; else if (i == 12) shift = 7; else if (!(i & 0x1)) @@ -1143,13 +1936,28 @@ static int ddr3_cs_size(unsigned i, bool dct_width) shift = (i + 1) >> 1; if (cs_size != -1) - cs_size = (128 * (1 << !!dct_width)) << shift; + cs_size = rank_multiply * (128 << shift); + + return cs_size; +} + +static int ddr4_cs_size(unsigned i) +{ + int cs_size = 0; + + if (i == 0) + cs_size = -1; + else if (i == 1) + cs_size = 1024; + else + /* Min cs_size = 1G */ + cs_size = 1024 * (1 << (i >> 1)); return cs_size; } static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { u32 dclr = dct ? pvt->dclr1 : pvt->dclr0; @@ -1165,18 +1973,49 @@ static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, * F15h supports only 64bit DCT interfaces */ static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { WARN_ON(cs_mode > 12); return ddr3_cs_size(cs_mode, false); } +/* F15h M60h supports DDR4 mapping as well.. */ +static int f15_m60h_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, + unsigned cs_mode, int cs_mask_nr) +{ + int cs_size; + u32 dcsm = pvt->csels[dct].csmasks[cs_mask_nr]; + + WARN_ON(cs_mode > 12); + + if (pvt->dram_type == MEM_DDR4) { + if (cs_mode > 9) + return -1; + + cs_size = ddr4_cs_size(cs_mode); + } else if (pvt->dram_type == MEM_LRDDR3) { + unsigned rank_multiply = dcsm & 0xf; + + if (rank_multiply == 3) + rank_multiply = 4; + cs_size = ddr3_lrdimm_cs_size(cs_mode, rank_multiply); + } else { + /* Minimum cs size is 512mb for F15hM60h*/ + if (cs_mode == 0x1) + return -1; + + cs_size = ddr3_cs_size(cs_mode, false); + } + + return cs_size; +} + /* - * F16h has only limited cs_modes + * F16h and F15h model 30h have only limited cs_modes. */ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { WARN_ON(cs_mode > 12); @@ -1190,10 +2029,10 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, static void read_dram_ctl_register(struct amd64_pvt *pvt) { - if (boot_cpu_data.x86 == 0xf) + if (pvt->fam == 0xf) return; - if (!amd64_read_dct_pci_cfg(pvt, DCT_SEL_LO, &pvt->dct_sel_lo)) { + if (!amd64_read_pci_cfg(pvt->F2, DCT_SEL_LO, &pvt->dct_sel_lo)) { edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n", pvt->dct_sel_lo, dct_sel_baseaddr(pvt)); @@ -1202,19 +2041,50 @@ static void read_dram_ctl_register(struct amd64_pvt *pvt) if (!dct_ganging_enabled(pvt)) edac_dbg(0, " Address range split per DCT: %s\n", - (dct_high_range_enabled(pvt) ? 
"yes" : "no")); + str_yes_no(dct_high_range_enabled(pvt))); edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n", - (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"), - (dct_memory_cleared(pvt) ? "yes" : "no")); + str_enabled_disabled(dct_data_intlv_enabled(pvt)), + str_yes_no(dct_memory_cleared(pvt))); edac_dbg(0, " channel interleave: %s, " "interleave bits selector: 0x%x\n", - (dct_interleave_enabled(pvt) ? "enabled" : "disabled"), + str_enabled_disabled(dct_interleave_enabled(pvt)), dct_sel_interleave_addr(pvt)); } - amd64_read_dct_pci_cfg(pvt, DCT_SEL_HI, &pvt->dct_sel_hi); + amd64_read_pci_cfg(pvt->F2, DCT_SEL_HI, &pvt->dct_sel_hi); +} + +/* + * Determine channel (DCT) based on the interleaving mode (see F15h M30h BKDG, + * 2.10.12 Memory Interleaving Modes). + */ +static u8 f15_m30h_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, + u8 intlv_en, int num_dcts_intlv, + u32 dct_sel) +{ + u8 channel = 0; + u8 select; + + if (!(intlv_en)) + return (u8)(dct_sel); + + if (num_dcts_intlv == 2) { + select = (sys_addr >> 8) & 0x3; + channel = select ? 0x3 : 0; + } else if (num_dcts_intlv == 4) { + u8 intlv_addr = dct_sel_interleave_addr(pvt); + switch (intlv_addr) { + case 0x4: + channel = (sys_addr >> 8) & 0x3; + break; + case 0x5: + channel = (sys_addr >> 9) & 0x3; + break; + } + } + return channel; } /* @@ -1244,11 +2114,17 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, if (intlv_addr & 0x2) { u8 shift = intlv_addr & 0x1 ? 9 : 6; - u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2; + u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) & 1; return ((sys_addr >> shift) & 1) ^ temp; } + if (intlv_addr & 0x4) { + u8 shift = intlv_addr & 0x1 ? 9 : 8; + + return (sys_addr >> shift) & 1; + } + return (sys_addr >> (12 + hweight8(intlv_en))) & 1; } @@ -1266,7 +2142,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, u64 chan_off; u64 dram_base = get_dram_base(pvt, range); u64 hole_off = f10_dhar_offset(pvt); - u64 dct_sel_base_off = (pvt->dct_sel_hi & 0xFFFFFC00) << 16; + u64 dct_sel_base_off = (u64)(pvt->dct_sel_hi & 0xFFFFFC00) << 16; if (hi_rng) { /* @@ -1303,7 +2179,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, chan_off = dram_base; } - return (sys_addr & GENMASK(6,47)) - (chan_off & GENMASK(23,47)); + return (sys_addr & GENMASK_ULL(47,6)) - (chan_off & GENMASK_ULL(47,23)); } /* @@ -1343,7 +2219,7 @@ static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct) int cs_found = -EINVAL; int csrow; - mci = mcis[nid]; + mci = edac_mc_find(nid); if (!mci) return cs_found; @@ -1366,6 +2242,10 @@ static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct) (in_addr & cs_mask), (cs_base & cs_mask)); if ((in_addr & cs_mask) == (cs_base & cs_mask)) { + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + cs_found = csrow; + break; + } cs_found = f10_process_possible_spare(pvt, dct, csrow); edac_dbg(1, " MATCH csrow=%d\n", cs_found); @@ -1384,15 +2264,13 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr) { u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr; - if (boot_cpu_data.x86 == 0x10) { + if (pvt->fam == 0x10) { /* only revC3 and revE have that feature */ - if (boot_cpu_data.x86_model < 4 || - (boot_cpu_data.x86_model < 0xa && - boot_cpu_data.x86_mask < 3)) + if (pvt->model < 4 || (pvt->model < 0xa && pvt->stepping < 3)) return sys_addr; } - amd64_read_dct_pci_cfg(pvt, SWAP_INTLV_REG, &swap_reg); + amd64_read_pci_cfg(pvt->F2, SWAP_INTLV_REG, 
&swap_reg); if (!(swap_reg & 0x1)) return sys_addr; @@ -1492,20 +2370,146 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, return cs_found; } -static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, - int *chan_sel) +static int f15_m30h_match_to_this_node(struct amd64_pvt *pvt, unsigned range, + u64 sys_addr, int *chan_sel) +{ + int cs_found = -EINVAL; + int num_dcts_intlv = 0; + u64 chan_addr, chan_offset; + u64 dct_base, dct_limit; + u32 dct_cont_base_reg, dct_cont_limit_reg, tmp; + u8 channel, alias_channel, leg_mmio_hole, dct_sel, dct_offset_en; + + u64 dhar_offset = f10_dhar_offset(pvt); + u8 intlv_addr = dct_sel_interleave_addr(pvt); + u8 node_id = dram_dst_node(pvt, range); + u8 intlv_en = dram_intlv_en(pvt, range); + + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &dct_cont_base_reg); + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &dct_cont_limit_reg); + + dct_offset_en = (u8) ((dct_cont_base_reg >> 3) & BIT(0)); + dct_sel = (u8) ((dct_cont_base_reg >> 4) & 0x7); + + edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n", + range, sys_addr, get_dram_limit(pvt, range)); + + if (!(get_dram_base(pvt, range) <= sys_addr) && + !(get_dram_limit(pvt, range) >= sys_addr)) + return -EINVAL; + + if (dhar_valid(pvt) && + dhar_base(pvt) <= sys_addr && + sys_addr < BIT_64(32)) { + amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n", + sys_addr); + return -EINVAL; + } + + /* Verify sys_addr is within DCT Range. */ + dct_base = (u64) dct_sel_baseaddr(pvt); + dct_limit = (dct_cont_limit_reg >> 11) & 0x1FFF; + + if (!(dct_cont_base_reg & BIT(0)) && + !(dct_base <= (sys_addr >> 27) && + dct_limit >= (sys_addr >> 27))) + return -EINVAL; + + /* Verify number of dct's that participate in channel interleaving. */ + num_dcts_intlv = (int) hweight8(intlv_en); + + if (!(num_dcts_intlv % 2 == 0) || (num_dcts_intlv > 4)) + return -EINVAL; + + if (pvt->model >= 0x60) + channel = f1x_determine_channel(pvt, sys_addr, false, intlv_en); + else + channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en, + num_dcts_intlv, dct_sel); + + /* Verify we stay within the MAX number of channels allowed */ + if (channel > 3) + return -EINVAL; + + leg_mmio_hole = (u8) (dct_cont_base_reg >> 1 & BIT(0)); + + /* Get normalized DCT addr */ + if (leg_mmio_hole && (sys_addr >= BIT_64(32))) + chan_offset = dhar_offset; + else + chan_offset = dct_base << 27; + + chan_addr = sys_addr - chan_offset; + + /* remove channel interleave */ + if (num_dcts_intlv == 2) { + if (intlv_addr == 0x4) + chan_addr = ((chan_addr >> 9) << 8) | + (chan_addr & 0xff); + else if (intlv_addr == 0x5) + chan_addr = ((chan_addr >> 10) << 9) | + (chan_addr & 0x1ff); + else + return -EINVAL; + + } else if (num_dcts_intlv == 4) { + if (intlv_addr == 0x4) + chan_addr = ((chan_addr >> 10) << 8) | + (chan_addr & 0xff); + else if (intlv_addr == 0x5) + chan_addr = ((chan_addr >> 11) << 9) | + (chan_addr & 0x1ff); + else + return -EINVAL; + } + + if (dct_offset_en) { + amd64_read_pci_cfg(pvt->F1, + DRAM_CONT_HIGH_OFF + (int) channel * 4, + &tmp); + chan_addr += (u64) ((tmp >> 11) & 0xfff) << 27; + } + + f15h_select_dct(pvt, channel); + + edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr); + + /* + * Find Chip select: + * if channel = 3, then alias it to 1. This is because, in F15 M30h, + * there is support for 4 DCT's, but only 2 are currently functional. + * They are DCT0 and DCT3. But we have read all registers of DCT3 into + * pvt->csels[1]. So we need to use '1' here to get correct info. 
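+	 * (Illustrative example: an error decoded to channel 3 is therefore + * looked up with alias_channel == 1, i.e. against the DCT3 base/mask + * registers cached in pvt->csels[1].)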
+ * Refer F15 M30h BKDG Section 2.10 and 2.10.3 for clarifications. + */ + alias_channel = (channel == 3) ? 1 : channel; + + cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, alias_channel); + + if (cs_found >= 0) + *chan_sel = alias_channel; + + return cs_found; +} + +static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, + u64 sys_addr, + int *chan_sel) { int cs_found = -EINVAL; unsigned range; for (range = 0; range < DRAM_RANGES; range++) { - if (!dram_rw(pvt, range)) continue; - if ((get_dram_base(pvt, range) <= sys_addr) && - (get_dram_limit(pvt, range) >= sys_addr)) { + if (pvt->fam == 0x15 && pvt->model >= 0x30) + cs_found = f15_m30h_match_to_this_node(pvt, range, + sys_addr, + chan_sel); + else if ((get_dram_base(pvt, range) <= sys_addr) && + (get_dram_limit(pvt, range) >= sys_addr)) { cs_found = f1x_match_to_this_node(pvt, range, sys_addr, chan_sel); if (cs_found >= 0) @@ -1545,99 +2549,6 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, } /* - * debug routine to display the memory sizes of all logical DIMMs and its - * CSROWs - */ -static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) -{ - int dimm, size0, size1; - u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; - u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; - - if (boot_cpu_data.x86 == 0xf) { - /* K8 families < revF not supported yet */ - if (pvt->ext_model < K8_REV_F) - return; - else - WARN_ON(ctrl != 0); - } - - dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1 : pvt->dbam0; - dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->csels[1].csbases - : pvt->csels[0].csbases; - - edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n", - ctrl, dbam); - - edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl); - - /* Dump memory sizes for DIMM and its CSROWs */ - for (dimm = 0; dimm < 4; dimm++) { - - size0 = 0; - if (dcsb[dimm*2] & DCSB_CS_ENABLE) - size0 = pvt->ops->dbam_to_cs(pvt, ctrl, - DBAM_DIMM(dimm, dbam)); - - size1 = 0; - if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE) - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, - DBAM_DIMM(dimm, dbam)); - - amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", - dimm * 2, size0, - dimm * 2 + 1, size1); - } -} - -static struct amd64_family_type amd64_family_types[] = { - [K8_CPUS] = { - .ctl_name = "K8", - .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, - .f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC, - .ops = { - .early_channel_count = k8_early_channel_count, - .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, - .dbam_to_cs = k8_dbam_to_chip_select, - .read_dct_pci_cfg = k8_read_dct_pci_cfg, - } - }, - [F10_CPUS] = { - .ctl_name = "F10h", - .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP, - .f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC, - .ops = { - .early_channel_count = f1x_early_channel_count, - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f10_dbam_to_chip_select, - .read_dct_pci_cfg = f10_read_dct_pci_cfg, - } - }, - [F15_CPUS] = { - .ctl_name = "F15h", - .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3, - .ops = { - .early_channel_count = f1x_early_channel_count, - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f15_dbam_to_chip_select, - .read_dct_pci_cfg = f15_read_dct_pci_cfg, - } - }, - [F16_CPUS] = { - .ctl_name = "F16h", - .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3, - .ops = { - .early_channel_count = f1x_early_channel_count, - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f16_dbam_to_chip_select, - 
.read_dct_pci_cfg = f10_read_dct_pci_cfg, - } - }, -}; - -/* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm * uses those to find the symbol in error and thus the DIMM. @@ -1748,14 +2659,11 @@ static int map_err_sym_to_channel(int err_sym, int sym_size) case 0x20: case 0x21: return 0; - break; case 0x22: case 0x23: return 1; - break; default: return err_sym >> 4; - break; } /* x8 symbols */ else @@ -1765,17 +2673,12 @@ static int map_err_sym_to_channel(int err_sym, int sym_size) WARN(1, KERN_ERR "Invalid error symbol: 0x%x\n", err_sym); return -1; - break; - case 0x11: return 0; - break; case 0x12: return 1; - break; default: return err_sym >> 3; - break; } return -1; } @@ -1801,7 +2704,7 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz); } -static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err, +static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err, u8 ecc_type) { enum hw_event_mc_err_type err_type; @@ -1811,6 +2714,8 @@ static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err, err_type = HW_EVENT_ERR_CORRECTED; else if (ecc_type == 1) err_type = HW_EVENT_ERR_UNCORRECTED; + else if (ecc_type == 3) + err_type = HW_EVENT_ERR_DEFERRED; else { WARN(1, "Something is rotten in the state of Denmark.\n"); return; @@ -1827,7 +2732,13 @@ static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err, string = "Failed to map error addr to a csrow"; break; case ERR_CHANNEL: - string = "unknown syndrome - possible error reporting race"; + string = "Unknown syndrome - possible error reporting race"; + break; + case ERR_SYND: + string = "MCA_SYND not valid - unknown syndrome and csrow"; + break; + case ERR_NORM_ADDR: + string = "Cannot decode normalized address"; break; default: string = "WTF error"; @@ -1840,16 +2751,22 @@ static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err, string, ""); } -static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, - struct mce *m) +static inline void decode_bus_error(int node_id, struct mce *m) { - struct amd64_pvt *pvt = mci->pvt_info; + struct mem_ctl_info *mci; + struct amd64_pvt *pvt; u8 ecc_type = (m->status >> 45) & 0x3; u8 xec = XEC(m->status, 0x1f); u16 ec = EC(m->status); u64 sys_addr; struct err_info err; + mci = edac_mc_find(node_id); + if (!mci) + return; + + pvt = mci->pvt_info; + /* Bail out early if this was an 'observed' error */ if (PP(ec) == NBSL_PP_OBS) return; @@ -1860,48 +2777,115 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, memset(&err, 0, sizeof(err)); - sys_addr = get_error_address(m); + sys_addr = get_error_address(pvt, m); if (ecc_type == 2) err.syndrome = extract_syndrome(m->status); pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err); - __log_bus_error(mci, &err, ecc_type); + __log_ecc_error(mci, &err, ecc_type); +} + +/* + * To find the UMC channel represented by this bank we need to match on its + * instance_id. The instance_id of a bank is held in the lower 32 bits of its + * IPID. + * + * Currently, we can derive the channel number by looking at the 6th nibble in + * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel + * number. + * + * For DRAM ECC errors, the Chip Select number is given in bits [2:0] of + * the MCA_SYND[ErrorInformation] field. 
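+ * + * Worked example (hypothetical register values): instance_id = 0x350000 + * decodes to channel = 0x350000 >> 20 = 3, and an MCA_SYND value with + * ErrorInformation bits [2:0] = 0x5 decodes to csrow 5.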
+ */ +static void umc_get_err_info(struct mce *m, struct err_info *err) +{ + err->channel = (m->ipid & GENMASK(31, 0)) >> 20; + err->csrow = m->synd & 0x7; } -void amd64_decode_bus_error(int node_id, struct mce *m) +static void decode_umc_error(int node_id, struct mce *m) { - __amd64_decode_bus_error(mcis[node_id], m); + u8 ecc_type = (m->status >> 45) & 0x3; + struct mem_ctl_info *mci; + unsigned long sys_addr; + struct amd64_pvt *pvt; + struct atl_err a_err; + struct err_info err; + + node_id = fixup_node_id(node_id, m); + + mci = edac_mc_find(node_id); + if (!mci) + return; + + pvt = mci->pvt_info; + + memset(&err, 0, sizeof(err)); + + if (m->status & MCI_STATUS_DEFERRED) + ecc_type = 3; + + if (!(m->status & MCI_STATUS_SYNDV)) { + err.err_code = ERR_SYND; + goto log_error; + } + + if (ecc_type == 2) { + u8 length = (m->synd >> 18) & 0x3f; + + if (length) + err.syndrome = (m->synd >> 32) & GENMASK(length - 1, 0); + else + err.err_code = ERR_CHANNEL; + } + + pvt->ops->get_err_info(m, &err); + + a_err.addr = m->addr; + a_err.ipid = m->ipid; + a_err.cpu = m->extcpu; + + sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err); + if (IS_ERR_VALUE(sys_addr)) { + err.err_code = ERR_NORM_ADDR; + goto log_error; + } + + error_address_to_page_and_offset(sys_addr, &err); + +log_error: + __log_ecc_error(mci, &err, ecc_type); } /* - * Use pvt->F2 which contains the F2 CPU PCI device to get the related - * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error. + * Use pvt->F3 which contains the F3 CPU PCI device to get the related + * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error. */ -static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id) +static int +reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2) { /* Reserve the ADDRESS MAP Device */ - pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2); + pvt->F1 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3); if (!pvt->F1) { - amd64_err("error address map device not found: " - "vendor %x device 0x%x (broken BIOS?)\n", - PCI_VENDOR_ID_AMD, f1_id); + edac_dbg(1, "F1 not found: device 0x%x\n", pci_id1); return -ENODEV; } - /* Reserve the MISC Device */ - pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2); - if (!pvt->F3) { + /* Reserve the DCT Device */ + pvt->F2 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3); + if (!pvt->F2) { pci_dev_put(pvt->F1); pvt->F1 = NULL; - amd64_err("error F3 device not found: " - "vendor %x device 0x%x (broken BIOS?)\n", - PCI_VENDOR_ID_AMD, f3_id); - + edac_dbg(1, "F2 not found: device 0x%x\n", pci_id2); return -ENODEV; } + + if (!pci_ctl_dev) + pci_ctl_dev = &pvt->F2->dev; + edac_dbg(1, "F1: %s\n", pci_name(pvt->F1)); edac_dbg(1, "F2: %s\n", pci_name(pvt->F2)); edac_dbg(1, "F3: %s\n", pci_name(pvt->F3)); @@ -1909,37 +2893,80 @@ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id) return 0; } -static void free_mc_sibling_devs(struct amd64_pvt *pvt) +static void determine_ecc_sym_sz(struct amd64_pvt *pvt) { - pci_dev_put(pvt->F1); - pci_dev_put(pvt->F3); + pvt->ecc_sym_sz = 4; + + if (pvt->fam >= 0x10) { + u32 tmp; + + amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); + /* F16h has only DCT0, so no need to read dbam1. */ + if (pvt->fam != 0x16) + amd64_read_dct_pci_cfg(pvt, 1, DBAM0, &pvt->dbam1); + + /* F10h, revD and later can do x8 ECC too. 
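With EXT_NB_MCA_CFG bit 25 set on such a part, ecc_sym_sz is bumped to 8 just below; otherwise the x4 default of 4 set above is kept.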
*/ + if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25)) + pvt->ecc_sym_sz = 8; + } +} + +/* + * Retrieve the hardware registers of the memory controller. + */ +static void umc_read_mc_regs(struct amd64_pvt *pvt) +{ + u8 nid = pvt->mc_node_id; + struct amd64_umc *umc; + u32 i, tmp, umc_base; + + /* Read registers from each UMC */ + for_each_umc(i) { + + umc_base = get_umc_base(i); + umc = &pvt->umc[i]; + + if (!amd_smn_read(nid, umc_base + get_umc_reg(pvt, UMCCH_DIMM_CFG), &tmp)) + umc->dimm_cfg = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &tmp)) + umc->umc_cfg = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &tmp)) + umc->sdp_ctrl = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &tmp)) + umc->ecc_ctrl = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &tmp)) + umc->umc_cap_hi = tmp; + } } /* * Retrieve the hardware registers of the memory controller (this includes the * 'Address Map' and 'Misc' device regs) */ -static void read_mc_regs(struct amd64_pvt *pvt) +static void dct_read_mc_regs(struct amd64_pvt *pvt) { - struct cpuinfo_x86 *c = &boot_cpu_data; + unsigned int range; u64 msr_val; - u32 tmp; - unsigned range; /* * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since - * those are Read-As-Zero + * those are Read-As-Zero. */ - rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem); + rdmsrq(MSR_K8_TOP_MEM1, pvt->top_mem); edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem); - /* check first whether TOP_MEM2 is enabled */ - rdmsrl(MSR_K8_SYSCFG, msr_val); - if (msr_val & (1U << 21)) { - rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2); + /* Check first whether TOP_MEM2 is enabled: */ + rdmsrq(MSR_AMD64_SYSCFG, msr_val); + if (msr_val & BIT(21)) { + rdmsrq(MSR_K8_TOP_MEM2, pvt->top_mem2); edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2); - } else + } else { edac_dbg(0, " TOP_MEM2 disabled\n"); + } amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap); @@ -1968,34 +2995,20 @@ static void read_mc_regs(struct amd64_pvt *pvt) dram_dst_node(pvt, range)); } - read_dct_base_mask(pvt); - amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar); - amd64_read_dct_pci_cfg(pvt, DBAM0, &pvt->dbam0); + amd64_read_dct_pci_cfg(pvt, 0, DBAM0, &pvt->dbam0); amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare); - amd64_read_dct_pci_cfg(pvt, DCLR0, &pvt->dclr0); - amd64_read_dct_pci_cfg(pvt, DCHR0, &pvt->dchr0); + amd64_read_dct_pci_cfg(pvt, 0, DCLR0, &pvt->dclr0); + amd64_read_dct_pci_cfg(pvt, 0, DCHR0, &pvt->dchr0); if (!dct_ganging_enabled(pvt)) { - amd64_read_dct_pci_cfg(pvt, DCLR1, &pvt->dclr1); - amd64_read_dct_pci_cfg(pvt, DCHR1, &pvt->dchr1); + amd64_read_dct_pci_cfg(pvt, 1, DCLR0, &pvt->dclr1); + amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1); } - pvt->ecc_sym_sz = 4; - - if (c->x86 >= 0x10) { - amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); - if (c->x86 != 0x16) - /* F16h has only DCT0 */ - amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1); - - /* F10h, revD and later can do x8 ECC too */ - if ((c->x86 > 0x10 || c->x86_model > 7) && tmp & BIT(25)) - pvt->ecc_sym_sz = 8; - } - dump_misc_regs(pvt); + determine_ecc_sym_sz(pvt); } /* @@ -2032,22 +3045,16 @@ static void read_mc_regs(struct amd64_pvt *pvt) * encompasses * */ -static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) +static u32 dct_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) { - u32 cs_mode, nr_pages; u32 dbam = dct ? 
pvt->dbam1 : pvt->dbam0; + u32 cs_mode, nr_pages; + csrow_nr >>= 1; + cs_mode = DBAM_DIMM(csrow_nr, dbam); - /* - * The math on this doesn't look right on the surface because x/2*4 can - * be simplified to x*2 but this expression makes use of the fact that - * it is integral math where 1/2=0. This intermediate value becomes the - * number of bits to shift the DBAM register to extract the proper CSROW - * field. - */ - cs_mode = DBAM_DIMM(csrow_nr / 2, dbam); - - nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT); + nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr); + nr_pages <<= 20 - PAGE_SHIFT; edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n", csrow_nr, dct, cs_mode); @@ -2056,19 +3063,75 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) return nr_pages; } +static u32 umc_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) +{ + int csrow_nr = csrow_nr_orig; + u32 cs_mode, nr_pages; + + cs_mode = umc_get_cs_mode(csrow_nr >> 1, dct, pvt); + + nr_pages = umc_addr_mask_to_cs_size(pvt, dct, cs_mode, csrow_nr); + nr_pages <<= 20 - PAGE_SHIFT; + + edac_dbg(0, "csrow: %d, channel: %d, cs_mode %d\n", + csrow_nr_orig, dct, cs_mode); + edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); + + return nr_pages; +} + +static void umc_init_csrows(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + enum edac_type edac_mode = EDAC_NONE; + enum dev_type dev_type = DEV_UNKNOWN; + struct dimm_info *dimm; + u8 umc, cs; + + if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) { + edac_mode = EDAC_S16ECD16ED; + dev_type = DEV_X16; + } else if (mci->edac_ctl_cap & EDAC_FLAG_S8ECD8ED) { + edac_mode = EDAC_S8ECD8ED; + dev_type = DEV_X8; + } else if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) { + edac_mode = EDAC_S4ECD4ED; + dev_type = DEV_X4; + } else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) { + edac_mode = EDAC_SECDED; + } + + for_each_umc(umc) { + for_each_chip_select(cs, umc, pvt) { + if (!csrow_enabled(cs, umc, pvt)) + continue; + + dimm = mci->csrows[cs]->channels[umc]->dimm; + + edac_dbg(1, "MC node: %d, csrow: %d\n", + pvt->mc_node_id, cs); + + dimm->nr_pages = umc_get_csrow_nr_pages(pvt, umc, cs); + dimm->mtype = pvt->umc[umc].dram_type; + dimm->edac_mode = edac_mode; + dimm->dtype = dev_type; + dimm->grain = 64; + } + } +} + /* * Initialize the array of csrow attribute instances, based on the values * from pci config hardware registers. 
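 * For example (illustrative numbers): a chip select which dbam_to_cs() * resolves to 2048 MB contributes 2048 << (20 - PAGE_SHIFT) = 524288 * pages per channel with 4 KiB pages.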
*/ -static int init_csrows(struct mem_ctl_info *mci) +static void dct_init_csrows(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; + enum edac_type edac_mode = EDAC_NONE; struct csrow_info *csrow; struct dimm_info *dimm; - enum edac_type edac_mode; - enum mem_type mtype; - int i, j, empty = 1; int nr_pages = 0; + int i, j; u32 val; amd64_read_pci_cfg(pvt->F3, NBCFG, &val); @@ -2086,52 +3149,46 @@ static int init_csrows(struct mem_ctl_info *mci) bool row_dct0 = !!csrow_enabled(i, 0, pvt); bool row_dct1 = false; - if (boot_cpu_data.x86 != 0xf) + if (pvt->fam != 0xf) row_dct1 = !!csrow_enabled(i, 1, pvt); if (!row_dct0 && !row_dct1) continue; csrow = mci->csrows[i]; - empty = 0; edac_dbg(1, "MC node: %d, csrow: %d\n", pvt->mc_node_id, i); if (row_dct0) { - nr_pages = amd64_csrow_nr_pages(pvt, 0, i); + nr_pages = dct_get_csrow_nr_pages(pvt, 0, i); csrow->channels[0]->dimm->nr_pages = nr_pages; } /* K8 has only one DCT */ - if (boot_cpu_data.x86 != 0xf && row_dct1) { - int row_dct1_pages = amd64_csrow_nr_pages(pvt, 1, i); + if (pvt->fam != 0xf && row_dct1) { + int row_dct1_pages = dct_get_csrow_nr_pages(pvt, 1, i); csrow->channels[1]->dimm->nr_pages = row_dct1_pages; nr_pages += row_dct1_pages; } - mtype = amd64_determine_memory_type(pvt, i); - edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages); - /* - * determine whether CHIPKILL or JUST ECC or NO ECC is operating - */ - if (pvt->nbcfg & NBCFG_ECC_ENABLE) - edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ? - EDAC_S4ECD4ED : EDAC_SECDED; - else - edac_mode = EDAC_NONE; + /* Determine DIMM ECC mode: */ + if (pvt->nbcfg & NBCFG_ECC_ENABLE) { + edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) + ? EDAC_S4ECD4ED + : EDAC_SECDED; + } - for (j = 0; j < pvt->channel_count; j++) { + for (j = 0; j < pvt->max_mcs; j++) { dimm = csrow->channels[j]->dimm; - dimm->mtype = mtype; + dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; + dimm->grain = 64; } } - - return empty; } /* get all cores on this DCT */ @@ -2140,12 +3197,12 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) int cpu; for_each_online_cpu(cpu) - if (amd_get_nb_id(cpu) == nid) + if (topology_amd_node_id(cpu) == nid) cpumask_set_cpu(cpu, mask); } /* check MCG_CTL on all the cpus on this node */ -static bool amd64_nb_mce_bank_enabled_on_node(u16 nid) +static bool nb_mce_bank_enabled_on_node(u16 nid) { cpumask_var_t mask; int cpu, nbe; @@ -2165,8 +3222,7 @@ static bool amd64_nb_mce_bank_enabled_on_node(u16 nid) nbe = reg->l & MSR_MCGCTL_NBE; edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", - cpu, reg->q, - (nbe ? "enabled" : "disabled")); + cpu, reg->q, str_enabled_disabled(nbe)); if (!nbe) goto out; @@ -2185,7 +3241,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) { amd64_warn("%s: error allocating mask\n", __func__); - return false; + return -ENOMEM; } get_cpus_on_this_dct_cpumask(cmask, nid); @@ -2273,7 +3329,6 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, { u32 value, mask = 0x3; /* UECC/CECC enable */ - if (!s->nbctl_valid) return; @@ -2295,69 +3350,81 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, amd64_warn("Error restoring NB MCGCTL settings!\n"); } -/* - * EDAC requires that the BIOS have ECC enabled before - * taking over the processing of ECC errors. A command line - * option allows to force-enable hardware ECC later in - * enable_ecc_error_reporting(). 
- */ -static const char *ecc_msg = - "ECC disabled in the BIOS or no ECC capability, module will not load.\n" - " Either enable ECC checking or force module loading by setting " - "'ecc_enable_override'.\n" - " (Note that use of the override may cause unknown side effects.)\n"; - -static bool ecc_enabled(struct pci_dev *F3, u16 nid) +static bool dct_ecc_enabled(struct amd64_pvt *pvt) { - u32 value; - u8 ecc_en = 0; + u16 nid = pvt->mc_node_id; bool nb_mce_en = false; + u8 ecc_en = 0; + u32 value; - amd64_read_pci_cfg(F3, NBCFG, &value); + amd64_read_pci_cfg(pvt->F3, NBCFG, &value); ecc_en = !!(value & NBCFG_ECC_ENABLE); - amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled")); - nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid); + nb_mce_en = nb_mce_bank_enabled_on_node(nid); if (!nb_mce_en) - amd64_notice("NB MCE bank disabled, set MSR " - "0x%08x[4] on node %d to enable.\n", - MSR_IA32_MCG_CTL, nid); + edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n", + MSR_IA32_MCG_CTL, nid); - if (!ecc_en || !nb_mce_en) { - amd64_notice("%s", ecc_msg); - return false; - } - return true; + edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, str_enabled_disabled(ecc_en)); + + return ecc_en && nb_mce_en; } -static int set_mc_sysfs_attrs(struct mem_ctl_info *mci) +static bool umc_ecc_enabled(struct amd64_pvt *pvt) { - int rc; + struct amd64_umc *umc; + bool ecc_en = false; + int i; - rc = amd64_create_sysfs_dbg_files(mci); - if (rc < 0) - return rc; + /* Check whether at least one UMC is enabled: */ + for_each_umc(i) { + umc = &pvt->umc[i]; - if (boot_cpu_data.x86 >= 0x10) { - rc = amd64_create_sysfs_inject_files(mci); - if (rc < 0) - return rc; + if (umc->sdp_ctrl & UMC_SDP_INIT && + umc->umc_cap_hi & UMC_ECC_ENABLED) { + ecc_en = true; + break; + } } - return 0; + edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, str_enabled_disabled(ecc_en)); + + return ecc_en; } -static void del_mc_sysfs_attrs(struct mem_ctl_info *mci) +static inline void +umc_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) { - amd64_remove_sysfs_dbg_files(mci); + u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1; + + for_each_umc(i) { + if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { + ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED); + cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP); + + dev_x4 &= !!(pvt->umc[i].dimm_cfg & BIT(6)); + dev_x16 &= !!(pvt->umc[i].dimm_cfg & BIT(7)); + } + } + + /* Set chipkill only if ECC is enabled: */ + if (ecc_en) { + mci->edac_ctl_cap |= EDAC_FLAG_SECDED; + + if (!cpk_en) + return; - if (boot_cpu_data.x86 >= 0x10) - amd64_remove_sysfs_inject_files(mci); + if (dev_x4) + mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; + else if (dev_x16) + mci->edac_ctl_cap |= EDAC_FLAG_S16ECD16ED; + else + mci->edac_ctl_cap |= EDAC_FLAG_S8ECD8ED; + } } -static void setup_mci_misc_attrs(struct mem_ctl_info *mci, - struct amd64_family_type *fam) +static void dct_setup_mci_misc_attrs(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; @@ -2370,170 +3437,567 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, if (pvt->nbcap & NBCAP_CHIPKILL) mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; - mci->edac_cap = amd64_determine_edac_cap(pvt); + mci->edac_cap = dct_determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = EDAC_AMD64_VERSION; - mci->ctl_name = fam->ctl_name; - mci->dev_name = pci_name(pvt->F2); + mci->ctl_name = pvt->ctl_name; + mci->dev_name = pci_name(pvt->F3); mci->ctl_page_to_phys = NULL; /* memory scrubber 
interface */ - mci->set_sdram_scrub_rate = amd64_set_scrub_rate; - mci->get_sdram_scrub_rate = amd64_get_scrub_rate; + mci->set_sdram_scrub_rate = set_scrub_rate; + mci->get_sdram_scrub_rate = get_scrub_rate; + + dct_init_csrows(mci); +} + +static void umc_setup_mci_misc_attrs(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + + mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + + umc_determine_edac_ctl_cap(mci, pvt); + + mci->edac_cap = umc_determine_edac_cap(pvt); + mci->mod_name = EDAC_MOD_STR; + mci->ctl_name = pvt->ctl_name; + mci->dev_name = pci_name(pvt->F3); + mci->ctl_page_to_phys = NULL; + + umc_init_csrows(mci); +} + +static int dct_hw_info_get(struct amd64_pvt *pvt) +{ + int ret = reserve_mc_sibling_devs(pvt, pvt->f1_id, pvt->f2_id); + + if (ret) + return ret; + + dct_prep_chip_selects(pvt); + dct_read_base_mask(pvt); + dct_read_mc_regs(pvt); + dct_determine_memory_type(pvt); + + return 0; +} + +static int umc_hw_info_get(struct amd64_pvt *pvt) +{ + pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); + if (!pvt->umc) + return -ENOMEM; + + umc_prep_chip_selects(pvt); + umc_read_base_mask(pvt); + umc_read_mc_regs(pvt); + umc_determine_memory_type(pvt); + + return 0; } /* - * returns a pointer to the family descriptor on success, NULL otherwise. + * The CPUs have one channel per UMC, so UMC number is equivalent to a + * channel number. The GPUs have 8 channels per UMC, so the UMC number no + * longer works as a channel number. + * + * The channel number within a GPU UMC is given in MCA_IPID[15:12]. + * However, the IDs are split such that two UMC values go to one UMC, and + * the channel numbers are split in two groups of four. + * + * Refer to comment on gpu_get_umc_base(). + * + * For example, + * UMC0 CH[3:0] = 0x0005[3:0]000 + * UMC0 CH[7:4] = 0x0015[3:0]000 + * UMC1 CH[3:0] = 0x0025[3:0]000 + * UMC1 CH[7:4] = 0x0035[3:0]000 */ -static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt) +static void gpu_get_err_info(struct mce *m, struct err_info *err) +{ + u8 ch = (m->ipid & GENMASK(31, 0)) >> 20; + u8 phy = ((m->ipid >> 12) & 0xf); + + err->channel = ch % 2 ? 
phy + 4 : phy; + err->csrow = phy; +} + +static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, + unsigned int cs_mode, int csrow_nr) +{ + u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr]; + u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr]; + + return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1); +} + +static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) +{ + int size, cs_mode, cs = 0; + + edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl); + + cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY; + + for_each_chip_select(cs, ctrl, pvt) { + size = gpu_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs); + amd64_info(EDAC_MC ": %d: %5dMB\n", cs, size); + } +} + +static void gpu_dump_misc_regs(struct amd64_pvt *pvt) +{ + struct amd64_umc *umc; + u32 i; + + for_each_umc(i) { + umc = &pvt->umc[i]; + + edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg); + edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl); + edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl); + edac_dbg(1, "UMC%d All HBMs support ECC: yes\n", i); + + gpu_debug_display_dimm_sizes(pvt, i); + } +} + +static u32 gpu_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) { - u8 fam = boot_cpu_data.x86; - struct amd64_family_type *fam_type = NULL; + u32 nr_pages; + int cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY; + + nr_pages = gpu_addr_mask_to_cs_size(pvt, dct, cs_mode, csrow_nr); + nr_pages <<= 20 - PAGE_SHIFT; + + edac_dbg(0, "csrow: %d, channel: %d\n", csrow_nr, dct); + edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); + + return nr_pages; +} + +static void gpu_init_csrows(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + struct dimm_info *dimm; + u8 umc, cs; + + for_each_umc(umc) { + for_each_chip_select(cs, umc, pvt) { + if (!csrow_enabled(cs, umc, pvt)) + continue; + + dimm = mci->csrows[umc]->channels[cs]->dimm; + + edac_dbg(1, "MC node: %d, csrow: %d\n", + pvt->mc_node_id, cs); + + dimm->nr_pages = gpu_get_csrow_nr_pages(pvt, umc, cs); + dimm->edac_mode = EDAC_SECDED; + dimm->mtype = pvt->dram_type; + dimm->dtype = DEV_X16; + dimm->grain = 64; + } + } +} + +static void gpu_setup_mci_misc_attrs(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + + mci->mtype_cap = MEM_FLAG_HBM2; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + + mci->edac_cap = EDAC_FLAG_EC; + mci->mod_name = EDAC_MOD_STR; + mci->ctl_name = pvt->ctl_name; + mci->dev_name = pci_name(pvt->F3); + mci->ctl_page_to_phys = NULL; + + gpu_init_csrows(mci); +} + +/* ECC is enabled by default on GPU nodes */ +static bool gpu_ecc_enabled(struct amd64_pvt *pvt) +{ + return true; +} + +static inline u32 gpu_get_umc_base(struct amd64_pvt *pvt, u8 umc, u8 channel) +{ + /* + * On CPUs, there is one channel per UMC, so UMC numbering equals + * channel numbering. On GPUs, there are eight channels per UMC, + * so the channel numbering is different from UMC numbering. 
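+ * As a worked example of the computation below (illustrative values): + * gpu_umc_base == 0x50000 with umc == 1 and channel == 5 yields + * 0x50000 + (3 << 20) + (1 << 12) == 0x351000.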
+ * + * On CPU nodes channels are selected in 6th nibble + * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000; + * + * On GPU nodes channels are selected in 3rd nibble + * HBM chX[3:0]= [Y ]5X[3:0]000; + * HBM chX[7:4]= [Y+1]5X[3:0]000 + * + * On MI300 APU nodes, same as GPU nodes but channels are selected + * in the base address of 0x90000 + */ + umc *= 2; + + if (channel >= 4) + umc++; + + return pvt->gpu_umc_base + (umc << 20) + ((channel % 4) << 12); +} + +static void gpu_read_mc_regs(struct amd64_pvt *pvt) +{ + u8 nid = pvt->mc_node_id; + struct amd64_umc *umc; + u32 i, tmp, umc_base; + + /* Read registers from each UMC */ + for_each_umc(i) { + umc_base = gpu_get_umc_base(pvt, i, 0); + umc = &pvt->umc[i]; + + if (!amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &tmp)) + umc->umc_cfg = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &tmp)) + umc->sdp_ctrl = tmp; + + if (!amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &tmp)) + umc->ecc_ctrl = tmp; + } +} - switch (fam) { +static void gpu_read_base_mask(struct amd64_pvt *pvt) +{ + u32 base_reg, mask_reg; + u32 *base, *mask; + int umc, cs; + + for_each_umc(umc) { + for_each_chip_select(cs, umc, pvt) { + base_reg = gpu_get_umc_base(pvt, umc, cs) + UMCCH_BASE_ADDR; + base = &pvt->csels[umc].csbases[cs]; + + if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) { + edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base, base_reg); + } + + mask_reg = gpu_get_umc_base(pvt, umc, cs) + UMCCH_ADDR_MASK; + mask = &pvt->csels[umc].csmasks[cs]; + + if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) { + edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask, mask_reg); + } + } + } +} + +static void gpu_prep_chip_selects(struct amd64_pvt *pvt) +{ + int umc; + + for_each_umc(umc) { + pvt->csels[umc].b_cnt = 8; + pvt->csels[umc].m_cnt = 8; + } +} + +static int gpu_hw_info_get(struct amd64_pvt *pvt) +{ + int ret; + + ret = gpu_get_node_map(pvt); + if (ret) + return ret; + + pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); + if (!pvt->umc) + return -ENOMEM; + + gpu_prep_chip_selects(pvt); + gpu_read_base_mask(pvt); + gpu_read_mc_regs(pvt); + + return 0; +} + +static void hw_info_put(struct amd64_pvt *pvt) +{ + pci_dev_put(pvt->F1); + pci_dev_put(pvt->F2); + kfree(pvt->umc); + kfree(pvt->csels); +} + +static struct low_ops umc_ops = { + .hw_info_get = umc_hw_info_get, + .ecc_enabled = umc_ecc_enabled, + .setup_mci_misc_attrs = umc_setup_mci_misc_attrs, + .dump_misc_regs = umc_dump_misc_regs, + .get_err_info = umc_get_err_info, +}; + +static struct low_ops gpu_ops = { + .hw_info_get = gpu_hw_info_get, + .ecc_enabled = gpu_ecc_enabled, + .setup_mci_misc_attrs = gpu_setup_mci_misc_attrs, + .dump_misc_regs = gpu_dump_misc_regs, + .get_err_info = gpu_get_err_info, +}; + +/* Use Family 16h versions for defaults and adjust as needed below. */ +static struct low_ops dct_ops = { + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, + .dbam_to_cs = f16_dbam_to_chip_select, + .hw_info_get = dct_hw_info_get, + .ecc_enabled = dct_ecc_enabled, + .setup_mci_misc_attrs = dct_setup_mci_misc_attrs, + .dump_misc_regs = dct_dump_misc_regs, +}; + +static int per_family_init(struct amd64_pvt *pvt) +{ + char *tmp_name = NULL; + + pvt->ext_model = boot_cpu_data.x86_model >> 4; + pvt->stepping = boot_cpu_data.x86_stepping; + pvt->model = boot_cpu_data.x86_model; + pvt->fam = boot_cpu_data.x86; + pvt->max_mcs = 2; + + /* + * Decide on which ops group to use here and do any family/model + * overrides below.
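+ * For example, a family 0x19, model 0x30 node starts out with umc_ops + * here and is switched to gpu_ops in the family switch when the MI200 + * F3 device ID is matched.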
+ */ + if (pvt->fam >= 0x17) + pvt->ops = &umc_ops; + else + pvt->ops = &dct_ops; + + switch (pvt->fam) { case 0xf: - fam_type = &amd64_family_types[K8_CPUS]; - pvt->ops = &amd64_family_types[K8_CPUS].ops; + tmp_name = (pvt->ext_model >= K8_REV_F) ? + "K8 revF or later" : "K8 revE or earlier"; + pvt->f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP; + pvt->f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL; + pvt->ops->map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow; + pvt->ops->dbam_to_cs = k8_dbam_to_chip_select; break; case 0x10: - fam_type = &amd64_family_types[F10_CPUS]; - pvt->ops = &amd64_family_types[F10_CPUS].ops; + pvt->f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP; + pvt->f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM; + pvt->ops->dbam_to_cs = f10_dbam_to_chip_select; break; case 0x15: - fam_type = &amd64_family_types[F15_CPUS]; - pvt->ops = &amd64_family_types[F15_CPUS].ops; + switch (pvt->model) { + case 0x30: + pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2; + break; + case 0x60: + pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2; + pvt->ops->dbam_to_cs = f15_m60h_dbam_to_chip_select; + break; + case 0x13: + /* Richland is only client */ + return -ENODEV; + default: + pvt->f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2; + pvt->ops->dbam_to_cs = f15_dbam_to_chip_select; + break; + } break; case 0x16: - fam_type = &amd64_family_types[F16_CPUS]; - pvt->ops = &amd64_family_types[F16_CPUS].ops; + switch (pvt->model) { + case 0x30: + pvt->f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2; + break; + default: + pvt->f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2; + break; + } + break; + + case 0x17: + switch (pvt->model) { + case 0x30 ... 0x3f: + pvt->max_mcs = 8; + break; + default: + break; + } + break; + + case 0x18: + break; + + case 0x19: + switch (pvt->model) { + case 0x00 ... 0x0f: + pvt->max_mcs = 8; + break; + case 0x10 ... 0x1f: + pvt->max_mcs = 12; + pvt->flags.zn_regs_v2 = 1; + break; + case 0x30 ... 0x3f: + if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) { + tmp_name = "MI200"; + pvt->max_mcs = 4; + pvt->dram_type = MEM_HBM2; + pvt->gpu_umc_base = 0x50000; + pvt->ops = &gpu_ops; + } else { + pvt->max_mcs = 8; + } + break; + case 0x60 ... 0x6f: + pvt->flags.zn_regs_v2 = 1; + break; + case 0x70 ... 0x7f: + pvt->max_mcs = 4; + pvt->flags.zn_regs_v2 = 1; + break; + case 0x90 ... 0x9f: + pvt->max_mcs = 4; + pvt->dram_type = MEM_HBM3; + pvt->gpu_umc_base = 0x90000; + pvt->ops = &gpu_ops; + break; + case 0xa0 ... 0xaf: + pvt->max_mcs = 12; + pvt->flags.zn_regs_v2 = 1; + break; + } + break; + + case 0x1A: + switch (pvt->model) { + case 0x00 ... 0x1f: + pvt->max_mcs = 12; + pvt->flags.zn_regs_v2 = 1; + break; + case 0x40 ... 0x4f: + pvt->flags.zn_regs_v2 = 1; + break; + case 0x50 ... 0x57: + case 0xc0 ... 0xc7: + pvt->max_mcs = 16; + pvt->flags.zn_regs_v2 = 1; + break; + case 0x90 ... 0x9f: + case 0xa0 ... 0xaf: + pvt->max_mcs = 8; + pvt->flags.zn_regs_v2 = 1; + break; + } break; default: amd64_err("Unsupported family!\n"); - return NULL; + return -ENODEV; } - pvt->ext_model = boot_cpu_data.x86_model >> 4; - - amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name, - (fam == 0xf ? - (pvt->ext_model >= K8_REV_F ? 
"revF or later " - : "revE or earlier ") - : ""), pvt->mc_node_id); - return fam_type; -} - -static int amd64_init_one_instance(struct pci_dev *F2) -{ - struct amd64_pvt *pvt = NULL; - struct amd64_family_type *fam_type = NULL; - struct mem_ctl_info *mci = NULL; - struct edac_mc_layer layers[2]; - int err = 0, ret; - u16 nid = amd_get_node_id(F2); + if (tmp_name) + scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), tmp_name); + else + scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), "F%02Xh_M%02Xh", + pvt->fam, pvt->model); - ret = -ENOMEM; - pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); - if (!pvt) - goto err_ret; + pvt->csels = kcalloc(pvt->max_mcs, sizeof(*pvt->csels), GFP_KERNEL); + if (!pvt->csels) + return -ENOMEM; - pvt->mc_node_id = nid; - pvt->F2 = F2; + return 0; +} - ret = -EINVAL; - fam_type = amd64_per_family_init(pvt); - if (!fam_type) - goto err_free; +static const struct attribute_group *amd64_edac_attr_groups[] = { +#ifdef CONFIG_EDAC_DEBUG + &dbg_group, + &inj_group, +#endif + NULL +}; - ret = -ENODEV; - err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id); - if (err) - goto err_free; +/* + * For heterogeneous and APU models EDAC CHIP_SELECT and CHANNEL layers + * should be swapped to fit into the layers. + */ +static unsigned int get_layer_size(struct amd64_pvt *pvt, u8 layer) +{ + bool is_gpu = (pvt->ops == &gpu_ops); - read_mc_regs(pvt); + if (!layer) + return is_gpu ? pvt->max_mcs + : pvt->csels[0].b_cnt; + else + return is_gpu ? pvt->csels[0].b_cnt + : pvt->max_mcs; +} - /* - * We need to determine how many memory channels there are. Then use - * that information for calculating the size of the dynamic instance - * tables in the 'mci' structure. - */ - ret = -EINVAL; - pvt->channel_count = pvt->ops->early_channel_count(pvt); - if (pvt->channel_count < 0) - goto err_siblings; +static int init_one_instance(struct amd64_pvt *pvt) +{ + struct mem_ctl_info *mci = NULL; + struct edac_mc_layer layers[2]; + int ret = -ENOMEM; - ret = -ENOMEM; layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = pvt->csels[0].b_cnt; + layers[0].size = get_layer_size(pvt, 0); layers[0].is_virt_csrow = true; layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = pvt->channel_count; + layers[1].size = get_layer_size(pvt, 1); layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0); + + mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0); if (!mci) - goto err_siblings; + return ret; mci->pvt_info = pvt; - mci->pdev = &pvt->F2->dev; - - setup_mci_misc_attrs(mci, fam_type); + mci->pdev = &pvt->F3->dev; - if (init_csrows(mci)) - mci->edac_cap = EDAC_FLAG_NONE; + pvt->ops->setup_mci_misc_attrs(mci); ret = -ENODEV; - if (edac_mc_add_mc(mci)) { + if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) { edac_dbg(1, "failed edac_mc_add_mc()\n"); - goto err_add_mc; + edac_mc_free(mci); + return ret; } - if (set_mc_sysfs_attrs(mci)) { - edac_dbg(1, "failed edac_mc_add_mc()\n"); - goto err_add_sysfs; - } - - /* register stuff with EDAC MCE */ - if (report_gart_errors) - amd_report_gart_errors(true); - - amd_register_ecc_decoder(amd64_decode_bus_error); - - mcis[nid] = mci; - - atomic_inc(&drv_instances); return 0; +} -err_add_sysfs: - edac_mc_del_mc(mci->pdev); -err_add_mc: - edac_mc_free(mci); - -err_siblings: - free_mc_sibling_devs(pvt); +static bool instance_has_memory(struct amd64_pvt *pvt) +{ + bool cs_enabled = false; + int cs = 0, dct = 0; -err_free: - kfree(pvt); + for (dct = 0; dct < pvt->max_mcs; dct++) { + 
for_each_chip_select(cs, dct, pvt) + cs_enabled |= csrow_enabled(cs, dct, pvt); + } -err_ret: - return ret; + return cs_enabled; } -static int amd64_probe_one_instance(struct pci_dev *pdev, - const struct pci_device_id *mc_type) +static int probe_one_instance(unsigned int nid) { - u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; + struct amd64_pvt *pvt = NULL; struct ecc_settings *s; - int ret = 0; - - ret = pci_enable_device(pdev); - if (ret < 0) { - edac_dbg(0, "ret=%d\n", ret); - return -EIO; - } + int ret; ret = -ENOMEM; s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); @@ -2542,27 +4006,65 @@ static int amd64_probe_one_instance(struct pci_dev *pdev, ecc_stngs[nid] = s; - if (!ecc_enabled(F3, nid)) { + pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); + if (!pvt) + goto err_settings; + + pvt->mc_node_id = nid; + pvt->F3 = F3; + + ret = per_family_init(pvt); + if (ret < 0) + goto err_enable; + + ret = pvt->ops->hw_info_get(pvt); + if (ret < 0) + goto err_enable; + + ret = 0; + if (!instance_has_memory(pvt)) { + amd64_info("Node %d: No DIMMs detected.\n", nid); + goto err_enable; + } + + if (!pvt->ops->ecc_enabled(pvt)) { ret = -ENODEV; if (!ecc_enable_override) goto err_enable; - amd64_warn("Forcing ECC on!\n"); + if (boot_cpu_data.x86 >= 0x17) { + amd64_warn("Forcing ECC on is not recommended on newer systems. Please enable ECC in BIOS."); + goto err_enable; + } else + amd64_warn("Forcing ECC on!\n"); if (!enable_ecc_error_reporting(s, nid, F3)) goto err_enable; } - ret = amd64_init_one_instance(pdev); + ret = init_one_instance(pvt); if (ret < 0) { amd64_err("Error probing instance: %d\n", nid); - restore_ecc_error_reporting(s, nid, F3); + + if (boot_cpu_data.x86 < 0x17) + restore_ecc_error_reporting(s, nid, F3); + + goto err_enable; } + amd64_info("%s detected (node %d).\n", pvt->ctl_name, pvt->mc_node_id); + + /* Display and decode various registers for debug purposes. */ + pvt->ops->dump_misc_regs(pvt); + return ret; err_enable: + hw_info_put(pvt); + kfree(pvt); + +err_settings: kfree(s); ecc_stngs[nid] = NULL; @@ -2570,18 +4072,15 @@ err_out: return ret; } -static void amd64_remove_one_instance(struct pci_dev *pdev) +static void remove_one_instance(unsigned int nid) { - struct mem_ctl_info *mci; - struct amd64_pvt *pvt; - u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; + struct mem_ctl_info *mci; + struct amd64_pvt *pvt; - mci = find_mci_by_dev(&pdev->dev); - del_mc_sysfs_attrs(mci); /* Remove from EDAC CORE tracking list */ - mci = edac_mc_del_mc(&pdev->dev); + mci = edac_mc_del_mc(&F3->dev); if (!mci) return; @@ -2589,160 +4088,135 @@ static void amd64_remove_one_instance(struct pci_dev *pdev) restore_ecc_error_reporting(s, nid, F3); - free_mc_sibling_devs(pvt); - - /* unregister from EDAC MCE */ - amd_report_gart_errors(false); - amd_unregister_ecc_decoder(amd64_decode_bus_error); - kfree(ecc_stngs[nid]); ecc_stngs[nid] = NULL; /* Free the EDAC CORE resources */ mci->pvt_info = NULL; - mcis[nid] = NULL; + hw_info_put(pvt); kfree(pvt); edac_mc_free(mci); } -/* - * This table is part of the interface for loading drivers for PCI devices. The - * PCI core identifies what devices are on a system during boot, and then - * inquiry this table to see if this driver is for a given device found. 
- */ -static DEFINE_PCI_DEVICE_TABLE(amd64_pci_table) = { - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_10H_NB_DRAM, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_15H_NB_F2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_16H_NB_F2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - - {0, } -}; -MODULE_DEVICE_TABLE(pci, amd64_pci_table); - -static struct pci_driver amd64_pci_driver = { - .name = EDAC_MOD_STR, - .probe = amd64_probe_one_instance, - .remove = amd64_remove_one_instance, - .id_table = amd64_pci_table, -}; - static void setup_pci_device(void) { - struct mem_ctl_info *mci; - struct amd64_pvt *pvt; - - if (amd64_ctl_pci) + if (pci_ctl) return; - mci = mcis[0]; - if (mci) { - - pvt = mci->pvt_info; - amd64_ctl_pci = - edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR); - - if (!amd64_ctl_pci) { - pr_warning("%s(): Unable to create PCI control\n", - __func__); - - pr_warning("%s(): PCI error report via EDAC not set\n", - __func__); - } + pci_ctl = edac_pci_create_generic_ctl(pci_ctl_dev, EDAC_MOD_STR); + if (!pci_ctl) { + pr_warn("%s(): Unable to create PCI control\n", __func__); + pr_warn("%s(): PCI error report via EDAC not set\n", __func__); } } +static const struct x86_cpu_id amd64_cpuids[] = { + X86_MATCH_VENDOR_FAM(AMD, 0x0F, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x10, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x16, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL), + X86_MATCH_VENDOR_FAM(HYGON, 0x18, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x1A, NULL), + { } +}; +MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); + static int __init amd64_edac_init(void) { + const char *owner; int err = -ENODEV; + int i; - printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION); + if (ghes_get_devices()) + return -EBUSY; - opstate_init(); + owner = edac_get_owner(); + if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) + return -EBUSY; + + if (!x86_match_cpu(amd64_cpuids)) + return -ENODEV; + + if (!amd_nb_num()) + return -ENODEV; - if (amd_cache_northbridges() < 0) - goto err_ret; + opstate_init(); err = -ENOMEM; - mcis = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL); - ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL); - if (!(mcis && ecc_stngs)) + ecc_stngs = kcalloc(amd_nb_num(), sizeof(ecc_stngs[0]), GFP_KERNEL); + if (!ecc_stngs) goto err_free; msrs = msrs_alloc(); if (!msrs) goto err_free; - err = pci_register_driver(&amd64_pci_driver); - if (err) + for (i = 0; i < amd_nb_num(); i++) { + err = probe_one_instance(i); + if (err) { + /* unwind properly */ + while (--i >= 0) + remove_one_instance(i); + + goto err_pci; + } + } + + if (!edac_has_mcs()) { + err = -ENODEV; goto err_pci; + } - err = -ENODEV; - if (!atomic_read(&drv_instances)) - goto err_no_instances; + /* register stuff with EDAC MCE */ + if (boot_cpu_data.x86 >= 0x17) { + amd_register_ecc_decoder(decode_umc_error); + } else { + amd_register_ecc_decoder(decode_bus_error); + setup_pci_device(); + } - setup_pci_device(); - return 0; +#ifdef CONFIG_X86_32 + amd64_err("%s on 
32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR); +#endif -err_no_instances: - pci_unregister_driver(&amd64_pci_driver); + return 0; err_pci: + pci_ctl_dev = NULL; + msrs_free(msrs); msrs = NULL; err_free: - kfree(mcis); - mcis = NULL; - kfree(ecc_stngs); ecc_stngs = NULL; -err_ret: return err; } static void __exit amd64_edac_exit(void) { - if (amd64_ctl_pci) - edac_pci_release_generic_ctl(amd64_ctl_pci); + int i; + + if (pci_ctl) + edac_pci_release_generic_ctl(pci_ctl); + + /* unregister from EDAC MCE */ + if (boot_cpu_data.x86 >= 0x17) + amd_unregister_ecc_decoder(decode_umc_error); + else + amd_unregister_ecc_decoder(decode_bus_error); - pci_unregister_driver(&amd64_pci_driver); + for (i = 0; i < amd_nb_num(); i++) + remove_one_instance(i); kfree(ecc_stngs); ecc_stngs = NULL; - kfree(mcis); - mcis = NULL; + pci_ctl_dev = NULL; msrs_free(msrs); msrs = NULL; @@ -2752,10 +4226,8 @@ module_init(amd64_edac_init); module_exit(amd64_edac_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, " - "Dave Peterson, Thayne Harbaugh"); -MODULE_DESCRIPTION("MC support for AMD64 memory controllers - " - EDAC_AMD64_VERSION); +MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, Dave Peterson, Thayne Harbaugh; AMD"); +MODULE_DESCRIPTION("MC support for AMD64 memory controllers"); module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 2c6f113bae2b..1757c1b99fc8 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -2,64 +2,10 @@ * AMD64 class Memory Controller kernel module * * Copyright (c) 2009 SoftwareBitMaker. - * Copyright (c) 2009 Advanced Micro Devices, Inc. + * Copyright (c) 2009-15 Advanced Micro Devices, Inc. * * This file may be distributed under the terms of the * GNU General Public License. - * - * Originally Written by Thayne Harbaugh - * - * Changes by Douglas "norsk" Thompson <dougthompson@xmission.com>: - * - K8 CPU Revision D and greater support - * - * Changes by Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>: - * - Module largely rewritten, with new (and hopefully correct) - * code for dealing with node and chip select interleaving, - * various code cleanup, and bug fixes - * - Added support for memory hoisting using DRAM hole address - * register - * - * Changes by Douglas "norsk" Thompson <dougthompson@xmission.com>: - * -K8 Rev (1207) revision support added, required Revision - * specific mini-driver code to support Rev F as well as - * prior revisions - * - * Changes by Douglas "norsk" Thompson <dougthompson@xmission.com>: - * -Family 10h revision support added. New PCI Device IDs, - * indicating new changes. Actual registers modified - * were slight, less than the Rev E to Rev F transition - * but changing the PCI Device ID was the proper thing to - * do, as it provides for almost automactic family - * detection. The mods to Rev F required more family - * information detection. 
- * - * Changes/Fixes by Borislav Petkov <bp@alien8.de>: - * - misc fixes and code cleanups - * - * This module is based on the following documents - * (available from http://www.amd.com/): - * - * Title: BIOS and Kernel Developer's Guide for AMD Athlon 64 and AMD - * Opteron Processors - * AMD publication #: 26094 - *` Revision: 3.26 - * - * Title: BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh - * Processors - * AMD publication #: 32559 - * Revision: 3.00 - * Issue Date: May 2006 - * - * Title: BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h - * Processors - * AMD publication #: 31116 - * Revision: 3.00 - * Issue Date: September 07, 2007 - * - * Sections in the first 2 documents are no longer in sync with each other. - * The Family 10h BKDG was totally re-written from scratch with a new - * presentation model. - * Therefore, comments that refer to a Document section might be off. */ #include <linux/module.h> @@ -70,24 +16,20 @@ #include <linux/slab.h> #include <linux/mmzone.h> #include <linux/edac.h> +#include <linux/bitfield.h> +#include <asm/cpu_device_id.h> #include <asm/msr.h> -#include "edac_core.h" +#include "edac_module.h" #include "mce_amd.h" -#define amd64_debug(fmt, arg...) \ - edac_printk(KERN_DEBUG, "amd64", fmt, ##arg) - #define amd64_info(fmt, arg...) \ edac_printk(KERN_INFO, "amd64", fmt, ##arg) -#define amd64_notice(fmt, arg...) \ - edac_printk(KERN_NOTICE, "amd64", fmt, ##arg) - #define amd64_warn(fmt, arg...) \ - edac_printk(KERN_WARNING, "amd64", fmt, ##arg) + edac_printk(KERN_WARNING, "amd64", "Warning: " fmt, ##arg) #define amd64_err(fmt, arg...) \ - edac_printk(KERN_ERR, "amd64", fmt, ##arg) + edac_printk(KERN_ERR, "amd64", "Error: " fmt, ##arg) #define amd64_mc_warn(mci, fmt, arg...) \ edac_mc_chipset_printk(mci, KERN_WARNING, "amd64", fmt, ##arg) @@ -144,7 +86,6 @@ * sections 3.5.4 and 3.5.5 for more information. */ -#define EDAC_AMD64_VERSION "3.4.0" #define EDAC_MOD_STR "amd64_edac" /* Extended Model from CPUID, for CPU Revision numbers */ @@ -159,21 +100,21 @@ #define ON true #define OFF false -/* - * Create a contiguous bitmask starting at bit position @lo and ending at - * position @hi. For example - * - * GENMASK(21, 39) gives us the 64bit vector 0x000000ffffe00000. 
- */ -#define GENMASK(lo, hi) (((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) +#define MAX_CTL_NAMELEN 19 /* * PCI-defined configuration space registers */ #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601 #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602 +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c +#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F1 0x1571 +#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F2 0x1572 #define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531 #define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532 +#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581 +#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582 /* * Function 1 - Address Map @@ -181,13 +122,22 @@ #define DRAM_BASE_LO 0x40 #define DRAM_LIMIT_LO 0x44 -#define dram_intlv_en(pvt, i) ((u8)((pvt->ranges[i].base.lo >> 8) & 0x7)) +/* + * F15 M30h D18F1x2[1C:00] + */ +#define DRAM_CONT_BASE 0x200 +#define DRAM_CONT_LIMIT 0x204 + +/* + * F15 M30h D18F1x2[4C:40] + */ +#define DRAM_CONT_HIGH_OFF 0x240 + #define dram_rw(pvt, i) ((u8)(pvt->ranges[i].base.lo & 0x3)) #define dram_intlv_sel(pvt, i) ((u8)((pvt->ranges[i].lim.lo >> 8) & 0x7)) #define dram_dst_node(pvt, i) ((u8)(pvt->ranges[i].lim.lo & 0x7)) #define DHAR 0xf0 -#define dhar_valid(pvt) ((pvt)->dhar & BIT(0)) #define dhar_mem_hoist_valid(pvt) ((pvt)->dhar & BIT(1)) #define dhar_base(pvt) ((pvt)->dhar & 0xff000000) #define k8_dhar_offset(pvt) (((pvt)->dhar & 0x0000ff00) << 16) @@ -214,7 +164,10 @@ #define DCSM0 0x60 #define DCSM1 0x160 -#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define csrow_sec_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases_sec[(i)] & DCSB_CS_ENABLE) + +#define DRAM_CONTROL 0x78 #define DBAM0 0x80 #define DBAM1 0x180 @@ -234,8 +187,6 @@ #define DDR3_MODE BIT(8) #define DCT_SEL_LO 0x110 -#define dct_sel_baseaddr(pvt) ((pvt)->dct_sel_lo & 0xFFFFF800) -#define dct_sel_interleave_addr(pvt) (((pvt)->dct_sel_lo >> 6) & 0x3) #define dct_high_range_enabled(pvt) ((pvt)->dct_sel_lo & BIT(0)) #define dct_interleave_enabled(pvt) ((pvt)->dct_sel_lo & BIT(2)) @@ -248,6 +199,8 @@ #define DCT_SEL_HI 0x114 +#define F15H_M60H_SCRCTRL 0x1C8 + /* * Function 3 - Misc Control */ @@ -293,13 +246,29 @@ /* MSRs */ #define MSR_MCGCTL_NBE BIT(4) -enum amd_families { - K8_CPUS = 0, - F10_CPUS, - F15_CPUS, - F16_CPUS, - NUM_FAMILIES, -}; +/* F17h */ + +/* F0: */ +#define DF_DHAR 0x104 + +/* UMC CH register offsets */ +#define UMCCH_BASE_ADDR 0x0 +#define UMCCH_BASE_ADDR_SEC 0x10 +#define UMCCH_ADDR_MASK 0x20 +#define UMCCH_ADDR_MASK_SEC 0x28 +#define UMCCH_ADDR_MASK_SEC_DDR5 0x30 +#define UMCCH_DIMM_CFG 0x80 +#define UMCCH_DIMM_CFG_DDR5 0x90 +#define UMCCH_UMC_CFG 0x100 +#define UMCCH_SDP_CTRL 0x104 +#define UMCCH_ECC_CTRL 0x14C +#define UMCCH_UMC_CAP_HI 0xDF4 + +/* UMC CH bitfields */ +#define UMC_ECC_CHIPKILL_CAP BIT(31) +#define UMC_ECC_ENABLED BIT(30) + +#define UMC_SDP_INIT BIT(31) /* Error injection control structure */ struct error_injection { @@ -324,12 +293,35 @@ struct dram_range { /* A DCT chip selects collection */ struct chip_select { u32 csbases[NUM_CHIPSELECTS]; + u32 csbases_sec[NUM_CHIPSELECTS]; u8 b_cnt; u32 csmasks[NUM_CHIPSELECTS]; + u32 csmasks_sec[NUM_CHIPSELECTS]; u8 m_cnt; }; +struct amd64_umc { + u32 dimm_cfg; /* DIMM Configuration reg */ + u32 umc_cfg; /* Configuration reg */ + u32 sdp_ctrl; /* SDP Control reg */ + u32 ecc_ctrl; /* DRAM ECC Control reg */ + u32 umc_cap_hi; /* Capabilities High reg */ + + /* cache the dram_type */ + 
enum mem_type dram_type; +}; + +struct amd64_family_flags { + /* + * Indicates that the system supports the new register offsets, etc. + * first introduced with Family 19h Model 10h. + */ + __u64 zn_regs_v2 : 1, + + __reserved : 63; +}; + struct amd64_pvt { struct low_ops *ops; @@ -337,8 +329,11 @@ struct amd64_pvt { struct pci_dev *F1, *F2, *F3; u16 mc_node_id; /* MC index of this MC node */ + u8 fam; /* CPU family */ + u8 model; /* ... model */ + u8 stepping; /* ... stepping */ + int ext_model; /* extended model value of this node */ - int channel_count; /* Raw registers */ u32 dclr0; /* DRAM Configuration Low DCT0 reg */ @@ -347,13 +342,12 @@ struct amd64_pvt { u32 dchr1; /* DRAM Configuration High DCT1 reg */ u32 nbcap; /* North Bridge Capabilities */ u32 nbcfg; /* F10 North Bridge Configuration */ - u32 ext_nbcfg; /* Extended F10 North Bridge Configuration */ u32 dhar; /* DRAM Hoist reg */ u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */ u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ - /* one for each DCT */ - struct chip_select csels[2]; + /* one for each DCT/UMC */ + struct chip_select *csels; /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */ struct dram_range ranges[DRAM_RANGES]; @@ -364,12 +358,29 @@ struct amd64_pvt { u32 dct_sel_lo; /* DRAM Controller Select Low */ u32 dct_sel_hi; /* DRAM Controller Select High */ u32 online_spare; /* On-Line spare Reg */ + u32 gpu_umc_base; /* Base address used for channel selection on GPUs */ - /* x4 or x8 syndromes in use */ + /* x4, x8, or x16 syndromes in use */ u8 ecc_sym_sz; + char ctl_name[MAX_CTL_NAMELEN]; + u16 f1_id, f2_id; + /* Maximum number of memory controllers per die/node. */ + u8 max_mcs; + + struct amd64_family_flags flags; /* place to store error injection parameters prior to issue */ struct error_injection injection; + + /* + * cache the dram_type + * + * NOTE: Don't use this for Family 17h and later. + * Use dram_type in struct amd64_umc instead. 
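+	 *
+	 * A minimal sketch of the intended dispatch (hypothetical helper,
+	 * not part of this patch):
+	 *
+	 *	static enum mem_type pvt_dram_type(struct amd64_pvt *pvt, u8 i)
+	 *	{
+	 *		if (pvt->fam >= 0x17)
+	 *			return pvt->umc[i].dram_type;
+	 *		return pvt->dram_type;
+	 *	}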
+ */ + enum mem_type dram_type; + + struct amd64_umc *umc; /* UMC registers */ }; enum err_codes { @@ -377,6 +388,8 @@ enum err_codes { ERR_NODE = -1, ERR_CSROW = -2, ERR_CHANNEL = -3, + ERR_SYND = -4, + ERR_NORM_ADDR = -5, }; struct err_info { @@ -389,6 +402,12 @@ struct err_info { u32 offset; }; +static inline u32 get_umc_base(u8 channel) +{ + /* chY: 0xY50000 */ + return 0x50000 + (channel << 20); +} + static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i) { u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8; @@ -414,6 +433,14 @@ static inline u16 extract_syndrome(u64 status) return ((status >> 47) & 0xff) | ((status >> 16) & 0xff00); } +static inline u8 dct_sel_interleave_addr(struct amd64_pvt *pvt) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) + return (((pvt->dct_sel_hi >> 9) & 0x1) << 2) | + ((pvt->dct_sel_lo >> 6) & 0x3); + + return ((pvt)->dct_sel_lo >> 6) & 0x3; +} /* * per-node ECC settings descriptor */ @@ -427,51 +454,20 @@ struct ecc_settings { } flags; }; -#ifdef CONFIG_EDAC_DEBUG -int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci); -void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci); - -#else -static inline int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci) -{ - return 0; -} -static void inline amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci) -{ -} -#endif - -#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION -int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci); -void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci); - -#else -static inline int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci) -{ - return 0; -} -static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci) -{ -} -#endif - /* * Each of the PCI Device IDs types have their own set of hardware accessor * functions and per device encoding/decoding logic. 
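 *
 * For orientation, a family-specific driver fills in one such table at
 * init time; a minimal hypothetical instance (the umc_* names are
 * assumptions, only the member names come from this header):
 *
 *	static struct low_ops umc_ops = {
 *		.hw_info_get	= umc_hw_info_get,
 *		.ecc_enabled	= umc_ecc_enabled,
 *		.get_err_info	= umc_get_err_info,
 *	};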
*/ struct low_ops { - int (*early_channel_count) (struct amd64_pvt *pvt); - void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr, - struct err_info *); - int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode); - int (*read_dct_pci_cfg) (struct amd64_pvt *pvt, int offset, - u32 *val, const char *func); -}; - -struct amd64_family_type { - const char *ctl_name; - u16 f1_id, f3_id; - struct low_ops ops; + void (*map_sysaddr_to_csrow)(struct mem_ctl_info *mci, u64 sys_addr, + struct err_info *err); + int (*dbam_to_cs)(struct amd64_pvt *pvt, u8 dct, + unsigned int cs_mode, int cs_mask_nr); + int (*hw_info_get)(struct amd64_pvt *pvt); + bool (*ecc_enabled)(struct amd64_pvt *pvt); + void (*setup_mci_misc_attrs)(struct mem_ctl_info *mci); + void (*dump_misc_regs)(struct amd64_pvt *pvt); + void (*get_err_info)(struct mce *m, struct err_info *err); }; int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, @@ -485,12 +481,6 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, #define amd64_write_pci_cfg(pdev, offset, val) \ __amd64_write_pci_cfg_dword(pdev, offset, val, __func__) -#define amd64_read_dct_pci_cfg(pvt, offset, val) \ - pvt->ops->read_dct_pci_cfg(pvt, offset, val, __func__) - -int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, - u64 *hole_offset, u64 *hole_size); - #define to_mci(k) container_of(k, struct mem_ctl_info, dev) /* Injection helpers */ @@ -504,3 +494,33 @@ static inline void enable_caches(void *dummy) { write_cr0(read_cr0() & ~X86_CR0_CD); } + +static inline u8 dram_intlv_en(struct amd64_pvt *pvt, unsigned int i) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + u32 tmp; + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &tmp); + return (u8) tmp & 0xF; + } + return (u8) (pvt->ranges[i].base.lo >> 8) & 0x7; +} + +static inline u8 dhar_valid(struct amd64_pvt *pvt) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + u32 tmp; + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp); + return (tmp >> 1) & BIT(0); + } + return (pvt)->dhar & BIT(0); +} + +static inline u32 dct_sel_baseaddr(struct amd64_pvt *pvt) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + u32 tmp; + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp); + return (tmp >> 11) & 0x1FFF; + } + return (pvt)->dct_sel_lo & 0xFFFFF800; +} diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c deleted file mode 100644 index 2c1bbf740605..000000000000 --- a/drivers/edac/amd64_edac_dbg.c +++ /dev/null @@ -1,73 +0,0 @@ -#include "amd64_edac.h" - -#define EDAC_DCT_ATTR_SHOW(reg) \ -static ssize_t amd64_##reg##_show(struct device *dev, \ - struct device_attribute *mattr, \ - char *data) \ -{ \ - struct mem_ctl_info *mci = to_mci(dev); \ - struct amd64_pvt *pvt = mci->pvt_info; \ - return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \ -} - -EDAC_DCT_ATTR_SHOW(dhar); -EDAC_DCT_ATTR_SHOW(dbam0); -EDAC_DCT_ATTR_SHOW(top_mem); -EDAC_DCT_ATTR_SHOW(top_mem2); - -static ssize_t amd64_hole_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct mem_ctl_info *mci = to_mci(dev); - - u64 hole_base = 0; - u64 hole_offset = 0; - u64 hole_size = 0; - - amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size); - - return sprintf(data, "%llx %llx %llx\n", hole_base, hole_offset, - hole_size); -} - -/* - * update NUM_DBG_ATTRS in case you add new members - */ -static DEVICE_ATTR(dhar, S_IRUGO, amd64_dhar_show, NULL); -static DEVICE_ATTR(dbam, S_IRUGO, amd64_dbam0_show, NULL); -static DEVICE_ATTR(topmem, S_IRUGO, 
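-/*
- * For reference, each EDAC_DCT_ATTR_SHOW(reg) invocation above expands to a
- * read-only show routine; EDAC_DCT_ATTR_SHOW(dhar), for example, becomes:
- *
- *	static ssize_t amd64_dhar_show(struct device *dev,
- *				       struct device_attribute *mattr,
- *				       char *data)
- *	{
- *		struct mem_ctl_info *mci = to_mci(dev);
- *		struct amd64_pvt *pvt = mci->pvt_info;
- *		return sprintf(data, "0x%016llx\n", (u64)pvt->dhar);
- *	}
- */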
amd64_top_mem_show, NULL); -static DEVICE_ATTR(topmem2, S_IRUGO, amd64_top_mem2_show, NULL); -static DEVICE_ATTR(dram_hole, S_IRUGO, amd64_hole_show, NULL); - -int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci) -{ - int rc; - - rc = device_create_file(&mci->dev, &dev_attr_dhar); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_dbam); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_topmem); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_topmem2); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_dram_hole); - if (rc < 0) - return rc; - - return 0; -} - -void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci) -{ - device_remove_file(&mci->dev, &dev_attr_dhar); - device_remove_file(&mci->dev, &dev_attr_dbam); - device_remove_file(&mci->dev, &dev_attr_topmem); - device_remove_file(&mci->dev, &dev_attr_topmem2); - device_remove_file(&mci->dev, &dev_attr_dram_hole); -} diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c deleted file mode 100644 index 0d66ae68d468..000000000000 --- a/drivers/edac/amd64_edac_inj.c +++ /dev/null @@ -1,241 +0,0 @@ -#include "amd64_edac.h" - -static ssize_t amd64_inject_section_show(struct device *dev, - struct device_attribute *mattr, - char *buf) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct amd64_pvt *pvt = mci->pvt_info; - return sprintf(buf, "0x%x\n", pvt->injection.section); -} - -/* - * store error injection section value which refers to one of 4 16-byte sections - * within a 64-byte cacheline - * - * range: 0..3 - */ -static ssize_t amd64_inject_section_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct amd64_pvt *pvt = mci->pvt_info; - unsigned long value; - int ret; - - ret = kstrtoul(data, 10, &value); - if (ret < 0) - return ret; - - if (value > 3) { - amd64_warn("%s: invalid section 0x%lx\n", __func__, value); - return -EINVAL; - } - - pvt->injection.section = (u32) value; - return count; -} - -static ssize_t amd64_inject_word_show(struct device *dev, - struct device_attribute *mattr, - char *buf) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct amd64_pvt *pvt = mci->pvt_info; - return sprintf(buf, "0x%x\n", pvt->injection.word); -} - -/* - * store error injection word value which refers to one of 9 16-bit word of the - * 16-byte (128-bit + ECC bits) section - * - * range: 0..8 - */ -static ssize_t amd64_inject_word_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct amd64_pvt *pvt = mci->pvt_info; - unsigned long value; - int ret; - - ret = kstrtoul(data, 10, &value); - if (ret < 0) - return ret; - - if (value > 8) { - amd64_warn("%s: invalid word 0x%lx\n", __func__, value); - return -EINVAL; - } - - pvt->injection.word = (u32) value; - return count; -} - -static ssize_t amd64_inject_ecc_vector_show(struct device *dev, - struct device_attribute *mattr, - char *buf) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct amd64_pvt *pvt = mci->pvt_info; - return sprintf(buf, "0x%x\n", pvt->injection.bit_map); -} - -/* - * store 16 bit error injection vector which enables injecting errors to the - * corresponding bit within the error injection word above. When used during a - * DRAM ECC read, it holds the contents of the of the DRAM ECC bits. 
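- *
- * Illustrative usage from userspace (paths as created below; the values
- * are examples only):
- *
- *	echo 2 > /sys/devices/system/edac/mc/mc0/inject_word
- *	echo 0x4 > /sys/devices/system/edac/mc/mc0/inject_ecc_vector
- *
- * arranges for bit 2 of word 2 to be flipped by a subsequent write to
- * inject_write.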
- */ -static ssize_t amd64_inject_ecc_vector_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct amd64_pvt *pvt = mci->pvt_info; - unsigned long value; - int ret; - - ret = kstrtoul(data, 16, &value); - if (ret < 0) - return ret; - - if (value & 0xFFFF0000) { - amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value); - return -EINVAL; - } - - pvt->injection.bit_map = (u32) value; - return count; -} - -/* - * Do a DRAM ECC read. Assemble staged values in the pvt area, format into - * fields needed by the injection registers and read the NB Array Data Port. - */ -static ssize_t amd64_inject_read_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct amd64_pvt *pvt = mci->pvt_info; - unsigned long value; - u32 section, word_bits; - int ret; - - ret = kstrtoul(data, 10, &value); - if (ret < 0) - return ret; - - /* Form value to choose 16-byte section of cacheline */ - section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); - - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); - - word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection); - - /* Issue 'word' and 'bit' along with the READ request */ - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); - - edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); - - return count; -} - -/* - * Do a DRAM ECC write. Assemble staged values in the pvt area and format into - * fields needed by the injection registers. - */ -static ssize_t amd64_inject_write_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct amd64_pvt *pvt = mci->pvt_info; - u32 section, word_bits, tmp; - unsigned long value; - int ret; - - ret = kstrtoul(data, 10, &value); - if (ret < 0) - return ret; - - /* Form value to choose 16-byte section of cacheline */ - section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); - - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); - - word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection); - - pr_notice_once("Don't forget to decrease MCE polling interval in\n" - "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n" - "so that you can get the error report faster.\n"); - - on_each_cpu(disable_caches, NULL, 1); - - /* Issue 'word' and 'bit' along with the READ request */ - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); - - retry: - /* wait until injection happens */ - amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp); - if (tmp & F10_NB_ARR_ECC_WR_REQ) { - cpu_relax(); - goto retry; - } - - on_each_cpu(enable_caches, NULL, 1); - - edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); - - return count; -} - -/* - * update NUM_INJ_ATTRS in case you add new members - */ - -static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR, - amd64_inject_section_show, amd64_inject_section_store); -static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR, - amd64_inject_word_show, amd64_inject_word_store); -static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR, - amd64_inject_ecc_vector_show, amd64_inject_ecc_vector_store); -static DEVICE_ATTR(inject_write, S_IWUSR, - NULL, amd64_inject_write_store); -static DEVICE_ATTR(inject_read, S_IWUSR, - NULL, amd64_inject_read_store); - - -int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci) -{ - int rc; - - rc = 
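-/*
- * The file-by-file registration below has no rollback on partial failure.
- * A grouped alternative (a sketch, not what this driver did) would let the
- * core create and remove the whole set atomically:
- *
- *	static struct attribute *inj_attrs[] = {
- *		&dev_attr_inject_section.attr,
- *		&dev_attr_inject_word.attr,
- *		&dev_attr_inject_ecc_vector.attr,
- *		NULL,
- *	};
- *	static const struct attribute_group inj_group = { .attrs = inj_attrs };
- *
- *	rc = sysfs_create_group(&mci->dev.kobj, &inj_group);
- */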
device_create_file(&mci->dev, &dev_attr_inject_section); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_inject_word); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_inject_ecc_vector); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_inject_write); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_inject_read); - if (rc < 0) - return rc; - - return 0; -} - -void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci) -{ - device_remove_file(&mci->dev, &dev_attr_inject_section); - device_remove_file(&mci->dev, &dev_attr_inject_word); - device_remove_file(&mci->dev, &dev_attr_inject_ecc_vector); - device_remove_file(&mci->dev, &dev_attr_inject_write); - device_remove_file(&mci->dev, &dev_attr_inject_read); -} diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index 96e3ee3460a5..2a49f68a7cf9 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -17,9 +17,8 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/edac.h> -#include "edac_core.h" +#include "edac_module.h" -#define AMD76X_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "amd76x_edac" #define amd76x_printk(level, fmt, arg...) \ @@ -180,7 +179,6 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci, static void amd76x_check(struct mem_ctl_info *mci) { struct amd76x_error_info info; - edac_dbg(3, "\n"); amd76x_get_error_info(mci, &info); amd76x_process_error_info(mci, &info, 1); } @@ -263,7 +261,6 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx) mci->edac_cap = ems_mode ? (EDAC_FLAG_EC | EDAC_FLAG_SECDED) : EDAC_FLAG_NONE; mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = AMD76X_REVISION; mci->ctl_name = amd76x_devs[dev_idx].ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = amd76x_check; @@ -333,7 +330,7 @@ static void amd76x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(amd76x_pci_tbl) = { +static const struct pci_device_id amd76x_pci_tbl[] = { { PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0, AMD762}, diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c deleted file mode 100644 index ddd890052ce2..000000000000 --- a/drivers/edac/amd8111_edac.c +++ /dev/null @@ -1,594 +0,0 @@ -/* - * amd8111_edac.c, AMD8111 Hyper Transport chip EDAC kernel module - * - * Copyright (c) 2008 Wind River Systems, Inc. - * - * Authors: Cao Qingtao <qingtao.cao@windriver.com> - * Benjamin Walsh <benjamin.walsh@windriver.com> - * Hu Yongqi <yongqi.hu@windriver.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/bitops.h> -#include <linux/edac.h> -#include <linux/pci_ids.h> -#include <asm/io.h> - -#include "edac_core.h" -#include "edac_module.h" -#include "amd8111_edac.h" - -#define AMD8111_EDAC_REVISION " Ver: 1.0.0" -#define AMD8111_EDAC_MOD_STR "amd8111_edac" - -#define PCI_DEVICE_ID_AMD_8111_PCI 0x7460 - -enum amd8111_edac_devs { - LPC_BRIDGE = 0, -}; - -enum amd8111_edac_pcis { - PCI_BRIDGE = 0, -}; - -/* Wrapper functions for accessing PCI configuration space */ -static int edac_pci_read_dword(struct pci_dev *dev, int reg, u32 *val32) -{ - int ret; - - ret = pci_read_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Read Error at 0x%x\n", reg); - - return ret; -} - -static void edac_pci_read_byte(struct pci_dev *dev, int reg, u8 *val8) -{ - int ret; - - ret = pci_read_config_byte(dev, reg, val8); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Read Error at 0x%x\n", reg); -} - -static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32) -{ - int ret; - - ret = pci_write_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Write Error at 0x%x\n", reg); -} - -static void edac_pci_write_byte(struct pci_dev *dev, int reg, u8 val8) -{ - int ret; - - ret = pci_write_config_byte(dev, reg, val8); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Write Error at 0x%x\n", reg); -} - -/* - * device-specific methods for amd8111 PCI Bridge Controller - * - * Error Reporting and Handling for amd8111 chipset could be found - * in its datasheet 3.1.2 section, P37 - */ -static void amd8111_pci_bridge_init(struct amd8111_pci_info *pci_info) -{ - u32 val32; - struct pci_dev *dev = pci_info->dev; - - /* First clear error detection flags on the host interface */ - - /* Clear SSE/SMA/STA flags in the global status register*/ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - if (val32 & PCI_STSCMD_CLEAR_MASK) - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - /* Clear CRC and Link Fail flags in HT Link Control reg */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - if (val32 & HT_LINK_CLEAR_MASK) - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - /* Second clear all fault on the secondary interface */ - - /* Clear error flags in the memory-base limit reg. 
*/ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_CLEAR_MASK) - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - /* Clear Discard Timer Expired flag in Interrupt/Bridge Control reg */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - if (val32 & PCI_INTBRG_CTRL_CLEAR_MASK) - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - - /* Last enable error detections */ - if (edac_op_state == EDAC_OPSTATE_POLL) { - /* Enable System Error reporting in global status register */ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - val32 |= PCI_STSCMD_SERREN; - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - /* Enable CRC Sync flood packets to HyperTransport Link */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - val32 |= HT_LINK_CRCFEN; - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - /* Enable SSE reporting etc in Interrupt control reg */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - val32 |= PCI_INTBRG_CTRL_POLL_MASK; - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - } -} - -static void amd8111_pci_bridge_exit(struct amd8111_pci_info *pci_info) -{ - u32 val32; - struct pci_dev *dev = pci_info->dev; - - if (edac_op_state == EDAC_OPSTATE_POLL) { - /* Disable System Error reporting */ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - val32 &= ~PCI_STSCMD_SERREN; - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - /* Disable CRC flood packets */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - val32 &= ~HT_LINK_CRCFEN; - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - /* Disable DTSERREN/MARSP/SERREN in Interrupt Control reg */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - val32 &= ~PCI_INTBRG_CTRL_POLL_MASK; - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - } -} - -static void amd8111_pci_bridge_check(struct edac_pci_ctl_info *edac_dev) -{ - struct amd8111_pci_info *pci_info = edac_dev->pvt_info; - struct pci_dev *dev = pci_info->dev; - u32 val32; - - /* Check out PCI Bridge Status and Command Register */ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - if (val32 & PCI_STSCMD_CLEAR_MASK) { - printk(KERN_INFO "Error(s) in PCI bridge status and command" - "register on device %s\n", pci_info->ctl_name); - printk(KERN_INFO "SSE: %d, RMA: %d, RTA: %d\n", - (val32 & PCI_STSCMD_SSE) != 0, - (val32 & PCI_STSCMD_RMA) != 0, - (val32 & PCI_STSCMD_RTA) != 0); - - val32 |= PCI_STSCMD_CLEAR_MASK; - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check out HyperTransport Link Control Register */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - if (val32 & HT_LINK_LKFAIL) { - printk(KERN_INFO "Error(s) in hypertransport link control" - "register on device %s\n", pci_info->ctl_name); - printk(KERN_INFO "LKFAIL: %d\n", - (val32 & HT_LINK_LKFAIL) != 0); - - val32 |= HT_LINK_LKFAIL; - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check out PCI Interrupt and Bridge Control Register */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - if (val32 & PCI_INTBRG_CTRL_DTSTAT) { - printk(KERN_INFO "Error(s) in PCI interrupt and bridge control" - "register on device %s\n", pci_info->ctl_name); - printk(KERN_INFO "DTSTAT: %d\n", - (val32 & PCI_INTBRG_CTRL_DTSTAT) != 0); - - val32 |= PCI_INTBRG_CTRL_DTSTAT; - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check out PCI Bridge Memory Base-Limit 
Register */ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_CLEAR_MASK) { - printk(KERN_INFO - "Error(s) in mem limit register on %s device\n", - pci_info->ctl_name); - printk(KERN_INFO "DPE: %d, RSE: %d, RMA: %d\n" - "RTA: %d, STA: %d, MDPE: %d\n", - (val32 & MEM_LIMIT_DPE) != 0, - (val32 & MEM_LIMIT_RSE) != 0, - (val32 & MEM_LIMIT_RMA) != 0, - (val32 & MEM_LIMIT_RTA) != 0, - (val32 & MEM_LIMIT_STA) != 0, - (val32 & MEM_LIMIT_MDPE) != 0); - - val32 |= MEM_LIMIT_CLEAR_MASK; - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } -} - -static struct resource *legacy_io_res; -static int at_compat_reg_broken; -#define LEGACY_NR_PORTS 1 - -/* device-specific methods for amd8111 LPC Bridge device */ -static void amd8111_lpc_bridge_init(struct amd8111_dev_info *dev_info) -{ - u8 val8; - struct pci_dev *dev = dev_info->dev; - - /* First clear REG_AT_COMPAT[SERR, IOCHK] if necessary */ - legacy_io_res = request_region(REG_AT_COMPAT, LEGACY_NR_PORTS, - AMD8111_EDAC_MOD_STR); - if (!legacy_io_res) - printk(KERN_INFO "%s: failed to request legacy I/O region " - "start %d, len %d\n", __func__, - REG_AT_COMPAT, LEGACY_NR_PORTS); - else { - val8 = __do_inb(REG_AT_COMPAT); - if (val8 == 0xff) { /* buggy port */ - printk(KERN_INFO "%s: port %d is buggy, not supported" - " by hardware?\n", __func__, REG_AT_COMPAT); - at_compat_reg_broken = 1; - release_region(REG_AT_COMPAT, LEGACY_NR_PORTS); - legacy_io_res = NULL; - } else { - u8 out8 = 0; - if (val8 & AT_COMPAT_SERR) - out8 = AT_COMPAT_CLRSERR; - if (val8 & AT_COMPAT_IOCHK) - out8 |= AT_COMPAT_CLRIOCHK; - if (out8 > 0) - __do_outb(out8, REG_AT_COMPAT); - } - } - - /* Second clear error flags on LPC bridge */ - edac_pci_read_byte(dev, REG_IO_CTRL_1, &val8); - if (val8 & IO_CTRL_1_CLEAR_MASK) - edac_pci_write_byte(dev, REG_IO_CTRL_1, val8); -} - -static void amd8111_lpc_bridge_exit(struct amd8111_dev_info *dev_info) -{ - if (legacy_io_res) - release_region(REG_AT_COMPAT, LEGACY_NR_PORTS); -} - -static void amd8111_lpc_bridge_check(struct edac_device_ctl_info *edac_dev) -{ - struct amd8111_dev_info *dev_info = edac_dev->pvt_info; - struct pci_dev *dev = dev_info->dev; - u8 val8; - - edac_pci_read_byte(dev, REG_IO_CTRL_1, &val8); - if (val8 & IO_CTRL_1_CLEAR_MASK) { - printk(KERN_INFO - "Error(s) in IO control register on %s device\n", - dev_info->ctl_name); - printk(KERN_INFO "LPC ERR: %d, PW2LPC: %d\n", - (val8 & IO_CTRL_1_LPC_ERR) != 0, - (val8 & IO_CTRL_1_PW2LPC) != 0); - - val8 |= IO_CTRL_1_CLEAR_MASK; - edac_pci_write_byte(dev, REG_IO_CTRL_1, val8); - - edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name); - } - - if (at_compat_reg_broken == 0) { - u8 out8 = 0; - val8 = __do_inb(REG_AT_COMPAT); - if (val8 & AT_COMPAT_SERR) - out8 = AT_COMPAT_CLRSERR; - if (val8 & AT_COMPAT_IOCHK) - out8 |= AT_COMPAT_CLRIOCHK; - if (out8 > 0) { - __do_outb(out8, REG_AT_COMPAT); - edac_device_handle_ue(edac_dev, 0, 0, - edac_dev->ctl_name); - } - } -} - -/* General devices represented by edac_device_ctl_info */ -static struct amd8111_dev_info amd8111_devices[] = { - [LPC_BRIDGE] = { - .err_dev = PCI_DEVICE_ID_AMD_8111_LPC, - .ctl_name = "lpc", - .init = amd8111_lpc_bridge_init, - .exit = amd8111_lpc_bridge_exit, - .check = amd8111_lpc_bridge_check, - }, - {0}, -}; - -/* PCI controllers represented by edac_pci_ctl_info */ -static struct amd8111_pci_info amd8111_pcis[] = { - [PCI_BRIDGE] = { - .err_dev = PCI_DEVICE_ID_AMD_8111_PCI, - .ctl_name = "AMD8111_PCI_Controller", - .init = 
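-	/*
-	 * amd8111_pci_probe() selects this entry via id->driver_data
-	 * (PCI_BRIDGE), so the array index must match the enum value.
-	 */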
amd8111_pci_bridge_init, - .exit = amd8111_pci_bridge_exit, - .check = amd8111_pci_bridge_check, - }, - {0}, -}; - -static int amd8111_dev_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - struct amd8111_dev_info *dev_info = &amd8111_devices[id->driver_data]; - - dev_info->dev = pci_get_device(PCI_VENDOR_ID_AMD, - dev_info->err_dev, NULL); - - if (!dev_info->dev) { - printk(KERN_ERR "EDAC device not found:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, dev_info->err_dev, - dev_info->ctl_name); - return -ENODEV; - } - - if (pci_enable_device(dev_info->dev)) { - pci_dev_put(dev_info->dev); - printk(KERN_ERR "failed to enable:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, dev_info->err_dev, - dev_info->ctl_name); - return -ENODEV; - } - - /* - * we do not allocate extra private structure for - * edac_device_ctl_info, but make use of existing - * one instead. - */ - dev_info->edac_idx = edac_device_alloc_index(); - dev_info->edac_dev = - edac_device_alloc_ctl_info(0, dev_info->ctl_name, 1, - NULL, 0, 0, - NULL, 0, dev_info->edac_idx); - if (!dev_info->edac_dev) - return -ENOMEM; - - dev_info->edac_dev->pvt_info = dev_info; - dev_info->edac_dev->dev = &dev_info->dev->dev; - dev_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR; - dev_info->edac_dev->ctl_name = dev_info->ctl_name; - dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev); - - if (edac_op_state == EDAC_OPSTATE_POLL) - dev_info->edac_dev->edac_check = dev_info->check; - - if (dev_info->init) - dev_info->init(dev_info); - - if (edac_device_add_device(dev_info->edac_dev) > 0) { - printk(KERN_ERR "failed to add edac_dev for %s\n", - dev_info->ctl_name); - edac_device_free_ctl_info(dev_info->edac_dev); - return -ENODEV; - } - - printk(KERN_INFO "added one edac_dev on AMD8111 " - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, dev_info->err_dev, - dev_info->ctl_name); - - return 0; -} - -static void amd8111_dev_remove(struct pci_dev *dev) -{ - struct amd8111_dev_info *dev_info; - - for (dev_info = amd8111_devices; dev_info->err_dev; dev_info++) - if (dev_info->dev->device == dev->device) - break; - - if (!dev_info->err_dev) /* should never happen */ - return; - - if (dev_info->edac_dev) { - edac_device_del_device(dev_info->edac_dev->dev); - edac_device_free_ctl_info(dev_info->edac_dev); - } - - if (dev_info->exit) - dev_info->exit(dev_info); - - pci_dev_put(dev_info->dev); -} - -static int amd8111_pci_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - struct amd8111_pci_info *pci_info = &amd8111_pcis[id->driver_data]; - - pci_info->dev = pci_get_device(PCI_VENDOR_ID_AMD, - pci_info->err_dev, NULL); - - if (!pci_info->dev) { - printk(KERN_ERR "EDAC device not found:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, pci_info->err_dev, - pci_info->ctl_name); - return -ENODEV; - } - - if (pci_enable_device(pci_info->dev)) { - pci_dev_put(pci_info->dev); - printk(KERN_ERR "failed to enable:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, pci_info->err_dev, - pci_info->ctl_name); - return -ENODEV; - } - - /* - * we do not allocate extra private structure for - * edac_pci_ctl_info, but make use of existing - * one instead. 
- */ - pci_info->edac_idx = edac_pci_alloc_index(); - pci_info->edac_dev = edac_pci_alloc_ctl_info(0, pci_info->ctl_name); - if (!pci_info->edac_dev) - return -ENOMEM; - - pci_info->edac_dev->pvt_info = pci_info; - pci_info->edac_dev->dev = &pci_info->dev->dev; - pci_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR; - pci_info->edac_dev->ctl_name = pci_info->ctl_name; - pci_info->edac_dev->dev_name = dev_name(&pci_info->dev->dev); - - if (edac_op_state == EDAC_OPSTATE_POLL) - pci_info->edac_dev->edac_check = pci_info->check; - - if (pci_info->init) - pci_info->init(pci_info); - - if (edac_pci_add_device(pci_info->edac_dev, pci_info->edac_idx) > 0) { - printk(KERN_ERR "failed to add edac_pci for %s\n", - pci_info->ctl_name); - edac_pci_free_ctl_info(pci_info->edac_dev); - return -ENODEV; - } - - printk(KERN_INFO "added one edac_pci on AMD8111 " - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, pci_info->err_dev, - pci_info->ctl_name); - - return 0; -} - -static void amd8111_pci_remove(struct pci_dev *dev) -{ - struct amd8111_pci_info *pci_info; - - for (pci_info = amd8111_pcis; pci_info->err_dev; pci_info++) - if (pci_info->dev->device == dev->device) - break; - - if (!pci_info->err_dev) /* should never happen */ - return; - - if (pci_info->edac_dev) { - edac_pci_del_device(pci_info->edac_dev->dev); - edac_pci_free_ctl_info(pci_info->edac_dev); - } - - if (pci_info->exit) - pci_info->exit(pci_info); - - pci_dev_put(pci_info->dev); -} - -/* PCI Device ID talbe for general EDAC device */ -static const struct pci_device_id amd8111_edac_dev_tbl[] = { - { - PCI_VEND_DEV(AMD, 8111_LPC), - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - .driver_data = LPC_BRIDGE, - }, - { - 0, - } /* table is NULL-terminated */ -}; -MODULE_DEVICE_TABLE(pci, amd8111_edac_dev_tbl); - -static struct pci_driver amd8111_edac_dev_driver = { - .name = "AMD8111_EDAC_DEV", - .probe = amd8111_dev_probe, - .remove = amd8111_dev_remove, - .id_table = amd8111_edac_dev_tbl, -}; - -/* PCI Device ID table for EDAC PCI controller */ -static const struct pci_device_id amd8111_edac_pci_tbl[] = { - { - PCI_VEND_DEV(AMD, 8111_PCI), - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - .driver_data = PCI_BRIDGE, - }, - { - 0, - } /* table is NULL-terminated */ -}; -MODULE_DEVICE_TABLE(pci, amd8111_edac_pci_tbl); - -static struct pci_driver amd8111_edac_pci_driver = { - .name = "AMD8111_EDAC_PCI", - .probe = amd8111_pci_probe, - .remove = amd8111_pci_remove, - .id_table = amd8111_edac_pci_tbl, -}; - -static int __init amd8111_edac_init(void) -{ - int val; - - printk(KERN_INFO "AMD8111 EDAC driver " AMD8111_EDAC_REVISION "\n"); - printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc.\n"); - - /* Only POLL mode supported so far */ - edac_op_state = EDAC_OPSTATE_POLL; - - val = pci_register_driver(&amd8111_edac_dev_driver); - val |= pci_register_driver(&amd8111_edac_pci_driver); - - return val; -} - -static void __exit amd8111_edac_exit(void) -{ - pci_unregister_driver(&amd8111_edac_pci_driver); - pci_unregister_driver(&amd8111_edac_dev_driver); -} - - -module_init(amd8111_edac_init); -module_exit(amd8111_edac_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Cao Qingtao <qingtao.cao@windriver.com>\n"); -MODULE_DESCRIPTION("AMD8111 HyperTransport I/O Hub EDAC kernel module"); diff --git a/drivers/edac/amd8111_edac.h b/drivers/edac/amd8111_edac.h deleted file mode 100644 index 35794331debc..000000000000 --- a/drivers/edac/amd8111_edac.h +++ /dev/null @@ -1,130 
+0,0 @@ -/* - * amd8111_edac.h, EDAC defs for AMD8111 hypertransport chip - * - * Copyright (c) 2008 Wind River Systems, Inc. - * - * Authors: Cao Qingtao <qingtao.cao@windriver.com> - * Benjamin Walsh <benjamin.walsh@windriver.com> - * Hu Yongqi <yongqi.hu@windriver.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _AMD8111_EDAC_H_ -#define _AMD8111_EDAC_H_ - -/************************************************************ - * PCI Bridge Status and Command Register, DevA:0x04 - ************************************************************/ -#define REG_PCI_STSCMD 0x04 -enum pci_stscmd_bits { - PCI_STSCMD_SSE = BIT(30), - PCI_STSCMD_RMA = BIT(29), - PCI_STSCMD_RTA = BIT(28), - PCI_STSCMD_SERREN = BIT(8), - PCI_STSCMD_CLEAR_MASK = (PCI_STSCMD_SSE | - PCI_STSCMD_RMA | - PCI_STSCMD_RTA) -}; - -/************************************************************ - * PCI Bridge Memory Base-Limit Register, DevA:0x1c - ************************************************************/ -#define REG_MEM_LIM 0x1c -enum mem_limit_bits { - MEM_LIMIT_DPE = BIT(31), - MEM_LIMIT_RSE = BIT(30), - MEM_LIMIT_RMA = BIT(29), - MEM_LIMIT_RTA = BIT(28), - MEM_LIMIT_STA = BIT(27), - MEM_LIMIT_MDPE = BIT(24), - MEM_LIMIT_CLEAR_MASK = (MEM_LIMIT_DPE | - MEM_LIMIT_RSE | - MEM_LIMIT_RMA | - MEM_LIMIT_RTA | - MEM_LIMIT_STA | - MEM_LIMIT_MDPE) -}; - -/************************************************************ - * HyperTransport Link Control Register, DevA:0xc4 - ************************************************************/ -#define REG_HT_LINK 0xc4 -enum ht_link_bits { - HT_LINK_LKFAIL = BIT(4), - HT_LINK_CRCFEN = BIT(1), - HT_LINK_CLEAR_MASK = (HT_LINK_LKFAIL) -}; - -/************************************************************ - * PCI Bridge Interrupt and Bridge Control, DevA:0x3c - ************************************************************/ -#define REG_PCI_INTBRG_CTRL 0x3c -enum pci_intbrg_ctrl_bits { - PCI_INTBRG_CTRL_DTSERREN = BIT(27), - PCI_INTBRG_CTRL_DTSTAT = BIT(26), - PCI_INTBRG_CTRL_MARSP = BIT(21), - PCI_INTBRG_CTRL_SERREN = BIT(17), - PCI_INTBRG_CTRL_PEREN = BIT(16), - PCI_INTBRG_CTRL_CLEAR_MASK = (PCI_INTBRG_CTRL_DTSTAT), - PCI_INTBRG_CTRL_POLL_MASK = (PCI_INTBRG_CTRL_DTSERREN | - PCI_INTBRG_CTRL_MARSP | - PCI_INTBRG_CTRL_SERREN) -}; - -/************************************************************ - * I/O Control 1 Register, DevB:0x40 - ************************************************************/ -#define REG_IO_CTRL_1 0x40 -enum io_ctrl_1_bits { - IO_CTRL_1_NMIONERR = BIT(7), - IO_CTRL_1_LPC_ERR = BIT(6), - IO_CTRL_1_PW2LPC = BIT(1), - IO_CTRL_1_CLEAR_MASK = (IO_CTRL_1_LPC_ERR | IO_CTRL_1_PW2LPC) -}; - -/************************************************************ - * Legacy I/O Space Registers - ************************************************************/ -#define REG_AT_COMPAT 0x61 -enum at_compat_bits { - AT_COMPAT_SERR = BIT(7), - AT_COMPAT_IOCHK = BIT(6), - AT_COMPAT_CLRIOCHK = BIT(3), 
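-	/*
-	 * Port 0x61 is the legacy PC/AT NMI status and control port: SERR
-	 * and IOCHK are status bits, and the CLR* bits acknowledge them,
-	 * e.g. (the pattern used by amd8111_lpc_bridge_check()):
-	 *
-	 *	val8 = __do_inb(REG_AT_COMPAT);
-	 *	if (val8 & AT_COMPAT_SERR)
-	 *		__do_outb(AT_COMPAT_CLRSERR, REG_AT_COMPAT);
-	 */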
- AT_COMPAT_CLRSERR = BIT(2), -}; - -struct amd8111_dev_info { - u16 err_dev; /* PCI Device ID */ - struct pci_dev *dev; - int edac_idx; /* device index */ - char *ctl_name; - struct edac_device_ctl_info *edac_dev; - void (*init)(struct amd8111_dev_info *dev_info); - void (*exit)(struct amd8111_dev_info *dev_info); - void (*check)(struct edac_device_ctl_info *edac_dev); -}; - -struct amd8111_pci_info { - u16 err_dev; /* PCI Device ID */ - struct pci_dev *dev; - int edac_idx; /* pci index */ - const char *ctl_name; - struct edac_pci_ctl_info *edac_dev; - void (*init)(struct amd8111_pci_info *dev_info); - void (*exit)(struct amd8111_pci_info *dev_info); - void (*check)(struct edac_pci_ctl_info *edac_dev); -}; - -#endif /* _AMD8111_EDAC_H_ */ diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c deleted file mode 100644 index a5c680561c73..000000000000 --- a/drivers/edac/amd8131_edac.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - * amd8131_edac.c, AMD8131 hypertransport chip EDAC kernel module - * - * Copyright (c) 2008 Wind River Systems, Inc. - * - * Authors: Cao Qingtao <qingtao.cao@windriver.com> - * Benjamin Walsh <benjamin.walsh@windriver.com> - * Hu Yongqi <yongqi.hu@windriver.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/bitops.h> -#include <linux/edac.h> -#include <linux/pci_ids.h> - -#include "edac_core.h" -#include "edac_module.h" -#include "amd8131_edac.h" - -#define AMD8131_EDAC_REVISION " Ver: 1.0.0" -#define AMD8131_EDAC_MOD_STR "amd8131_edac" - -/* Wrapper functions for accessing PCI configuration space */ -static void edac_pci_read_dword(struct pci_dev *dev, int reg, u32 *val32) -{ - int ret; - - ret = pci_read_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8131_EDAC_MOD_STR - " PCI Access Read Error at 0x%x\n", reg); -} - -static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32) -{ - int ret; - - ret = pci_write_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8131_EDAC_MOD_STR - " PCI Access Write Error at 0x%x\n", reg); -} - -static char * const bridge_str[] = { - [NORTH_A] = "NORTH A", - [NORTH_B] = "NORTH B", - [SOUTH_A] = "SOUTH A", - [SOUTH_B] = "SOUTH B", - [NO_BRIDGE] = "NO BRIDGE", -}; - -/* Support up to two AMD8131 chipsets on a platform */ -static struct amd8131_dev_info amd8131_devices[] = { - { - .inst = NORTH_A, - .devfn = DEVFN_PCIX_BRIDGE_NORTH_A, - .ctl_name = "AMD8131_PCIX_NORTH_A", - }, - { - .inst = NORTH_B, - .devfn = DEVFN_PCIX_BRIDGE_NORTH_B, - .ctl_name = "AMD8131_PCIX_NORTH_B", - }, - { - .inst = SOUTH_A, - .devfn = DEVFN_PCIX_BRIDGE_SOUTH_A, - .ctl_name = "AMD8131_PCIX_SOUTH_A", - }, - { - .inst = SOUTH_B, - .devfn = DEVFN_PCIX_BRIDGE_SOUTH_B, - .ctl_name = "AMD8131_PCIX_SOUTH_B", - }, - {.inst = NO_BRIDGE,}, -}; - -static void 
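-/*
- * The devfn values above encode PCI device/function as (dev << 3 | fn):
- * 8, 16, 24 and 32 correspond to PCI_DEVFN(1, 0) through PCI_DEVFN(4, 0).
- */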
amd8131_pcix_init(struct amd8131_dev_info *dev_info) -{ - u32 val32; - struct pci_dev *dev = dev_info->dev; - - /* First clear error detection flags */ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_MASK) - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - /* Clear Discard Timer Timedout flag */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - if (val32 & INT_CTLR_DTS) - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - /* Clear CRC Error flag on link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - if (val32 & LNK_CTRL_CRCERR_A) - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - /* Clear CRC Error flag on link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - if (val32 & LNK_CTRL_CRCERR_B) - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); - - /* - * Then enable all error detections. - * - * Setup Discard Timer Sync Flood Enable, - * System Error Enable and Parity Error Enable. - */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - val32 |= INT_CTLR_PERR | INT_CTLR_SERR | INT_CTLR_DTSE; - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - /* Enable overall SERR Error detection */ - edac_pci_read_dword(dev, REG_STS_CMD, &val32); - val32 |= STS_CMD_SERREN; - edac_pci_write_dword(dev, REG_STS_CMD, val32); - - /* Setup CRC Flood Enable for link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - val32 |= LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - /* Setup CRC Flood Enable for link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - val32 |= LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); -} - -static void amd8131_pcix_exit(struct amd8131_dev_info *dev_info) -{ - u32 val32; - struct pci_dev *dev = dev_info->dev; - - /* Disable SERR, PERR and DTSE Error detection */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - val32 &= ~(INT_CTLR_PERR | INT_CTLR_SERR | INT_CTLR_DTSE); - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - /* Disable overall System Error detection */ - edac_pci_read_dword(dev, REG_STS_CMD, &val32); - val32 &= ~STS_CMD_SERREN; - edac_pci_write_dword(dev, REG_STS_CMD, val32); - - /* Disable CRC Sync Flood on link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - val32 &= ~LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - /* Disable CRC Sync Flood on link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - val32 &= ~LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); -} - -static void amd8131_pcix_check(struct edac_pci_ctl_info *edac_dev) -{ - struct amd8131_dev_info *dev_info = edac_dev->pvt_info; - struct pci_dev *dev = dev_info->dev; - u32 val32; - - /* Check PCI-X Bridge Memory Base-Limit Register for errors */ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_MASK) { - printk(KERN_INFO "Error(s) in mem limit register " - "on %s bridge\n", dev_info->ctl_name); - printk(KERN_INFO "DPE: %d, RSE: %d, RMA: %d\n" - "RTA: %d, STA: %d, MDPE: %d\n", - val32 & MEM_LIMIT_DPE, - val32 & MEM_LIMIT_RSE, - val32 & MEM_LIMIT_RMA, - val32 & MEM_LIMIT_RTA, - val32 & MEM_LIMIT_STA, - val32 & MEM_LIMIT_MDPE); - - val32 |= MEM_LIMIT_MASK; - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check if Discard Timer timed out */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - if (val32 & INT_CTLR_DTS) { - printk(KERN_INFO "Error(s) in interrupt and control register " - "on %s bridge\n", 
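-		/*
-		 * These status flags are write-one-to-clear: OR-ing the set
-		 * bits back into the register, as done after each report
-		 * here, acknowledges the error.
-		 */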
dev_info->ctl_name); - printk(KERN_INFO "DTS: %d\n", val32 & INT_CTLR_DTS); - - val32 |= INT_CTLR_DTS; - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check if CRC error happens on link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - if (val32 & LNK_CTRL_CRCERR_A) { - printk(KERN_INFO "Error(s) in link conf and control register " - "on %s bridge\n", dev_info->ctl_name); - printk(KERN_INFO "CRCERR: %d\n", val32 & LNK_CTRL_CRCERR_A); - - val32 |= LNK_CTRL_CRCERR_A; - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check if CRC error happens on link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - if (val32 & LNK_CTRL_CRCERR_B) { - printk(KERN_INFO "Error(s) in link conf and control register " - "on %s bridge\n", dev_info->ctl_name); - printk(KERN_INFO "CRCERR: %d\n", val32 & LNK_CTRL_CRCERR_B); - - val32 |= LNK_CTRL_CRCERR_B; - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } -} - -static struct amd8131_info amd8131_chipset = { - .err_dev = PCI_DEVICE_ID_AMD_8131_APIC, - .devices = amd8131_devices, - .init = amd8131_pcix_init, - .exit = amd8131_pcix_exit, - .check = amd8131_pcix_check, -}; - -/* - * There are 4 PCIX Bridges on ATCA-6101 that share the same PCI Device ID, - * so amd8131_probe() would be called by kernel 4 times, with different - * address of pci_dev for each of them each time. - */ -static int amd8131_probe(struct pci_dev *dev, const struct pci_device_id *id) -{ - struct amd8131_dev_info *dev_info; - - for (dev_info = amd8131_chipset.devices; dev_info->inst != NO_BRIDGE; - dev_info++) - if (dev_info->devfn == dev->devfn) - break; - - if (dev_info->inst == NO_BRIDGE) /* should never happen */ - return -ENODEV; - - /* - * We can't call pci_get_device() as we are used to do because - * there are 4 of them but pci_dev_get() instead. - */ - dev_info->dev = pci_dev_get(dev); - - if (pci_enable_device(dev_info->dev)) { - pci_dev_put(dev_info->dev); - printk(KERN_ERR "failed to enable:" - "vendor %x, device %x, devfn %x, name %s\n", - PCI_VENDOR_ID_AMD, amd8131_chipset.err_dev, - dev_info->devfn, dev_info->ctl_name); - return -ENODEV; - } - - /* - * we do not allocate extra private structure for - * edac_pci_ctl_info, but make use of existing - * one instead. 
- */ - dev_info->edac_idx = edac_pci_alloc_index(); - dev_info->edac_dev = edac_pci_alloc_ctl_info(0, dev_info->ctl_name); - if (!dev_info->edac_dev) - return -ENOMEM; - - dev_info->edac_dev->pvt_info = dev_info; - dev_info->edac_dev->dev = &dev_info->dev->dev; - dev_info->edac_dev->mod_name = AMD8131_EDAC_MOD_STR; - dev_info->edac_dev->ctl_name = dev_info->ctl_name; - dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev); - - if (edac_op_state == EDAC_OPSTATE_POLL) - dev_info->edac_dev->edac_check = amd8131_chipset.check; - - if (amd8131_chipset.init) - amd8131_chipset.init(dev_info); - - if (edac_pci_add_device(dev_info->edac_dev, dev_info->edac_idx) > 0) { - printk(KERN_ERR "failed edac_pci_add_device() for %s\n", - dev_info->ctl_name); - edac_pci_free_ctl_info(dev_info->edac_dev); - return -ENODEV; - } - - printk(KERN_INFO "added one device on AMD8131 " - "vendor %x, device %x, devfn %x, name %s\n", - PCI_VENDOR_ID_AMD, amd8131_chipset.err_dev, - dev_info->devfn, dev_info->ctl_name); - - return 0; -} - -static void amd8131_remove(struct pci_dev *dev) -{ - struct amd8131_dev_info *dev_info; - - for (dev_info = amd8131_chipset.devices; dev_info->inst != NO_BRIDGE; - dev_info++) - if (dev_info->devfn == dev->devfn) - break; - - if (dev_info->inst == NO_BRIDGE) /* should never happen */ - return; - - if (dev_info->edac_dev) { - edac_pci_del_device(dev_info->edac_dev->dev); - edac_pci_free_ctl_info(dev_info->edac_dev); - } - - if (amd8131_chipset.exit) - amd8131_chipset.exit(dev_info); - - pci_dev_put(dev_info->dev); -} - -static const struct pci_device_id amd8131_edac_pci_tbl[] = { - { - PCI_VEND_DEV(AMD, 8131_BRIDGE), - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - .driver_data = 0, - }, - { - 0, - } /* table is NULL-terminated */ -}; -MODULE_DEVICE_TABLE(pci, amd8131_edac_pci_tbl); - -static struct pci_driver amd8131_edac_driver = { - .name = AMD8131_EDAC_MOD_STR, - .probe = amd8131_probe, - .remove = amd8131_remove, - .id_table = amd8131_edac_pci_tbl, -}; - -static int __init amd8131_edac_init(void) -{ - printk(KERN_INFO "AMD8131 EDAC driver " AMD8131_EDAC_REVISION "\n"); - printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc.\n"); - - /* Only POLL mode supported so far */ - edac_op_state = EDAC_OPSTATE_POLL; - - return pci_register_driver(&amd8131_edac_driver); -} - -static void __exit amd8131_edac_exit(void) -{ - pci_unregister_driver(&amd8131_edac_driver); -} - -module_init(amd8131_edac_init); -module_exit(amd8131_edac_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Cao Qingtao <qingtao.cao@windriver.com>\n"); -MODULE_DESCRIPTION("AMD8131 HyperTransport PCI-X Tunnel EDAC kernel module"); diff --git a/drivers/edac/amd8131_edac.h b/drivers/edac/amd8131_edac.h deleted file mode 100644 index 6f8b07131ec4..000000000000 --- a/drivers/edac/amd8131_edac.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * amd8131_edac.h, EDAC defs for AMD8131 hypertransport chip - * - * Copyright (c) 2008 Wind River Systems, Inc. - * - * Authors: Cao Qingtao <qingtao.cao@windriver.com> - * Benjamin Walsh <benjamin.walsh@windriver.com> - * Hu Yongqi <yongqi.hu@windriver.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _AMD8131_EDAC_H_ -#define _AMD8131_EDAC_H_ - -#define DEVFN_PCIX_BRIDGE_NORTH_A 8 -#define DEVFN_PCIX_BRIDGE_NORTH_B 16 -#define DEVFN_PCIX_BRIDGE_SOUTH_A 24 -#define DEVFN_PCIX_BRIDGE_SOUTH_B 32 - -/************************************************************ - * PCI-X Bridge Status and Command Register, DevA:0x04 - ************************************************************/ -#define REG_STS_CMD 0x04 -enum sts_cmd_bits { - STS_CMD_SSE = BIT(30), - STS_CMD_SERREN = BIT(8) -}; - -/************************************************************ - * PCI-X Bridge Interrupt and Bridge Control Register, - ************************************************************/ -#define REG_INT_CTLR 0x3c -enum int_ctlr_bits { - INT_CTLR_DTSE = BIT(27), - INT_CTLR_DTS = BIT(26), - INT_CTLR_SERR = BIT(17), - INT_CTLR_PERR = BIT(16) -}; - -/************************************************************ - * PCI-X Bridge Memory Base-Limit Register, DevA:0x1C - ************************************************************/ -#define REG_MEM_LIM 0x1c -enum mem_limit_bits { - MEM_LIMIT_DPE = BIT(31), - MEM_LIMIT_RSE = BIT(30), - MEM_LIMIT_RMA = BIT(29), - MEM_LIMIT_RTA = BIT(28), - MEM_LIMIT_STA = BIT(27), - MEM_LIMIT_MDPE = BIT(24), - MEM_LIMIT_MASK = MEM_LIMIT_DPE|MEM_LIMIT_RSE|MEM_LIMIT_RMA| - MEM_LIMIT_RTA|MEM_LIMIT_STA|MEM_LIMIT_MDPE -}; - -/************************************************************ - * Link Configuration And Control Register, side A - ************************************************************/ -#define REG_LNK_CTRL_A 0xc4 - -/************************************************************ - * Link Configuration And Control Register, side B - ************************************************************/ -#define REG_LNK_CTRL_B 0xc8 - -enum lnk_ctrl_bits { - LNK_CTRL_CRCERR_A = BIT(9), - LNK_CTRL_CRCERR_B = BIT(8), - LNK_CTRL_CRCFEN = BIT(1) -}; - -enum pcix_bridge_inst { - NORTH_A = 0, - NORTH_B = 1, - SOUTH_A = 2, - SOUTH_B = 3, - NO_BRIDGE = 4 -}; - -struct amd8131_dev_info { - int devfn; - enum pcix_bridge_inst inst; - struct pci_dev *dev; - int edac_idx; /* pci device index */ - char *ctl_name; - struct edac_pci_ctl_info *edac_dev; -}; - -/* - * AMD8131 chipset has two pairs of PCIX Bridge and related IOAPIC - * Controller, and ATCA-6101 has two AMD8131 chipsets, so there are - * four PCIX Bridges on ATCA-6101 altogether. - * - * These PCIX Bridges share the same PCI Device ID and are all of - * Function Zero, they could be discrimated by their pci_dev->devfn. - * They share the same set of init/check/exit methods, and their - * private structures are collected in the devices[] array. 
- */ -struct amd8131_info { - u16 err_dev; /* PCI Device ID for AMD8131 APIC*/ - struct amd8131_dev_info *devices; - void (*init)(struct amd8131_dev_info *dev_info); - void (*exit)(struct amd8131_dev_info *dev_info); - void (*check)(struct edac_pci_ctl_info *edac_dev); -}; - -#endif /* _AMD8131_EDAC_H_ */ - diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c new file mode 100644 index 000000000000..d64248fcf4c0 --- /dev/null +++ b/drivers/edac/armada_xp_edac.c @@ -0,0 +1,622 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 Pengutronix, Jan Luebbe <kernel@pengutronix.de> + */ + +#include <linux/kernel.h> +#include <linux/edac.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <asm/hardware/cache-l2x0.h> +#include <asm/hardware/cache-aurora-l2.h> + +#include "edac_mc.h" +#include "edac_device.h" +#include "edac_module.h" + +/************************ EDAC MC (DDR RAM) ********************************/ + +#define SDRAM_NUM_CS 4 + +#define SDRAM_CONFIG_REG 0x0 +#define SDRAM_CONFIG_ECC_MASK BIT(18) +#define SDRAM_CONFIG_REGISTERED_MASK BIT(17) +#define SDRAM_CONFIG_BUS_WIDTH_MASK BIT(15) + +#define SDRAM_ADDR_CTRL_REG 0x10 +#define SDRAM_ADDR_CTRL_SIZE_HIGH_OFFSET(cs) (20+cs) +#define SDRAM_ADDR_CTRL_SIZE_HIGH_MASK(cs) (0x1 << SDRAM_ADDR_CTRL_SIZE_HIGH_OFFSET(cs)) +#define SDRAM_ADDR_CTRL_ADDR_SEL_MASK(cs) BIT(16+cs) +#define SDRAM_ADDR_CTRL_SIZE_LOW_OFFSET(cs) (cs*4+2) +#define SDRAM_ADDR_CTRL_SIZE_LOW_MASK(cs) (0x3 << SDRAM_ADDR_CTRL_SIZE_LOW_OFFSET(cs)) +#define SDRAM_ADDR_CTRL_STRUCT_OFFSET(cs) (cs*4) +#define SDRAM_ADDR_CTRL_STRUCT_MASK(cs) (0x3 << SDRAM_ADDR_CTRL_STRUCT_OFFSET(cs)) + +#define SDRAM_ERR_DATA_H_REG 0x40 +#define SDRAM_ERR_DATA_L_REG 0x44 + +#define SDRAM_ERR_RECV_ECC_REG 0x48 +#define SDRAM_ERR_RECV_ECC_VALUE_MASK 0xff + +#define SDRAM_ERR_CALC_ECC_REG 0x4c +#define SDRAM_ERR_CALC_ECC_ROW_OFFSET 8 +#define SDRAM_ERR_CALC_ECC_ROW_MASK (0xffff << SDRAM_ERR_CALC_ECC_ROW_OFFSET) +#define SDRAM_ERR_CALC_ECC_VALUE_MASK 0xff + +#define SDRAM_ERR_ADDR_REG 0x50 +#define SDRAM_ERR_ADDR_BANK_OFFSET 23 +#define SDRAM_ERR_ADDR_BANK_MASK (0x7 << SDRAM_ERR_ADDR_BANK_OFFSET) +#define SDRAM_ERR_ADDR_COL_OFFSET 8 +#define SDRAM_ERR_ADDR_COL_MASK (0x7fff << SDRAM_ERR_ADDR_COL_OFFSET) +#define SDRAM_ERR_ADDR_CS_OFFSET 1 +#define SDRAM_ERR_ADDR_CS_MASK (0x3 << SDRAM_ERR_ADDR_CS_OFFSET) +#define SDRAM_ERR_ADDR_TYPE_MASK BIT(0) + +#define SDRAM_ERR_CTRL_REG 0x54 +#define SDRAM_ERR_CTRL_THR_OFFSET 16 +#define SDRAM_ERR_CTRL_THR_MASK (0xff << SDRAM_ERR_CTRL_THR_OFFSET) +#define SDRAM_ERR_CTRL_PROP_MASK BIT(9) + +#define SDRAM_ERR_SBE_COUNT_REG 0x58 +#define SDRAM_ERR_DBE_COUNT_REG 0x5c + +#define SDRAM_ERR_CAUSE_ERR_REG 0xd0 +#define SDRAM_ERR_CAUSE_MSG_REG 0xd8 +#define SDRAM_ERR_CAUSE_DBE_MASK BIT(1) +#define SDRAM_ERR_CAUSE_SBE_MASK BIT(0) + +#define SDRAM_RANK_CTRL_REG 0x1e0 +#define SDRAM_RANK_CTRL_EXIST_MASK(cs) BIT(cs) + +struct axp_mc_drvdata { + void __iomem *base; + /* width in bytes */ + unsigned int width; + /* bank interleaving */ + bool cs_addr_sel[SDRAM_NUM_CS]; + + char msg[128]; +}; + +/* derived from "DRAM Address Multiplexing" in the ARMADA XP Functional Spec */ +static uint32_t axp_mc_calc_address(struct axp_mc_drvdata *drvdata, + uint8_t cs, uint8_t bank, uint16_t row, + uint16_t col) +{ + if (drvdata->width == 8) { + /* 64 bit */ + if (drvdata->cs_addr_sel[cs]) + /* bank interleaved */ + return (((row & 0xfff8) << 16) | + ((bank & 0x7) << 16) | + ((row & 0x7) << 13) | + ((col & 0x3ff) 
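+			/*
+			 * Bit layout assembled here (64-bit bus, bank
+			 * interleaved): addr[31:19] = row[15:3],
+			 * addr[18:16] = bank[2:0], addr[15:13] = row[2:0],
+			 * addr[12:3] = col[9:0]. E.g. row 0x0008, bank 0,
+			 * col 0 decodes to address 0x00080000.
+			 */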
<< 3));
+		else
+			return (((row & 0xffff) << 16) |
+				((bank & 0x7) << 13) |
+				((col & 0x3ff) << 3));
+	} else if (drvdata->width == 4) {
+		/* 32 bit */
+		if (drvdata->cs_addr_sel[cs])
+			/* bank interleaved */
+			return (((row & 0xfff0) << 15) |
+				((bank & 0x7) << 16) |
+				((row & 0xf) << 12) |
+				((col & 0x3ff) << 2));
+		else
+			return (((row & 0xffff) << 15) |
+				((bank & 0x7) << 12) |
+				((col & 0x3ff) << 2));
+	} else {
+		/* 16 bit */
+		if (drvdata->cs_addr_sel[cs])
+			/* bank interleaved */
+			return (((row & 0xffe0) << 14) |
+				((bank & 0x7) << 16) |
+				((row & 0x1f) << 11) |
+				((col & 0x3ff) << 1));
+		else
+			return (((row & 0xffff) << 14) |
+				((bank & 0x7) << 11) |
+				((col & 0x3ff) << 1));
+	}
+}
+
+static void axp_mc_check(struct mem_ctl_info *mci)
+{
+	struct axp_mc_drvdata *drvdata = mci->pvt_info;
+	uint32_t data_h, data_l, recv_ecc, calc_ecc, addr;
+	uint32_t cnt_sbe, cnt_dbe, cause_err, cause_msg;
+	uint32_t row_val, col_val, bank_val, addr_val;
+	uint8_t syndrome_val, cs_val;
+	char *msg = drvdata->msg;
+
+	data_h = readl(drvdata->base + SDRAM_ERR_DATA_H_REG);
+	data_l = readl(drvdata->base + SDRAM_ERR_DATA_L_REG);
+	recv_ecc = readl(drvdata->base + SDRAM_ERR_RECV_ECC_REG);
+	calc_ecc = readl(drvdata->base + SDRAM_ERR_CALC_ECC_REG);
+	addr = readl(drvdata->base + SDRAM_ERR_ADDR_REG);
+	cnt_sbe = readl(drvdata->base + SDRAM_ERR_SBE_COUNT_REG);
+	cnt_dbe = readl(drvdata->base + SDRAM_ERR_DBE_COUNT_REG);
+	cause_err = readl(drvdata->base + SDRAM_ERR_CAUSE_ERR_REG);
+	cause_msg = readl(drvdata->base + SDRAM_ERR_CAUSE_MSG_REG);
+
+	/* clear cause registers */
+	writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK),
+	       drvdata->base + SDRAM_ERR_CAUSE_ERR_REG);
+	writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK),
+	       drvdata->base + SDRAM_ERR_CAUSE_MSG_REG);
+
+	/* clear error counter registers */
+	if (cnt_sbe)
+		writel(0, drvdata->base + SDRAM_ERR_SBE_COUNT_REG);
+	if (cnt_dbe)
+		writel(0, drvdata->base + SDRAM_ERR_DBE_COUNT_REG);
+
+	if (!cnt_sbe && !cnt_dbe)
+		return;
+
+	if (!(addr & SDRAM_ERR_ADDR_TYPE_MASK)) {
+		if (cnt_sbe)
+			cnt_sbe--;
+		else
+			dev_warn(mci->pdev, "inconsistent SBE count detected\n");
+	} else {
+		if (cnt_dbe)
+			cnt_dbe--;
+		else
+			dev_warn(mci->pdev, "inconsistent DBE count detected\n");
+	}
+
+	/* report earlier errors */
+	if (cnt_sbe)
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+				     cnt_sbe, /* error count */
+				     0, 0, 0, /* pfn, offset, syndrome */
+				     -1, -1, -1, /* top, mid, low layer */
+				     mci->ctl_name,
+				     "details unavailable (multiple errors)");
+	if (cnt_dbe)
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+				     cnt_dbe, /* error count */
+				     0, 0, 0, /* pfn, offset, syndrome */
+				     -1, -1, -1, /* top, mid, low layer */
+				     mci->ctl_name,
+				     "details unavailable (multiple errors)");
+
+	/* report details for most recent error */
+	cs_val = (addr & SDRAM_ERR_ADDR_CS_MASK) >> SDRAM_ERR_ADDR_CS_OFFSET;
+	bank_val = (addr & SDRAM_ERR_ADDR_BANK_MASK) >> SDRAM_ERR_ADDR_BANK_OFFSET;
+	row_val = (calc_ecc & SDRAM_ERR_CALC_ECC_ROW_MASK) >> SDRAM_ERR_CALC_ECC_ROW_OFFSET;
+	col_val = (addr & SDRAM_ERR_ADDR_COL_MASK) >> SDRAM_ERR_ADDR_COL_OFFSET;
+	syndrome_val = (recv_ecc ^ calc_ecc) & 0xff;
+	addr_val = axp_mc_calc_address(drvdata, cs_val, bank_val, row_val,
+				       col_val);
+	msg += sprintf(msg, "row=0x%04x ", row_val);	/* 11 chars */
+	msg += sprintf(msg, "bank=0x%x ", bank_val);	/* 9 chars */
+	msg += sprintf(msg, "col=0x%04x ", col_val);	/* 11 chars */
+	msg += sprintf(msg, "cs=%d", cs_val);		/* 4 chars */
+
+	if (!(addr & SDRAM_ERR_ADDR_TYPE_MASK)) {
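+		/*
+		 * Bit 0 of the latched error address encodes the error
+		 * type: 0 = single-bit (correctable), 1 = double-bit
+		 * (uncorrectable), so the decoded location is reported
+		 * with the matching severity.
+		 */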
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + 1, /* error count */ + addr_val >> PAGE_SHIFT, + addr_val & ~PAGE_MASK, + syndrome_val, + cs_val, -1, -1, /* top, mid, low layer */ + mci->ctl_name, drvdata->msg); + } else { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, + 1, /* error count */ + addr_val >> PAGE_SHIFT, + addr_val & ~PAGE_MASK, + syndrome_val, + cs_val, -1, -1, /* top, mid, low layer */ + mci->ctl_name, drvdata->msg); + } +} + +static void axp_mc_read_config(struct mem_ctl_info *mci) +{ + struct axp_mc_drvdata *drvdata = mci->pvt_info; + uint32_t config, addr_ctrl, rank_ctrl; + unsigned int i, cs_struct, cs_size; + struct dimm_info *dimm; + + config = readl(drvdata->base + SDRAM_CONFIG_REG); + if (config & SDRAM_CONFIG_BUS_WIDTH_MASK) + /* 64 bit */ + drvdata->width = 8; + else + /* 32 bit */ + drvdata->width = 4; + + addr_ctrl = readl(drvdata->base + SDRAM_ADDR_CTRL_REG); + rank_ctrl = readl(drvdata->base + SDRAM_RANK_CTRL_REG); + for (i = 0; i < SDRAM_NUM_CS; i++) { + dimm = mci->dimms[i]; + + if (!(rank_ctrl & SDRAM_RANK_CTRL_EXIST_MASK(i))) + continue; + + drvdata->cs_addr_sel[i] = + !!(addr_ctrl & SDRAM_ADDR_CTRL_ADDR_SEL_MASK(i)); + + cs_struct = (addr_ctrl & SDRAM_ADDR_CTRL_STRUCT_MASK(i)) >> SDRAM_ADDR_CTRL_STRUCT_OFFSET(i); + cs_size = ((addr_ctrl & SDRAM_ADDR_CTRL_SIZE_HIGH_MASK(i)) >> (SDRAM_ADDR_CTRL_SIZE_HIGH_OFFSET(i) - 2) | + ((addr_ctrl & SDRAM_ADDR_CTRL_SIZE_LOW_MASK(i)) >> SDRAM_ADDR_CTRL_SIZE_LOW_OFFSET(i))); + + switch (cs_size) { + case 0: /* 2GBit */ + dimm->nr_pages = 524288; + break; + case 1: /* 256MBit */ + dimm->nr_pages = 65536; + break; + case 2: /* 512MBit */ + dimm->nr_pages = 131072; + break; + case 3: /* 1GBit */ + dimm->nr_pages = 262144; + break; + case 4: /* 4GBit */ + dimm->nr_pages = 1048576; + break; + case 5: /* 8GBit */ + dimm->nr_pages = 2097152; + break; + } + dimm->grain = 8; + dimm->dtype = cs_struct ? DEV_X16 : DEV_X8; + dimm->mtype = (config & SDRAM_CONFIG_REGISTERED_MASK) ? + MEM_RDDR3 : MEM_DDR3; + dimm->edac_mode = EDAC_SECDED; + } +} + +static const struct of_device_id axp_mc_of_match[] = { + {.compatible = "marvell,armada-xp-sdram-controller",}, + {}, +}; +MODULE_DEVICE_TABLE(of, axp_mc_of_match); + +static int axp_mc_probe(struct platform_device *pdev) +{ + struct axp_mc_drvdata *drvdata; + struct edac_mc_layer layers[1]; + const struct of_device_id *id; + struct mem_ctl_info *mci; + void __iomem *base; + uint32_t config; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) { + dev_err(&pdev->dev, "Unable to map regs\n"); + return PTR_ERR(base); + } + + config = readl(base + SDRAM_CONFIG_REG); + if (!(config & SDRAM_CONFIG_ECC_MASK)) { + dev_warn(&pdev->dev, "SDRAM ECC is not enabled\n"); + return -EINVAL; + } + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = SDRAM_NUM_CS; + layers[0].is_virt_csrow = true; + + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*drvdata)); + if (!mci) + return -ENOMEM; + + drvdata = mci->pvt_info; + drvdata->base = base; + mci->pdev = &pdev->dev; + platform_set_drvdata(pdev, mci); + + id = of_match_device(axp_mc_of_match, &pdev->dev); + mci->edac_check = axp_mc_check; + mci->mtype_cap = MEM_FLAG_DDR3; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = pdev->dev.driver->name; + mci->ctl_name = id ? 
id->compatible : "unknown"; + mci->dev_name = dev_name(&pdev->dev); + mci->scrub_mode = SCRUB_NONE; + + axp_mc_read_config(mci); + + /* These SoCs have a reduced width bus */ + if (of_machine_is_compatible("marvell,armada380") || + of_machine_is_compatible("marvell,armadaxp-98dx3236")) + drvdata->width /= 2; + + /* configure SBE threshold */ + /* it seems that SBEs are not captured otherwise */ + writel(1 << SDRAM_ERR_CTRL_THR_OFFSET, drvdata->base + SDRAM_ERR_CTRL_REG); + + /* clear cause registers */ + writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK), drvdata->base + SDRAM_ERR_CAUSE_ERR_REG); + writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK), drvdata->base + SDRAM_ERR_CAUSE_MSG_REG); + + /* clear counter registers */ + writel(0, drvdata->base + SDRAM_ERR_SBE_COUNT_REG); + writel(0, drvdata->base + SDRAM_ERR_DBE_COUNT_REG); + + if (edac_mc_add_mc(mci)) { + edac_mc_free(mci); + return -EINVAL; + } + edac_op_state = EDAC_OPSTATE_POLL; + + return 0; +} + +static void axp_mc_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + platform_set_drvdata(pdev, NULL); +} + +static struct platform_driver axp_mc_driver = { + .probe = axp_mc_probe, + .remove = axp_mc_remove, + .driver = { + .name = "armada_xp_mc_edac", + .of_match_table = of_match_ptr(axp_mc_of_match), + }, +}; + +/************************ EDAC Device (L2 Cache) ***************************/ + +struct aurora_l2_drvdata { + void __iomem *base; + + char msg[128]; + + /* error injection via debugfs */ + uint32_t inject_addr; + uint32_t inject_mask; + uint8_t inject_ctl; + + struct dentry *debugfs; +}; + +#ifdef CONFIG_EDAC_DEBUG +static void aurora_l2_inject(struct aurora_l2_drvdata *drvdata) +{ + drvdata->inject_addr &= AURORA_ERR_INJECT_CTL_ADDR_MASK; + drvdata->inject_ctl &= AURORA_ERR_INJECT_CTL_EN_MASK; + writel(0, drvdata->base + AURORA_ERR_INJECT_CTL_REG); + writel(drvdata->inject_mask, drvdata->base + AURORA_ERR_INJECT_MASK_REG); + writel(drvdata->inject_addr | drvdata->inject_ctl, drvdata->base + AURORA_ERR_INJECT_CTL_REG); +} +#endif + +static void aurora_l2_check(struct edac_device_ctl_info *dci) +{ + struct aurora_l2_drvdata *drvdata = dci->pvt_info; + uint32_t cnt, src, txn, err, attr_cap, addr_cap, way_cap; + unsigned int cnt_ce, cnt_ue; + char *msg = drvdata->msg; + size_t size = sizeof(drvdata->msg); + size_t len = 0; + + cnt = readl(drvdata->base + AURORA_ERR_CNT_REG); + attr_cap = readl(drvdata->base + AURORA_ERR_ATTR_CAP_REG); + addr_cap = readl(drvdata->base + AURORA_ERR_ADDR_CAP_REG); + way_cap = readl(drvdata->base + AURORA_ERR_WAY_CAP_REG); + + cnt_ce = (cnt & AURORA_ERR_CNT_CE_MASK) >> AURORA_ERR_CNT_CE_OFFSET; + cnt_ue = (cnt & AURORA_ERR_CNT_UE_MASK) >> AURORA_ERR_CNT_UE_OFFSET; + /* clear error counter registers */ + if (cnt_ce || cnt_ue) + writel(AURORA_ERR_CNT_CLR, drvdata->base + AURORA_ERR_CNT_REG); + + if (!(attr_cap & AURORA_ERR_ATTR_CAP_VALID)) + goto clear_remaining; + + src = (attr_cap & AURORA_ERR_ATTR_SRC_MSK) >> AURORA_ERR_ATTR_SRC_OFF; + if (src <= 3) + len += scnprintf(msg+len, size-len, "src=CPU%d ", src); + else + len += scnprintf(msg+len, size-len, "src=IO "); + + txn = (attr_cap & AURORA_ERR_ATTR_TXN_MSK) >> AURORA_ERR_ATTR_TXN_OFF; + switch (txn) { + case 0: + len += scnprintf(msg+len, size-len, "txn=Data-Read "); + break; + case 1: + len += scnprintf(msg+len, size-len, "txn=Isn-Read "); + break; + case 2: + len += scnprintf(msg+len, size-len, "txn=Clean-Flush "); + 
break;
+	case 3:
+		len += scnprintf(msg+len, size-len, "txn=Eviction ");
+		break;
+	case 4:
+		len += scnprintf(msg+len, size-len,
+				 "txn=Read-Modify-Write ");
+		break;
+	}
+
+	err = (attr_cap & AURORA_ERR_ATTR_ERR_MSK) >> AURORA_ERR_ATTR_ERR_OFF;
+	switch (err) {
+	case 0:
+		len += scnprintf(msg+len, size-len, "err=CorrECC ");
+		break;
+	case 1:
+		len += scnprintf(msg+len, size-len, "err=UnCorrECC ");
+		break;
+	case 2:
+		len += scnprintf(msg+len, size-len, "err=TagParity ");
+		break;
+	}
+
+	len += scnprintf(msg+len, size-len, "addr=0x%x ", addr_cap & AURORA_ERR_ADDR_CAP_ADDR_MASK);
+	len += scnprintf(msg+len, size-len, "index=0x%x ", (way_cap & AURORA_ERR_WAY_IDX_MSK) >> AURORA_ERR_WAY_IDX_OFF);
+	len += scnprintf(msg+len, size-len, "way=0x%x", (way_cap & AURORA_ERR_WAY_CAP_WAY_MASK) >> AURORA_ERR_WAY_CAP_WAY_OFFSET);
+
+	/* clear error capture registers */
+	writel(AURORA_ERR_ATTR_CAP_VALID, drvdata->base + AURORA_ERR_ATTR_CAP_REG);
+	if (err) {
+		/* UnCorrECC or TagParity */
+		if (cnt_ue)
+			cnt_ue--;
+		edac_device_handle_ue(dci, 0, 0, drvdata->msg);
+	} else {
+		if (cnt_ce)
+			cnt_ce--;
+		edac_device_handle_ce(dci, 0, 0, drvdata->msg);
+	}
+
+clear_remaining:
+	/* report remaining errors */
+	while (cnt_ue--)
+		edac_device_handle_ue(dci, 0, 0, "details unavailable (multiple errors)");
+	while (cnt_ce--)
+		edac_device_handle_ce(dci, 0, 0, "details unavailable (multiple errors)");
+}
+
+static void aurora_l2_poll(struct edac_device_ctl_info *dci)
+{
+#ifdef CONFIG_EDAC_DEBUG
+	struct aurora_l2_drvdata *drvdata = dci->pvt_info;
+#endif
+
+	aurora_l2_check(dci);
+#ifdef CONFIG_EDAC_DEBUG
+	aurora_l2_inject(drvdata);
+#endif
+}
+
+static const struct of_device_id aurora_l2_of_match[] = {
+	{.compatible = "marvell,aurora-system-cache",},
+	{},
+};
+MODULE_DEVICE_TABLE(of, aurora_l2_of_match);
+
+static int aurora_l2_probe(struct platform_device *pdev)
+{
+	struct aurora_l2_drvdata *drvdata;
+	struct edac_device_ctl_info *dci;
+	const struct of_device_id *id;
+	uint32_t l2x0_aux_ctrl;
+	void __iomem *base;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base)) {
+		dev_err(&pdev->dev, "Unable to map regs\n");
+		return PTR_ERR(base);
+	}
+
+	l2x0_aux_ctrl = readl(base + L2X0_AUX_CTRL);
+	if (!(l2x0_aux_ctrl & AURORA_ACR_PARITY_EN))
+		dev_warn(&pdev->dev, "tag parity is not enabled\n");
+	if (!(l2x0_aux_ctrl & AURORA_ACR_ECC_EN))
+		dev_warn(&pdev->dev, "data ECC is not enabled\n");
+
+	dci = edac_device_alloc_ctl_info(sizeof(*drvdata),
+					 "cpu", 1, "L", 1, 2, 0);
+	if (!dci)
+		return -ENOMEM;
+
+	drvdata = dci->pvt_info;
+	drvdata->base = base;
+	dci->dev = &pdev->dev;
+	platform_set_drvdata(pdev, dci);
+
+	id = of_match_device(aurora_l2_of_match, &pdev->dev);
+	dci->edac_check = aurora_l2_poll;
+	dci->mod_name = pdev->dev.driver->name;
+	dci->ctl_name = id ?
id->compatible : "unknown"; + dci->dev_name = dev_name(&pdev->dev); + + /* clear registers */ + writel(AURORA_ERR_CNT_CLR, drvdata->base + AURORA_ERR_CNT_REG); + writel(AURORA_ERR_ATTR_CAP_VALID, drvdata->base + AURORA_ERR_ATTR_CAP_REG); + + if (edac_device_add_device(dci)) { + edac_device_free_ctl_info(dci); + return -EINVAL; + } + +#ifdef CONFIG_EDAC_DEBUG + drvdata->debugfs = edac_debugfs_create_dir(dev_name(&pdev->dev)); + if (drvdata->debugfs) { + edac_debugfs_create_x32("inject_addr", 0644, + drvdata->debugfs, + &drvdata->inject_addr); + edac_debugfs_create_x32("inject_mask", 0644, + drvdata->debugfs, + &drvdata->inject_mask); + edac_debugfs_create_x8("inject_ctl", 0644, + drvdata->debugfs, &drvdata->inject_ctl); + } +#endif + + return 0; +} + +static void aurora_l2_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *dci = platform_get_drvdata(pdev); +#ifdef CONFIG_EDAC_DEBUG + struct aurora_l2_drvdata *drvdata = dci->pvt_info; + + edac_debugfs_remove_recursive(drvdata->debugfs); +#endif + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(dci); + platform_set_drvdata(pdev, NULL); +} + +static struct platform_driver aurora_l2_driver = { + .probe = aurora_l2_probe, + .remove = aurora_l2_remove, + .driver = { + .name = "aurora_l2_edac", + .of_match_table = of_match_ptr(aurora_l2_of_match), + }, +}; + +/************************ Driver registration ******************************/ + +static struct platform_driver * const drivers[] = { + &axp_mc_driver, + &aurora_l2_driver, +}; + +static int __init armada_xp_edac_init(void) +{ + int res; + + if (ghes_get_devices()) + return -EBUSY; + + /* only polling is supported */ + edac_op_state = EDAC_OPSTATE_POLL; + + res = platform_register_drivers(drivers, ARRAY_SIZE(drivers)); + if (res) + pr_warn("Armada XP EDAC drivers fail to register\n"); + + return 0; +} +module_init(armada_xp_edac_init); + +static void __exit armada_xp_edac_exit(void) +{ + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); +} +module_exit(armada_xp_edac_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Pengutronix"); +MODULE_DESCRIPTION("EDAC Drivers for Marvell Armada XP SDRAM and L2 Cache Controller"); diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c new file mode 100644 index 000000000000..dadb8acbee3d --- /dev/null +++ b/drivers/edac/aspeed_edac.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2018, 2019 Cisco Systems + */ + +#include <linux/edac.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/stop_machine.h> +#include <linux/io.h> +#include <linux/of_address.h> +#include <linux/regmap.h> +#include "edac_module.h" + + +#define DRV_NAME "aspeed-edac" + + +#define ASPEED_MCR_PROT 0x00 /* protection key register */ +#define ASPEED_MCR_CONF 0x04 /* configuration register */ +#define ASPEED_MCR_INTR_CTRL 0x50 /* interrupt control/status register */ +#define ASPEED_MCR_ADDR_UNREC 0x58 /* address of first un-recoverable error */ +#define ASPEED_MCR_ADDR_REC 0x5c /* address of last recoverable error */ +#define ASPEED_MCR_LAST ASPEED_MCR_ADDR_REC + + +#define ASPEED_MCR_PROT_PASSWD 0xfc600309 +#define ASPEED_MCR_CONF_DRAM_TYPE BIT(4) +#define ASPEED_MCR_CONF_ECC BIT(7) +#define ASPEED_MCR_INTR_CTRL_CLEAR BIT(31) +#define ASPEED_MCR_INTR_CTRL_CNT_REC GENMASK(23, 16) +#define ASPEED_MCR_INTR_CTRL_CNT_UNREC GENMASK(15, 12) +#define ASPEED_MCR_INTR_CTRL_ENABLE (BIT(0) | BIT(1)) + + +static struct regmap 
+
+
+static int regmap_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+	void __iomem *regs = (void __iomem *)context;
+
+	/* enable write to MCR register set */
+	writel(ASPEED_MCR_PROT_PASSWD, regs + ASPEED_MCR_PROT);
+
+	writel(val, regs + reg);
+
+	/* disable write to MCR register set */
+	writel(~ASPEED_MCR_PROT_PASSWD, regs + ASPEED_MCR_PROT);
+
+	return 0;
+}
+
+
+static int regmap_reg_read(void *context, unsigned int reg, unsigned int *val)
+{
+	void __iomem *regs = (void __iomem *)context;
+
+	*val = readl(regs + reg);
+
+	return 0;
+}
+
+static bool regmap_is_volatile(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case ASPEED_MCR_PROT:
+	case ASPEED_MCR_INTR_CTRL:
+	case ASPEED_MCR_ADDR_UNREC:
+	case ASPEED_MCR_ADDR_REC:
+		return true;
+	default:
+		return false;
+	}
+}
+
+
+static const struct regmap_config aspeed_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = ASPEED_MCR_LAST,
+	.reg_write = regmap_reg_write,
+	.reg_read = regmap_reg_read,
+	.volatile_reg = regmap_is_volatile,
+	.fast_io = true,
+};
+
+
+static void count_rec(struct mem_ctl_info *mci, u8 rec_cnt, u32 rec_addr)
+{
+	struct csrow_info *csrow = mci->csrows[0];
+	u32 page, offset, syndrome;
+
+	if (!rec_cnt)
+		return;
+
+	/* report first few errors (if there are) */
+	/* note: no addresses are recorded */
+	if (rec_cnt > 1) {
+		/* page, offset and syndrome are not available */
+		page = 0;
+		offset = 0;
+		syndrome = 0;
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, rec_cnt-1,
+				     page, offset, syndrome, 0, 0, -1,
+				     "address(es) not available", "");
+	}
+
+	/* report last error */
+	/* note: rec_addr is the last recoverable error addr */
+	page = rec_addr >> PAGE_SHIFT;
+	offset = rec_addr & ~PAGE_MASK;
+	/* syndrome is not available */
+	syndrome = 0;
+	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+			     csrow->first_page + page, offset, syndrome,
+			     0, 0, -1, "", "");
+}
+
+
+static void count_un_rec(struct mem_ctl_info *mci, u8 un_rec_cnt,
+			 u32 un_rec_addr)
+{
+	struct csrow_info *csrow = mci->csrows[0];
+	u32 page, offset, syndrome;
+
+	if (!un_rec_cnt)
+		return;
+
+	/* report the first
error */ + /* note: un_rec_addr is the first unrecoverable error addr */ + page = un_rec_addr >> PAGE_SHIFT; + offset = un_rec_addr & ~PAGE_MASK; + /* syndrome is not available */ + syndrome = 0; + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + csrow->first_page + page, offset, syndrome, + 0, 0, -1, "", ""); + + /* report further errors (if there are) */ + /* note: no addresses are recorded */ + if (un_rec_cnt > 1) { + /* page, offset and syndrome are not available */ + page = 0; + offset = 0; + syndrome = 0; + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, un_rec_cnt-1, + page, offset, syndrome, 0, 0, -1, + "address(es) not available", ""); + } +} + + +static irqreturn_t mcr_isr(int irq, void *arg) +{ + struct mem_ctl_info *mci = arg; + u32 rec_addr, un_rec_addr; + u32 reg50, reg5c, reg58; + u8 rec_cnt, un_rec_cnt; + + regmap_read(aspeed_regmap, ASPEED_MCR_INTR_CTRL, ®50); + dev_dbg(mci->pdev, "received edac interrupt w/ mcr register 50: 0x%x\n", + reg50); + + /* collect data about recoverable and unrecoverable errors */ + rec_cnt = (reg50 & ASPEED_MCR_INTR_CTRL_CNT_REC) >> 16; + un_rec_cnt = (reg50 & ASPEED_MCR_INTR_CTRL_CNT_UNREC) >> 12; + + dev_dbg(mci->pdev, "%d recoverable interrupts and %d unrecoverable interrupts\n", + rec_cnt, un_rec_cnt); + + regmap_read(aspeed_regmap, ASPEED_MCR_ADDR_UNREC, ®58); + un_rec_addr = reg58; + + regmap_read(aspeed_regmap, ASPEED_MCR_ADDR_REC, ®5c); + rec_addr = reg5c; + + /* clear interrupt flags and error counters: */ + regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL, + ASPEED_MCR_INTR_CTRL_CLEAR, + ASPEED_MCR_INTR_CTRL_CLEAR); + + regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL, + ASPEED_MCR_INTR_CTRL_CLEAR, 0); + + /* process recoverable and unrecoverable errors */ + count_rec(mci, rec_cnt, rec_addr); + count_un_rec(mci, un_rec_cnt, un_rec_addr); + + if (!rec_cnt && !un_rec_cnt) + dev_dbg(mci->pdev, "received edac interrupt, but did not find any ECC counters\n"); + + regmap_read(aspeed_regmap, ASPEED_MCR_INTR_CTRL, ®50); + dev_dbg(mci->pdev, "edac interrupt handled. 
mcr reg 50 is now: 0x%x\n", + reg50); + + return IRQ_HANDLED; +} + + +static int config_irq(void *ctx, struct platform_device *pdev) +{ + int irq; + int rc; + + /* register interrupt handler */ + irq = platform_get_irq(pdev, 0); + dev_dbg(&pdev->dev, "got irq %d\n", irq); + if (irq < 0) + return irq; + + rc = devm_request_irq(&pdev->dev, irq, mcr_isr, IRQF_TRIGGER_HIGH, + DRV_NAME, ctx); + if (rc) { + dev_err(&pdev->dev, "unable to request irq %d\n", irq); + return rc; + } + + /* enable interrupts */ + regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL, + ASPEED_MCR_INTR_CTRL_ENABLE, + ASPEED_MCR_INTR_CTRL_ENABLE); + + return 0; +} + + +static int init_csrows(struct mem_ctl_info *mci) +{ + struct csrow_info *csrow = mci->csrows[0]; + u32 nr_pages, dram_type; + struct dimm_info *dimm; + struct device_node *np; + struct resource r; + u32 reg04; + int rc; + + /* retrieve info about physical memory from device tree */ + np = of_find_node_by_name(NULL, "memory"); + if (!np) { + dev_err(mci->pdev, "dt: missing /memory node\n"); + return -ENODEV; + } + + rc = of_address_to_resource(np, 0, &r); + + of_node_put(np); + + if (rc) { + dev_err(mci->pdev, "dt: failed requesting resource for /memory node\n"); + return rc; + } + + dev_dbg(mci->pdev, "dt: /memory node resources: first page %pR, PAGE_SHIFT macro=0x%x\n", + &r, PAGE_SHIFT); + + csrow->first_page = r.start >> PAGE_SHIFT; + nr_pages = resource_size(&r) >> PAGE_SHIFT; + csrow->last_page = csrow->first_page + nr_pages - 1; + + regmap_read(aspeed_regmap, ASPEED_MCR_CONF, ®04); + dram_type = (reg04 & ASPEED_MCR_CONF_DRAM_TYPE) ? MEM_DDR4 : MEM_DDR3; + + dimm = csrow->channels[0]->dimm; + dimm->mtype = dram_type; + dimm->edac_mode = EDAC_SECDED; + dimm->nr_pages = nr_pages / csrow->nr_channels; + + dev_dbg(mci->pdev, "initialized dimm with first_page=0x%lx and nr_pages=0x%x\n", + csrow->first_page, nr_pages); + + return 0; +} + + +static int aspeed_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + void __iomem *regs; + u32 reg04; + int rc; + + regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + aspeed_regmap = devm_regmap_init(dev, NULL, (__force void *)regs, + &aspeed_regmap_config); + if (IS_ERR(aspeed_regmap)) + return PTR_ERR(aspeed_regmap); + + /* bail out if ECC mode is not configured */ + regmap_read(aspeed_regmap, ASPEED_MCR_CONF, ®04); + if (!(reg04 & ASPEED_MCR_CONF_ECC)) { + dev_err(&pdev->dev, "ECC mode is not configured in u-boot\n"); + return -EPERM; + } + + edac_op_state = EDAC_OPSTATE_INT; + + /* allocate & init EDAC MC data structure */ + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 1; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 1; + layers[1].is_virt_csrow = false; + + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->scrub_cap = SCRUB_FLAG_HW_SRC; + mci->scrub_mode = SCRUB_HW_SRC; + mci->mod_name = DRV_NAME; + mci->ctl_name = "MIC"; + mci->dev_name = dev_name(&pdev->dev); + + rc = init_csrows(mci); + if (rc) { + dev_err(&pdev->dev, "failed to init csrows\n"); + goto probe_exit02; + } + + platform_set_drvdata(pdev, mci); + + /* register with edac core */ + rc = edac_mc_add_mc(mci); + if (rc) { + dev_err(&pdev->dev, "failed to register 
with EDAC core\n"); + goto probe_exit02; + } + + /* register interrupt handler and enable interrupts */ + rc = config_irq(mci, pdev); + if (rc) { + dev_err(&pdev->dev, "failed setting up irq\n"); + goto probe_exit01; + } + + return 0; + +probe_exit01: + edac_mc_del_mc(&pdev->dev); +probe_exit02: + edac_mc_free(mci); + return rc; +} + + +static void aspeed_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci; + + /* disable interrupts */ + regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL, + ASPEED_MCR_INTR_CTRL_ENABLE, 0); + + /* free resources */ + mci = edac_mc_del_mc(&pdev->dev); + if (mci) + edac_mc_free(mci); +} + + +static const struct of_device_id aspeed_of_match[] = { + { .compatible = "aspeed,ast2400-sdram-edac" }, + { .compatible = "aspeed,ast2500-sdram-edac" }, + { .compatible = "aspeed,ast2600-sdram-edac" }, + {}, +}; + +MODULE_DEVICE_TABLE(of, aspeed_of_match); + +static struct platform_driver aspeed_driver = { + .driver = { + .name = DRV_NAME, + .of_match_table = aspeed_of_match + }, + .probe = aspeed_probe, + .remove = aspeed_remove +}; +module_platform_driver(aspeed_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Stefan Schaeckeler <sschaeck@cisco.com>"); +MODULE_DESCRIPTION("Aspeed BMC SoC EDAC driver"); +MODULE_VERSION("1.0"); diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c new file mode 100644 index 000000000000..ae3bb7afa103 --- /dev/null +++ b/drivers/edac/bluefield_edac.c @@ -0,0 +1,494 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Bluefield-specific EDAC driver. + * + * Copyright (c) 2019 Mellanox Technologies. + */ + +#include <linux/acpi.h> +#include <linux/arm-smccc.h> +#include <linux/bitfield.h> +#include <linux/edac.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#include "edac_module.h" + +#define DRIVER_NAME "bluefield-edac" + +/* + * Mellanox BlueField EMI (External Memory Interface) register definitions. + */ + +#define MLXBF_ECC_CNT 0x340 +#define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0) +#define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16) + +#define MLXBF_ECC_ERR 0x348 +#define MLXBF_ECC_ERR__SECC BIT(0) +#define MLXBF_ECC_ERR__DECC BIT(16) + +#define MLXBF_ECC_LATCH_SEL 0x354 +#define MLXBF_ECC_LATCH_SEL__START BIT(24) + +#define MLXBF_ERR_ADDR_0 0x358 + +#define MLXBF_ERR_ADDR_1 0x37c + +#define MLXBF_SYNDROM 0x35c +#define MLXBF_SYNDROM__DERR BIT(0) +#define MLXBF_SYNDROM__SERR BIT(1) +#define MLXBF_SYNDROM__SYN GENMASK(25, 16) + +#define MLXBF_ADD_INFO 0x364 +#define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8) + +#define MLXBF_EDAC_MAX_DIMM_PER_MC 2 +#define MLXBF_EDAC_ERROR_GRAIN 8 + +#define MLXBF_WRITE_REG_32 (0x82000009) +#define MLXBF_READ_REG_32 (0x8200000A) +#define MLXBF_SIP_SVC_VERSION (0x8200ff03) + +#define MLXBF_SMCCC_ACCESS_VIOLATION (-4) + +#define MLXBF_SVC_REQ_MAJOR 0 +#define MLXBF_SVC_REQ_MINOR 3 + +/* + * Request MLXBF_SIP_GET_DIMM_INFO + * + * Retrieve information about DIMM on a certain slot. + * + * Call register usage: + * a0: MLXBF_SIP_GET_DIMM_INFO + * a1: (Memory controller index) << 16 | (Dimm index in memory controller) + * a2-7: not used. + * + * Return status: + * a0: MLXBF_DIMM_INFO defined below describing the DIMM. + * a1-3: not used. 
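+ *
+ * For example, a1 = (1 << 16) | 0 requests the information for DIMM 0 on
+ * memory controller 1; the fields of the returned MLXBF_DIMM_INFO value
+ * are decoded with the MLXBF_DIMM_INFO__* masks below.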
+ */ +#define MLXBF_SIP_GET_DIMM_INFO 0x82000008 + +/* Format for the SMC response about the memory information */ +#define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0) +#define MLXBF_DIMM_INFO__IS_RDIMM BIT(16) +#define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17) +#define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18) +#define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21) +#define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24) + +struct bluefield_edac_priv { + /* pointer to device structure */ + struct device *dev; + int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC]; + void __iomem *emi_base; + int dimm_per_mc; + /* access to secure regs supported */ + bool svc_sreg_support; + /* SMC table# for secure regs access */ + u32 sreg_tbl; +}; + +static u64 smc_call1(u64 smc_op, u64 smc_arg) +{ + struct arm_smccc_res res; + + arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + +static int secure_readl(void __iomem *addr, u32 *result, u32 sreg_tbl) +{ + struct arm_smccc_res res; + int status; + + arm_smccc_smc(MLXBF_READ_REG_32, sreg_tbl, (uintptr_t)addr, + 0, 0, 0, 0, 0, &res); + + status = res.a0; + + if (status == SMCCC_RET_NOT_SUPPORTED || + status == MLXBF_SMCCC_ACCESS_VIOLATION) + return -1; + + *result = (u32)res.a1; + return 0; +} + +static int secure_writel(void __iomem *addr, u32 data, u32 sreg_tbl) +{ + struct arm_smccc_res res; + int status; + + arm_smccc_smc(MLXBF_WRITE_REG_32, sreg_tbl, data, (uintptr_t)addr, + 0, 0, 0, 0, &res); + + status = res.a0; + + if (status == SMCCC_RET_NOT_SUPPORTED || + status == MLXBF_SMCCC_ACCESS_VIOLATION) + return -1; + else + return 0; +} + +static int bluefield_edac_readl(struct bluefield_edac_priv *priv, u32 offset, u32 *result) +{ + void __iomem *addr; + int err = 0; + + addr = priv->emi_base + offset; + + if (priv->svc_sreg_support) + err = secure_readl(addr, result, priv->sreg_tbl); + else + *result = readl(addr); + + return err; +} + +static int bluefield_edac_writel(struct bluefield_edac_priv *priv, u32 offset, u32 data) +{ + void __iomem *addr; + int err = 0; + + addr = priv->emi_base + offset; + + if (priv->svc_sreg_support) + err = secure_writel(addr, data, priv->sreg_tbl); + else + writel(data, addr); + + return err; +} + +/* + * Gather the ECC information from the External Memory Interface registers + * and report it to the edac handler. + */ +static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, + int error_cnt, + int is_single_ecc) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + u32 dram_additional_info, err_prank, edea0, edea1; + u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; + enum hw_event_mc_err_type ecc_type; + u64 ecc_dimm_addr; + int ecc_dimm, err; + + ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : + HW_EVENT_ERR_UNCORRECTED; + + /* + * Tell the External Memory Interface to populate the relevant + * registers with information about the last ECC error occurrence. + */ + ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; + err = bluefield_edac_writel(priv, MLXBF_ECC_LATCH_SEL, ecc_latch_select); + if (err) + dev_err(priv->dev, "ECC latch select write failed.\n"); + + /* + * Verify that the ECC reported info in the registers is of the + * same type as the one asked to report. If not, just report the + * error without the detailed information. 
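+	 * (The SERR and DERR bits of the syndrome register indicate which
+	 * error type was actually latched.)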
+ */ + err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom); + if (err) { + dev_err(priv->dev, "DRAM syndrom read failed.\n"); + return; + } + + serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); + derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); + syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); + + if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) { + edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0, + 0, 0, -1, mci->ctl_name, ""); + return; + } + + err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info); + if (err) { + dev_err(priv->dev, "DRAM additional info read failed.\n"); + return; + } + + err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); + + ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; + + err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0); + if (err) { + dev_err(priv->dev, "Error addr 0 read failed.\n"); + return; + } + + err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1); + if (err) { + dev_err(priv->dev, "Error addr 1 read failed.\n"); + return; + } + + ecc_dimm_addr = ((u64)edea1 << 32) | edea0; + + edac_mc_handle_error(ecc_type, mci, error_cnt, + PFN_DOWN(ecc_dimm_addr), + offset_in_page(ecc_dimm_addr), + syndrom, ecc_dimm, 0, 0, mci->ctl_name, ""); +} + +static void bluefield_edac_check(struct mem_ctl_info *mci) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; + int err; + + /* + * The memory controller might not be initialized by the firmware + * when there isn't memory, which may lead to bad register readings. + */ + if (mci->edac_cap == EDAC_FLAG_NONE) + return; + + err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count); + if (err) { + dev_err(priv->dev, "ECC count read failed.\n"); + return; + } + + single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); + double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); + + if (single_error_count) { + ecc_error |= MLXBF_ECC_ERR__SECC; + + bluefield_gather_report_ecc(mci, single_error_count, 1); + } + + if (double_error_count) { + ecc_error |= MLXBF_ECC_ERR__DECC; + + bluefield_gather_report_ecc(mci, double_error_count, 0); + } + + /* Write to clear reported errors. */ + if (ecc_count) { + err = bluefield_edac_writel(priv, MLXBF_ECC_ERR, ecc_error); + if (err) + dev_err(priv->dev, "ECC Error write failed.\n"); + } +} + +/* Initialize the DIMMs information for the given memory controller. 
*/ +static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + u64 mem_ctrl_idx = mci->mc_idx; + struct dimm_info *dimm; + u64 smc_info, smc_arg; + int is_empty = 1, i; + + for (i = 0; i < priv->dimm_per_mc; i++) { + dimm = mci->dimms[i]; + + smc_arg = mem_ctrl_idx << 16 | i; + smc_info = smc_call1(MLXBF_SIP_GET_DIMM_INFO, smc_arg); + + if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { + dimm->mtype = MEM_EMPTY; + continue; + } + + is_empty = 0; + + dimm->edac_mode = EDAC_SECDED; + + if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info)) + dimm->mtype = MEM_NVDIMM; + else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info)) + dimm->mtype = MEM_LRDDR4; + else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info)) + dimm->mtype = MEM_RDDR4; + else + dimm->mtype = MEM_DDR4; + + dimm->nr_pages = + FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) * + (SZ_1G / PAGE_SIZE); + dimm->grain = MLXBF_EDAC_ERROR_GRAIN; + + /* Mem controller for BlueField only supports x4, x8 and x16 */ + switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) { + case 4: + dimm->dtype = DEV_X4; + break; + case 8: + dimm->dtype = DEV_X8; + break; + case 16: + dimm->dtype = DEV_X16; + break; + default: + dimm->dtype = DEV_UNKNOWN; + } + + priv->dimm_ranks[i] = + FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info); + } + + if (is_empty) + mci->edac_cap = EDAC_FLAG_NONE; + else + mci->edac_cap = EDAC_FLAG_SECDED; +} + +static int bluefield_edac_mc_probe(struct platform_device *pdev) +{ + struct bluefield_edac_priv *priv; + struct device *dev = &pdev->dev; + struct edac_mc_layer layers[1]; + struct arm_smccc_res res; + struct mem_ctl_info *mci; + struct resource *emi_res; + unsigned int mc_idx, dimm_count; + int rc, ret; + + /* Read the MSS (Memory SubSystem) index from ACPI table. */ + if (device_property_read_u32(dev, "mss_number", &mc_idx)) { + dev_warn(dev, "bf_edac: MSS number unknown\n"); + return -EINVAL; + } + + /* Read the DIMMs per MC from ACPI table. */ + if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) { + dev_warn(dev, "bf_edac: DIMMs per MC unknown\n"); + return -EINVAL; + } + + if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) { + dev_warn(dev, "bf_edac: DIMMs per MC not valid\n"); + return -EINVAL; + } + + emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!emi_res) + return -EINVAL; + + layers[0].type = EDAC_MC_LAYER_SLOT; + layers[0].size = dimm_count; + layers[0].is_virt_csrow = true; + + mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv)); + if (!mci) + return -ENOMEM; + + priv = mci->pvt_info; + priv->dev = dev; + + /* + * The "sec_reg_block" property in the ACPI table determines the method + * the driver uses to access the EMI registers: + * a) property is not present - directly access registers via readl/writel + * b) property is present - indirectly access registers via SMC calls + * (assuming required Silicon Provider service version found) + */ + if (device_property_read_u32(dev, "sec_reg_block", &priv->sreg_tbl)) { + priv->svc_sreg_support = false; + } else { + /* + * Check for minimum required Arm Silicon Provider (SiP) service + * version, ensuring support of required SMC function IDs. 
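+		 * The firmware returns the version as (major, minor) in
+		 * (res.a0, res.a1); the check below requires an exact major
+		 * match and a minor of at least MLXBF_SVC_REQ_MINOR.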
+ */ + arm_smccc_smc(MLXBF_SIP_SVC_VERSION, 0, 0, 0, 0, 0, 0, 0, &res); + if (res.a0 == MLXBF_SVC_REQ_MAJOR && + res.a1 >= MLXBF_SVC_REQ_MINOR) { + priv->svc_sreg_support = true; + } else { + dev_err(dev, "Required SMCs are not supported.\n"); + ret = -EINVAL; + goto err; + } + } + + priv->dimm_per_mc = dimm_count; + if (!priv->svc_sreg_support) { + priv->emi_base = devm_ioremap_resource(dev, emi_res); + if (IS_ERR(priv->emi_base)) { + dev_err(dev, "failed to map EMI IO resource\n"); + ret = PTR_ERR(priv->emi_base); + goto err; + } + } else { + priv->emi_base = (void __iomem *)emi_res->start; + } + + mci->pdev = dev; + mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | + MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + + mci->mod_name = DRIVER_NAME; + mci->ctl_name = "BlueField_Memory_Controller"; + mci->dev_name = dev_name(dev); + mci->edac_check = bluefield_edac_check; + + /* Initialize mci with the actual populated DIMM information. */ + bluefield_edac_init_dimms(mci); + + platform_set_drvdata(pdev, mci); + + /* Register with EDAC core */ + rc = edac_mc_add_mc(mci); + if (rc) { + dev_err(dev, "failed to register with EDAC core\n"); + ret = rc; + goto err; + } + + /* Only POLL mode supported so far. */ + edac_op_state = EDAC_OPSTATE_POLL; + + return 0; + +err: + edac_mc_free(mci); + + return ret; +} + +static void bluefield_edac_mc_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); +} + +static const struct acpi_device_id bluefield_mc_acpi_ids[] = { + {"MLNXBF08", 0}, + {} +}; + +MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids); + +static struct platform_driver bluefield_edac_mc_driver = { + .driver = { + .name = DRIVER_NAME, + .acpi_match_table = bluefield_mc_acpi_ids, + }, + .probe = bluefield_edac_mc_probe, + .remove = bluefield_edac_mc_remove, +}; + +module_platform_driver(bluefield_edac_mc_driver); + +MODULE_DESCRIPTION("Mellanox BlueField memory edac driver"); +MODULE_AUTHOR("Mellanox Technologies"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c deleted file mode 100644 index c2eaf334b90b..000000000000 --- a/drivers/edac/cell_edac.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Cell MIC driver for ECC counting - * - * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. - * <benh@kernel.crashing.org> - * - * This file may be distributed under the terms of the - * GNU General Public License. 
- */ -#undef DEBUG - -#include <linux/edac.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/platform_device.h> -#include <linux/stop_machine.h> -#include <linux/io.h> -#include <asm/machdep.h> -#include <asm/cell-regs.h> - -#include "edac_core.h" - -struct cell_edac_priv -{ - struct cbe_mic_tm_regs __iomem *regs; - int node; - int chanmask; -#ifdef DEBUG - u64 prev_fir; -#endif -}; - -static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar) -{ - struct cell_edac_priv *priv = mci->pvt_info; - struct csrow_info *csrow = mci->csrows[0]; - unsigned long address, pfn, offset, syndrome; - - dev_dbg(mci->pdev, "ECC CE err on node %d, channel %d, ar = 0x%016llx\n", - priv->node, chan, ar); - - /* Address decoding is likely a bit bogus, to dbl check */ - address = (ar & 0xffffffffe0000000ul) >> 29; - if (priv->chanmask == 0x3) - address = (address << 1) | chan; - pfn = address >> PAGE_SHIFT; - offset = address & ~PAGE_MASK; - syndrome = (ar & 0x000000001fe00000ul) >> 21; - - /* TODO: Decoding of the error address */ - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - csrow->first_page + pfn, offset, syndrome, - 0, chan, -1, "", ""); -} - -static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar) -{ - struct cell_edac_priv *priv = mci->pvt_info; - struct csrow_info *csrow = mci->csrows[0]; - unsigned long address, pfn, offset; - - dev_dbg(mci->pdev, "ECC UE err on node %d, channel %d, ar = 0x%016llx\n", - priv->node, chan, ar); - - /* Address decoding is likely a bit bogus, to dbl check */ - address = (ar & 0xffffffffe0000000ul) >> 29; - if (priv->chanmask == 0x3) - address = (address << 1) | chan; - pfn = address >> PAGE_SHIFT; - offset = address & ~PAGE_MASK; - - /* TODO: Decoding of the error address */ - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - csrow->first_page + pfn, offset, 0, - 0, chan, -1, "", ""); -} - -static void cell_edac_check(struct mem_ctl_info *mci) -{ - struct cell_edac_priv *priv = mci->pvt_info; - u64 fir, addreg, clear = 0; - - fir = in_be64(&priv->regs->mic_fir); -#ifdef DEBUG - if (fir != priv->prev_fir) { - dev_dbg(mci->pdev, "fir change : 0x%016lx\n", fir); - priv->prev_fir = fir; - } -#endif - if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_SINGLE_0_ERR)) { - addreg = in_be64(&priv->regs->mic_df_ecc_address_0); - clear |= CBE_MIC_FIR_ECC_SINGLE_0_RESET; - cell_edac_count_ce(mci, 0, addreg); - } - if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_SINGLE_1_ERR)) { - addreg = in_be64(&priv->regs->mic_df_ecc_address_1); - clear |= CBE_MIC_FIR_ECC_SINGLE_1_RESET; - cell_edac_count_ce(mci, 1, addreg); - } - if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_MULTI_0_ERR)) { - addreg = in_be64(&priv->regs->mic_df_ecc_address_0); - clear |= CBE_MIC_FIR_ECC_MULTI_0_RESET; - cell_edac_count_ue(mci, 0, addreg); - } - if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_MULTI_1_ERR)) { - addreg = in_be64(&priv->regs->mic_df_ecc_address_1); - clear |= CBE_MIC_FIR_ECC_MULTI_1_RESET; - cell_edac_count_ue(mci, 1, addreg); - } - - /* The procedure for clearing FIR bits is a bit ... 
weird */ - if (clear) { - fir &= ~(CBE_MIC_FIR_ECC_ERR_MASK | CBE_MIC_FIR_ECC_SET_MASK); - fir |= CBE_MIC_FIR_ECC_RESET_MASK; - fir &= ~clear; - out_be64(&priv->regs->mic_fir, fir); - (void)in_be64(&priv->regs->mic_fir); - - mb(); /* sync up */ -#ifdef DEBUG - fir = in_be64(&priv->regs->mic_fir); - dev_dbg(mci->pdev, "fir clear : 0x%016lx\n", fir); -#endif - } -} - -static void cell_edac_init_csrows(struct mem_ctl_info *mci) -{ - struct csrow_info *csrow = mci->csrows[0]; - struct dimm_info *dimm; - struct cell_edac_priv *priv = mci->pvt_info; - struct device_node *np; - int j; - u32 nr_pages; - - for (np = NULL; - (np = of_find_node_by_name(np, "memory")) != NULL;) { - struct resource r; - - /* We "know" that the Cell firmware only creates one entry - * in the "memory" nodes. If that changes, this code will - * need to be adapted. - */ - if (of_address_to_resource(np, 0, &r)) - continue; - if (of_node_to_nid(np) != priv->node) - continue; - csrow->first_page = r.start >> PAGE_SHIFT; - nr_pages = resource_size(&r) >> PAGE_SHIFT; - csrow->last_page = csrow->first_page + nr_pages - 1; - - for (j = 0; j < csrow->nr_channels; j++) { - dimm = csrow->channels[j]->dimm; - dimm->mtype = MEM_XDR; - dimm->edac_mode = EDAC_SECDED; - dimm->nr_pages = nr_pages / csrow->nr_channels; - } - dev_dbg(mci->pdev, - "Initialized on node %d, chanmask=0x%x," - " first_page=0x%lx, nr_pages=0x%x\n", - priv->node, priv->chanmask, - csrow->first_page, nr_pages); - break; - } -} - -static int cell_edac_probe(struct platform_device *pdev) -{ - struct cbe_mic_tm_regs __iomem *regs; - struct mem_ctl_info *mci; - struct edac_mc_layer layers[2]; - struct cell_edac_priv *priv; - u64 reg; - int rc, chanmask, num_chans; - - regs = cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(pdev->id)); - if (regs == NULL) - return -ENODEV; - - edac_op_state = EDAC_OPSTATE_POLL; - - /* Get channel population */ - reg = in_be64(®s->mic_mnt_cfg); - dev_dbg(&pdev->dev, "MIC_MNT_CFG = 0x%016llx\n", reg); - chanmask = 0; - if (reg & CBE_MIC_MNT_CFG_CHAN_0_POP) - chanmask |= 0x1; - if (reg & CBE_MIC_MNT_CFG_CHAN_1_POP) - chanmask |= 0x2; - if (chanmask == 0) { - dev_warn(&pdev->dev, - "Yuck ! No channel populated ? Aborting !\n"); - return -ENODEV; - } - dev_dbg(&pdev->dev, "Initial FIR = 0x%016llx\n", - in_be64(®s->mic_fir)); - - /* Allocate & init EDAC MC data structure */ - num_chans = chanmask == 3 ? 
2 : 1; - - layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = 1; - layers[0].is_virt_csrow = true; - layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = num_chans; - layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers, - sizeof(struct cell_edac_priv)); - if (mci == NULL) - return -ENOMEM; - priv = mci->pvt_info; - priv->regs = regs; - priv->node = pdev->id; - priv->chanmask = chanmask; - mci->pdev = &pdev->dev; - mci->mtype_cap = MEM_FLAG_XDR; - mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; - mci->edac_cap = EDAC_FLAG_EC | EDAC_FLAG_SECDED; - mci->mod_name = "cell_edac"; - mci->ctl_name = "MIC"; - mci->dev_name = dev_name(&pdev->dev); - mci->edac_check = cell_edac_check; - cell_edac_init_csrows(mci); - - /* Register with EDAC core */ - rc = edac_mc_add_mc(mci); - if (rc) { - dev_err(&pdev->dev, "failed to register with EDAC core\n"); - edac_mc_free(mci); - return rc; - } - - return 0; -} - -static int cell_edac_remove(struct platform_device *pdev) -{ - struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); - if (mci) - edac_mc_free(mci); - return 0; -} - -static struct platform_driver cell_edac_driver = { - .driver = { - .name = "cbe-mic", - .owner = THIS_MODULE, - }, - .probe = cell_edac_probe, - .remove = cell_edac_remove, -}; - -static int __init cell_edac_init(void) -{ - /* Sanity check registers data structure */ - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_df_ecc_address_0) != 0xf8); - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_df_ecc_address_1) != 0x1b8); - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_df_config) != 0x218); - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_fir) != 0x230); - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_mnt_cfg) != 0x210); - BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, - mic_exc) != 0x208); - - return platform_driver_register(&cell_edac_driver); -} - -static void __exit cell_edac_exit(void) -{ - platform_driver_unregister(&cell_edac_driver); -} - -module_init(cell_edac_init); -module_exit(cell_edac_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>"); -MODULE_DESCRIPTION("ECC counting for Cell MIC"); diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index 7f3c57113ba1..9c9e4369c041 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -1,22 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * cpc925_edac.c, EDAC driver for IBM CPC925 Bridge and Memory Controller. * * Copyright (c) 2008 Wind River Systems, Inc. * * Authors: Cao Qingtao <qingtao.cao@windriver.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/module.h> @@ -27,7 +15,6 @@ #include <linux/platform_device.h> #include <linux/gfp.h> -#include "edac_core.h" #include "edac_module.h" #define CPC925_EDAC_REVISION " Ver: 1.0.0" @@ -562,7 +549,7 @@ static void cpc925_mc_check(struct mem_ctl_info *mci) if (apiexcp & UECC_EXCP_DETECTED) { cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n"); - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, pfn, offset, 0, csrow, -1, -1, mci->ctl_name, ""); @@ -594,8 +581,7 @@ static void cpc925_mc_check(struct mem_ctl_info *mci) /******************** CPU err device********************************/ static u32 cpc925_cpu_mask_disabled(void) { - struct device_node *cpus; - struct device_node *cpunode = NULL; + struct device_node *cpunode; static u32 mask = 0; /* use cached value if available */ @@ -604,22 +590,10 @@ static u32 cpc925_cpu_mask_disabled(void) mask = APIMASK_ADI0 | APIMASK_ADI1; - cpus = of_find_node_by_path("/cpus"); - if (cpus == NULL) { - cpc925_printk(KERN_DEBUG, "No /cpus node !\n"); - return 0; - } - - while ((cpunode = of_get_next_child(cpus, cpunode)) != NULL) { + for_each_of_cpu_node(cpunode) { const u32 *reg = of_get_property(cpunode, "reg", NULL); - - if (strcmp(cpunode->type, "cpu")) { - cpc925_printk(KERN_ERR, "Not a cpu node in /cpus: %s\n", cpunode->name); - continue; - } - if (reg == NULL || *reg > 2) { - cpc925_printk(KERN_ERR, "Bad reg value at %s\n", cpunode->full_name); + cpc925_printk(KERN_ERR, "Bad reg value at %pOF\n", cpunode); continue; } @@ -634,9 +608,6 @@ static u32 cpc925_cpu_mask_disabled(void) "Assuming PI id is equal to CPU MPIC id!\n"); } - of_node_put(cpunode); - of_node_put(cpus); - return mask; } @@ -789,7 +760,7 @@ static struct cpc925_dev_info cpc925_devs[] = { .exit = cpc925_htlink_exit, .check = cpc925_htlink_check, }, - {0}, /* Terminated by NULL */ + { } }; /* @@ -826,7 +797,7 @@ static void cpc925_add_edac_devices(void __iomem *vbase) dev_info->edac_idx = edac_device_alloc_index(); dev_info->edac_dev = edac_device_alloc_ctl_info(0, dev_info->ctl_name, - 1, NULL, 0, 0, NULL, 0, dev_info->edac_idx); + 1, NULL, 0, 0, dev_info->edac_idx); if (!dev_info->edac_dev) { cpc925_printk(KERN_ERR, "No memory for edac device\n"); goto err1; @@ -1000,7 +971,6 @@ static int cpc925_probe(struct platform_device *pdev) mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = CPC925_EDAC_MOD_STR; - mci->mod_ver = CPC925_EDAC_REVISION; mci->ctl_name = pdev->name; if (edac_op_state == EDAC_OPSTATE_POLL) @@ -1040,7 +1010,7 @@ out: return res; } -static int cpc925_remove(struct platform_device *pdev) +static void cpc925_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); @@ -1053,8 +1023,6 @@ static int cpc925_remove(struct platform_device *pdev) edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); - - return 0; } static struct platform_driver cpc925_edac_driver = { diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c new file mode 100644 index 000000000000..8195fc9c9354 --- /dev/null +++ b/drivers/edac/debugfs.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/string_choices.h> + +#include "edac_module.h" + +static struct dentry *edac_debugfs; + +static ssize_t 
edac_fake_inject_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + static enum hw_event_mc_err_type type; + u16 errcount = mci->fake_inject_count; + + if (!errcount) + errcount = 1; + + type = mci->fake_inject_ue ? HW_EVENT_ERR_UNCORRECTED + : HW_EVENT_ERR_CORRECTED; + + printk(KERN_DEBUG + "Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n", + errcount, + (type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE", + str_plural(errcount), + mci->fake_inject_layer[0], + mci->fake_inject_layer[1], + mci->fake_inject_layer[2] + ); + edac_mc_handle_error(type, mci, errcount, 0, 0, 0, + mci->fake_inject_layer[0], + mci->fake_inject_layer[1], + mci->fake_inject_layer[2], + "FAKE ERROR", "for EDAC testing only"); + + return count; +} + +static const struct file_operations debug_fake_inject_fops = { + .open = simple_open, + .write = edac_fake_inject_write, + .llseek = generic_file_llseek, +}; + +void __init edac_debugfs_init(void) +{ + edac_debugfs = debugfs_create_dir("edac", NULL); +} + +void edac_debugfs_exit(void) +{ + debugfs_remove_recursive(edac_debugfs); +} + +void edac_create_debugfs_nodes(struct mem_ctl_info *mci) +{ + struct dentry *parent; + char name[80]; + int i; + + parent = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs); + + for (i = 0; i < mci->n_layers; i++) { + sprintf(name, "fake_inject_%s", + edac_layer_name[mci->layers[i].type]); + debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_layer[i]); + } + + debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_ue); + + debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_count); + + debugfs_create_file("fake_inject", S_IWUSR, parent, &mci->dev, + &debug_fake_inject_fops); + + mci->debugfs = parent; +} + +/* Create a toplevel dir under EDAC's debugfs hierarchy */ +struct dentry *edac_debugfs_create_dir(const char *dirname) +{ + if (!edac_debugfs) + return NULL; + + return debugfs_create_dir(dirname, edac_debugfs); +} +EXPORT_SYMBOL_GPL(edac_debugfs_create_dir); + +/* Create a toplevel dir under EDAC's debugfs hierarchy with parent @parent */ +struct dentry * +edac_debugfs_create_dir_at(const char *dirname, struct dentry *parent) +{ + return debugfs_create_dir(dirname, parent); +} +EXPORT_SYMBOL_GPL(edac_debugfs_create_dir_at); + +/* + * Create a file under EDAC's hierarchy or a sub-hierarchy: + * + * @name: file name + * @mode: file permissions + * @parent: parent dentry. 
If NULL, it becomes the toplevel EDAC dir + * @data: private data of caller + * @fops: file operations of this file + */ +struct dentry * +edac_debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, + void *data, const struct file_operations *fops) +{ + if (!parent) + parent = edac_debugfs; + + return debugfs_create_file(name, mode, parent, data, fops); +} +EXPORT_SYMBOL_GPL(edac_debugfs_create_file); + +/* Wrapper for debugfs_create_x8() */ +void edac_debugfs_create_x8(const char *name, umode_t mode, + struct dentry *parent, u8 *value) +{ + if (!parent) + parent = edac_debugfs; + + debugfs_create_x8(name, mode, parent, value); +} +EXPORT_SYMBOL_GPL(edac_debugfs_create_x8); + +/* Wrapper for debugfs_create_x16() */ +void edac_debugfs_create_x16(const char *name, umode_t mode, + struct dentry *parent, u16 *value) +{ + if (!parent) + parent = edac_debugfs; + + debugfs_create_x16(name, mode, parent, value); +} +EXPORT_SYMBOL_GPL(edac_debugfs_create_x16); + +/* Wrapper for debugfs_create_x32() */ +void edac_debugfs_create_x32(const char *name, umode_t mode, + struct dentry *parent, u32 *value) +{ + if (!parent) + parent = edac_debugfs; + + debugfs_create_x32(name, mode, parent, value); +} +EXPORT_SYMBOL_GPL(edac_debugfs_create_x32); diff --git a/drivers/edac/dmc520_edac.c b/drivers/edac/dmc520_edac.c new file mode 100644 index 000000000000..64a4d0a07032 --- /dev/null +++ b/drivers/edac/dmc520_edac.c @@ -0,0 +1,652 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * EDAC driver for DMC-520 memory controller. + * + * The driver supports 10 interrupt lines, + * though only dram_ecc_errc and dram_ecc_errd are currently handled. + * + * Authors: Rui Zhao <ruizhao@microsoft.com> + * Lei Wang <lewan@microsoft.com> + * Shiping Ji <shji@microsoft.com> + */ + +#include <linux/bitfield.h> +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include "edac_mc.h" + +/* DMC-520 registers */ +#define REG_OFFSET_FEATURE_CONFIG 0x130 +#define REG_OFFSET_ECC_ERRC_COUNT_31_00 0x158 +#define REG_OFFSET_ECC_ERRC_COUNT_63_32 0x15C +#define REG_OFFSET_ECC_ERRD_COUNT_31_00 0x160 +#define REG_OFFSET_ECC_ERRD_COUNT_63_32 0x164 +#define REG_OFFSET_INTERRUPT_CONTROL 0x500 +#define REG_OFFSET_INTERRUPT_CLR 0x508 +#define REG_OFFSET_INTERRUPT_STATUS 0x510 +#define REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_31_00 0x528 +#define REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_63_32 0x52C +#define REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_31_00 0x530 +#define REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_63_32 0x534 +#define REG_OFFSET_ADDRESS_CONTROL_NOW 0x1010 +#define REG_OFFSET_MEMORY_TYPE_NOW 0x1128 +#define REG_OFFSET_SCRUB_CONTROL0_NOW 0x1170 +#define REG_OFFSET_FORMAT_CONTROL 0x18 + +/* DMC-520 types, masks and bitfields */ +#define RAM_ECC_INT_CE_BIT BIT(0) +#define RAM_ECC_INT_UE_BIT BIT(1) +#define DRAM_ECC_INT_CE_BIT BIT(2) +#define DRAM_ECC_INT_UE_BIT BIT(3) +#define FAILED_ACCESS_INT_BIT BIT(4) +#define FAILED_PROG_INT_BIT BIT(5) +#define LINK_ERR_INT_BIT BIT(6) +#define TEMPERATURE_EVENT_INT_BIT BIT(7) +#define ARCH_FSM_INT_BIT BIT(8) +#define PHY_REQUEST_INT_BIT BIT(9) +#define MEMORY_WIDTH_MASK GENMASK(1, 0) +#define SCRUB_TRIGGER0_NEXT_MASK GENMASK(1, 0) +#define REG_FIELD_DRAM_ECC_ENABLED GENMASK(1, 0) +#define REG_FIELD_MEMORY_TYPE GENMASK(2, 0) +#define REG_FIELD_DEVICE_WIDTH GENMASK(9, 8) +#define REG_FIELD_ADDRESS_CONTROL_COL GENMASK(2, 0) +#define 
REG_FIELD_ADDRESS_CONTROL_ROW GENMASK(10, 8) +#define REG_FIELD_ADDRESS_CONTROL_BANK GENMASK(18, 16) +#define REG_FIELD_ADDRESS_CONTROL_RANK GENMASK(25, 24) +#define REG_FIELD_ERR_INFO_LOW_VALID BIT(0) +#define REG_FIELD_ERR_INFO_LOW_COL GENMASK(10, 1) +#define REG_FIELD_ERR_INFO_LOW_ROW GENMASK(28, 11) +#define REG_FIELD_ERR_INFO_LOW_RANK GENMASK(31, 29) +#define REG_FIELD_ERR_INFO_HIGH_BANK GENMASK(3, 0) +#define REG_FIELD_ERR_INFO_HIGH_VALID BIT(31) + +#define DRAM_ADDRESS_CONTROL_MIN_COL_BITS 8 +#define DRAM_ADDRESS_CONTROL_MIN_ROW_BITS 11 + +#define DMC520_SCRUB_TRIGGER_ERR_DETECT 2 +#define DMC520_SCRUB_TRIGGER_IDLE 3 + +/* Driver settings */ +/* + * The max-length message would be: "rank:7 bank:15 row:262143 col:1023". + * Max length is 34. Using a 40-size buffer is enough. + */ +#define DMC520_MSG_BUF_SIZE 40 +#define EDAC_MOD_NAME "dmc520-edac" +#define EDAC_CTL_NAME "dmc520" + +/* the data bus width for the attached memory chips. */ +enum dmc520_mem_width { + MEM_WIDTH_X32 = 2, + MEM_WIDTH_X64 = 3 +}; + +/* memory type */ +enum dmc520_mem_type { + MEM_TYPE_DDR3 = 1, + MEM_TYPE_DDR4 = 2 +}; + +/* memory device width */ +enum dmc520_dev_width { + DEV_WIDTH_X4 = 0, + DEV_WIDTH_X8 = 1, + DEV_WIDTH_X16 = 2 +}; + +struct ecc_error_info { + u32 col; + u32 row; + u32 bank; + u32 rank; +}; + +/* The interrupt config */ +struct dmc520_irq_config { + char *name; + int mask; +}; + +/* The interrupt mappings */ +static struct dmc520_irq_config dmc520_irq_configs[] = { + { + .name = "ram_ecc_errc", + .mask = RAM_ECC_INT_CE_BIT + }, + { + .name = "ram_ecc_errd", + .mask = RAM_ECC_INT_UE_BIT + }, + { + .name = "dram_ecc_errc", + .mask = DRAM_ECC_INT_CE_BIT + }, + { + .name = "dram_ecc_errd", + .mask = DRAM_ECC_INT_UE_BIT + }, + { + .name = "failed_access", + .mask = FAILED_ACCESS_INT_BIT + }, + { + .name = "failed_prog", + .mask = FAILED_PROG_INT_BIT + }, + { + .name = "link_err", + .mask = LINK_ERR_INT_BIT + }, + { + .name = "temperature_event", + .mask = TEMPERATURE_EVENT_INT_BIT + }, + { + .name = "arch_fsm", + .mask = ARCH_FSM_INT_BIT + }, + { + .name = "phy_request", + .mask = PHY_REQUEST_INT_BIT + } +}; + +#define NUMBER_OF_IRQS ARRAY_SIZE(dmc520_irq_configs) + +/* + * The EDAC driver private data. + * error_lock is to protect concurrent writes to the mci->error_desc through + * edac_mc_handle_error(). + */ +struct dmc520_edac { + void __iomem *reg_base; + spinlock_t error_lock; + u32 mem_width_in_bytes; + int irqs[NUMBER_OF_IRQS]; + int masks[NUMBER_OF_IRQS]; +}; + +static int dmc520_mc_idx; + +static u32 dmc520_read_reg(struct dmc520_edac *pvt, u32 offset) +{ + return readl(pvt->reg_base + offset); +} + +static void dmc520_write_reg(struct dmc520_edac *pvt, u32 val, u32 offset) +{ + writel(val, pvt->reg_base + offset); +} + +static u32 dmc520_calc_dram_ecc_error(u32 value) +{ + u32 total = 0; + + /* Each rank's error counter takes one byte. */ + while (value > 0) { + total += (value & 0xFF); + value >>= 8; + } + return total; +} + +static u32 dmc520_get_dram_ecc_error_count(struct dmc520_edac *pvt, + bool is_ce) +{ + u32 reg_offset_low, reg_offset_high; + u32 err_low, err_high; + u32 err_count; + + reg_offset_low = is_ce ? REG_OFFSET_ECC_ERRC_COUNT_31_00 : + REG_OFFSET_ECC_ERRD_COUNT_31_00; + reg_offset_high = is_ce ? 
REG_OFFSET_ECC_ERRC_COUNT_63_32 :
+				  REG_OFFSET_ECC_ERRD_COUNT_63_32;
+
+	err_low = dmc520_read_reg(pvt, reg_offset_low);
+	err_high = dmc520_read_reg(pvt, reg_offset_high);
+	/* Reset error counters */
+	dmc520_write_reg(pvt, 0, reg_offset_low);
+	dmc520_write_reg(pvt, 0, reg_offset_high);
+
+	err_count = dmc520_calc_dram_ecc_error(err_low) +
+		    dmc520_calc_dram_ecc_error(err_high);
+
+	return err_count;
+}
+
+static void dmc520_get_dram_ecc_error_info(struct dmc520_edac *pvt,
+					   bool is_ce,
+					   struct ecc_error_info *info)
+{
+	u32 reg_offset_low, reg_offset_high;
+	u32 reg_val_low, reg_val_high;
+	bool valid;
+
+	reg_offset_low = is_ce ? REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_31_00 :
+				 REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_31_00;
+	reg_offset_high = is_ce ? REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_63_32 :
+				  REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_63_32;
+
+	reg_val_low = dmc520_read_reg(pvt, reg_offset_low);
+	reg_val_high = dmc520_read_reg(pvt, reg_offset_high);
+
+	valid = (FIELD_GET(REG_FIELD_ERR_INFO_LOW_VALID, reg_val_low) != 0) &&
+		(FIELD_GET(REG_FIELD_ERR_INFO_HIGH_VALID, reg_val_high) != 0);
+
+	if (valid) {
+		info->col = FIELD_GET(REG_FIELD_ERR_INFO_LOW_COL, reg_val_low);
+		info->row = FIELD_GET(REG_FIELD_ERR_INFO_LOW_ROW, reg_val_low);
+		info->rank = FIELD_GET(REG_FIELD_ERR_INFO_LOW_RANK, reg_val_low);
+		info->bank = FIELD_GET(REG_FIELD_ERR_INFO_HIGH_BANK, reg_val_high);
+	} else {
+		memset(info, 0, sizeof(*info));
+	}
+}
+
+static bool dmc520_is_ecc_enabled(void __iomem *reg_base)
+{
+	u32 reg_val = readl(reg_base + REG_OFFSET_FEATURE_CONFIG);
+
+	return FIELD_GET(REG_FIELD_DRAM_ECC_ENABLED, reg_val);
+}
+
+static enum scrub_type dmc520_get_scrub_type(struct dmc520_edac *pvt)
+{
+	enum scrub_type type = SCRUB_NONE;
+	u32 reg_val, scrub_cfg;
+
+	reg_val = dmc520_read_reg(pvt, REG_OFFSET_SCRUB_CONTROL0_NOW);
+	scrub_cfg = FIELD_GET(SCRUB_TRIGGER0_NEXT_MASK, reg_val);
+
+	if (scrub_cfg == DMC520_SCRUB_TRIGGER_ERR_DETECT ||
+	    scrub_cfg == DMC520_SCRUB_TRIGGER_IDLE)
+		type = SCRUB_HW_PROG;
+
+	return type;
+}
+
+/* Get the memory data bus width, in number of bytes.
*/ +static u32 dmc520_get_memory_width(struct dmc520_edac *pvt) +{ + enum dmc520_mem_width mem_width_field; + u32 mem_width_in_bytes = 0; + u32 reg_val; + + reg_val = dmc520_read_reg(pvt, REG_OFFSET_FORMAT_CONTROL); + mem_width_field = FIELD_GET(MEMORY_WIDTH_MASK, reg_val); + + if (mem_width_field == MEM_WIDTH_X32) + mem_width_in_bytes = 4; + else if (mem_width_field == MEM_WIDTH_X64) + mem_width_in_bytes = 8; + return mem_width_in_bytes; +} + +static enum mem_type dmc520_get_mtype(struct dmc520_edac *pvt) +{ + enum mem_type mt = MEM_UNKNOWN; + enum dmc520_mem_type type; + u32 reg_val; + + reg_val = dmc520_read_reg(pvt, REG_OFFSET_MEMORY_TYPE_NOW); + type = FIELD_GET(REG_FIELD_MEMORY_TYPE, reg_val); + + switch (type) { + case MEM_TYPE_DDR3: + mt = MEM_DDR3; + break; + + case MEM_TYPE_DDR4: + mt = MEM_DDR4; + break; + } + + return mt; +} + +static enum dev_type dmc520_get_dtype(struct dmc520_edac *pvt) +{ + enum dmc520_dev_width device_width; + enum dev_type dt = DEV_UNKNOWN; + u32 reg_val; + + reg_val = dmc520_read_reg(pvt, REG_OFFSET_MEMORY_TYPE_NOW); + device_width = FIELD_GET(REG_FIELD_DEVICE_WIDTH, reg_val); + + switch (device_width) { + case DEV_WIDTH_X4: + dt = DEV_X4; + break; + + case DEV_WIDTH_X8: + dt = DEV_X8; + break; + + case DEV_WIDTH_X16: + dt = DEV_X16; + break; + } + + return dt; +} + +static u32 dmc520_get_rank_count(void __iomem *reg_base) +{ + u32 reg_val, rank_bits; + + reg_val = readl(reg_base + REG_OFFSET_ADDRESS_CONTROL_NOW); + rank_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_RANK, reg_val); + + return BIT(rank_bits); +} + +static u64 dmc520_get_rank_size(struct dmc520_edac *pvt) +{ + u32 reg_val, col_bits, row_bits, bank_bits; + + reg_val = dmc520_read_reg(pvt, REG_OFFSET_ADDRESS_CONTROL_NOW); + + col_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_COL, reg_val) + + DRAM_ADDRESS_CONTROL_MIN_COL_BITS; + row_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_ROW, reg_val) + + DRAM_ADDRESS_CONTROL_MIN_ROW_BITS; + bank_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_BANK, reg_val); + + return (u64)pvt->mem_width_in_bytes << (col_bits + row_bits + bank_bits); +} + +static void dmc520_handle_dram_ecc_errors(struct mem_ctl_info *mci, + bool is_ce) +{ + struct dmc520_edac *pvt = mci->pvt_info; + char message[DMC520_MSG_BUF_SIZE]; + struct ecc_error_info info; + u32 cnt; + + dmc520_get_dram_ecc_error_info(pvt, is_ce, &info); + + cnt = dmc520_get_dram_ecc_error_count(pvt, is_ce); + if (!cnt) + return; + + snprintf(message, ARRAY_SIZE(message), + "rank:%d bank:%d row:%d col:%d", + info.rank, info.bank, + info.row, info.col); + + spin_lock(&pvt->error_lock); + edac_mc_handle_error((is_ce ? HW_EVENT_ERR_CORRECTED : + HW_EVENT_ERR_UNCORRECTED), + mci, cnt, 0, 0, 0, info.rank, -1, -1, + message, ""); + spin_unlock(&pvt->error_lock); +} + +static irqreturn_t dmc520_edac_dram_ecc_isr(int irq, struct mem_ctl_info *mci, + bool is_ce) +{ + struct dmc520_edac *pvt = mci->pvt_info; + u32 i_mask; + + i_mask = is_ce ? 
DRAM_ECC_INT_CE_BIT : DRAM_ECC_INT_UE_BIT; + + dmc520_handle_dram_ecc_errors(mci, is_ce); + + dmc520_write_reg(pvt, i_mask, REG_OFFSET_INTERRUPT_CLR); + + return IRQ_HANDLED; +} + +static irqreturn_t dmc520_edac_dram_all_isr(int irq, struct mem_ctl_info *mci, + u32 irq_mask) +{ + struct dmc520_edac *pvt = mci->pvt_info; + irqreturn_t irq_ret = IRQ_NONE; + u32 status; + + status = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_STATUS); + + if ((irq_mask & DRAM_ECC_INT_CE_BIT) && + (status & DRAM_ECC_INT_CE_BIT)) + irq_ret = dmc520_edac_dram_ecc_isr(irq, mci, true); + + if ((irq_mask & DRAM_ECC_INT_UE_BIT) && + (status & DRAM_ECC_INT_UE_BIT)) + irq_ret = dmc520_edac_dram_ecc_isr(irq, mci, false); + + return irq_ret; +} + +static irqreturn_t dmc520_isr(int irq, void *data) +{ + struct mem_ctl_info *mci = data; + struct dmc520_edac *pvt = mci->pvt_info; + u32 mask = 0; + int idx; + + for (idx = 0; idx < NUMBER_OF_IRQS; idx++) { + if (pvt->irqs[idx] == irq) { + mask = pvt->masks[idx]; + break; + } + } + return dmc520_edac_dram_all_isr(irq, mci, mask); +} + +static void dmc520_init_csrow(struct mem_ctl_info *mci) +{ + struct dmc520_edac *pvt = mci->pvt_info; + struct csrow_info *csi; + struct dimm_info *dimm; + u32 pages_per_rank; + enum dev_type dt; + enum mem_type mt; + int row, ch; + u64 rs; + + dt = dmc520_get_dtype(pvt); + mt = dmc520_get_mtype(pvt); + rs = dmc520_get_rank_size(pvt); + pages_per_rank = rs >> PAGE_SHIFT; + + for (row = 0; row < mci->nr_csrows; row++) { + csi = mci->csrows[row]; + + for (ch = 0; ch < csi->nr_channels; ch++) { + dimm = csi->channels[ch]->dimm; + dimm->grain = pvt->mem_width_in_bytes; + dimm->dtype = dt; + dimm->mtype = mt; + dimm->edac_mode = EDAC_SECDED; + dimm->nr_pages = pages_per_rank / csi->nr_channels; + } + } +} + +static int dmc520_edac_probe(struct platform_device *pdev) +{ + bool registered[NUMBER_OF_IRQS] = { false }; + int irqs[NUMBER_OF_IRQS] = { -ENXIO }; + int masks[NUMBER_OF_IRQS] = { 0 }; + struct edac_mc_layer layers[1]; + struct dmc520_edac *pvt = NULL; + struct mem_ctl_info *mci; + void __iomem *reg_base; + u32 irq_mask_all = 0; + struct device *dev; + int ret, idx, irq; + u32 reg_val; + + /* Parse the device node */ + dev = &pdev->dev; + + for (idx = 0; idx < NUMBER_OF_IRQS; idx++) { + irq = platform_get_irq_byname_optional(pdev, dmc520_irq_configs[idx].name); + irqs[idx] = irq; + masks[idx] = dmc520_irq_configs[idx].mask; + if (irq >= 0) { + irq_mask_all |= dmc520_irq_configs[idx].mask; + edac_dbg(0, "Discovered %s, irq: %d.\n", dmc520_irq_configs[idx].name, irq); + } + } + + if (!irq_mask_all) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, + "At least one valid interrupt line is expected.\n"); + return -EINVAL; + } + + /* Initialize dmc520 edac */ + reg_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(reg_base)) + return PTR_ERR(reg_base); + + if (!dmc520_is_ecc_enabled(reg_base)) + return -ENXIO; + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = dmc520_get_rank_count(reg_base); + layers[0].is_virt_csrow = true; + + mci = edac_mc_alloc(dmc520_mc_idx++, ARRAY_SIZE(layers), layers, sizeof(*pvt)); + if (!mci) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, + "Failed to allocate memory for mc instance\n"); + ret = -ENOMEM; + goto err; + } + + pvt = mci->pvt_info; + + pvt->reg_base = reg_base; + spin_lock_init(&pvt->error_lock); + memcpy(pvt->irqs, irqs, sizeof(irqs)); + memcpy(pvt->masks, masks, sizeof(masks)); + + platform_set_drvdata(pdev, mci); + + mci->pdev = dev; + mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4; + 
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->scrub_cap = SCRUB_FLAG_HW_SRC; + mci->scrub_mode = dmc520_get_scrub_type(pvt); + mci->ctl_name = EDAC_CTL_NAME; + mci->dev_name = dev_name(mci->pdev); + mci->mod_name = EDAC_MOD_NAME; + + edac_op_state = EDAC_OPSTATE_INT; + + pvt->mem_width_in_bytes = dmc520_get_memory_width(pvt); + + dmc520_init_csrow(mci); + + /* Clear interrupts, not affecting other unrelated interrupts */ + reg_val = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_CONTROL); + dmc520_write_reg(pvt, reg_val & (~irq_mask_all), + REG_OFFSET_INTERRUPT_CONTROL); + dmc520_write_reg(pvt, irq_mask_all, REG_OFFSET_INTERRUPT_CLR); + + for (idx = 0; idx < NUMBER_OF_IRQS; idx++) { + irq = irqs[idx]; + if (irq >= 0) { + ret = devm_request_irq(&pdev->dev, irq, + dmc520_isr, IRQF_SHARED, + dev_name(&pdev->dev), mci); + if (ret < 0) { + edac_printk(KERN_ERR, EDAC_MC, + "Failed to request irq %d\n", irq); + goto err; + } + registered[idx] = true; + } + } + + /* Reset DRAM CE/UE counters */ + if (irq_mask_all & DRAM_ECC_INT_CE_BIT) + dmc520_get_dram_ecc_error_count(pvt, true); + + if (irq_mask_all & DRAM_ECC_INT_UE_BIT) + dmc520_get_dram_ecc_error_count(pvt, false); + + ret = edac_mc_add_mc(mci); + if (ret) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, + "Failed to register with EDAC core\n"); + goto err; + } + + /* Enable interrupts, not affecting other unrelated interrupts */ + dmc520_write_reg(pvt, reg_val | irq_mask_all, + REG_OFFSET_INTERRUPT_CONTROL); + + return 0; + +err: + for (idx = 0; idx < NUMBER_OF_IRQS; idx++) { + if (registered[idx]) + devm_free_irq(&pdev->dev, pvt->irqs[idx], mci); + } + if (mci) + edac_mc_free(mci); + + return ret; +} + +static void dmc520_edac_remove(struct platform_device *pdev) +{ + u32 reg_val, idx, irq_mask_all = 0; + struct mem_ctl_info *mci; + struct dmc520_edac *pvt; + + mci = platform_get_drvdata(pdev); + pvt = mci->pvt_info; + + /* Disable interrupts */ + reg_val = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_CONTROL); + dmc520_write_reg(pvt, reg_val & (~irq_mask_all), + REG_OFFSET_INTERRUPT_CONTROL); + + /* free irq's */ + for (idx = 0; idx < NUMBER_OF_IRQS; idx++) { + if (pvt->irqs[idx] >= 0) { + irq_mask_all |= pvt->masks[idx]; + devm_free_irq(&pdev->dev, pvt->irqs[idx], mci); + } + } + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); +} + +static const struct of_device_id dmc520_edac_driver_id[] = { + { .compatible = "arm,dmc-520", }, + { /* end of table */ } +}; + +MODULE_DEVICE_TABLE(of, dmc520_edac_driver_id); + +static struct platform_driver dmc520_edac_driver = { + .driver = { + .name = "dmc520", + .of_match_table = dmc520_edac_driver_id, + }, + + .probe = dmc520_edac_probe, + .remove = dmc520_edac_remove +}; + +module_platform_driver(dmc520_edac_driver); + +MODULE_AUTHOR("Rui Zhao <ruizhao@microsoft.com>"); +MODULE_AUTHOR("Lei Wang <lewan@microsoft.com>"); +MODULE_AUTHOR("Shiping Ji <shji@microsoft.com>"); +MODULE_DESCRIPTION("DMC-520 ECC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index 644fec54681f..7221b4bb6df2 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -7,7 +7,7 @@ * Implement support for the e7520, E7525, e7320 and i3100 memory controllers. 
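
A quick illustration of the DMC-520 geometry decoding added above: dmc520_get_rank_size() shifts the data bus width in bytes by the total number of column, row and bank address bits. The standalone sketch below mirrors that arithmetic outside the kernel; the function name and the example geometry are invented for illustration only, and the driver's own dmc520_get_rank_size() remains the authoritative version.

	#include <stdint.h>
	#include <stdio.h>

	/* Mirrors dmc520_get_rank_size(): bytes-per-access shifted by the
	 * total number of address bits. E.g. an 8-byte (x64) bus with 10
	 * column, 16 row and 2 bank bits gives 8 << 28 = 2 GiB per rank. */
	static uint64_t rank_size(uint32_t bus_bytes, uint32_t col_bits,
				  uint32_t row_bits, uint32_t bank_bits)
	{
		return (uint64_t)bus_bytes << (col_bits + row_bits + bank_bits);
	}

	int main(void)
	{
		printf("%llu bytes\n",
		       (unsigned long long)rank_size(8, 10, 16, 2));
		return 0;
	}
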
* * Datasheets: - * http://www.intel.in/content/www/in/en/chipsets/e7525-memory-controller-hub-datasheet.html + * https://www.intel.in/content/www/in/en/chipsets/e7525-memory-controller-hub-datasheet.html * ftp://download.intel.com/design/intarch/datashts/31345803.pdf * * Written by Tom Zimmerman @@ -24,9 +24,8 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/edac.h> -#include "edac_core.h" +#include "edac_module.h" -#define E752X_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "e752x_edac" static int report_non_memory_errors; @@ -209,7 +208,6 @@ enum e752x_chips { */ struct e752x_pvt { - struct pci_dev *bridge_ck; struct pci_dev *dev_d0f0; struct pci_dev *dev_d0f1; u32 tolm; @@ -891,7 +889,7 @@ static void e752x_get_error_info(struct mem_ctl_info *mci, info->buf_ferr); if (info->dram_ferr) - pci_write_bits16(pvt->bridge_ck, E752X_DRAM_FERR, + pci_write_bits16(pvt->dev_d0f1, E752X_DRAM_FERR, info->dram_ferr, info->dram_ferr); pci_write_config_dword(dev, E752X_FERR_GLOBAL, @@ -936,7 +934,7 @@ static void e752x_get_error_info(struct mem_ctl_info *mci, info->buf_nerr); if (info->dram_nerr) - pci_write_bits16(pvt->bridge_ck, E752X_DRAM_NERR, + pci_write_bits16(pvt->dev_d0f1, E752X_DRAM_NERR, info->dram_nerr, info->dram_nerr); pci_write_config_dword(dev, E752X_NERR_GLOBAL, @@ -982,7 +980,6 @@ static void e752x_check(struct mem_ctl_info *mci) { struct e752x_error_info info; - edac_dbg(3, "\n"); e752x_get_error_info(mci, &info); e752x_process_error_info(mci, &info, 1); } @@ -1177,36 +1174,33 @@ static void e752x_init_mem_map_table(struct pci_dev *pdev, static int e752x_get_devs(struct pci_dev *pdev, int dev_idx, struct e752x_pvt *pvt) { - struct pci_dev *dev; - - pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL, - pvt->dev_info->err_dev, pvt->bridge_ck); + pvt->dev_d0f1 = pci_get_device(PCI_VENDOR_ID_INTEL, + pvt->dev_info->err_dev, NULL); - if (pvt->bridge_ck == NULL) - pvt->bridge_ck = pci_scan_single_device(pdev->bus, + if (pvt->dev_d0f1 == NULL) { + pvt->dev_d0f1 = pci_scan_single_device(pdev->bus, PCI_DEVFN(0, 1)); + pci_dev_get(pvt->dev_d0f1); + } - if (pvt->bridge_ck == NULL) { + if (pvt->dev_d0f1 == NULL) { e752x_printk(KERN_ERR, "error reporting device not found:" "vendor %x device 0x%x (broken BIOS?)\n", PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].err_dev); return 1; } - dev = pci_get_device(PCI_VENDOR_ID_INTEL, + pvt->dev_d0f0 = pci_get_device(PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].ctl_dev, NULL); - if (dev == NULL) + if (pvt->dev_d0f0 == NULL) goto fail; - pvt->dev_d0f0 = dev; - pvt->dev_d0f1 = pci_dev_get(pvt->bridge_ck); - return 0; fail: - pci_dev_put(pvt->bridge_ck); + pci_dev_put(pvt->dev_d0f1); return 1; } @@ -1307,7 +1301,6 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) (EDAC_FLAG_NONE | EDAC_FLAG_SECDED | EDAC_FLAG_S4ECD4ED); /* FIXME - what if different memory types are in different csrows? 
*/ mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = E752X_REVISION; mci->pdev = &pdev->dev; edac_dbg(3, "init pvt\n"); @@ -1383,7 +1376,6 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) fail: pci_dev_put(pvt->dev_d0f0); pci_dev_put(pvt->dev_d0f1); - pci_dev_put(pvt->bridge_ck); edac_mc_free(mci); return -ENODEV; @@ -1417,11 +1409,10 @@ static void e752x_remove_one(struct pci_dev *pdev) pvt = (struct e752x_pvt *)mci->pvt_info; pci_dev_put(pvt->dev_d0f0); pci_dev_put(pvt->dev_d0f1); - pci_dev_put(pvt->bridge_ck); edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(e752x_pci_tbl) = { +static const struct pci_device_id e752x_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 7520_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7520}, @@ -1454,8 +1445,8 @@ static int __init e752x_init(void) edac_dbg(3, "\n"); - /* Ensure that the OPSTATE is set correctly for POLL or NMI */ - opstate_init(); + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); pci_rc = pci_register_driver(&e752x_driver); return (pci_rc < 0) ? pci_rc : 0; @@ -1471,7 +1462,7 @@ module_init(e752x_init); module_exit(e752x_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman\n"); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman"); MODULE_DESCRIPTION("MC support for Intel e752x/3100 memory controllers"); module_param(force_function_unhide, int, 0444); diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 1c4056a50383..5852b95fa470 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -30,9 +30,8 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/edac.h> -#include "edac_core.h" +#include "edac_module.h" -#define E7XXX_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "e7xxx_edac" #define e7xxx_printk(level, fmt, arg...) \ @@ -226,7 +225,7 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info) static void process_ce_no_info(struct mem_ctl_info *mci) { edac_dbg(3, "\n"); - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1, + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, -1, -1, -1, "e7xxx CE log register overflow", ""); } @@ -334,7 +333,6 @@ static void e7xxx_check(struct mem_ctl_info *mci) { struct e7xxx_error_info info; - edac_dbg(3, "\n"); e7xxx_get_error_info(mci, &info); e7xxx_process_error_info(mci, &info, 1); } @@ -458,7 +456,6 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) EDAC_FLAG_S4ECD4ED; /* FIXME - what if different memory types are in different csrows? 
*/ mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = E7XXX_REVISION; mci->pdev = &pdev->dev; edac_dbg(3, "init pvt\n"); pvt = (struct e7xxx_pvt *)mci->pvt_info; @@ -555,7 +552,7 @@ static void e7xxx_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(e7xxx_pci_tbl) = { +static const struct pci_device_id e7xxx_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7205}, @@ -599,8 +596,7 @@ module_init(e7xxx_init); module_exit(e7xxx_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n" - "Based on.work by Dan Hollis et al"); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al"); MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers"); module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c new file mode 100644 index 000000000000..51c451c7f0f0 --- /dev/null +++ b/drivers/edac/ecs.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic ECS driver is designed to support control of on-die error + * check scrub (e.g., DDR5 ECS). The common sysfs ECS interface abstracts + * the control of various ECS functionalities into a unified set of functions. + * + * Copyright (c) 2024-2025 HiSilicon Limited. + */ + +#include <linux/edac.h> + +#define EDAC_ECS_FRU_NAME "ecs_fru" + +enum edac_ecs_attributes { + ECS_LOG_ENTRY_TYPE, + ECS_MODE, + ECS_RESET, + ECS_THRESHOLD, + ECS_MAX_ATTRS +}; + +struct edac_ecs_dev_attr { + struct device_attribute dev_attr; + int fru_id; +}; + +struct edac_ecs_fru_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_ecs_dev_attr dev_attr[ECS_MAX_ATTRS]; + struct attribute *ecs_attrs[ECS_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +struct edac_ecs_context { + u16 num_media_frus; + struct edac_ecs_fru_context *fru_ctxs; +}; + +#define TO_ECS_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_ecs_dev_attr, dev_attr) + +#define EDAC_ECS_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \ + dev_attr->fru_id, &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +EDAC_ECS_ATTR_SHOW(log_entry_type, get_log_entry_type, u32, "%u\n") +EDAC_ECS_ATTR_SHOW(mode, get_mode, u32, "%u\n") +EDAC_ECS_ATTR_SHOW(threshold, get_threshold, u32, "%u\n") + +#define EDAC_ECS_ATTR_STORE(attrib, cb, type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \ + type data; \ + int ret; \ + \ + ret = conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \ + dev_attr->fru_id, data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +EDAC_ECS_ATTR_STORE(log_entry_type, set_log_entry_type, unsigned long, kstrtoul) +EDAC_ECS_ATTR_STORE(mode, set_mode, unsigned long, kstrtoul) 
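
To make the generated sysfs handlers easier to trace, here is what the mode instantiation of EDAC_ECS_ATTR_STORE() just above expands to, reconstructed by hand from the macro body (whitespace approximate, explanatory comment added); the reset and threshold instantiations, which follow next, expand the same way:

	static ssize_t mode_store(struct device *ras_feat_dev,
				  struct device_attribute *attr,
				  const char *buf, size_t len)
	{
		struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr);
		struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
		const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops;
		unsigned long data;
		int ret;

		/* Parse the user's sysfs write, then forward it to the
		 * parent driver's set_mode() callback for this FRU. */
		ret = kstrtoul(buf, 0, &data);
		if (ret < 0)
			return ret;

		ret = ops->set_mode(ras_feat_dev->parent, ctx->ecs.private,
				    dev_attr->fru_id, data);
		if (ret)
			return ret;

		return len;
	}
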
+EDAC_ECS_ATTR_STORE(reset, reset, unsigned long, kstrtoul) +EDAC_ECS_ATTR_STORE(threshold, set_threshold, unsigned long, kstrtoul) + +static umode_t ecs_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; + + switch (attr_id) { + case ECS_LOG_ENTRY_TYPE: + if (ops->get_log_entry_type) { + if (ops->set_log_entry_type) + return a->mode; + else + return 0444; + } + break; + case ECS_MODE: + if (ops->get_mode) { + if (ops->set_mode) + return a->mode; + else + return 0444; + } + break; + case ECS_RESET: + if (ops->reset) + return a->mode; + break; + case ECS_THRESHOLD: + if (ops->get_threshold) { + if (ops->set_threshold) + return a->mode; + else + return 0444; + } + break; + default: + break; + } + + return 0; +} + +#define EDAC_ECS_ATTR_RO(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RO(_name), \ + .fru_id = _fru_id }) + +#define EDAC_ECS_ATTR_WO(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_WO(_name), \ + .fru_id = _fru_id }) + +#define EDAC_ECS_ATTR_RW(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RW(_name), \ + .fru_id = _fru_id }) + +static int ecs_create_desc(struct device *ecs_dev, const struct attribute_group **attr_groups, + u16 num_media_frus) +{ + struct edac_ecs_context *ecs_ctx; + u32 fru; + + ecs_ctx = devm_kzalloc(ecs_dev, sizeof(*ecs_ctx), GFP_KERNEL); + if (!ecs_ctx) + return -ENOMEM; + + ecs_ctx->num_media_frus = num_media_frus; + ecs_ctx->fru_ctxs = devm_kcalloc(ecs_dev, num_media_frus, + sizeof(*ecs_ctx->fru_ctxs), + GFP_KERNEL); + if (!ecs_ctx->fru_ctxs) + return -ENOMEM; + + for (fru = 0; fru < num_media_frus; fru++) { + struct edac_ecs_fru_context *fru_ctx = &ecs_ctx->fru_ctxs[fru]; + struct attribute_group *group = &fru_ctx->group; + int i; + + fru_ctx->dev_attr[ECS_LOG_ENTRY_TYPE] = EDAC_ECS_ATTR_RW(log_entry_type, fru); + fru_ctx->dev_attr[ECS_MODE] = EDAC_ECS_ATTR_RW(mode, fru); + fru_ctx->dev_attr[ECS_RESET] = EDAC_ECS_ATTR_WO(reset, fru); + fru_ctx->dev_attr[ECS_THRESHOLD] = EDAC_ECS_ATTR_RW(threshold, fru); + + for (i = 0; i < ECS_MAX_ATTRS; i++) { + sysfs_attr_init(&fru_ctx->dev_attr[i].dev_attr.attr); + fru_ctx->ecs_attrs[i] = &fru_ctx->dev_attr[i].dev_attr.attr; + } + + sprintf(fru_ctx->name, "%s%d", EDAC_ECS_FRU_NAME, fru); + group->name = fru_ctx->name; + group->attrs = fru_ctx->ecs_attrs; + group->is_visible = ecs_attr_visible; + + attr_groups[fru] = group; + } + + return 0; +} + +/** + * edac_ecs_get_desc - get EDAC ECS descriptors + * @ecs_dev: client device, supports ECS feature + * @attr_groups: pointer to attribute group container + * @num_media_frus: number of media FRUs in the device + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. 
+ */ +int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, u16 num_media_frus) +{ + if (!ecs_dev || !attr_groups || !num_media_frus) + return -EINVAL; + + return ecs_create_desc(ecs_dev, attr_groups, num_media_frus); +} diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h deleted file mode 100644 index 3c2625e7980d..000000000000 --- a/drivers/edac/edac_core.h +++ /dev/null @@ -1,513 +0,0 @@ -/* - * Defines, structures, APIs for edac_core module - * - * (C) 2007 Linux Networx (http://lnxi.com) - * This file may be distributed under the terms of the - * GNU General Public License. - * - * Written by Thayne Harbaugh - * Based on work by Dan Hollis <goemon at anime dot net> and others. - * http://www.anime.net/~goemon/linux-ecc/ - * - * NMI handling support added by - * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com> - * - * Refactored for multi-source files: - * Doug Thompson <norsk5@xmission.com> - * - */ - -#ifndef _EDAC_CORE_H_ -#define _EDAC_CORE_H_ - -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/smp.h> -#include <linux/pci.h> -#include <linux/time.h> -#include <linux/nmi.h> -#include <linux/rcupdate.h> -#include <linux/completion.h> -#include <linux/kobject.h> -#include <linux/platform_device.h> -#include <linux/workqueue.h> -#include <linux/edac.h> - -#define EDAC_DEVICE_NAME_LEN 31 -#define EDAC_ATTRIB_VALUE_LEN 15 - -#if PAGE_SHIFT < 20 -#define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT)) -#define MiB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) -#else /* PAGE_SHIFT > 20 */ -#define PAGES_TO_MiB(pages) ((pages) << (PAGE_SHIFT - 20)) -#define MiB_TO_PAGES(mb) ((mb) >> (PAGE_SHIFT - 20)) -#endif - -#define edac_printk(level, prefix, fmt, arg...) \ - printk(level "EDAC " prefix ": " fmt, ##arg) - -#define edac_mc_printk(mci, level, fmt, arg...) \ - printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg) - -#define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \ - printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg) - -#define edac_device_printk(ctl, level, fmt, arg...) \ - printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg) - -#define edac_pci_printk(ctl, level, fmt, arg...) \ - printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg) - -/* prefixes for edac_printk() and edac_mc_printk() */ -#define EDAC_MC "MC" -#define EDAC_PCI "PCI" -#define EDAC_DEBUG "DEBUG" - -extern const char *edac_mem_types[]; - -#ifdef CONFIG_EDAC_DEBUG -extern int edac_debug_level; - -#define edac_dbg(level, fmt, ...) \ -do { \ - if (level <= edac_debug_level) \ - edac_printk(KERN_DEBUG, EDAC_DEBUG, \ - "%s: " fmt, __func__, ##__VA_ARGS__); \ -} while (0) - -#else /* !CONFIG_EDAC_DEBUG */ - -#define edac_dbg(level, fmt, ...) \ -do { \ - if (0) \ - edac_printk(KERN_DEBUG, EDAC_DEBUG, \ - "%s: " fmt, __func__, ##__VA_ARGS__); \ -} while (0) - -#endif /* !CONFIG_EDAC_DEBUG */ - -#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \ - PCI_DEVICE_ID_ ## vend ## _ ## dev - -#define edac_dev_name(dev) (dev)->dev_name - -/* - * The following are the structures to provide for a generic - * or abstract 'edac_device'. This set of structures and the - * code that implements the APIs for the same, provide for - * registering EDAC type devices which are NOT standard memory. 
- * - * CPU caches (L1 and L2) - * DMA engines - * Core CPU switches - * Fabric switch units - * PCIe interface controllers - * other EDAC/ECC type devices that can be monitored for - * errors, etc. - * - * It allows for a 2 level set of hierarchy. For example: - * - * cache could be composed of L1, L2 and L3 levels of cache. - * Each CPU core would have its own L1 cache, while sharing - * L2 and maybe L3 caches. - * - * View them arranged, via the sysfs presentation: - * /sys/devices/system/edac/.. - * - * mc/ <existing memory device directory> - * cpu/cpu0/.. <L1 and L2 block directory> - * /L1-cache/ce_count - * /ue_count - * /L2-cache/ce_count - * /ue_count - * cpu/cpu1/.. <L1 and L2 block directory> - * /L1-cache/ce_count - * /ue_count - * /L2-cache/ce_count - * /ue_count - * ... - * - * the L1 and L2 directories would be "edac_device_block's" - */ - -struct edac_device_counter { - u32 ue_count; - u32 ce_count; -}; - -/* forward reference */ -struct edac_device_ctl_info; -struct edac_device_block; - -/* edac_dev_sysfs_attribute structure - * used for driver sysfs attributes in mem_ctl_info - * for extra controls and attributes: - * like high level error Injection controls - */ -struct edac_dev_sysfs_attribute { - struct attribute attr; - ssize_t (*show)(struct edac_device_ctl_info *, char *); - ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t); -}; - -/* edac_dev_sysfs_block_attribute structure - * - * used in leaf 'block' nodes for adding controls/attributes - * - * each block in each instance of the containing control structure - * can have an array of the following. The show and store functions - * will be filled in with the show/store function in the - * low level driver. - * - * The 'value' field will be the actual value field used for - * counting - */ -struct edac_dev_sysfs_block_attribute { - struct attribute attr; - ssize_t (*show)(struct kobject *, struct attribute *, char *); - ssize_t (*store)(struct kobject *, struct attribute *, - const char *, size_t); - struct edac_device_block *block; - - unsigned int value; -}; - -/* device block control structure */ -struct edac_device_block { - struct edac_device_instance *instance; /* Up Pointer */ - char name[EDAC_DEVICE_NAME_LEN + 1]; - - struct edac_device_counter counters; /* basic UE and CE counters */ - - int nr_attribs; /* how many attributes */ - - /* this block's attributes, could be NULL */ - struct edac_dev_sysfs_block_attribute *block_attributes; - - /* edac sysfs device control */ - struct kobject kobj; -}; - -/* device instance control structure */ -struct edac_device_instance { - struct edac_device_ctl_info *ctl; /* Up pointer */ - char name[EDAC_DEVICE_NAME_LEN + 4]; - - struct edac_device_counter counters; /* instance counters */ - - u32 nr_blocks; /* how many blocks */ - struct edac_device_block *blocks; /* block array */ - - /* edac sysfs device control */ - struct kobject kobj; -}; - - -/* - * Abstract edac_device control info structure - * - */ -struct edac_device_ctl_info { - /* for global list of edac_device_ctl_info structs */ - struct list_head link; - - struct module *owner; /* Module owner of this control struct */ - - int dev_idx; - - /* Per instance controls for this edac_device */ - int log_ue; /* boolean for logging UEs */ - int log_ce; /* boolean for logging CEs */ - int panic_on_ue; /* boolean for panic'ing on an UE */ - unsigned poll_msec; /* number of milliseconds to poll interval */ - unsigned long delay; /* number of jiffies for poll_msec */ - - /* Additional top controller 
level attributes, but specified - * by the low level driver. - * - * Set by the low level driver to provide attributes at the - * controller level, same level as 'ue_count' and 'ce_count' above. - * An array of structures, NULL terminated - * - * If attributes are desired, then set to array of attributes - * If no attributes are desired, leave NULL - */ - struct edac_dev_sysfs_attribute *sysfs_attributes; - - /* pointer to main 'edac' subsys in sysfs */ - struct bus_type *edac_subsys; - - /* the internal state of this controller instance */ - int op_state; - /* work struct for this instance */ - struct delayed_work work; - - /* pointer to edac polling checking routine: - * If NOT NULL: points to polling check routine - * If NULL: Then assumes INTERRUPT operation, where - * MC driver will receive events - */ - void (*edac_check) (struct edac_device_ctl_info * edac_dev); - - struct device *dev; /* pointer to device structure */ - - const char *mod_name; /* module name */ - const char *ctl_name; /* edac controller name */ - const char *dev_name; /* pci/platform/etc... name */ - - void *pvt_info; /* pointer to 'private driver' info */ - - unsigned long start_time; /* edac_device load start time (jiffies) */ - - struct completion removal_complete; - - /* sysfs top name under 'edac' directory - * and instance name: - * cpu/cpu0/... - * cpu/cpu1/... - * cpu/cpu2/... - * ... - */ - char name[EDAC_DEVICE_NAME_LEN + 1]; - - /* Number of instances supported on this control structure - * and the array of those instances - */ - u32 nr_instances; - struct edac_device_instance *instances; - - /* Event counters for the this whole EDAC Device */ - struct edac_device_counter counters; - - /* edac sysfs device control for the 'name' - * device this structure controls - */ - struct kobject kobj; -}; - -/* To get from the instance's wq to the beginning of the ctl structure */ -#define to_edac_mem_ctl_work(w) \ - container_of(w, struct mem_ctl_info, work) - -#define to_edac_device_ctl_work(w) \ - container_of(w,struct edac_device_ctl_info,work) - -/* - * The alloc() and free() functions for the 'edac_device' control info - * structure. A MC driver will allocate one of these for each edac_device - * it is going to control/register with the EDAC CORE. 
- */ -extern struct edac_device_ctl_info *edac_device_alloc_ctl_info( - unsigned sizeof_private, - char *edac_device_name, unsigned nr_instances, - char *edac_block_name, unsigned nr_blocks, - unsigned offset_value, - struct edac_dev_sysfs_block_attribute *block_attributes, - unsigned nr_attribs, - int device_index); - -/* The offset value can be: - * -1 indicating no offset value - * 0 for zero-based block numbers - * 1 for 1-based block number - * other for other-based block number - */ -#define BLOCK_OFFSET_VALUE_OFF ((unsigned) -1) - -extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info); - -#ifdef CONFIG_PCI - -struct edac_pci_counter { - atomic_t pe_count; - atomic_t npe_count; -}; - -/* - * Abstract edac_pci control info structure - * - */ -struct edac_pci_ctl_info { - /* for global list of edac_pci_ctl_info structs */ - struct list_head link; - - int pci_idx; - - struct bus_type *edac_subsys; /* pointer to subsystem */ - - /* the internal state of this controller instance */ - int op_state; - /* work struct for this instance */ - struct delayed_work work; - - /* pointer to edac polling checking routine: - * If NOT NULL: points to polling check routine - * If NULL: Then assumes INTERRUPT operation, where - * MC driver will receive events - */ - void (*edac_check) (struct edac_pci_ctl_info * edac_dev); - - struct device *dev; /* pointer to device structure */ - - const char *mod_name; /* module name */ - const char *ctl_name; /* edac controller name */ - const char *dev_name; /* pci/platform/etc... name */ - - void *pvt_info; /* pointer to 'private driver' info */ - - unsigned long start_time; /* edac_pci load start time (jiffies) */ - - struct completion complete; - - /* sysfs top name under 'edac' directory - * and instance name: - * cpu/cpu0/... - * cpu/cpu1/... - * cpu/cpu2/... - * ... - */ - char name[EDAC_DEVICE_NAME_LEN + 1]; - - /* Event counters for the this whole EDAC Device */ - struct edac_pci_counter counters; - - /* edac sysfs device control for the 'name' - * device this structure controls - */ - struct kobject kobj; - struct completion kobj_complete; -}; - -#define to_edac_pci_ctl_work(w) \ - container_of(w, struct edac_pci_ctl_info,work) - -/* write all or some bits in a byte-register*/ -static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value, - u8 mask) -{ - if (mask != 0xff) { - u8 buf; - - pci_read_config_byte(pdev, offset, &buf); - value &= mask; - buf &= ~mask; - value |= buf; - } - - pci_write_config_byte(pdev, offset, value); -} - -/* write all or some bits in a word-register*/ -static inline void pci_write_bits16(struct pci_dev *pdev, int offset, - u16 value, u16 mask) -{ - if (mask != 0xffff) { - u16 buf; - - pci_read_config_word(pdev, offset, &buf); - value &= mask; - buf &= ~mask; - value |= buf; - } - - pci_write_config_word(pdev, offset, value); -} - -/* - * pci_write_bits32 - * - * edac local routine to do pci_write_config_dword, but adds - * a mask parameter. If mask is all ones, ignore the mask. 
- * Otherwise utilize the mask to isolate specified bits - * - * write all or some bits in a dword-register - */ -static inline void pci_write_bits32(struct pci_dev *pdev, int offset, - u32 value, u32 mask) -{ - if (mask != 0xffffffff) { - u32 buf; - - pci_read_config_dword(pdev, offset, &buf); - value &= mask; - buf &= ~mask; - value |= buf; - } - - pci_write_config_dword(pdev, offset, value); -} - -#endif /* CONFIG_PCI */ - -struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, - unsigned n_layers, - struct edac_mc_layer *layers, - unsigned sz_pvt); -extern int edac_mc_add_mc(struct mem_ctl_info *mci); -extern void edac_mc_free(struct mem_ctl_info *mci); -extern struct mem_ctl_info *edac_mc_find(int idx); -extern struct mem_ctl_info *find_mci_by_dev(struct device *dev); -extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); -extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, - unsigned long page); - -void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, - struct mem_ctl_info *mci, - struct edac_raw_error_desc *e); - -void edac_mc_handle_error(const enum hw_event_mc_err_type type, - struct mem_ctl_info *mci, - const u16 error_count, - const unsigned long page_frame_number, - const unsigned long offset_in_page, - const unsigned long syndrome, - const int top_layer, - const int mid_layer, - const int low_layer, - const char *msg, - const char *other_detail); - -/* - * edac_device APIs - */ -extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev); -extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev); -extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, - int inst_nr, int block_nr, const char *msg); -extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, - int inst_nr, int block_nr, const char *msg); -extern int edac_device_alloc_index(void); -extern const char *edac_layer_name[]; - -/* - * edac_pci APIs - */ -extern struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt, - const char *edac_pci_name); - -extern void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci); - -extern void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci, - unsigned long value); - -extern int edac_pci_alloc_index(void); -extern int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx); -extern struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev); - -extern struct edac_pci_ctl_info *edac_pci_create_generic_ctl( - struct device *dev, - const char *mod_name); - -extern void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci); -extern int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci); -extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci); - -/* - * edac misc APIs - */ -extern char *edac_op_state_to_string(int op_state); - -#endif /* _EDAC_CORE_H_ */ diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 211021dfec73..0734909b08a4 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -12,23 +12,20 @@ * 19 Jan 2007 */ +#include <asm/page.h> +#include <linux/uaccess.h> +#include <linux/ctype.h> +#include <linux/highmem.h> +#include <linux/init.h> +#include <linux/jiffies.h> #include <linux/module.h> -#include <linux/types.h> +#include <linux/slab.h> #include <linux/smp.h> -#include <linux/init.h> +#include <linux/spinlock.h> #include <linux/sysctl.h> -#include <linux/highmem.h> #include <linux/timer.h> -#include <linux/slab.h> -#include <linux/jiffies.h> -#include <linux/spinlock.h> 
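
Returning briefly to the pci_write_bits*() helpers in the edac_core.h block above: they all share a single read-modify-write idiom. A freestanding sketch of that idiom, with the PCI config-space accessors stripped out, purely illustrative and not part of the patch:

	#include <stdint.h>

	/* Merge 'value' into 'reg' under 'mask': bits set in mask come
	 * from value, all other bits keep their current contents - the
	 * same read-modify-write idiom as pci_write_bits16() above. */
	static uint16_t write_bits16(uint16_t reg, uint16_t value,
				     uint16_t mask)
	{
		if (mask != 0xffff) {
			value &= mask;	/* keep only the bits we may change */
			reg &= ~mask;	/* clear those bits in the old word  */
			value |= reg;	/* merge the untouched remainder     */
		}
		return value;
	}
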
-#include <linux/list.h> -#include <linux/ctype.h> -#include <linux/workqueue.h> -#include <asm/uaccess.h> -#include <asm/page.h> -#include "edac_core.h" +#include "edac_device.h" #include "edac_module.h" /* lock for the list: 'edac_device_list', manipulation of this list @@ -37,6 +34,9 @@ static DEFINE_MUTEX(device_ctls_mutex); static LIST_HEAD(edac_device_list); +/* Default workqueue processing interval on this instance, in msecs */ +#define DEFAULT_POLL_INTERVAL 1000 + #ifdef CONFIG_EDAC_DEBUG static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) { @@ -50,114 +50,56 @@ static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) } #endif /* CONFIG_EDAC_DEBUG */ - /* - * edac_device_alloc_ctl_info() - * Allocate a new edac device control info structure - * - * The control structure is allocated in complete chunk - * from the OS. It is in turn sub allocated to the - * various objects that compose the structure - * - * The structure has a 'nr_instance' array within itself. - * Each instance represents a major component - * Example: L1 cache and L2 cache are 2 instance components - * - * Within each instance is an array of 'nr_blocks' blockoffsets + * @off_val: zero, 1, or other based offset */ -struct edac_device_ctl_info *edac_device_alloc_ctl_info( - unsigned sz_private, - char *edac_device_name, unsigned nr_instances, - char *edac_block_name, unsigned nr_blocks, - unsigned offset_value, /* zero, 1, or other based offset */ - struct edac_dev_sysfs_block_attribute *attrib_spec, unsigned nr_attrib, - int device_index) +struct edac_device_ctl_info * +edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instances, + char *blk_name, unsigned nr_blocks, unsigned off_val, + int device_index) { - struct edac_device_ctl_info *dev_ctl; - struct edac_device_instance *dev_inst, *inst; struct edac_device_block *dev_blk, *blk_p, *blk; - struct edac_dev_sysfs_block_attribute *dev_attrib, *attrib_p, *attrib; - unsigned total_size; - unsigned count; - unsigned instance, block, attr; - void *pvt, *p; + struct edac_device_instance *dev_inst, *inst; + struct edac_device_ctl_info *dev_ctl; + unsigned instance, block; + void *pvt; int err; edac_dbg(4, "instances=%d blocks=%d\n", nr_instances, nr_blocks); - /* Calculate the size of memory we need to allocate AND - * determine the offsets of the various item arrays - * (instance,block,attrib) from the start of an allocated structure. - * We want the alignment of each item (instance,block,attrib) - * to be at least as stringent as what the compiler would - * provide if we could simply hardcode everything into a single struct. - */ - p = NULL; - dev_ctl = edac_align_ptr(&p, sizeof(*dev_ctl), 1); + dev_ctl = kzalloc(sizeof(struct edac_device_ctl_info), GFP_KERNEL); + if (!dev_ctl) + return NULL; - /* Calc the 'end' offset past end of ONE ctl_info structure - * which will become the start of the 'instance' array - */ - dev_inst = edac_align_ptr(&p, sizeof(*dev_inst), nr_instances); + dev_inst = kcalloc(nr_instances, sizeof(struct edac_device_instance), GFP_KERNEL); + if (!dev_inst) + goto free; - /* Calc the 'end' offset past the instance array within the ctl_info - * which will become the start of the block array - */ - count = nr_instances * nr_blocks; - dev_blk = edac_align_ptr(&p, sizeof(*dev_blk), count); + dev_ctl->instances = dev_inst; - /* Calc the 'end' offset past the dev_blk array - * which will become the start of the attrib array, if any. 
- */ - /* calc how many nr_attrib we need */ - if (nr_attrib > 0) - count *= nr_attrib; - dev_attrib = edac_align_ptr(&p, sizeof(*dev_attrib), count); + dev_blk = kcalloc(nr_instances * nr_blocks, sizeof(struct edac_device_block), GFP_KERNEL); + if (!dev_blk) + goto free; - /* Calc the 'end' offset past the attributes array */ - pvt = edac_align_ptr(&p, sz_private, 1); + dev_ctl->blocks = dev_blk; - /* 'pvt' now points to where the private data area is. - * At this point 'pvt' (like dev_inst,dev_blk and dev_attrib) - * is baselined at ZERO - */ - total_size = ((unsigned long)pvt) + sz_private; + if (pvt_sz) { + pvt = kzalloc(pvt_sz, GFP_KERNEL); + if (!pvt) + goto free; - /* Allocate the amount of memory for the set of control structures */ - dev_ctl = kzalloc(total_size, GFP_KERNEL); - if (dev_ctl == NULL) - return NULL; + dev_ctl->pvt_info = pvt; + } - /* Adjust pointers so they point within the actual memory we - * just allocated rather than an imaginary chunk of memory - * located at address 0. - * 'dev_ctl' points to REAL memory, while the others are - * ZERO based and thus need to be adjusted to point within - * the allocated memory. - */ - dev_inst = (struct edac_device_instance *) - (((char *)dev_ctl) + ((unsigned long)dev_inst)); - dev_blk = (struct edac_device_block *) - (((char *)dev_ctl) + ((unsigned long)dev_blk)); - dev_attrib = (struct edac_dev_sysfs_block_attribute *) - (((char *)dev_ctl) + ((unsigned long)dev_attrib)); - pvt = sz_private ? (((char *)dev_ctl) + ((unsigned long)pvt)) : NULL; - - /* Begin storing the information into the control info structure */ - dev_ctl->dev_idx = device_index; - dev_ctl->nr_instances = nr_instances; - dev_ctl->instances = dev_inst; - dev_ctl->pvt_info = pvt; + dev_ctl->dev_idx = device_index; + dev_ctl->nr_instances = nr_instances; /* Default logging of CEs and UEs */ dev_ctl->log_ce = 1; dev_ctl->log_ue = 1; /* Name of this edac device */ - snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s",edac_device_name); - - edac_dbg(4, "edac_dev=%p next after end=%p\n", - dev_ctl, pvt + sz_private); + snprintf(dev_ctl->name, sizeof(dev_ctl->name),"%s", dev_name); /* Initialize every Instance */ for (instance = 0; instance < nr_instances; instance++) { @@ -168,56 +110,17 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( inst->blocks = blk_p; /* name of this instance */ - snprintf(inst->name, sizeof(inst->name), - "%s%u", edac_device_name, instance); + snprintf(inst->name, sizeof(inst->name), "%s%u", dev_name, instance); /* Initialize every block in each instance */ for (block = 0; block < nr_blocks; block++) { blk = &blk_p[block]; blk->instance = inst; snprintf(blk->name, sizeof(blk->name), - "%s%d", edac_block_name, block+offset_value); + "%s%d", blk_name, block + off_val); edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n", instance, inst, block, blk, blk->name); - - /* if there are NO attributes OR no attribute pointer - * then continue on to next block iteration - */ - if ((nr_attrib == 0) || (attrib_spec == NULL)) - continue; - - /* setup the attribute array for this block */ - blk->nr_attribs = nr_attrib; - attrib_p = &dev_attrib[block*nr_instances*nr_attrib]; - blk->block_attributes = attrib_p; - - edac_dbg(4, "THIS BLOCK_ATTRIB=%p\n", - blk->block_attributes); - - /* Initialize every user specified attribute in this - * block with the data the caller passed in - * Each block gets its own copy of pointers, - * and its unique 'value' - */ - for (attr = 0; attr < nr_attrib; attr++) { - attrib = &attrib_p[attr]; - - /* 
populate the unique per attrib - * with the code pointers and info - */ - attrib->attr = attrib_spec[attr].attr; - attrib->show = attrib_spec[attr].show; - attrib->store = attrib_spec[attr].store; - - attrib->block = blk; /* up link */ - - edac_dbg(4, "alloc-attrib=%p attrib_name='%s' attrib-spec=%p spec-name=%s\n", - attrib, attrib->attr.name, - &attrib_spec[attr], - attrib_spec[attr].attr.name - ); - } } } @@ -228,10 +131,8 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( * Initialize the 'root' kobj for the edac_device controller */ err = edac_device_register_sysfs_main_kobj(dev_ctl); - if (err) { - kfree(dev_ctl); - return NULL; - } + if (err) + goto free; /* at this point, the root kobj is valid, and in order to * 'free' the object, then the function: @@ -241,14 +142,14 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( */ return dev_ctl; + +free: + __edac_device_free_ctl_info(dev_ctl); + + return NULL; } EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info); -/* - * edac_device_free_ctl_info() - * frees the memory allocated by the edac_device_alloc_ctl_info() - * function - */ void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info) { edac_device_unregister_sysfs_main_kobj(ctl_info); @@ -389,12 +290,10 @@ static void edac_device_workq_function(struct work_struct *work_req) * whole one second to save timers firing all over the period * between integral seconds */ - if (edac_dev->poll_msec == 1000) - queue_delayed_work(edac_workqueue, &edac_dev->work, - round_jiffies_relative(edac_dev->delay)); + if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) + edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); else - queue_delayed_work(edac_workqueue, &edac_dev->work, - edac_dev->delay); + edac_queue_work(&edac_dev->work, edac_dev->delay); } /* @@ -402,8 +301,8 @@ static void edac_device_workq_function(struct work_struct *work_req) * initialize a workq item for this edac_device instance * passing in the new delay period in msec */ -void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, - unsigned msec) +static void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, + unsigned msec) { edac_dbg(0, "\n"); @@ -421,27 +320,24 @@ void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, * timers firing on sub-second basis, while they are happy * to fire together on the 1 second exactly */ - if (edac_dev->poll_msec == 1000) - queue_delayed_work(edac_workqueue, &edac_dev->work, - round_jiffies_relative(edac_dev->delay)); + if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) + edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); else - queue_delayed_work(edac_workqueue, &edac_dev->work, - edac_dev->delay); + edac_queue_work(&edac_dev->work, edac_dev->delay); } /* * edac_device_workq_teardown * stop the workq processing on this edac_dev */ -void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) +static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) { - int status; + if (!edac_dev->edac_check) + return; - status = cancel_delayed_work(&edac_dev->work); - if (status == 0) { - /* workq instance might be running, wait for it */ - flush_workqueue(edac_workqueue); - } + edac_dev->op_state = OP_OFFLINE; + + edac_stop_work(&edac_dev->work); } /* @@ -452,26 +348,18 @@ void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) * Then restart the workq on the new delay */ void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, - unsigned long 
value) + unsigned long msec) { - /* cancel the current workq request, without the mutex lock */ - edac_device_workq_teardown(edac_dev); - - /* acquire the mutex before doing the workq setup */ - mutex_lock(&device_ctls_mutex); - - /* restart the workq request, with new delay value */ - edac_device_workq_setup(edac_dev, value); + edac_dev->poll_msec = msec; + edac_dev->delay = msecs_to_jiffies(msec); - mutex_unlock(&device_ctls_mutex); + /* See comment in edac_device_workq_setup() above */ + if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) + edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); + else + edac_mod_work(&edac_dev->work, edac_dev->delay); } -/* - * edac_device_alloc_index: Allocate a unique device index number - * - * Return: - * allocated index number - */ int edac_device_alloc_index(void) { static atomic_t device_indexes = ATOMIC_INIT(0); @@ -480,17 +368,6 @@ int edac_device_alloc_index(void) } EXPORT_SYMBOL_GPL(edac_device_alloc_index); -/** - * edac_device_add_device: Insert the 'edac_dev' structure into the - * edac_device global list and create sysfs entries associated with - * edac_device structure. - * @edac_device: pointer to the edac_device structure to be added to the list - * 'edac_device' structure. - * - * Return: - * 0 Success - * !0 Failure - */ int edac_device_add_device(struct edac_device_ctl_info *edac_dev) { edac_dbg(0, "\n"); @@ -519,23 +396,16 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev) /* This instance is NOW RUNNING */ edac_dev->op_state = OP_RUNNING_POLL; - /* - * enable workq processing on this instance, - * default = 1000 msec - */ - edac_device_workq_setup(edac_dev, 1000); + edac_device_workq_setup(edac_dev, edac_dev->poll_msec ?: DEFAULT_POLL_INTERVAL); } else { edac_dev->op_state = OP_RUNNING_INTERRUPT; } /* Report action taken */ edac_device_printk(edac_dev, KERN_INFO, - "Giving out device to module '%s' controller " - "'%s': DEV '%s' (%s)\n", - edac_dev->mod_name, - edac_dev->ctl_name, - edac_dev_name(edac_dev), - edac_op_state_to_string(edac_dev->op_state)); + "Giving out device to module %s controller %s: DEV %s (%s)\n", + edac_dev->mod_name, edac_dev->ctl_name, edac_dev->dev_name, + edac_op_state_to_string(edac_dev->op_state)); mutex_unlock(&device_ctls_mutex); return 0; @@ -550,19 +420,6 @@ fail0: } EXPORT_SYMBOL_GPL(edac_device_add_device); -/** - * edac_device_del_device: - * Remove sysfs entries for specified edac_device structure and - * then remove edac_device structure from global list - * - * @dev: - * Pointer to 'struct device' representing edac_device - * structure to remove. - * - * Return: - * Pointer to removed edac_device structure, - * OR NULL if device not found. 
- */ struct edac_device_ctl_info *edac_device_del_device(struct device *dev) { struct edac_device_ctl_info *edac_dev; @@ -617,16 +474,16 @@ static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info return edac_dev->panic_on_ue; } -/* - * edac_device_handle_ce - * perform a common output and handling of an 'edac_dev' CE event - */ -void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, - int inst_nr, int block_nr, const char *msg) +void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev, + unsigned int count, int inst_nr, int block_nr, + const char *msg) { struct edac_device_instance *instance; struct edac_device_block *block = NULL; + if (!count) + return; + if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) { edac_device_printk(edac_dev, KERN_ERR, "INTERNAL ERROR: 'instance' out of range " @@ -648,31 +505,31 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, if (instance->nr_blocks > 0) { block = instance->blocks + block_nr; - block->counters.ce_count++; + block->counters.ce_count += count; } /* Propagate the count up the 'totals' tree */ - instance->counters.ce_count++; - edac_dev->counters.ce_count++; + instance->counters.ce_count += count; + edac_dev->counters.ce_count += count; if (edac_device_get_log_ce(edac_dev)) edac_device_printk(edac_dev, KERN_WARNING, - "CE: %s instance: %s block: %s '%s'\n", - edac_dev->ctl_name, instance->name, - block ? block->name : "N/A", msg); + "CE: %s instance: %s block: %s count: %d '%s'\n", + edac_dev->ctl_name, instance->name, + block ? block->name : "N/A", count, msg); } -EXPORT_SYMBOL_GPL(edac_device_handle_ce); +EXPORT_SYMBOL_GPL(edac_device_handle_ce_count); -/* - * edac_device_handle_ue - * perform a common output and handling of an 'edac_dev' UE event - */ -void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, - int inst_nr, int block_nr, const char *msg) +void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev, + unsigned int count, int inst_nr, int block_nr, + const char *msg) { struct edac_device_instance *instance; struct edac_device_block *block = NULL; + if (!count) + return; + if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) { edac_device_printk(edac_dev, KERN_ERR, "INTERNAL ERROR: 'instance' out of range " @@ -694,22 +551,207 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, if (instance->nr_blocks > 0) { block = instance->blocks + block_nr; - block->counters.ue_count++; + block->counters.ue_count += count; } /* Propagate the count up the 'totals' tree */ - instance->counters.ue_count++; - edac_dev->counters.ue_count++; + instance->counters.ue_count += count; + edac_dev->counters.ue_count += count; if (edac_device_get_log_ue(edac_dev)) edac_device_printk(edac_dev, KERN_EMERG, - "UE: %s instance: %s block: %s '%s'\n", - edac_dev->ctl_name, instance->name, - block ? block->name : "N/A", msg); + "UE: %s instance: %s block: %s count: %d '%s'\n", + edac_dev->ctl_name, instance->name, + block ? block->name : "N/A", count, msg); if (edac_device_get_panic_on_ue(edac_dev)) - panic("EDAC %s: UE instance: %s block %s '%s'\n", - edac_dev->ctl_name, instance->name, - block ? block->name : "N/A", msg); + panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n", + edac_dev->ctl_name, instance->name, + block ? 
block->name : "N/A", count, msg); +} +EXPORT_SYMBOL_GPL(edac_device_handle_ue_count); + +static void edac_dev_release(struct device *dev) +{ + struct edac_dev_feat_ctx *ctx = container_of(dev, struct edac_dev_feat_ctx, dev); + + kfree(ctx->mem_repair); + kfree(ctx->scrub); + kfree(ctx->dev.groups); + kfree(ctx); +} + +static const struct device_type edac_dev_type = { + .name = "edac_dev", + .release = edac_dev_release, +}; + +static void edac_dev_unreg(void *data) +{ + device_unregister(data); +} + +/** + * edac_dev_register - register device for RAS features with EDAC + * @parent: parent device. + * @name: name for the folder in the /sys/bus/edac/devices/, + * which is derived from the parent device. + * For e.g. /sys/bus/edac/devices/cxl_mem0/ + * @private: parent driver's data to store in the context if any. + * @num_features: number of RAS features to register. + * @ras_features: list of RAS features to register. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. + * + */ +int edac_dev_register(struct device *parent, char *name, + void *private, int num_features, + const struct edac_dev_feature *ras_features) +{ + const struct attribute_group **ras_attr_groups; + struct edac_dev_data *dev_data; + struct edac_dev_feat_ctx *ctx; + int mem_repair_cnt = 0; + int attr_gcnt = 0; + int ret = -ENOMEM; + int scrub_cnt = 0; + int feat; + + if (!parent || !name || !num_features || !ras_features) + return -EINVAL; + + /* Double parse to make space for attributes */ + for (feat = 0; feat < num_features; feat++) { + switch (ras_features[feat].ft_type) { + case RAS_FEAT_SCRUB: + attr_gcnt++; + scrub_cnt++; + break; + case RAS_FEAT_ECS: + attr_gcnt += ras_features[feat].ecs_info.num_media_frus; + break; + case RAS_FEAT_MEM_REPAIR: + attr_gcnt++; + mem_repair_cnt++; + break; + default: + return -EINVAL; + } + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ras_attr_groups = kcalloc(attr_gcnt + 1, sizeof(*ras_attr_groups), GFP_KERNEL); + if (!ras_attr_groups) + goto ctx_free; + + if (scrub_cnt) { + ctx->scrub = kcalloc(scrub_cnt, sizeof(*ctx->scrub), GFP_KERNEL); + if (!ctx->scrub) + goto groups_free; + } + + if (mem_repair_cnt) { + ctx->mem_repair = kcalloc(mem_repair_cnt, sizeof(*ctx->mem_repair), GFP_KERNEL); + if (!ctx->mem_repair) + goto data_mem_free; + } + + attr_gcnt = 0; + scrub_cnt = 0; + mem_repair_cnt = 0; + for (feat = 0; feat < num_features; feat++, ras_features++) { + switch (ras_features->ft_type) { + case RAS_FEAT_SCRUB: + if (!ras_features->scrub_ops || scrub_cnt != ras_features->instance) { + ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->scrub[scrub_cnt]; + dev_data->instance = scrub_cnt; + dev_data->scrub_ops = ras_features->scrub_ops; + dev_data->private = ras_features->ctx; + ret = edac_scrub_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->instance); + if (ret) + goto data_mem_free; + + scrub_cnt++; + attr_gcnt++; + break; + case RAS_FEAT_ECS: + if (!ras_features->ecs_ops) { + ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->ecs; + dev_data->ecs_ops = ras_features->ecs_ops; + dev_data->private = ras_features->ctx; + ret = edac_ecs_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->ecs_info.num_media_frus); + if (ret) + goto data_mem_free; + + attr_gcnt += ras_features->ecs_info.num_media_frus; + break; + case RAS_FEAT_MEM_REPAIR: + if (!ras_features->mem_repair_ops || + mem_repair_cnt != ras_features->instance) { + 
ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->mem_repair[mem_repair_cnt]; + dev_data->instance = mem_repair_cnt; + dev_data->mem_repair_ops = ras_features->mem_repair_ops; + dev_data->private = ras_features->ctx; + ret = edac_mem_repair_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->instance); + if (ret) + goto data_mem_free; + + mem_repair_cnt++; + attr_gcnt++; + break; + default: + ret = -EINVAL; + goto data_mem_free; + } + } + + ctx->dev.parent = parent; + ctx->dev.bus = edac_get_sysfs_subsys(); + ctx->dev.type = &edac_dev_type; + ctx->dev.groups = ras_attr_groups; + ctx->private = private; + dev_set_drvdata(&ctx->dev, ctx); + + ret = dev_set_name(&ctx->dev, "%s", name); + if (ret) + goto data_mem_free; + + ret = device_register(&ctx->dev); + if (ret) { + put_device(&ctx->dev); + return ret; + } + + return devm_add_action_or_reset(parent, edac_dev_unreg, &ctx->dev); + +data_mem_free: + kfree(ctx->mem_repair); + kfree(ctx->scrub); +groups_free: + kfree(ras_attr_groups); +ctx_free: + kfree(ctx); + return ret; } -EXPORT_SYMBOL_GPL(edac_device_handle_ue); +EXPORT_SYMBOL_GPL(edac_dev_register); diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h new file mode 100644 index 000000000000..034711d71ebf --- /dev/null +++ b/drivers/edac/edac_device.h @@ -0,0 +1,349 @@ +/* + * Defines, structures, APIs for edac_device + * + * (C) 2007 Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written by Thayne Harbaugh + * Based on work by Dan Hollis <goemon at anime dot net> and others. + * http://www.anime.net/~goemon/linux-ecc/ + * + * NMI handling support added by + * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com> + * + * Refactored for multi-source files: + * Doug Thompson <norsk5@xmission.com> + * + * Please look at Documentation/driver-api/edac.rst for more info about + * EDAC core structs and functions. + */ + +#ifndef _EDAC_DEVICE_H_ +#define _EDAC_DEVICE_H_ + +#include <linux/device.h> +#include <linux/edac.h> +#include <linux/kobject.h> +#include <linux/list.h> +#include <linux/types.h> +#include <linux/sysfs.h> +#include <linux/workqueue.h> + + +/* + * The following are the structures to provide for a generic + * or abstract 'edac_device'. This set of structures and the + * code that implements the APIs for the same, provide for + * registering EDAC type devices which are NOT standard memory. + * + * CPU caches (L1 and L2) + * DMA engines + * Core CPU switches + * Fabric switch units + * PCIe interface controllers + * other EDAC/ECC type devices that can be monitored for + * errors, etc. + * + * It allows for a 2 level set of hierarchy. For example: + * + * cache could be composed of L1, L2 and L3 levels of cache. + * Each CPU core would have its own L1 cache, while sharing + * L2 and maybe L3 caches. + * + * View them arranged, via the sysfs presentation: + * /sys/devices/system/edac/.. + * + * mc/ <existing memory device directory> + * cpu/cpu0/.. <L1 and L2 block directory> + * /L1-cache/ce_count + * /ue_count + * /L2-cache/ce_count + * /ue_count + * cpu/cpu1/.. <L1 and L2 block directory> + * /L1-cache/ce_count + * /ue_count + * /L2-cache/ce_count + * /ue_count + * ... 
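/*
 * Illustrative sketch, not part of the patch: a parent driver handing a
 * single scrub instance to edac_dev_register(). my_register_ras(),
 * my_drvdata and my_scrub_ops are hypothetical (the ops type is assumed
 * to be struct edac_scrub_ops); note that ->instance must match the
 * feature's position among the scrub features, as checked above.
 */
static const struct edac_scrub_ops my_scrub_ops = {
	/* get/set callbacks defined by the EDAC scrub interface */
};

static int my_register_ras(struct device *parent, void *my_drvdata)
{
	struct edac_dev_feature feat = {
		.ft_type	= RAS_FEAT_SCRUB,
		.instance	= 0,
		.scrub_ops	= &my_scrub_ops,
		.ctx		= my_drvdata,
	};

	/* creates /sys/bus/edac/devices/cxl_mem0/ with scrub attributes */
	return edac_dev_register(parent, "cxl_mem0", my_drvdata, 1, &feat);
}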
+ * + * the L1 and L2 directories would be "edac_device_block's" + */ + +struct edac_device_counter { + u32 ue_count; + u32 ce_count; +}; + +/* forward reference */ +struct edac_device_ctl_info; +struct edac_device_block; + +/* edac_dev_sysfs_attribute structure + * used for driver sysfs attributes in mem_ctl_info + * for extra controls and attributes: + * like high level error Injection controls + */ +struct edac_dev_sysfs_attribute { + struct attribute attr; + ssize_t (*show)(struct edac_device_ctl_info *, char *); + ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t); +}; + +/* edac_dev_sysfs_block_attribute structure + * + * used in leaf 'block' nodes for adding controls/attributes + * + * each block in each instance of the containing control structure can + * have an array of the following. The show function will be filled in + * with the show function in the low level driver. + */ +struct edac_dev_sysfs_block_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *, struct attribute *, char *); +}; + +/* device block control structure */ +struct edac_device_block { + struct edac_device_instance *instance; /* Up Pointer */ + char name[EDAC_DEVICE_NAME_LEN + 1]; + + struct edac_device_counter counters; /* basic UE and CE counters */ + + int nr_attribs; /* how many attributes */ + + /* this block's attributes, could be NULL */ + struct edac_dev_sysfs_block_attribute *block_attributes; + + /* edac sysfs device control */ + struct kobject kobj; +}; + +/* device instance control structure */ +struct edac_device_instance { + struct edac_device_ctl_info *ctl; /* Up pointer */ + char name[EDAC_DEVICE_NAME_LEN + 4]; + + struct edac_device_counter counters; /* instance counters */ + + u32 nr_blocks; /* how many blocks */ + struct edac_device_block *blocks; /* block array */ + + /* edac sysfs device control */ + struct kobject kobj; +}; + + +/* + * Abstract edac_device control info structure + * + */ +struct edac_device_ctl_info { + /* for global list of edac_device_ctl_info structs */ + struct list_head link; + + struct module *owner; /* Module owner of this control struct */ + + int dev_idx; + + /* Per instance controls for this edac_device */ + int log_ue; /* boolean for logging UEs */ + int log_ce; /* boolean for logging CEs */ + int panic_on_ue; /* boolean for panic'ing on an UE */ + unsigned poll_msec; /* number of milliseconds to poll interval */ + unsigned long delay; /* number of jiffies for poll_msec */ + + /* Additional top controller level attributes, but specified + * by the low level driver. + * + * Set by the low level driver to provide attributes at the + * controller level, same level as 'ue_count' and 'ce_count' above. 
+ * An array of structures, NULL terminated + * + * If attributes are desired, then set to array of attributes + * If no attributes are desired, leave NULL + */ + struct edac_dev_sysfs_attribute *sysfs_attributes; + + /* pointer to main 'edac' subsys in sysfs */ + const struct bus_type *edac_subsys; + + /* the internal state of this controller instance */ + int op_state; + /* work struct for this instance */ + struct delayed_work work; + + /* pointer to edac polling checking routine: + * If NOT NULL: points to polling check routine + * If NULL: Then assumes INTERRUPT operation, where + * MC driver will receive events + */ + void (*edac_check) (struct edac_device_ctl_info * edac_dev); + + struct device *dev; /* pointer to device structure */ + + const char *mod_name; /* module name */ + const char *ctl_name; /* edac controller name */ + const char *dev_name; /* pci/platform/etc... name */ + + void *pvt_info; /* pointer to 'private driver' info */ + + unsigned long start_time; /* edac_device load start time (jiffies) */ + + /* sysfs top name under 'edac' directory + * and instance name: + * cpu/cpu0/... + * cpu/cpu1/... + * cpu/cpu2/... + * ... + */ + char name[EDAC_DEVICE_NAME_LEN + 1]; + + /* Number of instances supported on this control structure + * and the array of those instances + */ + u32 nr_instances; + struct edac_device_instance *instances; + struct edac_device_block *blocks; + + /* Event counters for the this whole EDAC Device */ + struct edac_device_counter counters; + + /* edac sysfs device control for the 'name' + * device this structure controls + */ + struct kobject kobj; +}; + +/* To get from the instance's wq to the beginning of the ctl structure */ +#define to_edac_mem_ctl_work(w) \ + container_of(w, struct mem_ctl_info, work) + +#define to_edac_device_ctl_work(w) \ + container_of(w,struct edac_device_ctl_info,work) + +/* + * The alloc() and free() functions for the 'edac_device' control info + * structure. A MC driver will allocate one of these for each edac_device + * it is going to control/register with the EDAC CORE. + */ +extern struct edac_device_ctl_info *edac_device_alloc_ctl_info( + unsigned sizeof_private, + char *edac_device_name, unsigned nr_instances, + char *edac_block_name, unsigned nr_blocks, + unsigned offset_value, + int device_index); + +/* The offset value can be: + * -1 indicating no offset value + * 0 for zero-based block numbers + * 1 for 1-based block number + * other for other-based block number + */ +#define BLOCK_OFFSET_VALUE_OFF ((unsigned) -1) + +extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info); + +/** + * edac_device_add_device - Insert the 'edac_dev' structure into the + * edac_device global list and create sysfs entries associated with + * edac_device structure. + * + * @edac_dev: pointer to edac_device structure to be added to the list + * 'edac_device' structure. + * + * Returns: + * 0 on Success, or an error code on failure + */ +extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev); + +/** + * edac_device_del_device - Remove sysfs entries for specified edac_device + * structure and then remove edac_device structure from global list + * + * @dev: + * Pointer to struct &device representing the edac device + * structure to remove. + * + * Returns: + * Pointer to removed edac_device structure, + * or %NULL if device not found. + */ +extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev); + +/** + * edac_device_handle_ce_count - Log correctable errors. 
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @count: Number of errors to log.
+ * @inst_nr: number of the instance where the CE error happened
+ * @block_nr: number of the block where the CE error happened
+ * @msg: message to be printed
+ */
+void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
+				 unsigned int count, int inst_nr, int block_nr,
+				 const char *msg);
+
+/**
+ * edac_device_handle_ue_count - Log uncorrectable errors.
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @count: Number of errors to log.
+ * @inst_nr: number of the instance where the UE error happened
+ * @block_nr: number of the block where the UE error happened
+ * @msg: message to be printed
+ */
+void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+				 unsigned int count, int inst_nr, int block_nr,
+				 const char *msg);
+
+/**
+ * edac_device_handle_ce(): Log a single correctable error
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the CE error happened
+ * @block_nr: number of the block where the CE error happened
+ * @msg: message to be printed
+ */
+static inline void
+edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr,
+		      int block_nr, const char *msg)
+{
+	edac_device_handle_ce_count(edac_dev, 1, inst_nr, block_nr, msg);
+}
+
+/**
+ * edac_device_handle_ue(): Log a single uncorrectable error
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the UE error happened
+ * @block_nr: number of the block where the UE error happened
+ * @msg: message to be printed
+ */
+static inline void
+edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr,
+		      int block_nr, const char *msg)
+{
+	edac_device_handle_ue_count(edac_dev, 1, inst_nr, block_nr, msg);
+}
+
+/**
+ * edac_device_alloc_index: Allocate a unique device index number
+ *
+ * Returns:
+ *	allocated index number
+ */
+extern int edac_device_alloc_index(void);
+extern const char *edac_layer_name[];
+
+/* Free the actual struct */
+static inline void __edac_device_free_ctl_info(struct edac_device_ctl_info *ci)
+{
+	if (ci) {
+		kfree(ci->pvt_info);
+		kfree(ci->blocks);
+		kfree(ci->instances);
+		kfree(ci);
+	}
+}
+#endif
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index fb68a06ad683..fcebc4ffea26 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -1,7 +1,7 @@
 /*
  * file for managing the edac_device subsystem of devices for EDAC
  *
- * (C) 2007 SoftwareBitMaker
+ * (C) 2007 SoftwareBitMaker
  *
  * This file may be distributed under the terms of the
  * GNU General Public License.
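/*
 * Illustrative sketch, not part of the patch: reporting from a
 * driver's check routine with the count-based helpers declared above.
 * my_read_ce_count() and my_ue_pending() are hypothetical, and the
 * instance/block numbers assume a matching layout.
 */
static void my_edac_check(struct edac_device_ctl_info *edac_dev)
{
	unsigned int ce = my_read_ce_count();	/* read + clear hw counter */

	/* the helper returns early when ce == 0, so no caller-side test */
	edac_device_handle_ce_count(edac_dev, ce, 0, 2, "bank 2 CE");

	if (my_ue_pending())
		edac_device_handle_ue_count(edac_dev, 1, 0, 2, "bank 2 UE");
}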
@@ -15,7 +15,7 @@ #include <linux/slab.h> #include <linux/edac.h> -#include "edac_core.h" +#include "edac_device.h" #include "edac_module.h" #define EDAC_DEVICE_SYMLINK "device" @@ -163,13 +163,14 @@ CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR, edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store); /* Base Attributes of the EDAC_DEVICE ECC object */ -static struct ctl_info_attribute *device_ctrl_attr[] = { - &attr_ctl_info_panic_on_ue, - &attr_ctl_info_log_ue, - &attr_ctl_info_log_ce, - &attr_ctl_info_poll_msec, +static struct attribute *device_ctrl_attrs[] = { + &attr_ctl_info_panic_on_ue.attr, + &attr_ctl_info_log_ue.attr, + &attr_ctl_info_log_ce.attr, + &attr_ctl_info_poll_msec.attr, NULL, }; +ATTRIBUTE_GROUPS(device_ctrl); /* * edac_device_ctrl_master_release @@ -207,17 +208,14 @@ static void edac_device_ctrl_master_release(struct kobject *kobj) /* decrement the EDAC CORE module ref count */ module_put(edac_dev->owner); - /* free the control struct containing the 'main' kobj - * passed in to this routine - */ - kfree(edac_dev); + __edac_device_free_ctl_info(edac_dev); } /* ktype for the main (master) kobject */ static struct kobj_type ktype_device_ctrl = { .release = edac_device_ctrl_master_release, .sysfs_ops = &device_ctl_info_ops, - .default_attrs = (struct attribute **)device_ctrl_attr, + .default_groups = device_ctrl_groups, }; /* @@ -230,18 +228,14 @@ static struct kobj_type ktype_device_ctrl = { */ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) { - struct bus_type *edac_subsys; - int err; + struct device *dev_root; + const struct bus_type *edac_subsys; + int err = -ENODEV; edac_dbg(1, "\n"); /* get the /sys/devices/system/edac reference */ edac_subsys = edac_get_sysfs_subsys(); - if (edac_subsys == NULL) { - edac_dbg(1, "no edac_subsys error\n"); - err = -ENODEV; - goto err_out; - } /* Point to the 'edac_subsys' this instance 'reports' to */ edac_dev->edac_subsys = edac_subsys; @@ -254,15 +248,16 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) */ edac_dev->owner = THIS_MODULE; - if (!try_module_get(edac_dev->owner)) { - err = -ENODEV; - goto err_mod_get; - } + if (!try_module_get(edac_dev->owner)) + goto err_out; /* register */ - err = kobject_init_and_add(&edac_dev->kobj, &ktype_device_ctrl, - &edac_subsys->dev_root->kobj, - "%s", edac_dev->name); + dev_root = bus_get_dev_root(edac_subsys); + if (dev_root) { + err = kobject_init_and_add(&edac_dev->kobj, &ktype_device_ctrl, + &dev_root->kobj, "%s", edac_dev->name); + put_device(dev_root); + } if (err) { edac_dbg(1, "Failed to register '.../edac/%s'\n", edac_dev->name); @@ -280,11 +275,9 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) /* Error exit stack */ err_kobj_reg: + kobject_put(&edac_dev->kobj); module_put(edac_dev->owner); -err_mod_get: - edac_put_sysfs_subsys(); - err_out: return err; } @@ -306,7 +299,6 @@ void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev) * b) 'kfree' the memory */ kobject_put(&dev->kobj); - edac_put_sysfs_subsys(); } /* edac_dev -> instance information */ @@ -397,17 +389,18 @@ INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL); INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL); /* list of edac_dev 'instance' attributes */ -static struct instance_attribute *device_instance_attr[] = { - &attr_instance_ce_count, - &attr_instance_ue_count, +static struct attribute *device_instance_attrs[] = { + &attr_instance_ce_count.attr, + 
&attr_instance_ue_count.attr, NULL, }; +ATTRIBUTE_GROUPS(device_instance); /* The 'ktype' for each edac_dev 'instance' */ static struct kobj_type ktype_instance_ctrl = { .release = edac_device_ctrl_instance_release, .sysfs_ops = &device_instance_ops, - .default_attrs = (struct attribute **)device_instance_attr, + .default_groups = device_instance_groups, }; /* edac_dev -> instance -> block information */ @@ -464,48 +457,33 @@ static ssize_t edac_dev_block_show(struct kobject *kobj, return -EIO; } -/* Function to 'store' fields into the edac_dev 'block' structure */ -static ssize_t edac_dev_block_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, size_t count) -{ - struct edac_dev_sysfs_block_attribute *block_attr; - - block_attr = to_block_attr(attr); - - if (block_attr->store) - return block_attr->store(kobj, attr, buffer, count); - return -EIO; -} - /* edac_dev file operations for a 'block' */ static const struct sysfs_ops device_block_ops = { .show = edac_dev_block_show, - .store = edac_dev_block_store }; -#define BLOCK_ATTR(_name,_mode,_show,_store) \ +#define BLOCK_ATTR(_name,_mode,_show) \ static struct edac_dev_sysfs_block_attribute attr_block_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ .show = _show, \ - .store = _store, \ }; -BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL); -BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL); +BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show); +BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show); /* list of edac_dev 'block' attributes */ -static struct edac_dev_sysfs_block_attribute *device_block_attr[] = { - &attr_block_ce_count, - &attr_block_ue_count, +static struct attribute *device_block_attrs[] = { + &attr_block_ce_count.attr, + &attr_block_ue_count.attr, NULL, }; +ATTRIBUTE_GROUPS(device_block); /* The 'ktype' for each edac_dev 'block' */ static struct kobj_type ktype_block_ctrl = { .release = edac_device_ctrl_block_release, .sysfs_ops = &device_block_ops, - .default_attrs = (struct attribute **)device_block_attr, + .default_groups = device_block_groups, }; /* block ctor/dtor code */ diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 27e86d938262..0959320fe51c 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -28,16 +28,21 @@ #include <linux/ctype.h> #include <linux/edac.h> #include <linux/bitops.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/page.h> -#include <asm/edac.h> -#include "edac_core.h" +#include "edac_mc.h" #include "edac_module.h" - -#define CREATE_TRACE_POINTS -#define TRACE_INCLUDE_PATH ../../include/ras #include <ras/ras_event.h> +#ifdef CONFIG_EDAC_ATOMIC_SCRUB +#include <asm/edac.h> +#else +#define edac_atomic_scrub(va, size) do { } while (0) +#endif + +int edac_op_state = EDAC_OPSTATE_INVAL; +EXPORT_SYMBOL_GPL(edac_op_state); + /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); @@ -46,24 +51,27 @@ static LIST_HEAD(mc_devices); * Used to lock EDAC MC to just one module, avoiding two drivers e. g. * apei/ghes and i7core_edac to be used at the same time. 
*/ -static void const *edac_mc_owner; +static const char *edac_mc_owner; -unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, - unsigned len) +static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e) +{ + return container_of(e, struct mem_ctl_info, error_desc); +} + +unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf, + unsigned int len) { struct mem_ctl_info *mci = dimm->mci; int i, n, count = 0; char *p = buf; for (i = 0; i < mci->n_layers; i++) { - n = snprintf(p, len, "%s %d ", + n = scnprintf(p, len, "%s %d ", edac_layer_name[mci->layers[i].type], dimm->location[i]); p += n; len -= n; count += n; - if (!len) - break; } return count; @@ -79,20 +87,22 @@ static void edac_mc_dump_channel(struct rank_info *chan) edac_dbg(4, " channel->dimm = %p\n", chan->dimm); } -static void edac_mc_dump_dimm(struct dimm_info *dimm, int number) +static void edac_mc_dump_dimm(struct dimm_info *dimm) { char location[80]; + if (!dimm->nr_pages) + return; + edac_dimm_info_location(dimm, location, sizeof(location)); edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n", dimm->mci->csbased ? "rank" : "dimm", - number, location, dimm->csrow, dimm->cschannel); + dimm->idx, location, dimm->csrow, dimm->cschannel); edac_dbg(4, " dimm = %p\n", dimm); edac_dbg(4, " dimm->label = '%s'\n", dimm->label); edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); edac_dbg(4, " dimm->grain = %d\n", dimm->grain); - edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); } static void edac_mc_dump_csrow(struct csrow_info *csrow) @@ -126,233 +136,97 @@ static void edac_mc_dump_mci(struct mem_ctl_info *mci) #endif /* CONFIG_EDAC_DEBUG */ -/* - * keep those in sync with the enum mem_type - */ -const char *edac_mem_types[] = { - "Empty csrow", - "Reserved csrow type", - "Unknown csrow type", - "Fast page mode RAM", - "Extended data out RAM", - "Burst Extended data out RAM", - "Single data rate SDRAM", - "Registered single data rate SDRAM", - "Double data rate SDRAM", - "Registered Double data rate SDRAM", - "Rambus DRAM", - "Unbuffered DDR2 RAM", - "Fully buffered DDR2", - "Registered DDR2 RAM", - "Rambus XDR", - "Unbuffered DDR3 RAM", - "Registered DDR3 RAM", +const char * const edac_mem_types[] = { + [MEM_EMPTY] = "Empty", + [MEM_RESERVED] = "Reserved", + [MEM_UNKNOWN] = "Unknown", + [MEM_FPM] = "FPM", + [MEM_EDO] = "EDO", + [MEM_BEDO] = "BEDO", + [MEM_SDR] = "Unbuffered-SDR", + [MEM_RDR] = "Registered-SDR", + [MEM_DDR] = "Unbuffered-DDR", + [MEM_RDDR] = "Registered-DDR", + [MEM_RMBS] = "RMBS", + [MEM_DDR2] = "Unbuffered-DDR2", + [MEM_FB_DDR2] = "FullyBuffered-DDR2", + [MEM_RDDR2] = "Registered-DDR2", + [MEM_XDR] = "XDR", + [MEM_DDR3] = "Unbuffered-DDR3", + [MEM_RDDR3] = "Registered-DDR3", + [MEM_LRDDR3] = "Load-Reduced-DDR3-RAM", + [MEM_LPDDR3] = "Low-Power-DDR3-RAM", + [MEM_DDR4] = "Unbuffered-DDR4", + [MEM_RDDR4] = "Registered-DDR4", + [MEM_LPDDR4] = "Low-Power-DDR4-RAM", + [MEM_LRDDR4] = "Load-Reduced-DDR4-RAM", + [MEM_DDR5] = "Unbuffered-DDR5", + [MEM_RDDR5] = "Registered-DDR5", + [MEM_LRDDR5] = "Load-Reduced-DDR5-RAM", + [MEM_NVDIMM] = "Non-volatile-RAM", + [MEM_WIO2] = "Wide-IO-2", + [MEM_HBM2] = "High-bandwidth-memory-Gen2", + [MEM_HBM3] = "High-bandwidth-memory-Gen3", }; EXPORT_SYMBOL_GPL(edac_mem_types); -/** - * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation - * @p: pointer to a pointer with the memory offset to be used. 
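/*
 * Why the scnprintf() conversion in edac_dimm_info_location() above
 * matters (illustrative note, not part of the patch): snprintf()
 * returns the length that *would* have been written, so "len -= n"
 * can go negative on truncation; scnprintf() returns the bytes
 * actually written (excluding the NUL), which keeps the
 * pointer/length arithmetic safe without the old "if (!len) break;"
 * guard.
 */
char buf[8];
int n;

n = snprintf(buf, sizeof(buf), "0123456789");	/* n == 10, buf truncated */
n = scnprintf(buf, sizeof(buf), "0123456789");	/* n == 7, buf == "0123456" */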
At - * return, this will be incremented to point to the next offset - * @size: Size of the data structure to be reserved - * @n_elems: Number of elements that should be reserved - * - * If 'size' is a constant, the compiler will optimize this whole function - * down to either a no-op or the addition of a constant to the value of '*p'. - * - * The 'p' pointer is absolutely needed to keep the proper advancing - * further in memory to the proper offsets when allocating the struct along - * with its embedded structs, as edac_device_alloc_ctl_info() does it - * above, for example. - * - * At return, the pointer 'p' will be incremented to be used on a next call - * to this function. - */ -void *edac_align_ptr(void **p, unsigned size, int n_elems) +static void _edac_mc_free(struct mem_ctl_info *mci) { - unsigned align, r; - void *ptr = *p; - - *p += size * n_elems; - - /* - * 'p' can possibly be an unaligned item X such that sizeof(X) is - * 'size'. Adjust 'p' so that its alignment is at least as - * stringent as what the compiler would provide for X and return - * the aligned result. - * Here we assume that the alignment of a "long long" is the most - * stringent alignment that the compiler will ever provide by default. - * As far as I know, this is a reasonable assumption. - */ - if (size > sizeof(long)) - align = sizeof(long long); - else if (size > sizeof(int)) - align = sizeof(long); - else if (size > sizeof(short)) - align = sizeof(int); - else if (size > sizeof(char)) - align = sizeof(short); - else - return (char *)ptr; - - r = (unsigned long)p % align; - - if (r == 0) - return (char *)ptr; - - *p += align - r; - - return (void *)(((unsigned long)ptr) + align - r); + put_device(&mci->dev); } -static void _edac_mc_free(struct mem_ctl_info *mci) +static void mci_release(struct device *dev) { - int i, chn, row; + struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev); struct csrow_info *csr; - const unsigned int tot_dimms = mci->tot_dimms; - const unsigned int tot_channels = mci->num_cschannel; - const unsigned int tot_csrows = mci->nr_csrows; + int i, chn, row; if (mci->dimms) { - for (i = 0; i < tot_dimms; i++) + for (i = 0; i < mci->tot_dimms; i++) kfree(mci->dimms[i]); kfree(mci->dimms); } + if (mci->csrows) { - for (row = 0; row < tot_csrows; row++) { + for (row = 0; row < mci->nr_csrows; row++) { csr = mci->csrows[row]; - if (csr) { - if (csr->channels) { - for (chn = 0; chn < tot_channels; chn++) - kfree(csr->channels[chn]); - kfree(csr->channels); - } - kfree(csr); + if (!csr) + continue; + + if (csr->channels) { + for (chn = 0; chn < mci->num_cschannel; chn++) + kfree(csr->channels[chn]); + kfree(csr->channels); } + kfree(csr); } kfree(mci->csrows); } + kfree(mci->pvt_info); + kfree(mci->layers); kfree(mci); } -/** - * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure - * @mc_num: Memory controller number - * @n_layers: Number of MC hierarchy layers - * layers: Describes each layer as seen by the Memory Controller - * @size_pvt: size of private storage needed - * - * - * Everything is kmalloc'ed as one big chunk - more efficient. - * Only can be used if all structures have the same lifetime - otherwise - * you have to allocate and initialize your own structures. - * - * Use edac_mc_free() to free mc structures allocated by this function. 
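/*
 * Illustrative sketch, not part of the patch: the lifetime model that
 * replaces the old single-chunk allocation. Allocation (shown further
 * below in edac_mc_alloc()) ties the structure to a struct device with
 * a release callback, so freeing becomes a reference drop.
 */
static struct mem_ctl_info *sketch_alloc(void)
{
	struct mem_ctl_info *mci = kzalloc(sizeof(*mci), GFP_KERNEL);

	if (!mci)
		return NULL;

	mci->dev.release = mci_release; /* frees dimms, csrows, layers, pvt_info */
	device_initialize(&mci->dev);	/* refcount starts at 1 */
	return mci;
}

static void sketch_free(struct mem_ctl_info *mci)
{
	put_device(&mci->dev);		/* last reference -> mci_release() */
}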
- * - * NOTE: drivers handle multi-rank memories in different ways: in some - * drivers, one multi-rank memory stick is mapped as one entry, while, in - * others, a single multi-rank memory stick would be mapped into several - * entries. Currently, this function will allocate multiple struct dimm_info - * on such scenarios, as grouping the multiple ranks require drivers change. - * - * Returns: - * On failure: NULL - * On success: struct mem_ctl_info pointer - */ -struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, - unsigned n_layers, - struct edac_mc_layer *layers, - unsigned sz_pvt) +static int edac_mc_alloc_csrows(struct mem_ctl_info *mci) { - struct mem_ctl_info *mci; - struct edac_mc_layer *layer; - struct csrow_info *csr; - struct rank_info *chan; - struct dimm_info *dimm; - u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; - unsigned pos[EDAC_MAX_LAYERS]; - unsigned size, tot_dimms = 1, count = 1; - unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0; - void *pvt, *p, *ptr = NULL; - int i, j, row, chn, n, len, off; - bool per_rank = false; - - BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0); - /* - * Calculate the total amount of dimms and csrows/cschannels while - * in the old API emulation mode - */ - for (i = 0; i < n_layers; i++) { - tot_dimms *= layers[i].size; - if (layers[i].is_virt_csrow) - tot_csrows *= layers[i].size; - else - tot_channels *= layers[i].size; - - if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT) - per_rank = true; - } - - /* Figure out the offsets of the various items from the start of an mc - * structure. We want the alignment of each item to be at least as - * stringent as what the compiler would provide if we could simply - * hardcode everything into a single struct. - */ - mci = edac_align_ptr(&ptr, sizeof(*mci), 1); - layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers); - for (i = 0; i < n_layers; i++) { - count *= layers[i].size; - edac_dbg(4, "errcount layer %d size %d\n", i, count); - ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); - ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); - tot_errcount += 2 * count; - } - - edac_dbg(4, "allocating %d error counters\n", tot_errcount); - pvt = edac_align_ptr(&ptr, sz_pvt, 1); - size = ((unsigned long)pvt) + sz_pvt; - - edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n", - size, - tot_dimms, - per_rank ? "ranks" : "dimms", - tot_csrows * tot_channels); - - mci = kzalloc(size, GFP_KERNEL); - if (mci == NULL) - return NULL; - - /* Adjust pointers so they point within the memory we just allocated - * rather than an imaginary chunk of memory located at address 0. - */ - layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer)); - for (i = 0; i < n_layers; i++) { - mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i])); - mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i])); - } - pvt = sz_pvt ? 
(((char *)mci) + ((unsigned long)pvt)) : NULL; - - /* setup index and various internal pointers */ - mci->mc_idx = mc_num; - mci->tot_dimms = tot_dimms; - mci->pvt_info = pvt; - mci->n_layers = n_layers; - mci->layers = layer; - memcpy(mci->layers, layers, sizeof(*layer) * n_layers); - mci->nr_csrows = tot_csrows; - mci->num_cschannel = tot_channels; - mci->csbased = per_rank; + unsigned int tot_channels = mci->num_cschannel; + unsigned int tot_csrows = mci->nr_csrows; + unsigned int row, chn; /* - * Alocate and fill the csrow/channels structs + * Allocate and fill the csrow/channels structs */ mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL); if (!mci->csrows) - goto error; + return -ENOMEM; + for (row = 0; row < tot_csrows; row++) { + struct csrow_info *csr; + csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL); if (!csr) - goto error; + return -ENOMEM; + mci->csrows[row] = csr; csr->csrow_idx = row; csr->mci = mci; @@ -360,60 +234,70 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, csr->channels = kcalloc(tot_channels, sizeof(*csr->channels), GFP_KERNEL); if (!csr->channels) - goto error; + return -ENOMEM; for (chn = 0; chn < tot_channels; chn++) { + struct rank_info *chan; + chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL); if (!chan) - goto error; + return -ENOMEM; + csr->channels[chn] = chan; chan->chan_idx = chn; chan->csrow = csr; } } + return 0; +} + +static int edac_mc_alloc_dimms(struct mem_ctl_info *mci) +{ + unsigned int pos[EDAC_MAX_LAYERS]; + unsigned int row, chn, idx; + int layer; + void *p; + /* * Allocate and fill the dimm structs */ - mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL); + mci->dimms = kcalloc(mci->tot_dimms, sizeof(*mci->dimms), GFP_KERNEL); if (!mci->dimms) - goto error; + return -ENOMEM; memset(&pos, 0, sizeof(pos)); row = 0; chn = 0; - for (i = 0; i < tot_dimms; i++) { + for (idx = 0; idx < mci->tot_dimms; idx++) { + struct dimm_info *dimm; + struct rank_info *chan; + int n, len; + chan = mci->csrows[row]->channels[chn]; - off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]); - if (off < 0 || off >= tot_dimms) { - edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n"); - goto error; - } dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL); if (!dimm) - goto error; - mci->dimms[off] = dimm; + return -ENOMEM; + mci->dimms[idx] = dimm; dimm->mci = mci; + dimm->idx = idx; /* * Copy DIMM location and initialize it. 
*/ len = sizeof(dimm->label); p = dimm->label; - n = snprintf(p, len, "mc#%u", mc_num); + n = scnprintf(p, len, "mc#%u", mci->mc_idx); p += n; len -= n; - for (j = 0; j < n_layers; j++) { - n = snprintf(p, len, "%s#%u", - edac_layer_name[layers[j].type], - pos[j]); + for (layer = 0; layer < mci->n_layers; layer++) { + n = scnprintf(p, len, "%s#%u", + edac_layer_name[mci->layers[layer].type], + pos[layer]); p += n; len -= n; - dimm->location[j] = pos[j]; - - if (len <= 0) - break; + dimm->location[layer] = pos[layer]; } /* Link it to the csrows old API data */ @@ -422,29 +306,92 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, dimm->cschannel = chn; /* Increment csrow location */ - if (layers[0].is_virt_csrow) { + if (mci->layers[0].is_virt_csrow) { chn++; - if (chn == tot_channels) { + if (chn == mci->num_cschannel) { chn = 0; row++; } } else { row++; - if (row == tot_csrows) { + if (row == mci->nr_csrows) { row = 0; chn++; } } /* Increment dimm location */ - for (j = n_layers - 1; j >= 0; j--) { - pos[j]++; - if (pos[j] < layers[j].size) + for (layer = mci->n_layers - 1; layer >= 0; layer--) { + pos[layer]++; + if (pos[layer] < mci->layers[layer].size) break; - pos[j] = 0; + pos[layer] = 0; } } + return 0; +} + +struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, + unsigned int n_layers, + struct edac_mc_layer *layers, + unsigned int sz_pvt) +{ + struct mem_ctl_info *mci; + struct edac_mc_layer *layer; + unsigned int idx, tot_dimms = 1; + unsigned int tot_csrows = 1, tot_channels = 1; + bool per_rank = false; + + if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0)) + return NULL; + + /* + * Calculate the total amount of dimms and csrows/cschannels while + * in the old API emulation mode + */ + for (idx = 0; idx < n_layers; idx++) { + tot_dimms *= layers[idx].size; + + if (layers[idx].is_virt_csrow) + tot_csrows *= layers[idx].size; + else + tot_channels *= layers[idx].size; + + if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT) + per_rank = true; + } + + mci = kzalloc(sizeof(struct mem_ctl_info), GFP_KERNEL); + if (!mci) + return NULL; + + mci->layers = kcalloc(n_layers, sizeof(struct edac_mc_layer), GFP_KERNEL); + if (!mci->layers) + goto error; + + mci->pvt_info = kzalloc(sz_pvt, GFP_KERNEL); + if (!mci->pvt_info) + goto error; + + mci->dev.release = mci_release; + device_initialize(&mci->dev); + + /* setup index and various internal pointers */ + mci->mc_idx = mc_num; + mci->tot_dimms = tot_dimms; + mci->n_layers = n_layers; + memcpy(mci->layers, layers, sizeof(*layer) * n_layers); + mci->nr_csrows = tot_csrows; + mci->num_cschannel = tot_channels; + mci->csbased = per_rank; + + if (edac_mc_alloc_csrows(mci)) + goto error; + + if (edac_mc_alloc_dimms(mci)) + goto error; + mci->op_state = OP_ALLOC; return mci; @@ -456,37 +403,30 @@ error: } EXPORT_SYMBOL_GPL(edac_mc_alloc); -/** - * edac_mc_free - * 'Free' a previously allocated 'mci' structure - * @mci: pointer to a struct mem_ctl_info structure - */ void edac_mc_free(struct mem_ctl_info *mci) { edac_dbg(1, "\n"); - /* If we're not yet registered with sysfs free only what was allocated - * in edac_mc_alloc(). 
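/*
 * Illustrative sketch, not part of the patch: allocating a two-layer
 * controller from a probe routine; struct my_priv is hypothetical.
 * With these layers the label loop above produces names like
 * "mc#0channel#0slot#0", "mc#0channel#0slot#1", ..., as pos[]
 * advances like an odometer, least-significant layer first.
 */
struct edac_mc_layer layers[2];
struct mem_ctl_info *mci;

layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = 2;			/* two channels */
layers[0].is_virt_csrow = false;
layers[1].type = EDAC_MC_LAYER_SLOT;
layers[1].size = 4;			/* four slots per channel */
layers[1].is_virt_csrow = true;

mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct my_priv));
if (!mci)
	return -ENOMEM;			/* one of the kcalloc()s above failed */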
- */ - if (!device_is_registered(&mci->dev)) { - _edac_mc_free(mci); - return; - } - - /* the mci instance is freed here, when the sysfs object is dropped */ - edac_unregister_sysfs(mci); + _edac_mc_free(mci); } EXPORT_SYMBOL_GPL(edac_mc_free); +bool edac_has_mcs(void) +{ + bool ret; -/** - * find_mci_by_dev - * - * scan list of controllers looking for the one that manages - * the 'dev' device - * @dev: pointer to a struct device related with the MCI - */ -struct mem_ctl_info *find_mci_by_dev(struct device *dev) + mutex_lock(&mem_ctls_mutex); + + ret = list_empty(&mc_devices); + + mutex_unlock(&mem_ctls_mutex); + + return !ret; +} +EXPORT_SYMBOL_GPL(edac_has_mcs); + +/* Caller must hold mem_ctls_mutex */ +static struct mem_ctl_info *__find_mci_by_dev(struct device *dev) { struct mem_ctl_info *mci; struct list_head *item; @@ -502,23 +442,25 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev) return NULL; } -EXPORT_SYMBOL_GPL(find_mci_by_dev); -/* - * handler for EDAC to check if NMI type handler has asserted interrupt +/** + * find_mci_by_dev + * + * scan list of controllers looking for the one that manages + * the 'dev' device + * @dev: pointer to a struct device related with the MCI */ -static int edac_mc_assert_error_check_and_clear(void) +struct mem_ctl_info *find_mci_by_dev(struct device *dev) { - int old_state; - - if (edac_op_state == EDAC_OPSTATE_POLL) - return 1; + struct mem_ctl_info *ret; - old_state = edac_err_assert; - edac_err_assert = 0; + mutex_lock(&mem_ctls_mutex); + ret = __find_mci_by_dev(dev); + mutex_unlock(&mem_ctls_mutex); - return old_state; + return ret; } +EXPORT_SYMBOL_GPL(find_mci_by_dev); /* * edac_mc_workq_function @@ -531,66 +473,18 @@ static void edac_mc_workq_function(struct work_struct *work_req) mutex_lock(&mem_ctls_mutex); - /* if this control struct has movd to offline state, we are done */ - if (mci->op_state == OP_OFFLINE) { + if (mci->op_state != OP_RUNNING_POLL) { mutex_unlock(&mem_ctls_mutex); return; } - /* Only poll controllers that are running polled and have a check */ - if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL)) + if (edac_op_state == EDAC_OPSTATE_POLL) mci->edac_check(mci); mutex_unlock(&mem_ctls_mutex); - /* Reschedule */ - queue_delayed_work(edac_workqueue, &mci->work, - msecs_to_jiffies(edac_mc_get_poll_msec())); -} - -/* - * edac_mc_workq_setup - * initialize a workq item for this mci - * passing in the new delay period in msec - * - * locking model: - * - * called with the mem_ctls_mutex held - */ -static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) -{ - edac_dbg(0, "\n"); - - /* if this instance is not in the POLL state, then simply return */ - if (mci->op_state != OP_RUNNING_POLL) - return; - - INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); - mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); -} - -/* - * edac_mc_workq_teardown - * stop the workq processing on this mci - * - * locking model: - * - * called WITHOUT lock held - */ -static void edac_mc_workq_teardown(struct mem_ctl_info *mci) -{ - int status; - - if (mci->op_state != OP_RUNNING_POLL) - return; - - status = cancel_delayed_work(&mci->work); - if (status == 0) { - edac_dbg(0, "not canceled, flush the queue\n"); - - /* workq instance might be running, wait for it */ - flush_workqueue(edac_workqueue); - } + /* Queue ourselves again. 
*/ + edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec())); } /* @@ -599,7 +493,7 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci) * user space has updated our poll period value, need to * reset our workq delays */ -void edac_mc_reset_delay_period(int value) +void edac_mc_reset_delay_period(unsigned long value) { struct mem_ctl_info *mci; struct list_head *item; @@ -609,9 +503,9 @@ void edac_mc_reset_delay_period(int value) list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - edac_mc_workq_setup(mci, (unsigned long) value); + if (mci->op_state == OP_RUNNING_POLL) + edac_mod_work(&mci->work, value); } - mutex_unlock(&mem_ctls_mutex); } @@ -632,7 +526,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) insert_before = &mc_devices; - p = find_mci_by_dev(mci->pdev); + p = __find_mci_by_dev(mci->pdev); if (unlikely(p != NULL)) goto fail0; @@ -649,7 +543,6 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) } list_add_tail_rcu(&mci->link, insert_before); - atomic_inc(&edac_handlers); return 0; fail0: @@ -667,7 +560,6 @@ fail1: static int del_mc_from_global_list(struct mem_ctl_info *mci) { - int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -676,49 +568,38 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci) synchronize_rcu(); INIT_LIST_HEAD(&mci->link); - return handlers; + return list_empty(&mc_devices); } -/** - * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'. - * - * If found, return a pointer to the structure. - * Else return NULL. - * - * Caller must hold mem_ctls_mutex. - */ struct mem_ctl_info *edac_mc_find(int idx) { - struct list_head *item; struct mem_ctl_info *mci; + struct list_head *item; + + mutex_lock(&mem_ctls_mutex); list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - - if (mci->mc_idx >= idx) { - if (mci->mc_idx == idx) - return mci; - - break; - } + if (mci->mc_idx == idx) + goto unlock; } - return NULL; + mci = NULL; +unlock: + mutex_unlock(&mem_ctls_mutex); + return mci; } EXPORT_SYMBOL(edac_mc_find); -/** - * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and - * create sysfs entries associated with mci structure - * @mci: pointer to the mci structure to be added to the list - * - * Return: - * 0 Success - * !0 Failure - */ +const char *edac_get_owner(void) +{ + return edac_mc_owner; +} +EXPORT_SYMBOL_GPL(edac_get_owner); /* FIXME - should a warning be printed if no error detection? correction? 
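/*
 * Illustrative note, not part of the patch: edac_mc_find() now takes
 * mem_ctls_mutex itself (the old "Caller must hold mem_ctls_mutex"
 * rule is gone), so a lookup reduces to:
 */
struct mem_ctl_info *mci = edac_mc_find(0);

if (mci)
	edac_mc_printk(mci, KERN_INFO, "MC0 is %s\n", mci->ctl_name);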
*/ -int edac_mc_add_mc(struct mem_ctl_info *mci) +int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci, + const struct attribute_group **groups) { int ret = -EINVAL; edac_dbg(0, "\n"); @@ -728,6 +609,7 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) edac_mc_dump_mci(mci); if (edac_debug_level >= 4) { + struct dimm_info *dimm; int i; for (i = 0; i < mci->nr_csrows; i++) { @@ -744,9 +626,9 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) if (csrow->channels[j]->dimm->nr_pages) edac_mc_dump_channel(csrow->channels[j]); } - for (i = 0; i < mci->tot_dimms; i++) - if (mci->dimms[i]->nr_pages) - edac_mc_dump_dimm(mci->dimms[i], i); + + mci_for_each_dimm(mci, dimm) + edac_mc_dump_dimm(dimm); } #endif mutex_lock(&mem_ctls_mutex); @@ -762,25 +644,29 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) /* set load time so that error rate can be tracked */ mci->start_time = jiffies; - if (edac_create_sysfs_mci_device(mci)) { + mci->bus = edac_get_sysfs_subsys(); + + if (edac_create_sysfs_mci_device(mci, groups)) { edac_mc_printk(mci, KERN_WARNING, "failed to create sysfs device\n"); goto fail1; } - /* If there IS a check routine, then we are running POLLED */ - if (mci->edac_check != NULL) { - /* This instance is NOW RUNNING */ + if (mci->edac_check) { mci->op_state = OP_RUNNING_POLL; - edac_mc_workq_setup(mci, edac_mc_get_poll_msec()); + INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); + edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec())); + } else { mci->op_state = OP_RUNNING_INTERRUPT; } /* Report action taken */ - edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" - " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); + edac_mc_printk(mci, KERN_INFO, + "Giving out device to module %s controller %s: DEV %s (%s)\n", + mci->mod_name, mci->ctl_name, mci->dev_name, + edac_op_state_to_string(mci->op_state)); edac_mc_owner = mci->mod_name; @@ -794,15 +680,8 @@ fail0: mutex_unlock(&mem_ctls_mutex); return ret; } -EXPORT_SYMBOL_GPL(edac_mc_add_mc); +EXPORT_SYMBOL_GPL(edac_mc_add_mc_with_groups); -/** - * edac_mc_del_mc: Remove sysfs entries for specified mci structure and - * remove mci structure from global list - * @pdev: Pointer to 'struct device' representing mci structure to remove. - * - * Return pointer to removed mci structure, or NULL if device not found. 
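/*
 * Illustrative sketch, not part of the patch: the two registration
 * flavours. my_mci_groups is a hypothetical NULL-terminated array of
 * driver-specific attribute groups; passing NULL, or using the
 * edac_mc_add_mc() wrapper defined in edac_mc.h, registers only the
 * core attributes.
 */
ret = edac_mc_add_mc_with_groups(mci, my_mci_groups);
if (ret)
	goto err_free_mc;	/* hypothetical error path */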
- */ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) { struct mem_ctl_info *mci; @@ -812,21 +691,22 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) mutex_lock(&mem_ctls_mutex); /* find the requested mci struct in the global list */ - mci = find_mci_by_dev(dev); + mci = __find_mci_by_dev(dev); if (mci == NULL) { mutex_unlock(&mem_ctls_mutex); return NULL; } - if (!del_mc_from_global_list(mci)) + /* mark MCI offline: */ + mci->op_state = OP_OFFLINE; + + if (del_mc_from_global_list(mci)) edac_mc_owner = NULL; - mutex_unlock(&mem_ctls_mutex); - /* flush workq processes */ - edac_mc_workq_teardown(mci); + mutex_unlock(&mem_ctls_mutex); - /* marking MCI offline */ - mci->op_state = OP_OFFLINE; + if (mci->edac_check) + edac_stop_work(&mci->work); /* remove from sysfs */ edac_remove_sysfs_mci_device(mci); @@ -861,7 +741,7 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset, virt_addr = kmap_atomic(pg); /* Perform architecture specific atomic scrub operation */ - atomic_scrub(virt_addr + offset, size); + edac_atomic_scrub(virt_addr + offset, size); /* Unmap and complete */ kunmap_atomic(virt_addr); @@ -921,90 +801,53 @@ const char *edac_layer_name[] = { }; EXPORT_SYMBOL_GPL(edac_layer_name); -static void edac_inc_ce_error(struct mem_ctl_info *mci, - bool enable_per_layer_report, - const int pos[EDAC_MAX_LAYERS], - const u16 count) +static void edac_inc_ce_error(struct edac_raw_error_desc *e) { - int i, index = 0; - - mci->ce_mc += count; - - if (!enable_per_layer_report) { - mci->ce_noinfo_count += count; - return; - } + int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; + struct mem_ctl_info *mci = error_desc_to_mci(e); + struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]); - for (i = 0; i < mci->n_layers; i++) { - if (pos[i] < 0) - break; - index += pos[i]; - mci->ce_per_layer[i][index] += count; + mci->ce_mc += e->error_count; - if (i < mci->n_layers - 1) - index *= mci->layers[i + 1].size; - } + if (dimm) + dimm->ce_count += e->error_count; + else + mci->ce_noinfo_count += e->error_count; } -static void edac_inc_ue_error(struct mem_ctl_info *mci, - bool enable_per_layer_report, - const int pos[EDAC_MAX_LAYERS], - const u16 count) +static void edac_inc_ue_error(struct edac_raw_error_desc *e) { - int i, index = 0; - - mci->ue_mc += count; - - if (!enable_per_layer_report) { - mci->ce_noinfo_count += count; - return; - } + int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; + struct mem_ctl_info *mci = error_desc_to_mci(e); + struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]); - for (i = 0; i < mci->n_layers; i++) { - if (pos[i] < 0) - break; - index += pos[i]; - mci->ue_per_layer[i][index] += count; + mci->ue_mc += e->error_count; - if (i < mci->n_layers - 1) - index *= mci->layers[i + 1].size; - } + if (dimm) + dimm->ue_count += e->error_count; + else + mci->ue_noinfo_count += e->error_count; } -static void edac_ce_error(struct mem_ctl_info *mci, - const u16 error_count, - const int pos[EDAC_MAX_LAYERS], - const char *msg, - const char *location, - const char *label, - const char *detail, - const char *other_detail, - const bool enable_per_layer_report, - const unsigned long page_frame_number, - const unsigned long offset_in_page, - long grain) +static void edac_ce_error(struct edac_raw_error_desc *e) { + struct mem_ctl_info *mci = error_desc_to_mci(e); unsigned long remapped_page; - char *msg_aux = ""; - - if (*msg) - msg_aux = " "; if (edac_mc_get_log_ce()) { - if 
(other_detail && *other_detail) - edac_mc_printk(mci, KERN_WARNING, - "%d CE %s%son %s (%s %s - %s)\n", - error_count, msg, msg_aux, label, - location, detail, other_detail); - else - edac_mc_printk(mci, KERN_WARNING, - "%d CE %s%son %s (%s %s)\n", - error_count, msg, msg_aux, label, - location, detail); + edac_mc_printk(mci, KERN_WARNING, + "%d CE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx%s%s)\n", + e->error_count, e->msg, + *e->msg ? " " : "", + e->label, e->location, e->page_frame_number, e->offset_in_page, + e->grain, e->syndrome, + *e->other_detail ? " - " : "", + e->other_detail); } - edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count); - if (mci->scrub_mode & SCRUB_SW_SRC) { + edac_inc_ce_error(e); + + if (mci->scrub_mode == SCRUB_SW_SRC) { /* * Some memory controllers (called MCs below) can remap * memory so that it is still available at a different @@ -1017,113 +860,87 @@ static void edac_ce_error(struct mem_ctl_info *mci, * be scrubbed. */ remapped_page = mci->ctl_page_to_phys ? - mci->ctl_page_to_phys(mci, page_frame_number) : - page_frame_number; + mci->ctl_page_to_phys(mci, e->page_frame_number) : + e->page_frame_number; - edac_mc_scrub_block(remapped_page, - offset_in_page, grain); + edac_mc_scrub_block(remapped_page, e->offset_in_page, e->grain); } } -static void edac_ue_error(struct mem_ctl_info *mci, - const u16 error_count, - const int pos[EDAC_MAX_LAYERS], - const char *msg, - const char *location, - const char *label, - const char *detail, - const char *other_detail, - const bool enable_per_layer_report) +static void edac_ue_error(struct edac_raw_error_desc *e) { - char *msg_aux = ""; - - if (*msg) - msg_aux = " "; + struct mem_ctl_info *mci = error_desc_to_mci(e); if (edac_mc_get_log_ue()) { - if (other_detail && *other_detail) - edac_mc_printk(mci, KERN_WARNING, - "%d UE %s%son %s (%s %s - %s)\n", - error_count, msg, msg_aux, label, - location, detail, other_detail); - else - edac_mc_printk(mci, KERN_WARNING, - "%d UE %s%son %s (%s %s)\n", - error_count, msg, msg_aux, label, - location, detail); + edac_mc_printk(mci, KERN_WARNING, + "%d UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n", + e->error_count, e->msg, + *e->msg ? " " : "", + e->label, e->location, e->page_frame_number, e->offset_in_page, + e->grain, + *e->other_detail ? " - " : "", + e->other_detail); } + edac_inc_ue_error(e); + if (edac_mc_get_panic_on_ue()) { - if (other_detail && *other_detail) - panic("UE %s%son %s (%s%s - %s)\n", - msg, msg_aux, label, location, detail, other_detail); - else - panic("UE %s%son %s (%s%s)\n", - msg, msg_aux, label, location, detail); + panic("UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n", + e->msg, + *e->msg ? " " : "", + e->label, e->location, e->page_frame_number, e->offset_in_page, + e->grain, + *e->other_detail ? " - " : "", + e->other_detail); } - - edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } -/** - * edac_raw_mc_handle_error - reports a memory event to userspace without doing - * anything to discover the error location - * - * @type: severity of the error (CE/UE/Fatal) - * @mci: a struct mem_ctl_info pointer - * @e: error description - * - * This raw function is used internally by edac_mc_handle_error(). It should - * only be called directly when the hardware error come directly from BIOS, - * like in the case of APEI GHES driver. 
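/*
 * Illustrative sketch, not part of the patch: a firmware-first driver
 * (the GHES case mentioned above) filling the descriptor embedded in
 * mci and calling the raw handler directly. The values are
 * hypothetical; mci is recovered inside via container_of().
 */
struct edac_raw_error_desc *e = &mci->error_desc;

memset(e, 0, sizeof(*e));
e->type		= HW_EVENT_ERR_CORRECTED;
e->error_count	= 1;
e->grain	= 64;			/* reporting granularity, bytes */
e->msg		= "single-bit ECC";
e->other_detail	= "";
strscpy(e->label, "DIMM_A0", sizeof(e->label));

edac_raw_mc_handle_error(e);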
- */ -void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, - struct mem_ctl_info *mci, - struct edac_raw_error_desc *e) +static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan) { - char detail[80]; - int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; + struct mem_ctl_info *mci = error_desc_to_mci(e); + enum hw_event_mc_err_type type = e->type; + u16 count = e->error_count; + + if (row < 0) + return; + + edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); - /* Memory type dependent details about the error */ if (type == HW_EVENT_ERR_CORRECTED) { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", - e->page_frame_number, e->offset_in_page, - e->grain, e->syndrome); - edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, - detail, e->other_detail, e->enable_per_layer_report, - e->page_frame_number, e->offset_in_page, e->grain); + mci->csrows[row]->ce_count += count; + if (chan >= 0) + mci->csrows[row]->channels[chan]->ce_count += count; } else { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld", - e->page_frame_number, e->offset_in_page, e->grain); - - edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, - detail, e->other_detail, e->enable_per_layer_report); + mci->csrows[row]->ue_count += count; } +} +void edac_raw_mc_handle_error(struct edac_raw_error_desc *e) +{ + struct mem_ctl_info *mci = error_desc_to_mci(e); + u8 grain_bits; + /* Sanity-check driver-supplied grain value. */ + if (WARN_ON_ONCE(!e->grain)) + e->grain = 1; + + grain_bits = fls_long(e->grain - 1); + + /* Report the error via the trace interface */ + if (IS_ENABLED(CONFIG_RAS)) + trace_mc_event(e->type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, + e->low_layer, + (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); + + if (e->type == HW_EVENT_ERR_CORRECTED) + edac_ce_error(e); + else + edac_ue_error(e); } EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error); -/** - * edac_mc_handle_error - reports a memory event to userspace - * - * @type: severity of the error (CE/UE/Fatal) - * @mci: a struct mem_ctl_info pointer - * @error_count: Number of errors of the same type - * @page_frame_number: mem page where the error occurred - * @offset_in_page: offset of the error inside the page - * @syndrome: ECC syndrome - * @top_layer: Memory layer[0] position - * @mid_layer: Memory layer[1] position - * @low_layer: Memory layer[2] position - * @msg: Message meaningful to the end users that - * explains the event - * @other_detail: Technical details about the event that - * may help hardware manufacturers and - * EDAC developers to analyse the event - */ void edac_mc_handle_error(const enum hw_event_mc_err_type type, struct mem_ctl_info *mci, const u16 error_count, @@ -1136,31 +953,34 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *msg, const char *other_detail) { - char *p; + struct dimm_info *dimm; + char *p, *end; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; int i, n_labels = 0; - u8 grain_bits; struct edac_raw_error_desc *e = &mci->error_desc; + bool any_memory = true; + const char *prefix; edac_dbg(3, "MC%d\n", mci->mc_idx); /* Fills the error report buffer */ memset(e, 0, sizeof (*e)); e->error_count = error_count; + e->type = type; e->top_layer = top_layer; e->mid_layer = mid_layer; e->low_layer = low_layer; 
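/*
 * Aside on the grain handling in edac_raw_mc_handle_error() above
 * (illustrative): grain is the reporting granularity in bytes, and
 * grain_bits = fls_long(grain - 1) is its log2 for the usual
 * power-of-two values, e.g. fls_long(64 - 1) == 6. A bogus
 * driver-supplied grain of 0 is clamped to 1, giving grain_bits == 0.
 * The old "fls_long(e->grain) + 1" over-stated this, reporting 8
 * instead of 6 for a 64-byte grain.
 */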
e->page_frame_number = page_frame_number; e->offset_in_page = offset_in_page; e->syndrome = syndrome; - e->msg = msg; - e->other_detail = other_detail; + /* need valid strings here for both: */ + e->msg = msg ?: ""; + e->other_detail = other_detail ?: ""; /* - * Check if the event report is consistent and if the memory - * location is known. If it is known, enable_per_layer_report will be - * true, the DIMM(s) label info will be filled and the per-layer + * Check if the event report is consistent and if the memory location is + * known. If it is, the DIMM(s) label info will be filled and the DIMM's * error counters will be incremented. */ for (i = 0; i < mci->n_layers; i++) { @@ -1179,7 +999,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, pos[i] = -1; } if (pos[i] >= 0) - e->enable_per_layer_report = true; + any_memory = false; } /* @@ -1195,10 +1015,10 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, */ p = e->label; *p = '\0'; + end = p + sizeof(e->label); + prefix = ""; - for (i = 0; i < mci->tot_dimms; i++) { - struct dimm_info *dimm = mci->dimms[i]; - + mci_for_each_dimm(mci, dimm) { if (top_layer >= 0 && top_layer != dimm->location[0]) continue; if (mid_layer >= 0 && mid_layer != dimm->location[1]) @@ -1212,81 +1032,61 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, /* * If the error is memory-controller wide, there's no need to - * seek for the affected DIMMs because the whole - * channel/memory controller/... may be affected. - * Also, don't show errors for empty DIMM slots. + * seek for the affected DIMMs because the whole channel/memory + * controller/... may be affected. Also, don't show errors for + * empty DIMM slots. */ - if (e->enable_per_layer_report && dimm->nr_pages) { - if (n_labels >= EDAC_MAX_LABELS) { - e->enable_per_layer_report = false; - break; - } - n_labels++; - if (p != e->label) { - strcpy(p, OTHER_LABEL); - p += strlen(OTHER_LABEL); - } - strcpy(p, dimm->label); - p += strlen(p); - *p = '\0'; + if (!dimm->nr_pages) + continue; - /* - * get csrow/channel of the DIMM, in order to allow - * incrementing the compat API counters - */ - edac_dbg(4, "%s csrows map: (%d,%d)\n", - mci->csbased ? "rank" : "dimm", - dimm->csrow, dimm->cschannel); - if (row == -1) - row = dimm->csrow; - else if (row >= 0 && row != dimm->csrow) - row = -2; - - if (chan == -1) - chan = dimm->cschannel; - else if (chan >= 0 && chan != dimm->cschannel) - chan = -2; + n_labels++; + if (n_labels > EDAC_MAX_LABELS) { + p = e->label; + *p = '\0'; + } else { + p += scnprintf(p, end - p, "%s%s", prefix, dimm->label); + prefix = OTHER_LABEL; } - } - if (!e->enable_per_layer_report) { - strcpy(e->label, "any memory"); - } else { - edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); - if (p == e->label) - strcpy(e->label, "unknown memory"); - if (type == HW_EVENT_ERR_CORRECTED) { - if (row >= 0) { - mci->csrows[row]->ce_count += error_count; - if (chan >= 0) - mci->csrows[row]->channels[chan]->ce_count += error_count; - } - } else - if (row >= 0) - mci->csrows[row]->ue_count += error_count; + /* + * get csrow/channel of the DIMM, in order to allow + * incrementing the compat API counters + */ + edac_dbg(4, "%s csrows map: (%d,%d)\n", + mci->csbased ? 
"rank" : "dimm", + dimm->csrow, dimm->cschannel); + if (row == -1) + row = dimm->csrow; + else if (row >= 0 && row != dimm->csrow) + row = -2; + + if (chan == -1) + chan = dimm->cschannel; + else if (chan >= 0 && chan != dimm->cschannel) + chan = -2; } + if (any_memory) + strscpy(e->label, "any memory", sizeof(e->label)); + else if (!*e->label) + strscpy(e->label, "unknown memory", sizeof(e->label)); + + edac_inc_csrow(e, row, chan); + /* Fill the RAM location data */ p = e->location; + end = p + sizeof(e->location); + prefix = ""; for (i = 0; i < mci->n_layers; i++) { if (pos[i] < 0) continue; - p += sprintf(p, "%s:%d ", - edac_layer_name[mci->layers[i].type], - pos[i]); + p += scnprintf(p, end - p, "%s%s:%d", prefix, + edac_layer_name[mci->layers[i].type], pos[i]); + prefix = " "; } - if (p > e->location) - *(p - 1) = '\0'; - - /* Report the error via the trace interface */ - grain_bits = fls_long(e->grain) + 1; - trace_mc_event(type, e->msg, e->label, e->error_count, - mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, - PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, - grain_bits, e->syndrome, e->other_detail); - edac_raw_mc_handle_error(type, mci, e); + edac_raw_mc_handle_error(e); } EXPORT_SYMBOL_GPL(edac_mc_handle_error); diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_mc.h new file mode 100644 index 000000000000..881b00eadf7a --- /dev/null +++ b/drivers/edac/edac_mc.h @@ -0,0 +1,258 @@ +/* + * Defines, structures, APIs for edac_mc module + * + * (C) 2007 Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written by Thayne Harbaugh + * Based on work by Dan Hollis <goemon at anime dot net> and others. + * http://www.anime.net/~goemon/linux-ecc/ + * + * NMI handling support added by + * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com> + * + * Refactored for multi-source files: + * Doug Thompson <norsk5@xmission.com> + * + * Please look at Documentation/driver-api/edac.rst for more info about + * EDAC core structs and functions. + */ + +#ifndef _EDAC_MC_H_ +#define _EDAC_MC_H_ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/smp.h> +#include <linux/pci.h> +#include <linux/time.h> +#include <linux/nmi.h> +#include <linux/rcupdate.h> +#include <linux/completion.h> +#include <linux/kobject.h> +#include <linux/platform_device.h> +#include <linux/workqueue.h> +#include <linux/edac.h> + +#if PAGE_SHIFT < 20 +#define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT)) +#define MiB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) +#else /* PAGE_SHIFT > 20 */ +#define PAGES_TO_MiB(pages) ((pages) << (PAGE_SHIFT - 20)) +#define MiB_TO_PAGES(mb) ((mb) >> (PAGE_SHIFT - 20)) +#endif + +#define edac_printk(level, prefix, fmt, arg...) \ + printk(level "EDAC " prefix ": " fmt, ##arg) + +#define edac_mc_printk(mci, level, fmt, arg...) \ + printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg) + +#define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \ + printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg) + +#define edac_device_printk(ctl, level, fmt, arg...) \ + printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg) + +#define edac_pci_printk(ctl, level, fmt, arg...) 
\ + printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg) + +/* prefixes for edac_printk() and edac_mc_printk() */ +#define EDAC_MC "MC" +#define EDAC_PCI "PCI" +#define EDAC_DEBUG "DEBUG" + +extern const char * const edac_mem_types[]; + +#ifdef CONFIG_EDAC_DEBUG +extern int edac_debug_level; + +#define edac_dbg(level, fmt, ...) \ +do { \ + if (level <= edac_debug_level) \ + edac_printk(KERN_DEBUG, EDAC_DEBUG, \ + "%s: " fmt, __func__, ##__VA_ARGS__); \ +} while (0) + +#else /* !CONFIG_EDAC_DEBUG */ + +#define edac_dbg(level, fmt, ...) \ +do { \ + if (0) \ + edac_printk(KERN_DEBUG, EDAC_DEBUG, \ + "%s: " fmt, __func__, ##__VA_ARGS__); \ +} while (0) + +#endif /* !CONFIG_EDAC_DEBUG */ + +#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \ + PCI_DEVICE_ID_ ## vend ## _ ## dev + +#define edac_dev_name(dev) (dev)->dev_name + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +/** + * edac_mc_alloc() - Allocate and partially fill a struct &mem_ctl_info. + * + * @mc_num: Memory controller number + * @n_layers: Number of MC hierarchy layers + * @layers: Describes each layer as seen by the Memory Controller + * @sz_pvt: size of private storage needed + * + * + * Everything is kmalloc'ed as one big chunk - more efficient. + * Only can be used if all structures have the same lifetime - otherwise + * you have to allocate and initialize your own structures. + * + * Use edac_mc_free() to free mc structures allocated by this function. + * + * .. note:: + * + * drivers handle multi-rank memories in different ways: in some + * drivers, one multi-rank memory stick is mapped as one entry, while, in + * others, a single multi-rank memory stick would be mapped into several + * entries. Currently, this function will allocate multiple struct dimm_info + * on such scenarios, as grouping the multiple ranks require drivers change. + * + * Returns: + * On success, return a pointer to struct mem_ctl_info pointer; + * %NULL otherwise + */ +struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, + unsigned int n_layers, + struct edac_mc_layer *layers, + unsigned int sz_pvt); + +/** + * edac_get_owner - Return the owner's mod_name of EDAC MC + * + * Returns: + * Pointer to mod_name string when EDAC MC is owned. NULL otherwise. + */ +extern const char *edac_get_owner(void); + +/* + * edac_mc_add_mc_with_groups() - Insert the @mci structure into the mci + * global list and create sysfs entries associated with @mci structure. + * + * @mci: pointer to the mci structure to be added to the list + * @groups: optional attribute groups for the driver-specific sysfs entries + * + * Returns: + * 0 on Success, or an error code on failure + */ +extern int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci, + const struct attribute_group **groups); +#define edac_mc_add_mc(mci) edac_mc_add_mc_with_groups(mci, NULL) + +/** + * edac_mc_free() - Frees a previously allocated @mci structure + * + * @mci: pointer to a struct mem_ctl_info structure + */ +extern void edac_mc_free(struct mem_ctl_info *mci); + +/** + * edac_has_mcs() - Check if any MCs have been allocated. + * + * Returns: + * True if MC instances have been registered successfully. + * False otherwise. + */ +extern bool edac_has_mcs(void); + +/** + * edac_mc_find() - Search for a mem_ctl_info structure whose index is @idx. + * + * @idx: index to be seek + * + * If found, return a pointer to the structure. + * Else return NULL. 
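+ *
+ * A minimal, illustrative use::
+ *
+ *	struct mem_ctl_info *mci = edac_mc_find(0);
+ *
+ *	if (mci)
+ *		edac_mc_printk(mci, KERN_INFO, "MC0 is registered\n");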
+ */ +extern struct mem_ctl_info *edac_mc_find(int idx); + +/** + * find_mci_by_dev() - Scan list of controllers looking for the one that + * manages the @dev device. + * + * @dev: pointer to a struct device related with the MCI + * + * Returns: on success, returns a pointer to struct &mem_ctl_info; + * %NULL otherwise. + */ +extern struct mem_ctl_info *find_mci_by_dev(struct device *dev); + +/** + * edac_mc_del_mc() - Remove sysfs entries for mci structure associated with + * @dev and remove mci structure from global list. + * + * @dev: Pointer to struct &device representing mci structure to remove. + * + * Returns: pointer to removed mci structure, or %NULL if device not found. + */ +extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); + +/** + * edac_mc_find_csrow_by_page() - Ancillary routine to identify what csrow + * contains a memory page. + * + * @mci: pointer to a struct mem_ctl_info structure + * @page: memory page to find + * + * Returns: on success, returns the csrow. -1 if not found. + */ +extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, + unsigned long page); + +/** + * edac_raw_mc_handle_error() - Reports a memory event to userspace without + * doing anything to discover the error location. + * + * @e: error description + * + * This raw function is used internally by edac_mc_handle_error(). It should + * only be called directly when the hardware error come directly from BIOS, + * like in the case of APEI GHES driver. + */ +void edac_raw_mc_handle_error(struct edac_raw_error_desc *e); + +/** + * edac_mc_handle_error() - Reports a memory event to userspace. + * + * @type: severity of the error (CE/UE/Fatal) + * @mci: a struct mem_ctl_info pointer + * @error_count: Number of errors of the same type + * @page_frame_number: mem page where the error occurred + * @offset_in_page: offset of the error inside the page + * @syndrome: ECC syndrome + * @top_layer: Memory layer[0] position + * @mid_layer: Memory layer[1] position + * @low_layer: Memory layer[2] position + * @msg: Message meaningful to the end users that + * explains the event + * @other_detail: Technical details about the event that + * may help hardware manufacturers and + * EDAC developers to analyse the event + */ +void edac_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + const u16 error_count, + const unsigned long page_frame_number, + const unsigned long offset_in_page, + const unsigned long syndrome, + const int top_layer, + const int mid_layer, + const int low_layer, + const char *msg, + const char *other_detail); + +/* + * edac misc APIs + */ +extern char *edac_op_state_to_string(int op_state); + +#endif /* _EDAC_MC_H_ */ diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index ef15a7e613bc..091cc6aae8a9 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -7,7 +7,7 @@ * * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com * - * (c) 2012-2013 - Mauro Carvalho Chehab <mchehab@redhat.com> + * (c) 2012-2013 - Mauro Carvalho Chehab * The entire API were re-written, and ported to use struct device * */ @@ -19,14 +19,14 @@ #include <linux/pm_runtime.h> #include <linux/uaccess.h> -#include "edac_core.h" +#include "edac_mc.h" #include "edac_module.h" /* MC EDAC Controls, setable by module parameter, and sysfs */ static int edac_mc_log_ue = 1; static int edac_mc_log_ce = 1; static int edac_mc_panic_on_ue; -static int edac_mc_poll_msec = 1000; +static unsigned int edac_mc_poll_msec = 1000; /* 
Getter functions for above */ int edac_mc_get_log_ue(void) @@ -45,26 +45,30 @@ int edac_mc_get_panic_on_ue(void) } /* this is temporary */ -int edac_mc_get_poll_msec(void) +unsigned int edac_mc_get_poll_msec(void) { return edac_mc_poll_msec; } -static int edac_set_poll_msec(const char *val, struct kernel_param *kp) +static int edac_set_poll_msec(const char *val, const struct kernel_param *kp) { - long l; + unsigned int i; int ret; if (!val) return -EINVAL; - ret = strict_strtol(val, 0, &l); - if (ret == -EINVAL || ((int)l != l)) + ret = kstrtouint(val, 0, &i); + if (ret) + return ret; + + if (i < 1000) return -EINVAL; - *((int *)kp->arg) = l; + + *((unsigned int *)kp->arg) = i; /* notify edac_mc engine to reset the poll period */ - edac_mc_reset_delay_period(l); + edac_mc_reset_delay_period(i); return 0; } @@ -78,7 +82,7 @@ MODULE_PARM_DESC(edac_mc_log_ue, module_param(edac_mc_log_ce, int, 0644); MODULE_PARM_DESC(edac_mc_log_ce, "Log correctable error to console: 0=off 1=on"); -module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_int, +module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_uint, &edac_mc_poll_msec, 0644); MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds"); @@ -87,26 +91,6 @@ static struct device *mci_pdev; /* * various constants for Memory Controllers */ -static const char * const mem_types[] = { - [MEM_EMPTY] = "Empty", - [MEM_RESERVED] = "Reserved", - [MEM_UNKNOWN] = "Unknown", - [MEM_FPM] = "FPM", - [MEM_EDO] = "EDO", - [MEM_BEDO] = "BEDO", - [MEM_SDR] = "Unbuffered-SDR", - [MEM_RDR] = "Registered-SDR", - [MEM_DDR] = "Unbuffered-DDR", - [MEM_RDDR] = "Registered-DDR", - [MEM_RMBS] = "RMBS", - [MEM_DDR2] = "Unbuffered-DDR2", - [MEM_FB_DDR2] = "FullyBuffered-DDR2", - [MEM_RDDR2] = "Registered-DDR2", - [MEM_XDR] = "XDR", - [MEM_DDR3] = "Unbuffered-DDR3", - [MEM_RDDR3] = "Registered-DDR3" -}; - static const char * const dev_types[] = { [DEV_UNKNOWN] = "Unknown", [DEV_X1] = "x1", @@ -131,353 +115,6 @@ static const char * const edac_caps[] = { [EDAC_S16ECD16ED] = "S16ECD16ED" }; -#ifdef CONFIG_EDAC_LEGACY_SYSFS -/* - * EDAC sysfs CSROW data structures and methods - */ - -#define to_csrow(k) container_of(k, struct csrow_info, dev) - -/* - * We need it to avoid namespace conflicts between the legacy API - * and the per-dimm/per-rank one - */ -#define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \ - static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) - -struct dev_ch_attribute { - struct device_attribute attr; - int channel; -}; - -#define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \ - struct dev_ch_attribute dev_attr_legacy_##_name = \ - { __ATTR(_name, _mode, _show, _store), (_var) } - -#define to_channel(k) (container_of(k, struct dev_ch_attribute, attr)->channel) - -/* Set of more default csrow<id> attribute show/store functions */ -static ssize_t csrow_ue_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sprintf(data, "%u\n", csrow->ue_count); -} - -static ssize_t csrow_ce_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sprintf(data, "%u\n", csrow->ce_count); -} - -static ssize_t csrow_size_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - int i; - u32 nr_pages = 0; - - for (i = 0; i < csrow->nr_channels; i++) - nr_pages += 
csrow->channels[i]->dimm->nr_pages; - return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages)); -} - -static ssize_t csrow_mem_type_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sprintf(data, "%s\n", mem_types[csrow->channels[0]->dimm->mtype]); -} - -static ssize_t csrow_dev_type_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sprintf(data, "%s\n", dev_types[csrow->channels[0]->dimm->dtype]); -} - -static ssize_t csrow_edac_mode_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sprintf(data, "%s\n", edac_caps[csrow->channels[0]->dimm->edac_mode]); -} - -/* show/store functions for DIMM Label attributes */ -static ssize_t channel_dimm_label_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - - /* if field has not been initialized, there is nothing to send */ - if (!rank->dimm->label[0]) - return 0; - - return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n", - rank->dimm->label); -} - -static ssize_t channel_dimm_label_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - - ssize_t max_size = 0; - - max_size = min((ssize_t) count, (ssize_t) EDAC_MC_LABEL_LEN - 1); - strncpy(rank->dimm->label, data, max_size); - rank->dimm->label[max_size] = '\0'; - - return max_size; -} - -/* show function for dynamic chX_ce_count attribute */ -static ssize_t channel_ce_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - - return sprintf(data, "%u\n", rank->ce_count); -} - -/* cwrow<id>/attribute files */ -DEVICE_ATTR_LEGACY(size_mb, S_IRUGO, csrow_size_show, NULL); -DEVICE_ATTR_LEGACY(dev_type, S_IRUGO, csrow_dev_type_show, NULL); -DEVICE_ATTR_LEGACY(mem_type, S_IRUGO, csrow_mem_type_show, NULL); -DEVICE_ATTR_LEGACY(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL); -DEVICE_ATTR_LEGACY(ue_count, S_IRUGO, csrow_ue_count_show, NULL); -DEVICE_ATTR_LEGACY(ce_count, S_IRUGO, csrow_ce_count_show, NULL); - -/* default attributes of the CSROW<id> object */ -static struct attribute *csrow_attrs[] = { - &dev_attr_legacy_dev_type.attr, - &dev_attr_legacy_mem_type.attr, - &dev_attr_legacy_edac_mode.attr, - &dev_attr_legacy_size_mb.attr, - &dev_attr_legacy_ue_count.attr, - &dev_attr_legacy_ce_count.attr, - NULL, -}; - -static struct attribute_group csrow_attr_grp = { - .attrs = csrow_attrs, -}; - -static const struct attribute_group *csrow_attr_groups[] = { - &csrow_attr_grp, - NULL -}; - -static void csrow_attr_release(struct device *dev) -{ - struct csrow_info *csrow = container_of(dev, struct csrow_info, dev); - - edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev)); - kfree(csrow); -} - -static struct device_type csrow_attr_type = { - .groups = csrow_attr_groups, - .release = csrow_attr_release, -}; - -/* - * possible dynamic channel DIMM Label attribute files - * - */ - -#define EDAC_NR_CHANNELS 6 - -DEVICE_CHANNEL(ch0_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, 
channel_dimm_label_store, 0); -DEVICE_CHANNEL(ch1_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 1); -DEVICE_CHANNEL(ch2_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 2); -DEVICE_CHANNEL(ch3_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 3); -DEVICE_CHANNEL(ch4_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 4); -DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 5); - -/* Total possible dynamic DIMM Label attribute file table */ -static struct device_attribute *dynamic_csrow_dimm_attr[] = { - &dev_attr_legacy_ch0_dimm_label.attr, - &dev_attr_legacy_ch1_dimm_label.attr, - &dev_attr_legacy_ch2_dimm_label.attr, - &dev_attr_legacy_ch3_dimm_label.attr, - &dev_attr_legacy_ch4_dimm_label.attr, - &dev_attr_legacy_ch5_dimm_label.attr -}; - -/* possible dynamic channel ce_count attribute files */ -DEVICE_CHANNEL(ch0_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 0); -DEVICE_CHANNEL(ch1_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 1); -DEVICE_CHANNEL(ch2_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 2); -DEVICE_CHANNEL(ch3_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 3); -DEVICE_CHANNEL(ch4_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 4); -DEVICE_CHANNEL(ch5_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 5); - -/* Total possible dynamic ce_count attribute file table */ -static struct device_attribute *dynamic_csrow_ce_count_attr[] = { - &dev_attr_legacy_ch0_ce_count.attr, - &dev_attr_legacy_ch1_ce_count.attr, - &dev_attr_legacy_ch2_ce_count.attr, - &dev_attr_legacy_ch3_ce_count.attr, - &dev_attr_legacy_ch4_ce_count.attr, - &dev_attr_legacy_ch5_ce_count.attr -}; - -static inline int nr_pages_per_csrow(struct csrow_info *csrow) -{ - int chan, nr_pages = 0; - - for (chan = 0; chan < csrow->nr_channels; chan++) - nr_pages += csrow->channels[chan]->dimm->nr_pages; - - return nr_pages; -} - -/* Create a CSROW object under specifed edac_mc_device */ -static int edac_create_csrow_object(struct mem_ctl_info *mci, - struct csrow_info *csrow, int index) -{ - int err, chan; - - if (csrow->nr_channels >= EDAC_NR_CHANNELS) - return -ENODEV; - - csrow->dev.type = &csrow_attr_type; - csrow->dev.bus = &mci->bus; - device_initialize(&csrow->dev); - csrow->dev.parent = &mci->dev; - csrow->mci = mci; - dev_set_name(&csrow->dev, "csrow%d", index); - dev_set_drvdata(&csrow->dev, csrow); - - edac_dbg(0, "creating (virtual) csrow node %s\n", - dev_name(&csrow->dev)); - - err = device_add(&csrow->dev); - if (err < 0) - return err; - - for (chan = 0; chan < csrow->nr_channels; chan++) { - /* Only expose populated DIMMs */ - if (!csrow->channels[chan]->dimm->nr_pages) - continue; - err = device_create_file(&csrow->dev, - dynamic_csrow_dimm_attr[chan]); - if (err < 0) - goto error; - err = device_create_file(&csrow->dev, - dynamic_csrow_ce_count_attr[chan]); - if (err < 0) { - device_remove_file(&csrow->dev, - dynamic_csrow_dimm_attr[chan]); - goto error; - } - } - - return 0; - -error: - for (--chan; chan >= 0; chan--) { - device_remove_file(&csrow->dev, - dynamic_csrow_dimm_attr[chan]); - device_remove_file(&csrow->dev, - dynamic_csrow_ce_count_attr[chan]); - } - put_device(&csrow->dev); - - return err; -} - -/* Create a CSROW object under specifed edac_mc_device */ -static int edac_create_csrow_objects(struct mem_ctl_info *mci) -{ - int err, i, chan; - struct csrow_info *csrow; - - for (i = 0; i < 
mci->nr_csrows; i++) { - csrow = mci->csrows[i]; - if (!nr_pages_per_csrow(csrow)) - continue; - err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) { - edac_dbg(1, - "failure: create csrow objects for csrow %d\n", - i); - goto error; - } - } - return 0; - -error: - for (--i; i >= 0; i--) { - csrow = mci->csrows[i]; - if (!nr_pages_per_csrow(csrow)) - continue; - for (chan = csrow->nr_channels - 1; chan >= 0; chan--) { - if (!csrow->channels[chan]->dimm->nr_pages) - continue; - device_remove_file(&csrow->dev, - dynamic_csrow_dimm_attr[chan]); - device_remove_file(&csrow->dev, - dynamic_csrow_ce_count_attr[chan]); - } - put_device(&mci->csrows[i]->dev); - } - - return err; -} - -static void edac_delete_csrow_objects(struct mem_ctl_info *mci) -{ - int i, chan; - struct csrow_info *csrow; - - for (i = mci->nr_csrows - 1; i >= 0; i--) { - csrow = mci->csrows[i]; - if (!nr_pages_per_csrow(csrow)) - continue; - for (chan = csrow->nr_channels - 1; chan >= 0; chan--) { - if (!csrow->channels[chan]->dimm->nr_pages) - continue; - edac_dbg(1, "Removing csrow %d channel %d sysfs nodes\n", - i, chan); - device_remove_file(&csrow->dev, - dynamic_csrow_dimm_attr[chan]); - device_remove_file(&csrow->dev, - dynamic_csrow_ce_count_attr[chan]); - } - device_unregister(&mci->csrows[i]->dev); - } -} -#endif - /* * Per-dimm (or per-rank) devices */ @@ -489,8 +126,12 @@ static ssize_t dimmdev_location_show(struct device *dev, struct device_attribute *mattr, char *data) { struct dimm_info *dimm = to_dimm(dev); + ssize_t count; - return edac_dimm_info_location(dimm, data, PAGE_SIZE); + count = edac_dimm_info_location(dimm, data, PAGE_SIZE); + count += scnprintf(data + count, PAGE_SIZE - count, "\n"); + + return count; } static ssize_t dimmdev_label_show(struct device *dev, @@ -502,7 +143,7 @@ static ssize_t dimmdev_label_show(struct device *dev, if (!dimm->label[0]) return 0; - return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n", dimm->label); + return sysfs_emit(data, "%s\n", dimm->label); } static ssize_t dimmdev_label_store(struct device *dev, @@ -511,14 +152,21 @@ static ssize_t dimmdev_label_store(struct device *dev, size_t count) { struct dimm_info *dimm = to_dimm(dev); + size_t copy_count = count; - ssize_t max_size = 0; + if (count == 0) + return -EINVAL; + + if (data[count - 1] == '\0' || data[count - 1] == '\n') + copy_count -= 1; + + if (copy_count == 0 || copy_count >= sizeof(dimm->label)) + return -EINVAL; - max_size = min((ssize_t) count, (ssize_t) EDAC_MC_LABEL_LEN - 1); - strncpy(dimm->label, data, max_size); - dimm->label[max_size] = '\0'; + memcpy(dimm->label, data, copy_count); + dimm->label[copy_count] = '\0'; - return max_size; + return count; } static ssize_t dimmdev_size_show(struct device *dev, @@ -526,7 +174,7 @@ static ssize_t dimmdev_size_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); - return sprintf(data, "%u\n", PAGES_TO_MiB(dimm->nr_pages)); + return sysfs_emit(data, "%u\n", PAGES_TO_MiB(dimm->nr_pages)); } static ssize_t dimmdev_mem_type_show(struct device *dev, @@ -534,7 +182,7 @@ static ssize_t dimmdev_mem_type_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); - return sprintf(data, "%s\n", mem_types[dimm->mtype]); + return sysfs_emit(data, "%s\n", edac_mem_types[dimm->mtype]); } static ssize_t dimmdev_dev_type_show(struct device *dev, @@ -542,7 +190,7 @@ static ssize_t dimmdev_dev_type_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); - return sprintf(data, "%s\n", dev_types[dimm->dtype]); + return sysfs_emit(data, 
"%s\n", dev_types[dimm->dtype]); } static ssize_t dimmdev_edac_mode_show(struct device *dev, @@ -551,7 +199,25 @@ static ssize_t dimmdev_edac_mode_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); - return sprintf(data, "%s\n", edac_caps[dimm->edac_mode]); + return sysfs_emit(data, "%s\n", edac_caps[dimm->edac_mode]); +} + +static ssize_t dimmdev_ce_count_show(struct device *dev, + struct device_attribute *mattr, + char *data) +{ + struct dimm_info *dimm = to_dimm(dev); + + return sysfs_emit(data, "%u\n", dimm->ce_count); +} + +static ssize_t dimmdev_ue_count_show(struct device *dev, + struct device_attribute *mattr, + char *data) +{ + struct dimm_info *dimm = to_dimm(dev); + + return sysfs_emit(data, "%u\n", dimm->ue_count); } /* dimm/rank attribute files */ @@ -562,6 +228,8 @@ static DEVICE_ATTR(size, S_IRUGO, dimmdev_size_show, NULL); static DEVICE_ATTR(dimm_mem_type, S_IRUGO, dimmdev_mem_type_show, NULL); static DEVICE_ATTR(dimm_dev_type, S_IRUGO, dimmdev_dev_type_show, NULL); static DEVICE_ATTR(dimm_edac_mode, S_IRUGO, dimmdev_edac_mode_show, NULL); +static DEVICE_ATTR(dimm_ce_count, S_IRUGO, dimmdev_ce_count_show, NULL); +static DEVICE_ATTR(dimm_ue_count, S_IRUGO, dimmdev_ue_count_show, NULL); /* attributes of the dimm<id>/rank<id> object */ static struct attribute *dimm_attrs[] = { @@ -571,10 +239,12 @@ static struct attribute *dimm_attrs[] = { &dev_attr_dimm_mem_type.attr, &dev_attr_dimm_dev_type.attr, &dev_attr_dimm_edac_mode.attr, + &dev_attr_dimm_ce_count.attr, + &dev_attr_dimm_ue_count.attr, NULL, }; -static struct attribute_group dimm_attr_grp = { +static const struct attribute_group dimm_attr_grp = { .attrs = dimm_attrs, }; @@ -583,44 +253,53 @@ static const struct attribute_group *dimm_attr_groups[] = { NULL }; -static void dimm_attr_release(struct device *dev) -{ - struct dimm_info *dimm = container_of(dev, struct dimm_info, dev); - - edac_dbg(1, "Releasing dimm device %s\n", dev_name(dev)); - kfree(dimm); -} - -static struct device_type dimm_attr_type = { +static const struct device_type dimm_attr_type = { .groups = dimm_attr_groups, - .release = dimm_attr_release, }; -/* Create a DIMM object under specifed memory controller device */ +static void dimm_release(struct device *dev) +{ + /* + * Nothing to do, just unregister sysfs here. The mci + * device owns the data and will also release it. 
+ */ +} + +/* Create a DIMM object under specified memory controller device */ static int edac_create_dimm_object(struct mem_ctl_info *mci, - struct dimm_info *dimm, - int index) + struct dimm_info *dimm) { int err; dimm->mci = mci; dimm->dev.type = &dimm_attr_type; - dimm->dev.bus = &mci->bus; + dimm->dev.release = dimm_release; device_initialize(&dimm->dev); dimm->dev.parent = &mci->dev; if (mci->csbased) - dev_set_name(&dimm->dev, "rank%d", index); + dev_set_name(&dimm->dev, "rank%d", dimm->idx); else - dev_set_name(&dimm->dev, "dimm%d", index); + dev_set_name(&dimm->dev, "dimm%d", dimm->idx); dev_set_drvdata(&dimm->dev, dimm); pm_runtime_forbid(&mci->dev); - err = device_add(&dimm->dev); + err = device_add(&dimm->dev); + if (err) { + edac_dbg(1, "failure: create device %s\n", dev_name(&dimm->dev)); + put_device(&dimm->dev); + return err; + } - edac_dbg(0, "creating rank/dimm device %s\n", dev_name(&dimm->dev)); + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + char location[80]; - return err; + edac_dimm_info_location(dimm, location, sizeof(location)); + edac_dbg(0, "device %s created at location %s\n", + dev_name(&dimm->dev), location); + } + + return 0; } /* @@ -634,7 +313,9 @@ static ssize_t mci_reset_counters_store(struct device *dev, const char *data, size_t count) { struct mem_ctl_info *mci = to_mci(dev); - int cnt, row, chan, i; + struct dimm_info *dimm; + int row, chan; + mci->ue_mc = 0; mci->ce_mc = 0; mci->ue_noinfo_count = 0; @@ -650,11 +331,9 @@ static ssize_t mci_reset_counters_store(struct device *dev, ri->channels[chan]->ce_count = 0; } - cnt = 1; - for (i = 0; i < mci->n_layers; i++) { - cnt *= mci->layers[i].size; - memset(mci->ce_per_layer[i], 0, cnt * sizeof(u32)); - memset(mci->ue_per_layer[i], 0, cnt * sizeof(u32)); + mci_for_each_dimm(mci, dimm) { + dimm->ue_count = 0; + dimm->ce_count = 0; } mci->start_time = jiffies; @@ -707,7 +386,7 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev, return bandwidth; } - return sprintf(data, "%d\n", bandwidth); + return sysfs_emit(data, "%d\n", bandwidth); } /* default attribute files for the MCI object */ @@ -717,7 +396,7 @@ static ssize_t mci_ue_count_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%d\n", mci->ue_mc); + return sysfs_emit(data, "%u\n", mci->ue_mc); } static ssize_t mci_ce_count_show(struct device *dev, @@ -726,7 +405,7 @@ static ssize_t mci_ce_count_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%d\n", mci->ce_mc); + return sysfs_emit(data, "%u\n", mci->ce_mc); } static ssize_t mci_ce_noinfo_show(struct device *dev, @@ -735,7 +414,7 @@ static ssize_t mci_ce_noinfo_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%d\n", mci->ce_noinfo_count); + return sysfs_emit(data, "%u\n", mci->ce_noinfo_count); } static ssize_t mci_ue_noinfo_show(struct device *dev, @@ -744,7 +423,7 @@ static ssize_t mci_ue_noinfo_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%d\n", mci->ue_noinfo_count); + return sysfs_emit(data, "%u\n", mci->ue_noinfo_count); } static ssize_t mci_seconds_show(struct device *dev, @@ -753,7 +432,7 @@ static ssize_t mci_seconds_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%ld\n", (jiffies - mci->start_time) / HZ); + return sysfs_emit(data, "%ld\n", (jiffies - mci->start_time) / HZ); } static ssize_t mci_ctl_name_show(struct device *dev, @@ -762,7 +441,7 @@ static ssize_t 
mci_ctl_name_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%s\n", mci->ctl_name); + return sysfs_emit(data, "%s\n", mci->ctl_name); } static ssize_t mci_size_mb_show(struct device *dev, @@ -782,7 +461,7 @@ static ssize_t mci_size_mb_show(struct device *dev, } } - return sprintf(data, "%u\n", PAGES_TO_MiB(total_pages)); + return sysfs_emit(data, "%u\n", PAGES_TO_MiB(total_pages)); } static ssize_t mci_max_location_show(struct device *dev, @@ -790,74 +469,42 @@ static ssize_t mci_max_location_show(struct device *dev, char *data) { struct mem_ctl_info *mci = to_mci(dev); - int i; + int len = PAGE_SIZE; char *p = data; + int i, n; for (i = 0; i < mci->n_layers; i++) { - p += sprintf(p, "%s %d ", - edac_layer_name[mci->layers[i].type], - mci->layers[i].size - 1); + n = scnprintf(p, len, "%s %d ", + edac_layer_name[mci->layers[i].type], + mci->layers[i].size - 1); + len -= n; + if (len <= 0) + goto out; + + p += n; } + p += scnprintf(p, len, "\n"); +out: return p - data; } -#ifdef CONFIG_EDAC_DEBUG -static ssize_t edac_fake_inject_write(struct file *file, - const char __user *data, - size_t count, loff_t *ppos) -{ - struct device *dev = file->private_data; - struct mem_ctl_info *mci = to_mci(dev); - static enum hw_event_mc_err_type type; - u16 errcount = mci->fake_inject_count; - - if (!errcount) - errcount = 1; - - type = mci->fake_inject_ue ? HW_EVENT_ERR_UNCORRECTED - : HW_EVENT_ERR_CORRECTED; - - printk(KERN_DEBUG - "Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n", - errcount, - (type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE", - errcount > 1 ? "s" : "", - mci->fake_inject_layer[0], - mci->fake_inject_layer[1], - mci->fake_inject_layer[2] - ); - edac_mc_handle_error(type, mci, errcount, 0, 0, 0, - mci->fake_inject_layer[0], - mci->fake_inject_layer[1], - mci->fake_inject_layer[2], - "FAKE ERROR", "for EDAC testing only"); - - return count; -} - -static const struct file_operations debug_fake_inject_fops = { - .open = simple_open, - .write = edac_fake_inject_write, - .llseek = generic_file_llseek, -}; -#endif - /* default Control file */ -DEVICE_ATTR(reset_counters, S_IWUSR, NULL, mci_reset_counters_store); +static DEVICE_ATTR(reset_counters, S_IWUSR, NULL, mci_reset_counters_store); /* default Attribute files */ -DEVICE_ATTR(mc_name, S_IRUGO, mci_ctl_name_show, NULL); -DEVICE_ATTR(size_mb, S_IRUGO, mci_size_mb_show, NULL); -DEVICE_ATTR(seconds_since_reset, S_IRUGO, mci_seconds_show, NULL); -DEVICE_ATTR(ue_noinfo_count, S_IRUGO, mci_ue_noinfo_show, NULL); -DEVICE_ATTR(ce_noinfo_count, S_IRUGO, mci_ce_noinfo_show, NULL); -DEVICE_ATTR(ue_count, S_IRUGO, mci_ue_count_show, NULL); -DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); -DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); +static DEVICE_ATTR(mc_name, S_IRUGO, mci_ctl_name_show, NULL); +static DEVICE_ATTR(size_mb, S_IRUGO, mci_size_mb_show, NULL); +static DEVICE_ATTR(seconds_since_reset, S_IRUGO, mci_seconds_show, NULL); +static DEVICE_ATTR(ue_noinfo_count, S_IRUGO, mci_ue_noinfo_show, NULL); +static DEVICE_ATTR(ce_noinfo_count, S_IRUGO, mci_ce_noinfo_show, NULL); +static DEVICE_ATTR(ue_count, S_IRUGO, mci_ue_count_show, NULL); +static DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); +static DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); /* memory scrubber attribute file */ -DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL); +static DEVICE_ATTR(sdram_scrub_rate, 0, 
mci_sdram_scrub_rate_show, + mci_sdram_scrub_rate_store); /* umode set later in is_visible */ static struct attribute *mci_attrs[] = { &dev_attr_reset_counters.attr, @@ -869,11 +516,29 @@ static struct attribute *mci_attrs[] = { &dev_attr_ue_count.attr, &dev_attr_ce_count.attr, &dev_attr_max_location.attr, + &dev_attr_sdram_scrub_rate.attr, NULL }; -static struct attribute_group mci_attr_grp = { +static umode_t mci_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int idx) +{ + struct device *dev = kobj_to_dev(kobj); + struct mem_ctl_info *mci = to_mci(dev); + umode_t mode = 0; + + if (attr != &dev_attr_sdram_scrub_rate.attr) + return attr->mode; + if (mci->get_sdram_scrub_rate) + mode |= S_IRUGO; + if (mci->set_sdram_scrub_rate) + mode |= S_IWUSR; + return mode; +} + +static const struct attribute_group mci_attr_grp = { .attrs = mci_attrs, + .is_visible = mci_attr_is_visible, }; static const struct attribute_group *mci_attr_groups[] = { @@ -881,84 +546,10 @@ static const struct attribute_group *mci_attr_groups[] = { NULL }; -static void mci_attr_release(struct device *dev) -{ - struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev); - - edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev)); - kfree(mci); -} - -static struct device_type mci_attr_type = { +static const struct device_type mci_attr_type = { .groups = mci_attr_groups, - .release = mci_attr_release, }; -#ifdef CONFIG_EDAC_DEBUG -static struct dentry *edac_debugfs; - -int __init edac_debugfs_init(void) -{ - edac_debugfs = debugfs_create_dir("edac", NULL); - if (IS_ERR(edac_debugfs)) { - edac_debugfs = NULL; - return -ENOMEM; - } - return 0; -} - -void __exit edac_debugfs_exit(void) -{ - debugfs_remove(edac_debugfs); -} - -int edac_create_debug_nodes(struct mem_ctl_info *mci) -{ - struct dentry *d, *parent; - char name[80]; - int i; - - if (!edac_debugfs) - return -ENODEV; - - d = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs); - if (!d) - return -ENOMEM; - parent = d; - - for (i = 0; i < mci->n_layers; i++) { - sprintf(name, "fake_inject_%s", - edac_layer_name[mci->layers[i].type]); - d = debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent, - &mci->fake_inject_layer[i]); - if (!d) - goto nomem; - } - - d = debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent, - &mci->fake_inject_ue); - if (!d) - goto nomem; - - d = debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent, - &mci->fake_inject_count); - if (!d) - goto nomem; - - d = debugfs_create_file("fake_inject", S_IWUSR, parent, - &mci->dev, - &debug_fake_inject_fops); - if (!d) - goto nomem; - - mci->debugfs = parent; - return 0; -nomem: - debugfs_remove(mci->debugfs); - return -ENOMEM; -} -#endif - /* * Create a new Memory Controller kobject instance, * mc<id> under the 'mc' directory @@ -967,105 +558,48 @@ nomem: * 0 Success * !0 Failure */ -int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) +int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, + const struct attribute_group **groups) { - int i, err; - - /* - * The memory controller needs its own bus, in order to avoid - * namespace conflicts at /sys/bus/edac. 
- */ - mci->bus.name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!mci->bus.name) - return -ENOMEM; - edac_dbg(0, "creating bus %s\n", mci->bus.name); - err = bus_register(&mci->bus); - if (err < 0) - return err; + struct dimm_info *dimm; + int err; /* get the /sys/devices/system/edac subsys reference */ mci->dev.type = &mci_attr_type; - device_initialize(&mci->dev); - mci->dev.parent = mci_pdev; - mci->dev.bus = &mci->bus; + mci->dev.groups = groups; dev_set_name(&mci->dev, "mc%d", mci->mc_idx); dev_set_drvdata(&mci->dev, mci); pm_runtime_forbid(&mci->dev); - edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); err = device_add(&mci->dev); if (err < 0) { edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + /* no put_device() here, free mci with _edac_mc_free() */ return err; } - if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) { - if (mci->get_sdram_scrub_rate) { - dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO; - dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show; - } - if (mci->set_sdram_scrub_rate) { - dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR; - dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store; - } - err = device_create_file(&mci->dev, - &dev_attr_sdram_scrub_rate); - if (err) { - edac_dbg(1, "failure: create sdram_scrub_rate\n"); - goto fail2; - } - } + edac_dbg(0, "device %s created\n", dev_name(&mci->dev)); + /* * Create the dimm/rank devices */ - for (i = 0; i < mci->tot_dimms; i++) { - struct dimm_info *dimm = mci->dimms[i]; + mci_for_each_dimm(mci, dimm) { /* Only expose populated DIMMs */ - if (dimm->nr_pages == 0) + if (!dimm->nr_pages) continue; -#ifdef CONFIG_EDAC_DEBUG - edac_dbg(1, "creating dimm%d, located at ", i); - if (edac_debug_level >= 1) { - int lay; - for (lay = 0; lay < mci->n_layers; lay++) - printk(KERN_CONT "%s %d ", - edac_layer_name[mci->layers[lay].type], - dimm->location[lay]); - printk(KERN_CONT "\n"); - } -#endif - err = edac_create_dimm_object(mci, dimm, i); - if (err) { - edac_dbg(1, "failure: create dimm %d obj\n", i); + + err = edac_create_dimm_object(mci, dimm); + if (err) goto fail; - } } -#ifdef CONFIG_EDAC_LEGACY_SYSFS - err = edac_create_csrow_objects(mci); - if (err < 0) - goto fail; -#endif - -#ifdef CONFIG_EDAC_DEBUG - edac_create_debug_nodes(mci); -#endif + edac_create_debugfs_nodes(mci); return 0; fail: - for (i--; i >= 0; i--) { - struct dimm_info *dimm = mci->dimms[i]; - if (dimm->nr_pages == 0) - continue; - device_unregister(&dimm->dev); - } -fail2: - device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + edac_remove_sysfs_mci_device(mci); + return err; } @@ -1074,32 +608,26 @@ fail2: */ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) { - int i; + struct dimm_info *dimm; + + if (!device_is_registered(&mci->dev)) + return; edac_dbg(0, "\n"); #ifdef CONFIG_EDAC_DEBUG - debugfs_remove(mci->debugfs); -#endif -#ifdef CONFIG_EDAC_LEGACY_SYSFS - edac_delete_csrow_objects(mci); + edac_debugfs_remove_recursive(mci->debugfs); #endif - for (i = 0; i < mci->tot_dimms; i++) { - struct dimm_info *dimm = mci->dimms[i]; - if (dimm->nr_pages == 0) + mci_for_each_dimm(mci, dimm) { + if (!device_is_registered(&dimm->dev)) continue; - edac_dbg(0, "removing device %s\n", dev_name(&dimm->dev)); + edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev)); device_unregister(&dimm->dev); } -} -void edac_unregister_sysfs(struct mem_ctl_info *mci) -{ - edac_dbg(1, "Unregistering device %s\n", 
dev_name(&mci->dev)); - device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + /* only remove the device, but keep mci */ + device_del(&mci->dev); } static void mc_attr_release(struct device *dev) @@ -1109,58 +637,38 @@ static void mc_attr_release(struct device *dev) * parent device, used to create the /sys/devices/mc sysfs node. * So, there are no attributes on it. */ - edac_dbg(1, "Releasing device %s\n", dev_name(dev)); + edac_dbg(1, "device %s released\n", dev_name(dev)); kfree(dev); } -static struct device_type mc_attr_type = { - .release = mc_attr_release, -}; /* * Init/exit code for the module. Basically, creates/removes /sys/class/rc */ int __init edac_mc_sysfs_init(void) { - struct bus_type *edac_subsys; int err; - /* get the /sys/devices/system/edac subsys reference */ - edac_subsys = edac_get_sysfs_subsys(); - if (edac_subsys == NULL) { - edac_dbg(1, "no edac_subsys\n"); - err = -EINVAL; - goto out; - } - mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL); - if (!mci_pdev) { - err = -ENOMEM; - goto out_put_sysfs; - } + if (!mci_pdev) + return -ENOMEM; - mci_pdev->bus = edac_subsys; - mci_pdev->type = &mc_attr_type; - device_initialize(mci_pdev); - dev_set_name(mci_pdev, "mc"); + mci_pdev->bus = edac_get_sysfs_subsys(); + mci_pdev->release = mc_attr_release; + mci_pdev->init_name = "mc"; - err = device_add(mci_pdev); - if (err < 0) - goto out_dev_free; + err = device_register(mci_pdev); + if (err < 0) { + edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev)); + put_device(mci_pdev); + return err; + } edac_dbg(0, "device %s created\n", dev_name(mci_pdev)); return 0; - - out_dev_free: - kfree(mci_pdev); - out_put_sysfs: - edac_put_sysfs_subsys(); - out: - return err; } -void __exit edac_mc_sysfs_exit(void) +void edac_mc_sysfs_exit(void) { device_unregister(mci_pdev); - edac_put_sysfs_subsys(); } diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index a66941fea5a4..1c9f62382666 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -12,14 +12,15 @@ */ #include <linux/edac.h> -#include "edac_core.h" +#include "edac_mc.h" #include "edac_module.h" #define EDAC_VERSION "Ver: 3.0.0" #ifdef CONFIG_EDAC_DEBUG -static int edac_set_debug_level(const char *buf, struct kernel_param *kp) +static int edac_set_debug_level(const char *buf, + const struct kernel_param *kp) { unsigned long val; int ret; @@ -28,7 +29,7 @@ static int edac_set_debug_level(const char *buf, struct kernel_param *kp) if (ret) return ret; - if (val < 0 || val > 4) + if (val > 4) return -EINVAL; return param_set_int(buf, kp); @@ -43,9 +44,6 @@ module_param_call(edac_debug_level, edac_set_debug_level, param_get_int, MODULE_PARM_DESC(edac_debug_level, "EDAC debug level: [0-4], default: 2"); #endif -/* scope is to module level only */ -struct workqueue_struct *edac_workqueue; - /* * edac_op_state_to_string() */ @@ -66,31 +64,37 @@ char *edac_op_state_to_string(int opstate) } /* - * edac_workqueue_setup - * initialize the edac work queue for polling operations + * sysfs object: /sys/devices/system/edac + * need to export to other files */ -static int edac_workqueue_setup(void) +static const struct bus_type edac_subsys = { + .name = "edac", + .dev_name = "edac", +}; + +static int edac_subsys_init(void) { - edac_workqueue = create_singlethread_workqueue("edac-poller"); - if (edac_workqueue == NULL) - return -ENODEV; - else - return 0; + int err; + + /* create the /sys/devices/system/edac directory */ + err = subsys_system_register(&edac_subsys, NULL); + 
if (err) + printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n"); + + return err; } -/* - * edac_workqueue_teardown - * teardown the edac workqueue - */ -static void edac_workqueue_teardown(void) +static void edac_subsys_exit(void) { - if (edac_workqueue) { - flush_workqueue(edac_workqueue); - destroy_workqueue(edac_workqueue); - edac_workqueue = NULL; - } + bus_unregister(&edac_subsys); } +/* return pointer to the 'edac' node in sysfs */ +const struct bus_type *edac_get_sysfs_subsys(void) +{ + return &edac_subsys; +} +EXPORT_SYMBOL_GPL(edac_get_sysfs_subsys); /* * edac_init * module initialization entry point @@ -101,6 +105,10 @@ static int __init edac_init(void) edac_printk(KERN_INFO, EDAC_MC, EDAC_VERSION "\n"); + err = edac_subsys_init(); + if (err) + return err; + /* * Harvest and clear any boot/initialization PCI parity errors * @@ -112,20 +120,25 @@ static int __init edac_init(void) err = edac_mc_sysfs_init(); if (err) - goto error; + goto err_sysfs; edac_debugfs_init(); - /* Setup/Initialize the workq for this core */ err = edac_workqueue_setup(); if (err) { - edac_printk(KERN_ERR, EDAC_MC, "init WorkQueue failure\n"); - goto error; + edac_printk(KERN_ERR, EDAC_MC, "Failure initializing workqueue\n"); + goto err_wq; } return 0; -error: +err_wq: + edac_debugfs_exit(); + edac_mc_sysfs_exit(); + +err_sysfs: + edac_subsys_exit(); + return err; } @@ -141,6 +154,7 @@ static void __exit edac_exit(void) edac_workqueue_teardown(); edac_mc_sysfs_exit(); edac_debugfs_exit(); + edac_subsys_exit(); } /* diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index 3d139c6e7fe3..47593afdc234 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * edac_module.h @@ -10,7 +11,10 @@ #ifndef __EDAC_MODULE_H__ #define __EDAC_MODULE_H__ -#include "edac_core.h" +#include <acpi/ghes.h> +#include "edac_mc.h" +#include "edac_pci.h" +#include "edac_device.h" /* * INTERNAL EDAC MODULE: @@ -22,17 +26,13 @@ /* on edac_mc_sysfs.c */ int edac_mc_sysfs_init(void); void edac_mc_sysfs_exit(void); -extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci); +extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, + const struct attribute_group **groups); extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci); -void edac_unregister_sysfs(struct mem_ctl_info *mci); -extern int edac_get_log_ue(void); -extern int edac_get_log_ce(void); -extern int edac_get_panic_on_ue(void); extern int edac_mc_get_log_ue(void); extern int edac_mc_get_log_ce(void); extern int edac_mc_get_panic_on_ue(void); -extern int edac_get_poll_msec(void); -extern int edac_mc_get_poll_msec(void); +extern unsigned int edac_mc_get_poll_msec(void); unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len); @@ -46,28 +46,54 @@ extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev); extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev); /* edac core workqueue: single CPU mode */ -extern struct workqueue_struct *edac_workqueue; -extern void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, - unsigned msec); -extern void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev); -extern void edac_device_reset_delay_period(struct edac_device_ctl_info - *edac_dev, unsigned long value); -extern void edac_mc_reset_delay_period(int value); +int edac_workqueue_setup(void); +void edac_workqueue_teardown(void); +bool edac_queue_work(struct delayed_work 
*work, unsigned long delay); +bool edac_stop_work(struct delayed_work *work); +bool edac_mod_work(struct delayed_work *work, unsigned long delay); -extern void *edac_align_ptr(void **p, unsigned size, int n_elems); +extern void edac_device_reset_delay_period(struct edac_device_ctl_info + *edac_dev, unsigned long msec); +extern void edac_mc_reset_delay_period(unsigned long value); /* * EDAC debugfs functions */ + +#define edac_debugfs_remove_recursive debugfs_remove_recursive +#define edac_debugfs_remove debugfs_remove #ifdef CONFIG_EDAC_DEBUG -int edac_debugfs_init(void); +void edac_debugfs_init(void); void edac_debugfs_exit(void); +void edac_create_debugfs_nodes(struct mem_ctl_info *mci); +struct dentry *edac_debugfs_create_dir(const char *dirname); +struct dentry * +edac_debugfs_create_dir_at(const char *dirname, struct dentry *parent); +struct dentry * +edac_debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, + void *data, const struct file_operations *fops); +void edac_debugfs_create_x8(const char *name, umode_t mode, + struct dentry *parent, u8 *value); +void edac_debugfs_create_x16(const char *name, umode_t mode, + struct dentry *parent, u16 *value); +void edac_debugfs_create_x32(const char *name, umode_t mode, + struct dentry *parent, u32 *value); #else -static inline int edac_debugfs_init(void) -{ - return -ENODEV; -} -static inline void edac_debugfs_exit(void) {} +static inline void edac_debugfs_init(void) { } +static inline void edac_debugfs_exit(void) { } +static inline void edac_create_debugfs_nodes(struct mem_ctl_info *mci) { } +static inline struct dentry *edac_debugfs_create_dir(const char *dirname) { return NULL; } +static inline struct dentry * +edac_debugfs_create_dir_at(const char *dirname, struct dentry *parent) { return NULL; } +static inline struct dentry * +edac_debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, + void *data, const struct file_operations *fops) { return NULL; } +static inline void edac_debugfs_create_x8(const char *name, umode_t mode, + struct dentry *parent, u8 *value) { } +static inline void edac_debugfs_create_x16(const char *name, umode_t mode, + struct dentry *parent, u16 *value) { } +static inline void edac_debugfs_create_x32(const char *name, umode_t mode, + struct dentry *parent, u32 *value) { } #endif /* diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index dd370f92ace3..64c142aecca7 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -1,83 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * EDAC PCI component * * Author: Dave Jiang <djiang@mvista.com> * - * 2007 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - * + * 2007 (c) MontaVista Software, Inc. 
*/ +#include <asm/page.h> +#include <linux/uaccess.h> +#include <linux/ctype.h> +#include <linux/highmem.h> +#include <linux/init.h> #include <linux/module.h> -#include <linux/types.h> +#include <linux/slab.h> #include <linux/smp.h> -#include <linux/init.h> +#include <linux/spinlock.h> #include <linux/sysctl.h> -#include <linux/highmem.h> #include <linux/timer.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/list.h> -#include <linux/ctype.h> -#include <linux/workqueue.h> -#include <asm/uaccess.h> -#include <asm/page.h> -#include "edac_core.h" +#include "edac_pci.h" #include "edac_module.h" static DEFINE_MUTEX(edac_pci_ctls_mutex); static LIST_HEAD(edac_pci_list); static atomic_t pci_indexes = ATOMIC_INIT(0); -/* - * edac_pci_alloc_ctl_info - * - * The alloc() function for the 'edac_pci' control info - * structure. The chip driver will allocate one of these for each - * edac_pci it is going to control/register with the EDAC CORE. - */ struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt, - const char *edac_pci_name) + const char *edac_pci_name) { struct edac_pci_ctl_info *pci; - void *p = NULL, *pvt; - unsigned int size; edac_dbg(1, "\n"); - pci = edac_align_ptr(&p, sizeof(*pci), 1); - pvt = edac_align_ptr(&p, 1, sz_pvt); - size = ((unsigned long)pvt) + sz_pvt; - - /* Alloc the needed control struct memory */ - pci = kzalloc(size, GFP_KERNEL); - if (pci == NULL) + pci = kzalloc(sizeof(struct edac_pci_ctl_info), GFP_KERNEL); + if (!pci) return NULL; - /* Now much private space */ - pvt = sz_pvt ? ((char *)pci) + ((unsigned long)pvt) : NULL; + if (sz_pvt) { + pci->pvt_info = kzalloc(sz_pvt, GFP_KERNEL); + if (!pci->pvt_info) + goto free; + } - pci->pvt_info = pvt; pci->op_state = OP_ALLOC; snprintf(pci->name, strlen(edac_pci_name) + 1, "%s", edac_pci_name); return pci; + +free: + kfree(pci); + return NULL; } EXPORT_SYMBOL_GPL(edac_pci_alloc_ctl_info); -/* - * edac_pci_free_ctl_info() - * - * Last action on the pci control structure. - * - * call the remove sysfs information, which will unregister - * this control struct's kobj. When that kobj's ref count - * goes to zero, its release function will be call and then - * kfree() the memory. - */ void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci) { edac_dbg(1, "\n"); @@ -178,41 +154,6 @@ static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci) INIT_LIST_HEAD(&pci->link); } -#if 0 -/* Older code, but might use in the future */ - -/* - * edac_pci_find() - * Search for an edac_pci_ctl_info structure whose index is 'idx' - * - * If found, return a pointer to the structure - * Else return NULL. - * - * Caller must hold pci_ctls_mutex. 
- */ -struct edac_pci_ctl_info *edac_pci_find(int idx) -{ - struct list_head *item; - struct edac_pci_ctl_info *pci; - - /* Iterage over list, looking for exact match of ID */ - list_for_each(item, &edac_pci_list) { - pci = list_entry(item, struct edac_pci_ctl_info, link); - - if (pci->pci_idx >= idx) { - if (pci->pci_idx == idx) - return pci; - - /* not on list, so terminate early */ - break; - } - } - - return NULL; -} -EXPORT_SYMBOL_GPL(edac_pci_find); -#endif - /* * edac_pci_workq_function() * @@ -230,107 +171,32 @@ static void edac_pci_workq_function(struct work_struct *work_req) mutex_lock(&edac_pci_ctls_mutex); - if (pci->op_state == OP_RUNNING_POLL) { - /* we might be in POLL mode, but there may NOT be a poll func - */ - if ((pci->edac_check != NULL) && edac_pci_get_check_errors()) - pci->edac_check(pci); - - /* if we are on a one second period, then use round */ - msec = edac_pci_get_poll_msec(); - if (msec == 1000) - delay = round_jiffies_relative(msecs_to_jiffies(msec)); - else - delay = msecs_to_jiffies(msec); - - /* Reschedule only if we are in POLL mode */ - queue_delayed_work(edac_workqueue, &pci->work, delay); + if (pci->op_state != OP_RUNNING_POLL) { + mutex_unlock(&edac_pci_ctls_mutex); + return; } - mutex_unlock(&edac_pci_ctls_mutex); -} - -/* - * edac_pci_workq_setup() - * initialize a workq item for this edac_pci instance - * passing in the new delay period in msec - * - * locking model: - * called when 'edac_pci_ctls_mutex' is locked - */ -static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci, - unsigned int msec) -{ - edac_dbg(0, "\n"); - - INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function); - queue_delayed_work(edac_workqueue, &pci->work, - msecs_to_jiffies(edac_pci_get_poll_msec())); -} + if (edac_pci_get_check_errors()) + pci->edac_check(pci); -/* - * edac_pci_workq_teardown() - * stop the workq processing on this edac_pci instance - */ -static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci) -{ - int status; - - edac_dbg(0, "\n"); - - status = cancel_delayed_work(&pci->work); - if (status == 0) - flush_workqueue(edac_workqueue); -} - -/* - * edac_pci_reset_delay_period - * - * called with a new period value for the workq period - * a) stop current workq timer - * b) restart workq timer with new value - */ -void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci, - unsigned long value) -{ - edac_dbg(0, "\n"); + /* if we are on a one second period, then use round */ + msec = edac_pci_get_poll_msec(); + if (msec == 1000) + delay = round_jiffies_relative(msecs_to_jiffies(msec)); + else + delay = msecs_to_jiffies(msec); - edac_pci_workq_teardown(pci); - - /* need to lock for the setup */ - mutex_lock(&edac_pci_ctls_mutex); - - edac_pci_workq_setup(pci, value); + edac_queue_work(&pci->work, delay); mutex_unlock(&edac_pci_ctls_mutex); } -EXPORT_SYMBOL_GPL(edac_pci_reset_delay_period); -/* - * edac_pci_alloc_index: Allocate a unique PCI index number - * - * Return: - * allocated index number - * - */ int edac_pci_alloc_index(void) { return atomic_inc_return(&pci_indexes) - 1; } EXPORT_SYMBOL_GPL(edac_pci_alloc_index); -/* - * edac_pci_add_device: Insert the 'edac_dev' structure into the - * edac_pci global list and create sysfs entries associated with - * edac_pci structure. - * @pci: pointer to the edac_device structure to be added to the list - * @edac_idx: A unique numeric identifier to be assigned to the - * 'edac_pci' structure. 
- * - * Return: - * 0 Success - * !0 Failure - */ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx) { edac_dbg(0, "\n"); @@ -349,20 +215,20 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx) goto fail1; } - if (pci->edac_check != NULL) { + if (pci->edac_check) { pci->op_state = OP_RUNNING_POLL; - edac_pci_workq_setup(pci, 1000); + INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function); + edac_queue_work(&pci->work, msecs_to_jiffies(edac_pci_get_poll_msec())); + } else { pci->op_state = OP_RUNNING_INTERRUPT; } edac_pci_printk(pci, KERN_INFO, - "Giving out device to module '%s' controller '%s':" - " DEV '%s' (%s)\n", - pci->mod_name, - pci->ctl_name, - edac_dev_name(pci), edac_op_state_to_string(pci->op_state)); + "Giving out device to module %s controller %s: DEV %s (%s)\n", + pci->mod_name, pci->ctl_name, pci->dev_name, + edac_op_state_to_string(pci->op_state)); mutex_unlock(&edac_pci_ctls_mutex); return 0; @@ -376,19 +242,6 @@ fail0: } EXPORT_SYMBOL_GPL(edac_pci_add_device); -/* - * edac_pci_del_device() - * Remove sysfs entries for specified edac_pci structure and - * then remove edac_pci structure from global list - * - * @dev: - * Pointer to 'struct device' representing edac_pci structure - * to remove - * - * Return: - * Pointer to removed edac_pci structure, - * or NULL if device not found - */ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev) { struct edac_pci_ctl_info *pci; @@ -412,8 +265,8 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev) mutex_unlock(&edac_pci_ctls_mutex); - /* stop the workq timer */ - edac_pci_workq_teardown(pci); + if (pci->edac_check) + edac_stop_work(&pci->work); edac_printk(KERN_INFO, EDAC_PCI, "Removed device %d for %s %s: DEV %s\n", @@ -442,17 +295,6 @@ struct edac_pci_gen_data { int edac_idx; }; -/* - * edac_pci_create_generic_ctl - * - * A generic constructor for a PCI parity polling device - * Some systems have more than one domain of PCI busses. - * For systems with one domain, then this API will - * provide for a generic poller. - * - * This routine calls the edac_pci_alloc_ctl_info() for - * the generic device, with default values - */ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev, const char *mod_name) { @@ -485,11 +327,6 @@ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev, } EXPORT_SYMBOL_GPL(edac_pci_create_generic_ctl); -/* - * edac_pci_release_generic_ctl - * - * The release function of a generic EDAC PCI polling device - */ void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci) { edac_dbg(0, "pci mod=%s\n", pci->mod_name); diff --git a/drivers/edac/edac_pci.h b/drivers/edac/edac_pci.h new file mode 100644 index 000000000000..3f47cd9b2b03 --- /dev/null +++ b/drivers/edac/edac_pci.h @@ -0,0 +1,266 @@ +/* + * Defines, structures, APIs for edac_pci and edac_pci_sysfs + * + * (C) 2007 Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written by Thayne Harbaugh + * Based on work by Dan Hollis <goemon at anime dot net> and others. + * http://www.anime.net/~goemon/linux-ecc/ + * + * NMI handling support added by + * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com> + * + * Refactored for multi-source files: + * Doug Thompson <norsk5@xmission.com> + * + * Please look at Documentation/driver-api/edac.rst for more info about + * EDAC core structs and functions. 
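+ *
+ * The PCI-side declarations below were previously carried in
+ * edac_core.h; only the PCI-specific pieces live here now.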
+ */
+
+#ifndef _EDAC_PCI_H_
+#define _EDAC_PCI_H_
+
+#include <linux/device.h>
+#include <linux/edac.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#ifdef CONFIG_PCI
+
+struct edac_pci_counter {
+	atomic_t pe_count;
+	atomic_t npe_count;
+};
+
+/*
+ * Abstract edac_pci control info structure
+ */
+struct edac_pci_ctl_info {
+	/* for global list of edac_pci_ctl_info structs */
+	struct list_head link;
+
+	int pci_idx;
+
+	/* the internal state of this controller instance */
+	int op_state;
+	/* work struct for this instance */
+	struct delayed_work work;
+
+	/* pointer to edac polling checking routine:
+	 * If NOT NULL: points to polling check routine
+	 * If NULL: Then assumes INTERRUPT operation, where
+	 * MC driver will receive events
+	 */
+	void (*edac_check)(struct edac_pci_ctl_info *edac_dev);
+
+	struct device *dev;	/* pointer to device structure */
+
+	const char *mod_name;	/* module name */
+	const char *ctl_name;	/* edac controller name */
+	const char *dev_name;	/* pci/platform/etc... name */
+
+	void *pvt_info;		/* pointer to 'private driver' info */
+
+	unsigned long start_time;	/* edac_pci load start time (jiffies) */
+
+	/* sysfs top name under 'edac' directory
+	 * and instance name:
+	 *	cpu/cpu0/...
+	 *	cpu/cpu1/...
+	 *	cpu/cpu2/...
+	 *	...
+	 */
+	char name[EDAC_DEVICE_NAME_LEN + 1];
+
+	/* Event counters for this whole EDAC Device */
+	struct edac_pci_counter counters;
+
+	/* edac sysfs device control for the 'name'
+	 * device this structure controls
+	 */
+	struct kobject kobj;
+};
+
+#define to_edac_pci_ctl_work(w) \
+		container_of(w, struct edac_pci_ctl_info, work)
+
+/* write all or some bits in a byte-register */
+static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value,
+				   u8 mask)
+{
+	if (mask != 0xff) {
+		u8 buf;
+
+		pci_read_config_byte(pdev, offset, &buf);
+		value &= mask;
+		buf &= ~mask;
+		value |= buf;
+	}
+
+	pci_write_config_byte(pdev, offset, value);
+}
+
+/* write all or some bits in a word-register */
+static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
+				    u16 value, u16 mask)
+{
+	if (mask != 0xffff) {
+		u16 buf;
+
+		pci_read_config_word(pdev, offset, &buf);
+		value &= mask;
+		buf &= ~mask;
+		value |= buf;
+	}
+
+	pci_write_config_word(pdev, offset, value);
+}
+
+/*
+ * pci_write_bits32
+ *
+ * edac local routine to do pci_write_config_dword, but adds
+ * a mask parameter. If mask is all ones, ignore the mask.
+ * Otherwise utilize the mask to isolate specified bits.
+ *
+ * write all or some bits in a dword-register
+ */
+static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
+				    u32 value, u32 mask)
+{
+	if (mask != 0xffffffff) {
+		u32 buf;
+
+		pci_read_config_dword(pdev, offset, &buf);
+		value &= mask;
+		buf &= ~mask;
+		value |= buf;
+	}
+
+	pci_write_config_dword(pdev, offset, value);
+}
+
+#endif	/* CONFIG_PCI */
+
+/*
+ * edac_pci APIs
+ */
+
+/**
+ * edac_pci_alloc_ctl_info:
+ *	The alloc() function for the 'edac_pci' control info
+ *	structure.
+ *
+ * @sz_pvt: size of the private info at struct &edac_pci_ctl_info
+ * @edac_pci_name: name of the PCI device
+ *
+ * The chip driver will allocate one of these for each
+ * edac_pci it is going to control/register with the EDAC CORE.
+ *
+ * Returns: a pointer to struct &edac_pci_ctl_info on success; %NULL otherwise.
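+ *
+ * A minimal allocation sketch; "my_pvt" and "my_pci_err" are made-up
+ * names for illustration only:
+ *
+ *	struct edac_pci_ctl_info *pci;
+ *
+ *	pci = edac_pci_alloc_ctl_info(sizeof(struct my_pvt), "my_pci_err");
+ *	if (!pci)
+ *		return -ENOMEM;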
+ */
+extern struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
+				const char *edac_pci_name);
+
+/**
+ * edac_pci_free_ctl_info():
+ *	Last action on the pci control structure.
+ *
+ * @pci: pointer to struct &edac_pci_ctl_info
+ *
+ * Calls the remove sysfs information, which will unregister
+ * this control struct's kobj. When that kobj's ref count
+ * goes to zero, its release function will be called and then
+ * kfree() the memory.
+ */
+extern void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci);
+
+/**
+ * edac_pci_alloc_index: Allocate a unique PCI index number
+ *
+ * Returns:
+ *	allocated index number
+ */
+extern int edac_pci_alloc_index(void);
+
+/**
+ * edac_pci_add_device(): Insert the 'edac_dev' structure into the
+ *	edac_pci global list and create sysfs entries associated with
+ *	the edac_pci structure.
+ *
+ * @pci: pointer to the edac_device structure to be added to the list
+ * @edac_idx: A unique numeric identifier to be assigned to the
+ *	'edac_pci' structure.
+ *
+ * Returns:
+ *	0 on success, or an error code on failure
+ */
+extern int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx);
+
+/**
+ * edac_pci_del_device():
+ *	Remove sysfs entries for the specified edac_pci structure and
+ *	then remove the edac_pci structure from the global list.
+ *
+ * @dev:
+ *	Pointer to 'struct device' representing the edac_pci structure
+ *	to remove.
+ *
+ * Returns:
+ *	Pointer to removed edac_pci structure,
+ *	or %NULL if device not found
+ */
+extern struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev);
+
+/**
+ * edac_pci_create_generic_ctl():
+ *	A generic constructor for a PCI parity polling device.
+ *	Some systems have more than one domain of PCI busses.
+ *	For systems with one domain, this API will provide
+ *	a generic poller.
+ *
+ * @dev: pointer to struct &device
+ * @mod_name: name of the PCI device
+ *
+ * This routine calls edac_pci_alloc_ctl_info() for
+ * the generic device, with default values.
+ *
+ * Returns: Pointer to struct &edac_pci_ctl_info on success, %NULL on
+ * failure.
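+ *
+ * A typical (hypothetical) probe-time use, paired with
+ * edac_pci_release_generic_ctl() on teardown:
+ *
+ *	pci_ctl = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
+ *	if (!pci_ctl)
+ *		return -ENODEV;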
+ */ +extern struct edac_pci_ctl_info *edac_pci_create_generic_ctl( + struct device *dev, + const char *mod_name); + +/** + * edac_pci_release_generic_ctl + * The release function of a generic EDAC PCI polling device + * + * @pci: pointer to struct &edac_pci_ctl_info + */ +extern void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci); + +/** + * edac_pci_create_sysfs + * Create the controls/attributes for the specified EDAC PCI device + * + * @pci: pointer to struct &edac_pci_ctl_info + */ +extern int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci); + +/** + * edac_pci_remove_sysfs() + * remove the controls and attributes for this EDAC PCI device + * + * @pci: pointer to struct &edac_pci_ctl_info + */ +extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci); + +#endif diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index e8658e451762..7b44afcf48db 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -11,12 +11,9 @@ #include <linux/slab.h> #include <linux/ctype.h> -#include "edac_core.h" +#include "edac_pci.h" #include "edac_module.h" -/* Turn off this whole feature if PCI is not configured */ -#ifdef CONFIG_PCI - #define EDAC_PCI_SYMLINK "device" /* data variables exported via sysfs */ @@ -138,17 +135,18 @@ INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL); INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL); /* pci instance attributes */ -static struct instance_attribute *pci_instance_attr[] = { - &attr_instance_pe_count, - &attr_instance_npe_count, +static struct attribute *pci_instance_attrs[] = { + &attr_instance_pe_count.attr, + &attr_instance_npe_count.attr, NULL }; +ATTRIBUTE_GROUPS(pci_instance); /* the ktype for a pci instance */ static struct kobj_type ktype_pci_instance = { .release = edac_pci_instance_release, .sysfs_ops = &pci_instance_ops, - .default_attrs = (struct attribute **)pci_instance_attr, + .default_groups = pci_instance_groups, }; /* @@ -295,15 +293,16 @@ EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL); /* Base Attributes of the memory ECC object */ -static struct edac_pci_dev_attribute *edac_pci_attr[] = { - &edac_pci_attr_check_pci_errors, - &edac_pci_attr_edac_pci_log_pe, - &edac_pci_attr_edac_pci_log_npe, - &edac_pci_attr_edac_pci_panic_on_pe, - &edac_pci_attr_pci_parity_count, - &edac_pci_attr_pci_nonparity_count, +static struct attribute *edac_pci_attrs[] = { + &edac_pci_attr_check_pci_errors.attr, + &edac_pci_attr_edac_pci_log_pe.attr, + &edac_pci_attr_edac_pci_log_npe.attr, + &edac_pci_attr_edac_pci_panic_on_pe.attr, + &edac_pci_attr_pci_parity_count.attr, + &edac_pci_attr_pci_nonparity_count.attr, NULL, }; +ATTRIBUTE_GROUPS(edac_pci); /* * edac_pci_release_main_kobj @@ -330,19 +329,17 @@ static void edac_pci_release_main_kobj(struct kobject *kobj) static struct kobj_type ktype_edac_pci_main_kobj = { .release = edac_pci_release_main_kobj, .sysfs_ops = &edac_pci_sysfs_ops, - .default_attrs = (struct attribute **)edac_pci_attr, + .default_groups = edac_pci_groups, }; /** - * edac_pci_main_kobj_setup() - * - * setup the sysfs for EDAC PCI attributes - * assumes edac_subsys has already been initialized + * edac_pci_main_kobj_setup: Setup the sysfs for EDAC PCI attributes. 
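+ *
+ * The edac_get_sysfs_subsys() call below can no longer return NULL:
+ * the "edac" bus is registered when the EDAC core initializes, which
+ * is why the old NULL check was dropped.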
*/ static int edac_pci_main_kobj_setup(void) { - int err; - struct bus_type *edac_subsys; + int err = -ENODEV; + const struct bus_type *edac_subsys; + struct device *dev_root; edac_dbg(0, "\n"); @@ -354,11 +351,6 @@ static int edac_pci_main_kobj_setup(void) * controls and attributes */ edac_subsys = edac_get_sysfs_subsys(); - if (edac_subsys == NULL) { - edac_dbg(1, "no edac_subsys\n"); - err = -ENODEV; - goto decrement_count_fail; - } /* Bump the reference count on this module to ensure the * modules isn't unloaded until we deconstruct the top @@ -366,8 +358,7 @@ static int edac_pci_main_kobj_setup(void) */ if (!try_module_get(THIS_MODULE)) { edac_dbg(1, "try_module_get() failed\n"); - err = -ENODEV; - goto mod_get_fail; + goto decrement_count_fail; } edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); @@ -378,9 +369,13 @@ static int edac_pci_main_kobj_setup(void) } /* Instanstiate the pci object */ - err = kobject_init_and_add(edac_pci_top_main_kobj, - &ktype_edac_pci_main_kobj, - &edac_subsys->dev_root->kobj, "pci"); + dev_root = bus_get_dev_root(edac_subsys); + if (dev_root) { + err = kobject_init_and_add(edac_pci_top_main_kobj, + &ktype_edac_pci_main_kobj, + &dev_root->kobj, "pci"); + put_device(dev_root); + } if (err) { edac_dbg(1, "Failed to register '.../edac/pci'\n"); goto kobject_init_and_add_fail; @@ -397,14 +392,11 @@ static int edac_pci_main_kobj_setup(void) /* Error unwind statck */ kobject_init_and_add_fail: - kfree(edac_pci_top_main_kobj); + kobject_put(edac_pci_top_main_kobj); kzalloc_fail: module_put(THIS_MODULE); -mod_get_fail: - edac_put_sysfs_subsys(); - decrement_count_fail: /* if are on this error exit, nothing to tear down */ atomic_dec(&edac_pci_sysfs_refcount); @@ -429,16 +421,9 @@ static void edac_pci_main_kobj_teardown(void) if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { edac_dbg(0, "called kobject_put on main kobj\n"); kobject_put(edac_pci_top_main_kobj); - edac_put_sysfs_subsys(); } } -/* - * - * edac_pci_create_sysfs - * - * Create the controls/attributes for the specified EDAC PCI device - */ int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci) { int err; @@ -474,11 +459,6 @@ unregister_cleanup: return err; } -/* - * edac_pci_remove_sysfs - * - * remove the controls and attributes for this EDAC PCI device - */ void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci) { edac_dbg(0, "index=%d\n", pci->pci_idx); @@ -541,7 +521,7 @@ static void edac_pci_dev_parity_clear(struct pci_dev *dev) /* read the device TYPE, looking for bridges */ pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); - if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) + if ((header_type & PCI_HEADER_TYPE_MASK) == PCI_HEADER_TYPE_BRIDGE) get_pci_parity_status(dev, 1); } @@ -603,7 +583,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev) edac_dbg(4, "PCI HEADER TYPE= 0x%02x %s\n", header_type, dev_name(&dev->dev)); - if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { + if ((header_type & PCI_HEADER_TYPE_MASK) == PCI_HEADER_TYPE_BRIDGE) { /* On bridges, need to examine secondary status register */ status = get_pci_parity_status(dev, 1); @@ -761,5 +741,3 @@ MODULE_PARM_DESC(check_pci_errors, module_param(edac_pci_panic_on_pe, int, 0644); MODULE_PARM_DESC(edac_pci_panic_on_pe, "Panic on PCI Bus Parity error: 0=off 1=on"); - -#endif /* CONFIG_PCI */ diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c deleted file mode 100644 index 351945fa2ecd..000000000000 --- a/drivers/edac/edac_stub.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * common 
EDAC components that must be in kernel - * - * Author: Dave Jiang <djiang@mvista.com> - * - * 2007 (c) MontaVista Software, Inc. - * 2010 (c) Advanced Micro Devices Inc. - * Borislav Petkov <bp@alien8.de> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - * - */ -#include <linux/module.h> -#include <linux/edac.h> -#include <linux/atomic.h> -#include <linux/device.h> -#include <asm/edac.h> - -int edac_op_state = EDAC_OPSTATE_INVAL; -EXPORT_SYMBOL_GPL(edac_op_state); - -atomic_t edac_handlers = ATOMIC_INIT(0); -EXPORT_SYMBOL_GPL(edac_handlers); - -int edac_err_assert = 0; -EXPORT_SYMBOL_GPL(edac_err_assert); - -static atomic_t edac_subsys_valid = ATOMIC_INIT(0); - -/* - * called to determine if there is an EDAC driver interested in - * knowing an event (such as NMI) occurred - */ -int edac_handler_set(void) -{ - if (edac_op_state == EDAC_OPSTATE_POLL) - return 0; - - return atomic_read(&edac_handlers); -} -EXPORT_SYMBOL_GPL(edac_handler_set); - -/* - * handler for NMI type of interrupts to assert error - */ -void edac_atomic_assert_error(void) -{ - edac_err_assert++; -} -EXPORT_SYMBOL_GPL(edac_atomic_assert_error); - -/* - * sysfs object: /sys/devices/system/edac - * need to export to other files - */ -struct bus_type edac_subsys = { - .name = "edac", - .dev_name = "edac", -}; -EXPORT_SYMBOL_GPL(edac_subsys); - -/* return pointer to the 'edac' node in sysfs */ -struct bus_type *edac_get_sysfs_subsys(void) -{ - int err = 0; - - if (atomic_read(&edac_subsys_valid)) - goto out; - - /* create the /sys/devices/system/edac directory */ - err = subsys_system_register(&edac_subsys, NULL); - if (err) { - printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n"); - return NULL; - } - -out: - atomic_inc(&edac_subsys_valid); - return &edac_subsys; -} -EXPORT_SYMBOL_GPL(edac_get_sysfs_subsys); - -void edac_put_sysfs_subsys(void) -{ - /* last user unregisters it */ - if (atomic_dec_and_test(&edac_subsys_valid)) - bus_unregister(&edac_subsys); -} -EXPORT_SYMBOL_GPL(edac_put_sysfs_subsys); diff --git a/drivers/edac/fsl_ddr_edac.c b/drivers/edac/fsl_ddr_edac.c new file mode 100644 index 000000000000..e4eaec0aa81d --- /dev/null +++ b/drivers/edac/fsl_ddr_edac.c @@ -0,0 +1,673 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Freescale Memory Controller kernel module + * + * Support Power-based SoCs including MPC85xx, MPC86xx, MPC83xx and + * ARM-based Layerscape SoCs including LS2xxx and LS1021A. Originally + * split out from mpc85xx_edac EDAC driver. + * + * Parts Copyrighted (c) 2013 by Freescale Semiconductor, Inc. + * + * Author: Dave Jiang <djiang@mvista.com> + * + * 2006-2007 (c) MontaVista Software, Inc. 
+ */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ctype.h> +#include <linux/io.h> +#include <linux/mod_devicetable.h> +#include <linux/edac.h> +#include <linux/smp.h> +#include <linux/gfp.h> + +#include <linux/of.h> +#include <linux/of_address.h> +#include "edac_module.h" +#include "fsl_ddr_edac.h" + +#define EDAC_MOD_STR "fsl_ddr_edac" + +static int edac_mc_idx; + +static inline void __iomem *ddr_reg_addr(struct fsl_mc_pdata *pdata, unsigned int off) +{ + if (pdata->flag == TYPE_IMX9 && off >= FSL_MC_DATA_ERR_INJECT_HI && off <= FSL_MC_ERR_SBE) + return pdata->inject_vbase + off - FSL_MC_DATA_ERR_INJECT_HI + + IMX9_MC_DATA_ERR_INJECT_OFF; + + if (pdata->flag == TYPE_IMX9 && off >= IMX9_MC_ERR_EN) + return pdata->inject_vbase + off - IMX9_MC_ERR_EN; + + return pdata->mc_vbase + off; +} + +static inline u32 ddr_in32(struct fsl_mc_pdata *pdata, unsigned int off) +{ + void __iomem *addr = ddr_reg_addr(pdata, off); + + return pdata->little_endian ? ioread32(addr) : ioread32be(addr); +} + +static inline void ddr_out32(struct fsl_mc_pdata *pdata, unsigned int off, u32 value) +{ + void __iomem *addr = ddr_reg_addr(pdata, off); + + if (pdata->little_endian) + iowrite32(value, addr); + else + iowrite32be(value, addr); +} + +#ifdef CONFIG_EDAC_DEBUG +/************************ MC SYSFS parts ***********************************/ + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +static ssize_t fsl_mc_inject_data_hi_show(struct device *dev, + struct device_attribute *mattr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + return sprintf(data, "0x%08x", + ddr_in32(pdata, FSL_MC_DATA_ERR_INJECT_HI)); +} + +static ssize_t fsl_mc_inject_data_lo_show(struct device *dev, + struct device_attribute *mattr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + return sprintf(data, "0x%08x", + ddr_in32(pdata, FSL_MC_DATA_ERR_INJECT_LO)); +} + +static ssize_t fsl_mc_inject_ctrl_show(struct device *dev, + struct device_attribute *mattr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + return sprintf(data, "0x%08x", + ddr_in32(pdata, FSL_MC_ECC_ERR_INJECT)); +} + +static ssize_t fsl_mc_inject_data_hi_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + unsigned long val; + int rc; + + if (isdigit(*data)) { + rc = kstrtoul(data, 0, &val); + if (rc) + return rc; + + ddr_out32(pdata, FSL_MC_DATA_ERR_INJECT_HI, val); + return count; + } + return 0; +} + +static ssize_t fsl_mc_inject_data_lo_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + unsigned long val; + int rc; + + if (isdigit(*data)) { + rc = kstrtoul(data, 0, &val); + if (rc) + return rc; + + ddr_out32(pdata, FSL_MC_DATA_ERR_INJECT_LO, val); + return count; + } + return 0; +} + +static ssize_t fsl_mc_inject_ctrl_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + unsigned long val; + int rc; + + if (isdigit(*data)) { + rc = kstrtoul(data, 0, &val); + if (rc) + return rc; + + ddr_out32(pdata, FSL_MC_ECC_ERR_INJECT, val); + return 
count; + } + return 0; +} + +static DEVICE_ATTR(inject_data_hi, S_IRUGO | S_IWUSR, + fsl_mc_inject_data_hi_show, fsl_mc_inject_data_hi_store); +static DEVICE_ATTR(inject_data_lo, S_IRUGO | S_IWUSR, + fsl_mc_inject_data_lo_show, fsl_mc_inject_data_lo_store); +static DEVICE_ATTR(inject_ctrl, S_IRUGO | S_IWUSR, + fsl_mc_inject_ctrl_show, fsl_mc_inject_ctrl_store); +#endif /* CONFIG_EDAC_DEBUG */ + +static struct attribute *fsl_ddr_dev_attrs[] = { +#ifdef CONFIG_EDAC_DEBUG + &dev_attr_inject_data_hi.attr, + &dev_attr_inject_data_lo.attr, + &dev_attr_inject_ctrl.attr, +#endif + NULL +}; + +ATTRIBUTE_GROUPS(fsl_ddr_dev); + +/**************************** MC Err device ***************************/ + +/* + * Taken from table 8-55 in the MPC8641 User's Manual and/or 9-61 in the + * MPC8572 User's Manual. Each line represents a syndrome bit column as a + * 64-bit value, but split into an upper and lower 32-bit chunk. The labels + * below correspond to Freescale's manuals. + */ +static unsigned int ecc_table[16] = { + /* MSB LSB */ + /* [0:31] [32:63] */ + 0xf00fe11e, 0xc33c0ff7, /* Syndrome bit 7 */ + 0x00ff00ff, 0x00fff0ff, + 0x0f0f0f0f, 0x0f0fff00, + 0x11113333, 0x7777000f, + 0x22224444, 0x8888222f, + 0x44448888, 0xffff4441, + 0x8888ffff, 0x11118882, + 0xffff1111, 0x22221114, /* Syndrome bit 0 */ +}; + +/* + * Calculate the correct ECC value for a 64-bit value specified by high:low + */ +static u8 calculate_ecc(u32 high, u32 low) +{ + u32 mask_low; + u32 mask_high; + int bit_cnt; + u8 ecc = 0; + int i; + int j; + + for (i = 0; i < 8; i++) { + mask_high = ecc_table[i * 2]; + mask_low = ecc_table[i * 2 + 1]; + bit_cnt = 0; + + for (j = 0; j < 32; j++) { + if ((mask_high >> j) & 1) + bit_cnt ^= (high >> j) & 1; + if ((mask_low >> j) & 1) + bit_cnt ^= (low >> j) & 1; + } + + ecc |= bit_cnt << i; + } + + return ecc; +} + +/* + * Create the syndrome code which is generated if the data line specified by + * 'bit' failed. Eg generate an 8-bit codes seen in Table 8-55 in the MPC8641 + * User's Manual and 9-61 in the MPC8572 User's Manual. + */ +static u8 syndrome_from_bit(unsigned int bit) { + int i; + u8 syndrome = 0; + + /* + * Cycle through the upper or lower 32-bit portion of each value in + * ecc_table depending on if 'bit' is in the upper or lower half of + * 64-bit data. + */ + for (i = bit < 32; i < 16; i += 2) + syndrome |= ((ecc_table[i] >> (bit % 32)) & 1) << (i / 2); + + return syndrome; +} + +/* + * Decode data and ecc syndrome to determine what went wrong + * Note: This can only decode single-bit errors + */ +static void sbe_ecc_decode(u32 cap_high, u32 cap_low, u32 cap_ecc, + int *bad_data_bit, int *bad_ecc_bit) +{ + int i; + u8 syndrome; + + *bad_data_bit = -1; + *bad_ecc_bit = -1; + + /* + * Calculate the ECC of the captured data and XOR it with the captured + * ECC to find an ECC syndrome value we can search for + */ + syndrome = calculate_ecc(cap_high, cap_low) ^ cap_ecc; + + /* Check if a data line is stuck... */ + for (i = 0; i < 64; i++) { + if (syndrome == syndrome_from_bit(i)) { + *bad_data_bit = i; + return; + } + } + + /* If data is correct, check ECC bits for errors... 
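+ * A check-bit failure cannot match any data-line syndrome: each ECC
+ * bit contributes exactly one bit to the syndrome, so a single bad
+ * check bit yields a one-hot syndrome (e.g. 0x08 implicates ECC
+ * bit 3).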
*/ + for (i = 0; i < 8; i++) { + if ((syndrome >> i) & 0x1) { + *bad_ecc_bit = i; + return; + } + } +} + +#define make64(high, low) (((u64)(high) << 32) | (low)) + +static void fsl_mc_check(struct mem_ctl_info *mci) +{ + struct fsl_mc_pdata *pdata = mci->pvt_info; + struct csrow_info *csrow; + u32 bus_width; + u32 err_detect; + u32 syndrome; + u64 err_addr; + u32 pfn; + int row_index; + u32 cap_high; + u32 cap_low; + int bad_data_bit; + int bad_ecc_bit; + + err_detect = ddr_in32(pdata, FSL_MC_ERR_DETECT); + if (!err_detect) + return; + + fsl_mc_printk(mci, KERN_ERR, "Err Detect Register: %#8.8x\n", + err_detect); + + /* no more processing if not ECC bit errors */ + if (!(err_detect & (DDR_EDE_SBE | DDR_EDE_MBE))) { + ddr_out32(pdata, FSL_MC_ERR_DETECT, err_detect); + return; + } + + syndrome = ddr_in32(pdata, FSL_MC_CAPTURE_ECC); + + /* Mask off appropriate bits of syndrome based on bus width */ + bus_width = (ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG) & + DSC_DBW_MASK) ? 32 : 64; + if (bus_width == 64) + syndrome &= 0xff; + else + syndrome &= 0xffff; + + err_addr = make64( + ddr_in32(pdata, FSL_MC_CAPTURE_EXT_ADDRESS), + ddr_in32(pdata, FSL_MC_CAPTURE_ADDRESS)); + pfn = err_addr >> PAGE_SHIFT; + + for (row_index = 0; row_index < mci->nr_csrows; row_index++) { + csrow = mci->csrows[row_index]; + if ((pfn >= csrow->first_page) && (pfn <= csrow->last_page)) + break; + } + + cap_high = ddr_in32(pdata, FSL_MC_CAPTURE_DATA_HI); + cap_low = ddr_in32(pdata, FSL_MC_CAPTURE_DATA_LO); + + /* + * Analyze single-bit errors on 64-bit wide buses + * TODO: Add support for 32-bit wide buses + */ + if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) { + u64 cap = (u64)cap_high << 32 | cap_low; + u32 s = syndrome; + + sbe_ecc_decode(cap_high, cap_low, syndrome, + &bad_data_bit, &bad_ecc_bit); + + if (bad_data_bit >= 0) { + fsl_mc_printk(mci, KERN_ERR, "Faulty Data bit: %d\n", bad_data_bit); + cap ^= 1ULL << bad_data_bit; + } + + if (bad_ecc_bit >= 0) { + fsl_mc_printk(mci, KERN_ERR, "Faulty ECC bit: %d\n", bad_ecc_bit); + s ^= 1 << bad_ecc_bit; + } + + fsl_mc_printk(mci, KERN_ERR, + "Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n", + upper_32_bits(cap), lower_32_bits(cap), s); + } + + fsl_mc_printk(mci, KERN_ERR, + "Captured Data / ECC:\t%#8.8x_%08x / %#2.2x\n", + cap_high, cap_low, syndrome); + fsl_mc_printk(mci, KERN_ERR, "Err addr: %#8.8llx\n", err_addr); + fsl_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn); + + /* we are out of range */ + if (row_index == mci->nr_csrows) + fsl_mc_printk(mci, KERN_ERR, "PFN out of range!\n"); + + if (err_detect & DDR_EDE_SBE) + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + pfn, err_addr & ~PAGE_MASK, syndrome, + row_index, 0, -1, + mci->ctl_name, ""); + + if (err_detect & DDR_EDE_MBE) + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + pfn, err_addr & ~PAGE_MASK, syndrome, + row_index, 0, -1, + mci->ctl_name, ""); + + ddr_out32(pdata, FSL_MC_ERR_DETECT, err_detect); +} + +static irqreturn_t fsl_mc_isr(int irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct fsl_mc_pdata *pdata = mci->pvt_info; + u32 err_detect; + + err_detect = ddr_in32(pdata, FSL_MC_ERR_DETECT); + if (!err_detect) + return IRQ_NONE; + + fsl_mc_check(mci); + + return IRQ_HANDLED; +} + +static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) +{ + struct fsl_mc_pdata *pdata = mci->pvt_info; + struct csrow_info *csrow; + struct dimm_info *dimm; + u32 sdram_ctl; + u32 sdtype; + enum mem_type mtype; + u32 cs_bnds; + int index; + + sdram_ctl = ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG); + 
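+	/*
+	 * DDR_SDRAM_CFG[SDRAM_TYPE] (masked by DSC_SDTYPE_MASK) encodes
+	 * the memory generation (e.g. 0x03000000 = DDR2, 0x07000000 =
+	 * DDR3), while DSC_RD_EN distinguishes registered DIMMs; the
+	 * switch below maps that pair onto an EDAC mem_type.
+	 */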
+ sdtype = sdram_ctl & DSC_SDTYPE_MASK; + if (sdram_ctl & DSC_RD_EN) { + switch (sdtype) { + case 0x02000000: + mtype = MEM_RDDR; + break; + case 0x03000000: + mtype = MEM_RDDR2; + break; + case 0x07000000: + mtype = MEM_RDDR3; + break; + case 0x05000000: + mtype = MEM_RDDR4; + break; + default: + mtype = MEM_UNKNOWN; + break; + } + } else { + switch (sdtype) { + case 0x02000000: + mtype = MEM_DDR; + break; + case 0x03000000: + mtype = MEM_DDR2; + break; + case 0x07000000: + mtype = MEM_DDR3; + break; + case 0x05000000: + mtype = MEM_DDR4; + break; + case 0x04000000: + mtype = MEM_LPDDR4; + break; + default: + mtype = MEM_UNKNOWN; + break; + } + } + + for (index = 0; index < mci->nr_csrows; index++) { + u32 start; + u32 end; + + csrow = mci->csrows[index]; + dimm = csrow->channels[0]->dimm; + + cs_bnds = ddr_in32(pdata, FSL_MC_CS_BNDS_0 + + (index * FSL_MC_CS_BNDS_OFS)); + + start = (cs_bnds & 0xffff0000) >> 16; + end = (cs_bnds & 0x0000ffff); + + if (start == end) + continue; /* not populated */ + + start <<= (24 - PAGE_SHIFT); + end <<= (24 - PAGE_SHIFT); + end |= (1 << (24 - PAGE_SHIFT)) - 1; + + csrow->first_page = start; + csrow->last_page = end; + + dimm->nr_pages = end + 1 - start; + dimm->grain = 8; + dimm->mtype = mtype; + dimm->dtype = DEV_UNKNOWN; + if (pdata->flag == TYPE_IMX9) + dimm->dtype = DEV_X16; + else if (sdram_ctl & DSC_X32_EN) + dimm->dtype = DEV_X32; + dimm->edac_mode = EDAC_SECDED; + } +} + +int fsl_mc_err_probe(struct platform_device *op) +{ + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; + struct fsl_mc_pdata *pdata; + struct resource r; + u32 ecc_en_mask; + u32 sdram_ctl; + int res; + + if (!devres_open_group(&op->dev, fsl_mc_err_probe, GFP_KERNEL)) + return -ENOMEM; + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 4; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 1; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers, + sizeof(*pdata)); + if (!mci) { + devres_release_group(&op->dev, fsl_mc_err_probe); + return -ENOMEM; + } + + pdata = mci->pvt_info; + pdata->name = "fsl_mc_err"; + mci->pdev = &op->dev; + pdata->edac_idx = edac_mc_idx++; + dev_set_drvdata(mci->pdev, mci); + mci->ctl_name = pdata->name; + mci->dev_name = pdata->name; + + pdata->flag = (unsigned long)device_get_match_data(&op->dev); + + /* + * Get the endianness of DDR controller registers. + * Default is big endian. 
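+ *
+ * For example (illustrative node, modeled loosely on Layerscape
+ * device trees), a little-endian controller is marked with a bare
+ * boolean property:
+ *
+ *	memory-controller@1080000 {
+ *		compatible = "fsl,qoriq-memory-controller";
+ *		little-endian;
+ *	};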
+ */ + pdata->little_endian = of_property_read_bool(op->dev.of_node, "little-endian"); + + res = of_address_to_resource(op->dev.of_node, 0, &r); + if (res) { + pr_err("%s: Unable to get resource for MC err regs\n", + __func__); + goto err; + } + + if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r), + pdata->name)) { + pr_err("%s: Error while requesting mem region\n", + __func__); + res = -EBUSY; + goto err; + } + + pdata->mc_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r)); + if (!pdata->mc_vbase) { + pr_err("%s: Unable to setup MC err regs\n", __func__); + res = -ENOMEM; + goto err; + } + + if (pdata->flag == TYPE_IMX9) { + pdata->inject_vbase = devm_platform_ioremap_resource_byname(op, "inject"); + if (IS_ERR(pdata->inject_vbase)) { + res = -ENOMEM; + goto err; + } + } + + if (pdata->flag == TYPE_IMX9) { + sdram_ctl = ddr_in32(pdata, IMX9_MC_ERR_EN); + ecc_en_mask = ERR_ECC_EN | ERR_INLINE_ECC; + } else { + sdram_ctl = ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG); + ecc_en_mask = DSC_ECC_EN; + } + + if ((sdram_ctl & ecc_en_mask) != ecc_en_mask) { + /* no ECC */ + pr_warn("%s: No ECC DIMMs discovered\n", __func__); + res = -ENODEV; + goto err; + } + + edac_dbg(3, "init mci\n"); + mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR | + MEM_FLAG_DDR2 | MEM_FLAG_RDDR2 | + MEM_FLAG_DDR3 | MEM_FLAG_RDDR3 | + MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | + MEM_FLAG_LPDDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = EDAC_MOD_STR; + + if (edac_op_state == EDAC_OPSTATE_POLL) + mci->edac_check = fsl_mc_check; + + mci->ctl_page_to_phys = NULL; + + mci->scrub_mode = SCRUB_SW_SRC; + + fsl_ddr_init_csrows(mci); + + /* store the original error disable bits */ + pdata->orig_ddr_err_disable = ddr_in32(pdata, FSL_MC_ERR_DISABLE); + ddr_out32(pdata, FSL_MC_ERR_DISABLE, 0); + + /* clear all error bits */ + ddr_out32(pdata, FSL_MC_ERR_DETECT, ~0); + + res = edac_mc_add_mc_with_groups(mci, fsl_ddr_dev_groups); + if (res) { + edac_dbg(3, "failed edac_mc_add_mc()\n"); + goto err; + } + + if (edac_op_state == EDAC_OPSTATE_INT) { + ddr_out32(pdata, FSL_MC_ERR_INT_EN, + DDR_EIE_MBEE | DDR_EIE_SBEE); + + /* store the original error management threshold */ + pdata->orig_ddr_err_sbe = ddr_in32(pdata, + FSL_MC_ERR_SBE) & 0xff0000; + + /* set threshold to 1 error per interrupt */ + ddr_out32(pdata, FSL_MC_ERR_SBE, 0x10000); + + /* register interrupts */ + pdata->irq = platform_get_irq(op, 0); + res = devm_request_irq(&op->dev, pdata->irq, + fsl_mc_isr, + IRQF_SHARED, + "[EDAC] MC err", mci); + if (res < 0) { + pr_err("%s: Unable to request irq %d for FSL DDR DRAM ERR\n", + __func__, pdata->irq); + res = -ENODEV; + goto err2; + } + + pr_info(EDAC_MOD_STR " acquired irq %d for MC\n", + pdata->irq); + } + + devres_remove_group(&op->dev, fsl_mc_err_probe); + edac_dbg(3, "success\n"); + pr_info(EDAC_MOD_STR " MC err registered\n"); + + return 0; + +err2: + edac_mc_del_mc(&op->dev); +err: + devres_release_group(&op->dev, fsl_mc_err_probe); + edac_mc_free(mci); + return res; +} + +void fsl_mc_err_remove(struct platform_device *op) +{ + struct mem_ctl_info *mci = dev_get_drvdata(&op->dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + + edac_dbg(0, "\n"); + + if (edac_op_state == EDAC_OPSTATE_INT) { + ddr_out32(pdata, FSL_MC_ERR_INT_EN, 0); + } + + ddr_out32(pdata, FSL_MC_ERR_DISABLE, + pdata->orig_ddr_err_disable); + ddr_out32(pdata, FSL_MC_ERR_SBE, pdata->orig_ddr_err_sbe); + + + edac_mc_del_mc(&op->dev); + edac_mc_free(mci); +} diff --git 
a/drivers/edac/fsl_ddr_edac.h b/drivers/edac/fsl_ddr_edac.h new file mode 100644 index 000000000000..73618f79e587 --- /dev/null +++ b/drivers/edac/fsl_ddr_edac.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Freescale Memory Controller kernel module + * + * Support Power-based SoCs including MPC85xx, MPC86xx, MPC83xx and + * ARM-based Layerscape SoCs including LS2xxx and LS1021A. Originally + * split out from mpc85xx_edac EDAC driver. + * + * Author: Dave Jiang <djiang@mvista.com> + * + * 2006-2007 (c) MontaVista Software, Inc. + */ +#ifndef _FSL_DDR_EDAC_H_ +#define _FSL_DDR_EDAC_H_ + +#define fsl_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "FSL_DDR", fmt, ##arg) + +/* + * DRAM error defines + */ + +/* DDR_SDRAM_CFG */ +#define FSL_MC_DDR_SDRAM_CFG 0x0110 +#define FSL_MC_CS_BNDS_0 0x0000 +#define FSL_MC_CS_BNDS_OFS 0x0008 + +#define FSL_MC_DATA_ERR_INJECT_HI 0x0e00 +#define FSL_MC_DATA_ERR_INJECT_LO 0x0e04 +#define FSL_MC_ECC_ERR_INJECT 0x0e08 +#define FSL_MC_CAPTURE_DATA_HI 0x0e20 +#define FSL_MC_CAPTURE_DATA_LO 0x0e24 +#define FSL_MC_CAPTURE_ECC 0x0e28 +#define FSL_MC_ERR_DETECT 0x0e40 +#define FSL_MC_ERR_DISABLE 0x0e44 +#define FSL_MC_ERR_INT_EN 0x0e48 +#define FSL_MC_CAPTURE_ATRIBUTES 0x0e4c +#define FSL_MC_CAPTURE_ADDRESS 0x0e50 +#define FSL_MC_CAPTURE_EXT_ADDRESS 0x0e54 +#define FSL_MC_ERR_SBE 0x0e58 + +#define IMX9_MC_ERR_EN 0x1000 +#define IMX9_MC_DATA_ERR_INJECT_OFF 0x100 + +#define DSC_MEM_EN 0x80000000 +#define DSC_ECC_EN 0x20000000 +#define DSC_RD_EN 0x10000000 +#define DSC_DBW_MASK 0x00180000 +#define DSC_DBW_32 0x00080000 +#define DSC_DBW_64 0x00000000 + +#define ERR_ECC_EN 0x80000000 +#define ERR_INLINE_ECC 0x40000000 + +#define DSC_SDTYPE_MASK 0x07000000 +#define DSC_X32_EN 0x00000020 + +/* Err_Int_En */ +#define DDR_EIE_MSEE 0x1 /* memory select */ +#define DDR_EIE_SBEE 0x4 /* single-bit ECC error */ +#define DDR_EIE_MBEE 0x8 /* multi-bit ECC error */ + +/* Err_Detect */ +#define DDR_EDE_MSE 0x1 /* memory select */ +#define DDR_EDE_SBE 0x4 /* single-bit ECC error */ +#define DDR_EDE_MBE 0x8 /* multi-bit ECC error */ +#define DDR_EDE_MME 0x80000000 /* multiple memory errors */ + +/* Err_Disable */ +#define DDR_EDI_MSED 0x1 /* memory select disable */ +#define DDR_EDI_SBED 0x4 /* single-bit ECC error disable */ +#define DDR_EDI_MBED 0x8 /* multi-bit ECC error disable */ + +#define TYPE_IMX9 0x1 /* MC used by iMX9 having registers changed */ + +struct fsl_mc_pdata { + char *name; + int edac_idx; + void __iomem *mc_vbase; + void __iomem *inject_vbase; + int irq; + u32 orig_ddr_err_disable; + u32 orig_ddr_err_sbe; + bool little_endian; + unsigned long flag; +}; +int fsl_mc_err_probe(struct platform_device *op); +void fsl_mc_err_remove(struct platform_device *op); +#endif diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index bb534670ec02..d80c88818691 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * GHES/EDAC Linux driver * - * This file may be distributed under the terms of the GNU General Public - * License version 2. + * Copyright (c) 2013 by Mauro Carvalho Chehab * - * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com> - * - * Red Hat Inc. http://www.redhat.com + * Red Hat Inc. 
https://www.redhat.com */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -14,26 +12,52 @@ #include <acpi/ghes.h> #include <linux/edac.h> #include <linux/dmi.h> -#include "edac_core.h" +#include "edac_module.h" #include <ras/ras_event.h> +#include <linux/notifier.h> +#include <linux/string.h> -#define GHES_EDAC_REVISION " Ver: 1.0.0" +#define OTHER_DETAIL_LEN 400 -struct ghes_edac_pvt { - struct list_head list; - struct ghes *ghes; +struct ghes_pvt { struct mem_ctl_info *mci; /* Buffers for the error handling routine */ - char detail_location[240]; - char other_detail[160]; + char other_detail[OTHER_DETAIL_LEN]; char msg[80]; }; -static LIST_HEAD(ghes_reglist); -static DEFINE_MUTEX(ghes_edac_lock); -static int ghes_edac_mc_num; +static refcount_t ghes_refcount = REFCOUNT_INIT(0); + +/* + * Access to ghes_pvt must be protected by ghes_lock. The spinlock + * also provides the necessary (implicit) memory barrier for the SMP + * case to make the pointer visible on another CPU. + */ +static struct ghes_pvt *ghes_pvt; + +/* + * This driver's representation of the system hardware, as collected + * from DMI. + */ +static struct ghes_hw_desc { + int num_dimms; + struct dimm_info *dimms; +} ghes_hw; + +/* GHES registration mutex */ +static DEFINE_MUTEX(ghes_reg_mutex); + +/* + * Sync with other, potentially concurrent callers of + * ghes_edac_report_mem_error(). We don't know what the + * "inventive" firmware would do. + */ +static DEFINE_SPINLOCK(ghes_lock); + +static bool system_scanned; +static struct list_head *ghes_devs; /* Memory Device - Type 17 of SMBIOS spec */ struct memdev_dmi_entry { @@ -61,155 +85,221 @@ struct memdev_dmi_entry { u16 conf_mem_clk_speed; } __attribute__((__packed__)); -struct ghes_edac_dimm_fill { - struct mem_ctl_info *mci; - unsigned count; -}; +static struct dimm_info *find_dimm_by_handle(struct mem_ctl_info *mci, u16 handle) +{ + struct dimm_info *dimm; -char *memory_type[] = { - [MEM_EMPTY] = "EMPTY", - [MEM_RESERVED] = "RESERVED", - [MEM_UNKNOWN] = "UNKNOWN", - [MEM_FPM] = "FPM", - [MEM_EDO] = "EDO", - [MEM_BEDO] = "BEDO", - [MEM_SDR] = "SDR", - [MEM_RDR] = "RDR", - [MEM_DDR] = "DDR", - [MEM_RDDR] = "RDDR", - [MEM_RMBS] = "RMBS", - [MEM_DDR2] = "DDR2", - [MEM_FB_DDR2] = "FB_DDR2", - [MEM_RDDR2] = "RDDR2", - [MEM_XDR] = "XDR", - [MEM_DDR3] = "DDR3", - [MEM_RDDR3] = "RDDR3", -}; + mci_for_each_dimm(mci, dimm) { + if (dimm->smbios_handle == handle) + return dimm; + } + + return NULL; +} -static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) +static void dimm_setup_label(struct dimm_info *dimm, u16 handle) { - int *num_dimm = arg; + const char *bank = NULL, *device = NULL; + + dmi_memdev_name(handle, &bank, &device); - if (dh->type == DMI_ENTRY_MEM_DEVICE) - (*num_dimm)++; + /* + * Set to a NULL string when both bank and device are zero. In this case, + * the label assigned by default will be preserved. + */ + snprintf(dimm->label, sizeof(dimm->label), "%s%s%s", + (bank && *bank) ? bank : "", + (bank && *bank && device && *device) ? " " : "", + (device && *device) ? 
device : ""); } -static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) +static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry) { - struct ghes_edac_dimm_fill *dimm_fill = arg; - struct mem_ctl_info *mci = dimm_fill->mci; - - if (dh->type == DMI_ENTRY_MEM_DEVICE) { - struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; - struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, - mci->n_layers, - dimm_fill->count, 0, 0); - - if (entry->size == 0xffff) { - pr_info("Can't get DIMM%i size\n", - dimm_fill->count); - dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ - } else if (entry->size == 0x7fff) { - dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); - } else { - if (entry->size & 1 << 15) - dimm->nr_pages = MiB_TO_PAGES((entry->size & - 0x7fff) << 10); - else - dimm->nr_pages = MiB_TO_PAGES(entry->size); - } + u16 rdr_mask = BIT(7) | BIT(13); - switch (entry->memory_type) { - case 0x12: - if (entry->type_detail & 1 << 13) - dimm->mtype = MEM_RDDR; - else - dimm->mtype = MEM_DDR; - break; - case 0x13: - if (entry->type_detail & 1 << 13) - dimm->mtype = MEM_RDDR2; - else - dimm->mtype = MEM_DDR2; - break; - case 0x14: - dimm->mtype = MEM_FB_DDR2; - break; - case 0x18: - if (entry->type_detail & 1 << 13) - dimm->mtype = MEM_RDDR3; - else - dimm->mtype = MEM_DDR3; - break; - default: - if (entry->type_detail & 1 << 6) - dimm->mtype = MEM_RMBS; - else if ((entry->type_detail & ((1 << 7) | (1 << 13))) - == ((1 << 7) | (1 << 13))) - dimm->mtype = MEM_RDR; - else if (entry->type_detail & 1 << 7) - dimm->mtype = MEM_SDR; - else if (entry->type_detail & 1 << 9) - dimm->mtype = MEM_EDO; - else - dimm->mtype = MEM_UNKNOWN; - } + if (entry->size == 0xffff) { + pr_info("Can't get DIMM%i size\n", dimm->idx); + dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ + } else if (entry->size == 0x7fff) { + dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); + } else { + if (entry->size & BIT(15)) + dimm->nr_pages = MiB_TO_PAGES((entry->size & 0x7fff) << 10); + else + dimm->nr_pages = MiB_TO_PAGES(entry->size); + } - /* - * Actually, we can only detect if the memory has bits for - * checksum or not - */ - if (entry->total_width == entry->data_width) - dimm->edac_mode = EDAC_NONE; + switch (entry->memory_type) { + case 0x12: + if (entry->type_detail & BIT(13)) + dimm->mtype = MEM_RDDR; else - dimm->edac_mode = EDAC_SECDED; + dimm->mtype = MEM_DDR; + break; + case 0x13: + if (entry->type_detail & BIT(13)) + dimm->mtype = MEM_RDDR2; + else + dimm->mtype = MEM_DDR2; + break; + case 0x14: + dimm->mtype = MEM_FB_DDR2; + break; + case 0x18: + if (entry->type_detail & BIT(12)) + dimm->mtype = MEM_NVDIMM; + else if (entry->type_detail & BIT(13)) + dimm->mtype = MEM_RDDR3; + else + dimm->mtype = MEM_DDR3; + break; + case 0x1a: + if (entry->type_detail & BIT(12)) + dimm->mtype = MEM_NVDIMM; + else if (entry->type_detail & BIT(13)) + dimm->mtype = MEM_RDDR4; + else + dimm->mtype = MEM_DDR4; + break; + default: + if (entry->type_detail & BIT(6)) + dimm->mtype = MEM_RMBS; + else if ((entry->type_detail & rdr_mask) == rdr_mask) + dimm->mtype = MEM_RDR; + else if (entry->type_detail & BIT(7)) + dimm->mtype = MEM_SDR; + else if (entry->type_detail & BIT(9)) + dimm->mtype = MEM_EDO; + else + dimm->mtype = MEM_UNKNOWN; + } - dimm->dtype = DEV_UNKNOWN; - dimm->grain = 128; /* Likely, worse case */ - - /* - * FIXME: It shouldn't be hard to also fill the DIMM labels - */ - - if (dimm->nr_pages) { - edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", - dimm_fill->count, 
memory_type[dimm->mtype], - PAGES_TO_MiB(dimm->nr_pages), - (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); - edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", - entry->memory_type, entry->type_detail, - entry->total_width, entry->data_width); + /* + * Actually, we can only detect if the memory has bits for + * checksum or not + */ + if (entry->total_width == entry->data_width) + dimm->edac_mode = EDAC_NONE; + else + dimm->edac_mode = EDAC_SECDED; + + dimm->dtype = DEV_UNKNOWN; + dimm->grain = 128; /* Likely, worse case */ + + dimm_setup_label(dimm, entry->handle); + + if (dimm->nr_pages) { + edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", + dimm->idx, edac_mem_types[dimm->mtype], + PAGES_TO_MiB(dimm->nr_pages), + (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); + edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", + entry->memory_type, entry->type_detail, + entry->total_width, entry->data_width); + } + + dimm->smbios_handle = entry->handle; +} + +static void enumerate_dimms(const struct dmi_header *dh, void *arg) +{ + struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; + struct ghes_hw_desc *hw = (struct ghes_hw_desc *)arg; + struct dimm_info *d; + + if (dh->type != DMI_ENTRY_MEM_DEVICE) + return; + + /* Enlarge the array with additional 16 */ + if (!hw->num_dimms || !(hw->num_dimms % 16)) { + struct dimm_info *new; + + new = krealloc_array(hw->dimms, hw->num_dimms + 16, + sizeof(struct dimm_info), GFP_KERNEL); + if (!new) { + WARN_ON_ONCE(1); + return; } - dimm_fill->count++; + hw->dimms = new; } + + d = &hw->dimms[hw->num_dimms]; + d->idx = hw->num_dimms; + + assign_dmi_dimm_info(d, entry); + + hw->num_dimms++; +} + +static void ghes_scan_system(void) +{ + if (system_scanned) + return; + + dmi_walk(enumerate_dimms, &ghes_hw); + + system_scanned = true; } -void ghes_edac_report_mem_error(struct ghes *ghes, int sev, - struct cper_sec_mem_err *mem_err) +static int print_mem_error_other_detail(const struct cper_sec_mem_err *mem, char *msg, + const char *location, unsigned int len) { - enum hw_event_mc_err_type type; + u32 n; + + if (!msg) + return 0; + + n = 0; + len -= 1; + + n += scnprintf(msg + n, len - n, "APEI location: %s ", location); + + if (!(mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)) + goto out; + + n += scnprintf(msg + n, len - n, "status(0x%016llx): ", mem->error_status); + n += scnprintf(msg + n, len - n, "%s ", cper_mem_err_status_str(mem->error_status)); + +out: + msg[n] = '\0'; + + return n; +} + +static int ghes_edac_report_mem_error(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cper_sec_mem_err *mem_err = (struct cper_sec_mem_err *)data; + struct cper_mem_err_compact cmem; struct edac_raw_error_desc *e; struct mem_ctl_info *mci; - struct ghes_edac_pvt *pvt = NULL; + unsigned long sev = val; + struct ghes_pvt *pvt; + unsigned long flags; char *p; - u8 grain_bits; - list_for_each_entry(pvt, &ghes_reglist, list) { - if (ghes == pvt->ghes) - break; - } - if (!pvt) { - pr_err("Internal error: Can't find EDAC structure\n"); - return; - } + /* + * We can do the locking below because GHES defers error processing + * from NMI to IRQ context. Whenever that changes, we'd at least + * know. 
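+ *
+ * Were this ever called from a real NMI, taking ghes_lock could
+ * deadlock against a CPU already holding it; the in_nmi() check
+ * below guards against exactly that.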
+ */ + if (WARN_ON_ONCE(in_nmi())) + return NOTIFY_OK; + + spin_lock_irqsave(&ghes_lock, flags); + + pvt = ghes_pvt; + if (!pvt) + goto unlock; + mci = pvt->mci; e = &mci->error_desc; /* Cleans the error report buffer */ memset(e, 0, sizeof (*e)); e->error_count = 1; - strcpy(e->label, "unknown label"); + e->grain = 1; e->msg = pvt->msg; e->other_detail = pvt->other_detail; e->top_layer = -1; @@ -220,17 +310,17 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, switch (sev) { case GHES_SEV_CORRECTED: - type = HW_EVENT_ERR_CORRECTED; + e->type = HW_EVENT_ERR_CORRECTED; break; case GHES_SEV_RECOVERABLE: - type = HW_EVENT_ERR_UNCORRECTED; + e->type = HW_EVENT_ERR_UNCORRECTED; break; case GHES_SEV_PANIC: - type = HW_EVENT_ERR_FATAL; + e->type = HW_EVENT_ERR_FATAL; break; default: case GHES_SEV_NO: - type = HW_EVENT_ERR_INFO; + e->type = HW_EVENT_ERR_INFO; } edac_dbg(1, "error validation_bits: 0x%08llx\n", @@ -238,266 +328,147 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, /* Error type, mapped on e->msg */ if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { + u8 etype = mem_err->error_type; + p = pvt->msg; - switch (mem_err->error_type) { - case 0: - p += sprintf(p, "Unknown"); - break; - case 1: - p += sprintf(p, "No error"); - break; - case 2: - p += sprintf(p, "Single-bit ECC"); - break; - case 3: - p += sprintf(p, "Multi-bit ECC"); - break; - case 4: - p += sprintf(p, "Single-symbol ChipKill ECC"); - break; - case 5: - p += sprintf(p, "Multi-symbol ChipKill ECC"); - break; - case 6: - p += sprintf(p, "Master abort"); - break; - case 7: - p += sprintf(p, "Target abort"); - break; - case 8: - p += sprintf(p, "Parity Error"); - break; - case 9: - p += sprintf(p, "Watchdog timeout"); - break; - case 10: - p += sprintf(p, "Invalid address"); - break; - case 11: - p += sprintf(p, "Mirror Broken"); - break; - case 12: - p += sprintf(p, "Memory Sparing"); - break; - case 13: - p += sprintf(p, "Scrub corrected error"); - break; - case 14: - p += sprintf(p, "Scrub uncorrected error"); - break; - case 15: - p += sprintf(p, "Physical Memory Map-out event"); - break; - default: - p += sprintf(p, "reserved error (%d)", - mem_err->error_type); - } + p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype)); } else { - strcpy(pvt->msg, "unknown error"); + strscpy(pvt->msg, "unknown error"); } /* Error address */ - if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { - e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; - e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; + if (mem_err->validation_bits & CPER_MEM_VALID_PA) { + e->page_frame_number = PHYS_PFN(mem_err->physical_addr); + e->offset_in_page = offset_in_page(mem_err->physical_addr); } /* Error grain */ - if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) { - e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); - } + if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK) + e->grain = ~mem_err->physical_addr_mask + 1; /* Memory error location, mapped on e->location */ p = e->location; - if (mem_err->validation_bits & CPER_MEM_VALID_NODE) - p += sprintf(p, "node:%d ", mem_err->node); - if (mem_err->validation_bits & CPER_MEM_VALID_CARD) - p += sprintf(p, "card:%d ", mem_err->card); - if (mem_err->validation_bits & CPER_MEM_VALID_MODULE) - p += sprintf(p, "module:%d ", mem_err->module); - if (mem_err->validation_bits & CPER_MEM_VALID_BANK) - p += sprintf(p, "bank:%d ", mem_err->bank); - if (mem_err->validation_bits & CPER_MEM_VALID_ROW) - p += 
sprintf(p, "row:%d ", mem_err->row); - if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN) - p += sprintf(p, "col:%d ", mem_err->column); - if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION) - p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); + cper_mem_err_pack(mem_err, &cmem); + p += cper_mem_err_location(&cmem, p); + + if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { + struct dimm_info *dimm; + + p += cper_dimm_err_location(&cmem, p); + dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle); + if (dimm) { + e->top_layer = dimm->idx; + strscpy(e->label, dimm->label); + } + } if (p > e->location) *(p - 1) = '\0'; + if (!*e->label) + strscpy(e->label, "unknown memory"); + /* All other fields are mapped on e->other_detail */ p = pvt->other_detail; - if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { - u64 status = mem_err->error_status; - - p += sprintf(p, "status(0x%016llx): ", (long long)status); - switch ((status >> 8) & 0xff) { - case 1: - p += sprintf(p, "Error detected internal to the component "); - break; - case 16: - p += sprintf(p, "Error detected in the bus "); - break; - case 4: - p += sprintf(p, "Storage error in DRAM memory "); - break; - case 5: - p += sprintf(p, "Storage error in TLB "); - break; - case 6: - p += sprintf(p, "Storage error in cache "); - break; - case 7: - p += sprintf(p, "Error in one or more functional units "); - break; - case 8: - p += sprintf(p, "component failed self test "); - break; - case 9: - p += sprintf(p, "Overflow or undervalue of internal queue "); - break; - case 17: - p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR "); - break; - case 18: - p += sprintf(p, "Improper access error "); - break; - case 19: - p += sprintf(p, "Access to a memory address which is not mapped to any component "); - break; - case 20: - p += sprintf(p, "Loss of Lockstep "); - break; - case 21: - p += sprintf(p, "Response not associated with a request "); - break; - case 22: - p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits "); - break; - case 23: - p += sprintf(p, "Detection of a PATH_ERROR "); - break; - case 25: - p += sprintf(p, "Bus operation timeout "); - break; - case 26: - p += sprintf(p, "A read was issued to data that has been poisoned "); - break; - default: - p += sprintf(p, "reserved "); - break; - } - } - if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) - p += sprintf(p, "requestorID: 0x%016llx ", - (long long)mem_err->requestor_id); - if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID) - p += sprintf(p, "responderID: 0x%016llx ", - (long long)mem_err->responder_id); - if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID) - p += sprintf(p, "targetID: 0x%016llx ", - (long long)mem_err->responder_id); + p += print_mem_error_other_detail(mem_err, p, e->location, OTHER_DETAIL_LEN); if (p > pvt->other_detail) *(p - 1) = '\0'; - /* Generate the trace event */ - grain_bits = fls_long(e->grain); - sprintf(pvt->detail_location, "APEI location: %s %s", - e->location, e->other_detail); - trace_mc_event(type, e->msg, e->label, e->error_count, - mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, - PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, - grain_bits, e->syndrome, pvt->detail_location); - - /* Report the error via EDAC API */ - edac_raw_mc_handle_error(type, mci, e); + edac_raw_mc_handle_error(e); + +unlock: + spin_unlock_irqrestore(&ghes_lock, flags); + + return NOTIFY_OK; } -EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); -int ghes_edac_register(struct 
ghes *ghes, struct device *dev) +static struct notifier_block ghes_edac_mem_err_nb = { + .notifier_call = ghes_edac_report_mem_error, + .priority = 0, +}; + +static int ghes_edac_register(struct device *dev) { bool fake = false; - int rc, num_dimm = 0; struct mem_ctl_info *mci; + struct ghes_pvt *pvt; struct edac_mc_layer layers[1]; - struct ghes_edac_pvt *pvt; - struct ghes_edac_dimm_fill dimm_fill; + unsigned long flags; + int rc = 0; + + /* finish another registration/unregistration instance first */ + mutex_lock(&ghes_reg_mutex); - /* Get the number of DIMMs */ - dmi_walk(ghes_edac_count_dimms, &num_dimm); + /* + * We have only one logical memory controller to which all DIMMs belong. + */ + if (refcount_inc_not_zero(&ghes_refcount)) + goto unlock; + + ghes_scan_system(); /* Check if we've got a bogus BIOS */ - if (num_dimm == 0) { + if (!ghes_hw.num_dimms) { fake = true; - num_dimm = 1; + ghes_hw.num_dimms = 1; } layers[0].type = EDAC_MC_LAYER_ALL_MEM; - layers[0].size = num_dimm; + layers[0].size = ghes_hw.num_dimms; layers[0].is_virt_csrow = true; - /* - * We need to serialize edac_mc_alloc() and edac_mc_add_mc(), - * to avoid duplicated memory controller numbers - */ - mutex_lock(&ghes_edac_lock); - mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers, - sizeof(*pvt)); + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_pvt)); if (!mci) { pr_info("Can't allocate memory for EDAC data\n"); - mutex_unlock(&ghes_edac_lock); - return -ENOMEM; + rc = -ENOMEM; + goto unlock; } - pvt = mci->pvt_info; - memset(pvt, 0, sizeof(*pvt)); - list_add_tail(&pvt->list, &ghes_reglist); - pvt->ghes = ghes; - pvt->mci = mci; - mci->pdev = dev; + pvt = mci->pvt_info; + pvt->mci = mci; + mci->pdev = dev; mci->mtype_cap = MEM_FLAG_EMPTY; mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = "ghes_edac.c"; - mci->mod_ver = GHES_EDAC_REVISION; mci->ctl_name = "ghes_edac"; mci->dev_name = "ghes"; - if (!ghes_edac_mc_num) { - if (!fake) { - pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n"); - pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n"); - pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); - pr_info("If you find incorrect reports, please contact your hardware vendor\n"); - pr_info("to correct its BIOS.\n"); - pr_info("This system has %d DIMM sockets.\n", - num_dimm); - } else { - pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); - pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); - pr_info("work on such system. Use this driver with caution\n"); - } + if (fake) { + pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); + pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); + pr_info("work on such system. 
Use this driver with caution\n"); } + pr_info("This system has %d DIMM sockets.\n", ghes_hw.num_dimms); + if (!fake) { - /* - * Fill DIMM info from DMI for the memory controller #0 - * - * Keep it in blank for the other memory controllers, as - * there's no reliable way to properly credit each DIMM to - * the memory controller, as different BIOSes fill the - * DMI bank location fields on different ways - */ - if (!ghes_edac_mc_num) { - dimm_fill.count = 0; - dimm_fill.mci = mci; - dmi_walk(ghes_edac_dmidecode, &dimm_fill); + struct dimm_info *src, *dst; + int i = 0; + + mci_for_each_dimm(mci, dst) { + src = &ghes_hw.dimms[i]; + + dst->idx = src->idx; + dst->smbios_handle = src->smbios_handle; + dst->nr_pages = src->nr_pages; + dst->mtype = src->mtype; + dst->edac_mode = src->edac_mode; + dst->dtype = src->dtype; + dst->grain = src->grain; + + /* + * If no src->label, preserve default label assigned + * from EDAC core. + */ + if (strlen(src->label)) + memcpy(dst->label, src->label, sizeof(src->label)); + + i++; } + } else { - struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, - mci->n_layers, 0, 0, 0); + struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0); dimm->nr_pages = 1; dimm->grain = 128; @@ -508,30 +479,96 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) rc = edac_mc_add_mc(mci); if (rc < 0) { - pr_info("Can't register at EDAC core\n"); + pr_info("Can't register with the EDAC core\n"); edac_mc_free(mci); - mutex_unlock(&ghes_edac_lock); + rc = -ENODEV; + goto unlock; + } + + spin_lock_irqsave(&ghes_lock, flags); + ghes_pvt = pvt; + spin_unlock_irqrestore(&ghes_lock, flags); + + ghes_register_report_chain(&ghes_edac_mem_err_nb); + + /* only set on success */ + refcount_set(&ghes_refcount, 1); + +unlock: + + /* Not needed anymore */ + kfree(ghes_hw.dimms); + ghes_hw.dimms = NULL; + + mutex_unlock(&ghes_reg_mutex); + + return rc; +} + +static void ghes_edac_unregister(struct ghes *ghes) +{ + struct mem_ctl_info *mci; + unsigned long flags; + + mutex_lock(&ghes_reg_mutex); + + system_scanned = false; + memset(&ghes_hw, 0, sizeof(struct ghes_hw_desc)); + + if (!refcount_dec_and_test(&ghes_refcount)) + goto unlock; + + /* + * Wait for the irq handler being finished. + */ + spin_lock_irqsave(&ghes_lock, flags); + mci = ghes_pvt ? 
ghes_pvt->mci : NULL; + ghes_pvt = NULL; + spin_unlock_irqrestore(&ghes_lock, flags); + + if (!mci) + goto unlock; + + mci = edac_mc_del_mc(mci->pdev); + if (mci) + edac_mc_free(mci); + + ghes_unregister_report_chain(&ghes_edac_mem_err_nb); + +unlock: + mutex_unlock(&ghes_reg_mutex); +} + +static int __init ghes_edac_init(void) +{ + struct ghes *g, *g_tmp; + + ghes_devs = ghes_get_devices(); + if (!ghes_devs) return -ENODEV; + + if (list_empty(ghes_devs)) { + pr_info("GHES probing device list is empty\n"); + return -ENODEV; + } + + list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) { + ghes_edac_register(g->dev); } - ghes_edac_mc_num++; - mutex_unlock(&ghes_edac_lock); return 0; } -EXPORT_SYMBOL_GPL(ghes_edac_register); +module_init(ghes_edac_init); -void ghes_edac_unregister(struct ghes *ghes) +static void __exit ghes_edac_exit(void) { - struct mem_ctl_info *mci; - struct ghes_edac_pvt *pvt, *tmp; - - list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) { - if (ghes == pvt->ghes) { - mci = pvt->mci; - edac_mc_del_mc(mci->pdev); - edac_mc_free(mci); - list_del(&pvt->list); - } + struct ghes *g, *g_tmp; + + list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) { + ghes_edac_unregister(g); } } -EXPORT_SYMBOL_GPL(ghes_edac_unregister); +module_exit(ghes_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Output ACPI APEI/GHES BIOS detected errors via EDAC"); diff --git a/drivers/edac/highbank_l2_edac.c b/drivers/edac/highbank_l2_edac.c index c2bd8c6a4349..24f163ff323f 100644 --- a/drivers/edac/highbank_l2_edac.c +++ b/drivers/edac/highbank_l2_edac.c @@ -1,27 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2011-2012 Calxeda, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/edac.h> #include <linux/interrupt.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <linux/platform_device.h> -#include <linux/of_platform.h> -#include "edac_core.h" #include "edac_module.h" #define SR_CLR_SB_ECC_INTR 0x0 @@ -50,15 +39,22 @@ static irqreturn_t highbank_l2_err_handler(int irq, void *dev_id) return IRQ_HANDLED; } +static const struct of_device_id hb_l2_err_of_match[] = { + { .compatible = "calxeda,hb-sregs-l2-ecc", }, + {}, +}; +MODULE_DEVICE_TABLE(of, hb_l2_err_of_match); + static int highbank_l2_err_probe(struct platform_device *pdev) { + const struct of_device_id *id; struct edac_device_ctl_info *dci; struct hb_l2_drvdata *drvdata; struct resource *r; int res = 0; dci = edac_device_alloc_ctl_info(sizeof(*drvdata), "cpu", - 1, "L", 1, 2, NULL, 0, 0); + 1, "L", 1, 2, 0); if (!dci) return -ENOMEM; @@ -90,49 +86,46 @@ static int highbank_l2_err_probe(struct platform_device *pdev) goto err; } + id = of_match_device(hb_l2_err_of_match, &pdev->dev); + dci->mod_name = pdev->dev.driver->name; + dci->ctl_name = id ? 
id->compatible : "unknown"; + dci->dev_name = dev_name(&pdev->dev); + + if (edac_device_add_device(dci)) + goto err; + drvdata->db_irq = platform_get_irq(pdev, 0); res = devm_request_irq(&pdev->dev, drvdata->db_irq, highbank_l2_err_handler, 0, dev_name(&pdev->dev), dci); if (res < 0) - goto err; + goto err2; drvdata->sb_irq = platform_get_irq(pdev, 1); res = devm_request_irq(&pdev->dev, drvdata->sb_irq, highbank_l2_err_handler, 0, dev_name(&pdev->dev), dci); if (res < 0) - goto err; - - dci->mod_name = dev_name(&pdev->dev); - dci->dev_name = dev_name(&pdev->dev); - - if (edac_device_add_device(dci)) - goto err; + goto err2; devres_close_group(&pdev->dev, NULL); return 0; +err2: + edac_device_del_device(&pdev->dev); err: devres_release_group(&pdev->dev, NULL); edac_device_free_ctl_info(dci); return res; } -static int highbank_l2_err_remove(struct platform_device *pdev) +static void highbank_l2_err_remove(struct platform_device *pdev) { struct edac_device_ctl_info *dci = platform_get_drvdata(pdev); edac_device_del_device(&pdev->dev); edac_device_free_ctl_info(dci); - return 0; } -static const struct of_device_id hb_l2_err_of_match[] = { - { .compatible = "calxeda,hb-sregs-l2-ecc", }, - {}, -}; -MODULE_DEVICE_TABLE(of, hb_l2_err_of_match); - static struct platform_driver highbank_l2_edac_driver = { .probe = highbank_l2_err_probe, .remove = highbank_l2_err_remove, diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c index 4695dd2d71fd..a8879d72d064 100644 --- a/drivers/edac/highbank_mc_edac.c +++ b/drivers/edac/highbank_mc_edac.c @@ -1,56 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2011-2012 Calxeda, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #include <linux/types.h> #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/edac.h> #include <linux/interrupt.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <linux/platform_device.h> -#include <linux/of_platform.h> #include <linux/uaccess.h> -#include "edac_core.h" #include "edac_module.h" /* DDR Ctrlr Error Registers */ -#define HB_DDR_ECC_OPT 0x128 -#define HB_DDR_ECC_U_ERR_ADDR 0x130 -#define HB_DDR_ECC_U_ERR_STAT 0x134 -#define HB_DDR_ECC_U_ERR_DATAL 0x138 -#define HB_DDR_ECC_U_ERR_DATAH 0x13c -#define HB_DDR_ECC_C_ERR_ADDR 0x140 -#define HB_DDR_ECC_C_ERR_STAT 0x144 -#define HB_DDR_ECC_C_ERR_DATAL 0x148 -#define HB_DDR_ECC_C_ERR_DATAH 0x14c -#define HB_DDR_ECC_INT_STATUS 0x180 -#define HB_DDR_ECC_INT_ACK 0x184 -#define HB_DDR_ECC_U_ERR_ID 0x424 -#define HB_DDR_ECC_C_ERR_ID 0x428 -#define HB_DDR_ECC_INT_STAT_CE 0x8 -#define HB_DDR_ECC_INT_STAT_DOUBLE_CE 0x10 -#define HB_DDR_ECC_INT_STAT_UE 0x20 -#define HB_DDR_ECC_INT_STAT_DOUBLE_UE 0x40 +#define HB_DDR_ECC_ERR_BASE 0x128 +#define MW_DDR_ECC_ERR_BASE 0x1b4 + +#define HB_DDR_ECC_OPT 0x00 +#define HB_DDR_ECC_U_ERR_ADDR 0x08 +#define HB_DDR_ECC_U_ERR_STAT 0x0c +#define HB_DDR_ECC_U_ERR_DATAL 0x10 +#define HB_DDR_ECC_U_ERR_DATAH 0x14 +#define HB_DDR_ECC_C_ERR_ADDR 0x18 +#define HB_DDR_ECC_C_ERR_STAT 0x1c +#define HB_DDR_ECC_C_ERR_DATAL 0x20 +#define HB_DDR_ECC_C_ERR_DATAH 0x24 #define HB_DDR_ECC_OPT_MODE_MASK 0x3 #define HB_DDR_ECC_OPT_FWC 0x100 #define HB_DDR_ECC_OPT_XOR_SHIFT 16 +/* DDR Ctrlr Interrupt Registers */ + +#define HB_DDR_ECC_INT_BASE 0x180 +#define MW_DDR_ECC_INT_BASE 0x218 + +#define HB_DDR_ECC_INT_STATUS 0x00 +#define HB_DDR_ECC_INT_ACK 0x04 + +#define HB_DDR_ECC_INT_STAT_CE 0x8 +#define HB_DDR_ECC_INT_STAT_DOUBLE_CE 0x10 +#define HB_DDR_ECC_INT_STAT_UE 0x20 +#define HB_DDR_ECC_INT_STAT_DOUBLE_UE 0x40 + struct hb_mc_drvdata { - void __iomem *mc_vbase; + void __iomem *mc_err_base; + void __iomem *mc_int_base; }; static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) @@ -60,10 +58,10 @@ static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) u32 status, err_addr; /* Read the interrupt status register */ - status = readl(drvdata->mc_vbase + HB_DDR_ECC_INT_STATUS); + status = readl(drvdata->mc_int_base + HB_DDR_ECC_INT_STATUS); if (status & HB_DDR_ECC_INT_STAT_UE) { - err_addr = readl(drvdata->mc_vbase + HB_DDR_ECC_U_ERR_ADDR); + err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_U_ERR_ADDR); edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, err_addr >> PAGE_SHIFT, err_addr & ~PAGE_MASK, 0, @@ -71,9 +69,9 @@ static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) mci->ctl_name, ""); } if (status & HB_DDR_ECC_INT_STAT_CE) { - u32 syndrome = readl(drvdata->mc_vbase + HB_DDR_ECC_C_ERR_STAT); + u32 syndrome = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_STAT); syndrome = (syndrome >> 8) & 0xff; - err_addr = readl(drvdata->mc_vbase + HB_DDR_ECC_C_ERR_ADDR); + err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_ADDR); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, err_addr >> PAGE_SHIFT, err_addr & ~PAGE_MASK, syndrome, @@ -82,66 +80,86 @@ static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) } /* clear the error, clears the interrupt */ - writel(status, drvdata->mc_vbase + HB_DDR_ECC_INT_ACK); + writel(status, drvdata->mc_int_base + HB_DDR_ECC_INT_ACK); return IRQ_HANDLED; } -#ifdef CONFIG_EDAC_DEBUG -static ssize_t highbank_mc_err_inject_write(struct file *file, - const char __user *data, - size_t count, loff_t *ppos) +static void 
highbank_mc_err_inject(struct mem_ctl_info *mci, u8 synd) { - struct mem_ctl_info *mci = file->private_data; struct hb_mc_drvdata *pdata = mci->pvt_info; - char buf[32]; - size_t buf_size; u32 reg; + + reg = readl(pdata->mc_err_base + HB_DDR_ECC_OPT); + reg &= HB_DDR_ECC_OPT_MODE_MASK; + reg |= (synd << HB_DDR_ECC_OPT_XOR_SHIFT) | HB_DDR_ECC_OPT_FWC; + writel(reg, pdata->mc_err_base + HB_DDR_ECC_OPT); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +static ssize_t highbank_mc_inject_ctrl(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); u8 synd; - buf_size = min(count, (sizeof(buf)-1)); - if (copy_from_user(buf, data, buf_size)) - return -EFAULT; - buf[buf_size] = 0; + if (kstrtou8(buf, 16, &synd)) + return -EINVAL; - if (!kstrtou8(buf, 16, &synd)) { - reg = readl(pdata->mc_vbase + HB_DDR_ECC_OPT); - reg &= HB_DDR_ECC_OPT_MODE_MASK; - reg |= (synd << HB_DDR_ECC_OPT_XOR_SHIFT) | HB_DDR_ECC_OPT_FWC; - writel(reg, pdata->mc_vbase + HB_DDR_ECC_OPT); - } + highbank_mc_err_inject(mci, synd); return count; } -static const struct file_operations highbank_mc_debug_inject_fops = { - .open = simple_open, - .write = highbank_mc_err_inject_write, - .llseek = generic_file_llseek, +static DEVICE_ATTR(inject_ctrl, S_IWUSR, NULL, highbank_mc_inject_ctrl); + +static struct attribute *highbank_dev_attrs[] = { + &dev_attr_inject_ctrl.attr, + NULL }; -static void highbank_mc_create_debugfs_nodes(struct mem_ctl_info *mci) -{ - if (mci->debugfs) - debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci, - &highbank_mc_debug_inject_fops); -; -} -#else -static void highbank_mc_create_debugfs_nodes(struct mem_ctl_info *mci) -{} -#endif +ATTRIBUTE_GROUPS(highbank_dev); + +struct hb_mc_settings { + int err_offset; + int int_offset; +}; + +static struct hb_mc_settings hb_settings = { + .err_offset = HB_DDR_ECC_ERR_BASE, + .int_offset = HB_DDR_ECC_INT_BASE, +}; + +static struct hb_mc_settings mw_settings = { + .err_offset = MW_DDR_ECC_ERR_BASE, + .int_offset = MW_DDR_ECC_INT_BASE, +}; + +static const struct of_device_id hb_ddr_ctrl_of_match[] = { + { .compatible = "calxeda,hb-ddr-ctrl", .data = &hb_settings }, + { .compatible = "calxeda,ecx-2000-ddr-ctrl", .data = &mw_settings }, + {}, +}; +MODULE_DEVICE_TABLE(of, hb_ddr_ctrl_of_match); static int highbank_mc_probe(struct platform_device *pdev) { + const struct of_device_id *id; + const struct hb_mc_settings *settings; struct edac_mc_layer layers[2]; struct mem_ctl_info *mci; struct hb_mc_drvdata *drvdata; struct dimm_info *dimm; struct resource *r; + void __iomem *base; u32 control; int irq; int res = 0; + id = of_match_device(hb_ddr_ctrl_of_match, &pdev->dev); + if (!id) + return -ENODEV; + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; layers[0].size = 1; layers[0].is_virt_csrow = true; @@ -157,8 +175,10 @@ static int highbank_mc_probe(struct platform_device *pdev) drvdata = mci->pvt_info; platform_set_drvdata(pdev, mci); - if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) - return -ENOMEM; + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + res = -ENOMEM; + goto free; + } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!r) { @@ -174,35 +194,30 @@ static int highbank_mc_probe(struct platform_device *pdev) goto err; } - drvdata->mc_vbase = devm_ioremap(&pdev->dev, - r->start, resource_size(r)); - if (!drvdata->mc_vbase) { + base = devm_ioremap(&pdev->dev, r->start, resource_size(r)); + if (!base) { dev_err(&pdev->dev, "Unable to map regs\n"); res 
= -ENOMEM; goto err; } - control = readl(drvdata->mc_vbase + HB_DDR_ECC_OPT) & 0x3; + settings = id->data; + drvdata->mc_err_base = base + settings->err_offset; + drvdata->mc_int_base = base + settings->int_offset; + + control = readl(drvdata->mc_err_base + HB_DDR_ECC_OPT) & 0x3; if (!control || (control == 0x2)) { dev_err(&pdev->dev, "No ECC present, or ECC disabled\n"); res = -ENODEV; goto err; } - irq = platform_get_irq(pdev, 0); - res = devm_request_irq(&pdev->dev, irq, highbank_mc_err_handler, - 0, dev_name(&pdev->dev), mci); - if (res < 0) { - dev_err(&pdev->dev, "Unable to request irq %d\n", irq); - goto err; - } - mci->mtype_cap = MEM_FLAG_DDR3; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; - mci->mod_name = dev_name(&pdev->dev); - mci->mod_ver = "1"; - mci->ctl_name = dev_name(&pdev->dev); + mci->mod_name = pdev->dev.driver->name; + mci->ctl_name = id->compatible; + mci->dev_name = dev_name(&pdev->dev); mci->scrub_mode = SCRUB_SW_SRC; /* Only a single 4GB DIMM is supported */ @@ -213,35 +228,37 @@ static int highbank_mc_probe(struct platform_device *pdev) dimm->mtype = MEM_DDR3; dimm->edac_mode = EDAC_SECDED; - res = edac_mc_add_mc(mci); + res = edac_mc_add_mc_with_groups(mci, highbank_dev_groups); if (res < 0) goto err; - highbank_mc_create_debugfs_nodes(mci); + irq = platform_get_irq(pdev, 0); + res = devm_request_irq(&pdev->dev, irq, highbank_mc_err_handler, + 0, dev_name(&pdev->dev), mci); + if (res < 0) { + dev_err(&pdev->dev, "Unable to request irq %d\n", irq); + goto err2; + } devres_close_group(&pdev->dev, NULL); return 0; +err2: + edac_mc_del_mc(&pdev->dev); err: devres_release_group(&pdev->dev, NULL); +free: edac_mc_free(mci); return res; } -static int highbank_mc_remove(struct platform_device *pdev) +static void highbank_mc_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); - return 0; } -static const struct of_device_id hb_ddr_ctrl_of_match[] = { - { .compatible = "calxeda,hb-ddr-ctrl", }, - {}, -}; -MODULE_DEVICE_TABLE(of, hb_ddr_ctrl_of_match); - static struct platform_driver highbank_mc_edac_driver = { .probe = highbank_mc_probe, .remove = highbank_mc_remove, diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c new file mode 100644 index 000000000000..89b3e8cc38b1 --- /dev/null +++ b/drivers/edac/i10nm_base.c @@ -0,0 +1,1285 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for Intel(R) 10nm server memory controller. + * Copyright (c) 2019, Intel Corporation. + * + */ + +#include <linux/kernel.h> +#include <linux/io.h> +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> +#include <asm/mce.h> +#include "edac_module.h" +#include "skx_common.h" + +#define I10NM_REVISION "v0.0.6" +#define EDAC_MOD_STR "i10nm_edac" + +/* Debug macros */ +#define i10nm_printk(level, fmt, arg...) \ + edac_printk(level, "i10nm", fmt, ##arg) + +#define I10NM_GET_SCK_BAR(d, reg) \ + pci_read_config_dword((d)->uracu, 0xd0, &(reg)) +#define I10NM_GET_IMC_BAR(d, i, reg) \ + pci_read_config_dword((d)->uracu, \ + (res_cfg->type == GNR ? 0xd4 : 0xd8) + (i) * 4, &(reg)) +#define I10NM_GET_SAD(d, offset, i, reg)\ + pci_read_config_dword((d)->sad_all, (offset) + (i) * \ + (res_cfg->type == GNR ? 12 : 8), &(reg)) +#define I10NM_GET_HBM_IMC_BAR(d, reg) \ + pci_read_config_dword((d)->uracu, 0xd4, &(reg)) +#define I10NM_GET_CAPID3_CFG(d, reg) \ + pci_read_config_dword((d)->pcu_cr3, \ + res_cfg->type == GNR ? 
0x290 : 0x90, &(reg)) +#define I10NM_GET_CAPID5_CFG(d, reg) \ + pci_read_config_dword((d)->pcu_cr3, \ + res_cfg->type == GNR ? 0x298 : 0x98, &(reg)) +#define I10NM_GET_DIMMMTR(m, i, j) \ + readl((m)->mbase + ((m)->hbm_mc ? 0x80c : \ + (res_cfg->type == GNR ? 0xc0c : 0x2080c)) + \ + (i) * (m)->chan_mmio_sz + (j) * 4) +#define I10NM_GET_MCDDRTCFG(m, i) \ + readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \ + (i) * (m)->chan_mmio_sz) +#define I10NM_GET_MCMTR(m, i) \ + readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \ + (res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) + \ + (i) * (m)->chan_mmio_sz) +#define I10NM_GET_REG32(m, i, offset) \ + readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) +#define I10NM_GET_REG64(m, i, offset) \ + readq((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) +#define I10NM_SET_REG32(m, i, offset, v) \ + writel(v, (m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) + +#define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23) +#define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12) +#define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \ + GET_BITFIELD(reg, 0, 10) + 1) << 12) +#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \ + ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000) + +#define I10NM_GNR_IMC_MMIO_OFFSET 0x24c000 +#define I10NM_GNR_D_IMC_MMIO_OFFSET 0x206000 +#define I10NM_GNR_IMC_MMIO_SIZE 0x4000 +#define I10NM_HBM_IMC_MMIO_SIZE 0x9000 +#define I10NM_DDR_IMC_CH_CNT(reg) GET_BITFIELD(reg, 21, 24) +#define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30) +#define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29) + +#define I10NM_MAX_SAD 16 +#define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0) +#define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5) + +static struct list_head *i10nm_edac_list; + +static struct res_config *res_cfg; +static int retry_rd_err_log; +static int decoding_via_mca; +static bool mem_cfg_2lm; + +static struct reg_rrl icx_reg_rrl_ddr = { + .set_num = 2, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}, + {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}, + }, + .widths = {4, 4, 4, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, + .cecnt_widths = {4, 4, 4, 4}, +}; + +static struct reg_rrl spr_reg_rrl_ddr = { + .set_num = 3, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND, FRE_DEMAND}, + .offsets = { + {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}, + {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}, + {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, + .cecnt_widths = {4, 4, 4, 4}, +}; + +static struct reg_rrl spr_reg_rrl_hbm_pch0 = { + .set_num = 2, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}, + {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x2818, 0x281c, 0x2820, 0x2824}, + .cecnt_widths = 
{4, 4, 4, 4}, +}; + +static struct reg_rrl spr_reg_rrl_hbm_pch1 = { + .set_num = 2, + .reg_num = 6, + .modes = {LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}, + {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x2c18, 0x2c1c, 0x2c20, 0x2c24}, + .cecnt_widths = {4, 4, 4, 4}, +}; + +static struct reg_rrl gnr_reg_rrl_ddr = { + .set_num = 4, + .reg_num = 6, + .modes = {FRE_SCRUB, FRE_DEMAND, LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x2f10, 0x2f20, 0x2f30, 0x2f50, 0x2f60, 0xba0}, + {0x2f14, 0x2f24, 0x2f38, 0x2f54, 0x2f64, 0xba8}, + {0x2f18, 0x2f28, 0x2f40, 0x2f58, 0x2f68, 0xbb0}, + {0x2f1c, 0x2f2c, 0x2f48, 0x2f5c, 0x2f6c, 0xbb8}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(14), + .noover_mask = BIT(15), + .en_mask = BIT(12), + + .cecnt_num = 8, + .cecnt_offsets = {0x2c10, 0x2c14, 0x2c18, 0x2c1c, 0x2c20, 0x2c24, 0x2c28, 0x2c2c}, + .cecnt_widths = {4, 4, 4, 4, 4, 4, 4, 4}, +}; + +static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) +{ + switch (width) { + case 4: + return I10NM_GET_REG32(imc, chan, offset); + case 8: + return I10NM_GET_REG64(imc, chan, offset); + default: + i10nm_printk(KERN_ERR, "Invalid readd RRL 0x%x width %d\n", offset, width); + return 0; + } +} + +static void write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val) +{ + switch (width) { + case 4: + return I10NM_SET_REG32(imc, chan, offset, (u32)val); + default: + i10nm_printk(KERN_ERR, "Invalid write RRL 0x%x width %d\n", offset, width); + } +} + +static void enable_rrl(struct skx_imc *imc, int chan, struct reg_rrl *rrl, + int rrl_set, bool enable, u32 *rrl_ctl) +{ + enum rrl_mode mode = rrl->modes[rrl_set]; + u32 offset = rrl->offsets[rrl_set][0], v; + u8 width = rrl->widths[0]; + bool first, scrub; + + /* First or last read error. */ + first = (mode == FRE_SCRUB || mode == FRE_DEMAND); + /* Patrol scrub or on-demand read error. */ + scrub = (mode == FRE_SCRUB || mode == LRE_SCRUB); + + v = read_imc_reg(imc, chan, offset, width); + + if (enable) { + /* Save default configurations. */ + *rrl_ctl = v; + v &= ~rrl->uc_mask; + + if (first) + v |= rrl->noover_mask; + else + v &= ~rrl->noover_mask; + + if (scrub) + v |= rrl->en_patspr_mask; + else + v &= ~rrl->en_patspr_mask; + + v |= rrl->en_mask; + } else { + /* Restore default configurations. 
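+		 * Each field goes back to whatever the enable path saved in
+		 * *rrl_ctl above: a set uc_mask bit in the saved copy means
+		 * "set uc_mask again", a clear en_mask bit means "clear
+		 * en_mask again", and likewise for the noover/en_patspr bits.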
*/ + if (*rrl_ctl & rrl->uc_mask) + v |= rrl->uc_mask; + + if (first) { + if (!(*rrl_ctl & rrl->noover_mask)) + v &= ~rrl->noover_mask; + } else { + if (*rrl_ctl & rrl->noover_mask) + v |= rrl->noover_mask; + } + + if (scrub) { + if (!(*rrl_ctl & rrl->en_patspr_mask)) + v &= ~rrl->en_patspr_mask; + } else { + if (*rrl_ctl & rrl->en_patspr_mask) + v |= rrl->en_patspr_mask; + } + + if (!(*rrl_ctl & rrl->en_mask)) + v &= ~rrl->en_mask; + } + + write_imc_reg(imc, chan, offset, width, v); +} + +static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl, + bool enable, u32 *rrl_ctl) +{ + for (int i = 0; i < rrl->set_num; i++) + enable_rrl(imc, chan, rrl, i, enable, rrl_ctl + i); +} + +static void enable_rrls_ddr(struct skx_imc *imc, bool enable) +{ + struct reg_rrl *rrl_ddr = res_cfg->reg_rrl_ddr; + int i, chan_num = res_cfg->ddr_chan_num; + struct skx_channel *chan = imc->chan; + + if (!imc->mbase) + return; + + for (i = 0; i < chan_num; i++) + enable_rrls(imc, i, rrl_ddr, enable, chan[i].rrl_ctl[0]); +} + +static void enable_rrls_hbm(struct skx_imc *imc, bool enable) +{ + struct reg_rrl **rrl_hbm = res_cfg->reg_rrl_hbm; + int i, chan_num = res_cfg->hbm_chan_num; + struct skx_channel *chan = imc->chan; + + if (!imc->mbase || !imc->hbm_mc || !rrl_hbm[0] || !rrl_hbm[1]) + return; + + for (i = 0; i < chan_num; i++) { + enable_rrls(imc, i, rrl_hbm[0], enable, chan[i].rrl_ctl[0]); + enable_rrls(imc, i, rrl_hbm[1], enable, chan[i].rrl_ctl[1]); + } +} + +static void enable_retry_rd_err_log(bool enable) +{ + struct skx_dev *d; + int i, imc_num; + + edac_dbg(2, "\n"); + + list_for_each_entry(d, i10nm_edac_list, list) { + imc_num = res_cfg->ddr_imc_num; + for (i = 0; i < imc_num; i++) + enable_rrls_ddr(&d->imc[i], enable); + + imc_num += res_cfg->hbm_imc_num; + for (; i < imc_num; i++) + enable_rrls_hbm(&d->imc[i], enable); + } +} + +static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, + int len, bool scrub_err) +{ + int i, j, n, ch = res->channel, pch = res->cs & 1; + struct skx_imc *imc = &res->dev->imc[res->imc]; + u64 log, corr, status_mask; + struct reg_rrl *rrl; + bool scrub; + u32 offset; + u8 width; + + if (!imc->mbase) + return; + + rrl = imc->hbm_mc ? res_cfg->reg_rrl_hbm[pch] : res_cfg->reg_rrl_ddr; + + if (!rrl) + return; + + status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask; + + n = scnprintf(msg, len, " retry_rd_err_log["); + for (i = 0; i < rrl->set_num; i++) { + scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB); + if (scrub_err != scrub) + continue; + + for (j = 0; j < rrl->reg_num && len - n > 0; j++) { + offset = rrl->offsets[i][j]; + width = rrl->widths[j]; + log = read_imc_reg(imc, ch, offset, width); + + if (width == 4) + n += scnprintf(msg + n, len - n, "%.8llx ", log); + else + n += scnprintf(msg + n, len - n, "%.16llx ", log); + + /* Clear RRL status if RRL in Linux control mode. */ + if (retry_rd_err_log == 2 && !j && (log & status_mask)) + write_imc_reg(imc, ch, offset, width, log & ~status_mask); + } + } + + /* Move back one space. */ + n--; + n += scnprintf(msg + n, len - n, "]"); + + if (len - n > 0) { + n += scnprintf(msg + n, len - n, " correrrcnt["); + for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) { + offset = rrl->cecnt_offsets[i]; + width = rrl->cecnt_widths[i]; + corr = read_imc_reg(imc, ch, offset, width); + + /* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. 
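+			 * Worked example with a made-up value: if the 4-byte
+			 * register reads 0x00050002, the code below prints the
+			 * two 16-bit counters as "0002 0005" (low half first).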
*/ + if (res_cfg->type <= SPR) { + n += scnprintf(msg + n, len - n, "%.4llx %.4llx ", + corr & 0xffff, corr >> 16); + } else { + /* CPUs {GNR} encode one counter per CORRERRCNT register. */ + if (width == 4) + n += scnprintf(msg + n, len - n, "%.8llx ", corr); + else + n += scnprintf(msg + n, len - n, "%.16llx ", corr); + } + } + + /* Move back one space. */ + n--; + n += scnprintf(msg + n, len - n, "]"); + } +} + +static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus, + unsigned int dev, unsigned int fun) +{ + struct pci_dev *pdev; + + pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun)); + if (!pdev) { + edac_dbg(2, "No device %02x:%02x.%x\n", + bus, dev, fun); + return NULL; + } + + if (unlikely(pci_enable_device(pdev) < 0)) { + edac_dbg(2, "Failed to enable device %02x:%02x.%x\n", + bus, dev, fun); + pci_dev_put(pdev); + return NULL; + } + + return pdev; +} + +/** + * i10nm_get_imc_num() - Get the number of present DDR memory controllers. + * + * @cfg : The pointer to the structure of EDAC resource configurations. + * + * For Granite Rapids CPUs, the number of present DDR memory controllers read + * at runtime overwrites the value statically configured in @cfg->ddr_imc_num. + * For other CPUs, the number of present DDR memory controllers is statically + * configured in @cfg->ddr_imc_num. + * + * RETURNS : 0 on success, < 0 on failure. + */ +static int i10nm_get_imc_num(struct res_config *cfg) +{ + int n, imc_num, chan_num = 0; + struct skx_dev *d; + u32 reg; + + list_for_each_entry(d, i10nm_edac_list, list) { + d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->pcu_cr3_bdf.bus], + res_cfg->pcu_cr3_bdf.dev, + res_cfg->pcu_cr3_bdf.fun); + if (!d->pcu_cr3) + continue; + + if (I10NM_GET_CAPID5_CFG(d, reg)) + continue; + + n = I10NM_DDR_IMC_CH_CNT(reg); + + if (!chan_num) { + chan_num = n; + edac_dbg(2, "Get DDR CH number: %d\n", chan_num); + } else if (chan_num != n) { + i10nm_printk(KERN_NOTICE, "Get DDR CH numbers: %d, %d\n", chan_num, n); + } + } + + switch (cfg->type) { + case GNR: + /* + * One channel per DDR memory controller for Granite Rapids CPUs. + */ + imc_num = chan_num; + + if (!imc_num) { + i10nm_printk(KERN_ERR, "Invalid DDR MC number\n"); + return -ENODEV; + } + + if (cfg->ddr_imc_num != imc_num) { + /* + * Update the configuration data to reflect the number of + * present DDR memory controllers. + */ + cfg->ddr_imc_num = imc_num; + edac_dbg(2, "Set DDR MC number: %d", imc_num); + + /* Release and reallocate skx_dev list with the updated number. */ + skx_remove(); + if (skx_get_all_bus_mappings(cfg, &i10nm_edac_list) <= 0) + return -ENODEV; + } + + return 0; + default: + /* + * For other CPUs, the number of present DDR memory controllers + * is statically pre-configured in cfg->ddr_imc_num. + */ + return 0; + } +} + +static bool i10nm_check_2lm(struct res_config *cfg) +{ + struct skx_dev *d; + u32 reg; + int i; + + list_for_each_entry(d, i10nm_edac_list, list) { + d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->sad_all_bdf.bus], + res_cfg->sad_all_bdf.dev, + res_cfg->sad_all_bdf.fun); + if (!d->sad_all) + continue; + + for (i = 0; i < I10NM_MAX_SAD; i++) { + I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg); + if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) { + edac_dbg(2, "2-level memory configuration.\n"); + return true; + } + } + } + + return false; +} + +/* + * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code. 
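+ * (DDRT is the transaction protocol used by Intel Optane persistent memory
+ * DIMMs; the MCA-bank decoder below cannot resolve such errors, so they are
+ * filtered out by their model specific error codes.)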
+ * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS. + */ +static bool i10nm_mscod_is_ddrt(u32 mscod) +{ + switch (res_cfg->type) { + case I10NM: + switch (mscod) { + case 0x0106: case 0x0107: + case 0x0800: case 0x0804: + case 0x0806 ... 0x0808: + case 0x080a ... 0x080e: + case 0x0810: case 0x0811: + case 0x0816: case 0x081e: + case 0x081f: + return true; + } + + break; + case SPR: + switch (mscod) { + case 0x0800: case 0x0804: + case 0x0806 ... 0x0808: + case 0x080a ... 0x080e: + case 0x0810: case 0x0811: + case 0x0816: case 0x081e: + case 0x081f: + return true; + } + + break; + default: + return false; + } + + return false; +} + +static bool i10nm_mc_decode_available(struct mce *mce) +{ +#define ICX_IMCx_CHy 0x06666000 + u8 bank; + + if (!decoding_via_mca || mem_cfg_2lm) + return false; + + if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) + != (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) + return false; + + bank = mce->bank; + + switch (res_cfg->type) { + case I10NM: + /* Check whether the bank is one of {13,14,17,18,21,22,25,26} */ + if (!(ICX_IMCx_CHy & (1 << bank))) + return false; + break; + case SPR: + if (bank < 13 || bank > 20) + return false; + break; + default: + return false; + } + + /* DDRT errors can't be decoded from MCA bank registers */ + if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) + return false; + + if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) + return false; + + return true; +} + +static bool i10nm_mc_decode(struct decoded_addr *res) +{ + struct mce *m = res->mce; + struct skx_dev *d; + u8 bank; + + if (!i10nm_mc_decode_available(m)) + return false; + + list_for_each_entry(d, i10nm_edac_list, list) { + if (d->imc[0].src_id == m->socketid) { + res->socket = m->socketid; + res->dev = d; + break; + } + } + + switch (res_cfg->type) { + case I10NM: + bank = m->bank - 13; + res->imc = bank / 4; + res->channel = bank % 2; + res->column = GET_BITFIELD(m->misc, 9, 18) << 2; + res->row = GET_BITFIELD(m->misc, 19, 39); + res->bank_group = GET_BITFIELD(m->misc, 40, 41); + res->bank_address = GET_BITFIELD(m->misc, 42, 43); + res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; + res->rank = GET_BITFIELD(m->misc, 56, 58); + res->dimm = res->rank >> 2; + res->rank = res->rank % 4; + break; + case SPR: + bank = m->bank - 13; + res->imc = bank / 2; + res->channel = bank % 2; + res->column = GET_BITFIELD(m->misc, 9, 18) << 2; + res->row = GET_BITFIELD(m->misc, 19, 36); + res->bank_group = GET_BITFIELD(m->misc, 37, 38); + res->bank_address = GET_BITFIELD(m->misc, 39, 40); + res->bank_group |= GET_BITFIELD(m->misc, 41, 41) << 2; + res->rank = GET_BITFIELD(m->misc, 57, 57); + res->dimm = GET_BITFIELD(m->misc, 58, 58); + break; + default: + return false; + } + + if (!res->dev) { + skx_printk(KERN_ERR, "No device for src_id %d imc %d\n", + m->socketid, res->imc); + return false; + } + + return true; +} + +/** + * get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller. + * + * @d : The pointer to the structure of CPU socket EDAC device. + * @logical_idx : The logical index of the present memory controller (0 ~ max present MC# - 1). + * @physical_idx : To store the corresponding physical index of @logical_idx. + * + * RETURNS : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure. 
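+ *
+ * Hypothetical usage sketch (caller names are illustrative, not from this
+ * patch); each returned device carries a PCI reference the caller owns:
+ *
+ *	for (lmc = 0; ; lmc++) {
+ *		mdev = get_gnr_mdev(d, lmc, &pmc);
+ *		if (!mdev)
+ *			break;
+ *		probe_one_imc(d, lmc, pmc, mdev);
+ *	}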
+ */ +static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *physical_idx) +{ +#define GNR_MAX_IMC_PCI_CNT 28 + + struct pci_dev *mdev; + int i, logical = 0; + + /* + * Detect present memory controllers from { PCI device: 8-5, function 7-1 } + */ + for (i = 0; i < GNR_MAX_IMC_PCI_CNT; i++) { + mdev = pci_get_dev_wrapper(d->seg, + d->bus[res_cfg->ddr_mdev_bdf.bus], + res_cfg->ddr_mdev_bdf.dev + i / 7, + res_cfg->ddr_mdev_bdf.fun + i % 7); + + if (mdev) { + if (logical == logical_idx) { + *physical_idx = i; + return mdev; + } + + pci_dev_put(mdev); + logical++; + } + } + + return NULL; +} + +static u32 get_gnr_imc_mmio_offset(void) +{ + if (boot_cpu_data.x86_vfm == INTEL_GRANITERAPIDS_D) + return I10NM_GNR_D_IMC_MMIO_OFFSET; + + return I10NM_GNR_IMC_MMIO_OFFSET; +} + +/** + * get_ddr_munit() - Get the resource of the i-th DDR memory controller. + * + * @d : The pointer to the structure of CPU socket EDAC device. + * @i : The index of the CPU socket relative DDR memory controller. + * @offset : To store the MMIO offset of the i-th DDR memory controller. + * @size : To store the MMIO size of the i-th DDR memory controller. + * + * RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure. + */ +static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsigned long *size) +{ + struct pci_dev *mdev; + int physical_idx; + u32 reg; + + switch (res_cfg->type) { + case GNR: + if (I10NM_GET_IMC_BAR(d, 0, reg)) { + i10nm_printk(KERN_ERR, "Failed to get mc0 bar\n"); + return NULL; + } + + mdev = get_gnr_mdev(d, i, &physical_idx); + if (!mdev) + return NULL; + + *offset = I10NM_GET_IMC_MMIO_OFFSET(reg) + + get_gnr_imc_mmio_offset() + + physical_idx * I10NM_GNR_IMC_MMIO_SIZE; + *size = I10NM_GNR_IMC_MMIO_SIZE; + + break; + default: + if (I10NM_GET_IMC_BAR(d, i, reg)) { + i10nm_printk(KERN_ERR, "Failed to get mc%d bar\n", i); + return NULL; + } + + mdev = pci_get_dev_wrapper(d->seg, + d->bus[res_cfg->ddr_mdev_bdf.bus], + res_cfg->ddr_mdev_bdf.dev + i, + res_cfg->ddr_mdev_bdf.fun); + if (!mdev) + return NULL; + + *offset = I10NM_GET_IMC_MMIO_OFFSET(reg); + *size = I10NM_GET_IMC_MMIO_SIZE(reg); + } + + return mdev; +} + +/** + * i10nm_imc_absent() - Check whether the memory controller @imc is absent + * + * @imc : The pointer to the structure of memory controller EDAC device. + * + * RETURNS : true if the memory controller EDAC device is absent, false otherwise. + */ +static bool i10nm_imc_absent(struct skx_imc *imc) +{ + u32 mcmtr; + int i; + + switch (res_cfg->type) { + case SPR: + for (i = 0; i < res_cfg->ddr_chan_num; i++) { + mcmtr = I10NM_GET_MCMTR(imc, i); + edac_dbg(1, "ch%d mcmtr reg %x\n", i, mcmtr); + if (mcmtr != ~0) + return false; + } + + /* + * Some workstations' absent memory controllers still + * appear as PCIe devices, misleading the EDAC driver. + * By observing that the MMIO registers of these absent + * memory controllers consistently hold the value of ~0. + * + * We identify a memory controller as absent by checking + * if its MMIO register "mcmtr" == ~0 in all its channels. 
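+ *
+ * Put differently: a single channel whose mcmtr reads anything other than
+ * 0xffffffff proves the controller present; only an all-~0 readout marks
+ * it absent.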
+ */ + return true; + default: + return false; + } +} + +static int i10nm_get_ddr_munits(void) +{ + struct pci_dev *mdev; + void __iomem *mbase; + unsigned long size; + struct skx_dev *d; + int i, lmc, j = 0; + u32 reg, off; + u64 base; + + list_for_each_entry(d, i10nm_edac_list, list) { + d->util_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->util_all_bdf.bus], + res_cfg->util_all_bdf.dev, + res_cfg->util_all_bdf.fun); + if (!d->util_all) + return -ENODEV; + + d->uracu = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->uracu_bdf.bus], + res_cfg->uracu_bdf.dev, + res_cfg->uracu_bdf.fun); + if (!d->uracu) + return -ENODEV; + + if (I10NM_GET_SCK_BAR(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to socket bar\n"); + return -ENODEV; + } + + base = I10NM_GET_SCK_MMIO_BASE(reg); + edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n", + j++, base, reg); + + for (lmc = 0, i = 0; i < res_cfg->ddr_imc_num; i++) { + mdev = get_ddr_munit(d, i, &off, &size); + + if (i == 0 && !mdev) { + i10nm_printk(KERN_ERR, "No IMC found\n"); + return -ENODEV; + } + if (!mdev) + continue; + + edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n", + i, base + off, size, reg); + + mbase = ioremap(base + off, size); + if (!mbase) { + i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", + base + off); + return -ENODEV; + } + + d->imc[lmc].mbase = mbase; + if (i10nm_imc_absent(&d->imc[lmc])) { + pci_dev_put(mdev); + iounmap(mbase); + d->imc[lmc].mbase = NULL; + edac_dbg(2, "Skip absent mc%d\n", i); + continue; + } else { + d->imc[lmc].mdev = mdev; + if (res_cfg->type == SPR) + skx_set_mc_mapping(d, i, lmc); + lmc++; + } + } + } + + return 0; +} + +static bool i10nm_check_hbm_imc(struct skx_dev *d) +{ + u32 reg; + + if (I10NM_GET_CAPID3_CFG(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n"); + return false; + } + + return I10NM_IS_HBM_PRESENT(reg) != 0; +} + +static int i10nm_get_hbm_munits(void) +{ + struct pci_dev *mdev; + void __iomem *mbase; + u32 reg, off, mcmtr; + struct skx_dev *d; + int i, lmc; + u64 base; + + list_for_each_entry(d, i10nm_edac_list, list) { + if (!d->pcu_cr3) + return -ENODEV; + + if (!i10nm_check_hbm_imc(d)) { + i10nm_printk(KERN_DEBUG, "No hbm memory\n"); + return -ENODEV; + } + + if (I10NM_GET_SCK_BAR(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get socket bar\n"); + return -ENODEV; + } + base = I10NM_GET_SCK_MMIO_BASE(reg); + + if (I10NM_GET_HBM_IMC_BAR(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n"); + return -ENODEV; + } + base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg); + + lmc = res_cfg->ddr_imc_num; + + for (i = 0; i < res_cfg->hbm_imc_num; i++) { + mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->hbm_mdev_bdf.bus], + res_cfg->hbm_mdev_bdf.dev + i / 4, + res_cfg->hbm_mdev_bdf.fun + i % 4); + + if (i == 0 && !mdev) { + i10nm_printk(KERN_ERR, "No hbm mc found\n"); + return -ENODEV; + } + if (!mdev) + continue; + + d->imc[lmc].mdev = mdev; + off = i * I10NM_HBM_IMC_MMIO_SIZE; + + edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n", + lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE); + + mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE); + if (!mbase) { + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + + i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n", + base + off); + return -ENOMEM; + } + + d->imc[lmc].mbase = mbase; + d->imc[lmc].hbm_mc = true; + + mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); + if (!I10NM_IS_HBM_IMC(mcmtr)) { + iounmap(d->imc[lmc].mbase); + d->imc[lmc].mbase = NULL; + d->imc[lmc].hbm_mc = false; + 
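+				/*
+				 * Drop the device reference taken by
+				 * pci_get_dev_wrapper() before bailing out.
+				 */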
pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + + i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n"); + return -ENODEV; + } + + lmc++; + } + } + + return 0; +} + +static struct res_config i10nm_cfg0 = { + .type = I10NM, + .decs_did = 0x3452, + .busno_cfg_offset = 0xcc, + .ddr_imc_num = 4, + .ddr_chan_num = 2, + .ddr_dimm_num = 2, + .ddr_chan_mmio_sz = 0x4000, + .sad_all_bdf = {1, 29, 0}, + .pcu_cr3_bdf = {1, 30, 3}, + .util_all_bdf = {1, 29, 1}, + .uracu_bdf = {0, 0, 1}, + .ddr_mdev_bdf = {0, 12, 0}, + .hbm_mdev_bdf = {0, 12, 1}, + .sad_all_offset = 0x108, + .reg_rrl_ddr = &icx_reg_rrl_ddr, +}; + +static struct res_config i10nm_cfg1 = { + .type = I10NM, + .decs_did = 0x3452, + .busno_cfg_offset = 0xd0, + .ddr_imc_num = 4, + .ddr_chan_num = 2, + .ddr_dimm_num = 2, + .ddr_chan_mmio_sz = 0x4000, + .sad_all_bdf = {1, 29, 0}, + .pcu_cr3_bdf = {1, 30, 3}, + .util_all_bdf = {1, 29, 1}, + .uracu_bdf = {0, 0, 1}, + .ddr_mdev_bdf = {0, 12, 0}, + .hbm_mdev_bdf = {0, 12, 1}, + .sad_all_offset = 0x108, + .reg_rrl_ddr = &icx_reg_rrl_ddr, +}; + +static struct res_config spr_cfg = { + .type = SPR, + .decs_did = 0x3252, + .busno_cfg_offset = 0xd0, + .ddr_imc_num = 4, + .ddr_chan_num = 2, + .ddr_dimm_num = 2, + .hbm_imc_num = 16, + .hbm_chan_num = 2, + .hbm_dimm_num = 1, + .ddr_chan_mmio_sz = 0x8000, + .hbm_chan_mmio_sz = 0x4000, + .support_ddr5 = true, + .sad_all_bdf = {1, 10, 0}, + .pcu_cr3_bdf = {1, 30, 3}, + .util_all_bdf = {1, 29, 1}, + .uracu_bdf = {0, 0, 1}, + .ddr_mdev_bdf = {0, 12, 0}, + .hbm_mdev_bdf = {0, 12, 1}, + .sad_all_offset = 0x300, + .reg_rrl_ddr = &spr_reg_rrl_ddr, + .reg_rrl_hbm[0] = &spr_reg_rrl_hbm_pch0, + .reg_rrl_hbm[1] = &spr_reg_rrl_hbm_pch1, +}; + +static struct res_config gnr_cfg = { + .type = GNR, + .decs_did = 0x3252, + .busno_cfg_offset = 0xd0, + .ddr_imc_num = 12, + .ddr_chan_num = 1, + .ddr_dimm_num = 2, + .ddr_chan_mmio_sz = 0x4000, + .support_ddr5 = true, + .sad_all_bdf = {0, 13, 0}, + .pcu_cr3_bdf = {0, 5, 0}, + .util_all_bdf = {0, 13, 1}, + .uracu_bdf = {0, 0, 1}, + .ddr_mdev_bdf = {0, 5, 1}, + .sad_all_offset = 0x300, + .reg_rrl_ddr = &gnr_reg_rrl_ddr, +}; + +static const struct x86_cpu_id i10nm_cpuids[] = { + X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MIN, 0x3, &i10nm_cfg0), + X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, 0x4, X86_STEP_MAX, &i10nm_cfg1), + X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, X86_STEP_MIN, 0x3, &i10nm_cfg0), + X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, 0x4, X86_STEP_MAX, &i10nm_cfg1), + X86_MATCH_VFM( INTEL_ICELAKE_D, &i10nm_cfg1), + + X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_cfg), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_cfg), + X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_cfg), + X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_cfg), + X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_cfg), + X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_cfg), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_cfg), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); + +static bool i10nm_check_ecc(struct skx_imc *imc, int chan) +{ + u32 mcmtr; + + mcmtr = I10NM_GET_MCMTR(imc, chan); + edac_dbg(1, "ch%d mcmtr reg %x\n", chan, mcmtr); + + return !!GET_BITFIELD(mcmtr, 2, 2); +} + +static bool i10nm_channel_disabled(struct skx_imc *imc, int chan) +{ + u32 mcmtr = I10NM_GET_MCMTR(imc, chan); + + edac_dbg(1, "mc%d ch%d mcmtr reg %x\n", imc->mc, chan, mcmtr); + + return (mcmtr == ~0 || GET_BITFIELD(mcmtr, 18, 18)); +} + +static int i10nm_get_dimm_config(struct mem_ctl_info *mci, + struct res_config *cfg) +{ + struct skx_pvt *pvt = mci->pvt_info; + struct skx_imc *imc = pvt->imc; + u32 
mtr, mcddrtcfg = 0; + struct dimm_info *dimm; + int i, j, ndimms; + + for (i = 0; i < imc->num_channels; i++) { + if (!imc->mbase) + continue; + + if (i10nm_channel_disabled(imc, i)) { + edac_dbg(1, "mc%d ch%d is disabled.\n", imc->mc, i); + continue; + } + + ndimms = 0; + + if (res_cfg->type != GNR) + mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); + + for (j = 0; j < imc->num_dimms; j++) { + dimm = edac_get_dimm(mci, i, j, 0); + mtr = I10NM_GET_DIMMMTR(imc, i, j); + edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n", + mtr, mcddrtcfg, imc->mc, i, j); + + if (IS_DIMM_PRESENT(mtr)) + ndimms += skx_get_dimm_info(mtr, 0, 0, dimm, + imc, i, j, cfg); + else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) + ndimms += skx_get_nvdimm_info(dimm, imc, i, j, + EDAC_MOD_STR); + } + if (ndimms && !i10nm_check_ecc(imc, i)) { + i10nm_printk(KERN_ERR, "ECC is disabled on imc %d channel %d\n", + imc->mc, i); + return -ENODEV; + } + } + + return 0; +} + +static struct notifier_block i10nm_mce_dec = { + .notifier_call = skx_mce_check_error, + .priority = MCE_PRIO_EDAC, +}; + +static int __init i10nm_init(void) +{ + u8 mc = 0, src_id = 0; + const struct x86_cpu_id *id; + struct res_config *cfg; + const char *owner; + struct skx_dev *d; + int rc, i, off[3] = {0xd0, 0xc8, 0xcc}; + u64 tolm, tohm; + int imc_num; + + edac_dbg(2, "\n"); + + if (ghes_get_devices()) + return -EBUSY; + + owner = edac_get_owner(); + if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) + return -EBUSY; + + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + id = x86_match_cpu(i10nm_cpuids); + if (!id) + return -ENODEV; + + cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); + res_cfg = cfg; + + rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm); + if (rc) + return rc; + + rc = skx_get_all_bus_mappings(cfg, &i10nm_edac_list); + if (rc < 0) + goto fail; + if (rc == 0) { + i10nm_printk(KERN_ERR, "No memory controllers found\n"); + return -ENODEV; + } + + rc = i10nm_get_imc_num(cfg); + if (rc < 0) + goto fail; + + mem_cfg_2lm = i10nm_check_2lm(cfg); + skx_set_mem_cfg(mem_cfg_2lm); + + rc = i10nm_get_ddr_munits(); + + if (i10nm_get_hbm_munits() && rc) + goto fail; + + imc_num = res_cfg->ddr_imc_num + res_cfg->hbm_imc_num; + + list_for_each_entry(d, i10nm_edac_list, list) { + rc = skx_get_src_id(d, 0xf8, &src_id); + if (rc < 0) + goto fail; + + edac_dbg(2, "src_id = %d\n", src_id); + for (i = 0; i < imc_num; i++) { + if (!d->imc[i].mdev) + continue; + + d->imc[i].mc = mc++; + d->imc[i].lmc = i; + d->imc[i].src_id = src_id; + if (d->imc[i].hbm_mc) { + d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz; + d->imc[i].num_channels = cfg->hbm_chan_num; + d->imc[i].num_dimms = cfg->hbm_dimm_num; + } else { + d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; + d->imc[i].num_channels = cfg->ddr_chan_num; + d->imc[i].num_dimms = cfg->ddr_dimm_num; + } + + rc = skx_register_mci(&d->imc[i], &d->imc[i].mdev->dev, + pci_name(d->imc[i].mdev), + "Intel_10nm Socket", EDAC_MOD_STR, + i10nm_get_dimm_config, cfg); + if (rc < 0) + goto fail; + } + } + + rc = skx_adxl_get(); + if (rc) + goto fail; + + opstate_init(); + mce_register_decode_chain(&i10nm_mce_dec); + skx_setup_debug("i10nm_test"); + + if (retry_rd_err_log && res_cfg->reg_rrl_ddr) { + skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log); + if (retry_rd_err_log == 2) + enable_retry_rd_err_log(true); + } else { + skx_set_decode(i10nm_mc_decode, NULL); + } + + i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION); + + return 0; +fail: + skx_remove(); + return rc; +} + +static 
void __exit i10nm_exit(void) +{ + edac_dbg(2, "\n"); + + if (retry_rd_err_log && res_cfg->reg_rrl_ddr) { + skx_set_decode(NULL, NULL); + if (retry_rd_err_log == 2) + enable_retry_rd_err_log(false); + } + + skx_teardown_debug(); + mce_unregister_decode_chain(&i10nm_mce_dec); + skx_adxl_put(); + skx_remove(); +} + +module_init(i10nm_init); +module_exit(i10nm_exit); + +static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp) +{ + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + + if (ret || val > 1) + return -EINVAL; + + if (val && mem_cfg_2lm) { + i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n"); + return -EIO; + } + + ret = param_set_int(buf, kp); + + return ret; +} + +static const struct kernel_param_ops decoding_via_mca_param_ops = { + .set = set_decoding_via_mca, + .get = param_get_int, +}; + +module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644); +MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable"); + +module_param(retry_rd_err_log, int, 0444); +MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)"); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors"); diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c index 694efcbf19c0..9065bc4386ff 100644 --- a/drivers/edac/i3000_edac.c +++ b/drivers/edac/i3000_edac.c @@ -14,9 +14,7 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/edac.h> -#include "edac_core.h" - -#define I3000_REVISION "1.1" +#include "edac_module.h" #define EDAC_MOD_STR "i3000_edac" @@ -275,7 +273,6 @@ static void i3000_check(struct mem_ctl_info *mci) { struct i3000_error_info info; - edac_dbg(1, "MC%d\n", mci->mc_idx); i3000_get_error_info(mci, &info); i3000_process_error_info(mci, &info, 1); } @@ -326,7 +323,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx) pci_read_config_dword(pdev, I3000_MCHBAR, (u32 *) & mchbar); mchbar &= I3000_MCHBAR_MASK; - window = ioremap_nocache(mchbar, I3000_MMR_WINDOW_SIZE); + window = ioremap(mchbar, I3000_MMR_WINDOW_SIZE); if (!window) { printk(KERN_ERR "i3000: cannot map mmio space at 0x%lx\n", mchbar); @@ -375,7 +372,6 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx) mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = I3000_REVISION; mci->ctl_name = i3000_devs[dev_idx].ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = i3000_check; @@ -487,7 +483,7 @@ static void i3000_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(i3000_pci_tbl) = { +static const struct pci_device_id i3000_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 3000_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I3000}, @@ -511,8 +507,8 @@ static int __init i3000_init(void) edac_dbg(3, "MC:\n"); - /* Ensure that the OPSTATE is set correctly for POLL or NMI */ - opstate_init(); + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); pci_rc = pci_register_driver(&i3000_driver); if (pci_rc < 0) @@ -542,8 +538,7 @@ fail1: pci_unregister_driver(&i3000_driver); fail0: - if (mci_pdev) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); return pci_rc; } diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index aa44c1718f50..afccdebf5ac1 100644 --- a/drivers/edac/i3200_edac.c +++ 
b/drivers/edac/i3200_edac.c @@ -13,11 +13,9 @@ #include <linux/pci_ids.h> #include <linux/edac.h> #include <linux/io.h> -#include "edac_core.h" +#include "edac_module.h" -#include <asm-generic/io-64-nonatomic-lo-hi.h> - -#define I3200_REVISION "1.1" +#include <linux/io-64-nonatomic-lo-hi.h> #define EDAC_MOD_STR "i3200_edac" @@ -242,11 +240,11 @@ static void i3200_process_error_info(struct mem_ctl_info *mci, -1, -1, "i3000 UE", ""); } else if (log & I3200_ECCERRLOG_CE) { - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, eccerrlog_syndrome(log), eccerrlog_row(channel, log), -1, -1, - "i3000 UE", ""); + "i3000 CE", ""); } } } @@ -255,13 +253,11 @@ static void i3200_check(struct mem_ctl_info *mci) { struct i3200_error_info info; - edac_dbg(1, "MC%d\n", mci->mc_idx); i3200_get_and_clear_error_info(mci, &info); i3200_process_error_info(mci, &info); } - -void __iomem *i3200_map_mchbar(struct pci_dev *pdev) +static void __iomem *i3200_map_mchbar(struct pci_dev *pdev) { union { u64 mchbar; @@ -283,7 +279,7 @@ void __iomem *i3200_map_mchbar(struct pci_dev *pdev) return NULL; } - window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE); + window = ioremap(u.mchbar, I3200_MMR_WINDOW_SIZE); if (!window) printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n", (unsigned long long)u.mchbar); @@ -376,7 +372,6 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = I3200_REVISION; mci->ctl_name = i3200_devs[dev_idx].ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = i3200_check; @@ -396,14 +391,13 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) unsigned long nr_pages; for (j = 0; j < nr_channels; j++) { - struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, - mci->n_layers, i, j, 0); + struct dimm_info *dimm = edac_get_dimm(mci, i, j, 0); nr_pages = drb_to_nr_pages(drbs, stacked, j, i); if (nr_pages == 0) continue; - edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, + edac_dbg(0, "csrow %d, channel %d%s, size = %ld MiB\n", i, j, stacked ? 
" (stacked)" : "", PAGES_TO_MiB(nr_pages)); dimm->nr_pages = nr_pages; @@ -465,9 +459,11 @@ static void i3200_remove_one(struct pci_dev *pdev) iounmap(priv->window); edac_mc_free(mci); + + pci_disable_device(pdev); } -static DEFINE_PCI_DEVICE_TABLE(i3200_pci_tbl) = { +static const struct pci_device_id i3200_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I3200}, @@ -522,8 +518,7 @@ fail1: pci_unregister_driver(&i3200_driver); fail0: - if (mci_pdev) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); return pci_rc; } diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 63b2194e8c20..4a1bebc1ff14 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -22,7 +22,7 @@ #include <linux/edac.h> #include <asm/mmzone.h> -#include "edac_core.h" +#include "edac_module.h" /* * Alter this version for the I5000 module when modifications are made @@ -227,7 +227,7 @@ #define NREC_RDWR(x) (((x)>>11) & 1) #define NREC_RANK(x) (((x)>>8) & 0x7) #define NRECMEMB 0xC0 -#define NREC_CAS(x) (((x)>>16) & 0xFFFFFF) +#define NREC_CAS(x) (((x)>>16) & 0xFFF) #define NREC_RAS(x) ((x) & 0x7FFF) #define NRECFGLOG 0xC4 #define NREEECFBDA 0xC8 @@ -338,11 +338,11 @@ struct i5000_pvt { u16 mir0, mir1, mir2; - u16 b0_mtr[NUM_MTRS]; /* Memory Technlogy Reg */ + u16 b0_mtr[NUM_MTRS]; /* Memory Technology Reg */ u16 b0_ambpresent0; /* Branch 0, Channel 0 */ - u16 b0_ambpresent1; /* Brnach 0, Channel 1 */ + u16 b0_ambpresent1; /* Branch 0, Channel 1 */ - u16 b1_mtr[NUM_MTRS]; /* Memory Technlogy Reg */ + u16 b1_mtr[NUM_MTRS]; /* Memory Technology Reg */ u16 b1_ambpresent0; /* Branch 1, Channel 8 */ u16 b1_ambpresent1; /* Branch 1, Channel 1 */ @@ -371,7 +371,7 @@ struct i5000_error_info { /* These registers are input ONLY if there was a * Non-Recoverable Error */ u16 nrecmema; /* Non-Recoverable Mem log A */ - u16 nrecmemb; /* Non-Recoverable Mem log B */ + u32 nrecmemb; /* Non-Recoverable Mem log B */ }; @@ -407,7 +407,7 @@ static void i5000_get_error_info(struct mem_ctl_info *mci, NERR_FAT_FBD, &info->nerr_fat_fbd); pci_read_config_word(pvt->branchmap_werrors, NRECMEMA, &info->nrecmema); - pci_read_config_word(pvt->branchmap_werrors, + pci_read_config_dword(pvt->branchmap_werrors, NRECMEMB, &info->nrecmemb); /* Clear the error bits, by writing them back */ @@ -765,7 +765,7 @@ static void i5000_clear_error(struct mem_ctl_info *mci) static void i5000_check_error(struct mem_ctl_info *mci) { struct i5000_error_info info; - edac_dbg(4, "MC%d\n", mci->mc_idx); + i5000_get_error_info(mci, &info); i5000_process_error_info(mci, &info, 1); } @@ -1134,8 +1134,6 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci) u32 actual_tolm; u16 limit; int slot_row; - int maxch; - int maxdimmperch; int way0, way1; pvt = mci->pvt_info; @@ -1145,9 +1143,6 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci) pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32), &pvt->u.ambase_top); - maxdimmperch = pvt->maxdimmperch; - maxch = pvt->maxch; - edac_dbg(2, "AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n", (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch); @@ -1215,7 +1210,7 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci) &pvt->b0_ambpresent1); edac_dbg(2, "\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1); - /* Only if we have 2 branchs (4 channels) */ + /* Only if we have 2 branches (4 channels) */ if (pvt->maxch < CHANNELS_PER_BRANCH) { pvt->b1_ambpresent0 = 0; pvt->b1_ambpresent1 = 0; @@ -1253,7 +1248,7 @@ static int 
i5000_init_csrows(struct mem_ctl_info *mci) { struct i5000_pvt *pvt; struct dimm_info *dimm; - int empty, channel_count; + int empty; int max_csrows; int mtr; int csrow_megs; @@ -1261,8 +1256,6 @@ static int i5000_init_csrows(struct mem_ctl_info *mci) int slot; pvt = mci->pvt_info; - - channel_count = pvt->maxch; max_csrows = pvt->maxdimmperch * 2; empty = 1; /* Assume NO memory */ @@ -1282,9 +1275,8 @@ static int i5000_init_csrows(struct mem_ctl_info *mci) if (!MTR_DIMMS_PRESENT(mtr)) continue; - dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, - channel / MAX_BRANCHES, - channel % MAX_BRANCHES, slot); + dimm = edac_get_dimm(mci, channel / MAX_BRANCHES, + channel % MAX_BRANCHES, slot); csrow_megs = pvt->dimm_info[slot][channel].megabytes; dimm->grain = 8; @@ -1293,7 +1285,7 @@ static int i5000_init_csrows(struct mem_ctl_info *mci) dimm->mtype = MEM_FB_DDR2; /* ask what device type on this row */ - if (MTR_DRAM_WIDTH(mtr)) + if (MTR_DRAM_WIDTH(mtr) == 8) dimm->dtype = DEV_X8; else dimm->dtype = DEV_X4; @@ -1430,7 +1422,6 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx) mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = "i5000_edac.c"; - mci->mod_ver = I5000_REVISION; mci->ctl_name = i5000_devs[dev_idx].ctl_name; mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; @@ -1530,7 +1521,7 @@ static void i5000_remove_one(struct pci_dev *pdev) * * The "E500P" device is the first device supported. */ -static DEFINE_PCI_DEVICE_TABLE(i5000_pci_tbl) = { +static const struct pci_device_id i5000_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I5000_DEV16), .driver_data = I5000P}, @@ -1560,8 +1551,8 @@ static int __init i5000_init(void) edac_dbg(2, "MC:\n"); - /* Ensure that the OPSTATE is set correctly for POLL or NMI */ - opstate_init(); + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); pci_rc = pci_register_driver(&i5000_driver); @@ -1582,13 +1573,10 @@ module_init(i5000_init); module_exit(i5000_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR - ("Linux Networx (http://lnxi.com) Doug Thompson <norsk5@xmission.com>"); -MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " - I5000_REVISION); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Doug Thompson <norsk5@xmission.com>"); +MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " I5000_REVISION); module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); module_param(misc_messages, int, 0444); MODULE_PARM_DESC(misc_messages, "Log miscellaneous non fatal messages"); - diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index 1b635178cc44..d470afe65001 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -29,7 +29,7 @@ #include <linux/mmzone.h> #include <linux/debugfs.h> -#include "edac_core.h" +#include "edac_module.h" /* register addresses */ @@ -244,11 +244,6 @@ static inline u32 i5100_nrecmema_rank(u32 a) return a >> 8 & ((1 << 3) - 1); } -static inline u32 i5100_nrecmema_dm_buf_id(u32 a) -{ - return a & ((1 << 8) - 1); -} - static inline u32 i5100_nrecmemb_cas(u32 a) { return a >> 16 & ((1 << 13) - 1); @@ -259,11 +254,6 @@ static inline u32 i5100_nrecmemb_ras(u32 a) return a & ((1 << 16) - 1); } -static inline u32 i5100_redmemb_ecc_locator(u32 a) -{ - return a & ((1 << 18) - 1); -} - static inline u32 i5100_recmema_merr(u32 a) { return i5100_nrecmema_merr(a); @@ -279,11 +269,6 @@ static inline u32 
i5100_recmema_rank(u32 a) return i5100_nrecmema_rank(a); } -static inline u32 i5100_recmema_dm_buf_id(u32 a) -{ - return i5100_nrecmema_dm_buf_id(a); -} - static inline u32 i5100_recmemb_cas(u32 a) { return i5100_nrecmemb_cas(a); @@ -422,7 +407,8 @@ static const char *i5100_err_msg(unsigned err) } /* convert csrow index into a rank (per channel -- 0..5) */ -static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow) +static unsigned int i5100_csrow_to_rank(const struct mem_ctl_info *mci, + unsigned int csrow) { const struct i5100_priv *priv = mci->pvt_info; @@ -430,7 +416,8 @@ static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow) } /* convert csrow index into a channel (0..1) */ -static int i5100_csrow_to_chan(const struct mem_ctl_info *mci, int csrow) +static unsigned int i5100_csrow_to_chan(const struct mem_ctl_info *mci, + unsigned int csrow) { const struct i5100_priv *priv = mci->pvt_info; @@ -489,7 +476,6 @@ static void i5100_read_log(struct mem_ctl_info *mci, int chan, u32 dw; u32 dw2; unsigned syndrome = 0; - unsigned ecc_loc = 0; unsigned merr; unsigned bank; unsigned rank; @@ -502,7 +488,6 @@ static void i5100_read_log(struct mem_ctl_info *mci, int chan, pci_read_config_dword(pdev, I5100_REDMEMA, &dw2); syndrome = dw2; pci_read_config_dword(pdev, I5100_REDMEMB, &dw2); - ecc_loc = i5100_redmemb_ecc_locator(dw2); } if (i5100_validlog_recmemvalid(dw)) { @@ -579,9 +564,7 @@ static void i5100_check_error(struct mem_ctl_info *mci) static void i5100_refresh_scrubbing(struct work_struct *work) { - struct delayed_work *i5100_scrubbing = container_of(work, - struct delayed_work, - work); + struct delayed_work *i5100_scrubbing = to_delayed_work(work); struct i5100_priv *priv = container_of(i5100_scrubbing, struct i5100_priv, i5100_scrubbing); @@ -660,11 +643,11 @@ static struct pci_dev *pci_get_device_func(unsigned vendor, return ret; } -static unsigned long i5100_npages(struct mem_ctl_info *mci, int csrow) +static unsigned long i5100_npages(struct mem_ctl_info *mci, unsigned int csrow) { struct i5100_priv *priv = mci->pvt_info; - const unsigned chan_rank = i5100_csrow_to_rank(mci, csrow); - const unsigned chan = i5100_csrow_to_chan(mci, csrow); + const unsigned int chan_rank = i5100_csrow_to_rank(mci, csrow); + const unsigned int chan = i5100_csrow_to_chan(mci, csrow); unsigned addr_lines; /* dimm present? 
*/ @@ -718,7 +701,6 @@ static int i5100_read_spd_byte(const struct mem_ctl_info *mci, { struct i5100_priv *priv = mci->pvt_info; u16 w; - unsigned long et; pci_read_config_word(priv->mc, I5100_SPDDATA, &w); if (i5100_spddata_busy(w)) @@ -729,7 +711,6 @@ static int i5100_read_spd_byte(const struct mem_ctl_info *mci, 0, 0)); /* wait up to 100ms */ - et = jiffies + HZ / 10; udelay(100); while (1) { pci_read_config_word(priv->mc, I5100_SPDDATA, &w); @@ -853,32 +834,25 @@ static void i5100_init_interleaving(struct pci_dev *pdev, static void i5100_init_csrows(struct mem_ctl_info *mci) { - int i; struct i5100_priv *priv = mci->pvt_info; + struct dimm_info *dimm; - for (i = 0; i < mci->tot_dimms; i++) { - struct dimm_info *dimm; - const unsigned long npages = i5100_npages(mci, i); - const unsigned chan = i5100_csrow_to_chan(mci, i); - const unsigned rank = i5100_csrow_to_rank(mci, i); + mci_for_each_dimm(mci, dimm) { + const unsigned long npages = i5100_npages(mci, dimm->idx); + const unsigned int chan = i5100_csrow_to_chan(mci, dimm->idx); + const unsigned int rank = i5100_csrow_to_rank(mci, dimm->idx); if (!npages) continue; - dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, - chan, rank, 0); - dimm->nr_pages = npages; - if (npages) { - dimm->grain = 32; - dimm->dtype = (priv->mtr[chan][rank].width == 4) ? - DEV_X4 : DEV_X8; - dimm->mtype = MEM_RDDR2; - dimm->edac_mode = EDAC_SECDED; - snprintf(dimm->label, sizeof(dimm->label), - "DIMM%u", - i5100_rank_to_slot(mci, chan, rank)); - } + dimm->grain = 32; + dimm->dtype = (priv->mtr[chan][rank].width == 4) ? + DEV_X4 : DEV_X8; + dimm->mtype = MEM_RDDR2; + dimm->edac_mode = EDAC_SECDED; + snprintf(dimm->label, sizeof(dimm->label), "DIMM%u", + i5100_rank_to_slot(mci, chan, rank)); edac_dbg(2, "dimm channel %d, rank %d, size %ld\n", chan, rank, (long)PAGES_TO_MiB(npages)); @@ -935,7 +909,7 @@ static void i5100_do_inject(struct mem_ctl_info *mci) * * The injection code don't work without setting this register. * The register needs to be flipped off then on else the hardware - * will only preform the first injection. + * will only perform the first injection. 
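The flip-off-then-on requirement described in that comment is worth a standalone illustration: the injection logic latches a single event per enable transition, so re-arming means writing the disable value before the trigger value. A minimal sketch under assumed names (INJ_ENABLE and INJ_ARM are hypothetical placeholders, not the real i5100 register layout):

#include <linux/pci.h>

#define INJ_ENABLE      0xdc    /* hypothetical config-space offset */
#define INJ_ARM         0x1     /* hypothetical "arm injection" value */

static void rearm_injection(struct pci_dev *pdev)
{
        /* Clear the enable first; writing the arm value while already
         * armed is ignored by hardware with the behavior described above. */
        pci_write_config_dword(pdev, INJ_ENABLE, 0);
        /* Now arm it again so the next matching access injects. */
        pci_write_config_dword(pdev, INJ_ENABLE, INJ_ARM);
}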
* * Stop condition bits 7:4 * 1010 - Stop after one injection @@ -974,25 +948,25 @@ static int i5100_setup_debugfs(struct mem_ctl_info *mci) if (!i5100_debugfs) return -ENODEV; - priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); + priv->debugfs = edac_debugfs_create_dir_at(mci->bus->name, i5100_debugfs); if (!priv->debugfs) return -ENOMEM; - debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs, - &priv->inject_channel); - debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs, - &priv->inject_hlinesel); - debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs, - &priv->inject_deviceptr1); - debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs, - &priv->inject_deviceptr2); - debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs, - &priv->inject_eccmask1); - debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs, - &priv->inject_eccmask2); - debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs, - &mci->dev, &i5100_inject_enable_fops); + edac_debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_channel); + edac_debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_hlinesel); + edac_debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr1); + edac_debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr2); + edac_debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask1); + edac_debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask2); + edac_debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs, + &mci->dev, &i5100_inject_enable_fops); return 0; @@ -1082,16 +1056,15 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) PCI_DEVICE_ID_INTEL_5100_19, 0); if (!einj) { ret = -ENODEV; - goto bail_einj; + goto bail_mc_free; } rc = pci_enable_device(einj); if (rc < 0) { ret = rc; - goto bail_disable_einj; + goto bail_einj; } - mci->pdev = &pdev->dev; priv = mci->pvt_info; @@ -1118,7 +1091,6 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = "i5100_edac.c"; - mci->mod_ver = "not versioned"; mci->ctl_name = "i5100"; mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; @@ -1158,14 +1130,14 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) bail_scrub: priv->scrub_enable = 0; cancel_delayed_work_sync(&(priv->i5100_scrubbing)); - edac_mc_free(mci); - -bail_disable_einj: pci_disable_device(einj); bail_einj: pci_dev_put(einj); +bail_mc_free: + edac_mc_free(mci); + bail_disable_ch1: pci_disable_device(ch1mm); @@ -1197,7 +1169,7 @@ static void i5100_remove_one(struct pci_dev *pdev) priv = mci->pvt_info; - debugfs_remove_recursive(priv->debugfs); + edac_debugfs_remove_recursive(priv->debugfs); priv->scrub_enable = 0; cancel_delayed_work_sync(&(priv->i5100_scrubbing)); @@ -1213,7 +1185,7 @@ static void i5100_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(i5100_pci_tbl) = { +static const struct pci_device_id i5100_pci_tbl[] = { /* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... 
*/ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) }, { 0, } @@ -1231,7 +1203,7 @@ static int __init i5100_init(void) { int pci_rc; - i5100_debugfs = debugfs_create_dir("i5100_edac", NULL); + i5100_debugfs = edac_debugfs_create_dir_at("i5100_edac", NULL); pci_rc = pci_register_driver(&i5100_driver); return (pci_rc < 0) ? pci_rc : 0; @@ -1239,7 +1211,7 @@ static int __init i5100_init(void) static void __exit i5100_exit(void) { - debugfs_remove(i5100_debugfs); + edac_debugfs_remove(i5100_debugfs); pci_unregister_driver(&i5100_driver); } @@ -1248,6 +1220,5 @@ module_init(i5100_init); module_exit(i5100_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR - ("Arthur Jones <ajones@riverbed.com>"); +MODULE_AUTHOR("Arthur Jones <ajones@riverbed.com>"); MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers"); diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 0a05bbceb08f..b5cf25905b05 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -6,9 +6,9 @@ * * Copyright (c) 2008 by: * Ben Woodard <woodard@redhat.com> - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab * - * Red Hat Inc. http://www.redhat.com + * Red Hat Inc. https://www.redhat.com * * Forked and adapted from the i5000_edac driver which was * written by Douglas Thompson Linux Networx <norsk5@xmission.com> @@ -31,8 +31,9 @@ #include <linux/slab.h> #include <linux/edac.h> #include <linux/mmzone.h> +#include <linux/string_choices.h> -#include "edac_core.h" +#include "edac_module.h" /* * Alter this version for the I5400 module when modifications are made @@ -279,7 +280,8 @@ static inline int from_nf_ferr(unsigned int mask) #define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE) #define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE) -/* Defines to extract the vaious fields from the +/* + * Defines to extract the various fields from the * MTRx - Memory Technology Registers */ #define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 10)) @@ -368,7 +370,7 @@ struct i5400_error_info { /* These registers are input ONLY if there was a Non-Rec Error */ u16 nrecmema; /* Non-Recoverable Mem log A */ - u16 nrecmemb; /* Non-Recoverable Mem log B */ + u32 nrecmemb; /* Non-Recoverable Mem log B */ }; @@ -458,7 +460,7 @@ static void i5400_get_error_info(struct mem_ctl_info *mci, NERR_FAT_FBD, &info->nerr_fat_fbd); pci_read_config_word(pvt->branchmap_werrors, NRECMEMA, &info->nrecmema); - pci_read_config_word(pvt->branchmap_werrors, + pci_read_config_dword(pvt->branchmap_werrors, NRECMEMB, &info->nrecmemb); /* Clear the error bits, by writing them back */ @@ -548,8 +550,8 @@ static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci, ras = nrec_ras(info); cas = nrec_cas(info); - edac_dbg(0, "\t\tDIMM= %d Channels= %d,%d (Branch= %d DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", - rank, channel, channel + 1, branch >> 1, bank, + edac_dbg(0, "\t\t%s DIMM= %d Channels= %d,%d (Branch= %d DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", + type, rank, channel, channel + 1, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas); /* Only 1 bit will be on */ @@ -686,7 +688,7 @@ static void i5400_clear_error(struct mem_ctl_info *mci) static void i5400_check_error(struct mem_ctl_info *mci) { struct i5400_error_info info; - edac_dbg(4, "MC%d\n", mci->mc_idx); + i5400_get_error_info(mci, &info); i5400_process_error_info(mci, &info); } @@ -898,7 +900,7 @@ static void decode_mtr(int slot_row, u16 mtr) edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); 
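Both the i5000 and i5400 hunks above carry the same correctness fix: NRECMEMB is a 32-bit register, so storing it in a u16 and fetching it with pci_read_config_word() silently truncated the CAS field (bits 27:16); the companion i5000 change also narrows NREC_CAS() to the field's real 12-bit width. A short sketch using the macros shown earlier:

#include <linux/pci.h>
#include <linux/printk.h>

#define NRECMEMB        0xC0
#define NREC_CAS(x)     (((x) >> 16) & 0xFFF)  /* 12-bit field at 27:16 */
#define NREC_RAS(x)     ((x) & 0x7FFF)

static void read_nonrec_log(struct pci_dev *branchmap_werrors)
{
        u32 nrecmemb;

        /* A word-sized read would return only bits 15:0, making
         * NREC_CAS() decode as zero for every logged error. */
        pci_read_config_dword(branchmap_werrors, NRECMEMB, &nrecmemb);
        pr_debug("ras=%d cas=%d\n",
                 NREC_RAS(nrecmemb), NREC_CAS(nrecmemb));
}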
edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr))); edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); edac_dbg(2, "\t\tNUMRANK: %s\n", @@ -1054,8 +1056,6 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci) u32 actual_tolm; u16 limit; int slot_row; - int maxch; - int maxdimmperch; int way0, way1; pvt = mci->pvt_info; @@ -1065,9 +1065,6 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci) pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32), &pvt->u.ambase_top); - maxdimmperch = pvt->maxdimmperch; - maxch = pvt->maxch; - edac_dbg(2, "AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n", (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch); @@ -1170,17 +1167,13 @@ static int i5400_init_dimms(struct mem_ctl_info *mci) { struct i5400_pvt *pvt; struct dimm_info *dimm; - int ndimms, channel_count; - int max_dimms; + int ndimms; int mtr; int size_mb; int channel, slot; pvt = mci->pvt_info; - channel_count = pvt->maxch; - max_dimms = pvt->maxdimmperch; - ndimms = 0; /* @@ -1196,8 +1189,7 @@ static int i5400_init_dimms(struct mem_ctl_info *mci) if (!MTR_DIMMS_PRESENT(mtr)) continue; - dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, - channel / 2, channel % 2, slot); + dimm = edac_get_dimm(mci, channel / 2, channel % 2, slot); size_mb = pvt->dimm_info[slot][channel].megabytes; @@ -1207,13 +1199,14 @@ static int i5400_init_dimms(struct mem_ctl_info *mci) dimm->nr_pages = size_mb << 8; dimm->grain = 8; - dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4; + dimm->dtype = MTR_DRAM_WIDTH(mtr) == 8 ? + DEV_X8 : DEV_X4; dimm->mtype = MEM_FB_DDR2; /* * The eccc mechanism is SDDC (aka SECC), with * is similar to Chipkill. */ - dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ? + dimm->edac_mode = MTR_DRAM_WIDTH(mtr) == 8 ? EDAC_S8ECD8ED : EDAC_S4ECD4ED; ndimms++; } @@ -1314,7 +1307,6 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx) mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = "i5400_edac.c"; - mci->mod_ver = I5400_REVISION; mci->ctl_name = i5400_devs[dev_idx].ctl_name; mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; @@ -1408,6 +1400,8 @@ static void i5400_remove_one(struct pci_dev *pdev) /* retrieve references to resources, and free those resources */ i5400_put_devices(mci); + pci_disable_device(pdev); + edac_mc_free(mci); } @@ -1416,7 +1410,7 @@ static void i5400_remove_one(struct pci_dev *pdev) * * The "E500P" device is the first device supported. */ -static DEFINE_PCI_DEVICE_TABLE(i5400_pci_tbl) = { +static const struct pci_device_id i5400_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)}, {0,} /* 0 terminated list. */ }; @@ -1467,8 +1461,8 @@ module_exit(i5400_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); -MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); +MODULE_AUTHOR("Mauro Carvalho Chehab"); +MODULE_AUTHOR("Red Hat Inc. 
(https://www.redhat.com)"); MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - " I5400_REVISION); diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 9004c64b169e..69068f8d0cad 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -1,13 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Intel 7300 class Memory Controllers kernel module (Clarksboro) * - * This file may be distributed under the terms of the - * GNU General Public License version 2 only. - * * Copyright (c) 2010 by: - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab * - * Red Hat Inc. http://www.redhat.com + * Red Hat Inc. https://www.redhat.com * * Intel 7300 Chipset Memory Controller Hub (MCH) - Datasheet * http://www.intel.com/Assets/PDF/datasheet/318082.pdf @@ -25,8 +23,9 @@ #include <linux/slab.h> #include <linux/edac.h> #include <linux/mmzone.h> +#include <linux/string_choices.h> -#include "edac_core.h" +#include "edac_module.h" /* * Alter this version for the I7300 module when modifications are made @@ -304,7 +303,6 @@ static const char *ferr_global_lo_name[] = { #define REDMEMA 0xdc #define REDMEMB 0x7c - #define IS_SECOND_CH(v) ((v) * (1 << 17)) #define RECMEMA 0xe0 #define RECMEMA_BANK(v) (((v) >> 12) & 7) @@ -483,8 +481,9 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, REDMEMB, &value); channel = (branch << 1); - if (IS_SECOND_CH(value)) - channel++; + + /* Second channel ? */ + channel += !!(value & BIT(17)); /* Clear the error bit */ pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, @@ -582,7 +581,7 @@ static void i7300_enable_error_reporting(struct mem_ctl_info *mci) * @ch: Channel number within the branch (0 or 1) * @branch: Branch number (0 or 1) * @dinfo: Pointer to DIMM info where dimm size is stored - * @p_csrow: Pointer to the struct csrow_info that corresponds to that element + * @dimm: Pointer to the struct dimm_info that corresponds to that element */ static int decode_mtr(struct i7300_pvt *pvt, int slot, int ch, int branch, @@ -622,7 +621,7 @@ static int decode_mtr(struct i7300_pvt *pvt, edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr))); edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); edac_dbg(2, "\t\tNUMRANK: %s\n", @@ -796,8 +795,7 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) for (ch = 0; ch < max_channel; ch++) { int channel = to_channel(ch, branch); - dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, - mci->n_layers, branch, ch, slot); + dimm = edac_get_dimm(mci, branch, ch, slot); dinfo = &pvt->dimm_info[slot][channel]; @@ -819,7 +817,7 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) /** * decode_mir() - Decodes Memory Interleave Register (MIR) info - * @int mir_no: number of the MIR register to decode + * @mir_no: number of the MIR register to decode * @mir: array with the MIR data cached on the driver */ static void decode_mir(int mir_no, u16 mir[MAX_MIR]) @@ -874,9 +872,9 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) IS_MIRRORED(pvt->mc_settings) ? "" : "non-"); edac_dbg(0, "Error detection is %s\n", - IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + str_enabled_disabled(IS_ECC_ENABLED(pvt->mc_settings))); edac_dbg(0, "Retry is %s\n", - IS_RETRY_ENABLED(pvt->mc_settings) ? 
"enabled" : "disabled"); + str_enabled_disabled(IS_RETRY_ENABLED(pvt->mc_settings))); /* Get Memory Interleave Range registers */ pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0, @@ -943,33 +941,35 @@ static int i7300_get_devices(struct mem_ctl_info *mci) /* Attempt to 'get' the MCH register we want */ pdev = NULL; - while (!pvt->pci_dev_16_1_fsb_addr_map || - !pvt->pci_dev_16_2_fsb_err_regs) { - pdev = pci_get_device(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev); - if (!pdev) { - /* End of list, leave */ - i7300_printk(KERN_ERR, - "'system address,Process Bus' " - "device not found:" - "vendor 0x%x device 0x%x ERR funcs " - "(broken BIOS?)\n", - PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); - goto error; - } - + while ((pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, + pdev))) { /* Store device 16 funcs 1 and 2 */ switch (PCI_FUNC(pdev->devfn)) { case 1: - pvt->pci_dev_16_1_fsb_addr_map = pdev; + if (!pvt->pci_dev_16_1_fsb_addr_map) + pvt->pci_dev_16_1_fsb_addr_map = + pci_dev_get(pdev); break; case 2: - pvt->pci_dev_16_2_fsb_err_regs = pdev; + if (!pvt->pci_dev_16_2_fsb_err_regs) + pvt->pci_dev_16_2_fsb_err_regs = + pci_dev_get(pdev); break; } } + if (!pvt->pci_dev_16_1_fsb_addr_map || + !pvt->pci_dev_16_2_fsb_err_regs) { + /* At least one device was not found */ + i7300_printk(KERN_ERR, + "'system address,Process Bus' device not found:" + "vendor 0x%x device 0x%x ERR funcs (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); + goto error; + } + edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n", pci_name(pvt->pci_dev_16_0_fsb_ctlr), pvt->pci_dev_16_0_fsb_ctlr->vendor, @@ -1075,7 +1075,6 @@ static int i7300_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = "i7300_edac.c"; - mci->mod_ver = I7300_REVISION; mci->ctl_name = i7300_devs[0].ctl_name; mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; @@ -1160,7 +1159,7 @@ static void i7300_remove_one(struct pci_dev *pdev) * * Has only 8086:360c PCI ID */ -static DEFINE_PCI_DEVICE_TABLE(i7300_pci_tbl) = { +static const struct pci_device_id i7300_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)}, {0,} /* 0 terminated list. */ }; @@ -1195,7 +1194,7 @@ static int __init i7300_init(void) } /** - * i7300_init() - Unregisters the driver + * i7300_exit() - Unregisters the driver */ static void __exit i7300_exit(void) { @@ -1207,8 +1206,8 @@ module_init(i7300_init); module_exit(i7300_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); -MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); +MODULE_AUTHOR("Mauro Carvalho Chehab"); +MODULE_AUTHOR("Red Hat Inc. (https://www.redhat.com)"); MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - " I7300_REVISION); diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 80a963d64e58..91e0a88ef904 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* Intel i7 core/Nehalem Memory Controller kernel module * * This driver supports the memory controllers found on the Intel @@ -5,13 +6,10 @@ * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield * and Westmere-EP. * - * This file may be distributed under the terms of the - * GNU General Public License version 2 only. 
- * * Copyright (c) 2009-2010 by: - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab * - * Red Hat Inc. http://www.redhat.com + * Red Hat Inc. https://www.redhat.com * * Forked and adapted from the i5400_edac driver * @@ -39,7 +37,7 @@ #include <asm/processor.h> #include <asm/div64.h> -#include "edac_core.h" +#include "edac_module.h" /* Static vars */ static LIST_HEAD(i7core_edac_list); @@ -271,16 +269,6 @@ struct i7core_pvt { bool is_registered, enable_scrub; - /* Fifo double buffers */ - struct mce mce_entry[MCE_LOG_LEN]; - struct mce mce_outentry[MCE_LOG_LEN]; - - /* Fifo in/out counters */ - unsigned mce_in, mce_out; - - /* Count indicator to show errors not got */ - unsigned mce_overrun; - /* DCLK Frequency used for computing scrub rate */ int dclk_freq; @@ -388,16 +376,16 @@ static const struct pci_id_table pci_dev_table[] = { PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem), PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield), PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere), - {0,} /* 0 terminated list. */ + { NULL, } }; /* * pci_device_id table for which devices we are looking for */ -static DEFINE_PCI_DEVICE_TABLE(i7core_pci_tbl) = { +static const struct pci_device_id i7core_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)}, - {0,} /* 0 terminated list. */ + { 0, } }; /**************************************************************************** @@ -471,7 +459,7 @@ static struct i7core_dev *alloc_i7core_dev(u8 socket, if (!i7core_dev) return NULL; - i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs, + i7core_dev->pdev = kcalloc(table->n_devs, sizeof(*i7core_dev->pdev), GFP_KERNEL); if (!i7core_dev->pdev) { kfree(i7core_dev); @@ -597,8 +585,7 @@ static int get_dimm_config(struct mem_ctl_info *mci) if (!DIMM_PRESENT(dimm_dod[j])) continue; - dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, - i, j, 0); + dimm = edac_get_dimm(mci, i, j, 0); banks = numbank(MC_DOD_NUMBANK(dimm_dod[j])); ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j])); rows = numrow(MC_DOD_NUMROW(dimm_dod[j])); @@ -607,7 +594,7 @@ static int get_dimm_config(struct mem_ctl_info *mci) /* DDR3 has 8 I/O banks */ size = (rows * cols * banks * ranks) >> (20 - 3); - edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n", + edac_dbg(0, "\tdimm %d %d MiB offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n", j, size, RANKOFFSET(dimm_dod[j]), banks, ranks, rows, cols); @@ -734,7 +721,7 @@ static ssize_t i7core_inject_type_store(struct device *dev, const char *data, size_t count) { struct mem_ctl_info *mci = to_mci(dev); -struct i7core_pvt *pvt = mci->pvt_info; + struct i7core_pvt *pvt = mci->pvt_info; unsigned long value; int rc; @@ -1089,7 +1076,7 @@ static struct attribute *i7core_addrmatch_attrs[] = { NULL }; -static struct attribute_group addrmatch_grp = { +static const struct attribute_group addrmatch_grp = { .attrs = i7core_addrmatch_attrs, }; @@ -1104,7 +1091,7 @@ static void addrmatch_release(struct device *device) kfree(device); } -static struct device_type addrmatch_type = { +static const struct device_type addrmatch_type = { .groups = addrmatch_groups, .release = addrmatch_release, }; @@ -1120,7 +1107,7 @@ static struct attribute *i7core_udimm_counters_attrs[] = { NULL }; -static struct attribute_group all_channel_counts_grp = { +static const struct attribute_group all_channel_counts_grp = { .attrs = i7core_udimm_counters_attrs, }; @@ -1135,7 
+1122,7 @@ static void all_channel_counts_release(struct device *device) kfree(device); } -static struct device_type all_channel_counts_type = { +static const struct device_type all_channel_counts_type = { .groups = all_channel_counts_groups, .release = all_channel_counts_release, }; @@ -1157,27 +1144,24 @@ static DEVICE_ATTR(inject_eccmask, S_IRUGO | S_IWUSR, static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR, i7core_inject_enable_show, i7core_inject_enable_store); +static struct attribute *i7core_dev_attrs[] = { + &dev_attr_inject_section.attr, + &dev_attr_inject_type.attr, + &dev_attr_inject_eccmask.attr, + &dev_attr_inject_enable.attr, + NULL +}; + +ATTRIBUTE_GROUPS(i7core_dev); + static int i7core_create_sysfs_devices(struct mem_ctl_info *mci) { struct i7core_pvt *pvt = mci->pvt_info; int rc; - rc = device_create_file(&mci->dev, &dev_attr_inject_section); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_inject_type); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_inject_eccmask); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_inject_enable); - if (rc < 0) - return rc; - pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL); if (!pvt->addrmatch_dev) - return rc; + return -ENOMEM; pvt->addrmatch_dev->type = &addrmatch_type; pvt->addrmatch_dev->bus = mci->dev.bus; @@ -1190,15 +1174,14 @@ static int i7core_create_sysfs_devices(struct mem_ctl_info *mci) rc = device_add(pvt->addrmatch_dev); if (rc < 0) - return rc; + goto err_put_addrmatch; if (!pvt->is_registered) { pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev), GFP_KERNEL); if (!pvt->chancounts_dev) { - put_device(pvt->addrmatch_dev); - device_del(pvt->addrmatch_dev); - return rc; + rc = -ENOMEM; + goto err_del_addrmatch; } pvt->chancounts_dev->type = &all_channel_counts_type; @@ -1212,9 +1195,18 @@ static int i7core_create_sysfs_devices(struct mem_ctl_info *mci) rc = device_add(pvt->chancounts_dev); if (rc < 0) - return rc; + goto err_put_chancounts; } return 0; + +err_put_chancounts: + put_device(pvt->chancounts_dev); +err_del_addrmatch: + device_del(pvt->addrmatch_dev); +err_put_addrmatch: + put_device(pvt->addrmatch_dev); + + return rc; } static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci) @@ -1223,17 +1215,12 @@ static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci) edac_dbg(1, "\n"); - device_remove_file(&mci->dev, &dev_attr_inject_section); - device_remove_file(&mci->dev, &dev_attr_inject_type); - device_remove_file(&mci->dev, &dev_attr_inject_eccmask); - device_remove_file(&mci->dev, &dev_attr_inject_enable); - if (!pvt->is_registered) { - put_device(pvt->chancounts_dev); device_del(pvt->chancounts_dev); + put_device(pvt->chancounts_dev); } - put_device(pvt->addrmatch_dev); device_del(pvt->addrmatch_dev); + put_device(pvt->addrmatch_dev); } /**************************************************************************** @@ -1334,14 +1321,19 @@ static int i7core_get_onedevice(struct pci_dev **prev, * is at addr 8086:2c40, instead of 8086:2c41. 
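The sysfs conversion just above swaps four individual device_create_file() calls for a static attribute list: ATTRIBUTE_GROUPS(i7core_dev) generates the i7core_dev_groups array that edac_mc_add_mc_with_groups() later hands to the device core, which then creates and tears down every file in one step, with no partial-failure unwinding left in the driver. A minimal generic sketch of the idiom (the names here are illustrative, not from the driver):

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t inject_enable_show(struct device *dev,
                                  struct device_attribute *attr, char *buf)
{
        return sysfs_emit(buf, "0\n");
}
static DEVICE_ATTR_RO(inject_enable);

static struct attribute *demo_attrs[] = {
        &dev_attr_inject_enable.attr,
        NULL            /* sentinel required by the attribute-group core */
};
ATTRIBUTE_GROUPS(demo); /* emits demo_group and demo_groups[] */

/* demo_groups can now be passed wherever the core accepts groups,
 * e.g. device_add_groups(dev, demo_groups) or, in EDAC,
 * edac_mc_add_mc_with_groups(mci, demo_groups). */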
So, we need * to probe for the alternate address in case of failure */ - if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) { + pci_dev_get(*prev); /* pci_get_device will put it */ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev); + } - if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev) + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && + !pdev) { + pci_dev_get(*prev); /* pci_get_device will put it */ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT, *prev); + } if (!pdev) { if (*prev) { @@ -1703,7 +1695,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, const struct mce *m) { struct i7core_pvt *pvt = mci->pvt_info; - char *type, *optype, *err; + char *optype, *err; enum hw_event_mc_err_type tp_event; unsigned long error = m->status & 0x1ff0000l; bool uncorrected_error = m->mcgstatus & 1ll << 61; @@ -1716,15 +1708,12 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, u32 errnum = find_first_bit(&error, 32); if (uncorrected_error) { - if (ripv) { - type = "FATAL"; - tp_event = HW_EVENT_ERR_FATAL; - } else { - type = "NON_FATAL"; + core_err_cnt = 1; + if (ripv) tp_event = HW_EVENT_ERR_UNCORRECTED; - } + else + tp_event = HW_EVENT_ERR_FATAL; } else { - type = "CORRECTED"; tp_event = HW_EVENT_ERR_CORRECTED; } @@ -1760,7 +1749,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, err = "write parity error"; break; case 19: - err = "redundacy loss"; + err = "redundancy loss"; break; case 20: err = "reserved"; @@ -1799,56 +1788,15 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, * i7core_check_error Retrieve and process errors reported by the * hardware. Called by the Core module. */ -static void i7core_check_error(struct mem_ctl_info *mci) +static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m) { struct i7core_pvt *pvt = mci->pvt_info; - int i; - unsigned count = 0; - struct mce *m; - /* - * MCE first step: Copy all mce errors into a temporary buffer - * We use a double buffering here, to reduce the risk of - * losing an error. - */ - smp_rmb(); - count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in) - % MCE_LOG_LEN; - if (!count) - goto check_ce_error; - - m = pvt->mce_outentry; - if (pvt->mce_in + count > MCE_LOG_LEN) { - unsigned l = MCE_LOG_LEN - pvt->mce_in; - - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l); - smp_wmb(); - pvt->mce_in = 0; - count -= l; - m += l; - } - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count); - smp_wmb(); - pvt->mce_in += count; - - smp_rmb(); - if (pvt->mce_overrun) { - i7core_printk(KERN_ERR, "Lost %d memory errors\n", - pvt->mce_overrun); - smp_wmb(); - pvt->mce_overrun = 0; - } - - /* - * MCE second step: parse errors and display - */ - for (i = 0; i < count; i++) - i7core_mce_output_error(mci, &pvt->mce_outentry[i]); + i7core_mce_output_error(mci, m); /* * Now, let's increment CE error counts */ -check_ce_error: if (!pvt->is_registered) i7core_udimm_check_mc_ecc_err(mci); else @@ -1856,12 +1804,8 @@ check_ce_error: } /* - * i7core_mce_check_error Replicates mcelog routine to get errors - * This routine simply queues mcelog errors, and - * return. The error itself should be handled later - * by i7core_check_error. - * WARNING: As this routine should be called at NMI time, extra care should - * be taken to avoid deadlocks, and to be as fast as possible. 
+ * Check that logging is enabled and that this is the right type + * of error for us to handle. */ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) @@ -1869,14 +1813,12 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; struct i7core_dev *i7_dev; struct mem_ctl_info *mci; - struct i7core_pvt *pvt; i7_dev = get_i7core_dev(mce->socketid); - if (!i7_dev) - return NOTIFY_BAD; + if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC)) + return NOTIFY_DONE; mci = i7_dev->mci; - pvt = mci->pvt_info; /* * Just let mcelog handle it if the error is @@ -1889,28 +1831,16 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->bank != 8) return NOTIFY_DONE; - smp_rmb(); - if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { - smp_wmb(); - pvt->mce_overrun++; - return NOTIFY_DONE; - } - - /* Copy memory error at the ringbuffer */ - memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce)); - smp_wmb(); - pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN; - - /* Handle fatal errors immediately */ - if (mce->mcgstatus & 1) - i7core_check_error(mci); + i7core_check_error(mci, mce); /* Advise mcelog that the errors were handled */ - return NOTIFY_STOP; + mce->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_OK; } static struct notifier_block i7_mce_dec = { .notifier_call = i7core_mce_check_error, + .priority = MCE_PRIO_EDAC, }; struct memdev_dmi_entry { @@ -2234,9 +2164,13 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = "i7core_edac.c"; - mci->mod_ver = I7CORE_REVISION; - mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d", - i7core_dev->socket); + + mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d", i7core_dev->socket); + if (!mci->ctl_name) { + rc = -ENOMEM; + goto fail1; + } + mci->dev_name = pci_name(i7core_dev->pdev[0]); mci->ctl_page_to_phys = NULL; @@ -2250,15 +2184,13 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) get_dimm_config(mci); /* record ptr to the generic device */ mci->pdev = &i7core_dev->pdev[0]->dev; - /* Set the function pointer to an actual operation function */ - mci->edac_check = i7core_check_error; /* Enable scrubrate setting */ if (pvt->enable_scrub) enable_sdram_scrub_setting(mci); /* add this new MC control structure to EDAC's list of MCs */ - if (unlikely(edac_mc_add_mc(mci))) { + if (unlikely(edac_mc_add_mc_with_groups(mci, i7core_dev_groups))) { edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); /* FIXME: perhaps some code should go here that disables error * reporting if we just enabled it @@ -2292,6 +2224,8 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) fail0: kfree(mci->ctl_name); + +fail1: edac_mc_free(mci); i7core_dev->mci = NULL; return rc; @@ -2456,8 +2390,8 @@ module_init(i7core_init); module_exit(i7core_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); -MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); +MODULE_AUTHOR("Mauro Carvalho Chehab"); +MODULE_AUTHOR("Red Hat Inc. 
(https://www.redhat.com)"); MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - " I7CORE_REVISION); diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c index 57fdb77903ba..933dcf3cfdff 100644 --- a/drivers/edac/i82443bxgx_edac.c +++ b/drivers/edac/i82443bxgx_edac.c @@ -29,9 +29,7 @@ #include <linux/edac.h> -#include "edac_core.h" - -#define I82443_REVISION "0.1" +#include "edac_module.h" #define EDAC_MOD_STR "i82443bxgx_edac" @@ -178,7 +176,6 @@ static void i82443bxgx_edacmc_check(struct mem_ctl_info *mci) { struct i82443bxgx_edacmc_error_info info; - edac_dbg(1, "MC%d\n", mci->mc_idx); i82443bxgx_edacmc_get_error_info(mci, &info); i82443bxgx_edacmc_process_error_info(mci, &info, 1); } @@ -320,7 +317,6 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx) I82443BXGX_EAP_OFFSET_MBE)); mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = I82443_REVISION; mci->ctl_name = "I82443BXGX"; mci->dev_name = pci_name(pdev); mci->edac_check = i82443bxgx_edacmc_check; @@ -350,8 +346,6 @@ fail: return -ENODEV; } -EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_probe1); - /* returns count (>= 0), or negative on error */ static int i82443bxgx_edacmc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -384,9 +378,7 @@ static void i82443bxgx_edacmc_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one); - -static DEFINE_PCI_DEVICE_TABLE(i82443bxgx_pci_tbl) = { +static const struct pci_device_id i82443bxgx_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0)}, @@ -445,9 +437,7 @@ fail1: pci_unregister_driver(&i82443bxgx_edacmc_driver); fail0: - if (mci_pdev != NULL) - pci_dev_put(mci_pdev); - + pci_dev_put(mci_pdev); return pci_rc; } @@ -458,8 +448,7 @@ static void __exit i82443bxgx_edacmc_exit(void) if (!i82443bxgx_registered) i82443bxgx_edacmc_remove_one(mci_pdev); - if (mci_pdev) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); } module_init(i82443bxgx_edacmc_init); diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index 3e3e431c8301..b8a497f0de28 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -14,9 +14,8 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/edac.h> -#include "edac_core.h" +#include "edac_module.h" -#define I82860_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "i82860_edac" #define i82860_printk(level, fmt, arg...) 
\ @@ -124,7 +123,7 @@ static int i82860_process_error_info(struct mem_ctl_info *mci, dimm->location[0], dimm->location[1], -1, "i82860 UE", ""); else - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, info->eap, 0, info->derrsyn, dimm->location[0], dimm->location[1], -1, "i82860 CE", ""); @@ -136,7 +135,6 @@ static void i82860_check(struct mem_ctl_info *mci) { struct i82860_error_info info; - edac_dbg(1, "MC%d\n", mci->mc_idx); i82860_get_error_info(mci, &info); i82860_process_error_info(mci, &info, 1); } @@ -216,7 +214,6 @@ static int i82860_probe1(struct pci_dev *pdev, int dev_idx) /* I"m not sure about this but I think that all RDRAM is SECDED */ mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = I82860_REVISION; mci->ctl_name = i82860_devs[dev_idx].ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = i82860_check; @@ -288,7 +285,7 @@ static void i82860_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(i82860_pci_tbl) = { +static const struct pci_device_id i82860_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82860}, @@ -343,28 +340,22 @@ fail1: pci_unregister_driver(&i82860_driver); fail0: - if (mci_pdev != NULL) - pci_dev_put(mci_pdev); - + pci_dev_put(mci_pdev); return pci_rc; } static void __exit i82860_exit(void) { edac_dbg(3, "\n"); - pci_unregister_driver(&i82860_driver); - - if (mci_pdev != NULL) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); } module_init(i82860_init); module_exit(i82860_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) " - "Ben Woodard <woodard@redhat.com>"); +MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) Ben Woodard <woodard@redhat.com>"); MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers"); module_param(edac_op_state, int, 0444); diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index 2f8535fc451e..553880b9fc12 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -18,9 +18,8 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/edac.h> -#include "edac_core.h" +#include "edac_module.h" -#define I82875P_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "i82875p_edac" #define i82875p_printk(level, fmt, arg...) 
\ @@ -263,7 +262,6 @@ static void i82875p_check(struct mem_ctl_info *mci) { struct i82875p_error_info info; - edac_dbg(1, "MC%d\n", mci->mc_idx); i82875p_get_error_info(mci, &info); i82875p_process_error_info(mci, &info, 1); } @@ -275,7 +273,6 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, { struct pci_dev *dev; void __iomem *window; - int err; *ovrfl_pdev = NULL; *ovrfl_window = NULL; @@ -293,13 +290,8 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, if (dev == NULL) return 1; - err = pci_bus_add_device(dev); - if (err) { - i82875p_printk(KERN_ERR, - "%s(): pci_bus_add_device() Failed\n", - __func__); - } pci_bus_assign_resources(dev->bus); + pci_bus_add_device(dev); } *ovrfl_pdev = dev; @@ -406,8 +398,6 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) edac_dbg(0, "\n"); - ovrfl_pdev = pci_get_device(PCI_VEND_DEV(INTEL, 82875_6), NULL); - if (i82875p_setup_overfl_dev(pdev, &ovrfl_pdev, &ovrfl_window)) return -ENODEV; drc = readl(ovrfl_window + I82875P_DRC); @@ -431,7 +421,6 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_UNKNOWN; mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = I82875P_REVISION; mci->ctl_name = i82875p_devs[dev_idx].ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = i82875p_check; @@ -527,7 +516,7 @@ static void i82875p_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(i82875p_pci_tbl) = { +static const struct pci_device_id i82875p_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82875P}, @@ -584,9 +573,7 @@ fail1: pci_unregister_driver(&i82875p_driver); fail0: - if (mci_pdev != NULL) - pci_dev_put(mci_pdev); - + pci_dev_put(mci_pdev); return pci_rc; } diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c index 0c8d4b0eaa32..d99f005832cf 100644 --- a/drivers/edac/i82975x_edac.c +++ b/drivers/edac/i82975x_edac.c @@ -14,9 +14,8 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/edac.h> -#include "edac_core.h" +#include "edac_module.h" -#define I82975X_REVISION " Ver: 1.0.0" #define EDAC_MOD_STR "i82975x_edac" #define i82975x_printk(level, fmt, arg...) \ @@ -105,7 +104,7 @@ NOTE: Only ONE of the three must be enabled * * 31:14 Base Addr of 16K memory-mapped * configuration space - * 13:1 reserverd + * 13:1 reserved * 0 mem-mapped config space enable */ @@ -331,7 +330,6 @@ static void i82975x_check(struct mem_ctl_info *mci) { struct i82975x_error_info info; - edac_dbg(1, "MC%d\n", mci->mc_idx); i82975x_get_error_info(mci, &info); i82975x_process_error_info(mci, &info, 1); } @@ -359,14 +357,6 @@ static int dual_channel_active(void __iomem *mch_window) return dualch; } -static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank) -{ - /* - * ECC is possible on i92975x ONLY with DEV_X8 - */ - return DEV_X8; -} - static void i82975x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, void __iomem *mch_window) { @@ -376,7 +366,6 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci, u32 cumul_size, nr_pages; int index, chan; struct dimm_info *dimm; - enum dev_type dtype; last_cumul_size = 0; @@ -414,7 +403,6 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci, * [0-7] for single-channel; i.e. csrow->nr_channels = 1 * [0-3] for dual-channel; i.e. 
csrow->nr_channels = 2 */ - dtype = i82975x_dram_type(mch_window, index); for (chan = 0; chan < csrow->nr_channels; chan++) { dimm = mci->csrows[index]->channels[chan]->dimm; @@ -424,7 +412,10 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci, (chan == 0) ? 'A' : 'B', index); dimm->grain = 1 << 7; /* 128Byte cache-line resolution */ - dimm->dtype = i82975x_dram_type(mch_window, index); + + /* ECC is possible on i92975x ONLY with DEV_X8. */ + dimm->dtype = DEV_X8; + dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */ dimm->edac_mode = EDAC_SECDED; /* only supported */ } @@ -493,7 +484,11 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) goto fail0; } mchbar &= 0xffffc000; /* bits 31:14 used for 16K window */ - mch_window = ioremap_nocache(mchbar, 0x1000); + mch_window = ioremap(mchbar, 0x1000); + if (!mch_window) { + edac_dbg(3, "error ioremapping MCHBAR!\n"); + goto fail0; + } #ifdef i82975x_DEBUG_IOMEM i82975x_printk(KERN_INFO, "MCHBAR real = %0x, remapped = %p\n", @@ -560,7 +555,6 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = I82975X_REVISION; mci->ctl_name = i82975x_devs[dev_idx].ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = i82975x_check; @@ -628,7 +622,7 @@ static void i82975x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(i82975x_pci_tbl) = { +static const struct pci_device_id i82975x_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 82975_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82975X @@ -653,8 +647,8 @@ static int __init i82975x_init(void) edac_dbg(3, "\n"); - /* Ensure that the OPSTATE is set correctly for POLL or NMI */ - opstate_init(); + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); pci_rc = pci_register_driver(&i82975x_driver); if (pci_rc < 0) @@ -685,9 +679,7 @@ fail1: pci_unregister_driver(&i82975x_driver); fail0: - if (mci_pdev != NULL) - pci_dev_put(mci_pdev); - + pci_dev_put(mci_pdev); return pci_rc; } diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c new file mode 100644 index 000000000000..eaab6af143e1 --- /dev/null +++ b/drivers/edac/ie31200_edac.c @@ -0,0 +1,845 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel E3-1200 + * Copyright (C) 2014 Jason Baron <jbaron@akamai.com> + * + * Support for the E3-1200 processor family. Heavily based on previous + * Intel EDAC drivers. + * + * Since the DRAM controller is on the cpu chip, we can use its PCI device + * id to identify these processors. 
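One of the i82975x hunks above adds a check that was simply missing before: ioremap() returns NULL on failure, and the old probe path went on to dereference the window regardless. The shape of the fix, as a standalone sketch:

#include <linux/io.h>
#include <linux/errno.h>

static int probe_mch_window(u32 mchbar)
{
        void __iomem *window;

        window = ioremap(mchbar & 0xffffc000, 0x1000); /* bits 31:14 hold the base */
        if (!window)
                return -ENODEV; /* previously this fell through to readl() */

        /* ... readl()/writel() accesses against the window ... */

        iounmap(window);
        return 0;
}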
+ * + * PCI DRAM controller device ids (Taken from The PCI ID Repository - https://pci-ids.ucw.cz/) + * + * 0108: Xeon E3-1200 Processor Family DRAM Controller + * 010c: Xeon E3-1200/2nd Generation Core Processor Family DRAM Controller + * 0150: Xeon E3-1200 v2/3rd Gen Core processor DRAM Controller + * 0158: Xeon E3-1200 v2/Ivy Bridge DRAM Controller + * 015c: Xeon E3-1200 v2/3rd Gen Core processor DRAM Controller + * 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller + * 0c08: Xeon E3-1200 v3 Processor DRAM Controller + * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers + * 590f: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers + * 5918: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers + * 190f: 6th Gen Core Dual-Core Processor Host Bridge/DRAM Registers + * 191f: 6th Gen Core Quad-Core Processor Host Bridge/DRAM Registers + * 3e..: 8th/9th Gen Core Processor Host Bridge/DRAM Registers + * + * Based on Intel specification: + * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf + * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200-family-vol-2-datasheet.html + * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/desktop-6th-gen-core-family-datasheet-vol-2.pdf + * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v6-vol-2-datasheet.pdf + * https://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-h-processor-lines-datasheet-vol-2.html + * https://www.intel.com/content/www/us/en/products/docs/processors/core/8th-gen-core-family-datasheet-vol-2.html + * + * According to the above datasheet (p.16): + * " + * 6. Software must not access B0/D0/F0 32-bit memory-mapped registers with + * requests that cross a DW boundary. + * " + * + * Thus, we make use of the explicit: lo_hi_readq(), which breaks the readq into + * 2 readl() calls. This restriction may be lifted in subsequent chip releases, + * but lo_hi_readq() ensures that we are safe across all e3-1200 processors. + */ + +#include <linux/bitfield.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/edac.h> + +#include <linux/io-64-nonatomic-lo-hi.h> +#include <asm/mce.h> +#include <asm/msr.h> +#include "edac_module.h" + +#define EDAC_MOD_STR "ie31200_edac" + +#define ie31200_printk(level, fmt, arg...) 
\ + edac_printk(level, "ie31200", fmt, ##arg) + +#define PCI_DEVICE_ID_INTEL_IE31200_HB_1 0x0108 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_2 0x010c +#define PCI_DEVICE_ID_INTEL_IE31200_HB_3 0x0150 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_4 0x0158 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c +#define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x190F +#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x1918 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_10 0x191F +#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x590f +#define PCI_DEVICE_ID_INTEL_IE31200_HB_12 0x5918 + +/* Coffee Lake-S */ +#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK 0x3e00 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1 0x3e0f +#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2 0x3e18 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3 0x3e1f +#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4 0x3e30 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5 0x3e31 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6 0x3e32 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7 0x3e33 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8 0x3ec2 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca + +/* Raptor Lake-S */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703 /* 8P+8E, e.g. i7-13700 */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640 /* 6P+8E, e.g. i5-13500, i5-13600, i5-14500 */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630 /* 4P+0E, e.g. i3-13100E */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4 0xa700 /* 8P+16E, e.g. i9-13900, i9-14900 */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_5 0xa740 /* 8P+12E, e.g. i7-14700 */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_6 0xa704 /* 6P+8E, e.g. i5-14600 */ + +/* Raptor Lake-HX */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_HX_1 0xa702 /* 8P+16E, e.g. i9-13950HX */ + +/* Alder Lake-S */ +#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1 0x4660 +#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2 0x4668 /* 8P+4E, e.g. i7-12700K */ +#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3 0x4648 /* 6P+4E, e.g. 
i5-12600K */ + +/* Bartlett Lake-S */ +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1 0x4639 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_2 0x463c +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_3 0x4642 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_4 0x4643 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_5 0xa731 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_6 0xa732 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_7 0xa733 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_8 0xa741 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_9 0xa744 +#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_10 0xa745 + +#define IE31200_RANKS_PER_CHANNEL 8 +#define IE31200_DIMMS_PER_CHANNEL 2 +#define IE31200_CHANNELS 2 +#define IE31200_IMC_NUM 2 + +/* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */ +#define IE31200_MCHBAR_LOW 0x48 +#define IE31200_MCHBAR_HIGH 0x4c + +/* + * Error Status Register (16b) + * + * 1 Multi-bit DRAM ECC Error Flag (DMERR) + * 0 Single-bit DRAM ECC Error Flag (DSERR) + */ +#define IE31200_ERRSTS 0xc8 +#define IE31200_ERRSTS_UE BIT(1) +#define IE31200_ERRSTS_CE BIT(0) +#define IE31200_ERRSTS_BITS (IE31200_ERRSTS_UE | IE31200_ERRSTS_CE) + +#define IE31200_CAPID0 0xe4 +#define IE31200_CAPID0_PDCD BIT(4) +#define IE31200_CAPID0_DDPCD BIT(6) +#define IE31200_CAPID0_ECC BIT(1) + +static int nr_channels; +static struct pci_dev *mci_pdev; +static int ie31200_registered = 1; + +struct res_config { + enum mem_type mtype; + bool cmci; + int imc_num; + /* Host MMIO configuration register */ + u64 reg_mchbar_mask; + u64 reg_mchbar_window_size; + /* ECC error log register */ + u64 reg_eccerrlog_offset[IE31200_CHANNELS]; + u64 reg_eccerrlog_ce_mask; + u64 reg_eccerrlog_ce_ovfl_mask; + u64 reg_eccerrlog_ue_mask; + u64 reg_eccerrlog_ue_ovfl_mask; + u64 reg_eccerrlog_rank_mask; + u64 reg_eccerrlog_syndrome_mask; + /* MSR to clear ECC error log register */ + u32 msr_clear_eccerrlog_offset; + /* DIMM characteristics register */ + u64 reg_mad_dimm_size_granularity; + u64 reg_mad_dimm_offset[IE31200_CHANNELS]; + u32 reg_mad_dimm_size_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_rank_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_width_mask[IE31200_DIMMS_PER_CHANNEL]; +}; + +struct ie31200_priv { + void __iomem *window; + void __iomem *c0errlog; + void __iomem *c1errlog; + struct res_config *cfg; + struct mem_ctl_info *mci; + struct pci_dev *pdev; + struct device dev; +}; + +static struct ie31200_pvt { + struct ie31200_priv *priv[IE31200_IMC_NUM]; +} ie31200_pvt; + +enum ie31200_chips { + IE31200 = 0, + IE31200_1 = 1, +}; + +struct ie31200_dev_info { + const char *ctl_name; +}; + +struct ie31200_error_info { + u16 errsts; + u16 errsts2; + u64 eccerrlog[IE31200_CHANNELS]; + u64 erraddr; +}; + +static const struct ie31200_dev_info ie31200_devs[] = { + [IE31200] = { + .ctl_name = "IE31200" + }, + [IE31200_1] = { + .ctl_name = "IE31200_1" + }, +}; + +struct dimm_data { + u64 size; /* in bytes */ + u8 ranks; + enum dev_type dtype; +}; + +static int how_many_channels(struct pci_dev *pdev) +{ + int n_channels; + unsigned char capid0_2b; /* 2nd byte of CAPID0 */ + + pci_read_config_byte(pdev, IE31200_CAPID0 + 1, &capid0_2b); + + /* check PDCD: Dual Channel Disable */ + if (capid0_2b & IE31200_CAPID0_PDCD) { + edac_dbg(0, "In single channel mode\n"); + n_channels = 1; + } else { + edac_dbg(0, "In dual channel mode\n"); + n_channels = 2; + } + + /* check DDPCD - check if both channels are filled */ + if (capid0_2b & IE31200_CAPID0_DDPCD) + edac_dbg(0, "2 DIMMS per channel disabled\n"); + else + edac_dbg(0, "2 DIMMS per 
channel enabled\n"); + + return n_channels; +} + +static bool ecc_capable(struct pci_dev *pdev) +{ + unsigned char capid0_4b; /* 4th byte of CAPID0 */ + + pci_read_config_byte(pdev, IE31200_CAPID0 + 3, &capid0_4b); + if (capid0_4b & IE31200_CAPID0_ECC) + return false; + return true; +} + +#define mci_to_pci_dev(mci) (((struct ie31200_priv *)(mci)->pvt_info)->pdev) + +static void ie31200_clear_error_info(struct mem_ctl_info *mci) +{ + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; + + /* + * The PCI ERRSTS register is deprecated. Write the MSR to clear + * the ECC error log registers in all memory controllers. + */ + if (cfg->msr_clear_eccerrlog_offset) { + if (wrmsr_safe(cfg->msr_clear_eccerrlog_offset, + cfg->reg_eccerrlog_ce_mask | + cfg->reg_eccerrlog_ce_ovfl_mask | + cfg->reg_eccerrlog_ue_mask | + cfg->reg_eccerrlog_ue_ovfl_mask, 0) < 0) + ie31200_printk(KERN_ERR, "Failed to wrmsr.\n"); + + return; + } + + /* + * Clear any error bits. + * (Yes, we really clear bits by writing 1 to them.) + */ + pci_write_bits16(mci_to_pci_dev(mci), IE31200_ERRSTS, + IE31200_ERRSTS_BITS, IE31200_ERRSTS_BITS); +} + +static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, + struct ie31200_error_info *info) +{ + struct pci_dev *pdev = mci_to_pci_dev(mci); + struct ie31200_priv *priv = mci->pvt_info; + + /* + * The PCI ERRSTS register is deprecated, directly read the + * MMIO-mapped ECC error log registers. + */ + if (priv->cfg->msr_clear_eccerrlog_offset) { + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); + if (nr_channels == 2) + info->eccerrlog[1] = lo_hi_readq(priv->c1errlog); + + ie31200_clear_error_info(mci); + return; + } + + /* + * This is a mess because there is no atomic way to read all the + * registers at once and the registers can transition from CE being + * overwritten by UE. + */ + pci_read_config_word(pdev, IE31200_ERRSTS, &info->errsts); + if (!(info->errsts & IE31200_ERRSTS_BITS)) + return; + + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); + if (nr_channels == 2) + info->eccerrlog[1] = lo_hi_readq(priv->c1errlog); + + pci_read_config_word(pdev, IE31200_ERRSTS, &info->errsts2); + + /* + * If the error is the same for both reads then the first set + * of reads is valid. If there is a change then there is a CE + * with no info and the second set of reads is valid and + * should be UE info. 
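A detail in the log reads above: every eccerrlog fetch goes through lo_hi_readq(), honoring the datasheet rule quoted in the file header that B0/D0/F0 memory-mapped accesses must not cross a DW boundary. The helper from <linux/io-64-nonatomic-lo-hi.h> behaves essentially like this sketch:

#include <linux/io.h>
#include <linux/types.h>

/* Split a 64-bit MMIO read into two 32-bit reads, low dword first,
 * so neither access crosses a DW boundary. */
static inline u64 readq_split(const volatile void __iomem *addr)
{
        const volatile u32 __iomem *p = addr;
        u32 low, high;

        low  = readl(p);
        high = readl(p + 1);

        return low + ((u64)high << 32);
}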
+ */ + if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); + if (nr_channels == 2) + info->eccerrlog[1] = + lo_hi_readq(priv->c1errlog); + } + + ie31200_clear_error_info(mci); +} + +static void ie31200_process_error_info(struct mem_ctl_info *mci, + struct ie31200_error_info *info) +{ + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; + int channel; + u64 log; + + if (!cfg->msr_clear_eccerrlog_offset) { + if (!(info->errsts & IE31200_ERRSTS_BITS)) + return; + + if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, "UE overwrote CE", ""); + info->errsts = info->errsts2; + } + } + + for (channel = 0; channel < nr_channels; channel++) { + log = info->eccerrlog[channel]; + if (log & cfg->reg_eccerrlog_ue_mask) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + info->erraddr >> PAGE_SHIFT, 0, 0, + field_get(cfg->reg_eccerrlog_rank_mask, log), + channel, -1, + "ie31200 UE", ""); + } else if (log & cfg->reg_eccerrlog_ce_mask) { + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + info->erraddr >> PAGE_SHIFT, 0, + field_get(cfg->reg_eccerrlog_syndrome_mask, log), + field_get(cfg->reg_eccerrlog_rank_mask, log), + channel, -1, + "ie31200 CE", ""); + } + } +} + +static void __ie31200_check(struct mem_ctl_info *mci, struct mce *mce) +{ + struct ie31200_error_info info; + + info.erraddr = mce ? mce->addr : 0; + ie31200_get_and_clear_error_info(mci, &info); + ie31200_process_error_info(mci, &info); +} + +static void ie31200_check(struct mem_ctl_info *mci) +{ + __ie31200_check(mci, NULL); +} + +static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc) +{ + union { + u64 mchbar; + struct { + u32 mchbar_low; + u32 mchbar_high; + }; + } u; + void __iomem *window; + + pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low); + pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high); + u.mchbar &= cfg->reg_mchbar_mask; + u.mchbar += cfg->reg_mchbar_window_size * mc; + + if (u.mchbar != (resource_size_t)u.mchbar) { + ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n", + (unsigned long long)u.mchbar); + return NULL; + } + + window = ioremap(u.mchbar, cfg->reg_mchbar_window_size); + if (!window) + ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n", + (unsigned long long)u.mchbar); + + return window; +} + +static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, + struct res_config *cfg) +{ + dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode) * cfg->reg_mad_dimm_size_granularity; + dd->ranks = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode) + 1; + dd->dtype = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode) + DEV_X8; +} + +static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window, + struct res_config *cfg, int mc) +{ + struct dimm_data dimm_info; + struct dimm_info *dimm; + unsigned long nr_pages; + u32 addr_decode; + int i, j, k; + + for (i = 0; i < IE31200_CHANNELS; i++) { + addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]); + edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); + + for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { + populate_dimm_info(&dimm_info, addr_decode, j, cfg); + edac_dbg(0, "mc: %d, channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", + mc, i, j, dimm_info.size >> 20, + dimm_info.ranks, + dimm_info.dtype); + 
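populate_dimm_info() just above decodes a DIMM's size, rank count and chip width purely from per-generation masks carried in res_config, so supporting a new SoC is mostly a matter of filling in a table (see snb_cfg and skl_cfg below) rather than writing new parsing code. Assuming the driver's field_get() helper does the usual mask-and-shift, the extraction reduces to:

#include <linux/bits.h>
#include <linux/bitops.h>
#include <linux/printk.h>

/* Illustrative runtime field extractor: shift the masked field down
 * by the position of the mask's lowest set bit. */
static inline u64 demo_field_get(u64 mask, u64 reg)
{
        return (reg & mask) >> __ffs64(mask);
}

static void demo_decode(u32 addr_decode)
{
        /* Masks as in snb_cfg below: DIMM 0 size in bits 7:0 (in units
         * of the 256 MiB granularity), rank count in bit 17. */
        u64 size  = demo_field_get(GENMASK(7, 0), addr_decode) * BIT_ULL(28);
        u64 ranks = demo_field_get(BIT(17), addr_decode) + 1;

        pr_info("size=%llu bytes, ranks=%llu\n", size, ranks);
}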
+ nr_pages = MiB_TO_PAGES(dimm_info.size >> 20); + if (nr_pages == 0) + continue; + + nr_pages = nr_pages / dimm_info.ranks; + for (k = 0; k < dimm_info.ranks; k++) { + dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0); + dimm->nr_pages = nr_pages; + edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); + dimm->grain = 8; /* just a guess */ + dimm->mtype = cfg->mtype; + dimm->dtype = dimm_info.dtype; + dimm->edac_mode = EDAC_UNKNOWN; + } + } + } +} + +static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, int mc) +{ + struct edac_mc_layer layers[2]; + struct ie31200_priv *priv; + struct mem_ctl_info *mci; + void __iomem *window; + int ret; + + nr_channels = how_many_channels(pdev); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = IE31200_RANKS_PER_CHANNEL; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = nr_channels; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, + sizeof(struct ie31200_priv)); + if (!mci) + return -ENOMEM; + + window = ie31200_map_mchbar(pdev, cfg, mc); + if (!window) { + ret = -ENODEV; + goto fail_free; + } + + edac_dbg(3, "MC: init mci\n"); + mci->mtype_cap = BIT(cfg->mtype); + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = EDAC_MOD_STR; + mci->ctl_name = ie31200_devs[mc].ctl_name; + mci->dev_name = pci_name(pdev); + mci->edac_check = cfg->cmci ? NULL : ie31200_check; + mci->ctl_page_to_phys = NULL; + priv = mci->pvt_info; + priv->window = window; + priv->c0errlog = window + cfg->reg_eccerrlog_offset[0]; + priv->c1errlog = window + cfg->reg_eccerrlog_offset[1]; + priv->cfg = cfg; + priv->mci = mci; + priv->pdev = pdev; + device_initialize(&priv->dev); + /* + * The EDAC core uses mci->pdev (pointer to the structure device) + * as the memory controller ID. The SoCs attach one or more memory + * controllers to a single pci_dev (a single pci_dev->dev can + * correspond to multiple memory controllers). + * + * To make mci->pdev unique, assign pci_dev->dev to mci->pdev + * for the first memory controller and assign a unique priv->dev + * to mci->pdev for each additional memory controller. + */ + mci->pdev = mc ? &priv->dev : &pdev->dev; + + ie31200_get_dimm_config(mci, window, cfg, mc); + ie31200_clear_error_info(mci); + + if (edac_mc_add_mc(mci)) { + edac_dbg(3, "MC: failed edac_mc_add_mc()\n"); + ret = -ENODEV; + goto fail_unmap; + } + + ie31200_pvt.priv[mc] = priv; + return 0; +fail_unmap: + put_device(&priv->dev); + iounmap(window); +fail_free: + edac_mc_free(mci); + return ret; +} + +static void mce_check(struct mce *mce) +{ + struct ie31200_priv *priv; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + + __ie31200_check(priv->mci, mce); + } +} + +static int mce_handler(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *mce = (struct mce *)data; + char *type; + + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + + /* + * Ignore unless this is a memory related error. + * Don't check MCI_STATUS_ADDRV since it's not set on some CPUs. + */ + if ((mce->status & 0xefff) >> 7 != 1) + return NOTIFY_DONE; + + type = mce->mcgstatus & MCG_STATUS_MCIP ? 
"Exception" : "Event"; + + edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n", + mce->extcpu, type, mce->mcgstatus, + mce->bank, mce->status); + edac_dbg(0, "TSC 0x%llx\n", mce->tsc); + edac_dbg(0, "ADDR 0x%llx\n", mce->addr); + edac_dbg(0, "MISC 0x%llx\n", mce->misc); + edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n", + mce->cpuvendor, mce->cpuid, mce->time, + mce->socketid, mce->apicid); + + mce_check(mce); + mce->kflags |= MCE_HANDLED_EDAC; + + return NOTIFY_DONE; +} + +static struct notifier_block ie31200_mce_dec = { + .notifier_call = mce_handler, + .priority = MCE_PRIO_EDAC, +}; + +static void ie31200_unregister_mcis(void) +{ + struct ie31200_priv *priv; + struct mem_ctl_info *mci; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + + mci = priv->mci; + edac_mc_del_mc(mci->pdev); + iounmap(priv->window); + put_device(&priv->dev); + edac_mc_free(mci); + } +} + +static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) +{ + int i, ret; + + edac_dbg(0, "MC:\n"); + + if (!ecc_capable(pdev)) { + ie31200_printk(KERN_INFO, "No ECC support\n"); + return -ENODEV; + } + + for (i = 0; i < cfg->imc_num; i++) { + ret = ie31200_register_mci(pdev, cfg, i); + if (ret) + goto fail_register; + } + + if (cfg->cmci) { + mce_register_decode_chain(&ie31200_mce_dec); + edac_op_state = EDAC_OPSTATE_INT; + } else { + edac_op_state = EDAC_OPSTATE_POLL; + } + + /* get this far and it's successful. */ + edac_dbg(3, "MC: success\n"); + return 0; + +fail_register: + ie31200_unregister_mcis(); + return ret; +} + +static int ie31200_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int rc; + + edac_dbg(0, "MC:\n"); + if (pci_enable_device(pdev) < 0) + return -EIO; + rc = ie31200_probe1(pdev, (struct res_config *)ent->driver_data); + if (rc == 0 && !mci_pdev) + mci_pdev = pci_dev_get(pdev); + + return rc; +} + +static void ie31200_remove_one(struct pci_dev *pdev) +{ + struct ie31200_priv *priv = ie31200_pvt.priv[0]; + + edac_dbg(0, "\n"); + pci_dev_put(mci_pdev); + mci_pdev = NULL; + if (priv->cfg->cmci) + mce_unregister_decode_chain(&ie31200_mce_dec); + ie31200_unregister_mcis(); +} + +static struct res_config snb_cfg = { + .mtype = MEM_DDR3, + .imc_num = 1, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x40c8, + .reg_eccerrlog_offset[1] = 0x44c8, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity = BIT_ULL(28), + .reg_mad_dimm_offset[0] = 0x5004, + .reg_mad_dimm_offset[1] = 0x5008, + .reg_mad_dimm_size_mask[0] = GENMASK(7, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(15, 8), + .reg_mad_dimm_rank_mask[0] = BIT(17), + .reg_mad_dimm_rank_mask[1] = BIT(18), + .reg_mad_dimm_width_mask[0] = BIT(19), + .reg_mad_dimm_width_mask[1] = BIT(20), +}; + +static struct res_config skl_cfg = { + .mtype = MEM_DDR4, + .imc_num = 1, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x4048, + .reg_eccerrlog_offset[1] = 0x4448, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity = BIT_ULL(30), + .reg_mad_dimm_offset[0] = 0x500c, + .reg_mad_dimm_offset[1] = 0x5010, + 
.reg_mad_dimm_size_mask[0] = GENMASK(5, 0),
+	.reg_mad_dimm_size_mask[1] = GENMASK(21, 16),
+	.reg_mad_dimm_rank_mask[0] = BIT(10),
+	.reg_mad_dimm_rank_mask[1] = BIT(26),
+	.reg_mad_dimm_width_mask[0] = GENMASK(9, 8),
+	.reg_mad_dimm_width_mask[1] = GENMASK(25, 24),
+};
+
+static struct res_config rpl_s_cfg = {
+	.mtype = MEM_DDR5,
+	.cmci = true,
+	.imc_num = 2,
+	.reg_mchbar_mask = GENMASK_ULL(41, 17),
+	.reg_mchbar_window_size = BIT_ULL(16),
+	.reg_eccerrlog_offset[0] = 0xe048,
+	.reg_eccerrlog_offset[1] = 0xe848,
+	.reg_eccerrlog_ce_mask = BIT_ULL(0),
+	.reg_eccerrlog_ce_ovfl_mask = BIT_ULL(1),
+	.reg_eccerrlog_ue_mask = BIT_ULL(2),
+	.reg_eccerrlog_ue_ovfl_mask = BIT_ULL(3),
+	.reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27),
+	.reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16),
+	.msr_clear_eccerrlog_offset = 0x791,
+	.reg_mad_dimm_offset[0] = 0xd80c,
+	.reg_mad_dimm_offset[1] = 0xd810,
+	.reg_mad_dimm_size_granularity = BIT_ULL(29),
+	.reg_mad_dimm_size_mask[0] = GENMASK(6, 0),
+	.reg_mad_dimm_size_mask[1] = GENMASK(22, 16),
+	.reg_mad_dimm_rank_mask[0] = GENMASK(10, 9),
+	.reg_mad_dimm_rank_mask[1] = GENMASK(27, 26),
+	.reg_mad_dimm_width_mask[0] = GENMASK(8, 7),
+	.reg_mad_dimm_width_mask[1] = GENMASK(25, 24),
+};
+
+static const struct pci_device_id ie31200_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), (kernel_ulong_t)&snb_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), (kernel_ulong_t)&snb_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), (kernel_ulong_t)&snb_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), (kernel_ulong_t)&snb_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), (kernel_ulong_t)&snb_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg},
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg},
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg},
+	{ PCI_VDEVICE(INTEL,
PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_5), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_6), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_HX_1), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_2), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_3), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_4), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_5), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_6), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_7), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_8), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_9), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_10), (kernel_ulong_t)&rpl_s_cfg}, + { 0, } /* 0 terminated list. */ +}; +MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl); + +static struct pci_driver ie31200_driver = { + .name = EDAC_MOD_STR, + .probe = ie31200_init_one, + .remove = ie31200_remove_one, + .id_table = ie31200_pci_tbl, +}; + +static int __init ie31200_init(void) +{ + int pci_rc, i; + + edac_dbg(3, "MC:\n"); + + pci_rc = pci_register_driver(&ie31200_driver); + if (pci_rc < 0) + return pci_rc; + + if (!mci_pdev) { + ie31200_registered = 0; + for (i = 0; ie31200_pci_tbl[i].vendor != 0; i++) { + mci_pdev = pci_get_device(ie31200_pci_tbl[i].vendor, + ie31200_pci_tbl[i].device, + NULL); + if (mci_pdev) + break; + } + + if (!mci_pdev) { + edac_dbg(0, "ie31200 pci_get_device fail\n"); + pci_rc = -ENODEV; + goto fail0; + } + + pci_rc = ie31200_init_one(mci_pdev, &ie31200_pci_tbl[i]); + if (pci_rc < 0) { + edac_dbg(0, "ie31200 init fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + } + + return 0; +fail1: + pci_dev_put(mci_pdev); +fail0: + pci_unregister_driver(&ie31200_driver); + + return pci_rc; +} + +static void __exit ie31200_exit(void) +{ + edac_dbg(3, "MC:\n"); + pci_unregister_driver(&ie31200_driver); + if (!ie31200_registered) + ie31200_remove_one(mci_pdev); +} + +module_init(ie31200_init); +module_exit(ie31200_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>"); +MODULE_DESCRIPTION("MC support for Intel Processor E31200 memory hub controllers"); diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c new file mode 100644 index 000000000000..553c31a2d922 --- /dev/null +++ b/drivers/edac/igen6_edac.c @@ -0,0 +1,1601 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for Intel client SoC with integrated memory controller using IBECC + * + * Copyright (C) 2020 Intel Corporation + * + * The In-Band ECC (IBECC) IP provides ECC protection to all or specific + * regions of the physical memory space. 
It's used for memory controllers + * that don't support the out-of-band ECC which often needs an additional + * storage device to each channel for storing ECC data. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/irq_work.h> +#include <linux/llist.h> +#include <linux/genalloc.h> +#include <linux/edac.h> +#include <linux/bits.h> +#include <linux/io.h> +#include <asm/mach_traps.h> +#include <asm/nmi.h> +#include <asm/mce.h> + +#include "edac_mc.h" +#include "edac_module.h" + +#define IGEN6_REVISION "v2.5.1" + +#define EDAC_MOD_STR "igen6_edac" +#define IGEN6_NMI_NAME "igen6_ibecc" + +/* Debug macros */ +#define igen6_printk(level, fmt, arg...) \ + edac_printk(level, "igen6", fmt, ##arg) + +#define igen6_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg) + +#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo)) + +#define NUM_IMC 2 /* Max memory controllers */ +#define NUM_CHANNELS 2 /* Max channels */ +#define NUM_DIMMS 2 /* Max DIMMs per channel */ + +#define _4GB BIT_ULL(32) + +/* Size of physical memory */ +#define TOM_OFFSET 0xa0 +/* Top of low usable DRAM */ +#define TOLUD_OFFSET 0xbc +/* Capability register C */ +#define CAPID_C_OFFSET 0xec +#define CAPID_C_IBECC BIT(15) + +/* Capability register E */ +#define CAPID_E_OFFSET 0xf0 +#define CAPID_E_IBECC BIT(12) +#define CAPID_E_IBECC_BIT18 BIT(18) + +/* Error Status */ +#define ERRSTS_OFFSET 0xc8 +#define ERRSTS_CE BIT_ULL(6) +#define ERRSTS_UE BIT_ULL(7) + +/* Error Command */ +#define ERRCMD_OFFSET 0xca +#define ERRCMD_CE BIT_ULL(6) +#define ERRCMD_UE BIT_ULL(7) + +/* IBECC MMIO base address */ +#define IBECC_BASE (res_cfg->ibecc_base) +#define IBECC_ACTIVATE_OFFSET IBECC_BASE +#define IBECC_ACTIVATE_EN BIT(0) + +/* IBECC error log */ +#define ECC_ERROR_LOG_OFFSET (IBECC_BASE + res_cfg->ibecc_error_log_offset) +#define ECC_ERROR_LOG_CE BIT_ULL(62) +#define ECC_ERROR_LOG_UE BIT_ULL(63) +#define ECC_ERROR_LOG_ADDR_SHIFT 5 +#define ECC_ERROR_LOG_ADDR(v) GET_BITFIELD(v, 5, 38) +#define ECC_ERROR_LOG_ADDR45(v) GET_BITFIELD(v, 5, 45) +#define ECC_ERROR_LOG_SYND(v) GET_BITFIELD(v, 46, 61) + +/* Host MMIO base address */ +#define MCHBAR_OFFSET 0x48 +#define MCHBAR_EN BIT_ULL(0) +#define MCHBAR_BASE(v) (GET_BITFIELD(v, 16, 38) << 16) +#define MCHBAR_SIZE 0x10000 + +/* Parameters for the channel decode stage */ +#define IMC_BASE (res_cfg->imc_base) +#define MAD_INTER_CHANNEL_OFFSET IMC_BASE +#define MAD_INTER_CHANNEL_DDR_TYPE(v) GET_BITFIELD(v, 0, 2) +#define MAD_INTER_CHANNEL_ECHM(v) GET_BITFIELD(v, 3, 3) +#define MAD_INTER_CHANNEL_CH_L_MAP(v) GET_BITFIELD(v, 4, 4) +#define MAD_INTER_CHANNEL_CH_S_SIZE(v) ((u64)GET_BITFIELD(v, 12, 19) << 29) + +/* Parameters for DRAM decode stage */ +#define MAD_INTRA_CH0_OFFSET (IMC_BASE + 4) +#define MAD_INTRA_CH_DIMM_L_MAP(v) GET_BITFIELD(v, 0, 0) + +/* DIMM characteristics */ +#define MAD_DIMM_CH0_OFFSET (IMC_BASE + 0xc) +#define MAD_DIMM_CH_DIMM_L_SIZE(v) ((u64)GET_BITFIELD(v, 0, 6) << 29) +#define MAD_DIMM_CH_DLW(v) GET_BITFIELD(v, 7, 8) +#define MAD_DIMM_CH_DIMM_S_SIZE(v) ((u64)GET_BITFIELD(v, 16, 22) << 29) +#define MAD_DIMM_CH_DSW(v) GET_BITFIELD(v, 24, 25) + +/* Hash for memory controller selection */ +#define MAD_MC_HASH_OFFSET (IMC_BASE + 0x1b8) +#define MAC_MC_HASH_LSB(v) GET_BITFIELD(v, 1, 3) + +/* Hash for channel selection */ +#define CHANNEL_HASH_OFFSET (IMC_BASE + 0x24) +/* Hash for enhanced channel selection */ +#define CHANNEL_EHASH_OFFSET (IMC_BASE + 0x28) 
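+/*
+ * The channel hash registers (offsets above, field extractors below) encode
+ * an XOR mask over physical address bits 6..19, the LSB position of the
+ * channel interleave bit and the hash mode. As an illustrative example of
+ * the field helper, GET_BITFIELD(0xa5, 4, 7) extracts bits 4..7 of 0xa5 and
+ * yields 0xa. The channel index is the XOR-reduction of the masked address
+ * bits folded with the interleave bit (see decode_chan_idx() below).
+ */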
+#define CHANNEL_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) +#define CHANNEL_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) +#define CHANNEL_HASH_MODE(v) GET_BITFIELD(v, 28, 28) + +/* Parameters for memory slice decode stage */ +#define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) +#define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) + +static struct res_config { + bool machine_check; + /* The number of present memory controllers. */ + int num_imc; + u32 imc_base; + u32 cmf_base; + u32 cmf_size; + u32 ms_hash_offset; + u32 ibecc_base; + u32 ibecc_error_log_offset; + bool (*ibecc_available)(struct pci_dev *pdev); + /* Extract error address logged in IBECC */ + u64 (*err_addr)(u64 ecclog); + /* Convert error address logged in IBECC to system physical address */ + u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc); + /* Convert error address logged in IBECC to integrated memory controller address */ + u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc); +} *res_cfg; + +struct igen6_imc { + int mc; + struct mem_ctl_info *mci; + struct pci_dev *pdev; + struct device dev; + void __iomem *window; + u64 size; + u64 ch_s_size; + int ch_l_map; + u64 dimm_s_size[NUM_CHANNELS]; + u64 dimm_l_size[NUM_CHANNELS]; + int dimm_l_map[NUM_CHANNELS]; +}; + +static struct igen6_pvt { + struct igen6_imc imc[NUM_IMC]; + u64 ms_hash; + u64 ms_s_size; + int ms_l_map; +} *igen6_pvt; + +/* The top of low usable DRAM */ +static u32 igen6_tolud; +/* The size of physical memory */ +static u64 igen6_tom; + +struct decoded_addr { + int mc; + u64 imc_addr; + u64 sys_addr; + int channel_idx; + u64 channel_addr; + int sub_channel_idx; + u64 sub_channel_addr; +}; + +struct ecclog_node { + struct llist_node llnode; + int mc; + u64 ecclog; +}; + +/* + * In the NMI handler, the driver uses the lock-less memory allocator + * to allocate memory to store the IBECC error logs and links the logs + * to the lock-less list. Delay printk() and the work of error reporting + * to EDAC core in a worker. 
+ */
+#define ECCLOG_POOL_SIZE	PAGE_SIZE
+static LLIST_HEAD(ecclog_llist);
+static struct gen_pool *ecclog_pool;
+static char ecclog_buf[ECCLOG_POOL_SIZE];
+static struct irq_work ecclog_irq_work;
+static struct work_struct ecclog_work;
+
+/* Compute die IDs for Elkhart Lake with IBECC */
+#define DID_EHL_SKU5	0x4514
+#define DID_EHL_SKU6	0x4528
+#define DID_EHL_SKU7	0x452a
+#define DID_EHL_SKU8	0x4516
+#define DID_EHL_SKU9	0x452c
+#define DID_EHL_SKU10	0x452e
+#define DID_EHL_SKU11	0x4532
+#define DID_EHL_SKU12	0x4518
+#define DID_EHL_SKU13	0x451a
+#define DID_EHL_SKU14	0x4534
+#define DID_EHL_SKU15	0x4536
+
+/* Compute die IDs for ICL-NNPI with IBECC */
+#define DID_ICL_SKU8	0x4581
+#define DID_ICL_SKU10	0x4585
+#define DID_ICL_SKU11	0x4589
+#define DID_ICL_SKU12	0x458d
+
+/* Compute die IDs for Tiger Lake with IBECC */
+#define DID_TGL_SKU	0x9a14
+
+/* Compute die IDs for Alder Lake with IBECC */
+#define DID_ADL_SKU1	0x4601
+#define DID_ADL_SKU2	0x4602
+#define DID_ADL_SKU3	0x4621
+#define DID_ADL_SKU4	0x4641
+
+/* Compute die IDs for Alder Lake-N with IBECC */
+#define DID_ADL_N_SKU1	0x4614
+#define DID_ADL_N_SKU2	0x4617
+#define DID_ADL_N_SKU3	0x461b
+#define DID_ADL_N_SKU4	0x461c
+#define DID_ADL_N_SKU5	0x4673
+#define DID_ADL_N_SKU6	0x4674
+#define DID_ADL_N_SKU7	0x4675
+#define DID_ADL_N_SKU8	0x4677
+#define DID_ADL_N_SKU9	0x4678
+#define DID_ADL_N_SKU10	0x4679
+#define DID_ADL_N_SKU11	0x467c
+#define DID_ADL_N_SKU12	0x4632
+
+/* Compute die IDs for Arizona Beach with IBECC */
+#define DID_AZB_SKU1	0x4676
+
+/* Compute die IDs for Amston Lake with IBECC */
+#define DID_ASL_SKU1	0x464a
+
+/* Compute die IDs for Raptor Lake-P with IBECC */
+#define DID_RPL_P_SKU1	0xa706
+#define DID_RPL_P_SKU2	0xa707
+#define DID_RPL_P_SKU3	0xa708
+#define DID_RPL_P_SKU4	0xa716
+#define DID_RPL_P_SKU5	0xa718
+
+/* Compute die IDs for Meteor Lake-PS with IBECC */
+#define DID_MTL_PS_SKU1	0x7d21
+#define DID_MTL_PS_SKU2	0x7d22
+#define DID_MTL_PS_SKU3	0x7d23
+#define DID_MTL_PS_SKU4	0x7d24
+
+/* Compute die IDs for Meteor Lake-P with IBECC */
+#define DID_MTL_P_SKU1	0x7d01
+#define DID_MTL_P_SKU2	0x7d02
+#define DID_MTL_P_SKU3	0x7d14
+
+/* Compute die IDs for Arrow Lake-UH with IBECC */
+#define DID_ARL_UH_SKU1	0x7d06
+#define DID_ARL_UH_SKU2	0x7d20
+#define DID_ARL_UH_SKU3	0x7d30
+
+/* Compute die IDs for Panther Lake-H with IBECC */
+#define DID_PTL_H_SKU1	0xb000
+#define DID_PTL_H_SKU2	0xb001
+#define DID_PTL_H_SKU3	0xb002
+
+/* Compute die IDs for Wildcat Lake with IBECC */
+#define DID_WCL_SKU1	0xfd00
+
+static int get_mchbar(struct pci_dev *pdev, u64 *mchbar)
+{
+	union {
+		u64 v;
+		struct {
+			u32 v_lo;
+			u32 v_hi;
+		};
+	} u;
+
+	if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) {
+		igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
+		return -ENODEV;
+	}
+
+	if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) {
+		igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
+		return -ENODEV;
+	}
+
+	if (!(u.v & MCHBAR_EN)) {
+		igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
+		return -ENODEV;
+	}
+
+	*mchbar = MCHBAR_BASE(u.v);
+
+	return 0;
+}
+
+static bool ehl_ibecc_available(struct pci_dev *pdev)
+{
+	u32 v;
+
+	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
+		return false;
+
+	return !!(CAPID_C_IBECC & v);
+}
+
+static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
+{
+	return eaddr;
+}
+
+static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
+{
+	if (eaddr < igen6_tolud)
+		return eaddr;
+
+	if (igen6_tom <= _4GB)
+		return eaddr + igen6_tolud - _4GB;
+
+	if (eaddr
>= igen6_tom) + return eaddr + igen6_tolud - igen6_tom; + + return eaddr; +} + +static bool icl_ibecc_available(struct pci_dev *pdev) +{ + u32 v; + + if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v)) + return false; + + return !(CAPID_C_IBECC & v) && + (boot_cpu_data.x86_stepping >= 1); +} + +static bool tgl_ibecc_available(struct pci_dev *pdev) +{ + u32 v; + + if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v)) + return false; + + return !(CAPID_E_IBECC & v); +} + +static bool mtl_p_ibecc_available(struct pci_dev *pdev) +{ + u32 v; + + if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v)) + return false; + + return !(CAPID_E_IBECC_BIT18 & v); +} + +static bool mtl_ps_ibecc_available(struct pci_dev *pdev) +{ +#define MCHBAR_MEMSS_IBECCDIS 0x13c00 + void __iomem *window; + u64 mchbar; + u32 val; + + if (get_mchbar(pdev, &mchbar)) + return false; + + window = ioremap(mchbar, MCHBAR_SIZE * 2); + if (!window) { + igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar); + return false; + } + + val = readl(window + MCHBAR_MEMSS_IBECCDIS); + iounmap(window); + + /* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */ + return !GET_BITFIELD(val, 6, 6); +} + +static u64 mem_addr_to_sys_addr(u64 maddr) +{ + if (maddr < igen6_tolud) + return maddr; + + if (igen6_tom <= _4GB) + return maddr - igen6_tolud + _4GB; + + if (maddr < _4GB) + return maddr - igen6_tolud + igen6_tom; + + return maddr; +} + +static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit) +{ + u64 hash_addr = addr & mask, hash = hash_init; + u64 intlv = (addr >> intlv_bit) & 1; + int i; + + for (i = 6; i < 20; i++) + hash ^= (hash_addr >> i) & 1; + + return hash ^ intlv; +} + +static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc) +{ + u64 maddr, hash, mask, ms_s_size; + int intlv_bit; + u32 ms_hash; + + ms_s_size = igen6_pvt->ms_s_size; + if (eaddr >= ms_s_size) + return eaddr + ms_s_size; + + ms_hash = igen6_pvt->ms_hash; + + mask = MEM_SLICE_HASH_MASK(ms_hash); + intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6; + + maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) | + GET_BITFIELD(eaddr, 0, intlv_bit - 1); + + hash = mem_slice_hash(maddr, mask, mc, intlv_bit); + + return maddr | (hash << intlv_bit); +} + +static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc) +{ + u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc); + + return mem_addr_to_sys_addr(maddr); +} + +static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc) +{ + return eaddr; +} + +static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc) +{ + return mem_addr_to_sys_addr(eaddr); +} + +static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc) +{ + u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size; + struct igen6_imc *imc = &igen6_pvt->imc[mc]; + int intlv_bit; + u32 mc_hash; + + if (eaddr >= 2 * ms_s_size) + return eaddr - ms_s_size; + + mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET); + + intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6; + + imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit | + GET_BITFIELD(eaddr, 0, intlv_bit - 1); + + return imc_addr; +} + +static u64 rpl_p_err_addr(u64 ecclog) +{ + return ECC_ERROR_LOG_ADDR45(ecclog); +} + +static struct res_config ehl_cfg = { + .num_imc = 1, + .imc_base = 0x5000, + .ibecc_base = 0xdc00, + .ibecc_available = ehl_ibecc_available, + .ibecc_error_log_offset = 0x170, + .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr, + .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, +}; + +static struct res_config icl_cfg = { + .num_imc = 1, + .imc_base = 0x5000, + .ibecc_base = 0xd800, + 
.ibecc_error_log_offset = 0x170,
+	.ibecc_available = icl_ibecc_available,
+	.err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
+};
+
+static struct res_config tgl_cfg = {
+	.machine_check = true,
+	.num_imc = 2,
+	.imc_base = 0x5000,
+	.cmf_base = 0x11000,
+	.cmf_size = 0x800,
+	.ms_hash_offset = 0xac,
+	.ibecc_base = 0xd400,
+	.ibecc_error_log_offset = 0x170,
+	.ibecc_available = tgl_ibecc_available,
+	.err_addr_to_sys_addr = tgl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr = tgl_err_addr_to_imc_addr,
+};
+
+static struct res_config adl_cfg = {
+	.machine_check = true,
+	.num_imc = 2,
+	.imc_base = 0xd800,
+	.ibecc_base = 0xd400,
+	.ibecc_error_log_offset = 0x68,
+	.ibecc_available = tgl_ibecc_available,
+	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
+};
+
+static struct res_config adl_n_cfg = {
+	.machine_check = true,
+	.num_imc = 1,
+	.imc_base = 0xd800,
+	.ibecc_base = 0xd400,
+	.ibecc_error_log_offset = 0x68,
+	.ibecc_available = tgl_ibecc_available,
+	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
+};
+
+static struct res_config rpl_p_cfg = {
+	.machine_check = true,
+	.num_imc = 2,
+	.imc_base = 0xd800,
+	.ibecc_base = 0xd400,
+	.ibecc_error_log_offset = 0x68,
+	.ibecc_available = tgl_ibecc_available,
+	.err_addr = rpl_p_err_addr,
+	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
+};
+
+static struct res_config mtl_ps_cfg = {
+	.machine_check = true,
+	.num_imc = 2,
+	.imc_base = 0xd800,
+	.ibecc_base = 0xd400,
+	.ibecc_error_log_offset = 0x170,
+	.ibecc_available = mtl_ps_ibecc_available,
+	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
+};
+
+static struct res_config mtl_p_cfg = {
+	.machine_check = true,
+	.num_imc = 2,
+	.imc_base = 0xd800,
+	.ibecc_base = 0xd400,
+	.ibecc_error_log_offset = 0x170,
+	.ibecc_available = mtl_p_ibecc_available,
+	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
+};
+
+static struct res_config wcl_cfg = {
+	.machine_check = true,
+	.num_imc = 1,
+	.imc_base = 0xd800,
+	.ibecc_base = 0xd400,
+	.ibecc_error_log_offset = 0x170,
+	.ibecc_available = mtl_p_ibecc_available,
+	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
+};
+
+static const struct pci_device_id igen6_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
+	{ PCI_VDEVICE(INTEL,
DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_WCL_SKU1), (kernel_ulong_t)&wcl_cfg }, + { }, +}; +MODULE_DEVICE_TABLE(pci, igen6_pci_tbl); + +static enum dev_type get_width(int dimm_l, u32 mad_dimm) +{ + u32 w = dimm_l ? 
MAD_DIMM_CH_DLW(mad_dimm) : + MAD_DIMM_CH_DSW(mad_dimm); + + switch (w) { + case 0: + return DEV_X8; + case 1: + return DEV_X16; + case 2: + return DEV_X32; + default: + return DEV_UNKNOWN; + } +} + +static enum mem_type get_memory_type(u32 mad_inter) +{ + u32 t = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter); + + switch (t) { + case 0: + return MEM_DDR4; + case 1: + return MEM_DDR3; + case 2: + return MEM_LPDDR3; + case 3: + return MEM_LPDDR4; + case 4: + return MEM_WIO2; + default: + return MEM_UNKNOWN; + } +} + +static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit) +{ + u64 hash_addr = addr & mask, hash = 0; + u64 intlv = (addr >> intlv_bit) & 1; + int i; + + for (i = 6; i < 20; i++) + hash ^= (hash_addr >> i) & 1; + + return (int)hash ^ intlv; +} + +static u64 decode_channel_addr(u64 addr, int intlv_bit) +{ + u64 channel_addr; + + /* Remove the interleave bit and shift upper part down to fill gap */ + channel_addr = GET_BITFIELD(addr, intlv_bit + 1, 63) << intlv_bit; + channel_addr |= GET_BITFIELD(addr, 0, intlv_bit - 1); + + return channel_addr; +} + +static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map, + int *idx, u64 *sub_addr) +{ + int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6; + + if (addr > 2 * s_size) { + *sub_addr = addr - s_size; + *idx = l_map; + return; + } + + if (CHANNEL_HASH_MODE(hash)) { + *sub_addr = decode_channel_addr(addr, intlv_bit); + *idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit); + } else { + *sub_addr = decode_channel_addr(addr, 6); + *idx = GET_BITFIELD(addr, 6, 6); + } +} + +static int igen6_decode(struct decoded_addr *res) +{ + struct igen6_imc *imc = &igen6_pvt->imc[res->mc]; + u64 addr = res->imc_addr, sub_addr, s_size; + int idx, l_map; + u32 hash; + + if (addr >= igen6_tom) { + edac_dbg(0, "Address 0x%llx out of range\n", addr); + return -EINVAL; + } + + /* Decode channel */ + hash = readl(imc->window + CHANNEL_HASH_OFFSET); + s_size = imc->ch_s_size; + l_map = imc->ch_l_map; + decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr); + res->channel_idx = idx; + res->channel_addr = sub_addr; + + /* Decode sub-channel/DIMM */ + hash = readl(imc->window + CHANNEL_EHASH_OFFSET); + s_size = imc->dimm_s_size[idx]; + l_map = imc->dimm_l_map[idx]; + decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr); + res->sub_channel_idx = idx; + res->sub_channel_addr = sub_addr; + + return 0; +} + +static void igen6_output_error(struct decoded_addr *res, + struct mem_ctl_info *mci, u64 ecclog) +{ + enum hw_event_mc_err_type type = ecclog & ECC_ERROR_LOG_UE ? 
+				HW_EVENT_ERR_UNCORRECTED :
+				HW_EVENT_ERR_CORRECTED;
+
+	edac_mc_handle_error(type, mci, 1,
+			     res->sys_addr >> PAGE_SHIFT,
+			     res->sys_addr & ~PAGE_MASK,
+			     ECC_ERROR_LOG_SYND(ecclog),
+			     res->channel_idx, res->sub_channel_idx,
+			     -1, "", "");
+}
+
+static struct gen_pool *ecclog_gen_pool_create(void)
+{
+	struct gen_pool *pool;
+
+	pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1);
+	if (!pool)
+		return NULL;
+
+	if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) {
+		gen_pool_destroy(pool);
+		return NULL;
+	}
+
+	return pool;
+}
+
+static int ecclog_gen_pool_add(int mc, u64 ecclog)
+{
+	struct ecclog_node *node;
+
+	node = (void *)gen_pool_alloc(ecclog_pool, sizeof(*node));
+	if (!node)
+		return -ENOMEM;
+
+	node->mc = mc;
+	node->ecclog = ecclog;
+	llist_add(&node->llnode, &ecclog_llist);
+
+	return 0;
+}
+
+/*
+ * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI
+ * configuration space status register ERRSTS can indicate whether a
+ * correctable error or an uncorrectable error occurred. We only use the
+ * ECC_ERROR_LOG register to check error type, but need to clear both
+ * registers to enable future error events.
+ */
+static u64 ecclog_read_and_clear(struct igen6_imc *imc)
+{
+	u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
+
+	/*
+	 * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
+	 * the invalid value ~0. This will result in a flood of invalid
+	 * error reports in polling mode. Skip it.
+	 */
+	if (ecclog == ~0)
+		return 0;
+
+	/* Neither a CE nor a UE. Skip it. */
+	if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
+		return 0;
+
+	/* Clear CE/UE bits by writing 1s */
+	writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
+
+	return ecclog;
+}
+
+static void errsts_clear(struct igen6_imc *imc)
+{
+	u16 errsts;
+
+	if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) {
+		igen6_printk(KERN_ERR, "Failed to read ERRSTS\n");
+		return;
+	}
+
+	/* Clear CE/UE bits by writing 1s */
+	if (errsts & (ERRSTS_CE | ERRSTS_UE))
+		pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts);
+}
+
+static int errcmd_enable_error_reporting(bool enable)
+{
+	struct igen6_imc *imc = &igen6_pvt->imc[0];
+	u16 errcmd;
+	int rc;
+
+	rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
+	if (rc)
+		return pcibios_err_to_errno(rc);
+
+	if (enable)
+		errcmd |= ERRCMD_CE | ERRCMD_UE;
+	else
+		errcmd &= ~(ERRCMD_CE | ERRCMD_UE);
+
+	rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
+	if (rc)
+		return pcibios_err_to_errno(rc);
+
+	return 0;
+}
+
+static int ecclog_handler(void)
+{
+	struct igen6_imc *imc;
+	int i, n = 0;
+	u64 ecclog;
+
+	for (i = 0; i < res_cfg->num_imc; i++) {
+		imc = &igen6_pvt->imc[i];
+
+		/* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
+
+		ecclog = ecclog_read_and_clear(imc);
+		if (!ecclog)
+			continue;
+
+		if (!ecclog_gen_pool_add(i, ecclog))
+			irq_work_queue(&ecclog_irq_work);
+
+		n++;
+	}
+
+	return n;
+}
+
+static void ecclog_work_cb(struct work_struct *work)
+{
+	struct ecclog_node *node, *tmp;
+	struct mem_ctl_info *mci;
+	struct llist_node *head;
+	struct decoded_addr res;
+	u64 eaddr;
+
+	head = llist_del_all(&ecclog_llist);
+	if (!head)
+		return;
+
+	llist_for_each_entry_safe(node, tmp, head, llnode) {
+		memset(&res, 0, sizeof(res));
+		if (res_cfg->err_addr)
+			eaddr = res_cfg->err_addr(node->ecclog);
+		else
+			eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
+				ECC_ERROR_LOG_ADDR_SHIFT;
+		res.mc = node->mc;
+		res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
+		res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);
+
+		mci = igen6_pvt->imc[res.mc].mci;
+
+		edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog);
+		igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n");
+		igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr);
+
+		if (!igen6_decode(&res))
+			igen6_output_error(&res, mci, node->ecclog);
+
+		gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node));
+	}
+}
+
+static void ecclog_irq_work_cb(struct irq_work *irq_work)
+{
+	int i;
+
+	for (i = 0; i < res_cfg->num_imc; i++)
+		errsts_clear(&igen6_pvt->imc[i]);
+
+	if (!llist_empty(&ecclog_llist))
+		schedule_work(&ecclog_work);
+}
+
+static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+	unsigned char reason;
+
+	if (!ecclog_handler())
+		return NMI_DONE;
+
+	/*
+	 * Both In-Band ECC correctable and uncorrectable errors are
+	 * reported by SERR# NMI. The NMI generic code (see pci_serr_error())
+	 * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to
+	 * re-enable the SERR# NMI after NMI handling. So clear this bit here
+	 * to re-enable SERR# NMI for receiving future In-Band ECC errors.
+	 */
+	reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK;
+	reason |= NMI_REASON_CLEAR_SERR;
+	outb(reason, NMI_REASON_PORT);
+	reason &= ~NMI_REASON_CLEAR_SERR;
+	outb(reason, NMI_REASON_PORT);
+
+	return NMI_HANDLED;
+}
+
+static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
+			      void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	char *type;
+
+	if (mce->kflags & MCE_HANDLED_CEC)
+		return NOTIFY_DONE;
+
+	/*
+	 * Ignore unless this is a memory related error.
+	 * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
+	 * since this bit isn't set on some CPUs (e.g., Tiger Lake UP3).
+	 */
+	if ((mce->status & 0xefff) >> 7 != 1)
+		return NOTIFY_DONE;
+
+	if (mce->mcgstatus & MCG_STATUS_MCIP)
+		type = "Exception";
+	else
+		type = "Event";
+
+	edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
+		 mce->extcpu, type, mce->mcgstatus,
+		 mce->bank, mce->status);
+	edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
+	edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
+	edac_dbg(0, "MISC 0x%llx\n", mce->misc);
+	edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
+		 mce->cpuvendor, mce->cpuid, mce->time,
+		 mce->socketid, mce->apicid);
+	/*
+	 * We just use the Machine Check for the memory error notification.
+	 * Each memory controller is associated with an IBECC instance.
+	 * Directly read and clear the error information (error address and
+	 * error type) on all the IBECC instances so that we know on which
+	 * memory controller the memory error(s) occurred.
+ */ + if (!ecclog_handler()) + return NOTIFY_DONE; + + mce->kflags |= MCE_HANDLED_EDAC; + + return NOTIFY_DONE; +} + +static struct notifier_block ecclog_mce_dec = { + .notifier_call = ecclog_mce_handler, + .priority = MCE_PRIO_EDAC, +}; + +static bool igen6_check_ecc(struct igen6_imc *imc) +{ + u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET); + + return !!(activate & IBECC_ACTIVATE_EN); +} + +static int igen6_get_dimm_config(struct mem_ctl_info *mci) +{ + struct igen6_imc *imc = mci->pvt_info; + u32 mad_inter, mad_intra, mad_dimm; + int i, j, ndimms, mc = imc->mc; + struct dimm_info *dimm; + enum mem_type mtype; + enum dev_type dtype; + u64 dsize; + bool ecc; + + edac_dbg(2, "\n"); + + mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET); + mtype = get_memory_type(mad_inter); + ecc = igen6_check_ecc(imc); + imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter); + imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter); + + for (i = 0; i < NUM_CHANNELS; i++) { + mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4); + mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4); + + imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm); + imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm); + imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra); + imc->size += imc->dimm_s_size[i]; + imc->size += imc->dimm_l_size[i]; + ndimms = 0; + + for (j = 0; j < NUM_DIMMS; j++) { + dimm = edac_get_dimm(mci, i, j, 0); + + if (j ^ imc->dimm_l_map[i]) { + dtype = get_width(0, mad_dimm); + dsize = imc->dimm_s_size[i]; + } else { + dtype = get_width(1, mad_dimm); + dsize = imc->dimm_l_size[i]; + } + + if (!dsize) + continue; + + dimm->grain = 64; + dimm->mtype = mtype; + dimm->dtype = dtype; + dimm->nr_pages = MiB_TO_PAGES(dsize >> 20); + dimm->edac_mode = EDAC_SECDED; + snprintf(dimm->label, sizeof(dimm->label), + "MC#%d_Chan#%d_DIMM#%d", mc, i, j); + edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n", + mc, i, j, dsize >> 20, dimm->nr_pages); + + ndimms++; + } + + if (ndimms && !ecc) { + igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc); + return -ENODEV; + } + } + + edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20); + + return 0; +} + +#ifdef CONFIG_EDAC_DEBUG +/* Top of upper usable DRAM */ +static u64 igen6_touud; +#define TOUUD_OFFSET 0xa8 + +static void igen6_reg_dump(struct igen6_imc *imc) +{ + int i; + + edac_dbg(2, "CHANNEL_HASH : 0x%x\n", + readl(imc->window + CHANNEL_HASH_OFFSET)); + edac_dbg(2, "CHANNEL_EHASH : 0x%x\n", + readl(imc->window + CHANNEL_EHASH_OFFSET)); + edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n", + readl(imc->window + MAD_INTER_CHANNEL_OFFSET)); + edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n", + readq(imc->window + ECC_ERROR_LOG_OFFSET)); + + for (i = 0; i < NUM_CHANNELS; i++) { + edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i, + readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4)); + edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i, + readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4)); + } + edac_dbg(2, "TOLUD : 0x%x", igen6_tolud); + edac_dbg(2, "TOUUD : 0x%llx", igen6_touud); + edac_dbg(2, "TOM : 0x%llx", igen6_tom); +} + +static struct dentry *igen6_test; + +static int debugfs_u64_set(void *data, u64 val) +{ + u64 ecclog; + + if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) { + edac_dbg(0, "Address 0x%llx out of range\n", val); + return 0; + } + + pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); + + val >>= ECC_ERROR_LOG_ADDR_SHIFT; + ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE; + + 
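+	/*
+	 * Inject the fake log through the same path as a hardware-reported
+	 * error: stash it in the lock-less pool and let the irq_work kick
+	 * off the worker that decodes and reports it to the EDAC core.
+	 */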
if (!ecclog_gen_pool_add(0, ecclog))
+		irq_work_queue(&ecclog_irq_work);
+
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+
+static void igen6_debug_setup(void)
+{
+	igen6_test = edac_debugfs_create_dir("igen6_test");
+	if (!igen6_test)
+		return;
+
+	if (!edac_debugfs_create_file("addr", 0200, igen6_test,
+				      NULL, &fops_u64_wo)) {
+		debugfs_remove(igen6_test);
+		igen6_test = NULL;
+	}
+}
+
+static void igen6_debug_teardown(void)
+{
+	debugfs_remove_recursive(igen6_test);
+}
+#else
+static void igen6_reg_dump(struct igen6_imc *imc) {}
+static void igen6_debug_setup(void) {}
+static void igen6_debug_teardown(void) {}
+#endif
+
+static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
+{
+	union {
+		u64 v;
+		struct {
+			u32 v_lo;
+			u32 v_hi;
+		};
+	} u;
+
+	edac_dbg(2, "\n");
+
+	if (!res_cfg->ibecc_available(pdev)) {
+		edac_dbg(2, "No In-Band ECC IP\n");
+		goto fail;
+	}
+
+	if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) {
+		igen6_printk(KERN_ERR, "Failed to read TOLUD\n");
+		goto fail;
+	}
+
+	igen6_tolud &= GENMASK(31, 20);
+
+	if (pci_read_config_dword(pdev, TOM_OFFSET, &u.v_lo)) {
+		igen6_printk(KERN_ERR, "Failed to read lower TOM\n");
+		goto fail;
+	}
+
+	if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &u.v_hi)) {
+		igen6_printk(KERN_ERR, "Failed to read upper TOM\n");
+		goto fail;
+	}
+
+	igen6_tom = u.v & GENMASK_ULL(38, 20);
+
+	if (get_mchbar(pdev, mchbar))
+		goto fail;
+
+#ifdef CONFIG_EDAC_DEBUG
+	if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo))
+		edac_dbg(2, "Failed to read lower TOUUD\n");
+	else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &u.v_hi))
+		edac_dbg(2, "Failed to read upper TOUUD\n");
+	else
+		igen6_touud = u.v & GENMASK_ULL(38, 20);
+#endif
+
+	return 0;
+fail:
+	return -ENODEV;
+}
+
+static void igen6_check(struct mem_ctl_info *mci)
+{
+	struct igen6_imc *imc = mci->pvt_info;
+	u64 ecclog;
+
+	/* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
+	ecclog = ecclog_read_and_clear(imc);
+	if (!ecclog)
+		return;
+
+	if (!ecclog_gen_pool_add(imc->mc, ecclog))
+		irq_work_queue(&ecclog_irq_work);
+}
+
+/* Check whether the memory controller is absent. */
+static bool igen6_imc_absent(void __iomem *window)
+{
+	return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0;
+}
+
+static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
+{
+	struct edac_mc_layer layers[2];
+	struct mem_ctl_info *mci;
+	struct igen6_imc *imc;
+	int rc;
+
+	edac_dbg(2, "\n");
+
+	layers[0].type = EDAC_MC_LAYER_CHANNEL;
+	layers[0].size = NUM_CHANNELS;
+	layers[0].is_virt_csrow = false;
+	layers[1].type = EDAC_MC_LAYER_SLOT;
+	layers[1].size = NUM_DIMMS;
+	layers[1].is_virt_csrow = true;
+
+	mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0);
+	if (!mci) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+
+	mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc);
+	if (!mci->ctl_name) {
+		rc = -ENOMEM;
+		goto fail2;
+	}
+
+	mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->mod_name = EDAC_MOD_STR;
+	mci->dev_name = pci_name(pdev);
+	if (edac_op_state == EDAC_OPSTATE_POLL)
+		mci->edac_check = igen6_check;
+	mci->pvt_info = &igen6_pvt->imc[mc];
+
+	imc = mci->pvt_info;
+	device_initialize(&imc->dev);
+	/*
+	 * The EDAC core uses mci->pdev (pointer to the structure device)
+	 * as the memory controller ID. The client SoCs attach one or more
+	 * memory controllers to a single pci_dev (a single pci_dev->dev
+	 * can correspond to multiple memory controllers).
+	 *
+	 * To make mci->pdev unique, assign pci_dev->dev to mci->pdev
+	 * for the first memory controller and assign a unique imc->dev
+	 * to mci->pdev for each additional memory controller.
+	 */
+	mci->pdev = mc ? &imc->dev : &pdev->dev;
+	imc->mc = mc;
+	imc->pdev = pdev;
+	imc->window = window;
+
+	igen6_reg_dump(imc);
+
+	rc = igen6_get_dimm_config(mci);
+	if (rc)
+		goto fail3;
+
+	rc = edac_mc_add_mc(mci);
+	if (rc) {
+		igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc);
+		goto fail3;
+	}
+
+	imc->mci = mci;
+	return 0;
+fail3:
+	put_device(&imc->dev);
+	mci->pvt_info = NULL;
+	kfree(mci->ctl_name);
+fail2:
+	edac_mc_free(mci);
+fail:
+	return rc;
+}
+
+static void igen6_unregister_mcis(void)
+{
+	struct mem_ctl_info *mci;
+	struct igen6_imc *imc;
+	int i;
+
+	edac_dbg(2, "\n");
+
+	for (i = 0; i < res_cfg->num_imc; i++) {
+		imc = &igen6_pvt->imc[i];
+		mci = imc->mci;
+		if (!mci)
+			continue;
+
+		edac_mc_del_mc(mci->pdev);
+		kfree(mci->ctl_name);
+		mci->pvt_info = NULL;
+		edac_mc_free(mci);
+		put_device(&imc->dev);
+		iounmap(imc->window);
+	}
+}
+
+static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
+{
+	void __iomem *window;
+	int lmc, pmc, rc;
+	u64 base;
+
+	for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
+		base = mchbar + pmc * MCHBAR_SIZE;
+		window = ioremap(base, MCHBAR_SIZE);
+		if (!window) {
+			igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
+			rc = -ENOMEM;
+			goto out_unregister_mcis;
+		}
+
+		if (igen6_imc_absent(window)) {
+			iounmap(window);
+			edac_dbg(2, "Skip absent mc%d\n", pmc);
+			continue;
+		}
+
+		rc = igen6_register_mci(lmc, window, pdev);
+		if (rc)
+			goto out_iounmap;
+
+		/* Done, if all present MCs are detected and registered.
*/ + if (++lmc >= res_cfg->num_imc) + break; + } + + if (!lmc) { + igen6_printk(KERN_ERR, "No mc found.\n"); + return -ENODEV; + } + + if (lmc < res_cfg->num_imc) { + igen6_printk(KERN_DEBUG, "Expected %d mcs, but only %d detected.", + res_cfg->num_imc, lmc); + res_cfg->num_imc = lmc; + } + + return 0; + +out_iounmap: + iounmap(window); + +out_unregister_mcis: + igen6_unregister_mcis(); + + return rc; +} + +static int igen6_mem_slice_setup(u64 mchbar) +{ + struct igen6_imc *imc = &igen6_pvt->imc[0]; + u64 base = mchbar + res_cfg->cmf_base; + u32 offset = res_cfg->ms_hash_offset; + u32 size = res_cfg->cmf_size; + u64 ms_s_size, ms_hash; + void __iomem *cmf; + int ms_l_map; + + edac_dbg(2, "\n"); + + if (imc[0].size < imc[1].size) { + ms_s_size = imc[0].size; + ms_l_map = 1; + } else { + ms_s_size = imc[1].size; + ms_l_map = 0; + } + + igen6_pvt->ms_s_size = ms_s_size; + igen6_pvt->ms_l_map = ms_l_map; + + edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n", + ms_s_size >> 20, ms_l_map); + + if (!size) + return 0; + + cmf = ioremap(base, size); + if (!cmf) { + igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base); + return -ENODEV; + } + + ms_hash = readq(cmf + offset); + igen6_pvt->ms_hash = ms_hash; + + edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash); + + iounmap(cmf); + + return 0; +} + +static int register_err_handler(void) +{ + int rc; + + if (res_cfg->machine_check) { + mce_register_decode_chain(&ecclog_mce_dec); + return 0; + } + + rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler, + 0, IGEN6_NMI_NAME); + if (rc) { + igen6_printk(KERN_ERR, "Failed to register NMI handler\n"); + return rc; + } + + return 0; +} + +static void unregister_err_handler(void) +{ + if (res_cfg->machine_check) { + mce_unregister_decode_chain(&ecclog_mce_dec); + return; + } + + unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); +} + +static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent) +{ + /* + * Quirk: Certain SoCs' error reporting interrupts don't work. + * Force polling mode for them to ensure that memory error + * events can be handled. + */ + if (ent->device == DID_ADL_N_SKU4) { + edac_op_state = EDAC_OPSTATE_POLL; + return; + } + + /* Set the mode according to the configuration data. 
*/ + if (cfg->machine_check) + edac_op_state = EDAC_OPSTATE_INT; + else + edac_op_state = EDAC_OPSTATE_NMI; +} + +static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + u64 mchbar; + int rc; + + edac_dbg(2, "\n"); + + igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL); + if (!igen6_pvt) + return -ENOMEM; + + res_cfg = (struct res_config *)ent->driver_data; + + rc = igen6_pci_setup(pdev, &mchbar); + if (rc) + goto fail; + + opstate_set(res_cfg, ent); + + rc = igen6_register_mcis(pdev, mchbar); + if (rc) + goto fail; + + if (res_cfg->num_imc > 1) { + rc = igen6_mem_slice_setup(mchbar); + if (rc) + goto fail2; + } + + ecclog_pool = ecclog_gen_pool_create(); + if (!ecclog_pool) { + rc = -ENOMEM; + goto fail2; + } + + INIT_WORK(&ecclog_work, ecclog_work_cb); + init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb); + + rc = register_err_handler(); + if (rc) + goto fail3; + + /* Enable error reporting */ + rc = errcmd_enable_error_reporting(true); + if (rc) { + igen6_printk(KERN_ERR, "Failed to enable error reporting\n"); + goto fail4; + } + + /* Check if any pending errors before/during the registration of the error handler */ + ecclog_handler(); + + igen6_debug_setup(); + return 0; +fail4: + unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); +fail3: + gen_pool_destroy(ecclog_pool); +fail2: + igen6_unregister_mcis(); +fail: + kfree(igen6_pvt); + return rc; +} + +static void igen6_remove(struct pci_dev *pdev) +{ + edac_dbg(2, "\n"); + + igen6_debug_teardown(); + errcmd_enable_error_reporting(false); + unregister_err_handler(); + irq_work_sync(&ecclog_irq_work); + flush_work(&ecclog_work); + gen_pool_destroy(ecclog_pool); + igen6_unregister_mcis(); + kfree(igen6_pvt); +} + +static struct pci_driver igen6_driver = { + .name = EDAC_MOD_STR, + .probe = igen6_probe, + .remove = igen6_remove, + .id_table = igen6_pci_tbl, +}; + +static int __init igen6_init(void) +{ + const char *owner; + int rc; + + edac_dbg(2, "\n"); + + if (ghes_get_devices()) + return -EBUSY; + + owner = edac_get_owner(); + if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) + return -EBUSY; + + rc = pci_register_driver(&igen6_driver); + if (rc) + return rc; + + igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION); + + return 0; +} + +static void __exit igen6_exit(void) +{ + edac_dbg(2, "\n"); + + pci_unregister_driver(&igen6_driver); +} + +module_init(igen6_init); +module_exit(igen6_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Qiuxu Zhuo"); +MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC"); diff --git a/drivers/edac/imh_base.c b/drivers/edac/imh_base.c new file mode 100644 index 000000000000..4348b3883b45 --- /dev/null +++ b/drivers/edac/imh_base.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for Intel(R) servers with Integrated Memory/IO Hub-based memory controller. + * Copyright (c) 2025, Intel Corporation. + */ + +#include <linux/kernel.h> +#include <linux/io.h> +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> +#include <asm/mce.h> +#include <asm/cpu.h> +#include "edac_module.h" +#include "skx_common.h" + +#define IMH_REVISION "v0.0.1" +#define EDAC_MOD_STR "imh_edac" + +/* Debug macros */ +#define imh_printk(level, fmt, arg...) 
\ + edac_printk(level, "imh", fmt, ##arg) + +/* Configuration Agent(Ubox) */ +#define MMIO_BASE_H(reg) (((u64)GET_BITFIELD(reg, 0, 29)) << 23) +#define SOCKET_ID(reg) GET_BITFIELD(reg, 0, 3) + +/* PUNIT */ +#define DDR_IMC_BITMAP(reg) GET_BITFIELD(reg, 23, 30) + +/* Memory Controller */ +#define ECC_ENABLED(reg) GET_BITFIELD(reg, 2, 2) +#define DIMM_POPULATED(reg) GET_BITFIELD(reg, 15, 15) + +/* System Cache Agent(SCA) */ +#define TOLM(reg) (((u64)GET_BITFIELD(reg, 16, 31)) << 16) +#define TOHM(reg) (((u64)GET_BITFIELD(reg, 16, 51)) << 16) + +/* Home Agent (HA) */ +#define NMCACHING(reg) GET_BITFIELD(reg, 8, 8) + +/** + * struct local_reg - A register as described in the local package view. + * + * @pkg: (input) The package where the register is located. + * @pbase: (input) The IP MMIO base physical address in the local package view. + * @size: (input) The IP MMIO size. + * @offset: (input) The register offset from the IP MMIO base @pbase. + * @width: (input) The register width in byte. + * @vbase: (internal) The IP MMIO base virtual address. + * @val: (output) The register value. + */ +struct local_reg { + int pkg; + u64 pbase; + u32 size; + u32 offset; + u8 width; + void __iomem *vbase; + u64 val; +}; + +#define DEFINE_LOCAL_REG(name, cfg, package, north, ip_name, ip_idx, reg_name) \ + struct local_reg name = { \ + .pkg = package, \ + .pbase = (north ? (cfg)->mmio_base_l_north : \ + (cfg)->mmio_base_l_south) + \ + (cfg)->ip_name##_base + \ + (cfg)->ip_name##_size * (ip_idx), \ + .size = (cfg)->ip_name##_size, \ + .offset = (cfg)->ip_name##_reg_##reg_name##_offset, \ + .width = (cfg)->ip_name##_reg_##reg_name##_width, \ + } + +static u64 readx(void __iomem *addr, u8 width) +{ + switch (width) { + case 1: + return readb(addr); + case 2: + return readw(addr); + case 4: + return readl(addr); + case 8: + return readq(addr); + default: + imh_printk(KERN_ERR, "Invalid reg 0x%p width %d\n", addr, width); + return 0; + } +} + +static void __read_local_reg(void *reg) +{ + struct local_reg *r = (struct local_reg *)reg; + + r->val = readx(r->vbase + r->offset, r->width); +} + +/* Read a local-view register. */ +static bool read_local_reg(struct local_reg *reg) +{ + int cpu; + + /* Get the target CPU in the package @reg->pkg. */ + for_each_online_cpu(cpu) { + if (reg->pkg == topology_physical_package_id(cpu)) + break; + } + + if (cpu >= nr_cpu_ids) + return false; + + reg->vbase = ioremap(reg->pbase, reg->size); + if (!reg->vbase) { + imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", reg->pbase); + return false; + } + + /* Get the target CPU to read the register. */ + smp_call_function_single(cpu, __read_local_reg, reg, 1); + iounmap(reg->vbase); + + return true; +} + +/* Get the bitmap of memory controller instances in package @pkg. */ +static u32 get_imc_bitmap(struct res_config *cfg, int pkg, bool north) +{ + DEFINE_LOCAL_REG(reg, cfg, pkg, north, pcu, 0, capid3); + + if (!read_local_reg(®)) + return 0; + + edac_dbg(2, "Pkg%d %s mc instances bitmap 0x%llx (reg 0x%llx)\n", + pkg, north ? 
"north" : "south", + DDR_IMC_BITMAP(reg.val), reg.val); + + return DDR_IMC_BITMAP(reg.val); +} + +static void imc_release(struct device *dev) +{ + edac_dbg(2, "imc device %s released\n", dev_name(dev)); + kfree(dev); +} + +static int __get_ddr_munits(struct res_config *cfg, struct skx_dev *d, + bool north, int lmc) +{ + unsigned long size = cfg->ddr_chan_mmio_sz * cfg->ddr_chan_num; + unsigned long bitmap = get_imc_bitmap(cfg, d->pkg, north); + void __iomem *mbase; + struct device *dev; + int i, rc, pmc; + u64 base; + + for_each_set_bit(i, &bitmap, sizeof(bitmap) * 8) { + base = north ? d->mmio_base_h_north : d->mmio_base_h_south; + base += cfg->ddr_imc_base + size * i; + + edac_dbg(2, "Pkg%d mc%d mmio base 0x%llx size 0x%lx\n", + d->pkg, lmc, base, size); + + /* Set up the imc MMIO. */ + mbase = ioremap(base, size); + if (!mbase) { + imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", base); + return -ENOMEM; + } + + d->imc[lmc].mbase = mbase; + d->imc[lmc].lmc = lmc; + + /* Create the imc device instance. */ + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->release = imc_release; + device_initialize(dev); + rc = dev_set_name(dev, "0x%llx", base); + if (rc) { + imh_printk(KERN_ERR, "Failed to set dev name\n"); + put_device(dev); + return rc; + } + + d->imc[lmc].dev = dev; + + /* Set up the imc index mapping. */ + pmc = north ? i : 8 + i; + skx_set_mc_mapping(d, pmc, lmc); + + lmc++; + } + + return lmc; +} + +static bool get_ddr_munits(struct res_config *cfg, struct skx_dev *d) +{ + int lmc = __get_ddr_munits(cfg, d, true, 0); + + if (lmc < 0) + return false; + + lmc = __get_ddr_munits(cfg, d, false, lmc); + if (lmc <= 0) + return false; + + return true; +} + +static bool get_socket_id(struct res_config *cfg, struct skx_dev *d) +{ + DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ubox, 0, socket_id); + u8 src_id; + int i; + + if (!read_local_reg(®)) + return false; + + src_id = SOCKET_ID(reg.val); + edac_dbg(2, "socket id 0x%x (reg 0x%llx)\n", src_id, reg.val); + + for (i = 0; i < cfg->ddr_imc_num; i++) + d->imc[i].src_id = src_id; + + return true; +} + +/* Get TOLM (Top Of Low Memory) and TOHM (Top Of High Memory) parameters. */ +static bool imh_get_tolm_tohm(struct res_config *cfg, u64 *tolm, u64 *tohm) +{ + DEFINE_LOCAL_REG(reg, cfg, 0, true, sca, 0, tolm); + + if (!read_local_reg(®)) + return false; + + *tolm = TOLM(reg.val); + edac_dbg(2, "tolm 0x%llx (reg 0x%llx)\n", *tolm, reg.val); + + DEFINE_LOCAL_REG(reg2, cfg, 0, true, sca, 0, tohm); + + if (!read_local_reg(®2)) + return false; + + *tohm = TOHM(reg2.val); + edac_dbg(2, "tohm 0x%llx (reg 0x%llx)\n", *tohm, reg2.val); + + return true; +} + +/* Get the system-view MMIO_BASE_H for {north,south}-IMH. */ +static int imh_get_all_mmio_base_h(struct res_config *cfg, struct list_head *edac_list) +{ + int i, n = topology_max_packages(), imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num; + struct skx_dev *d; + + for (i = 0; i < n; i++) { + d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL); + if (!d) + return -ENOMEM; + + DEFINE_LOCAL_REG(reg, cfg, i, true, ubox, 0, mmio_base); + + /* Get MMIO_BASE_H for the north-IMH. */ + if (!read_local_reg(®) || !reg.val) { + kfree(d); + imh_printk(KERN_ERR, "Pkg%d has no north mmio_base_h\n", i); + return -ENODEV; + } + + d->mmio_base_h_north = MMIO_BASE_H(reg.val); + edac_dbg(2, "Pkg%d north mmio_base_h 0x%llx (reg 0x%llx)\n", + i, d->mmio_base_h_north, reg.val); + + /* Get MMIO_BASE_H for the south-IMH (optional). 
*/ + DEFINE_LOCAL_REG(reg2, cfg, i, false, ubox, 0, mmio_base); + + if (read_local_reg(®2)) { + d->mmio_base_h_south = MMIO_BASE_H(reg2.val); + edac_dbg(2, "Pkg%d south mmio_base_h 0x%llx (reg 0x%llx)\n", + i, d->mmio_base_h_south, reg2.val); + } + + d->pkg = i; + d->num_imc = imc_num; + skx_init_mc_mapping(d); + list_add_tail(&d->list, edac_list); + } + + return 0; +} + +/* Get the number of per-package memory controllers. */ +static int imh_get_imc_num(struct res_config *cfg) +{ + int imc_num = hweight32(get_imc_bitmap(cfg, 0, true)) + + hweight32(get_imc_bitmap(cfg, 0, false)); + + if (!imc_num) { + imh_printk(KERN_ERR, "Invalid mc number\n"); + return -ENODEV; + } + + if (cfg->ddr_imc_num != imc_num) { + /* + * Update the configuration data to reflect the number of + * present DDR memory controllers. + */ + cfg->ddr_imc_num = imc_num; + edac_dbg(2, "Set ddr mc number %d\n", imc_num); + } + + return 0; +} + +/* Get all memory controllers' parameters. */ +static int imh_get_munits(struct res_config *cfg, struct list_head *edac_list) +{ + struct skx_imc *imc; + struct skx_dev *d; + u8 mc = 0; + int i; + + list_for_each_entry(d, edac_list, list) { + if (!get_ddr_munits(cfg, d)) { + imh_printk(KERN_ERR, "No mc found\n"); + return -ENODEV; + } + + if (!get_socket_id(cfg, d)) { + imh_printk(KERN_ERR, "Failed to get socket id\n"); + return -ENODEV; + } + + for (i = 0; i < cfg->ddr_imc_num; i++) { + imc = &d->imc[i]; + if (!imc->mbase) + continue; + + imc->chan_mmio_sz = cfg->ddr_chan_mmio_sz; + imc->num_channels = cfg->ddr_chan_num; + imc->num_dimms = cfg->ddr_dimm_num; + imc->mc = mc++; + } + } + + return 0; +} + +static bool check_2lm_enabled(struct res_config *cfg, struct skx_dev *d, int ha_idx) +{ + DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ha, ha_idx, mode); + + if (!read_local_reg(®)) + return false; + + if (!NMCACHING(reg.val)) + return false; + + edac_dbg(2, "2-level memory configuration (reg 0x%llx, ha idx %d)\n", reg.val, ha_idx); + return true; +} + +/* Check whether the system has a 2-level memory configuration. */ +static bool imh_2lm_enabled(struct res_config *cfg, struct list_head *head) +{ + struct skx_dev *d; + int i; + + list_for_each_entry(d, head, list) { + for (i = 0; i < cfg->ddr_imc_num; i++) + if (check_2lm_enabled(cfg, d, i)) + return true; + } + + return false; +} + +/* Helpers to read memory controller registers */ +static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) +{ + return readx(imc->mbase + imc->chan_mmio_sz * chan + offset, width); +} + +static u32 read_imc_mcmtr(struct res_config *cfg, struct skx_imc *imc, int chan) +{ + return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_mcmtr_offset, cfg->ddr_reg_mcmtr_width); +} + +static u32 read_imc_dimmmtr(struct res_config *cfg, struct skx_imc *imc, int chan, int dimm) +{ + return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_dimmmtr_offset + + cfg->ddr_reg_dimmmtr_width * dimm, + cfg->ddr_reg_dimmmtr_width); +} + +static bool ecc_enabled(u32 mcmtr) +{ + return (bool)ECC_ENABLED(mcmtr); +} + +static bool dimm_populated(u32 dimmmtr) +{ + return (bool)DIMM_POPULATED(dimmmtr); +} + +/* Get each DIMM's configurations of the memory controller @mci. 
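For each channel, MCMTR is read once and one DIMMMTR is read per DIMM slot; unpopulated slots are skipped, populated DIMMs are described to the EDAC core via skx_get_dimm_info(), and a channel that has populated DIMMs but ECC disabled fails the probe with -ENODEV.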
*/ +static int imh_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg) +{ + struct skx_pvt *pvt = mci->pvt_info; + struct skx_imc *imc = pvt->imc; + struct dimm_info *dimm; + u32 mcmtr, dimmmtr; + int i, j, ndimms; + + for (i = 0; i < imc->num_channels; i++) { + if (!imc->mbase) + continue; + + mcmtr = read_imc_mcmtr(cfg, imc, i); + + for (ndimms = 0, j = 0; j < imc->num_dimms; j++) { + dimmmtr = read_imc_dimmmtr(cfg, imc, i, j); + edac_dbg(1, "mcmtr 0x%x dimmmtr 0x%x (mc%d ch%d dimm%d)\n", + mcmtr, dimmmtr, imc->mc, i, j); + + if (!dimm_populated(dimmmtr)) + continue; + + dimm = edac_get_dimm(mci, i, j, 0); + ndimms += skx_get_dimm_info(dimmmtr, 0, 0, dimm, + imc, i, j, cfg); + } + + if (ndimms && !ecc_enabled(mcmtr)) { + imh_printk(KERN_ERR, "ECC is disabled on mc%d ch%d\n", + imc->mc, i); + return -ENODEV; + } + } + + return 0; +} + +/* Register all memory controllers to the EDAC core. */ +static int imh_register_mci(struct res_config *cfg, struct list_head *edac_list) +{ + struct skx_imc *imc; + struct skx_dev *d; + int i, rc; + + list_for_each_entry(d, edac_list, list) { + for (i = 0; i < cfg->ddr_imc_num; i++) { + imc = &d->imc[i]; + if (!imc->mbase) + continue; + + rc = skx_register_mci(imc, imc->dev, + dev_name(imc->dev), + "Intel IMH-based Socket", + EDAC_MOD_STR, + imh_get_dimm_config, cfg); + if (rc) + return rc; + } + } + + return 0; +} + +static struct res_config dmr_cfg = { + .type = DMR, + .support_ddr5 = true, + .mmio_base_l_north = 0xf6800000, + .mmio_base_l_south = 0xf6000000, + .ddr_chan_num = 1, + .ddr_dimm_num = 2, + .ddr_imc_base = 0x39b000, + .ddr_chan_mmio_sz = 0x8000, + .ddr_reg_mcmtr_offset = 0x360, + .ddr_reg_mcmtr_width = 4, + .ddr_reg_dimmmtr_offset = 0x370, + .ddr_reg_dimmmtr_width = 4, + .ubox_base = 0x0, + .ubox_size = 0x2000, + .ubox_reg_mmio_base_offset = 0x580, + .ubox_reg_mmio_base_width = 4, + .ubox_reg_socket_id_offset = 0x1080, + .ubox_reg_socket_id_width = 4, + .pcu_base = 0x3000, + .pcu_size = 0x10000, + .pcu_reg_capid3_offset = 0x290, + .pcu_reg_capid3_width = 4, + .sca_base = 0x24c000, + .sca_size = 0x2500, + .sca_reg_tolm_offset = 0x2100, + .sca_reg_tolm_width = 8, + .sca_reg_tohm_offset = 0x2108, + .sca_reg_tohm_width = 8, + .ha_base = 0x3eb000, + .ha_size = 0x1000, + .ha_reg_mode_offset = 0x4a0, + .ha_reg_mode_width = 4, +}; + +static const struct x86_cpu_id imh_cpuids[] = { + X86_MATCH_VFM(INTEL_DIAMONDRAPIDS_X, &dmr_cfg), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, imh_cpuids); + +static struct notifier_block imh_mce_dec = { + .notifier_call = skx_mce_check_error, + .priority = MCE_PRIO_EDAC, +}; + +static int __init imh_init(void) +{ + const struct x86_cpu_id *id; + struct list_head *edac_list; + struct res_config *cfg; + const char *owner; + u64 tolm, tohm; + int rc; + + edac_dbg(2, "\n"); + + if (ghes_get_devices()) + return -EBUSY; + + owner = edac_get_owner(); + if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) + return -EBUSY; + + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + id = x86_match_cpu(imh_cpuids); + if (!id) + return -ENODEV; + cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); + + if (!imh_get_tolm_tohm(cfg, &tolm, &tohm)) + return -ENODEV; + + skx_set_hi_lo(tolm, tohm); + + rc = imh_get_imc_num(cfg); + if (rc < 0) + goto fail; + + edac_list = skx_get_edac_list(); + + rc = imh_get_all_mmio_base_h(cfg, edac_list); + if (rc) + goto fail; + + rc = imh_get_munits(cfg, edac_list); + if (rc) + goto fail; + + skx_set_mem_cfg(imh_2lm_enabled(cfg, edac_list)); + + rc = 
imh_register_mci(cfg, edac_list); + if (rc) + goto fail; + + rc = skx_adxl_get(); + if (rc) + goto fail; + + opstate_init(); + mce_register_decode_chain(&imh_mce_dec); + skx_setup_debug("imh_test"); + + imh_printk(KERN_INFO, "%s\n", IMH_REVISION); + + return 0; +fail: + skx_remove(); + return rc; +} + +static void __exit imh_exit(void) +{ + edac_dbg(2, "\n"); + + skx_teardown_debug(); + mce_unregister_decode_chain(&imh_mce_dec); + skx_adxl_put(); + skx_remove(); +} + +module_init(imh_init); +module_exit(imh_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Qiuxu Zhuo"); +MODULE_DESCRIPTION("MC Driver for Intel servers using IMH-based memory controller"); diff --git a/drivers/edac/layerscape_edac.c b/drivers/edac/layerscape_edac.c new file mode 100644 index 000000000000..a2caa7fc5412 --- /dev/null +++ b/drivers/edac/layerscape_edac.c @@ -0,0 +1,77 @@ +/* + * Freescale Memory Controller kernel module + * + * Author: York Sun <york.sun@nxp.com> + * + * Copyright 2016 NXP Semiconductor + * + * Derived from mpc85xx_edac.c + * Author: Dave Jiang <djiang@mvista.com> + * + * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "edac_module.h" +#include "fsl_ddr_edac.h" + +static const struct of_device_id fsl_ddr_mc_err_of_match[] = { + { .compatible = "fsl,qoriq-memory-controller", }, + { .compatible = "nxp,imx9-memory-controller", .data = (void *)TYPE_IMX9, }, + {}, +}; +MODULE_DEVICE_TABLE(of, fsl_ddr_mc_err_of_match); + +static struct platform_driver fsl_ddr_mc_err_driver = { + .probe = fsl_mc_err_probe, + .remove = fsl_mc_err_remove, + .driver = { + .name = "fsl_ddr_mc_err", + .of_match_table = fsl_ddr_mc_err_of_match, + }, +}; + +static int __init fsl_ddr_mc_init(void) +{ + int res; + + if (ghes_get_devices()) + return -EBUSY; + + /* make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_INT: + break; + default: + edac_op_state = EDAC_OPSTATE_INT; + break; + } + + res = platform_driver_register(&fsl_ddr_mc_err_driver); + if (res) { + pr_err("MC fails to register\n"); + return res; + } + + return 0; +} + +module_init(fsl_ddr_mc_init); + +static void __exit fsl_ddr_mc_exit(void) +{ + platform_driver_unregister(&fsl_ddr_mc_err_driver); +} + +module_exit(fsl_ddr_mc_exit); + +MODULE_DESCRIPTION("Freescale Layerscape EDAC driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("NXP Semiconductor"); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c new file mode 100644 index 000000000000..38745800ed01 --- /dev/null +++ b/drivers/edac/loongson_edac.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited. 
+ */
+
+#include <linux/acpi.h>
+#include <linux/edac.h>
+#include <linux/init.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include "edac_module.h"
+
+#define ECC_CS_COUNT_REG	0x18
+
+struct loongson_edac_pvt {
+	void __iomem *ecc_base;
+
+	/*
+	 * The ECC register in this controller records the number of errors
+	 * encountered since reset and cannot be zeroed, so in order to report
+	 * the error count at each check, this field records the register
+	 * state seen at the previous check.
+	 */
+	int last_ce_count;
+};
+
+static int read_ecc(struct mem_ctl_info *mci)
+{
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+	u64 ecc;
+	int cs;
+
+	ecc = readq(pvt->ecc_base + ECC_CS_COUNT_REG);
+	/* Sum the per-chip-select counts for cs0 -- cs3. */
+	cs = ecc & 0xff;
+	cs += (ecc >> 8) & 0xff;
+	cs += (ecc >> 16) & 0xff;
+	cs += (ecc >> 24) & 0xff;
+
+	return cs;
+}
+
+static void edac_check(struct mem_ctl_info *mci)
+{
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+	int new, add;
+
+	new = read_ecc(mci);
+	add = new - pvt->last_ce_count;
+	pvt->last_ce_count = new;
+	if (add <= 0)
+		return;
+
+	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
+			     0, 0, 0, 0, 0, -1, "error", "");
+}
+
+static void dimm_config_init(struct mem_ctl_info *mci)
+{
+	struct dimm_info *dimm;
+	u32 size, npages;
+
+	/* The hardware does not report the DIMM size; use a placeholder. */
+	size = -1;
+	npages = MiB_TO_PAGES(size);
+
+	dimm = edac_get_dimm(mci, 0, 0, 0);
+	dimm->nr_pages = npages;
+	snprintf(dimm->label, sizeof(dimm->label),
+		 "MC#%uChannel#%u_DIMM#%u", mci->mc_idx, 0, 0);
+	dimm->grain = 8;
+}
+
+static void pvt_init(struct mem_ctl_info *mci, void __iomem *vbase)
+{
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+	pvt->ecc_base = vbase;
+	pvt->last_ce_count = read_ecc(mci);
+}
+
+static int edac_probe(struct platform_device *pdev)
+{
+	struct edac_mc_layer layers[2];
+	struct mem_ctl_info *mci;
+	void __iomem *vbase;
+	int ret;
+
+	vbase = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(vbase))
+		return PTR_ERR(vbase);
+
+	layers[0].type = EDAC_MC_LAYER_CHANNEL;
+	layers[0].size = 1;
+	layers[0].is_virt_csrow = false;
+	layers[1].type = EDAC_MC_LAYER_SLOT;
+	layers[1].size = 1;
+	layers[1].is_virt_csrow = true;
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+			    sizeof(struct loongson_edac_pvt));
+	if (mci == NULL)
+		return -ENOMEM;
+
+	mci->mc_idx = edac_device_alloc_index();
+	mci->mtype_cap = MEM_FLAG_RDDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE;
+	mci->edac_cap = EDAC_FLAG_NONE;
+	mci->mod_name = "loongson_edac.c";
+	mci->ctl_name = "loongson_edac_ctl";
+	mci->dev_name = "loongson_edac_dev";
+	mci->ctl_page_to_phys = NULL;
+	mci->pdev = &pdev->dev;
+	mci->error_desc.grain = 8;
+	mci->edac_check = edac_check;
+
+	pvt_init(mci, vbase);
+	dimm_config_init(mci);
+
+	ret = edac_mc_add_mc(mci);
+	if (ret) {
+		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+		edac_mc_free(mci);
+		return ret;
+	}
+	edac_op_state = EDAC_OPSTATE_POLL;
+
+	return 0;
+}
+
+static void edac_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
+
+	if (mci)
+		edac_mc_free(mci);
+}
+
+static const struct acpi_device_id loongson_edac_acpi_match[] = {
+	{"LOON0010", 0},
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, loongson_edac_acpi_match);
+
+static struct platform_driver loongson_edac_driver = {
+	.probe = edac_probe,
+	.remove = edac_remove,
+	.driver = {
+		.name = "loongson-mc-edac",
+		.acpi_match_table = loongson_edac_acpi_match,
+	},
+};
+module_platform_driver(loongson_edac_driver);
+
+MODULE_LICENSE("GPL");
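+/*
+ * A minimal sketch of the counting scheme that edac_check() above relies
+ * on, assuming only a monotonically increasing hardware counter; the names
+ * below are illustrative and not part of this driver. Reporting the delta
+ * rather than the raw register value keeps repeated polls from re-counting
+ * errors that were already reported:
+ *
+ *	static u64 last_count;
+ *
+ *	static u64 new_errors(u64 cumulative)
+ *	{
+ *		u64 delta = cumulative - last_count;
+ *
+ *		last_count = cumulative;
+ *		return delta;
+ *	}
+ */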
+MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>"); +MODULE_DESCRIPTION("EDAC driver for loongson memory controller"); diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 30f7309446a6..af3c12284a1e 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1,34 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/module.h> #include <linux/slab.h> +#include <asm/cpu.h> +#include <asm/msr.h> + #include "mce_amd.h" -static struct amd_decoder_ops *fam_ops; +static struct amd_decoder_ops fam_ops; static u8 xec_mask = 0xf; -static u8 nb_err_cpumask = 0xf; - -static bool report_gart_errors; -static void (*nb_bus_decoder)(int node_id, struct mce *m); -void amd_report_gart_errors(bool v) -{ - report_gart_errors = v; -} -EXPORT_SYMBOL_GPL(amd_report_gart_errors); +static void (*decode_dram_ecc)(int node_id, struct mce *m); void amd_register_ecc_decoder(void (*f)(int, struct mce *)) { - nb_bus_decoder = f; + decode_dram_ecc = f; } EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)) { - if (nb_bus_decoder) { - WARN_ON(nb_bus_decoder != f); + if (decode_dram_ecc) { + WARN_ON(decode_dram_ecc != f); - nb_bus_decoder = NULL; + decode_dram_ecc = NULL; } } EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); @@ -79,7 +75,8 @@ static const char * const f15h_mc1_mce_desc[] = { "uop queue", "insn buffer", "predecode buffer", - "fetch address FIFO" + "fetch address FIFO", + "dispatch uop queue" }; static const char * const f15h_mc2_mce_desc[] = { @@ -138,6 +135,15 @@ static const char * const mc5_mce_desc[] = { "Retire status queue" }; +static const char * const mc6_mce_desc[] = { + "Hardware Assertion", + "Free List", + "Physical Register File", + "Retire Queue", + "Scheduler table", + "Status Register File", +}; + static bool f12h_mc0_mce(u16 ec, u8 xec) { bool ret = false; @@ -268,6 +274,12 @@ static bool f15h_mc0_mce(u16 ec, u8 xec) pr_cont("System Read Data Error.\n"); else pr_cont(" Internal error condition type %d.\n", xec); + } else if (INT_ERROR(ec)) { + if (xec <= 0x1f) + pr_cont("Hardware Assert.\n"); + else + ret = false; + } else ret = false; @@ -289,7 +301,7 @@ static void decode_mc0_mce(struct mce *m) : (xec ? "multimatch" : "parity"))); return; } - } else if (fam_ops->mc0_mce(ec, xec)) + } else if (fam_ops.mc0_mce(ec, xec)) ; else pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n"); @@ -374,7 +386,7 @@ static bool f15h_mc1_mce(u16 ec, u8 xec) pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]); break; - case 0x11 ... 0x14: + case 0x11 ... 0x15: pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]); break; @@ -398,10 +410,20 @@ static void decode_mc1_mce(struct mce *m) bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); pr_cont("during %s.\n", (k8 ? 
"system linefill" : "NB data read")); - } else if (fam_ops->mc1_mce(ec, xec)) + } else if (INT_ERROR(ec)) { + if (xec <= 0x3f) + pr_cont("Hardware Assert.\n"); + else + goto wrong_mc1_mce; + } else if (fam_ops.mc1_mce(ec, xec)) ; else - pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n"); + goto wrong_mc1_mce; + + return; + +wrong_mc1_mce: + pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n"); } static bool k8_mc2_mce(u16 ec, u8 xec) @@ -416,8 +438,8 @@ static bool k8_mc2_mce(u16 ec, u8 xec) pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec)); else if (xec == 0x0) { if (TLB_ERROR(ec)) - pr_cont(": %s error in a Page Descriptor Cache or " - "Guest TLB.\n", TT_MSG(ec)); + pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n", + TT_MSG(ec)); else if (BUS_ERROR(ec)) pr_cont(": %s/ECC error in data read from NB: %s.\n", R4_MSG(ec), PP_MSG(ec)); @@ -469,6 +491,11 @@ static bool f15h_mc2_mce(u16 ec, u8 xec) default: ret = false; } + } else if (INT_ERROR(ec)) { + if (xec <= 0x3f) + pr_cont("Hardware Assert.\n"); + else + ret = false; } return ret; @@ -522,7 +549,7 @@ static void decode_mc2_mce(struct mce *m) pr_emerg(HW_ERR "MC2 Error: "); - if (!fam_ops->mc2_mce(ec, xec)) + if (!fam_ops.mc2_mce(ec, xec)) pr_cont(HW_ERR "Corrupted MC2 MCE info?\n"); } @@ -557,8 +584,8 @@ static void decode_mc3_mce(struct mce *m) static void decode_mc4_mce(struct mce *m) { - struct cpuinfo_x86 *c = &boot_cpu_data; - int node_id = amd_get_nb_id(m->extcpu); + unsigned int fam = x86_family(m->cpuid); + int node_id = topology_amd_node_id(m->extcpu); u16 ec = EC(m->status); u8 xec = XEC(m->status, 0x1f); u8 offset = 0; @@ -571,13 +598,13 @@ static void decode_mc4_mce(struct mce *m) /* special handling for DRAM ECCs */ if (xec == 0x0 || xec == 0x8) { /* no ECCs on F11h */ - if (c->x86 == 0x11) + if (fam == 0x11) goto wrong_mc4_mce; pr_cont("%s.\n", mc4_mce_desc[xec]); - if (nb_bus_decoder) - nb_bus_decoder(node_id, m); + if (decode_dram_ecc) + decode_dram_ecc(node_id, m); return; } break; @@ -592,7 +619,7 @@ static void decode_mc4_mce(struct mce *m) return; case 0x19: - if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16) + if (fam == 0x15 || fam == 0x16) pr_cont("Compute Unit Data Error.\n"); else goto wrong_mc4_mce; @@ -615,14 +642,23 @@ static void decode_mc4_mce(struct mce *m) static void decode_mc5_mce(struct mce *m) { - struct cpuinfo_x86 *c = &boot_cpu_data; + unsigned int fam = x86_family(m->cpuid); + u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - if (c->x86 == 0xf || c->x86 == 0x11) + if (fam == 0xf || fam == 0x11) goto wrong_mc5_mce; pr_emerg(HW_ERR "MC5 Error: "); + if (INT_ERROR(ec)) { + if (xec <= 0x1f) { + pr_cont("Hardware Assert.\n"); + return; + } else + goto wrong_mc5_mce; + } + if (xec == 0x0 || xec == 0xc) pr_cont("%s.\n", mc5_mce_desc[xec]); else if (xec <= 0xd) @@ -642,38 +678,76 @@ static void decode_mc6_mce(struct mce *m) pr_emerg(HW_ERR "MC6 Error: "); - switch (xec) { - case 0x1: - pr_cont("Free List"); - break; + if (xec > 0x5) + goto wrong_mc6_mce; - case 0x2: - pr_cont("Physical Register File"); - break; + pr_cont("%s parity error.\n", mc6_mce_desc[xec]); + return; - case 0x3: - pr_cont("Retire Queue"); - break; + wrong_mc6_mce: + pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n"); +} - case 0x4: - pr_cont("Scheduler table"); - break; +static const char * const smca_long_names[] = { + [SMCA_LS ... 
SMCA_LS_V2] = "Load Store Unit", + [SMCA_IF] = "Instruction Fetch Unit", + [SMCA_L2_CACHE] = "L2 Cache", + [SMCA_DE] = "Decode Unit", + [SMCA_RESERVED] = "Reserved", + [SMCA_EX] = "Execution Unit", + [SMCA_FP] = "Floating Point Unit", + [SMCA_L3_CACHE] = "L3 Cache", + [SMCA_CS ... SMCA_CS_V2] = "Coherent Slave", + [SMCA_PIE] = "Power, Interrupts, etc.", + + /* UMC v2 is separate because both of them can exist in a single system. */ + [SMCA_UMC] = "Unified Memory Controller", + [SMCA_UMC_V2] = "Unified Memory Controller v2", + [SMCA_PB] = "Parameter Block", + [SMCA_PSP ... SMCA_PSP_V2] = "Platform Security Processor", + [SMCA_SMU ... SMCA_SMU_V2] = "System Management Unit", + [SMCA_MP5] = "Microprocessor 5 Unit", + [SMCA_MPDMA] = "MPDMA Unit", + [SMCA_NBIO] = "Northbridge IO Unit", + [SMCA_PCIE ... SMCA_PCIE_V2] = "PCI Express Unit", + [SMCA_XGMI_PCS] = "Ext Global Memory Interconnect PCS Unit", + [SMCA_NBIF] = "NBIF Unit", + [SMCA_SHUB] = "System Hub Unit", + [SMCA_SATA] = "SATA Unit", + [SMCA_USB] = "USB Unit", + [SMCA_GMI_PCS] = "Global Memory Interconnect PCS Unit", + [SMCA_XGMI_PHY] = "Ext Global Memory Interconnect PHY Unit", + [SMCA_WAFL_PHY] = "WAFL PHY Unit", + [SMCA_GMI_PHY] = "Global Memory Interconnect PHY Unit", +}; - case 0x5: - pr_cont("Status Register File"); - break; +static const char *smca_get_long_name(enum smca_bank_types t) +{ + if (t >= N_SMCA_BANK_TYPES) + return NULL; - default: - goto wrong_mc6_mce; - break; - } + return smca_long_names[t]; +} - pr_cont(" parity error.\n"); +/* Decode errors according to Scalable MCA specification */ +static void decode_smca_error(struct mce *m) +{ + enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank); + u8 xec = XEC(m->status, xec_mask); - return; + if (bank_type >= N_SMCA_BANK_TYPES) + return; - wrong_mc6_mce: - pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n"); + if (bank_type == SMCA_RESERVED) { + pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank); + return; + } + + pr_emerg(HW_ERR "%s Ext. Error Code: %d", smca_get_long_name(bank_type), xec); + + if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && + xec == 0 && decode_dram_ecc) + decode_dram_ecc(topology_amd_node_id(m->extcpu), m); } static inline void amd_decode_err_code(u16 ec) @@ -700,22 +774,6 @@ static inline void amd_decode_err_code(u16 ec) pr_cont("\n"); } -/* - * Filter out unwanted MCE signatures here. - */ -static bool amd_filter_mce(struct mce *m) -{ - u8 xec = (m->status >> 16) & 0x1f; - - /* - * NB GART TLB error reporting is disabled by default. 
- */ - if (m->bank == 4 && xec == 0x5 && !report_gart_errors) - return true; - - return false; -} - static const char *decode_error_status(struct mce *m) { if (m->status & MCI_STATUS_UC) { @@ -727,19 +785,97 @@ static const char *decode_error_status(struct mce *m) } if (m->status & MCI_STATUS_DEFERRED) - return "Deferred error."; + return "Deferred error, no action required."; return "Corrected error, no action required."; } -int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) +static int +amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) { struct mce *m = (struct mce *)data; - struct cpuinfo_x86 *c = &cpu_data(m->extcpu); + struct mce_hw_err *err = to_mce_hw_err(m); + unsigned int fam = x86_family(m->cpuid); + u32 mca_config_lo = 0, dummy; int ecc; - if (amd_filter_mce(m)) - return NOTIFY_STOP; + if (m->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + + pr_emerg(HW_ERR "%s\n", decode_error_status(m)); + + pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", + m->extcpu, + fam, x86_model(m->cpuid), x86_stepping(m->cpuid), + m->bank, + ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), + ((m->status & MCI_STATUS_UC) ? "UE" : + (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"), + ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), + ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"), + ((m->status & MCI_STATUS_PCC) ? "PCC" : "-")); + + if (boot_cpu_has(X86_FEATURE_SMCA)) { + rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(m->bank), &mca_config_lo, &dummy); + + if (mca_config_lo & MCI_CONFIG_MCAX) + pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-")); + + pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-")); + } + + /* do the two bits[14:13] together */ + ecc = (m->status >> 45) & 0x3; + if (ecc) + pr_cont("|%sECC", ((ecc == 2) ? "C" : "U")); + + if (fam >= 0x15) { + pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-")); + + /* F15h, bank4, bit 43 is part of McaStatSubCache. */ + if (fam != 0x15 || m->bank != 4) + pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-")); + } + + if (fam >= 0x17) + pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-")); + + pr_cont("]: 0x%016llx\n", m->status); + + if (m->status & MCI_STATUS_ADDRV) + pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr); + + if (m->ppin) + pr_emerg(HW_ERR "PPIN: 0x%016llx\n", m->ppin); + + if (boot_cpu_has(X86_FEATURE_SMCA)) { + pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid); + + if (m->status & MCI_STATUS_SYNDV) { + pr_cont(", Syndrome: 0x%016llx\n", m->synd); + if (mca_config_lo & MCI_CONFIG_FRUTEXT) { + char frutext[17]; + + frutext[16] = '\0'; + memcpy(&frutext[0], &err->vendor.amd.synd1, 8); + memcpy(&frutext[8], &err->vendor.amd.synd2, 8); + + pr_emerg(HW_ERR "FRU Text: %s", frutext); + } + } + + pr_cont("\n"); + + decode_smca_error(m); + goto err_code; + } + + if (m->tsc) + pr_emerg(HW_ERR "TSC: %llu\n", m->tsc); + + /* Doesn't matter which member to test. */ + if (!fam_ops.mc0_mce) + goto err_code; switch (m->bank) { case 0: @@ -774,109 +910,91 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) break; } - pr_emerg(HW_ERR "Error Status: %s\n", decode_error_status(m)); - - pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", - m->extcpu, - c->x86, c->x86_model, c->x86_mask, - m->bank, - ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), - ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), - ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), - ((m->status & MCI_STATUS_PCC) ? 
"PCC" : "-"), - ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); - - if (c->x86 == 0x15 || c->x86 == 0x16) - pr_cont("|%s|%s", - ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"), - ((m->status & MCI_STATUS_POISON) ? "Poison" : "-")); - - /* do the two bits[14:13] together */ - ecc = (m->status >> 45) & 0x3; - if (ecc) - pr_cont("|%sECC", ((ecc == 2) ? "C" : "U")); - - pr_cont("]: 0x%016llx\n", m->status); - - if (m->status & MCI_STATUS_ADDRV) - pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr); - + err_code: amd_decode_err_code(m->status & 0xffff); - return NOTIFY_STOP; + m->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_OK; } -EXPORT_SYMBOL_GPL(amd_decode_mce); static struct notifier_block amd_mce_dec_nb = { .notifier_call = amd_decode_mce, + .priority = MCE_PRIO_EDAC, }; static int __init mce_amd_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; - if (c->x86_vendor != X86_VENDOR_AMD) - return 0; + if (c->x86_vendor != X86_VENDOR_AMD && + c->x86_vendor != X86_VENDOR_HYGON) + return -ENODEV; - if (c->x86 < 0xf || c->x86 > 0x16) - return 0; + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; - fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); - if (!fam_ops) - return -ENOMEM; + if (boot_cpu_has(X86_FEATURE_SMCA)) { + xec_mask = 0x3f; + goto out; + } switch (c->x86) { case 0xf: - fam_ops->mc0_mce = k8_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = k8_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x10: - fam_ops->mc0_mce = f10h_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = f10h_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x11: - fam_ops->mc0_mce = k8_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = k8_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x12: - fam_ops->mc0_mce = f12h_mc0_mce; - fam_ops->mc1_mce = k8_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = f12h_mc0_mce; + fam_ops.mc1_mce = k8_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x14: - nb_err_cpumask = 0x3; - fam_ops->mc0_mce = cat_mc0_mce; - fam_ops->mc1_mce = cat_mc1_mce; - fam_ops->mc2_mce = k8_mc2_mce; + fam_ops.mc0_mce = cat_mc0_mce; + fam_ops.mc1_mce = cat_mc1_mce; + fam_ops.mc2_mce = k8_mc2_mce; break; case 0x15: - xec_mask = 0x1f; - fam_ops->mc0_mce = f15h_mc0_mce; - fam_ops->mc1_mce = f15h_mc1_mce; - fam_ops->mc2_mce = f15h_mc2_mce; + xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f; + + fam_ops.mc0_mce = f15h_mc0_mce; + fam_ops.mc1_mce = f15h_mc1_mce; + fam_ops.mc2_mce = f15h_mc2_mce; break; case 0x16: xec_mask = 0x1f; - fam_ops->mc0_mce = cat_mc0_mce; - fam_ops->mc1_mce = cat_mc1_mce; - fam_ops->mc2_mce = f16h_mc2_mce; + fam_ops.mc0_mce = cat_mc0_mce; + fam_ops.mc1_mce = cat_mc1_mce; + fam_ops.mc2_mce = f16h_mc2_mce; break; + case 0x17: + case 0x18: + pr_warn_once("Decoding supported only on Scalable MCA processors.\n"); + return -EINVAL; + default: printk(KERN_WARNING "Huh? 
What family is it: 0x%x?!\n", c->x86); - kfree(fam_ops); return -EINVAL; } +out: pr_info("MCE: In-kernel MCE decoding enabled.\n"); mce_register_decode_chain(&amd_mce_dec_nb); @@ -889,7 +1007,6 @@ early_initcall(mce_amd_init); static void __exit mce_amd_exit(void) { mce_unregister_decode_chain(&amd_mce_dec_nb); - kfree(fam_ops); } MODULE_DESCRIPTION("AMD MCE decoder"); diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 51b7e3a36e37..4811b18d9606 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _EDAC_MCE_AMD_H #define _EDAC_MCE_AMD_H @@ -6,7 +7,6 @@ #include <asm/mce.h> #define EC(x) ((x) & 0xffff) -#define XEC(x, mask) (((x) >> 16) & mask) #define LOW_SYNDROME(x) (((x) >> 15) & 0xff) #define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) @@ -32,9 +32,6 @@ #define R4(x) (((x) >> 4) & 0xf) #define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") -#define MCI_STATUS_DEFERRED BIT_64(44) -#define MCI_STATUS_POISON BIT_64(43) - extern const char * const pp_msgs[]; enum tt_ids { @@ -79,9 +76,7 @@ struct amd_decoder_ops { bool (*mc2_mce)(u16, u8); }; -void amd_report_gart_errors(bool); void amd_register_ecc_decoder(void (*f)(int, struct mce *)); void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)); -int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data); #endif /* _EDAC_MCE_AMD_H */ diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c deleted file mode 100644 index 5e46a9fea31b..000000000000 --- a/drivers/edac/mce_amd_inj.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * A simple MCE injection facility for testing the MCE decoding code. This - * driver should be built as module so that it can be loaded on production - * kernels for testing purposes. - * - * This file may be distributed under the terms of the GNU General Public - * License version 2. - * - * Copyright (c) 2010: Borislav Petkov <bp@alien8.de> - * Advanced Micro Devices Inc. 
- */ - -#include <linux/kobject.h> -#include <linux/device.h> -#include <linux/edac.h> -#include <linux/module.h> -#include <asm/mce.h> - -#include "mce_amd.h" - -struct edac_mce_attr { - struct attribute attr; - ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf); - ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr, - const char *buf, size_t count); -}; - -#define EDAC_MCE_ATTR(_name, _mode, _show, _store) \ -static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store) - -static struct kobject *mce_kobj; - -/* - * Collect all the MCi_XXX settings - */ -static struct mce i_mce; - -#define MCE_INJECT_STORE(reg) \ -static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \ - struct edac_mce_attr *attr, \ - const char *data, size_t count)\ -{ \ - int ret = 0; \ - unsigned long value; \ - \ - ret = kstrtoul(data, 16, &value); \ - if (ret < 0) \ - printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \ - \ - i_mce.reg = value; \ - \ - return count; \ -} - -MCE_INJECT_STORE(status); -MCE_INJECT_STORE(misc); -MCE_INJECT_STORE(addr); - -#define MCE_INJECT_SHOW(reg) \ -static ssize_t edac_inject_##reg##_show(struct kobject *kobj, \ - struct edac_mce_attr *attr, \ - char *buf) \ -{ \ - return sprintf(buf, "0x%016llx\n", i_mce.reg); \ -} - -MCE_INJECT_SHOW(status); -MCE_INJECT_SHOW(misc); -MCE_INJECT_SHOW(addr); - -EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store); -EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store); -EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store); - -/* - * This denotes into which bank we're injecting and triggers - * the injection, at the same time. - */ -static ssize_t edac_inject_bank_store(struct kobject *kobj, - struct edac_mce_attr *attr, - const char *data, size_t count) -{ - int ret = 0; - unsigned long value; - - ret = kstrtoul(data, 10, &value); - if (ret < 0) { - printk(KERN_ERR "Invalid bank value!\n"); - return -EINVAL; - } - - if (value > 5) - if (boot_cpu_data.x86 != 0x15 || value > 6) { - printk(KERN_ERR "Non-existent MCE bank: %lu\n", value); - return -EINVAL; - } - - i_mce.bank = value; - - amd_decode_mce(NULL, 0, &i_mce); - - return count; -} - -static ssize_t edac_inject_bank_show(struct kobject *kobj, - struct edac_mce_attr *attr, char *buf) -{ - return sprintf(buf, "%d\n", i_mce.bank); -} - -EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store); - -static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc, - &mce_attr_addr, &mce_attr_bank -}; - -static int __init edac_init_mce_inject(void) -{ - struct bus_type *edac_subsys = NULL; - int i, err = 0; - - edac_subsys = edac_get_sysfs_subsys(); - if (!edac_subsys) - return -EINVAL; - - mce_kobj = kobject_create_and_add("mce", &edac_subsys->dev_root->kobj); - if (!mce_kobj) { - printk(KERN_ERR "Error creating a mce kset.\n"); - err = -ENOMEM; - goto err_mce_kobj; - } - - for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) { - err = sysfs_create_file(mce_kobj, &sysfs_attrs[i]->attr); - if (err) { - printk(KERN_ERR "Error creating %s in sysfs.\n", - sysfs_attrs[i]->attr.name); - goto err_sysfs_create; - } - } - return 0; - -err_sysfs_create: - while (--i >= 0) - sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); - - kobject_del(mce_kobj); - -err_mce_kobj: - edac_put_sysfs_subsys(); - - return err; -} - -static void __exit edac_exit_mce_inject(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) - 
sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); - - kobject_del(mce_kobj); - - edac_put_sysfs_subsys(); -} - -module_init(edac_init_mce_inject); -module_exit(edac_exit_mce_inject); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Borislav Petkov <bp@alien8.de>"); -MODULE_AUTHOR("AMD Inc."); -MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding"); diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c new file mode 100644 index 000000000000..108d69209146 --- /dev/null +++ b/drivers/edac/mem_repair.c @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic EDAC memory repair driver is designed to control the memory + * devices with memory repair features, such as Post Package Repair (PPR), + * memory sparing etc. The common sysfs memory repair interface abstracts + * the control of various arbitrary memory repair functionalities into a + * unified set of functions. + * + * Copyright (c) 2024-2025 HiSilicon Limited. + */ + +#include <linux/edac.h> + +enum edac_mem_repair_attributes { + MR_TYPE, + MR_PERSIST_MODE, + MR_SAFE_IN_USE, + MR_HPA, + MR_MIN_HPA, + MR_MAX_HPA, + MR_DPA, + MR_MIN_DPA, + MR_MAX_DPA, + MR_NIBBLE_MASK, + MR_BANK_GROUP, + MR_BANK, + MR_RANK, + MR_ROW, + MR_COLUMN, + MR_CHANNEL, + MR_SUB_CHANNEL, + MEM_DO_REPAIR, + MR_MAX_ATTRS +}; + +struct edac_mem_repair_dev_attr { + struct device_attribute dev_attr; + u8 instance; +}; + +struct edac_mem_repair_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_mem_repair_dev_attr mem_repair_dev_attr[MR_MAX_ATTRS]; + struct attribute *mem_repair_attrs[MR_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +const char * const edac_repair_type[] = { + [EDAC_REPAIR_PPR] = "ppr", + [EDAC_REPAIR_CACHELINE_SPARING] = "cacheline-sparing", + [EDAC_REPAIR_ROW_SPARING] = "row-sparing", + [EDAC_REPAIR_BANK_SPARING] = "bank-sparing", + [EDAC_REPAIR_RANK_SPARING] = "rank-sparing", +}; +EXPORT_SYMBOL_GPL(edac_repair_type); + +#define TO_MR_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_mem_repair_dev_attr, dev_attr) + +#define MR_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + u8 inst = TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = \ + ctx->mem_repair[inst].mem_repair_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \ + &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +MR_ATTR_SHOW(repair_type, get_repair_type, const char *, "%s\n") +MR_ATTR_SHOW(persist_mode, get_persist_mode, bool, "%u\n") +MR_ATTR_SHOW(repair_safe_when_in_use, get_repair_safe_when_in_use, bool, "%u\n") +MR_ATTR_SHOW(hpa, get_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(min_hpa, get_min_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(max_hpa, get_max_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(dpa, get_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(min_dpa, get_min_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(max_dpa, get_max_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(nibble_mask, get_nibble_mask, u32, "0x%x\n") +MR_ATTR_SHOW(bank_group, get_bank_group, u32, "%u\n") +MR_ATTR_SHOW(bank, get_bank, u32, "%u\n") +MR_ATTR_SHOW(rank, get_rank, u32, "%u\n") +MR_ATTR_SHOW(row, get_row, u32, "0x%x\n") +MR_ATTR_SHOW(column, get_column, u32, "%u\n") +MR_ATTR_SHOW(channel, get_channel, u32, "%u\n") +MR_ATTR_SHOW(sub_channel, get_sub_channel, u32, "%u\n") + +#define MR_ATTR_STORE(attrib, cb, 
type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = \ + ctx->mem_repair[inst].mem_repair_ops; \ + type data; \ + int ret; \ + \ + ret = conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \ + data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +MR_ATTR_STORE(persist_mode, set_persist_mode, unsigned long, kstrtoul) +MR_ATTR_STORE(hpa, set_hpa, u64, kstrtou64) +MR_ATTR_STORE(dpa, set_dpa, u64, kstrtou64) +MR_ATTR_STORE(nibble_mask, set_nibble_mask, unsigned long, kstrtoul) +MR_ATTR_STORE(bank_group, set_bank_group, unsigned long, kstrtoul) +MR_ATTR_STORE(bank, set_bank, unsigned long, kstrtoul) +MR_ATTR_STORE(rank, set_rank, unsigned long, kstrtoul) +MR_ATTR_STORE(row, set_row, unsigned long, kstrtoul) +MR_ATTR_STORE(column, set_column, unsigned long, kstrtoul) +MR_ATTR_STORE(channel, set_channel, unsigned long, kstrtoul) +MR_ATTR_STORE(sub_channel, set_sub_channel, unsigned long, kstrtoul) + +#define MR_DO_OP(attrib, cb) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = ctx->mem_repair[inst].mem_repair_ops; \ + unsigned long data; \ + int ret; \ + \ + ret = kstrtoul(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +MR_DO_OP(repair, do_repair) + +static umode_t mem_repair_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct device_attribute *dev_attr = container_of(a, struct device_attribute, attr); + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + u8 inst = TO_MR_DEV_ATTR(dev_attr)->instance; + const struct edac_mem_repair_ops *ops = ctx->mem_repair[inst].mem_repair_ops; + + switch (attr_id) { + case MR_TYPE: + if (ops->get_repair_type) + return a->mode; + break; + case MR_PERSIST_MODE: + if (ops->get_persist_mode) { + if (ops->set_persist_mode) + return a->mode; + else + return 0444; + } + break; + case MR_SAFE_IN_USE: + if (ops->get_repair_safe_when_in_use) + return a->mode; + break; + case MR_HPA: + if (ops->get_hpa) { + if (ops->set_hpa) + return a->mode; + else + return 0444; + } + break; + case MR_MIN_HPA: + if (ops->get_min_hpa) + return a->mode; + break; + case MR_MAX_HPA: + if (ops->get_max_hpa) + return a->mode; + break; + case MR_DPA: + if (ops->get_dpa) { + if (ops->set_dpa) + return a->mode; + else + return 0444; + } + break; + case MR_MIN_DPA: + if (ops->get_min_dpa) + return a->mode; + break; + case MR_MAX_DPA: + if (ops->get_max_dpa) + return a->mode; + break; + case MR_NIBBLE_MASK: + if (ops->get_nibble_mask) { + if (ops->set_nibble_mask) + return a->mode; + else + return 0444; + } + break; + case MR_BANK_GROUP: + if (ops->get_bank_group) { + if (ops->set_bank_group) + return a->mode; + else + return 0444; + } + break; + case MR_BANK: + if (ops->get_bank) { + if (ops->set_bank) + return a->mode; + else + return 0444; + } + break; + case MR_RANK: + if 
(ops->get_rank) { + if (ops->set_rank) + return a->mode; + else + return 0444; + } + break; + case MR_ROW: + if (ops->get_row) { + if (ops->set_row) + return a->mode; + else + return 0444; + } + break; + case MR_COLUMN: + if (ops->get_column) { + if (ops->set_column) + return a->mode; + else + return 0444; + } + break; + case MR_CHANNEL: + if (ops->get_channel) { + if (ops->set_channel) + return a->mode; + else + return 0444; + } + break; + case MR_SUB_CHANNEL: + if (ops->get_sub_channel) { + if (ops->set_sub_channel) + return a->mode; + else + return 0444; + } + break; + case MEM_DO_REPAIR: + if (ops->do_repair) + return a->mode; + break; + default: + break; + } + + return 0; +} + +static const struct device_attribute mem_repair_dev_attr[] = { + [MR_TYPE] = __ATTR_RO(repair_type), + [MR_PERSIST_MODE] = __ATTR_RW(persist_mode), + [MR_SAFE_IN_USE] = __ATTR_RO(repair_safe_when_in_use), + [MR_HPA] = __ATTR_RW(hpa), + [MR_MIN_HPA] = __ATTR_RO(min_hpa), + [MR_MAX_HPA] = __ATTR_RO(max_hpa), + [MR_DPA] = __ATTR_RW(dpa), + [MR_MIN_DPA] = __ATTR_RO(min_dpa), + [MR_MAX_DPA] = __ATTR_RO(max_dpa), + [MR_NIBBLE_MASK] = __ATTR_RW(nibble_mask), + [MR_BANK_GROUP] = __ATTR_RW(bank_group), + [MR_BANK] = __ATTR_RW(bank), + [MR_RANK] = __ATTR_RW(rank), + [MR_ROW] = __ATTR_RW(row), + [MR_COLUMN] = __ATTR_RW(column), + [MR_CHANNEL] = __ATTR_RW(channel), + [MR_SUB_CHANNEL] = __ATTR_RW(sub_channel), + [MEM_DO_REPAIR] = __ATTR_WO(repair) +}; + +static int mem_repair_create_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance) +{ + struct edac_mem_repair_context *ctx; + struct attribute_group *group; + int i; + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + for (i = 0; i < MR_MAX_ATTRS; i++) { + ctx->mem_repair_dev_attr[i].dev_attr = mem_repair_dev_attr[i]; + ctx->mem_repair_dev_attr[i].instance = instance; + sysfs_attr_init(&ctx->mem_repair_dev_attr[i].dev_attr.attr); + ctx->mem_repair_attrs[i] = + &ctx->mem_repair_dev_attr[i].dev_attr.attr; + } + + sprintf(ctx->name, "%s%d", "mem_repair", instance); + group = &ctx->group; + group->name = ctx->name; + group->attrs = ctx->mem_repair_attrs; + group->is_visible = mem_repair_attr_visible; + attr_groups[0] = group; + + return 0; +} + +/** + * edac_mem_repair_get_desc - get EDAC memory repair descriptors + * @dev: client device with memory repair feature + * @attr_groups: pointer to attribute group container + * @instance: device's memory repair instance number. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. + */ +int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, u8 instance) +{ + if (!dev || !attr_groups) + return -EINVAL; + + return mem_repair_create_desc(dev, attr_groups, instance); +} diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 3eb32f62d72a..a45dc6b35ede 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -1,5 +1,7 @@ /* - * Freescale MPC85xx Memory Controller kenel module + * Freescale MPC85xx Memory Controller kernel module + * + * Parts Copyrighted (c) 2013 by Freescale Semiconductor, Inc. 
* * Author: Dave Jiang <djiang@mvista.com> * @@ -18,21 +20,19 @@ #include <linux/edac.h> #include <linux/smp.h> #include <linux/gfp.h> +#include <linux/fsl/edac.h> -#include <linux/of_platform.h> -#include <linux/of_device.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include "edac_module.h" -#include "edac_core.h" #include "mpc85xx_edac.h" +#include "fsl_ddr_edac.h" static int edac_dev_idx; #ifdef CONFIG_PCI static int edac_pci_idx; #endif -static int edac_mc_idx; - -static u32 orig_ddr_err_disable; -static u32 orig_ddr_err_sbe; /* * PCI Err defines @@ -43,118 +43,6 @@ static u32 orig_pci_err_en; #endif static u32 orig_l2_err_disable; -#ifdef CONFIG_FSL_SOC_BOOKE -static u32 orig_hid1[2]; -#endif - -/************************ MC SYSFS parts ***********************************/ - -#define to_mci(k) container_of(k, struct mem_ctl_info, dev) - -static ssize_t mpc85xx_mc_inject_data_hi_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - return sprintf(data, "0x%08x", - in_be32(pdata->mc_vbase + - MPC85XX_MC_DATA_ERR_INJECT_HI)); -} - -static ssize_t mpc85xx_mc_inject_data_lo_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - return sprintf(data, "0x%08x", - in_be32(pdata->mc_vbase + - MPC85XX_MC_DATA_ERR_INJECT_LO)); -} - -static ssize_t mpc85xx_mc_inject_ctrl_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - return sprintf(data, "0x%08x", - in_be32(pdata->mc_vbase + MPC85XX_MC_ECC_ERR_INJECT)); -} - -static ssize_t mpc85xx_mc_inject_data_hi_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - if (isdigit(*data)) { - out_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_HI, - simple_strtoul(data, NULL, 0)); - return count; - } - return 0; -} - -static ssize_t mpc85xx_mc_inject_data_lo_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - if (isdigit(*data)) { - out_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_LO, - simple_strtoul(data, NULL, 0)); - return count; - } - return 0; -} - -static ssize_t mpc85xx_mc_inject_ctrl_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - if (isdigit(*data)) { - out_be32(pdata->mc_vbase + MPC85XX_MC_ECC_ERR_INJECT, - simple_strtoul(data, NULL, 0)); - return count; - } - return 0; -} - -DEVICE_ATTR(inject_data_hi, S_IRUGO | S_IWUSR, - mpc85xx_mc_inject_data_hi_show, mpc85xx_mc_inject_data_hi_store); -DEVICE_ATTR(inject_data_lo, S_IRUGO | S_IWUSR, - mpc85xx_mc_inject_data_lo_show, mpc85xx_mc_inject_data_lo_store); -DEVICE_ATTR(inject_ctrl, S_IRUGO | S_IWUSR, - mpc85xx_mc_inject_ctrl_show, mpc85xx_mc_inject_ctrl_store); - -static int mpc85xx_create_sysfs_attributes(struct mem_ctl_info *mci) -{ - int rc; - - rc = device_create_file(&mci->dev, &dev_attr_inject_data_hi); - if (rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_inject_data_lo); - if 
(rc < 0) - return rc; - rc = device_create_file(&mci->dev, &dev_attr_inject_ctrl); - if (rc < 0) - return rc; - - return 0; -} - -static void mpc85xx_remove_sysfs_attributes(struct mem_ctl_info *mci) -{ - device_remove_file(&mci->dev, &dev_attr_inject_data_hi); - device_remove_file(&mci->dev, &dev_attr_inject_data_lo); - device_remove_file(&mci->dev, &dev_attr_inject_ctrl); -} /**************************** PCI Err device ***************************/ #ifdef CONFIG_PCI @@ -172,18 +60,18 @@ static void mpc85xx_pci_check(struct edac_pci_ctl_info *pci) return; } - printk(KERN_ERR "PCI error(s) detected\n"); - printk(KERN_ERR "PCI/X ERR_DR register: %#08x\n", err_detect); + pr_err("PCI error(s) detected\n"); + pr_err("PCI/X ERR_DR register: %#08x\n", err_detect); - printk(KERN_ERR "PCI/X ERR_ATTRIB register: %#08x\n", + pr_err("PCI/X ERR_ATTRIB register: %#08x\n", in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ATTRIB)); - printk(KERN_ERR "PCI/X ERR_ADDR register: %#08x\n", + pr_err("PCI/X ERR_ADDR register: %#08x\n", in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR)); - printk(KERN_ERR "PCI/X ERR_EXT_ADDR register: %#08x\n", + pr_err("PCI/X ERR_EXT_ADDR register: %#08x\n", in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EXT_ADDR)); - printk(KERN_ERR "PCI/X ERR_DL register: %#08x\n", + pr_err("PCI/X ERR_DL register: %#08x\n", in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DL)); - printk(KERN_ERR "PCI/X ERR_DH register: %#08x\n", + pr_err("PCI/X ERR_DH register: %#08x\n", in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DH)); /* clear error bits */ @@ -196,6 +84,45 @@ static void mpc85xx_pci_check(struct edac_pci_ctl_info *pci) edac_pci_handle_npe(pci, pci->ctl_name); } +static void mpc85xx_pcie_check(struct edac_pci_ctl_info *pci) +{ + struct mpc85xx_pci_pdata *pdata = pci->pvt_info; + u32 err_detect, err_cap_stat; + + err_detect = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR); + err_cap_stat = in_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR); + + pr_err("PCIe error(s) detected\n"); + pr_err("PCIe ERR_DR register: 0x%08x\n", err_detect); + pr_err("PCIe ERR_CAP_STAT register: 0x%08x\n", err_cap_stat); + pr_err("PCIe ERR_CAP_R0 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R0)); + pr_err("PCIe ERR_CAP_R1 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R1)); + pr_err("PCIe ERR_CAP_R2 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R2)); + pr_err("PCIe ERR_CAP_R3 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R3)); + + /* clear error bits */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, err_detect); + + /* reset error capture */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR, err_cap_stat | 0x1); +} + +static int mpc85xx_pcie_find_capability(struct device_node *np) +{ + struct pci_controller *hose; + + if (!np) + return -EINVAL; + + hose = pci_find_hose_for_OF_device(np); + + return early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP); +} + static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) { struct edac_pci_ctl_info *pci = dev_id; @@ -207,15 +134,20 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) if (!err_detect) return IRQ_NONE; - mpc85xx_pci_check(pci); + if (pdata->is_pcie) + mpc85xx_pcie_check(pci); + else + mpc85xx_pci_check(pci); return IRQ_HANDLED; } -int mpc85xx_pci_err_probe(struct platform_device *op) +static int mpc85xx_pci_err_probe(struct platform_device *op) { struct edac_pci_ctl_info *pci; struct mpc85xx_pci_pdata *pdata; + struct mpc85xx_edac_pci_plat_data *plat_data; + struct 
device_node *of_node; struct resource r; int res = 0; @@ -238,22 +170,36 @@ int mpc85xx_pci_err_probe(struct platform_device *op) pdata = pci->pvt_info; pdata->name = "mpc85xx_pci_err"; - pdata->irq = NO_IRQ; + + plat_data = op->dev.platform_data; + if (!plat_data) { + dev_err(&op->dev, "no platform data"); + res = -ENXIO; + goto err; + } + of_node = plat_data->of_node; + + if (mpc85xx_pcie_find_capability(of_node) > 0) + pdata->is_pcie = true; + dev_set_drvdata(&op->dev, pci); pci->dev = &op->dev; pci->mod_name = EDAC_MOD_STR; pci->ctl_name = pdata->name; pci->dev_name = dev_name(&op->dev); - if (edac_op_state == EDAC_OPSTATE_POLL) - pci->edac_check = mpc85xx_pci_check; + if (edac_op_state == EDAC_OPSTATE_POLL) { + if (pdata->is_pcie) + pci->edac_check = mpc85xx_pcie_check; + else + pci->edac_check = mpc85xx_pci_check; + } pdata->edac_idx = edac_pci_idx++; - res = of_address_to_resource(op->dev.of_node, 0, &r); + res = of_address_to_resource(of_node, 0, &r); if (res) { - printk(KERN_ERR "%s: Unable to get resource for " - "PCI err regs\n", __func__); + pr_err("%s: Unable to get resource for PCI err regs\n", __func__); goto err; } @@ -262,59 +208,87 @@ int mpc85xx_pci_err_probe(struct platform_device *op) if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r), pdata->name)) { - printk(KERN_ERR "%s: Error while requesting mem region\n", - __func__); + pr_err("%s: Error while requesting mem region\n", __func__); res = -EBUSY; goto err; } pdata->pci_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r)); if (!pdata->pci_vbase) { - printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__); + pr_err("%s: Unable to setup PCI err regs\n", __func__); res = -ENOMEM; goto err; } - orig_pci_err_cap_dr = - in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR); + if (pdata->is_pcie) { + orig_pci_err_cap_dr = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, ~0); + orig_pci_err_en = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, 0); + } else { + orig_pci_err_cap_dr = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR); - /* PCI master abort is expected during config cycles */ - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, 0x40); + /* PCI master abort is expected during config cycles */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, 0x40); - orig_pci_err_en = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); + orig_pci_err_en = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); - /* disable master abort reporting */ - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0x40); + /* disable master abort reporting */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0x40); + } /* clear error bits */ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, ~0); + /* reset error capture */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR, 0x1); + if (edac_pci_add_device(pci, pdata->edac_idx) > 0) { edac_dbg(3, "failed edac_pci_add_device()\n"); goto err; } if (edac_op_state == EDAC_OPSTATE_INT) { - pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); + pdata->irq = irq_of_parse_and_map(of_node, 0); res = devm_request_irq(&op->dev, pdata->irq, - mpc85xx_pci_isr, IRQF_DISABLED, + mpc85xx_pci_isr, + IRQF_SHARED, "[EDAC] PCI err", pci); if (res < 0) { - printk(KERN_ERR - "%s: Unable to request irq %d for " - "MPC85xx PCI err\n", __func__, pdata->irq); + pr_err("%s: Unable to request irq %d for MPC85xx PCI err\n", + __func__, pdata->irq); irq_dispose_mapping(pdata->irq); res = -ENODEV; 
goto err2; } - printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for PCI Err\n", + pr_info(EDAC_MOD_STR " acquired irq %d for PCI Err\n", pdata->irq); } + if (pdata->is_pcie) { + /* + * Enable all PCIe error interrupts and error detection except the + * invalid PEX_CONFIG_ADDR/PEX_CONFIG_DATA access interrupt + * generation enable bit and the matching access detection enable + * bit. The PCIe bus code issues some invalid + * PEX_CONFIG_ADDR/PEX_CONFIG_DATA accesses while initializing and + * configuring devices at boot, so this detection stays disabled to + * keep the log free of spurious error reports. + */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0 + & ~PEX_ERR_ICCAIE_EN_BIT); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, 0 + | PEX_ERR_ICCAD_DISR_BIT); + } + devres_remove_group(&op->dev, mpc85xx_pci_err_probe); edac_dbg(3, "success\n"); - printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n"); + pr_info(EDAC_MOD_STR " PCI err registered\n"); return 0; @@ -325,30 +299,37 @@ err: devres_release_group(&op->dev, mpc85xx_pci_err_probe); return res; } -EXPORT_SYMBOL(mpc85xx_pci_err_probe); -static int mpc85xx_pci_err_remove(struct platform_device *op) +static void mpc85xx_pci_err_remove(struct platform_device *op) { struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev); struct mpc85xx_pci_pdata *pdata = pci->pvt_info; edac_dbg(0, "\n"); - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, - orig_pci_err_cap_dr); - + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, orig_pci_err_cap_dr); out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, orig_pci_err_en); - edac_pci_del_device(pci->dev); - - if (edac_op_state == EDAC_OPSTATE_INT) - irq_dispose_mapping(pdata->irq); - + edac_pci_del_device(&op->dev); edac_pci_free_ctl_info(pci); - - return 0; } +static const struct platform_device_id mpc85xx_pci_err_match[] = { + { + .name = "mpc85xx-pci-edac" + }, + {} +}; + +static struct platform_driver mpc85xx_pci_err_driver = { + .probe = mpc85xx_pci_err_probe, + .remove = mpc85xx_pci_err_remove, + .id_table = mpc85xx_pci_err_match, + .driver = { + .name = "mpc85xx_pci_err", + .suppress_bind_attrs = true, + }, +}; #endif /* CONFIG_PCI */ /**************************** L2 Err device ***************************/ @@ -465,17 +446,17 @@ static void mpc85xx_l2_check(struct edac_device_ctl_info *edac_dev) if (!(err_detect & L2_EDE_MASK)) return; - printk(KERN_ERR "ECC Error in CPU L2 cache\n"); - printk(KERN_ERR "L2 Error Detect Register: 0x%08x\n", err_detect); - printk(KERN_ERR "L2 Error Capture Data High Register: 0x%08x\n", + pr_err("ECC Error in CPU L2 cache\n"); + pr_err("L2 Error Detect Register: 0x%08x\n", err_detect); + pr_err("L2 Error Capture Data High Register: 0x%08x\n", in_be32(pdata->l2_vbase + MPC85XX_L2_CAPTDATAHI)); - printk(KERN_ERR "L2 Error Capture Data Lo Register: 0x%08x\n", + pr_err("L2 Error Capture Data Lo Register: 0x%08x\n", in_be32(pdata->l2_vbase + MPC85XX_L2_CAPTDATALO)); - printk(KERN_ERR "L2 Error Syndrome Register: 0x%08x\n", + pr_err("L2 Error Syndrome Register: 0x%08x\n", in_be32(pdata->l2_vbase + MPC85XX_L2_CAPTECC)); - printk(KERN_ERR "L2 Error Attributes Capture Register: 0x%08x\n", + pr_err("L2 Error Attributes Capture Register: 0x%08x\n", in_be32(pdata->l2_vbase + MPC85XX_L2_ERRATTR)); - printk(KERN_ERR "L2 Error Address Capture Register: 0x%08x\n", + pr_err("L2 Error Address Capture Register: 0x%08x\n", in_be32(pdata->l2_vbase + MPC85XX_L2_ERRADDR)); /* clear error detect register */ @@ -515,7 +496,7 @@ static int
mpc85xx_l2_err_probe(struct platform_device *op) return -ENOMEM; edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata), - "cpu", 1, "L", 1, 2, NULL, 0, + "cpu", 1, "L", 1, 2, edac_dev_idx); if (!edac_dev) { devres_release_group(&op->dev, mpc85xx_l2_err_probe); @@ -524,7 +505,6 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) pdata = edac_dev->pvt_info; pdata->name = "mpc85xx_l2_err"; - pdata->irq = NO_IRQ; edac_dev->dev = &op->dev; dev_set_drvdata(edac_dev->dev, edac_dev); edac_dev->ctl_name = pdata->name; @@ -532,8 +512,7 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) res = of_address_to_resource(op->dev.of_node, 0, &r); if (res) { - printk(KERN_ERR "%s: Unable to get resource for " - "L2 err regs\n", __func__); + pr_err("%s: Unable to get resource for L2 err regs\n", __func__); goto err; } @@ -542,15 +521,14 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r), pdata->name)) { - printk(KERN_ERR "%s: Error while requesting mem region\n", - __func__); + pr_err("%s: Error while requesting mem region\n", __func__); res = -EBUSY; goto err; } pdata->l2_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r)); if (!pdata->l2_vbase) { - printk(KERN_ERR "%s: Unable to setup L2 err regs\n", __func__); + pr_err("%s: Unable to setup L2 err regs\n", __func__); res = -ENOMEM; goto err; } @@ -579,19 +557,17 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) if (edac_op_state == EDAC_OPSTATE_INT) { pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); res = devm_request_irq(&op->dev, pdata->irq, - mpc85xx_l2_isr, IRQF_DISABLED, + mpc85xx_l2_isr, IRQF_SHARED, "[EDAC] L2 err", edac_dev); if (res < 0) { - printk(KERN_ERR - "%s: Unable to request irq %d for " - "MPC85xx L2 err\n", __func__, pdata->irq); + pr_err("%s: Unable to request irq %d for MPC85xx L2 err\n", + __func__, pdata->irq); irq_dispose_mapping(pdata->irq); res = -ENODEV; goto err2; } - printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for L2 Err\n", - pdata->irq); + pr_info(EDAC_MOD_STR " acquired irq %d for L2 Err\n", pdata->irq); edac_dev->op_state = OP_RUNNING_INTERRUPT; @@ -601,7 +577,7 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) devres_remove_group(&op->dev, mpc85xx_l2_err_probe); edac_dbg(3, "success\n"); - printk(KERN_INFO EDAC_MOD_STR " L2 err registered\n"); + pr_info(EDAC_MOD_STR " L2 err registered\n"); return 0; @@ -613,7 +589,7 @@ err: return res; } -static int mpc85xx_l2_err_remove(struct platform_device *op) +static void mpc85xx_l2_err_remove(struct platform_device *op) { struct edac_device_ctl_info *edac_dev = dev_get_drvdata(&op->dev); struct mpc85xx_l2_pdata *pdata = edac_dev->pvt_info; @@ -628,17 +604,9 @@ static int mpc85xx_l2_err_remove(struct platform_device *op) out_be32(pdata->l2_vbase + MPC85XX_L2_ERRDIS, orig_l2_err_disable); edac_device_del_device(&op->dev); edac_device_free_ctl_info(edac_dev); - return 0; } -static struct of_device_id mpc85xx_l2_err_of_match[] = { -/* deprecate the fsl,85.. forms in the future, 2.6.30? 
*/ - { .compatible = "fsl,8540-l2-cache-controller", }, - { .compatible = "fsl,8541-l2-cache-controller", }, - { .compatible = "fsl,8544-l2-cache-controller", }, - { .compatible = "fsl,8548-l2-cache-controller", }, - { .compatible = "fsl,8555-l2-cache-controller", }, - { .compatible = "fsl,8568-l2-cache-controller", }, +static const struct of_device_id mpc85xx_l2_err_of_match[] = { { .compatible = "fsl,mpc8536-l2-cache-controller", }, { .compatible = "fsl,mpc8540-l2-cache-controller", }, { .compatible = "fsl,mpc8541-l2-cache-controller", }, @@ -652,6 +620,7 @@ static struct of_device_id mpc85xx_l2_err_of_match[] = { { .compatible = "fsl,p1020-l2-cache-controller", }, { .compatible = "fsl,p1021-l2-cache-controller", }, { .compatible = "fsl,p2020-l2-cache-controller", }, + { .compatible = "fsl,t2080-l2-cache-controller", }, {}, }; MODULE_DEVICE_TABLE(of, mpc85xx_l2_err_of_match); @@ -661,482 +630,11 @@ static struct platform_driver mpc85xx_l2_err_driver = { .remove = mpc85xx_l2_err_remove, .driver = { .name = "mpc85xx_l2_err", - .owner = THIS_MODULE, .of_match_table = mpc85xx_l2_err_of_match, }, }; -/**************************** MC Err device ***************************/ - -/* - * Taken from table 8-55 in the MPC8641 User's Manual and/or 9-61 in the - * MPC8572 User's Manual. Each line represents a syndrome bit column as a - * 64-bit value, but split into an upper and lower 32-bit chunk. The labels - * below correspond to Freescale's manuals. - */ -static unsigned int ecc_table[16] = { - /* MSB LSB */ - /* [0:31] [32:63] */ - 0xf00fe11e, 0xc33c0ff7, /* Syndrome bit 7 */ - 0x00ff00ff, 0x00fff0ff, - 0x0f0f0f0f, 0x0f0fff00, - 0x11113333, 0x7777000f, - 0x22224444, 0x8888222f, - 0x44448888, 0xffff4441, - 0x8888ffff, 0x11118882, - 0xffff1111, 0x22221114, /* Syndrome bit 0 */ -}; - -/* - * Calculate the correct ECC value for a 64-bit value specified by high:low - */ -static u8 calculate_ecc(u32 high, u32 low) -{ - u32 mask_low; - u32 mask_high; - int bit_cnt; - u8 ecc = 0; - int i; - int j; - - for (i = 0; i < 8; i++) { - mask_high = ecc_table[i * 2]; - mask_low = ecc_table[i * 2 + 1]; - bit_cnt = 0; - - for (j = 0; j < 32; j++) { - if ((mask_high >> j) & 1) - bit_cnt ^= (high >> j) & 1; - if ((mask_low >> j) & 1) - bit_cnt ^= (low >> j) & 1; - } - - ecc |= bit_cnt << i; - } - - return ecc; -} - -/* - * Create the syndrome code which is generated if the data line specified by - * 'bit' failed. Eg generate an 8-bit codes seen in Table 8-55 in the MPC8641 - * User's Manual and 9-61 in the MPC8572 User's Manual. - */ -static u8 syndrome_from_bit(unsigned int bit) { - int i; - u8 syndrome = 0; - - /* - * Cycle through the upper or lower 32-bit portion of each value in - * ecc_table depending on if 'bit' is in the upper or lower half of - * 64-bit data. - */ - for (i = bit < 32; i < 16; i += 2) - syndrome |= ((ecc_table[i] >> (bit % 32)) & 1) << (i / 2); - - return syndrome; -} - -/* - * Decode data and ecc syndrome to determine what went wrong - * Note: This can only decode single-bit errors - */ -static void sbe_ecc_decode(u32 cap_high, u32 cap_low, u32 cap_ecc, - int *bad_data_bit, int *bad_ecc_bit) -{ - int i; - u8 syndrome; - - *bad_data_bit = -1; - *bad_ecc_bit = -1; - - /* - * Calculate the ECC of the captured data and XOR it with the captured - * ECC to find an ECC syndrome value we can search for - */ - syndrome = calculate_ecc(cap_high, cap_low) ^ cap_ecc; - - /* Check if a data line is stuck... 
*/ - for (i = 0; i < 64; i++) { - if (syndrome == syndrome_from_bit(i)) { - *bad_data_bit = i; - return; - } - } - - /* If data is correct, check ECC bits for errors... */ - for (i = 0; i < 8; i++) { - if ((syndrome >> i) & 0x1) { - *bad_ecc_bit = i; - return; - } - } -} - -static void mpc85xx_mc_check(struct mem_ctl_info *mci) -{ - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - struct csrow_info *csrow; - u32 bus_width; - u32 err_detect; - u32 syndrome; - u32 err_addr; - u32 pfn; - int row_index; - u32 cap_high; - u32 cap_low; - int bad_data_bit; - int bad_ecc_bit; - - err_detect = in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT); - if (!err_detect) - return; - - mpc85xx_mc_printk(mci, KERN_ERR, "Err Detect Register: %#8.8x\n", - err_detect); - - /* no more processing if not ECC bit errors */ - if (!(err_detect & (DDR_EDE_SBE | DDR_EDE_MBE))) { - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, err_detect); - return; - } - - syndrome = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ECC); - - /* Mask off appropriate bits of syndrome based on bus width */ - bus_width = (in_be32(pdata->mc_vbase + MPC85XX_MC_DDR_SDRAM_CFG) & - DSC_DBW_MASK) ? 32 : 64; - if (bus_width == 64) - syndrome &= 0xff; - else - syndrome &= 0xffff; - - err_addr = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ADDRESS); - pfn = err_addr >> PAGE_SHIFT; - - for (row_index = 0; row_index < mci->nr_csrows; row_index++) { - csrow = mci->csrows[row_index]; - if ((pfn >= csrow->first_page) && (pfn <= csrow->last_page)) - break; - } - - cap_high = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_DATA_HI); - cap_low = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_DATA_LO); - - /* - * Analyze single-bit errors on 64-bit wide buses - * TODO: Add support for 32-bit wide buses - */ - if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) { - sbe_ecc_decode(cap_high, cap_low, syndrome, - &bad_data_bit, &bad_ecc_bit); - - if (bad_data_bit != -1) - mpc85xx_mc_printk(mci, KERN_ERR, - "Faulty Data bit: %d\n", bad_data_bit); - if (bad_ecc_bit != -1) - mpc85xx_mc_printk(mci, KERN_ERR, - "Faulty ECC bit: %d\n", bad_ecc_bit); - - mpc85xx_mc_printk(mci, KERN_ERR, - "Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n", - cap_high ^ (1 << (bad_data_bit - 32)), - cap_low ^ (1 << bad_data_bit), - syndrome ^ (1 << bad_ecc_bit)); - } - - mpc85xx_mc_printk(mci, KERN_ERR, - "Captured Data / ECC:\t%#8.8x_%08x / %#2.2x\n", - cap_high, cap_low, syndrome); - mpc85xx_mc_printk(mci, KERN_ERR, "Err addr: %#8.8x\n", err_addr); - mpc85xx_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn); - - /* we are out of range */ - if (row_index == mci->nr_csrows) - mpc85xx_mc_printk(mci, KERN_ERR, "PFN out of range!\n"); - - if (err_detect & DDR_EDE_SBE) - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - pfn, err_addr & ~PAGE_MASK, syndrome, - row_index, 0, -1, - mci->ctl_name, ""); - - if (err_detect & DDR_EDE_MBE) - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - pfn, err_addr & ~PAGE_MASK, syndrome, - row_index, 0, -1, - mci->ctl_name, ""); - - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, err_detect); -} - -static irqreturn_t mpc85xx_mc_isr(int irq, void *dev_id) -{ - struct mem_ctl_info *mci = dev_id; - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - u32 err_detect; - - err_detect = in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT); - if (!err_detect) - return IRQ_NONE; - - mpc85xx_mc_check(mci); - - return IRQ_HANDLED; -} - -static void mpc85xx_init_csrows(struct mem_ctl_info *mci) -{ - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - struct csrow_info *csrow; 
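/*
 * Each check bit computed by the calculate_ecc() routine removed above is
 * the XOR-parity of the data bits selected by one 64-bit mask row of
 * ecc_table[] (stored as a high and a low 32-bit half), so the inner
 * j-loop is equivalent to a parity computation. A compact restatement as
 * an illustrative sketch, assuming GCC/Clang's __builtin_parity() (this
 * is not the kernel's implementation):
 */
static u8 calc_ecc_by_parity(u32 high, u32 low)
{
	u8 ecc = 0;
	int i;

	for (i = 0; i < 8; i++) {
		u32 h = high & ecc_table[i * 2];	/* data bits [0:31] */
		u32 l = low & ecc_table[i * 2 + 1];	/* data bits [32:63] */

		/* syndrome bit i = parity of all selected data bits */
		ecc |= (__builtin_parity(h) ^ __builtin_parity(l)) << i;
	}

	return ecc;
}

/*
 * This is also why sbe_ecc_decode() works: flipping a single data bit b
 * perturbs exactly the check bits whose masks cover b, so
 * calculate_ecc(data) ^ captured_ecc reproduces syndrome_from_bit(b) and
 * the faulty line is found by scanning all 64 candidates.
 */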
- struct dimm_info *dimm; - u32 sdram_ctl; - u32 sdtype; - enum mem_type mtype; - u32 cs_bnds; - int index; - - sdram_ctl = in_be32(pdata->mc_vbase + MPC85XX_MC_DDR_SDRAM_CFG); - - sdtype = sdram_ctl & DSC_SDTYPE_MASK; - if (sdram_ctl & DSC_RD_EN) { - switch (sdtype) { - case DSC_SDTYPE_DDR: - mtype = MEM_RDDR; - break; - case DSC_SDTYPE_DDR2: - mtype = MEM_RDDR2; - break; - case DSC_SDTYPE_DDR3: - mtype = MEM_RDDR3; - break; - default: - mtype = MEM_UNKNOWN; - break; - } - } else { - switch (sdtype) { - case DSC_SDTYPE_DDR: - mtype = MEM_DDR; - break; - case DSC_SDTYPE_DDR2: - mtype = MEM_DDR2; - break; - case DSC_SDTYPE_DDR3: - mtype = MEM_DDR3; - break; - default: - mtype = MEM_UNKNOWN; - break; - } - } - - for (index = 0; index < mci->nr_csrows; index++) { - u32 start; - u32 end; - - csrow = mci->csrows[index]; - dimm = csrow->channels[0]->dimm; - - cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 + - (index * MPC85XX_MC_CS_BNDS_OFS)); - - start = (cs_bnds & 0xffff0000) >> 16; - end = (cs_bnds & 0x0000ffff); - - if (start == end) - continue; /* not populated */ - - start <<= (24 - PAGE_SHIFT); - end <<= (24 - PAGE_SHIFT); - end |= (1 << (24 - PAGE_SHIFT)) - 1; - - csrow->first_page = start; - csrow->last_page = end; - - dimm->nr_pages = end + 1 - start; - dimm->grain = 8; - dimm->mtype = mtype; - dimm->dtype = DEV_UNKNOWN; - if (sdram_ctl & DSC_X32_EN) - dimm->dtype = DEV_X32; - dimm->edac_mode = EDAC_SECDED; - } -} - -static int mpc85xx_mc_err_probe(struct platform_device *op) -{ - struct mem_ctl_info *mci; - struct edac_mc_layer layers[2]; - struct mpc85xx_mc_pdata *pdata; - struct resource r; - u32 sdram_ctl; - int res; - - if (!devres_open_group(&op->dev, mpc85xx_mc_err_probe, GFP_KERNEL)) - return -ENOMEM; - - layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = 4; - layers[0].is_virt_csrow = true; - layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = 1; - layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers, - sizeof(*pdata)); - if (!mci) { - devres_release_group(&op->dev, mpc85xx_mc_err_probe); - return -ENOMEM; - } - - pdata = mci->pvt_info; - pdata->name = "mpc85xx_mc_err"; - pdata->irq = NO_IRQ; - mci->pdev = &op->dev; - pdata->edac_idx = edac_mc_idx++; - dev_set_drvdata(mci->pdev, mci); - mci->ctl_name = pdata->name; - mci->dev_name = pdata->name; - - res = of_address_to_resource(op->dev.of_node, 0, &r); - if (res) { - printk(KERN_ERR "%s: Unable to get resource for MC err regs\n", - __func__); - goto err; - } - - if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r), - pdata->name)) { - printk(KERN_ERR "%s: Error while requesting mem region\n", - __func__); - res = -EBUSY; - goto err; - } - - pdata->mc_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r)); - if (!pdata->mc_vbase) { - printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__); - res = -ENOMEM; - goto err; - } - - sdram_ctl = in_be32(pdata->mc_vbase + MPC85XX_MC_DDR_SDRAM_CFG); - if (!(sdram_ctl & DSC_ECC_EN)) { - /* no ECC */ - printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__); - res = -ENODEV; - goto err; - } - - edac_dbg(3, "init mci\n"); - mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | - MEM_FLAG_DDR | MEM_FLAG_DDR2; - mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; - mci->edac_cap = EDAC_FLAG_SECDED; - mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = MPC85XX_REVISION; - - if (edac_op_state == EDAC_OPSTATE_POLL) - mci->edac_check = mpc85xx_mc_check; - - mci->ctl_page_to_phys = NULL; - - 
mci->scrub_mode = SCRUB_SW_SRC; - - mpc85xx_init_csrows(mci); - - /* store the original error disable bits */ - orig_ddr_err_disable = - in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE); - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE, 0); - - /* clear all error bits */ - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, ~0); - - if (edac_mc_add_mc(mci)) { - edac_dbg(3, "failed edac_mc_add_mc()\n"); - goto err; - } - - if (mpc85xx_create_sysfs_attributes(mci)) { - edac_mc_del_mc(mci->pdev); - edac_dbg(3, "failed edac_mc_add_mc()\n"); - goto err; - } - - if (edac_op_state == EDAC_OPSTATE_INT) { - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_INT_EN, - DDR_EIE_MBEE | DDR_EIE_SBEE); - - /* store the original error management threshold */ - orig_ddr_err_sbe = in_be32(pdata->mc_vbase + - MPC85XX_MC_ERR_SBE) & 0xff0000; - - /* set threshold to 1 error per interrupt */ - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_SBE, 0x10000); - - /* register interrupts */ - pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); - res = devm_request_irq(&op->dev, pdata->irq, - mpc85xx_mc_isr, - IRQF_DISABLED | IRQF_SHARED, - "[EDAC] MC err", mci); - if (res < 0) { - printk(KERN_ERR "%s: Unable to request irq %d for " - "MPC85xx DRAM ERR\n", __func__, pdata->irq); - irq_dispose_mapping(pdata->irq); - res = -ENODEV; - goto err2; - } - - printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for MC\n", - pdata->irq); - } - - devres_remove_group(&op->dev, mpc85xx_mc_err_probe); - edac_dbg(3, "success\n"); - printk(KERN_INFO EDAC_MOD_STR " MC err registered\n"); - - return 0; - -err2: - edac_mc_del_mc(&op->dev); -err: - devres_release_group(&op->dev, mpc85xx_mc_err_probe); - edac_mc_free(mci); - return res; -} - -static int mpc85xx_mc_err_remove(struct platform_device *op) -{ - struct mem_ctl_info *mci = dev_get_drvdata(&op->dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - - edac_dbg(0, "\n"); - - if (edac_op_state == EDAC_OPSTATE_INT) { - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_INT_EN, 0); - irq_dispose_mapping(pdata->irq); - } - - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE, - orig_ddr_err_disable); - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_SBE, orig_ddr_err_sbe); - - mpc85xx_remove_sysfs_attributes(mci); - edac_mc_del_mc(&op->dev); - edac_mc_free(mci); - return 0; -} - -static struct of_device_id mpc85xx_mc_err_of_match[] = { -/* deprecate the fsl,85.. forms in the future, 2.6.30? 
*/ - { .compatible = "fsl,8540-memory-controller", }, - { .compatible = "fsl,8541-memory-controller", }, - { .compatible = "fsl,8544-memory-controller", }, - { .compatible = "fsl,8548-memory-controller", }, - { .compatible = "fsl,8555-memory-controller", }, - { .compatible = "fsl,8568-memory-controller", }, +static const struct of_device_id mpc85xx_mc_err_of_match[] = { { .compatible = "fsl,mpc8536-memory-controller", }, { .compatible = "fsl,mpc8540-memory-controller", }, { .compatible = "fsl,mpc8541-memory-controller", }, @@ -1157,30 +655,28 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = { MODULE_DEVICE_TABLE(of, mpc85xx_mc_err_of_match); static struct platform_driver mpc85xx_mc_err_driver = { - .probe = mpc85xx_mc_err_probe, - .remove = mpc85xx_mc_err_remove, + .probe = fsl_mc_err_probe, + .remove = fsl_mc_err_remove, .driver = { .name = "mpc85xx_mc_err", - .owner = THIS_MODULE, .of_match_table = mpc85xx_mc_err_of_match, }, }; -#ifdef CONFIG_FSL_SOC_BOOKE -static void __init mpc85xx_mc_clear_rfxe(void *data) -{ - orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1); - mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~HID1_RFXE)); -} +static struct platform_driver * const drivers[] = { + &mpc85xx_mc_err_driver, + &mpc85xx_l2_err_driver, +#ifdef CONFIG_PCI + &mpc85xx_pci_err_driver, #endif +}; static int __init mpc85xx_mc_init(void) { int res = 0; - u32 pvr = 0; + u32 __maybe_unused pvr = 0; - printk(KERN_INFO "Freescale(R) MPC85xx EDAC driver, " - "(C) 2006 Montavista Software\n"); + pr_info("Freescale(R) MPC85xx EDAC driver, (C) 2006 Montavista Software\n"); /* make sure error reporting method is sane */ switch (edac_op_state) { @@ -1192,58 +688,24 @@ static int __init mpc85xx_mc_init(void) break; } - res = platform_driver_register(&mpc85xx_mc_err_driver); - if (res) - printk(KERN_WARNING EDAC_MOD_STR "MC fails to register\n"); - - res = platform_driver_register(&mpc85xx_l2_err_driver); + res = platform_register_drivers(drivers, ARRAY_SIZE(drivers)); if (res) - printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n"); - -#ifdef CONFIG_FSL_SOC_BOOKE - pvr = mfspr(SPRN_PVR); - - if ((PVR_VER(pvr) == PVR_VER_E500V1) || - (PVR_VER(pvr) == PVR_VER_E500V2)) { - /* - * need to clear HID1[RFXE] to disable machine check int - * so we can catch it - */ - if (edac_op_state == EDAC_OPSTATE_INT) - on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0); - } -#endif + pr_warn(EDAC_MOD_STR "drivers fail to register\n"); return 0; } module_init(mpc85xx_mc_init); -#ifdef CONFIG_FSL_SOC_BOOKE -static void __exit mpc85xx_mc_restore_hid1(void *data) -{ - mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]); -} -#endif - static void __exit mpc85xx_mc_exit(void) { -#ifdef CONFIG_FSL_SOC_BOOKE - u32 pvr = mfspr(SPRN_PVR); - - if ((PVR_VER(pvr) == PVR_VER_E500V1) || - (PVR_VER(pvr) == PVR_VER_E500V2)) { - on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0); - } -#endif - platform_driver_unregister(&mpc85xx_l2_err_driver); - platform_driver_unregister(&mpc85xx_mc_err_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } module_exit(mpc85xx_mc_exit); +MODULE_DESCRIPTION("Freescale MPC85xx Memory Controller EDAC driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Montavista Software, Inc."); module_param(edac_op_state, int, 0444); -MODULE_PARM_DESC(edac_op_state, - "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h index 932016f2cf06..66a046ae33ee 100644 --- 
a/drivers/edac/mpc85xx_edac.h +++ b/drivers/edac/mpc85xx_edac.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * Freescale MPC85xx Memory Controller kenel module + * Freescale MPC85xx Memory Controller kernel module * Author: Dave Jiang <djiang@mvista.com> * - * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - * + * 2006-2007 (c) MontaVista Software, Inc. */ #ifndef _MPC85XX_EDAC_H_ #define _MPC85XX_EDAC_H_ @@ -17,64 +14,6 @@ #define mpc85xx_printk(level, fmt, arg...) \ edac_printk(level, "MPC85xx", fmt, ##arg) -#define mpc85xx_mc_printk(mci, level, fmt, arg...) \ - edac_mc_chipset_printk(mci, level, "MPC85xx", fmt, ##arg) - -/* - * DRAM error defines - */ - -/* DDR_SDRAM_CFG */ -#define MPC85XX_MC_DDR_SDRAM_CFG 0x0110 -#define MPC85XX_MC_CS_BNDS_0 0x0000 -#define MPC85XX_MC_CS_BNDS_1 0x0008 -#define MPC85XX_MC_CS_BNDS_2 0x0010 -#define MPC85XX_MC_CS_BNDS_3 0x0018 -#define MPC85XX_MC_CS_BNDS_OFS 0x0008 - -#define MPC85XX_MC_DATA_ERR_INJECT_HI 0x0e00 -#define MPC85XX_MC_DATA_ERR_INJECT_LO 0x0e04 -#define MPC85XX_MC_ECC_ERR_INJECT 0x0e08 -#define MPC85XX_MC_CAPTURE_DATA_HI 0x0e20 -#define MPC85XX_MC_CAPTURE_DATA_LO 0x0e24 -#define MPC85XX_MC_CAPTURE_ECC 0x0e28 -#define MPC85XX_MC_ERR_DETECT 0x0e40 -#define MPC85XX_MC_ERR_DISABLE 0x0e44 -#define MPC85XX_MC_ERR_INT_EN 0x0e48 -#define MPC85XX_MC_CAPTURE_ATRIBUTES 0x0e4c -#define MPC85XX_MC_CAPTURE_ADDRESS 0x0e50 -#define MPC85XX_MC_ERR_SBE 0x0e58 - -#define DSC_MEM_EN 0x80000000 -#define DSC_ECC_EN 0x20000000 -#define DSC_RD_EN 0x10000000 -#define DSC_DBW_MASK 0x00180000 -#define DSC_DBW_32 0x00080000 -#define DSC_DBW_64 0x00000000 - -#define DSC_SDTYPE_MASK 0x07000000 - -#define DSC_SDTYPE_DDR 0x02000000 -#define DSC_SDTYPE_DDR2 0x03000000 -#define DSC_SDTYPE_DDR3 0x07000000 -#define DSC_X32_EN 0x00000020 - -/* Err_Int_En */ -#define DDR_EIE_MSEE 0x1 /* memory select */ -#define DDR_EIE_SBEE 0x4 /* single-bit ECC error */ -#define DDR_EIE_MBEE 0x8 /* multi-bit ECC error */ - -/* Err_Detect */ -#define DDR_EDE_MSE 0x1 /* memory select */ -#define DDR_EDE_SBE 0x4 /* single-bit ECC error */ -#define DDR_EDE_MBE 0x8 /* multi-bit ECC error */ -#define DDR_EDE_MME 0x80000000 /* multiple memory errors */ - -/* Err_Disable */ -#define DDR_EDI_MSED 0x1 /* memory select disable */ -#define DDR_EDI_SBED 0x4 /* single-bit ECC error disable */ -#define DDR_EDI_MBED 0x8 /* multi-bit ECC error disable */ - /* * L2 Err defines */ @@ -134,20 +73,19 @@ #define MPC85XX_PCI_ERR_DR 0x0000 #define MPC85XX_PCI_ERR_CAP_DR 0x0004 #define MPC85XX_PCI_ERR_EN 0x0008 +#define PEX_ERR_ICCAIE_EN_BIT 0x00020000 #define MPC85XX_PCI_ERR_ATTRIB 0x000c #define MPC85XX_PCI_ERR_ADDR 0x0010 +#define PEX_ERR_ICCAD_DISR_BIT 0x00020000 #define MPC85XX_PCI_ERR_EXT_ADDR 0x0014 #define MPC85XX_PCI_ERR_DL 0x0018 #define MPC85XX_PCI_ERR_DH 0x001c #define MPC85XX_PCI_GAS_TIMR 0x0020 #define MPC85XX_PCI_PCIX_TIMR 0x0024 - -struct mpc85xx_mc_pdata { - char *name; - int edac_idx; - void __iomem *mc_vbase; - int irq; -}; +#define MPC85XX_PCIE_ERR_CAP_R0 0x0028 +#define MPC85XX_PCIE_ERR_CAP_R1 0x002c +#define MPC85XX_PCIE_ERR_CAP_R2 0x0030 +#define MPC85XX_PCIE_ERR_CAP_R3 0x0034 struct mpc85xx_l2_pdata { char *name; @@ -158,6 +96,7 @@ struct mpc85xx_l2_pdata { struct mpc85xx_pci_pdata { char *name; + bool is_pcie; int edac_idx; void __iomem *pci_vbase; int irq; diff --git 
a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c deleted file mode 100644 index 542fad70e360..000000000000 --- a/drivers/edac/mv64x60_edac.c +++ /dev/null @@ -1,905 +0,0 @@ -/* - * Marvell MV64x60 Memory Controller kernel module for PPC platforms - * - * Author: Dave Jiang <djiang@mvista.com> - * - * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - * - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/edac.h> -#include <linux/gfp.h> - -#include "edac_core.h" -#include "edac_module.h" -#include "mv64x60_edac.h" - -static const char *mv64x60_ctl_name = "MV64x60"; -static int edac_dev_idx; -static int edac_pci_idx; -static int edac_mc_idx; - -/*********************** PCI err device **********************************/ -#ifdef CONFIG_PCI -static void mv64x60_pci_check(struct edac_pci_ctl_info *pci) -{ - struct mv64x60_pci_pdata *pdata = pci->pvt_info; - u32 cause; - - cause = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE); - if (!cause) - return; - - printk(KERN_ERR "Error in PCI %d Interface\n", pdata->pci_hose); - printk(KERN_ERR "Cause register: 0x%08x\n", cause); - printk(KERN_ERR "Address Low: 0x%08x\n", - in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_LO)); - printk(KERN_ERR "Address High: 0x%08x\n", - in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_HI)); - printk(KERN_ERR "Attribute: 0x%08x\n", - in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ATTR)); - printk(KERN_ERR "Command: 0x%08x\n", - in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CMD)); - out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, ~cause); - - if (cause & MV64X60_PCI_PE_MASK) - edac_pci_handle_pe(pci, pci->ctl_name); - - if (!(cause & MV64X60_PCI_PE_MASK)) - edac_pci_handle_npe(pci, pci->ctl_name); -} - -static irqreturn_t mv64x60_pci_isr(int irq, void *dev_id) -{ - struct edac_pci_ctl_info *pci = dev_id; - struct mv64x60_pci_pdata *pdata = pci->pvt_info; - u32 val; - - val = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE); - if (!val) - return IRQ_NONE; - - mv64x60_pci_check(pci); - - return IRQ_HANDLED; -} - -/* - * Bit 0 of MV64x60_PCIx_ERR_MASK does not exist on the 64360 and because of - * errata FEr-#11 and FEr-##16 for the 64460, it should be 0 on that chip as - * well. IOW, don't set bit 0. - */ - -/* Erratum FEr PCI-#16: clear bit 0 of PCI SERRn Mask reg. 
*/ -static int __init mv64x60_pci_fixup(struct platform_device *pdev) -{ - struct resource *r; - void __iomem *pci_serr; - - r = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!r) { - printk(KERN_ERR "%s: Unable to get resource for " - "PCI err regs\n", __func__); - return -ENOENT; - } - - pci_serr = ioremap(r->start, resource_size(r)); - if (!pci_serr) - return -ENOMEM; - - out_le32(pci_serr, in_le32(pci_serr) & ~0x1); - iounmap(pci_serr); - - return 0; -} - -static int mv64x60_pci_err_probe(struct platform_device *pdev) -{ - struct edac_pci_ctl_info *pci; - struct mv64x60_pci_pdata *pdata; - struct resource *r; - int res = 0; - - if (!devres_open_group(&pdev->dev, mv64x60_pci_err_probe, GFP_KERNEL)) - return -ENOMEM; - - pci = edac_pci_alloc_ctl_info(sizeof(*pdata), "mv64x60_pci_err"); - if (!pci) - return -ENOMEM; - - pdata = pci->pvt_info; - - pdata->pci_hose = pdev->id; - pdata->name = "mpc85xx_pci_err"; - pdata->irq = NO_IRQ; - platform_set_drvdata(pdev, pci); - pci->dev = &pdev->dev; - pci->dev_name = dev_name(&pdev->dev); - pci->mod_name = EDAC_MOD_STR; - pci->ctl_name = pdata->name; - - if (edac_op_state == EDAC_OPSTATE_POLL) - pci->edac_check = mv64x60_pci_check; - - pdata->edac_idx = edac_pci_idx++; - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) { - printk(KERN_ERR "%s: Unable to get resource for " - "PCI err regs\n", __func__); - res = -ENOENT; - goto err; - } - - if (!devm_request_mem_region(&pdev->dev, - r->start, - resource_size(r), - pdata->name)) { - printk(KERN_ERR "%s: Error while requesting mem region\n", - __func__); - res = -EBUSY; - goto err; - } - - pdata->pci_vbase = devm_ioremap(&pdev->dev, - r->start, - resource_size(r)); - if (!pdata->pci_vbase) { - printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__); - res = -ENOMEM; - goto err; - } - - res = mv64x60_pci_fixup(pdev); - if (res < 0) { - printk(KERN_ERR "%s: PCI fixup failed\n", __func__); - goto err; - } - - out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0); - out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0); - out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, - MV64X60_PCIx_ERR_MASK_VAL); - - if (edac_pci_add_device(pci, pdata->edac_idx) > 0) { - edac_dbg(3, "failed edac_pci_add_device()\n"); - goto err; - } - - if (edac_op_state == EDAC_OPSTATE_INT) { - pdata->irq = platform_get_irq(pdev, 0); - res = devm_request_irq(&pdev->dev, - pdata->irq, - mv64x60_pci_isr, - IRQF_DISABLED, - "[EDAC] PCI err", - pci); - if (res < 0) { - printk(KERN_ERR "%s: Unable to request irq %d for " - "MV64x60 PCI ERR\n", __func__, pdata->irq); - res = -ENODEV; - goto err2; - } - printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for PCI Err\n", - pdata->irq); - } - - devres_remove_group(&pdev->dev, mv64x60_pci_err_probe); - - /* get this far and it's successful */ - edac_dbg(3, "success\n"); - - return 0; - -err2: - edac_pci_del_device(&pdev->dev); -err: - edac_pci_free_ctl_info(pci); - devres_release_group(&pdev->dev, mv64x60_pci_err_probe); - return res; -} - -static int mv64x60_pci_err_remove(struct platform_device *pdev) -{ - struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev); - - edac_dbg(0, "\n"); - - edac_pci_del_device(&pdev->dev); - - edac_pci_free_ctl_info(pci); - - return 0; -} - -static struct platform_driver mv64x60_pci_err_driver = { - .probe = mv64x60_pci_err_probe, - .remove = mv64x60_pci_err_remove, - .driver = { - .name = "mv64x60_pci_err", - } -}; - -#endif /* CONFIG_PCI */ - -/*********************** SRAM err device **********************************/ 
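/*
 * mv64x60_pci_fixup() above is the usual map/read-modify-write/unmap
 * dance for a one-shot erratum workaround: only the offending bit is
 * cleared and every other bit in the register is preserved. A generic
 * sketch of the pattern; clear_serr_bit() is a hypothetical helper,
 * assuming the little-endian in_le32()/out_le32() accessors this driver
 * uses.
 */
static int clear_serr_bit(phys_addr_t start, resource_size_t size, u32 bit)
{
	void __iomem *reg = ioremap(start, size);

	if (!reg)
		return -ENOMEM;

	/* read-modify-write so the remaining mask bits stay untouched */
	out_le32(reg, in_le32(reg) & ~bit);
	iounmap(reg);

	return 0;
}

/*
 * Erratum FEr PCI-#16 then boils down to:
 *   clear_serr_bit(r->start, resource_size(r), 0x1);
 */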
-static void mv64x60_sram_check(struct edac_device_ctl_info *edac_dev) -{ - struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info; - u32 cause; - - cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE); - if (!cause) - return; - - printk(KERN_ERR "Error in internal SRAM\n"); - printk(KERN_ERR "Cause register: 0x%08x\n", cause); - printk(KERN_ERR "Address Low: 0x%08x\n", - in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_LO)); - printk(KERN_ERR "Address High: 0x%08x\n", - in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_HI)); - printk(KERN_ERR "Data Low: 0x%08x\n", - in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_LO)); - printk(KERN_ERR "Data High: 0x%08x\n", - in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_HI)); - printk(KERN_ERR "Parity: 0x%08x\n", - in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_PARITY)); - out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0); - - edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name); -} - -static irqreturn_t mv64x60_sram_isr(int irq, void *dev_id) -{ - struct edac_device_ctl_info *edac_dev = dev_id; - struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info; - u32 cause; - - cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE); - if (!cause) - return IRQ_NONE; - - mv64x60_sram_check(edac_dev); - - return IRQ_HANDLED; -} - -static int mv64x60_sram_err_probe(struct platform_device *pdev) -{ - struct edac_device_ctl_info *edac_dev; - struct mv64x60_sram_pdata *pdata; - struct resource *r; - int res = 0; - - if (!devres_open_group(&pdev->dev, mv64x60_sram_err_probe, GFP_KERNEL)) - return -ENOMEM; - - edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata), - "sram", 1, NULL, 0, 0, NULL, 0, - edac_dev_idx); - if (!edac_dev) { - devres_release_group(&pdev->dev, mv64x60_sram_err_probe); - return -ENOMEM; - } - - pdata = edac_dev->pvt_info; - pdata->name = "mv64x60_sram_err"; - pdata->irq = NO_IRQ; - edac_dev->dev = &pdev->dev; - platform_set_drvdata(pdev, edac_dev); - edac_dev->dev_name = dev_name(&pdev->dev); - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) { - printk(KERN_ERR "%s: Unable to get resource for " - "SRAM err regs\n", __func__); - res = -ENOENT; - goto err; - } - - if (!devm_request_mem_region(&pdev->dev, - r->start, - resource_size(r), - pdata->name)) { - printk(KERN_ERR "%s: Error while request mem region\n", - __func__); - res = -EBUSY; - goto err; - } - - pdata->sram_vbase = devm_ioremap(&pdev->dev, - r->start, - resource_size(r)); - if (!pdata->sram_vbase) { - printk(KERN_ERR "%s: Unable to setup SRAM err regs\n", - __func__); - res = -ENOMEM; - goto err; - } - - /* setup SRAM err registers */ - out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0); - - edac_dev->mod_name = EDAC_MOD_STR; - edac_dev->ctl_name = pdata->name; - - if (edac_op_state == EDAC_OPSTATE_POLL) - edac_dev->edac_check = mv64x60_sram_check; - - pdata->edac_idx = edac_dev_idx++; - - if (edac_device_add_device(edac_dev) > 0) { - edac_dbg(3, "failed edac_device_add_device()\n"); - goto err; - } - - if (edac_op_state == EDAC_OPSTATE_INT) { - pdata->irq = platform_get_irq(pdev, 0); - res = devm_request_irq(&pdev->dev, - pdata->irq, - mv64x60_sram_isr, - IRQF_DISABLED, - "[EDAC] SRAM err", - edac_dev); - if (res < 0) { - printk(KERN_ERR - "%s: Unable to request irq %d for " - "MV64x60 SRAM ERR\n", __func__, pdata->irq); - res = -ENODEV; - goto err2; - } - - printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for SRAM Err\n", - pdata->irq); - } - - devres_remove_group(&pdev->dev, mv64x60_sram_err_probe); - - /* get this far and it's successful 
*/ - edac_dbg(3, "success\n"); - - return 0; - -err2: - edac_device_del_device(&pdev->dev); -err: - devres_release_group(&pdev->dev, mv64x60_sram_err_probe); - edac_device_free_ctl_info(edac_dev); - return res; -} - -static int mv64x60_sram_err_remove(struct platform_device *pdev) -{ - struct edac_device_ctl_info *edac_dev = platform_get_drvdata(pdev); - - edac_dbg(0, "\n"); - - edac_device_del_device(&pdev->dev); - edac_device_free_ctl_info(edac_dev); - - return 0; -} - -static struct platform_driver mv64x60_sram_err_driver = { - .probe = mv64x60_sram_err_probe, - .remove = mv64x60_sram_err_remove, - .driver = { - .name = "mv64x60_sram_err", - } -}; - -/*********************** CPU err device **********************************/ -static void mv64x60_cpu_check(struct edac_device_ctl_info *edac_dev) -{ - struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info; - u32 cause; - - cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) & - MV64x60_CPU_CAUSE_MASK; - if (!cause) - return; - - printk(KERN_ERR "Error on CPU interface\n"); - printk(KERN_ERR "Cause register: 0x%08x\n", cause); - printk(KERN_ERR "Address Low: 0x%08x\n", - in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_LO)); - printk(KERN_ERR "Address High: 0x%08x\n", - in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_HI)); - printk(KERN_ERR "Data Low: 0x%08x\n", - in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_LO)); - printk(KERN_ERR "Data High: 0x%08x\n", - in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_HI)); - printk(KERN_ERR "Parity: 0x%08x\n", - in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_PARITY)); - out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0); - - edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name); -} - -static irqreturn_t mv64x60_cpu_isr(int irq, void *dev_id) -{ - struct edac_device_ctl_info *edac_dev = dev_id; - struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info; - u32 cause; - - cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) & - MV64x60_CPU_CAUSE_MASK; - if (!cause) - return IRQ_NONE; - - mv64x60_cpu_check(edac_dev); - - return IRQ_HANDLED; -} - -static int mv64x60_cpu_err_probe(struct platform_device *pdev) -{ - struct edac_device_ctl_info *edac_dev; - struct resource *r; - struct mv64x60_cpu_pdata *pdata; - int res = 0; - - if (!devres_open_group(&pdev->dev, mv64x60_cpu_err_probe, GFP_KERNEL)) - return -ENOMEM; - - edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata), - "cpu", 1, NULL, 0, 0, NULL, 0, - edac_dev_idx); - if (!edac_dev) { - devres_release_group(&pdev->dev, mv64x60_cpu_err_probe); - return -ENOMEM; - } - - pdata = edac_dev->pvt_info; - pdata->name = "mv64x60_cpu_err"; - pdata->irq = NO_IRQ; - edac_dev->dev = &pdev->dev; - platform_set_drvdata(pdev, edac_dev); - edac_dev->dev_name = dev_name(&pdev->dev); - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) { - printk(KERN_ERR "%s: Unable to get resource for " - "CPU err regs\n", __func__); - res = -ENOENT; - goto err; - } - - if (!devm_request_mem_region(&pdev->dev, - r->start, - resource_size(r), - pdata->name)) { - printk(KERN_ERR "%s: Error while requesting mem region\n", - __func__); - res = -EBUSY; - goto err; - } - - pdata->cpu_vbase[0] = devm_ioremap(&pdev->dev, - r->start, - resource_size(r)); - if (!pdata->cpu_vbase[0]) { - printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__); - res = -ENOMEM; - goto err; - } - - r = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!r) { - printk(KERN_ERR "%s: Unable to get resource for " - "CPU err regs\n", __func__); - res = -ENOENT; - 
goto err; - } - - if (!devm_request_mem_region(&pdev->dev, - r->start, - resource_size(r), - pdata->name)) { - printk(KERN_ERR "%s: Error while requesting mem region\n", - __func__); - res = -EBUSY; - goto err; - } - - pdata->cpu_vbase[1] = devm_ioremap(&pdev->dev, - r->start, - resource_size(r)); - if (!pdata->cpu_vbase[1]) { - printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__); - res = -ENOMEM; - goto err; - } - - /* setup CPU err registers */ - out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0); - out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0); - out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0x000000ff); - - edac_dev->mod_name = EDAC_MOD_STR; - edac_dev->ctl_name = pdata->name; - if (edac_op_state == EDAC_OPSTATE_POLL) - edac_dev->edac_check = mv64x60_cpu_check; - - pdata->edac_idx = edac_dev_idx++; - - if (edac_device_add_device(edac_dev) > 0) { - edac_dbg(3, "failed edac_device_add_device()\n"); - goto err; - } - - if (edac_op_state == EDAC_OPSTATE_INT) { - pdata->irq = platform_get_irq(pdev, 0); - res = devm_request_irq(&pdev->dev, - pdata->irq, - mv64x60_cpu_isr, - IRQF_DISABLED, - "[EDAC] CPU err", - edac_dev); - if (res < 0) { - printk(KERN_ERR - "%s: Unable to request irq %d for MV64x60 " - "CPU ERR\n", __func__, pdata->irq); - res = -ENODEV; - goto err2; - } - - printk(KERN_INFO EDAC_MOD_STR - " acquired irq %d for CPU Err\n", pdata->irq); - } - - devres_remove_group(&pdev->dev, mv64x60_cpu_err_probe); - - /* get this far and it's successful */ - edac_dbg(3, "success\n"); - - return 0; - -err2: - edac_device_del_device(&pdev->dev); -err: - devres_release_group(&pdev->dev, mv64x60_cpu_err_probe); - edac_device_free_ctl_info(edac_dev); - return res; -} - -static int mv64x60_cpu_err_remove(struct platform_device *pdev) -{ - struct edac_device_ctl_info *edac_dev = platform_get_drvdata(pdev); - - edac_dbg(0, "\n"); - - edac_device_del_device(&pdev->dev); - edac_device_free_ctl_info(edac_dev); - return 0; -} - -static struct platform_driver mv64x60_cpu_err_driver = { - .probe = mv64x60_cpu_err_probe, - .remove = mv64x60_cpu_err_remove, - .driver = { - .name = "mv64x60_cpu_err", - } -}; - -/*********************** DRAM err device **********************************/ - -static void mv64x60_mc_check(struct mem_ctl_info *mci) -{ - struct mv64x60_mc_pdata *pdata = mci->pvt_info; - u32 reg; - u32 err_addr; - u32 sdram_ecc; - u32 comp_ecc; - u32 syndrome; - - reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR); - if (!reg) - return; - - err_addr = reg & ~0x3; - sdram_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_RCVD); - comp_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CALC); - syndrome = sdram_ecc ^ comp_ecc; - - /* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */ - if (!(reg & 0x1)) - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - err_addr >> PAGE_SHIFT, - err_addr & PAGE_MASK, syndrome, - 0, 0, -1, - mci->ctl_name, ""); - else /* 2 bit error, UE */ - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - err_addr >> PAGE_SHIFT, - err_addr & PAGE_MASK, 0, - 0, 0, -1, - mci->ctl_name, ""); - - /* clear the error */ - out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0); -} - -static irqreturn_t mv64x60_mc_isr(int irq, void *dev_id) -{ - struct mem_ctl_info *mci = dev_id; - struct mv64x60_mc_pdata *pdata = mci->pvt_info; - u32 reg; - - reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR); - if (!reg) - return IRQ_NONE; - - /* writing 0's to the ECC err addr in check function clears irq */ - 
mv64x60_mc_check(mci); - - return IRQ_HANDLED; -} - -static void get_total_mem(struct mv64x60_mc_pdata *pdata) -{ - struct device_node *np = NULL; - const unsigned int *reg; - - np = of_find_node_by_type(NULL, "memory"); - if (!np) - return; - - reg = of_get_property(np, "reg", NULL); - - pdata->total_mem = reg[1]; -} - -static void mv64x60_init_csrows(struct mem_ctl_info *mci, - struct mv64x60_mc_pdata *pdata) -{ - struct csrow_info *csrow; - struct dimm_info *dimm; - - u32 devtype; - u32 ctl; - - get_total_mem(pdata); - - ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG); - - csrow = mci->csrows[0]; - dimm = csrow->channels[0]->dimm; - - dimm->nr_pages = pdata->total_mem >> PAGE_SHIFT; - dimm->grain = 8; - - dimm->mtype = (ctl & MV64X60_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR; - - devtype = (ctl >> 20) & 0x3; - switch (devtype) { - case 0x0: - dimm->dtype = DEV_X32; - break; - case 0x2: /* could be X8 too, but no way to tell */ - dimm->dtype = DEV_X16; - break; - case 0x3: - dimm->dtype = DEV_X4; - break; - default: - dimm->dtype = DEV_UNKNOWN; - break; - } - - dimm->edac_mode = EDAC_SECDED; -} - -static int mv64x60_mc_err_probe(struct platform_device *pdev) -{ - struct mem_ctl_info *mci; - struct edac_mc_layer layers[2]; - struct mv64x60_mc_pdata *pdata; - struct resource *r; - u32 ctl; - int res = 0; - - if (!devres_open_group(&pdev->dev, mv64x60_mc_err_probe, GFP_KERNEL)) - return -ENOMEM; - - layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = 1; - layers[0].is_virt_csrow = true; - layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = 1; - layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers, - sizeof(struct mv64x60_mc_pdata)); - if (!mci) { - printk(KERN_ERR "%s: No memory for CPU err\n", __func__); - devres_release_group(&pdev->dev, mv64x60_mc_err_probe); - return -ENOMEM; - } - - pdata = mci->pvt_info; - mci->pdev = &pdev->dev; - platform_set_drvdata(pdev, mci); - pdata->name = "mv64x60_mc_err"; - pdata->irq = NO_IRQ; - mci->dev_name = dev_name(&pdev->dev); - pdata->edac_idx = edac_mc_idx++; - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) { - printk(KERN_ERR "%s: Unable to get resource for " - "MC err regs\n", __func__); - res = -ENOENT; - goto err; - } - - if (!devm_request_mem_region(&pdev->dev, - r->start, - resource_size(r), - pdata->name)) { - printk(KERN_ERR "%s: Error while requesting mem region\n", - __func__); - res = -EBUSY; - goto err; - } - - pdata->mc_vbase = devm_ioremap(&pdev->dev, - r->start, - resource_size(r)); - if (!pdata->mc_vbase) { - printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__); - res = -ENOMEM; - goto err; - } - - ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG); - if (!(ctl & MV64X60_SDRAM_ECC)) { - /* Non-ECC RAM? 
*/ - printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__); - res = -ENODEV; - goto err2; - } - - edac_dbg(3, "init mci\n"); - mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR; - mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; - mci->edac_cap = EDAC_FLAG_SECDED; - mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = MV64x60_REVISION; - mci->ctl_name = mv64x60_ctl_name; - - if (edac_op_state == EDAC_OPSTATE_POLL) - mci->edac_check = mv64x60_mc_check; - - mci->ctl_page_to_phys = NULL; - - mci->scrub_mode = SCRUB_SW_SRC; - - mv64x60_init_csrows(mci, pdata); - - /* setup MC registers */ - out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0); - ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL); - ctl = (ctl & 0xff00ffff) | 0x10000; - out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL, ctl); - - if (edac_mc_add_mc(mci)) { - edac_dbg(3, "failed edac_mc_add_mc()\n"); - goto err; - } - - if (edac_op_state == EDAC_OPSTATE_INT) { - /* acquire interrupt that reports errors */ - pdata->irq = platform_get_irq(pdev, 0); - res = devm_request_irq(&pdev->dev, - pdata->irq, - mv64x60_mc_isr, - IRQF_DISABLED, - "[EDAC] MC err", - mci); - if (res < 0) { - printk(KERN_ERR "%s: Unable to request irq %d for " - "MV64x60 DRAM ERR\n", __func__, pdata->irq); - res = -ENODEV; - goto err2; - } - - printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for MC Err\n", - pdata->irq); - } - - /* get this far and it's successful */ - edac_dbg(3, "success\n"); - - return 0; - -err2: - edac_mc_del_mc(&pdev->dev); -err: - devres_release_group(&pdev->dev, mv64x60_mc_err_probe); - edac_mc_free(mci); - return res; -} - -static int mv64x60_mc_err_remove(struct platform_device *pdev) -{ - struct mem_ctl_info *mci = platform_get_drvdata(pdev); - - edac_dbg(0, "\n"); - - edac_mc_del_mc(&pdev->dev); - edac_mc_free(mci); - return 0; -} - -static struct platform_driver mv64x60_mc_err_driver = { - .probe = mv64x60_mc_err_probe, - .remove = mv64x60_mc_err_remove, - .driver = { - .name = "mv64x60_mc_err", - } -}; - -static int __init mv64x60_edac_init(void) -{ - int ret = 0; - - printk(KERN_INFO "Marvell MV64x60 EDAC driver " MV64x60_REVISION "\n"); - printk(KERN_INFO "\t(C) 2006-2007 MontaVista Software\n"); - /* make sure error reporting method is sane */ - switch (edac_op_state) { - case EDAC_OPSTATE_POLL: - case EDAC_OPSTATE_INT: - break; - default: - edac_op_state = EDAC_OPSTATE_INT; - break; - } - - ret = platform_driver_register(&mv64x60_mc_err_driver); - if (ret) - printk(KERN_WARNING EDAC_MOD_STR "MC err failed to register\n"); - - ret = platform_driver_register(&mv64x60_cpu_err_driver); - if (ret) - printk(KERN_WARNING EDAC_MOD_STR - "CPU err failed to register\n"); - - ret = platform_driver_register(&mv64x60_sram_err_driver); - if (ret) - printk(KERN_WARNING EDAC_MOD_STR - "SRAM err failed to register\n"); - -#ifdef CONFIG_PCI - ret = platform_driver_register(&mv64x60_pci_err_driver); - if (ret) - printk(KERN_WARNING EDAC_MOD_STR - "PCI err failed to register\n"); -#endif - - return ret; -} -module_init(mv64x60_edac_init); - -static void __exit mv64x60_edac_exit(void) -{ -#ifdef CONFIG_PCI - platform_driver_unregister(&mv64x60_pci_err_driver); -#endif - platform_driver_unregister(&mv64x60_sram_err_driver); - platform_driver_unregister(&mv64x60_cpu_err_driver); - platform_driver_unregister(&mv64x60_mc_err_driver); -} -module_exit(mv64x60_edac_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Montavista Software, Inc."); -module_param(edac_op_state, int, 0444); -MODULE_PARM_DESC(edac_op_state, - "EDAC Error Reporting 
state: 0=Poll, 2=Interrupt"); diff --git a/drivers/edac/mv64x60_edac.h b/drivers/edac/mv64x60_edac.h deleted file mode 100644 index c7f209c92a1a..000000000000 --- a/drivers/edac/mv64x60_edac.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * EDAC defs for Marvell MV64x60 bridge chip - * - * Author: Dave Jiang <djiang@mvista.com> - * - * 2007 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - * - */ -#ifndef _MV64X60_EDAC_H_ -#define _MV64X60_EDAC_H_ - -#define MV64x60_REVISION " Ver: 2.0.0" -#define EDAC_MOD_STR "MV64x60_edac" - -#define mv64x60_printk(level, fmt, arg...) \ - edac_printk(level, "MV64x60", fmt, ##arg) - -#define mv64x60_mc_printk(mci, level, fmt, arg...) \ - edac_mc_chipset_printk(mci, level, "MV64x60", fmt, ##arg) - -/* CPU Error Report Registers */ -#define MV64x60_CPU_ERR_ADDR_LO 0x00 /* 0x0070 */ -#define MV64x60_CPU_ERR_ADDR_HI 0x08 /* 0x0078 */ -#define MV64x60_CPU_ERR_DATA_LO 0x00 /* 0x0128 */ -#define MV64x60_CPU_ERR_DATA_HI 0x08 /* 0x0130 */ -#define MV64x60_CPU_ERR_PARITY 0x10 /* 0x0138 */ -#define MV64x60_CPU_ERR_CAUSE 0x18 /* 0x0140 */ -#define MV64x60_CPU_ERR_MASK 0x20 /* 0x0148 */ - -#define MV64x60_CPU_CAUSE_MASK 0x07ffffff - -/* SRAM Error Report Registers */ -#define MV64X60_SRAM_ERR_CAUSE 0x08 /* 0x0388 */ -#define MV64X60_SRAM_ERR_ADDR_LO 0x10 /* 0x0390 */ -#define MV64X60_SRAM_ERR_ADDR_HI 0x78 /* 0x03f8 */ -#define MV64X60_SRAM_ERR_DATA_LO 0x18 /* 0x0398 */ -#define MV64X60_SRAM_ERR_DATA_HI 0x20 /* 0x03a0 */ -#define MV64X60_SRAM_ERR_PARITY 0x28 /* 0x03a8 */ - -/* SDRAM Controller Registers */ -#define MV64X60_SDRAM_CONFIG 0x00 /* 0x1400 */ -#define MV64X60_SDRAM_ERR_DATA_HI 0x40 /* 0x1440 */ -#define MV64X60_SDRAM_ERR_DATA_LO 0x44 /* 0x1444 */ -#define MV64X60_SDRAM_ERR_ECC_RCVD 0x48 /* 0x1448 */ -#define MV64X60_SDRAM_ERR_ECC_CALC 0x4c /* 0x144c */ -#define MV64X60_SDRAM_ERR_ADDR 0x50 /* 0x1450 */ -#define MV64X60_SDRAM_ERR_ECC_CNTL 0x54 /* 0x1454 */ -#define MV64X60_SDRAM_ERR_ECC_ERR_CNT 0x58 /* 0x1458 */ - -#define MV64X60_SDRAM_REGISTERED 0x20000 -#define MV64X60_SDRAM_ECC 0x40000 - -#ifdef CONFIG_PCI -/* - * Bit 0 of MV64x60_PCIx_ERR_MASK does not exist on the 64360 and because of - * errata FEr-#11 and FEr-##16 for the 64460, it should be 0 on that chip as - * well. IOW, don't set bit 0. 
- */ -#define MV64X60_PCIx_ERR_MASK_VAL 0x00a50c24 - -/* Register offsets from PCIx error address low register */ -#define MV64X60_PCI_ERROR_ADDR_LO 0x00 -#define MV64X60_PCI_ERROR_ADDR_HI 0x04 -#define MV64X60_PCI_ERROR_ATTR 0x08 -#define MV64X60_PCI_ERROR_CMD 0x10 -#define MV64X60_PCI_ERROR_CAUSE 0x18 -#define MV64X60_PCI_ERROR_MASK 0x1c - -#define MV64X60_PCI_ERR_SWrPerr 0x0002 -#define MV64X60_PCI_ERR_SRdPerr 0x0004 -#define MV64X60_PCI_ERR_MWrPerr 0x0020 -#define MV64X60_PCI_ERR_MRdPerr 0x0040 - -#define MV64X60_PCI_PE_MASK (MV64X60_PCI_ERR_SWrPerr | \ - MV64X60_PCI_ERR_SRdPerr | \ - MV64X60_PCI_ERR_MWrPerr | \ - MV64X60_PCI_ERR_MRdPerr) - -struct mv64x60_pci_pdata { - int pci_hose; - void __iomem *pci_vbase; - char *name; - int irq; - int edac_idx; -}; - -#endif /* CONFIG_PCI */ - -struct mv64x60_mc_pdata { - void __iomem *mc_vbase; - int total_mem; - char *name; - int irq; - int edac_idx; -}; - -struct mv64x60_cpu_pdata { - void __iomem *cpu_vbase[2]; - char *name; - int irq; - int edac_idx; -}; - -struct mv64x60_sram_pdata { - void __iomem *sram_vbase; - char *name; - int irq; - int edac_idx; -}; - -#endif diff --git a/drivers/edac/npcm_edac.c b/drivers/edac/npcm_edac.c new file mode 100644 index 000000000000..e60a99eb8cfb --- /dev/null +++ b/drivers/edac/npcm_edac.c @@ -0,0 +1,542 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2022 Nuvoton Technology Corporation + +#include <linux/debugfs.h> +#include <linux/iopoll.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include "edac_module.h" + +#define EDAC_MOD_NAME "npcm-edac" +#define EDAC_MSG_SIZE 256 + +/* chip serials */ +#define NPCM7XX_CHIP BIT(0) +#define NPCM8XX_CHIP BIT(1) + +/* syndrome values */ +#define UE_SYNDROME 0x03 + +/* error injection */ +#define ERROR_TYPE_CORRECTABLE 0 +#define ERROR_TYPE_UNCORRECTABLE 1 +#define ERROR_LOCATION_DATA 0 +#define ERROR_LOCATION_CHECKCODE 1 +#define ERROR_BIT_DATA_MAX 63 +#define ERROR_BIT_CHECKCODE_MAX 7 + +static char data_synd[] = { + 0xf4, 0xf1, 0xec, 0xea, 0xe9, 0xe6, 0xe5, 0xe3, + 0xdc, 0xda, 0xd9, 0xd6, 0xd5, 0xd3, 0xce, 0xcb, + 0xb5, 0xb0, 0xad, 0xab, 0xa8, 0xa7, 0xa4, 0xa2, + 0x9d, 0x9b, 0x98, 0x97, 0x94, 0x92, 0x8f, 0x8a, + 0x75, 0x70, 0x6d, 0x6b, 0x68, 0x67, 0x64, 0x62, + 0x5e, 0x5b, 0x58, 0x57, 0x54, 0x52, 0x4f, 0x4a, + 0x34, 0x31, 0x2c, 0x2a, 0x29, 0x26, 0x25, 0x23, + 0x1c, 0x1a, 0x19, 0x16, 0x15, 0x13, 0x0e, 0x0b +}; + +static struct regmap *npcm_regmap; + +struct npcm_platform_data { + /* chip serials */ + int chip; + + /* memory controller registers */ + u32 ctl_ecc_en; + u32 ctl_int_status; + u32 ctl_int_ack; + u32 ctl_int_mask_master; + u32 ctl_int_mask_ecc; + u32 ctl_ce_addr_l; + u32 ctl_ce_addr_h; + u32 ctl_ce_data_l; + u32 ctl_ce_data_h; + u32 ctl_ce_synd; + u32 ctl_ue_addr_l; + u32 ctl_ue_addr_h; + u32 ctl_ue_data_l; + u32 ctl_ue_data_h; + u32 ctl_ue_synd; + u32 ctl_source_id; + u32 ctl_controller_busy; + u32 ctl_xor_check_bits; + + /* masks and shifts */ + u32 ecc_en_mask; + u32 int_status_ce_mask; + u32 int_status_ue_mask; + u32 int_ack_ce_mask; + u32 int_ack_ue_mask; + u32 int_mask_master_non_ecc_mask; + u32 int_mask_master_global_mask; + u32 int_mask_ecc_non_event_mask; + u32 ce_addr_h_mask; + u32 ce_synd_mask; + u32 ce_synd_shift; + u32 ue_addr_h_mask; + u32 ue_synd_mask; + u32 ue_synd_shift; + u32 source_id_ce_mask; + u32 source_id_ce_shift; + u32 source_id_ue_mask; + u32 source_id_ue_shift; + u32 controller_busy_mask; + u32 xor_check_bits_mask; + u32 xor_check_bits_shift; + u32 writeback_en_mask; + 
u32 fwc_mask; +}; + +struct priv_data { + void __iomem *reg; + char message[EDAC_MSG_SIZE]; + const struct npcm_platform_data *pdata; + + /* error injection */ + struct dentry *debugfs; + u8 error_type; + u8 location; + u8 bit; +}; + +static void handle_ce(struct mem_ctl_info *mci) +{ + struct priv_data *priv = mci->pvt_info; + const struct npcm_platform_data *pdata; + u32 val_h = 0, val_l, id, synd; + u64 addr = 0, data = 0; + + pdata = priv->pdata; + regmap_read(npcm_regmap, pdata->ctl_ce_addr_l, &val_l); + if (pdata->chip == NPCM8XX_CHIP) { + regmap_read(npcm_regmap, pdata->ctl_ce_addr_h, &val_h); + val_h &= pdata->ce_addr_h_mask; + } + addr = ((addr | val_h) << 32) | val_l; + + regmap_read(npcm_regmap, pdata->ctl_ce_data_l, &val_l); + if (pdata->chip == NPCM8XX_CHIP) + regmap_read(npcm_regmap, pdata->ctl_ce_data_h, &val_h); + data = ((data | val_h) << 32) | val_l; + + regmap_read(npcm_regmap, pdata->ctl_source_id, &id); + id = (id & pdata->source_id_ce_mask) >> pdata->source_id_ce_shift; + + regmap_read(npcm_regmap, pdata->ctl_ce_synd, &synd); + synd = (synd & pdata->ce_synd_mask) >> pdata->ce_synd_shift; + + snprintf(priv->message, EDAC_MSG_SIZE, + "addr = 0x%llx, data = 0x%llx, id = 0x%x", addr, data, id); + + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, addr >> PAGE_SHIFT, + addr & ~PAGE_MASK, synd, 0, 0, -1, priv->message, ""); +} + +static void handle_ue(struct mem_ctl_info *mci) +{ + struct priv_data *priv = mci->pvt_info; + const struct npcm_platform_data *pdata; + u32 val_h = 0, val_l, id, synd; + u64 addr = 0, data = 0; + + pdata = priv->pdata; + regmap_read(npcm_regmap, pdata->ctl_ue_addr_l, &val_l); + if (pdata->chip == NPCM8XX_CHIP) { + regmap_read(npcm_regmap, pdata->ctl_ue_addr_h, &val_h); + val_h &= pdata->ue_addr_h_mask; + } + addr = ((addr | val_h) << 32) | val_l; + + regmap_read(npcm_regmap, pdata->ctl_ue_data_l, &val_l); + if (pdata->chip == NPCM8XX_CHIP) + regmap_read(npcm_regmap, pdata->ctl_ue_data_h, &val_h); + data = ((data | val_h) << 32) | val_l; + + regmap_read(npcm_regmap, pdata->ctl_source_id, &id); + id = (id & pdata->source_id_ue_mask) >> pdata->source_id_ue_shift; + + regmap_read(npcm_regmap, pdata->ctl_ue_synd, &synd); + synd = (synd & pdata->ue_synd_mask) >> pdata->ue_synd_shift; + + snprintf(priv->message, EDAC_MSG_SIZE, + "addr = 0x%llx, data = 0x%llx, id = 0x%x", addr, data, id); + + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, addr >> PAGE_SHIFT, + addr & ~PAGE_MASK, synd, 0, 0, -1, priv->message, ""); +} + +static irqreturn_t edac_ecc_isr(int irq, void *dev_id) +{ + const struct npcm_platform_data *pdata; + struct mem_ctl_info *mci = dev_id; + u32 status; + + pdata = ((struct priv_data *)mci->pvt_info)->pdata; + regmap_read(npcm_regmap, pdata->ctl_int_status, &status); + if (status & pdata->int_status_ce_mask) { + handle_ce(mci); + + /* acknowledge the CE interrupt */ + regmap_write(npcm_regmap, pdata->ctl_int_ack, + pdata->int_ack_ce_mask); + return IRQ_HANDLED; + } else if (status & pdata->int_status_ue_mask) { + handle_ue(mci); + + /* acknowledge the UE interrupt */ + regmap_write(npcm_regmap, pdata->ctl_int_ack, + pdata->int_ack_ue_mask); + return IRQ_HANDLED; + } + + WARN_ON_ONCE(1); + return IRQ_NONE; +} + +static ssize_t force_ecc_error(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + struct priv_data *priv = mci->pvt_info; + const struct npcm_platform_data *pdata; + u32 val, syndrome; + int ret; + + pdata = 
priv->pdata; + edac_printk(KERN_INFO, EDAC_MOD_NAME, + "force an ECC error, type = %d, location = %d, bit = %d\n", + priv->error_type, priv->location, priv->bit); + + /* ensure no pending writes */ + ret = regmap_read_poll_timeout(npcm_regmap, pdata->ctl_controller_busy, + val, !(val & pdata->controller_busy_mask), + 1000, 10000); + if (ret) { + edac_printk(KERN_INFO, EDAC_MOD_NAME, + "wait pending writes timeout\n"); + return count; + } + + regmap_read(npcm_regmap, pdata->ctl_xor_check_bits, &val); + val &= ~pdata->xor_check_bits_mask; + + /* write syndrome to XOR_CHECK_BITS */ + if (priv->error_type == ERROR_TYPE_CORRECTABLE) { + if (priv->location == ERROR_LOCATION_DATA && + priv->bit > ERROR_BIT_DATA_MAX) { + edac_printk(KERN_INFO, EDAC_MOD_NAME, + "data bit should not exceed %d (%d)\n", + ERROR_BIT_DATA_MAX, priv->bit); + return count; + } + + if (priv->location == ERROR_LOCATION_CHECKCODE && + priv->bit > ERROR_BIT_CHECKCODE_MAX) { + edac_printk(KERN_INFO, EDAC_MOD_NAME, + "checkcode bit should not exceed %d (%d)\n", + ERROR_BIT_CHECKCODE_MAX, priv->bit); + return count; + } + + syndrome = priv->location ? 1 << priv->bit + : data_synd[priv->bit]; + + regmap_write(npcm_regmap, pdata->ctl_xor_check_bits, + val | (syndrome << pdata->xor_check_bits_shift) | + pdata->writeback_en_mask); + } else if (priv->error_type == ERROR_TYPE_UNCORRECTABLE) { + regmap_write(npcm_regmap, pdata->ctl_xor_check_bits, + val | (UE_SYNDROME << pdata->xor_check_bits_shift)); + } + + /* force write check */ + regmap_update_bits(npcm_regmap, pdata->ctl_xor_check_bits, + pdata->fwc_mask, pdata->fwc_mask); + + return count; +} + +static const struct file_operations force_ecc_error_fops = { + .open = simple_open, + .write = force_ecc_error, + .llseek = generic_file_llseek, +}; + +/* + * Setup debugfs for error injection. + * + * Nodes: + * error_type - 0: CE, 1: UE + * location - 0: data, 1: checkcode + * bit - 0 ~ 63 for data and 0 ~ 7 for checkcode + * force_ecc_error - trigger + * + * Examples: + * 1. Inject a correctable error (CE) at checkcode bit 7. + * ~# echo 0 > /sys/kernel/debug/edac/npcm-edac/error_type + * ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/location + * ~# echo 7 > /sys/kernel/debug/edac/npcm-edac/bit + * ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/force_ecc_error + * + * 2. Inject an uncorrectable error (UE). 
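+ *    (UE injection always writes the fixed UE_SYNDROME pattern, so the
+ *     location and bit nodes are ignored)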
+ * ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/error_type + * ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/force_ecc_error + */ +static void setup_debugfs(struct mem_ctl_info *mci) +{ + struct priv_data *priv = mci->pvt_info; + + priv->debugfs = edac_debugfs_create_dir(mci->mod_name); + if (!priv->debugfs) + return; + + edac_debugfs_create_x8("error_type", 0644, priv->debugfs, &priv->error_type); + edac_debugfs_create_x8("location", 0644, priv->debugfs, &priv->location); + edac_debugfs_create_x8("bit", 0644, priv->debugfs, &priv->bit); + edac_debugfs_create_file("force_ecc_error", 0200, priv->debugfs, + &mci->dev, &force_ecc_error_fops); +} + +static int setup_irq(struct mem_ctl_info *mci, struct platform_device *pdev) +{ + const struct npcm_platform_data *pdata; + int ret, irq; + + pdata = ((struct priv_data *)mci->pvt_info)->pdata; + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, "IRQ not defined in DTS\n"); + return irq; + } + + ret = devm_request_irq(&pdev->dev, irq, edac_ecc_isr, 0, + dev_name(&pdev->dev), mci); + if (ret < 0) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, "failed to request IRQ\n"); + return ret; + } + + /* enable the functional group of ECC and mask the others */ + regmap_write(npcm_regmap, pdata->ctl_int_mask_master, + pdata->int_mask_master_non_ecc_mask); + + if (pdata->chip == NPCM8XX_CHIP) + regmap_write(npcm_regmap, pdata->ctl_int_mask_ecc, + pdata->int_mask_ecc_non_event_mask); + + return 0; +} + +static const struct regmap_config npcm_regmap_cfg = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, +}; + +static int edac_probe(struct platform_device *pdev) +{ + const struct npcm_platform_data *pdata; + struct device *dev = &pdev->dev; + struct edac_mc_layer layers[1]; + struct mem_ctl_info *mci; + struct priv_data *priv; + void __iomem *reg; + u32 val; + int rc; + + reg = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + npcm_regmap = devm_regmap_init_mmio(dev, reg, &npcm_regmap_cfg); + if (IS_ERR(npcm_regmap)) + return PTR_ERR(npcm_regmap); + + pdata = of_device_get_match_data(dev); + if (!pdata) + return -EINVAL; + + /* bail out if ECC is not enabled */ + regmap_read(npcm_regmap, pdata->ctl_ecc_en, &val); + if (!(val & pdata->ecc_en_mask)) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, "ECC is not enabled\n"); + return -EPERM; + } + + edac_op_state = EDAC_OPSTATE_INT; + + layers[0].type = EDAC_MC_LAYER_ALL_MEM; + layers[0].size = 1; + + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + sizeof(struct priv_data)); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + priv = mci->pvt_info; + priv->reg = reg; + priv->pdata = pdata; + platform_set_drvdata(pdev, mci); + + mci->mtype_cap = MEM_FLAG_DDR4; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->scrub_cap = SCRUB_FLAG_HW_SRC; + mci->scrub_mode = SCRUB_HW_SRC; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->ctl_name = "npcm_ddr_controller"; + mci->dev_name = dev_name(&pdev->dev); + mci->mod_name = EDAC_MOD_NAME; + mci->ctl_page_to_phys = NULL; + + rc = setup_irq(mci, pdev); + if (rc) + goto free_edac_mc; + + rc = edac_mc_add_mc(mci); + if (rc) + goto free_edac_mc; + + if (IS_ENABLED(CONFIG_EDAC_DEBUG) && pdata->chip == NPCM8XX_CHIP) + setup_debugfs(mci); + + return rc; + +free_edac_mc: + edac_mc_free(mci); + return rc; +} + +static void edac_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + struct priv_data *priv = mci->pvt_info; + const struct npcm_platform_data *pdata; + + pdata = 
priv->pdata; + if (IS_ENABLED(CONFIG_EDAC_DEBUG) && pdata->chip == NPCM8XX_CHIP) + edac_debugfs_remove_recursive(priv->debugfs); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + + regmap_write(npcm_regmap, pdata->ctl_int_mask_master, + pdata->int_mask_master_global_mask); + regmap_update_bits(npcm_regmap, pdata->ctl_ecc_en, pdata->ecc_en_mask, 0); +} + +static const struct npcm_platform_data npcm750_edac = { + .chip = NPCM7XX_CHIP, + + /* memory controller registers */ + .ctl_ecc_en = 0x174, + .ctl_int_status = 0x1d0, + .ctl_int_ack = 0x1d4, + .ctl_int_mask_master = 0x1d8, + .ctl_ce_addr_l = 0x188, + .ctl_ce_data_l = 0x190, + .ctl_ce_synd = 0x18c, + .ctl_ue_addr_l = 0x17c, + .ctl_ue_data_l = 0x184, + .ctl_ue_synd = 0x180, + .ctl_source_id = 0x194, + + /* masks and shifts */ + .ecc_en_mask = BIT(24), + .int_status_ce_mask = GENMASK(4, 3), + .int_status_ue_mask = GENMASK(6, 5), + .int_ack_ce_mask = GENMASK(4, 3), + .int_ack_ue_mask = GENMASK(6, 5), + .int_mask_master_non_ecc_mask = GENMASK(30, 7) | GENMASK(2, 0), + .int_mask_master_global_mask = BIT(31), + .ce_synd_mask = GENMASK(6, 0), + .ce_synd_shift = 0, + .ue_synd_mask = GENMASK(6, 0), + .ue_synd_shift = 0, + .source_id_ce_mask = GENMASK(29, 16), + .source_id_ce_shift = 16, + .source_id_ue_mask = GENMASK(13, 0), + .source_id_ue_shift = 0, +}; + +static const struct npcm_platform_data npcm845_edac = { + .chip = NPCM8XX_CHIP, + + /* memory controller registers */ + .ctl_ecc_en = 0x16c, + .ctl_int_status = 0x228, + .ctl_int_ack = 0x244, + .ctl_int_mask_master = 0x220, + .ctl_int_mask_ecc = 0x260, + .ctl_ce_addr_l = 0x18c, + .ctl_ce_addr_h = 0x190, + .ctl_ce_data_l = 0x194, + .ctl_ce_data_h = 0x198, + .ctl_ce_synd = 0x190, + .ctl_ue_addr_l = 0x17c, + .ctl_ue_addr_h = 0x180, + .ctl_ue_data_l = 0x184, + .ctl_ue_data_h = 0x188, + .ctl_ue_synd = 0x180, + .ctl_source_id = 0x19c, + .ctl_controller_busy = 0x20c, + .ctl_xor_check_bits = 0x174, + + /* masks and shifts */ + .ecc_en_mask = GENMASK(17, 16), + .int_status_ce_mask = GENMASK(1, 0), + .int_status_ue_mask = GENMASK(3, 2), + .int_ack_ce_mask = GENMASK(1, 0), + .int_ack_ue_mask = GENMASK(3, 2), + .int_mask_master_non_ecc_mask = GENMASK(30, 3) | GENMASK(1, 0), + .int_mask_master_global_mask = BIT(31), + .int_mask_ecc_non_event_mask = GENMASK(8, 4), + .ce_addr_h_mask = GENMASK(1, 0), + .ce_synd_mask = GENMASK(15, 8), + .ce_synd_shift = 8, + .ue_addr_h_mask = GENMASK(1, 0), + .ue_synd_mask = GENMASK(15, 8), + .ue_synd_shift = 8, + .source_id_ce_mask = GENMASK(29, 16), + .source_id_ce_shift = 16, + .source_id_ue_mask = GENMASK(13, 0), + .source_id_ue_shift = 0, + .controller_busy_mask = BIT(0), + .xor_check_bits_mask = GENMASK(23, 16), + .xor_check_bits_shift = 16, + .writeback_en_mask = BIT(24), + .fwc_mask = BIT(8), +}; + +static const struct of_device_id npcm_edac_of_match[] = { + { + .compatible = "nuvoton,npcm750-memory-controller", + .data = &npcm750_edac + }, + { + .compatible = "nuvoton,npcm845-memory-controller", + .data = &npcm845_edac + }, + {}, +}; + +MODULE_DEVICE_TABLE(of, npcm_edac_of_match); + +static struct platform_driver npcm_edac_driver = { + .driver = { + .name = "npcm-edac", + .of_match_table = npcm_edac_of_match, + }, + .probe = edac_probe, + .remove = edac_remove, +}; + +module_platform_driver(npcm_edac_driver); + +MODULE_AUTHOR("Medad CChien <medadyoung@gmail.com>"); +MODULE_AUTHOR("Marvin Lin <kflin@nuvoton.com>"); +MODULE_DESCRIPTION("Nuvoton NPCM EDAC Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c 
index 7e98084d3645..e6b1595a3cb5 100644 --- a/drivers/edac/octeon_edac-l2c.c +++ b/drivers/edac/octeon_edac-l2c.c @@ -16,7 +16,6 @@ #include <asm/octeon/cvmx.h> -#include "edac_core.h" #include "edac_module.h" #define EDAC_MOD_STR "octeon-l2c" @@ -139,7 +138,7 @@ static int octeon_l2c_probe(struct platform_device *pdev) /* 'Tags' are block 0, 'Data' is block 1*/ l2c = edac_device_alloc_ctl_info(0, "l2c", num_tads, "l2c", 2, 0, - NULL, 0, edac_device_alloc_index()); + edac_device_alloc_index()); if (!l2c) return -ENOMEM; @@ -151,7 +150,7 @@ static int octeon_l2c_probe(struct platform_device *pdev) l2c->ctl_name = "octeon_l2c_err"; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_l2t_err l2t_err; union cvmx_l2d_err l2d_err; @@ -185,14 +184,12 @@ err: return -ENXIO; } -static int octeon_l2c_remove(struct platform_device *pdev) +static void octeon_l2c_remove(struct platform_device *pdev) { struct edac_device_ctl_info *l2c = platform_get_drvdata(pdev); edac_device_del_device(&pdev->dev); edac_device_free_ctl_info(l2c); - - return 0; } static struct platform_driver octeon_l2c_driver = { @@ -204,5 +201,6 @@ static struct platform_driver octeon_l2c_driver = { }; module_platform_driver(octeon_l2c_driver); +MODULE_DESCRIPTION("Cavium Octeon Secondary Caches (L2C) EDAC driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index 93412d6b3af1..f7176b95b4fe 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -5,21 +5,36 @@ * * Copyright (C) 2009 Wind River Systems, * written by Ralf Baechle <ralf@linux-mips.org> + * + * Copyright (c) 2013 by Cisco Systems, Inc. + * All rights reserved. 
*/ #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/io.h> #include <linux/edac.h> +#include <linux/ctype.h> #include <asm/octeon/octeon.h> #include <asm/octeon/cvmx-lmcx-defs.h> -#include "edac_core.h" #include "edac_module.h" #define OCTEON_MAX_MC 4 +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +struct octeon_lmc_pvt { + unsigned long inject; + unsigned long error_type; + unsigned long dimm; + unsigned long rank; + unsigned long bank; + unsigned long row; + unsigned long col; +}; + static void octeon_lmc_edac_poll(struct mem_ctl_info *mci) { union cvmx_lmcx_mem_cfg0 cfg0; @@ -55,14 +70,32 @@ static void octeon_lmc_edac_poll(struct mem_ctl_info *mci) static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) { + struct octeon_lmc_pvt *pvt = mci->pvt_info; union cvmx_lmcx_int int_reg; bool do_clear = false; char msg[64]; - int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); + if (!pvt->inject) + int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); + else { + int_reg.u64 = 0; + if (pvt->error_type == 1) + int_reg.s.sec_err = 1; + if (pvt->error_type == 2) + int_reg.s.ded_err = 1; + } + if (int_reg.s.sec_err || int_reg.s.ded_err) { union cvmx_lmcx_fadr fadr; - fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx)); + if (likely(!pvt->inject)) + fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx)); + else { + fadr.cn61xx.fdimm = pvt->dimm; + fadr.cn61xx.fbunk = pvt->rank; + fadr.cn61xx.fbank = pvt->bank; + fadr.cn61xx.frow = pvt->row; + fadr.cn61xx.fcol = pvt->col; + } snprintf(msg, sizeof(msg), "DIMM %d rank %d bank %d row %d col %d", fadr.cn61xx.fdimm, fadr.cn61xx.fbunk, @@ -82,21 +115,126 @@ static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) int_reg.s.ded_err = -1; /* Done, re-arm */ do_clear = true; } - if (do_clear) - cvmx_write_csr(CVMX_LMCX_INT(mci->mc_idx), int_reg.u64); + + if (do_clear) { + if (likely(!pvt->inject)) + cvmx_write_csr(CVMX_LMCX_INT(mci->mc_idx), int_reg.u64); + else + pvt->inject = 0; + } +} + +/************************ MC SYSFS parts ***********************************/ + +/* Only a couple naming differences per template, so very similar */ +#define TEMPLATE_SHOW(reg) \ +static ssize_t octeon_mc_inject_##reg##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *data) \ +{ \ + struct mem_ctl_info *mci = to_mci(dev); \ + struct octeon_lmc_pvt *pvt = mci->pvt_info; \ + return sprintf(data, "%016llu\n", (u64)pvt->reg); \ +} + +#define TEMPLATE_STORE(reg) \ +static ssize_t octeon_mc_inject_##reg##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *data, size_t count) \ +{ \ + struct mem_ctl_info *mci = to_mci(dev); \ + struct octeon_lmc_pvt *pvt = mci->pvt_info; \ + if (isdigit(*data)) { \ + if (!kstrtoul(data, 0, &pvt->reg)) \ + return count; \ + } \ + return 0; \ } +TEMPLATE_SHOW(inject); +TEMPLATE_STORE(inject); +TEMPLATE_SHOW(dimm); +TEMPLATE_STORE(dimm); +TEMPLATE_SHOW(bank); +TEMPLATE_STORE(bank); +TEMPLATE_SHOW(rank); +TEMPLATE_STORE(rank); +TEMPLATE_SHOW(row); +TEMPLATE_STORE(row); +TEMPLATE_SHOW(col); +TEMPLATE_STORE(col); + +static ssize_t octeon_mc_inject_error_type_store(struct device *dev, + struct device_attribute *attr, + const char *data, + size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct octeon_lmc_pvt *pvt = mci->pvt_info; + + if (!strncmp(data, "single", 6)) + pvt->error_type = 1; + else if (!strncmp(data, "double", 6)) + pvt->error_type = 2; + + return count; +} + +static ssize_t 
octeon_mc_inject_error_type_show(struct device *dev, + struct device_attribute *attr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct octeon_lmc_pvt *pvt = mci->pvt_info; + if (pvt->error_type == 1) + return sprintf(data, "single"); + else if (pvt->error_type == 2) + return sprintf(data, "double"); + + return 0; +} + +static DEVICE_ATTR(inject, S_IRUGO | S_IWUSR, + octeon_mc_inject_inject_show, octeon_mc_inject_inject_store); +static DEVICE_ATTR(error_type, S_IRUGO | S_IWUSR, + octeon_mc_inject_error_type_show, octeon_mc_inject_error_type_store); +static DEVICE_ATTR(dimm, S_IRUGO | S_IWUSR, + octeon_mc_inject_dimm_show, octeon_mc_inject_dimm_store); +static DEVICE_ATTR(rank, S_IRUGO | S_IWUSR, + octeon_mc_inject_rank_show, octeon_mc_inject_rank_store); +static DEVICE_ATTR(bank, S_IRUGO | S_IWUSR, + octeon_mc_inject_bank_show, octeon_mc_inject_bank_store); +static DEVICE_ATTR(row, S_IRUGO | S_IWUSR, + octeon_mc_inject_row_show, octeon_mc_inject_row_store); +static DEVICE_ATTR(col, S_IRUGO | S_IWUSR, + octeon_mc_inject_col_show, octeon_mc_inject_col_store); + +static struct attribute *octeon_dev_attrs[] = { + &dev_attr_inject.attr, + &dev_attr_error_type.attr, + &dev_attr_dimm.attr, + &dev_attr_rank.attr, + &dev_attr_bank.attr, + &dev_attr_row.attr, + &dev_attr_col.attr, + NULL +}; + +ATTRIBUTE_GROUPS(octeon_dev); + static int octeon_lmc_edac_probe(struct platform_device *pdev) { struct mem_ctl_info *mci; struct edac_mc_layer layers[1]; int mc = pdev->id; + opstate_init(); + layers[0].type = EDAC_MC_LAYER_CHANNEL; layers[0].size = 1; layers[0].is_virt_csrow = false; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_lmcx_mem_cfg0 cfg0; cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0)); @@ -105,7 +243,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) return 0; } - mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct octeon_lmc_pvt)); if (!mci) return -ENXIO; @@ -116,7 +254,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) mci->ctl_name = "octeon-lmc-err"; mci->edac_check = octeon_lmc_edac_poll; - if (edac_mc_add_mc(mci)) { + if (edac_mc_add_mc_with_groups(mci, octeon_dev_groups)) { dev_err(&pdev->dev, "edac_mc_add_mc() failed\n"); edac_mc_free(mci); return -ENXIO; @@ -137,7 +275,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) return 0; } - mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct octeon_lmc_pvt)); if (!mci) return -ENXIO; @@ -148,7 +286,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) mci->ctl_name = "co_lmc_err"; mci->edac_check = octeon_lmc_edac_poll_o2; - if (edac_mc_add_mc(mci)) { + if (edac_mc_add_mc_with_groups(mci, octeon_dev_groups)) { dev_err(&pdev->dev, "edac_mc_add_mc() failed\n"); edac_mc_free(mci); return -ENXIO; @@ -164,13 +302,12 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) return 0; } -static int octeon_lmc_edac_remove(struct platform_device *pdev) +static void octeon_lmc_edac_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); - return 0; } static struct platform_driver octeon_lmc_edac_driver = { @@ -182,5 +319,6 @@ static struct platform_driver octeon_lmc_edac_driver = { }; module_platform_driver(octeon_lmc_edac_driver); +MODULE_DESCRIPTION("Cavium Octeon DRAM Memory Controller (LMC) EDAC 
driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c index 0f83c33a7d1f..aa1219db0b17 100644 --- a/drivers/edac/octeon_edac-pc.c +++ b/drivers/edac/octeon_edac-pc.c @@ -15,7 +15,6 @@ #include <linux/io.h> #include <linux/edac.h> -#include "edac_core.h" #include "edac_module.h" #include <asm/octeon/cvmx.h> @@ -73,7 +72,7 @@ static int co_cache_error_event(struct notifier_block *this, edac_device_handle_ce(p->ed, cpu, 0, "dcache"); /* Clear the error indication */ - if (OCTEON_IS_MODEL(OCTEON_FAM_2)) + if (OCTEON_IS_OCTEON2()) write_octeon_c0_dcacheerr(1); else write_octeon_c0_dcacheerr(0); @@ -93,7 +92,7 @@ static int co_cache_error_probe(struct platform_device *pdev) platform_set_drvdata(pdev, p); p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(), - "cache", 2, 0, NULL, 0, + "cache", 2, 0, edac_device_alloc_index()); if (!p->ed) goto err; @@ -120,14 +119,13 @@ err: return -ENXIO; } -static int co_cache_error_remove(struct platform_device *pdev) +static void co_cache_error_remove(struct platform_device *pdev) { struct co_cache_error *p = platform_get_drvdata(pdev); unregister_co_cache_error_notifier(&p->notifier); edac_device_del_device(&pdev->dev); edac_device_free_ctl_info(p->ed); - return 0; } static struct platform_driver co_cache_error_driver = { @@ -139,5 +137,6 @@ static struct platform_driver co_cache_error_driver = { }; module_platform_driver(co_cache_error_driver); +MODULE_DESCRIPTION("Cavium Octeon Primary Caches EDAC driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-pci.c b/drivers/edac/octeon_edac-pci.c index 9ca73cec74e7..c4f3bc33a971 100644 --- a/drivers/edac/octeon_edac-pci.c +++ b/drivers/edac/octeon_edac-pci.c @@ -18,7 +18,6 @@ #include <asm/octeon/cvmx-pci-defs.h> #include <asm/octeon/octeon.h> -#include "edac_core.h" #include "edac_module.h" static void octeon_pci_poll(struct edac_pci_ctl_info *pci) @@ -88,14 +87,12 @@ err: return res; } -static int octeon_pci_remove(struct platform_device *pdev) +static void octeon_pci_remove(struct platform_device *pdev) { struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev); edac_pci_del_device(&pdev->dev); edac_pci_free_ctl_info(pci); - - return 0; } static struct platform_driver octeon_pci_driver = { @@ -107,5 +104,6 @@ static struct platform_driver octeon_pci_driver = { }; module_platform_driver(octeon_pci_driver); +MODULE_DESCRIPTION("Cavium Octeon PCI Controller EDAC driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c index 9c971b575530..1a1c3296ccc8 100644 --- a/drivers/edac/pasemi_edac.c +++ b/drivers/edac/pasemi_edac.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2006-2007 PA Semi, Inc * @@ -5,19 +6,6 @@ * Maintained by: Olof Johansson <olof@lixom.net> * * Driver for the PWRficient onchip memory controllers - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
@@ -26,7 +14,7 @@
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
 #include <linux/edac.h>
-#include "edac_core.h"
+#include "edac_module.h"
 #define MODULE_NAME "pasemi_edac"
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
new file mode 100644
index 000000000000..af14c8a3279f
--- /dev/null
+++ b/drivers/edac/pnd2_edac.c
@@ -0,0 +1,1584 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for Pondicherry2 memory controller.
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * [Derived from sb_edac.c]
+ *
+ * Translation of system physical addresses to DIMM addresses
+ * is a two-stage process:
+ *
+ * First the Pondicherry 2 memory controller handles slice and channel interleaving
+ * in "sys2pmi()". This is (almost) completely common between platforms.
+ *
+ * Then a platform specific dunit (DIMM unit) completes the process to provide DIMM,
+ * rank, bank, row and column using the appropriate "dunit_ops" functions/parameters.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/delay.h>
+#include <linux/edac.h>
+#include <linux/init.h>
+#include <linux/math64.h>
+#include <linux/mmzone.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+
+#include <linux/platform_data/x86/p2sb.h>
+
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/processor.h>
+#include <asm/mce.h>
+
+#include "edac_mc.h"
+#include "edac_module.h"
+#include "pnd2_edac.h"
+
+#define EDAC_MOD_STR "pnd2_edac"
+
+#define APL_NUM_CHANNELS 4
+#define DNV_NUM_CHANNELS 2
+#define DNV_MAX_DIMMS 2 /* Max DIMMs per channel */
+
+enum type {
+ APL,
+ DNV, /* All requests go to PMI CH0 on each slice (CH1 disabled) */
+};
+
+struct dram_addr {
+ int chan;
+ int dimm;
+ int rank;
+ int bank;
+ int row;
+ int col;
+};
+
+struct pnd2_pvt {
+ int dimm_geom[APL_NUM_CHANNELS];
+ u64 tolm, tohm;
+};
+
+/*
+ * System address space is divided into multiple regions with
+ * different interleave rules in each. The as0/as1 regions
+ * have no interleaving at all. The as2 region is interleaved
+ * between two channels. The mot region is magic and may overlap
+ * other regions, with its interleave rules taking precedence.
+ * Addresses not in any of these regions are interleaved across
+ * all four channels.
+ */
+static struct region {
+ u64 base;
+ u64 limit;
+ u8 enabled;
+} mot, as0, as1, as2;
+
+static struct dunit_ops {
+ char *name;
+ enum type type;
+ int pmiaddr_shift;
+ int pmiidx_shift;
+ int channels;
+ int dimms_per_channel;
+ int (*rd_reg)(int port, int off, int op, void *data, size_t sz, char *name);
+ int (*get_registers)(void);
+ int (*check_ecc)(void);
+ void (*mk_region)(char *name, struct region *rp, void *asym);
+ void (*get_dimm_config)(struct mem_ctl_info *mci);
+ int (*pmi2mem)(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx,
+ struct dram_addr *daddr, char *msg);
+} *ops;
+
+static struct mem_ctl_info *pnd2_mci;
+
+#define PND2_MSG_SIZE 256
+
+/* Debug macros */
+#define pnd2_printk(level, fmt, arg...) \
+ edac_printk(level, "pnd2", fmt, ##arg)
+
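+/* like pnd2_printk(), but also identifies the mci the message refers to */
+#define pnd2_mc_printk(mci, level, fmt, arg...)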
\ + edac_mc_chipset_printk(mci, level, "pnd2", fmt, ##arg) + +#define MOT_CHAN_INTLV_BIT_1SLC_2CH 12 +#define MOT_CHAN_INTLV_BIT_2SLC_2CH 13 +#define SELECTOR_DISABLED (-1) + +#define PMI_ADDRESS_WIDTH 31 +#define PND_MAX_PHYS_BIT 39 + +#define APL_ASYMSHIFT 28 +#define DNV_ASYMSHIFT 31 +#define CH_HASH_MASK_LSB 6 +#define SLICE_HASH_MASK_LSB 6 +#define MOT_SLC_INTLV_BIT 12 +#define LOG2_PMI_ADDR_GRANULARITY 5 +#define MOT_SHIFT 24 + +#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo)) +#define U64_LSHIFT(val, s) ((u64)(val) << (s)) + +/* + * On Apollo Lake we access memory controller registers via a + * side-band mailbox style interface in a hidden PCI device + * configuration space. + */ +static struct pci_bus *p2sb_bus; +#define P2SB_DEVFN PCI_DEVFN(0xd, 0) +#define P2SB_ADDR_OFF 0xd0 +#define P2SB_DATA_OFF 0xd4 +#define P2SB_STAT_OFF 0xd8 +#define P2SB_ROUT_OFF 0xda +#define P2SB_EADD_OFF 0xdc +#define P2SB_HIDE_OFF 0xe1 + +#define P2SB_BUSY 1 + +#define P2SB_READ(size, off, ptr) \ + pci_bus_read_config_##size(p2sb_bus, P2SB_DEVFN, off, ptr) +#define P2SB_WRITE(size, off, val) \ + pci_bus_write_config_##size(p2sb_bus, P2SB_DEVFN, off, val) + +static bool p2sb_is_busy(u16 *status) +{ + P2SB_READ(word, P2SB_STAT_OFF, status); + + return !!(*status & P2SB_BUSY); +} + +static int _apl_rd_reg(int port, int off, int op, u32 *data) +{ + int retries = 0xff, ret; + u16 status; + u8 hidden; + + /* Unhide the P2SB device, if it's hidden */ + P2SB_READ(byte, P2SB_HIDE_OFF, &hidden); + if (hidden) + P2SB_WRITE(byte, P2SB_HIDE_OFF, 0); + + if (p2sb_is_busy(&status)) { + ret = -EAGAIN; + goto out; + } + + P2SB_WRITE(dword, P2SB_ADDR_OFF, (port << 24) | off); + P2SB_WRITE(dword, P2SB_DATA_OFF, 0); + P2SB_WRITE(dword, P2SB_EADD_OFF, 0); + P2SB_WRITE(word, P2SB_ROUT_OFF, 0); + P2SB_WRITE(word, P2SB_STAT_OFF, (op << 8) | P2SB_BUSY); + + while (p2sb_is_busy(&status)) { + if (retries-- == 0) { + ret = -EBUSY; + goto out; + } + } + + P2SB_READ(dword, P2SB_DATA_OFF, data); + ret = (status >> 1) & GENMASK(1, 0); +out: + /* Hide the P2SB device, if it was hidden before */ + if (hidden) + P2SB_WRITE(byte, P2SB_HIDE_OFF, hidden); + + return ret; +} + +static int apl_rd_reg(int port, int off, int op, void *data, size_t sz, char *name) +{ + int ret = 0; + + edac_dbg(2, "Read %s port=%x off=%x op=%x\n", name, port, off, op); + switch (sz) { + case 8: + ret = _apl_rd_reg(port, off + 4, op, (u32 *)(data + 4)); + fallthrough; + case 4: + ret |= _apl_rd_reg(port, off, op, (u32 *)data); + pnd2_printk(KERN_DEBUG, "%s=%x%08x ret=%d\n", name, + sz == 8 ? 
*((u32 *)(data + 4)) : 0, *((u32 *)data), ret); + break; + } + + return ret; +} + +static u64 get_mem_ctrl_hub_base_addr(void) +{ + struct b_cr_mchbar_lo_pci lo; + struct b_cr_mchbar_hi_pci hi; + struct pci_dev *pdev; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL); + if (pdev) { + pci_read_config_dword(pdev, 0x48, (u32 *)&lo); + pci_read_config_dword(pdev, 0x4c, (u32 *)&hi); + pci_dev_put(pdev); + } else { + return 0; + } + + if (!lo.enable) { + edac_dbg(2, "MMIO via memory controller hub base address is disabled!\n"); + return 0; + } + + return U64_LSHIFT(hi.base, 32) | U64_LSHIFT(lo.base, 15); +} + +#define DNV_MCHBAR_SIZE 0x8000 +#define DNV_SB_PORT_SIZE 0x10000 +static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *name) +{ + struct pci_dev *pdev; + void __iomem *base; + struct resource r; + int ret; + + if (op == 4) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL); + if (!pdev) + return -ENODEV; + + pci_read_config_dword(pdev, off, data); + pci_dev_put(pdev); + } else { + /* MMIO via memory controller hub base address */ + if (op == 0 && port == 0x4c) { + memset(&r, 0, sizeof(r)); + + r.start = get_mem_ctrl_hub_base_addr(); + if (!r.start) + return -ENODEV; + r.end = r.start + DNV_MCHBAR_SIZE - 1; + } else { + /* MMIO via sideband register base address */ + ret = p2sb_bar(NULL, 0, &r); + if (ret) + return ret; + + r.start += (port << 16); + r.end = r.start + DNV_SB_PORT_SIZE - 1; + } + + base = ioremap(r.start, resource_size(&r)); + if (!base) + return -ENODEV; + + if (sz == 8) + *(u64 *)data = readq(base + off); + else + *(u32 *)data = readl(base + off); + + iounmap(base); + } + + edac_dbg(2, "Read %s=%.8x_%.8x\n", name, + (sz == 8) ? *(u32 *)(data + 4) : 0, *(u32 *)data); + + return 0; +} + +#define RD_REGP(regp, regname, port) \ + ops->rd_reg(port, \ + regname##_offset, \ + regname##_r_opcode, \ + regp, sizeof(struct regname), \ + #regname) + +#define RD_REG(regp, regname) \ + ops->rd_reg(regname ## _port, \ + regname##_offset, \ + regname##_r_opcode, \ + regp, sizeof(struct regname), \ + #regname) + +static u64 top_lm, top_hm; +static bool two_slices; +static bool two_channels; /* Both PMI channels in one slice enabled */ + +static u8 sym_chan_mask; +static u8 asym_chan_mask; +static unsigned long chan_mask; + +static int slice_selector = -1; +static int chan_selector = -1; +static u64 slice_hash_mask; +static u64 chan_hash_mask; + +static void mk_region(char *name, struct region *rp, u64 base, u64 limit) +{ + rp->enabled = 1; + rp->base = base; + rp->limit = limit; + edac_dbg(2, "Region:%s [%llx, %llx]\n", name, base, limit); +} + +static void mk_region_mask(char *name, struct region *rp, u64 base, u64 mask) +{ + if (mask == 0) { + pr_info(FW_BUG "MOT mask cannot be zero\n"); + return; + } + if (mask != GENMASK_ULL(PND_MAX_PHYS_BIT, __ffs(mask))) { + pr_info(FW_BUG "MOT mask is invalid\n"); + return; + } + if (base & ~mask) { + pr_info(FW_BUG "MOT region base/mask alignment error\n"); + return; + } + rp->base = base; + rp->limit = (base | ~mask) & GENMASK_ULL(PND_MAX_PHYS_BIT, 0); + rp->enabled = 1; + edac_dbg(2, "Region:%s [%llx, %llx]\n", name, base, rp->limit); +} + +static bool in_region(struct region *rp, u64 addr) +{ + if (!rp->enabled) + return false; + + return rp->base <= addr && addr <= rp->limit; +} + +static int gen_sym_mask(struct b_cr_slice_channel_hash *p) +{ + int mask = 0; + + if (!p->slice_0_mem_disabled) + mask |= p->sym_slice0_channel_enabled; + + if (!p->slice_1_disabled) + mask |= 
p->sym_slice1_channel_enabled << 2; + + if (p->ch_1_disabled || p->enable_pmi_dual_data_mode) + mask &= 0x5; + + return mask; +} + +static int gen_asym_mask(struct b_cr_slice_channel_hash *p, + struct b_cr_asym_mem_region0_mchbar *as0, + struct b_cr_asym_mem_region1_mchbar *as1, + struct b_cr_asym_2way_mem_region_mchbar *as2way) +{ + static const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; + int mask = 0; + + if (as2way->asym_2way_interleave_enable) + mask = intlv[as2way->asym_2way_intlv_mode]; + if (as0->slice0_asym_enable) + mask |= (1 << as0->slice0_asym_channel_select); + if (as1->slice1_asym_enable) + mask |= (4 << as1->slice1_asym_channel_select); + if (p->slice_0_mem_disabled) + mask &= 0xc; + if (p->slice_1_disabled) + mask &= 0x3; + if (p->ch_1_disabled || p->enable_pmi_dual_data_mode) + mask &= 0x5; + + return mask; +} + +static struct b_cr_tolud_pci tolud; +static struct b_cr_touud_lo_pci touud_lo; +static struct b_cr_touud_hi_pci touud_hi; +static struct b_cr_asym_mem_region0_mchbar asym0; +static struct b_cr_asym_mem_region1_mchbar asym1; +static struct b_cr_asym_2way_mem_region_mchbar asym_2way; +static struct b_cr_mot_out_base_mchbar mot_base; +static struct b_cr_mot_out_mask_mchbar mot_mask; +static struct b_cr_slice_channel_hash chash; + +/* Apollo Lake dunit */ +/* + * Validated on board with just two DIMMs in the [0] and [2] positions + * in this array. Other port number matches documentation, but caution + * advised. + */ +static const int apl_dports[APL_NUM_CHANNELS] = { 0x18, 0x10, 0x11, 0x19 }; +static struct d_cr_drp0 drp0[APL_NUM_CHANNELS]; + +/* Denverton dunit */ +static const int dnv_dports[DNV_NUM_CHANNELS] = { 0x10, 0x12 }; +static struct d_cr_dsch dsch; +static struct d_cr_ecc_ctrl ecc_ctrl[DNV_NUM_CHANNELS]; +static struct d_cr_drp drp[DNV_NUM_CHANNELS]; +static struct d_cr_dmap dmap[DNV_NUM_CHANNELS]; +static struct d_cr_dmap1 dmap1[DNV_NUM_CHANNELS]; +static struct d_cr_dmap2 dmap2[DNV_NUM_CHANNELS]; +static struct d_cr_dmap3 dmap3[DNV_NUM_CHANNELS]; +static struct d_cr_dmap4 dmap4[DNV_NUM_CHANNELS]; +static struct d_cr_dmap5 dmap5[DNV_NUM_CHANNELS]; + +static void apl_mk_region(char *name, struct region *rp, void *asym) +{ + struct b_cr_asym_mem_region0_mchbar *a = asym; + + mk_region(name, rp, + U64_LSHIFT(a->slice0_asym_base, APL_ASYMSHIFT), + U64_LSHIFT(a->slice0_asym_limit, APL_ASYMSHIFT) + + GENMASK_ULL(APL_ASYMSHIFT - 1, 0)); +} + +static void dnv_mk_region(char *name, struct region *rp, void *asym) +{ + struct b_cr_asym_mem_region_denverton *a = asym; + + mk_region(name, rp, + U64_LSHIFT(a->slice_asym_base, DNV_ASYMSHIFT), + U64_LSHIFT(a->slice_asym_limit, DNV_ASYMSHIFT) + + GENMASK_ULL(DNV_ASYMSHIFT - 1, 0)); +} + +static int apl_get_registers(void) +{ + int ret = -ENODEV; + int i; + + if (RD_REG(&asym_2way, b_cr_asym_2way_mem_region_mchbar)) + return -ENODEV; + + /* + * RD_REGP() will fail for unpopulated or non-existent + * DIMM slots. Return success if we find at least one DIMM. 
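+ * (channels whose read fails are simply left zeroed in drp0[])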
+ */ + for (i = 0; i < APL_NUM_CHANNELS; i++) + if (!RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i])) + ret = 0; + + return ret; +} + +static int dnv_get_registers(void) +{ + int i; + + if (RD_REG(&dsch, d_cr_dsch)) + return -ENODEV; + + for (i = 0; i < DNV_NUM_CHANNELS; i++) + if (RD_REGP(&ecc_ctrl[i], d_cr_ecc_ctrl, dnv_dports[i]) || + RD_REGP(&drp[i], d_cr_drp, dnv_dports[i]) || + RD_REGP(&dmap[i], d_cr_dmap, dnv_dports[i]) || + RD_REGP(&dmap1[i], d_cr_dmap1, dnv_dports[i]) || + RD_REGP(&dmap2[i], d_cr_dmap2, dnv_dports[i]) || + RD_REGP(&dmap3[i], d_cr_dmap3, dnv_dports[i]) || + RD_REGP(&dmap4[i], d_cr_dmap4, dnv_dports[i]) || + RD_REGP(&dmap5[i], d_cr_dmap5, dnv_dports[i])) + return -ENODEV; + + return 0; +} + +/* + * Read all the h/w config registers once here (they don't + * change at run time. Figure out which address ranges have + * which interleave characteristics. + */ +static int get_registers(void) +{ + static const int intlv[] = { 10, 11, 12, 12 }; + + if (RD_REG(&tolud, b_cr_tolud_pci) || + RD_REG(&touud_lo, b_cr_touud_lo_pci) || + RD_REG(&touud_hi, b_cr_touud_hi_pci) || + RD_REG(&asym0, b_cr_asym_mem_region0_mchbar) || + RD_REG(&asym1, b_cr_asym_mem_region1_mchbar) || + RD_REG(&mot_base, b_cr_mot_out_base_mchbar) || + RD_REG(&mot_mask, b_cr_mot_out_mask_mchbar) || + RD_REG(&chash, b_cr_slice_channel_hash)) + return -ENODEV; + + if (ops->get_registers()) + return -ENODEV; + + if (ops->type == DNV) { + /* PMI channel idx (always 0) for asymmetric region */ + asym0.slice0_asym_channel_select = 0; + asym1.slice1_asym_channel_select = 0; + /* PMI channel bitmap (always 1) for symmetric region */ + chash.sym_slice0_channel_enabled = 0x1; + chash.sym_slice1_channel_enabled = 0x1; + } + + if (asym0.slice0_asym_enable) + ops->mk_region("as0", &as0, &asym0); + + if (asym1.slice1_asym_enable) + ops->mk_region("as1", &as1, &asym1); + + if (asym_2way.asym_2way_interleave_enable) { + mk_region("as2way", &as2, + U64_LSHIFT(asym_2way.asym_2way_base, APL_ASYMSHIFT), + U64_LSHIFT(asym_2way.asym_2way_limit, APL_ASYMSHIFT) + + GENMASK_ULL(APL_ASYMSHIFT - 1, 0)); + } + + if (mot_base.imr_en) { + mk_region_mask("mot", &mot, + U64_LSHIFT(mot_base.mot_out_base, MOT_SHIFT), + U64_LSHIFT(mot_mask.mot_out_mask, MOT_SHIFT)); + } + + top_lm = U64_LSHIFT(tolud.tolud, 20); + top_hm = U64_LSHIFT(touud_hi.touud, 32) | U64_LSHIFT(touud_lo.touud, 20); + + two_slices = !chash.slice_1_disabled && + !chash.slice_0_mem_disabled && + (chash.sym_slice0_channel_enabled != 0) && + (chash.sym_slice1_channel_enabled != 0); + two_channels = !chash.ch_1_disabled && + !chash.enable_pmi_dual_data_mode && + ((chash.sym_slice0_channel_enabled == 3) || + (chash.sym_slice1_channel_enabled == 3)); + + sym_chan_mask = gen_sym_mask(&chash); + asym_chan_mask = gen_asym_mask(&chash, &asym0, &asym1, &asym_2way); + chan_mask = sym_chan_mask | asym_chan_mask; + + if (two_slices && !two_channels) { + if (chash.hvm_mode) + slice_selector = 29; + else + slice_selector = intlv[chash.interleave_mode]; + } else if (!two_slices && two_channels) { + if (chash.hvm_mode) + chan_selector = 29; + else + chan_selector = intlv[chash.interleave_mode]; + } else if (two_slices && two_channels) { + if (chash.hvm_mode) { + slice_selector = 29; + chan_selector = 30; + } else { + slice_selector = intlv[chash.interleave_mode]; + chan_selector = intlv[chash.interleave_mode] + 1; + } + } + + if (two_slices) { + if (!chash.hvm_mode) + slice_hash_mask = chash.slice_hash_mask << SLICE_HASH_MASK_LSB; + if (!two_channels) + slice_hash_mask |= BIT_ULL(slice_selector); 
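+ /* without channel interleave the slice select bit itself joins the hash */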
+ } + + if (two_channels) { + if (!chash.hvm_mode) + chan_hash_mask = chash.ch_hash_mask << CH_HASH_MASK_LSB; + if (!two_slices) + chan_hash_mask |= BIT_ULL(chan_selector); + } + + return 0; +} + +/* Get a contiguous memory address (remove the MMIO gap) */ +static u64 remove_mmio_gap(u64 sys) +{ + return (sys < SZ_4G) ? sys : sys - (SZ_4G - top_lm); +} + +/* Squeeze out one address bit, shift upper part down to fill gap */ +static void remove_addr_bit(u64 *addr, int bitidx) +{ + u64 mask; + + if (bitidx == -1) + return; + + mask = BIT_ULL(bitidx) - 1; + *addr = ((*addr >> 1) & ~mask) | (*addr & mask); +} + +/* XOR all the bits from addr specified in mask */ +static int hash_by_mask(u64 addr, u64 mask) +{ + u64 result = addr & mask; + + result = (result >> 32) ^ result; + result = (result >> 16) ^ result; + result = (result >> 8) ^ result; + result = (result >> 4) ^ result; + result = (result >> 2) ^ result; + result = (result >> 1) ^ result; + + return (int)result & 1; +} + +/* + * First stage decode. Take the system address and figure out which + * second stage will deal with it based on interleave modes. + */ +static int sys2pmi(const u64 addr, u32 *pmiidx, u64 *pmiaddr, char *msg) +{ + u64 contig_addr, contig_base, contig_offset, contig_base_adj; + int mot_intlv_bit = two_slices ? MOT_CHAN_INTLV_BIT_2SLC_2CH : + MOT_CHAN_INTLV_BIT_1SLC_2CH; + int slice_intlv_bit_rm = SELECTOR_DISABLED; + int chan_intlv_bit_rm = SELECTOR_DISABLED; + /* Determine if address is in the MOT region. */ + bool mot_hit = in_region(&mot, addr); + /* Calculate the number of symmetric regions enabled. */ + int sym_channels = hweight8(sym_chan_mask); + + /* + * The amount we need to shift the asym base can be determined by the + * number of enabled symmetric channels. + * NOTE: This can only work because symmetric memory is not supposed + * to do a 3-way interleave. + */ + int sym_chan_shift = sym_channels >> 1; + + /* Give up if address is out of range, or in MMIO gap */ + if (addr >= BIT(PND_MAX_PHYS_BIT) || + (addr >= top_lm && addr < SZ_4G) || addr >= top_hm) { + snprintf(msg, PND2_MSG_SIZE, "Error address 0x%llx is not DRAM", addr); + return -EINVAL; + } + + /* Get a contiguous memory address (remove the MMIO gap) */ + contig_addr = remove_mmio_gap(addr); + + if (in_region(&as0, addr)) { + *pmiidx = asym0.slice0_asym_channel_select; + + contig_base = remove_mmio_gap(as0.base); + contig_offset = contig_addr - contig_base; + contig_base_adj = (contig_base >> sym_chan_shift) * + ((chash.sym_slice0_channel_enabled >> (*pmiidx & 1)) & 1); + contig_addr = contig_offset + ((sym_channels > 0) ? contig_base_adj : 0ull); + } else if (in_region(&as1, addr)) { + *pmiidx = 2u + asym1.slice1_asym_channel_select; + + contig_base = remove_mmio_gap(as1.base); + contig_offset = contig_addr - contig_base; + contig_base_adj = (contig_base >> sym_chan_shift) * + ((chash.sym_slice1_channel_enabled >> (*pmiidx & 1)) & 1); + contig_addr = contig_offset + ((sym_channels > 0) ? contig_base_adj : 0ull); + } else if (in_region(&as2, addr) && (asym_2way.asym_2way_intlv_mode == 0x3ul)) { + bool channel1; + + mot_intlv_bit = MOT_CHAN_INTLV_BIT_1SLC_2CH; + *pmiidx = (asym_2way.asym_2way_intlv_mode & 1) << 1; + channel1 = mot_hit ? ((bool)((addr >> mot_intlv_bit) & 1)) : + hash_by_mask(contig_addr, chan_hash_mask); + *pmiidx |= (u32)channel1; + + contig_base = remove_mmio_gap(as2.base); + chan_intlv_bit_rm = mot_hit ? 
mot_intlv_bit : chan_selector; + contig_offset = contig_addr - contig_base; + remove_addr_bit(&contig_offset, chan_intlv_bit_rm); + contig_addr = (contig_base >> sym_chan_shift) + contig_offset; + } else { + /* Otherwise we're in normal, boring symmetric mode. */ + *pmiidx = 0u; + + if (two_slices) { + bool slice1; + + if (mot_hit) { + slice_intlv_bit_rm = MOT_SLC_INTLV_BIT; + slice1 = (addr >> MOT_SLC_INTLV_BIT) & 1; + } else { + slice_intlv_bit_rm = slice_selector; + slice1 = hash_by_mask(addr, slice_hash_mask); + } + + *pmiidx = (u32)slice1 << 1; + } + + if (two_channels) { + bool channel1; + + mot_intlv_bit = two_slices ? MOT_CHAN_INTLV_BIT_2SLC_2CH : + MOT_CHAN_INTLV_BIT_1SLC_2CH; + + if (mot_hit) { + chan_intlv_bit_rm = mot_intlv_bit; + channel1 = (addr >> mot_intlv_bit) & 1; + } else { + chan_intlv_bit_rm = chan_selector; + channel1 = hash_by_mask(contig_addr, chan_hash_mask); + } + + *pmiidx |= (u32)channel1; + } + } + + /* Remove the chan_selector bit first */ + remove_addr_bit(&contig_addr, chan_intlv_bit_rm); + /* Remove the slice bit (we remove it second because it must be lower */ + remove_addr_bit(&contig_addr, slice_intlv_bit_rm); + *pmiaddr = contig_addr; + + return 0; +} + +/* Translate PMI address to memory (rank, row, bank, column) */ +#define C(n) (BIT(4) | (n)) /* column */ +#define B(n) (BIT(5) | (n)) /* bank */ +#define R(n) (BIT(6) | (n)) /* row */ +#define RS (BIT(7)) /* rank */ + +/* addrdec values */ +#define AMAP_1KB 0 +#define AMAP_2KB 1 +#define AMAP_4KB 2 +#define AMAP_RSVD 3 + +/* dden values */ +#define DEN_4Gb 0 +#define DEN_8Gb 2 + +/* dwid values */ +#define X8 0 +#define X16 1 + +static struct dimm_geometry { + u8 addrdec; + u8 dden; + u8 dwid; + u8 rowbits, colbits; + u16 bits[PMI_ADDRESS_WIDTH]; +} dimms[] = { + { + .addrdec = AMAP_1KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } 
+ }, + { + .addrdec = AMAP_2KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + } +}; + +static int bank_hash(u64 pmiaddr, int idx, int shft) +{ + int bhash = 0; + + switch (idx) { + case 0: + bhash ^= ((pmiaddr >> (12 + shft)) ^ (pmiaddr >> (9 + shft))) & 1; + break; + case 1: + bhash ^= (((pmiaddr >> (10 + shft)) ^ (pmiaddr >> (8 + shft))) & 1) << 1; + bhash ^= ((pmiaddr >> 22) & 1) << 1; + break; + case 2: + bhash ^= (((pmiaddr >> (13 + shft)) ^ (pmiaddr >> (11 + shft))) & 1) << 2; + break; + } + + return bhash; +} + +static int rank_hash(u64 pmiaddr) +{ + return ((pmiaddr >> 16) ^ (pmiaddr >> 10)) & 1; +} + +/* Second stage decode. Compute rank, bank, row & column. */ +static int apl_pmi2mem(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx, + struct dram_addr *daddr, char *msg) +{ + struct d_cr_drp0 *cr_drp0 = &drp0[pmiidx]; + struct pnd2_pvt *pvt = mci->pvt_info; + int g = pvt->dimm_geom[pmiidx]; + struct dimm_geometry *d = &dimms[g]; + int column = 0, bank = 0, row = 0, rank = 0; + int i, idx, type, skiprs = 0; + + for (i = 0; i < PMI_ADDRESS_WIDTH; i++) { + int bit = (pmiaddr >> i) & 1; + + if (i + skiprs >= PMI_ADDRESS_WIDTH) { + snprintf(msg, PND2_MSG_SIZE, "Bad dimm_geometry[] table\n"); + return -EINVAL; + } + + type = d->bits[i + skiprs] & ~0xf; + idx = d->bits[i + skiprs] & 0xf; + + /* + * On single rank DIMMs ignore the rank select bit + * and shift remainder of "bits[]" down one place. 
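+ * e.g. when only one of rken0/rken1 is set, the RS entry is skipped
+ * and each later entry of "bits[]" is consumed one position earlier.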
+ */ + if (type == RS && (cr_drp0->rken0 + cr_drp0->rken1) == 1) { + skiprs = 1; + type = d->bits[i + skiprs] & ~0xf; + idx = d->bits[i + skiprs] & 0xf; + } + + switch (type) { + case C(0): + column |= (bit << idx); + break; + case B(0): + bank |= (bit << idx); + if (cr_drp0->bahen) + bank ^= bank_hash(pmiaddr, idx, d->addrdec); + break; + case R(0): + row |= (bit << idx); + break; + case RS: + rank = bit; + if (cr_drp0->rsien) + rank ^= rank_hash(pmiaddr); + break; + default: + if (bit) { + snprintf(msg, PND2_MSG_SIZE, "Bad translation\n"); + return -EINVAL; + } + goto done; + } + } + +done: + daddr->col = column; + daddr->bank = bank; + daddr->row = row; + daddr->rank = rank; + daddr->dimm = 0; + + return 0; +} + +/* Pluck bit "in" from pmiaddr and return value shifted to bit "out" */ +#define dnv_get_bit(pmi, in, out) ((int)(((pmi) >> (in)) & 1u) << (out)) + +static int dnv_pmi2mem(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx, + struct dram_addr *daddr, char *msg) +{ + /* Rank 0 or 1 */ + daddr->rank = dnv_get_bit(pmiaddr, dmap[pmiidx].rs0 + 13, 0); + /* Rank 2 or 3 */ + daddr->rank |= dnv_get_bit(pmiaddr, dmap[pmiidx].rs1 + 13, 1); + + /* + * Normally ranks 0,1 are DIMM0, and 2,3 are DIMM1, but we + * flip them if DIMM1 is larger than DIMM0. + */ + daddr->dimm = (daddr->rank >= 2) ^ drp[pmiidx].dimmflip; + + daddr->bank = dnv_get_bit(pmiaddr, dmap[pmiidx].ba0 + 6, 0); + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].ba1 + 6, 1); + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].bg0 + 6, 2); + if (dsch.ddr4en) + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].bg1 + 6, 3); + if (dmap1[pmiidx].bxor) { + if (dsch.ddr4en) { + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 0); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row7 + 6, 1); + if (dsch.chan_width == 0) + /* 64/72 bit dram channel width */ + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 2); + else + /* 32/40 bit dram channel width */ + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 2); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 3); + } else { + daddr->bank ^= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 0); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 1); + if (dsch.chan_width == 0) + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 2); + else + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 2); + } + } + + daddr->row = dnv_get_bit(pmiaddr, dmap2[pmiidx].row0 + 6, 0); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row1 + 6, 1); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 2); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row3 + 6, 3); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row4 + 6, 4); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row5 + 6, 5); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 6); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row7 + 6, 7); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row8 + 6, 8); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row9 + 6, 9); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row10 + 6, 10); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row11 + 6, 11); + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row12 + 6, 12); + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row13 + 6, 13); + if (dmap4[pmiidx].row14 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row14 + 6, 14); + if (dmap4[pmiidx].row15 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row15 + 6, 15); + if 
(dmap4[pmiidx].row16 != 31)
+ daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row16 + 6, 16);
+ if (dmap4[pmiidx].row17 != 31)
+ daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row17 + 6, 17);
+
+ daddr->col = dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 3);
+ daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 4);
+ daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca5 + 6, 5);
+ daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca6 + 6, 6);
+ daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca7 + 6, 7);
+ daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca8 + 6, 8);
+ daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca9 + 6, 9);
+ if (!dsch.ddr4en && dmap1[pmiidx].ca11 != 0x3f)
+ daddr->col |= dnv_get_bit(pmiaddr, dmap1[pmiidx].ca11 + 13, 11);
+
+ return 0;
+}
+
+static int check_channel(int ch)
+{
+ if (drp0[ch].dramtype != 0) {
+ pnd2_printk(KERN_INFO, "Unsupported DIMM in channel %d\n", ch);
+ return 1;
+ } else if (drp0[ch].eccen == 0) {
+ pnd2_printk(KERN_INFO, "ECC disabled on channel %d\n", ch);
+ return 1;
+ }
+ return 0;
+}
+
+static int apl_check_ecc_active(void)
+{
+ int i, ret = 0;
+
+ /* Check dramtype and ECC mode for each present DIMM */
+ for_each_set_bit(i, &chan_mask, APL_NUM_CHANNELS)
+ ret += check_channel(i);
+
+ return ret ? -EINVAL : 0;
+}
+
+#define DIMMS_PRESENT(d) ((d)->rken0 + (d)->rken1 + (d)->rken2 + (d)->rken3)
+
+static int check_unit(int ch)
+{
+ struct d_cr_drp *d = &drp[ch];
+
+ if (DIMMS_PRESENT(d) && !ecc_ctrl[ch].eccen) {
+ pnd2_printk(KERN_INFO, "ECC disabled on channel %d\n", ch);
+ return 1;
+ }
+ return 0;
+}
+
+static int dnv_check_ecc_active(void)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < DNV_NUM_CHANNELS; i++)
+ ret += check_unit(i);
+ return ret ? -EINVAL : 0;
+}
+
+static int get_memory_error_data(struct mem_ctl_info *mci, u64 addr,
+ struct dram_addr *daddr, char *msg)
+{
+ u64 pmiaddr;
+ u32 pmiidx;
+ int ret;
+
+ ret = sys2pmi(addr, &pmiidx, &pmiaddr, msg);
+ if (ret)
+ return ret;
+
+ pmiaddr >>= ops->pmiaddr_shift;
+ /* pmi channel idx to dimm channel idx */
+ pmiidx >>= ops->pmiidx_shift;
+ daddr->chan = pmiidx;
+
+ ret = ops->pmi2mem(mci, pmiaddr, pmiidx, daddr, msg);
+ if (ret)
+ return ret;
+
+ edac_dbg(0, "SysAddr=%llx PmiAddr=%llx Channel=%d DIMM=%d Rank=%d Bank=%d Row=%d Column=%d\n",
+ addr, pmiaddr, daddr->chan, daddr->dimm, daddr->rank, daddr->bank, daddr->row, daddr->col);
+
+ return 0;
+}
+
+static void pnd2_mce_output_error(struct mem_ctl_info *mci, const struct mce *m,
+ struct dram_addr *daddr)
+{
+ enum hw_event_mc_err_type tp_event;
+ char *optype, msg[PND2_MSG_SIZE];
+ bool ripv = m->mcgstatus & MCG_STATUS_RIPV;
+ bool overflow = m->status & MCI_STATUS_OVER;
+ bool uc_err = m->status & MCI_STATUS_UC;
+ bool recov = m->status & MCI_STATUS_S;
+ u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
+ u32 mscod = GET_BITFIELD(m->status, 16, 31);
+ u32 errcode = GET_BITFIELD(m->status, 0, 15);
+ u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+ int rc;
+
+ tp_event = uc_err ? (ripv ? HW_EVENT_ERR_UNCORRECTED : HW_EVENT_ERR_FATAL) :
+ HW_EVENT_ERR_CORRECTED;
+
+ /*
+ * According to Table 15-9 of the Intel Architecture spec vol 3A,
+ * memory errors should fit in this mask:
+ * 000f 0000 1mmm cccc (binary)
+ * where:
+ * f = Correction Report Filtering Bit. If 1, subsequent errors
+ * won't be shown
+ * mmm = error type
+ * cccc = channel
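+ * (optypenum above pulls the "mmm" field out of bits 4-6)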
+ * If the mask doesn't match, report an error to the parsing logic
+ */
+ if (!((errcode & 0xef80) == 0x80)) {
+ optype = "Can't parse: it is not a mem";
+ } else {
+ switch (optypenum) {
+ case 0:
+ optype = "generic undef request error";
+ break;
+ case 1:
+ optype = "memory read error";
+ break;
+ case 2:
+ optype = "memory write error";
+ break;
+ case 3:
+ optype = "addr/cmd error";
+ break;
+ case 4:
+ optype = "memory scrubbing error";
+ break;
+ default:
+ optype = "reserved";
+ break;
+ }
+ }
+
+ /* Only decode errors with a valid address (ADDRV) */
+ if (!(m->status & MCI_STATUS_ADDRV))
+ return;
+
+ rc = get_memory_error_data(mci, m->addr, daddr, msg);
+ if (rc)
+ goto address_error;
+
+ snprintf(msg, sizeof(msg),
+ "%s%s err_code:%04x:%04x channel:%d DIMM:%d rank:%d row:%d bank:%d col:%d",
+ overflow ? " OVERFLOW" : "", (uc_err && recov) ? " recoverable" : "", mscod,
+ errcode, daddr->chan, daddr->dimm, daddr->rank, daddr->row, daddr->bank, daddr->col);
+
+ edac_dbg(0, "%s\n", msg);
+
+ /* Call the helper to output message */
+ edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT,
+ m->addr & ~PAGE_MASK, 0, daddr->chan, daddr->dimm, -1, optype, msg);
+
+ return;
+
+address_error:
+ edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, -1, -1, -1, msg, "");
+}
+
+static void apl_get_dimm_config(struct mem_ctl_info *mci)
+{
+ struct pnd2_pvt *pvt = mci->pvt_info;
+ struct dimm_info *dimm;
+ struct d_cr_drp0 *d;
+ u64 capacity;
+ int i, g;
+
+ for_each_set_bit(i, &chan_mask, APL_NUM_CHANNELS) {
+ dimm = edac_get_dimm(mci, i, 0, 0);
+ if (!dimm) {
+ edac_dbg(0, "No allocated DIMM for channel %d\n", i);
+ continue;
+ }
+
+ d = &drp0[i];
+ for (g = 0; g < ARRAY_SIZE(dimms); g++)
+ if (dimms[g].addrdec == d->addrdec &&
+ dimms[g].dden == d->dden &&
+ dimms[g].dwid == d->dwid)
+ break;
+
+ if (g == ARRAY_SIZE(dimms)) {
+ edac_dbg(0, "Channel %d: unrecognized DIMM\n", i);
+ continue;
+ }
+
+ pvt->dimm_geom[i] = g;
+ capacity = (d->rken0 + d->rken1) * 8 * BIT(dimms[g].rowbits + dimms[g].colbits);
+ edac_dbg(0, "Channel %d: %lld MByte DIMM\n", i, capacity >> (20 - 3));
+ dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3));
+ dimm->grain = 32;
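+ /* dwid: 0 = x8 parts, 1 = x16 parts (cf. the X8/X16 defines above) */
+ dimm->dtype = (d->dwid == 0) ?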
+static const int dnv_dtypes[] = {
+	DEV_X8, DEV_X4, DEV_X16, DEV_UNKNOWN
+};
+
+static void dnv_get_dimm_config(struct mem_ctl_info *mci)
+{
+	int i, j, ranks_of_dimm[DNV_MAX_DIMMS], banks, rowbits, colbits, memtype;
+	struct dimm_info *dimm;
+	struct d_cr_drp *d;
+	u64 capacity;
+
+	if (dsch.ddr4en) {
+		memtype = MEM_DDR4;
+		banks = 16;
+		colbits = 10;
+	} else {
+		memtype = MEM_DDR3;
+		banks = 8;
+	}
+
+	for (i = 0; i < DNV_NUM_CHANNELS; i++) {
+		if (dmap4[i].row14 == 31)
+			rowbits = 14;
+		else if (dmap4[i].row15 == 31)
+			rowbits = 15;
+		else if (dmap4[i].row16 == 31)
+			rowbits = 16;
+		else if (dmap4[i].row17 == 31)
+			rowbits = 17;
+		else
+			rowbits = 18;
+
+		if (memtype == MEM_DDR3) {
+			if (dmap1[i].ca11 != 0x3f)
+				colbits = 12;
+			else
+				colbits = 10;
+		}
+
+		d = &drp[i];
+		/* DIMM0 is present if rank0 and/or rank1 is enabled */
+		ranks_of_dimm[0] = d->rken0 + d->rken1;
+		/* DIMM1 is present if rank2 and/or rank3 is enabled */
+		ranks_of_dimm[1] = d->rken2 + d->rken3;
+
+		for (j = 0; j < DNV_MAX_DIMMS; j++) {
+			if (!ranks_of_dimm[j])
+				continue;
+
+			dimm = edac_get_dimm(mci, i, j, 0);
+			if (!dimm) {
+				edac_dbg(0, "No allocated DIMM for channel %d DIMM %d\n", i, j);
+				continue;
+			}
+
+			capacity = ranks_of_dimm[j] * banks * BIT(rowbits + colbits);
+			edac_dbg(0, "Channel %d DIMM %d: %lld MByte DIMM\n", i, j, capacity >> (20 - 3));
+			dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3));
+			dimm->grain = 32;
+			dimm->dtype = dnv_dtypes[j ? d->dimmdwid0 : d->dimmdwid1];
+			dimm->mtype = memtype;
+			dimm->edac_mode = EDAC_SECDED;
+			snprintf(dimm->label, sizeof(dimm->label), "Chan#%d_DIMM#%d", i, j);
+		}
+	}
+}
+
+static int pnd2_register_mci(struct mem_ctl_info **ppmci)
+{
+	struct edac_mc_layer layers[2];
+	struct mem_ctl_info *mci;
+	struct pnd2_pvt *pvt;
+	int rc;
+
+	rc = ops->check_ecc();
+	if (rc < 0)
+		return rc;
+
+	/* Allocate a new MC control structure */
+	layers[0].type = EDAC_MC_LAYER_CHANNEL;
+	layers[0].size = ops->channels;
+	layers[0].is_virt_csrow = false;
+	layers[1].type = EDAC_MC_LAYER_SLOT;
+	layers[1].size = ops->dimms_per_channel;
+	layers[1].is_virt_csrow = true;
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+	if (!mci)
+		return -ENOMEM;
+
+	pvt = mci->pvt_info;
+	memset(pvt, 0, sizeof(*pvt));
+
+	mci->mod_name = EDAC_MOD_STR;
+	mci->dev_name = ops->name;
+	mci->ctl_name = "Pondicherry2";
+
+	/* Get dimm basic config and the memory layout */
+	ops->get_dimm_config(mci);
+
+	if (edac_mc_add_mc(mci)) {
+		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+		edac_mc_free(mci);
+		return -EINVAL;
+	}
+
+	*ppmci = mci;
+
+	return 0;
+}
+
+static void pnd2_unregister_mci(struct mem_ctl_info *mci)
+{
+	if (unlikely(!mci || !mci->pvt_info)) {
+		pnd2_printk(KERN_ERR, "Couldn't find mci handler\n");
+		return;
+	}
+
+	/* Remove MC sysfs nodes */
+	edac_mc_del_mc(NULL);
+	edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
+	edac_mc_free(mci);
+}
+
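[Editor's sketch] pnd2_register_mci() above declares the topology to the EDAC core as a two-layer channels-by-slots grid; that layout is what the edac_get_dimm(mci, channel, slot, 0) calls in the get_dimm_config callbacks index into. A condensed sketch of just that contract (a fragment against the in-kernel API, not a complete driver; names other than the EDAC calls are made up):

    #include <linux/edac.h>

    /* Allocate an MC whose DIMM grid is nchannels x nslots, as this driver does. */
    static struct mem_ctl_info *alloc_grid_mci(unsigned int nchannels,
                                               unsigned int nslots, size_t priv)
    {
        struct edac_mc_layer layers[2];

        layers[0].type = EDAC_MC_LAYER_CHANNEL;
        layers[0].size = nchannels;
        layers[0].is_virt_csrow = false;
        layers[1].type = EDAC_MC_LAYER_SLOT;
        layers[1].size = nslots;
        layers[1].is_virt_csrow = true;

        /* Cell (chan, slot) is then reachable via edac_get_dimm(mci, chan, slot, 0). */
        return edac_mc_alloc(0, ARRAY_SIZE(layers), layers, priv);
    }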
+/*
+ * Callback function registered with core kernel mce code.
+ * Called once for each logged error.
+ */
+static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	struct mem_ctl_info *mci;
+	struct dram_addr daddr;
+	char *type;
+
+	mci = pnd2_mci;
+	if (!mci || (mce->kflags & MCE_HANDLED_CEC))
+		return NOTIFY_DONE;
+
+	/*
+	 * Just let mcelog handle it if the error is
+	 * outside the memory controller. A memory error
+	 * is indicated by bit 7 = 1 and bits 8-11,13-15 = 0.
+	 * bit 12 has a special meaning.
+	 */
+	if ((mce->status & 0xefff) >> 7 != 1)
+		return NOTIFY_DONE;
+
+	if (mce->mcgstatus & MCG_STATUS_MCIP)
+		type = "Exception";
+	else
+		type = "Event";
+
+	pnd2_mc_printk(mci, KERN_INFO, "HANDLING MCE MEMORY ERROR\n");
+	pnd2_mc_printk(mci, KERN_INFO, "CPU %u: Machine Check %s: %llx Bank %u: %llx\n",
+		       mce->extcpu, type, mce->mcgstatus, mce->bank, mce->status);
+	pnd2_mc_printk(mci, KERN_INFO, "TSC %llx ", mce->tsc);
+	pnd2_mc_printk(mci, KERN_INFO, "ADDR %llx ", mce->addr);
+	pnd2_mc_printk(mci, KERN_INFO, "MISC %llx ", mce->misc);
+	pnd2_mc_printk(mci, KERN_INFO, "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+		       mce->cpuvendor, mce->cpuid, mce->time, mce->socketid, mce->apicid);
+
+	pnd2_mce_output_error(mci, mce, &daddr);
+
+	/* Advise mcelog that the error was handled */
+	mce->kflags |= MCE_HANDLED_EDAC;
+	return NOTIFY_OK;
+}
+
+static struct notifier_block pnd2_mce_dec = {
+	.notifier_call = pnd2_mce_check_error,
+	.priority = MCE_PRIO_EDAC,
+};
+
+#ifdef CONFIG_EDAC_DEBUG
+/*
+ * Write an address to this file to exercise the address decode
+ * logic in this driver.
+ */
+static u64 pnd2_fake_addr;
+#define PND2_BLOB_SIZE 1024
+static char pnd2_result[PND2_BLOB_SIZE];
+static struct dentry *pnd2_test;
+static struct debugfs_blob_wrapper pnd2_blob = {
+	.data = pnd2_result,
+	.size = 0
+};
+
+static int debugfs_u64_set(void *data, u64 val)
+{
+	struct dram_addr daddr;
+	struct mce m;
+
+	*(u64 *)data = val;
+	m.mcgstatus = 0;
+	/* ADDRV + MemRd + Unknown channel */
+	m.status = MCI_STATUS_ADDRV + 0x9f;
+	m.addr = val;
+	pnd2_mce_output_error(pnd2_mci, &m, &daddr);
+	snprintf(pnd2_blob.data, PND2_BLOB_SIZE,
+		 "SysAddr=%llx Channel=%d DIMM=%d Rank=%d Bank=%d Row=%d Column=%d\n",
+		 m.addr, daddr.chan, daddr.dimm, daddr.rank, daddr.bank, daddr.row, daddr.col);
+	pnd2_blob.size = strlen(pnd2_blob.data);
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+
+static void setup_pnd2_debug(void)
+{
+	pnd2_test = edac_debugfs_create_dir("pnd2_test");
+	edac_debugfs_create_file("pnd2_debug_addr", 0200, pnd2_test,
+				 &pnd2_fake_addr, &fops_u64_wo);
+	debugfs_create_blob("pnd2_debug_results", 0400, pnd2_test, &pnd2_blob);
+}
+
+static void teardown_pnd2_debug(void)
+{
+	debugfs_remove_recursive(pnd2_test);
+}
+#else
+static void setup_pnd2_debug(void) {}
+static void teardown_pnd2_debug(void) {}
+#endif /* CONFIG_EDAC_DEBUG */
+
+
+static int pnd2_probe(void)
+{
+	int rc;
+
+	edac_dbg(2, "\n");
+	rc = get_registers();
+	if (rc)
+		return rc;
+
+	return pnd2_register_mci(&pnd2_mci);
+}
+
+static void pnd2_remove(void)
+{
+	edac_dbg(0, "\n");
+	pnd2_unregister_mci(pnd2_mci);
+}
+
+static struct dunit_ops apl_ops = {
+	.name = "pnd2/apl",
+	.type = APL,
+	.pmiaddr_shift = LOG2_PMI_ADDR_GRANULARITY,
+	.pmiidx_shift = 0,
+	.channels = APL_NUM_CHANNELS,
+	.dimms_per_channel = 1,
+	.rd_reg = apl_rd_reg,
+	.get_registers = apl_get_registers,
+	.check_ecc = apl_check_ecc_active,
+	.mk_region = apl_mk_region,
+	.get_dimm_config = apl_get_dimm_config,
+	.pmi2mem = apl_pmi2mem,
+};
+
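[Editor's sketch] apl_ops above and dnv_ops below form a small vtable: every SoC-specific detail (register access, address decode, channel/DIMM topology) hangs off one structure, and the generic paths only ever call through ops->. pnd2_init() (below) picks the table from the driver_data of the matching x86_cpu_id entry. A user-space rendering of the same dispatch pattern, with hypothetical names and values:

    #include <stdio.h>

    struct demo_ops {
        const char *name;
        int channels;
        int (*check_ecc)(void);
    };

    static int apl_check(void) { return 0; }    /* stand-in for apl_check_ecc_active() */

    static const struct demo_ops apl_demo = {
        .name = "pnd2/apl", .channels = 4, .check_ecc = apl_check,  /* channel count is made up */
    };

    int main(void)
    {
        const struct demo_ops *ops = &apl_demo; /* the driver selects this via x86_match_cpu() */

        printf("%s: %d channels, check_ecc=%d\n", ops->name, ops->channels, ops->check_ecc());
        return 0;
    }

The payoff of this indirection is that pnd2_probe(), get_memory_error_data() and the debugfs test hook stay identical for Apollo Lake and Denverton.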
+static struct dunit_ops dnv_ops = { + .name = "pnd2/dnv", + .type = DNV, + .pmiaddr_shift = 0, + .pmiidx_shift = 1, + .channels = DNV_NUM_CHANNELS, + .dimms_per_channel = 2, + .rd_reg = dnv_rd_reg, + .get_registers = dnv_get_registers, + .check_ecc = dnv_check_ecc_active, + .mk_region = dnv_mk_region, + .get_dimm_config = dnv_get_dimm_config, + .pmi2mem = dnv_pmi2mem, +}; + +static const struct x86_cpu_id pnd2_cpuids[] = { + X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &apl_ops), + X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &dnv_ops), + { } +}; +MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids); + +static int __init pnd2_init(void) +{ + const struct x86_cpu_id *id; + const char *owner; + int rc; + + edac_dbg(2, "\n"); + + if (ghes_get_devices()) + return -EBUSY; + + owner = edac_get_owner(); + if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) + return -EBUSY; + + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + id = x86_match_cpu(pnd2_cpuids); + if (!id) + return -ENODEV; + + ops = (struct dunit_ops *)id->driver_data; + + if (ops->type == APL) { + p2sb_bus = pci_find_bus(0, 0); + if (!p2sb_bus) + return -ENODEV; + } + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + rc = pnd2_probe(); + if (rc < 0) { + pnd2_printk(KERN_ERR, "Failed to register device with error %d.\n", rc); + return rc; + } + + if (!pnd2_mci) + return -ENODEV; + + mce_register_decode_chain(&pnd2_mce_dec); + setup_pnd2_debug(); + + return 0; +} + +static void __exit pnd2_exit(void) +{ + edac_dbg(2, "\n"); + teardown_pnd2_debug(); + mce_unregister_decode_chain(&pnd2_mce_dec); + pnd2_remove(); +} + +module_init(pnd2_init); +module_exit(pnd2_exit); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tony Luck"); +MODULE_DESCRIPTION("MC Driver for Intel SoC using Pondicherry memory controller"); diff --git a/drivers/edac/pnd2_edac.h b/drivers/edac/pnd2_edac.h new file mode 100644 index 000000000000..028ef701701b --- /dev/null +++ b/drivers/edac/pnd2_edac.h @@ -0,0 +1,293 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Register bitfield descriptions for Pondicherry2 memory controller. + * + * Copyright (c) 2016, Intel Corporation. 
+ */ + +#ifndef _PND2_REGS_H +#define _PND2_REGS_H + +struct b_cr_touud_lo_pci { + u32 lock : 1; + u32 reserved_1 : 19; + u32 touud : 12; +}; + +#define b_cr_touud_lo_pci_port 0x4c +#define b_cr_touud_lo_pci_offset 0xa8 +#define b_cr_touud_lo_pci_r_opcode 0x04 + +struct b_cr_touud_hi_pci { + u32 touud : 7; + u32 reserved_0 : 25; +}; + +#define b_cr_touud_hi_pci_port 0x4c +#define b_cr_touud_hi_pci_offset 0xac +#define b_cr_touud_hi_pci_r_opcode 0x04 + +struct b_cr_tolud_pci { + u32 lock : 1; + u32 reserved_0 : 19; + u32 tolud : 12; +}; + +#define b_cr_tolud_pci_port 0x4c +#define b_cr_tolud_pci_offset 0xbc +#define b_cr_tolud_pci_r_opcode 0x04 + +struct b_cr_mchbar_lo_pci { + u32 enable : 1; + u32 pad_3_1 : 3; + u32 pad_14_4: 11; + u32 base: 17; +}; + +struct b_cr_mchbar_hi_pci { + u32 base : 7; + u32 pad_31_7 : 25; +}; + +/* Symmetric region */ +struct b_cr_slice_channel_hash { + u64 slice_1_disabled : 1; + u64 hvm_mode : 1; + u64 interleave_mode : 2; + u64 slice_0_mem_disabled : 1; + u64 reserved_0 : 1; + u64 slice_hash_mask : 14; + u64 reserved_1 : 11; + u64 enable_pmi_dual_data_mode : 1; + u64 ch_1_disabled : 1; + u64 reserved_2 : 1; + u64 sym_slice0_channel_enabled : 2; + u64 sym_slice1_channel_enabled : 2; + u64 ch_hash_mask : 14; + u64 reserved_3 : 11; + u64 lock : 1; +}; + +#define b_cr_slice_channel_hash_port 0x4c +#define b_cr_slice_channel_hash_offset 0x4c58 +#define b_cr_slice_channel_hash_r_opcode 0x06 + +struct b_cr_mot_out_base_mchbar { + u32 reserved_0 : 14; + u32 mot_out_base : 15; + u32 reserved_1 : 1; + u32 tr_en : 1; + u32 imr_en : 1; +}; + +#define b_cr_mot_out_base_mchbar_port 0x4c +#define b_cr_mot_out_base_mchbar_offset 0x6af0 +#define b_cr_mot_out_base_mchbar_r_opcode 0x00 + +struct b_cr_mot_out_mask_mchbar { + u32 reserved_0 : 14; + u32 mot_out_mask : 15; + u32 reserved_1 : 1; + u32 ia_iwb_en : 1; + u32 gt_iwb_en : 1; +}; + +#define b_cr_mot_out_mask_mchbar_port 0x4c +#define b_cr_mot_out_mask_mchbar_offset 0x6af4 +#define b_cr_mot_out_mask_mchbar_r_opcode 0x00 + +struct b_cr_asym_mem_region0_mchbar { + u32 pad : 4; + u32 slice0_asym_base : 11; + u32 pad_18_15 : 4; + u32 slice0_asym_limit : 11; + u32 slice0_asym_channel_select : 1; + u32 slice0_asym_enable : 1; +}; + +#define b_cr_asym_mem_region0_mchbar_port 0x4c +#define b_cr_asym_mem_region0_mchbar_offset 0x6e40 +#define b_cr_asym_mem_region0_mchbar_r_opcode 0x00 + +struct b_cr_asym_mem_region1_mchbar { + u32 pad : 4; + u32 slice1_asym_base : 11; + u32 pad_18_15 : 4; + u32 slice1_asym_limit : 11; + u32 slice1_asym_channel_select : 1; + u32 slice1_asym_enable : 1; +}; + +#define b_cr_asym_mem_region1_mchbar_port 0x4c +#define b_cr_asym_mem_region1_mchbar_offset 0x6e44 +#define b_cr_asym_mem_region1_mchbar_r_opcode 0x00 + +/* Some bit fields moved in above two structs on Denverton */ +struct b_cr_asym_mem_region_denverton { + u32 pad : 4; + u32 slice_asym_base : 8; + u32 pad_19_12 : 8; + u32 slice_asym_limit : 8; + u32 pad_28_30 : 3; + u32 slice_asym_enable : 1; +}; + +struct b_cr_asym_2way_mem_region_mchbar { + u32 pad : 2; + u32 asym_2way_intlv_mode : 2; + u32 asym_2way_base : 11; + u32 pad_16_15 : 2; + u32 asym_2way_limit : 11; + u32 pad_30_28 : 3; + u32 asym_2way_interleave_enable : 1; +}; + +#define b_cr_asym_2way_mem_region_mchbar_port 0x4c +#define b_cr_asym_2way_mem_region_mchbar_offset 0x6e50 +#define b_cr_asym_2way_mem_region_mchbar_r_opcode 0x00 + +/* Apollo Lake d-unit */ + +struct d_cr_drp0 { + u32 rken0 : 1; + u32 rken1 : 1; + u32 ddmen : 1; + u32 rsvd3 : 1; + u32 dwid : 2; + u32 dden : 3; + u32 
rsvd13_9 : 5; + u32 rsien : 1; + u32 bahen : 1; + u32 rsvd18_16 : 3; + u32 caswizzle : 2; + u32 eccen : 1; + u32 dramtype : 3; + u32 blmode : 3; + u32 addrdec : 2; + u32 dramdevice_pr : 2; +}; + +#define d_cr_drp0_offset 0x1400 +#define d_cr_drp0_r_opcode 0x00 + +/* Denverton d-unit */ + +struct d_cr_dsch { + u32 ch0en : 1; + u32 ch1en : 1; + u32 ddr4en : 1; + u32 coldwake : 1; + u32 newbypdis : 1; + u32 chan_width : 1; + u32 rsvd6_6 : 1; + u32 ooodis : 1; + u32 rsvd18_8 : 11; + u32 ic : 1; + u32 rsvd31_20 : 12; +}; + +#define d_cr_dsch_port 0x16 +#define d_cr_dsch_offset 0x0 +#define d_cr_dsch_r_opcode 0x0 + +struct d_cr_ecc_ctrl { + u32 eccen : 1; + u32 rsvd31_1 : 31; +}; + +#define d_cr_ecc_ctrl_offset 0x180 +#define d_cr_ecc_ctrl_r_opcode 0x0 + +struct d_cr_drp { + u32 rken0 : 1; + u32 rken1 : 1; + u32 rken2 : 1; + u32 rken3 : 1; + u32 dimmdwid0 : 2; + u32 dimmdden0 : 2; + u32 dimmdwid1 : 2; + u32 dimmdden1 : 2; + u32 rsvd15_12 : 4; + u32 dimmflip : 1; + u32 rsvd31_17 : 15; +}; + +#define d_cr_drp_offset 0x158 +#define d_cr_drp_r_opcode 0x0 + +struct d_cr_dmap { + u32 ba0 : 5; + u32 ba1 : 5; + u32 bg0 : 5; /* if ddr3, ba2 = bg0 */ + u32 bg1 : 5; /* if ddr3, ba3 = bg1 */ + u32 rs0 : 5; + u32 rs1 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap_offset 0x174 +#define d_cr_dmap_r_opcode 0x0 + +struct d_cr_dmap1 { + u32 ca11 : 6; + u32 bxor : 1; + u32 rsvd : 25; +}; + +#define d_cr_dmap1_offset 0xb4 +#define d_cr_dmap1_r_opcode 0x0 + +struct d_cr_dmap2 { + u32 row0 : 5; + u32 row1 : 5; + u32 row2 : 5; + u32 row3 : 5; + u32 row4 : 5; + u32 row5 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap2_offset 0x148 +#define d_cr_dmap2_r_opcode 0x0 + +struct d_cr_dmap3 { + u32 row6 : 5; + u32 row7 : 5; + u32 row8 : 5; + u32 row9 : 5; + u32 row10 : 5; + u32 row11 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap3_offset 0x14c +#define d_cr_dmap3_r_opcode 0x0 + +struct d_cr_dmap4 { + u32 row12 : 5; + u32 row13 : 5; + u32 row14 : 5; + u32 row15 : 5; + u32 row16 : 5; + u32 row17 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap4_offset 0x150 +#define d_cr_dmap4_r_opcode 0x0 + +struct d_cr_dmap5 { + u32 ca3 : 4; + u32 ca4 : 4; + u32 ca5 : 4; + u32 ca6 : 4; + u32 ca7 : 4; + u32 ca8 : 4; + u32 ca9 : 4; + u32 rsvd : 4; +}; + +#define d_cr_dmap5_offset 0x154 +#define d_cr_dmap5_r_opcode 0x0 + +#endif /* _PND2_REGS_H */ diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c deleted file mode 100644 index ef6b7e08f485..000000000000 --- a/drivers/edac/ppc4xx_edac.c +++ /dev/null @@ -1,1440 +0,0 @@ -/* - * Copyright (c) 2008 Nuovation System Designs, LLC - * Grant Erickson <gerickson@nuovations.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; version 2 of the - * License. - * - */ - -#include <linux/edac.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/of_device.h> -#include <linux/of_platform.h> -#include <linux/types.h> - -#include <asm/dcr.h> - -#include "edac_core.h" -#include "ppc4xx_edac.h" - -/* - * This file implements a driver for monitoring and handling events - * associated with the IMB DDR2 ECC controller found in the AMCC/IBM - * 405EX[r], 440SP, 440SPe, 460EX, 460GT and 460SX. - * - * As realized in the 405EX[r], this controller features: - * - * - Support for registered- and non-registered DDR1 and DDR2 memory. 
- * - 32-bit or 16-bit memory interface with optional ECC. - * - * o ECC support includes: - * - * - 4-bit SEC/DED - * - Aligned-nibble error detect - * - Bypass mode - * - * - Two (2) memory banks/ranks. - * - Up to 1 GiB per bank/rank in 32-bit mode and up to 512 MiB per - * bank/rank in 16-bit mode. - * - * As realized in the 440SP and 440SPe, this controller changes/adds: - * - * - 64-bit or 32-bit memory interface with optional ECC. - * - * o ECC support includes: - * - * - 8-bit SEC/DED - * - Aligned-nibble error detect - * - Bypass mode - * - * - Up to 4 GiB per bank/rank in 64-bit mode and up to 2 GiB - * per bank/rank in 32-bit mode. - * - * As realized in the 460EX and 460GT, this controller changes/adds: - * - * - 64-bit or 32-bit memory interface with optional ECC. - * - * o ECC support includes: - * - * - 8-bit SEC/DED - * - Aligned-nibble error detect - * - Bypass mode - * - * - Four (4) memory banks/ranks. - * - Up to 16 GiB per bank/rank in 64-bit mode and up to 8 GiB - * per bank/rank in 32-bit mode. - * - * At present, this driver has ONLY been tested against the controller - * realization in the 405EX[r] on the AMCC Kilauea and Haleakala - * boards (256 MiB w/o ECC memory soldered onto the board) and a - * proprietary board based on those designs (128 MiB ECC memory, also - * soldered onto the board). - * - * Dynamic feature detection and handling needs to be added for the - * other realizations of this controller listed above. - * - * Eventually, this driver will likely be adapted to the above variant - * realizations of this controller as well as broken apart to handle - * the other known ECC-capable controllers prevalent in other 4xx - * processors: - * - * - IBM SDRAM (405GP, 405CR and 405EP) "ibm,sdram-4xx" - * - IBM DDR1 (440GP, 440GX, 440EP and 440GR) "ibm,sdram-4xx-ddr" - * - Denali DDR1/DDR2 (440EPX and 440GRX) "denali,sdram-4xx-ddr2" - * - * For this controller, unfortunately, correctable errors report - * nothing more than the beat/cycle and byte/lane the correction - * occurred on and the check bit group that covered the error. - * - * In contrast, uncorrectable errors also report the failing address, - * the bus master and the transaction direction (i.e. read or write) - * - * Regardless of whether the error is a CE or a UE, we report the - * following pieces of information in the driver-unique message to the - * EDAC subsystem: - * - * - Device tree path - * - Bank(s) - * - Check bit error group - * - Beat(s)/lane(s) - */ - -/* Preprocessor Definitions */ - -#define EDAC_OPSTATE_INT_STR "interrupt" -#define EDAC_OPSTATE_POLL_STR "polled" -#define EDAC_OPSTATE_UNKNOWN_STR "unknown" - -#define PPC4XX_EDAC_MODULE_NAME "ppc4xx_edac" -#define PPC4XX_EDAC_MODULE_REVISION "v1.0.0" - -#define PPC4XX_EDAC_MESSAGE_SIZE 256 - -/* - * Kernel logging without an EDAC instance - */ -#define ppc4xx_edac_printk(level, fmt, arg...) \ - edac_printk(level, "PPC4xx MC", fmt, ##arg) - -/* - * Kernel logging with an EDAC instance - */ -#define ppc4xx_edac_mc_printk(level, mci, fmt, arg...) \ - edac_mc_chipset_printk(mci, level, "PPC4xx", fmt, ##arg) - -/* - * Macros to convert bank configuration size enumerations into MiB and - * page values. 
- */ -#define SDRAM_MBCF_SZ_MiB_MIN 4 -#define SDRAM_MBCF_SZ_TO_MiB(n) (SDRAM_MBCF_SZ_MiB_MIN \ - << (SDRAM_MBCF_SZ_DECODE(n))) -#define SDRAM_MBCF_SZ_TO_PAGES(n) (SDRAM_MBCF_SZ_MiB_MIN \ - << (20 - PAGE_SHIFT + \ - SDRAM_MBCF_SZ_DECODE(n))) - -/* - * The ibm,sdram-4xx-ddr2 Device Control Registers (DCRs) are - * indirectly accessed and have a base and length defined by the - * device tree. The base can be anything; however, we expect the - * length to be precisely two registers, the first for the address - * window and the second for the data window. - */ -#define SDRAM_DCR_RESOURCE_LEN 2 -#define SDRAM_DCR_ADDR_OFFSET 0 -#define SDRAM_DCR_DATA_OFFSET 1 - -/* - * Device tree interrupt indices - */ -#define INTMAP_ECCDED_INDEX 0 /* Double-bit Error Detect */ -#define INTMAP_ECCSEC_INDEX 1 /* Single-bit Error Correct */ - -/* Type Definitions */ - -/* - * PPC4xx SDRAM memory controller private instance data - */ -struct ppc4xx_edac_pdata { - dcr_host_t dcr_host; /* Indirect DCR address/data window mapping */ - struct { - int sec; /* Single-bit correctable error IRQ assigned */ - int ded; /* Double-bit detectable error IRQ assigned */ - } irqs; -}; - -/* - * Various status data gathered and manipulated when checking and - * reporting ECC status. - */ -struct ppc4xx_ecc_status { - u32 ecces; - u32 besr; - u32 bearh; - u32 bearl; - u32 wmirq; -}; - -/* Function Prototypes */ - -static int ppc4xx_edac_probe(struct platform_device *device); -static int ppc4xx_edac_remove(struct platform_device *device); - -/* Global Variables */ - -/* - * Device tree node type and compatible tuples this driver can match - * on. - */ -static struct of_device_id ppc4xx_edac_match[] = { - { - .compatible = "ibm,sdram-4xx-ddr2" - }, - { } -}; - -static struct platform_driver ppc4xx_edac_driver = { - .probe = ppc4xx_edac_probe, - .remove = ppc4xx_edac_remove, - .driver = { - .owner = THIS_MODULE, - .name = PPC4XX_EDAC_MODULE_NAME, - .of_match_table = ppc4xx_edac_match, - }, -}; - -/* - * TODO: The row and channel parameters likely need to be dynamically - * set based on the aforementioned variant controller realizations. - */ -static const unsigned ppc4xx_edac_nr_csrows = 2; -static const unsigned ppc4xx_edac_nr_chans = 1; - -/* - * Strings associated with PLB master IDs capable of being posted in - * SDRAM_BESR or SDRAM_WMIRQ on uncorrectable ECC errors. - */ -static const char * const ppc4xx_plb_masters[9] = { - [SDRAM_PLB_M0ID_ICU] = "ICU", - [SDRAM_PLB_M0ID_PCIE0] = "PCI-E 0", - [SDRAM_PLB_M0ID_PCIE1] = "PCI-E 1", - [SDRAM_PLB_M0ID_DMA] = "DMA", - [SDRAM_PLB_M0ID_DCU] = "DCU", - [SDRAM_PLB_M0ID_OPB] = "OPB", - [SDRAM_PLB_M0ID_MAL] = "MAL", - [SDRAM_PLB_M0ID_SEC] = "SEC", - [SDRAM_PLB_M0ID_AHB] = "AHB" -}; - -/** - * mfsdram - read and return controller register data - * @dcr_host: A pointer to the DCR mapping. - * @idcr_n: The indirect DCR register to read. - * - * This routine reads and returns the data associated with the - * controller's specified indirect DCR register. - * - * Returns the read data. - */ -static inline u32 -mfsdram(const dcr_host_t *dcr_host, unsigned int idcr_n) -{ - return __mfdcri(dcr_host->base + SDRAM_DCR_ADDR_OFFSET, - dcr_host->base + SDRAM_DCR_DATA_OFFSET, - idcr_n); -} - -/** - * mtsdram - write controller register data - * @dcr_host: A pointer to the DCR mapping. - * @idcr_n: The indirect DCR register to write. - * @value: The data to write. - * - * This routine writes the provided data to the controller's specified - * indirect DCR register. 
- */ -static inline void -mtsdram(const dcr_host_t *dcr_host, unsigned int idcr_n, u32 value) -{ - return __mtdcri(dcr_host->base + SDRAM_DCR_ADDR_OFFSET, - dcr_host->base + SDRAM_DCR_DATA_OFFSET, - idcr_n, - value); -} - -/** - * ppc4xx_edac_check_bank_error - check a bank for an ECC bank error - * @status: A pointer to the ECC status structure to check for an - * ECC bank error. - * @bank: The bank to check for an ECC error. - * - * This routine determines whether the specified bank has an ECC - * error. - * - * Returns true if the specified bank has an ECC error; otherwise, - * false. - */ -static bool -ppc4xx_edac_check_bank_error(const struct ppc4xx_ecc_status *status, - unsigned int bank) -{ - switch (bank) { - case 0: - return status->ecces & SDRAM_ECCES_BK0ER; - case 1: - return status->ecces & SDRAM_ECCES_BK1ER; - default: - return false; - } -} - -/** - * ppc4xx_edac_generate_bank_message - generate interpretted bank status message - * @mci: A pointer to the EDAC memory controller instance associated - * with the bank message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the portion of the - * driver-unique report message associated with the ECCESS[BKNER] - * field of the specified ECC status. - * - * Returns the number of characters generated on success; otherwise, < - * 0 on error. - */ -static int -ppc4xx_edac_generate_bank_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - int n, total = 0; - unsigned int row, rows; - - n = snprintf(buffer, size, "%s: Banks: ", mci->dev_name); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - for (rows = 0, row = 0; row < mci->nr_csrows; row++) { - if (ppc4xx_edac_check_bank_error(status, row)) { - n = snprintf(buffer, size, "%s%u", - (rows++ ? ", " : ""), row); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - } - } - - n = snprintf(buffer, size, "%s; ", rows ? "" : "None"); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - fail: - return total; -} - -/** - * ppc4xx_edac_generate_checkbit_message - generate interpretted checkbit message - * @mci: A pointer to the EDAC memory controller instance associated - * with the checkbit message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the portion of the - * driver-unique report message associated with the ECCESS[CKBER] - * field of the specified ECC status. - * - * Returns the number of characters generated on success; otherwise, < - * 0 on error. 
- */ -static int -ppc4xx_edac_generate_checkbit_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - const struct ppc4xx_edac_pdata *pdata = mci->pvt_info; - const char *ckber = NULL; - - switch (status->ecces & SDRAM_ECCES_CKBER_MASK) { - case SDRAM_ECCES_CKBER_NONE: - ckber = "None"; - break; - case SDRAM_ECCES_CKBER_32_ECC_0_3: - ckber = "ECC0:3"; - break; - case SDRAM_ECCES_CKBER_32_ECC_4_8: - switch (mfsdram(&pdata->dcr_host, SDRAM_MCOPT1) & - SDRAM_MCOPT1_WDTH_MASK) { - case SDRAM_MCOPT1_WDTH_16: - ckber = "ECC0:3"; - break; - case SDRAM_MCOPT1_WDTH_32: - ckber = "ECC4:8"; - break; - default: - ckber = "Unknown"; - break; - } - break; - case SDRAM_ECCES_CKBER_32_ECC_0_8: - ckber = "ECC0:8"; - break; - default: - ckber = "Unknown"; - break; - } - - return snprintf(buffer, size, "Checkbit Error: %s", ckber); -} - -/** - * ppc4xx_edac_generate_lane_message - generate interpretted byte lane message - * @mci: A pointer to the EDAC memory controller instance associated - * with the byte lane message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the portion of the - * driver-unique report message associated with the ECCESS[BNCE] - * field of the specified ECC status. - * - * Returns the number of characters generated on success; otherwise, < - * 0 on error. - */ -static int -ppc4xx_edac_generate_lane_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - int n, total = 0; - unsigned int lane, lanes; - const unsigned int first_lane = 0; - const unsigned int lane_count = 16; - - n = snprintf(buffer, size, "; Byte Lane Errors: "); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - for (lanes = 0, lane = first_lane; lane < lane_count; lane++) { - if ((status->ecces & SDRAM_ECCES_BNCE_ENCODE(lane)) != 0) { - n = snprintf(buffer, size, - "%s%u", - (lanes++ ? ", " : ""), lane); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - } - } - - n = snprintf(buffer, size, "%s; ", lanes ? "" : "None"); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - fail: - return total; -} - -/** - * ppc4xx_edac_generate_ecc_message - generate interpretted ECC status message - * @mci: A pointer to the EDAC memory controller instance associated - * with the ECCES message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the portion of the - * driver-unique report message associated with the ECCESS register of - * the specified ECC status. - * - * Returns the number of characters generated on success; otherwise, < - * 0 on error. 
- */ -static int -ppc4xx_edac_generate_ecc_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - int n, total = 0; - - n = ppc4xx_edac_generate_bank_message(mci, status, buffer, size); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - n = ppc4xx_edac_generate_checkbit_message(mci, status, buffer, size); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - n = ppc4xx_edac_generate_lane_message(mci, status, buffer, size); - - if (n < 0 || n >= size) - goto fail; - - buffer += n; - size -= n; - total += n; - - fail: - return total; -} - -/** - * ppc4xx_edac_generate_plb_message - generate interpretted PLB status message - * @mci: A pointer to the EDAC memory controller instance associated - * with the PLB message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the portion of the - * driver-unique report message associated with the PLB-related BESR - * and/or WMIRQ registers of the specified ECC status. - * - * Returns the number of characters generated on success; otherwise, < - * 0 on error. - */ -static int -ppc4xx_edac_generate_plb_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - unsigned int master; - bool read; - - if ((status->besr & SDRAM_BESR_MASK) == 0) - return 0; - - if ((status->besr & SDRAM_BESR_M0ET_MASK) == SDRAM_BESR_M0ET_NONE) - return 0; - - read = ((status->besr & SDRAM_BESR_M0RW_MASK) == SDRAM_BESR_M0RW_READ); - - master = SDRAM_BESR_M0ID_DECODE(status->besr); - - return snprintf(buffer, size, - "%s error w/ PLB master %u \"%s\"; ", - (read ? "Read" : "Write"), - master, - (((master >= SDRAM_PLB_M0ID_FIRST) && - (master <= SDRAM_PLB_M0ID_LAST)) ? - ppc4xx_plb_masters[master] : "UNKNOWN")); -} - -/** - * ppc4xx_edac_generate_message - generate interpretted status message - * @mci: A pointer to the EDAC memory controller instance associated - * with the driver-unique message being generated. - * @status: A pointer to the ECC status structure to generate the - * message from. - * @buffer: A pointer to the buffer in which to generate the - * message. - * @size: The size, in bytes, of space available in buffer. - * - * This routine generates to the provided buffer the driver-unique - * EDAC report message from the specified ECC status. - */ -static void -ppc4xx_edac_generate_message(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status, - char *buffer, - size_t size) -{ - int n; - - if (buffer == NULL || size == 0) - return; - - n = ppc4xx_edac_generate_ecc_message(mci, status, buffer, size); - - if (n < 0 || n >= size) - return; - - buffer += n; - size -= n; - - ppc4xx_edac_generate_plb_message(mci, status, buffer, size); -} - -#ifdef DEBUG -/** - * ppc4xx_ecc_dump_status - dump controller ECC status registers - * @mci: A pointer to the EDAC memory controller instance - * associated with the status being dumped. - * @status: A pointer to the ECC status structure to generate the - * dump from. - * - * This routine dumps to the kernel log buffer the raw and - * interpretted specified ECC status. 
- */ -static void -ppc4xx_ecc_dump_status(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status) -{ - char message[PPC4XX_EDAC_MESSAGE_SIZE]; - - ppc4xx_edac_generate_message(mci, status, message, sizeof(message)); - - ppc4xx_edac_mc_printk(KERN_INFO, mci, - "\n" - "\tECCES: 0x%08x\n" - "\tWMIRQ: 0x%08x\n" - "\tBESR: 0x%08x\n" - "\tBEAR: 0x%08x%08x\n" - "\t%s\n", - status->ecces, - status->wmirq, - status->besr, - status->bearh, - status->bearl, - message); -} -#endif /* DEBUG */ - -/** - * ppc4xx_ecc_get_status - get controller ECC status - * @mci: A pointer to the EDAC memory controller instance - * associated with the status being retrieved. - * @status: A pointer to the ECC status structure to populate the - * ECC status with. - * - * This routine reads and masks, as appropriate, all the relevant - * status registers that deal with ibm,sdram-4xx-ddr2 ECC errors. - * While we read all of them, for correctable errors, we only expect - * to deal with ECCES. For uncorrectable errors, we expect to deal - * with all of them. - */ -static void -ppc4xx_ecc_get_status(const struct mem_ctl_info *mci, - struct ppc4xx_ecc_status *status) -{ - const struct ppc4xx_edac_pdata *pdata = mci->pvt_info; - const dcr_host_t *dcr_host = &pdata->dcr_host; - - status->ecces = mfsdram(dcr_host, SDRAM_ECCES) & SDRAM_ECCES_MASK; - status->wmirq = mfsdram(dcr_host, SDRAM_WMIRQ) & SDRAM_WMIRQ_MASK; - status->besr = mfsdram(dcr_host, SDRAM_BESR) & SDRAM_BESR_MASK; - status->bearl = mfsdram(dcr_host, SDRAM_BEARL); - status->bearh = mfsdram(dcr_host, SDRAM_BEARH); -} - -/** - * ppc4xx_ecc_clear_status - clear controller ECC status - * @mci: A pointer to the EDAC memory controller instance - * associated with the status being cleared. - * @status: A pointer to the ECC status structure containing the - * values to write to clear the ECC status. - * - * This routine clears--by writing the masked (as appropriate) status - * values back to--the status registers that deal with - * ibm,sdram-4xx-ddr2 ECC errors. - */ -static void -ppc4xx_ecc_clear_status(const struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status) -{ - const struct ppc4xx_edac_pdata *pdata = mci->pvt_info; - const dcr_host_t *dcr_host = &pdata->dcr_host; - - mtsdram(dcr_host, SDRAM_ECCES, status->ecces & SDRAM_ECCES_MASK); - mtsdram(dcr_host, SDRAM_WMIRQ, status->wmirq & SDRAM_WMIRQ_MASK); - mtsdram(dcr_host, SDRAM_BESR, status->besr & SDRAM_BESR_MASK); - mtsdram(dcr_host, SDRAM_BEARL, 0); - mtsdram(dcr_host, SDRAM_BEARH, 0); -} - -/** - * ppc4xx_edac_handle_ce - handle controller correctable ECC error (CE) - * @mci: A pointer to the EDAC memory controller instance - * associated with the correctable error being handled and reported. - * @status: A pointer to the ECC status structure associated with - * the correctable error being handled and reported. - * - * This routine handles an ibm,sdram-4xx-ddr2 controller ECC - * correctable error. Per the aforementioned discussion, there's not - * enough status available to use the full EDAC correctable error - * interface, so we just pass driver-unique message to the "no info" - * interface. 
- */ -static void -ppc4xx_edac_handle_ce(struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status) -{ - int row; - char message[PPC4XX_EDAC_MESSAGE_SIZE]; - - ppc4xx_edac_generate_message(mci, status, message, sizeof(message)); - - for (row = 0; row < mci->nr_csrows; row++) - if (ppc4xx_edac_check_bank_error(status, row)) - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - 0, 0, 0, - row, 0, -1, - message, ""); -} - -/** - * ppc4xx_edac_handle_ue - handle controller uncorrectable ECC error (UE) - * @mci: A pointer to the EDAC memory controller instance - * associated with the uncorrectable error being handled and - * reported. - * @status: A pointer to the ECC status structure associated with - * the uncorrectable error being handled and reported. - * - * This routine handles an ibm,sdram-4xx-ddr2 controller ECC - * uncorrectable error. - */ -static void -ppc4xx_edac_handle_ue(struct mem_ctl_info *mci, - const struct ppc4xx_ecc_status *status) -{ - const u64 bear = ((u64)status->bearh << 32 | status->bearl); - const unsigned long page = bear >> PAGE_SHIFT; - const unsigned long offset = bear & ~PAGE_MASK; - int row; - char message[PPC4XX_EDAC_MESSAGE_SIZE]; - - ppc4xx_edac_generate_message(mci, status, message, sizeof(message)); - - for (row = 0; row < mci->nr_csrows; row++) - if (ppc4xx_edac_check_bank_error(status, row)) - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - page, offset, 0, - row, 0, -1, - message, ""); -} - -/** - * ppc4xx_edac_check - check controller for ECC errors - * @mci: A pointer to the EDAC memory controller instance - * associated with the ibm,sdram-4xx-ddr2 controller being - * checked. - * - * This routine is used to check and post ECC errors and is called by - * both the EDAC polling thread and this driver's CE and UE interrupt - * handler. - */ -static void -ppc4xx_edac_check(struct mem_ctl_info *mci) -{ -#ifdef DEBUG - static unsigned int count; -#endif - struct ppc4xx_ecc_status status; - - ppc4xx_ecc_get_status(mci, &status); - -#ifdef DEBUG - if (count++ % 30 == 0) - ppc4xx_ecc_dump_status(mci, &status); -#endif - - if (status.ecces & SDRAM_ECCES_UE) - ppc4xx_edac_handle_ue(mci, &status); - - if (status.ecces & SDRAM_ECCES_CE) - ppc4xx_edac_handle_ce(mci, &status); - - ppc4xx_ecc_clear_status(mci, &status); -} - -/** - * ppc4xx_edac_isr - SEC (CE) and DED (UE) interrupt service routine - * @irq: The virtual interrupt number being serviced. - * @dev_id: A pointer to the EDAC memory controller instance - * associated with the interrupt being handled. - * - * This routine implements the interrupt handler for both correctable - * (CE) and uncorrectable (UE) ECC errors for the ibm,sdram-4xx-ddr2 - * controller. It simply calls through to the same routine used during - * polling to check, report and clear the ECC status. - * - * Unconditionally returns IRQ_HANDLED. - */ -static irqreturn_t -ppc4xx_edac_isr(int irq, void *dev_id) -{ - struct mem_ctl_info *mci = dev_id; - - ppc4xx_edac_check(mci); - - return IRQ_HANDLED; -} - -/** - * ppc4xx_edac_get_dtype - return the controller memory width - * @mcopt1: The 32-bit Memory Controller Option 1 register value - * currently set for the controller, from which the width - * is derived. - * - * This routine returns the EDAC device type width appropriate for the - * current controller configuration. 
- * - * TODO: This needs to be conditioned dynamically through feature - * flags or some such when other controller variants are supported as - * the 405EX[r] is 16-/32-bit and the others are 32-/64-bit with the - * 16- and 64-bit field definition/value/enumeration (b1) overloaded - * among them. - * - * Returns a device type width enumeration. - */ -static enum dev_type ppc4xx_edac_get_dtype(u32 mcopt1) -{ - switch (mcopt1 & SDRAM_MCOPT1_WDTH_MASK) { - case SDRAM_MCOPT1_WDTH_16: - return DEV_X2; - case SDRAM_MCOPT1_WDTH_32: - return DEV_X4; - default: - return DEV_UNKNOWN; - } -} - -/** - * ppc4xx_edac_get_mtype - return controller memory type - * @mcopt1: The 32-bit Memory Controller Option 1 register value - * currently set for the controller, from which the memory type - * is derived. - * - * This routine returns the EDAC memory type appropriate for the - * current controller configuration. - * - * Returns a memory type enumeration. - */ -static enum mem_type ppc4xx_edac_get_mtype(u32 mcopt1) -{ - bool rden = ((mcopt1 & SDRAM_MCOPT1_RDEN_MASK) == SDRAM_MCOPT1_RDEN); - - switch (mcopt1 & SDRAM_MCOPT1_DDR_TYPE_MASK) { - case SDRAM_MCOPT1_DDR2_TYPE: - return rden ? MEM_RDDR2 : MEM_DDR2; - case SDRAM_MCOPT1_DDR1_TYPE: - return rden ? MEM_RDDR : MEM_DDR; - default: - return MEM_UNKNOWN; - } -} - -/** - * ppc4xx_edac_init_csrows - initialize driver instance rows - * @mci: A pointer to the EDAC memory controller instance - * associated with the ibm,sdram-4xx-ddr2 controller for which - * the csrows (i.e. banks/ranks) are being initialized. - * @mcopt1: The 32-bit Memory Controller Option 1 register value - * currently set for the controller, from which bank width - * and memory typ information is derived. - * - * This routine initializes the virtual "chip select rows" associated - * with the EDAC memory controller instance. An ibm,sdram-4xx-ddr2 - * controller bank/rank is mapped to a row. - * - * Returns 0 if OK; otherwise, -EINVAL if the memory bank size - * configuration cannot be determined. - */ -static int ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) -{ - const struct ppc4xx_edac_pdata *pdata = mci->pvt_info; - int status = 0; - enum mem_type mtype; - enum dev_type dtype; - enum edac_type edac_mode; - int row, j; - u32 mbxcf, size, nr_pages; - - /* Establish the memory type and width */ - - mtype = ppc4xx_edac_get_mtype(mcopt1); - dtype = ppc4xx_edac_get_dtype(mcopt1); - - /* Establish EDAC mode */ - - if (mci->edac_cap & EDAC_FLAG_SECDED) - edac_mode = EDAC_SECDED; - else if (mci->edac_cap & EDAC_FLAG_EC) - edac_mode = EDAC_EC; - else - edac_mode = EDAC_NONE; - - /* - * Initialize each chip select row structure which correspond - * 1:1 with a controller bank/rank. - */ - - for (row = 0; row < mci->nr_csrows; row++) { - struct csrow_info *csi = &mci->csrows[row]; - - /* - * Get the configuration settings for this - * row/bank/rank and skip disabled banks. - */ - - mbxcf = mfsdram(&pdata->dcr_host, SDRAM_MBXCF(row)); - - if ((mbxcf & SDRAM_MBCF_BE_MASK) != SDRAM_MBCF_BE_ENABLE) - continue; - - /* Map the bank configuration size setting to pages. 
*/ - - size = mbxcf & SDRAM_MBCF_SZ_MASK; - - switch (size) { - case SDRAM_MBCF_SZ_4MB: - case SDRAM_MBCF_SZ_8MB: - case SDRAM_MBCF_SZ_16MB: - case SDRAM_MBCF_SZ_32MB: - case SDRAM_MBCF_SZ_64MB: - case SDRAM_MBCF_SZ_128MB: - case SDRAM_MBCF_SZ_256MB: - case SDRAM_MBCF_SZ_512MB: - case SDRAM_MBCF_SZ_1GB: - case SDRAM_MBCF_SZ_2GB: - case SDRAM_MBCF_SZ_4GB: - case SDRAM_MBCF_SZ_8GB: - nr_pages = SDRAM_MBCF_SZ_TO_PAGES(size); - break; - default: - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Unrecognized memory bank %d " - "size 0x%08x\n", - row, SDRAM_MBCF_SZ_DECODE(size)); - status = -EINVAL; - goto done; - } - - /* - * It's unclear exactly what grain should be set to - * here. The SDRAM_ECCES register allows resolution of - * an error down to a nibble which would potentially - * argue for a grain of '1' byte, even though we only - * know the associated address for uncorrectable - * errors. This value is not used at present for - * anything other than error reporting so getting it - * wrong should be of little consequence. Other - * possible values would be the PLB width (16), the - * page size (PAGE_SIZE) or the memory width (2 or 4). - */ - for (j = 0; j < csi->nr_channels; j++) { - struct dimm_info *dimm = csi->channels[j].dimm; - - dimm->nr_pages = nr_pages / csi->nr_channels; - dimm->grain = 1; - - dimm->mtype = mtype; - dimm->dtype = dtype; - - dimm->edac_mode = edac_mode; - } - } - - done: - return status; -} - -/** - * ppc4xx_edac_mc_init - initialize driver instance - * @mci: A pointer to the EDAC memory controller instance being - * initialized. - * @op: A pointer to the OpenFirmware device tree node associated - * with the controller this EDAC instance is bound to. - * @dcr_host: A pointer to the DCR data containing the DCR mapping - * for this controller instance. - * @mcopt1: The 32-bit Memory Controller Option 1 register value - * currently set for the controller, from which ECC capabilities - * and scrub mode are derived. - * - * This routine performs initialization of the EDAC memory controller - * instance and related driver-private data associated with the - * ibm,sdram-4xx-ddr2 memory controller the instance is bound to. - * - * Returns 0 if OK; otherwise, < 0 on error. - */ -static int ppc4xx_edac_mc_init(struct mem_ctl_info *mci, - struct platform_device *op, - const dcr_host_t *dcr_host, u32 mcopt1) -{ - int status = 0; - const u32 memcheck = (mcopt1 & SDRAM_MCOPT1_MCHK_MASK); - struct ppc4xx_edac_pdata *pdata = NULL; - const struct device_node *np = op->dev.of_node; - - if (of_match_device(ppc4xx_edac_match, &op->dev) == NULL) - return -EINVAL; - - /* Initial driver pointers and private data */ - - mci->pdev = &op->dev; - - dev_set_drvdata(mci->pdev, mci); - - pdata = mci->pvt_info; - - pdata->dcr_host = *dcr_host; - pdata->irqs.sec = NO_IRQ; - pdata->irqs.ded = NO_IRQ; - - /* Initialize controller capabilities and configuration */ - - mci->mtype_cap = (MEM_FLAG_DDR | MEM_FLAG_RDDR | - MEM_FLAG_DDR2 | MEM_FLAG_RDDR2); - - mci->edac_ctl_cap = (EDAC_FLAG_NONE | - EDAC_FLAG_EC | - EDAC_FLAG_SECDED); - - mci->scrub_cap = SCRUB_NONE; - mci->scrub_mode = SCRUB_NONE; - - /* - * Update the actual capabilites based on the MCOPT1[MCHK] - * settings. Scrubbing is only useful if reporting is enabled. 
- */ - - switch (memcheck) { - case SDRAM_MCOPT1_MCHK_CHK: - mci->edac_cap = EDAC_FLAG_EC; - break; - case SDRAM_MCOPT1_MCHK_CHK_REP: - mci->edac_cap = (EDAC_FLAG_EC | EDAC_FLAG_SECDED); - mci->scrub_mode = SCRUB_SW_SRC; - break; - default: - mci->edac_cap = EDAC_FLAG_NONE; - break; - } - - /* Initialize strings */ - - mci->mod_name = PPC4XX_EDAC_MODULE_NAME; - mci->mod_ver = PPC4XX_EDAC_MODULE_REVISION; - mci->ctl_name = ppc4xx_edac_match->compatible, - mci->dev_name = np->full_name; - - /* Initialize callbacks */ - - mci->edac_check = ppc4xx_edac_check; - mci->ctl_page_to_phys = NULL; - - /* Initialize chip select rows */ - - status = ppc4xx_edac_init_csrows(mci, mcopt1); - - if (status) - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Failed to initialize rows!\n"); - - return status; -} - -/** - * ppc4xx_edac_register_irq - setup and register controller interrupts - * @op: A pointer to the OpenFirmware device tree node associated - * with the controller this EDAC instance is bound to. - * @mci: A pointer to the EDAC memory controller instance - * associated with the ibm,sdram-4xx-ddr2 controller for which - * interrupts are being registered. - * - * This routine parses the correctable (CE) and uncorrectable error (UE) - * interrupts from the device tree node and maps and assigns them to - * the associated EDAC memory controller instance. - * - * Returns 0 if OK; otherwise, -ENODEV if the interrupts could not be - * mapped and assigned. - */ -static int ppc4xx_edac_register_irq(struct platform_device *op, - struct mem_ctl_info *mci) -{ - int status = 0; - int ded_irq, sec_irq; - struct ppc4xx_edac_pdata *pdata = mci->pvt_info; - struct device_node *np = op->dev.of_node; - - ded_irq = irq_of_parse_and_map(np, INTMAP_ECCDED_INDEX); - sec_irq = irq_of_parse_and_map(np, INTMAP_ECCSEC_INDEX); - - if (ded_irq == NO_IRQ || sec_irq == NO_IRQ) { - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Unable to map interrupts.\n"); - status = -ENODEV; - goto fail; - } - - status = request_irq(ded_irq, - ppc4xx_edac_isr, - IRQF_DISABLED, - "[EDAC] MC ECCDED", - mci); - - if (status < 0) { - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Unable to request irq %d for ECC DED", - ded_irq); - status = -ENODEV; - goto fail1; - } - - status = request_irq(sec_irq, - ppc4xx_edac_isr, - IRQF_DISABLED, - "[EDAC] MC ECCSEC", - mci); - - if (status < 0) { - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Unable to request irq %d for ECC SEC", - sec_irq); - status = -ENODEV; - goto fail2; - } - - ppc4xx_edac_mc_printk(KERN_INFO, mci, "ECCDED irq is %d\n", ded_irq); - ppc4xx_edac_mc_printk(KERN_INFO, mci, "ECCSEC irq is %d\n", sec_irq); - - pdata->irqs.ded = ded_irq; - pdata->irqs.sec = sec_irq; - - return 0; - - fail2: - free_irq(sec_irq, mci); - - fail1: - free_irq(ded_irq, mci); - - fail: - return status; -} - -/** - * ppc4xx_edac_map_dcrs - locate and map controller registers - * @np: A pointer to the device tree node containing the DCR - * resources to map. - * @dcr_host: A pointer to the DCR data to populate with the - * DCR mapping. - * - * This routine attempts to locate in the device tree and map the DCR - * register resources associated with the controller's indirect DCR - * address and data windows. - * - * Returns 0 if the DCRs were successfully mapped; otherwise, < 0 on - * error. - */ -static int ppc4xx_edac_map_dcrs(const struct device_node *np, - dcr_host_t *dcr_host) -{ - unsigned int dcr_base, dcr_len; - - if (np == NULL || dcr_host == NULL) - return -EINVAL; - - /* Get the DCR resource extent and sanity check the values. 
*/ - - dcr_base = dcr_resource_start(np, 0); - dcr_len = dcr_resource_len(np, 0); - - if (dcr_base == 0 || dcr_len == 0) { - ppc4xx_edac_printk(KERN_ERR, - "Failed to obtain DCR property.\n"); - return -ENODEV; - } - - if (dcr_len != SDRAM_DCR_RESOURCE_LEN) { - ppc4xx_edac_printk(KERN_ERR, - "Unexpected DCR length %d, expected %d.\n", - dcr_len, SDRAM_DCR_RESOURCE_LEN); - return -ENODEV; - } - - /* Attempt to map the DCR extent. */ - - *dcr_host = dcr_map(np, dcr_base, dcr_len); - - if (!DCR_MAP_OK(*dcr_host)) { - ppc4xx_edac_printk(KERN_INFO, "Failed to map DCRs.\n"); - return -ENODEV; - } - - return 0; -} - -/** - * ppc4xx_edac_probe - check controller and bind driver - * @op: A pointer to the OpenFirmware device tree node associated - * with the controller being probed for driver binding. - * - * This routine probes a specific ibm,sdram-4xx-ddr2 controller - * instance for binding with the driver. - * - * Returns 0 if the controller instance was successfully bound to the - * driver; otherwise, < 0 on error. - */ -static int ppc4xx_edac_probe(struct platform_device *op) -{ - int status = 0; - u32 mcopt1, memcheck; - dcr_host_t dcr_host; - const struct device_node *np = op->dev.of_node; - struct mem_ctl_info *mci = NULL; - struct edac_mc_layer layers[2]; - static int ppc4xx_edac_instance; - - /* - * At this point, we only support the controller realized on - * the AMCC PPC 405EX[r]. Reject anything else. - */ - - if (!of_device_is_compatible(np, "ibm,sdram-405ex") && - !of_device_is_compatible(np, "ibm,sdram-405exr")) { - ppc4xx_edac_printk(KERN_NOTICE, - "Only the PPC405EX[r] is supported.\n"); - return -ENODEV; - } - - /* - * Next, get the DCR property and attempt to map it so that we - * can probe the controller. - */ - - status = ppc4xx_edac_map_dcrs(np, &dcr_host); - - if (status) - return status; - - /* - * First determine whether ECC is enabled at all. If not, - * there is no useful checking or monitoring that can be done - * for this controller. - */ - - mcopt1 = mfsdram(&dcr_host, SDRAM_MCOPT1); - memcheck = (mcopt1 & SDRAM_MCOPT1_MCHK_MASK); - - if (memcheck == SDRAM_MCOPT1_MCHK_NON) { - ppc4xx_edac_printk(KERN_INFO, "%s: No ECC memory detected or " - "ECC is disabled.\n", np->full_name); - status = -ENODEV; - goto done; - } - - /* - * At this point, we know ECC is enabled, allocate an EDAC - * controller instance and perform the appropriate - * initialization. - */ - layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = ppc4xx_edac_nr_csrows; - layers[0].is_virt_csrow = true; - layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = ppc4xx_edac_nr_chans; - layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(ppc4xx_edac_instance, ARRAY_SIZE(layers), layers, - sizeof(struct ppc4xx_edac_pdata)); - if (mci == NULL) { - ppc4xx_edac_printk(KERN_ERR, "%s: " - "Failed to allocate EDAC MC instance!\n", - np->full_name); - status = -ENOMEM; - goto done; - } - - status = ppc4xx_edac_mc_init(mci, op, &dcr_host, mcopt1); - - if (status) { - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Failed to initialize instance!\n"); - goto fail; - } - - /* - * We have a valid, initialized EDAC instance bound to the - * controller. Attempt to register it with the EDAC subsystem - * and, if necessary, register interrupts. 
- */ - - if (edac_mc_add_mc(mci)) { - ppc4xx_edac_mc_printk(KERN_ERR, mci, - "Failed to add instance!\n"); - status = -ENODEV; - goto fail; - } - - if (edac_op_state == EDAC_OPSTATE_INT) { - status = ppc4xx_edac_register_irq(op, mci); - - if (status) - goto fail1; - } - - ppc4xx_edac_instance++; - - return 0; - - fail1: - edac_mc_del_mc(mci->pdev); - - fail: - edac_mc_free(mci); - - done: - return status; -} - -/** - * ppc4xx_edac_remove - unbind driver from controller - * @op: A pointer to the OpenFirmware device tree node associated - * with the controller this EDAC instance is to be unbound/removed - * from. - * - * This routine unbinds the EDAC memory controller instance associated - * with the specified ibm,sdram-4xx-ddr2 controller described by the - * OpenFirmware device tree node passed as a parameter. - * - * Unconditionally returns 0. - */ -static int -ppc4xx_edac_remove(struct platform_device *op) -{ - struct mem_ctl_info *mci = dev_get_drvdata(&op->dev); - struct ppc4xx_edac_pdata *pdata = mci->pvt_info; - - if (edac_op_state == EDAC_OPSTATE_INT) { - free_irq(pdata->irqs.sec, mci); - free_irq(pdata->irqs.ded, mci); - } - - dcr_unmap(pdata->dcr_host, SDRAM_DCR_RESOURCE_LEN); - - edac_mc_del_mc(mci->pdev); - edac_mc_free(mci); - - return 0; -} - -/** - * ppc4xx_edac_opstate_init - initialize EDAC reporting method - * - * This routine ensures that the EDAC memory controller reporting - * method is mapped to a sane value as the EDAC core defines the value - * to EDAC_OPSTATE_INVAL by default. We don't call the global - * opstate_init as that defaults to polling and we want interrupt as - * the default. - */ -static inline void __init -ppc4xx_edac_opstate_init(void) -{ - switch (edac_op_state) { - case EDAC_OPSTATE_POLL: - case EDAC_OPSTATE_INT: - break; - default: - edac_op_state = EDAC_OPSTATE_INT; - break; - } - - ppc4xx_edac_printk(KERN_INFO, "Reporting type: %s\n", - ((edac_op_state == EDAC_OPSTATE_POLL) ? - EDAC_OPSTATE_POLL_STR : - ((edac_op_state == EDAC_OPSTATE_INT) ? - EDAC_OPSTATE_INT_STR : - EDAC_OPSTATE_UNKNOWN_STR))); -} - -/** - * ppc4xx_edac_init - driver/module insertion entry point - * - * This routine is the driver/module insertion entry point. It - * initializes the EDAC memory controller reporting state and - * registers the driver as an OpenFirmware device tree platform - * driver. - */ -static int __init -ppc4xx_edac_init(void) -{ - ppc4xx_edac_printk(KERN_INFO, PPC4XX_EDAC_MODULE_REVISION "\n"); - - ppc4xx_edac_opstate_init(); - - return platform_driver_register(&ppc4xx_edac_driver); -} - -/** - * ppc4xx_edac_exit - driver/module removal entry point - * - * This routine is the driver/module removal entry point. It - * unregisters the driver as an OpenFirmware device tree platform - * driver. 
- */ -static void __exit -ppc4xx_edac_exit(void) -{ - platform_driver_unregister(&ppc4xx_edac_driver); -} - -module_init(ppc4xx_edac_init); -module_exit(ppc4xx_edac_exit); - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Grant Erickson <gerickson@nuovations.com>"); -MODULE_DESCRIPTION("EDAC MC Driver for the PPC4xx IBM DDR2 Memory Controller"); -module_param(edac_op_state, int, 0444); -MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting State: " - "0=" EDAC_OPSTATE_POLL_STR ", 2=" EDAC_OPSTATE_INT_STR); diff --git a/drivers/edac/ppc4xx_edac.h b/drivers/edac/ppc4xx_edac.h deleted file mode 100644 index d3154764c449..000000000000 --- a/drivers/edac/ppc4xx_edac.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2008 Nuovation System Designs, LLC - * Grant Erickson <gerickson@nuovations.com> - * - * This file defines processor mnemonics for accessing and managing - * the IBM DDR1/DDR2 ECC controller found in the 405EX[r], 440SP, - * 440SPe, 460EX, 460GT and 460SX. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; version 2 of the - * License. - * - */ - -#ifndef __PPC4XX_EDAC_H -#define __PPC4XX_EDAC_H - -#include <linux/types.h> - -/* - * Macro for generating register field mnemonics - */ -#define PPC_REG_BITS 32 -#define PPC_REG_VAL(bit, val) ((val) << ((PPC_REG_BITS - 1) - (bit))) -#define PPC_REG_DECODE(bit, val) ((val) >> ((PPC_REG_BITS - 1) - (bit))) - -/* - * IBM 4xx DDR1/DDR2 SDRAM memory controller registers (at least those - * relevant to ECC) - */ -#define SDRAM_BESR 0x00 /* Error status (read/clear) */ -#define SDRAM_BESRT 0x01 /* Error statuss (test/set) */ -#define SDRAM_BEARL 0x02 /* Error address low */ -#define SDRAM_BEARH 0x03 /* Error address high */ -#define SDRAM_WMIRQ 0x06 /* Write master (read/clear) */ -#define SDRAM_WMIRQT 0x07 /* Write master (test/set) */ -#define SDRAM_MCOPT1 0x20 /* Controller options 1 */ -#define SDRAM_MBXCF_BASE 0x40 /* Bank n configuration base */ -#define SDRAM_MBXCF(n) (SDRAM_MBXCF_BASE + (4 * (n))) -#define SDRAM_MB0CF SDRAM_MBXCF(0) -#define SDRAM_MB1CF SDRAM_MBXCF(1) -#define SDRAM_MB2CF SDRAM_MBXCF(2) -#define SDRAM_MB3CF SDRAM_MBXCF(3) -#define SDRAM_ECCCR 0x98 /* ECC error status */ -#define SDRAM_ECCES SDRAM_ECCCR - -/* - * PLB Master IDs - */ -#define SDRAM_PLB_M0ID_FIRST 0 -#define SDRAM_PLB_M0ID_ICU SDRAM_PLB_M0ID_FIRST -#define SDRAM_PLB_M0ID_PCIE0 1 -#define SDRAM_PLB_M0ID_PCIE1 2 -#define SDRAM_PLB_M0ID_DMA 3 -#define SDRAM_PLB_M0ID_DCU 4 -#define SDRAM_PLB_M0ID_OPB 5 -#define SDRAM_PLB_M0ID_MAL 6 -#define SDRAM_PLB_M0ID_SEC 7 -#define SDRAM_PLB_M0ID_AHB 8 -#define SDRAM_PLB_M0ID_LAST SDRAM_PLB_M0ID_AHB -#define SDRAM_PLB_M0ID_COUNT (SDRAM_PLB_M0ID_LAST - \ - SDRAM_PLB_M0ID_FIRST + 1) - -/* - * Memory Controller Bus Error Status Register - */ -#define SDRAM_BESR_MASK PPC_REG_VAL(7, 0xFF) -#define SDRAM_BESR_M0ID_MASK PPC_REG_VAL(3, 0xF) -#define SDRAM_BESR_M0ID_DECODE(n) PPC_REG_DECODE(3, n) -#define SDRAM_BESR_M0ID_ICU PPC_REG_VAL(3, SDRAM_PLB_M0ID_ICU) -#define SDRAM_BESR_M0ID_PCIE0 PPC_REG_VAL(3, SDRAM_PLB_M0ID_PCIE0) -#define SDRAM_BESR_M0ID_PCIE1 PPC_REG_VAL(3, SDRAM_PLB_M0ID_PCIE1) -#define SDRAM_BESR_M0ID_DMA PPC_REG_VAL(3, SDRAM_PLB_M0ID_DMA) -#define SDRAM_BESR_M0ID_DCU PPC_REG_VAL(3, SDRAM_PLB_M0ID_DCU) -#define SDRAM_BESR_M0ID_OPB PPC_REG_VAL(3, SDRAM_PLB_M0ID_OPB) -#define SDRAM_BESR_M0ID_MAL PPC_REG_VAL(3, SDRAM_PLB_M0ID_MAL) -#define SDRAM_BESR_M0ID_SEC PPC_REG_VAL(3, 
SDRAM_PLB_M0ID_SEC) -#define SDRAM_BESR_M0ID_AHB PPC_REG_VAL(3, SDRAM_PLB_M0ID_AHB) -#define SDRAM_BESR_M0ET_MASK PPC_REG_VAL(6, 0x7) -#define SDRAM_BESR_M0ET_NONE PPC_REG_VAL(6, 0) -#define SDRAM_BESR_M0ET_ECC PPC_REG_VAL(6, 1) -#define SDRAM_BESR_M0RW_MASK PPC_REG_VAL(7, 1) -#define SDRAM_BESR_M0RW_WRITE PPC_REG_VAL(7, 0) -#define SDRAM_BESR_M0RW_READ PPC_REG_VAL(7, 1) - -/* - * Memory Controller PLB Write Master Interrupt Register - */ -#define SDRAM_WMIRQ_MASK PPC_REG_VAL(8, 0x1FF) -#define SDRAM_WMIRQ_ENCODE(id) PPC_REG_VAL((id % \ - SDRAM_PLB_M0ID_COUNT), 1) -#define SDRAM_WMIRQ_ICU PPC_REG_VAL(SDRAM_PLB_M0ID_ICU, 1) -#define SDRAM_WMIRQ_PCIE0 PPC_REG_VAL(SDRAM_PLB_M0ID_PCIE0, 1) -#define SDRAM_WMIRQ_PCIE1 PPC_REG_VAL(SDRAM_PLB_M0ID_PCIE1, 1) -#define SDRAM_WMIRQ_DMA PPC_REG_VAL(SDRAM_PLB_M0ID_DMA, 1) -#define SDRAM_WMIRQ_DCU PPC_REG_VAL(SDRAM_PLB_M0ID_DCU, 1) -#define SDRAM_WMIRQ_OPB PPC_REG_VAL(SDRAM_PLB_M0ID_OPB, 1) -#define SDRAM_WMIRQ_MAL PPC_REG_VAL(SDRAM_PLB_M0ID_MAL, 1) -#define SDRAM_WMIRQ_SEC PPC_REG_VAL(SDRAM_PLB_M0ID_SEC, 1) -#define SDRAM_WMIRQ_AHB PPC_REG_VAL(SDRAM_PLB_M0ID_AHB, 1) - -/* - * Memory Controller Options 1 Register - */ -#define SDRAM_MCOPT1_MCHK_MASK PPC_REG_VAL(3, 0x3) /* ECC mask */ -#define SDRAM_MCOPT1_MCHK_NON PPC_REG_VAL(3, 0x0) /* No ECC gen */ -#define SDRAM_MCOPT1_MCHK_GEN PPC_REG_VAL(3, 0x2) /* ECC gen */ -#define SDRAM_MCOPT1_MCHK_CHK PPC_REG_VAL(3, 0x1) /* ECC gen and chk */ -#define SDRAM_MCOPT1_MCHK_CHK_REP PPC_REG_VAL(3, 0x3) /* ECC gen/chk/rpt */ -#define SDRAM_MCOPT1_MCHK_DECODE(n) ((((u32)(n)) >> 28) & 0x3) -#define SDRAM_MCOPT1_RDEN_MASK PPC_REG_VAL(4, 0x1) /* Rgstrd DIMM mask */ -#define SDRAM_MCOPT1_RDEN PPC_REG_VAL(4, 0x1) /* Rgstrd DIMM enbl */ -#define SDRAM_MCOPT1_WDTH_MASK PPC_REG_VAL(7, 0x1) /* Width mask */ -#define SDRAM_MCOPT1_WDTH_32 PPC_REG_VAL(7, 0x0) /* 32 bits */ -#define SDRAM_MCOPT1_WDTH_16 PPC_REG_VAL(7, 0x1) /* 16 bits */ -#define SDRAM_MCOPT1_DDR_TYPE_MASK PPC_REG_VAL(11, 0x1) /* DDR type mask */ -#define SDRAM_MCOPT1_DDR1_TYPE PPC_REG_VAL(11, 0x0) /* DDR1 type */ -#define SDRAM_MCOPT1_DDR2_TYPE PPC_REG_VAL(11, 0x1) /* DDR2 type */ - -/* - * Memory Bank 0 - n Configuration Register - */ -#define SDRAM_MBCF_BA_MASK PPC_REG_VAL(12, 0x1FFF) -#define SDRAM_MBCF_SZ_MASK PPC_REG_VAL(19, 0xF) -#define SDRAM_MBCF_SZ_DECODE(mbxcf) PPC_REG_DECODE(19, mbxcf) -#define SDRAM_MBCF_SZ_4MB PPC_REG_VAL(19, 0x0) -#define SDRAM_MBCF_SZ_8MB PPC_REG_VAL(19, 0x1) -#define SDRAM_MBCF_SZ_16MB PPC_REG_VAL(19, 0x2) -#define SDRAM_MBCF_SZ_32MB PPC_REG_VAL(19, 0x3) -#define SDRAM_MBCF_SZ_64MB PPC_REG_VAL(19, 0x4) -#define SDRAM_MBCF_SZ_128MB PPC_REG_VAL(19, 0x5) -#define SDRAM_MBCF_SZ_256MB PPC_REG_VAL(19, 0x6) -#define SDRAM_MBCF_SZ_512MB PPC_REG_VAL(19, 0x7) -#define SDRAM_MBCF_SZ_1GB PPC_REG_VAL(19, 0x8) -#define SDRAM_MBCF_SZ_2GB PPC_REG_VAL(19, 0x9) -#define SDRAM_MBCF_SZ_4GB PPC_REG_VAL(19, 0xA) -#define SDRAM_MBCF_SZ_8GB PPC_REG_VAL(19, 0xB) -#define SDRAM_MBCF_AM_MASK PPC_REG_VAL(23, 0xF) -#define SDRAM_MBCF_AM_MODE0 PPC_REG_VAL(23, 0x0) -#define SDRAM_MBCF_AM_MODE1 PPC_REG_VAL(23, 0x1) -#define SDRAM_MBCF_AM_MODE2 PPC_REG_VAL(23, 0x2) -#define SDRAM_MBCF_AM_MODE3 PPC_REG_VAL(23, 0x3) -#define SDRAM_MBCF_AM_MODE4 PPC_REG_VAL(23, 0x4) -#define SDRAM_MBCF_AM_MODE5 PPC_REG_VAL(23, 0x5) -#define SDRAM_MBCF_AM_MODE6 PPC_REG_VAL(23, 0x6) -#define SDRAM_MBCF_AM_MODE7 PPC_REG_VAL(23, 0x7) -#define SDRAM_MBCF_AM_MODE8 PPC_REG_VAL(23, 0x8) -#define SDRAM_MBCF_AM_MODE9 PPC_REG_VAL(23, 0x9) -#define SDRAM_MBCF_BE_MASK PPC_REG_VAL(31, 0x1) -#define 
SDRAM_MBCF_BE_DISABLE PPC_REG_VAL(31, 0x0) -#define SDRAM_MBCF_BE_ENABLE PPC_REG_VAL(31, 0x1) - -/* - * ECC Error Status - */ -#define SDRAM_ECCES_MASK PPC_REG_VAL(21, 0x3FFFFF) -#define SDRAM_ECCES_BNCE_MASK PPC_REG_VAL(15, 0xFFFF) -#define SDRAM_ECCES_BNCE_ENCODE(lane) PPC_REG_VAL(((lane) & 0xF), 1) -#define SDRAM_ECCES_CKBER_MASK PPC_REG_VAL(17, 0x3) -#define SDRAM_ECCES_CKBER_NONE PPC_REG_VAL(17, 0) -#define SDRAM_ECCES_CKBER_16_ECC_0_3 PPC_REG_VAL(17, 2) -#define SDRAM_ECCES_CKBER_32_ECC_0_3 PPC_REG_VAL(17, 1) -#define SDRAM_ECCES_CKBER_32_ECC_4_8 PPC_REG_VAL(17, 2) -#define SDRAM_ECCES_CKBER_32_ECC_0_8 PPC_REG_VAL(17, 3) -#define SDRAM_ECCES_CE PPC_REG_VAL(18, 1) -#define SDRAM_ECCES_UE PPC_REG_VAL(19, 1) -#define SDRAM_ECCES_BKNER_MASK PPC_REG_VAL(21, 0x3) -#define SDRAM_ECCES_BK0ER PPC_REG_VAL(20, 1) -#define SDRAM_ECCES_BK1ER PPC_REG_VAL(21, 1) - -#endif /* __PPC4XX_EDAC_H */ diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c new file mode 100644 index 000000000000..f3da9385ca0d --- /dev/null +++ b/drivers/edac/qcom_edac.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + */ + +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/soc/qcom/llcc-qcom.h> + +#include "edac_mc.h" +#include "edac_device.h" + +#define EDAC_LLCC "qcom_llcc" + +#define LLCC_ERP_PANIC_ON_UE 1 + +#define TRP_SYN_REG_CNT 6 +#define DRP_SYN_REG_CNT 8 + +#define LLCC_LB_CNT_MASK GENMASK(31, 28) +#define LLCC_LB_CNT_SHIFT 28 + +/* Mask and shift macros */ +#define ECC_DB_ERR_COUNT_MASK GENMASK(4, 0) +#define ECC_DB_ERR_WAYS_MASK GENMASK(31, 16) +#define ECC_DB_ERR_WAYS_SHIFT BIT(4) + +#define ECC_SB_ERR_COUNT_MASK GENMASK(23, 16) +#define ECC_SB_ERR_COUNT_SHIFT BIT(4) +#define ECC_SB_ERR_WAYS_MASK GENMASK(15, 0) + +#define SB_ECC_ERROR BIT(0) +#define DB_ECC_ERROR BIT(1) + +#define DRP_TRP_INT_CLEAR GENMASK(1, 0) +#define DRP_TRP_CNT_CLEAR GENMASK(1, 0) + +#define SB_ERROR_THRESHOLD 0x1 +#define SB_ERROR_THRESHOLD_SHIFT 24 +#define SB_DB_TRP_INTERRUPT_ENABLE 0x3 +#define TRP0_INTERRUPT_ENABLE 0x1 +#define DRP0_INTERRUPT_ENABLE BIT(6) +#define SB_DB_DRP_INTERRUPT_ENABLE 0x3 + +#define ECC_POLL_MSEC 5000 + +enum { + LLCC_DRAM_CE = 0, + LLCC_DRAM_UE, + LLCC_TRAM_CE, + LLCC_TRAM_UE, +}; + +static const struct llcc_edac_reg_data edac_reg_data[] = { + [LLCC_DRAM_CE] = { + .name = "DRAM Single-bit", + .reg_cnt = DRP_SYN_REG_CNT, + .count_mask = ECC_SB_ERR_COUNT_MASK, + .ways_mask = ECC_SB_ERR_WAYS_MASK, + .count_shift = ECC_SB_ERR_COUNT_SHIFT, + }, + [LLCC_DRAM_UE] = { + .name = "DRAM Double-bit", + .reg_cnt = DRP_SYN_REG_CNT, + .count_mask = ECC_DB_ERR_COUNT_MASK, + .ways_mask = ECC_DB_ERR_WAYS_MASK, + .ways_shift = ECC_DB_ERR_WAYS_SHIFT, + }, + [LLCC_TRAM_CE] = { + .name = "TRAM Single-bit", + .reg_cnt = TRP_SYN_REG_CNT, + .count_mask = ECC_SB_ERR_COUNT_MASK, + .ways_mask = ECC_SB_ERR_WAYS_MASK, + .count_shift = ECC_SB_ERR_COUNT_SHIFT, + }, + [LLCC_TRAM_UE] = { + .name = "TRAM Double-bit", + .reg_cnt = TRP_SYN_REG_CNT, + .count_mask = ECC_DB_ERR_COUNT_MASK, + .ways_mask = ECC_DB_ERR_WAYS_MASK, + .ways_shift = ECC_DB_ERR_WAYS_SHIFT, + }, +}; + +static int qcom_llcc_core_setup(struct llcc_drv_data *drv, struct regmap *llcc_bcast_regmap) +{ + u32 sb_err_threshold; + int ret; + + /* + * Configure interrupt enable registers such that Tag, Data RAM related + * interrupts are propagated to interrupt 
controller for servicing + */ + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_0_enable, + TRP0_INTERRUPT_ENABLE, + TRP0_INTERRUPT_ENABLE); + if (ret) + return ret; + + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->trp_interrupt_0_enable, + SB_DB_TRP_INTERRUPT_ENABLE, + SB_DB_TRP_INTERRUPT_ENABLE); + if (ret) + return ret; + + sb_err_threshold = (SB_ERROR_THRESHOLD << SB_ERROR_THRESHOLD_SHIFT); + ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_ecc_error_cfg, + sb_err_threshold); + if (ret) + return ret; + + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_0_enable, + DRP0_INTERRUPT_ENABLE, + DRP0_INTERRUPT_ENABLE); + if (ret) + return ret; + + ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_interrupt_enable, + SB_DB_DRP_INTERRUPT_ENABLE); + return ret; +} + +/* Clear the error interrupt and counter registers */ +static int +qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv) +{ + int ret; + + switch (err_type) { + case LLCC_DRAM_CE: + case LLCC_DRAM_UE: + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->drp_interrupt_clear, + DRP_TRP_INT_CLEAR); + if (ret) + return ret; + + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->drp_ecc_error_cntr_clear, + DRP_TRP_CNT_CLEAR); + if (ret) + return ret; + break; + case LLCC_TRAM_CE: + case LLCC_TRAM_UE: + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->trp_interrupt_0_clear, + DRP_TRP_INT_CLEAR); + if (ret) + return ret; + + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->trp_ecc_error_cntr_clear, + DRP_TRP_CNT_CLEAR); + if (ret) + return ret; + break; + default: + ret = -EINVAL; + edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n", + err_type); + } + return ret; +} + +struct qcom_llcc_syn_regs { + u32 synd_reg; + u32 count_status_reg; + u32 ways_status_reg; +}; + +static void get_reg_offsets(struct llcc_drv_data *drv, int err_type, + struct qcom_llcc_syn_regs *syn_regs) +{ + const struct llcc_edac_reg_offset *edac_reg_offset = drv->edac_reg_offset; + + switch (err_type) { + case LLCC_DRAM_CE: + syn_regs->synd_reg = edac_reg_offset->drp_ecc_sb_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0; + break; + case LLCC_DRAM_UE: + syn_regs->synd_reg = edac_reg_offset->drp_ecc_db_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0; + break; + case LLCC_TRAM_CE: + syn_regs->synd_reg = edac_reg_offset->trp_ecc_sb_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0; + break; + case LLCC_TRAM_UE: + syn_regs->synd_reg = edac_reg_offset->trp_ecc_db_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0; + break; + } +} + +/* Dump Syndrome registers data for Tag RAM, Data RAM bit errors*/ +static int +dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type) +{ + struct llcc_edac_reg_data reg_data = edac_reg_data[err_type]; + struct qcom_llcc_syn_regs regs = { }; + int err_cnt, err_ways, ret, i; + u32 synd_reg, synd_val; + + get_reg_offsets(drv, err_type, &regs); + + for (i = 0; i < reg_data.reg_cnt; i++) { + synd_reg = regs.synd_reg + (i * 4); + ret =
regmap_read(drv->regmaps[bank], synd_reg, + &synd_val); + if (ret) + goto clear; + + edac_printk(KERN_CRIT, EDAC_LLCC, "%s: ECC_SYN%d: 0x%8x\n", + reg_data.name, i, synd_val); + } + + ret = regmap_read(drv->regmaps[bank], regs.count_status_reg, + &err_cnt); + if (ret) + goto clear; + + err_cnt &= reg_data.count_mask; + err_cnt >>= reg_data.count_shift; + edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error count: 0x%4x\n", + reg_data.name, err_cnt); + + ret = regmap_read(drv->regmaps[bank], regs.ways_status_reg, + &err_ways); + if (ret) + goto clear; + + err_ways &= reg_data.ways_mask; + err_ways >>= reg_data.ways_shift; + + edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error ways: 0x%4x\n", + reg_data.name, err_ways); + +clear: + return qcom_llcc_clear_error_status(err_type, drv); +} + +static int +dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank) +{ + struct llcc_drv_data *drv = edev_ctl->dev->platform_data; + int ret; + + ret = dump_syn_reg_values(drv, bank, err_type); + if (ret) + return ret; + + switch (err_type) { + case LLCC_DRAM_CE: + edac_device_handle_ce(edev_ctl, 0, bank, + "LLCC Data RAM correctable Error"); + break; + case LLCC_DRAM_UE: + edac_device_handle_ue(edev_ctl, 0, bank, + "LLCC Data RAM uncorrectable Error"); + break; + case LLCC_TRAM_CE: + edac_device_handle_ce(edev_ctl, 0, bank, + "LLCC Tag RAM correctable Error"); + break; + case LLCC_TRAM_UE: + edac_device_handle_ue(edev_ctl, 0, bank, + "LLCC Tag RAM uncorrectable Error"); + break; + default: + ret = -EINVAL; + edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n", + err_type); + } + + return ret; +} + +static irqreturn_t llcc_ecc_irq_handler(int irq, void *edev_ctl) +{ + struct edac_device_ctl_info *edac_dev_ctl = edev_ctl; + struct llcc_drv_data *drv = edac_dev_ctl->dev->platform_data; + irqreturn_t irq_rc = IRQ_NONE; + u32 drp_error, trp_error, i; + int ret; + + /* Iterate over the banks and look for Tag RAM or Data RAM errors */ + for (i = 0; i < drv->num_banks; i++) { + ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->drp_interrupt_status, + &drp_error); + + if (!ret && (drp_error & SB_ECC_ERROR)) { + edac_printk(KERN_CRIT, EDAC_LLCC, + "Single Bit Error detected in Data RAM\n"); + ret = dump_syn_reg(edev_ctl, LLCC_DRAM_CE, i); + } else if (!ret && (drp_error & DB_ECC_ERROR)) { + edac_printk(KERN_CRIT, EDAC_LLCC, + "Double Bit Error detected in Data RAM\n"); + ret = dump_syn_reg(edev_ctl, LLCC_DRAM_UE, i); + } + if (!ret) + irq_rc = IRQ_HANDLED; + + ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->trp_interrupt_0_status, + &trp_error); + + if (!ret && (trp_error & SB_ECC_ERROR)) { + edac_printk(KERN_CRIT, EDAC_LLCC, + "Single Bit Error detected in Tag RAM\n"); + ret = dump_syn_reg(edev_ctl, LLCC_TRAM_CE, i); + } else if (!ret && (trp_error & DB_ECC_ERROR)) { + edac_printk(KERN_CRIT, EDAC_LLCC, + "Double Bit Error detected in Tag RAM\n"); + ret = dump_syn_reg(edev_ctl, LLCC_TRAM_UE, i); + } + if (!ret) + irq_rc = IRQ_HANDLED; + } + + return irq_rc; +} + +static void llcc_ecc_check(struct edac_device_ctl_info *edev_ctl) +{ + llcc_ecc_irq_handler(0, edev_ctl); +} + +static int qcom_llcc_edac_probe(struct platform_device *pdev) +{ + struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data; + struct edac_device_ctl_info *edev_ctl; + struct device *dev = &pdev->dev; + int ecc_irq; + int rc; + + if (!llcc_driv_data->ecc_irq_configured) { + rc = qcom_llcc_core_setup(llcc_driv_data, llcc_driv_data->bcast_regmap); + if (rc) + return rc; + } + + /* Allocate edac control info */ 
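The mask/shift pairs defined at the top of qcom_edac.c make the decode in dump_syn_reg_values() purely mechanical: the error count is isolated with count_mask and shifted down by count_shift (note the driver's quirk of spelling the shift amount as BIT(4), i.e. 16). A minimal stand-alone sketch of that arithmetic, with user-space stand-ins for the kernel macros and a made-up status value:

#include <assert.h>
#include <stdint.h>

#define GENMASK(h, l)	(((~0U) >> (31 - (h))) & ((~0U) << (l)))
#define BIT(n)		(1U << (n))

#define ECC_SB_ERR_COUNT_MASK	GENMASK(23, 16)
#define ECC_SB_ERR_COUNT_SHIFT	BIT(4)	/* == 16, the field's LSB position */

int main(void)
{
	uint32_t status = 0x00030000;	/* hypothetical drp_ecc_error_status1 value */
	uint32_t err_cnt = (status & ECC_SB_ERR_COUNT_MASK) >> ECC_SB_ERR_COUNT_SHIFT;

	assert(err_cnt == 3);	/* three correctable errors recorded */
	return 0;
}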
+ edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank", + llcc_driv_data->num_banks, 1, + edac_device_alloc_index()); + + if (!edev_ctl) + return -ENOMEM; + + edev_ctl->dev = dev; + edev_ctl->mod_name = dev_name(dev); + edev_ctl->dev_name = dev_name(dev); + edev_ctl->ctl_name = "llcc"; + edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE; + + /* Check if LLCC driver has passed ECC IRQ */ + ecc_irq = llcc_driv_data->ecc_irq; + if (ecc_irq > 0) { + /* Use interrupt mode if IRQ is available */ + rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler, + IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl); + if (!rc) { + edac_op_state = EDAC_OPSTATE_INT; + goto irq_done; + } + } + + /* Fall back to polling mode otherwise */ + edev_ctl->poll_msec = ECC_POLL_MSEC; + edev_ctl->edac_check = llcc_ecc_check; + edac_op_state = EDAC_OPSTATE_POLL; + +irq_done: + rc = edac_device_add_device(edev_ctl); + if (rc) { + edac_device_free_ctl_info(edev_ctl); + return rc; + } + + platform_set_drvdata(pdev, edev_ctl); + + return rc; +} + +static void qcom_llcc_edac_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *edev_ctl = dev_get_drvdata(&pdev->dev); + + edac_device_del_device(edev_ctl->dev); + edac_device_free_ctl_info(edev_ctl); +} + +static const struct platform_device_id qcom_llcc_edac_id_table[] = { + { .name = "qcom_llcc_edac" }, + {} +}; +MODULE_DEVICE_TABLE(platform, qcom_llcc_edac_id_table); + +static struct platform_driver qcom_llcc_edac_driver = { + .probe = qcom_llcc_edac_probe, + .remove = qcom_llcc_edac_remove, + .driver = { + .name = "qcom_llcc_edac", + }, + .id_table = qcom_llcc_edac_id_table, +}; +module_platform_driver(qcom_llcc_edac_driver); + +MODULE_DESCRIPTION("QCOM EDAC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index 2fd6a5490905..61e979d5437a 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -20,9 +20,8 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/edac.h> -#include "edac_core.h" +#include "edac_module.h" -#define R82600_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "r82600_edac" #define r82600_printk(level, fmt, arg...) \ @@ -205,7 +204,6 @@ static void r82600_check(struct mem_ctl_info *mci) { struct r82600_error_info info; - edac_dbg(1, "MC%d\n", mci->mc_idx); r82600_get_error_info(mci, &info); r82600_process_error_info(mci, &info, 1); } @@ -316,7 +314,6 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx) mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = R82600_REVISION; mci->ctl_name = "R82600"; mci->dev_name = pci_name(pdev); mci->edac_check = r82600_check; @@ -383,7 +380,7 @@ static void r82600_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(r82600_pci_tbl) = { +static const struct pci_device_id r82600_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID) }, @@ -418,8 +415,7 @@ module_init(r82600_init); module_exit(r82600_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD Ltd. " - "on behalf of EADS Astrium"); +MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD Ltd. 
on behalf of EADS Astrium"); MODULE_DESCRIPTION("MC support for Radisys 82600 memory controllers"); module_param(disable_hardware_scrub, bool, 0644); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index e04462b60756..d5f12219598a 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1,13 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only /* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module * * This driver supports the memory controllers found on the Intel * processor family Sandy Bridge. * - * This file may be distributed under the terms of the - * GNU General Public License version 2 only. - * * Copyright (c) 2011 by: - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab */ #include <linux/module.h> @@ -21,21 +19,24 @@ #include <linux/smp.h> #include <linux/bitmap.h> #include <linux/math64.h> +#include <linux/mod_devicetable.h> +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include <asm/processor.h> #include <asm/mce.h> -#include "edac_core.h" +#include "edac_module.h" /* Static vars */ static LIST_HEAD(sbridge_edac_list); -static DEFINE_MUTEX(sbridge_edac_lock); -static int probed; +static char sb_msg[256]; +static char sb_msg_full[512]; /* * Alter this version for the module when modifications are made */ -#define SBRIDGE_REVISION " Ver: 1.0.0 " -#define EDAC_MOD_STR "sbridge_edac" +#define SBRIDGE_REVISION " Ver: 1.1.2 " +#define EDAC_MOD_STR "sb_edac" /* * Debug macros @@ -50,53 +51,35 @@ static int probed; * Get a bit field at register value <v>, from bit <lo> to bit <hi> */ #define GET_BITFIELD(v, lo, hi) \ - (((v) & ((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) >> (lo)) - -/* - * sbridge Memory Controller Registers - */ - -/* - * FIXME: For now, let's order by device function, as it makes - * easier for driver's development process. 
This table should be - * moved to pci_id.h when submitted upstream - */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0 0x3ca0 /* 14.0 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA 0x3ca8 /* 15.0 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS 0x3c71 /* 15.1 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0 0x3caa /* 15.2 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1 0x3cab /* 15.3 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2 0x3cac /* 15.4 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3 0x3cad /* 15.5 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO 0x3cb8 /* 17.0 */ - - /* - * Currently, unused, but will be needed in the future - * implementations, as they hold the error counters - */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0 0x3c72 /* 16.2 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1 0x3c73 /* 16.3 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2 0x3c76 /* 16.6 */ -#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3 0x3c77 /* 16.7 */ + (((v) & GENMASK_ULL(hi, lo)) >> (lo)) /* Devices 12 Function 6, Offsets 0x80 to 0xcc */ -static const u32 dram_rule[] = { +static const u32 sbridge_dram_rule[] = { 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8, 0xc0, 0xc8, }; -#define MAX_SAD ARRAY_SIZE(dram_rule) -#define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff) -#define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3) -#define INTERLEAVE_MODE(reg) GET_BITFIELD(reg, 1, 1) +static const u32 ibridge_dram_rule[] = { + 0x60, 0x68, 0x70, 0x78, 0x80, + 0x88, 0x90, 0x98, 0xa0, 0xa8, + 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, + 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, +}; + +static const u32 knl_dram_rule[] = { + 0x60, 0x68, 0x70, 0x78, 0x80, /* 0-4 */ + 0x88, 0x90, 0x98, 0xa0, 0xa8, /* 5-9 */ + 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, /* 10-14 */ + 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, /* 15-19 */ + 0x100, 0x108, 0x110, 0x118, /* 20-23 */ +}; + #define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0) +#define A7MODE(reg) GET_BITFIELD(reg, 26, 26) -static char *get_dram_attr(u32 reg) +static char *show_dram_attr(u32 attr) { - switch(DRAM_ATTR(reg)) { + switch (attr) { case 0: return "DRAM"; case 1: @@ -108,49 +91,74 @@ static char *get_dram_attr(u32 reg) } } -static const u32 interleave_list[] = { +static const u32 sbridge_interleave_list[] = { 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc, 0xc4, 0xcc, }; -#define MAX_INTERLEAVE ARRAY_SIZE(interleave_list) -#define SAD_PKG0(reg) GET_BITFIELD(reg, 0, 2) -#define SAD_PKG1(reg) GET_BITFIELD(reg, 3, 5) -#define SAD_PKG2(reg) GET_BITFIELD(reg, 8, 10) -#define SAD_PKG3(reg) GET_BITFIELD(reg, 11, 13) -#define SAD_PKG4(reg) GET_BITFIELD(reg, 16, 18) -#define SAD_PKG5(reg) GET_BITFIELD(reg, 19, 21) -#define SAD_PKG6(reg) GET_BITFIELD(reg, 24, 26) -#define SAD_PKG7(reg) GET_BITFIELD(reg, 27, 29) +static const u32 ibridge_interleave_list[] = { + 0x64, 0x6c, 0x74, 0x7c, 0x84, + 0x8c, 0x94, 0x9c, 0xa4, 0xac, + 0xb4, 0xbc, 0xc4, 0xcc, 0xd4, + 0xdc, 0xe4, 0xec, 0xf4, 0xfc, +}; + +static const u32 knl_interleave_list[] = { + 0x64, 0x6c, 0x74, 0x7c, 0x84, /* 0-4 */ + 0x8c, 0x94, 0x9c, 0xa4, 0xac, /* 5-9 */ + 0xb4, 0xbc, 0xc4, 0xcc, 0xd4, /* 10-14 */ + 0xdc, 0xe4, 0xec, 0xf4, 0xfc, /* 15-19 */ + 0x104, 0x10c, 0x114, 0x11c, /* 20-23 */ +}; +#define MAX_INTERLEAVE \ + (MAX_T(unsigned int, ARRAY_SIZE(sbridge_interleave_list), \ + MAX_T(unsigned int, ARRAY_SIZE(ibridge_interleave_list), \ + ARRAY_SIZE(knl_interleave_list)))) + 
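The rewritten GET_BITFIELD() above is behaviour-preserving: GENMASK_ULL(hi, lo) builds the same mask that the old expression open-coded. A small self-contained check, using a user-space stand-in for GENMASK_ULL() and a hypothetical DRAM rule value:

#include <assert.h>
#include <stdint.h>

#define GENMASK_ULL(h, l)	((~0ULL >> (63 - (h))) & (~0ULL << (l)))

/* New form, as in the patch. */
#define GET_BITFIELD(v, lo, hi)	(((v) & GENMASK_ULL(hi, lo)) >> (lo))

/* Old open-coded form being replaced. */
#define GET_BITFIELD_OLD(v, lo, hi) \
	(((v) & ((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) >> (lo))

int main(void)
{
	uint32_t dram_rule = 0xc00000c1;	/* hypothetical register value */

	/* Same field, bits 6..25, extracted by both variants. */
	assert(GET_BITFIELD(dram_rule, 6, 25) == GET_BITFIELD_OLD(dram_rule, 6, 25));
	assert(GET_BITFIELD(dram_rule, 0, 0) == 1);	/* DRAM_RULE_ENABLE */
	return 0;
}

The table-driven sad_pkg() that follows applies the same extraction, with the start/end bit positions looked up per CPU generation instead of hard-coded in a switch.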
+struct interleave_pkg { + unsigned char start; + unsigned char end; +}; + +static const struct interleave_pkg sbridge_interleave_pkg[] = { + { 0, 2 }, + { 3, 5 }, + { 8, 10 }, + { 11, 13 }, + { 16, 18 }, + { 19, 21 }, + { 24, 26 }, + { 27, 29 }, +}; + +static const struct interleave_pkg ibridge_interleave_pkg[] = { + { 0, 3 }, + { 4, 7 }, + { 8, 11 }, + { 12, 15 }, + { 16, 19 }, + { 20, 23 }, + { 24, 27 }, + { 28, 31 }, +}; -static inline int sad_pkg(u32 reg, int interleave) +static inline int sad_pkg(const struct interleave_pkg *table, u32 reg, + int interleave) { - switch (interleave) { - case 0: - return SAD_PKG0(reg); - case 1: - return SAD_PKG1(reg); - case 2: - return SAD_PKG2(reg); - case 3: - return SAD_PKG3(reg); - case 4: - return SAD_PKG4(reg); - case 5: - return SAD_PKG5(reg); - case 6: - return SAD_PKG6(reg); - case 7: - return SAD_PKG7(reg); - default: - return -EINVAL; - } + return GET_BITFIELD(reg, table[interleave].start, + table[interleave].end); } /* Devices 12 Function 7 */ #define TOLM 0x80 -#define TOHM 0x84 +#define TOHM 0x84 +#define HASWELL_TOLM 0xd0 +#define HASWELL_TOHM_0 0xd4 +#define HASWELL_TOHM_1 0xd8 +#define KNL_TOLM 0xd0 +#define KNL_TOHM_0 0xd4 +#define KNL_TOHM_1 0xd8 #define GET_TOLM(reg) ((GET_BITFIELD(reg, 0, 3) << 28) | 0x3ffffff) #define GET_TOHM(reg) ((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff) @@ -161,9 +169,9 @@ static inline int sad_pkg(u32 reg, int interleave) #define SOURCE_ID(reg) GET_BITFIELD(reg, 9, 11) -#define SAD_CONTROL 0xf4 +#define SOURCE_ID_KNL(reg) GET_BITFIELD(reg, 12, 14) -#define NODE_ID(reg) GET_BITFIELD(reg, 0, 2) +#define SAD_CONTROL 0xf4 /* Device 14 function 0 */ @@ -185,6 +193,7 @@ static const u32 tad_dram_rule[] = { /* Device 15, function 0 */ #define MCMTR 0x7c +#define KNL_MCMTR 0x624 #define IS_ECC_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 2, 2) #define IS_LOCKSTEP_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 1, 1) @@ -201,6 +210,8 @@ static const int mtr_regs[] = { 0x80, 0x84, 0x88, }; +static const int knl_mtr_reg = 0xb60; + #define RANK_DISABLE(mtr) GET_BITFIELD(mtr, 16, 19) #define IS_DIMM_PRESENT(mtr) GET_BITFIELD(mtr, 14, 14) #define RANK_CNT_BITS(mtr) GET_BITFIELD(mtr, 12, 13) @@ -222,7 +233,6 @@ static const u32 rir_way_limit[] = { #define IS_RIR_VALID(reg) GET_BITFIELD(reg, 31, 31) #define RIR_WAY(reg) GET_BITFIELD(reg, 28, 29) -#define RIR_LIMIT(reg) ((GET_BITFIELD(reg, 1, 10) << 29)| 0x1fffffff) #define MAX_RIR_WAY 8 @@ -234,8 +244,11 @@ static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = { { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc }, }; -#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19) -#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14) +#define RIR_RNK_TGT(type, reg) (((type) == BROADWELL) ? \ + GET_BITFIELD(reg, 20, 23) : GET_BITFIELD(reg, 16, 19)) + +#define RIR_OFFSET(type, reg) (((type) == HASWELL || (type) == BROADWELL) ? 
\ + GET_BITFIELD(reg, 2, 15) : GET_BITFIELD(reg, 2, 14)) /* Device 16, functions 2-7 */ @@ -243,18 +256,20 @@ static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = { * FIXME: Implement the error count reads directly */ -static const u32 correrrcnt[] = { - 0x104, 0x108, 0x10c, 0x110, -}; - #define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31) #define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30) #define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15) #define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14) +#if 0 /* Currently unused*/ +static const u32 correrrcnt[] = { + 0x104, 0x108, 0x10c, 0x110, +}; + static const u32 correrrthrsld[] = { 0x11c, 0x120, 0x124, 0x128, }; +#endif #define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30) #define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14) @@ -262,51 +277,117 @@ static const u32 correrrthrsld[] = { /* Device 17, function 0 */ -#define RANK_CFG_A 0x0328 +#define SB_RANK_CFG_A 0x0328 -#define IS_RDIMM_ENABLED(reg) GET_BITFIELD(reg, 11, 11) +#define IB_RANK_CFG_A 0x0320 /* * sbridge structs */ -#define NUM_CHANNELS 4 -#define MAX_DIMMS 3 /* Max DIMMS per channel */ +#define NUM_CHANNELS 6 /* Max channels per MC */ +#define MAX_DIMMS 3 /* Max DIMMS per channel */ +#define KNL_MAX_CHAS 38 /* KNL max num. of Cache Home Agents */ +#define KNL_MAX_CHANNELS 6 /* KNL max num. of PCI channels */ +#define KNL_MAX_EDCS 8 /* Embedded DRAM controllers */ +#define CHANNEL_UNSPECIFIED 0xf /* Intel IA32 SDM 15-14 */ + +enum type { + SANDY_BRIDGE, + IVY_BRIDGE, + HASWELL, + BROADWELL, + KNIGHTS_LANDING, +}; +enum domain { + IMC0 = 0, + IMC1, + SOCK, +}; + +enum mirroring_mode { + NON_MIRRORING, + ADDR_RANGE_MIRRORING, + FULL_MIRRORING, +}; + +struct sbridge_pvt; struct sbridge_info { - u32 mcmtr; + enum type type; + u32 mcmtr; + u32 rankcfgr; + u64 (*get_tolm)(struct sbridge_pvt *pvt); + u64 (*get_tohm)(struct sbridge_pvt *pvt); + u64 (*rir_limit)(u32 reg); + u64 (*sad_limit)(u32 reg); + u32 (*interleave_mode)(u32 reg); + u32 (*dram_attr)(u32 reg); + const u32 *dram_rule; + const u32 *interleave_list; + const struct interleave_pkg *interleave_pkg; + u8 max_sad; + u8 (*get_node_id)(struct sbridge_pvt *pvt); + u8 (*get_ha)(u8 bank); + enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt); + enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr); + struct pci_dev *pci_vtd; }; struct sbridge_channel { u32 ranks; u32 dimms; + struct dimm { + u32 rowbits; + u32 colbits; + u32 bank_xor_enable; + u32 amap_fine; + } dimm[MAX_DIMMS]; }; struct pci_id_descr { - int dev; - int func; - int dev_id; + int dev_id; int optional; + enum domain dom; }; struct pci_id_table { const struct pci_id_descr *descr; - int n_devs; + int n_devs_per_imc; + int n_devs_per_sock; + int n_imcs_per_sock; + enum type type; }; struct sbridge_dev { struct list_head list; + int seg; u8 bus, mc; u8 node_id, source_id; struct pci_dev **pdev; + enum domain dom; int n_devs; + int i_devs; struct mem_ctl_info *mci; }; +struct knl_pvt { + struct pci_dev *pci_cha[KNL_MAX_CHAS]; + struct pci_dev *pci_channel[KNL_MAX_CHANNELS]; + struct pci_dev *pci_mc0; + struct pci_dev *pci_mc1; + struct pci_dev *pci_mc0_misc; + struct pci_dev *pci_mc1_misc; + struct pci_dev *pci_mc_info; /* tolm, tohm */ +}; + struct sbridge_pvt { - struct pci_dev *pci_ta, *pci_ddrio, *pci_ras; - struct pci_dev *pci_sad0, *pci_sad1, *pci_ha0; - struct pci_dev *pci_br; + /* Devices per socket */ + struct pci_dev *pci_ddrio; + struct pci_dev *pci_sad0, *pci_sad1; + struct pci_dev *pci_br0, *pci_br1; + /* Devices per memory 
controller */ + struct pci_dev *pci_ha, *pci_ta, *pci_ras; struct pci_dev *pci_tad[NUM_CHANNELS]; struct sbridge_dev *sbridge_dev; @@ -315,61 +396,299 @@ struct sbridge_pvt { struct sbridge_channel channel[NUM_CHANNELS]; /* Memory type detection */ - bool is_mirrored, is_lockstep, is_close_pg; - - /* Fifo double buffers */ - struct mce mce_entry[MCE_LOG_LEN]; - struct mce mce_outentry[MCE_LOG_LEN]; - - /* Fifo in/out counters */ - unsigned mce_in, mce_out; - - /* Count indicator to show errors not got */ - unsigned mce_overrun; + bool is_cur_addr_mirrored, is_lockstep, is_close_pg; + bool is_chan_hash; + enum mirroring_mode mirror_mode; /* Memory description */ u64 tolm, tohm; + struct knl_pvt knl; }; -#define PCI_DESCR(device, function, device_id, opt) \ - .dev = (device), \ - .func = (function), \ - .dev_id = (device_id), \ - .optional = opt +#define PCI_DESCR(device_id, opt, domain) \ + .dev_id = (device_id), \ + .optional = opt, \ + .dom = domain static const struct pci_id_descr pci_dev_descr_sbridge[] = { /* Processor Home Agent */ - { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0, IMC0) }, /* Memory controller */ - { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0) }, - { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0) }, - { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0) }, - { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0) }, - { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0) }, - { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0) }, - { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1, SOCK) }, /* System Address Decoder */ - { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0) }, - { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0, SOCK) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0, SOCK) }, /* Broadcast Registers */ - { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0, SOCK) }, }; -#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } +#define PCI_ID_TABLE_ENTRY(A, N, M, T) { \ + .descr = A, \ + .n_devs_per_imc = N, \ + .n_devs_per_sock = ARRAY_SIZE(A), \ + .n_imcs_per_sock = M, \ + .type = T \ +} + static const struct pci_id_table pci_dev_descr_sbridge_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge), - {0,} /* 0 terminated list. 
*/ + PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, ARRAY_SIZE(pci_dev_descr_sbridge), 1, SANDY_BRIDGE), + { NULL, } +}; + +/* This changes depending if 1HA or 2HA: + * 1HA: + * 0x0eb8 (17.0) is DDRIO0 + * 2HA: + * 0x0ebc (17.4) is DDRIO0 + */ +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0 0x0eb8 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0 0x0ebc + +/* pci ids */ +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0 0x0ea0 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA 0x0ea8 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS 0x0e71 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0 0x0eaa +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1 0x0eab +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2 0x0eac +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3 0x0ead +#define PCI_DEVICE_ID_INTEL_IBRIDGE_SAD 0x0ec8 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_BR0 0x0ec9 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_BR1 0x0eca +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1 0x0e60 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA 0x0e68 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS 0x0e79 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 0x0e6a +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1 0x0e6b +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2 0x0e6c +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3 0x0e6d + +static const struct pci_id_descr pci_dev_descr_ibridge[] = { + /* Processor Home Agent */ + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1, IMC1) }, + + /* Memory controller */ + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0, IMC0) }, + + /* Optional, mode 2HA */ + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3, 1, IMC1) }, + + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1, SOCK) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1, SOCK) }, + + /* System Address Decoder */ + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0, SOCK) }, + + /* Broadcast Registers */ + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1, SOCK) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0, SOCK) }, + +}; + +static const struct pci_id_table pci_dev_descr_ibridge_table[] = { + PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, 12, 2, IVY_BRIDGE), + { NULL, } +}; + +/* Haswell support */ +/* EN processor: + * - 1 IMC + * - 3 DDR3 channels, 2 DPC per channel + * EP processor: + * - 1 or 2 IMC + * - 4 DDR4 channels, 3 DPC per channel + * EP 4S processor: + * - 2 IMC + * - 4 DDR4 channels, 3 DPC per channel + * EX processor: + * - 2 IMC + * - each IMC interfaces with a SMI 2 channel + * - each SMI channel interfaces with a scalable memory buffer + * - each scalable memory buffer supports 4 DDR3/DDR4 channels, 3 DPC + */ +#define HASWELL_DDRCRCLKCONTROLS 0xa10 /* Ditto on Broadwell */ +#define HASWELL_HASYSDEFEATURE2 0x84 +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_VTD_MISC 0x2f28 
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0 0x2fa0 +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1 0x2f60 +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA 0x2fa8 +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM 0x2f71 +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA 0x2f68 +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM 0x2f79 +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0 0x2ffc +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1 0x2ffd +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0 0x2faa +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1 0x2fab +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2 0x2fac +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3 0x2fad +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0 0x2f6a +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1 0x2f6b +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2 0x2f6c +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3 0x2f6d +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0 0x2fbd +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1 0x2fbf +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2 0x2fb9 +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3 0x2fbb +static const struct pci_id_descr pci_dev_descr_haswell[] = { + /* first item must be the HA */ + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1, 1, IMC1) }, + + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2, 1, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1, IMC0) }, + + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3, 1, IMC1) }, + + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0, 0, SOCK) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1, 0, SOCK) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0, 1, SOCK) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1, 1, SOCK) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2, 1, SOCK) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3, 1, SOCK) }, +}; + +static const struct pci_id_table pci_dev_descr_haswell_table[] = { + PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, 13, 2, HASWELL), + { NULL, } +}; + +/* Knight's Landing Support */ +/* + * KNL's memory channels are swizzled between memory controllers. + * MC0 is mapped to CH3,4,5 and MC1 is mapped to CH0,1,2 + */ +#define knl_channel_remap(mc, chan) ((mc) ? (chan) : (chan) + 3) + +/* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_MC 0x7840 +/* DRAM channel stuff; bank addrs, dimmmtr, etc.. 
2-8-2 - 2-9-4 (6 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN 0x7843 +/* kdrwdbu TAD limits/offsets, MCMTR - 2-10-1, 2-11-1 (2 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_TA 0x7844 +/* CHA broadcast registers, dram rules - 1-29-0 (1 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0 0x782a +/* SAD target - 1-29-1 (1 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1 0x782b +/* Caching / Home Agent */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHA 0x782c +/* Device with TOLM and TOHM, 0-5-0 (1 of these) */ +#define PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM 0x7810 + +/* + * KNL differs from SB, IB, and Haswell in that it has multiple + * instances of the same device with the same device ID, so we handle that + * by creating as many copies in the table as we expect to find. + * (Like device ID must be grouped together.) + */ + +static const struct pci_id_descr pci_dev_descr_knl[] = { + [0 ... 1] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_MC, 0, IMC0)}, + [2 ... 7] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN, 0, IMC0) }, + [8] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TA, 0, IMC0) }, + [9] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM, 0, IMC0) }, + [10] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0, 0, SOCK) }, + [11] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1, 0, SOCK) }, + [12 ... 49] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHA, 0, SOCK) }, +}; + +static const struct pci_id_table pci_dev_descr_knl_table[] = { + PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, ARRAY_SIZE(pci_dev_descr_knl), 1, KNIGHTS_LANDING), + { NULL, } }; /* - * pci_device_id table for which devices we are looking for + * Broadwell support + * + * DE processor: + * - 1 IMC + * - 2 DDR3 channels, 2 DPC per channel + * EP processor: + * - 1 or 2 IMC + * - 4 DDR4 channels, 3 DPC per channel + * EP 4S processor: + * - 2 IMC + * - 4 DDR4 channels, 3 DPC per channel + * EX processor: + * - 2 IMC + * - each IMC interfaces with a SMI 2 channel + * - each SMI channel interfaces with a scalable memory buffer + * - each scalable memory buffer supports 4 DDR3/DDR4 channels, 3 DPC */ -static DEFINE_PCI_DEVICE_TABLE(sbridge_pci_tbl) = { - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)}, - {0,} /* 0 terminated list. 
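The [0 ... 1], [2 ... 7] and [12 ... 49] entries in pci_dev_descr_knl above rely on GCC's ranged designated initializers to express "expect this many copies of the same device ID". A condensed sketch of the construct (device IDs copied from the defines above; the struct is reduced to the one relevant field, and the gaps left here are zero-filled, unlike the fully populated driver table):

struct pci_desc {
	int dev_id;
};

static const struct pci_desc descr[] = {
	[0 ... 1]   = { 0x7840 },	/* PCI_DEVICE_ID_INTEL_KNL_IMC_MC */
	[2 ... 7]   = { 0x7843 },	/* PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN */
	[12 ... 49] = { 0x782c },	/* PCI_DEVICE_ID_INTEL_KNL_IMC_CHA */
};

int main(void)
{
	/* 50 slots total; every CHA slot carries the same ID. */
	return !(sizeof(descr) / sizeof(descr[0]) == 50 &&
		 descr[30].dev_id == 0x782c);
}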
*/ +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_VTD_MISC 0x6f28 +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0 0x6fa0 +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1 0x6f60 +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA 0x6fa8 +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM 0x6f71 +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA 0x6f68 +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM 0x6f79 +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0 0x6ffc +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1 0x6ffd +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0 0x6faa +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1 0x6fab +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2 0x6fac +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3 0x6fad +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0 0x6f6a +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1 0x6f6b +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2 0x6f6c +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3 0x6f6d +#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0 0x6faf + +static const struct pci_id_descr pci_dev_descr_broadwell[] = { + /* first item must be the HA */ + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1, 1, IMC1) }, + + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1, 0, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2, 1, IMC0) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3, 1, IMC0) }, + + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2, 1, IMC1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3, 1, IMC1) }, + + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0, 0, SOCK) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1, 0, SOCK) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0, 1, SOCK) }, +}; + +static const struct pci_id_table pci_dev_descr_broadwell_table[] = { + PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, 10, 2, BROADWELL), + { NULL, } }; @@ -377,13 +696,17 @@ static DEFINE_PCI_DEVICE_TABLE(sbridge_pci_tbl) = { Ancillary status routines ****************************************************************************/ -static inline int numrank(u32 mtr) +static inline int numrank(enum type type, u32 mtr) { int ranks = (1 << RANK_CNT_BITS(mtr)); + int max = 4; - if (ranks > 4) { - edac_dbg(0, "Invalid number of ranks: %d (max = 4) raw value = %x (%04x)\n", - ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr); + if (type == HASWELL || type == BROADWELL || type == KNIGHTS_LANDING) + max = 8; + + if (ranks > max) { + edac_dbg(0, "Invalid number of ranks: %d (max = %i) raw value = %x (%04x)\n", + ranks, max, (unsigned int)RANK_CNT_BITS(mtr), mtr); return -EINVAL; } @@ -416,20 +739,35 @@ static inline int numcol(u32 mtr) return 1 << cols; } -static struct sbridge_dev *get_sbridge_dev(u8 bus) +static struct sbridge_dev *get_sbridge_dev(int seg, u8 bus, enum domain dom, + int multi_bus, + struct sbridge_dev *prev) { struct sbridge_dev *sbridge_dev; - list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { - 
if (sbridge_dev->bus == bus) + /* + * If we have devices scattered across several busses that pertain + * to the same memory controller, we'll lump them all together. + */ + if (multi_bus) { + return list_first_entry_or_null(&sbridge_edac_list, + struct sbridge_dev, list); + } + + sbridge_dev = list_entry(prev ? prev->list.next + : sbridge_edac_list.next, struct sbridge_dev, list); + + list_for_each_entry_from(sbridge_dev, &sbridge_edac_list, list) { + if ((sbridge_dev->seg == seg) && (sbridge_dev->bus == bus) && + (dom == SOCK || dom == sbridge_dev->dom)) return sbridge_dev; } return NULL; } -static struct sbridge_dev *alloc_sbridge_dev(u8 bus, - const struct pci_id_table *table) +static struct sbridge_dev *alloc_sbridge_dev(int seg, u8 bus, enum domain dom, + const struct pci_id_table *table) { struct sbridge_dev *sbridge_dev; @@ -437,15 +775,18 @@ static struct sbridge_dev *alloc_sbridge_dev(u8 bus, if (!sbridge_dev) return NULL; - sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs, - GFP_KERNEL); + sbridge_dev->pdev = kcalloc(table->n_devs_per_imc, + sizeof(*sbridge_dev->pdev), + GFP_KERNEL); if (!sbridge_dev->pdev) { kfree(sbridge_dev); return NULL; } + sbridge_dev->seg = seg; sbridge_dev->bus = bus; - sbridge_dev->n_devs = table->n_devs; + sbridge_dev->dom = dom; + sbridge_dev->n_devs = table->n_devs_per_imc; list_add_tail(&sbridge_dev->list, &sbridge_edac_list); return sbridge_dev; @@ -458,156 +799,886 @@ static void free_sbridge_dev(struct sbridge_dev *sbridge_dev) kfree(sbridge_dev); } -/**************************************************************************** Memory check routines ****************************************************************************/ -static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot, - unsigned func) +static u64 sbridge_get_tolm(struct sbridge_pvt *pvt) { - struct sbridge_dev *sbridge_dev = get_sbridge_dev(bus); - int i; + u32 reg; - if (!sbridge_dev) - return NULL; + /* Address range is 32:28 */ + pci_read_config_dword(pvt->pci_sad1, TOLM, &reg); + return GET_TOLM(reg); +} - for (i = 0; i < sbridge_dev->n_devs; i++) { - if (!sbridge_dev->pdev[i]) - continue; +static u64 sbridge_get_tohm(struct sbridge_pvt *pvt) +{ + u32 reg; - if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot && - PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) { - edac_dbg(1, "Associated %02x.%02x.%d with %p\n", - bus, slot, func, sbridge_dev->pdev[i]); - return sbridge_dev->pdev[i]; - } + pci_read_config_dword(pvt->pci_sad1, TOHM, &reg); + return GET_TOHM(reg); +} + +static u64 ibridge_get_tolm(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->pci_br1, TOLM, &reg); + + return GET_TOLM(reg); +} + +static u64 ibridge_get_tohm(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->pci_br1, TOHM, &reg); + + return GET_TOHM(reg); +} + +static u64 rir_limit(u32 reg) +{ + return ((u64)GET_BITFIELD(reg, 1, 10) << 29) | 0x1fffffff; +} + +static u64 sad_limit(u32 reg) +{ + return (GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff; +} + +static u32 interleave_mode(u32 reg) +{ + return GET_BITFIELD(reg, 1, 1); +} + +static u32 dram_attr(u32 reg) +{ + return GET_BITFIELD(reg, 2, 3); +} + +static u64 knl_sad_limit(u32 reg) +{ + return (GET_BITFIELD(reg, 7, 26) << 26) | 0x3ffffff; +} + +static u32 knl_interleave_mode(u32 reg) +{ + return GET_BITFIELD(reg, 1, 2); +} + +static const char * const knl_intlv_mode[] = { + "[8:6]", "[10:8]", "[14:12]", "[32:30]" +}; + +static const char *get_intlv_mode_str(u32 reg, enum type t) +{ + if (t ==
KNIGHTS_LANDING) + return knl_intlv_mode[knl_interleave_mode(reg)]; + else + return interleave_mode(reg) ? "[8:6]" : "[8:6]XOR[18:16]"; +} + +static u32 dram_attr_knl(u32 reg) +{ + return GET_BITFIELD(reg, 3, 4); +} + + +static enum mem_type get_memory_type(struct sbridge_pvt *pvt) +{ + u32 reg; + enum mem_type mtype; + + if (pvt->pci_ddrio) { + pci_read_config_dword(pvt->pci_ddrio, pvt->info.rankcfgr, + &reg); + if (GET_BITFIELD(reg, 11, 11)) + /* FIXME: Can also be LRDIMM */ + mtype = MEM_RDDR3; + else + mtype = MEM_DDR3; + } else + mtype = MEM_UNKNOWN; + + return mtype; +} + +static enum mem_type haswell_get_memory_type(struct sbridge_pvt *pvt) +{ + u32 reg; + bool registered = false; + enum mem_type mtype = MEM_UNKNOWN; + + if (!pvt->pci_ddrio) + goto out; + + pci_read_config_dword(pvt->pci_ddrio, + HASWELL_DDRCRCLKCONTROLS, &reg); + /* Is_Rdimm */ + if (GET_BITFIELD(reg, 16, 16)) + registered = true; + + pci_read_config_dword(pvt->pci_ta, MCMTR, &reg); + if (GET_BITFIELD(reg, 14, 14)) { + if (registered) + mtype = MEM_RDDR4; + else + mtype = MEM_DDR4; + } else { + if (registered) + mtype = MEM_RDDR3; + else + mtype = MEM_DDR3; } - return NULL; +out: + return mtype; +} + +static enum dev_type knl_get_width(struct sbridge_pvt *pvt, u32 mtr) +{ + /* for KNL value is fixed */ + return DEV_X16; +} + +static enum dev_type sbridge_get_width(struct sbridge_pvt *pvt, u32 mtr) +{ + /* there's no way to figure out */ + return DEV_UNKNOWN; } -/** - * check_if_ecc_is_active() - Checks if ECC is active - * bus: Device bus +static enum dev_type __ibridge_get_width(u32 mtr) +{ + enum dev_type type = DEV_UNKNOWN; + + switch (mtr) { + case 2: + type = DEV_X16; + break; + case 1: + type = DEV_X8; + break; + case 0: + type = DEV_X4; + break; + } + + return type; +} + +static enum dev_type ibridge_get_width(struct sbridge_pvt *pvt, u32 mtr) +{ + /* + * ddr3_width on the documentation but also valid for DDR4 on + * Haswell + */ + return __ibridge_get_width(GET_BITFIELD(mtr, 7, 8)); +} + +static enum dev_type broadwell_get_width(struct sbridge_pvt *pvt, u32 mtr) +{ + /* ddr3_width on the documentation but also valid for DDR4 */ + return __ibridge_get_width(GET_BITFIELD(mtr, 8, 9)); +} + +static enum mem_type knl_get_memory_type(struct sbridge_pvt *pvt) +{ + /* DDR4 RDIMMS and LRDIMMS are supported */ + return MEM_RDDR4; +} + +static u8 get_node_id(struct sbridge_pvt *pvt) +{ + u32 reg; + pci_read_config_dword(pvt->pci_br0, SAD_CONTROL, &reg); + return GET_BITFIELD(reg, 0, 2); +} + +static u8 haswell_get_node_id(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->pci_sad1, SAD_CONTROL, &reg); + return GET_BITFIELD(reg, 0, 3); +} + +static u8 knl_get_node_id(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->pci_sad1, SAD_CONTROL, &reg); + return GET_BITFIELD(reg, 0, 2); +} + +/* + * Use the reporting bank number to determine which memory + * controller (also known as "ha" for "home agent"). Sandy + * Bridge only has one memory controller per socket, so the + * answer is always zero. + */ +static u8 sbridge_get_ha(u8 bank) +{ + return 0; +} + +/* + * On Ivy Bridge, Haswell and Broadwell the error may be in a + * home agent bank (7, 8), or one of the per-channel memory + * controller banks (9 .. 16).
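+ * For example, bank 9 decodes to HA (9 - 9) / 4 = 0 and bank 16 to HA (16 - 9) / 4 = 1; banks outside both ranges return 0xff below.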
+ */ +static u8 ibridge_get_ha(u8 bank) +{ + switch (bank) { + case 7 ... 8: + return bank - 7; + case 9 ... 16: + return (bank - 9) / 4; + default: + return 0xff; + } +} + +/* Not used, but included for safety/symmetry */ +static u8 knl_get_ha(u8 bank) +{ + return 0xff; +} + +static u64 haswell_get_tolm(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOLM, &reg); + return (GET_BITFIELD(reg, 26, 31) << 26) | 0x3ffffff; +} + +static u64 haswell_get_tohm(struct sbridge_pvt *pvt) +{ + u64 rc; + u32 reg; + + pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOHM_0, &reg); + rc = GET_BITFIELD(reg, 26, 31); + pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOHM_1, &reg); + rc = ((reg << 6) | rc) << 26; + + return rc | 0x3ffffff; +} + +static u64 knl_get_tolm(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->knl.pci_mc_info, KNL_TOLM, &reg); + return (GET_BITFIELD(reg, 26, 31) << 26) | 0x3ffffff; +} + +static u64 knl_get_tohm(struct sbridge_pvt *pvt) +{ + u64 rc; + u32 reg_lo, reg_hi; + + pci_read_config_dword(pvt->knl.pci_mc_info, KNL_TOHM_0, &reg_lo); + pci_read_config_dword(pvt->knl.pci_mc_info, KNL_TOHM_1, &reg_hi); + rc = ((u64)reg_hi << 32) | reg_lo; + return rc | 0x3ffffff; +} + + +static u64 haswell_rir_limit(u32 reg) +{ + return (((u64)GET_BITFIELD(reg, 1, 11) + 1) << 29) - 1; +} + +static inline u8 sad_pkg_socket(u8 pkg) +{ + /* on Ivy Bridge, nodeID is SASS, where A is HA and S is node id */ + return ((pkg >> 3) << 2) | (pkg & 0x3); +} + +static inline u8 sad_pkg_ha(u8 pkg) +{ + return (pkg >> 2) & 0x1; +} + +static int haswell_chan_hash(int idx, u64 addr) +{ + int i; + + /* + * XOR even bits from 12:26 to bit0 of idx, + * odd bits from 13:27 to bit1 + */ + for (i = 12; i < 28; i += 2) + idx ^= (addr >> i) & 3; + + return idx; +} + +/* Low bits of TAD limit, and some metadata. */ +static const u32 knl_tad_dram_limit_lo[] = { + 0x400, 0x500, 0x600, 0x700, + 0x800, 0x900, 0xa00, 0xb00, +}; + +/* Low bits of TAD offset. */ +static const u32 knl_tad_dram_offset_lo[] = { + 0x404, 0x504, 0x604, 0x704, + 0x804, 0x904, 0xa04, 0xb04, +}; + +/* High 16 bits of TAD limit and offset. */ +static const u32 knl_tad_dram_hi[] = { + 0x408, 0x508, 0x608, 0x708, + 0x808, 0x908, 0xa08, 0xb08, +}; + +/* Number of ways a tad entry is interleaved. */ +static const u32 knl_tad_ways[] = { + 8, 6, 4, 3, 2, 1, +}; + +/* + * Retrieve the n'th Target Address Decode table entry + * from the memory controller's TAD table. + * + * @pvt: driver private data + * @entry: which entry you want to retrieve + * @mc: which memory controller (0 or 1) + * @offset: output tad range offset + * @limit: output address of first byte above tad range + * @ways: output number of interleave ways + * + * The offset value has curious semantics. It's a sort of running total + * of the sizes of all the memory regions that aren't mapped in this + * tad table. + */ +static int knl_get_tad(const struct sbridge_pvt *pvt, + const int entry, + const int mc, + u64 *offset, + u64 *limit, + int *ways) +{ + u32 reg_limit_lo, reg_offset_lo, reg_hi; + struct pci_dev *pci_mc; + int way_id; + + switch (mc) { + case 0: + pci_mc = pvt->knl.pci_mc0; + break; + case 1: + pci_mc = pvt->knl.pci_mc1; + break; + default: + WARN_ON(1); + return -EINVAL; } - pci_read_config_dword(pdev, MCMTR, &mcmtr); - if (!IS_ECC_ENABLED(mcmtr)) { - sbridge_printk(KERN_ERR, "ECC is disabled.
Aborting\n"); + pci_read_config_dword(pci_mc, + knl_tad_dram_limit_lo[entry], ®_limit_lo); + pci_read_config_dword(pci_mc, + knl_tad_dram_offset_lo[entry], ®_offset_lo); + pci_read_config_dword(pci_mc, + knl_tad_dram_hi[entry], ®_hi); + + /* Is this TAD entry enabled? */ + if (!GET_BITFIELD(reg_limit_lo, 0, 0)) + return -ENODEV; + + way_id = GET_BITFIELD(reg_limit_lo, 3, 5); + + if (way_id < ARRAY_SIZE(knl_tad_ways)) { + *ways = knl_tad_ways[way_id]; + } else { + *ways = 0; + sbridge_printk(KERN_ERR, + "Unexpected value %d in mc_tad_limit_lo wayness field\n", + way_id); return -ENODEV; } + + /* + * The least significant 6 bits of base and limit are truncated. + * For limit, we fill the missing bits with 1s. + */ + *offset = ((u64) GET_BITFIELD(reg_offset_lo, 6, 31) << 6) | + ((u64) GET_BITFIELD(reg_hi, 0, 15) << 32); + *limit = ((u64) GET_BITFIELD(reg_limit_lo, 6, 31) << 6) | 63 | + ((u64) GET_BITFIELD(reg_hi, 16, 31) << 32); + return 0; } -static int get_dimm_config(struct mem_ctl_info *mci) +/* Determine which memory controller is responsible for a given channel. */ +static int knl_channel_mc(int channel) { - struct sbridge_pvt *pvt = mci->pvt_info; - struct dimm_info *dimm; - unsigned i, j, banks, ranks, rows, cols, npages; - u64 size; - u32 reg; - enum edac_type mode; - enum mem_type mtype; + WARN_ON(channel < 0 || channel >= 6); - pci_read_config_dword(pvt->pci_br, SAD_TARGET, ®); - pvt->sbridge_dev->source_id = SOURCE_ID(reg); + return channel < 3 ? 1 : 0; +} - pci_read_config_dword(pvt->pci_br, SAD_CONTROL, ®); - pvt->sbridge_dev->node_id = NODE_ID(reg); - edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n", - pvt->sbridge_dev->mc, - pvt->sbridge_dev->node_id, - pvt->sbridge_dev->source_id); +/* + * Get the Nth entry from EDC_ROUTE_TABLE register. + * (This is the per-tile mapping of logical interleave targets to + * physical EDC modules.) + * + * entry 0: 0:2 + * 1: 3:5 + * 2: 6:8 + * 3: 9:11 + * 4: 12:14 + * 5: 15:17 + * 6: 18:20 + * 7: 21:23 + * reserved: 24:31 + */ +static u32 knl_get_edc_route(int entry, u32 reg) +{ + WARN_ON(entry >= KNL_MAX_EDCS); + return GET_BITFIELD(reg, entry*3, (entry*3)+2); +} - pci_read_config_dword(pvt->pci_ras, RASENABLES, ®); - if (IS_MIRROR_ENABLED(reg)) { - edac_dbg(0, "Memory mirror is enabled\n"); - pvt->is_mirrored = true; - } else { - edac_dbg(0, "Memory mirror is disabled\n"); - pvt->is_mirrored = false; +/* + * Get the Nth entry from MC_ROUTE_TABLE register. + * (This is the per-tile mapping of logical interleave targets to + * physical DRAM channels modules.) + * + * entry 0: mc 0:2 channel 18:19 + * 1: mc 3:5 channel 20:21 + * 2: mc 6:8 channel 22:23 + * 3: mc 9:11 channel 24:25 + * 4: mc 12:14 channel 26:27 + * 5: mc 15:17 channel 28:29 + * reserved: 30:31 + * + * Though we have 3 bits to identify the MC, we should only see + * the values 0 or 1. + */ + +static u32 knl_get_mc_route(int entry, u32 reg) +{ + int mc, chan; + + WARN_ON(entry >= KNL_MAX_CHANNELS); + + mc = GET_BITFIELD(reg, entry*3, (entry*3)+2); + chan = GET_BITFIELD(reg, (entry*2) + 18, (entry*2) + 18 + 1); + + return knl_channel_remap(mc, chan); +} + +/* + * Render the EDC_ROUTE register in human-readable form. + * Output string s should be at least KNL_MAX_EDCS*2 bytes. 
+ */
+static void knl_show_edc_route(u32 reg, char *s)
+{
+	int i;
+
+	for (i = 0; i < KNL_MAX_EDCS; i++) {
+		s[i*2] = knl_get_edc_route(i, reg) + '0';
+		s[i*2+1] = '-';
 	}
-	pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
-	if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
-		edac_dbg(0, "Lockstep is enabled\n");
-		mode = EDAC_S8ECD8ED;
-		pvt->is_lockstep = true;
-	} else {
-		edac_dbg(0, "Lockstep is disabled\n");
-		mode = EDAC_S4ECD4ED;
-		pvt->is_lockstep = false;
+	s[KNL_MAX_EDCS*2 - 1] = '\0';
+}
+
+/*
+ * Render the MC_ROUTE register in human-readable form.
+ * Output string s should be at least KNL_MAX_CHANNELS*2 bytes.
+ */
+static void knl_show_mc_route(u32 reg, char *s)
+{
+	int i;
+
+	for (i = 0; i < KNL_MAX_CHANNELS; i++) {
+		s[i*2] = knl_get_mc_route(i, reg) + '0';
+		s[i*2+1] = '-';
 	}
-	if (IS_CLOSE_PG(pvt->info.mcmtr)) {
-		edac_dbg(0, "address map is on closed page mode\n");
-		pvt->is_close_pg = true;
-	} else {
-		edac_dbg(0, "address map is on open page mode\n");
-		pvt->is_close_pg = false;
+
+	s[KNL_MAX_CHANNELS*2 - 1] = '\0';
+}
+
+#define KNL_EDC_ROUTE 0xb8
+#define KNL_MC_ROUTE 0xb4
+
+/* Is this dram rule backed by regular DRAM in flat mode? */
+#define KNL_EDRAM(reg) GET_BITFIELD(reg, 29, 29)
+
+/* Is this rule backed by EDC? */
+#define KNL_EDRAM_ONLY(reg) GET_BITFIELD(reg, 29, 29)
+
+/* Is this rule backed by DRAM, cacheable in EDRAM? */
+#define KNL_CACHEABLE(reg) GET_BITFIELD(reg, 28, 28)
+
+/* Is this rule mod3? */
+#define KNL_MOD3(reg) GET_BITFIELD(reg, 27, 27)
+
+/*
+ * Figure out how big our RAM modules are.
+ *
+ * The DIMMMTR register in KNL doesn't tell us the size of the DIMMs, so we
+ * have to figure this out from the SAD rules, interleave lists, route tables,
+ * and TAD rules.
+ *
+ * SAD rules can have holes in them (e.g. the 3G-4G hole), so we have to
+ * inspect the TAD rules to figure out how large the SAD regions really are.
+ *
+ * When we know the real size of a SAD region and how many ways it's
+ * interleaved, we know the individual contribution of each channel to
+ * TAD is size/ways.
+ *
+ * Finally, we have to check whether each channel participates in each SAD
+ * region.
+ *
+ * Fortunately, KNL only supports one DIMM per channel, so once we know how
+ * much memory the channel uses, we know the DIMM is at least that large.
+ * (The BIOS might possibly choose not to map all available memory, in which
+ * case we will underreport the size of the DIMM.)
+ *
+ * In theory, we could try to determine the EDC sizes as well, but that would
+ * only work in flat mode, not in cache mode.
+ *
+ * @mc_sizes: Output sizes of channels (must have space for KNL_MAX_CHANNELS
+ *            elements)
+ */
+static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes)
+{
+	u64 sad_base, sad_limit = 0;
+	u64 tad_base, tad_size, tad_limit, tad_deadspace, tad_livespace;
+	int sad_rule = 0;
+	int tad_rule = 0;
+	int intrlv_ways, tad_ways;
+	u32 first_pkg, pkg;
+	int i;
+	u64 sad_actual_size[2]; /* sad size accounting for holes, per mc */
+	u32 dram_rule, interleave_reg;
+	u32 mc_route_reg[KNL_MAX_CHAS];
+	u32 edc_route_reg[KNL_MAX_CHAS];
+	int edram_only;
+	char edc_route_string[KNL_MAX_EDCS*2];
+	char mc_route_string[KNL_MAX_CHANNELS*2];
+	int cur_reg_start;
+	int mc;
+	int channel;
+	int participants[KNL_MAX_CHANNELS];
+
+	for (i = 0; i < KNL_MAX_CHANNELS; i++)
+		mc_sizes[i] = 0;
+
+	/* Read the EDC route table in each CHA.
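+	 * Runs of consecutive CHAs whose route registers are identical are
+	 * coalesced into a single "CHA m-n" line by the debug output below.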
*/ + cur_reg_start = 0; + for (i = 0; i < KNL_MAX_CHAS; i++) { + pci_read_config_dword(pvt->knl.pci_cha[i], + KNL_EDC_ROUTE, &edc_route_reg[i]); + + if (i > 0 && edc_route_reg[i] != edc_route_reg[i-1]) { + knl_show_edc_route(edc_route_reg[i-1], + edc_route_string); + if (cur_reg_start == i-1) + edac_dbg(0, "edc route table for CHA %d: %s\n", + cur_reg_start, edc_route_string); + else + edac_dbg(0, "edc route table for CHA %d-%d: %s\n", + cur_reg_start, i-1, edc_route_string); + cur_reg_start = i; + } } + knl_show_edc_route(edc_route_reg[i-1], edc_route_string); + if (cur_reg_start == i-1) + edac_dbg(0, "edc route table for CHA %d: %s\n", + cur_reg_start, edc_route_string); + else + edac_dbg(0, "edc route table for CHA %d-%d: %s\n", + cur_reg_start, i-1, edc_route_string); + + /* Read the MC route table in each CHA. */ + cur_reg_start = 0; + for (i = 0; i < KNL_MAX_CHAS; i++) { + pci_read_config_dword(pvt->knl.pci_cha[i], + KNL_MC_ROUTE, &mc_route_reg[i]); + + if (i > 0 && mc_route_reg[i] != mc_route_reg[i-1]) { + knl_show_mc_route(mc_route_reg[i-1], mc_route_string); + if (cur_reg_start == i-1) + edac_dbg(0, "mc route table for CHA %d: %s\n", + cur_reg_start, mc_route_string); + else + edac_dbg(0, "mc route table for CHA %d-%d: %s\n", + cur_reg_start, i-1, mc_route_string); + cur_reg_start = i; + } + } + knl_show_mc_route(mc_route_reg[i-1], mc_route_string); + if (cur_reg_start == i-1) + edac_dbg(0, "mc route table for CHA %d: %s\n", + cur_reg_start, mc_route_string); + else + edac_dbg(0, "mc route table for CHA %d-%d: %s\n", + cur_reg_start, i-1, mc_route_string); - if (pvt->pci_ddrio) { - pci_read_config_dword(pvt->pci_ddrio, RANK_CFG_A, ®); - if (IS_RDIMM_ENABLED(reg)) { - /* FIXME: Can also be LRDIMM */ - edac_dbg(0, "Memory is registered\n"); - mtype = MEM_RDDR3; - } else { - edac_dbg(0, "Memory is unregistered\n"); - mtype = MEM_DDR3; + /* Process DRAM rules */ + for (sad_rule = 0; sad_rule < pvt->info.max_sad; sad_rule++) { + /* previous limit becomes the new base */ + sad_base = sad_limit; + + pci_read_config_dword(pvt->pci_sad0, + pvt->info.dram_rule[sad_rule], &dram_rule); + + if (!DRAM_RULE_ENABLE(dram_rule)) + break; + + edram_only = KNL_EDRAM_ONLY(dram_rule); + + sad_limit = pvt->info.sad_limit(dram_rule)+1; + + pci_read_config_dword(pvt->pci_sad0, + pvt->info.interleave_list[sad_rule], &interleave_reg); + + /* + * Find out how many ways this dram rule is interleaved. + * We stop when we see the first channel again. + */ + first_pkg = sad_pkg(pvt->info.interleave_pkg, + interleave_reg, 0); + for (intrlv_ways = 1; intrlv_ways < 8; intrlv_ways++) { + pkg = sad_pkg(pvt->info.interleave_pkg, + interleave_reg, intrlv_ways); + + if ((pkg & 0x8) == 0) { + /* + * 0 bit means memory is non-local, + * which KNL doesn't support + */ + edac_dbg(0, "Unexpected interleave target %d\n", + pkg); + return -1; + } + + if (pkg == first_pkg) + break; + } + if (KNL_MOD3(dram_rule)) + intrlv_ways *= 3; + + edac_dbg(3, "dram rule %d (base 0x%llx, limit 0x%llx), %d way interleave%s\n", + sad_rule, + sad_base, + sad_limit, + intrlv_ways, + edram_only ? ", EDRAM" : ""); + + /* + * Find out how big the SAD region really is by iterating + * over TAD tables (SAD regions may contain holes). + * Each memory controller might have a different TAD table, so + * we have to look at both. + * + * Livespace is the memory that's mapped in this TAD table, + * deadspace is the holes (this could be the MMIO hole, or it + * could be memory that's mapped by the other TAD table but + * not this one). 
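+		 * A worked example with made-up values: if TAD rule 0 maps
+		 * [0, 2G) (limit 2G-1, deadspace 0) and TAD rule 1 has limit
+		 * 5G-1 with deadspace 1G, then rule 1 contributes
+		 * size = 5G - (2G livespace + 1G deadspace) = 2G of DRAM,
+		 * i.e. the range [3G, 5G), leaving a 1G hole at [2G, 3G).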
+ */ + for (mc = 0; mc < 2; mc++) { + sad_actual_size[mc] = 0; + tad_livespace = 0; + for (tad_rule = 0; + tad_rule < ARRAY_SIZE( + knl_tad_dram_limit_lo); + tad_rule++) { + if (knl_get_tad(pvt, + tad_rule, + mc, + &tad_deadspace, + &tad_limit, + &tad_ways)) + break; + + tad_size = (tad_limit+1) - + (tad_livespace + tad_deadspace); + tad_livespace += tad_size; + tad_base = (tad_limit+1) - tad_size; + + if (tad_base < sad_base) { + if (tad_limit > sad_base) + edac_dbg(0, "TAD region overlaps lower SAD boundary -- TAD tables may be configured incorrectly.\n"); + } else if (tad_base < sad_limit) { + if (tad_limit+1 > sad_limit) { + edac_dbg(0, "TAD region overlaps upper SAD boundary -- TAD tables may be configured incorrectly.\n"); + } else { + /* TAD region is completely inside SAD region */ + edac_dbg(3, "TAD region %d 0x%llx - 0x%llx (%lld bytes) table%d\n", + tad_rule, tad_base, + tad_limit, tad_size, + mc); + sad_actual_size[mc] += tad_size; + } + } + } + } + + for (mc = 0; mc < 2; mc++) { + edac_dbg(3, " total TAD DRAM footprint in table%d : 0x%llx (%lld bytes)\n", + mc, sad_actual_size[mc], sad_actual_size[mc]); + } + + /* Ignore EDRAM rule */ + if (edram_only) + continue; + + /* Figure out which channels participate in interleave. */ + for (channel = 0; channel < KNL_MAX_CHANNELS; channel++) + participants[channel] = 0; + + /* For each channel, does at least one CHA have + * this channel mapped to the given target? + */ + for (channel = 0; channel < KNL_MAX_CHANNELS; channel++) { + int target; + int cha; + + for (target = 0; target < KNL_MAX_CHANNELS; target++) { + for (cha = 0; cha < KNL_MAX_CHAS; cha++) { + if (knl_get_mc_route(target, + mc_route_reg[cha]) == channel + && !participants[channel]) { + participants[channel] = 1; + break; + } + } + } + } + + for (channel = 0; channel < KNL_MAX_CHANNELS; channel++) { + mc = knl_channel_mc(channel); + if (participants[channel]) { + edac_dbg(4, "mc channel %d contributes %lld bytes via sad entry %d\n", + channel, + sad_actual_size[mc]/intrlv_ways, + sad_rule); + mc_sizes[channel] += + sad_actual_size[mc]/intrlv_ways; + } } - } else { - edac_dbg(0, "Cannot determine memory type\n"); - mtype = MEM_UNKNOWN; } - /* On all supported DDR3 DIMM types, there are 8 banks available */ - banks = 8; + return 0; +} - for (i = 0; i < NUM_CHANNELS; i++) { - u32 mtr; +static void get_source_id(struct mem_ctl_info *mci) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + u32 reg; - for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { - dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, - i, j, 0); - pci_read_config_dword(pvt->pci_tad[i], - mtr_regs[j], &mtr); + if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL || + pvt->info.type == KNIGHTS_LANDING) + pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, ®); + else + pci_read_config_dword(pvt->pci_br0, SAD_TARGET, ®); + + if (pvt->info.type == KNIGHTS_LANDING) + pvt->sbridge_dev->source_id = SOURCE_ID_KNL(reg); + else + pvt->sbridge_dev->source_id = SOURCE_ID(reg); +} + +static int __populate_dimms(struct mem_ctl_info *mci, + u64 knl_mc_sizes[KNL_MAX_CHANNELS], + enum edac_type mode) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + int channels = pvt->info.type == KNIGHTS_LANDING ? 
KNL_MAX_CHANNELS + : NUM_CHANNELS; + unsigned int i, j, banks, ranks, rows, cols, npages; + struct dimm_info *dimm; + enum mem_type mtype; + u64 size; + + mtype = pvt->info.get_memory_type(pvt); + if (mtype == MEM_RDDR3 || mtype == MEM_RDDR4) + edac_dbg(0, "Memory is registered\n"); + else if (mtype == MEM_UNKNOWN) + edac_dbg(0, "Cannot determine memory type\n"); + else + edac_dbg(0, "Memory is unregistered\n"); + + if (mtype == MEM_DDR4 || mtype == MEM_RDDR4) + banks = 16; + else + banks = 8; + + for (i = 0; i < channels; i++) { + u32 mtr, amap = 0; + + int max_dimms_per_channel; + + if (pvt->info.type == KNIGHTS_LANDING) { + max_dimms_per_channel = 1; + if (!pvt->knl.pci_channel[i]) + continue; + } else { + max_dimms_per_channel = ARRAY_SIZE(mtr_regs); + if (!pvt->pci_tad[i]) + continue; + pci_read_config_dword(pvt->pci_tad[i], 0x8c, &amap); + } + + for (j = 0; j < max_dimms_per_channel; j++) { + dimm = edac_get_dimm(mci, i, j, 0); + if (pvt->info.type == KNIGHTS_LANDING) { + pci_read_config_dword(pvt->knl.pci_channel[i], + knl_mtr_reg, &mtr); + } else { + pci_read_config_dword(pvt->pci_tad[i], + mtr_regs[j], &mtr); + } edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr); + if (IS_DIMM_PRESENT(mtr)) { + if (!IS_ECC_ENABLED(pvt->info.mcmtr)) { + sbridge_printk(KERN_ERR, "CPU SrcID #%d, Ha #%d, Channel #%d has DIMMs, but ECC is disabled\n", + pvt->sbridge_dev->source_id, + pvt->sbridge_dev->dom, i); + return -ENODEV; + } pvt->channel[i].dimms++; - ranks = numrank(mtr); - rows = numrow(mtr); - cols = numcol(mtr); + ranks = numrank(pvt->info.type, mtr); + + if (pvt->info.type == KNIGHTS_LANDING) { + /* For DDR4, this is fixed. */ + cols = 1 << 10; + rows = knl_mc_sizes[i] / + ((u64) cols * ranks * banks * 8); + } else { + rows = numrow(mtr); + cols = numcol(mtr); + } - /* DDR3 has 8 I/O banks */ size = ((u64)rows * cols * banks * ranks) >> (20 - 3); npages = MiB_TO_PAGES(size); - edac_dbg(0, "mc#%d: channel %d, dimm %d, %Ld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", - pvt->sbridge_dev->mc, i, j, + edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + pvt->sbridge_dev->mc, pvt->sbridge_dev->dom, i, j, size, npages, banks, ranks, rows, cols); dimm->nr_pages = npages; dimm->grain = 32; - dimm->dtype = (banks == 8) ? 
DEV_X8 : DEV_X4; + dimm->dtype = pvt->info.get_width(pvt, mtr); dimm->mtype = mtype; dimm->edac_mode = mode; + pvt->channel[i].dimm[j].rowbits = order_base_2(rows); + pvt->channel[i].dimm[j].colbits = order_base_2(cols); + pvt->channel[i].dimm[j].bank_xor_enable = + GET_BITFIELD(pvt->info.mcmtr, 9, 9); + pvt->channel[i].dimm[j].amap_fine = GET_BITFIELD(amap, 0, 0); snprintf(dimm->label, sizeof(dimm->label), - "CPU_SrcID#%u_Channel#%u_DIMM#%u", - pvt->sbridge_dev->source_id, i, j); + "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u", + pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom, i, j); } } } @@ -615,6 +1686,84 @@ static int get_dimm_config(struct mem_ctl_info *mci) return 0; } +static int get_dimm_config(struct mem_ctl_info *mci) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + u64 knl_mc_sizes[KNL_MAX_CHANNELS]; + enum edac_type mode; + u32 reg; + + pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt); + edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n", + pvt->sbridge_dev->mc, + pvt->sbridge_dev->node_id, + pvt->sbridge_dev->source_id); + + /* KNL doesn't support mirroring or lockstep, + * and is always closed page + */ + if (pvt->info.type == KNIGHTS_LANDING) { + mode = EDAC_S4ECD4ED; + pvt->mirror_mode = NON_MIRRORING; + pvt->is_cur_addr_mirrored = false; + + if (knl_get_dimm_capacity(pvt, knl_mc_sizes) != 0) + return -1; + if (pci_read_config_dword(pvt->pci_ta, KNL_MCMTR, &pvt->info.mcmtr)) { + edac_dbg(0, "Failed to read KNL_MCMTR register\n"); + return -ENODEV; + } + } else { + if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) { + if (pci_read_config_dword(pvt->pci_ha, HASWELL_HASYSDEFEATURE2, ®)) { + edac_dbg(0, "Failed to read HASWELL_HASYSDEFEATURE2 register\n"); + return -ENODEV; + } + pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21); + if (GET_BITFIELD(reg, 28, 28)) { + pvt->mirror_mode = ADDR_RANGE_MIRRORING; + edac_dbg(0, "Address range partial memory mirroring is enabled\n"); + goto next; + } + } + if (pci_read_config_dword(pvt->pci_ras, RASENABLES, ®)) { + edac_dbg(0, "Failed to read RASENABLES register\n"); + return -ENODEV; + } + if (IS_MIRROR_ENABLED(reg)) { + pvt->mirror_mode = FULL_MIRRORING; + edac_dbg(0, "Full memory mirroring is enabled\n"); + } else { + pvt->mirror_mode = NON_MIRRORING; + edac_dbg(0, "Memory mirroring is disabled\n"); + } + +next: + if (pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr)) { + edac_dbg(0, "Failed to read MCMTR register\n"); + return -ENODEV; + } + if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) { + edac_dbg(0, "Lockstep is enabled\n"); + mode = EDAC_S8ECD8ED; + pvt->is_lockstep = true; + } else { + edac_dbg(0, "Lockstep is disabled\n"); + mode = EDAC_S4ECD4ED; + pvt->is_lockstep = false; + } + if (IS_CLOSE_PG(pvt->info.mcmtr)) { + edac_dbg(0, "address map is on closed page mode\n"); + pvt->is_close_pg = true; + } else { + edac_dbg(0, "address map is on open page mode\n"); + pvt->is_close_pg = false; + } + } + + return __populate_dimms(mci, knl_mc_sizes, mode); +} + static void get_memory_layout(const struct mem_ctl_info *mci) { struct sbridge_pvt *pvt = mci->pvt_info; @@ -622,30 +1771,27 @@ static void get_memory_layout(const struct mem_ctl_info *mci) u32 reg; u64 limit, prv = 0; u64 tmp_mb; - u32 mb, kb; + u32 gb, mb; u32 rir_way; /* * Step 1) Get TOLM/TOHM ranges */ - /* Address range is 32:28 */ - pci_read_config_dword(pvt->pci_sad1, TOLM, - ®); - pvt->tolm = GET_TOLM(reg); + pvt->tolm = pvt->info.get_tolm(pvt); tmp_mb = (1 + pvt->tolm) >> 20; - mb = div_u64_rem(tmp_mb, 1000, &kb); - edac_dbg(0, "TOLM: %u.%03u GB 
(0x%016Lx)\n", mb, kb, (u64)pvt->tolm); + gb = div_u64_rem(tmp_mb, 1024, &mb); + edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", + gb, (mb*1000)/1024, (u64)pvt->tolm); /* Address range is already 45:25 */ - pci_read_config_dword(pvt->pci_sad1, TOHM, - ®); - pvt->tohm = GET_TOHM(reg); + pvt->tohm = pvt->info.get_tohm(pvt); tmp_mb = (1 + pvt->tohm) >> 20; - mb = div_u64_rem(tmp_mb, 1000, &kb); - edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); + gb = div_u64_rem(tmp_mb, 1024, &mb); + edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", + gb, (mb*1000)/1024, (u64)pvt->tohm); /* * Step 2) Get SAD range and SAD Interleave list @@ -654,11 +1800,11 @@ static void get_memory_layout(const struct mem_ctl_info *mci) * algorithm bellow. */ prv = 0; - for (n_sads = 0; n_sads < MAX_SAD; n_sads++) { + for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) { /* SAD_LIMIT Address range is 45:26 */ - pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads], + pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads], ®); - limit = SAD_LIMIT(reg); + limit = pvt->info.sad_limit(reg); if (!DRAM_RULE_ENABLE(reg)) continue; @@ -667,46 +1813,49 @@ static void get_memory_layout(const struct mem_ctl_info *mci) break; tmp_mb = (limit + 1) >> 20; - mb = div_u64_rem(tmp_mb, 1000, &kb); + gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n", n_sads, - get_dram_attr(reg), - mb, kb, + show_dram_attr(pvt->info.dram_attr(reg)), + gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, - INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]", + get_intlv_mode_str(reg, pvt->info.type), reg); prv = limit; - pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads], + pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads], ®); - sad_interl = sad_pkg(reg, 0); + sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0); for (j = 0; j < 8; j++) { - if (j > 0 && sad_interl == sad_pkg(reg, j)) + u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, j); + if (j > 0 && sad_interl == pkg) break; edac_dbg(0, "SAD#%d, interleave #%d: %d\n", - n_sads, j, sad_pkg(reg, j)); + n_sads, j, pkg); } } + if (pvt->info.type == KNIGHTS_LANDING) + return; + /* * Step 3) Get TAD range */ prv = 0; for (n_tads = 0; n_tads < MAX_TAD; n_tads++) { - pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads], - ®); + pci_read_config_dword(pvt->pci_ha, tad_dram_rule[n_tads], ®); limit = TAD_LIMIT(reg); if (limit <= prv) break; tmp_mb = (limit + 1) >> 20; - mb = div_u64_rem(tmp_mb, 1000, &kb); + gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", - n_tads, mb, kb, + n_tads, gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, - (u32)TAD_SOCK(reg), - (u32)TAD_CH(reg), + (u32)(1 << TAD_SOCK(reg)), + (u32)TAD_CH(reg) + 1, (u32)TAD_TGT0(reg), (u32)TAD_TGT1(reg), (u32)TAD_TGT2(reg), @@ -726,10 +1875,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tad_ch_nilv_offset[j], ®); tmp_mb = TAD_OFFSET(reg) >> 20; - mb = div_u64_rem(tmp_mb, 1000, &kb); + gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n", i, j, - mb, kb, + gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, reg); } @@ -749,12 +1898,12 @@ static void get_memory_layout(const struct mem_ctl_info *mci) if (!IS_RIR_VALID(reg)) continue; - tmp_mb = RIR_LIMIT(reg) >> 20; + tmp_mb = pvt->info.rir_limit(reg) >> 20; rir_way = 1 << RIR_WAY(reg); - mb = div_u64_rem(tmp_mb, 1000, 
&kb); + gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n", i, j, - mb, kb, + gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, rir_way, reg); @@ -763,51 +1912,146 @@ static void get_memory_layout(const struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_tad[i], rir_offset[j][k], ®); - tmp_mb = RIR_OFFSET(reg) << 6; + tmp_mb = RIR_OFFSET(pvt->info.type, reg) << 6; - mb = div_u64_rem(tmp_mb, 1000, &kb); + gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", i, j, k, - mb, kb, + gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, - (u32)RIR_RNK_TGT(reg), + (u32)RIR_RNK_TGT(pvt->info.type, reg), reg); } } } } -struct mem_ctl_info *get_mci_for_node_id(u8 node_id) +static struct mem_ctl_info *get_mci_for_node_id(u8 node_id, u8 ha) { struct sbridge_dev *sbridge_dev; list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { - if (sbridge_dev->node_id == node_id) + if (sbridge_dev->node_id == node_id && sbridge_dev->dom == ha) return sbridge_dev->mci; } return NULL; } +static u8 sb_close_row[] = { + 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33 +}; + +static u8 sb_close_column[] = { + 3, 4, 5, 14, 19, 23, 24, 25, 26, 27 +}; + +static u8 sb_open_row[] = { + 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33 +}; + +static u8 sb_open_column[] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 +}; + +static u8 sb_open_fine_column[] = { + 3, 4, 5, 7, 8, 9, 10, 11, 12, 13 +}; + +static int sb_bits(u64 addr, int nbits, u8 *bits) +{ + int i, res = 0; + + for (i = 0; i < nbits; i++) + res |= ((addr >> bits[i]) & 1) << i; + return res; +} + +static int sb_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1) +{ + int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1); + + if (do_xor) + ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1); + + return ret; +} + +static bool sb_decode_ddr4(struct mem_ctl_info *mci, int ch, u8 rank, + u64 rank_addr, char *msg) +{ + int dimmno = 0; + int row, col, bank_address, bank_group; + struct sbridge_pvt *pvt; + u32 bg0 = 0, rowbits = 0, colbits = 0; + u32 amap_fine = 0, bank_xor_enable = 0; + + dimmno = (rank < 12) ? rank / 4 : 2; + pvt = mci->pvt_info; + amap_fine = pvt->channel[ch].dimm[dimmno].amap_fine; + bg0 = amap_fine ? 
6 : 13;
+	rowbits = pvt->channel[ch].dimm[dimmno].rowbits;
+	colbits = pvt->channel[ch].dimm[dimmno].colbits;
+	bank_xor_enable = pvt->channel[ch].dimm[dimmno].bank_xor_enable;
+
+	if (pvt->is_lockstep) {
+		pr_warn_once("LockStep row/column decode is not supported yet!\n");
+		msg[0] = '\0';
+		return false;
+	}
+
+	if (pvt->is_close_pg) {
+		row = sb_bits(rank_addr, rowbits, sb_close_row);
+		col = sb_bits(rank_addr, colbits, sb_close_column);
+		col |= 0x400; /* C10 is autoprecharge, always set */
+		bank_address = sb_bank_bits(rank_addr, 8, 9, bank_xor_enable, 22, 28);
+		bank_group = sb_bank_bits(rank_addr, 6, 7, bank_xor_enable, 20, 21);
+	} else {
+		row = sb_bits(rank_addr, rowbits, sb_open_row);
+		if (amap_fine)
+			col = sb_bits(rank_addr, colbits, sb_open_fine_column);
+		else
+			col = sb_bits(rank_addr, colbits, sb_open_column);
+		bank_address = sb_bank_bits(rank_addr, 18, 19, bank_xor_enable, 22, 23);
+		bank_group = sb_bank_bits(rank_addr, bg0, 17, bank_xor_enable, 20, 21);
+	}
+
+	row &= (1u << rowbits) - 1;
+
+	sprintf(msg, "row:0x%x col:0x%x bank_addr:%d bank_group:%d",
+		row, col, bank_address, bank_group);
+	return true;
+}
+
+static bool sb_decode_ddr3(struct mem_ctl_info *mci, int ch, u8 rank,
+			   u64 rank_addr, char *msg)
+{
+	pr_warn_once("DDR3 row/column decode is not supported yet!\n");
+	msg[0] = '\0';
+	return false;
+}
+
 static int get_memory_error_data(struct mem_ctl_info *mci,
 				 u64 addr,
-				 u8 *socket,
+				 u8 *socket, u8 *ha,
 				 long *channel_mask,
 				 u8 *rank,
 				 char **area_type, char *msg)
 {
 	struct mem_ctl_info *new_mci;
 	struct sbridge_pvt *pvt = mci->pvt_info;
+	struct pci_dev *pci_ha;
 	int n_rir, n_sads, n_tads, sad_way, sck_xch;
 	int sad_interl, idx, base_ch;
-	int interleave_mode;
-	unsigned sad_interleave[MAX_INTERLEAVE];
-	u32 reg;
-	u8 ch_way,sck_way;
+	int interleave_mode, shiftup = 0;
+	unsigned int sad_interleave[MAX_INTERLEAVE];
+	u32 reg, dram_rule;
+	u8 ch_way, sck_way, pkg, sad_ha = 0, rankid = 0;
 	u32 tad_offset;
 	u32 rir_way;
-	u32 mb, kb;
-	u64 ch_addr, offset, limit, prv = 0;
-
+	u32 mb, gb;
+	u64 ch_addr, offset, limit = 0, prv = 0;
+	u64 rank_addr;
+	enum mem_type mtype;
 
 	/*
 	 * Step 0) Check if the address is at special memory ranges
@@ -828,14 +2072,14 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	/*
 	 * Step 1) Get socket
 	 */
-	for (n_sads = 0; n_sads < MAX_SAD; n_sads++) {
-		pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads],
+	for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) {
+		pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads],
 				      &reg);
 
 		if (!DRAM_RULE_ENABLE(reg))
 			continue;
 
-		limit = SAD_LIMIT(reg);
+		limit = pvt->info.sad_limit(reg);
 		if (limit <= prv) {
 			sprintf(msg, "Can't discover the memory socket");
 			return -EINVAL;
@@ -844,59 +2088,103 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 			break;
 		prv = limit;
 	}
-	if (n_sads == MAX_SAD) {
+	if (n_sads == pvt->info.max_sad) {
 		sprintf(msg, "Can't discover the memory socket");
 		return -EINVAL;
 	}
-	*area_type = get_dram_attr(reg);
-	interleave_mode = INTERLEAVE_MODE(reg);
+	dram_rule = reg;
+	*area_type = show_dram_attr(pvt->info.dram_attr(dram_rule));
+	interleave_mode = pvt->info.interleave_mode(dram_rule);
 
-	pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
+	pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads],
 			      &reg);
-	sad_interl = sad_pkg(reg, 0);
-	for (sad_way = 0; sad_way < 8; sad_way++) {
-		if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way))
+
+	if (pvt->info.type == SANDY_BRIDGE) {
+		sad_interl
= sad_pkg(pvt->info.interleave_pkg, reg, 0); + for (sad_way = 0; sad_way < 8; sad_way++) { + u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, sad_way); + if (sad_way > 0 && sad_interl == pkg) + break; + sad_interleave[sad_way] = pkg; + edac_dbg(0, "SAD interleave #%d: %d\n", + sad_way, sad_interleave[sad_way]); + } + edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n", + pvt->sbridge_dev->mc, + n_sads, + addr, + limit, + sad_way + 7, + !interleave_mode ? "" : "XOR[18:16]"); + if (interleave_mode) + idx = ((addr >> 6) ^ (addr >> 16)) & 7; + else + idx = (addr >> 6) & 7; + switch (sad_way) { + case 1: + idx = 0; break; - sad_interleave[sad_way] = sad_pkg(reg, sad_way); - edac_dbg(0, "SAD interleave #%d: %d\n", - sad_way, sad_interleave[sad_way]); - } - edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n", - pvt->sbridge_dev->mc, - n_sads, - addr, - limit, - sad_way + 7, - interleave_mode ? "" : "XOR[18:16]"); - if (interleave_mode) - idx = ((addr >> 6) ^ (addr >> 16)) & 7; - else + case 2: + idx = idx & 1; + break; + case 4: + idx = idx & 3; + break; + case 8: + break; + default: + sprintf(msg, "Can't discover socket interleave"); + return -EINVAL; + } + *socket = sad_interleave[idx]; + edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n", + idx, sad_way, *socket); + } else if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) { + int bits, a7mode = A7MODE(dram_rule); + + if (a7mode) { + /* A7 mode swaps P9 with P6 */ + bits = GET_BITFIELD(addr, 7, 8) << 1; + bits |= GET_BITFIELD(addr, 9, 9); + } else + bits = GET_BITFIELD(addr, 6, 8); + + if (interleave_mode == 0) { + /* interleave mode will XOR {8,7,6} with {18,17,16} */ + idx = GET_BITFIELD(addr, 16, 18); + idx ^= bits; + } else + idx = bits; + + pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx); + *socket = sad_pkg_socket(pkg); + sad_ha = sad_pkg_ha(pkg); + + if (a7mode) { + /* MCChanShiftUpEnable */ + pci_read_config_dword(pvt->pci_ha, HASWELL_HASYSDEFEATURE2, ®); + shiftup = GET_BITFIELD(reg, 22, 22); + } + + edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %i, shiftup: %i\n", + idx, *socket, sad_ha, shiftup); + } else { + /* Ivy Bridge's SAD mode doesn't support XOR interleave mode */ idx = (addr >> 6) & 7; - switch (sad_way) { - case 1: - idx = 0; - break; - case 2: - idx = idx & 1; - break; - case 4: - idx = idx & 3; - break; - case 8: - break; - default: - sprintf(msg, "Can't discover socket interleave"); - return -EINVAL; + pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx); + *socket = sad_pkg_socket(pkg); + sad_ha = sad_pkg_ha(pkg); + edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n", + idx, *socket, sad_ha); } - *socket = sad_interleave[idx]; - edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n", - idx, sad_way, *socket); + + *ha = sad_ha; /* * Move to the proper node structure, in order to access the * right PCI registers */ - new_mci = get_mci_for_node_id(*socket); + new_mci = get_mci_for_node_id(*socket, sad_ha); if (!new_mci) { sprintf(msg, "Struct for socket #%u wasn't initialized", *socket); @@ -909,9 +2197,9 @@ static int get_memory_error_data(struct mem_ctl_info *mci, * Step 2) Get memory channel */ prv = 0; + pci_ha = pvt->pci_ha; for (n_tads = 0; n_tads < MAX_TAD; n_tads++) { - pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads], - ®); + pci_read_config_dword(pci_ha, tad_dram_rule[n_tads], ®); limit = TAD_LIMIT(reg); if (limit <= prv) { sprintf(msg, 
"Can't discover the memory channel"); @@ -921,19 +2209,21 @@ static int get_memory_error_data(struct mem_ctl_info *mci, break; prv = limit; } + if (n_tads == MAX_TAD) { + sprintf(msg, "Can't discover the memory channel"); + return -EINVAL; + } + ch_way = TAD_CH(reg) + 1; - sck_way = TAD_SOCK(reg) + 1; - /* - * FIXME: Is it right to always use channel 0 for offsets? - */ - pci_read_config_dword(pvt->pci_tad[0], - tad_ch_nilv_offset[n_tads], - &tad_offset); + sck_way = TAD_SOCK(reg); if (ch_way == 3) idx = addr >> 6; - else - idx = addr >> (6 + sck_way); + else { + idx = (addr >> (6 + sck_way + shiftup)) & 0x3; + if (pvt->is_chan_hash) + idx = haswell_chan_hash(idx, addr); + } idx = idx % ch_way; /* @@ -958,19 +2248,26 @@ static int get_memory_error_data(struct mem_ctl_info *mci, } *channel_mask = 1 << base_ch; - if (pvt->is_mirrored) { + pci_read_config_dword(pvt->pci_tad[base_ch], tad_ch_nilv_offset[n_tads], &tad_offset); + + if (pvt->mirror_mode == FULL_MIRRORING || + (pvt->mirror_mode == ADDR_RANGE_MIRRORING && n_tads == 0)) { *channel_mask |= 1 << ((base_ch + 2) % 4); switch(ch_way) { case 2: case 4: - sck_xch = 1 << sck_way * (ch_way >> 1); + sck_xch = (1 << sck_way) * (ch_way >> 1); break; default: sprintf(msg, "Invalid mirror set. Can't decode addr"); return -EINVAL; } - } else + + pvt->is_cur_addr_mirrored = true; + } else { sck_xch = (1 << sck_way) * ch_way; + pvt->is_cur_addr_mirrored = false; + } if (pvt->is_lockstep) *channel_mask |= 1 << ((base_ch + 1) % 4); @@ -981,7 +2278,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, n_tads, addr, limit, - (u32)TAD_SOCK(reg), + sck_way, ch_way, offset, idx, @@ -996,35 +2293,27 @@ static int get_memory_error_data(struct mem_ctl_info *mci, offset, addr); return -EINVAL; } - addr -= offset; - /* Store the low bits [0:6] of the addr */ - ch_addr = addr & 0x7f; - /* Remove socket wayness and remove 6 bits */ - addr >>= 6; - addr = div_u64(addr, sck_xch); -#if 0 - /* Divide by channel way */ - addr = addr / ch_way; -#endif - /* Recover the last 6 bits */ - ch_addr |= addr << 6; + + ch_addr = addr - offset; + ch_addr >>= (6 + shiftup); + ch_addr /= sck_xch; + ch_addr <<= (6 + shiftup); + ch_addr |= addr & ((1 << (6 + shiftup)) - 1); /* * Step 3) Decode rank */ for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) { - pci_read_config_dword(pvt->pci_tad[base_ch], - rir_way_limit[n_rir], - ®); + pci_read_config_dword(pvt->pci_tad[base_ch], rir_way_limit[n_rir], ®); if (!IS_RIR_VALID(reg)) continue; - limit = RIR_LIMIT(reg); - mb = div_u64_rem(limit >> 20, 1000, &kb); + limit = pvt->info.rir_limit(reg); + gb = div_u64_rem(limit >> 20, 1024, &mb); edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n", n_rir, - mb, kb, + gb, (mb*1000)/1024, limit, 1 << RIR_WAY(reg)); if (ch_addr <= limit) @@ -1036,16 +2325,37 @@ static int get_memory_error_data(struct mem_ctl_info *mci, return -EINVAL; } rir_way = RIR_WAY(reg); + if (pvt->is_close_pg) idx = (ch_addr >> 6); else idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */ idx %= 1 << rir_way; - pci_read_config_dword(pvt->pci_tad[base_ch], - rir_offset[n_rir][idx], - ®); - *rank = RIR_RNK_TGT(reg); + pci_read_config_dword(pvt->pci_tad[base_ch], rir_offset[n_rir][idx], ®); + *rank = RIR_RNK_TGT(pvt->info.type, reg); + + if (pvt->info.type == BROADWELL) { + if (pvt->is_close_pg) + shiftup = 6; + else + shiftup = 13; + + rank_addr = ch_addr >> shiftup; + rank_addr /= (1 << rir_way); + rank_addr <<= shiftup; + rank_addr |= ch_addr & GENMASK_ULL(shiftup - 1, 0); + rank_addr -= 
RIR_OFFSET(pvt->info.type, reg); + + mtype = pvt->info.get_memory_type(pvt); + rankid = *rank; + if (mtype == MEM_DDR4 || mtype == MEM_RDDR4) + sb_decode_ddr4(mci, base_ch, rankid, rank_addr, msg); + else + sb_decode_ddr3(mci, base_ch, rankid, rank_addr, msg); + } else { + msg[0] = '\0'; + } edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", n_rir, @@ -1057,6 +2367,60 @@ static int get_memory_error_data(struct mem_ctl_info *mci, return 0; } +static int get_memory_error_data_from_mce(struct mem_ctl_info *mci, + const struct mce *m, u8 *socket, + u8 *ha, long *channel_mask, + char *msg) +{ + u32 reg, channel = GET_BITFIELD(m->status, 0, 3); + struct mem_ctl_info *new_mci; + struct sbridge_pvt *pvt; + struct pci_dev *pci_ha; + bool tad0; + + if (channel >= NUM_CHANNELS) { + sprintf(msg, "Invalid channel 0x%x", channel); + return -EINVAL; + } + + pvt = mci->pvt_info; + if (!pvt->info.get_ha) { + sprintf(msg, "No get_ha()"); + return -EINVAL; + } + *ha = pvt->info.get_ha(m->bank); + if (*ha != 0 && *ha != 1) { + sprintf(msg, "Impossible bank %d", m->bank); + return -EINVAL; + } + + *socket = m->socketid; + new_mci = get_mci_for_node_id(*socket, *ha); + if (!new_mci) { + strcpy(msg, "mci socket got corrupted!"); + return -EINVAL; + } + + pvt = new_mci->pvt_info; + pci_ha = pvt->pci_ha; + pci_read_config_dword(pci_ha, tad_dram_rule[0], ®); + tad0 = m->addr <= TAD_LIMIT(reg); + + *channel_mask = 1 << channel; + if (pvt->mirror_mode == FULL_MIRRORING || + (pvt->mirror_mode == ADDR_RANGE_MIRRORING && tad0)) { + *channel_mask |= 1 << ((channel + 2) % 4); + pvt->is_cur_addr_mirrored = true; + } else { + pvt->is_cur_addr_mirrored = false; + } + + if (pvt->is_lockstep) + *channel_mask |= 1 << ((channel + 1) % 4); + + return 0; +} + /**************************************************************************** Device initialization routines: put/get, init/exit ****************************************************************************/ @@ -1091,26 +2455,21 @@ static void sbridge_put_all_devices(void) } } -/* - * sbridge_get_all_devices Find and perform 'get' operation on the MCH's - * device/functions we want to reference for this driver - * - * Need to 'get' device 16 func 1 and func 2 - */ static int sbridge_get_onedevice(struct pci_dev **prev, u8 *num_mc, const struct pci_id_table *table, - const unsigned devno) + const unsigned devno, + const int multi_bus) { - struct sbridge_dev *sbridge_dev; + struct sbridge_dev *sbridge_dev = NULL; const struct pci_id_descr *dev_descr = &table->descr[devno]; - struct pci_dev *pdev = NULL; + int seg = 0; u8 bus = 0; + int i = 0; - sbridge_printk(KERN_INFO, - "Seeking for: dev %02x.%d PCI ID %04x:%04x\n", - dev_descr->dev, dev_descr->func, + sbridge_printk(KERN_DEBUG, + "Seeking for: PCI ID %04x:%04x\n", PCI_VENDOR_ID_INTEL, dev_descr->dev_id); pdev = pci_get_device(PCI_VENDOR_ID_INTEL, @@ -1125,22 +2484,36 @@ static int sbridge_get_onedevice(struct pci_dev **prev, if (dev_descr->optional) return 0; + /* if the HA wasn't found */ if (devno == 0) return -ENODEV; sbridge_printk(KERN_INFO, - "Device not found: dev %02x.%d PCI ID %04x:%04x\n", - dev_descr->dev, dev_descr->func, + "Device not found: %04x:%04x\n", PCI_VENDOR_ID_INTEL, dev_descr->dev_id); /* End of list, leave */ return -ENODEV; } + seg = pci_domain_nr(pdev->bus); bus = pdev->bus->number; - sbridge_dev = get_sbridge_dev(bus); +next_imc: + sbridge_dev = get_sbridge_dev(seg, bus, dev_descr->dom, + multi_bus, sbridge_dev); if (!sbridge_dev) { - sbridge_dev = 
alloc_sbridge_dev(bus, table); + /* If the HA1 wasn't found, don't create EDAC second memory controller */ + if (dev_descr->dom == IMC1 && devno != 1) { + edac_dbg(0, "Skip IMC1: %04x:%04x (since HA1 was absent)\n", + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + pci_dev_put(pdev); + return 0; + } + + if (dev_descr->dom == SOCK) + goto out_imc; + + sbridge_dev = alloc_sbridge_dev(seg, bus, dev_descr->dom, table); if (!sbridge_dev) { pci_dev_put(pdev); return -ENOMEM; @@ -1148,42 +2521,33 @@ static int sbridge_get_onedevice(struct pci_dev **prev, (*num_mc)++; } - if (sbridge_dev->pdev[devno]) { + if (sbridge_dev->pdev[sbridge_dev->i_devs]) { sbridge_printk(KERN_ERR, - "Duplicated device for " - "dev %02x:%d.%d PCI ID %04x:%04x\n", - bus, dev_descr->dev, dev_descr->func, + "Duplicated device for %04x:%04x\n", PCI_VENDOR_ID_INTEL, dev_descr->dev_id); pci_dev_put(pdev); return -ENODEV; } - sbridge_dev->pdev[devno] = pdev; + sbridge_dev->pdev[sbridge_dev->i_devs++] = pdev; - /* Sanity check */ - if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev || - PCI_FUNC(pdev->devfn) != dev_descr->func)) { - sbridge_printk(KERN_ERR, - "Device PCI ID %04x:%04x " - "has dev %02x:%d.%d instead of dev %02x:%02x.%d\n", - PCI_VENDOR_ID_INTEL, dev_descr->dev_id, - bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), - bus, dev_descr->dev, dev_descr->func); - return -ENODEV; - } + /* pdev belongs to more than one IMC, do extra gets */ + if (++i > 1) + pci_dev_get(pdev); + if (dev_descr->dom == SOCK && i < table->n_imcs_per_sock) + goto next_imc; + +out_imc: /* Be sure that the device is enabled */ if (unlikely(pci_enable_device(pdev) < 0)) { sbridge_printk(KERN_ERR, - "Couldn't enable " - "dev %02x:%d.%d PCI ID %04x:%04x\n", - bus, dev_descr->dev, dev_descr->func, + "Couldn't enable %04x:%04x\n", PCI_VENDOR_ID_INTEL, dev_descr->dev_id); return -ENODEV; } - edac_dbg(0, "Detected dev %02x:%d.%d PCI ID %04x:%04x\n", - bus, dev_descr->dev, dev_descr->func, + edac_dbg(0, "Detected %04x:%04x\n", PCI_VENDOR_ID_INTEL, dev_descr->dev_id); /* @@ -1198,27 +2562,44 @@ static int sbridge_get_onedevice(struct pci_dev **prev, return 0; } -static int sbridge_get_all_devices(u8 *num_mc) +/* + * sbridge_get_all_devices - Find and perform 'get' operation on the MCH's + * devices we want to reference for this driver. + * @num_mc: pointer to the memory controllers count, to be incremented in case + * of success. 
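+ *
+ * A minimal (hypothetical) call from the probe path, for illustration:
+ *
+ *	u8 num_mc = 0;
+ *
+ *	if (sbridge_get_all_devices(&num_mc, ptable) < 0)
+ *		return -ENODEV;	/* any devices found were already put */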
+ * @table: model specific table + * + * returns 0 in case of success or error code + */ +static int sbridge_get_all_devices(u8 *num_mc, + const struct pci_id_table *table) { int i, rc; struct pci_dev *pdev = NULL; - const struct pci_id_table *table = pci_dev_descr_sbridge_table; + int allow_dups = 0; + int multi_bus = 0; + if (table->type == KNIGHTS_LANDING) + allow_dups = multi_bus = 1; while (table && table->descr) { - for (i = 0; i < table->n_devs; i++) { - pdev = NULL; + for (i = 0; i < table->n_devs_per_sock; i++) { + if (!allow_dups || i == 0 || + table->descr[i].dev_id != + table->descr[i-1].dev_id) { + pdev = NULL; + } do { rc = sbridge_get_onedevice(&pdev, num_mc, - table, i); + table, i, multi_bus); if (rc < 0) { if (i == 0) { - i = table->n_devs; + i = table->n_devs_per_sock; break; } sbridge_put_all_devices(); return -ENODEV; } - } while (pdev); + } while (pdev && !allow_dups); } table++; } @@ -1226,76 +2607,141 @@ static int sbridge_get_all_devices(u8 *num_mc) return 0; } -static int mci_bind_devs(struct mem_ctl_info *mci, - struct sbridge_dev *sbridge_dev) +/* + * Device IDs for {SBRIDGE,IBRIDGE,HASWELL,BROADWELL}_IMC_HA0_TAD0 are in + * the format: XXXa. So we can convert from a device to the corresponding + * channel like this + */ +#define TAD_DEV_TO_CHAN(dev) (((dev) & 0xf) - 0xa) + +static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, + struct sbridge_dev *sbridge_dev) { struct sbridge_pvt *pvt = mci->pvt_info; struct pci_dev *pdev; - int i, func, slot; + u8 saw_chan_mask = 0; + int i; for (i = 0; i < sbridge_dev->n_devs; i++) { pdev = sbridge_dev->pdev[i]; if (!pdev) continue; - slot = PCI_SLOT(pdev->devfn); - func = PCI_FUNC(pdev->devfn); - switch (slot) { - case 12: - switch (func) { - case 6: - pvt->pci_sad0 = pdev; - break; - case 7: - pvt->pci_sad1 = pdev; - break; - default: - goto error; - } + + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0: + pvt->pci_sad0 = pdev; break; - case 13: - switch (func) { - case 6: - pvt->pci_br = pdev; - break; - default: - goto error; - } + case PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1: + pvt->pci_sad1 = pdev; break; - case 14: - switch (func) { - case 0: - pvt->pci_ha0 = pdev; - break; - default: - goto error; - } + case PCI_DEVICE_ID_INTEL_SBRIDGE_BR: + pvt->pci_br0 = pdev; break; - case 15: - switch (func) { - case 0: - pvt->pci_ta = pdev; - break; - case 1: - pvt->pci_ras = pdev; - break; - case 2: - case 3: - case 4: - case 5: - pvt->pci_tad[func - 2] = pdev; - break; - default: - goto error; - } + case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0: + pvt->pci_ha = pdev; break; - case 17: - switch (func) { - case 0: - pvt->pci_ddrio = pdev; - break; - default: - goto error; - } + case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA: + pvt->pci_ta = pdev; + break; + case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS: + pvt->pci_ras = pdev; + break; + case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0: + case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1: + case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2: + case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3: + { + int id = TAD_DEV_TO_CHAN(pdev->device); + pvt->pci_tad[id] = pdev; + saw_chan_mask |= 1 << id; + } + break; + case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO: + pvt->pci_ddrio = pdev; + break; + default: + goto error; + } + + edac_dbg(0, "Associated PCI %02x:%02x, bus %d with dev = %p\n", + pdev->vendor, pdev->device, + sbridge_dev->bus, + pdev); + } + + /* Check if everything were registered */ + if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha || + !pvt->pci_ras || !pvt->pci_ta) + goto enodev; + + if (saw_chan_mask 
!= 0x0f) + goto enodev; + return 0; + +enodev: + sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); + return -ENODEV; + +error: + sbridge_printk(KERN_ERR, "Unexpected device %02x:%02x\n", + PCI_VENDOR_ID_INTEL, pdev->device); + return -EINVAL; +} + +static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, + struct sbridge_dev *sbridge_dev) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + struct pci_dev *pdev; + u8 saw_chan_mask = 0; + int i; + + for (i = 0; i < sbridge_dev->n_devs; i++) { + pdev = sbridge_dev->pdev[i]; + if (!pdev) + continue; + + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0: + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1: + pvt->pci_ha = pdev; + break; + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA: + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA: + pvt->pci_ta = pdev; + break; + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS: + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS: + pvt->pci_ras = pdev; + break; + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0: + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1: + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2: + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3: + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0: + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1: + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2: + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3: + { + int id = TAD_DEV_TO_CHAN(pdev->device); + pvt->pci_tad[id] = pdev; + saw_chan_mask |= 1 << id; + } + break; + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0: + pvt->pci_ddrio = pdev; + break; + case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0: + pvt->pci_ddrio = pdev; + break; + case PCI_DEVICE_ID_INTEL_IBRIDGE_SAD: + pvt->pci_sad0 = pdev; + break; + case PCI_DEVICE_ID_INTEL_IBRIDGE_BR0: + pvt->pci_br0 = pdev; + break; + case PCI_DEVICE_ID_INTEL_IBRIDGE_BR1: + pvt->pci_br1 = pdev; break; default: goto error; @@ -1308,14 +2754,13 @@ static int mci_bind_devs(struct mem_ctl_info *mci, } /* Check if everything were registered */ - if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 || - !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta) + if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_br0 || + !pvt->pci_br1 || !pvt->pci_ras || !pvt->pci_ta) goto enodev; - for (i = 0; i < NUM_CHANNELS; i++) { - if (!pvt->pci_tad[i]) - goto enodev; - } + if (saw_chan_mask != 0x0f && /* -EN/-EX */ + saw_chan_mask != 0x03) /* -EP */ + goto enodev; return 0; enodev: @@ -1323,12 +2768,303 @@ enodev: return -ENODEV; error: - sbridge_printk(KERN_ERR, "Device %d, function %d " - "is out of the expected range\n", - slot, func); + sbridge_printk(KERN_ERR, + "Unexpected device %02x:%02x\n", PCI_VENDOR_ID_INTEL, + pdev->device); return -EINVAL; } +static int haswell_mci_bind_devs(struct mem_ctl_info *mci, + struct sbridge_dev *sbridge_dev) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + struct pci_dev *pdev; + u8 saw_chan_mask = 0; + int i; + + /* there's only one device per system; not tied to any bus */ + if (pvt->info.pci_vtd == NULL) + /* result will be checked later */ + pvt->info.pci_vtd = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_HASWELL_IMC_VTD_MISC, + NULL); + + for (i = 0; i < sbridge_dev->n_devs; i++) { + pdev = sbridge_dev->pdev[i]; + if (!pdev) + continue; + + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0: + pvt->pci_sad0 = pdev; + break; + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1: + pvt->pci_sad1 = pdev; + break; + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1: + pvt->pci_ha = pdev; + 
break; + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA: + pvt->pci_ta = pdev; + break; + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM: + pvt->pci_ras = pdev; + break; + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3: + { + int id = TAD_DEV_TO_CHAN(pdev->device); + pvt->pci_tad[id] = pdev; + saw_chan_mask |= 1 << id; + } + break; + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3: + if (!pvt->pci_ddrio) + pvt->pci_ddrio = pdev; + break; + default: + break; + } + + edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n", + sbridge_dev->bus, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + pdev); + } + + /* Check if everything were registered */ + if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_sad1 || + !pvt->pci_ras || !pvt->pci_ta || !pvt->info.pci_vtd) + goto enodev; + + if (saw_chan_mask != 0x0f && /* -EN/-EX */ + saw_chan_mask != 0x03) /* -EP */ + goto enodev; + return 0; + +enodev: + sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); + return -ENODEV; +} + +static int broadwell_mci_bind_devs(struct mem_ctl_info *mci, + struct sbridge_dev *sbridge_dev) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + struct pci_dev *pdev; + u8 saw_chan_mask = 0; + int i; + + /* there's only one device per system; not tied to any bus */ + if (pvt->info.pci_vtd == NULL) + /* result will be checked later */ + pvt->info.pci_vtd = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_BROADWELL_IMC_VTD_MISC, + NULL); + + for (i = 0; i < sbridge_dev->n_devs; i++) { + pdev = sbridge_dev->pdev[i]; + if (!pdev) + continue; + + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0: + pvt->pci_sad0 = pdev; + break; + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1: + pvt->pci_sad1 = pdev; + break; + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0: + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1: + pvt->pci_ha = pdev; + break; + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA: + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA: + pvt->pci_ta = pdev; + break; + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM: + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM: + pvt->pci_ras = pdev; + break; + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0: + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1: + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2: + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3: + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0: + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1: + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2: + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3: + { + int id = TAD_DEV_TO_CHAN(pdev->device); + pvt->pci_tad[id] = pdev; + saw_chan_mask |= 1 << id; + } + break; + case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0: + pvt->pci_ddrio = pdev; + break; + default: + break; + } + + edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n", + sbridge_dev->bus, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + pdev); + } + + /* Check if everything were registered */ + if (!pvt->pci_sad0 || !pvt->pci_ha || 
!pvt->pci_sad1 ||
+	    !pvt->pci_ras || !pvt->pci_ta || !pvt->info.pci_vtd)
+		goto enodev;
+
+	if (saw_chan_mask != 0x0f && /* -EN/-EX */
+	    saw_chan_mask != 0x03)   /* -EP */
+		goto enodev;
+	return 0;
+
+enodev:
+	sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
+	return -ENODEV;
+}
+
+static int knl_mci_bind_devs(struct mem_ctl_info *mci,
+			     struct sbridge_dev *sbridge_dev)
+{
+	struct sbridge_pvt *pvt = mci->pvt_info;
+	struct pci_dev *pdev;
+	int dev, func;
+
+	int i;
+	int devidx;
+
+	for (i = 0; i < sbridge_dev->n_devs; i++) {
+		pdev = sbridge_dev->pdev[i];
+		if (!pdev)
+			continue;
+
+		/* Extract PCI device and function. */
+		dev = (pdev->devfn >> 3) & 0x1f;
+		func = pdev->devfn & 0x7;
+
+		switch (pdev->device) {
+		case PCI_DEVICE_ID_INTEL_KNL_IMC_MC:
+			if (dev == 8)
+				pvt->knl.pci_mc0 = pdev;
+			else if (dev == 9)
+				pvt->knl.pci_mc1 = pdev;
+			else {
+				sbridge_printk(KERN_ERR,
+					"Memory controller in unexpected place! (dev %d, fn %d)\n",
+					dev, func);
+				continue;
+			}
+			break;
+
+		case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0:
+			pvt->pci_sad0 = pdev;
+			break;
+
+		case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1:
+			pvt->pci_sad1 = pdev;
+			break;
+
+		case PCI_DEVICE_ID_INTEL_KNL_IMC_CHA:
+			/* There is one of these per tile, ranging from
+			 * 1.14.0 to 1.18.5.
+			 */
+			devidx = ((dev-14)*8)+func;
+
+			if (devidx < 0 || devidx >= KNL_MAX_CHAS) {
+				sbridge_printk(KERN_ERR,
+					"Caching and Home Agent in unexpected place! (dev %d, fn %d)\n",
+					dev, func);
+				continue;
+			}
+
+			WARN_ON(pvt->knl.pci_cha[devidx] != NULL);
+
+			pvt->knl.pci_cha[devidx] = pdev;
+			break;
+
+		case PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN:
+			devidx = -1;
+
+			/*
+			 * MC0 channels 0-2 are device 9 function 2-4,
+			 * MC1 channels 3-5 are device 8 function 2-4.
+			 */
+
+			if (dev == 9)
+				devidx = func-2;
+			else if (dev == 8)
+				devidx = 3 + (func-2);
+
+			if (devidx < 0 || devidx >= KNL_MAX_CHANNELS) {
+				sbridge_printk(KERN_ERR,
+					"DRAM Channel Registers in unexpected place!
(dev %d, fn %d)\n", + dev, func); + continue; + } + + WARN_ON(pvt->knl.pci_channel[devidx] != NULL); + pvt->knl.pci_channel[devidx] = pdev; + break; + + case PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM: + pvt->knl.pci_mc_info = pdev; + break; + + case PCI_DEVICE_ID_INTEL_KNL_IMC_TA: + pvt->pci_ta = pdev; + break; + + default: + sbridge_printk(KERN_ERR, "Unexpected device %d\n", + pdev->device); + break; + } + } + + if (!pvt->knl.pci_mc0 || !pvt->knl.pci_mc1 || + !pvt->pci_sad0 || !pvt->pci_sad1 || + !pvt->pci_ta) { + goto enodev; + } + + for (i = 0; i < KNL_MAX_CHANNELS; i++) { + if (!pvt->knl.pci_channel[i]) { + sbridge_printk(KERN_ERR, "Missing channel %d\n", i); + goto enodev; + } + } + + for (i = 0; i < KNL_MAX_CHAS; i++) { + if (!pvt->knl.pci_cha[i]) { + sbridge_printk(KERN_ERR, "Missing CHA %d\n", i); + goto enodev; + } + } + + return 0; + +enodev: + sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); + return -ENODEV; +} + /**************************************************************************** Error check routines ****************************************************************************/ @@ -1345,31 +3081,39 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; enum hw_event_mc_err_type tp_event; - char *type, *optype, msg[256]; bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); - bool recoverable = GET_BITFIELD(m->status, 56, 56); + bool recoverable; u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); u32 mscod = GET_BITFIELD(m->status, 16, 31); u32 errcode = GET_BITFIELD(m->status, 0, 15); u32 channel = GET_BITFIELD(m->status, 0, 3); u32 optypenum = GET_BITFIELD(m->status, 4, 6); + /* + * Bits 5-0 of MCi_MISC give the least significant bit that is valid. + * A value 6 is for cache line aligned address, a value 12 is for page + * aligned address reported by patrol scrubber. + */ + u32 lsb = GET_BITFIELD(m->misc, 0, 5); + char *optype, *area_type = "DRAM"; long channel_mask, first_channel; - u8 rank, socket; + u8 rank = 0xff, socket, ha; int rc, dimm; - char *area_type = NULL; + + if (pvt->info.type != SANDY_BRIDGE) + recoverable = true; + else + recoverable = GET_BITFIELD(m->status, 56, 56); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) { - type = "FATAL"; - tp_event = HW_EVENT_ERR_FATAL; - } else { - type = "NON_FATAL"; tp_event = HW_EVENT_ERR_UNCORRECTED; + } else { + tp_event = HW_EVENT_ERR_FATAL; } } else { - type = "CORRECTED"; tp_event = HW_EVENT_ERR_CORRECTED; } @@ -1384,38 +3128,73 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, * cccc = channel * If the mask doesn't match, report an error to the parsing logic */ - if (! 
((errcode & 0xef80) == 0x80)) { - optype = "Can't parse: it is not a mem"; - } else { - switch (optypenum) { - case 0: - optype = "generic undef request error"; - break; - case 1: - optype = "memory read error"; - break; - case 2: - optype = "memory write error"; - break; - case 3: - optype = "addr/cmd error"; - break; - case 4: - optype = "memory scrubbing error"; - break; - default: - optype = "reserved"; - break; + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; + } + + if (pvt->info.type == KNIGHTS_LANDING) { + if (channel == 14) { + edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) + ? " recoverable" : "", + mscod, errcode, + m->bank); + } else { + char A = *("A"); + + /* + * Reported channel is in range 0-2, so we can't map it + * back to mc. To figure out mc we check machine check + * bank register that reported this error. + * bank15 means mc0 and bank16 means mc1. + */ + channel = knl_channel_remap(m->bank == 16, channel); + channel_mask = 1 << channel; + + snprintf(sb_msg, sizeof(sb_msg), + "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) + ? " recoverable" : " ", + mscod, errcode, channel, A + channel); + edac_mc_handle_error(tp_event, mci, core_err_cnt, + m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, + channel, 0, -1, + optype, sb_msg); } + return; + } else if (lsb < 12) { + rc = get_memory_error_data(mci, m->addr, &socket, &ha, + &channel_mask, &rank, + &area_type, sb_msg); + } else { + rc = get_memory_error_data_from_mce(mci, m, &socket, &ha, + &channel_mask, sb_msg); } - rc = get_memory_error_data(mci, m->addr, &socket, - &channel_mask, &rank, &area_type, msg); if (rc < 0) goto err_parsing; - new_mci = get_mci_for_node_id(socket); + new_mci = get_mci_for_node_id(socket, ha); if (!new_mci) { - strcpy(msg, "Error: socket got corrupted!"); + strscpy(sb_msg, "Error: socket got corrupted!"); goto err_parsing; } mci = new_mci; @@ -1423,117 +3202,66 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, first_channel = find_first_bit(&channel_mask, NUM_CHANNELS); - if (rank < 4) + if (rank == 0xff) + dimm = -1; + else if (rank < 4) dimm = 0; else if (rank < 8) dimm = 1; else dimm = 2; - /* * FIXME: On some memory configurations (mirror, lockstep), the * Memory Controller can't point the error to a single DIMM. The * EDAC core should be handling the channel mask, in order to point * to the group of dimm's where the error may be happening. */ - snprintf(msg, sizeof(msg), - "%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d", + if (!pvt->is_lockstep && !pvt->is_cur_addr_mirrored && !pvt->is_close_pg) + channel = first_channel; + snprintf(sb_msg_full, sizeof(sb_msg_full), + "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? 
" recoverable" : "", area_type, mscod, errcode, - socket, + socket, ha, channel_mask, - rank); + rank, sb_msg); - edac_dbg(0, "%s\n", msg); + edac_dbg(0, "%s\n", sb_msg_full); /* FIXME: need support for channel mask */ + if (channel == CHANNEL_UNSPECIFIED) + channel = -1; + /* Call the helper to output message */ edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, channel, dimm, -1, - optype, msg); + optype, sb_msg_full); return; err_parsing: edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, -1, -1, -1, - msg, ""); + sb_msg, ""); } /* - * sbridge_check_error Retrieve and process errors reported by the - * hardware. Called by the Core module. - */ -static void sbridge_check_error(struct mem_ctl_info *mci) -{ - struct sbridge_pvt *pvt = mci->pvt_info; - int i; - unsigned count = 0; - struct mce *m; - - /* - * MCE first step: Copy all mce errors into a temporary buffer - * We use a double buffering here, to reduce the risk of - * loosing an error. - */ - smp_rmb(); - count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in) - % MCE_LOG_LEN; - if (!count) - return; - - m = pvt->mce_outentry; - if (pvt->mce_in + count > MCE_LOG_LEN) { - unsigned l = MCE_LOG_LEN - pvt->mce_in; - - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l); - smp_wmb(); - pvt->mce_in = 0; - count -= l; - m += l; - } - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count); - smp_wmb(); - pvt->mce_in += count; - - smp_rmb(); - if (pvt->mce_overrun) { - sbridge_printk(KERN_ERR, "Lost %d memory errors\n", - pvt->mce_overrun); - smp_wmb(); - pvt->mce_overrun = 0; - } - - /* - * MCE second step: parse errors and display - */ - for (i = 0; i < count; i++) - sbridge_mce_output_error(mci, &pvt->mce_outentry[i]); -} - -/* - * sbridge_mce_check_error Replicates mcelog routine to get errors - * This routine simply queues mcelog errors, and - * return. The error itself should be handled later - * by sbridge_check_error. - * WARNING: As this routine should be called at NMI time, extra care should - * be taken to avoid deadlocks, and to be as fast as possible. + * Check that logging is enabled and that this is the right type + * of error for us to handle. 
*/ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) { struct mce *mce = (struct mce *)data; struct mem_ctl_info *mci; - struct sbridge_pvt *pvt; + char *type; - mci = get_mci_for_node_id(mce->socketid); - if (!mci) - return NOTIFY_BAD; - pvt = mci->pvt_info; + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; /* * Just let mcelog handle it if the error is @@ -1544,44 +3272,50 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, if ((mce->status & 0xefff) >> 7 != 1) return NOTIFY_DONE; - printk("sbridge: HANDLING MCE MEMORY ERROR\n"); - - printk("CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", - mce->extcpu, mce->mcgstatus, mce->bank, mce->status); - printk("TSC %llx ", mce->tsc); - printk("ADDR %llx ", mce->addr); - printk("MISC %llx ", mce->misc); + /* Check ADDRV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 58, 58)) + return NOTIFY_DONE; - printk("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - mce->cpuvendor, mce->cpuid, mce->time, - mce->socketid, mce->apicid); + /* Check MISCV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 59, 59)) + return NOTIFY_DONE; - /* Only handle if it is the right mc controller */ - if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc) + /* Check address type in MISC (physical address only) */ + if (GET_BITFIELD(mce->misc, 6, 8) != 2) return NOTIFY_DONE; - smp_rmb(); - if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { - smp_wmb(); - pvt->mce_overrun++; + mci = get_mci_for_node_id(mce->socketid, IMC0); + if (!mci) return NOTIFY_DONE; - } - /* Copy memory error at the ringbuffer */ - memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce)); - smp_wmb(); - pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN; + if (mce->mcgstatus & MCG_STATUS_MCIP) + type = "Exception"; + else + type = "Event"; + + sbridge_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n"); - /* Handle fatal errors immediately */ - if (mce->mcgstatus & 1) - sbridge_check_error(mci); + sbridge_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx " + "Bank %d: %016Lx\n", mce->extcpu, type, + mce->mcgstatus, mce->bank, mce->status); + sbridge_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc); + sbridge_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr); + sbridge_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc); + + sbridge_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET " + "%u APIC %x\n", mce->cpuvendor, mce->cpuid, + mce->time, mce->socketid, mce->apicid); + + sbridge_mce_output_error(mci, mce); /* Advice mcelog that the error were handled */ - return NOTIFY_STOP; + mce->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_OK; } static struct notifier_block sbridge_mce_dec = { - .notifier_call = sbridge_mce_check_error, + .notifier_call = sbridge_mce_check_error, + .priority = MCE_PRIO_EDAC, }; /**************************************************************************** @@ -1591,7 +3325,6 @@ static struct notifier_block sbridge_mce_dec = { static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) { struct mem_ctl_info *mci = sbridge_dev->mci; - struct sbridge_pvt *pvt; if (unlikely(!mci || !mci->pvt_info)) { edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev); @@ -1600,8 +3333,6 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) return; } - pvt = mci->pvt_info; - edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &sbridge_dev->pdev[0]->dev); @@ -1614,24 +3345,21 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) 
sbridge_dev->mci = NULL; } -static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) +static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) { struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; struct sbridge_pvt *pvt; + struct pci_dev *pdev = sbridge_dev->pdev[0]; int rc; - /* Check the number of active and not disabled channels */ - rc = check_if_ecc_is_active(sbridge_dev->bus); - if (unlikely(rc < 0)) - return rc; - /* allocate a new MC control structure */ layers[0].type = EDAC_MC_LAYER_CHANNEL; - layers[0].size = NUM_CHANNELS; + layers[0].size = type == KNIGHTS_LANDING ? + KNL_MAX_CHANNELS : NUM_CHANNELS; layers[0].is_virt_csrow = false; layers[1].type = EDAC_MC_LAYER_SLOT; - layers[1].size = MAX_DIMMS; + layers[1].size = type == KNIGHTS_LANDING ? 1 : MAX_DIMMS; layers[1].is_virt_csrow = true; mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers, sizeof(*pvt)); @@ -1640,7 +3368,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) return -ENOMEM; edac_dbg(0, "MC: mci = %p, dev = %p\n", - mci, &sbridge_dev->pdev[0]->dev); + mci, &pdev->dev); pvt = mci->pvt_info; memset(pvt, 0, sizeof(*pvt)); @@ -1649,89 +3377,224 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) pvt->sbridge_dev = sbridge_dev; sbridge_dev->mci = mci; - mci->mtype_cap = MEM_FLAG_DDR3; + mci->mtype_cap = type == KNIGHTS_LANDING ? + MEM_FLAG_DDR4 : MEM_FLAG_DDR3; mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; - mci->mod_name = "sbridge_edac.c"; - mci->mod_ver = SBRIDGE_REVISION; - mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx); - mci->dev_name = pci_name(sbridge_dev->pdev[0]); + mci->mod_name = EDAC_MOD_STR; + mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; - /* Set the function pointer to an actual operation function */ - mci->edac_check = sbridge_check_error; + pvt->info.type = type; + switch (type) { + case IVY_BRIDGE: + pvt->info.rankcfgr = IB_RANK_CFG_A; + pvt->info.get_tolm = ibridge_get_tolm; + pvt->info.get_tohm = ibridge_get_tohm; + pvt->info.dram_rule = ibridge_dram_rule; + pvt->info.get_memory_type = get_memory_type; + pvt->info.get_node_id = get_node_id; + pvt->info.get_ha = ibridge_get_ha; + pvt->info.rir_limit = rir_limit; + pvt->info.sad_limit = sad_limit; + pvt->info.interleave_mode = interleave_mode; + pvt->info.dram_attr = dram_attr; + pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); + pvt->info.interleave_list = ibridge_interleave_list; + pvt->info.interleave_pkg = ibridge_interleave_pkg; + pvt->info.get_width = ibridge_get_width; + + /* Store pci devices at mci for faster access */ + rc = ibridge_mci_bind_devs(mci, sbridge_dev); + if (unlikely(rc < 0)) + goto fail0; + get_source_id(mci); + mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge SrcID#%d_Ha#%d", + pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom); + break; + case SANDY_BRIDGE: + pvt->info.rankcfgr = SB_RANK_CFG_A; + pvt->info.get_tolm = sbridge_get_tolm; + pvt->info.get_tohm = sbridge_get_tohm; + pvt->info.dram_rule = sbridge_dram_rule; + pvt->info.get_memory_type = get_memory_type; + pvt->info.get_node_id = get_node_id; + pvt->info.get_ha = sbridge_get_ha; + pvt->info.rir_limit = rir_limit; + pvt->info.sad_limit = sad_limit; + pvt->info.interleave_mode = interleave_mode; + pvt->info.dram_attr = dram_attr; + pvt->info.max_sad = ARRAY_SIZE(sbridge_dram_rule); + pvt->info.interleave_list = sbridge_interleave_list; + pvt->info.interleave_pkg = sbridge_interleave_pkg; + pvt->info.get_width = 
sbridge_get_width; + + /* Store pci devices at mci for faster access */ + rc = sbridge_mci_bind_devs(mci, sbridge_dev); + if (unlikely(rc < 0)) + goto fail0; + get_source_id(mci); + mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge SrcID#%d_Ha#%d", + pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom); + break; + case HASWELL: + /* rankcfgr isn't used */ + pvt->info.get_tolm = haswell_get_tolm; + pvt->info.get_tohm = haswell_get_tohm; + pvt->info.dram_rule = ibridge_dram_rule; + pvt->info.get_memory_type = haswell_get_memory_type; + pvt->info.get_node_id = haswell_get_node_id; + pvt->info.get_ha = ibridge_get_ha; + pvt->info.rir_limit = haswell_rir_limit; + pvt->info.sad_limit = sad_limit; + pvt->info.interleave_mode = interleave_mode; + pvt->info.dram_attr = dram_attr; + pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); + pvt->info.interleave_list = ibridge_interleave_list; + pvt->info.interleave_pkg = ibridge_interleave_pkg; + pvt->info.get_width = ibridge_get_width; + + /* Store pci devices at mci for faster access */ + rc = haswell_mci_bind_devs(mci, sbridge_dev); + if (unlikely(rc < 0)) + goto fail0; + get_source_id(mci); + mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell SrcID#%d_Ha#%d", + pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom); + break; + case BROADWELL: + /* rankcfgr isn't used */ + pvt->info.get_tolm = haswell_get_tolm; + pvt->info.get_tohm = haswell_get_tohm; + pvt->info.dram_rule = ibridge_dram_rule; + pvt->info.get_memory_type = haswell_get_memory_type; + pvt->info.get_node_id = haswell_get_node_id; + pvt->info.get_ha = ibridge_get_ha; + pvt->info.rir_limit = haswell_rir_limit; + pvt->info.sad_limit = sad_limit; + pvt->info.interleave_mode = interleave_mode; + pvt->info.dram_attr = dram_attr; + pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); + pvt->info.interleave_list = ibridge_interleave_list; + pvt->info.interleave_pkg = ibridge_interleave_pkg; + pvt->info.get_width = broadwell_get_width; + + /* Store pci devices at mci for faster access */ + rc = broadwell_mci_bind_devs(mci, sbridge_dev); + if (unlikely(rc < 0)) + goto fail0; + get_source_id(mci); + mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell SrcID#%d_Ha#%d", + pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom); + break; + case KNIGHTS_LANDING: + /* pvt->info.rankcfgr == ??? 
*/ + pvt->info.get_tolm = knl_get_tolm; + pvt->info.get_tohm = knl_get_tohm; + pvt->info.dram_rule = knl_dram_rule; + pvt->info.get_memory_type = knl_get_memory_type; + pvt->info.get_node_id = knl_get_node_id; + pvt->info.get_ha = knl_get_ha; + pvt->info.rir_limit = NULL; + pvt->info.sad_limit = knl_sad_limit; + pvt->info.interleave_mode = knl_interleave_mode; + pvt->info.dram_attr = dram_attr_knl; + pvt->info.max_sad = ARRAY_SIZE(knl_dram_rule); + pvt->info.interleave_list = knl_interleave_list; + pvt->info.interleave_pkg = ibridge_interleave_pkg; + pvt->info.get_width = knl_get_width; + + rc = knl_mci_bind_devs(mci, sbridge_dev); + if (unlikely(rc < 0)) + goto fail0; + get_source_id(mci); + mci->ctl_name = kasprintf(GFP_KERNEL, "Knights Landing SrcID#%d_Ha#%d", + pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom); + break; + } - /* Store pci devices at mci for faster access */ - rc = mci_bind_devs(mci, sbridge_dev); - if (unlikely(rc < 0)) + if (!mci->ctl_name) { + rc = -ENOMEM; goto fail0; + } /* Get dimm basic config and the memory layout */ - get_dimm_config(mci); + rc = get_dimm_config(mci); + if (rc < 0) { + edac_dbg(0, "MC: failed to get_dimm_config()\n"); + goto fail; + } get_memory_layout(mci); /* record ptr to the generic device */ - mci->pdev = &sbridge_dev->pdev[0]->dev; + mci->pdev = &pdev->dev; /* add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); rc = -EINVAL; - goto fail0; + goto fail; } return 0; -fail0: +fail: kfree(mci->ctl_name); +fail0: edac_mc_free(mci); sbridge_dev->mci = NULL; return rc; } +static const struct x86_cpu_id sbridge_cpuids[] = { + X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &pci_dev_descr_sbridge_table), + X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &pci_dev_descr_ibridge_table), + X86_MATCH_VFM(INTEL_HASWELL_X, &pci_dev_descr_haswell_table), + X86_MATCH_VFM(INTEL_BROADWELL_X, &pci_dev_descr_broadwell_table), + X86_MATCH_VFM(INTEL_BROADWELL_D, &pci_dev_descr_broadwell_table), + X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &pci_dev_descr_knl_table), + X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &pci_dev_descr_knl_table), + { } +}; +MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids); + /* - * sbridge_probe Probe for ONE instance of device to see if it is + * sbridge_probe Get all devices and register memory controllers * present. * return: * 0 for FOUND a device * < 0 for error code */ -static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static int sbridge_probe(const struct x86_cpu_id *id) { int rc; u8 mc, num_mc = 0; struct sbridge_dev *sbridge_dev; + struct pci_id_table *ptable = (struct pci_id_table *)id->driver_data; /* get the pci devices we want to reserve for our use */ - mutex_lock(&sbridge_edac_lock); + rc = sbridge_get_all_devices(&num_mc, ptable); - /* - * All memory controllers are allocated at the first pass. 
- */ - if (unlikely(probed >= 1)) { - mutex_unlock(&sbridge_edac_lock); - return -ENODEV; + if (unlikely(rc < 0)) { + edac_dbg(0, "couldn't get all devices\n"); + goto fail0; } - probed++; - rc = sbridge_get_all_devices(&num_mc); - if (unlikely(rc < 0)) - goto fail0; mc = 0; list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { edac_dbg(0, "Registering MC#%d (%d of %d)\n", mc, mc + 1, num_mc); + sbridge_dev->mc = mc++; - rc = sbridge_register_mci(sbridge_dev); + rc = sbridge_register_mci(sbridge_dev, ptable->type); if (unlikely(rc < 0)) goto fail1; } - sbridge_printk(KERN_INFO, "Driver loaded.\n"); + sbridge_printk(KERN_INFO, "%s\n", SBRIDGE_REVISION); - mutex_unlock(&sbridge_edac_lock); return 0; fail1: @@ -1740,83 +3603,66 @@ fail1: sbridge_put_all_devices(); fail0: - mutex_unlock(&sbridge_edac_lock); return rc; } /* - * sbridge_remove destructor for one instance of device + * sbridge_remove cleanup * */ -static void sbridge_remove(struct pci_dev *pdev) +static void sbridge_remove(void) { struct sbridge_dev *sbridge_dev; edac_dbg(0, "\n"); - /* - * we have a trouble here: pdev value for removal will be wrong, since - * it will point to the X58 register used to detect that the machine - * is a Nehalem or upper design. However, due to the way several PCI - * devices are grouped together to provide MC functionality, we need - * to use a different method for releasing the devices - */ - - mutex_lock(&sbridge_edac_lock); - - if (unlikely(!probed)) { - mutex_unlock(&sbridge_edac_lock); - return; - } - list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) sbridge_unregister_mci(sbridge_dev); /* Release PCI resources */ sbridge_put_all_devices(); - - probed--; - - mutex_unlock(&sbridge_edac_lock); } -MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl); - -/* - * sbridge_driver pci_driver structure for this module - * - */ -static struct pci_driver sbridge_driver = { - .name = "sbridge_edac", - .probe = sbridge_probe, - .remove = sbridge_remove, - .id_table = sbridge_pci_tbl, -}; - /* * sbridge_init Module entry function * Try to initialize this module for its devices */ static int __init sbridge_init(void) { - int pci_rc; + const struct x86_cpu_id *id; + const char *owner; + int rc; edac_dbg(2, "\n"); + if (ghes_get_devices()) + return -EBUSY; + + owner = edac_get_owner(); + if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) + return -EBUSY; + + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + id = x86_match_cpu(sbridge_cpuids); + if (!id) + return -ENODEV; + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); - pci_rc = pci_register_driver(&sbridge_driver); + rc = sbridge_probe(id); - if (pci_rc >= 0) { + if (rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); return 0; } sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", - pci_rc); + rc); - return pci_rc; + return rc; } /* @@ -1826,7 +3672,7 @@ static int __init sbridge_init(void) static void __exit sbridge_exit(void) { edac_dbg(2, "\n"); - pci_unregister_driver(&sbridge_driver); + sbridge_remove(); mce_unregister_decode_chain(&sbridge_mce_dec); } @@ -1837,7 +3683,7 @@ module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); -MODULE_AUTHOR("Red Hat Inc. 
(http://www.redhat.com)"); -MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge memory controllers - " +MODULE_AUTHOR("Mauro Carvalho Chehab"); +MODULE_AUTHOR("Red Hat Inc. (https://www.redhat.com)"); +MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers - " SBRIDGE_REVISION); diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c new file mode 100644 index 000000000000..f9d02af2fc3a --- /dev/null +++ b/drivers/edac/scrub.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic EDAC scrub driver controls the memory scrubbers in the + * system. The common sysfs scrub interface abstracts the control of + * various arbitrary scrubbing functionalities into a unified set of + * functions. + * + * Copyright (c) 2024-2025 HiSilicon Limited. + */ + +#include <linux/edac.h> + +enum edac_scrub_attributes { + SCRUB_ADDRESS, + SCRUB_SIZE, + SCRUB_ENABLE_BACKGROUND, + SCRUB_MIN_CYCLE_DURATION, + SCRUB_MAX_CYCLE_DURATION, + SCRUB_CUR_CYCLE_DURATION, + SCRUB_MAX_ATTRS +}; + +struct edac_scrub_dev_attr { + struct device_attribute dev_attr; + u8 instance; +}; + +struct edac_scrub_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_scrub_dev_attr scrub_dev_attr[SCRUB_MAX_ATTRS]; + struct attribute *scrub_attrs[SCRUB_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +#define TO_SCRUB_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_scrub_dev_attr, dev_attr) + +#define EDAC_SCRUB_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +EDAC_SCRUB_ATTR_SHOW(addr, read_addr, u64, "0x%llx\n") +EDAC_SCRUB_ATTR_SHOW(size, read_size, u64, "0x%llx\n") +EDAC_SCRUB_ATTR_SHOW(enable_background, get_enabled_bg, bool, "%u\n") +EDAC_SCRUB_ATTR_SHOW(min_cycle_duration, get_min_cycle, u32, "%u\n") +EDAC_SCRUB_ATTR_SHOW(max_cycle_duration, get_max_cycle, u32, "%u\n") +EDAC_SCRUB_ATTR_SHOW(current_cycle_duration, get_cycle_duration, u32, "%u\n") + +#define EDAC_SCRUB_ATTR_STORE(attrib, cb, type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \ + type data; \ + int ret; \ + \ + ret = conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +EDAC_SCRUB_ATTR_STORE(addr, write_addr, u64, kstrtou64) +EDAC_SCRUB_ATTR_STORE(size, write_size, u64, kstrtou64) +EDAC_SCRUB_ATTR_STORE(enable_background, set_enabled_bg, unsigned long, kstrtoul) +EDAC_SCRUB_ATTR_STORE(current_cycle_duration, set_cycle_duration, unsigned long, kstrtoul) + +static umode_t scrub_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct device_attribute *dev_attr = container_of(a, struct device_attribute, attr); + u8 inst = 
TO_SCRUB_DEV_ATTR(dev_attr)->instance; + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; + + switch (attr_id) { + case SCRUB_ADDRESS: + if (ops->read_addr) { + if (ops->write_addr) + return a->mode; + else + return 0444; + } + break; + case SCRUB_SIZE: + if (ops->read_size) { + if (ops->write_size) + return a->mode; + else + return 0444; + } + break; + case SCRUB_ENABLE_BACKGROUND: + if (ops->get_enabled_bg) { + if (ops->set_enabled_bg) + return a->mode; + else + return 0444; + } + break; + case SCRUB_MIN_CYCLE_DURATION: + if (ops->get_min_cycle) + return a->mode; + break; + case SCRUB_MAX_CYCLE_DURATION: + if (ops->get_max_cycle) + return a->mode; + break; + case SCRUB_CUR_CYCLE_DURATION: + if (ops->get_cycle_duration) { + if (ops->set_cycle_duration) + return a->mode; + else + return 0444; + } + break; + default: + break; + } + + return 0; +} + +#define EDAC_SCRUB_ATTR_RO(_name, _instance) \ + ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RO(_name), \ + .instance = _instance }) + +#define EDAC_SCRUB_ATTR_WO(_name, _instance) \ + ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_WO(_name), \ + .instance = _instance }) + +#define EDAC_SCRUB_ATTR_RW(_name, _instance) \ + ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RW(_name), \ + .instance = _instance }) + +static int scrub_create_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, u8 instance) +{ + struct edac_scrub_context *scrub_ctx; + struct attribute_group *group; + int i; + struct edac_scrub_dev_attr dev_attr[] = { + [SCRUB_ADDRESS] = EDAC_SCRUB_ATTR_RW(addr, instance), + [SCRUB_SIZE] = EDAC_SCRUB_ATTR_RW(size, instance), + [SCRUB_ENABLE_BACKGROUND] = EDAC_SCRUB_ATTR_RW(enable_background, instance), + [SCRUB_MIN_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(min_cycle_duration, instance), + [SCRUB_MAX_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(max_cycle_duration, instance), + [SCRUB_CUR_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RW(current_cycle_duration, instance) + }; + + scrub_ctx = devm_kzalloc(scrub_dev, sizeof(*scrub_ctx), GFP_KERNEL); + if (!scrub_ctx) + return -ENOMEM; + + group = &scrub_ctx->group; + for (i = 0; i < SCRUB_MAX_ATTRS; i++) { + memcpy(&scrub_ctx->scrub_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i])); + sysfs_attr_init(&scrub_ctx->scrub_dev_attr[i].dev_attr.attr); + scrub_ctx->scrub_attrs[i] = &scrub_ctx->scrub_dev_attr[i].dev_attr.attr; + } + sprintf(scrub_ctx->name, "%s%d", "scrub", instance); + group->name = scrub_ctx->name; + group->attrs = scrub_ctx->scrub_attrs; + group->is_visible = scrub_attr_visible; + + attr_groups[0] = group; + + return 0; +} + +/** + * edac_scrub_get_desc - get EDAC scrub descriptors + * @scrub_dev: client device, with scrub support + * @attr_groups: pointer to attribute group container + * @instance: device's scrub instance number. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. + */ +int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, u8 instance) +{ + if (!scrub_dev || !attr_groups) + return -EINVAL; + + return scrub_create_desc(scrub_dev, attr_groups, instance); +} diff --git a/drivers/edac/sifive_edac.c b/drivers/edac/sifive_edac.c new file mode 100644 index 000000000000..a2b193dc6604 --- /dev/null +++ b/drivers/edac/sifive_edac.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SiFive Platform EDAC Driver + * + * Copyright (C) 2018-2022 SiFive, Inc. 
+ * + * This driver is partially based on octeon_edac-pc.c + * + */ +#include <linux/edac.h> +#include <linux/platform_device.h> +#include "edac_module.h" +#include <soc/sifive/sifive_ccache.h> + +#define DRVNAME "sifive_edac" + +struct sifive_edac_priv { + struct notifier_block notifier; + struct edac_device_ctl_info *dci; +}; + +/* + * EDAC error callback + * + * @event: non-zero if unrecoverable. + */ +static +int ecc_err_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + const char *msg = (char *)ptr; + struct sifive_edac_priv *p; + + p = container_of(this, struct sifive_edac_priv, notifier); + + if (event == SIFIVE_CCACHE_ERR_TYPE_UE) + edac_device_handle_ue(p->dci, 0, 0, msg); + else if (event == SIFIVE_CCACHE_ERR_TYPE_CE) + edac_device_handle_ce(p->dci, 0, 0, msg); + + return NOTIFY_OK; +} + +static int ecc_register(struct platform_device *pdev) +{ + struct sifive_edac_priv *p; + + p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + p->notifier.notifier_call = ecc_err_event; + platform_set_drvdata(pdev, p); + + p->dci = edac_device_alloc_ctl_info(0, "sifive_ecc", 1, "sifive_ecc", + 1, 1, edac_device_alloc_index()); + if (!p->dci) + return -ENOMEM; + + p->dci->dev = &pdev->dev; + p->dci->mod_name = "Sifive ECC Manager"; + p->dci->ctl_name = dev_name(&pdev->dev); + p->dci->dev_name = dev_name(&pdev->dev); + + if (edac_device_add_device(p->dci)) { + dev_err(p->dci->dev, "failed to register with EDAC core\n"); + goto err; + } + + register_sifive_ccache_error_notifier(&p->notifier); + + return 0; + +err: + edac_device_free_ctl_info(p->dci); + + return -ENXIO; +} + +static int ecc_unregister(struct platform_device *pdev) +{ + struct sifive_edac_priv *p = platform_get_drvdata(pdev); + + unregister_sifive_ccache_error_notifier(&p->notifier); + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(p->dci); + + return 0; +} + +static struct platform_device *sifive_pdev; + +static int __init sifive_edac_init(void) +{ + int ret; + + sifive_pdev = platform_device_register_simple(DRVNAME, 0, NULL, 0); + if (IS_ERR(sifive_pdev)) + return PTR_ERR(sifive_pdev); + + ret = ecc_register(sifive_pdev); + if (ret) + platform_device_unregister(sifive_pdev); + + return ret; +} + +static void __exit sifive_edac_exit(void) +{ + ecc_unregister(sifive_pdev); + platform_device_unregister(sifive_pdev); +} + +module_init(sifive_edac_init); +module_exit(sifive_edac_exit); + +MODULE_AUTHOR("SiFive Inc."); +MODULE_DESCRIPTION("SiFive platform EDAC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c new file mode 100644 index 000000000000..aa6593ccda2d --- /dev/null +++ b/drivers/edac/skx_base.c @@ -0,0 +1,715 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * EDAC driver for Intel(R) Xeon(R) Skylake processors + * Copyright (c) 2016, Intel Corporation. + */ + +#include <linux/kernel.h> +#include <linux/processor.h> +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> +#include <asm/mce.h> + +#include "edac_module.h" +#include "skx_common.h" + +#define EDAC_MOD_STR "skx_edac" + +/* + * Debug macros + */ +#define skx_printk(level, fmt, arg...) \ + edac_printk(level, "skx", fmt, ##arg) + +#define skx_mc_printk(mci, level, fmt, arg...) 
\ + edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg) + +static struct list_head *skx_edac_list; + +static u64 skx_tolm, skx_tohm; +static int skx_num_sockets; +static unsigned int nvdimm_count; + +#define MASK26 0x3FFFFFF /* Mask for 2^26 */ +#define MASK29 0x1FFFFFFF /* Mask for 2^29 */ + +static struct res_config skx_cfg = { + .type = SKX, + .decs_did = 0x2016, + .busno_cfg_offset = 0xcc, + .ddr_imc_num = 2, + .ddr_chan_num = 3, + .ddr_dimm_num = 2, +}; + +static struct skx_dev *get_skx_dev(struct pci_bus *bus, u8 idx) +{ + struct skx_dev *d; + + list_for_each_entry(d, skx_edac_list, list) { + if (d->seg == pci_domain_nr(bus) && d->bus[idx] == bus->number) + return d; + } + + return NULL; +} + +enum munittype { + CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD, + ERRCHAN0, ERRCHAN1, ERRCHAN2, +}; + +struct munit { + u16 did; + u16 devfn[2]; + u8 busidx; + u8 per_socket; + enum munittype mtype; +}; + +/* + * List of PCI device ids that we need together with some device + * number and function numbers to tell which memory controller the + * device belongs to. + */ +static const struct munit skx_all_munits[] = { + { 0x2054, { }, 1, 1, SAD_ALL }, + { 0x2055, { }, 1, 1, UTIL_ALL }, + { 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 }, + { 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 }, + { 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 }, + { 0x2043, { PCI_DEVFN(10, 3), PCI_DEVFN(12, 3) }, 2, 2, ERRCHAN0 }, + { 0x2047, { PCI_DEVFN(10, 7), PCI_DEVFN(12, 7) }, 2, 2, ERRCHAN1 }, + { 0x204b, { PCI_DEVFN(11, 3), PCI_DEVFN(13, 3) }, 2, 2, ERRCHAN2 }, + { 0x208e, { }, 1, 0, SAD }, + { } +}; + +static int get_all_munits(const struct munit *m) +{ + struct pci_dev *pdev, *prev; + struct skx_dev *d; + u32 reg; + int i = 0, ndev = 0; + + prev = NULL; + for (;;) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev); + if (!pdev) + break; + ndev++; + if (m->per_socket == skx_cfg.ddr_imc_num) { + for (i = 0; i < skx_cfg.ddr_imc_num; i++) + if (m->devfn[i] == pdev->devfn) + break; + if (i == skx_cfg.ddr_imc_num) + goto fail; + } + d = get_skx_dev(pdev->bus, m->busidx); + if (!d) + goto fail; + + /* Be sure that the device is enabled */ + if (unlikely(pci_enable_device(pdev) < 0)) { + skx_printk(KERN_ERR, "Couldn't enable device %04x:%04x\n", + PCI_VENDOR_ID_INTEL, m->did); + goto fail; + } + + switch (m->mtype) { + case CHAN0: + case CHAN1: + case CHAN2: + pci_dev_get(pdev); + d->imc[i].chan[m->mtype].cdev = pdev; + break; + case ERRCHAN0: + case ERRCHAN1: + case ERRCHAN2: + pci_dev_get(pdev); + d->imc[i].chan[m->mtype - ERRCHAN0].edev = pdev; + break; + case SAD_ALL: + pci_dev_get(pdev); + d->sad_all = pdev; + break; + case UTIL_ALL: + pci_dev_get(pdev); + d->util_all = pdev; + break; + case SAD: + /* + * one of these devices per core, including cores + * that don't exist on this SKU. Ignore any that + * read a route table of zero, make sure all the + * non-zero values match. 
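+ * The route table read below is unpacked later in skx_sad_decode(): + * for each logical channel, a 3-bit memory controller number (six + * such fields in bits 17:0) and a 2-bit channel number (six fields + * in bits 29:18).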
+ */ + pci_read_config_dword(pdev, 0xB4, &reg); + if (reg != 0) { + if (d->mcroute == 0) { + d->mcroute = reg; + } else if (d->mcroute != reg) { + skx_printk(KERN_ERR, "mcroute mismatch\n"); + goto fail; + } + } + ndev--; + break; + } + + prev = pdev; + } + + return ndev; +fail: + pci_dev_put(pdev); + return -ENODEV; +} + +static const struct x86_cpu_id skx_cpuids[] = { + X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_cfg), + { } +}; +MODULE_DEVICE_TABLE(x86cpu, skx_cpuids); + +static bool skx_check_ecc(u32 mcmtr) +{ + return !!GET_BITFIELD(mcmtr, 2, 2); +} + +static int skx_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg) +{ + struct skx_pvt *pvt = mci->pvt_info; + u32 mtr, mcmtr, amap, mcddrtcfg; + struct skx_imc *imc = pvt->imc; + struct dimm_info *dimm; + int i, j; + int ndimms; + + /* Only the mcmtr on the first channel is effective */ + pci_read_config_dword(imc->chan[0].cdev, 0x87c, &mcmtr); + + for (i = 0; i < cfg->ddr_chan_num; i++) { + ndimms = 0; + pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap); + pci_read_config_dword(imc->chan[i].cdev, 0x400, &mcddrtcfg); + for (j = 0; j < cfg->ddr_dimm_num; j++) { + dimm = edac_get_dimm(mci, i, j, 0); + pci_read_config_dword(imc->chan[i].cdev, + 0x80 + 4 * j, &mtr); + if (IS_DIMM_PRESENT(mtr)) { + ndimms += skx_get_dimm_info(mtr, mcmtr, amap, dimm, imc, i, j, cfg); + } else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) { + ndimms += skx_get_nvdimm_info(dimm, imc, i, j, + EDAC_MOD_STR); + nvdimm_count++; + } + } + if (ndimms && !skx_check_ecc(mcmtr)) { + skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc); + return -ENODEV; + } + } + + return 0; +} + +#define SKX_MAX_SAD 24 + +#define SKX_GET_SAD(d, i, reg) \ + pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), &(reg)) +#define SKX_GET_ILV(d, i, reg) \ + pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), &(reg)) + +#define SKX_SAD_MOD3MODE(sad) GET_BITFIELD((sad), 30, 31) +#define SKX_SAD_MOD3(sad) GET_BITFIELD((sad), 27, 27) +#define SKX_SAD_LIMIT(sad) (((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26) +#define SKX_SAD_MOD3ASMOD2(sad) GET_BITFIELD((sad), 5, 6) +#define SKX_SAD_ATTR(sad) GET_BITFIELD((sad), 3, 4) +#define SKX_SAD_INTERLEAVE(sad) GET_BITFIELD((sad), 1, 2) +#define SKX_SAD_ENABLE(sad) GET_BITFIELD((sad), 0, 0) + +#define SKX_ILV_REMOTE(tgt) (((tgt) & 8) == 0) +#define SKX_ILV_TARGET(tgt) ((tgt) & 7) + +static void skx_show_retry_rd_err_log(struct decoded_addr *res, + char *msg, int len, + bool scrub_err) +{ + u32 log0, log1, log2, log3, log4; + u32 corr0, corr1, corr2, corr3; + struct pci_dev *edev; + int n; + + edev = res->dev->imc[res->imc].chan[res->channel].edev; + + pci_read_config_dword(edev, 0x154, &log0); + pci_read_config_dword(edev, 0x148, &log1); + pci_read_config_dword(edev, 0x150, &log2); + pci_read_config_dword(edev, 0x15c, &log3); + pci_read_config_dword(edev, 0x114, &log4); + + n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x]", + log0, log1, log2, log3, log4); + + pci_read_config_dword(edev, 0x104, &corr0); + pci_read_config_dword(edev, 0x108, &corr1); + pci_read_config_dword(edev, 0x10c, &corr2); + pci_read_config_dword(edev, 0x110, &corr3); + + if (len - n > 0) + snprintf(msg + n, len - n, + " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]", + corr0 & 0xffff, corr0 >> 16, + corr1 & 0xffff, corr1 >> 16, + corr2 & 0xffff, corr2 >> 16, + corr3 & 0xffff, corr3 >> 16); +} + +static bool skx_sad_decode(struct decoded_addr *res) +{ + struct skx_dev *d = list_first_entry(skx_edac_list, typeof(*d), list); + u64 addr = res->addr; + int
i, idx, tgt, lchan, shift; + u32 sad, ilv; + u64 limit, prev_limit; + int remote = 0; + + /* Simple sanity check for I/O space or out of range */ + if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) { + edac_dbg(0, "Address 0x%llx out of range\n", addr); + return false; + } + +restart: + prev_limit = 0; + for (i = 0; i < SKX_MAX_SAD; i++) { + SKX_GET_SAD(d, i, sad); + limit = SKX_SAD_LIMIT(sad); + if (SKX_SAD_ENABLE(sad)) { + if (addr >= prev_limit && addr <= limit) + goto sad_found; + } + prev_limit = limit + 1; + } + edac_dbg(0, "No SAD entry for 0x%llx\n", addr); + return false; + +sad_found: + SKX_GET_ILV(d, i, ilv); + + switch (SKX_SAD_INTERLEAVE(sad)) { + case 0: + idx = GET_BITFIELD(addr, 6, 8); + break; + case 1: + idx = GET_BITFIELD(addr, 8, 10); + break; + case 2: + idx = GET_BITFIELD(addr, 12, 14); + break; + case 3: + idx = GET_BITFIELD(addr, 30, 32); + break; + } + + tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3); + + /* If point to another node, find it and start over */ + if (SKX_ILV_REMOTE(tgt)) { + if (remote) { + edac_dbg(0, "Double remote!\n"); + return false; + } + remote = 1; + list_for_each_entry(d, skx_edac_list, list) { + if (d->imc[0].src_id == SKX_ILV_TARGET(tgt)) + goto restart; + } + edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt)); + return false; + } + + if (SKX_SAD_MOD3(sad) == 0) { + lchan = SKX_ILV_TARGET(tgt); + } else { + switch (SKX_SAD_MOD3MODE(sad)) { + case 0: + shift = 6; + break; + case 1: + shift = 8; + break; + case 2: + shift = 12; + break; + default: + edac_dbg(0, "illegal mod3mode\n"); + return false; + } + switch (SKX_SAD_MOD3ASMOD2(sad)) { + case 0: + lchan = (addr >> shift) % 3; + break; + case 1: + lchan = (addr >> shift) % 2; + break; + case 2: + lchan = (addr >> shift) % 2; + lchan = (lchan << 1) | !lchan; + break; + case 3: + lchan = ((addr >> shift) % 2) << 1; + break; + } + lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1); + } + + res->dev = d; + res->socket = d->imc[0].src_id; + res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2); + res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19); + + edac_dbg(2, "0x%llx: socket=%d imc=%d channel=%d\n", + res->addr, res->socket, res->imc, res->channel); + return true; +} + +#define SKX_MAX_TAD 8 + +#define SKX_GET_TADBASE(d, mc, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), &(reg)) +#define SKX_GET_TADWAYNESS(d, mc, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), &(reg)) +#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), &(reg)) + +#define SKX_TAD_BASE(b) ((u64)GET_BITFIELD((b), 12, 31) << 26) +#define SKX_TAD_SKT_GRAN(b) GET_BITFIELD((b), 4, 5) +#define SKX_TAD_CHN_GRAN(b) GET_BITFIELD((b), 6, 7) +#define SKX_TAD_LIMIT(b) (((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26) +#define SKX_TAD_OFFSET(b) ((u64)GET_BITFIELD((b), 4, 23) << 26) +#define SKX_TAD_SKTWAYS(b) (1 << GET_BITFIELD((b), 10, 11)) +#define SKX_TAD_CHNWAYS(b) (GET_BITFIELD((b), 8, 9) + 1) + +/* which bit used for both socket and channel interleave */ +static int skx_granularity[] = { 6, 8, 12, 30 }; + +static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits) +{ + addr >>= shift; + addr /= ways; + addr <<= shift; + + return addr | (lowbits & ((1ull << shift) - 1)); +} + +static bool skx_tad_decode(struct decoded_addr *res) +{ + int i; + u32 base, wayness, chnilvoffset; + int skt_interleave_bit, chn_interleave_bit; + u64 channel_addr; + + 
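/* + * Find the TAD entry whose base/limit range brackets the address; + * its wayness and granularity fields drive the reverse interleave + * below. + */ +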
for (i = 0; i < SKX_MAX_TAD; i++) { + SKX_GET_TADBASE(res->dev, res->imc, i, base); + SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness); + if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness)) + goto tad_found; + } + edac_dbg(0, "No TAD entry for 0x%llx\n", res->addr); + return false; + +tad_found: + res->sktways = SKX_TAD_SKTWAYS(wayness); + res->chanways = SKX_TAD_CHNWAYS(wayness); + skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)]; + chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)]; + + SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset); + channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset); + + if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) { + /* Must handle channel first, then socket */ + channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit, + res->chanways, channel_addr); + channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit, + res->sktways, channel_addr); + } else { + /* Handle socket then channel. Preserve low bits from original address */ + channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit, + res->sktways, res->addr); + channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit, + res->chanways, res->addr); + } + + res->chan_addr = channel_addr; + + edac_dbg(2, "0x%llx: chan_addr=0x%llx sktways=%d chanways=%d\n", + res->addr, res->chan_addr, res->sktways, res->chanways); + return true; +} + +#define SKX_MAX_RIR 4 + +#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \ + 0x108 + 4 * (i), &(reg)) +#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \ + 0x120 + 16 * (idx) + 4 * (i), &(reg)) + +#define SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31) +#define SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29) +#define SKX_RIR_WAYS(b) (1 << GET_BITFIELD((b), 28, 29)) +#define SKX_RIR_CHAN_RANK(b) GET_BITFIELD((b), 16, 19) +#define SKX_RIR_OFFSET(b) ((u64)(GET_BITFIELD((b), 2, 15) << 26)) + +static bool skx_rir_decode(struct decoded_addr *res) +{ + int i, idx, chan_rank; + int shift; + u32 rirway, rirlv; + u64 rank_addr, prev_limit = 0, limit; + + if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg) + shift = 6; + else + shift = 13; + + for (i = 0; i < SKX_MAX_RIR; i++) { + SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway); + limit = SKX_RIR_LIMIT(rirway); + if (SKX_RIR_VALID(rirway)) { + if (prev_limit <= res->chan_addr && + res->chan_addr <= limit) + goto rir_found; + } + prev_limit = limit; + } + edac_dbg(0, "No RIR entry for 0x%llx\n", res->addr); + return false; + +rir_found: + rank_addr = res->chan_addr >> shift; + rank_addr /= SKX_RIR_WAYS(rirway); + rank_addr <<= shift; + rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0); + + res->rank_address = rank_addr; + idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway); + + SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv); + res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv); + chan_rank = SKX_RIR_CHAN_RANK(rirlv); + res->channel_rank = chan_rank; + res->dimm = chan_rank / 4; + res->rank = chan_rank % 4; + + edac_dbg(2, "0x%llx: dimm=%d rank=%d chan_rank=%d rank_addr=0x%llx\n", + res->addr, res->dimm, res->rank, + res->channel_rank, res->rank_address); + return true; +} + +static u8 skx_close_row[] = { + 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33, 34 +}; + +static u8 skx_close_column[] = { + 3, 4, 5, 14, 19, 23, 
24, 25, 26, 27 +}; + +static u8 skx_open_row[] = { + 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34 +}; + +static u8 skx_open_column[] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 +}; + +static u8 skx_open_fine_column[] = { + 3, 4, 5, 7, 8, 9, 10, 11, 12, 13 +}; + +static int skx_bits(u64 addr, int nbits, u8 *bits) +{ + int i, res = 0; + + for (i = 0; i < nbits; i++) + res |= ((addr >> bits[i]) & 1) << i; + return res; +} + +static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1) +{ + int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1); + + if (do_xor) + ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1); + + return ret; +} + +static bool skx_mad_decode(struct decoded_addr *r) +{ + struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm]; + int bg0 = dimm->fine_grain_bank ? 6 : 13; + + if (dimm->close_pg) { + r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row); + r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column); + r->column |= 0x400; /* C10 is autoprecharge, always set */ + r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28); + r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21); + } else { + r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row); + if (dimm->fine_grain_bank) + r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column); + else + r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column); + r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23); + r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21); + } + r->row &= (1u << dimm->rowbits) - 1; + + edac_dbg(2, "0x%llx: row=0x%x col=0x%x bank_addr=%d bank_group=%d\n", + r->addr, r->row, r->column, r->bank_address, + r->bank_group); + return true; +} + +static bool skx_decode(struct decoded_addr *res) +{ + return skx_sad_decode(res) && skx_tad_decode(res) && + skx_rir_decode(res) && skx_mad_decode(res); +} + +static struct notifier_block skx_mce_dec = { + .notifier_call = skx_mce_check_error, + .priority = MCE_PRIO_EDAC, +}; + +/* + * skx_init: + * make sure we are running on the correct cpu model + * search for all the devices we need + * check which DIMMs are present. 
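+ * register one EDAC MC device per memory controller found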
+ */ +static int __init skx_init(void) +{ + const struct x86_cpu_id *id; + struct res_config *cfg; + const struct munit *m; + const char *owner; + int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8}; + u8 mc = 0, src_id; + struct skx_dev *d; + + edac_dbg(2, "\n"); + + if (ghes_get_devices()) + return -EBUSY; + + owner = edac_get_owner(); + if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) + return -EBUSY; + + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + id = x86_match_cpu(skx_cpuids); + if (!id) + return -ENODEV; + + cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); + + rc = skx_get_hi_lo(0x2034, off, &skx_tolm, &skx_tohm); + if (rc) + return rc; + + rc = skx_get_all_bus_mappings(cfg, &skx_edac_list); + if (rc < 0) + goto fail; + if (rc == 0) { + edac_dbg(2, "No memory controllers found\n"); + return -ENODEV; + } + skx_num_sockets = rc; + + for (m = skx_all_munits; m->did; m++) { + rc = get_all_munits(m); + if (rc < 0) + goto fail; + if (rc != m->per_socket * skx_num_sockets) { + edac_dbg(2, "Expected %d, got %d of 0x%x\n", + m->per_socket * skx_num_sockets, rc, m->did); + rc = -ENODEV; + goto fail; + } + } + + list_for_each_entry(d, skx_edac_list, list) { + rc = skx_get_src_id(d, 0xf0, &src_id); + if (rc < 0) + goto fail; + + edac_dbg(2, "src_id = %d\n", src_id); + for (i = 0; i < cfg->ddr_imc_num; i++) { + d->imc[i].mc = mc++; + d->imc[i].lmc = i; + d->imc[i].src_id = src_id; + d->imc[i].num_channels = cfg->ddr_chan_num; + d->imc[i].num_dimms = cfg->ddr_dimm_num; + rc = skx_register_mci(&d->imc[i], &d->imc[i].chan[0].cdev->dev, + pci_name(d->imc[i].chan[0].cdev), + "Skylake Socket", EDAC_MOD_STR, + skx_get_dimm_config, cfg); + if (rc < 0) + goto fail; + } + } + + skx_set_decode(skx_decode, skx_show_retry_rd_err_log); + + if (nvdimm_count && skx_adxl_get() != -ENODEV) { + skx_set_decode(NULL, skx_show_retry_rd_err_log); + } else { + if (nvdimm_count) + skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n"); + skx_set_decode(skx_decode, skx_show_retry_rd_err_log); + } + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + skx_setup_debug("skx_test"); + + mce_register_decode_chain(&skx_mce_dec); + + return 0; +fail: + skx_remove(); + return rc; +} + +static void __exit skx_exit(void) +{ + edac_dbg(2, "\n"); + mce_unregister_decode_chain(&skx_mce_dec); + skx_teardown_debug(); + if (nvdimm_count) + skx_adxl_put(); + skx_remove(); +} + +module_init(skx_init); +module_exit(skx_exit); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tony Luck"); +MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors"); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c new file mode 100644 index 000000000000..3276afe43922 --- /dev/null +++ b/drivers/edac/skx_common.c @@ -0,0 +1,898 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Shared code by both skx_edac and i10nm_edac. Originally split out + * from the skx_edac driver. + * + * This file is linked into both skx_edac and i10nm_edac drivers. In + * order to avoid link errors, this file must be like a pure library + * without including symbols and defines which would otherwise conflict, + * when linked once into a module and into a built-in object, at the + * same time. For example, __this_module symbol references when that + * file is being linked into a built-in object. + * + * Copyright (c) 2018, Intel Corporation. 
+ */ + +#include <linux/topology.h> +#include <linux/acpi.h> +#include <linux/dmi.h> +#include <linux/adxl.h> +#include <linux/overflow.h> +#include <acpi/nfit.h> +#include <asm/mce.h> +#include <asm/uv/uv.h> +#include "edac_module.h" +#include "skx_common.h" + +static const char * const component_names[] = { + [INDEX_SOCKET] = "ProcessorSocketId", + [INDEX_MEMCTRL] = "MemoryControllerId", + [INDEX_CHANNEL] = "ChannelId", + [INDEX_DIMM] = "DimmSlotId", + [INDEX_CS] = "ChipSelect", + [INDEX_NM_MEMCTRL] = "NmMemoryControllerId", + [INDEX_NM_CHANNEL] = "NmChannelId", + [INDEX_NM_DIMM] = "NmDimmSlotId", + [INDEX_NM_CS] = "NmChipSelect", +}; + +static int component_indices[ARRAY_SIZE(component_names)]; +static int adxl_component_count; +static const char * const *adxl_component_names; +static u64 *adxl_values; +static char *adxl_msg; +static unsigned long adxl_nm_bitmap; + +static char skx_msg[MSG_SIZE]; +static skx_decode_f driver_decode; +static skx_show_retry_log_f skx_show_retry_rd_err_log; +static u64 skx_tolm, skx_tohm; +static LIST_HEAD(dev_edac_list); +static bool skx_mem_cfg_2lm; +static struct res_config *skx_res_cfg; + +int skx_adxl_get(void) +{ + const char * const *names; + int i, j; + + names = adxl_get_component_names(); + if (!names) { + skx_printk(KERN_NOTICE, "No firmware support for address translation.\n"); + return -ENODEV; + } + + for (i = 0; i < INDEX_MAX; i++) { + for (j = 0; names[j]; j++) { + if (!strcmp(component_names[i], names[j])) { + component_indices[i] = j; + + if (i >= INDEX_NM_FIRST) + adxl_nm_bitmap |= 1 << i; + + break; + } + } + + if (!names[j] && i < INDEX_NM_FIRST) + goto err; + } + + if (skx_mem_cfg_2lm) { + if (!adxl_nm_bitmap) + skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n"); + else + edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap); + } + + adxl_component_names = names; + while (*names++) + adxl_component_count++; + + adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values), + GFP_KERNEL); + if (!adxl_values) { + adxl_component_count = 0; + return -ENOMEM; + } + + adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL); + if (!adxl_msg) { + adxl_component_count = 0; + kfree(adxl_values); + return -ENOMEM; + } + + return 0; +err: + skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ", + component_names[i]); + for (j = 0; names[j]; j++) + skx_printk(KERN_CONT, "%s ", names[j]); + skx_printk(KERN_CONT, "\n"); + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(skx_adxl_get); + +void skx_adxl_put(void) +{ + adxl_component_count = 0; + kfree(adxl_values); + kfree(adxl_msg); +} +EXPORT_SYMBOL_GPL(skx_adxl_put); + +void skx_init_mc_mapping(struct skx_dev *d) +{ + /* + * By default, the BIOS presents all memory controllers within each + * socket to the EDAC driver. The physical indices are the same as + * the logical indices of the memory controllers enumerated by the + * EDAC driver. 
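+ * For example, a socket exposing two controllers starts with the + * identity mapping {0->0, 1->1}; skx_set_mc_mapping() below lets a + * driver override an entry when a controller's physical index + * differs from its logical one.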
+ */ + for (int i = 0; i < d->num_imc; i++) + d->imc[i].mc_mapping = i; +} +EXPORT_SYMBOL_GPL(skx_init_mc_mapping); + +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc) +{ + edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, lmc); + + d->imc[lmc].mc_mapping = pmc; +} +EXPORT_SYMBOL_GPL(skx_set_mc_mapping); + +static int skx_get_mc_mapping(struct skx_dev *d, u8 pmc) +{ + for (int lmc = 0; lmc < d->num_imc; lmc++) { + if (d->imc[lmc].mc_mapping == pmc) { + edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, lmc); + + return lmc; + } + } + + return -1; +} + +static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) +{ + int i, lmc, len = 0; + struct skx_dev *d; + + if (res->addr >= skx_tohm || (res->addr >= skx_tolm && + res->addr < BIT_ULL(32))) { + edac_dbg(0, "Address 0x%llx out of range\n", res->addr); + return false; + } + + if (adxl_decode(res->addr, adxl_values)) { + edac_dbg(0, "Failed to decode 0x%llx\n", res->addr); + return false; + } + + /* + * GNR with a Flat2LM memory configuration may mistakenly classify + * a near-memory error(DDR5) as a far-memory error(CXL), resulting + * in the incorrect selection of decoded ADXL components. + * To address this, prefetch the decoded far-memory controller ID + * and adjust the error source to near-memory if the far-memory + * controller ID is invalid. + */ + if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) { + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; + if (res->imc == -1) { + err_src = ERR_SRC_2LM_NM; + edac_dbg(0, "Adjust the error source to near-memory.\n"); + } + } + + res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; + if (err_src == ERR_SRC_2LM_NM) { + res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? + (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1; + res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ? + (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1; + res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ? + (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1; + res->cs = (adxl_nm_bitmap & BIT_NM_CS) ? 
+ (int)adxl_values[component_indices[INDEX_NM_CS]] : -1; + } else { + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; + res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; + res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + res->cs = (int)adxl_values[component_indices[INDEX_CS]]; + } + + if (res->imc < 0) { + skx_printk(KERN_ERR, "Bad imc %d\n", res->imc); + return false; + } + + list_for_each_entry(d, &dev_edac_list, list) { + if (d->imc[0].src_id == res->socket) { + res->dev = d; + break; + } + } + + if (!res->dev) { + skx_printk(KERN_ERR, "No device for src_id %d imc %d\n", + res->socket, res->imc); + return false; + } + + lmc = skx_get_mc_mapping(d, res->imc); + if (lmc < 0) { + skx_printk(KERN_ERR, "No lmc for imc %d\n", res->imc); + return false; + } + + res->imc = lmc; + + for (i = 0; i < adxl_component_count; i++) { + if (adxl_values[i] == ~0x0ull) + continue; + + len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx", + adxl_component_names[i], adxl_values[i]); + if (MSG_SIZE - len <= 0) + break; + } + + res->decoded_by_adxl = true; + + return true; +} + +void skx_set_mem_cfg(bool mem_cfg_2lm) +{ + skx_mem_cfg_2lm = mem_cfg_2lm; +} +EXPORT_SYMBOL_GPL(skx_set_mem_cfg); + +void skx_set_res_cfg(struct res_config *cfg) +{ + skx_res_cfg = cfg; +} +EXPORT_SYMBOL_GPL(skx_set_res_cfg); + +void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) +{ + driver_decode = decode; + skx_show_retry_rd_err_log = show_retry_log; +} +EXPORT_SYMBOL_GPL(skx_set_decode); + +static int skx_get_pkg_id(struct skx_dev *d, u8 *id) +{ + int node; + int cpu; + + node = pcibus_to_node(d->util_all->bus); + if (numa_valid_node(node)) { + for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) { + *id = topology_physical_package_id(cpu); + return 0; + } + } + } + + skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n"); + return -ENODEV; +} + +int skx_get_src_id(struct skx_dev *d, int off, u8 *id) +{ + u32 reg; + + /* + * The 3-bit source IDs in PCI configuration space registers are limited + * to 8 unique IDs, and each ID is local to a UPI/QPI domain. + * + * Source IDs cannot be used to map devices to sockets on UV systems + * because they can exceed 8 sockets and have multiple UPI/QPI domains + * with identical, repeating source IDs. + */ + if (is_uv_system()) + return skx_get_pkg_id(d, id); + + if (pci_read_config_dword(d->util_all, off, &reg)) { + skx_printk(KERN_ERR, "Failed to read src id\n"); + return -ENODEV; + } + + *id = GET_BITFIELD(reg, 12, 14); + return 0; +} +EXPORT_SYMBOL_GPL(skx_get_src_id); + +static int get_width(u32 mtr) +{ + switch (GET_BITFIELD(mtr, 8, 9)) { + case 0: + return DEV_X4; + case 1: + return DEV_X8; + case 2: + return DEV_X16; + } + return DEV_UNKNOWN; +} + +/* + * We use the per-socket device @cfg->did to count how many sockets are present, + * and to determine which PCI buses are associated with each socket. Allocate + * and build the full list of all the skx_dev structures that we need here.
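+ * + * For example, on a SKX part a value of 0x33221100 read from + * @cfg->busno_cfg_offset decodes to bus[0]=0x00, bus[1]=0x11, + * bus[2]=0x22 and bus[3]=0x33 for that socket.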
+ */ +int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) +{ + int ndev = 0, imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num; + struct pci_dev *pdev, *prev; + struct skx_dev *d; + u32 reg; + + prev = NULL; + for (;;) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, cfg->decs_did, prev); + if (!pdev) + break; + ndev++; + d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL); + if (!d) { + pci_dev_put(pdev); + return -ENOMEM; + } + + if (pci_read_config_dword(pdev, cfg->busno_cfg_offset, &reg)) { + kfree(d); + pci_dev_put(pdev); + skx_printk(KERN_ERR, "Failed to read bus idx\n"); + return -ENODEV; + } + + d->bus[0] = GET_BITFIELD(reg, 0, 7); + d->bus[1] = GET_BITFIELD(reg, 8, 15); + if (cfg->type == SKX) { + d->seg = pci_domain_nr(pdev->bus); + d->bus[2] = GET_BITFIELD(reg, 16, 23); + d->bus[3] = GET_BITFIELD(reg, 24, 31); + } else { + d->seg = GET_BITFIELD(reg, 16, 23); + } + + d->num_imc = imc_num; + + edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x, imcs %d\n", + d->bus[0], d->bus[1], d->bus[2], d->bus[3], imc_num); + list_add_tail(&d->list, &dev_edac_list); + prev = pdev; + + skx_init_mc_mapping(d); + } + + if (list) + *list = &dev_edac_list; + return ndev; +} +EXPORT_SYMBOL_GPL(skx_get_all_bus_mappings); + +struct list_head *skx_get_edac_list(void) +{ + return &dev_edac_list; +} +EXPORT_SYMBOL_GPL(skx_get_edac_list); + +int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm) +{ + struct pci_dev *pdev; + u32 reg; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, NULL); + if (!pdev) { + edac_dbg(2, "Can't get tolm/tohm\n"); + return -ENODEV; + } + + if (pci_read_config_dword(pdev, off[0], &reg)) { + skx_printk(KERN_ERR, "Failed to read tolm\n"); + goto fail; + } + skx_tolm = reg; + + if (pci_read_config_dword(pdev, off[1], &reg)) { + skx_printk(KERN_ERR, "Failed to read lower tohm\n"); + goto fail; + } + skx_tohm = reg; + + if (pci_read_config_dword(pdev, off[2], &reg)) { + skx_printk(KERN_ERR, "Failed to read upper tohm\n"); + goto fail; + } + skx_tohm |= (u64)reg << 32; + + pci_dev_put(pdev); + *tolm = skx_tolm; + *tohm = skx_tohm; + edac_dbg(2, "tolm = 0x%llx tohm = 0x%llx\n", skx_tolm, skx_tohm); + return 0; +fail: + pci_dev_put(pdev); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(skx_get_hi_lo); + +void skx_set_hi_lo(u64 tolm, u64 tohm) +{ + skx_tolm = tolm; + skx_tohm = tohm; +} +EXPORT_SYMBOL_GPL(skx_set_hi_lo); + +static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, + int minval, int maxval, const char *name) +{ + u32 val = GET_BITFIELD(reg, lobit, hibit); + + if (val < minval || val > maxval) { + edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name, val, reg); + return -EINVAL; + } + return val + add; +} + +#define numrank(reg) skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks") +#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 7, "rows") +#define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols") + +int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, + struct skx_imc *imc, int chan, int dimmno, + struct res_config *cfg) +{ + int banks, ranks, rows, cols, npages; + enum mem_type mtype; + u64 size; + + ranks = numrank(mtr); + rows = numrow(mtr); + cols = imc->hbm_mc ?
6 : numcol(mtr); + + if (imc->hbm_mc) { + banks = 32; + mtype = MEM_HBM2; + } else if (cfg->support_ddr5) { + banks = 32; + mtype = MEM_DDR5; + } else { + banks = 16; + mtype = MEM_DDR4; + } + + /* + * Compute size in 8-byte (2^3) words, then shift to MiB (2^20) + */ + size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3); + npages = MiB_TO_PAGES(size); + + edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%x, col: 0x%x\n", + imc->mc, chan, dimmno, size, npages, + banks, 1 << ranks, rows, cols); + + imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mcmtr, 0, 0); + imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mcmtr, 9, 9); + imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0); + imc->chan[chan].dimms[dimmno].rowbits = rows; + imc->chan[chan].dimms[dimmno].colbits = cols; + + dimm->nr_pages = npages; + dimm->grain = 32; + dimm->dtype = get_width(mtr); + dimm->mtype = mtype; + dimm->edac_mode = EDAC_SECDED; /* likely better than this */ + + if (imc->hbm_mc) + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u", + imc->src_id, imc->lmc, chan); + else + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", + imc->src_id, imc->lmc, chan, dimmno); + + return 1; +} +EXPORT_SYMBOL_GPL(skx_get_dimm_info); + +int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, + int chan, int dimmno, const char *mod_str) +{ + int smbios_handle; + u32 dev_handle; + u16 flags; + u64 size = 0; + + dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc, + imc->src_id, 0); + + smbios_handle = nfit_get_smbios_id(dev_handle, &flags); + if (smbios_handle == -EOPNOTSUPP) { + pr_warn_once("%s: Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n", mod_str); + goto unknown_size; + } + + if (smbios_handle < 0) { + skx_printk(KERN_ERR, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle); + goto unknown_size; + } + + if (flags & ACPI_NFIT_MEM_MAP_FAILED) { + skx_printk(KERN_ERR, "NVDIMM ADR=0x%x is not mapped\n", dev_handle); + goto unknown_size; + } + + size = dmi_memdev_size(smbios_handle); + if (size == ~0ull) + skx_printk(KERN_ERR, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n", + dev_handle, smbios_handle); + +unknown_size: + dimm->nr_pages = size >> PAGE_SHIFT; + dimm->grain = 32; + dimm->dtype = DEV_UNKNOWN; + dimm->mtype = MEM_NVDIMM; + dimm->edac_mode = EDAC_SECDED; /* likely better than this */ + + edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n", + imc->mc, chan, dimmno, size >> 20, dimm->nr_pages); + + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", + imc->src_id, imc->lmc, chan, dimmno); + + return (size == 0 || size == ~0ull) ? 
0 : 1; +} +EXPORT_SYMBOL_GPL(skx_get_nvdimm_info); + +int skx_register_mci(struct skx_imc *imc, struct device *dev, + const char *dev_name, const char *ctl_name, + const char *mod_str, get_dimm_config_f get_dimm_config, + struct res_config *cfg) +{ + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; + struct skx_pvt *pvt; + int rc; + + /* Allocate a new MC control structure */ + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = imc->num_channels; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = imc->num_dimms; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers, + sizeof(struct skx_pvt)); + + if (unlikely(!mci)) + return -ENOMEM; + + edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci); + + /* Associate skx_dev and mci for future usage */ + imc->mci = mci; + pvt = mci->pvt_info; + pvt->imc = imc; + + mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name, + imc->src_id, imc->lmc); + if (!mci->ctl_name) { + rc = -ENOMEM; + goto fail0; + } + + mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM; + if (cfg->support_ddr5) + mci->mtype_cap |= MEM_FLAG_DDR5; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = mod_str; + mci->dev_name = dev_name; + mci->ctl_page_to_phys = NULL; + + rc = get_dimm_config(mci, cfg); + if (rc < 0) + goto fail; + + /* Record ptr to the generic device */ + mci->pdev = dev; + + /* Add this new MC control structure to EDAC's list of MCs */ + if (unlikely(edac_mc_add_mc(mci))) { + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); + rc = -EINVAL; + goto fail; + } + + return 0; + +fail: + kfree(mci->ctl_name); +fail0: + edac_mc_free(mci); + imc->mci = NULL; + return rc; +} +EXPORT_SYMBOL_GPL(skx_register_mci); + +static void skx_unregister_mci(struct skx_imc *imc) +{ + struct mem_ctl_info *mci = imc->mci; + + if (!mci) + return; + + edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci); + + /* Remove MC sysfs nodes */ + edac_mc_del_mc(mci->pdev); + + edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); + kfree(mci->ctl_name); + edac_mc_free(mci); +} + +static void skx_mce_output_error(struct mem_ctl_info *mci, + const struct mce *m, + struct decoded_addr *res) +{ + enum hw_event_mc_err_type tp_event; + char *optype; + bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); + bool overflow = GET_BITFIELD(m->status, 62, 62); + bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); + bool scrub_err = false; + bool recoverable; + int len; + u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); + u32 mscod = GET_BITFIELD(m->status, 16, 31); + u32 errcode = GET_BITFIELD(m->status, 0, 15); + u32 optypenum = GET_BITFIELD(m->status, 4, 6); + + recoverable = GET_BITFIELD(m->status, 56, 56); + + if (uncorrected_error) { + core_err_cnt = 1; + if (ripv) { + tp_event = HW_EVENT_ERR_UNCORRECTED; + } else { + tp_event = HW_EVENT_ERR_FATAL; + } + } else { + tp_event = HW_EVENT_ERR_CORRECTED; + } + + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + scrub_err = true; + break; + default: + optype = "reserved"; + break; + } + + if (res->decoded_by_adxl) { + len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) ? 
" recoverable" : "", + mscod, errcode, adxl_msg); + } else { + len = scnprintf(skx_msg, MSG_SIZE, + "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) ? " recoverable" : "", + mscod, errcode, + res->socket, res->imc, res->rank, + res->row, res->column, res->bank_address, res->bank_group); + } + + if (skx_show_retry_rd_err_log) + skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len, scrub_err); + + edac_dbg(0, "%s\n", skx_msg); + + /* Call the helper to output message */ + edac_mc_handle_error(tp_event, mci, core_err_cnt, + m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, + res->channel, res->dimm, -1, + optype, skx_msg); +} + +static enum error_source skx_error_source(const struct mce *m) +{ + u32 errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; + + if (errcode != MCACOD_MEM_CTL_ERR && errcode != MCACOD_EXT_MEM_ERR) + return ERR_SRC_NOT_MEMORY; + + if (!skx_mem_cfg_2lm) + return ERR_SRC_1LM; + + if (errcode == MCACOD_EXT_MEM_ERR) + return ERR_SRC_2LM_NM; + + return ERR_SRC_2LM_FM; +} + +int skx_mce_check_error(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *mce = (struct mce *)data; + enum error_source err_src; + struct decoded_addr res; + struct mem_ctl_info *mci; + char *type; + + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + + err_src = skx_error_source(mce); + + /* Ignore unless this is memory related with an address */ + if (err_src == ERR_SRC_NOT_MEMORY || !(mce->status & MCI_STATUS_ADDRV)) + return NOTIFY_DONE; + + memset(&res, 0, sizeof(res)); + res.mce = mce; + res.addr = mce->addr & MCI_ADDR_PHYSADDR; + if (!pfn_to_online_page(res.addr >> PAGE_SHIFT) && !arch_is_platform_page(res.addr)) { + pr_err("Invalid address 0x%llx in IA32_MC%d_ADDR\n", mce->addr, mce->bank); + return NOTIFY_DONE; + } + + /* Try driver decoder first */ + if (!(driver_decode && driver_decode(&res))) { + /* Then try firmware decoder (ACPI DSM methods) */ + if (!(adxl_component_count && skx_adxl_decode(&res, err_src))) + return NOTIFY_DONE; + } + + mci = res.dev->imc[res.imc].mci; + + if (!mci) + return NOTIFY_DONE; + + if (mce->mcgstatus & MCG_STATUS_MCIP) + type = "Exception"; + else + type = "Event"; + + skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n"); + + skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: 0x%llx " + "Bank %d: 0x%llx\n", mce->extcpu, type, + mce->mcgstatus, mce->bank, mce->status); + skx_mc_printk(mci, KERN_DEBUG, "TSC 0x%llx ", mce->tsc); + skx_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", mce->addr); + skx_mc_printk(mci, KERN_DEBUG, "MISC 0x%llx ", mce->misc); + + skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:0x%x TIME %llu SOCKET " + "%u APIC 0x%x\n", mce->cpuvendor, mce->cpuid, + mce->time, mce->socketid, mce->apicid); + + skx_mce_output_error(mci, mce, &res); + + mce->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_DONE; +} +EXPORT_SYMBOL_GPL(skx_mce_check_error); + +void skx_remove(void) +{ + int i, j; + struct skx_dev *d, *tmp; + + edac_dbg(0, "\n"); + + list_for_each_entry_safe(d, tmp, &dev_edac_list, list) { + list_del(&d->list); + for (i = 0; i < d->num_imc; i++) { + if (d->imc[i].mci) + skx_unregister_mci(&d->imc[i]); + + if (d->imc[i].mdev) + pci_dev_put(d->imc[i].mdev); + + if (d->imc[i].mbase) + iounmap(d->imc[i].mbase); + + if (d->imc[i].dev) + put_device(d->imc[i].dev); + + for (j = 0; j < d->imc[i].num_channels; j++) { + if 
(d->imc[i].chan[j].cdev) + pci_dev_put(d->imc[i].chan[j].cdev); + } + } + if (d->util_all) + pci_dev_put(d->util_all); + if (d->pcu_cr3) + pci_dev_put(d->pcu_cr3); + if (d->sad_all) + pci_dev_put(d->sad_all); + if (d->uracu) + pci_dev_put(d->uracu); + + kfree(d); + } +} +EXPORT_SYMBOL_GPL(skx_remove); + +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. + * Exercise the address decode logic by writing an address to + * /sys/kernel/debug/edac/{skx,i10nm,imh}_test/addr. + */ +static struct dentry *skx_test; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct mce m; + + pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); + + memset(&m, 0, sizeof(m)); + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x90; + /* One corrected error */ + m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); + m.addr = val; + skx_mce_check_error(NULL, 0, &m); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +void skx_setup_debug(const char *name) +{ + skx_test = edac_debugfs_create_dir(name); + if (!skx_test) + return; + + if (!edac_debugfs_create_file("addr", 0200, skx_test, + NULL, &fops_u64_wo)) { + debugfs_remove(skx_test); + skx_test = NULL; + } +} +EXPORT_SYMBOL_GPL(skx_setup_debug); + +void skx_teardown_debug(void) +{ + debugfs_remove_recursive(skx_test); +} +EXPORT_SYMBOL_GPL(skx_teardown_debug); +#endif /*CONFIG_EDAC_DEBUG*/ + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tony Luck"); +MODULE_DESCRIPTION("MC Driver for Intel server processors"); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h new file mode 100644 index 000000000000..f88038e5b18c --- /dev/null +++ b/drivers/edac/skx_common.h @@ -0,0 +1,371 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver. + * Originally split out from the skx_edac driver. + * + * Copyright (c) 2018, Intel Corporation. + */ + +#ifndef _SKX_COMM_EDAC_H +#define _SKX_COMM_EDAC_H + +#include <linux/bits.h> +#include <asm/mce.h> + +#define MSG_SIZE 1024 + +/* + * Debug macros + */ +#define skx_printk(level, fmt, arg...) \ + edac_printk(level, "skx", fmt, ##arg) + +#define skx_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg) + +/* + * Get a bit field at register value <v>, from bit <lo> to bit <hi> + */ +#define GET_BITFIELD(v, lo, hi) \ + (((v) & GENMASK_ULL((hi), (lo))) >> (lo)) + +#define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ +#define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ + +#define I10NM_NUM_DDR_CHANNELS 2 +#define I10NM_NUM_DDR_DIMMS 2 + +#define I10NM_NUM_HBM_CHANNELS 2 +#define I10NM_NUM_HBM_DIMMS 1 + +#define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) +#define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) + +#define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS) +#define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS) + +#define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15) +#define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i) + +#define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) +#define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ + +/* + * According to Intel Architecture spec vol 3B, + * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" + * memory errors should fit one of these masks: + * 000f 0000 1mmm cccc (binary) + * 000f 0010 1mmm cccc (binary) [RAM used as cache] + * where: + * f = Correction Report Filtering Bit. 
If 1, subsequent errors + * won't be shown + * mmm = error type + * cccc = channel + */ +#define MCACOD_MEM_ERR_MASK 0xef80 +/* + * Errors from either the memory of the 1-level memory system or the + * 2nd level memory (the slow "far" memory) of the 2-level memory system. + */ +#define MCACOD_MEM_CTL_ERR 0x80 +/* + * Errors from the 1st level memory (the fast "near" memory as cache) + * of the 2-level memory system. + */ +#define MCACOD_EXT_MEM_ERR 0x280 + +/* Max RRL register sets per {,sub-,pseudo-}channel. */ +#define NUM_RRL_SET 4 +/* Max RRL registers per set. */ +#define NUM_RRL_REG 6 +/* Max correctable error count registers. */ +#define NUM_CECNT_REG 8 + +/* Modes of RRL register set. */ +enum rrl_mode { + /* Last read error from patrol scrub. */ + LRE_SCRUB, + /* Last read error from demand. */ + LRE_DEMAND, + /* First read error from patrol scrub. */ + FRE_SCRUB, + /* First read error from demand. */ + FRE_DEMAND, +}; + +/* RRL registers per {,sub-,pseudo-}channel. */ +struct reg_rrl { + /* RRL register parts. */ + int set_num, reg_num; + enum rrl_mode modes[NUM_RRL_SET]; + u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; + /* RRL register widths in bytes per set. */ + u8 widths[NUM_RRL_REG]; + /* RRL control bits of the first register per set. */ + u32 v_mask; + u32 uc_mask; + u32 over_mask; + u32 en_patspr_mask; + u32 noover_mask; + u32 en_mask; + + /* CORRERRCNT register parts. */ + int cecnt_num; + u32 cecnt_offsets[NUM_CECNT_REG]; + u8 cecnt_widths[NUM_CECNT_REG]; +}; + +/* + * Each cpu socket contains some pci devices that provide global + * information, and also some that are local to each of the two + * memory controllers on the die. + */ +struct skx_dev { + /* {skx,i10nm}_edac */ + u8 bus[4]; + int seg; + struct pci_dev *sad_all; + struct pci_dev *util_all; + struct pci_dev *uracu; + struct pci_dev *pcu_cr3; + u32 mcroute; + + /* imh_edac */ + /* System-view MMIO base physical addresses. */ + u64 mmio_base_h_north; + u64 mmio_base_h_south; + int pkg; + + int num_imc; + struct list_head list; + struct skx_imc { + /* i10nm_edac */ + struct pci_dev *mdev; + + /* imh_edac */ + struct device *dev; + + struct mem_ctl_info *mci; + void __iomem *mbase; + int chan_mmio_sz; + int num_channels; /* channels per memory controller */ + int num_dimms; /* dimms per channel */ + bool hbm_mc; + u8 mc; /* system wide mc# */ + u8 lmc; /* socket relative mc# */ + u8 src_id; + /* + * Some server BIOS may hide certain memory controllers, and the + * EDAC driver skips those hidden memory controllers. However, the + * ADXL still decodes memory error addresses using physical memory + * controller indices. The mapping table is used to convert the + * physical indices (reported by ADXL) to the logical indices + * (used by the EDAC driver) of present memory controllers during the + * error handling process. + */ + u8 mc_mapping; + struct skx_channel { + struct pci_dev *cdev; + struct pci_dev *edev; + /* + * Two groups of RRL control registers per channel to save default RRL + * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
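+ * The first index of rrl_ctl[][] selects the {sub-,pseudo-}channel and + * the second selects the RRL register set.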
+ */ + u32 rrl_ctl[2][NUM_RRL_SET]; + struct skx_dimm { + u8 close_pg; + u8 bank_xor_enable; + u8 fine_grain_bank; + u8 rowbits; + u8 colbits; + } dimms[NUM_DIMMS]; + } chan[NUM_CHANNELS]; + } imc[]; +}; + +struct skx_pvt { + struct skx_imc *imc; +}; + +enum type { + SKX, + I10NM, + SPR, + GNR, + DMR, +}; + +enum { + INDEX_SOCKET, + INDEX_MEMCTRL, + INDEX_CHANNEL, + INDEX_DIMM, + INDEX_CS, + INDEX_NM_FIRST, + INDEX_NM_MEMCTRL = INDEX_NM_FIRST, + INDEX_NM_CHANNEL, + INDEX_NM_DIMM, + INDEX_NM_CS, + INDEX_MAX +}; + +enum error_source { + ERR_SRC_1LM, + ERR_SRC_2LM_NM, + ERR_SRC_2LM_FM, + ERR_SRC_NOT_MEMORY, +}; + +#define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) +#define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) +#define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) +#define BIT_NM_CS BIT_ULL(INDEX_NM_CS) + +struct decoded_addr { + struct mce *mce; + struct skx_dev *dev; + u64 addr; + int socket; + int imc; + int channel; + u64 chan_addr; + int sktways; + int chanways; + int dimm; + int cs; + int rank; + int channel_rank; + u64 rank_address; + int row; + int column; + int bank_address; + int bank_group; + bool decoded_by_adxl; +}; + +struct pci_bdf { + u32 bus : 8; + u32 dev : 5; + u32 fun : 3; +}; + +struct res_config { + enum type type; + /* DDR memory controllers per socket */ + int ddr_imc_num; + /* DDR channels per DDR memory controller */ + int ddr_chan_num; + /* DDR DIMMs per DDR memory channel */ + int ddr_dimm_num; + /* Per DDR channel memory-mapped I/O size */ + int ddr_chan_mmio_sz; + /* HBM memory controllers per socket */ + int hbm_imc_num; + /* HBM channels per HBM memory controller */ + int hbm_chan_num; + /* HBM DIMMs per HBM memory channel */ + int hbm_dimm_num; + /* Per HBM channel memory-mapped I/O size */ + int hbm_chan_mmio_sz; + bool support_ddr5; + /* RRL register sets per DDR channel */ + struct reg_rrl *reg_rrl_ddr; + /* RRL register sets per HBM channel */ + struct reg_rrl *reg_rrl_hbm[2]; + union { + /* {skx,i10nm}_edac */ + struct { + /* Configuration agent device ID */ + unsigned int decs_did; + /* Default bus number configuration register offset */ + int busno_cfg_offset; + struct pci_bdf sad_all_bdf; + struct pci_bdf pcu_cr3_bdf; + struct pci_bdf util_all_bdf; + struct pci_bdf uracu_bdf; + struct pci_bdf ddr_mdev_bdf; + struct pci_bdf hbm_mdev_bdf; + int sad_all_offset; + }; + /* imh_edac */ + struct { + /* MMIO base physical address in local package view */ + u64 mmio_base_l_north; + u64 mmio_base_l_south; + u64 ddr_imc_base; + u64 ddr_reg_mcmtr_offset; + u8 ddr_reg_mcmtr_width; + u64 ddr_reg_dimmmtr_offset; + u8 ddr_reg_dimmmtr_width; + u64 ubox_base; + u32 ubox_size; + u32 ubox_reg_mmio_base_offset; + u8 ubox_reg_mmio_base_width; + u32 ubox_reg_socket_id_offset; + u8 ubox_reg_socket_id_width; + u64 pcu_base; + u32 pcu_size; + u32 pcu_reg_capid3_offset; + u8 pcu_reg_capid3_width; + u64 sca_base; + u32 sca_size; + u32 sca_reg_tolm_offset; + u8 sca_reg_tolm_width; + u32 sca_reg_tohm_offset; + u8 sca_reg_tohm_width; + u64 ha_base; + u32 ha_size; + u32 ha_reg_mode_offset; + u8 ha_reg_mode_width; + }; + }; +}; + +typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, + struct res_config *cfg); +typedef bool (*skx_decode_f)(struct decoded_addr *res); +typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err); + +int skx_adxl_get(void); +void skx_adxl_put(void); +void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); +void skx_set_mem_cfg(bool mem_cfg_2lm); +void skx_set_res_cfg(struct res_config *cfg); +void 
skx_init_mc_mapping(struct skx_dev *d); +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); + +int skx_get_src_id(struct skx_dev *d, int off, u8 *id); + +int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); + +struct list_head *skx_get_edac_list(void); + +int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); +void skx_set_hi_lo(u64 tolm, u64 tohm); + +int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, + struct skx_imc *imc, int chan, int dimmno, + struct res_config *cfg); + +int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, + int chan, int dimmno, const char *mod_str); + +int skx_register_mci(struct skx_imc *imc, struct device *dev, const char *dev_name, + const char *ctl_name, const char *mod_str, + get_dimm_config_f get_dimm_config, + struct res_config *cfg); + +int skx_mce_check_error(struct notifier_block *nb, unsigned long val, + void *data); + +void skx_remove(void); + +#ifdef CONFIG_EDAC_DEBUG +void skx_setup_debug(const char *name); +void skx_teardown_debug(void); +#else +static inline void skx_setup_debug(const char *name) {} +static inline void skx_teardown_debug(void) {} +#endif + +#endif /* _SKX_COMM_EDAC_H */ diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c new file mode 100644 index 000000000000..51143b3257de --- /dev/null +++ b/drivers/edac/synopsys_edac.c @@ -0,0 +1,1493 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Synopsys DDR ECC Driver + * This driver is based on ppc4xx_edac.c drivers + * + * Copyright (C) 2012 - 2014 Xilinx, Inc. + */ + +#include <linux/edac.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/sizes.h> +#include <linux/interrupt.h> +#include <linux/of.h> + +#include "edac_module.h" + +/* Number of cs_rows needed per memory controller */ +#define SYNPS_EDAC_NR_CSROWS 1 + +/* Number of channels per memory controller */ +#define SYNPS_EDAC_NR_CHANS 1 + +/* Granularity of reported error in bytes */ +#define SYNPS_EDAC_ERR_GRAIN 1 + +#define SYNPS_EDAC_MSG_SIZE 256 + +#define SYNPS_EDAC_MOD_STRING "synps_edac" +#define SYNPS_EDAC_MOD_VER "1" + +/* Synopsys DDR memory controller registers that are relevant to ECC */ +#define CTRL_OFST 0x0 +#define T_ZQ_OFST 0xA4 + +/* ECC control register */ +#define ECC_CTRL_OFST 0xC4 +/* ECC log register */ +#define CE_LOG_OFST 0xC8 +/* ECC address register */ +#define CE_ADDR_OFST 0xCC +/* ECC data[31:0] register */ +#define CE_DATA_31_0_OFST 0xD0 + +/* Uncorrectable error info registers */ +#define UE_LOG_OFST 0xDC +#define UE_ADDR_OFST 0xE0 +#define UE_DATA_31_0_OFST 0xE4 + +#define STAT_OFST 0xF0 +#define SCRUB_OFST 0xF4 + +/* Control register bit field definitions */ +#define CTRL_BW_MASK 0xC +#define CTRL_BW_SHIFT 2 + +#define DDRCTL_WDTH_16 1 +#define DDRCTL_WDTH_32 0 + +/* ZQ register bit field definitions */ +#define T_ZQ_DDRMODE_MASK 0x2 + +/* ECC control register bit field definitions */ +#define ECC_CTRL_CLR_CE_ERR 0x2 +#define ECC_CTRL_CLR_UE_ERR 0x1 + +/* ECC correctable/uncorrectable error log register definitions */ +#define LOG_VALID 0x1 +#define CE_LOG_BITPOS_MASK 0xFE +#define CE_LOG_BITPOS_SHIFT 1 + +/* ECC correctable/uncorrectable error address register definitions */ +#define ADDR_COL_MASK 0xFFF +#define ADDR_ROW_MASK 0xFFFF000 +#define ADDR_ROW_SHIFT 12 +#define ADDR_BANK_MASK 0x70000000 +#define ADDR_BANK_SHIFT 28 + +/* ECC statistic register definitions */ +#define STAT_UECNT_MASK 0xFF +#define STAT_CECNT_MASK 
0xFF00 +#define STAT_CECNT_SHIFT 8 + +/* ECC scrub register definitions */ +#define SCRUB_MODE_MASK 0x7 +#define SCRUB_MODE_SECDED 0x4 + +/* DDR ECC Quirks */ +#define DDR_ECC_INTR_SUPPORT BIT(0) +#define DDR_ECC_DATA_POISON_SUPPORT BIT(1) +#define DDR_ECC_INTR_SELF_CLEAR BIT(2) + +/* ZynqMP Enhanced DDR memory controller registers that are relevant to ECC */ +/* ECC Configuration Registers */ +#define ECC_CFG0_OFST 0x70 +#define ECC_CFG1_OFST 0x74 + +/* ECC Status Register */ +#define ECC_STAT_OFST 0x78 + +/* ECC Clear Register */ +#define ECC_CLR_OFST 0x7C + +/* ECC Error count Register */ +#define ECC_ERRCNT_OFST 0x80 + +/* ECC Corrected Error Address Register */ +#define ECC_CEADDR0_OFST 0x84 +#define ECC_CEADDR1_OFST 0x88 + +/* ECC Syndrome Registers */ +#define ECC_CSYND0_OFST 0x8C +#define ECC_CSYND1_OFST 0x90 +#define ECC_CSYND2_OFST 0x94 + +/* ECC Bit Mask0 Address Register */ +#define ECC_BITMASK0_OFST 0x98 +#define ECC_BITMASK1_OFST 0x9C +#define ECC_BITMASK2_OFST 0xA0 + +/* ECC UnCorrected Error Address Register */ +#define ECC_UEADDR0_OFST 0xA4 +#define ECC_UEADDR1_OFST 0xA8 + +/* ECC Syndrome Registers */ +#define ECC_UESYND0_OFST 0xAC +#define ECC_UESYND1_OFST 0xB0 +#define ECC_UESYND2_OFST 0xB4 + +/* ECC Poison Address Reg */ +#define ECC_POISON0_OFST 0xB8 +#define ECC_POISON1_OFST 0xBC + +#define ECC_ADDRMAP0_OFFSET 0x200 + +/* Control register bitfield definitions */ +#define ECC_CTRL_BUSWIDTH_MASK 0x3000 +#define ECC_CTRL_BUSWIDTH_SHIFT 12 +#define ECC_CTRL_CLR_CE_ERRCNT BIT(2) +#define ECC_CTRL_CLR_UE_ERRCNT BIT(3) + +/* DDR Control Register width definitions */ +#define DDRCTL_EWDTH_16 2 +#define DDRCTL_EWDTH_32 1 +#define DDRCTL_EWDTH_64 0 + +/* ECC status register definitions */ +#define ECC_STAT_UECNT_MASK 0xF0000 +#define ECC_STAT_UECNT_SHIFT 16 +#define ECC_STAT_CECNT_MASK 0xF00 +#define ECC_STAT_CECNT_SHIFT 8 +#define ECC_STAT_BITNUM_MASK 0x7F + +/* ECC error count register definitions */ +#define ECC_ERRCNT_UECNT_MASK 0xFFFF0000 +#define ECC_ERRCNT_UECNT_SHIFT 16 +#define ECC_ERRCNT_CECNT_MASK 0xFFFF + +/* DDR QOS Interrupt register definitions */ +#define DDR_QOS_IRQ_STAT_OFST 0x20200 +#define DDR_QOSUE_MASK 0x4 +#define DDR_QOSCE_MASK 0x2 +#define ECC_CE_UE_INTR_MASK 0x6 +#define DDR_QOS_IRQ_EN_OFST 0x20208 +#define DDR_QOS_IRQ_DB_OFST 0x2020C + +/* DDR QOS Interrupt register definitions */ +#define DDR_UE_MASK BIT(9) +#define DDR_CE_MASK BIT(8) + +/* ECC Corrected Error Register Mask and Shifts*/ +#define ECC_CEADDR0_RW_MASK 0x3FFFF +#define ECC_CEADDR0_RNK_MASK BIT(24) +#define ECC_CEADDR1_BNKGRP_MASK 0x3000000 +#define ECC_CEADDR1_BNKNR_MASK 0x70000 +#define ECC_CEADDR1_BLKNR_MASK 0xFFF +#define ECC_CEADDR1_BNKGRP_SHIFT 24 +#define ECC_CEADDR1_BNKNR_SHIFT 16 + +/* ECC Poison register shifts */ +#define ECC_POISON0_RANK_SHIFT 24 +#define ECC_POISON0_RANK_MASK BIT(24) +#define ECC_POISON0_COLUMN_SHIFT 0 +#define ECC_POISON0_COLUMN_MASK 0xFFF +#define ECC_POISON1_BG_SHIFT 28 +#define ECC_POISON1_BG_MASK 0x30000000 +#define ECC_POISON1_BANKNR_SHIFT 24 +#define ECC_POISON1_BANKNR_MASK 0x7000000 +#define ECC_POISON1_ROW_SHIFT 0 +#define ECC_POISON1_ROW_MASK 0x3FFFF + +/* DDR Memory type defines */ +#define MEM_TYPE_DDR3 0x1 +#define MEM_TYPE_LPDDR3 0x8 +#define MEM_TYPE_DDR2 0x4 +#define MEM_TYPE_DDR4 0x10 +#define MEM_TYPE_LPDDR4 0x20 + +/* DDRC Software control register */ +#define DDRC_SWCTL 0x320 + +/* DDRC ECC CE & UE poison mask */ +#define ECC_CEPOISON_MASK 0x3 +#define ECC_UEPOISON_MASK 0x1 + +/* DDRC Device config masks */ +#define DDRC_MSTR_CFG_MASK 
0xC0000000 +#define DDRC_MSTR_CFG_SHIFT 30 +#define DDRC_MSTR_CFG_X4_MASK 0x0 +#define DDRC_MSTR_CFG_X8_MASK 0x1 +#define DDRC_MSTR_CFG_X16_MASK 0x2 +#define DDRC_MSTR_CFG_X32_MASK 0x3 + +#define DDR_MAX_ROW_SHIFT 18 +#define DDR_MAX_COL_SHIFT 14 +#define DDR_MAX_BANK_SHIFT 3 +#define DDR_MAX_BANKGRP_SHIFT 2 + +#define ROW_MAX_VAL_MASK 0xF +#define COL_MAX_VAL_MASK 0xF +#define BANK_MAX_VAL_MASK 0x1F +#define BANKGRP_MAX_VAL_MASK 0x1F +#define RANK_MAX_VAL_MASK 0x1F + +#define ROW_B0_BASE 6 +#define ROW_B1_BASE 7 +#define ROW_B2_BASE 8 +#define ROW_B3_BASE 9 +#define ROW_B4_BASE 10 +#define ROW_B5_BASE 11 +#define ROW_B6_BASE 12 +#define ROW_B7_BASE 13 +#define ROW_B8_BASE 14 +#define ROW_B9_BASE 15 +#define ROW_B10_BASE 16 +#define ROW_B11_BASE 17 +#define ROW_B12_BASE 18 +#define ROW_B13_BASE 19 +#define ROW_B14_BASE 20 +#define ROW_B15_BASE 21 +#define ROW_B16_BASE 22 +#define ROW_B17_BASE 23 + +#define COL_B2_BASE 2 +#define COL_B3_BASE 3 +#define COL_B4_BASE 4 +#define COL_B5_BASE 5 +#define COL_B6_BASE 6 +#define COL_B7_BASE 7 +#define COL_B8_BASE 8 +#define COL_B9_BASE 9 +#define COL_B10_BASE 10 +#define COL_B11_BASE 11 +#define COL_B12_BASE 12 +#define COL_B13_BASE 13 + +#define BANK_B0_BASE 2 +#define BANK_B1_BASE 3 +#define BANK_B2_BASE 4 + +#define BANKGRP_B0_BASE 2 +#define BANKGRP_B1_BASE 3 + +#define RANK_B0_BASE 6 + +/** + * struct ecc_error_info - ECC error log information. + * @row: Row number. + * @col: Column number. + * @bank: Bank number. + * @bitpos: Bit position. + * @data: Data causing the error. + * @bankgrpnr: Bank group number. + * @blknr: Block number. + */ +struct ecc_error_info { + u32 row; + u32 col; + u32 bank; + u32 bitpos; + u32 data; + u32 bankgrpnr; + u32 blknr; +}; + +/** + * struct synps_ecc_status - ECC status information to report. + * @ce_cnt: Correctable error count. + * @ue_cnt: Uncorrectable error count. + * @ceinfo: Correctable error log information. + * @ueinfo: Uncorrectable error log information. + */ +struct synps_ecc_status { + u32 ce_cnt; + u32 ue_cnt; + struct ecc_error_info ceinfo; + struct ecc_error_info ueinfo; +}; + +/** + * struct synps_edac_priv - DDR memory controller private instance data. + * @baseaddr: Base address of the DDR controller. + * @reglock: Concurrent CSRs access lock. + * @message: Buffer for framing the event specific info. + * @stat: ECC status information. + * @p_data: Platform data. + * @ce_cnt: Correctable Error count. + * @ue_cnt: Uncorrectable Error count. + * @poison_addr: Data poison address. + * @row_shift: Bit shifts for row bit. + * @col_shift: Bit shifts for column bit. + * @bank_shift: Bit shifts for bank bit. + * @bankgrp_shift: Bit shifts for bank group bit. + * @rank_shift: Bit shifts for rank bit. + */ +struct synps_edac_priv { + void __iomem *baseaddr; + spinlock_t reglock; + char message[SYNPS_EDAC_MSG_SIZE]; + struct synps_ecc_status stat; + const struct synps_platform_data *p_data; + u32 ce_cnt; + u32 ue_cnt; +#ifdef CONFIG_EDAC_DEBUG + ulong poison_addr; + u32 row_shift[18]; + u32 col_shift[14]; + u32 bank_shift[3]; + u32 bankgrp_shift[2]; + u32 rank_shift[1]; +#endif +}; + +enum synps_platform_type { + ZYNQ, + ZYNQMP, + SYNPS, +}; + +/** + * struct synps_platform_data - synps platform data structure. + * @platform: Identifies the target hardware platform + * @get_error_info: Get EDAC error info. + * @get_mtype: Get mtype. + * @get_dtype: Get dtype. + * @get_mem_info: Get EDAC memory info + * @quirks: To differentiate IPs. 
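+ * + * The platform-specific instance bound to a controller is selected at + * probe time via the compatible strings in synps_edac_match[].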
+ */ +struct synps_platform_data { + enum synps_platform_type platform; + int (*get_error_info)(struct synps_edac_priv *priv); + enum mem_type (*get_mtype)(const void __iomem *base); + enum dev_type (*get_dtype)(const void __iomem *base); +#ifdef CONFIG_EDAC_DEBUG + u64 (*get_mem_info)(struct synps_edac_priv *priv); +#endif + int quirks; +}; + +/** + * zynq_get_error_info - Get the current ECC error info. + * @priv: DDR memory controller private instance data. + * + * Return: one if there is no error, otherwise zero. + */ +static int zynq_get_error_info(struct synps_edac_priv *priv) +{ + struct synps_ecc_status *p; + u32 regval, clearval = 0; + void __iomem *base; + + base = priv->baseaddr; + p = &priv->stat; + + regval = readl(base + STAT_OFST); + if (!regval) + return 1; + + p->ce_cnt = (regval & STAT_CECNT_MASK) >> STAT_CECNT_SHIFT; + p->ue_cnt = regval & STAT_UECNT_MASK; + + regval = readl(base + CE_LOG_OFST); + if (!(p->ce_cnt && (regval & LOG_VALID))) + goto ue_err; + + p->ceinfo.bitpos = (regval & CE_LOG_BITPOS_MASK) >> CE_LOG_BITPOS_SHIFT; + regval = readl(base + CE_ADDR_OFST); + p->ceinfo.row = (regval & ADDR_ROW_MASK) >> ADDR_ROW_SHIFT; + p->ceinfo.col = regval & ADDR_COL_MASK; + p->ceinfo.bank = (regval & ADDR_BANK_MASK) >> ADDR_BANK_SHIFT; + p->ceinfo.data = readl(base + CE_DATA_31_0_OFST); + edac_dbg(3, "CE bit position: %d data: %d\n", p->ceinfo.bitpos, + p->ceinfo.data); + clearval = ECC_CTRL_CLR_CE_ERR; + +ue_err: + regval = readl(base + UE_LOG_OFST); + if (!(p->ue_cnt && (regval & LOG_VALID))) + goto out; + + regval = readl(base + UE_ADDR_OFST); + p->ueinfo.row = (regval & ADDR_ROW_MASK) >> ADDR_ROW_SHIFT; + p->ueinfo.col = regval & ADDR_COL_MASK; + p->ueinfo.bank = (regval & ADDR_BANK_MASK) >> ADDR_BANK_SHIFT; + p->ueinfo.data = readl(base + UE_DATA_31_0_OFST); + clearval |= ECC_CTRL_CLR_UE_ERR; + +out: + writel(clearval, base + ECC_CTRL_OFST); + writel(0x0, base + ECC_CTRL_OFST); + + return 0; +} + +#ifdef CONFIG_EDAC_DEBUG +/** + * zynqmp_get_mem_info - Get the current memory info. + * @priv: DDR memory controller private instance data. + * + * Return: host interface address. + */ +static u64 zynqmp_get_mem_info(struct synps_edac_priv *priv) +{ + u64 hif_addr = 0, linear_addr; + + linear_addr = priv->poison_addr; + if (linear_addr >= SZ_32G) + linear_addr = linear_addr - SZ_32G + SZ_2G; + hif_addr = linear_addr >> 3; + return hif_addr; +} +#endif + +/** + * zynqmp_get_error_info - Get the current ECC error info. + * @priv: DDR memory controller private instance data. + * + * Return: one if there is no error, otherwise zero.
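+ * + * Callers such as intr_handler() and check_errors() treat a non-zero + * return value as "nothing to report".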
+ */ +static int zynqmp_get_error_info(struct synps_edac_priv *priv) +{ + struct synps_ecc_status *p; + u32 regval, clearval; + unsigned long flags; + void __iomem *base; + + base = priv->baseaddr; + p = &priv->stat; + + regval = readl(base + ECC_ERRCNT_OFST); + p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK; + p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT; + if (!p->ce_cnt) + goto ue_err; + + regval = readl(base + ECC_STAT_OFST); + if (!regval) + return 1; + + p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK); + + regval = readl(base + ECC_CEADDR0_OFST); + p->ceinfo.row = (regval & ECC_CEADDR0_RW_MASK); + regval = readl(base + ECC_CEADDR1_OFST); + p->ceinfo.bank = (regval & ECC_CEADDR1_BNKNR_MASK) >> + ECC_CEADDR1_BNKNR_SHIFT; + p->ceinfo.bankgrpnr = (regval & ECC_CEADDR1_BNKGRP_MASK) >> + ECC_CEADDR1_BNKGRP_SHIFT; + p->ceinfo.blknr = (regval & ECC_CEADDR1_BLKNR_MASK); + p->ceinfo.data = readl(base + ECC_CSYND0_OFST); + edac_dbg(2, "ECCCSYN0: 0x%08X ECCCSYN1: 0x%08X ECCCSYN2: 0x%08X\n", + readl(base + ECC_CSYND0_OFST), readl(base + ECC_CSYND1_OFST), + readl(base + ECC_CSYND2_OFST)); +ue_err: + if (!p->ue_cnt) + goto out; + + regval = readl(base + ECC_UEADDR0_OFST); + p->ueinfo.row = (regval & ECC_CEADDR0_RW_MASK); + regval = readl(base + ECC_UEADDR1_OFST); + p->ueinfo.bankgrpnr = (regval & ECC_CEADDR1_BNKGRP_MASK) >> + ECC_CEADDR1_BNKGRP_SHIFT; + p->ueinfo.bank = (regval & ECC_CEADDR1_BNKNR_MASK) >> + ECC_CEADDR1_BNKNR_SHIFT; + p->ueinfo.blknr = (regval & ECC_CEADDR1_BLKNR_MASK); + p->ueinfo.data = readl(base + ECC_UESYND0_OFST); +out: + spin_lock_irqsave(&priv->reglock, flags); + + clearval = readl(base + ECC_CLR_OFST) | + ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT | + ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT; + writel(clearval, base + ECC_CLR_OFST); + + spin_unlock_irqrestore(&priv->reglock, flags); + + return 0; +} + +/** + * handle_error - Handle Correctable and Uncorrectable errors. + * @mci: EDAC memory controller instance. + * @p: Synopsys ECC status structure. + * + * Handles ECC correctable and uncorrectable errors. 
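+ * + * The accumulated counts are forwarded to edac_mc_handle_error() and the + * status structure is cleared once both events have been reported.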
+ */ +static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p) +{ + struct synps_edac_priv *priv = mci->pvt_info; + struct ecc_error_info *pinf; + + if (p->ce_cnt) { + pinf = &p->ceinfo; + if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) { + snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, + "DDR ECC error type:%s Row %d Bank %d BankGroup Number %d Block Number %d Bit Position: %d Data: 0x%08x", + "CE", pinf->row, pinf->bank, + pinf->bankgrpnr, pinf->blknr, + pinf->bitpos, pinf->data); + } else { + snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, + "DDR ECC error type:%s Row %d Bank %d Col %d Bit Position: %d Data: 0x%08x", + "CE", pinf->row, pinf->bank, pinf->col, + pinf->bitpos, pinf->data); + } + + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + p->ce_cnt, 0, 0, 0, 0, 0, -1, + priv->message, ""); + } + + if (p->ue_cnt) { + pinf = &p->ueinfo; + if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) { + snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, + "DDR ECC error type :%s Row %d Bank %d BankGroup Number %d Block Number %d", + "UE", pinf->row, pinf->bank, + pinf->bankgrpnr, pinf->blknr); + } else { + snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, + "DDR ECC error type :%s Row %d Bank %d Col %d ", + "UE", pinf->row, pinf->bank, pinf->col); + } + + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, + p->ue_cnt, 0, 0, 0, 0, 0, -1, + priv->message, ""); + } + + memset(p, 0, sizeof(*p)); +} + +static void enable_intr(struct synps_edac_priv *priv) +{ + unsigned long flags; + + /* Enable UE/CE Interrupts */ + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) { + writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, + priv->baseaddr + DDR_QOS_IRQ_EN_OFST); + + return; + } + + spin_lock_irqsave(&priv->reglock, flags); + + writel(DDR_UE_MASK | DDR_CE_MASK, + priv->baseaddr + ECC_CLR_OFST); + + spin_unlock_irqrestore(&priv->reglock, flags); +} + +static void disable_intr(struct synps_edac_priv *priv) +{ + unsigned long flags; + + /* Disable UE/CE Interrupts */ + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) { + writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, + priv->baseaddr + DDR_QOS_IRQ_DB_OFST); + + return; + } + + spin_lock_irqsave(&priv->reglock, flags); + + writel(0, priv->baseaddr + ECC_CLR_OFST); + + spin_unlock_irqrestore(&priv->reglock, flags); +} + +/** + * intr_handler - Interrupt Handler for ECC interrupts. + * @irq: IRQ number. + * @dev_id: Device ID. + * + * Return: IRQ_NONE if the interrupt is not set, IRQ_HANDLED otherwise. + */ +static irqreturn_t intr_handler(int irq, void *dev_id) +{ + const struct synps_platform_data *p_data; + struct mem_ctl_info *mci = dev_id; + struct synps_edac_priv *priv; + int status, regval; + + priv = mci->pvt_info; + p_data = priv->p_data; + + /* + * v3.0 of the controller has the ce/ue bits cleared automatically, + * so this condition does not apply.
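+ * Such controllers are flagged with the DDR_ECC_INTR_SELF_CLEAR quirk.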
+ */ + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) { + regval = readl(priv->baseaddr + DDR_QOS_IRQ_STAT_OFST); + regval &= (DDR_QOSCE_MASK | DDR_QOSUE_MASK); + if (!(regval & ECC_CE_UE_INTR_MASK)) + return IRQ_NONE; + } + + status = p_data->get_error_info(priv); + if (status) + return IRQ_NONE; + + priv->ce_cnt += priv->stat.ce_cnt; + priv->ue_cnt += priv->stat.ue_cnt; + handle_error(mci, &priv->stat); + + edac_dbg(3, "Total error count CE %d UE %d\n", + priv->ce_cnt, priv->ue_cnt); + /* v3.0 of the controller does not have this register */ + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) + writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST); + + return IRQ_HANDLED; +} + +/** + * check_errors - Check controller for ECC errors. + * @mci: EDAC memory controller instance. + * + * Check and post ECC errors. Called by the polling thread. + */ +static void check_errors(struct mem_ctl_info *mci) +{ + const struct synps_platform_data *p_data; + struct synps_edac_priv *priv; + int status; + + priv = mci->pvt_info; + p_data = priv->p_data; + + status = p_data->get_error_info(priv); + if (status) + return; + + priv->ce_cnt += priv->stat.ce_cnt; + priv->ue_cnt += priv->stat.ue_cnt; + handle_error(mci, &priv->stat); + + edac_dbg(3, "Total error count CE %d UE %d\n", + priv->ce_cnt, priv->ue_cnt); +} + +/** + * zynq_get_dtype - Return the controller memory width. + * @base: DDR memory controller base address. + * + * Get the EDAC device type width appropriate for the current controller + * configuration. + * + * Return: a device type width enumeration. + */ +static enum dev_type zynq_get_dtype(const void __iomem *base) +{ + enum dev_type dt; + u32 width; + + width = readl(base + CTRL_OFST); + width = (width & CTRL_BW_MASK) >> CTRL_BW_SHIFT; + + switch (width) { + case DDRCTL_WDTH_16: + dt = DEV_X2; + break; + case DDRCTL_WDTH_32: + dt = DEV_X4; + break; + default: + dt = DEV_UNKNOWN; + } + + return dt; +} + +/** + * zynqmp_get_dtype - Return the controller memory width. + * @base: DDR memory controller base address. + * + * Get the EDAC device type width appropriate for the current controller + * configuration. + * + * Return: a device type width enumeration. 
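+ * + * For example, a bus-width field of DDRCTL_EWDTH_64 maps to DEV_X8.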
+ */ +static enum dev_type zynqmp_get_dtype(const void __iomem *base) +{ + enum dev_type dt; + u32 width; + + width = readl(base + CTRL_OFST); + width = (width & ECC_CTRL_BUSWIDTH_MASK) >> ECC_CTRL_BUSWIDTH_SHIFT; + switch (width) { + case DDRCTL_EWDTH_16: + dt = DEV_X2; + break; + case DDRCTL_EWDTH_32: + dt = DEV_X4; + break; + case DDRCTL_EWDTH_64: + dt = DEV_X8; + break; + default: + dt = DEV_UNKNOWN; + } + + return dt; +} + +static bool get_ecc_state(struct synps_edac_priv *priv) +{ + u32 ecctype, clearval; + enum dev_type dt; + + if (priv->p_data->platform == ZYNQ) { + dt = zynq_get_dtype(priv->baseaddr); + if (dt == DEV_UNKNOWN) + return false; + + ecctype = readl(priv->baseaddr + SCRUB_OFST) & SCRUB_MODE_MASK; + if (ecctype == SCRUB_MODE_SECDED && dt == DEV_X2) { + clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_UE_ERR; + writel(clearval, priv->baseaddr + ECC_CTRL_OFST); + writel(0x0, priv->baseaddr + ECC_CTRL_OFST); + return true; + } + } else { + dt = zynqmp_get_dtype(priv->baseaddr); + if (dt == DEV_UNKNOWN) + return false; + + ecctype = readl(priv->baseaddr + ECC_CFG0_OFST) & SCRUB_MODE_MASK; + if (ecctype == SCRUB_MODE_SECDED && + (dt == DEV_X2 || dt == DEV_X4 || dt == DEV_X8)) { + clearval = readl(priv->baseaddr + ECC_CLR_OFST) | + ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT | + ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT; + writel(clearval, priv->baseaddr + ECC_CLR_OFST); + return true; + } + } + + return false; +} + +/** + * get_memsize - Read the size of the attached memory device. + * + * Return: the memory size in bytes. + */ +static u32 get_memsize(void) +{ + struct sysinfo inf; + + si_meminfo(&inf); + + return inf.totalram * inf.mem_unit; +} + +/** + * zynq_get_mtype - Return the controller memory type. + * @base: DDR memory controller base address. + * + * Get the EDAC memory type appropriate for the current controller + * configuration. + * + * Return: a memory type enumeration. + */ +static enum mem_type zynq_get_mtype(const void __iomem *base) +{ + enum mem_type mt; + u32 memtype; + + memtype = readl(base + T_ZQ_OFST); + + if (memtype & T_ZQ_DDRMODE_MASK) + mt = MEM_DDR3; + else + mt = MEM_DDR2; + + return mt; +} + +/** + * zynqmp_get_mtype - Return the controller memory type. + * @base: DDR memory controller base address. + * + * Get the EDAC memory type appropriate for the current controller + * configuration. + * + * Return: a memory type enumeration. + */ +static enum mem_type zynqmp_get_mtype(const void __iomem *base) +{ + enum mem_type mt; + u32 memtype; + + memtype = readl(base + CTRL_OFST); + + if ((memtype & MEM_TYPE_DDR3) || (memtype & MEM_TYPE_LPDDR3)) + mt = MEM_DDR3; + else if (memtype & MEM_TYPE_DDR2) + mt = MEM_RDDR2; + else if ((memtype & MEM_TYPE_LPDDR4) || (memtype & MEM_TYPE_DDR4)) + mt = MEM_DDR4; + else + mt = MEM_EMPTY; + + return mt; +} + +/** + * init_csrows - Initialize the csrow data. + * @mci: EDAC memory controller instance. + * + * Initialize the chip select rows associated with the EDAC memory + * controller instance.
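+ * + * The DIMM geometry comes from the platform get_mtype()/get_dtype() + * callbacks, and the total size reported by get_memsize() is split + * evenly across the channels.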
+ */ +static void init_csrows(struct mem_ctl_info *mci) +{ + struct synps_edac_priv *priv = mci->pvt_info; + const struct synps_platform_data *p_data; + struct csrow_info *csi; + struct dimm_info *dimm; + u32 size, row; + int j; + + p_data = priv->p_data; + + for (row = 0; row < mci->nr_csrows; row++) { + csi = mci->csrows[row]; + size = get_memsize(); + + for (j = 0; j < csi->nr_channels; j++) { + dimm = csi->channels[j]->dimm; + dimm->edac_mode = EDAC_SECDED; + dimm->mtype = p_data->get_mtype(priv->baseaddr); + dimm->nr_pages = (size >> PAGE_SHIFT) / csi->nr_channels; + dimm->grain = SYNPS_EDAC_ERR_GRAIN; + dimm->dtype = p_data->get_dtype(priv->baseaddr); + } + } +} + +/** + * mc_init - Initialize one driver instance. + * @mci: EDAC memory controller instance. + * @pdev: platform device. + * + * Perform initialization of the EDAC memory controller instance and + * related driver-private data associated with the memory controller the + * instance is bound to. + */ +static void mc_init(struct mem_ctl_info *mci, struct platform_device *pdev) +{ + struct synps_edac_priv *priv; + + mci->pdev = &pdev->dev; + priv = mci->pvt_info; + platform_set_drvdata(pdev, mci); + + /* Initialize controller capabilities and configuration */ + mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->scrub_cap = SCRUB_HW_SRC; + mci->scrub_mode = SCRUB_NONE; + + mci->edac_cap = EDAC_FLAG_SECDED; + mci->ctl_name = "synps_ddr_controller"; + mci->dev_name = SYNPS_EDAC_MOD_STRING; + mci->mod_name = SYNPS_EDAC_MOD_VER; + + if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) { + edac_op_state = EDAC_OPSTATE_INT; + } else { + edac_op_state = EDAC_OPSTATE_POLL; + mci->edac_check = check_errors; + } + + mci->ctl_page_to_phys = NULL; + + init_csrows(mci); +} + +static int setup_irq(struct mem_ctl_info *mci, + struct platform_device *pdev) +{ + struct synps_edac_priv *priv = mci->pvt_info; + int ret, irq; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + edac_printk(KERN_ERR, EDAC_MC, + "No IRQ %d in DT\n", irq); + return irq; + } + + ret = devm_request_irq(&pdev->dev, irq, intr_handler, + 0, dev_name(&pdev->dev), mci); + if (ret < 0) { + edac_printk(KERN_ERR, EDAC_MC, "Failed to request IRQ\n"); + return ret; + } + + enable_intr(priv); + + return 0; +} + +static const struct synps_platform_data zynq_edac_def = { + .platform = ZYNQ, + .get_error_info = zynq_get_error_info, + .get_mtype = zynq_get_mtype, + .get_dtype = zynq_get_dtype, + .quirks = 0, +}; + +static const struct synps_platform_data zynqmp_edac_def = { + .platform = ZYNQMP, + .get_error_info = zynqmp_get_error_info, + .get_mtype = zynqmp_get_mtype, + .get_dtype = zynqmp_get_dtype, +#ifdef CONFIG_EDAC_DEBUG + .get_mem_info = zynqmp_get_mem_info, +#endif + .quirks = (DDR_ECC_INTR_SUPPORT +#ifdef CONFIG_EDAC_DEBUG + | DDR_ECC_DATA_POISON_SUPPORT +#endif + ), +}; + +static const struct synps_platform_data synopsys_edac_def = { + .platform = SYNPS, + .get_error_info = zynqmp_get_error_info, + .get_mtype = zynqmp_get_mtype, + .get_dtype = zynqmp_get_dtype, + .quirks = (DDR_ECC_INTR_SUPPORT | DDR_ECC_INTR_SELF_CLEAR +#ifdef CONFIG_EDAC_DEBUG + | DDR_ECC_DATA_POISON_SUPPORT +#endif + ), +}; + + +static const struct of_device_id synps_edac_match[] = { + { + .compatible = "xlnx,zynq-ddrc-a05", + .data = (void *)&zynq_edac_def + }, + { + .compatible = "xlnx,zynqmp-ddrc-2.40a", + .data = (void *)&zynqmp_edac_def + }, + { + .compatible = "snps,ddrc-3.80a", + .data = (void *)&synopsys_edac_def + }, + { + /* end of 
table */ + } +}; + +MODULE_DEVICE_TABLE(of, synps_edac_match); + +#ifdef CONFIG_EDAC_DEBUG +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +/** + * ddr_poison_setup - Update poison registers. + * @priv: DDR memory controller private instance data. + * + * Update poison registers as per DDR mapping. + * Return: none. + */ +static void ddr_poison_setup(struct synps_edac_priv *priv) +{ + int col = 0, row = 0, bank = 0, bankgrp = 0, rank = 0, regval; + const struct synps_platform_data *p_data; + int index; + ulong hif_addr = 0; + + p_data = priv->p_data; + + if (p_data->get_mem_info) + hif_addr = p_data->get_mem_info(priv); + else + hif_addr = priv->poison_addr >> 3; + + for (index = 0; index < DDR_MAX_ROW_SHIFT; index++) { + if (priv->row_shift[index]) + row |= (((hif_addr >> priv->row_shift[index]) & + BIT(0)) << index); + else + break; + } + + for (index = 0; index < DDR_MAX_COL_SHIFT; index++) { + if (priv->col_shift[index] || index < 3) + col |= (((hif_addr >> priv->col_shift[index]) & + BIT(0)) << index); + else + break; + } + + for (index = 0; index < DDR_MAX_BANK_SHIFT; index++) { + if (priv->bank_shift[index]) + bank |= (((hif_addr >> priv->bank_shift[index]) & + BIT(0)) << index); + else + break; + } + + for (index = 0; index < DDR_MAX_BANKGRP_SHIFT; index++) { + if (priv->bankgrp_shift[index]) + bankgrp |= (((hif_addr >> priv->bankgrp_shift[index]) + & BIT(0)) << index); + else + break; + } + + if (priv->rank_shift[0]) + rank = (hif_addr >> priv->rank_shift[0]) & BIT(0); + + regval = (rank << ECC_POISON0_RANK_SHIFT) & ECC_POISON0_RANK_MASK; + regval |= (col << ECC_POISON0_COLUMN_SHIFT) & ECC_POISON0_COLUMN_MASK; + writel(regval, priv->baseaddr + ECC_POISON0_OFST); + + regval = (bankgrp << ECC_POISON1_BG_SHIFT) & ECC_POISON1_BG_MASK; + regval |= (bank << ECC_POISON1_BANKNR_SHIFT) & ECC_POISON1_BANKNR_MASK; + regval |= (row << ECC_POISON1_ROW_SHIFT) & ECC_POISON1_ROW_MASK; + writel(regval, priv->baseaddr + ECC_POISON1_OFST); +} + +static ssize_t inject_data_error_show(struct device *dev, + struct device_attribute *mattr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct synps_edac_priv *priv = mci->pvt_info; + + return sprintf(data, "Poison0 Addr: 0x%08x\n\rPoison1 Addr: 0x%08x\n\r" + "Error injection Address: 0x%lx\n\r", + readl(priv->baseaddr + ECC_POISON0_OFST), + readl(priv->baseaddr + ECC_POISON1_OFST), + priv->poison_addr); +} + +static ssize_t inject_data_error_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct synps_edac_priv *priv = mci->pvt_info; + + if (kstrtoul(data, 0, &priv->poison_addr)) + return -EINVAL; + + ddr_poison_setup(priv); + + return count; +} + +static ssize_t inject_data_poison_show(struct device *dev, + struct device_attribute *mattr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct synps_edac_priv *priv = mci->pvt_info; + + return sprintf(data, "Data Poisoning: %s\n\r", + (((readl(priv->baseaddr + ECC_CFG1_OFST)) & 0x3) == 0x3) + ? 
("Correctable Error") : ("UnCorrectable Error")); +} + +static ssize_t inject_data_poison_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct synps_edac_priv *priv = mci->pvt_info; + + writel(0, priv->baseaddr + DDRC_SWCTL); + if (strncmp(data, "CE", 2) == 0) + writel(ECC_CEPOISON_MASK, priv->baseaddr + ECC_CFG1_OFST); + else + writel(ECC_UEPOISON_MASK, priv->baseaddr + ECC_CFG1_OFST); + writel(1, priv->baseaddr + DDRC_SWCTL); + + return count; +} + +static DEVICE_ATTR_RW(inject_data_error); +static DEVICE_ATTR_RW(inject_data_poison); + +static int edac_create_sysfs_attributes(struct mem_ctl_info *mci) +{ + int rc; + + rc = device_create_file(&mci->dev, &dev_attr_inject_data_error); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_inject_data_poison); + if (rc < 0) + return rc; + return 0; +} + +static void edac_remove_sysfs_attributes(struct mem_ctl_info *mci) +{ + device_remove_file(&mci->dev, &dev_attr_inject_data_error); + device_remove_file(&mci->dev, &dev_attr_inject_data_poison); +} + +static void setup_row_address_map(struct synps_edac_priv *priv, u32 *addrmap) +{ + u32 addrmap_row_b2_10; + int index; + + priv->row_shift[0] = (addrmap[5] & ROW_MAX_VAL_MASK) + ROW_B0_BASE; + priv->row_shift[1] = ((addrmap[5] >> 8) & + ROW_MAX_VAL_MASK) + ROW_B1_BASE; + + addrmap_row_b2_10 = (addrmap[5] >> 16) & ROW_MAX_VAL_MASK; + if (addrmap_row_b2_10 != ROW_MAX_VAL_MASK) { + for (index = 2; index < 11; index++) + priv->row_shift[index] = addrmap_row_b2_10 + + index + ROW_B0_BASE; + + } else { + priv->row_shift[2] = (addrmap[9] & + ROW_MAX_VAL_MASK) + ROW_B2_BASE; + priv->row_shift[3] = ((addrmap[9] >> 8) & + ROW_MAX_VAL_MASK) + ROW_B3_BASE; + priv->row_shift[4] = ((addrmap[9] >> 16) & + ROW_MAX_VAL_MASK) + ROW_B4_BASE; + priv->row_shift[5] = ((addrmap[9] >> 24) & + ROW_MAX_VAL_MASK) + ROW_B5_BASE; + priv->row_shift[6] = (addrmap[10] & + ROW_MAX_VAL_MASK) + ROW_B6_BASE; + priv->row_shift[7] = ((addrmap[10] >> 8) & + ROW_MAX_VAL_MASK) + ROW_B7_BASE; + priv->row_shift[8] = ((addrmap[10] >> 16) & + ROW_MAX_VAL_MASK) + ROW_B8_BASE; + priv->row_shift[9] = ((addrmap[10] >> 24) & + ROW_MAX_VAL_MASK) + ROW_B9_BASE; + priv->row_shift[10] = (addrmap[11] & + ROW_MAX_VAL_MASK) + ROW_B10_BASE; + } + + priv->row_shift[11] = (((addrmap[5] >> 24) & ROW_MAX_VAL_MASK) == + ROW_MAX_VAL_MASK) ? 0 : (((addrmap[5] >> 24) & + ROW_MAX_VAL_MASK) + ROW_B11_BASE); + priv->row_shift[12] = ((addrmap[6] & ROW_MAX_VAL_MASK) == + ROW_MAX_VAL_MASK) ? 0 : ((addrmap[6] & + ROW_MAX_VAL_MASK) + ROW_B12_BASE); + priv->row_shift[13] = (((addrmap[6] >> 8) & ROW_MAX_VAL_MASK) == + ROW_MAX_VAL_MASK) ? 0 : (((addrmap[6] >> 8) & + ROW_MAX_VAL_MASK) + ROW_B13_BASE); + priv->row_shift[14] = (((addrmap[6] >> 16) & ROW_MAX_VAL_MASK) == + ROW_MAX_VAL_MASK) ? 0 : (((addrmap[6] >> 16) & + ROW_MAX_VAL_MASK) + ROW_B14_BASE); + priv->row_shift[15] = (((addrmap[6] >> 24) & ROW_MAX_VAL_MASK) == + ROW_MAX_VAL_MASK) ? 0 : (((addrmap[6] >> 24) & + ROW_MAX_VAL_MASK) + ROW_B15_BASE); + priv->row_shift[16] = ((addrmap[7] & ROW_MAX_VAL_MASK) == + ROW_MAX_VAL_MASK) ? 0 : ((addrmap[7] & + ROW_MAX_VAL_MASK) + ROW_B16_BASE); + priv->row_shift[17] = (((addrmap[7] >> 8) & ROW_MAX_VAL_MASK) == + ROW_MAX_VAL_MASK) ? 
0 : (((addrmap[7] >> 8) & + ROW_MAX_VAL_MASK) + ROW_B17_BASE); +} + +static void setup_column_address_map(struct synps_edac_priv *priv, u32 *addrmap) +{ + u32 width, memtype; + int index; + + memtype = readl(priv->baseaddr + CTRL_OFST); + width = (memtype & ECC_CTRL_BUSWIDTH_MASK) >> ECC_CTRL_BUSWIDTH_SHIFT; + + priv->col_shift[0] = 0; + priv->col_shift[1] = 1; + priv->col_shift[2] = (addrmap[2] & COL_MAX_VAL_MASK) + COL_B2_BASE; + priv->col_shift[3] = ((addrmap[2] >> 8) & + COL_MAX_VAL_MASK) + COL_B3_BASE; + priv->col_shift[4] = (((addrmap[2] >> 16) & COL_MAX_VAL_MASK) == + COL_MAX_VAL_MASK) ? 0 : (((addrmap[2] >> 16) & + COL_MAX_VAL_MASK) + COL_B4_BASE); + priv->col_shift[5] = (((addrmap[2] >> 24) & COL_MAX_VAL_MASK) == + COL_MAX_VAL_MASK) ? 0 : (((addrmap[2] >> 24) & + COL_MAX_VAL_MASK) + COL_B5_BASE); + priv->col_shift[6] = ((addrmap[3] & COL_MAX_VAL_MASK) == + COL_MAX_VAL_MASK) ? 0 : ((addrmap[3] & + COL_MAX_VAL_MASK) + COL_B6_BASE); + priv->col_shift[7] = (((addrmap[3] >> 8) & COL_MAX_VAL_MASK) == + COL_MAX_VAL_MASK) ? 0 : (((addrmap[3] >> 8) & + COL_MAX_VAL_MASK) + COL_B7_BASE); + priv->col_shift[8] = (((addrmap[3] >> 16) & COL_MAX_VAL_MASK) == + COL_MAX_VAL_MASK) ? 0 : (((addrmap[3] >> 16) & + COL_MAX_VAL_MASK) + COL_B8_BASE); + priv->col_shift[9] = (((addrmap[3] >> 24) & COL_MAX_VAL_MASK) == + COL_MAX_VAL_MASK) ? 0 : (((addrmap[3] >> 24) & + COL_MAX_VAL_MASK) + COL_B9_BASE); + if (width == DDRCTL_EWDTH_64) { + if (memtype & MEM_TYPE_LPDDR3) { + priv->col_shift[10] = ((addrmap[4] & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + ((addrmap[4] & COL_MAX_VAL_MASK) + + COL_B10_BASE); + priv->col_shift[11] = (((addrmap[4] >> 8) & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + (((addrmap[4] >> 8) & COL_MAX_VAL_MASK) + + COL_B11_BASE); + } else { + priv->col_shift[11] = ((addrmap[4] & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + ((addrmap[4] & COL_MAX_VAL_MASK) + + COL_B10_BASE); + priv->col_shift[13] = (((addrmap[4] >> 8) & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + (((addrmap[4] >> 8) & COL_MAX_VAL_MASK) + + COL_B11_BASE); + } + } else if (width == DDRCTL_EWDTH_32) { + if (memtype & MEM_TYPE_LPDDR3) { + priv->col_shift[10] = (((addrmap[3] >> 24) & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + (((addrmap[3] >> 24) & COL_MAX_VAL_MASK) + + COL_B9_BASE); + priv->col_shift[11] = ((addrmap[4] & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + ((addrmap[4] & COL_MAX_VAL_MASK) + + COL_B10_BASE); + } else { + priv->col_shift[11] = (((addrmap[3] >> 24) & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + (((addrmap[3] >> 24) & COL_MAX_VAL_MASK) + + COL_B9_BASE); + priv->col_shift[13] = ((addrmap[4] & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + ((addrmap[4] & COL_MAX_VAL_MASK) + + COL_B10_BASE); + } + } else { + if (memtype & MEM_TYPE_LPDDR3) { + priv->col_shift[10] = (((addrmap[3] >> 16) & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + (((addrmap[3] >> 16) & COL_MAX_VAL_MASK) + + COL_B8_BASE); + priv->col_shift[11] = (((addrmap[3] >> 24) & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + (((addrmap[3] >> 24) & COL_MAX_VAL_MASK) + + COL_B9_BASE); + priv->col_shift[13] = ((addrmap[4] & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + ((addrmap[4] & COL_MAX_VAL_MASK) + + COL_B10_BASE); + } else { + priv->col_shift[11] = (((addrmap[3] >> 16) & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 0 : + (((addrmap[3] >> 16) & COL_MAX_VAL_MASK) + + COL_B8_BASE); + priv->col_shift[13] = (((addrmap[3] >> 24) & + COL_MAX_VAL_MASK) == COL_MAX_VAL_MASK) ? 
0 : + (((addrmap[3] >> 24) & COL_MAX_VAL_MASK) + + COL_B9_BASE); + } + } + + if (width) { + for (index = 9; index > width; index--) { + priv->col_shift[index] = priv->col_shift[index - width]; + priv->col_shift[index - width] = 0; + } + } + +} + +static void setup_bank_address_map(struct synps_edac_priv *priv, u32 *addrmap) +{ + priv->bank_shift[0] = (addrmap[1] & BANK_MAX_VAL_MASK) + BANK_B0_BASE; + priv->bank_shift[1] = ((addrmap[1] >> 8) & + BANK_MAX_VAL_MASK) + BANK_B1_BASE; + priv->bank_shift[2] = (((addrmap[1] >> 16) & + BANK_MAX_VAL_MASK) == BANK_MAX_VAL_MASK) ? 0 : + (((addrmap[1] >> 16) & BANK_MAX_VAL_MASK) + + BANK_B2_BASE); + +} + +static void setup_bg_address_map(struct synps_edac_priv *priv, u32 *addrmap) +{ + priv->bankgrp_shift[0] = (addrmap[8] & + BANKGRP_MAX_VAL_MASK) + BANKGRP_B0_BASE; + priv->bankgrp_shift[1] = (((addrmap[8] >> 8) & BANKGRP_MAX_VAL_MASK) == + BANKGRP_MAX_VAL_MASK) ? 0 : (((addrmap[8] >> 8) + & BANKGRP_MAX_VAL_MASK) + BANKGRP_B1_BASE); + +} + +static void setup_rank_address_map(struct synps_edac_priv *priv, u32 *addrmap) +{ + priv->rank_shift[0] = ((addrmap[0] & RANK_MAX_VAL_MASK) == + RANK_MAX_VAL_MASK) ? 0 : ((addrmap[0] & + RANK_MAX_VAL_MASK) + RANK_B0_BASE); +} + +/** + * setup_address_map - Set Address Map by querying ADDRMAP registers. + * @priv: DDR memory controller private instance data. + * + * Set Address Map by querying ADDRMAP registers. + * + * Return: none. + */ +static void setup_address_map(struct synps_edac_priv *priv) +{ + u32 addrmap[12]; + int index; + + for (index = 0; index < 12; index++) { + u32 addrmap_offset; + + addrmap_offset = ECC_ADDRMAP0_OFFSET + (index * 4); + addrmap[index] = readl(priv->baseaddr + addrmap_offset); + } + + setup_row_address_map(priv, addrmap); + + setup_column_address_map(priv, addrmap); + + setup_bank_address_map(priv, addrmap); + + setup_bg_address_map(priv, addrmap); + + setup_rank_address_map(priv, addrmap); +} +#endif /* CONFIG_EDAC_DEBUG */ + +/** + * mc_probe - Check controller and bind driver. + * @pdev: platform device. + * + * Probe a specific controller instance for binding with the driver. + * + * Return: 0 if the controller instance was successfully bound to the + * driver; otherwise, < 0 on error. 
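+ * + * Note that the probe bails out with -ENODEV when get_ecc_state() finds + * ECC disabled, since the driver does not enable ECC itself.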
+ */
+static int mc_probe(struct platform_device *pdev)
+{
+	const struct synps_platform_data *p_data;
+	struct edac_mc_layer layers[2];
+	struct synps_edac_priv *priv;
+	struct mem_ctl_info *mci;
+	void __iomem *baseaddr;
+	int rc;
+
+	baseaddr = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(baseaddr))
+		return PTR_ERR(baseaddr);
+
+	p_data = of_device_get_match_data(&pdev->dev);
+	if (!p_data)
+		return -ENODEV;
+
+	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+	layers[0].size = SYNPS_EDAC_NR_CSROWS;
+	layers[0].is_virt_csrow = true;
+	layers[1].type = EDAC_MC_LAYER_CHANNEL;
+	layers[1].size = SYNPS_EDAC_NR_CHANS;
+	layers[1].is_virt_csrow = false;
+
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+			    sizeof(struct synps_edac_priv));
+	if (!mci) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "Failed memory allocation for mc instance\n");
+		return -ENOMEM;
+	}
+
+	priv = mci->pvt_info;
+	priv->baseaddr = baseaddr;
+	priv->p_data = p_data;
+	if (!get_ecc_state(priv)) {
+		edac_printk(KERN_INFO, EDAC_MC, "ECC not enabled\n");
+		rc = -ENODEV;
+		goto free_edac_mc;
+	}
+
+	spin_lock_init(&priv->reglock);
+
+	mc_init(mci, pdev);
+
+	if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) {
+		rc = setup_irq(mci, pdev);
+		if (rc)
+			goto free_edac_mc;
+	}
+
+	rc = edac_mc_add_mc(mci);
+	if (rc) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "Failed to register with EDAC core\n");
+		goto free_edac_mc;
+	}
+
+#ifdef CONFIG_EDAC_DEBUG
+	if (priv->p_data->quirks & DDR_ECC_DATA_POISON_SUPPORT) {
+		rc = edac_create_sysfs_attributes(mci);
+		if (rc) {
+			edac_printk(KERN_ERR, EDAC_MC,
+				    "Failed to create sysfs entries\n");
+			goto free_edac_mc;
+		}
+	}
+
+	if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT)
+		setup_address_map(priv);
+#endif
+
+	/*
+	 * Start capturing the correctable and uncorrectable errors. A write of
+	 * 0 starts the counters.
+	 */
+	if (!(priv->p_data->quirks & DDR_ECC_INTR_SUPPORT))
+		writel(0x0, baseaddr + ECC_CTRL_OFST);
+
+	return rc;
+
+free_edac_mc:
+	edac_mc_free(mci);
+
+	return rc;
+}
+
+/**
+ * mc_remove - Unbind driver from controller.
+ * @pdev: Platform device.
+ */
+static void mc_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+	struct synps_edac_priv *priv = mci->pvt_info;
+
+	if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT)
+		disable_intr(priv);
+
+#ifdef CONFIG_EDAC_DEBUG
+	if (priv->p_data->quirks & DDR_ECC_DATA_POISON_SUPPORT)
+		edac_remove_sysfs_attributes(mci);
+#endif
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+}
+
+static struct platform_driver synps_edac_mc_driver = {
+	.driver = {
+		.name = "synopsys-edac",
+		.of_match_table = synps_edac_match,
+	},
+	.probe = mc_probe,
+	.remove = mc_remove,
+};
+
+module_platform_driver(synps_edac_mc_driver);
+
+MODULE_AUTHOR("Xilinx Inc");
+MODULE_DESCRIPTION("Synopsys DDR ECC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
new file mode 100644
index 000000000000..75c04dfc3962
--- /dev/null
+++ b/drivers/edac/thunderx_edac.c
@@ -0,0 +1,2146 @@
+/*
+ * Cavium ThunderX memory controller kernel module
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
+ * + */ + +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/string.h> +#include <linux/stop_machine.h> +#include <linux/delay.h> +#include <linux/sizes.h> +#include <linux/atomic.h> +#include <linux/bitfield.h> +#include <linux/circ_buf.h> + +#include <asm/page.h> + +#include "edac_module.h" + +#define phys_to_pfn(phys) (PFN_DOWN(phys)) + +#define THUNDERX_NODE GENMASK(45, 44) + +enum { + ERR_CORRECTED = 1, + ERR_UNCORRECTED = 2, + ERR_UNKNOWN = 3, +}; + +struct error_descr { + int type; + u64 mask; + char *descr; +}; + +static void decode_register(char *str, size_t size, + const struct error_descr *descr, + const uint64_t reg) +{ + int ret = 0; + + while (descr->type && descr->mask && descr->descr) { + if (reg & descr->mask) { + ret = snprintf(str, size, "\n\t%s, %s", + descr->type == ERR_CORRECTED ? + "Corrected" : "Uncorrected", + descr->descr); + str += ret; + size -= ret; + } + descr++; + } +} + +static unsigned long get_bits(unsigned long data, int pos, int width) +{ + return (data >> pos) & ((1 << width) - 1); +} + +#define L2C_CTL 0x87E080800000 +#define L2C_CTL_DISIDXALIAS BIT(0) + +#define PCI_DEVICE_ID_THUNDER_LMC 0xa022 + +#define LMC_FADR 0x20 +#define LMC_FADR_FDIMM(x) ((x >> 37) & 0x1) +#define LMC_FADR_FBUNK(x) ((x >> 36) & 0x1) +#define LMC_FADR_FBANK(x) ((x >> 32) & 0xf) +#define LMC_FADR_FROW(x) ((x >> 14) & 0xffff) +#define LMC_FADR_FCOL(x) ((x >> 0) & 0x1fff) + +#define LMC_NXM_FADR 0x28 +#define LMC_ECC_SYND 0x38 + +#define LMC_ECC_PARITY_TEST 0x108 + +#define LMC_INT_W1S 0x150 + +#define LMC_INT_ENA_W1C 0x158 +#define LMC_INT_ENA_W1S 0x160 + +#define LMC_CONFIG 0x188 + +#define LMC_CONFIG_BG2 BIT(62) +#define LMC_CONFIG_RANK_ENA BIT(42) +#define LMC_CONFIG_PBANK_LSB(x) (((x) >> 5) & 0xF) +#define LMC_CONFIG_ROW_LSB(x) (((x) >> 2) & 0x7) + +#define LMC_CONTROL 0x190 +#define LMC_CONTROL_XOR_BANK BIT(16) + +#define LMC_INT 0x1F0 + +#define LMC_INT_DDR_ERR BIT(11) +#define LMC_INT_DED_ERR (0xFUL << 5) +#define LMC_INT_SEC_ERR (0xFUL << 1) +#define LMC_INT_NXM_WR_MASK BIT(0) + +#define LMC_DDR_PLL_CTL 0x258 +#define LMC_DDR_PLL_CTL_DDR4 BIT(29) + +#define LMC_FADR_SCRAMBLED 0x330 + +#define LMC_INT_UE (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \ + LMC_INT_NXM_WR_MASK) + +#define LMC_INT_CE (LMC_INT_SEC_ERR) + +static const struct error_descr lmc_errors[] = { + { + .type = ERR_CORRECTED, + .mask = LMC_INT_SEC_ERR, + .descr = "Single-bit ECC error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_DDR_ERR, + .descr = "DDR chip error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_DED_ERR, + .descr = "Double-bit ECC error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_NXM_WR_MASK, + .descr = "Non-existent memory write", + }, + {0, 0, NULL}, +}; + +#define LMC_INT_EN_DDR_ERROR_ALERT_ENA BIT(5) +#define LMC_INT_EN_DLCRAM_DED_ERR BIT(4) +#define LMC_INT_EN_DLCRAM_SEC_ERR BIT(3) +#define LMC_INT_INTR_DED_ENA BIT(2) +#define LMC_INT_INTR_SEC_ENA BIT(1) +#define LMC_INT_INTR_NXM_WR_ENA BIT(0) + +#define LMC_INT_ENA_ALL GENMASK(5, 0) + +#define LMC_DDR_PLL_CTL 0x258 +#define LMC_DDR_PLL_CTL_DDR4 BIT(29) + +#define LMC_CONTROL 0x190 +#define LMC_CONTROL_RDIMM BIT(0) + +#define LMC_SCRAM_FADR 0x330 + +#define LMC_CHAR_MASK0 0x228 +#define LMC_CHAR_MASK2 0x238 + +#define RING_ENTRIES 8 + +struct debugfs_entry { + const char *name; + umode_t mode; + const struct file_operations fops; +}; + +struct lmc_err_ctx { + u64 reg_int; + u64 reg_fadr; + u64 reg_nxm_fadr; + u64 reg_scram_fadr; + u64 
reg_ecc_synd;
+};
+
+struct thunderx_lmc {
+	void __iomem *regs;
+	struct pci_dev *pdev;
+	struct msix_entry msix_ent;
+
+	atomic_t ecc_int;
+
+	u64 mask0;
+	u64 mask2;
+	u64 parity_test;
+	u64 node;
+
+	int xbits;
+	int bank_width;
+	int pbank_lsb;
+	int dimm_lsb;
+	int rank_lsb;
+	int bank_lsb;
+	int row_lsb;
+	int col_hi_lsb;
+
+	int xor_bank;
+	int l2c_alias;
+
+	struct page *mem;
+
+	struct lmc_err_ctx err_ctx[RING_ENTRIES];
+	unsigned long ring_head;
+	unsigned long ring_tail;
+};
+
+#define ring_pos(pos, size) ((pos) & ((size) - 1))
+
+#define DEBUGFS_STRUCT(_name, _mode, _write, _read) \
+static struct debugfs_entry debugfs_##_name = { \
+	.name = __stringify(_name), \
+	.mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
+	.fops = { \
+		.open = simple_open, \
+		.write = _write, \
+		.read = _read, \
+		.llseek = generic_file_llseek, \
+	}, \
+}
+
+#define DEBUGFS_FIELD_ATTR(_type, _field) \
+static ssize_t thunderx_##_type##_##_field##_read(struct file *file, \
+						  char __user *data, \
+						  size_t count, loff_t *ppos) \
+{ \
+	struct thunderx_##_type *pdata = file->private_data; \
+	char buf[20]; \
+ \
+	snprintf(buf, sizeof(buf), "0x%016llx", pdata->_field); \
+	return simple_read_from_buffer(data, count, ppos, \
+				       buf, sizeof(buf)); \
+} \
+ \
+static ssize_t thunderx_##_type##_##_field##_write(struct file *file, \
+						   const char __user *data, \
+						   size_t count, loff_t *ppos) \
+{ \
+	struct thunderx_##_type *pdata = file->private_data; \
+	int res; \
+ \
+	res = kstrtoull_from_user(data, count, 0, &pdata->_field); \
+ \
+	return res ? res : count; \
+} \
+ \
+DEBUGFS_STRUCT(_field, 0600, \
+	       thunderx_##_type##_##_field##_write, \
+	       thunderx_##_type##_##_field##_read) \
+
+#define DEBUGFS_REG_ATTR(_type, _name, _reg) \
+static ssize_t thunderx_##_type##_##_name##_read(struct file *file, \
+						 char __user *data, \
+						 size_t count, loff_t *ppos) \
+{ \
+	struct thunderx_##_type *pdata = file->private_data; \
+	char buf[20]; \
+ \
+	sprintf(buf, "0x%016llx", readq(pdata->regs + _reg)); \
+	return simple_read_from_buffer(data, count, ppos, \
+				       buf, sizeof(buf)); \
+} \
+ \
+static ssize_t thunderx_##_type##_##_name##_write(struct file *file, \
+						  const char __user *data, \
+						  size_t count, loff_t *ppos) \
+{ \
+	struct thunderx_##_type *pdata = file->private_data; \
+	u64 val; \
+	int res; \
+ \
+	res = kstrtoull_from_user(data, count, 0, &val); \
+ \
+	if (!res) { \
+		writeq(val, pdata->regs + _reg); \
+		res = count; \
+	} \
+ \
+	return res; \
+} \
+ \
+DEBUGFS_STRUCT(_name, 0600, \
+	       thunderx_##_type##_##_name##_write, \
+	       thunderx_##_type##_##_name##_read)
+
+#define LMC_DEBUGFS_ENT(_field)	DEBUGFS_FIELD_ATTR(lmc, _field)
+
+/*
+ * To get an ECC error injected, the following steps are needed:
+ * - Setup the ECC injection by writing the appropriate parameters:
+ *	echo <bit mask value> > /sys/kernel/debug/<device number>/mask0
+ *	echo <bit mask value> > /sys/kernel/debug/<device number>/mask2
+ *	echo 0x802 > /sys/kernel/debug/<device number>/parity_test
+ * - Do the actual injection:
+ *	echo 1 > /sys/kernel/debug/<device number>/inject_ecc
+ */
+static ssize_t thunderx_lmc_inject_int_write(struct file *file,
+					     const char __user *data,
+					     size_t count, loff_t *ppos)
+{
+	struct thunderx_lmc *lmc = file->private_data;
+	u64 val;
+	int res;
+
+	res = kstrtoull_from_user(data, count, 0, &val);
+
+	if (!res) {
+		/* Trigger the interrupt */
+		writeq(val, lmc->regs + LMC_INT_W1S);
+		res = count;
+	}
+
+	return res;
+}
+
+static ssize_t thunderx_lmc_int_read(struct file *file,
+				     char __user *data,
+
size_t count, loff_t *ppos) +{ + struct thunderx_lmc *lmc = file->private_data; + char buf[20]; + u64 lmc_int = readq(lmc->regs + LMC_INT); + + snprintf(buf, sizeof(buf), "0x%016llx", lmc_int); + return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf)); +} + +#define TEST_PATTERN 0xa5 + +static int inject_ecc_fn(void *arg) +{ + struct thunderx_lmc *lmc = arg; + uintptr_t addr, phys; + unsigned int cline_size = cache_line_size(); + const unsigned int lines = PAGE_SIZE / cline_size; + unsigned int i, cl_idx; + + addr = (uintptr_t)page_address(lmc->mem); + phys = (uintptr_t)page_to_phys(lmc->mem); + + cl_idx = (phys & 0x7f) >> 4; + lmc->parity_test &= ~(7ULL << 8); + lmc->parity_test |= (cl_idx << 8); + + writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0); + writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2); + writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST); + + readq(lmc->regs + LMC_CHAR_MASK0); + readq(lmc->regs + LMC_CHAR_MASK2); + readq(lmc->regs + LMC_ECC_PARITY_TEST); + + for (i = 0; i < lines; i++) { + memset((void *)addr, TEST_PATTERN, cline_size); + barrier(); + + /* + * Flush L1 cachelines to the PoC (L2). + * This will cause cacheline eviction to the L2. + */ + asm volatile("dc civac, %0\n" + "dsb sy\n" + : : "r"(addr + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Flush L2 cachelines to the DRAM. + * This will cause cacheline eviction to the DRAM + * and ECC corruption according to the masks set. + */ + __asm__ volatile("sys #0,c11,C1,#2, %0\n" + : : "r"(phys + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Invalidate L2 cachelines. + * The subsequent load will cause cacheline fetch + * from the DRAM and an error interrupt + */ + __asm__ volatile("sys #0,c11,C1,#1, %0" + : : "r"(phys + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Invalidate L1 cachelines. + * The subsequent load will cause cacheline fetch + * from the L2 and/or DRAM + */ + asm volatile("dc ivac, %0\n" + "dsb sy\n" + : : "r"(addr + i * cline_size)); + } + + return 0; +} + +static ssize_t thunderx_lmc_inject_ecc_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct thunderx_lmc *lmc = file->private_data; + unsigned int cline_size = cache_line_size(); + u8 *tmp; + void __iomem *addr; + unsigned int offs, timeout = 100000; + + atomic_set(&lmc->ecc_int, 0); + + lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0); + if (!lmc->mem) + return -ENOMEM; + + tmp = kmalloc(cline_size, GFP_KERNEL); + if (!tmp) { + __free_pages(lmc->mem, 0); + return -ENOMEM; + } + + addr = page_address(lmc->mem); + + while (!atomic_read(&lmc->ecc_int) && timeout--) { + stop_machine(inject_ecc_fn, lmc, NULL); + + for (offs = 0; offs < PAGE_SIZE; offs += cline_size) { + /* + * Do a load from the previously rigged location + * This should generate an error interrupt. 
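+			 * The memcpy() issues the actual reads and the
+			 * "dsb ld" barrier below waits for those loads to
+			 * complete; the LMC ISR sets ecc_int when the error
+			 * interrupt arrives, terminating the retry loop.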
+ */ + memcpy(tmp, addr + offs, cline_size); + asm volatile("dsb ld\n"); + } + } + + kfree(tmp); + __free_pages(lmc->mem, 0); + + return count; +} + +LMC_DEBUGFS_ENT(mask0); +LMC_DEBUGFS_ENT(mask2); +LMC_DEBUGFS_ENT(parity_test); + +DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL); +DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL); +DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read); + +static struct debugfs_entry *lmc_dfs_ents[] = { + &debugfs_mask0, + &debugfs_mask2, + &debugfs_parity_test, + &debugfs_inject_ecc, + &debugfs_inject_int, + &debugfs_int_w1c, +}; + +static int thunderx_create_debugfs_nodes(struct dentry *parent, + struct debugfs_entry *attrs[], + void *data, + size_t num) +{ + int i; + struct dentry *ent; + + if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) + return 0; + + if (!parent) + return -ENOENT; + + for (i = 0; i < num; i++) { + ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode, + parent, data, &attrs[i]->fops); + + if (IS_ERR(ent)) + break; + } + + return i; +} + +static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc) +{ + phys_addr_t addr = 0; + int bank, xbits; + + addr |= lmc->node << 40; + addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb; + addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb; + addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb; + addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb; + + bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb; + + if (lmc->xor_bank) + bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width); + + addr |= bank << lmc->bank_lsb; + + xbits = PCI_FUNC(lmc->pdev->devfn); + + if (lmc->l2c_alias) + xbits ^= get_bits(addr, 20, lmc->xbits) ^ + get_bits(addr, 12, lmc->xbits); + + addr |= xbits << 7; + + return addr; +} + +static unsigned int thunderx_get_num_lmcs(unsigned int node) +{ + unsigned int number = 0; + struct pci_dev *pdev = NULL; + + do { + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_LMC, + pdev); + if (pdev) { +#ifdef CONFIG_NUMA + if (pdev->dev.numa_node == node) + number++; +#else + number++; +#endif + } + } while (pdev); + + return number; +} + +#define LMC_MESSAGE_SIZE 120 +#define LMC_OTHER_SIZE (50 * ARRAY_SIZE(lmc_errors)) + +static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct thunderx_lmc *lmc = mci->pvt_info; + + unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx)); + struct lmc_err_ctx *ctx = &lmc->err_ctx[head]; + + writeq(0, lmc->regs + LMC_CHAR_MASK0); + writeq(0, lmc->regs + LMC_CHAR_MASK2); + writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST); + + ctx->reg_int = readq(lmc->regs + LMC_INT); + ctx->reg_fadr = readq(lmc->regs + LMC_FADR); + ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR); + ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR); + ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND); + + lmc->ring_head++; + + atomic_set(&lmc->ecc_int, 1); + + /* Clear the interrupt */ + writeq(ctx->reg_int, lmc->regs + LMC_INT); + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct thunderx_lmc *lmc = mci->pvt_info; + phys_addr_t phys_addr; + + unsigned long tail; + struct lmc_err_ctx *ctx; + + irqreturn_t ret = IRQ_NONE; + + char *msg; + char *other; + + msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(LMC_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(lmc->ring_head, lmc->ring_tail, + 
ARRAY_SIZE(lmc->err_ctx))) { + tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx)); + + ctx = &lmc->err_ctx[tail]; + + dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n", + ctx->reg_int); + dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n", + ctx->reg_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n", + ctx->reg_nxm_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n", + ctx->reg_scram_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n", + ctx->reg_ecc_synd); + + snprintf(msg, LMC_MESSAGE_SIZE, + "DIMM %lld rank %lld bank %lld row %lld col %lld", + LMC_FADR_FDIMM(ctx->reg_scram_fadr), + LMC_FADR_FBUNK(ctx->reg_scram_fadr), + LMC_FADR_FBANK(ctx->reg_scram_fadr), + LMC_FADR_FROW(ctx->reg_scram_fadr), + LMC_FADR_FCOL(ctx->reg_scram_fadr)); + + decode_register(other, LMC_OTHER_SIZE, lmc_errors, + ctx->reg_int); + + phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc); + + if (ctx->reg_int & LMC_INT_UE) + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + phys_to_pfn(phys_addr), + offset_in_page(phys_addr), + 0, -1, -1, -1, msg, other); + else if (ctx->reg_int & LMC_INT_CE) + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + phys_to_pfn(phys_addr), + offset_in_page(phys_addr), + 0, -1, -1, -1, msg, other); + + lmc->ring_tail++; + } + + ret = IRQ_HANDLED; + +err_free: + kfree(msg); + kfree(other); + + return ret; +} + +static const struct pci_device_id thunderx_lmc_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) }, + { 0, }, +}; + +static inline int pci_dev_to_mc_idx(struct pci_dev *pdev) +{ + int node = dev_to_node(&pdev->dev); + int ret = PCI_FUNC(pdev->devfn); + + ret += max(node, 0) << 3; + + return ret; +} + +static int thunderx_lmc_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_lmc *lmc; + struct edac_mc_layer layer; + struct mem_ctl_info *mci; + u64 lmc_control, lmc_ddr_pll_ctl, lmc_config; + int ret; + u64 lmc_int; + void *l2c_ioaddr; + + layer.type = EDAC_MC_LAYER_SLOT; + layer.size = 2; + layer.is_virt_csrow = false; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer, + sizeof(struct thunderx_lmc)); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + lmc = mci->pvt_info; + + pci_set_drvdata(pdev, mci); + + lmc->regs = pcim_iomap_table(pdev)[0]; + + lmc_control = readq(lmc->regs + LMC_CONTROL); + lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL); + lmc_config = readq(lmc->regs + LMC_CONFIG); + + if (lmc_control & LMC_CONTROL_RDIMM) { + mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4, + lmc_ddr_pll_ctl) ? + MEM_RDDR4 : MEM_RDDR3; + } else { + mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4, + lmc_ddr_pll_ctl) ? 
+ MEM_DDR4 : MEM_DDR3; + } + + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + + mci->mod_name = "thunderx-lmc"; + mci->ctl_name = "thunderx-lmc"; + mci->dev_name = dev_name(&pdev->dev); + mci->scrub_mode = SCRUB_NONE; + + lmc->pdev = pdev; + lmc->msix_ent.entry = 0; + + lmc->ring_head = 0; + lmc->ring_tail = 0; + + ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector, + thunderx_lmc_err_isr, + thunderx_lmc_threaded_isr, 0, + "[EDAC] ThunderX LMC", mci); + if (ret) { + dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret); + goto err_free; + } + + lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0)); + + lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1; + lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) && + FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3; + + lmc->pbank_lsb = (lmc_config >> 5) & 0xf; + lmc->dimm_lsb = 28 + lmc->pbank_lsb + lmc->xbits; + lmc->rank_lsb = lmc->dimm_lsb; + lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0; + lmc->bank_lsb = 7 + lmc->xbits; + lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits; + + lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width; + + lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK; + + l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node), PAGE_SIZE); + if (!l2c_ioaddr) { + dev_err(&pdev->dev, "Cannot map L2C_CTL\n"); + ret = -ENOMEM; + goto err_free; + } + + lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS); + + iounmap(l2c_ioaddr); + + ret = edac_mc_add_mc(mci); + if (ret) { + dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret); + goto err_free; + } + + lmc_int = readq(lmc->regs + LMC_INT); + writeq(lmc_int, lmc->regs + LMC_INT); + + writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S); + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + ret = thunderx_create_debugfs_nodes(mci->debugfs, + lmc_dfs_ents, + lmc, + ARRAY_SIZE(lmc_dfs_ents)); + + if (ret != ARRAY_SIZE(lmc_dfs_ents)) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? 
" created" : ""); + } + } + + return 0; + +err_free: + pci_set_drvdata(pdev, NULL); + edac_mc_free(mci); + + return ret; +} + +static void thunderx_lmc_remove(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci = pci_get_drvdata(pdev); + struct thunderx_lmc *lmc = mci->pvt_info; + + writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); +} + +MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl); + +static struct pci_driver thunderx_lmc_driver = { + .name = "thunderx_lmc_edac", + .probe = thunderx_lmc_probe, + .remove = thunderx_lmc_remove, + .id_table = thunderx_lmc_pci_tbl, +}; + +/*---------------------- OCX driver ---------------------------------*/ + +#define PCI_DEVICE_ID_THUNDER_OCX 0xa013 + +#define OCX_LINK_INTS 3 +#define OCX_INTS (OCX_LINK_INTS + 1) +#define OCX_RX_LANES 24 +#define OCX_RX_LANE_STATS 15 + +#define OCX_COM_INT 0x100 +#define OCX_COM_INT_W1S 0x108 +#define OCX_COM_INT_ENA_W1S 0x110 +#define OCX_COM_INT_ENA_W1C 0x118 + +#define OCX_COM_IO_BADID BIT(54) +#define OCX_COM_MEM_BADID BIT(53) +#define OCX_COM_COPR_BADID BIT(52) +#define OCX_COM_WIN_REQ_BADID BIT(51) +#define OCX_COM_WIN_REQ_TOUT BIT(50) +#define OCX_COM_RX_LANE GENMASK(23, 0) + +#define OCX_COM_INT_CE (OCX_COM_IO_BADID | \ + OCX_COM_MEM_BADID | \ + OCX_COM_COPR_BADID | \ + OCX_COM_WIN_REQ_BADID | \ + OCX_COM_WIN_REQ_TOUT) + +static const struct error_descr ocx_com_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_COM_IO_BADID, + .descr = "Invalid IO transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_MEM_BADID, + .descr = "Invalid memory transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_COPR_BADID, + .descr = "Invalid coprocessor transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_WIN_REQ_BADID, + .descr = "Invalid SLI transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_WIN_REQ_TOUT, + .descr = "Window/core request timeout", + }, + {0, 0, NULL}, +}; + +#define OCX_COM_LINKX_INT(x) (0x120 + (x) * 8) +#define OCX_COM_LINKX_INT_W1S(x) (0x140 + (x) * 8) +#define OCX_COM_LINKX_INT_ENA_W1S(x) (0x160 + (x) * 8) +#define OCX_COM_LINKX_INT_ENA_W1C(x) (0x180 + (x) * 8) + +#define OCX_COM_LINK_BAD_WORD BIT(13) +#define OCX_COM_LINK_ALIGN_FAIL BIT(12) +#define OCX_COM_LINK_ALIGN_DONE BIT(11) +#define OCX_COM_LINK_UP BIT(10) +#define OCX_COM_LINK_STOP BIT(9) +#define OCX_COM_LINK_BLK_ERR BIT(8) +#define OCX_COM_LINK_REINIT BIT(7) +#define OCX_COM_LINK_LNK_DATA BIT(6) +#define OCX_COM_LINK_RXFIFO_DBE BIT(5) +#define OCX_COM_LINK_RXFIFO_SBE BIT(4) +#define OCX_COM_LINK_TXFIFO_DBE BIT(3) +#define OCX_COM_LINK_TXFIFO_SBE BIT(2) +#define OCX_COM_LINK_REPLAY_DBE BIT(1) +#define OCX_COM_LINK_REPLAY_SBE BIT(0) + +static const struct error_descr ocx_com_link_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_REPLAY_SBE, + .descr = "Replay buffer single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_TXFIFO_SBE, + .descr = "TX FIFO single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_RXFIFO_SBE, + .descr = "RX FIFO single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_BLK_ERR, + .descr = "Block code error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_ALIGN_FAIL, + .descr = "Link alignment failure", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_BAD_WORD, + .descr = "Bad code word", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_REPLAY_DBE, + .descr = "Replay buffer double-bit 
error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_TXFIFO_DBE, + .descr = "TX FIFO double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_RXFIFO_DBE, + .descr = "RX FIFO double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_STOP, + .descr = "Link stopped", + }, + {0, 0, NULL}, +}; + +#define OCX_COM_LINK_INT_UE (OCX_COM_LINK_REPLAY_DBE | \ + OCX_COM_LINK_TXFIFO_DBE | \ + OCX_COM_LINK_RXFIFO_DBE | \ + OCX_COM_LINK_STOP) + +#define OCX_COM_LINK_INT_CE (OCX_COM_LINK_REPLAY_SBE | \ + OCX_COM_LINK_TXFIFO_SBE | \ + OCX_COM_LINK_RXFIFO_SBE | \ + OCX_COM_LINK_BLK_ERR | \ + OCX_COM_LINK_ALIGN_FAIL | \ + OCX_COM_LINK_BAD_WORD) + +#define OCX_LNE_INT(x) (0x8018 + (x) * 0x100) +#define OCX_LNE_INT_EN(x) (0x8020 + (x) * 0x100) +#define OCX_LNE_BAD_CNT(x) (0x8028 + (x) * 0x100) +#define OCX_LNE_CFG(x) (0x8000 + (x) * 0x100) +#define OCX_LNE_STAT(x, y) (0x8040 + (x) * 0x100 + (y) * 8) + +#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS BIT(8) +#define OCX_LNE_CFG_RX_STAT_WRAP_DIS BIT(2) +#define OCX_LNE_CFG_RX_STAT_RDCLR BIT(1) +#define OCX_LNE_CFG_RX_STAT_ENA BIT(0) + + +#define OCX_LANE_BAD_64B67B BIT(8) +#define OCX_LANE_DSKEW_FIFO_OVFL BIT(5) +#define OCX_LANE_SCRM_SYNC_LOSS BIT(4) +#define OCX_LANE_UKWN_CNTL_WORD BIT(3) +#define OCX_LANE_CRC32_ERR BIT(2) +#define OCX_LANE_BDRY_SYNC_LOSS BIT(1) +#define OCX_LANE_SERDES_LOCK_LOSS BIT(0) + +#define OCX_COM_LANE_INT_UE (0) +#define OCX_COM_LANE_INT_CE (OCX_LANE_SERDES_LOCK_LOSS | \ + OCX_LANE_BDRY_SYNC_LOSS | \ + OCX_LANE_CRC32_ERR | \ + OCX_LANE_UKWN_CNTL_WORD | \ + OCX_LANE_SCRM_SYNC_LOSS | \ + OCX_LANE_DSKEW_FIFO_OVFL | \ + OCX_LANE_BAD_64B67B) + +static const struct error_descr ocx_lane_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_SERDES_LOCK_LOSS, + .descr = "RX SerDes lock lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_BDRY_SYNC_LOSS, + .descr = "RX word boundary lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_CRC32_ERR, + .descr = "CRC32 error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_UKWN_CNTL_WORD, + .descr = "Unknown control word", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_SCRM_SYNC_LOSS, + .descr = "Scrambler synchronization lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_DSKEW_FIFO_OVFL, + .descr = "RX deskew FIFO overflow", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_BAD_64B67B, + .descr = "Bad 64B/67B codeword", + }, + {0, 0, NULL}, +}; + +#define OCX_LNE_INT_ENA_ALL (GENMASK(9, 8) | GENMASK(6, 0)) +#define OCX_COM_INT_ENA_ALL (GENMASK(54, 50) | GENMASK(23, 0)) +#define OCX_COM_LINKX_INT_ENA_ALL (GENMASK(13, 12) | \ + GENMASK(9, 7) | GENMASK(5, 0)) + +#define OCX_TLKX_ECC_CTL(x) (0x10018 + (x) * 0x2000) +#define OCX_RLKX_ECC_CTL(x) (0x18018 + (x) * 0x2000) + +struct ocx_com_err_ctx { + u64 reg_com_int; + u64 reg_lane_int[OCX_RX_LANES]; + u64 reg_lane_stat11[OCX_RX_LANES]; +}; + +struct ocx_link_err_ctx { + u64 reg_com_link_int; + int link; +}; + +struct thunderx_ocx { + void __iomem *regs; + int com_link; + struct pci_dev *pdev; + struct edac_device_ctl_info *edac_dev; + + struct dentry *debugfs; + struct msix_entry msix_ent[OCX_INTS]; + + struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES]; + struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES]; + + unsigned long com_ring_head; + unsigned long com_ring_tail; + + unsigned long link_ring_head; + unsigned long link_ring_tail; +}; + +#define OCX_MESSAGE_SIZE SZ_1K +#define OCX_OTHER_SIZE (50 * ARRAY_SIZE(ocx_com_link_errors)) + +/* This handler is threaded */ 
+static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + + int lane; + unsigned long head = ring_pos(ocx->com_ring_head, + ARRAY_SIZE(ocx->com_err_ctx)); + struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head]; + + ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT); + + for (lane = 0; lane < OCX_RX_LANES; lane++) { + ctx->reg_lane_int[lane] = + readq(ocx->regs + OCX_LNE_INT(lane)); + ctx->reg_lane_stat11[lane] = + readq(ocx->regs + OCX_LNE_STAT(lane, 11)); + + writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane)); + } + + writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT); + + ocx->com_ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + + irqreturn_t ret = IRQ_NONE; + + unsigned long tail; + struct ocx_com_err_ctx *ctx; + int lane; + char *msg; + char *other; + + msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail, + ARRAY_SIZE(ocx->com_err_ctx))) { + tail = ring_pos(ocx->com_ring_tail, + ARRAY_SIZE(ocx->com_err_ctx)); + ctx = &ocx->com_err_ctx[tail]; + + snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx", + ocx->edac_dev->ctl_name, ctx->reg_com_int); + + decode_register(other, OCX_OTHER_SIZE, + ocx_com_errors, ctx->reg_com_int); + + strlcat(msg, other, OCX_MESSAGE_SIZE); + + for (lane = 0; lane < OCX_RX_LANES; lane++) + if (ctx->reg_com_int & BIT(lane)) { + snprintf(other, OCX_OTHER_SIZE, + "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx", + lane, ctx->reg_lane_int[lane], + lane, ctx->reg_lane_stat11[lane]); + + strlcat(msg, other, OCX_MESSAGE_SIZE); + + decode_register(other, OCX_OTHER_SIZE, + ocx_lane_errors, + ctx->reg_lane_int[lane]); + strlcat(msg, other, OCX_MESSAGE_SIZE); + } + + if (ctx->reg_com_int & OCX_COM_INT_CE) + edac_device_handle_ce(ocx->edac_dev, 0, 0, msg); + + ocx->com_ring_tail++; + } + + ret = IRQ_HANDLED; + +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + unsigned long head = ring_pos(ocx->link_ring_head, + ARRAY_SIZE(ocx->link_err_ctx)); + struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head]; + + ctx->link = msix->entry; + ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link)); + + writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link)); + + ocx->link_ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + irqreturn_t ret = IRQ_NONE; + unsigned long tail; + struct ocx_link_err_ctx *ctx; + + char *msg; + char *other; + + msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail, + ARRAY_SIZE(ocx->link_err_ctx))) { + tail = ring_pos(ocx->link_ring_head, + ARRAY_SIZE(ocx->link_err_ctx)); + + 
ctx = &ocx->link_err_ctx[tail]; + + snprintf(msg, OCX_MESSAGE_SIZE, + "%s: OCX_COM_LINK_INT[%d]: %016llx", + ocx->edac_dev->ctl_name, + ctx->link, ctx->reg_com_link_int); + + decode_register(other, OCX_OTHER_SIZE, + ocx_com_link_errors, ctx->reg_com_link_int); + + strlcat(msg, other, OCX_MESSAGE_SIZE); + + if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE) + edac_device_handle_ue(ocx->edac_dev, 0, 0, msg); + else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE) + edac_device_handle_ce(ocx->edac_dev, 0, 0, msg); + + ocx->link_ring_tail++; + } + + ret = IRQ_HANDLED; +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +#define OCX_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(ocx, _name, _reg) + +OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0)); +OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1)); +OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2)); + +OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0)); +OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1)); +OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2)); + +OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0)); +OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1)); +OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2)); + +OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0)); +OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1)); +OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2)); +OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3)); +OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4)); +OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5)); +OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6)); +OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7)); + +OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8)); +OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9)); +OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10)); +OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11)); +OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12)); +OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13)); +OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14)); +OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15)); + +OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16)); +OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17)); +OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18)); +OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19)); +OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20)); +OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21)); +OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22)); +OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23)); + +OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S); + +static struct debugfs_entry *ocx_dfs_ents[] = { + &debugfs_tlk0_ecc_ctl, + &debugfs_tlk1_ecc_ctl, + &debugfs_tlk2_ecc_ctl, + + &debugfs_rlk0_ecc_ctl, + &debugfs_rlk1_ecc_ctl, + &debugfs_rlk2_ecc_ctl, + + &debugfs_com_link0_int, + &debugfs_com_link1_int, + &debugfs_com_link2_int, + + &debugfs_lne00_badcnt, + &debugfs_lne01_badcnt, + &debugfs_lne02_badcnt, + &debugfs_lne03_badcnt, + &debugfs_lne04_badcnt, + &debugfs_lne05_badcnt, + &debugfs_lne06_badcnt, + &debugfs_lne07_badcnt, + &debugfs_lne08_badcnt, + &debugfs_lne09_badcnt, + &debugfs_lne10_badcnt, + &debugfs_lne11_badcnt, + &debugfs_lne12_badcnt, + &debugfs_lne13_badcnt, + &debugfs_lne14_badcnt, + &debugfs_lne15_badcnt, + &debugfs_lne16_badcnt, + &debugfs_lne17_badcnt, + &debugfs_lne18_badcnt, + &debugfs_lne19_badcnt, + &debugfs_lne20_badcnt, + &debugfs_lne21_badcnt, + &debugfs_lne22_badcnt, + &debugfs_lne23_badcnt, + + &debugfs_com_int, +}; + +static const struct pci_device_id thunderx_ocx_pci_tbl[] = { + { 
PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) }, + { 0, }, +}; + +static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx) +{ + int lane, stat, cfg; + + for (lane = 0; lane < OCX_RX_LANES; lane++) { + cfg = readq(ocx->regs + OCX_LNE_CFG(lane)); + cfg |= OCX_LNE_CFG_RX_STAT_RDCLR; + cfg &= ~OCX_LNE_CFG_RX_STAT_ENA; + writeq(cfg, ocx->regs + OCX_LNE_CFG(lane)); + + for (stat = 0; stat < OCX_RX_LANE_STATS; stat++) + readq(ocx->regs + OCX_LNE_STAT(lane, stat)); + } +} + +static int thunderx_ocx_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_ocx *ocx; + struct edac_device_ctl_info *edac_dev; + char name[32]; + int idx; + int i; + int ret; + u64 reg; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + idx = edac_device_alloc_index(); + snprintf(name, sizeof(name), "OCX%d", idx); + edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx), + name, 1, "CCPI", 1, 0, idx); + if (!edac_dev) { + dev_err(&pdev->dev, "Cannot allocate EDAC device\n"); + return -ENOMEM; + } + ocx = edac_dev->pvt_info; + ocx->edac_dev = edac_dev; + ocx->com_ring_head = 0; + ocx->com_ring_tail = 0; + ocx->link_ring_head = 0; + ocx->link_ring_tail = 0; + + ocx->regs = pcim_iomap_table(pdev)[0]; + if (!ocx->regs) { + dev_err(&pdev->dev, "Cannot map PCI resources\n"); + ret = -ENODEV; + goto err_free; + } + + ocx->pdev = pdev; + + for (i = 0; i < OCX_INTS; i++) { + ocx->msix_ent[i].entry = i; + ocx->msix_ent[i].vector = 0; + } + + ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + for (i = 0; i < OCX_INTS; i++) { + ret = devm_request_threaded_irq(&pdev->dev, + ocx->msix_ent[i].vector, + (i == 3) ? + thunderx_ocx_com_isr : + thunderx_ocx_lnk_isr, + (i == 3) ? + thunderx_ocx_com_threaded_isr : + thunderx_ocx_lnk_threaded_isr, + 0, "[EDAC] ThunderX OCX", + &ocx->msix_ent[i]); + if (ret) + goto err_free; + } + + edac_dev->dev = &pdev->dev; + edac_dev->dev_name = dev_name(&pdev->dev); + edac_dev->mod_name = "thunderx-ocx"; + edac_dev->ctl_name = "thunderx-ocx"; + + ret = edac_device_add_device(edac_dev); + if (ret) { + dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); + goto err_free; + } + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); + + ret = thunderx_create_debugfs_nodes(ocx->debugfs, + ocx_dfs_ents, + ocx, + ARRAY_SIZE(ocx_dfs_ents)); + if (ret != ARRAY_SIZE(ocx_dfs_ents)) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? 
" created" : ""); + } + } + + pci_set_drvdata(pdev, edac_dev); + + thunderx_ocx_clearstats(ocx); + + for (i = 0; i < OCX_RX_LANES; i++) { + writeq(OCX_LNE_INT_ENA_ALL, + ocx->regs + OCX_LNE_INT_EN(i)); + + reg = readq(ocx->regs + OCX_LNE_INT(i)); + writeq(reg, ocx->regs + OCX_LNE_INT(i)); + + } + + for (i = 0; i < OCX_LINK_INTS; i++) { + reg = readq(ocx->regs + OCX_COM_LINKX_INT(i)); + writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i)); + + writeq(OCX_COM_LINKX_INT_ENA_ALL, + ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i)); + } + + reg = readq(ocx->regs + OCX_COM_INT); + writeq(reg, ocx->regs + OCX_COM_INT); + + writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S); + + return 0; +err_free: + edac_device_free_ctl_info(edac_dev); + + return ret; +} + +static void thunderx_ocx_remove(struct pci_dev *pdev) +{ + struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); + struct thunderx_ocx *ocx = edac_dev->pvt_info; + int i; + + writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C); + + for (i = 0; i < OCX_INTS; i++) { + writeq(OCX_COM_LINKX_INT_ENA_ALL, + ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i)); + } + + edac_debugfs_remove_recursive(ocx->debugfs); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(edac_dev); +} + +MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl); + +static struct pci_driver thunderx_ocx_driver = { + .name = "thunderx_ocx_edac", + .probe = thunderx_ocx_probe, + .remove = thunderx_ocx_remove, + .id_table = thunderx_ocx_pci_tbl, +}; + +/*---------------------- L2C driver ---------------------------------*/ + +#define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e +#define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f +#define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030 + +#define L2C_TAD_INT_W1C 0x40000 +#define L2C_TAD_INT_W1S 0x40008 + +#define L2C_TAD_INT_ENA_W1C 0x40020 +#define L2C_TAD_INT_ENA_W1S 0x40028 + + +#define L2C_TAD_INT_L2DDBE BIT(1) +#define L2C_TAD_INT_SBFSBE BIT(2) +#define L2C_TAD_INT_SBFDBE BIT(3) +#define L2C_TAD_INT_FBFSBE BIT(4) +#define L2C_TAD_INT_FBFDBE BIT(5) +#define L2C_TAD_INT_TAGDBE BIT(9) +#define L2C_TAD_INT_RDDISLMC BIT(15) +#define L2C_TAD_INT_WRDISLMC BIT(16) +#define L2C_TAD_INT_LFBTO BIT(17) +#define L2C_TAD_INT_GSYNCTO BIT(18) +#define L2C_TAD_INT_RTGSBE BIT(32) +#define L2C_TAD_INT_RTGDBE BIT(33) +#define L2C_TAD_INT_RDDISOCI BIT(34) +#define L2C_TAD_INT_WRDISOCI BIT(35) + +#define L2C_TAD_INT_ECC (L2C_TAD_INT_L2DDBE | \ + L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \ + L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE) + +#define L2C_TAD_INT_CE (L2C_TAD_INT_SBFSBE | \ + L2C_TAD_INT_FBFSBE) + +#define L2C_TAD_INT_UE (L2C_TAD_INT_L2DDBE | \ + L2C_TAD_INT_SBFDBE | \ + L2C_TAD_INT_FBFDBE | \ + L2C_TAD_INT_TAGDBE | \ + L2C_TAD_INT_RTGDBE | \ + L2C_TAD_INT_WRDISOCI | \ + L2C_TAD_INT_RDDISOCI | \ + L2C_TAD_INT_WRDISLMC | \ + L2C_TAD_INT_RDDISLMC | \ + L2C_TAD_INT_LFBTO | \ + L2C_TAD_INT_GSYNCTO) + +static const struct error_descr l2_tad_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_TAD_INT_SBFSBE, + .descr = "SBF single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = L2C_TAD_INT_FBFSBE, + .descr = "FBF single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_L2DDBE, + .descr = "L2D double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_SBFDBE, + .descr = "SBF double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_FBFDBE, + .descr = "FBF double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_TAGDBE, + .descr = "TAG double-bit error", + }, + { + .type = 
ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RTGDBE, + .descr = "RTG double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_WRDISOCI, + .descr = "Write to a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RDDISOCI, + .descr = "Read from a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_WRDISLMC, + .descr = "Write to a disabled LMC", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RDDISLMC, + .descr = "Read from a disabled LMC", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_LFBTO, + .descr = "LFB entry timeout", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_GSYNCTO, + .descr = "Global sync CCPI timeout", + }, + {0, 0, NULL}, +}; + +#define L2C_TAD_INT_TAG (L2C_TAD_INT_TAGDBE) + +#define L2C_TAD_INT_RTG (L2C_TAD_INT_RTGDBE) + +#define L2C_TAD_INT_DISLMC (L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC) + +#define L2C_TAD_INT_DISOCI (L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI) + +#define L2C_TAD_INT_ENA_ALL (L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \ + L2C_TAD_INT_RTG | \ + L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \ + L2C_TAD_INT_LFBTO) + +#define L2C_TAD_TIMETWO 0x50000 +#define L2C_TAD_TIMEOUT 0x50100 +#define L2C_TAD_ERR 0x60000 +#define L2C_TAD_TQD_ERR 0x60100 +#define L2C_TAD_TTG_ERR 0x60200 + + +#define L2C_CBC_INT_W1C 0x60000 + +#define L2C_CBC_INT_RSDSBE BIT(0) +#define L2C_CBC_INT_RSDDBE BIT(1) + +#define L2C_CBC_INT_RSD (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE) + +#define L2C_CBC_INT_MIBSBE BIT(4) +#define L2C_CBC_INT_MIBDBE BIT(5) + +#define L2C_CBC_INT_MIB (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE) + +#define L2C_CBC_INT_IORDDISOCI BIT(6) +#define L2C_CBC_INT_IOWRDISOCI BIT(7) + +#define L2C_CBC_INT_IODISOCI (L2C_CBC_INT_IORDDISOCI | \ + L2C_CBC_INT_IOWRDISOCI) + +#define L2C_CBC_INT_CE (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE) +#define L2C_CBC_INT_UE (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE) + + +static const struct error_descr l2_cbc_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_CBC_INT_RSDSBE, + .descr = "RSD single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = L2C_CBC_INT_MIBSBE, + .descr = "MIB single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_RSDDBE, + .descr = "RSD double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_MIBDBE, + .descr = "MIB double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_IORDDISOCI, + .descr = "Read from a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_IOWRDISOCI, + .descr = "Write to a disabled CCPI", + }, + {0, 0, NULL}, +}; + +#define L2C_CBC_INT_W1S 0x60008 +#define L2C_CBC_INT_ENA_W1C 0x60020 + +#define L2C_CBC_INT_ENA_ALL (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \ + L2C_CBC_INT_IODISOCI) + +#define L2C_CBC_INT_ENA_W1S 0x60028 + +#define L2C_CBC_IODISOCIERR 0x80008 +#define L2C_CBC_IOCERR 0x80010 +#define L2C_CBC_RSDERR 0x80018 +#define L2C_CBC_MIBERR 0x80020 + + +#define L2C_MCI_INT_W1C 0x0 + +#define L2C_MCI_INT_VBFSBE BIT(0) +#define L2C_MCI_INT_VBFDBE BIT(1) + +static const struct error_descr l2_mci_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_MCI_INT_VBFSBE, + .descr = "VBF single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_MCI_INT_VBFDBE, + .descr = "VBF double-bit error", + }, + {0, 0, NULL}, +}; + +#define L2C_MCI_INT_W1S 0x8 +#define L2C_MCI_INT_ENA_W1C 0x20 + +#define L2C_MCI_INT_ENA_ALL (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE) + +#define L2C_MCI_INT_ENA_W1S 0x28 + +#define L2C_MCI_ERR 0x10000 + 
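As with the LMC and OCX blocks, every interrupt CSR above comes in W1C/W1S flavors: writing a 1 to a bit of the *_INT_W1C view acknowledges that status bit, writing a 1 to *_INT_W1S sets it from software (which is what the debugfs injection attributes map), and the *_INT_ENA_W1C/*_INT_ENA_W1S pair disables/enables delivery. A short sketch of the idiom, using hypothetical offsets rather than the real ThunderX ones:

#include <linux/bits.h>
#include <linux/io.h>

#define DEMO_INT_W1C	 0x00	/* write 1 to acknowledge a status bit */
#define DEMO_INT_W1S	 0x08	/* write 1 to set a bit (SW injection) */
#define DEMO_INT_ENA_W1S 0x28	/* write 1 to enable an interrupt source */

/* Acknowledge exactly the bits that were pending when we looked. */
static void demo_ack_pending(void __iomem *regs)
{
	u64 status = readq(regs + DEMO_INT_W1C);

	writeq(status, regs + DEMO_INT_W1C);
}

/* Fire one interrupt source from software, as the debugfs nodes do. */
static void demo_inject(void __iomem *regs, unsigned int bit)
{
	writeq(BIT_ULL(bit), regs + DEMO_INT_W1S);
}

static void demo_enable(void __iomem *regs, unsigned int bit)
{
	writeq(BIT_ULL(bit), regs + DEMO_INT_ENA_W1S);
}

Because set and clear go through separate addresses, each agent touches only its own bits; a plain read-modify-write on a shared status register could lose an error bit that latched in between.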
+#define L2C_MESSAGE_SIZE	SZ_1K
+#define L2C_OTHER_SIZE		(50 * ARRAY_SIZE(l2_tad_errors))
+
+struct l2c_err_ctx {
+	char *reg_ext_name;
+	u64  reg_int;
+	u64  reg_ext;
+};
+
+struct thunderx_l2c {
+	void __iomem *regs;
+	struct pci_dev *pdev;
+	struct edac_device_ctl_info *edac_dev;
+
+	struct dentry *debugfs;
+
+	int index;
+
+	struct msix_entry msix_ent;
+
+	struct l2c_err_ctx err_ctx[RING_ENTRIES];
+	unsigned long ring_head;
+	unsigned long ring_tail;
+};
+
+static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c,
+						msix_ent);
+
+	unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx));
+	struct l2c_err_ctx *ctx = &tad->err_ctx[head];
+
+	ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C);
+
+	if (ctx->reg_int & L2C_TAD_INT_ECC) {
+		ctx->reg_ext_name = "TQD_ERR";
+		ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR);
+	} else if (ctx->reg_int & L2C_TAD_INT_TAG) {
+		ctx->reg_ext_name = "TTG_ERR";
+		ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR);
+	} else if (ctx->reg_int & L2C_TAD_INT_LFBTO) {
+		ctx->reg_ext_name = "TIMEOUT";
+		ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT);
+	} else if (ctx->reg_int & L2C_TAD_INT_DISOCI) {
+		ctx->reg_ext_name = "ERR";
+		ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR);
+	}
+
+	writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C);
+
+	tad->ring_head++;
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c,
+						msix_ent);
+
+	unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx));
+	struct l2c_err_ctx *ctx = &cbc->err_ctx[head];
+
+	ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C);
+
+	if (ctx->reg_int & L2C_CBC_INT_RSD) {
+		ctx->reg_ext_name = "RSDERR";
+		ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR);
+	} else if (ctx->reg_int & L2C_CBC_INT_MIB) {
+		ctx->reg_ext_name = "MIBERR";
+		ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR);
+	} else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) {
+		ctx->reg_ext_name = "IODISOCIERR";
+		ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR);
+	}
+
+	writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C);
+
+	cbc->ring_head++;
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c,
+						msix_ent);
+
+	unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx));
+	struct l2c_err_ctx *ctx = &mci->err_ctx[head];
+
+	ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C);
+	ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR);
+
+	writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C);
+
+	ctx->reg_ext_name = "ERR";
+
+	mci->ring_head++;
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c,
+						msix_ent);
+
+	unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx));
+	struct l2c_err_ctx *ctx = &l2c->err_ctx[tail];
+	irqreturn_t ret = IRQ_NONE;
+
+	u64 mask_ue, mask_ce;
+	const struct error_descr *l2_errors;
+	char *reg_int_name;
+
+	char *msg;
+	char *other;
+
+	msg = kmalloc(L2C_MESSAGE_SIZE, GFP_KERNEL);
+	other = kmalloc(L2C_OTHER_SIZE, GFP_KERNEL);
+
+	if (!msg || !other)
+		goto err_free;
+
+	switch (l2c->pdev->device) {
+	case 
PCI_DEVICE_ID_THUNDER_L2C_TAD: + reg_int_name = "L2C_TAD_INT"; + mask_ue = L2C_TAD_INT_UE; + mask_ce = L2C_TAD_INT_CE; + l2_errors = l2_tad_errors; + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + reg_int_name = "L2C_CBC_INT"; + mask_ue = L2C_CBC_INT_UE; + mask_ce = L2C_CBC_INT_CE; + l2_errors = l2_cbc_errors; + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + reg_int_name = "L2C_MCI_INT"; + mask_ue = L2C_MCI_INT_VBFDBE; + mask_ce = L2C_MCI_INT_VBFSBE; + l2_errors = l2_mci_errors; + break; + default: + dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n", + l2c->pdev->device); + goto err_free; + } + + while (CIRC_CNT(l2c->ring_head, l2c->ring_tail, + ARRAY_SIZE(l2c->err_ctx))) { + snprintf(msg, L2C_MESSAGE_SIZE, + "%s: %s: %016llx, %s: %016llx", + l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int, + ctx->reg_ext_name, ctx->reg_ext); + + decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int); + + strlcat(msg, other, L2C_MESSAGE_SIZE); + + if (ctx->reg_int & mask_ue) + edac_device_handle_ue(l2c->edac_dev, 0, 0, msg); + else if (ctx->reg_int & mask_ce) + edac_device_handle_ce(l2c->edac_dev, 0, 0, msg); + + l2c->ring_tail++; + } + + ret = IRQ_HANDLED; + +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +#define L2C_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(l2c, _name, _reg) + +L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S); + +static struct debugfs_entry *l2c_tad_dfs_ents[] = { + &debugfs_tad_int, +}; + +L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S); + +static struct debugfs_entry *l2c_cbc_dfs_ents[] = { + &debugfs_cbc_int, +}; + +L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S); + +static struct debugfs_entry *l2c_mci_dfs_ents[] = { + &debugfs_mci_int, +}; + +static const struct pci_device_id thunderx_l2c_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), }, + { 0, }, +}; + +static int thunderx_l2c_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_l2c *l2c; + struct edac_device_ctl_info *edac_dev; + struct debugfs_entry **l2c_devattr; + size_t dfs_entries; + irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL; + char name[32]; + const char *fmt; + u64 reg_en_offs, reg_en_mask; + int idx; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + switch (pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + thunderx_l2c_isr = thunderx_l2c_tad_isr; + l2c_devattr = l2c_tad_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents); + fmt = "L2C-TAD%d"; + reg_en_offs = L2C_TAD_INT_ENA_W1S; + reg_en_mask = L2C_TAD_INT_ENA_ALL; + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + thunderx_l2c_isr = thunderx_l2c_cbc_isr; + l2c_devattr = l2c_cbc_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents); + fmt = "L2C-CBC%d"; + reg_en_offs = L2C_CBC_INT_ENA_W1S; + reg_en_mask = L2C_CBC_INT_ENA_ALL; + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + thunderx_l2c_isr = thunderx_l2c_mci_isr; + l2c_devattr = l2c_mci_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents); + fmt = "L2C-MCI%d"; + reg_en_offs = L2C_MCI_INT_ENA_W1S; + reg_en_mask = L2C_MCI_INT_ENA_ALL; + break; + default: + //Should never ever get here + dev_err(&pdev->dev, "Unsupported PCI device: %04x\n", 
+ pdev->device); + return -EINVAL; + } + + idx = edac_device_alloc_index(); + snprintf(name, sizeof(name), fmt, idx); + + edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c), + name, 1, "L2C", 1, 0, idx); + if (!edac_dev) { + dev_err(&pdev->dev, "Cannot allocate EDAC device\n"); + return -ENOMEM; + } + + l2c = edac_dev->pvt_info; + l2c->edac_dev = edac_dev; + + l2c->regs = pcim_iomap_table(pdev)[0]; + if (!l2c->regs) { + dev_err(&pdev->dev, "Cannot map PCI resources\n"); + ret = -ENODEV; + goto err_free; + } + + l2c->pdev = pdev; + + l2c->ring_head = 0; + l2c->ring_tail = 0; + + l2c->msix_ent.entry = 0; + l2c->msix_ent.vector = 0; + + ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector, + thunderx_l2c_isr, + thunderx_l2c_threaded_isr, + 0, "[EDAC] ThunderX L2C", + &l2c->msix_ent); + if (ret) + goto err_free; + + edac_dev->dev = &pdev->dev; + edac_dev->dev_name = dev_name(&pdev->dev); + edac_dev->mod_name = "thunderx-l2c"; + edac_dev->ctl_name = "thunderx-l2c"; + + ret = edac_device_add_device(edac_dev); + if (ret) { + dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); + goto err_free; + } + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); + + ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr, + l2c, dfs_entries); + + if (ret != dfs_entries) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? " created" : ""); + } + } + + pci_set_drvdata(pdev, edac_dev); + + writeq(reg_en_mask, l2c->regs + reg_en_offs); + + return 0; + +err_free: + edac_device_free_ctl_info(edac_dev); + + return ret; +} + +static void thunderx_l2c_remove(struct pci_dev *pdev) +{ + struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); + struct thunderx_l2c *l2c = edac_dev->pvt_info; + + switch (pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C); + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C); + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C); + break; + } + + edac_debugfs_remove_recursive(l2c->debugfs); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(edac_dev); +} + +MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl); + +static struct pci_driver thunderx_l2c_driver = { + .name = "thunderx_l2c_edac", + .probe = thunderx_l2c_probe, + .remove = thunderx_l2c_remove, + .id_table = thunderx_l2c_pci_tbl, +}; + +static int __init thunderx_edac_init(void) +{ + int rc = 0; + + if (ghes_get_devices()) + return -EBUSY; + + rc = pci_register_driver(&thunderx_lmc_driver); + if (rc) + return rc; + + rc = pci_register_driver(&thunderx_ocx_driver); + if (rc) + goto err_lmc; + + rc = pci_register_driver(&thunderx_l2c_driver); + if (rc) + goto err_ocx; + + return rc; +err_ocx: + pci_unregister_driver(&thunderx_ocx_driver); +err_lmc: + pci_unregister_driver(&thunderx_lmc_driver); + + return rc; +} + +static void __exit thunderx_edac_exit(void) +{ + pci_unregister_driver(&thunderx_l2c_driver); + pci_unregister_driver(&thunderx_ocx_driver); + pci_unregister_driver(&thunderx_lmc_driver); + +} + +module_init(thunderx_edac_init); +module_exit(thunderx_edac_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Cavium, Inc."); +MODULE_DESCRIPTION("EDAC Driver for 
Cavium ThunderX"); diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c new file mode 100644 index 000000000000..39cc2ef9cac4 --- /dev/null +++ b/drivers/edac/ti_edac.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/ + * + * Texas Instruments DDR3 ECC error correction and detection driver + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/init.h> +#include <linux/edac.h> +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/module.h> + +#include "edac_module.h" + +/* EMIF controller registers */ +#define EMIF_SDRAM_CONFIG 0x008 +#define EMIF_IRQ_STATUS 0x0ac +#define EMIF_IRQ_ENABLE_SET 0x0b4 +#define EMIF_ECC_CTRL 0x110 +#define EMIF_1B_ECC_ERR_CNT 0x130 +#define EMIF_1B_ECC_ERR_THRSH 0x134 +#define EMIF_1B_ECC_ERR_ADDR_LOG 0x13c +#define EMIF_2B_ECC_ERR_ADDR_LOG 0x140 + +/* Bit definitions for EMIF_SDRAM_CONFIG */ +#define SDRAM_TYPE_SHIFT 29 +#define SDRAM_TYPE_MASK GENMASK(31, 29) +#define SDRAM_TYPE_DDR3 (3 << SDRAM_TYPE_SHIFT) +#define SDRAM_TYPE_DDR2 (2 << SDRAM_TYPE_SHIFT) +#define SDRAM_NARROW_MODE_MASK GENMASK(15, 14) +#define SDRAM_K2_NARROW_MODE_SHIFT 12 +#define SDRAM_K2_NARROW_MODE_MASK GENMASK(13, 12) +#define SDRAM_ROWSIZE_SHIFT 7 +#define SDRAM_ROWSIZE_MASK GENMASK(9, 7) +#define SDRAM_IBANK_SHIFT 4 +#define SDRAM_IBANK_MASK GENMASK(6, 4) +#define SDRAM_K2_IBANK_SHIFT 5 +#define SDRAM_K2_IBANK_MASK GENMASK(6, 5) +#define SDRAM_K2_EBANK_SHIFT 3 +#define SDRAM_K2_EBANK_MASK BIT(SDRAM_K2_EBANK_SHIFT) +#define SDRAM_PAGESIZE_SHIFT 0 +#define SDRAM_PAGESIZE_MASK GENMASK(2, 0) +#define SDRAM_K2_PAGESIZE_SHIFT 0 +#define SDRAM_K2_PAGESIZE_MASK GENMASK(1, 0) + +#define EMIF_1B_ECC_ERR_THRSH_SHIFT 24 + +/* IRQ bit definitions */ +#define EMIF_1B_ECC_ERR BIT(5) +#define EMIF_2B_ECC_ERR BIT(4) +#define EMIF_WR_ECC_ERR BIT(3) +#define EMIF_SYS_ERR BIT(0) +/* Bit 31 enables ECC and 28 enables RMW */ +#define ECC_ENABLED (BIT(31) | BIT(28)) + +#define EDAC_MOD_NAME "ti-emif-edac" + +enum { + EMIF_TYPE_DRA7, + EMIF_TYPE_K2 +}; + +struct ti_edac { + void __iomem *reg; +}; + +static u32 ti_edac_readl(struct ti_edac *edac, u16 offset) +{ + return readl_relaxed(edac->reg + offset); +} + +static void ti_edac_writel(struct ti_edac *edac, u32 val, u16 offset) +{ + writel_relaxed(val, edac->reg + offset); +} + +static irqreturn_t ti_edac_isr(int irq, void *data) +{ + struct mem_ctl_info *mci = data; + struct ti_edac *edac = mci->pvt_info; + u32 irq_status; + u32 err_addr; + int err_count; + + irq_status = ti_edac_readl(edac, EMIF_IRQ_STATUS); + + if (irq_status & EMIF_1B_ECC_ERR) { + err_addr = ti_edac_readl(edac, EMIF_1B_ECC_ERR_ADDR_LOG); + err_count = ti_edac_readl(edac, EMIF_1B_ECC_ERR_CNT); + ti_edac_writel(edac, err_count, EMIF_1B_ECC_ERR_CNT); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, + err_addr >> PAGE_SHIFT, + err_addr & 
~PAGE_MASK, -1, 0, 0, 0, + mci->ctl_name, "1B"); + } + + if (irq_status & EMIF_2B_ECC_ERR) { + err_addr = ti_edac_readl(edac, EMIF_2B_ECC_ERR_ADDR_LOG); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + err_addr >> PAGE_SHIFT, + err_addr & ~PAGE_MASK, -1, 0, 0, 0, + mci->ctl_name, "2B"); + } + + if (irq_status & EMIF_WR_ECC_ERR) + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + 0, 0, -1, 0, 0, 0, + mci->ctl_name, "WR"); + + ti_edac_writel(edac, irq_status, EMIF_IRQ_STATUS); + + return IRQ_HANDLED; +} + +static void ti_edac_setup_dimm(struct mem_ctl_info *mci, u32 type) +{ + struct dimm_info *dimm; + struct ti_edac *edac = mci->pvt_info; + int bits; + u32 val; + u32 memsize; + + dimm = edac_get_dimm(mci, 0, 0, 0); + + val = ti_edac_readl(edac, EMIF_SDRAM_CONFIG); + + if (type == EMIF_TYPE_DRA7) { + bits = ((val & SDRAM_PAGESIZE_MASK) >> SDRAM_PAGESIZE_SHIFT) + 8; + bits += ((val & SDRAM_ROWSIZE_MASK) >> SDRAM_ROWSIZE_SHIFT) + 9; + bits += (val & SDRAM_IBANK_MASK) >> SDRAM_IBANK_SHIFT; + + if (val & SDRAM_NARROW_MODE_MASK) { + bits++; + dimm->dtype = DEV_X16; + } else { + bits += 2; + dimm->dtype = DEV_X32; + } + } else { + bits = 16; + bits += ((val & SDRAM_K2_PAGESIZE_MASK) >> + SDRAM_K2_PAGESIZE_SHIFT) + 8; + bits += (val & SDRAM_K2_IBANK_MASK) >> SDRAM_K2_IBANK_SHIFT; + bits += (val & SDRAM_K2_EBANK_MASK) >> SDRAM_K2_EBANK_SHIFT; + + val = (val & SDRAM_K2_NARROW_MODE_MASK) >> + SDRAM_K2_NARROW_MODE_SHIFT; + switch (val) { + case 0: + bits += 3; + dimm->dtype = DEV_X64; + break; + case 1: + bits += 2; + dimm->dtype = DEV_X32; + break; + case 2: + bits++; + dimm->dtype = DEV_X16; + break; + } + } + + memsize = 1 << bits; + + dimm->nr_pages = memsize >> PAGE_SHIFT; + dimm->grain = 4; + if ((val & SDRAM_TYPE_MASK) == SDRAM_TYPE_DDR2) + dimm->mtype = MEM_DDR2; + else + dimm->mtype = MEM_DDR3; + + val = ti_edac_readl(edac, EMIF_ECC_CTRL); + if (val & ECC_ENABLED) + dimm->edac_mode = EDAC_SECDED; + else + dimm->edac_mode = EDAC_NONE; +} + +static const struct of_device_id ti_edac_of_match[] = { + { .compatible = "ti,emif-keystone", .data = (void *)EMIF_TYPE_K2 }, + { .compatible = "ti,emif-dra7xx", .data = (void *)EMIF_TYPE_DRA7 }, + {}, +}; +MODULE_DEVICE_TABLE(of, ti_edac_of_match); + +static int _emif_get_id(struct device_node *node) +{ + struct device_node *np; + const __be32 *addrp; + u32 addr, my_addr; + int my_id = 0; + + addrp = of_get_address(node, 0, NULL, NULL); + my_addr = (u32)of_translate_address(node, addrp); + + for_each_matching_node(np, ti_edac_of_match) { + if (np == node) + continue; + + addrp = of_get_address(np, 0, NULL, NULL); + addr = (u32)of_translate_address(np, addrp); + + edac_printk(KERN_INFO, EDAC_MOD_NAME, + "addr=%x, my_addr=%x\n", + addr, my_addr); + + if (addr < my_addr) + my_id++; + } + + return my_id; +} + +static int ti_edac_probe(struct platform_device *pdev) +{ + int error_irq = 0, ret = -ENODEV; + struct device *dev = &pdev->dev; + struct resource *res; + void __iomem *reg; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[1]; + const struct of_device_id *id; + struct ti_edac *edac; + int emif_id; + + id = of_match_device(ti_edac_of_match, &pdev->dev); + if (!id) + return -ENODEV; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + reg = devm_ioremap_resource(dev, res); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + layers[0].type = EDAC_MC_LAYER_ALL_MEM; + layers[0].size = 1; + + /* Allocate ID number for our EMIF controller */ + emif_id = _emif_get_id(pdev->dev.of_node); + if (emif_id < 0) + return -EINVAL; + + mci = 
edac_mc_alloc(emif_id, 1, layers, sizeof(*edac)); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + edac = mci->pvt_info; + edac->reg = reg; + platform_set_drvdata(pdev, mci); + + mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_SECDED | EDAC_FLAG_NONE; + mci->mod_name = EDAC_MOD_NAME; + mci->ctl_name = id->compatible; + mci->dev_name = dev_name(&pdev->dev); + + /* Setup memory layout */ + ti_edac_setup_dimm(mci, (u32)(id->data)); + + /* add EMIF ECC error handler */ + error_irq = platform_get_irq(pdev, 0); + if (error_irq < 0) { + ret = error_irq; + goto err; + } + + ret = devm_request_irq(dev, error_irq, ti_edac_isr, 0, + "emif-edac-irq", mci); + if (ret) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, + "request_irq fail for EMIF EDAC irq\n"); + goto err; + } + + ret = edac_mc_add_mc(mci); + if (ret) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, + "Failed to register mci: %d.\n", ret); + goto err; + } + + /* Generate an interrupt with each 1b error */ + ti_edac_writel(edac, 1 << EMIF_1B_ECC_ERR_THRSH_SHIFT, + EMIF_1B_ECC_ERR_THRSH); + + /* Enable interrupts */ + ti_edac_writel(edac, + EMIF_1B_ECC_ERR | EMIF_2B_ECC_ERR | EMIF_WR_ECC_ERR, + EMIF_IRQ_ENABLE_SET); + + return 0; + +err: + edac_mc_free(mci); + return ret; +} + +static void ti_edac_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); +} + +static struct platform_driver ti_edac_driver = { + .probe = ti_edac_probe, + .remove = ti_edac_remove, + .driver = { + .name = EDAC_MOD_NAME, + .of_match_table = ti_edac_of_match, + }, +}; + +module_platform_driver(ti_edac_driver); + +MODULE_AUTHOR("Texas Instruments Inc."); +MODULE_DESCRIPTION("EDAC Driver for Texas Instruments DDR3 MC"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/edac/tile_edac.c b/drivers/edac/tile_edac.c deleted file mode 100644 index a0820536b7d9..000000000000 --- a/drivers/edac/tile_edac.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright 2011 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * Tilera-specific EDAC driver. - * - * This source code is derived from the following driver: - * - * Cell MIC driver for ECC counting - * - * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. - * <benh@kernel.crashing.org> - * - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/platform_device.h> -#include <linux/io.h> -#include <linux/uaccess.h> -#include <linux/edac.h> -#include <hv/hypervisor.h> -#include <hv/drv_mshim_intf.h> - -#include "edac_core.h" - -#define DRV_NAME "tile-edac" - -/* Number of cs_rows needed per memory controller on TILEPro. */ -#define TILE_EDAC_NR_CSROWS 1 - -/* Number of channels per memory controller on TILEPro. */ -#define TILE_EDAC_NR_CHANS 1 - -/* Granularity of reported error in bytes on TILEPro. */ -#define TILE_EDAC_ERROR_GRAIN 8 - -/* TILE processor has multiple independent memory controllers. */ -struct platform_device *mshim_pdev[TILE_MAX_MSHIMS]; - -struct tile_edac_priv { - int hv_devhdl; /* Hypervisor device handle. 
*/ - int node; /* Memory controller instance #. */ - unsigned int ce_count; /* - * Correctable-error counter - * kept by the driver. - */ -}; - -static void tile_edac_check(struct mem_ctl_info *mci) -{ - struct tile_edac_priv *priv = mci->pvt_info; - struct mshim_mem_error mem_error; - - if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_error, - sizeof(struct mshim_mem_error), MSHIM_MEM_ERROR_OFF) != - sizeof(struct mshim_mem_error)) { - pr_err(DRV_NAME ": MSHIM_MEM_ERROR_OFF pread failure.\n"); - return; - } - - /* Check if the current error count is different from the saved one. */ - if (mem_error.sbe_count != priv->ce_count) { - dev_dbg(mci->pdev, "ECC CE err on node %d\n", priv->node); - priv->ce_count = mem_error.sbe_count; - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - 0, 0, 0, - 0, 0, -1, - mci->ctl_name, ""); - } -} - -/* - * Initialize the 'csrows' table within the mci control structure with the - * addressing of memory. - */ -static int tile_edac_init_csrows(struct mem_ctl_info *mci) -{ - struct csrow_info *csrow = mci->csrows[0]; - struct tile_edac_priv *priv = mci->pvt_info; - struct mshim_mem_info mem_info; - struct dimm_info *dimm = csrow->channels[0]->dimm; - - if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_info, - sizeof(struct mshim_mem_info), MSHIM_MEM_INFO_OFF) != - sizeof(struct mshim_mem_info)) { - pr_err(DRV_NAME ": MSHIM_MEM_INFO_OFF pread failure.\n"); - return -1; - } - - if (mem_info.mem_ecc) - dimm->edac_mode = EDAC_SECDED; - else - dimm->edac_mode = EDAC_NONE; - switch (mem_info.mem_type) { - case DDR2: - dimm->mtype = MEM_DDR2; - break; - - case DDR3: - dimm->mtype = MEM_DDR3; - break; - - default: - return -1; - } - - dimm->nr_pages = mem_info.mem_size >> PAGE_SHIFT; - dimm->grain = TILE_EDAC_ERROR_GRAIN; - dimm->dtype = DEV_UNKNOWN; - - return 0; -} - -static int tile_edac_mc_probe(struct platform_device *pdev) -{ - char hv_file[32]; - int hv_devhdl; - struct mem_ctl_info *mci; - struct edac_mc_layer layers[2]; - struct tile_edac_priv *priv; - int rc; - - sprintf(hv_file, "mshim/%d", pdev->id); - hv_devhdl = hv_dev_open((HV_VirtAddr)hv_file, 0); - if (hv_devhdl < 0) - return -EINVAL; - - /* A TILE MC has a single channel and one chip-select row. */ - layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = TILE_EDAC_NR_CSROWS; - layers[0].is_virt_csrow = true; - layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = TILE_EDAC_NR_CHANS; - layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers, - sizeof(struct tile_edac_priv)); - if (mci == NULL) - return -ENOMEM; - priv = mci->pvt_info; - priv->node = pdev->id; - priv->hv_devhdl = hv_devhdl; - - mci->pdev = &pdev->dev; - mci->mtype_cap = MEM_FLAG_DDR2; - mci->edac_ctl_cap = EDAC_FLAG_SECDED; - - mci->mod_name = DRV_NAME; -#ifdef __tilegx__ - mci->ctl_name = "TILEGx_Memory_Controller"; -#else - mci->ctl_name = "TILEPro_Memory_Controller"; -#endif - mci->dev_name = dev_name(&pdev->dev); - mci->edac_check = tile_edac_check; - - /* - * Initialize the MC control structure 'csrows' table - * with the mapping and control information. - */ - if (tile_edac_init_csrows(mci)) { - /* No csrows found. 
*/ - mci->edac_cap = EDAC_FLAG_NONE; - } else { - mci->edac_cap = EDAC_FLAG_SECDED; - } - - platform_set_drvdata(pdev, mci); - - /* Register with EDAC core */ - rc = edac_mc_add_mc(mci); - if (rc) { - dev_err(&pdev->dev, "failed to register with EDAC core\n"); - edac_mc_free(mci); - return rc; - } - - return 0; -} - -static int tile_edac_mc_remove(struct platform_device *pdev) -{ - struct mem_ctl_info *mci = platform_get_drvdata(pdev); - - edac_mc_del_mc(&pdev->dev); - if (mci) - edac_mc_free(mci); - return 0; -} - -static struct platform_driver tile_edac_mc_driver = { - .driver = { - .name = DRV_NAME, - .owner = THIS_MODULE, - }, - .probe = tile_edac_mc_probe, - .remove = tile_edac_mc_remove, -}; - -/* - * Driver init routine. - */ -static int __init tile_edac_init(void) -{ - char hv_file[32]; - struct platform_device *pdev; - int i, err, num = 0; - - /* Only support POLL mode. */ - edac_op_state = EDAC_OPSTATE_POLL; - - err = platform_driver_register(&tile_edac_mc_driver); - if (err) - return err; - - for (i = 0; i < TILE_MAX_MSHIMS; i++) { - /* - * Not all memory controllers are configured such as in the - * case of a simulator. So we register only those mshims - * that are configured by the hypervisor. - */ - sprintf(hv_file, "mshim/%d", i); - if (hv_dev_open((HV_VirtAddr)hv_file, 0) < 0) - continue; - - pdev = platform_device_register_simple(DRV_NAME, i, NULL, 0); - if (IS_ERR(pdev)) - continue; - mshim_pdev[i] = pdev; - num++; - } - - if (num == 0) { - platform_driver_unregister(&tile_edac_mc_driver); - return -ENODEV; - } - return 0; -} - -/* - * Driver cleanup routine. - */ -static void __exit tile_edac_exit(void) -{ - int i; - - for (i = 0; i < TILE_MAX_MSHIMS; i++) { - struct platform_device *pdev = mshim_pdev[i]; - if (!pdev) - continue; - - platform_set_drvdata(pdev, NULL); - platform_device_unregister(pdev); - } - platform_driver_unregister(&tile_edac_mc_driver); -} - -module_init(tile_edac_init); -module_exit(tile_edac_exit); diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c new file mode 100644 index 000000000000..5a43b5d43ca2 --- /dev/null +++ b/drivers/edac/versal_edac.c @@ -0,0 +1,1196 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Xilinx Versal memory controller driver + * Copyright (C) 2023 Advanced Micro Devices, Inc. 
+ */ +#include <linux/bitfield.h> +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/sizes.h> +#include <linux/firmware/xlnx-zynqmp.h> +#include <linux/firmware/xlnx-event-manager.h> + +#include "edac_module.h" + +/* Granularity of reported error in bytes */ +#define XDDR_EDAC_ERR_GRAIN 1 + +#define XDDR_EDAC_MSG_SIZE 256 +#define EVENT 2 + +#define XDDR_PCSR_OFFSET 0xC +#define XDDR_ISR_OFFSET 0x14 +#define XDDR_IRQ_EN_OFFSET 0x20 +#define XDDR_IRQ1_EN_OFFSET 0x2C +#define XDDR_IRQ_DIS_OFFSET 0x24 +#define XDDR_IRQ_CE_MASK GENMASK(18, 15) +#define XDDR_IRQ_UE_MASK GENMASK(14, 11) + +#define XDDR_REG_CONFIG0_OFFSET 0x258 +#define XDDR_REG_CONFIG0_BUS_WIDTH_MASK GENMASK(19, 18) +#define XDDR_REG_CONFIG0_NUM_CHANS_MASK BIT(17) +#define XDDR_REG_CONFIG0_NUM_RANKS_MASK GENMASK(15, 14) +#define XDDR_REG_CONFIG0_SIZE_MASK GENMASK(10, 8) + +#define XDDR_REG_PINOUT_OFFSET 0x25C +#define XDDR_REG_PINOUT_ECC_EN_MASK GENMASK(7, 5) + +#define ECCW0_FLIP_CTRL 0x109C +#define ECCW0_FLIP0_OFFSET 0x10A0 +#define ECCW0_FLIP0_BITS 31 +#define ECCW0_FLIP1_OFFSET 0x10A4 +#define ECCW1_FLIP_CTRL 0x10AC +#define ECCW1_FLIP0_OFFSET 0x10B0 +#define ECCW1_FLIP1_OFFSET 0x10B4 +#define ECCR0_CERR_STAT_OFFSET 0x10BC +#define ECCR0_CE_ADDR_LO_OFFSET 0x10C0 +#define ECCR0_CE_ADDR_HI_OFFSET 0x10C4 +#define ECCR0_CE_DATA_LO_OFFSET 0x10C8 +#define ECCR0_CE_DATA_HI_OFFSET 0x10CC +#define ECCR0_CE_DATA_PAR_OFFSET 0x10D0 + +#define ECCR0_UERR_STAT_OFFSET 0x10D4 +#define ECCR0_UE_ADDR_LO_OFFSET 0x10D8 +#define ECCR0_UE_ADDR_HI_OFFSET 0x10DC +#define ECCR0_UE_DATA_LO_OFFSET 0x10E0 +#define ECCR0_UE_DATA_HI_OFFSET 0x10E4 +#define ECCR0_UE_DATA_PAR_OFFSET 0x10E8 + +#define ECCR1_CERR_STAT_OFFSET 0x10F4 +#define ECCR1_CE_ADDR_LO_OFFSET 0x10F8 +#define ECCR1_CE_ADDR_HI_OFFSET 0x10FC +#define ECCR1_CE_DATA_LO_OFFSET 0x1100 +#define ECCR1_CE_DATA_HI_OFFSET 0x110C +#define ECCR1_CE_DATA_PAR_OFFSET 0x1108 + +#define ECCR1_UERR_STAT_OFFSET 0x110C +#define ECCR1_UE_ADDR_LO_OFFSET 0x1110 +#define ECCR1_UE_ADDR_HI_OFFSET 0x1114 +#define ECCR1_UE_DATA_LO_OFFSET 0x1118 +#define ECCR1_UE_DATA_HI_OFFSET 0x111C +#define ECCR1_UE_DATA_PAR_OFFSET 0x1120 + +#define XDDR_NOC_REG_ADEC4_OFFSET 0x44 +#define RANK_1_MASK GENMASK(11, 6) +#define LRANK_0_MASK GENMASK(17, 12) +#define LRANK_1_MASK GENMASK(23, 18) +#define MASK_24 GENMASK(29, 24) + +#define XDDR_NOC_REG_ADEC5_OFFSET 0x48 +#define XDDR_NOC_REG_ADEC6_OFFSET 0x4C +#define XDDR_NOC_REG_ADEC7_OFFSET 0x50 +#define XDDR_NOC_REG_ADEC8_OFFSET 0x54 +#define XDDR_NOC_REG_ADEC9_OFFSET 0x58 +#define XDDR_NOC_REG_ADEC10_OFFSET 0x5C + +#define XDDR_NOC_REG_ADEC11_OFFSET 0x60 +#define MASK_0 GENMASK(5, 0) +#define GRP_0_MASK GENMASK(11, 6) +#define GRP_1_MASK GENMASK(17, 12) +#define CH_0_MASK GENMASK(23, 18) + +#define XDDR_NOC_REG_ADEC12_OFFSET 0x71C +#define XDDR_NOC_REG_ADEC13_OFFSET 0x720 + +#define XDDR_NOC_REG_ADEC14_OFFSET 0x724 +#define XDDR_NOC_ROW_MATCH_MASK GENMASK(17, 0) +#define XDDR_NOC_COL_MATCH_MASK GENMASK(27, 18) +#define XDDR_NOC_BANK_MATCH_MASK GENMASK(29, 28) +#define XDDR_NOC_GRP_MATCH_MASK GENMASK(31, 30) + +#define XDDR_NOC_REG_ADEC15_OFFSET 0x728 +#define XDDR_NOC_RANK_MATCH_MASK GENMASK(1, 0) +#define XDDR_NOC_LRANK_MATCH_MASK GENMASK(4, 2) +#define XDDR_NOC_CH_MATCH_MASK BIT(5) +#define XDDR_NOC_MOD_SEL_MASK BIT(6) +#define XDDR_NOC_MATCH_EN_MASK BIT(8) + +#define ECCR_UE_CE_ADDR_HI_ROW_MASK GENMASK(7, 0) + 
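+/*
+ * Illustrative sketch (editorial, not part of the original patch): the logged
+ * error address is split across an ADDR_LO/ADDR_HI register pair, and only
+ * the low byte of ADDR_HI carries valid row-high bits. The driver reassembles
+ * the 64-bit log word as:
+ *
+ *	u32 lo = readl(base + ECCR0_CE_ADDR_LO_OFFSET);
+ *	u64 hi = readl(base + ECCR0_CE_ADDR_HI_OFFSET) & ECCR_UE_CE_ADDR_HI_ROW_MASK;
+ *	u64 log = lo | hi << 32;	/* decoded via union ecc_error_info below */
+ */
+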
+#define XDDR_EDAC_NR_CSROWS 1 +#define XDDR_EDAC_NR_CHANS 1 + +#define XDDR_BUS_WIDTH_64 0 +#define XDDR_BUS_WIDTH_32 1 +#define XDDR_BUS_WIDTH_16 2 + +#define XDDR_MAX_ROW_CNT 18 +#define XDDR_MAX_COL_CNT 10 +#define XDDR_MAX_RANK_CNT 2 +#define XDDR_MAX_LRANK_CNT 3 +#define XDDR_MAX_BANK_CNT 2 +#define XDDR_MAX_GRP_CNT 2 + +/* + * Config and system registers are usually locked. This is the + * code which unlocks them in order to accept writes. See + * + * https://docs.xilinx.com/r/en-US/am012-versal-register-reference/PCSR_LOCK-XRAM_SLCR-Register + */ +#define PCSR_UNLOCK_VAL 0xF9E8D7C6 +#define PCSR_LOCK_VAL 1 +#define XDDR_ERR_TYPE_CE 0 +#define XDDR_ERR_TYPE_UE 1 + +#define XILINX_DRAM_SIZE_4G 0 +#define XILINX_DRAM_SIZE_6G 1 +#define XILINX_DRAM_SIZE_8G 2 +#define XILINX_DRAM_SIZE_12G 3 +#define XILINX_DRAM_SIZE_16G 4 +#define XILINX_DRAM_SIZE_32G 5 +#define NUM_UE_BITPOS 2 + +/** + * struct ecc_error_info - ECC error log information. + * @burstpos: Burst position. + * @lrank: Logical Rank number. + * @rank: Rank number. + * @group: Group number. + * @bank: Bank number. + * @col: Column number. + * @row: Row number. + * @rowhi: Row number higher bits. + * @i: ECC error info. + */ +union ecc_error_info { + struct { + u32 burstpos:3; + u32 lrank:3; + u32 rank:2; + u32 group:2; + u32 bank:2; + u32 col:10; + u32 row:10; + u32 rowhi; + }; + u64 i; +} __packed; + +union edac_info { + struct { + u32 row0:6; + u32 row1:6; + u32 row2:6; + u32 row3:6; + u32 row4:6; + u32 reserved:2; + }; + struct { + u32 col1:6; + u32 col2:6; + u32 col3:6; + u32 col4:6; + u32 col5:6; + u32 reservedcol:2; + }; + u32 i; +} __packed; + +/** + * struct ecc_status - ECC status information to report. + * @ceinfo: Correctable error log information. + * @ueinfo: Uncorrectable error log information. + * @channel: Channel number. + * @error_type: Error type information. + */ +struct ecc_status { + union ecc_error_info ceinfo[2]; + union ecc_error_info ueinfo[2]; + u8 channel; + u8 error_type; +}; + +/** + * struct edac_priv - DDR memory controller private instance data. + * @ddrmc_baseaddr: Base address of the DDR controller. + * @ddrmc_noc_baseaddr: Base address of the DDRMC NOC. + * @message: Buffer for framing the event specific info. + * @mc_id: Memory controller ID. + * @ce_cnt: Correctable error count. + * @ue_cnt: UnCorrectable error count. + * @stat: ECC status information. + * @lrank_bit: Bit shifts for lrank bit. + * @rank_bit: Bit shifts for rank bit. + * @row_bit: Bit shifts for row bit. + * @col_bit: Bit shifts for column bit. + * @bank_bit: Bit shifts for bank bit. + * @grp_bit: Bit shifts for group bit. + * @ch_bit: Bit shifts for channel bit. + * @err_inject_addr: Data poison address. + * @debugfs: Debugfs handle. 
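+ *
+ * For illustration: each *_bit array records, per DDR component bit, the
+ * system-address bit position programmed in the NOC ADEC registers. A
+ * hypothetical row_bit[0] == 6 means row bit 0 lives at address bit 6;
+ * convert_to_physical() and poison_setup() translate addresses this way.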
+ */
+struct edac_priv {
+	void __iomem *ddrmc_baseaddr;
+	void __iomem *ddrmc_noc_baseaddr;
+	char message[XDDR_EDAC_MSG_SIZE];
+	u32 mc_id;
+	u32 ce_cnt;
+	u32 ue_cnt;
+	struct ecc_status stat;
+	u32 lrank_bit[3];
+	u32 rank_bit[2];
+	u32 row_bit[18];
+	u32 col_bit[10];
+	u32 bank_bit[2];
+	u32 grp_bit[2];
+	u32 ch_bit;
+#ifdef CONFIG_EDAC_DEBUG
+	u64 err_inject_addr;
+	struct dentry *debugfs;
+#endif
+};
+
+static void get_ce_error_info(struct edac_priv *priv)
+{
+	void __iomem *ddrmc_base;
+	struct ecc_status *p;
+	u32 regval;
+	u64 reghi;
+
+	ddrmc_base = priv->ddrmc_baseaddr;
+	p = &priv->stat;
+
+	p->error_type = XDDR_ERR_TYPE_CE;
+	regval = readl(ddrmc_base + ECCR0_CE_ADDR_LO_OFFSET);
+	/* Only the low byte of ADDR_HI holds valid row bits */
+	reghi = readl(ddrmc_base + ECCR0_CE_ADDR_HI_OFFSET) & ECCR_UE_CE_ADDR_HI_ROW_MASK;
+	p->ceinfo[0].i = regval | reghi << 32;
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(ddrmc_base + ECCR0_CE_DATA_LO_OFFSET),
+		 readl(ddrmc_base + ECCR0_CE_DATA_HI_OFFSET),
+		 readl(ddrmc_base + ECCR0_CE_DATA_PAR_OFFSET));
+
+	regval = readl(ddrmc_base + ECCR1_CE_ADDR_LO_OFFSET);
+	reghi = readl(ddrmc_base + ECCR1_CE_ADDR_HI_OFFSET) & ECCR_UE_CE_ADDR_HI_ROW_MASK;
+	p->ceinfo[1].i = regval | reghi << 32;
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(ddrmc_base + ECCR1_CE_DATA_LO_OFFSET),
+		 readl(ddrmc_base + ECCR1_CE_DATA_HI_OFFSET),
+		 readl(ddrmc_base + ECCR1_CE_DATA_PAR_OFFSET));
+}
+
+static void get_ue_error_info(struct edac_priv *priv)
+{
+	void __iomem *ddrmc_base;
+	struct ecc_status *p;
+	u32 regval;
+	u64 reghi;
+
+	ddrmc_base = priv->ddrmc_baseaddr;
+	p = &priv->stat;
+
+	p->error_type = XDDR_ERR_TYPE_UE;
+	regval = readl(ddrmc_base + ECCR0_UE_ADDR_LO_OFFSET);
+	reghi = readl(ddrmc_base + ECCR0_UE_ADDR_HI_OFFSET);
+	p->ueinfo[0].i = regval | reghi << 32;
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(ddrmc_base + ECCR0_UE_DATA_LO_OFFSET),
+		 readl(ddrmc_base + ECCR0_UE_DATA_HI_OFFSET),
+		 readl(ddrmc_base + ECCR0_UE_DATA_PAR_OFFSET));
+
+	regval = readl(ddrmc_base + ECCR1_UE_ADDR_LO_OFFSET);
+	reghi = readl(ddrmc_base + ECCR1_UE_ADDR_HI_OFFSET);
+	p->ueinfo[1].i = regval | reghi << 32;
+
+	edac_dbg(2, "ERR DATA: 0x%08X%08X ERR DATA PARITY: 0x%08X\n",
+		 readl(ddrmc_base + ECCR1_UE_DATA_LO_OFFSET),
+		 readl(ddrmc_base + ECCR1_UE_DATA_HI_OFFSET),
+		 readl(ddrmc_base + ECCR1_UE_DATA_PAR_OFFSET));
+}
+
+static bool get_error_info(struct edac_priv *priv)
+{
+	u32 eccr0_ceval, eccr1_ceval, eccr0_ueval, eccr1_ueval;
+	void __iomem *ddrmc_base;
+	struct ecc_status *p;
+
+	ddrmc_base = priv->ddrmc_baseaddr;
+	p = &priv->stat;
+
+	eccr0_ceval = readl(ddrmc_base + ECCR0_CERR_STAT_OFFSET);
+	eccr1_ceval = readl(ddrmc_base + ECCR1_CERR_STAT_OFFSET);
+	eccr0_ueval = readl(ddrmc_base + ECCR0_UERR_STAT_OFFSET);
+	eccr1_ueval = readl(ddrmc_base + ECCR1_UERR_STAT_OFFSET);
+
+	if (!eccr0_ceval && !eccr1_ceval && !eccr0_ueval && !eccr1_ueval)
+		return 1;
+	if (!eccr0_ceval)
+		p->channel = 1;
+	else
+		p->channel = 0;
+
+	if (eccr0_ceval || eccr1_ceval)
+		get_ce_error_info(priv);
+
+	if (eccr0_ueval || eccr1_ueval) {
+		if (!eccr0_ueval)
+			p->channel = 1;
+		else
+			p->channel = 0;
+		get_ue_error_info(priv);
+	}
+
+	/* Unlock the PCSR registers */
+	writel(PCSR_UNLOCK_VAL, ddrmc_base + XDDR_PCSR_OFFSET);
+
+	writel(0, ddrmc_base + ECCR0_CERR_STAT_OFFSET);
+	writel(0, ddrmc_base + ECCR1_CERR_STAT_OFFSET);
+	writel(0, ddrmc_base + ECCR0_UERR_STAT_OFFSET);
+	
writel(0, ddrmc_base + ECCR1_UERR_STAT_OFFSET); + + /* Lock the PCSR registers */ + writel(1, ddrmc_base + XDDR_PCSR_OFFSET); + + return 0; +} + +/** + * convert_to_physical - Convert to physical address. + * @priv: DDR memory controller private instance data. + * @pinf: ECC error info structure. + * + * Return: Physical address of the DDR memory. + */ +static unsigned long convert_to_physical(struct edac_priv *priv, union ecc_error_info pinf) +{ + unsigned long err_addr = 0; + u32 index; + u32 row; + + row = pinf.rowhi << 10 | pinf.row; + for (index = 0; index < XDDR_MAX_ROW_CNT; index++) { + err_addr |= (row & BIT(0)) << priv->row_bit[index]; + row >>= 1; + } + + for (index = 0; index < XDDR_MAX_COL_CNT; index++) { + err_addr |= (pinf.col & BIT(0)) << priv->col_bit[index]; + pinf.col >>= 1; + } + + for (index = 0; index < XDDR_MAX_BANK_CNT; index++) { + err_addr |= (pinf.bank & BIT(0)) << priv->bank_bit[index]; + pinf.bank >>= 1; + } + + for (index = 0; index < XDDR_MAX_GRP_CNT; index++) { + err_addr |= (pinf.group & BIT(0)) << priv->grp_bit[index]; + pinf.group >>= 1; + } + + for (index = 0; index < XDDR_MAX_RANK_CNT; index++) { + err_addr |= (pinf.rank & BIT(0)) << priv->rank_bit[index]; + pinf.rank >>= 1; + } + + for (index = 0; index < XDDR_MAX_LRANK_CNT; index++) { + err_addr |= (pinf.lrank & BIT(0)) << priv->lrank_bit[index]; + pinf.lrank >>= 1; + } + + err_addr |= (priv->stat.channel & BIT(0)) << priv->ch_bit; + + return err_addr; +} + +/** + * handle_error - Handle Correctable and Uncorrectable errors. + * @mci: EDAC memory controller instance. + * @stat: ECC status structure. + * + * Handles ECC correctable and uncorrectable errors. + */ +static void handle_error(struct mem_ctl_info *mci, struct ecc_status *stat) +{ + struct edac_priv *priv = mci->pvt_info; + union ecc_error_info pinf; + + if (stat->error_type == XDDR_ERR_TYPE_CE) { + priv->ce_cnt++; + pinf = stat->ceinfo[stat->channel]; + snprintf(priv->message, XDDR_EDAC_MSG_SIZE, + "Error type:%s MC ID: %d Addr at %lx Burst Pos: %d\n", + "CE", priv->mc_id, + convert_to_physical(priv, pinf), pinf.burstpos); + + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + 1, 0, 0, 0, 0, 0, -1, + priv->message, ""); + } + + if (stat->error_type == XDDR_ERR_TYPE_UE) { + priv->ue_cnt++; + pinf = stat->ueinfo[stat->channel]; + snprintf(priv->message, XDDR_EDAC_MSG_SIZE, + "Error type:%s MC ID: %d Addr at %lx Burst Pos: %d\n", + "UE", priv->mc_id, + convert_to_physical(priv, pinf), pinf.burstpos); + + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, + 1, 0, 0, 0, 0, 0, -1, + priv->message, ""); + } + + memset(stat, 0, sizeof(*stat)); +} + +/** + * err_callback - Handle Correctable and Uncorrectable errors. + * @payload: payload data. + * @data: mci controller data. + * + * Handles ECC correctable and uncorrectable errors. 
+ */ +static void err_callback(const u32 *payload, void *data) +{ + struct mem_ctl_info *mci = (struct mem_ctl_info *)data; + struct edac_priv *priv; + struct ecc_status *p; + int regval; + + priv = mci->pvt_info; + p = &priv->stat; + + regval = readl(priv->ddrmc_baseaddr + XDDR_ISR_OFFSET); + + if (payload[EVENT] == XPM_EVENT_ERROR_MASK_DDRMC_CR) + p->error_type = XDDR_ERR_TYPE_CE; + if (payload[EVENT] == XPM_EVENT_ERROR_MASK_DDRMC_NCR) + p->error_type = XDDR_ERR_TYPE_UE; + + if (get_error_info(priv)) + return; + + handle_error(mci, &priv->stat); + + /* Unlock the PCSR registers */ + writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + + /* Clear the ISR */ + writel(regval, priv->ddrmc_baseaddr + XDDR_ISR_OFFSET); + + /* Lock the PCSR registers */ + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + edac_dbg(3, "Total error count CE %d UE %d\n", + priv->ce_cnt, priv->ue_cnt); +} + +/** + * get_dwidth - Return the controller memory width. + * @base: DDR memory controller base address. + * + * Get the EDAC device type width appropriate for the controller + * configuration. + * + * Return: a device type width enumeration. + */ +static enum dev_type get_dwidth(const void __iomem *base) +{ + enum dev_type dt; + u32 regval; + u32 width; + + regval = readl(base + XDDR_REG_CONFIG0_OFFSET); + width = FIELD_GET(XDDR_REG_CONFIG0_BUS_WIDTH_MASK, regval); + + switch (width) { + case XDDR_BUS_WIDTH_16: + dt = DEV_X2; + break; + case XDDR_BUS_WIDTH_32: + dt = DEV_X4; + break; + case XDDR_BUS_WIDTH_64: + dt = DEV_X8; + break; + default: + dt = DEV_UNKNOWN; + } + + return dt; +} + +/** + * get_ecc_state - Return the controller ECC enable/disable status. + * @base: DDR memory controller base address. + * + * Get the ECC enable/disable status for the controller. + * + * Return: a ECC status boolean i.e true/false - enabled/disabled. + */ +static bool get_ecc_state(void __iomem *base) +{ + enum dev_type dt; + u32 ecctype; + + dt = get_dwidth(base); + if (dt == DEV_UNKNOWN) + return false; + + ecctype = readl(base + XDDR_REG_PINOUT_OFFSET); + ecctype &= XDDR_REG_PINOUT_ECC_EN_MASK; + + return !!ecctype; +} + +/** + * get_memsize - Get the size of the attached memory device. + * @priv: DDR memory controller private instance data. + * + * Return: the memory size in bytes. + */ +static u64 get_memsize(struct edac_priv *priv) +{ + u32 regval; + u64 size; + + regval = readl(priv->ddrmc_baseaddr + XDDR_REG_CONFIG0_OFFSET); + regval = FIELD_GET(XDDR_REG_CONFIG0_SIZE_MASK, regval); + + switch (regval) { + case XILINX_DRAM_SIZE_4G: + size = 4U; break; + case XILINX_DRAM_SIZE_6G: + size = 6U; break; + case XILINX_DRAM_SIZE_8G: + size = 8U; break; + case XILINX_DRAM_SIZE_12G: + size = 12U; break; + case XILINX_DRAM_SIZE_16G: + size = 16U; break; + case XILINX_DRAM_SIZE_32G: + size = 32U; break; + /* Invalid configuration */ + default: + size = 0; break; + } + + size *= SZ_1G; + return size; +} + +/** + * init_csrows - Initialize the csrow data. + * @mci: EDAC memory controller instance. + * + * Initialize the chip select rows associated with the EDAC memory + * controller instance. 
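+ *
+ * Worked example (illustrative, hypothetical sizes): an 8 GiB configuration
+ * with 4 KiB pages and a single channel gives each DIMM entry
+ * nr_pages = (8 << 30) >> PAGE_SHIFT = 2097152 pages.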
+ */ +static void init_csrows(struct mem_ctl_info *mci) +{ + struct edac_priv *priv = mci->pvt_info; + struct csrow_info *csi; + struct dimm_info *dimm; + unsigned long size; + u32 row; + int ch; + + size = get_memsize(priv); + for (row = 0; row < mci->nr_csrows; row++) { + csi = mci->csrows[row]; + for (ch = 0; ch < csi->nr_channels; ch++) { + dimm = csi->channels[ch]->dimm; + dimm->edac_mode = EDAC_SECDED; + dimm->mtype = MEM_DDR4; + dimm->nr_pages = (size >> PAGE_SHIFT) / csi->nr_channels; + dimm->grain = XDDR_EDAC_ERR_GRAIN; + dimm->dtype = get_dwidth(priv->ddrmc_baseaddr); + } + } +} + +/** + * mc_init - Initialize one driver instance. + * @mci: EDAC memory controller instance. + * @pdev: platform device. + * + * Perform initialization of the EDAC memory controller instance and + * related driver-private data associated with the memory controller the + * instance is bound to. + */ +static void mc_init(struct mem_ctl_info *mci, struct platform_device *pdev) +{ + mci->pdev = &pdev->dev; + platform_set_drvdata(pdev, mci); + + /* Initialize controller capabilities and configuration */ + mci->mtype_cap = MEM_FLAG_DDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->scrub_cap = SCRUB_HW_SRC; + mci->scrub_mode = SCRUB_NONE; + + mci->edac_cap = EDAC_FLAG_SECDED; + mci->ctl_name = "xlnx_ddr_controller"; + mci->dev_name = dev_name(&pdev->dev); + mci->mod_name = "xlnx_edac"; + + edac_op_state = EDAC_OPSTATE_INT; + + init_csrows(mci); +} + +static void enable_intr(struct edac_priv *priv) +{ + /* Unlock the PCSR registers */ + writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + + /* Enable UE and CE Interrupts to support the interrupt case */ + writel(XDDR_IRQ_CE_MASK | XDDR_IRQ_UE_MASK, + priv->ddrmc_baseaddr + XDDR_IRQ_EN_OFFSET); + + writel(XDDR_IRQ_UE_MASK, + priv->ddrmc_baseaddr + XDDR_IRQ1_EN_OFFSET); + /* Lock the PCSR registers */ + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); +} + +static void disable_intr(struct edac_priv *priv) +{ + /* Unlock the PCSR registers */ + writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + + /* Disable UE/CE Interrupts */ + writel(XDDR_IRQ_CE_MASK | XDDR_IRQ_UE_MASK, + priv->ddrmc_baseaddr + XDDR_IRQ_DIS_OFFSET); + + /* Lock the PCSR registers */ + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +#ifdef CONFIG_EDAC_DEBUG +/** + * poison_setup - Update poison registers. + * @priv: DDR memory controller private instance data. + * + * Update poison registers as per DDR mapping upon write of the address + * location the fault is injected. + * Return: none. 
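+ *
+ * Illustrative sketch: each loop below extracts one component bit from the
+ * injection address, e.g. row bit n = (err_inject_addr >> row_bit[n]) & 1
+ * (bit positions hypothetical, as read from the ADEC registers at probe),
+ * and the reassembled row/col/bank/group/rank values are written into the
+ * ADEC14/ADEC15 match registers.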
+ */ +static void poison_setup(struct edac_priv *priv) +{ + u32 col = 0, row = 0, bank = 0, grp = 0, rank = 0, lrank = 0, ch = 0; + u32 index, regval; + + for (index = 0; index < XDDR_MAX_ROW_CNT; index++) { + row |= (((priv->err_inject_addr >> priv->row_bit[index]) & + BIT(0)) << index); + } + + for (index = 0; index < XDDR_MAX_COL_CNT; index++) { + col |= (((priv->err_inject_addr >> priv->col_bit[index]) & + BIT(0)) << index); + } + + for (index = 0; index < XDDR_MAX_BANK_CNT; index++) { + bank |= (((priv->err_inject_addr >> priv->bank_bit[index]) & + BIT(0)) << index); + } + + for (index = 0; index < XDDR_MAX_GRP_CNT; index++) { + grp |= (((priv->err_inject_addr >> priv->grp_bit[index]) & + BIT(0)) << index); + } + + for (index = 0; index < XDDR_MAX_RANK_CNT; index++) { + rank |= (((priv->err_inject_addr >> priv->rank_bit[index]) & + BIT(0)) << index); + } + + for (index = 0; index < XDDR_MAX_LRANK_CNT; index++) { + lrank |= (((priv->err_inject_addr >> priv->lrank_bit[index]) & + BIT(0)) << index); + } + + ch = (priv->err_inject_addr >> priv->ch_bit) & BIT(0); + if (ch) + writel(0xFF, priv->ddrmc_baseaddr + ECCW1_FLIP_CTRL); + else + writel(0xFF, priv->ddrmc_baseaddr + ECCW0_FLIP_CTRL); + + writel(0, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC12_OFFSET); + writel(0, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC13_OFFSET); + + regval = row & XDDR_NOC_ROW_MATCH_MASK; + regval |= FIELD_PREP(XDDR_NOC_COL_MATCH_MASK, col); + regval |= FIELD_PREP(XDDR_NOC_BANK_MATCH_MASK, bank); + regval |= FIELD_PREP(XDDR_NOC_GRP_MATCH_MASK, grp); + writel(regval, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC14_OFFSET); + + regval = rank & XDDR_NOC_RANK_MATCH_MASK; + regval |= FIELD_PREP(XDDR_NOC_LRANK_MATCH_MASK, lrank); + regval |= FIELD_PREP(XDDR_NOC_CH_MATCH_MASK, ch); + regval |= (XDDR_NOC_MOD_SEL_MASK | XDDR_NOC_MATCH_EN_MASK); + writel(regval, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC15_OFFSET); +} + +static void xddr_inject_data_ce_store(struct mem_ctl_info *mci, u8 ce_bitpos) +{ + u32 ecc0_flip0, ecc1_flip0, ecc0_flip1, ecc1_flip1; + struct edac_priv *priv = mci->pvt_info; + + if (ce_bitpos < ECCW0_FLIP0_BITS) { + ecc0_flip0 = BIT(ce_bitpos); + ecc1_flip0 = BIT(ce_bitpos); + ecc0_flip1 = 0; + ecc1_flip1 = 0; + } else { + ce_bitpos = ce_bitpos - ECCW0_FLIP0_BITS; + ecc0_flip1 = BIT(ce_bitpos); + ecc1_flip1 = BIT(ce_bitpos); + ecc0_flip0 = 0; + ecc1_flip0 = 0; + } + + writel(ecc0_flip0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET); + writel(ecc1_flip0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET); + writel(ecc0_flip1, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET); + writel(ecc1_flip1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET); +} + +/* + * To inject a correctable error, the following steps are needed: + * + * - Write the correctable error bit position value: + * echo <bit_pos val> > /sys/kernel/debug/edac/<controller instance>/inject_ce + * + * poison_setup() derives the row, column, bank, group and rank and + * writes to the ADEC registers based on the address given by the user. + * + * The ADEC12 and ADEC13 are mask registers; write 0 to make sure default + * configuration is there and no addresses are masked. + * + * The row, column, bank, group and rank registers are written to the + * match ADEC bit to generate errors at the particular address. ADEC14 + * and ADEC15 have the match bits. + * + * xddr_inject_data_ce_store() updates the ECC FLIP registers with the + * bits to be corrupted based on the bit position given by the user. + * + * Upon doing a read to the address the errors are injected. 
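+ *
+ * A hypothetical debugfs session (instance name and values are examples
+ * only):
+ *
+ *	echo 0x12340000 > /sys/kernel/debug/edac/<controller instance>/address
+ *	echo 4 > /sys/kernel/debug/edac/<controller instance>/inject_ce
+ *
+ * after which a read of 0x12340000 reports one corrected (CE) error with
+ * data bit 4 flipped.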
+ */
+static ssize_t inject_data_ce_store(struct file *file, const char __user *data,
+				    size_t count, loff_t *ppos)
+{
+	struct device *dev = file->private_data;
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct edac_priv *priv = mci->pvt_info;
+	u8 ce_bitpos;
+	int ret;
+
+	ret = kstrtou8_from_user(data, count, 0, &ce_bitpos);
+	if (ret)
+		return ret;
+
+	/* Unlock the PCSR registers */
+	writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_UNLOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+
+	poison_setup(priv);
+
+	xddr_inject_data_ce_store(mci, ce_bitpos);
+	ret = count;
+
+	/* Lock the PCSR registers */
+	writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+
+	return ret;
+}
+
+static const struct file_operations xddr_inject_ce_fops = {
+	.open = simple_open,
+	.write = inject_data_ce_store,
+	.llseek = generic_file_llseek,
+};
+
+static void xddr_inject_data_ue_store(struct mem_ctl_info *mci, u32 val0, u32 val1)
+{
+	struct edac_priv *priv = mci->pvt_info;
+
+	/* Mirror the CE path: FLIP0 of both ECC words, then FLIP1 of both */
+	writel(val0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET);
+	writel(val0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET);
+	writel(val1, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET);
+	writel(val1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET);
+}
+
+/*
+ * To inject an uncorrectable error, two comma-separated bit positions
+ * are needed:
+ *
+ * echo <bit_pos0>,<bit_pos1> > /sys/kernel/debug/edac/<controller instance>/inject_ue
+ *
+ * poison_setup() derives the row, column, bank, group and rank and
+ * writes to the ADEC registers based on the address given by the user.
+ *
+ * The ADEC12 and ADEC13 are mask registers; write 0 so that none of the
+ * addresses are masked. The row, column, bank, group and rank registers
+ * are written to the match ADEC bit to generate errors at the
+ * particular address. ADEC14 and ADEC15 have the match bits.
+ *
+ * xddr_inject_data_ue_store() updates the ECC FLIP registers with the
+ * bits to be corrupted based on the two bit positions given by the user;
+ * for uncorrectable errors, two bit errors are injected.
+ *
+ * Upon doing a read to the address the errors are injected.
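+ *
+ * A hypothetical session (values are examples only) that flips data bits 0
+ * and 30 at the poisoned address:
+ *
+ *	echo 0x12340000 > /sys/kernel/debug/edac/<controller instance>/address
+ *	echo 0,30 > /sys/kernel/debug/edac/<controller instance>/inject_ue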
+ */
+static ssize_t inject_data_ue_store(struct file *file, const char __user *data,
+				    size_t count, loff_t *ppos)
+{
+	struct device *dev = file->private_data;
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct edac_priv *priv = mci->pvt_info;
+	char buf[6], *pbuf, *token[2];
+	u32 val0 = 0, val1 = 0;
+	u8 len, ue0, ue1;
+	int i, ret;
+
+	/* Leave room for the NUL terminator written below */
+	len = min_t(size_t, count, sizeof(buf) - 1);
+	if (copy_from_user(buf, data, len))
+		return -EFAULT;
+
+	buf[len] = '\0';
+	pbuf = &buf[0];
+	for (i = 0; i < NUM_UE_BITPOS; i++)
+		token[i] = strsep(&pbuf, ",");
+
+	if (!token[0] || !token[1])
+		return -EINVAL;
+
+	ret = kstrtou8(token[0], 0, &ue0);
+	if (ret)
+		return ret;
+
+	ret = kstrtou8(token[1], 0, &ue1);
+	if (ret)
+		return ret;
+
+	if (ue0 < ECCW0_FLIP0_BITS) {
+		val0 = BIT(ue0);
+	} else {
+		ue0 = ue0 - ECCW0_FLIP0_BITS;
+		val1 = BIT(ue0);
+	}
+
+	if (ue1 < ECCW0_FLIP0_BITS) {
+		val0 |= BIT(ue1);
+	} else {
+		ue1 = ue1 - ECCW0_FLIP0_BITS;
+		val1 |= BIT(ue1);
+	}
+
+	/* Unlock the PCSR registers */
+	writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_UNLOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+
+	poison_setup(priv);
+
+	xddr_inject_data_ue_store(mci, val0, val1);
+
+	/* Lock the PCSR registers */
+	writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET);
+	writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET);
+	return count;
+}
+
+static const struct file_operations xddr_inject_ue_fops = {
+	.open = simple_open,
+	.write = inject_data_ue_store,
+	.llseek = generic_file_llseek,
+};
+
+static void create_debugfs_attributes(struct mem_ctl_info *mci)
+{
+	struct edac_priv *priv = mci->pvt_info;
+
+	priv->debugfs = edac_debugfs_create_dir(mci->dev_name);
+	if (!priv->debugfs)
+		return;
+
+	if (!edac_debugfs_create_file("inject_ce", 0200, priv->debugfs,
+				      &mci->dev, &xddr_inject_ce_fops)) {
+		debugfs_remove_recursive(priv->debugfs);
+		return;
+	}
+
+	if (!edac_debugfs_create_file("inject_ue", 0200, priv->debugfs,
+				      &mci->dev, &xddr_inject_ue_fops)) {
+		debugfs_remove_recursive(priv->debugfs);
+		return;
+	}
+	debugfs_create_x64("address", 0600, priv->debugfs,
+			   &priv->err_inject_addr);
+	mci->debugfs = priv->debugfs;
+}
+
+static inline void process_bit(struct edac_priv *priv, unsigned int start, u32 regval)
+{
+	union edac_info rows;
+
+	rows.i = regval;
+	priv->row_bit[start] = rows.row0;
+	priv->row_bit[start + 1] = rows.row1;
+	priv->row_bit[start + 2] = rows.row2;
+	priv->row_bit[start + 3] = rows.row3;
+	priv->row_bit[start + 4] = rows.row4;
+}
+
+static void setup_row_address_map(struct edac_priv *priv)
+{
+	u32 regval;
+	union edac_info rows;
+
+	regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC5_OFFSET);
+	process_bit(priv, 0, regval);
+
+	regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC6_OFFSET);
+	process_bit(priv, 5, regval);
+
+	regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC7_OFFSET);
+	process_bit(priv, 10, regval);
+
+	regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC8_OFFSET);
+	rows.i = regval;
+
+	priv->row_bit[15] = rows.row0;
+	priv->row_bit[16] = rows.row1;
+	priv->row_bit[17] = rows.row2;
+}
+
+static void setup_column_address_map(struct edac_priv *priv)
+{
+	u32 regval;
+	union edac_info cols;
+
+	regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC8_OFFSET);
+	priv->col_bit[0] = FIELD_GET(MASK_24, regval);
+
+	regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC9_OFFSET);
+	cols.i = regval;
+	priv->col_bit[1] = cols.col1;
+	priv->col_bit[2] = cols.col2;
+	
priv->col_bit[3] = cols.col3; + priv->col_bit[4] = cols.col4; + priv->col_bit[5] = cols.col5; + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC10_OFFSET); + cols.i = regval; + priv->col_bit[6] = cols.col1; + priv->col_bit[7] = cols.col2; + priv->col_bit[8] = cols.col3; + priv->col_bit[9] = cols.col4; +} + +static void setup_bank_grp_ch_address_map(struct edac_priv *priv) +{ + u32 regval; + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC10_OFFSET); + priv->bank_bit[0] = FIELD_GET(MASK_24, regval); + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC11_OFFSET); + priv->bank_bit[1] = (regval & MASK_0); + priv->grp_bit[0] = FIELD_GET(GRP_0_MASK, regval); + priv->grp_bit[1] = FIELD_GET(GRP_1_MASK, regval); + priv->ch_bit = FIELD_GET(CH_0_MASK, regval); +} + +static void setup_rank_lrank_address_map(struct edac_priv *priv) +{ + u32 regval; + + regval = readl(priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC4_OFFSET); + priv->rank_bit[0] = (regval & MASK_0); + priv->rank_bit[1] = FIELD_GET(RANK_1_MASK, regval); + priv->lrank_bit[0] = FIELD_GET(LRANK_0_MASK, regval); + priv->lrank_bit[1] = FIELD_GET(LRANK_1_MASK, regval); + priv->lrank_bit[2] = FIELD_GET(MASK_24, regval); +} + +/** + * setup_address_map - Set Address Map by querying ADDRMAP registers. + * @priv: DDR memory controller private instance data. + * + * Set Address Map by querying ADDRMAP registers. + * + * Return: none. + */ +static void setup_address_map(struct edac_priv *priv) +{ + setup_row_address_map(priv); + + setup_column_address_map(priv); + + setup_bank_grp_ch_address_map(priv); + + setup_rank_lrank_address_map(priv); +} +#endif /* CONFIG_EDAC_DEBUG */ + +static const struct of_device_id xlnx_edac_match[] = { + { .compatible = "xlnx,versal-ddrmc", }, + { + /* end of table */ + } +}; + +MODULE_DEVICE_TABLE(of, xlnx_edac_match); +static u32 emif_get_id(struct device_node *node) +{ + u32 addr, my_addr, my_id = 0; + struct device_node *np; + const __be32 *addrp; + + addrp = of_get_address(node, 0, NULL, NULL); + my_addr = (u32)of_translate_address(node, addrp); + + for_each_matching_node(np, xlnx_edac_match) { + if (np == node) + continue; + + addrp = of_get_address(np, 0, NULL, NULL); + addr = (u32)of_translate_address(np, addrp); + + edac_printk(KERN_INFO, EDAC_MC, + "addr=%x, my_addr=%x\n", + addr, my_addr); + + if (addr < my_addr) + my_id++; + } + + return my_id; +} + +static int mc_probe(struct platform_device *pdev) +{ + void __iomem *ddrmc_baseaddr, *ddrmc_noc_baseaddr; + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + u8 num_chans, num_csrows; + struct edac_priv *priv; + u32 edac_mc_id, regval; + int rc; + + ddrmc_baseaddr = devm_platform_ioremap_resource_byname(pdev, "base"); + if (IS_ERR(ddrmc_baseaddr)) + return PTR_ERR(ddrmc_baseaddr); + + ddrmc_noc_baseaddr = devm_platform_ioremap_resource_byname(pdev, "noc"); + if (IS_ERR(ddrmc_noc_baseaddr)) + return PTR_ERR(ddrmc_noc_baseaddr); + + if (!get_ecc_state(ddrmc_baseaddr)) + return -ENXIO; + + /* Allocate ID number for the EMIF controller */ + edac_mc_id = emif_get_id(pdev->dev.of_node); + + regval = readl(ddrmc_baseaddr + XDDR_REG_CONFIG0_OFFSET); + num_chans = FIELD_GET(XDDR_REG_CONFIG0_NUM_CHANS_MASK, regval); + num_chans++; + + num_csrows = FIELD_GET(XDDR_REG_CONFIG0_NUM_RANKS_MASK, regval); + num_csrows *= 2; + if (!num_csrows) + num_csrows = 1; + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = num_csrows; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 
num_chans; + layers[1].is_virt_csrow = false; + + mci = edac_mc_alloc(edac_mc_id, ARRAY_SIZE(layers), layers, + sizeof(struct edac_priv)); + if (!mci) { + edac_printk(KERN_ERR, EDAC_MC, + "Failed memory allocation for mc instance\n"); + return -ENOMEM; + } + + priv = mci->pvt_info; + priv->ddrmc_baseaddr = ddrmc_baseaddr; + priv->ddrmc_noc_baseaddr = ddrmc_noc_baseaddr; + priv->ce_cnt = 0; + priv->ue_cnt = 0; + priv->mc_id = edac_mc_id; + + mc_init(mci, pdev); + + rc = edac_mc_add_mc(mci); + if (rc) { + edac_printk(KERN_ERR, EDAC_MC, + "Failed to register with EDAC core\n"); + goto free_edac_mc; + } + + rc = xlnx_register_event(PM_NOTIFY_CB, VERSAL_EVENT_ERROR_PMC_ERR1, + XPM_EVENT_ERROR_MASK_DDRMC_CR | XPM_EVENT_ERROR_MASK_DDRMC_NCR, + false, err_callback, mci); + if (rc) { + if (rc == -EACCES) + rc = -EPROBE_DEFER; + + goto del_mc; + } + +#ifdef CONFIG_EDAC_DEBUG + create_debugfs_attributes(mci); + setup_address_map(priv); +#endif + enable_intr(priv); + return rc; + +del_mc: + edac_mc_del_mc(&pdev->dev); +free_edac_mc: + edac_mc_free(mci); + + return rc; +} + +static void mc_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + struct edac_priv *priv = mci->pvt_info; + + disable_intr(priv); + +#ifdef CONFIG_EDAC_DEBUG + debugfs_remove_recursive(priv->debugfs); +#endif + + xlnx_unregister_event(PM_NOTIFY_CB, VERSAL_EVENT_ERROR_PMC_ERR1, + XPM_EVENT_ERROR_MASK_DDRMC_CR | + XPM_EVENT_ERROR_MASK_DDRMC_NCR, err_callback, mci); + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); +} + +static struct platform_driver xilinx_ddr_edac_mc_driver = { + .driver = { + .name = "xilinx-ddrmc-edac", + .of_match_table = xlnx_edac_match, + }, + .probe = mc_probe, + .remove = mc_remove, +}; + +module_platform_driver(xilinx_ddr_edac_mc_driver); + +MODULE_AUTHOR("AMD Inc"); +MODULE_DESCRIPTION("Xilinx DDRMC ECC driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c new file mode 100644 index 000000000000..1a1092793092 --- /dev/null +++ b/drivers/edac/versalnet_edac.c @@ -0,0 +1,962 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD Versal NET memory controller driver + * Copyright (C) 2025 Advanced Micro Devices, Inc. 
+ */ + +#include <linux/cdx/edac_cdx_pcol.h> +#include <linux/edac.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/ras.h> +#include <linux/remoteproc.h> +#include <linux/rpmsg.h> +#include <linux/sizes.h> +#include <ras/ras_event.h> + +#include "edac_module.h" + +/* Granularity of reported error in bytes */ +#define MC5_ERR_GRAIN 1 +#define MC_GET_DDR_CONFIG_IN_LEN 4 + +#define MC5_IRQ_CE_MASK GENMASK(18, 15) +#define MC5_IRQ_UE_MASK GENMASK(14, 11) + +#define MC5_RANK_1_MASK GENMASK(11, 6) +#define MASK_24 GENMASK(29, 24) +#define MASK_0 GENMASK(5, 0) + +#define MC5_LRANK_1_MASK GENMASK(11, 6) +#define MC5_LRANK_2_MASK GENMASK(17, 12) +#define MC5_BANK1_MASK GENMASK(11, 6) +#define MC5_GRP_0_MASK GENMASK(17, 12) +#define MC5_GRP_1_MASK GENMASK(23, 18) + +#define MC5_REGHI_ROW 7 +#define MC5_EACHBIT 1 +#define MC5_ERR_TYPE_CE 0 +#define MC5_ERR_TYPE_UE 1 +#define MC5_HIGH_MEM_EN BIT(20) +#define MC5_MEM_MASK GENMASK(19, 0) +#define MC5_X16_BASE 256 +#define MC5_X16_ECC 32 +#define MC5_X16_SIZE (MC5_X16_BASE + MC5_X16_ECC) +#define MC5_X32_SIZE 576 +#define MC5_HIMEM_BASE (256 * SZ_1M) +#define MC5_ILC_HIMEM_EN BIT(28) +#define MC5_ILC_MEM GENMASK(27, 0) +#define MC5_INTERLEAVE_SEL GENMASK(3, 0) +#define MC5_BUS_WIDTH_MASK GENMASK(19, 18) +#define MC5_NUM_CHANS_MASK BIT(17) +#define MC5_RANK_MASK GENMASK(15, 14) + +#define ERROR_LEVEL 2 +#define ERROR_ID 3 +#define TOTAL_ERR_LENGTH 5 +#define MSG_ERR_OFFSET 8 +#define MSG_ERR_LENGTH 9 +#define ERROR_DATA 10 +#define MCDI_RESPONSE 0xFF + +#define REG_MAX 152 +#define ADEC_MAX 152 +#define NUM_CONTROLLERS 8 +#define REGS_PER_CONTROLLER 19 +#define ADEC_NUM 19 +#define BUFFER_SZ 80 + +#define XDDR5_BUS_WIDTH_64 0 +#define XDDR5_BUS_WIDTH_32 1 +#define XDDR5_BUS_WIDTH_16 2 + +/** + * struct ecc_error_info - ECC error log information. + * @burstpos: Burst position. + * @lrank: Logical Rank number. + * @rank: Rank number. + * @group: Group number. + * @bank: Bank number. + * @col: Column number. + * @row: Row number. + * @rowhi: Row number higher bits. + * @i: Combined ECC error vector containing encoded values of burst position, + * rank, bank, column, and row information. + */ +union ecc_error_info { + struct { + u32 burstpos:3; + u32 lrank:4; + u32 rank:2; + u32 group:3; + u32 bank:2; + u32 col:11; + u32 row:7; + u32 rowhi; + }; + u64 i; +} __packed; + +/* Row and column bit positions in the address decoder (ADEC) registers. */ +union row_col_mapping { + struct { + u32 row0:6; + u32 row1:6; + u32 row2:6; + u32 row3:6; + u32 row4:6; + u32 reserved:2; + }; + struct { + u32 col1:6; + u32 col2:6; + u32 col3:6; + u32 col4:6; + u32 col5:6; + u32 reservedcol:2; + }; + u32 i; +} __packed; + +/** + * struct ecc_status - ECC status information to report. + * @ceinfo: Correctable errors. + * @ueinfo: Uncorrected errors. + * @channel: Channel number. + * @error_type: Error type. + */ +struct ecc_status { + union ecc_error_info ceinfo[2]; + union ecc_error_info ueinfo[2]; + u8 channel; + u8 error_type; +}; + +/** + * struct mc_priv - DDR memory controller private instance data. + * @message: Buffer for framing the event specific info. + * @stat: ECC status information. + * @error_id: The error id. + * @error_level: The error level. + * @dwidth: Width of data bus excluding ECC bits. + * @part_len: The support of the message received. + * @regs: The registers sent on the rpmsg. + * @adec: Address decode registers. + * @mci: Memory controller interface. + * @ept: rpmsg endpoint. + * @mcdi: The mcdi handle. 
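+ *
+ * For illustration: the regs[] and adec[] arrays hold the register dump for
+ * every instance, so REG_MAX == ADEC_MAX == 152 == NUM_CONTROLLERS (8) *
+ * REGS_PER_CONTROLLER (19) words.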
+ */ +struct mc_priv { + char message[256]; + struct ecc_status stat; + u32 error_id; + u32 error_level; + u32 dwidth; + u32 part_len; + u32 regs[REG_MAX]; + u32 adec[ADEC_MAX]; + struct mem_ctl_info *mci[NUM_CONTROLLERS]; + struct rpmsg_endpoint *ept; + struct cdx_mcdi *mcdi; +}; + +/* + * Address decoder (ADEC) registers to match the order in which the register + * information is received from the firmware. + */ +enum adec_info { + CONF = 0, + ADEC0, + ADEC1, + ADEC2, + ADEC3, + ADEC4, + ADEC5, + ADEC6, + ADEC7, + ADEC8, + ADEC9, + ADEC10, + ADEC11, + ADEC12, + ADEC13, + ADEC14, + ADEC15, + ADEC16, + ADECILC, +}; + +enum reg_info { + ISR = 0, + IMR, + ECCR0_ERR_STATUS, + ECCR0_ADDR_LO, + ECCR0_ADDR_HI, + ECCR0_DATA_LO, + ECCR0_DATA_HI, + ECCR0_PAR, + ECCR1_ERR_STATUS, + ECCR1_ADDR_LO, + ECCR1_ADDR_HI, + ECCR1_DATA_LO, + ECCR1_DATA_HI, + ECCR1_PAR, + XMPU_ERR, + XMPU_ERR_ADDR_L0, + XMPU_ERR_ADDR_HI, + XMPU_ERR_AXI_ID, + ADEC_CHK_ERR_LOG, +}; + +static bool get_ddr_info(u32 *error_data, struct mc_priv *priv) +{ + u32 reglo, reghi, parity, eccr0_val, eccr1_val, isr; + struct ecc_status *p; + + isr = error_data[ISR]; + + if (!(isr & (MC5_IRQ_UE_MASK | MC5_IRQ_CE_MASK))) + return false; + + eccr0_val = error_data[ECCR0_ERR_STATUS]; + eccr1_val = error_data[ECCR1_ERR_STATUS]; + + if (!eccr0_val && !eccr1_val) + return false; + + p = &priv->stat; + + if (!eccr0_val) + p->channel = 1; + else + p->channel = 0; + + reglo = error_data[ECCR0_ADDR_LO]; + reghi = error_data[ECCR0_ADDR_HI]; + if (isr & MC5_IRQ_CE_MASK) + p->ceinfo[0].i = reglo | (u64)reghi << 32; + else if (isr & MC5_IRQ_UE_MASK) + p->ueinfo[0].i = reglo | (u64)reghi << 32; + + parity = error_data[ECCR0_PAR]; + edac_dbg(2, "ERR DATA: 0x%08X%08X PARITY: 0x%08X\n", + reghi, reglo, parity); + + reglo = error_data[ECCR1_ADDR_LO]; + reghi = error_data[ECCR1_ADDR_HI]; + if (isr & MC5_IRQ_CE_MASK) + p->ceinfo[1].i = reglo | (u64)reghi << 32; + else if (isr & MC5_IRQ_UE_MASK) + p->ueinfo[1].i = reglo | (u64)reghi << 32; + + parity = error_data[ECCR1_PAR]; + edac_dbg(2, "ERR DATA: 0x%08X%08X PARITY: 0x%08X\n", + reghi, reglo, parity); + + return true; +} + +/** + * convert_to_physical - Convert @error_data to a physical address. + * @priv: DDR memory controller private instance data. + * @pinf: ECC error info structure. + * @controller: Controller number of the MC5 + * @error_data: the DDRMC5 ADEC address decoder register data + * + * Return: physical address of the DDR memory. 
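+ *
+ * Illustrative sketch of the algorithm: every DDR component (row, column,
+ * bank, group, rank, logical rank) is consumed bit by bit, and bit n is
+ * placed at the system-address position encoded in the matching ADEC
+ * field, e.g.
+ *
+ *	err_addr |= (row & BIT(0)) << rows.row0;	/* then row >>= 1 */
+ *
+ * after which the interleave and high/low memory offsets from ADEC1-ADEC3,
+ * ADEC13 and ADEC14 are applied.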
+ */
+static unsigned long convert_to_physical(struct mc_priv *priv,
+					 union ecc_error_info pinf,
+					 int controller, int *error_data)
+{
+	u32 row, blk, rsh_req_addr, interleave, ilc_base_ctrl_add, ilc_himem_en, reg, offset;
+	u64 high_mem_base, high_mem_offset, low_mem_offset, ilcmem_base;
+	unsigned long err_addr = 0, addr;
+	union row_col_mapping cols;
+	union row_col_mapping rows;
+	u32 col_bit_0;
+
+	row = pinf.rowhi << MC5_REGHI_ROW | pinf.row;
+	offset = controller * ADEC_NUM;
+
+	reg = error_data[ADEC6];
+	rows.i = reg;
+	err_addr |= (row & BIT(0)) << rows.row0;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row1;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row2;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row3;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row4;
+	row >>= MC5_EACHBIT;
+
+	reg = error_data[ADEC7];
+	rows.i = reg;
+	err_addr |= (row & BIT(0)) << rows.row0;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row1;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row2;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row3;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row4;
+	row >>= MC5_EACHBIT;
+
+	reg = error_data[ADEC8];
+	rows.i = reg;
+	err_addr |= (row & BIT(0)) << rows.row0;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row1;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row2;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row3;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row4;
+	/* Advance to row bit 15 before decoding ADEC9 */
+	row >>= MC5_EACHBIT;
+
+	reg = error_data[ADEC9];
+	rows.i = reg;
+
+	err_addr |= (row & BIT(0)) << rows.row0;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row1;
+	row >>= MC5_EACHBIT;
+	err_addr |= (row & BIT(0)) << rows.row2;
+	row >>= MC5_EACHBIT;
+
+	col_bit_0 = FIELD_GET(MASK_24, error_data[ADEC9]);
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & 1) << col_bit_0;
+
+	cols.i = error_data[ADEC10];
+	err_addr |= (pinf.col & 1) << cols.col1;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & 1) << cols.col2;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & 1) << cols.col3;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & 1) << cols.col4;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & 1) << cols.col5;
+	pinf.col >>= 1;
+
+	cols.i = error_data[ADEC11];
+	err_addr |= (pinf.col & 1) << cols.col1;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & 1) << cols.col2;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & 1) << cols.col3;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & 1) << cols.col4;
+	pinf.col >>= 1;
+	err_addr |= (pinf.col & 1) << cols.col5;
+	pinf.col >>= 1;
+
+	reg = error_data[ADEC12];
+	err_addr |= (pinf.bank & BIT(0)) << (reg & MASK_0);
+	pinf.bank >>= MC5_EACHBIT;
+	err_addr |= (pinf.bank & BIT(0)) << FIELD_GET(MC5_BANK1_MASK, reg);
+	pinf.bank >>= MC5_EACHBIT;
+
+	/* Group bits follow the bank bits in ADEC12 */
+	err_addr |= (pinf.group & BIT(0)) << FIELD_GET(MC5_GRP_0_MASK, reg);
+	pinf.group >>= MC5_EACHBIT;
+	err_addr |= (pinf.group & BIT(0)) << FIELD_GET(MC5_GRP_1_MASK, reg);
+	pinf.group >>= MC5_EACHBIT;
+	err_addr |= (pinf.group & BIT(0)) << FIELD_GET(MASK_24, reg);
+	pinf.group >>= MC5_EACHBIT;
+
+	reg = error_data[ADEC4];
+	err_addr |= (pinf.rank & BIT(0)) << (reg & MASK_0);
+	pinf.rank >>= MC5_EACHBIT;
+	err_addr |= (pinf.rank & BIT(0)) << FIELD_GET(MC5_RANK_1_MASK, reg);
+	pinf.rank >>= MC5_EACHBIT;
+
+	reg = error_data[ADEC5];
+	err_addr |= (pinf.lrank & BIT(0)) << (reg & MASK_0);
+	pinf.lrank >>= MC5_EACHBIT;
+	err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MC5_LRANK_1_MASK, reg);
+	pinf.lrank >>= 
+	err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MC5_LRANK_2_MASK, reg);
+	pinf.lrank >>= MC5_EACHBIT;
+	err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MASK_24, reg);
+	pinf.lrank >>= MC5_EACHBIT;
+
+	high_mem_base = (priv->adec[ADEC2 + offset] & MC5_MEM_MASK) * MC5_HIMEM_BASE;
+	interleave = priv->adec[ADEC13 + offset] & MC5_INTERLEAVE_SEL;
+
+	high_mem_offset = priv->adec[ADEC3 + offset] & MC5_MEM_MASK;
+	low_mem_offset = priv->adec[ADEC1 + offset] & MC5_MEM_MASK;
+	reg = priv->adec[ADEC14 + offset];
+	ilc_himem_en = !!(reg & MC5_ILC_HIMEM_EN);
+	ilcmem_base = (reg & MC5_ILC_MEM) * SZ_1M;
+	if (ilc_himem_en)
+		ilc_base_ctrl_add = ilcmem_base - high_mem_offset;
+	else
+		ilc_base_ctrl_add = ilcmem_base - low_mem_offset;
+
+	if (priv->dwidth == DEV_X16) {
+		blk = err_addr / MC5_X16_SIZE;
+		rsh_req_addr = (blk << 8) + ilc_base_ctrl_add;
+		err_addr = rsh_req_addr * interleave * 2;
+	} else {
+		blk = err_addr / MC5_X32_SIZE;
+		rsh_req_addr = (blk << 9) + ilc_base_ctrl_add;
+		err_addr = rsh_req_addr * interleave * 2;
+	}
+
+	if ((priv->adec[ADEC2 + offset] & MC5_HIGH_MEM_EN) && err_addr >= high_mem_base)
+		addr = err_addr - high_mem_offset;
+	else
+		addr = err_addr - low_mem_offset;
+
+	return addr;
+}
+
+/**
+ * handle_error - Handle errors.
+ * @priv:	DDR memory controller private instance data.
+ * @stat:	ECC status structure.
+ * @ctl_num:	Controller number of the MC5
+ * @error_data:	the MC5 ADEC address decoder register data
+ *
+ * Handles ECC correctable and uncorrectable errors.
+ */
+static void handle_error(struct mc_priv *priv, struct ecc_status *stat,
+			 int ctl_num, int *error_data)
+{
+	union ecc_error_info pinf;
+	struct mem_ctl_info *mci;
+	unsigned long pa;
+	phys_addr_t pfn;
+	int err;
+
+	if (WARN_ON_ONCE(ctl_num >= NUM_CONTROLLERS))
+		return;
+
+	mci = priv->mci[ctl_num];
+
+	if (stat->error_type == MC5_ERR_TYPE_CE) {
+		pinf = stat->ceinfo[stat->channel];
+		pa = convert_to_physical(priv, pinf, ctl_num, error_data);
+		snprintf(priv->message, sizeof(priv->message),
+			 "Error type:%s Controller %d Addr at %lx\n",
+			 "CE", ctl_num, pa);
+
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+				     1, 0, 0, 0, 0, 0, -1,
+				     priv->message, "");
+	}
+
+	if (stat->error_type == MC5_ERR_TYPE_UE) {
+		pinf = stat->ueinfo[stat->channel];
+		/* Convert once and reuse the address for the log and poisoning. */
+		pa = convert_to_physical(priv, pinf, ctl_num, error_data);
+		snprintf(priv->message, sizeof(priv->message),
+			 "Error type:%s Controller %d Addr at %lx\n",
+			 "UE", ctl_num, pa);
+
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+				     1, 0, 0, 0, 0, 0, -1,
+				     priv->message, "");
+		pfn = PHYS_PFN(pa);
+
+		if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) {
+			err = memory_failure(pfn, MF_ACTION_REQUIRED);
+			if (err)
+				edac_dbg(2, "memory_failure() error: %d", err);
+			else
+				edac_dbg(2, "Poison page at PA 0x%lx\n", pa);
+		}
+	}
+}
+
+static void mc_init(struct mem_ctl_info *mci, struct device *dev)
+{
+	struct mc_priv *priv = mci->pvt_info;
+	struct csrow_info *csi;
+	struct dimm_info *dimm;
+	u32 row;
+	int ch;
+
+	/* Initialize controller capabilities and configuration */
+	mci->mtype_cap = MEM_FLAG_DDR5;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->scrub_cap = SCRUB_HW_SRC;
+	mci->scrub_mode = SCRUB_NONE;
+
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->ctl_name = "VersalNET DDR5";
+	mci->dev_name = dev_name(dev);
+	mci->mod_name = "versalnet_edac";
+
+	edac_op_state = EDAC_OPSTATE_INT;
+
+	for (row = 0; row < mci->nr_csrows; row++) {
+		csi = mci->csrows[row];
+		for (ch = 0; ch < csi->nr_channels; ch++) {
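+			/*
+			 * Mirror the controller-wide DDR5/SECDED capabilities
+			 * into each DIMM's sysfs-visible attributes.
+			 */
+			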
dimm = csi->channels[ch]->dimm; + dimm->edac_mode = EDAC_SECDED; + dimm->mtype = MEM_DDR5; + dimm->grain = MC5_ERR_GRAIN; + dimm->dtype = priv->dwidth; + } + } +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +static unsigned int mcdi_rpc_timeout(struct cdx_mcdi *cdx, unsigned int cmd) +{ + return MCDI_RPC_TIMEOUT; +} + +static void mcdi_request(struct cdx_mcdi *cdx, + const struct cdx_dword *hdr, size_t hdr_len, + const struct cdx_dword *sdu, size_t sdu_len) +{ + void *send_buf; + int ret; + + send_buf = kzalloc(hdr_len + sdu_len, GFP_KERNEL); + if (!send_buf) + return; + + memcpy(send_buf, hdr, hdr_len); + memcpy(send_buf + hdr_len, sdu, sdu_len); + + ret = rpmsg_send(cdx->ept, send_buf, hdr_len + sdu_len); + if (ret) + dev_err(&cdx->rpdev->dev, "Failed to send rpmsg data: %d\n", ret); + + kfree(send_buf); +} + +static const struct cdx_mcdi_ops mcdi_ops = { + .mcdi_rpc_timeout = mcdi_rpc_timeout, + .mcdi_request = mcdi_request, +}; + +static void get_ddr_config(u32 index, u32 *buffer, struct cdx_mcdi *amd_mcdi) +{ + size_t outlen; + int ret; + + MCDI_DECLARE_BUF(inbuf, MC_GET_DDR_CONFIG_IN_LEN); + MCDI_DECLARE_BUF(outbuf, BUFFER_SZ); + + MCDI_SET_DWORD(inbuf, EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX, index); + + ret = cdx_mcdi_rpc(amd_mcdi, MC_CMD_EDAC_GET_DDR_CONFIG, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (!ret) + memcpy(buffer, MCDI_PTR(outbuf, GET_DDR_CONFIG), + (ADEC_NUM * 4)); +} + +static int setup_mcdi(struct mc_priv *mc_priv) +{ + struct cdx_mcdi *amd_mcdi; + int ret, i; + + amd_mcdi = kzalloc(sizeof(*amd_mcdi), GFP_KERNEL); + if (!amd_mcdi) + return -ENOMEM; + + amd_mcdi->mcdi_ops = &mcdi_ops; + ret = cdx_mcdi_init(amd_mcdi); + if (ret) { + kfree(amd_mcdi); + return ret; + } + + amd_mcdi->ept = mc_priv->ept; + mc_priv->mcdi = amd_mcdi; + + for (i = 0; i < NUM_CONTROLLERS; i++) + get_ddr_config(i, &mc_priv->adec[ADEC_NUM * i], amd_mcdi); + + return 0; +} + +static const guid_t amd_versalnet_guid = GUID_INIT(0x82678888, 0xa556, 0x44f2, + 0xb8, 0xb4, 0x45, 0x56, 0x2e, + 0x8c, 0x5b, 0xec); + +static int rpmsg_cb(struct rpmsg_device *rpdev, void *data, + int len, void *priv, u32 src) +{ + struct mc_priv *mc_priv = dev_get_drvdata(&rpdev->dev); + const guid_t *sec_type = &guid_null; + u32 length, offset, error_id; + u32 *result = (u32 *)data; + struct ecc_status *p; + int i, j, k, sec_sev; + const char *err_str; + u32 *adec_data; + + if (*(u8 *)data == MCDI_RESPONSE) { + cdx_mcdi_process_cmd(mc_priv->mcdi, (struct cdx_dword *)data, len); + return 0; + } + + sec_sev = result[ERROR_LEVEL]; + error_id = result[ERROR_ID]; + length = result[MSG_ERR_LENGTH]; + offset = result[MSG_ERR_OFFSET]; + + /* + * The data can come in two stretches. Construct the regs from two + * messages. The offset indicates the offset from which the data is to + * be taken. 
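+	 *
+	 * For example (lengths are illustrative only): a 25-register dump
+	 * may arrive as offset 0/length 16 followed by offset 16/length 9;
+	 * part_len accumulates what has arrived so far and the handler
+	 * returns early until TOTAL_ERR_LENGTH registers have been copied
+	 * into mc_priv->regs[].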
+	 */
+	for (i = 0; i < length; i++) {
+		k = offset + i;
+		j = ERROR_DATA + i;
+		mc_priv->regs[k] = result[j];
+	}
+
+	if (result[TOTAL_ERR_LENGTH] > length) {
+		if (!mc_priv->part_len)
+			mc_priv->part_len = length;
+		else
+			mc_priv->part_len += length;
+
+		if (mc_priv->part_len < result[TOTAL_ERR_LENGTH])
+			return 0;
+		mc_priv->part_len = 0;
+	}
+
+	mc_priv->error_id = error_id;
+	mc_priv->error_level = result[ERROR_LEVEL];
+
+	switch (error_id) {
+	case 5: err_str = "General Software Non-Correctable error"; break;
+	case 6: err_str = "CFU error"; break;
+	case 7: err_str = "CFRAME error"; break;
+	case 10: err_str = "DDRMC Microblaze Correctable ECC error"; break;
+	case 11: err_str = "DDRMC Microblaze Non-Correctable ECC error"; break;
+	case 15: err_str = "MMCM error"; break;
+	case 16: err_str = "HNICX Correctable error"; break;
+	case 17: err_str = "HNICX Non-Correctable error"; break;
+
+	case 18:
+		p = &mc_priv->stat;
+		memset(p, 0, sizeof(struct ecc_status));
+		p->error_type = MC5_ERR_TYPE_CE;
+		for (i = 0; i < NUM_CONTROLLERS; i++) {
+			if (get_ddr_info(&mc_priv->regs[i * REGS_PER_CONTROLLER], mc_priv)) {
+				adec_data = mc_priv->adec + ADEC_NUM * i;
+				handle_error(mc_priv, &mc_priv->stat, i, adec_data);
+			}
+		}
+		return 0;
+	case 19:
+		p = &mc_priv->stat;
+		memset(p, 0, sizeof(struct ecc_status));
+		p->error_type = MC5_ERR_TYPE_UE;
+		for (i = 0; i < NUM_CONTROLLERS; i++) {
+			if (get_ddr_info(&mc_priv->regs[i * REGS_PER_CONTROLLER], mc_priv)) {
+				adec_data = mc_priv->adec + ADEC_NUM * i;
+				handle_error(mc_priv, &mc_priv->stat, i, adec_data);
+			}
+		}
+		return 0;
+
+	case 21: err_str = "GT Non-Correctable error"; break;
+	case 22: err_str = "PL Sysmon Correctable error"; break;
+	case 23: err_str = "PL Sysmon Non-Correctable error"; break;
+	case 111: err_str = "LPX unexpected dfx activation error"; break;
+	case 114: err_str = "INT_LPD Non-Correctable error"; break;
+	case 116: err_str = "INT_OCM Non-Correctable error"; break;
+	case 117: err_str = "INT_FPD Correctable error"; break;
+	case 118: err_str = "INT_FPD Non-Correctable error"; break;
+	case 120: err_str = "INT_IOU Non-Correctable error"; break;
+	case 123: err_str = "err_int_irq from APU GIC Distributor"; break;
+	case 124: err_str = "fault_int_irq from APU GIC Distributor"; break;
+	case 132 ... 139: err_str = "FPX SPLITTER error"; break;
+	case 140: err_str = "APU Cluster 0 error"; break;
+	case 141: err_str = "APU Cluster 1 error"; break;
+	case 142: err_str = "APU Cluster 2 error"; break;
+	case 143: err_str = "APU Cluster 3 error"; break;
+	case 145: err_str = "WWDT1 LPX error"; break;
+	case 147: err_str = "IPI error"; break;
+	case 152 ... 153: err_str = "AFIFS error"; break;
+	case 154 ... 155: err_str = "LPX glitch error"; break;
+	case 185 ... 186: err_str = "FPX AFIFS error"; break;
+	case 195 ... 199: err_str = "AFIFM error"; break;
+	case 108: err_str = "PSM Correctable error"; break;
+	case 59: err_str = "PMC Correctable error"; break;
+	case 60: err_str = "PMC Uncorrectable error"; break;
+	case 43 ... 47: err_str = "PMC Sysmon error"; break;
+	case 163 ... 184: err_str = "RPU error"; break;
+	case 148: err_str = "OCM0 correctable error"; break;
+	case 149: err_str = "OCM1 correctable error"; break;
+	case 150: err_str = "OCM0 Un-correctable error"; break;
+	case 151: err_str = "OCM1 Un-correctable error"; break;
+	case 189: err_str = "PSX_CMN_3 PD block consolidated error"; break;
+	case 191: err_str = "FPD_INT_WRAP PD block consolidated error"; break;
+	case 232: err_str = "CRAM Un-Correctable error"; break;
+	default: err_str = "Unknown error"; break;
+	}
+
+	snprintf(mc_priv->message,
+		 sizeof(mc_priv->message),
+		 "[VERSAL_EDAC_ERR_ID: %d] Error type: %s", error_id, err_str);
+
+	/* Convert to bytes */
+	length = result[TOTAL_ERR_LENGTH] * 4;
+	log_non_standard_event(sec_type, &amd_versalnet_guid, mc_priv->message,
+			       sec_sev, (void *)&mc_priv->regs, length);
+
+	return 0;
+}
+
+static struct rpmsg_device_id amd_rpmsg_id_table[] = {
+	{ .name = "error_ipc" },
+	{ },
+};
+MODULE_DEVICE_TABLE(rpmsg, amd_rpmsg_id_table);
+
+static int rpmsg_probe(struct rpmsg_device *rpdev)
+{
+	struct rpmsg_channel_info chinfo;
+	struct mc_priv *pg;
+
+	pg = (struct mc_priv *)amd_rpmsg_id_table[0].driver_data;
+	chinfo.src = RPMSG_ADDR_ANY;
+	chinfo.dst = rpdev->dst;
+	strscpy(chinfo.name, amd_rpmsg_id_table[0].name, sizeof(chinfo.name));
+
+	pg->ept = rpmsg_create_ept(rpdev, rpmsg_cb, NULL, chinfo);
+	if (!pg->ept)
+		return dev_err_probe(&rpdev->dev, -ENXIO, "Failed to create ept for channel %s\n",
+				     chinfo.name);
+
+	dev_set_drvdata(&rpdev->dev, pg);
+
+	return 0;
+}
+
+static void rpmsg_remove(struct rpmsg_device *rpdev)
+{
+	struct mc_priv *mc_priv = dev_get_drvdata(&rpdev->dev);
+
+	rpmsg_destroy_ept(mc_priv->ept);
+	dev_set_drvdata(&rpdev->dev, NULL);
+}
+
+static struct rpmsg_driver amd_rpmsg_driver = {
+	.drv.name = KBUILD_MODNAME,
+	.probe = rpmsg_probe,
+	.remove = rpmsg_remove,
+	.callback = rpmsg_cb,
+	.id_table = amd_rpmsg_id_table,
+};
+
+static void versal_edac_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+static int init_versalnet(struct mc_priv *priv, struct platform_device *pdev)
+{
+	u32 num_chans, rank, dwidth, config;
+	struct edac_mc_layer layers[2];
+	struct mem_ctl_info *mci;
+	struct device *dev;
+	enum dev_type dt;
+	char *name;
+	int rc, i;
+
+	for (i = 0; i < NUM_CONTROLLERS; i++) {
+		config = priv->adec[CONF + i * ADEC_NUM];
+		num_chans = FIELD_GET(MC5_NUM_CHANS_MASK, config);
+		rank = 1 << FIELD_GET(MC5_RANK_MASK, config);
+		dwidth = FIELD_GET(MC5_BUS_WIDTH_MASK, config);
+
+		switch (dwidth) {
+		case XDDR5_BUS_WIDTH_16:
+			dt = DEV_X16;
+			break;
+		case XDDR5_BUS_WIDTH_32:
+			dt = DEV_X32;
+			break;
+		case XDDR5_BUS_WIDTH_64:
+			dt = DEV_X64;
+			break;
+		default:
+			dt = DEV_UNKNOWN;
+		}
+
+		if (dt == DEV_UNKNOWN)
+			continue;
+
+		/* Register an MC instance for each controller whose bus width is known. */
+		layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+		layers[0].size = rank;
+		layers[0].is_virt_csrow = true;
+		layers[1].type = EDAC_MC_LAYER_CHANNEL;
+		layers[1].size = num_chans;
+		layers[1].is_virt_csrow = false;
+
+		rc = -ENOMEM;
+		mci = edac_mc_alloc(i, ARRAY_SIZE(layers), layers,
+				    sizeof(struct mc_priv));
+		if (!mci) {
+			edac_printk(KERN_ERR, EDAC_MC, "Failed memory allocation for MC%d\n", i);
+			goto err_alloc;
+		}
+
+		priv->mci[i] = mci;
+		priv->dwidth = dt;
+
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (!dev)
+			goto err_alloc;
+		dev->release = versal_edac_release;
+		name = kasprintf(GFP_KERNEL, "versal-net-ddrmc5-edac-%d", i);
+		if (!name) {
+			kfree(dev);
+			goto err_alloc;
+		}
+		dev->init_name = name;
+		rc = device_register(dev);
+		if (rc)
+			goto err_alloc;
+
+		mci->pdev = dev;
+
+		platform_set_drvdata(pdev, priv);
+
+		mc_init(mci, dev);
+		rc = edac_mc_add_mc(mci);
+		if (rc) {
+			edac_printk(KERN_ERR, EDAC_MC, "Failed to register MC%d with EDAC core\n", i);
+			goto err_alloc;
+		}
+	}
+	return 0;
+
+err_alloc:
+	while (i--) {
+		mci = priv->mci[i];
+		if (!mci)
+			continue;
+
+		if (mci->pdev) {
+			device_unregister(mci->pdev);
+			edac_mc_del_mc(mci->pdev);
+		}
+
+		edac_mc_free(mci);
+	}
+
+	return rc;
+}
+
+static void remove_versalnet(struct mc_priv *priv)
+{
+	struct mem_ctl_info *mci;
+	int i;
+
+	for (i = 0; i < NUM_CONTROLLERS; i++) {
+		device_unregister(priv->mci[i]->pdev);
+		mci = edac_mc_del_mc(priv->mci[i]->pdev);
+		if (!mci)
+			return;
+
+		edac_mc_free(mci);
+	}
+}
+
+static int mc_probe(struct platform_device *pdev)
+{
+	struct device_node *r5_core_node;
+	struct mc_priv *priv;
+	struct rproc *rp;
+	int rc;
+
+	r5_core_node = of_parse_phandle(pdev->dev.of_node, "amd,rproc", 0);
+	if (!r5_core_node) {
+		dev_err(&pdev->dev, "amd,rproc: invalid phandle\n");
+		return -EINVAL;
+	}
+
+	rp = rproc_get_by_phandle(r5_core_node->phandle);
+	of_node_put(r5_core_node);
+	if (!rp)
+		return -EPROBE_DEFER;
+
+	rc = rproc_boot(rp);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to attach to remote processor\n");
+		goto err_rproc_boot;
+	}
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		rc = -ENOMEM;
+		goto err_alloc;
+	}
+
+	amd_rpmsg_id_table[0].driver_data = (kernel_ulong_t)priv;
+
+	rc = register_rpmsg_driver(&amd_rpmsg_driver);
+	if (rc) {
+		edac_printk(KERN_ERR, EDAC_MC, "Failed to register RPMsg driver: %d\n", rc);
+		goto err_alloc;
+	}
+
+	rc = setup_mcdi(priv);
+	if (rc)
+		goto err_unreg;
+
+	priv->mcdi->r5_rproc = rp;
+
+	rc = init_versalnet(priv, pdev);
+	if (rc)
+		goto err_init;
+
+	return 0;
+
+err_init:
+	cdx_mcdi_finish(priv->mcdi);
+
+err_unreg:
+	unregister_rpmsg_driver(&amd_rpmsg_driver);
+
+err_alloc:
+	rproc_shutdown(rp);
+
+err_rproc_boot:
+	rproc_put(rp);
+
+	return rc;
+}
+
+static void mc_remove(struct platform_device *pdev)
+{
+	struct mc_priv *priv = platform_get_drvdata(pdev);
+
+	unregister_rpmsg_driver(&amd_rpmsg_driver);
+	remove_versalnet(priv);
+	rproc_shutdown(priv->mcdi->r5_rproc);
+	cdx_mcdi_finish(priv->mcdi);
+}
+
+static const struct of_device_id amd_edac_match[] = {
+	{ .compatible = "xlnx,versal-net-ddrmc5", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, amd_edac_match);
+
+static struct platform_driver amd_ddr_edac_mc_driver = {
+	.driver = {
+		.name = "versal-net-edac",
+		.of_match_table = amd_edac_match,
+	},
+	.probe = mc_probe,
+	.remove = mc_remove,
+};
+
+module_platform_driver(amd_ddr_edac_mc_driver);
+
+MODULE_AUTHOR("AMD Inc");
+MODULE_DESCRIPTION("Versal NET EDAC driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/edac/wq.c b/drivers/edac/wq.c
new file mode 100644
index 000000000000..ad3f516627c5
--- /dev/null
+++ b/drivers/edac/wq.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "edac_module.h" + +static struct workqueue_struct *wq; + +bool edac_queue_work(struct delayed_work *work, unsigned long delay) +{ + return queue_delayed_work(wq, work, delay); +} +EXPORT_SYMBOL_GPL(edac_queue_work); + +bool edac_mod_work(struct delayed_work *work, unsigned long delay) +{ + return mod_delayed_work(wq, work, delay); +} +EXPORT_SYMBOL_GPL(edac_mod_work); + +bool edac_stop_work(struct delayed_work *work) +{ + bool ret; + + ret = cancel_delayed_work_sync(work); + flush_workqueue(wq); + + return ret; +} +EXPORT_SYMBOL_GPL(edac_stop_work); + +int edac_workqueue_setup(void) +{ + wq = alloc_ordered_workqueue("edac-poller", WQ_MEM_RECLAIM); + if (!wq) + return -ENODEV; + else + return 0; +} + +void edac_workqueue_teardown(void) +{ + destroy_workqueue(wq); + wq = NULL; +} diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c index c9db24d95caa..49ab5721aab2 100644 --- a/drivers/edac/x38_edac.c +++ b/drivers/edac/x38_edac.c @@ -14,9 +14,9 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/edac.h> -#include "edac_core.h" -#define X38_REVISION "1.1" +#include <linux/io-64-nonatomic-lo-hi.h> +#include "edac_module.h" #define EDAC_MOD_STR "x38_edac" @@ -161,11 +161,6 @@ static void x38_clear_error_info(struct mem_ctl_info *mci) X38_ERRSTS_BITS); } -static u64 x38_readq(const void __iomem *addr) -{ - return readl(addr) | (((u64)readl(addr + 4)) << 32); -} - static void x38_get_and_clear_error_info(struct mem_ctl_info *mci, struct x38_error_info *info) { @@ -183,9 +178,9 @@ static void x38_get_and_clear_error_info(struct mem_ctl_info *mci, if (!(info->errsts & X38_ERRSTS_BITS)) return; - info->eccerrlog[0] = x38_readq(window + X38_C0ECCERRLOG); + info->eccerrlog[0] = lo_hi_readq(window + X38_C0ECCERRLOG); if (x38_channel_num == 2) - info->eccerrlog[1] = x38_readq(window + X38_C1ECCERRLOG); + info->eccerrlog[1] = lo_hi_readq(window + X38_C1ECCERRLOG); pci_read_config_word(pdev, X38_ERRSTS, &info->errsts2); @@ -196,10 +191,10 @@ static void x38_get_and_clear_error_info(struct mem_ctl_info *mci, * should be UE info. 
*/ if ((info->errsts ^ info->errsts2) & X38_ERRSTS_BITS) { - info->eccerrlog[0] = x38_readq(window + X38_C0ECCERRLOG); + info->eccerrlog[0] = lo_hi_readq(window + X38_C0ECCERRLOG); if (x38_channel_num == 2) info->eccerrlog[1] = - x38_readq(window + X38_C1ECCERRLOG); + lo_hi_readq(window + X38_C1ECCERRLOG); } x38_clear_error_info(mci); @@ -243,13 +238,11 @@ static void x38_check(struct mem_ctl_info *mci) { struct x38_error_info info; - edac_dbg(1, "MC%d\n", mci->mc_idx); x38_get_and_clear_error_info(mci, &info); x38_process_error_info(mci, &info); } - -void __iomem *x38_map_mchbar(struct pci_dev *pdev) +static void __iomem *x38_map_mchbar(struct pci_dev *pdev) { union { u64 mchbar; @@ -272,7 +265,7 @@ void __iomem *x38_map_mchbar(struct pci_dev *pdev) return NULL; } - window = ioremap_nocache(u.mchbar, X38_MMR_WINDOW_SIZE); + window = ioremap(u.mchbar, X38_MMR_WINDOW_SIZE); if (!window) printk(KERN_ERR "x38: cannot map mmio space at 0x%llx\n", (unsigned long long)u.mchbar); @@ -361,7 +354,6 @@ static int x38_probe1(struct pci_dev *pdev, int dev_idx) mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = X38_REVISION; mci->ctl_name = x38_devs[dev_idx].ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = x38_check; @@ -449,7 +441,7 @@ static void x38_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(x38_pci_tbl) = { +static const struct pci_device_id x38_pci_tbl[] = { { PCI_VEND_DEV(INTEL, X38_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, X38}, @@ -504,8 +496,7 @@ fail1: pci_unregister_driver(&x38_driver); fail0: - if (mci_pdev) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); return pci_rc; } diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c new file mode 100644 index 000000000000..9955396c9a52 --- /dev/null +++ b/drivers/edac/xgene_edac.c @@ -0,0 +1,2042 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * APM X-Gene SoC EDAC (error detection and correction) + * + * Copyright (c) 2015, Applied Micro Circuits Corporation + * Author: Feng Kan <fkan@apm.com> + * Loc Ho <lho@apm.com> + */ + +#include <linux/ctype.h> +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/regmap.h> +#include <linux/string_choices.h> + +#include "edac_module.h" + +#define EDAC_MOD_STR "xgene_edac" + +/* Global error configuration status registers (CSR) */ +#define PCPHPERRINTSTS 0x0000 +#define PCPHPERRINTMSK 0x0004 +#define MCU_CTL_ERR_MASK BIT(12) +#define IOB_PA_ERR_MASK BIT(11) +#define IOB_BA_ERR_MASK BIT(10) +#define IOB_XGIC_ERR_MASK BIT(9) +#define IOB_RB_ERR_MASK BIT(8) +#define L3C_UNCORR_ERR_MASK BIT(5) +#define MCU_UNCORR_ERR_MASK BIT(4) +#define PMD3_MERR_MASK BIT(3) +#define PMD2_MERR_MASK BIT(2) +#define PMD1_MERR_MASK BIT(1) +#define PMD0_MERR_MASK BIT(0) +#define PCPLPERRINTSTS 0x0008 +#define PCPLPERRINTMSK 0x000C +#define CSW_SWITCH_TRACE_ERR_MASK BIT(2) +#define L3C_CORR_ERR_MASK BIT(1) +#define MCU_CORR_ERR_MASK BIT(0) +#define MEMERRINTSTS 0x0010 +#define MEMERRINTMSK 0x0014 + +struct xgene_edac { + struct device *dev; + struct regmap *csw_map; + struct regmap *mcba_map; + struct regmap *mcbb_map; + struct regmap *efuse_map; + struct regmap *rb_map; + void __iomem *pcp_csr; + spinlock_t lock; + struct dentry *dfs; + + struct list_head mcus; + struct list_head pmds; + struct list_head l3s; + struct list_head socs; + + struct mutex mc_lock; + int mc_active_mask; + int mc_registered_mask; 
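+
+	/*
+	 * mc_active_mask is the set of MCUs strapped active in hardware;
+	 * mc_registered_mask tracks which of those have registered with
+	 * EDAC. The top-level MCU interrupt is only unmasked once the two
+	 * masks match (see xgene_edac_mc_irq_ctl()).
+	 */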
+}; + +static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val) +{ + *val = readl(edac->pcp_csr + reg); +} + +static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg, + u32 bits_mask) +{ + u32 val; + + spin_lock(&edac->lock); + val = readl(edac->pcp_csr + reg); + val &= ~bits_mask; + writel(val, edac->pcp_csr + reg); + spin_unlock(&edac->lock); +} + +static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg, + u32 bits_mask) +{ + u32 val; + + spin_lock(&edac->lock); + val = readl(edac->pcp_csr + reg); + val |= bits_mask; + writel(val, edac->pcp_csr + reg); + spin_unlock(&edac->lock); +} + +/* Memory controller error CSR */ +#define MCU_MAX_RANK 8 +#define MCU_RANK_STRIDE 0x40 + +#define MCUGECR 0x0110 +#define MCU_GECR_DEMANDUCINTREN_MASK BIT(0) +#define MCU_GECR_BACKUCINTREN_MASK BIT(1) +#define MCU_GECR_CINTREN_MASK BIT(2) +#define MUC_GECR_MCUADDRERREN_MASK BIT(9) +#define MCUGESR 0x0114 +#define MCU_GESR_ADDRNOMATCH_ERR_MASK BIT(7) +#define MCU_GESR_ADDRMULTIMATCH_ERR_MASK BIT(6) +#define MCU_GESR_PHYP_ERR_MASK BIT(3) +#define MCUESRR0 0x0314 +#define MCU_ESRR_MULTUCERR_MASK BIT(3) +#define MCU_ESRR_BACKUCERR_MASK BIT(2) +#define MCU_ESRR_DEMANDUCERR_MASK BIT(1) +#define MCU_ESRR_CERR_MASK BIT(0) +#define MCUESRRA0 0x0318 +#define MCUEBLRR0 0x031c +#define MCU_EBLRR_ERRBANK_RD(src) (((src) & 0x00000007) >> 0) +#define MCUERCRR0 0x0320 +#define MCU_ERCRR_ERRROW_RD(src) (((src) & 0xFFFF0000) >> 16) +#define MCU_ERCRR_ERRCOL_RD(src) ((src) & 0x00000FFF) +#define MCUSBECNT0 0x0324 +#define MCU_SBECNT_COUNT(src) ((src) & 0xFFFF) + +#define CSW_CSWCR 0x0000 +#define CSW_CSWCR_DUALMCB_MASK BIT(0) + +#define MCBADDRMR 0x0000 +#define MCBADDRMR_MCU_INTLV_MODE_MASK BIT(3) +#define MCBADDRMR_DUALMCU_MODE_MASK BIT(2) +#define MCBADDRMR_MCB_INTLV_MODE_MASK BIT(1) +#define MCBADDRMR_ADDRESS_MODE_MASK BIT(0) + +struct xgene_edac_mc_ctx { + struct list_head next; + char *name; + struct mem_ctl_info *mci; + struct xgene_edac *edac; + void __iomem *mcu_csr; + u32 mcu_id; +}; + +static ssize_t xgene_edac_mc_err_inject_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct mem_ctl_info *mci = file->private_data; + struct xgene_edac_mc_ctx *ctx = mci->pvt_info; + int i; + + for (i = 0; i < MCU_MAX_RANK; i++) { + writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK | + MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK, + ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE); + } + return count; +} + +static const struct file_operations xgene_edac_mc_debug_inject_fops = { + .open = simple_open, + .write = xgene_edac_mc_err_inject_write, + .llseek = generic_file_llseek, +}; + +static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci) +{ + if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) + return; + + if (!mci->debugfs) + return; + + edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci, + &xgene_edac_mc_debug_inject_fops); +} + +static void xgene_edac_mc_check(struct mem_ctl_info *mci) +{ + struct xgene_edac_mc_ctx *ctx = mci->pvt_info; + unsigned int pcp_hp_stat; + unsigned int pcp_lp_stat; + u32 reg; + u32 rank; + u32 bank; + u32 count; + u32 col_row; + + xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat); + xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat); + if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) || + (MCU_CTL_ERR_MASK & pcp_hp_stat) || + (MCU_CORR_ERR_MASK & pcp_lp_stat))) + return; + + for (rank = 0; rank < MCU_MAX_RANK; rank++) { + reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE); + + 
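/*
+		 * Each rank's error CSRs sit MCU_RANK_STRIDE bytes apart;
+		 * reg now holds rank N's MCUESRR0 status, decoded below.
+		 */
+		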
/* Detect an uncorrectable memory error */
+		if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
+			   MCU_ESRR_BACKUCERR_MASK)) {
+			edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
+				"MCU uncorrectable error at rank %d\n", rank);
+
+			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
+		}
+
+		/* Detect a correctable memory error */
+		if (reg & MCU_ESRR_CERR_MASK) {
+			bank = readl(ctx->mcu_csr + MCUEBLRR0 +
+				     rank * MCU_RANK_STRIDE);
+			col_row = readl(ctx->mcu_csr + MCUERCRR0 +
+					rank * MCU_RANK_STRIDE);
+			count = readl(ctx->mcu_csr + MCUSBECNT0 +
+				      rank * MCU_RANK_STRIDE);
+			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
+				"MCU correctable error at rank %d bank %d column %d row %d count %d\n",
+				rank, MCU_EBLRR_ERRBANK_RD(bank),
+				MCU_ERCRR_ERRCOL_RD(col_row),
+				MCU_ERCRR_ERRROW_RD(col_row),
+				MCU_SBECNT_COUNT(count));
+
+			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
+		}
+
+		/* Clear all error registers */
+		writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
+		writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
+		writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
+		       rank * MCU_RANK_STRIDE);
+		writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
+	}
+
+	/* Detect memory controller error */
+	reg = readl(ctx->mcu_csr + MCUGESR);
+	if (reg) {
+		if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
+			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
+				"MCU address mismatch error\n");
+		if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
+			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
+				"MCU address multi-match error\n");
+
+		writel(reg, ctx->mcu_csr + MCUGESR);
+	}
+}
+
+static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
+{
+	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
+	unsigned int val;
+
+	if (edac_op_state != EDAC_OPSTATE_INT)
+		return;
+
+	mutex_lock(&ctx->edac->mc_lock);
+
+	/*
+	 * As there is only a single bit to enable errors and the interrupt
+	 * mask, we must only enable the top-level interrupt after all MCUs
+	 * have registered. Otherwise, if there is an error and the
+	 * corresponding MCU has not registered, the interrupt will never get
+	 * cleared. To determine that all MCUs have registered, we keep track
+	 * of active MCUs and registered MCUs.
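+	 * For example, with two MCUs active (mc_active_mask == 0x3), the
+	 * top-level interrupt is unmasked only once both instances have set
+	 * their bit in mc_registered_mask; unregistering either MCU masks
+	 * it again before the MCU-local enables are cleared.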
+	 */
+	if (enable) {
+		/* Set registered MCU bit */
+		ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;
+
+		/* Enable interrupt after all active MCUs have registered */
+		if (ctx->edac->mc_registered_mask ==
+		    ctx->edac->mc_active_mask) {
+			/* Enable memory controller top level interrupt */
+			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
+					       MCU_UNCORR_ERR_MASK |
+					       MCU_CTL_ERR_MASK);
+			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
+					       MCU_CORR_ERR_MASK);
+		}
+
+		/* Enable MCU interrupt and error reporting */
+		val = readl(ctx->mcu_csr + MCUGECR);
+		val |= MCU_GECR_DEMANDUCINTREN_MASK |
+		       MCU_GECR_BACKUCINTREN_MASK |
+		       MCU_GECR_CINTREN_MASK |
+		       MUC_GECR_MCUADDRERREN_MASK;
+		writel(val, ctx->mcu_csr + MCUGECR);
+	} else {
+		/* Disable MCU interrupt */
+		val = readl(ctx->mcu_csr + MCUGECR);
+		val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
+			 MCU_GECR_BACKUCINTREN_MASK |
+			 MCU_GECR_CINTREN_MASK |
+			 MUC_GECR_MCUADDRERREN_MASK);
+		writel(val, ctx->mcu_csr + MCUGECR);
+
+		/* Disable memory controller top level interrupt */
+		xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
+				       MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
+		xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
+				       MCU_CORR_ERR_MASK);
+
+		/* Clear registered MCU bit */
+		ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
+	}
+
+	mutex_unlock(&ctx->edac->mc_lock);
+}
+
+static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
+{
+	unsigned int reg;
+	u32 mcu_mask;
+
+	if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg))
+		return 0;
+
+	if (reg & CSW_CSWCR_DUALMCB_MASK) {
+		/*
+		 * Dual MCB active - Determine if all 4 active or just MCU0
+		 * and MCU2 active
+		 */
+		if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg))
+			return 0;
+		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
+	} else {
+		/*
+		 * Single MCB active - Determine if MCU0/MCU1 or just MCU0
+		 * active
+		 */
+		if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg))
+			return 0;
+		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
+	}
+
+	/* Save the active MC mask if it hasn't been set already */
+	if (!ctx->edac->mc_active_mask)
+		ctx->edac->mc_active_mask = mcu_mask;
+
+	return (mcu_mask & (1 << mc_idx)) ?
1 : 0; +} + +static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np) +{ + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; + struct xgene_edac_mc_ctx tmp_ctx; + struct xgene_edac_mc_ctx *ctx; + struct resource res; + int rc; + + memset(&tmp_ctx, 0, sizeof(tmp_ctx)); + tmp_ctx.edac = edac; + + if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL)) + return -ENOMEM; + + rc = of_address_to_resource(np, 0, &res); + if (rc < 0) { + dev_err(edac->dev, "no MCU resource address\n"); + goto err_group; + } + tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res); + if (IS_ERR(tmp_ctx.mcu_csr)) { + dev_err(edac->dev, "unable to map MCU resource\n"); + rc = PTR_ERR(tmp_ctx.mcu_csr); + goto err_group; + } + + /* Ignore non-active MCU */ + if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) { + dev_err(edac->dev, "no memory-controller property\n"); + rc = -ENODEV; + goto err_group; + } + if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) { + rc = -ENODEV; + goto err_group; + } + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 4; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 2; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers, + sizeof(*ctx)); + if (!mci) { + rc = -ENOMEM; + goto err_group; + } + + ctx = mci->pvt_info; + *ctx = tmp_ctx; /* Copy over resource value */ + ctx->name = "xgene_edac_mc_err"; + ctx->mci = mci; + mci->pdev = &mci->dev; + mci->ctl_name = ctx->name; + mci->dev_name = ctx->name; + + mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 | + MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = EDAC_MOD_STR; + mci->ctl_page_to_phys = NULL; + mci->scrub_cap = SCRUB_FLAG_HW_SRC; + mci->scrub_mode = SCRUB_HW_SRC; + + if (edac_op_state == EDAC_OPSTATE_POLL) + mci->edac_check = xgene_edac_mc_check; + + if (edac_mc_add_mc(mci)) { + dev_err(edac->dev, "edac_mc_add_mc failed\n"); + rc = -EINVAL; + goto err_free; + } + + xgene_edac_mc_create_debugfs_node(mci); + + list_add(&ctx->next, &edac->mcus); + + xgene_edac_mc_irq_ctl(mci, true); + + devres_remove_group(edac->dev, xgene_edac_mc_add); + + dev_info(edac->dev, "X-Gene EDAC MC registered\n"); + return 0; + +err_free: + edac_mc_free(mci); +err_group: + devres_release_group(edac->dev, xgene_edac_mc_add); + return rc; +} + +static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu) +{ + xgene_edac_mc_irq_ctl(mcu->mci, false); + edac_mc_del_mc(&mcu->mci->dev); + edac_mc_free(mcu->mci); + return 0; +} + +/* CPU L1/L2 error CSR */ +#define MAX_CPU_PER_PMD 2 +#define CPU_CSR_STRIDE 0x00100000 +#define CPU_L2C_PAGE 0x000D0000 +#define CPU_MEMERR_L2C_PAGE 0x000E0000 +#define CPU_MEMERR_CPU_PAGE 0x000F0000 + +#define MEMERR_CPU_ICFECR_PAGE_OFFSET 0x0000 +#define MEMERR_CPU_ICFESR_PAGE_OFFSET 0x0004 +#define MEMERR_CPU_ICFESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24) +#define MEMERR_CPU_ICFESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16) +#define MEMERR_CPU_ICFESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8) +#define MEMERR_CPU_ICFESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4) +#define MEMERR_CPU_ICFESR_MULTCERR_MASK BIT(2) +#define MEMERR_CPU_ICFESR_CERR_MASK BIT(0) +#define MEMERR_CPU_LSUESR_PAGE_OFFSET 0x000c +#define MEMERR_CPU_LSUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24) +#define MEMERR_CPU_LSUESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16) +#define 
MEMERR_CPU_LSUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8) +#define MEMERR_CPU_LSUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4) +#define MEMERR_CPU_LSUESR_MULTCERR_MASK BIT(2) +#define MEMERR_CPU_LSUESR_CERR_MASK BIT(0) +#define MEMERR_CPU_LSUECR_PAGE_OFFSET 0x0008 +#define MEMERR_CPU_MMUECR_PAGE_OFFSET 0x0010 +#define MEMERR_CPU_MMUESR_PAGE_OFFSET 0x0014 +#define MEMERR_CPU_MMUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24) +#define MEMERR_CPU_MMUESR_ERRINDEX_RD(src) (((src) & 0x007F0000) >> 16) +#define MEMERR_CPU_MMUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8) +#define MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK BIT(7) +#define MEMERR_CPU_MMUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4) +#define MEMERR_CPU_MMUESR_MULTCERR_MASK BIT(2) +#define MEMERR_CPU_MMUESR_CERR_MASK BIT(0) +#define MEMERR_CPU_ICFESRA_PAGE_OFFSET 0x0804 +#define MEMERR_CPU_LSUESRA_PAGE_OFFSET 0x080c +#define MEMERR_CPU_MMUESRA_PAGE_OFFSET 0x0814 + +#define MEMERR_L2C_L2ECR_PAGE_OFFSET 0x0000 +#define MEMERR_L2C_L2ESR_PAGE_OFFSET 0x0004 +#define MEMERR_L2C_L2ESR_ERRSYN_RD(src) (((src) & 0xFF000000) >> 24) +#define MEMERR_L2C_L2ESR_ERRWAY_RD(src) (((src) & 0x00FC0000) >> 18) +#define MEMERR_L2C_L2ESR_ERRCPU_RD(src) (((src) & 0x00020000) >> 17) +#define MEMERR_L2C_L2ESR_ERRGROUP_RD(src) (((src) & 0x0000E000) >> 13) +#define MEMERR_L2C_L2ESR_ERRACTION_RD(src) (((src) & 0x00001C00) >> 10) +#define MEMERR_L2C_L2ESR_ERRTYPE_RD(src) (((src) & 0x00000300) >> 8) +#define MEMERR_L2C_L2ESR_MULTUCERR_MASK BIT(3) +#define MEMERR_L2C_L2ESR_MULTICERR_MASK BIT(2) +#define MEMERR_L2C_L2ESR_UCERR_MASK BIT(1) +#define MEMERR_L2C_L2ESR_ERR_MASK BIT(0) +#define MEMERR_L2C_L2EALR_PAGE_OFFSET 0x0008 +#define CPUX_L2C_L2RTOCR_PAGE_OFFSET 0x0010 +#define MEMERR_L2C_L2EAHR_PAGE_OFFSET 0x000c +#define CPUX_L2C_L2RTOSR_PAGE_OFFSET 0x0014 +#define MEMERR_L2C_L2RTOSR_MULTERR_MASK BIT(1) +#define MEMERR_L2C_L2RTOSR_ERR_MASK BIT(0) +#define CPUX_L2C_L2RTOALR_PAGE_OFFSET 0x0018 +#define CPUX_L2C_L2RTOAHR_PAGE_OFFSET 0x001c +#define MEMERR_L2C_L2ESRA_PAGE_OFFSET 0x0804 + +/* + * Processor Module Domain (PMD) context - Context for a pair of processors. + * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of + * its own L1 cache. 
+ */ +struct xgene_edac_pmd_ctx { + struct list_head next; + struct device ddev; + char *name; + struct xgene_edac *edac; + struct edac_device_ctl_info *edac_dev; + void __iomem *pmd_csr; + u32 pmd; + int version; +}; + +static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev, + int cpu_idx) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *pg_f; + u32 val; + + pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE; + + val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET); + if (!val) + goto chk_lsu; + dev_err(edac_dev->dev, + "CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n", + ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val, + MEMERR_CPU_ICFESR_ERRWAY_RD(val), + MEMERR_CPU_ICFESR_ERRINDEX_RD(val), + MEMERR_CPU_ICFESR_ERRINFO_RD(val)); + if (val & MEMERR_CPU_ICFESR_CERR_MASK) + dev_err(edac_dev->dev, "One or more correctable error\n"); + if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK) + dev_err(edac_dev->dev, "Multiple correctable error\n"); + switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) { + case 1: + dev_err(edac_dev->dev, "L1 TLB multiple hit\n"); + break; + case 2: + dev_err(edac_dev->dev, "Way select multiple hit\n"); + break; + case 3: + dev_err(edac_dev->dev, "Physical tag parity error\n"); + break; + case 4: + case 5: + dev_err(edac_dev->dev, "L1 data parity error\n"); + break; + case 6: + dev_err(edac_dev->dev, "L1 pre-decode parity error\n"); + break; + } + + /* Clear any HW errors */ + writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET); + + if (val & (MEMERR_CPU_ICFESR_CERR_MASK | + MEMERR_CPU_ICFESR_MULTCERR_MASK)) + edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name); + +chk_lsu: + val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET); + if (!val) + goto chk_mmu; + dev_err(edac_dev->dev, + "CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n", + ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val, + MEMERR_CPU_LSUESR_ERRWAY_RD(val), + MEMERR_CPU_LSUESR_ERRINDEX_RD(val), + MEMERR_CPU_LSUESR_ERRINFO_RD(val)); + if (val & MEMERR_CPU_LSUESR_CERR_MASK) + dev_err(edac_dev->dev, "One or more correctable error\n"); + if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK) + dev_err(edac_dev->dev, "Multiple correctable error\n"); + switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) { + case 0: + dev_err(edac_dev->dev, "Load tag error\n"); + break; + case 1: + dev_err(edac_dev->dev, "Load data error\n"); + break; + case 2: + dev_err(edac_dev->dev, "WSL multihit error\n"); + break; + case 3: + dev_err(edac_dev->dev, "Store tag error\n"); + break; + case 4: + dev_err(edac_dev->dev, + "DTB multihit from load pipeline error\n"); + break; + case 5: + dev_err(edac_dev->dev, + "DTB multihit from store pipeline error\n"); + break; + } + + /* Clear any HW errors */ + writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET); + + if (val & (MEMERR_CPU_LSUESR_CERR_MASK | + MEMERR_CPU_LSUESR_MULTCERR_MASK)) + edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name); + +chk_mmu: + val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET); + if (!val) + return; + dev_err(edac_dev->dev, + "CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n", + ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val, + MEMERR_CPU_MMUESR_ERRWAY_RD(val), + MEMERR_CPU_MMUESR_ERRINDEX_RD(val), + MEMERR_CPU_MMUESR_ERRINFO_RD(val), + val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? 
"LSU" : "ICF"); + if (val & MEMERR_CPU_MMUESR_CERR_MASK) + dev_err(edac_dev->dev, "One or more correctable error\n"); + if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK) + dev_err(edac_dev->dev, "Multiple correctable error\n"); + switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) { + case 0: + dev_err(edac_dev->dev, "Stage 1 UTB hit error\n"); + break; + case 1: + dev_err(edac_dev->dev, "Stage 1 UTB miss error\n"); + break; + case 2: + dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n"); + break; + case 3: + dev_err(edac_dev->dev, "TMO operation single bank error\n"); + break; + case 4: + dev_err(edac_dev->dev, "Stage 2 UTB error\n"); + break; + case 5: + dev_err(edac_dev->dev, "Stage 2 UTB miss error\n"); + break; + case 6: + dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n"); + break; + case 7: + dev_err(edac_dev->dev, "TMO operation multiple bank error\n"); + break; + } + + /* Clear any HW errors */ + writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET); + + edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name); +} + +static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *pg_d; + void __iomem *pg_e; + u32 val_hi; + u32 val_lo; + u32 val; + + /* Check L2 */ + pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE; + val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET); + if (!val) + goto chk_l2c; + val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET); + val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET); + dev_err(edac_dev->dev, + "PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n", + ctx->pmd, val, val_hi, val_lo); + dev_err(edac_dev->dev, + "ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n", + MEMERR_L2C_L2ESR_ERRSYN_RD(val), + MEMERR_L2C_L2ESR_ERRWAY_RD(val), + MEMERR_L2C_L2ESR_ERRCPU_RD(val), + MEMERR_L2C_L2ESR_ERRGROUP_RD(val), + MEMERR_L2C_L2ESR_ERRACTION_RD(val)); + + if (val & MEMERR_L2C_L2ESR_ERR_MASK) + dev_err(edac_dev->dev, "One or more correctable error\n"); + if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK) + dev_err(edac_dev->dev, "Multiple correctable error\n"); + if (val & MEMERR_L2C_L2ESR_UCERR_MASK) + dev_err(edac_dev->dev, "One or more uncorrectable error\n"); + if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK) + dev_err(edac_dev->dev, "Multiple uncorrectable error\n"); + + switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) { + case 0: + dev_err(edac_dev->dev, "Outbound SDB parity error\n"); + break; + case 1: + dev_err(edac_dev->dev, "Inbound SDB parity error\n"); + break; + case 2: + dev_err(edac_dev->dev, "Tag ECC error\n"); + break; + case 3: + dev_err(edac_dev->dev, "Data ECC error\n"); + break; + } + + /* Clear any HW errors */ + writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET); + + if (val & (MEMERR_L2C_L2ESR_ERR_MASK | + MEMERR_L2C_L2ESR_MULTICERR_MASK)) + edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name); + if (val & (MEMERR_L2C_L2ESR_UCERR_MASK | + MEMERR_L2C_L2ESR_MULTUCERR_MASK)) + edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name); + +chk_l2c: + /* Check if any memory request timed out on L2 cache */ + pg_d = ctx->pmd_csr + CPU_L2C_PAGE; + val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET); + if (val) { + val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET); + val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET); + dev_err(edac_dev->dev, + "PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n", + ctx->pmd, val, val_hi, val_lo); + writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET); + } +} + +static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev) +{ + 
struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + unsigned int pcp_hp_stat; + int i; + + xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat); + if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat)) + return; + + /* Check CPU L1 error */ + for (i = 0; i < MAX_CPU_PER_PMD; i++) + xgene_edac_pmd_l1_check(edac_dev, i); + + /* Check CPU L2 error */ + xgene_edac_pmd_l2_check(edac_dev); +} + +static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev, + int cpu) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE + + CPU_MEMERR_CPU_PAGE; + + /* + * Enable CPU memory error: + * MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA + */ + writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET); + writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET); + writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET); +} + +static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE; + void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE; + + /* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */ + writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET); + /* Configure L2C HW request time out feature if supported */ + if (ctx->version > 1) + writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET); +} + +static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev, + bool enable) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + int i; + + /* Enable PMD error interrupt */ + if (edac_dev->op_state == OP_RUNNING_INTERRUPT) { + if (enable) + xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK, + PMD0_MERR_MASK << ctx->pmd); + else + xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK, + PMD0_MERR_MASK << ctx->pmd); + } + + if (enable) { + xgene_edac_pmd_hw_cfg(edac_dev); + + /* Two CPUs per a PMD */ + for (i = 0; i < MAX_CPU_PER_PMD; i++) + xgene_edac_pmd_cpu_hw_cfg(edac_dev, i); + } +} + +static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dev = file->private_data; + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *cpux_pg_f; + int i; + + for (i = 0; i < MAX_CPU_PER_PMD; i++) { + cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE + + CPU_MEMERR_CPU_PAGE; + + writel(MEMERR_CPU_ICFESR_MULTCERR_MASK | + MEMERR_CPU_ICFESR_CERR_MASK, + cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET); + writel(MEMERR_CPU_LSUESR_MULTCERR_MASK | + MEMERR_CPU_LSUESR_CERR_MASK, + cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET); + writel(MEMERR_CPU_MMUESR_MULTCERR_MASK | + MEMERR_CPU_MMUESR_CERR_MASK, + cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET); + } + return count; +} + +static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dev = file->private_data; + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE; + + writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK | + MEMERR_L2C_L2ESR_MULTICERR_MASK | + MEMERR_L2C_L2ESR_UCERR_MASK | + MEMERR_L2C_L2ESR_ERR_MASK, + pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET); + return count; +} + +static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = { + { + .open = simple_open, + .write = xgene_edac_pmd_l1_inject_ctrl_write, + .llseek = generic_file_llseek, }, + { + .open = 
simple_open, + .write = xgene_edac_pmd_l2_inject_ctrl_write, + .llseek = generic_file_llseek, }, + { } +}; + +static void +xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + struct dentry *dbgfs_dir; + char name[10]; + + if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs) + return; + + snprintf(name, sizeof(name), "PMD%d", ctx->pmd); + dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs); + if (!dbgfs_dir) + return; + + edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev, + &xgene_edac_pmd_debug_inject_fops[0]); + edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev, + &xgene_edac_pmd_debug_inject_fops[1]); +} + +static int xgene_edac_pmd_available(u32 efuse, int pmd) +{ + return (efuse & (1 << pmd)) ? 0 : 1; +} + +static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np, + int version) +{ + struct edac_device_ctl_info *edac_dev; + struct xgene_edac_pmd_ctx *ctx; + struct resource res; + char edac_name[10]; + u32 pmd; + int rc; + u32 val; + + if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL)) + return -ENOMEM; + + /* Determine if this PMD is disabled */ + if (of_property_read_u32(np, "pmd-controller", &pmd)) { + dev_err(edac->dev, "no pmd-controller property\n"); + rc = -ENODEV; + goto err_group; + } + rc = regmap_read(edac->efuse_map, 0, &val); + if (rc) + goto err_group; + if (!xgene_edac_pmd_available(val, pmd)) { + rc = -ENODEV; + goto err_group; + } + + snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd); + edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx), + edac_name, 1, "l2c", 1, 2, + edac_device_alloc_index()); + if (!edac_dev) { + rc = -ENOMEM; + goto err_group; + } + + ctx = edac_dev->pvt_info; + ctx->name = "xgene_pmd_err"; + ctx->pmd = pmd; + ctx->edac = edac; + ctx->edac_dev = edac_dev; + ctx->ddev = *edac->dev; + ctx->version = version; + edac_dev->dev = &ctx->ddev; + edac_dev->ctl_name = ctx->name; + edac_dev->dev_name = ctx->name; + edac_dev->mod_name = EDAC_MOD_STR; + + rc = of_address_to_resource(np, 0, &res); + if (rc < 0) { + dev_err(edac->dev, "no PMD resource address\n"); + goto err_free; + } + ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res); + if (IS_ERR(ctx->pmd_csr)) { + dev_err(edac->dev, + "devm_ioremap_resource failed for PMD resource address\n"); + rc = PTR_ERR(ctx->pmd_csr); + goto err_free; + } + + if (edac_op_state == EDAC_OPSTATE_POLL) + edac_dev->edac_check = xgene_edac_pmd_check; + + xgene_edac_pmd_create_debugfs_nodes(edac_dev); + + rc = edac_device_add_device(edac_dev); + if (rc > 0) { + dev_err(edac->dev, "edac_device_add_device failed\n"); + rc = -ENOMEM; + goto err_free; + } + + if (edac_op_state == EDAC_OPSTATE_INT) + edac_dev->op_state = OP_RUNNING_INTERRUPT; + + list_add(&ctx->next, &edac->pmds); + + xgene_edac_pmd_hw_ctl(edac_dev, 1); + + devres_remove_group(edac->dev, xgene_edac_pmd_add); + + dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd); + return 0; + +err_free: + edac_device_free_ctl_info(edac_dev); +err_group: + devres_release_group(edac->dev, xgene_edac_pmd_add); + return rc; +} + +static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd) +{ + struct edac_device_ctl_info *edac_dev = pmd->edac_dev; + + xgene_edac_pmd_hw_ctl(edac_dev, 0); + edac_device_del_device(edac_dev->dev); + edac_device_free_ctl_info(edac_dev); + return 0; +} + +/* L3 Error device */ +#define L3C_ESR (0x0A * 4) +#define L3C_ESR_DATATAG_MASK BIT(9) +#define 
L3C_ESR_MULTIHIT_MASK BIT(8) +#define L3C_ESR_UCEVICT_MASK BIT(6) +#define L3C_ESR_MULTIUCERR_MASK BIT(5) +#define L3C_ESR_MULTICERR_MASK BIT(4) +#define L3C_ESR_UCERR_MASK BIT(3) +#define L3C_ESR_CERR_MASK BIT(2) +#define L3C_ESR_UCERRINTR_MASK BIT(1) +#define L3C_ESR_CERRINTR_MASK BIT(0) +#define L3C_ECR (0x0B * 4) +#define L3C_ECR_UCINTREN BIT(3) +#define L3C_ECR_CINTREN BIT(2) +#define L3C_UCERREN BIT(1) +#define L3C_CERREN BIT(0) +#define L3C_ELR (0x0C * 4) +#define L3C_ELR_ERRSYN(src) ((src & 0xFF800000) >> 23) +#define L3C_ELR_ERRWAY(src) ((src & 0x007E0000) >> 17) +#define L3C_ELR_AGENTID(src) ((src & 0x0001E000) >> 13) +#define L3C_ELR_ERRGRP(src) ((src & 0x00000F00) >> 8) +#define L3C_ELR_OPTYPE(src) ((src & 0x000000F0) >> 4) +#define L3C_ELR_PADDRHIGH(src) (src & 0x0000000F) +#define L3C_AELR (0x0D * 4) +#define L3C_BELR (0x0E * 4) +#define L3C_BELR_BANK(src) (src & 0x0000000F) + +struct xgene_edac_dev_ctx { + struct list_head next; + struct device ddev; + char *name; + struct xgene_edac *edac; + struct edac_device_ctl_info *edac_dev; + int edac_idx; + void __iomem *dev_csr; + int version; +}; + +/* + * Version 1 of the L3 controller has broken single bit correctable logic for + * certain error syndromes. Log them as uncorrectable in that case. + */ +static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr) +{ + if (l3cesr & L3C_ESR_DATATAG_MASK) { + switch (L3C_ELR_ERRSYN(l3celr)) { + case 0x13C: + case 0x0B4: + case 0x007: + case 0x00D: + case 0x00E: + case 0x019: + case 0x01A: + case 0x01C: + case 0x04E: + case 0x041: + return true; + } + } else if (L3C_ELR_ERRWAY(l3celr) == 9) + return true; + + return false; +} + +static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info; + u32 l3cesr; + u32 l3celr; + u32 l3caelr; + u32 l3cbelr; + + l3cesr = readl(ctx->dev_csr + L3C_ESR); + if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK))) + return; + + if (l3cesr & L3C_ESR_UCERR_MASK) + dev_err(edac_dev->dev, "L3C uncorrectable error\n"); + if (l3cesr & L3C_ESR_CERR_MASK) + dev_warn(edac_dev->dev, "L3C correctable error\n"); + + l3celr = readl(ctx->dev_csr + L3C_ELR); + l3caelr = readl(ctx->dev_csr + L3C_AELR); + l3cbelr = readl(ctx->dev_csr + L3C_BELR); + if (l3cesr & L3C_ESR_MULTIHIT_MASK) + dev_err(edac_dev->dev, "L3C multiple hit error\n"); + if (l3cesr & L3C_ESR_UCEVICT_MASK) + dev_err(edac_dev->dev, + "L3C dropped eviction of line with error\n"); + if (l3cesr & L3C_ESR_MULTIUCERR_MASK) + dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n"); + if (l3cesr & L3C_ESR_DATATAG_MASK) + dev_err(edac_dev->dev, + "L3C data error syndrome 0x%X group 0x%X\n", + L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr)); + else + dev_err(edac_dev->dev, + "L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n", + L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr), + L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr)); + /* + * NOTE: Address [41:38] in L3C_ELR_PADDRHIGH(l3celr). + * Address [37:6] in l3caelr. Lower 6 bits are zero. 
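+	 * The dev_err() below rebuilds the 42-bit address from those two
+	 * registers: PADDRHIGH and the top bits of l3caelr form the printed
+	 * high word, and the remaining l3caelr bits are shifted left by 6
+	 * to restore the zeroed cache-line offset in the low word.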
+ */ + dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n", + L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26), + (l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr)); + dev_err(edac_dev->dev, + "L3C error status register value 0x%X\n", l3cesr); + + /* Clear L3C error interrupt */ + writel(0, ctx->dev_csr + L3C_ESR); + + if (ctx->version <= 1 && + xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) { + edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name); + return; + } + if (l3cesr & L3C_ESR_CERR_MASK) + edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name); + if (l3cesr & L3C_ESR_UCERR_MASK) + edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name); +} + +static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev, + bool enable) +{ + struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info; + u32 val; + + val = readl(ctx->dev_csr + L3C_ECR); + val |= L3C_UCERREN | L3C_CERREN; + /* On disable, we just disable interrupt but keep error enabled */ + if (edac_dev->op_state == OP_RUNNING_INTERRUPT) { + if (enable) + val |= L3C_ECR_UCINTREN | L3C_ECR_CINTREN; + else + val &= ~(L3C_ECR_UCINTREN | L3C_ECR_CINTREN); + } + writel(val, ctx->dev_csr + L3C_ECR); + + if (edac_dev->op_state == OP_RUNNING_INTERRUPT) { + /* Enable/disable L3 error top level interrupt */ + if (enable) { + xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK, + L3C_UNCORR_ERR_MASK); + xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK, + L3C_CORR_ERR_MASK); + } else { + xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK, + L3C_UNCORR_ERR_MASK); + xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK, + L3C_CORR_ERR_MASK); + } + } +} + +static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dev = file->private_data; + struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info; + + /* Generate all errors */ + writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR); + return count; +} + +static const struct file_operations xgene_edac_l3_debug_inject_fops = { + .open = simple_open, + .write = xgene_edac_l3_inject_ctrl_write, + .llseek = generic_file_llseek +}; + +static void +xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info; + struct dentry *dbgfs_dir; + char name[10]; + + if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs) + return; + + snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx); + dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs); + if (!dbgfs_dir) + return; + + debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev, + &xgene_edac_l3_debug_inject_fops); +} + +static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np, + int version) +{ + struct edac_device_ctl_info *edac_dev; + struct xgene_edac_dev_ctx *ctx; + struct resource res; + void __iomem *dev_csr; + int edac_idx; + int rc = 0; + + if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL)) + return -ENOMEM; + + rc = of_address_to_resource(np, 0, &res); + if (rc < 0) { + dev_err(edac->dev, "no L3 resource address\n"); + goto err_release_group; + } + dev_csr = devm_ioremap_resource(edac->dev, &res); + if (IS_ERR(dev_csr)) { + dev_err(edac->dev, + "devm_ioremap_resource failed for L3 resource address\n"); + rc = PTR_ERR(dev_csr); + goto err_release_group; + } + + edac_idx = edac_device_alloc_index(); + edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx), + "l3c", 1, "l3c", 1, 0, edac_idx); + if (!edac_dev) { + rc = -ENOMEM; + goto 
err_release_group; + } + + ctx = edac_dev->pvt_info; + ctx->dev_csr = dev_csr; + ctx->name = "xgene_l3_err"; + ctx->edac_idx = edac_idx; + ctx->edac = edac; + ctx->edac_dev = edac_dev; + ctx->ddev = *edac->dev; + ctx->version = version; + edac_dev->dev = &ctx->ddev; + edac_dev->ctl_name = ctx->name; + edac_dev->dev_name = ctx->name; + edac_dev->mod_name = EDAC_MOD_STR; + + if (edac_op_state == EDAC_OPSTATE_POLL) + edac_dev->edac_check = xgene_edac_l3_check; + + xgene_edac_l3_create_debugfs_nodes(edac_dev); + + rc = edac_device_add_device(edac_dev); + if (rc > 0) { + dev_err(edac->dev, "failed edac_device_add_device()\n"); + rc = -ENOMEM; + goto err_ctl_free; + } + + if (edac_op_state == EDAC_OPSTATE_INT) + edac_dev->op_state = OP_RUNNING_INTERRUPT; + + list_add(&ctx->next, &edac->l3s); + + xgene_edac_l3_hw_init(edac_dev, 1); + + devres_remove_group(edac->dev, xgene_edac_l3_add); + + dev_info(edac->dev, "X-Gene EDAC L3 registered\n"); + return 0; + +err_ctl_free: + edac_device_free_ctl_info(edac_dev); +err_release_group: + devres_release_group(edac->dev, xgene_edac_l3_add); + return rc; +} + +static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3) +{ + struct edac_device_ctl_info *edac_dev = l3->edac_dev; + + xgene_edac_l3_hw_init(edac_dev, 0); + edac_device_del_device(l3->edac->dev); + edac_device_free_ctl_info(edac_dev); + return 0; +} + +/* SoC error device */ +#define IOBAXIS0TRANSERRINTSTS 0x0000 +#define IOBAXIS0_M_ILLEGAL_ACCESS_MASK BIT(1) +#define IOBAXIS0_ILLEGAL_ACCESS_MASK BIT(0) +#define IOBAXIS0TRANSERRINTMSK 0x0004 +#define IOBAXIS0TRANSERRREQINFOL 0x0008 +#define IOBAXIS0TRANSERRREQINFOH 0x000c +#define REQTYPE_RD(src) (((src) & BIT(0))) +#define ERRADDRH_RD(src) (((src) & 0xffc00000) >> 22) +#define IOBAXIS1TRANSERRINTSTS 0x0010 +#define IOBAXIS1TRANSERRINTMSK 0x0014 +#define IOBAXIS1TRANSERRREQINFOL 0x0018 +#define IOBAXIS1TRANSERRREQINFOH 0x001c +#define IOBPATRANSERRINTSTS 0x0020 +#define IOBPA_M_REQIDRAM_CORRUPT_MASK BIT(7) +#define IOBPA_REQIDRAM_CORRUPT_MASK BIT(6) +#define IOBPA_M_TRANS_CORRUPT_MASK BIT(5) +#define IOBPA_TRANS_CORRUPT_MASK BIT(4) +#define IOBPA_M_WDATA_CORRUPT_MASK BIT(3) +#define IOBPA_WDATA_CORRUPT_MASK BIT(2) +#define IOBPA_M_RDATA_CORRUPT_MASK BIT(1) +#define IOBPA_RDATA_CORRUPT_MASK BIT(0) +#define IOBBATRANSERRINTSTS 0x0030 +#define M_ILLEGAL_ACCESS_MASK BIT(15) +#define ILLEGAL_ACCESS_MASK BIT(14) +#define M_WIDRAM_CORRUPT_MASK BIT(13) +#define WIDRAM_CORRUPT_MASK BIT(12) +#define M_RIDRAM_CORRUPT_MASK BIT(11) +#define RIDRAM_CORRUPT_MASK BIT(10) +#define M_TRANS_CORRUPT_MASK BIT(9) +#define TRANS_CORRUPT_MASK BIT(8) +#define M_WDATA_CORRUPT_MASK BIT(7) +#define WDATA_CORRUPT_MASK BIT(6) +#define M_RBM_POISONED_REQ_MASK BIT(5) +#define RBM_POISONED_REQ_MASK BIT(4) +#define M_XGIC_POISONED_REQ_MASK BIT(3) +#define XGIC_POISONED_REQ_MASK BIT(2) +#define M_WRERR_RESP_MASK BIT(1) +#define WRERR_RESP_MASK BIT(0) +#define IOBBATRANSERRREQINFOL 0x0038 +#define IOBBATRANSERRREQINFOH 0x003c +#define REQTYPE_F2_RD(src) ((src) & BIT(0)) +#define ERRADDRH_F2_RD(src) (((src) & 0xffc00000) >> 22) +#define IOBBATRANSERRCSWREQID 0x0040 +#define XGICTRANSERRINTSTS 0x0050 +#define M_WR_ACCESS_ERR_MASK BIT(3) +#define WR_ACCESS_ERR_MASK BIT(2) +#define M_RD_ACCESS_ERR_MASK BIT(1) +#define RD_ACCESS_ERR_MASK BIT(0) +#define XGICTRANSERRINTMSK 0x0054 +#define XGICTRANSERRREQINFO 0x0058 +#define REQTYPE_MASK BIT(26) +#define ERRADDR_RD(src) ((src) & 0x03ffffff) +#define GLBL_ERR_STS 0x0800 +#define MDED_ERR_MASK BIT(3) +#define DED_ERR_MASK BIT(2) +#define 
MSEC_ERR_MASK BIT(1) +#define SEC_ERR_MASK BIT(0) +#define GLBL_SEC_ERRL 0x0810 +#define GLBL_SEC_ERRH 0x0818 +#define GLBL_MSEC_ERRL 0x0820 +#define GLBL_MSEC_ERRH 0x0828 +#define GLBL_DED_ERRL 0x0830 +#define GLBL_DED_ERRLMASK 0x0834 +#define GLBL_DED_ERRH 0x0838 +#define GLBL_DED_ERRHMASK 0x083c +#define GLBL_MDED_ERRL 0x0840 +#define GLBL_MDED_ERRLMASK 0x0844 +#define GLBL_MDED_ERRH 0x0848 +#define GLBL_MDED_ERRHMASK 0x084c + +/* IO Bus Registers */ +#define RBCSR 0x0000 +#define STICKYERR_MASK BIT(0) +#define RBEIR 0x0008 +#define AGENT_OFFLINE_ERR_MASK BIT(30) +#define UNIMPL_RBPAGE_ERR_MASK BIT(29) +#define WORD_ALIGNED_ERR_MASK BIT(28) +#define PAGE_ACCESS_ERR_MASK BIT(27) +#define WRITE_ACCESS_MASK BIT(26) + +static const char * const soc_mem_err_v1[] = { + "10GbE0", + "10GbE1", + "Security", + "SATA45", + "SATA23/ETH23", + "SATA01/ETH01", + "USB1", + "USB0", + "QML", + "QM0", + "QM1 (XGbE01)", + "PCIE4", + "PCIE3", + "PCIE2", + "PCIE1", + "PCIE0", + "CTX Manager", + "OCM", + "1GbE", + "CLE", + "AHBC", + "PktDMA", + "GFC", + "MSLIM", + "10GbE2", + "10GbE3", + "QM2 (XGbE23)", + "IOB", + "unknown", + "unknown", + "unknown", + "unknown", +}; + +static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info; + u32 err_addr_lo; + u32 err_addr_hi; + u32 reg; + u32 info; + + /* GIC transaction error interrupt */ + reg = readl(ctx->dev_csr + XGICTRANSERRINTSTS); + if (!reg) + goto chk_iob_err; + dev_err(edac_dev->dev, "XGIC transaction error\n"); + if (reg & RD_ACCESS_ERR_MASK) + dev_err(edac_dev->dev, "XGIC read size error\n"); + if (reg & M_RD_ACCESS_ERR_MASK) + dev_err(edac_dev->dev, "Multiple XGIC read size error\n"); + if (reg & WR_ACCESS_ERR_MASK) + dev_err(edac_dev->dev, "XGIC write size error\n"); + if (reg & M_WR_ACCESS_ERR_MASK) + dev_err(edac_dev->dev, "Multiple XGIC write size error\n"); + info = readl(ctx->dev_csr + XGICTRANSERRREQINFO); + dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n", + str_read_write(info & REQTYPE_MASK), ERRADDR_RD(info), + info); + writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS); + +chk_iob_err: + /* IOB memory error */ + reg = readl(ctx->dev_csr + GLBL_ERR_STS); + if (!reg) + return; + if (reg & SEC_ERR_MASK) { + err_addr_lo = readl(ctx->dev_csr + GLBL_SEC_ERRL); + err_addr_hi = readl(ctx->dev_csr + GLBL_SEC_ERRH); + dev_err(edac_dev->dev, + "IOB single-bit correctable memory at 0x%08X.%08X error\n", + err_addr_lo, err_addr_hi); + writel(err_addr_lo, ctx->dev_csr + GLBL_SEC_ERRL); + writel(err_addr_hi, ctx->dev_csr + GLBL_SEC_ERRH); + } + if (reg & MSEC_ERR_MASK) { + err_addr_lo = readl(ctx->dev_csr + GLBL_MSEC_ERRL); + err_addr_hi = readl(ctx->dev_csr + GLBL_MSEC_ERRH); + dev_err(edac_dev->dev, + "IOB multiple single-bit correctable memory at 0x%08X.%08X error\n", + err_addr_lo, err_addr_hi); + writel(err_addr_lo, ctx->dev_csr + GLBL_MSEC_ERRL); + writel(err_addr_hi, ctx->dev_csr + GLBL_MSEC_ERRH); + } + if (reg & (SEC_ERR_MASK | MSEC_ERR_MASK)) + edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name); + + if (reg & DED_ERR_MASK) { + err_addr_lo = readl(ctx->dev_csr + GLBL_DED_ERRL); + err_addr_hi = readl(ctx->dev_csr + GLBL_DED_ERRH); + dev_err(edac_dev->dev, + "IOB double-bit uncorrectable memory at 0x%08X.%08X error\n", + err_addr_lo, err_addr_hi); + writel(err_addr_lo, ctx->dev_csr + GLBL_DED_ERRL); + writel(err_addr_hi, ctx->dev_csr + GLBL_DED_ERRH); + } + if (reg & MDED_ERR_MASK) { + err_addr_lo = readl(ctx->dev_csr + GLBL_MDED_ERRL); + err_addr_hi = 
readl(ctx->dev_csr + GLBL_MDED_ERRH); + dev_err(edac_dev->dev, + "Multiple IOB double-bit uncorrectable memory at 0x%08X.%08X error\n", + err_addr_lo, err_addr_hi); + writel(err_addr_lo, ctx->dev_csr + GLBL_MDED_ERRL); + writel(err_addr_hi, ctx->dev_csr + GLBL_MDED_ERRH); + } + if (reg & (DED_ERR_MASK | MDED_ERR_MASK)) + edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name); +} + +static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info; + u32 err_addr_lo; + u32 err_addr_hi; + u32 reg; + + /* If the register bus resource isn't available, just skip it */ + if (!ctx->edac->rb_map) + goto rb_skip; + + /* + * Check RB access errors + * 1. Out of range + * 2. Un-implemented page + * 3. Un-aligned access + * 4. Offline slave IP + */ + if (regmap_read(ctx->edac->rb_map, RBCSR, ®)) + return; + if (reg & STICKYERR_MASK) { + bool write; + + dev_err(edac_dev->dev, "IOB bus access error(s)\n"); + if (regmap_read(ctx->edac->rb_map, RBEIR, ®)) + return; + write = reg & WRITE_ACCESS_MASK ? 1 : 0; + if (reg & AGENT_OFFLINE_ERR_MASK) + dev_err(edac_dev->dev, + "IOB bus %s access to offline agent error\n", + str_write_read(write)); + if (reg & UNIMPL_RBPAGE_ERR_MASK) + dev_err(edac_dev->dev, + "IOB bus %s access to unimplemented page error\n", + str_write_read(write)); + if (reg & WORD_ALIGNED_ERR_MASK) + dev_err(edac_dev->dev, + "IOB bus %s word aligned access error\n", + str_write_read(write)); + if (reg & PAGE_ACCESS_ERR_MASK) + dev_err(edac_dev->dev, + "IOB bus %s to page out of range access error\n", + str_write_read(write)); + if (regmap_write(ctx->edac->rb_map, RBEIR, 0)) + return; + if (regmap_write(ctx->edac->rb_map, RBCSR, 0)) + return; + } +rb_skip: + + /* IOB Bridge agent transaction error interrupt */ + reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS); + if (!reg) + return; + + dev_err(edac_dev->dev, "IOB bridge agent (BA) transaction error\n"); + if (reg & WRERR_RESP_MASK) + dev_err(edac_dev->dev, "IOB BA write response error\n"); + if (reg & M_WRERR_RESP_MASK) + dev_err(edac_dev->dev, + "Multiple IOB BA write response error\n"); + if (reg & XGIC_POISONED_REQ_MASK) + dev_err(edac_dev->dev, "IOB BA XGIC poisoned write error\n"); + if (reg & M_XGIC_POISONED_REQ_MASK) + dev_err(edac_dev->dev, + "Multiple IOB BA XGIC poisoned write error\n"); + if (reg & RBM_POISONED_REQ_MASK) + dev_err(edac_dev->dev, "IOB BA RBM poisoned write error\n"); + if (reg & M_RBM_POISONED_REQ_MASK) + dev_err(edac_dev->dev, + "Multiple IOB BA RBM poisoned write error\n"); + if (reg & WDATA_CORRUPT_MASK) + dev_err(edac_dev->dev, "IOB BA write error\n"); + if (reg & M_WDATA_CORRUPT_MASK) + dev_err(edac_dev->dev, "Multiple IOB BA write error\n"); + if (reg & TRANS_CORRUPT_MASK) + dev_err(edac_dev->dev, "IOB BA transaction error\n"); + if (reg & M_TRANS_CORRUPT_MASK) + dev_err(edac_dev->dev, "Multiple IOB BA transaction error\n"); + if (reg & RIDRAM_CORRUPT_MASK) + dev_err(edac_dev->dev, + "IOB BA RDIDRAM read transaction ID error\n"); + if (reg & M_RIDRAM_CORRUPT_MASK) + dev_err(edac_dev->dev, + "Multiple IOB BA RDIDRAM read transaction ID error\n"); + if (reg & WIDRAM_CORRUPT_MASK) + dev_err(edac_dev->dev, + "IOB BA RDIDRAM write transaction ID error\n"); + if (reg & M_WIDRAM_CORRUPT_MASK) + dev_err(edac_dev->dev, + "Multiple IOB BA RDIDRAM write transaction ID error\n"); + if (reg & ILLEGAL_ACCESS_MASK) + dev_err(edac_dev->dev, + "IOB BA XGIC/RB illegal access error\n"); + if (reg & M_ILLEGAL_ACCESS_MASK) + dev_err(edac_dev->dev, + "Multiple IOB BA 
XGIC/RB illegal access error\n"); + + err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL); + err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH); + dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n", + str_read_write(REQTYPE_F2_RD(err_addr_hi)), + ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi); + if (reg & WRERR_RESP_MASK) + dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n", + readl(ctx->dev_csr + IOBBATRANSERRCSWREQID)); + writel(reg, ctx->dev_csr + IOBBATRANSERRINTSTS); +} + +static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info; + u32 err_addr_lo; + u32 err_addr_hi; + u32 reg; + + /* IOB Processing agent transaction error interrupt */ + reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS); + if (!reg) + goto chk_iob_axi0; + dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n"); + if (reg & IOBPA_RDATA_CORRUPT_MASK) + dev_err(edac_dev->dev, "IOB PA read data RAM error\n"); + if (reg & IOBPA_M_RDATA_CORRUPT_MASK) + dev_err(edac_dev->dev, + "Multiple IOB PA read data RAM error\n"); + if (reg & IOBPA_WDATA_CORRUPT_MASK) + dev_err(edac_dev->dev, "IOB PA write data RAM error\n"); + if (reg & IOBPA_M_WDATA_CORRUPT_MASK) + dev_err(edac_dev->dev, + "Multiple IOB PA write data RAM error\n"); + if (reg & IOBPA_TRANS_CORRUPT_MASK) + dev_err(edac_dev->dev, "IOB PA transaction error\n"); + if (reg & IOBPA_M_TRANS_CORRUPT_MASK) + dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n"); + if (reg & IOBPA_REQIDRAM_CORRUPT_MASK) + dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n"); + if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK) + dev_err(edac_dev->dev, + "Multiple IOB PA transaction ID RAM error\n"); + writel(reg, ctx->dev_csr + IOBPATRANSERRINTSTS); + +chk_iob_axi0: + /* IOB AXI0 Error */ + reg = readl(ctx->dev_csr + IOBAXIS0TRANSERRINTSTS); + if (!reg) + goto chk_iob_axi1; + err_addr_lo = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOL); + err_addr_hi = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOH); + dev_err(edac_dev->dev, + "%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n", + reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "", + str_read_write(REQTYPE_RD(err_addr_hi)), + ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi); + writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS); + +chk_iob_axi1: + /* IOB AXI1 Error */ + reg = readl(ctx->dev_csr + IOBAXIS1TRANSERRINTSTS); + if (!reg) + return; + err_addr_lo = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOL); + err_addr_hi = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOH); + dev_err(edac_dev->dev, + "%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n", + reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? 
"Multiple " : "", + str_read_write(REQTYPE_RD(err_addr_hi)), + ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi); + writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS); +} + +static void xgene_edac_soc_check(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info; + const char * const *soc_mem_err = NULL; + u32 pcp_hp_stat; + u32 pcp_lp_stat; + u32 reg; + int i; + + xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat); + xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat); + xgene_edac_pcp_rd(ctx->edac, MEMERRINTSTS, ®); + if (!((pcp_hp_stat & (IOB_PA_ERR_MASK | IOB_BA_ERR_MASK | + IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK)) || + (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) || reg)) + return; + + if (pcp_hp_stat & IOB_XGIC_ERR_MASK) + xgene_edac_iob_gic_report(edac_dev); + + if (pcp_hp_stat & (IOB_RB_ERR_MASK | IOB_BA_ERR_MASK)) + xgene_edac_rb_report(edac_dev); + + if (pcp_hp_stat & IOB_PA_ERR_MASK) + xgene_edac_pa_report(edac_dev); + + if (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) { + dev_info(edac_dev->dev, + "CSW switch trace correctable memory parity error\n"); + edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name); + } + + if (!reg) + return; + if (ctx->version == 1) + soc_mem_err = soc_mem_err_v1; + if (!soc_mem_err) { + dev_err(edac_dev->dev, "SoC memory parity error 0x%08X\n", + reg); + edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name); + return; + } + for (i = 0; i < 31; i++) { + if (reg & (1 << i)) { + dev_err(edac_dev->dev, "%s memory parity error\n", + soc_mem_err[i]); + edac_device_handle_ue(edac_dev, 0, 0, + edac_dev->ctl_name); + } + } +} + +static void xgene_edac_soc_hw_init(struct edac_device_ctl_info *edac_dev, + bool enable) +{ + struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info; + + /* Enable SoC IP error interrupt */ + if (edac_dev->op_state == OP_RUNNING_INTERRUPT) { + if (enable) { + xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK, + IOB_PA_ERR_MASK | + IOB_BA_ERR_MASK | + IOB_XGIC_ERR_MASK | + IOB_RB_ERR_MASK); + xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK, + CSW_SWITCH_TRACE_ERR_MASK); + } else { + xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK, + IOB_PA_ERR_MASK | + IOB_BA_ERR_MASK | + IOB_XGIC_ERR_MASK | + IOB_RB_ERR_MASK); + xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK, + CSW_SWITCH_TRACE_ERR_MASK); + } + + writel(enable ? 0x0 : 0xFFFFFFFF, + ctx->dev_csr + IOBAXIS0TRANSERRINTMSK); + writel(enable ? 0x0 : 0xFFFFFFFF, + ctx->dev_csr + IOBAXIS1TRANSERRINTMSK); + writel(enable ? 0x0 : 0xFFFFFFFF, + ctx->dev_csr + XGICTRANSERRINTMSK); + + xgene_edac_pcp_setbits(ctx->edac, MEMERRINTMSK, + enable ? 
0x0 : 0xFFFFFFFF); + } +} + +static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np, + int version) +{ + struct edac_device_ctl_info *edac_dev; + struct xgene_edac_dev_ctx *ctx; + void __iomem *dev_csr; + struct resource res; + int edac_idx; + int rc; + + if (!devres_open_group(edac->dev, xgene_edac_soc_add, GFP_KERNEL)) + return -ENOMEM; + + rc = of_address_to_resource(np, 0, &res); + if (rc < 0) { + dev_err(edac->dev, "no SoC resource address\n"); + goto err_release_group; + } + dev_csr = devm_ioremap_resource(edac->dev, &res); + if (IS_ERR(dev_csr)) { + dev_err(edac->dev, + "devm_ioremap_resource failed for soc resource address\n"); + rc = PTR_ERR(dev_csr); + goto err_release_group; + } + + edac_idx = edac_device_alloc_index(); + edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx), + "SOC", 1, "SOC", 1, 2, edac_idx); + if (!edac_dev) { + rc = -ENOMEM; + goto err_release_group; + } + + ctx = edac_dev->pvt_info; + ctx->dev_csr = dev_csr; + ctx->name = "xgene_soc_err"; + ctx->edac_idx = edac_idx; + ctx->edac = edac; + ctx->edac_dev = edac_dev; + ctx->ddev = *edac->dev; + ctx->version = version; + edac_dev->dev = &ctx->ddev; + edac_dev->ctl_name = ctx->name; + edac_dev->dev_name = ctx->name; + edac_dev->mod_name = EDAC_MOD_STR; + + if (edac_op_state == EDAC_OPSTATE_POLL) + edac_dev->edac_check = xgene_edac_soc_check; + + rc = edac_device_add_device(edac_dev); + if (rc > 0) { + dev_err(edac->dev, "failed edac_device_add_device()\n"); + rc = -ENOMEM; + goto err_ctl_free; + } + + if (edac_op_state == EDAC_OPSTATE_INT) + edac_dev->op_state = OP_RUNNING_INTERRUPT; + + list_add(&ctx->next, &edac->socs); + + xgene_edac_soc_hw_init(edac_dev, 1); + + devres_remove_group(edac->dev, xgene_edac_soc_add); + + dev_info(edac->dev, "X-Gene EDAC SoC registered\n"); + + return 0; + +err_ctl_free: + edac_device_free_ctl_info(edac_dev); +err_release_group: + devres_release_group(edac->dev, xgene_edac_soc_add); + return rc; +} + +static int xgene_edac_soc_remove(struct xgene_edac_dev_ctx *soc) +{ + struct edac_device_ctl_info *edac_dev = soc->edac_dev; + + xgene_edac_soc_hw_init(edac_dev, 0); + edac_device_del_device(soc->edac->dev); + edac_device_free_ctl_info(edac_dev); + return 0; +} + +static irqreturn_t xgene_edac_isr(int irq, void *dev_id) +{ + struct xgene_edac *ctx = dev_id; + struct xgene_edac_pmd_ctx *pmd; + struct xgene_edac_dev_ctx *node; + unsigned int pcp_hp_stat; + unsigned int pcp_lp_stat; + + xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat); + xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat); + if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) || + (MCU_CTL_ERR_MASK & pcp_hp_stat) || + (MCU_CORR_ERR_MASK & pcp_lp_stat)) { + struct xgene_edac_mc_ctx *mcu; + + list_for_each_entry(mcu, &ctx->mcus, next) + xgene_edac_mc_check(mcu->mci); + } + + list_for_each_entry(pmd, &ctx->pmds, next) { + if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat) + xgene_edac_pmd_check(pmd->edac_dev); + } + + list_for_each_entry(node, &ctx->l3s, next) + xgene_edac_l3_check(node->edac_dev); + + list_for_each_entry(node, &ctx->socs, next) + xgene_edac_soc_check(node->edac_dev); + + return IRQ_HANDLED; +} + +static int xgene_edac_probe(struct platform_device *pdev) +{ + struct xgene_edac *edac; + struct device_node *child; + struct resource *res; + int rc; + + edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); + if (!edac) + return -ENOMEM; + + edac->dev = &pdev->dev; + platform_set_drvdata(pdev, edac); + INIT_LIST_HEAD(&edac->mcus); + INIT_LIST_HEAD(&edac->pmds); + INIT_LIST_HEAD(&edac->l3s); + 
INIT_LIST_HEAD(&edac->socs); + spin_lock_init(&edac->lock); + mutex_init(&edac->mc_lock); + + edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "regmap-csw"); + if (IS_ERR(edac->csw_map)) { + dev_err(edac->dev, "unable to get syscon regmap csw\n"); + rc = PTR_ERR(edac->csw_map); + goto out_err; + } + + edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "regmap-mcba"); + if (IS_ERR(edac->mcba_map)) { + dev_err(edac->dev, "unable to get syscon regmap mcba\n"); + rc = PTR_ERR(edac->mcba_map); + goto out_err; + } + + edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "regmap-mcbb"); + if (IS_ERR(edac->mcbb_map)) { + dev_err(edac->dev, "unable to get syscon regmap mcbb\n"); + rc = PTR_ERR(edac->mcbb_map); + goto out_err; + } + edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "regmap-efuse"); + if (IS_ERR(edac->efuse_map)) { + dev_err(edac->dev, "unable to get syscon regmap efuse\n"); + rc = PTR_ERR(edac->efuse_map); + goto out_err; + } + + /* + * NOTE: The register bus resource is optional for compatibility + * reason. + */ + edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "regmap-rb"); + if (IS_ERR(edac->rb_map)) { + dev_warn(edac->dev, "missing syscon regmap rb\n"); + edac->rb_map = NULL; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(edac->pcp_csr)) { + dev_err(&pdev->dev, "no PCP resource address\n"); + rc = PTR_ERR(edac->pcp_csr); + goto out_err; + } + + if (edac_op_state == EDAC_OPSTATE_INT) { + int irq; + int i; + + for (i = 0; i < 3; i++) { + irq = platform_get_irq_optional(pdev, i); + if (irq < 0) { + dev_err(&pdev->dev, "No IRQ resource\n"); + rc = irq; + goto out_err; + } + rc = devm_request_irq(&pdev->dev, irq, + xgene_edac_isr, IRQF_SHARED, + dev_name(&pdev->dev), edac); + if (rc) { + dev_err(&pdev->dev, + "Could not request IRQ %d\n", irq); + goto out_err; + } + } + } + + edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name); + + for_each_child_of_node(pdev->dev.of_node, child) { + if (!of_device_is_available(child)) + continue; + if (of_device_is_compatible(child, "apm,xgene-edac-mc")) + xgene_edac_mc_add(edac, child); + if (of_device_is_compatible(child, "apm,xgene-edac-pmd")) + xgene_edac_pmd_add(edac, child, 1); + if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2")) + xgene_edac_pmd_add(edac, child, 2); + if (of_device_is_compatible(child, "apm,xgene-edac-l3")) + xgene_edac_l3_add(edac, child, 1); + if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2")) + xgene_edac_l3_add(edac, child, 2); + if (of_device_is_compatible(child, "apm,xgene-edac-soc")) + xgene_edac_soc_add(edac, child, 0); + if (of_device_is_compatible(child, "apm,xgene-edac-soc-v1")) + xgene_edac_soc_add(edac, child, 1); + } + + return 0; + +out_err: + return rc; +} + +static void xgene_edac_remove(struct platform_device *pdev) +{ + struct xgene_edac *edac = dev_get_drvdata(&pdev->dev); + struct xgene_edac_mc_ctx *mcu; + struct xgene_edac_mc_ctx *temp_mcu; + struct xgene_edac_pmd_ctx *pmd; + struct xgene_edac_pmd_ctx *temp_pmd; + struct xgene_edac_dev_ctx *node; + struct xgene_edac_dev_ctx *temp_node; + + list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next) + xgene_edac_mc_remove(mcu); + + list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next) + xgene_edac_pmd_remove(pmd); + + list_for_each_entry_safe(node, temp_node, &edac->l3s, next) + xgene_edac_l3_remove(node); + + list_for_each_entry_safe(node, 
temp_node, &edac->socs, next) + xgene_edac_soc_remove(node); +} + +static const struct of_device_id xgene_edac_of_match[] = { + { .compatible = "apm,xgene-edac" }, + {}, +}; +MODULE_DEVICE_TABLE(of, xgene_edac_of_match); + +static struct platform_driver xgene_edac_driver = { + .probe = xgene_edac_probe, + .remove = xgene_edac_remove, + .driver = { + .name = "xgene-edac", + .of_match_table = xgene_edac_of_match, + }, +}; + +static int __init xgene_edac_init(void) +{ + int rc; + + if (ghes_get_devices()) + return -EBUSY; + + /* Make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_INT: + break; + default: + edac_op_state = EDAC_OPSTATE_INT; + break; + } + + rc = platform_driver_register(&xgene_edac_driver); + if (rc) { + edac_printk(KERN_ERR, EDAC_MOD_STR, + "EDAC fails to register\n"); + goto reg_failed; + } + + return 0; + +reg_failed: + return rc; +} +module_init(xgene_edac_init); + +static void __exit xgene_edac_exit(void) +{ + platform_driver_unregister(&xgene_edac_driver); +} +module_exit(xgene_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Feng Kan <fkan@apm.com>"); +MODULE_DESCRIPTION("APM X-Gene EDAC driver"); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, + "EDAC error reporting state: 0=Poll, 2=Interrupt"); diff --git a/drivers/edac/zynqmp_edac.c b/drivers/edac/zynqmp_edac.c new file mode 100644 index 000000000000..cdffc9e4194d --- /dev/null +++ b/drivers/edac/zynqmp_edac.c @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Xilinx ZynqMP OCM ECC Driver + * + * Copyright (C) 2022 Advanced Micro Devices, Inc. + */ + +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> + +#include "edac_module.h" + +#define ZYNQMP_OCM_EDAC_MSG_SIZE 256 + +#define ZYNQMP_OCM_EDAC_STRING "zynqmp_ocm" + +/* Error/Interrupt registers */ +#define ERR_CTRL_OFST 0x0 +#define OCM_ISR_OFST 0x04 +#define OCM_IMR_OFST 0x08 +#define OCM_IEN_OFST 0x0C +#define OCM_IDS_OFST 0x10 + +/* ECC control register */ +#define ECC_CTRL_OFST 0x14 + +/* Correctable error info registers */ +#define CE_FFA_OFST 0x1C +#define CE_FFD0_OFST 0x20 +#define CE_FFD1_OFST 0x24 +#define CE_FFD2_OFST 0x28 +#define CE_FFD3_OFST 0x2C +#define CE_FFE_OFST 0x30 + +/* Uncorrectable error info registers */ +#define UE_FFA_OFST 0x34 +#define UE_FFD0_OFST 0x38 +#define UE_FFD1_OFST 0x3C +#define UE_FFD2_OFST 0x40 +#define UE_FFD3_OFST 0x44 +#define UE_FFE_OFST 0x48 + +/* ECC control register bit field definitions */ +#define ECC_CTRL_CLR_CE_ERR 0x40 +#define ECC_CTRL_CLR_UE_ERR 0x80 + +/* Fault injection data and count registers */ +#define OCM_FID0_OFST 0x4C +#define OCM_FID1_OFST 0x50 +#define OCM_FID2_OFST 0x54 +#define OCM_FID3_OFST 0x58 +#define OCM_FIC_OFST 0x74 + +#define UE_MAX_BITPOS_LOWER 31 +#define UE_MIN_BITPOS_UPPER 32 +#define UE_MAX_BITPOS_UPPER 63 + +/* Interrupt masks */ +#define OCM_CEINTR_MASK BIT(6) +#define OCM_UEINTR_MASK BIT(7) +#define OCM_ECC_ENABLE_MASK BIT(0) + +#define OCM_FICOUNT_MASK GENMASK(23, 0) +#define OCM_NUM_UE_BITPOS 2 +#define OCM_BASEVAL 0xFFFC0000 +#define EDAC_DEVICE "ZynqMP-OCM" + +/** + * struct ecc_error_info - ECC error log information + * @addr: Fault generated at this address + * @fault_lo: Generated fault data (lower 32-bit) + * @fault_hi: Generated fault data (upper 32-bit) + */ +struct ecc_error_info { + u32 addr; + u32 fault_lo; + u32 fault_hi; +}; + +/** + * struct 
ecc_status - ECC status information to report + * @ce_cnt: Correctable error count + * @ue_cnt: Uncorrectable error count + * @ceinfo: Correctable error log information + * @ueinfo: Uncorrectable error log information + */ +struct ecc_status { + u32 ce_cnt; + u32 ue_cnt; + struct ecc_error_info ceinfo; + struct ecc_error_info ueinfo; +}; + +/** + * struct edac_priv - OCM private instance data + * @baseaddr: Base address of the OCM + * @message: Buffer for framing the event specific info + * @stat: ECC status information + * @ce_cnt: Correctable Error count + * @ue_cnt: Uncorrectable Error count + * @debugfs_dir: Directory entry for debugfs + * @ce_bitpos: Bit position for Correctable Error + * @ue_bitpos: Array to store Uncorrectable Error bit positions + * @fault_injection_cnt: Fault Injection Counter value + */ +struct edac_priv { + void __iomem *baseaddr; + char message[ZYNQMP_OCM_EDAC_MSG_SIZE]; + struct ecc_status stat; + u32 ce_cnt; + u32 ue_cnt; +#ifdef CONFIG_EDAC_DEBUG + struct dentry *debugfs_dir; + u8 ce_bitpos; + u8 ue_bitpos[OCM_NUM_UE_BITPOS]; + u32 fault_injection_cnt; +#endif +}; + +/** + * get_error_info - Get the current ECC error info + * @base: Pointer to the base address of the OCM + * @p: Pointer to the OCM ECC status structure + * @mask: Status register mask value + * + * Determines whether any ECC error occurred; if so, logs the fault info and + * clears the corresponding interrupt status bit. + */ +static void get_error_info(void __iomem *base, struct ecc_status *p, int mask) +{ + if (mask & OCM_CEINTR_MASK) { + p->ce_cnt++; + p->ceinfo.fault_lo = readl(base + CE_FFD0_OFST); + p->ceinfo.fault_hi = readl(base + CE_FFD1_OFST); + p->ceinfo.addr = (OCM_BASEVAL | readl(base + CE_FFA_OFST)); + writel(ECC_CTRL_CLR_CE_ERR, base + OCM_ISR_OFST); + } else if (mask & OCM_UEINTR_MASK) { + p->ue_cnt++; + p->ueinfo.fault_lo = readl(base + UE_FFD0_OFST); + p->ueinfo.fault_hi = readl(base + UE_FFD1_OFST); + p->ueinfo.addr = (OCM_BASEVAL | readl(base + UE_FFA_OFST)); + writel(ECC_CTRL_CLR_UE_ERR, base + OCM_ISR_OFST); + } +} + +/** + * handle_error - Handle error types CE and UE + * @dci: Pointer to the EDAC device instance + * @p: Pointer to the OCM ECC status structure + * + * Handles correctable and uncorrectable errors.
+ */ +static void handle_error(struct edac_device_ctl_info *dci, struct ecc_status *p) +{ + struct edac_priv *priv = dci->pvt_info; + struct ecc_error_info *pinf; + + if (p->ce_cnt) { + pinf = &p->ceinfo; + snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE, + "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]", + "CE", pinf->addr, pinf->fault_hi, pinf->fault_lo); + edac_device_handle_ce(dci, 0, 0, priv->message); + } + + if (p->ue_cnt) { + pinf = &p->ueinfo; + snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE, + "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]", + "UE", pinf->addr, pinf->fault_hi, pinf->fault_lo); + edac_device_handle_ue(dci, 0, 0, priv->message); + } + + memset(p, 0, sizeof(*p)); +} + +/** + * intr_handler - ISR routine + * @irq: irq number + * @dev_id: device id pointer + * + * Return: IRQ_NONE if no CE/UE interrupt bit is set, IRQ_HANDLED otherwise + */ +static irqreturn_t intr_handler(int irq, void *dev_id) +{ + struct edac_device_ctl_info *dci = dev_id; + struct edac_priv *priv = dci->pvt_info; + int regval; + + regval = readl(priv->baseaddr + OCM_ISR_OFST); + if (!(regval & (OCM_CEINTR_MASK | OCM_UEINTR_MASK))) { + WARN_ONCE(1, "Unhandled IRQ%d, ISR: 0x%x", irq, regval); + return IRQ_NONE; + } + + get_error_info(priv->baseaddr, &priv->stat, regval); + + priv->ce_cnt += priv->stat.ce_cnt; + priv->ue_cnt += priv->stat.ue_cnt; + handle_error(dci, &priv->stat); + + return IRQ_HANDLED; +} + +/** + * get_eccstate - Return the ECC status + * @base: Pointer to the OCM base address + * + * Get the ECC enable/disable status + * + * Return: true if ECC is enabled, false otherwise. + */ +static bool get_eccstate(void __iomem *base) +{ + return readl(base + ECC_CTRL_OFST) & OCM_ECC_ENABLE_MASK; +} + +#ifdef CONFIG_EDAC_DEBUG +/** + * write_fault_count - write fault injection count + * @priv: Pointer to the EDAC private struct + * + * Update the fault injection count register; once the counter reaches + * zero, errors are injected. + */ +static void write_fault_count(struct edac_priv *priv) +{ + u32 ficount = priv->fault_injection_cnt; + + if (ficount & ~OCM_FICOUNT_MASK) { + ficount &= OCM_FICOUNT_MASK; + edac_printk(KERN_INFO, EDAC_DEVICE, + "Fault injection count value truncated to %d\n", ficount); + } + + writel(ficount, priv->baseaddr + OCM_FIC_OFST); +} + +/* + * To inject a Correctable Error, the following steps are needed: + * - Set up the optional Fault Injection Count: + * echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count + * - Write the Correctable Error bit position value: + * echo <bit_pos val> > /sys/kernel/debug/edac/ocm/inject_ce_bitpos + */ +static ssize_t inject_ce_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dev = file->private_data; + struct edac_priv *priv = edac_dev->pvt_info; + int ret; + + if (!data) + return -EFAULT; + + ret = kstrtou8_from_user(data, count, 0, &priv->ce_bitpos); + if (ret) + return ret; + + if (priv->ce_bitpos > UE_MAX_BITPOS_UPPER) + return -EINVAL; + + if (priv->ce_bitpos <= UE_MAX_BITPOS_LOWER) { + writel(BIT(priv->ce_bitpos), priv->baseaddr + OCM_FID0_OFST); + writel(0, priv->baseaddr + OCM_FID1_OFST); + } else { + writel(BIT(priv->ce_bitpos - UE_MIN_BITPOS_UPPER), + priv->baseaddr + OCM_FID1_OFST); + writel(0, priv->baseaddr + OCM_FID0_OFST); + } + + write_fault_count(priv); + + return count; +} + +static const struct file_operations inject_ce_fops = { + .open = simple_open, + .write = inject_ce_write, + .llseek = generic_file_llseek, +};
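For reference, the correctable-error injection flow documented in the comment above can be driven from a small userspace program as well as by hand with echo. Below is a minimal sketch, assuming debugfs is mounted at /sys/kernel/debug and the "ocm" directory created by setup_debugfs() further down; the helper name write_str() is hypothetical:

	/* Hypothetical userspace helper for the CE injection flow above. */
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	static int write_str(const char *path, const char *val)
	{
		int fd = open(path, O_WRONLY);

		if (fd < 0)
			return -1;
		/* Each write lands in the matching *_write() debugfs handler. */
		if (write(fd, val, strlen(val)) < 0) {
			close(fd);
			return -1;
		}
		return close(fd);
	}

	int main(void)
	{
		/* Optionally arm the fault-injection counter first. */
		write_str("/sys/kernel/debug/edac/ocm/inject_fault_count", "1");
		/* Request a single-bit (correctable) flip at bit position 3. */
		return write_str("/sys/kernel/debug/edac/ocm/inject_ce_bitpos", "3");
	}

The same pattern applies to the uncorrectable-error node below, except that inject_ue_bitpos takes two comma-separated bit positions.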
+ +/* + * To inject an Uncorrectable Error, the following steps are needed: + * - Set up the optional Fault Injection Count: + * echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count + * - Write the Uncorrectable Error bit position values: + * echo <bit_pos0 val>,<bit_pos1 val> > /sys/kernel/debug/edac/ocm/inject_ue_bitpos + */ +static ssize_t inject_ue_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dev = file->private_data; + struct edac_priv *priv = edac_dev->pvt_info; + char buf[6], *pbuf, *token[2]; + u64 ue_bitpos; + int i, ret; + u8 len; + + if (!data) + return -EFAULT; + + /* Reserve one byte for the NUL terminator written below */ + len = min_t(size_t, count, sizeof(buf) - 1); + if (copy_from_user(buf, data, len)) + return -EFAULT; + + buf[len] = '\0'; + pbuf = &buf[0]; + for (i = 0; i < OCM_NUM_UE_BITPOS; i++) + token[i] = strsep(&pbuf, ","); + + /* Both comma-separated bit positions must be present */ + if (!token[0] || !token[1]) + return -EINVAL; + + ret = kstrtou8(token[0], 0, &priv->ue_bitpos[0]); + if (ret) + return ret; + + ret = kstrtou8(token[1], 0, &priv->ue_bitpos[1]); + if (ret) + return ret; + + if (priv->ue_bitpos[0] > UE_MAX_BITPOS_UPPER || + priv->ue_bitpos[1] > UE_MAX_BITPOS_UPPER) + return -EINVAL; + + if (priv->ue_bitpos[0] == priv->ue_bitpos[1]) { + edac_printk(KERN_ERR, EDAC_DEVICE, "Bit positions should not be equal\n"); + return -EINVAL; + } + + ue_bitpos = BIT(priv->ue_bitpos[0]) | BIT(priv->ue_bitpos[1]); + + writel((u32)ue_bitpos, priv->baseaddr + OCM_FID0_OFST); + writel((u32)(ue_bitpos >> 32), priv->baseaddr + OCM_FID1_OFST); + + write_fault_count(priv); + + return count; +} + +static const struct file_operations inject_ue_fops = { + .open = simple_open, + .write = inject_ue_write, + .llseek = generic_file_llseek, +}; + +static void setup_debugfs(struct edac_device_ctl_info *edac_dev) +{ + struct edac_priv *priv = edac_dev->pvt_info; + + priv->debugfs_dir = edac_debugfs_create_dir("ocm"); + if (!priv->debugfs_dir) + return; + + edac_debugfs_create_x32("inject_fault_count", 0644, priv->debugfs_dir, + &priv->fault_injection_cnt); + edac_debugfs_create_file("inject_ue_bitpos", 0644, priv->debugfs_dir, + edac_dev, &inject_ue_fops); + edac_debugfs_create_file("inject_ce_bitpos", 0644, priv->debugfs_dir, + edac_dev, &inject_ce_fops); +} +#endif + +static int edac_probe(struct platform_device *pdev) +{ + struct edac_device_ctl_info *dci; + struct edac_priv *priv; + void __iomem *baseaddr; + struct resource *res; + int irq, ret; + + baseaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(baseaddr)) + return PTR_ERR(baseaddr); + + if (!get_eccstate(baseaddr)) { + edac_printk(KERN_INFO, EDAC_DEVICE, "ECC not enabled\n"); + return -ENXIO; + } + + dci = edac_device_alloc_ctl_info(sizeof(*priv), ZYNQMP_OCM_EDAC_STRING, + 1, ZYNQMP_OCM_EDAC_STRING, 1, 0, + edac_device_alloc_index()); + if (!dci) + return -ENOMEM; + + priv = dci->pvt_info; + platform_set_drvdata(pdev, dci); + dci->dev = &pdev->dev; + priv->baseaddr = baseaddr; + dci->mod_name = pdev->dev.driver->name; + dci->ctl_name = ZYNQMP_OCM_EDAC_STRING; + dci->dev_name = dev_name(&pdev->dev); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto free_dev_ctl; + } + + ret = devm_request_irq(&pdev->dev, irq, intr_handler, 0, + dev_name(&pdev->dev), dci); + if (ret) { + edac_printk(KERN_ERR, EDAC_DEVICE, "Failed to request IRQ\n"); + goto free_dev_ctl; + } + + /* Enable UE, CE interrupts */ + writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IEN_OFST); + +#ifdef CONFIG_EDAC_DEBUG + setup_debugfs(dci); +#endif + + ret = 
edac_device_add_device(dci); + if (ret) + goto free_dev_ctl; + + return 0; + +free_dev_ctl: + edac_device_free_ctl_info(dci); + + return ret; +} + +static void edac_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *dci = platform_get_drvdata(pdev); + struct edac_priv *priv = dci->pvt_info; + + /* Disable UE, CE interrupts */ + writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IDS_OFST); + +#ifdef CONFIG_EDAC_DEBUG + debugfs_remove_recursive(priv->debugfs_dir); +#endif + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(dci); +} + +static const struct of_device_id zynqmp_ocm_edac_match[] = { + { .compatible = "xlnx,zynqmp-ocmc-1.0"}, + { /* end of table */ } +}; + +MODULE_DEVICE_TABLE(of, zynqmp_ocm_edac_match); + +static struct platform_driver zynqmp_ocm_edac_driver = { + .driver = { + .name = "zynqmp-ocm-edac", + .of_match_table = zynqmp_ocm_edac_match, + }, + .probe = edac_probe, + .remove = edac_remove, +}; + +module_platform_driver(zynqmp_ocm_edac_driver); + +MODULE_AUTHOR("Advanced Micro Devices, Inc"); +MODULE_DESCRIPTION("Xilinx ZynqMP OCM ECC driver"); +MODULE_LICENSE("GPL");
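One subtlety worth spelling out: both injection paths split a 64-bit fault mask across the two 32-bit fault-injection data registers, OCM_FID0_OFST for bits 0-31 and OCM_FID1_OFST for bits 32-63. A standalone sketch of that arithmetic, mirroring inject_ue_write() with illustrative bit positions (the values here are examples, not anything mandated by the hardware):

	/* Illustrative only: reproduces the fault-mask math of inject_ue_write(). */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint8_t pos0 = 5, pos1 = 40;		/* example positions in 0..63 */
		uint64_t mask = (1ULL << pos0) | (1ULL << pos1);
		uint32_t fid0 = (uint32_t)mask;		/* value for OCM_FID0_OFST */
		uint32_t fid1 = (uint32_t)(mask >> 32);	/* value for OCM_FID1_OFST */

		printf("FID0=0x%08x FID1=0x%08x\n", fid0, fid1);	/* 0x20, 0x100 */
		return 0;
	}

Flipping two distinct bits in one word is what defeats SEC-DED correction and produces an uncorrectable error, which is why inject_ue_write() rejects equal bit positions.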
