diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/fsl_ddr_edac.c | 141 | ||||
-rw-r--r-- | drivers/edac/fsl_ddr_edac.h | 13 | ||||
-rw-r--r-- | drivers/edac/i10nm_base.c | 1 | ||||
-rw-r--r-- | drivers/edac/ie31200_edac.c | 8 | ||||
-rw-r--r-- | drivers/edac/igen6_edac.c | 41 | ||||
-rw-r--r-- | drivers/edac/layerscape_edac.c | 1 | ||||
-rw-r--r-- | drivers/edac/skx_common.c | 57 | ||||
-rw-r--r-- | drivers/edac/skx_common.h | 8 |
8 files changed, 198 insertions, 72 deletions
diff --git a/drivers/edac/fsl_ddr_edac.c b/drivers/edac/fsl_ddr_edac.c index d148d262d0d4..e4eaec0aa81d 100644 --- a/drivers/edac/fsl_ddr_edac.c +++ b/drivers/edac/fsl_ddr_edac.c @@ -31,18 +31,30 @@ static int edac_mc_idx; -static u32 orig_ddr_err_disable; -static u32 orig_ddr_err_sbe; -static bool little_endian; +static inline void __iomem *ddr_reg_addr(struct fsl_mc_pdata *pdata, unsigned int off) +{ + if (pdata->flag == TYPE_IMX9 && off >= FSL_MC_DATA_ERR_INJECT_HI && off <= FSL_MC_ERR_SBE) + return pdata->inject_vbase + off - FSL_MC_DATA_ERR_INJECT_HI + + IMX9_MC_DATA_ERR_INJECT_OFF; + + if (pdata->flag == TYPE_IMX9 && off >= IMX9_MC_ERR_EN) + return pdata->inject_vbase + off - IMX9_MC_ERR_EN; -static inline u32 ddr_in32(void __iomem *addr) + return pdata->mc_vbase + off; +} + +static inline u32 ddr_in32(struct fsl_mc_pdata *pdata, unsigned int off) { - return little_endian ? ioread32(addr) : ioread32be(addr); + void __iomem *addr = ddr_reg_addr(pdata, off); + + return pdata->little_endian ? ioread32(addr) : ioread32be(addr); } -static inline void ddr_out32(void __iomem *addr, u32 value) +static inline void ddr_out32(struct fsl_mc_pdata *pdata, unsigned int off, u32 value) { - if (little_endian) + void __iomem *addr = ddr_reg_addr(pdata, off); + + if (pdata->little_endian) iowrite32(value, addr); else iowrite32be(value, addr); @@ -60,7 +72,7 @@ static ssize_t fsl_mc_inject_data_hi_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct fsl_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", - ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI)); + ddr_in32(pdata, FSL_MC_DATA_ERR_INJECT_HI)); } static ssize_t fsl_mc_inject_data_lo_show(struct device *dev, @@ -70,7 +82,7 @@ static ssize_t fsl_mc_inject_data_lo_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct fsl_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", - ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO)); + ddr_in32(pdata, FSL_MC_DATA_ERR_INJECT_LO)); } static ssize_t fsl_mc_inject_ctrl_show(struct device *dev, @@ -80,7 +92,7 @@ static ssize_t fsl_mc_inject_ctrl_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct fsl_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", - ddr_in32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT)); + ddr_in32(pdata, FSL_MC_ECC_ERR_INJECT)); } static ssize_t fsl_mc_inject_data_hi_store(struct device *dev, @@ -97,7 +109,7 @@ static ssize_t fsl_mc_inject_data_hi_store(struct device *dev, if (rc) return rc; - ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI, val); + ddr_out32(pdata, FSL_MC_DATA_ERR_INJECT_HI, val); return count; } return 0; @@ -117,7 +129,7 @@ static ssize_t fsl_mc_inject_data_lo_store(struct device *dev, if (rc) return rc; - ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO, val); + ddr_out32(pdata, FSL_MC_DATA_ERR_INJECT_LO, val); return count; } return 0; @@ -137,7 +149,7 @@ static ssize_t fsl_mc_inject_ctrl_store(struct device *dev, if (rc) return rc; - ddr_out32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT, val); + ddr_out32(pdata, FSL_MC_ECC_ERR_INJECT, val); return count; } return 0; @@ -286,7 +298,7 @@ static void fsl_mc_check(struct mem_ctl_info *mci) int bad_data_bit; int bad_ecc_bit; - err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT); + err_detect = ddr_in32(pdata, FSL_MC_ERR_DETECT); if (!err_detect) return; @@ -295,14 +307,14 @@ static void fsl_mc_check(struct mem_ctl_info *mci) /* no more processing if not ECC bit errors */ if (!(err_detect & (DDR_EDE_SBE | DDR_EDE_MBE))) { - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect); + ddr_out32(pdata, FSL_MC_ERR_DETECT, err_detect); return; } - syndrome = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ECC); + syndrome = ddr_in32(pdata, FSL_MC_CAPTURE_ECC); /* Mask off appropriate bits of syndrome based on bus width */ - bus_width = (ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG) & + bus_width = (ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG) & DSC_DBW_MASK) ? 32 : 64; if (bus_width == 64) syndrome &= 0xff; @@ -310,8 +322,8 @@ static void fsl_mc_check(struct mem_ctl_info *mci) syndrome &= 0xffff; err_addr = make64( - ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_EXT_ADDRESS), - ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ADDRESS)); + ddr_in32(pdata, FSL_MC_CAPTURE_EXT_ADDRESS), + ddr_in32(pdata, FSL_MC_CAPTURE_ADDRESS)); pfn = err_addr >> PAGE_SHIFT; for (row_index = 0; row_index < mci->nr_csrows; row_index++) { @@ -320,29 +332,33 @@ static void fsl_mc_check(struct mem_ctl_info *mci) break; } - cap_high = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_HI); - cap_low = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_LO); + cap_high = ddr_in32(pdata, FSL_MC_CAPTURE_DATA_HI); + cap_low = ddr_in32(pdata, FSL_MC_CAPTURE_DATA_LO); /* * Analyze single-bit errors on 64-bit wide buses * TODO: Add support for 32-bit wide buses */ if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) { + u64 cap = (u64)cap_high << 32 | cap_low; + u32 s = syndrome; + sbe_ecc_decode(cap_high, cap_low, syndrome, &bad_data_bit, &bad_ecc_bit); - if (bad_data_bit != -1) - fsl_mc_printk(mci, KERN_ERR, - "Faulty Data bit: %d\n", bad_data_bit); - if (bad_ecc_bit != -1) - fsl_mc_printk(mci, KERN_ERR, - "Faulty ECC bit: %d\n", bad_ecc_bit); + if (bad_data_bit >= 0) { + fsl_mc_printk(mci, KERN_ERR, "Faulty Data bit: %d\n", bad_data_bit); + cap ^= 1ULL << bad_data_bit; + } + + if (bad_ecc_bit >= 0) { + fsl_mc_printk(mci, KERN_ERR, "Faulty ECC bit: %d\n", bad_ecc_bit); + s ^= 1 << bad_ecc_bit; + } fsl_mc_printk(mci, KERN_ERR, "Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n", - cap_high ^ (1 << (bad_data_bit - 32)), - cap_low ^ (1 << bad_data_bit), - syndrome ^ (1 << bad_ecc_bit)); + upper_32_bits(cap), lower_32_bits(cap), s); } fsl_mc_printk(mci, KERN_ERR, @@ -367,7 +383,7 @@ static void fsl_mc_check(struct mem_ctl_info *mci) row_index, 0, -1, mci->ctl_name, ""); - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect); + ddr_out32(pdata, FSL_MC_ERR_DETECT, err_detect); } static irqreturn_t fsl_mc_isr(int irq, void *dev_id) @@ -376,7 +392,7 @@ static irqreturn_t fsl_mc_isr(int irq, void *dev_id) struct fsl_mc_pdata *pdata = mci->pvt_info; u32 err_detect; - err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT); + err_detect = ddr_in32(pdata, FSL_MC_ERR_DETECT); if (!err_detect) return IRQ_NONE; @@ -396,7 +412,7 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) u32 cs_bnds; int index; - sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG); + sdram_ctl = ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG); sdtype = sdram_ctl & DSC_SDTYPE_MASK; if (sdram_ctl & DSC_RD_EN) { @@ -431,6 +447,9 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) case 0x05000000: mtype = MEM_DDR4; break; + case 0x04000000: + mtype = MEM_LPDDR4; + break; default: mtype = MEM_UNKNOWN; break; @@ -444,7 +463,7 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) csrow = mci->csrows[index]; dimm = csrow->channels[0]->dimm; - cs_bnds = ddr_in32(pdata->mc_vbase + FSL_MC_CS_BNDS_0 + + cs_bnds = ddr_in32(pdata, FSL_MC_CS_BNDS_0 + (index * FSL_MC_CS_BNDS_OFS)); start = (cs_bnds & 0xffff0000) >> 16; @@ -464,7 +483,9 @@ static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) dimm->grain = 8; dimm->mtype = mtype; dimm->dtype = DEV_UNKNOWN; - if (sdram_ctl & DSC_X32_EN) + if (pdata->flag == TYPE_IMX9) + dimm->dtype = DEV_X16; + else if (sdram_ctl & DSC_X32_EN) dimm->dtype = DEV_X32; dimm->edac_mode = EDAC_SECDED; } @@ -476,6 +497,7 @@ int fsl_mc_err_probe(struct platform_device *op) struct edac_mc_layer layers[2]; struct fsl_mc_pdata *pdata; struct resource r; + u32 ecc_en_mask; u32 sdram_ctl; int res; @@ -503,11 +525,13 @@ int fsl_mc_err_probe(struct platform_device *op) mci->ctl_name = pdata->name; mci->dev_name = pdata->name; + pdata->flag = (unsigned long)device_get_match_data(&op->dev); + /* * Get the endianness of DDR controller registers. * Default is big endian. */ - little_endian = of_property_read_bool(op->dev.of_node, "little-endian"); + pdata->little_endian = of_property_read_bool(op->dev.of_node, "little-endian"); res = of_address_to_resource(op->dev.of_node, 0, &r); if (res) { @@ -531,8 +555,23 @@ int fsl_mc_err_probe(struct platform_device *op) goto err; } - sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG); - if (!(sdram_ctl & DSC_ECC_EN)) { + if (pdata->flag == TYPE_IMX9) { + pdata->inject_vbase = devm_platform_ioremap_resource_byname(op, "inject"); + if (IS_ERR(pdata->inject_vbase)) { + res = -ENOMEM; + goto err; + } + } + + if (pdata->flag == TYPE_IMX9) { + sdram_ctl = ddr_in32(pdata, IMX9_MC_ERR_EN); + ecc_en_mask = ERR_ECC_EN | ERR_INLINE_ECC; + } else { + sdram_ctl = ddr_in32(pdata, FSL_MC_DDR_SDRAM_CFG); + ecc_en_mask = DSC_ECC_EN; + } + + if ((sdram_ctl & ecc_en_mask) != ecc_en_mask) { /* no ECC */ pr_warn("%s: No ECC DIMMs discovered\n", __func__); res = -ENODEV; @@ -543,7 +582,8 @@ int fsl_mc_err_probe(struct platform_device *op) mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR | MEM_FLAG_DDR2 | MEM_FLAG_RDDR2 | MEM_FLAG_DDR3 | MEM_FLAG_RDDR3 | - MEM_FLAG_DDR4 | MEM_FLAG_RDDR4; + MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | + MEM_FLAG_LPDDR4; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; @@ -558,11 +598,11 @@ int fsl_mc_err_probe(struct platform_device *op) fsl_ddr_init_csrows(mci); /* store the original error disable bits */ - orig_ddr_err_disable = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DISABLE); - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE, 0); + pdata->orig_ddr_err_disable = ddr_in32(pdata, FSL_MC_ERR_DISABLE); + ddr_out32(pdata, FSL_MC_ERR_DISABLE, 0); /* clear all error bits */ - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, ~0); + ddr_out32(pdata, FSL_MC_ERR_DETECT, ~0); res = edac_mc_add_mc_with_groups(mci, fsl_ddr_dev_groups); if (res) { @@ -571,15 +611,15 @@ int fsl_mc_err_probe(struct platform_device *op) } if (edac_op_state == EDAC_OPSTATE_INT) { - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN, + ddr_out32(pdata, FSL_MC_ERR_INT_EN, DDR_EIE_MBEE | DDR_EIE_SBEE); /* store the original error management threshold */ - orig_ddr_err_sbe = ddr_in32(pdata->mc_vbase + - FSL_MC_ERR_SBE) & 0xff0000; + pdata->orig_ddr_err_sbe = ddr_in32(pdata, + FSL_MC_ERR_SBE) & 0xff0000; /* set threshold to 1 error per interrupt */ - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, 0x10000); + ddr_out32(pdata, FSL_MC_ERR_SBE, 0x10000); /* register interrupts */ pdata->irq = platform_get_irq(op, 0); @@ -620,12 +660,13 @@ void fsl_mc_err_remove(struct platform_device *op) edac_dbg(0, "\n"); if (edac_op_state == EDAC_OPSTATE_INT) { - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN, 0); + ddr_out32(pdata, FSL_MC_ERR_INT_EN, 0); } - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE, - orig_ddr_err_disable); - ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, orig_ddr_err_sbe); + ddr_out32(pdata, FSL_MC_ERR_DISABLE, + pdata->orig_ddr_err_disable); + ddr_out32(pdata, FSL_MC_ERR_SBE, pdata->orig_ddr_err_sbe); + edac_mc_del_mc(&op->dev); edac_mc_free(mci); diff --git a/drivers/edac/fsl_ddr_edac.h b/drivers/edac/fsl_ddr_edac.h index c0994a2a003c..73618f79e587 100644 --- a/drivers/edac/fsl_ddr_edac.h +++ b/drivers/edac/fsl_ddr_edac.h @@ -39,6 +39,9 @@ #define FSL_MC_CAPTURE_EXT_ADDRESS 0x0e54 #define FSL_MC_ERR_SBE 0x0e58 +#define IMX9_MC_ERR_EN 0x1000 +#define IMX9_MC_DATA_ERR_INJECT_OFF 0x100 + #define DSC_MEM_EN 0x80000000 #define DSC_ECC_EN 0x20000000 #define DSC_RD_EN 0x10000000 @@ -46,6 +49,9 @@ #define DSC_DBW_32 0x00080000 #define DSC_DBW_64 0x00000000 +#define ERR_ECC_EN 0x80000000 +#define ERR_INLINE_ECC 0x40000000 + #define DSC_SDTYPE_MASK 0x07000000 #define DSC_X32_EN 0x00000020 @@ -65,11 +71,18 @@ #define DDR_EDI_SBED 0x4 /* single-bit ECC error disable */ #define DDR_EDI_MBED 0x8 /* multi-bit ECC error disable */ +#define TYPE_IMX9 0x1 /* MC used by iMX9 having registers changed */ + struct fsl_mc_pdata { char *name; int edac_idx; void __iomem *mc_vbase; + void __iomem *inject_vbase; int irq; + u32 orig_ddr_err_disable; + u32 orig_ddr_err_sbe; + bool little_endian; + unsigned long flag; }; int fsl_mc_err_probe(struct platform_device *op); void fsl_mc_err_remove(struct platform_device *op); diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index e2a954de913b..51556c72a967 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -1036,6 +1036,7 @@ static int __init i10nm_init(void) return -ENODEV; cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); res_cfg = cfg; rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm); diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 9ef13570f2e5..4fc16922dc1a 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -19,7 +19,8 @@ * 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller * 0c08: Xeon E3-1200 v3 Processor DRAM Controller * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers - * 5918: Xeon E3-1200 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers + * 590f: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers + * 5918: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers * 190f: 6th Gen Core Dual-Core Processor Host Bridge/DRAM Registers * 191f: 6th Gen Core Quad-Core Processor Host Bridge/DRAM Registers * 3e..: 8th/9th Gen Core Processor Host Bridge/DRAM Registers @@ -67,7 +68,8 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x190F #define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x1918 #define PCI_DEVICE_ID_INTEL_IE31200_HB_10 0x191F -#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x5918 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x590f +#define PCI_DEVICE_ID_INTEL_IE31200_HB_12 0x5918 /* Coffee Lake-S */ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK 0x3e00 @@ -88,6 +90,7 @@ ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \ ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \ ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \ + ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_12) || \ (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \ PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK)) @@ -587,6 +590,7 @@ static const struct pci_device_id ie31200_pci_tbl[] = { { PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, + { PCI_VEND_DEV(INTEL, IE31200_HB_12), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index dd29a8d33b4e..fdf3a84fe698 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -1178,6 +1178,20 @@ fail: return -ENODEV; } +static void igen6_check(struct mem_ctl_info *mci) +{ + struct igen6_imc *imc = mci->pvt_info; + u64 ecclog; + + /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ + ecclog = ecclog_read_and_clear(imc); + if (!ecclog) + return; + + if (!ecclog_gen_pool_add(imc->mc, ecclog)) + irq_work_queue(&ecclog_irq_work); +} + static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) { struct edac_mc_layer layers[2]; @@ -1219,6 +1233,8 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; mci->dev_name = pci_name(pdev); + if (edac_op_state == EDAC_OPSTATE_POLL) + mci->edac_check = igen6_check; mci->pvt_info = &igen6_pvt->imc[mc]; imc = mci->pvt_info; @@ -1253,6 +1269,7 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) imc->mci = mci; return 0; fail3: + mci->pvt_info = NULL; kfree(mci->ctl_name); fail2: edac_mc_free(mci); @@ -1277,6 +1294,7 @@ static void igen6_unregister_mcis(void) edac_mc_del_mc(mci->pdev); kfree(mci->ctl_name); + mci->pvt_info = NULL; edac_mc_free(mci); iounmap(imc->window); } @@ -1356,6 +1374,25 @@ static void unregister_err_handler(void) unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); } +static void opstate_set(struct res_config *cfg, const struct pci_device_id *ent) +{ + /* + * Quirk: Certain SoCs' error reporting interrupts don't work. + * Force polling mode for them to ensure that memory error + * events can be handled. + */ + if (ent->device == DID_ADL_N_SKU4) { + edac_op_state = EDAC_OPSTATE_POLL; + return; + } + + /* Set the mode according to the configuration data. */ + if (cfg->machine_check) + edac_op_state = EDAC_OPSTATE_INT; + else + edac_op_state = EDAC_OPSTATE_NMI; +} + static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { u64 mchbar; @@ -1373,6 +1410,8 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto fail; + opstate_set(res_cfg, ent); + for (i = 0; i < res_cfg->num_imc; i++) { rc = igen6_register_mci(i, mchbar, pdev); if (rc) @@ -1456,8 +1495,6 @@ static int __init igen6_init(void) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) return -EBUSY; - edac_op_state = EDAC_OPSTATE_NMI; - rc = pci_register_driver(&igen6_driver); if (rc) return rc; diff --git a/drivers/edac/layerscape_edac.c b/drivers/edac/layerscape_edac.c index 0d42c1238908..9a0c92ebbc3c 100644 --- a/drivers/edac/layerscape_edac.c +++ b/drivers/edac/layerscape_edac.c @@ -21,6 +21,7 @@ static const struct of_device_id fsl_ddr_mc_err_of_match[] = { { .compatible = "fsl,qoriq-memory-controller", }, + { .compatible = "nxp,imx9-memory-controller", .data = (void *)TYPE_IMX9, }, {}, }; MODULE_DEVICE_TABLE(of, fsl_ddr_mc_err_of_match); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 85713646957b..6cf17af7d911 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -47,6 +47,7 @@ static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); static bool skx_mem_cfg_2lm; +static struct res_config *skx_res_cfg; int skx_adxl_get(void) { @@ -119,7 +120,7 @@ void skx_adxl_put(void) } EXPORT_SYMBOL_GPL(skx_adxl_put); -static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem) +static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) { struct skx_dev *d; int i, len = 0; @@ -135,8 +136,24 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me return false; } + /* + * GNR with a Flat2LM memory configuration may mistakenly classify + * a near-memory error(DDR5) as a far-memory error(CXL), resulting + * in the incorrect selection of decoded ADXL components. + * To address this, prefetch the decoded far-memory controller ID + * and adjust the error source to near-memory if the far-memory + * controller ID is invalid. + */ + if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) { + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; + if (res->imc == -1) { + err_src = ERR_SRC_2LM_NM; + edac_dbg(0, "Adjust the error source to near-memory.\n"); + } + } + res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; - if (error_in_1st_level_mem) { + if (err_src == ERR_SRC_2LM_NM) { res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1; res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ? @@ -191,6 +208,12 @@ void skx_set_mem_cfg(bool mem_cfg_2lm) } EXPORT_SYMBOL_GPL(skx_set_mem_cfg); +void skx_set_res_cfg(struct res_config *cfg) +{ + skx_res_cfg = cfg; +} +EXPORT_SYMBOL_GPL(skx_set_res_cfg); + void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) { driver_decode = decode; @@ -620,31 +643,27 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, optype, skx_msg); } -static bool skx_error_in_1st_level_mem(const struct mce *m) +static enum error_source skx_error_source(const struct mce *m) { - u32 errcode; + u32 errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; - if (!skx_mem_cfg_2lm) - return false; - - errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; - - return errcode == MCACOD_EXT_MEM_ERR; -} + if (errcode != MCACOD_MEM_CTL_ERR && errcode != MCACOD_EXT_MEM_ERR) + return ERR_SRC_NOT_MEMORY; -static bool skx_error_in_mem(const struct mce *m) -{ - u32 errcode; + if (!skx_mem_cfg_2lm) + return ERR_SRC_1LM; - errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; + if (errcode == MCACOD_EXT_MEM_ERR) + return ERR_SRC_2LM_NM; - return (errcode == MCACOD_MEM_CTL_ERR || errcode == MCACOD_EXT_MEM_ERR); + return ERR_SRC_2LM_FM; } int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) { struct mce *mce = (struct mce *)data; + enum error_source err_src; struct decoded_addr res; struct mem_ctl_info *mci; char *type; @@ -652,8 +671,10 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; + err_src = skx_error_source(mce); + /* Ignore unless this is memory related with an address */ - if (!skx_error_in_mem(mce) || !(mce->status & MCI_STATUS_ADDRV)) + if (err_src == ERR_SRC_NOT_MEMORY || !(mce->status & MCI_STATUS_ADDRV)) return NOTIFY_DONE; memset(&res, 0, sizeof(res)); @@ -667,7 +688,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, /* Try driver decoder first */ if (!(driver_decode && driver_decode(&res))) { /* Then try firmware decoder (ACPI DSM methods) */ - if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))) + if (!(adxl_component_count && skx_adxl_decode(&res, err_src))) return NOTIFY_DONE; } diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index f945c1bf5ca4..54bba8a62f72 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -146,6 +146,13 @@ enum { INDEX_MAX }; +enum error_source { + ERR_SRC_1LM, + ERR_SRC_2LM_NM, + ERR_SRC_2LM_FM, + ERR_SRC_NOT_MEMORY, +}; + #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) @@ -234,6 +241,7 @@ int skx_adxl_get(void); void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); +void skx_set_res_cfg(struct res_config *cfg); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_node_id(struct skx_dev *d, u8 *id); |