diff options
Diffstat (limited to 'drivers/edac/igen6_edac.c')
-rw-r--r-- | drivers/edac/igen6_edac.c | 174 |
1 files changed, 146 insertions, 28 deletions
diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index cdd8480e7368..1cb5c67e78ae 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -127,6 +127,7 @@ static struct res_config { bool machine_check; + /* The number of present memory controllers. */ int num_imc; u32 imc_base; u32 cmf_base; @@ -240,6 +241,12 @@ static struct work_struct ecclog_work; #define DID_ADL_N_SKU11 0x467c #define DID_ADL_N_SKU12 0x4632 +/* Compute die IDs for Arizona Beach with IBECC */ +#define DID_AZB_SKU1 0x4676 + +/* Compute did IDs for Amston Lake with IBECC */ +#define DID_ASL_SKU1 0x464a + /* Compute die IDs for Raptor Lake-P with IBECC */ #define DID_RPL_P_SKU1 0xa706 #define DID_RPL_P_SKU2 0xa707 @@ -258,6 +265,16 @@ static struct work_struct ecclog_work; #define DID_MTL_P_SKU2 0x7d02 #define DID_MTL_P_SKU3 0x7d14 +/* Compute die IDs for Arrow Lake-UH with IBECC */ +#define DID_ARL_UH_SKU1 0x7d06 +#define DID_ARL_UH_SKU2 0x7d20 +#define DID_ARL_UH_SKU3 0x7d30 + +/* Compute die IDs for Panther Lake-H with IBECC */ +#define DID_PTL_H_SKU1 0xb000 +#define DID_PTL_H_SKU2 0xb001 +#define DID_PTL_H_SKU3 0xb002 + static int get_mchbar(struct pci_dev *pdev, u64 *mchbar) { union { @@ -311,7 +328,7 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc) if (igen6_tom <= _4GB) return eaddr + igen6_tolud - _4GB; - if (eaddr < _4GB) + if (eaddr >= igen6_tom) return eaddr + igen6_tolud - igen6_tom; return eaddr; @@ -552,7 +569,7 @@ static struct res_config mtl_p_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct pci_device_id igen6_pci_tbl[] = { +static struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg }, @@ -585,6 +602,8 @@ static const struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg }, { PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg }, { PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg }, { PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg }, { PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg }, { PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg }, @@ -597,6 +616,12 @@ static const struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg }, { PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg }, { PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg }, + { PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg }, { }, }; MODULE_DEVICE_TABLE(pci, igen6_pci_tbl); @@ -769,13 +794,22 @@ static u64 ecclog_read_and_clear(struct igen6_imc *imc) { u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET); - if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) { - /* Clear CE/UE bits by writing 1s */ - writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); - return ecclog; - } + /* + * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain + * the invalid value ~0. This will result in a flood of invalid + * error reports in polling mode. Skip it. + */ + if (ecclog == ~0) + return 0; - return 0; + /* Neither a CE nor a UE. Skip it.*/ + if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE))) + return 0; + + /* Clear CE/UE bits by writing 1s */ + writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); + + return ecclog; } static void errsts_clear(struct igen6_imc *imc) @@ -800,7 +834,7 @@ static int errcmd_enable_error_reporting(bool enable) rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd); if (rc) - return rc; + return pcibios_err_to_errno(rc); if (enable) errcmd |= ERRCMD_CE | ERRSTS_UE; @@ -809,7 +843,7 @@ static int errcmd_enable_error_reporting(bool enable) rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd); if (rc) - return rc; + return pcibios_err_to_errno(rc); return 0; } @@ -1162,23 +1196,35 @@ fail: return -ENODEV; } -static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) +static void igen6_check(struct mem_ctl_info *mci) +{ + struct igen6_imc *imc = mci->pvt_info; + u64 ecclog; + + /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ + ecclog = ecclog_read_and_clear(imc); + if (!ecclog) + return; + + if (!ecclog_gen_pool_add(imc->mc, ecclog)) + irq_work_queue(&ecclog_irq_work); +} + +/* Check whether the memory controller is absent. */ +static bool igen6_imc_absent(void __iomem *window) +{ + return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0; +} + +static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev) { struct edac_mc_layer layers[2]; struct mem_ctl_info *mci; struct igen6_imc *imc; - void __iomem *window; int rc; edac_dbg(2, "\n"); - mchbar += mc * MCHBAR_SIZE; - window = ioremap(mchbar, MCHBAR_SIZE); - if (!window) { - igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar); - return -ENODEV; - } - layers[0].type = EDAC_MC_LAYER_CHANNEL; layers[0].size = NUM_CHANNELS; layers[0].is_virt_csrow = false; @@ -1203,6 +1249,8 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; mci->dev_name = pci_name(pdev); + if (edac_op_state == EDAC_OPSTATE_POLL) + mci->edac_check = igen6_check; mci->pvt_info = &igen6_pvt->imc[mc]; imc = mci->pvt_info; @@ -1237,11 +1285,11 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) imc->mci = mci; return 0; fail3: + mci->pvt_info = NULL; kfree(mci->ctl_name); fail2: edac_mc_free(mci); fail: - iounmap(window); return rc; } @@ -1261,11 +1309,64 @@ static void igen6_unregister_mcis(void) edac_mc_del_mc(mci->pdev); kfree(mci->ctl_name); + mci->pvt_info = NULL; edac_mc_free(mci); iounmap(imc->window); } } +static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar) +{ + void __iomem *window; + int lmc, pmc, rc; + u64 base; + + for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) { + base = mchbar + pmc * MCHBAR_SIZE; + window = ioremap(base, MCHBAR_SIZE); + if (!window) { + igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc); + rc = -ENOMEM; + goto out_unregister_mcis; + } + + if (igen6_imc_absent(window)) { + iounmap(window); + edac_dbg(2, "Skip absent mc%d\n", pmc); + continue; + } + + rc = igen6_register_mci(lmc, window, pdev); + if (rc) + goto out_iounmap; + + /* Done, if all present MCs are detected and registered. */ + if (++lmc >= res_cfg->num_imc) + break; + } + + if (!lmc) { + igen6_printk(KERN_ERR, "No mc found.\n"); + return -ENODEV; + } + + if (lmc < res_cfg->num_imc) { + igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.", + res_cfg->num_imc, lmc); + res_cfg->num_imc = lmc; + } + + return 0; + +out_iounmap: + iounmap(window); + +out_unregister_mcis: + igen6_unregister_mcis(); + + return rc; +} + static int igen6_mem_slice_setup(u64 mchbar) { struct igen6_imc *imc = &igen6_pvt->imc[0]; @@ -1340,10 +1441,29 @@ static void unregister_err_handler(void) unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); } +static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent) +{ + /* + * Quirk: Certain SoCs' error reporting interrupts don't work. + * Force polling mode for them to ensure that memory error + * events can be handled. + */ + if (ent->device == DID_ADL_N_SKU4) { + edac_op_state = EDAC_OPSTATE_POLL; + return; + } + + /* Set the mode according to the configuration data. */ + if (cfg->machine_check) + edac_op_state = EDAC_OPSTATE_INT; + else + edac_op_state = EDAC_OPSTATE_NMI; +} + static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { u64 mchbar; - int i, rc; + int rc; edac_dbg(2, "\n"); @@ -1357,11 +1477,11 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto fail; - for (i = 0; i < res_cfg->num_imc; i++) { - rc = igen6_register_mci(i, mchbar, pdev); - if (rc) - goto fail2; - } + opstate_set(res_cfg, ent); + + rc = igen6_register_mcis(pdev, mchbar); + if (rc) + goto fail; if (res_cfg->num_imc > 1) { rc = igen6_mem_slice_setup(mchbar); @@ -1440,8 +1560,6 @@ static int __init igen6_init(void) if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) return -EBUSY; - edac_op_state = EDAC_OPSTATE_NMI; - rc = pci_register_driver(&igen6_driver); if (rc) return rc; |