summaryrefslogtreecommitdiff
path: root/drivers/edac/igen6_edac.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac/igen6_edac.c')
-rw-r--r--drivers/edac/igen6_edac.c174
1 files changed, 146 insertions, 28 deletions
diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index cdd8480e7368..1cb5c67e78ae 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -127,6 +127,7 @@
static struct res_config {
bool machine_check;
+ /* The number of present memory controllers. */
int num_imc;
u32 imc_base;
u32 cmf_base;
@@ -240,6 +241,12 @@ static struct work_struct ecclog_work;
#define DID_ADL_N_SKU11 0x467c
#define DID_ADL_N_SKU12 0x4632
+/* Compute die IDs for Arizona Beach with IBECC */
+#define DID_AZB_SKU1 0x4676
+
+/* Compute did IDs for Amston Lake with IBECC */
+#define DID_ASL_SKU1 0x464a
+
/* Compute die IDs for Raptor Lake-P with IBECC */
#define DID_RPL_P_SKU1 0xa706
#define DID_RPL_P_SKU2 0xa707
@@ -258,6 +265,16 @@ static struct work_struct ecclog_work;
#define DID_MTL_P_SKU2 0x7d02
#define DID_MTL_P_SKU3 0x7d14
+/* Compute die IDs for Arrow Lake-UH with IBECC */
+#define DID_ARL_UH_SKU1 0x7d06
+#define DID_ARL_UH_SKU2 0x7d20
+#define DID_ARL_UH_SKU3 0x7d30
+
+/* Compute die IDs for Panther Lake-H with IBECC */
+#define DID_PTL_H_SKU1 0xb000
+#define DID_PTL_H_SKU2 0xb001
+#define DID_PTL_H_SKU3 0xb002
+
static int get_mchbar(struct pci_dev *pdev, u64 *mchbar)
{
union {
@@ -311,7 +328,7 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
if (igen6_tom <= _4GB)
return eaddr + igen6_tolud - _4GB;
- if (eaddr < _4GB)
+ if (eaddr >= igen6_tom)
return eaddr + igen6_tolud - igen6_tom;
return eaddr;
@@ -552,7 +569,7 @@ static struct res_config mtl_p_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
-static const struct pci_device_id igen6_pci_tbl[] = {
+static struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
@@ -585,6 +602,8 @@ static const struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
+ { PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
@@ -597,6 +616,12 @@ static const struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg },
+ { PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg },
{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
@@ -769,13 +794,22 @@ static u64 ecclog_read_and_clear(struct igen6_imc *imc)
{
u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
- if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) {
- /* Clear CE/UE bits by writing 1s */
- writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
- return ecclog;
- }
+ /*
+ * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
+ * the invalid value ~0. This will result in a flood of invalid
+ * error reports in polling mode. Skip it.
+ */
+ if (ecclog == ~0)
+ return 0;
- return 0;
+ /* Neither a CE nor a UE. Skip it.*/
+ if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
+ return 0;
+
+ /* Clear CE/UE bits by writing 1s */
+ writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
+
+ return ecclog;
}
static void errsts_clear(struct igen6_imc *imc)
@@ -800,7 +834,7 @@ static int errcmd_enable_error_reporting(bool enable)
rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
if (rc)
- return rc;
+ return pcibios_err_to_errno(rc);
if (enable)
errcmd |= ERRCMD_CE | ERRSTS_UE;
@@ -809,7 +843,7 @@ static int errcmd_enable_error_reporting(bool enable)
rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
if (rc)
- return rc;
+ return pcibios_err_to_errno(rc);
return 0;
}
@@ -1162,23 +1196,35 @@ fail:
return -ENODEV;
}
-static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
+static void igen6_check(struct mem_ctl_info *mci)
+{
+ struct igen6_imc *imc = mci->pvt_info;
+ u64 ecclog;
+
+ /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
+ ecclog = ecclog_read_and_clear(imc);
+ if (!ecclog)
+ return;
+
+ if (!ecclog_gen_pool_add(imc->mc, ecclog))
+ irq_work_queue(&ecclog_irq_work);
+}
+
+/* Check whether the memory controller is absent. */
+static bool igen6_imc_absent(void __iomem *window)
+{
+ return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0;
+}
+
+static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
{
struct edac_mc_layer layers[2];
struct mem_ctl_info *mci;
struct igen6_imc *imc;
- void __iomem *window;
int rc;
edac_dbg(2, "\n");
- mchbar += mc * MCHBAR_SIZE;
- window = ioremap(mchbar, MCHBAR_SIZE);
- if (!window) {
- igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
- return -ENODEV;
- }
-
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = NUM_CHANNELS;
layers[0].is_virt_csrow = false;
@@ -1203,6 +1249,8 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->dev_name = pci_name(pdev);
+ if (edac_op_state == EDAC_OPSTATE_POLL)
+ mci->edac_check = igen6_check;
mci->pvt_info = &igen6_pvt->imc[mc];
imc = mci->pvt_info;
@@ -1237,11 +1285,11 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
imc->mci = mci;
return 0;
fail3:
+ mci->pvt_info = NULL;
kfree(mci->ctl_name);
fail2:
edac_mc_free(mci);
fail:
- iounmap(window);
return rc;
}
@@ -1261,11 +1309,64 @@ static void igen6_unregister_mcis(void)
edac_mc_del_mc(mci->pdev);
kfree(mci->ctl_name);
+ mci->pvt_info = NULL;
edac_mc_free(mci);
iounmap(imc->window);
}
}
+static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
+{
+ void __iomem *window;
+ int lmc, pmc, rc;
+ u64 base;
+
+ for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
+ base = mchbar + pmc * MCHBAR_SIZE;
+ window = ioremap(base, MCHBAR_SIZE);
+ if (!window) {
+ igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
+ rc = -ENOMEM;
+ goto out_unregister_mcis;
+ }
+
+ if (igen6_imc_absent(window)) {
+ iounmap(window);
+ edac_dbg(2, "Skip absent mc%d\n", pmc);
+ continue;
+ }
+
+ rc = igen6_register_mci(lmc, window, pdev);
+ if (rc)
+ goto out_iounmap;
+
+ /* Done, if all present MCs are detected and registered. */
+ if (++lmc >= res_cfg->num_imc)
+ break;
+ }
+
+ if (!lmc) {
+ igen6_printk(KERN_ERR, "No mc found.\n");
+ return -ENODEV;
+ }
+
+ if (lmc < res_cfg->num_imc) {
+ igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.",
+ res_cfg->num_imc, lmc);
+ res_cfg->num_imc = lmc;
+ }
+
+ return 0;
+
+out_iounmap:
+ iounmap(window);
+
+out_unregister_mcis:
+ igen6_unregister_mcis();
+
+ return rc;
+}
+
static int igen6_mem_slice_setup(u64 mchbar)
{
struct igen6_imc *imc = &igen6_pvt->imc[0];
@@ -1340,10 +1441,29 @@ static void unregister_err_handler(void)
unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
}
+static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent)
+{
+ /*
+ * Quirk: Certain SoCs' error reporting interrupts don't work.
+ * Force polling mode for them to ensure that memory error
+ * events can be handled.
+ */
+ if (ent->device == DID_ADL_N_SKU4) {
+ edac_op_state = EDAC_OPSTATE_POLL;
+ return;
+ }
+
+ /* Set the mode according to the configuration data. */
+ if (cfg->machine_check)
+ edac_op_state = EDAC_OPSTATE_INT;
+ else
+ edac_op_state = EDAC_OPSTATE_NMI;
+}
+
static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
u64 mchbar;
- int i, rc;
+ int rc;
edac_dbg(2, "\n");
@@ -1357,11 +1477,11 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
goto fail;
- for (i = 0; i < res_cfg->num_imc; i++) {
- rc = igen6_register_mci(i, mchbar, pdev);
- if (rc)
- goto fail2;
- }
+ opstate_set(res_cfg, ent);
+
+ rc = igen6_register_mcis(pdev, mchbar);
+ if (rc)
+ goto fail;
if (res_cfg->num_imc > 1) {
rc = igen6_mem_slice_setup(mchbar);
@@ -1440,8 +1560,6 @@ static int __init igen6_init(void)
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY;
- edac_op_state = EDAC_OPSTATE_NMI;
-
rc = pci_register_driver(&igen6_driver);
if (rc)
return rc;