Diffstat (limited to 'drivers/edac')
-rw-r--r--   drivers/edac/Kconfig            20
-rw-r--r--   drivers/edac/Makefile            3
-rw-r--r--   drivers/edac/altera_edac.c      22
-rw-r--r--   drivers/edac/amd64_edac.c       61
-rw-r--r--   drivers/edac/amd64_edac.h        7
-rw-r--r--   drivers/edac/edac_mc_sysfs.c   404
-rw-r--r--   drivers/edac/ghes_edac.c         7
-rw-r--r--   drivers/edac/i10nm_base.c        3
-rw-r--r--   drivers/edac/ie31200_edac.c      2
-rw-r--r--   drivers/edac/igen6_edac.c        2
-rw-r--r--   drivers/edac/imh_base.c        602
-rw-r--r--   drivers/edac/skx_base.c          4
-rw-r--r--   drivers/edac/skx_common.c       33
-rw-r--r--   drivers/edac/skx_common.h       98
-rw-r--r--   drivers/edac/versalnet_edac.c   26
15 files changed, 777 insertions, 517 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 39352b9b7a7e..81e40543ffd8 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -23,14 +23,6 @@ menuconfig EDAC if EDAC -config EDAC_LEGACY_SYSFS - bool "EDAC legacy sysfs" - default y - help - Enable the compatibility sysfs nodes. - Use 'Y' if your edac utilities aren't ported to work with the newer - structures. - config EDAC_DEBUG bool "Debugging" select DEBUG_FS @@ -291,6 +283,18 @@ config EDAC_I10NM system has non-volatile DIMMs you should also manually select CONFIG_ACPI_NFIT. +config EDAC_IMH + tristate "Intel Integrated Memory/IO Hub MC" + depends on X86_64 && X86_MCE_INTEL && ACPI + depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_IMH can't be y + select DMI + select ACPI_ADXL + help + Support for error detection and correction the Intel + Integrated Memory/IO Hub Memory Controller. This MC IP is + first used on the Diamond Rapids servers but may appear on + others in the future. + config EDAC_PND2 tristate "Intel Pondicherry2" depends on PCI && X86_64 && X86_MCE_INTEL diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 1c14796410a3..8429b1e856bc 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -65,6 +65,9 @@ obj-$(CONFIG_EDAC_SKX) += skx_edac.o skx_edac_common.o i10nm_edac-y := i10nm_base.o obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o skx_edac_common.o +imh_edac-y := imh_base.o +obj-$(CONFIG_EDAC_IMH) += imh_edac.o skx_edac_common.o + obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 103b2c2eba2a..0c5b94e64ea1 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1184,10 +1184,22 @@ altr_check_ocram_deps_init(struct altr_edac_device_dev *device) if (ret) return ret; - /* Verify OCRAM has been initialized */ + /* + * Verify that OCRAM has been initialized. + * During a warm reset, OCRAM contents are retained, but the control + * and status registers are reset to their default values. Therefore, + * ECC must be explicitly re-enabled in the control register. + * Error condition: if INITCOMPLETEA is clear and ECC_EN is already set. 
+ */ if (!ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA, - (base + ALTR_A10_ECC_INITSTAT_OFST))) - return -ENODEV; + (base + ALTR_A10_ECC_INITSTAT_OFST))) { + if (!ecc_test_bits(ALTR_A10_ECC_EN, + (base + ALTR_A10_ECC_CTRL_OFST))) + ecc_set_bits(ALTR_A10_ECC_EN, + (base + ALTR_A10_ECC_CTRL_OFST)); + else + return -ENODEV; + } /* Enable IRQ on Single Bit Error */ writel(ALTR_A10_ECC_SERRINTEN, (base + ALTR_A10_ECC_ERRINTENS_OFST)); @@ -1357,7 +1369,7 @@ static const struct edac_device_prv_data a10_enetecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject2_fops, + .inject_fops = &altr_edac_a10_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_ETHERNET */ @@ -1447,7 +1459,7 @@ static const struct edac_device_prv_data a10_usbecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject2_fops, + .inject_fops = &altr_edac_a10_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_USB */ diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 2f6ab783bf20..2391f3469961 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3732,6 +3732,7 @@ static void hw_info_put(struct amd64_pvt *pvt) pci_dev_put(pvt->F1); pci_dev_put(pvt->F2); kfree(pvt->umc); + kfree(pvt->csels); } static struct low_ops umc_ops = { @@ -3766,6 +3767,7 @@ static int per_family_init(struct amd64_pvt *pvt) pvt->stepping = boot_cpu_data.x86_stepping; pvt->model = boot_cpu_data.x86_model; pvt->fam = boot_cpu_data.x86; + char *tmp_name = NULL; pvt->max_mcs = 2; /* @@ -3779,7 +3781,7 @@ static int per_family_init(struct amd64_pvt *pvt) switch (pvt->fam) { case 0xf: - pvt->ctl_name = (pvt->ext_model >= K8_REV_F) ? + tmp_name = (pvt->ext_model >= K8_REV_F) ? "K8 revF or later" : "K8 revE or earlier"; pvt->f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP; pvt->f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL; @@ -3788,7 +3790,6 @@ static int per_family_init(struct amd64_pvt *pvt) break; case 0x10: - pvt->ctl_name = "F10h"; pvt->f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP; pvt->f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM; pvt->ops->dbam_to_cs = f10_dbam_to_chip_select; @@ -3797,12 +3798,10 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x15: switch (pvt->model) { case 0x30: - pvt->ctl_name = "F15h_M30h"; pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2; break; case 0x60: - pvt->ctl_name = "F15h_M60h"; pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2; pvt->ops->dbam_to_cs = f15_m60h_dbam_to_chip_select; @@ -3811,7 +3810,6 @@ static int per_family_init(struct amd64_pvt *pvt) /* Richland is only client */ return -ENODEV; default: - pvt->ctl_name = "F15h"; pvt->f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2; pvt->ops->dbam_to_cs = f15_dbam_to_chip_select; @@ -3822,12 +3820,10 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x16: switch (pvt->model) { case 0x30: - pvt->ctl_name = "F16h_M30h"; pvt->f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2; break; default: - pvt->ctl_name = "F16h"; pvt->f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2; break; @@ -3836,76 +3832,51 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x17: switch (pvt->model) { - case 0x10 ... 
0x2f: - pvt->ctl_name = "F17h_M10h"; - break; case 0x30 ... 0x3f: - pvt->ctl_name = "F17h_M30h"; pvt->max_mcs = 8; break; - case 0x60 ... 0x6f: - pvt->ctl_name = "F17h_M60h"; - break; - case 0x70 ... 0x7f: - pvt->ctl_name = "F17h_M70h"; - break; default: - pvt->ctl_name = "F17h"; break; } break; case 0x18: - pvt->ctl_name = "F18h"; break; case 0x19: switch (pvt->model) { case 0x00 ... 0x0f: - pvt->ctl_name = "F19h"; pvt->max_mcs = 8; break; case 0x10 ... 0x1f: - pvt->ctl_name = "F19h_M10h"; pvt->max_mcs = 12; pvt->flags.zn_regs_v2 = 1; break; - case 0x20 ... 0x2f: - pvt->ctl_name = "F19h_M20h"; - break; case 0x30 ... 0x3f: if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) { - pvt->ctl_name = "MI200"; + tmp_name = "MI200"; pvt->max_mcs = 4; pvt->dram_type = MEM_HBM2; pvt->gpu_umc_base = 0x50000; pvt->ops = &gpu_ops; } else { - pvt->ctl_name = "F19h_M30h"; pvt->max_mcs = 8; } break; - case 0x50 ... 0x5f: - pvt->ctl_name = "F19h_M50h"; - break; case 0x60 ... 0x6f: - pvt->ctl_name = "F19h_M60h"; pvt->flags.zn_regs_v2 = 1; break; case 0x70 ... 0x7f: - pvt->ctl_name = "F19h_M70h"; pvt->max_mcs = 4; pvt->flags.zn_regs_v2 = 1; break; case 0x90 ... 0x9f: - pvt->ctl_name = "F19h_M90h"; pvt->max_mcs = 4; pvt->dram_type = MEM_HBM3; pvt->gpu_umc_base = 0x90000; pvt->ops = &gpu_ops; break; case 0xa0 ... 0xaf: - pvt->ctl_name = "F19h_MA0h"; pvt->max_mcs = 12; pvt->flags.zn_regs_v2 = 1; break; @@ -3915,34 +3886,22 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x1A: switch (pvt->model) { case 0x00 ... 0x1f: - pvt->ctl_name = "F1Ah"; pvt->max_mcs = 12; pvt->flags.zn_regs_v2 = 1; break; case 0x40 ... 0x4f: - pvt->ctl_name = "F1Ah_M40h"; pvt->flags.zn_regs_v2 = 1; break; case 0x50 ... 0x57: - pvt->ctl_name = "F1Ah_M50h"; + case 0xc0 ... 0xc7: pvt->max_mcs = 16; pvt->flags.zn_regs_v2 = 1; break; case 0x90 ... 0x9f: - pvt->ctl_name = "F1Ah_M90h"; - pvt->max_mcs = 8; - pvt->flags.zn_regs_v2 = 1; - break; case 0xa0 ... 0xaf: - pvt->ctl_name = "F1Ah_MA0h"; pvt->max_mcs = 8; pvt->flags.zn_regs_v2 = 1; break; - case 0xc0 ... 0xc7: - pvt->ctl_name = "F1Ah_MC0h"; - pvt->max_mcs = 16; - pvt->flags.zn_regs_v2 = 1; - break; } break; @@ -3951,6 +3910,16 @@ static int per_family_init(struct amd64_pvt *pvt) return -ENODEV; } + if (tmp_name) + scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), tmp_name); + else + scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), "F%02Xh_M%02Xh", + pvt->fam, pvt->model); + + pvt->csels = kcalloc(pvt->max_mcs, sizeof(*pvt->csels), GFP_KERNEL); + if (!pvt->csels) + return -ENOMEM; + return 0; } diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index d70b8a8d0b09..1757c1b99fc8 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -96,11 +96,12 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 -#define NUM_CONTROLLERS 16 #define ON true #define OFF false +#define MAX_CTL_NAMELEN 19 + /* * PCI-defined configuration space registers */ @@ -346,7 +347,7 @@ struct amd64_pvt { u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ /* one for each DCT/UMC */ - struct chip_select csels[NUM_CONTROLLERS]; + struct chip_select *csels; /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */ struct dram_range ranges[DRAM_RANGES]; @@ -362,7 +363,7 @@ struct amd64_pvt { /* x4, x8, or x16 syndromes in use */ u8 ecc_sym_sz; - const char *ctl_name; + char ctl_name[MAX_CTL_NAMELEN]; u16 f1_id, f2_id; /* Maximum number of memory controllers per die/node. 
*/ u8 max_mcs; diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 8689631f1905..091cc6aae8a9 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -115,401 +115,6 @@ static const char * const edac_caps[] = { [EDAC_S16ECD16ED] = "S16ECD16ED" }; -#ifdef CONFIG_EDAC_LEGACY_SYSFS -/* - * EDAC sysfs CSROW data structures and methods - */ - -#define to_csrow(k) container_of(k, struct csrow_info, dev) - -/* - * We need it to avoid namespace conflicts between the legacy API - * and the per-dimm/per-rank one - */ -#define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \ - static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) - -struct dev_ch_attribute { - struct device_attribute attr; - unsigned int channel; -}; - -#define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \ - static struct dev_ch_attribute dev_attr_legacy_##_name = \ - { __ATTR(_name, _mode, _show, _store), (_var) } - -#define to_channel(k) (container_of(k, struct dev_ch_attribute, attr)->channel) - -/* Set of more default csrow<id> attribute show/store functions */ -static ssize_t csrow_ue_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%u\n", csrow->ue_count); -} - -static ssize_t csrow_ce_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%u\n", csrow->ce_count); -} - -static ssize_t csrow_size_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - int i; - u32 nr_pages = 0; - - for (i = 0; i < csrow->nr_channels; i++) - nr_pages += csrow->channels[i]->dimm->nr_pages; - return sysfs_emit(data, "%u\n", PAGES_TO_MiB(nr_pages)); -} - -static ssize_t csrow_mem_type_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%s\n", edac_mem_types[csrow->channels[0]->dimm->mtype]); -} - -static ssize_t csrow_dev_type_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%s\n", dev_types[csrow->channels[0]->dimm->dtype]); -} - -static ssize_t csrow_edac_mode_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%s\n", edac_caps[csrow->channels[0]->dimm->edac_mode]); -} - -/* show/store functions for DIMM Label attributes */ -static ssize_t channel_dimm_label_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - - /* if field has not been initialized, there is nothing to send */ - if (!rank->dimm->label[0]) - return 0; - - return sysfs_emit(data, "%s\n", rank->dimm->label); -} - -static ssize_t channel_dimm_label_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - size_t copy_count = count; - - if (count == 0) - return -EINVAL; - - if (data[count - 1] == '\0' || data[count - 1] == '\n') - copy_count -= 1; - - if (copy_count == 0 || copy_count >= sizeof(rank->dimm->label)) - 
return -EINVAL; - - memcpy(rank->dimm->label, data, copy_count); - rank->dimm->label[copy_count] = '\0'; - - return count; -} - -/* show function for dynamic chX_ce_count attribute */ -static ssize_t channel_ce_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - - return sysfs_emit(data, "%u\n", rank->ce_count); -} - -/* cwrow<id>/attribute files */ -DEVICE_ATTR_LEGACY(size_mb, S_IRUGO, csrow_size_show, NULL); -DEVICE_ATTR_LEGACY(dev_type, S_IRUGO, csrow_dev_type_show, NULL); -DEVICE_ATTR_LEGACY(mem_type, S_IRUGO, csrow_mem_type_show, NULL); -DEVICE_ATTR_LEGACY(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL); -DEVICE_ATTR_LEGACY(ue_count, S_IRUGO, csrow_ue_count_show, NULL); -DEVICE_ATTR_LEGACY(ce_count, S_IRUGO, csrow_ce_count_show, NULL); - -/* default attributes of the CSROW<id> object */ -static struct attribute *csrow_attrs[] = { - &dev_attr_legacy_dev_type.attr, - &dev_attr_legacy_mem_type.attr, - &dev_attr_legacy_edac_mode.attr, - &dev_attr_legacy_size_mb.attr, - &dev_attr_legacy_ue_count.attr, - &dev_attr_legacy_ce_count.attr, - NULL, -}; - -static const struct attribute_group csrow_attr_grp = { - .attrs = csrow_attrs, -}; - -static const struct attribute_group *csrow_attr_groups[] = { - &csrow_attr_grp, - NULL -}; - -static const struct device_type csrow_attr_type = { - .groups = csrow_attr_groups, -}; - -/* - * possible dynamic channel DIMM Label attribute files - * - */ -DEVICE_CHANNEL(ch0_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 0); -DEVICE_CHANNEL(ch1_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 1); -DEVICE_CHANNEL(ch2_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 2); -DEVICE_CHANNEL(ch3_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 3); -DEVICE_CHANNEL(ch4_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 4); -DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 5); -DEVICE_CHANNEL(ch6_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 6); -DEVICE_CHANNEL(ch7_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 7); -DEVICE_CHANNEL(ch8_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 8); -DEVICE_CHANNEL(ch9_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 9); -DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 10); -DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 11); -DEVICE_CHANNEL(ch12_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 12); -DEVICE_CHANNEL(ch13_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 13); -DEVICE_CHANNEL(ch14_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 14); -DEVICE_CHANNEL(ch15_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 15); - -/* Total possible dynamic DIMM Label attribute file table */ -static struct attribute *dynamic_csrow_dimm_attr[] = { - &dev_attr_legacy_ch0_dimm_label.attr.attr, - &dev_attr_legacy_ch1_dimm_label.attr.attr, - 
&dev_attr_legacy_ch2_dimm_label.attr.attr, - &dev_attr_legacy_ch3_dimm_label.attr.attr, - &dev_attr_legacy_ch4_dimm_label.attr.attr, - &dev_attr_legacy_ch5_dimm_label.attr.attr, - &dev_attr_legacy_ch6_dimm_label.attr.attr, - &dev_attr_legacy_ch7_dimm_label.attr.attr, - &dev_attr_legacy_ch8_dimm_label.attr.attr, - &dev_attr_legacy_ch9_dimm_label.attr.attr, - &dev_attr_legacy_ch10_dimm_label.attr.attr, - &dev_attr_legacy_ch11_dimm_label.attr.attr, - &dev_attr_legacy_ch12_dimm_label.attr.attr, - &dev_attr_legacy_ch13_dimm_label.attr.attr, - &dev_attr_legacy_ch14_dimm_label.attr.attr, - &dev_attr_legacy_ch15_dimm_label.attr.attr, - NULL -}; - -/* possible dynamic channel ce_count attribute files */ -DEVICE_CHANNEL(ch0_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 0); -DEVICE_CHANNEL(ch1_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 1); -DEVICE_CHANNEL(ch2_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 2); -DEVICE_CHANNEL(ch3_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 3); -DEVICE_CHANNEL(ch4_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 4); -DEVICE_CHANNEL(ch5_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 5); -DEVICE_CHANNEL(ch6_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 6); -DEVICE_CHANNEL(ch7_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 7); -DEVICE_CHANNEL(ch8_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 8); -DEVICE_CHANNEL(ch9_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 9); -DEVICE_CHANNEL(ch10_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 10); -DEVICE_CHANNEL(ch11_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 11); -DEVICE_CHANNEL(ch12_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 12); -DEVICE_CHANNEL(ch13_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 13); -DEVICE_CHANNEL(ch14_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 14); -DEVICE_CHANNEL(ch15_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 15); - -/* Total possible dynamic ce_count attribute file table */ -static struct attribute *dynamic_csrow_ce_count_attr[] = { - &dev_attr_legacy_ch0_ce_count.attr.attr, - &dev_attr_legacy_ch1_ce_count.attr.attr, - &dev_attr_legacy_ch2_ce_count.attr.attr, - &dev_attr_legacy_ch3_ce_count.attr.attr, - &dev_attr_legacy_ch4_ce_count.attr.attr, - &dev_attr_legacy_ch5_ce_count.attr.attr, - &dev_attr_legacy_ch6_ce_count.attr.attr, - &dev_attr_legacy_ch7_ce_count.attr.attr, - &dev_attr_legacy_ch8_ce_count.attr.attr, - &dev_attr_legacy_ch9_ce_count.attr.attr, - &dev_attr_legacy_ch10_ce_count.attr.attr, - &dev_attr_legacy_ch11_ce_count.attr.attr, - &dev_attr_legacy_ch12_ce_count.attr.attr, - &dev_attr_legacy_ch13_ce_count.attr.attr, - &dev_attr_legacy_ch14_ce_count.attr.attr, - &dev_attr_legacy_ch15_ce_count.attr.attr, - NULL -}; - -static umode_t csrow_dev_is_visible(struct kobject *kobj, - struct attribute *attr, int idx) -{ - struct device *dev = kobj_to_dev(kobj); - struct csrow_info *csrow = container_of(dev, struct csrow_info, dev); - - if (idx >= csrow->nr_channels) - return 0; - - if (idx >= ARRAY_SIZE(dynamic_csrow_ce_count_attr) - 1) { - WARN_ONCE(1, "idx: %d\n", idx); - return 0; - } - - /* Only expose populated DIMMs */ - if (!csrow->channels[idx]->dimm->nr_pages) - return 0; - - return attr->mode; -} - - -static const struct attribute_group csrow_dev_dimm_group = { - .attrs = dynamic_csrow_dimm_attr, - .is_visible = csrow_dev_is_visible, -}; - -static const struct attribute_group csrow_dev_ce_count_group = { - .attrs = dynamic_csrow_ce_count_attr, - .is_visible = csrow_dev_is_visible, -}; - -static const struct attribute_group 
*csrow_dev_groups[] = { - &csrow_dev_dimm_group, - &csrow_dev_ce_count_group, - NULL -}; - -static void csrow_release(struct device *dev) -{ - /* - * Nothing to do, just unregister sysfs here. The mci - * device owns the data and will also release it. - */ -} - -static inline int nr_pages_per_csrow(struct csrow_info *csrow) -{ - int chan, nr_pages = 0; - - for (chan = 0; chan < csrow->nr_channels; chan++) - nr_pages += csrow->channels[chan]->dimm->nr_pages; - - return nr_pages; -} - -/* Create a CSROW object under specified edac_mc_device */ -static int edac_create_csrow_object(struct mem_ctl_info *mci, - struct csrow_info *csrow, int index) -{ - int err; - - csrow->dev.type = &csrow_attr_type; - csrow->dev.groups = csrow_dev_groups; - csrow->dev.release = csrow_release; - device_initialize(&csrow->dev); - csrow->dev.parent = &mci->dev; - csrow->mci = mci; - dev_set_name(&csrow->dev, "csrow%d", index); - dev_set_drvdata(&csrow->dev, csrow); - - err = device_add(&csrow->dev); - if (err) { - edac_dbg(1, "failure: create device %s\n", dev_name(&csrow->dev)); - put_device(&csrow->dev); - return err; - } - - edac_dbg(0, "device %s created\n", dev_name(&csrow->dev)); - - return 0; -} - -/* Create a CSROW object under specified edac_mc_device */ -static int edac_create_csrow_objects(struct mem_ctl_info *mci) -{ - int err, i; - struct csrow_info *csrow; - - for (i = 0; i < mci->nr_csrows; i++) { - csrow = mci->csrows[i]; - if (!nr_pages_per_csrow(csrow)) - continue; - err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) - goto error; - } - return 0; - -error: - for (--i; i >= 0; i--) { - if (device_is_registered(&mci->csrows[i]->dev)) - device_unregister(&mci->csrows[i]->dev); - } - - return err; -} - -static void edac_delete_csrow_objects(struct mem_ctl_info *mci) -{ - int i; - - for (i = 0; i < mci->nr_csrows; i++) { - if (device_is_registered(&mci->csrows[i]->dev)) - device_unregister(&mci->csrows[i]->dev); - } -} - -#endif - /* * Per-dimm (or per-rank) devices */ @@ -989,12 +594,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, goto fail; } -#ifdef CONFIG_EDAC_LEGACY_SYSFS - err = edac_create_csrow_objects(mci); - if (err < 0) - goto fail; -#endif - edac_create_debugfs_nodes(mci); return 0; @@ -1019,9 +618,6 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) #ifdef CONFIG_EDAC_DEBUG edac_debugfs_remove_recursive(mci->debugfs); #endif -#ifdef CONFIG_EDAC_LEGACY_SYSFS - edac_delete_csrow_objects(mci); -#endif mci_for_each_dimm(mci, dimm) { if (!device_is_registered(&dimm->dev)) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 1eb0136c6fbd..d80c88818691 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -15,6 +15,7 @@ #include "edac_module.h" #include <ras/ras_event.h> #include <linux/notifier.h> +#include <linux/string.h> #define OTHER_DETAIL_LEN 400 @@ -332,7 +333,7 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb, p = pvt->msg; p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype)); } else { - strcpy(pvt->msg, "unknown error"); + strscpy(pvt->msg, "unknown error"); } /* Error address */ @@ -357,14 +358,14 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb, dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle); if (dimm) { e->top_layer = dimm->idx; - strcpy(e->label, dimm->label); + strscpy(e->label, dimm->label); } } if (p > e->location) *(p - 1) = '\0'; if (!*e->label) - strcpy(e->label, "unknown memory"); + strscpy(e->label, "unknown memory"); /* All 
other fields are mapped on e->other_detail */ p = pvt->other_detail; diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 2010a47149f4..89b3e8cc38b1 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -1198,7 +1198,8 @@ static int __init i10nm_init(void) d->imc[i].num_dimms = cfg->ddr_dimm_num; } - rc = skx_register_mci(&d->imc[i], d->imc[i].mdev, + rc = skx_register_mci(&d->imc[i], &d->imc[i].mdev->dev, + pci_name(d->imc[i].mdev), "Intel_10nm Socket", EDAC_MOD_STR, i10nm_get_dimm_config, cfg); if (rc < 0) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 5a080ab65476..8d4ddaa85ae8 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -526,6 +526,7 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, in ie31200_pvt.priv[mc] = priv; return 0; fail_unmap: + put_device(&priv->dev); iounmap(window); fail_free: edac_mc_free(mci); @@ -598,6 +599,7 @@ static void ie31200_unregister_mcis(void) mci = priv->mci; edac_mc_del_mc(mci->pdev); iounmap(priv->window); + put_device(&priv->dev); edac_mc_free(mci); } } diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 2fc59f9eed69..553c31a2d922 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -1300,6 +1300,7 @@ static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev imc->mci = mci; return 0; fail3: + put_device(&imc->dev); mci->pvt_info = NULL; kfree(mci->ctl_name); fail2: @@ -1326,6 +1327,7 @@ static void igen6_unregister_mcis(void) kfree(mci->ctl_name); mci->pvt_info = NULL; edac_mc_free(mci); + put_device(&imc->dev); iounmap(imc->window); } } diff --git a/drivers/edac/imh_base.c b/drivers/edac/imh_base.c new file mode 100644 index 000000000000..4348b3883b45 --- /dev/null +++ b/drivers/edac/imh_base.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for Intel(R) servers with Integrated Memory/IO Hub-based memory controller. + * Copyright (c) 2025, Intel Corporation. + */ + +#include <linux/kernel.h> +#include <linux/io.h> +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> +#include <asm/mce.h> +#include <asm/cpu.h> +#include "edac_module.h" +#include "skx_common.h" + +#define IMH_REVISION "v0.0.1" +#define EDAC_MOD_STR "imh_edac" + +/* Debug macros */ +#define imh_printk(level, fmt, arg...) \ + edac_printk(level, "imh", fmt, ##arg) + +/* Configuration Agent(Ubox) */ +#define MMIO_BASE_H(reg) (((u64)GET_BITFIELD(reg, 0, 29)) << 23) +#define SOCKET_ID(reg) GET_BITFIELD(reg, 0, 3) + +/* PUNIT */ +#define DDR_IMC_BITMAP(reg) GET_BITFIELD(reg, 23, 30) + +/* Memory Controller */ +#define ECC_ENABLED(reg) GET_BITFIELD(reg, 2, 2) +#define DIMM_POPULATED(reg) GET_BITFIELD(reg, 15, 15) + +/* System Cache Agent(SCA) */ +#define TOLM(reg) (((u64)GET_BITFIELD(reg, 16, 31)) << 16) +#define TOHM(reg) (((u64)GET_BITFIELD(reg, 16, 51)) << 16) + +/* Home Agent (HA) */ +#define NMCACHING(reg) GET_BITFIELD(reg, 8, 8) + +/** + * struct local_reg - A register as described in the local package view. + * + * @pkg: (input) The package where the register is located. + * @pbase: (input) The IP MMIO base physical address in the local package view. + * @size: (input) The IP MMIO size. + * @offset: (input) The register offset from the IP MMIO base @pbase. + * @width: (input) The register width in byte. + * @vbase: (internal) The IP MMIO base virtual address. + * @val: (output) The register value. 
+ */ +struct local_reg { + int pkg; + u64 pbase; + u32 size; + u32 offset; + u8 width; + void __iomem *vbase; + u64 val; +}; + +#define DEFINE_LOCAL_REG(name, cfg, package, north, ip_name, ip_idx, reg_name) \ + struct local_reg name = { \ + .pkg = package, \ + .pbase = (north ? (cfg)->mmio_base_l_north : \ + (cfg)->mmio_base_l_south) + \ + (cfg)->ip_name##_base + \ + (cfg)->ip_name##_size * (ip_idx), \ + .size = (cfg)->ip_name##_size, \ + .offset = (cfg)->ip_name##_reg_##reg_name##_offset, \ + .width = (cfg)->ip_name##_reg_##reg_name##_width, \ + } + +static u64 readx(void __iomem *addr, u8 width) +{ + switch (width) { + case 1: + return readb(addr); + case 2: + return readw(addr); + case 4: + return readl(addr); + case 8: + return readq(addr); + default: + imh_printk(KERN_ERR, "Invalid reg 0x%p width %d\n", addr, width); + return 0; + } +} + +static void __read_local_reg(void *reg) +{ + struct local_reg *r = (struct local_reg *)reg; + + r->val = readx(r->vbase + r->offset, r->width); +} + +/* Read a local-view register. */ +static bool read_local_reg(struct local_reg *reg) +{ + int cpu; + + /* Get the target CPU in the package @reg->pkg. */ + for_each_online_cpu(cpu) { + if (reg->pkg == topology_physical_package_id(cpu)) + break; + } + + if (cpu >= nr_cpu_ids) + return false; + + reg->vbase = ioremap(reg->pbase, reg->size); + if (!reg->vbase) { + imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", reg->pbase); + return false; + } + + /* Get the target CPU to read the register. */ + smp_call_function_single(cpu, __read_local_reg, reg, 1); + iounmap(reg->vbase); + + return true; +} + +/* Get the bitmap of memory controller instances in package @pkg. */ +static u32 get_imc_bitmap(struct res_config *cfg, int pkg, bool north) +{ + DEFINE_LOCAL_REG(reg, cfg, pkg, north, pcu, 0, capid3); + + if (!read_local_reg(®)) + return 0; + + edac_dbg(2, "Pkg%d %s mc instances bitmap 0x%llx (reg 0x%llx)\n", + pkg, north ? "north" : "south", + DDR_IMC_BITMAP(reg.val), reg.val); + + return DDR_IMC_BITMAP(reg.val); +} + +static void imc_release(struct device *dev) +{ + edac_dbg(2, "imc device %s released\n", dev_name(dev)); + kfree(dev); +} + +static int __get_ddr_munits(struct res_config *cfg, struct skx_dev *d, + bool north, int lmc) +{ + unsigned long size = cfg->ddr_chan_mmio_sz * cfg->ddr_chan_num; + unsigned long bitmap = get_imc_bitmap(cfg, d->pkg, north); + void __iomem *mbase; + struct device *dev; + int i, rc, pmc; + u64 base; + + for_each_set_bit(i, &bitmap, sizeof(bitmap) * 8) { + base = north ? d->mmio_base_h_north : d->mmio_base_h_south; + base += cfg->ddr_imc_base + size * i; + + edac_dbg(2, "Pkg%d mc%d mmio base 0x%llx size 0x%lx\n", + d->pkg, lmc, base, size); + + /* Set up the imc MMIO. */ + mbase = ioremap(base, size); + if (!mbase) { + imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", base); + return -ENOMEM; + } + + d->imc[lmc].mbase = mbase; + d->imc[lmc].lmc = lmc; + + /* Create the imc device instance. */ + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->release = imc_release; + device_initialize(dev); + rc = dev_set_name(dev, "0x%llx", base); + if (rc) { + imh_printk(KERN_ERR, "Failed to set dev name\n"); + put_device(dev); + return rc; + } + + d->imc[lmc].dev = dev; + + /* Set up the imc index mapping. */ + pmc = north ? 
i : 8 + i; + skx_set_mc_mapping(d, pmc, lmc); + + lmc++; + } + + return lmc; +} + +static bool get_ddr_munits(struct res_config *cfg, struct skx_dev *d) +{ + int lmc = __get_ddr_munits(cfg, d, true, 0); + + if (lmc < 0) + return false; + + lmc = __get_ddr_munits(cfg, d, false, lmc); + if (lmc <= 0) + return false; + + return true; +} + +static bool get_socket_id(struct res_config *cfg, struct skx_dev *d) +{ + DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ubox, 0, socket_id); + u8 src_id; + int i; + + if (!read_local_reg(®)) + return false; + + src_id = SOCKET_ID(reg.val); + edac_dbg(2, "socket id 0x%x (reg 0x%llx)\n", src_id, reg.val); + + for (i = 0; i < cfg->ddr_imc_num; i++) + d->imc[i].src_id = src_id; + + return true; +} + +/* Get TOLM (Top Of Low Memory) and TOHM (Top Of High Memory) parameters. */ +static bool imh_get_tolm_tohm(struct res_config *cfg, u64 *tolm, u64 *tohm) +{ + DEFINE_LOCAL_REG(reg, cfg, 0, true, sca, 0, tolm); + + if (!read_local_reg(®)) + return false; + + *tolm = TOLM(reg.val); + edac_dbg(2, "tolm 0x%llx (reg 0x%llx)\n", *tolm, reg.val); + + DEFINE_LOCAL_REG(reg2, cfg, 0, true, sca, 0, tohm); + + if (!read_local_reg(®2)) + return false; + + *tohm = TOHM(reg2.val); + edac_dbg(2, "tohm 0x%llx (reg 0x%llx)\n", *tohm, reg2.val); + + return true; +} + +/* Get the system-view MMIO_BASE_H for {north,south}-IMH. */ +static int imh_get_all_mmio_base_h(struct res_config *cfg, struct list_head *edac_list) +{ + int i, n = topology_max_packages(), imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num; + struct skx_dev *d; + + for (i = 0; i < n; i++) { + d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL); + if (!d) + return -ENOMEM; + + DEFINE_LOCAL_REG(reg, cfg, i, true, ubox, 0, mmio_base); + + /* Get MMIO_BASE_H for the north-IMH. */ + if (!read_local_reg(®) || !reg.val) { + kfree(d); + imh_printk(KERN_ERR, "Pkg%d has no north mmio_base_h\n", i); + return -ENODEV; + } + + d->mmio_base_h_north = MMIO_BASE_H(reg.val); + edac_dbg(2, "Pkg%d north mmio_base_h 0x%llx (reg 0x%llx)\n", + i, d->mmio_base_h_north, reg.val); + + /* Get MMIO_BASE_H for the south-IMH (optional). */ + DEFINE_LOCAL_REG(reg2, cfg, i, false, ubox, 0, mmio_base); + + if (read_local_reg(®2)) { + d->mmio_base_h_south = MMIO_BASE_H(reg2.val); + edac_dbg(2, "Pkg%d south mmio_base_h 0x%llx (reg 0x%llx)\n", + i, d->mmio_base_h_south, reg2.val); + } + + d->pkg = i; + d->num_imc = imc_num; + skx_init_mc_mapping(d); + list_add_tail(&d->list, edac_list); + } + + return 0; +} + +/* Get the number of per-package memory controllers. */ +static int imh_get_imc_num(struct res_config *cfg) +{ + int imc_num = hweight32(get_imc_bitmap(cfg, 0, true)) + + hweight32(get_imc_bitmap(cfg, 0, false)); + + if (!imc_num) { + imh_printk(KERN_ERR, "Invalid mc number\n"); + return -ENODEV; + } + + if (cfg->ddr_imc_num != imc_num) { + /* + * Update the configuration data to reflect the number of + * present DDR memory controllers. + */ + cfg->ddr_imc_num = imc_num; + edac_dbg(2, "Set ddr mc number %d\n", imc_num); + } + + return 0; +} + +/* Get all memory controllers' parameters. 
*/ +static int imh_get_munits(struct res_config *cfg, struct list_head *edac_list) +{ + struct skx_imc *imc; + struct skx_dev *d; + u8 mc = 0; + int i; + + list_for_each_entry(d, edac_list, list) { + if (!get_ddr_munits(cfg, d)) { + imh_printk(KERN_ERR, "No mc found\n"); + return -ENODEV; + } + + if (!get_socket_id(cfg, d)) { + imh_printk(KERN_ERR, "Failed to get socket id\n"); + return -ENODEV; + } + + for (i = 0; i < cfg->ddr_imc_num; i++) { + imc = &d->imc[i]; + if (!imc->mbase) + continue; + + imc->chan_mmio_sz = cfg->ddr_chan_mmio_sz; + imc->num_channels = cfg->ddr_chan_num; + imc->num_dimms = cfg->ddr_dimm_num; + imc->mc = mc++; + } + } + + return 0; +} + +static bool check_2lm_enabled(struct res_config *cfg, struct skx_dev *d, int ha_idx) +{ + DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ha, ha_idx, mode); + + if (!read_local_reg(®)) + return false; + + if (!NMCACHING(reg.val)) + return false; + + edac_dbg(2, "2-level memory configuration (reg 0x%llx, ha idx %d)\n", reg.val, ha_idx); + return true; +} + +/* Check whether the system has a 2-level memory configuration. */ +static bool imh_2lm_enabled(struct res_config *cfg, struct list_head *head) +{ + struct skx_dev *d; + int i; + + list_for_each_entry(d, head, list) { + for (i = 0; i < cfg->ddr_imc_num; i++) + if (check_2lm_enabled(cfg, d, i)) + return true; + } + + return false; +} + +/* Helpers to read memory controller registers */ +static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) +{ + return readx(imc->mbase + imc->chan_mmio_sz * chan + offset, width); +} + +static u32 read_imc_mcmtr(struct res_config *cfg, struct skx_imc *imc, int chan) +{ + return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_mcmtr_offset, cfg->ddr_reg_mcmtr_width); +} + +static u32 read_imc_dimmmtr(struct res_config *cfg, struct skx_imc *imc, int chan, int dimm) +{ + return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_dimmmtr_offset + + cfg->ddr_reg_dimmmtr_width * dimm, + cfg->ddr_reg_dimmmtr_width); +} + +static bool ecc_enabled(u32 mcmtr) +{ + return (bool)ECC_ENABLED(mcmtr); +} + +static bool dimm_populated(u32 dimmmtr) +{ + return (bool)DIMM_POPULATED(dimmmtr); +} + +/* Get each DIMM's configurations of the memory controller @mci. */ +static int imh_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg) +{ + struct skx_pvt *pvt = mci->pvt_info; + struct skx_imc *imc = pvt->imc; + struct dimm_info *dimm; + u32 mcmtr, dimmmtr; + int i, j, ndimms; + + for (i = 0; i < imc->num_channels; i++) { + if (!imc->mbase) + continue; + + mcmtr = read_imc_mcmtr(cfg, imc, i); + + for (ndimms = 0, j = 0; j < imc->num_dimms; j++) { + dimmmtr = read_imc_dimmmtr(cfg, imc, i, j); + edac_dbg(1, "mcmtr 0x%x dimmmtr 0x%x (mc%d ch%d dimm%d)\n", + mcmtr, dimmmtr, imc->mc, i, j); + + if (!dimm_populated(dimmmtr)) + continue; + + dimm = edac_get_dimm(mci, i, j, 0); + ndimms += skx_get_dimm_info(dimmmtr, 0, 0, dimm, + imc, i, j, cfg); + } + + if (ndimms && !ecc_enabled(mcmtr)) { + imh_printk(KERN_ERR, "ECC is disabled on mc%d ch%d\n", + imc->mc, i); + return -ENODEV; + } + } + + return 0; +} + +/* Register all memory controllers to the EDAC core. 
*/ +static int imh_register_mci(struct res_config *cfg, struct list_head *edac_list) +{ + struct skx_imc *imc; + struct skx_dev *d; + int i, rc; + + list_for_each_entry(d, edac_list, list) { + for (i = 0; i < cfg->ddr_imc_num; i++) { + imc = &d->imc[i]; + if (!imc->mbase) + continue; + + rc = skx_register_mci(imc, imc->dev, + dev_name(imc->dev), + "Intel IMH-based Socket", + EDAC_MOD_STR, + imh_get_dimm_config, cfg); + if (rc) + return rc; + } + } + + return 0; +} + +static struct res_config dmr_cfg = { + .type = DMR, + .support_ddr5 = true, + .mmio_base_l_north = 0xf6800000, + .mmio_base_l_south = 0xf6000000, + .ddr_chan_num = 1, + .ddr_dimm_num = 2, + .ddr_imc_base = 0x39b000, + .ddr_chan_mmio_sz = 0x8000, + .ddr_reg_mcmtr_offset = 0x360, + .ddr_reg_mcmtr_width = 4, + .ddr_reg_dimmmtr_offset = 0x370, + .ddr_reg_dimmmtr_width = 4, + .ubox_base = 0x0, + .ubox_size = 0x2000, + .ubox_reg_mmio_base_offset = 0x580, + .ubox_reg_mmio_base_width = 4, + .ubox_reg_socket_id_offset = 0x1080, + .ubox_reg_socket_id_width = 4, + .pcu_base = 0x3000, + .pcu_size = 0x10000, + .pcu_reg_capid3_offset = 0x290, + .pcu_reg_capid3_width = 4, + .sca_base = 0x24c000, + .sca_size = 0x2500, + .sca_reg_tolm_offset = 0x2100, + .sca_reg_tolm_width = 8, + .sca_reg_tohm_offset = 0x2108, + .sca_reg_tohm_width = 8, + .ha_base = 0x3eb000, + .ha_size = 0x1000, + .ha_reg_mode_offset = 0x4a0, + .ha_reg_mode_width = 4, +}; + +static const struct x86_cpu_id imh_cpuids[] = { + X86_MATCH_VFM(INTEL_DIAMONDRAPIDS_X, &dmr_cfg), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, imh_cpuids); + +static struct notifier_block imh_mce_dec = { + .notifier_call = skx_mce_check_error, + .priority = MCE_PRIO_EDAC, +}; + +static int __init imh_init(void) +{ + const struct x86_cpu_id *id; + struct list_head *edac_list; + struct res_config *cfg; + const char *owner; + u64 tolm, tohm; + int rc; + + edac_dbg(2, "\n"); + + if (ghes_get_devices()) + return -EBUSY; + + owner = edac_get_owner(); + if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) + return -EBUSY; + + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + id = x86_match_cpu(imh_cpuids); + if (!id) + return -ENODEV; + cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); + + if (!imh_get_tolm_tohm(cfg, &tolm, &tohm)) + return -ENODEV; + + skx_set_hi_lo(tolm, tohm); + + rc = imh_get_imc_num(cfg); + if (rc < 0) + goto fail; + + edac_list = skx_get_edac_list(); + + rc = imh_get_all_mmio_base_h(cfg, edac_list); + if (rc) + goto fail; + + rc = imh_get_munits(cfg, edac_list); + if (rc) + goto fail; + + skx_set_mem_cfg(imh_2lm_enabled(cfg, edac_list)); + + rc = imh_register_mci(cfg, edac_list); + if (rc) + goto fail; + + rc = skx_adxl_get(); + if (rc) + goto fail; + + opstate_init(); + mce_register_decode_chain(&imh_mce_dec); + skx_setup_debug("imh_test"); + + imh_printk(KERN_INFO, "%s\n", IMH_REVISION); + + return 0; +fail: + skx_remove(); + return rc; +} + +static void __exit imh_exit(void) +{ + edac_dbg(2, "\n"); + + skx_teardown_debug(); + mce_unregister_decode_chain(&imh_mce_dec); + skx_adxl_put(); + skx_remove(); +} + +module_init(imh_init); +module_exit(imh_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Qiuxu Zhuo"); +MODULE_DESCRIPTION("MC Driver for Intel servers using IMH-based memory controller"); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 078ddf95cc6e..aa6593ccda2d 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -662,8 +662,8 @@ static int __init skx_init(void) d->imc[i].src_id = src_id; 
d->imc[i].num_channels = cfg->ddr_chan_num; d->imc[i].num_dimms = cfg->ddr_dimm_num; - - rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev, + rc = skx_register_mci(&d->imc[i], &d->imc[i].chan[0].cdev->dev, + pci_name(d->imc[i].chan[0].cdev), "Skylake Socket", EDAC_MOD_STR, skx_get_dimm_config, cfg); if (rc < 0) diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 724842f512ac..3276afe43922 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -124,7 +124,7 @@ void skx_adxl_put(void) } EXPORT_SYMBOL_GPL(skx_adxl_put); -static void skx_init_mc_mapping(struct skx_dev *d) +void skx_init_mc_mapping(struct skx_dev *d) { /* * By default, the BIOS presents all memory controllers within each @@ -135,6 +135,7 @@ static void skx_init_mc_mapping(struct skx_dev *d) for (int i = 0; i < d->num_imc; i++) d->imc[i].mc_mapping = i; } +EXPORT_SYMBOL_GPL(skx_init_mc_mapping); void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc) { @@ -384,6 +385,12 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) } EXPORT_SYMBOL_GPL(skx_get_all_bus_mappings); +struct list_head *skx_get_edac_list(void) +{ + return &dev_edac_list; +} +EXPORT_SYMBOL_GPL(skx_get_edac_list); + int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm) { struct pci_dev *pdev; @@ -424,6 +431,13 @@ fail: } EXPORT_SYMBOL_GPL(skx_get_hi_lo); +void skx_set_hi_lo(u64 tolm, u64 tohm) +{ + skx_tolm = tolm; + skx_tohm = tohm; +} +EXPORT_SYMBOL_GPL(skx_set_hi_lo); + static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval, int maxval, const char *name) { @@ -437,7 +451,7 @@ static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, } #define numrank(reg) skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks") -#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows") +#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 7, "rows") #define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols") int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, @@ -545,9 +559,9 @@ unknown_size: } EXPORT_SYMBOL_GPL(skx_get_nvdimm_info); -int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, - const char *ctl_name, const char *mod_str, - get_dimm_config_f get_dimm_config, +int skx_register_mci(struct skx_imc *imc, struct device *dev, + const char *dev_name, const char *ctl_name, + const char *mod_str, get_dimm_config_f get_dimm_config, struct res_config *cfg) { struct mem_ctl_info *mci; @@ -588,7 +602,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = mod_str; - mci->dev_name = pci_name(pdev); + mci->dev_name = dev_name; mci->ctl_page_to_phys = NULL; rc = get_dimm_config(mci, cfg); @@ -596,7 +610,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, goto fail; /* Record ptr to the generic device */ - mci->pdev = &pdev->dev; + mci->pdev = dev; /* Add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { @@ -810,6 +824,9 @@ void skx_remove(void) if (d->imc[i].mbase) iounmap(d->imc[i].mbase); + if (d->imc[i].dev) + put_device(d->imc[i].dev); + for (j = 0; j < d->imc[i].num_channels; j++) { if (d->imc[i].chan[j].cdev) pci_dev_put(d->imc[i].chan[j].cdev); @@ -833,7 +850,7 @@ EXPORT_SYMBOL_GPL(skx_remove); /* * Debug feature. * Exercise the address decode logic by writing an address to - * /sys/kernel/debug/edac/{skx,i10nm}_test/addr. 
+ * /sys/kernel/debug/edac/{skx,i10nm,imh}_test/addr. */ static struct dentry *skx_test; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 73ba89786cdf..f88038e5b18c 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -121,20 +121,33 @@ struct reg_rrl { * memory controllers on the die. */ struct skx_dev { - struct list_head list; + /* {skx,i10nm}_edac */ u8 bus[4]; int seg; struct pci_dev *sad_all; struct pci_dev *util_all; - struct pci_dev *uracu; /* for i10nm CPU */ - struct pci_dev *pcu_cr3; /* for HBM memory detection */ + struct pci_dev *uracu; + struct pci_dev *pcu_cr3; u32 mcroute; + + /* imh_edac */ + /* System-view MMIO base physical addresses. */ + u64 mmio_base_h_north; + u64 mmio_base_h_south; + int pkg; + int num_imc; + struct list_head list; struct skx_imc { + /* i10nm_edac */ + struct pci_dev *mdev; + + /* imh_edac */ + struct device *dev; + struct mem_ctl_info *mci; - struct pci_dev *mdev; /* for i10nm CPU */ - void __iomem *mbase; /* for i10nm CPU */ - int chan_mmio_sz; /* for i10nm CPU */ + void __iomem *mbase; + int chan_mmio_sz; int num_channels; /* channels per memory controller */ int num_dimms; /* dimms per channel */ bool hbm_mc; @@ -178,7 +191,8 @@ enum type { SKX, I10NM, SPR, - GNR + GNR, + DMR, }; enum { @@ -237,10 +251,6 @@ struct pci_bdf { struct res_config { enum type type; - /* Configuration agent device ID */ - unsigned int decs_did; - /* Default bus number configuration register offset */ - int busno_cfg_offset; /* DDR memory controllers per socket */ int ddr_imc_num; /* DDR channels per DDR memory controller */ @@ -258,23 +268,57 @@ struct res_config { /* Per HBM channel memory-mapped I/O size */ int hbm_chan_mmio_sz; bool support_ddr5; - /* SAD device BDF */ - struct pci_bdf sad_all_bdf; - /* PCU device BDF */ - struct pci_bdf pcu_cr3_bdf; - /* UTIL device BDF */ - struct pci_bdf util_all_bdf; - /* URACU device BDF */ - struct pci_bdf uracu_bdf; - /* DDR mdev device BDF */ - struct pci_bdf ddr_mdev_bdf; - /* HBM mdev device BDF */ - struct pci_bdf hbm_mdev_bdf; - int sad_all_offset; /* RRL register sets per DDR channel */ struct reg_rrl *reg_rrl_ddr; /* RRL register sets per HBM channel */ struct reg_rrl *reg_rrl_hbm[2]; + union { + /* {skx,i10nm}_edac */ + struct { + /* Configuration agent device ID */ + unsigned int decs_did; + /* Default bus number configuration register offset */ + int busno_cfg_offset; + struct pci_bdf sad_all_bdf; + struct pci_bdf pcu_cr3_bdf; + struct pci_bdf util_all_bdf; + struct pci_bdf uracu_bdf; + struct pci_bdf ddr_mdev_bdf; + struct pci_bdf hbm_mdev_bdf; + int sad_all_offset; + }; + /* imh_edac */ + struct { + /* MMIO base physical address in local package view */ + u64 mmio_base_l_north; + u64 mmio_base_l_south; + u64 ddr_imc_base; + u64 ddr_reg_mcmtr_offset; + u8 ddr_reg_mcmtr_width; + u64 ddr_reg_dimmmtr_offset; + u8 ddr_reg_dimmmtr_width; + u64 ubox_base; + u32 ubox_size; + u32 ubox_reg_mmio_base_offset; + u8 ubox_reg_mmio_base_width; + u32 ubox_reg_socket_id_offset; + u8 ubox_reg_socket_id_width; + u64 pcu_base; + u32 pcu_size; + u32 pcu_reg_capid3_offset; + u8 pcu_reg_capid3_width; + u64 sca_base; + u32 sca_size; + u32 sca_reg_tolm_offset; + u8 sca_reg_tolm_width; + u32 sca_reg_tohm_offset; + u8 sca_reg_tohm_width; + u64 ha_base; + u32 ha_size; + u32 ha_reg_mode_offset; + u8 ha_reg_mode_width; + }; + }; }; typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, @@ -287,13 +331,17 @@ void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f 
show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); void skx_set_res_cfg(struct res_config *cfg); +void skx_init_mc_mapping(struct skx_dev *d); void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); +struct list_head *skx_get_edac_list(void); + int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); +void skx_set_hi_lo(u64 tolm, u64 tohm); int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno, @@ -302,7 +350,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno, const char *mod_str); -int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, +int skx_register_mci(struct skx_imc *imc, struct device *dev, const char *dev_name, const char *ctl_name, const char *mod_str, get_dimm_config_f get_dimm_config, struct res_config *cfg); diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c index 7c5db8bf0595..1a1092793092 100644 --- a/drivers/edac/versalnet_edac.c +++ b/drivers/edac/versalnet_edac.c @@ -433,7 +433,7 @@ static void handle_error(struct mc_priv *priv, struct ecc_status *stat, phys_addr_t pfn; int err; - if (WARN_ON_ONCE(ctl_num > NUM_CONTROLLERS)) + if (WARN_ON_ONCE(ctl_num >= NUM_CONTROLLERS)) return; mci = priv->mci[ctl_num]; @@ -605,21 +605,23 @@ static int rpmsg_cb(struct rpmsg_device *rpdev, void *data, length = result[MSG_ERR_LENGTH]; offset = result[MSG_ERR_OFFSET]; + /* + * The data can come in two stretches. Construct the regs from two + * messages. The offset indicates the offset from which the data is to + * be taken. + */ + for (i = 0 ; i < length; i++) { + k = offset + i; + j = ERROR_DATA + i; + mc_priv->regs[k] = result[j]; + } + if (result[TOTAL_ERR_LENGTH] > length) { if (!mc_priv->part_len) mc_priv->part_len = length; else mc_priv->part_len += length; - /* - * The data can come in 2 stretches. Construct the regs from 2 - * messages the offset indicates the offset from which the data is to - * be taken - */ - for (i = 0 ; i < length; i++) { - k = offset + i; - j = ERROR_DATA + i; - mc_priv->regs[k] = result[j]; - } + if (mc_priv->part_len < result[TOTAL_ERR_LENGTH]) return 0; mc_priv->part_len = 0; @@ -705,7 +707,7 @@ static int rpmsg_cb(struct rpmsg_device *rpdev, void *data, /* Convert to bytes */ length = result[TOTAL_ERR_LENGTH] * 4; log_non_standard_event(sec_type, &amd_versalnet_guid, mc_priv->message, - sec_sev, (void *)&result[ERROR_DATA], length); + sec_sev, (void *)&mc_priv->regs, length); return 0; } |
