diff options
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r-- | drivers/acpi/apei/einj.c | 71 | ||||
-rw-r--r-- | drivers/acpi/apei/ghes.c | 29 |
2 files changed, 71 insertions, 29 deletions
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 013eb621dc92..89fb9331c611 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -73,6 +73,7 @@ static u32 notrigger; static u32 vendor_flags; static struct debugfs_blob_wrapper vendor_blob; +static struct debugfs_blob_wrapper vendor_errors; static char vendor_dev[64]; /* @@ -182,6 +183,21 @@ static int einj_timedout(u64 *t) return 0; } +static void get_oem_vendor_struct(u64 paddr, int offset, + struct vendor_error_type_extension *v) +{ + unsigned long vendor_size; + u64 target_pa = paddr + offset + sizeof(struct vendor_error_type_extension); + + vendor_size = v->length - sizeof(struct vendor_error_type_extension); + + if (vendor_size) + vendor_errors.data = acpi_os_map_memory(target_pa, vendor_size); + + if (vendor_errors.data) + vendor_errors.size = vendor_size; +} + static void check_vendor_extension(u64 paddr, struct set_error_type_with_address *v5param) { @@ -194,6 +210,7 @@ static void check_vendor_extension(u64 paddr, v = acpi_os_map_iomem(paddr + offset, sizeof(*v)); if (!v) return; + get_oem_vendor_struct(paddr, offset, v); sbdf = v->pcie_sbdf; sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n", sbdf >> 24, (sbdf >> 16) & 0xff, @@ -577,38 +594,40 @@ static u64 error_param2; static u64 error_param3; static u64 error_param4; static struct dentry *einj_debug_dir; -static const char * const einj_error_type_string[] = { - "0x00000001\tProcessor Correctable\n", - "0x00000002\tProcessor Uncorrectable non-fatal\n", - "0x00000004\tProcessor Uncorrectable fatal\n", - "0x00000008\tMemory Correctable\n", - "0x00000010\tMemory Uncorrectable non-fatal\n", - "0x00000020\tMemory Uncorrectable fatal\n", - "0x00000040\tPCI Express Correctable\n", - "0x00000080\tPCI Express Uncorrectable non-fatal\n", - "0x00000100\tPCI Express Uncorrectable fatal\n", - "0x00000200\tPlatform Correctable\n", - "0x00000400\tPlatform Uncorrectable non-fatal\n", - "0x00000800\tPlatform Uncorrectable fatal\n", - "0x00001000\tCXL.cache Protocol Correctable\n", - "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n", - "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n", - "0x00008000\tCXL.mem Protocol Correctable\n", - "0x00010000\tCXL.mem Protocol Uncorrectable non-fatal\n", - "0x00020000\tCXL.mem Protocol Uncorrectable fatal\n", +static struct { u32 mask; const char *str; } const einj_error_type_string[] = { + { BIT(0), "Processor Correctable" }, + { BIT(1), "Processor Uncorrectable non-fatal" }, + { BIT(2), "Processor Uncorrectable fatal" }, + { BIT(3), "Memory Correctable" }, + { BIT(4), "Memory Uncorrectable non-fatal" }, + { BIT(5), "Memory Uncorrectable fatal" }, + { BIT(6), "PCI Express Correctable" }, + { BIT(7), "PCI Express Uncorrectable non-fatal" }, + { BIT(8), "PCI Express Uncorrectable fatal" }, + { BIT(9), "Platform Correctable" }, + { BIT(10), "Platform Uncorrectable non-fatal" }, + { BIT(11), "Platform Uncorrectable fatal"}, + { BIT(12), "CXL.cache Protocol Correctable" }, + { BIT(13), "CXL.cache Protocol Uncorrectable non-fatal" }, + { BIT(14), "CXL.cache Protocol Uncorrectable fatal" }, + { BIT(15), "CXL.mem Protocol Correctable" }, + { BIT(16), "CXL.mem Protocol Uncorrectable non-fatal" }, + { BIT(17), "CXL.mem Protocol Uncorrectable fatal" }, + { BIT(31), "Vendor Defined Error Types" }, }; static int available_error_type_show(struct seq_file *m, void *v) { int rc; - u32 available_error_type = 0; + u32 error_type = 0; - rc = einj_get_available_error_type(&available_error_type); + rc = einj_get_available_error_type(&error_type); if (rc) return rc; for (int pos = 0; pos < ARRAY_SIZE(einj_error_type_string); pos++) - if (available_error_type & BIT(pos)) - seq_puts(m, einj_error_type_string[pos]); + if (error_type & einj_error_type_string[pos].mask) + seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask, + einj_error_type_string[pos].str); return 0; } @@ -767,6 +786,10 @@ static int __init einj_init(void) einj_debug_dir, &vendor_flags); } + if (vendor_errors.size) + debugfs_create_blob("oem_error", 0600, einj_debug_dir, + &vendor_errors); + pr_info("Error INJection is initialized.\n"); return 0; @@ -792,6 +815,8 @@ static void __exit einj_exit(void) sizeof(struct einj_parameter); acpi_os_unmap_iomem(einj_param, size); + if (vendor_errors.size) + acpi_os_unmap_memory(vendor_errors.data, vendor_errors.size); } einj_exec_ctx_init(&ctx); apei_exec_post_unmap_gars(&ctx); diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 63ad0541db38..ab2a82cb1b0b 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -102,6 +102,20 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes) } /* + * A platform may describe one error source for the handling of synchronous + * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI + * or External Interrupt). On x86, the HEST notifications are always + * asynchronous, so only SEA on ARM is delivered as a synchronous + * notification. + */ +static inline bool is_hest_sync_notify(struct ghes *ghes) +{ + u8 notify_type = ghes->generic->notify.type; + + return notify_type == ACPI_HEST_NOTIFY_SEA; +} + +/* * This driver isn't really modular, however for the time being, * continuing to use module_param is the easiest way to remain * compatible with existing boot arg use cases. @@ -489,7 +503,7 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags) } static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, - int sev) + int sev, bool sync) { int flags = -1; int sec_sev = ghes_severity(gdata->error_severity); @@ -503,7 +517,7 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) flags = MF_SOFT_OFFLINE; if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) - flags = 0; + flags = sync ? MF_ACTION_REQUIRED : 0; if (flags != -1) return ghes_do_memory_failure(mem_err->physical_addr, flags); @@ -511,9 +525,11 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, return false; } -static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev) +static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, + int sev, bool sync) { struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); + int flags = sync ? MF_ACTION_REQUIRED : 0; bool queued = false; int sec_sev, i; char *p; @@ -538,7 +554,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s * and don't filter out 'corrected' error here. */ if (is_cache && has_pa) { - queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0); + queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags); p += err_info->length; continue; } @@ -666,6 +682,7 @@ static bool ghes_do_proc(struct ghes *ghes, const guid_t *fru_id = &guid_null; char *fru_text = ""; bool queued = false; + bool sync = is_hest_sync_notify(ghes); sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { @@ -683,13 +700,13 @@ static bool ghes_do_proc(struct ghes *ghes, atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err); arch_apei_report_mem_error(sev, mem_err); - queued = ghes_handle_memory_failure(gdata, sev); + queued = ghes_handle_memory_failure(gdata, sev, sync); } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { ghes_handle_aer(gdata); } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { - queued = ghes_handle_arm_hw_error(gdata, sev); + queued = ghes_handle_arm_hw_error(gdata, sev, sync); } else { void *err = acpi_hest_get_payload(gdata); |