summaryrefslogtreecommitdiff
path: root/drivers/acpi/apei
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r--drivers/acpi/apei/einj.c71
-rw-r--r--drivers/acpi/apei/ghes.c29
2 files changed, 71 insertions, 29 deletions
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 013eb621dc92..89fb9331c611 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -73,6 +73,7 @@ static u32 notrigger;
static u32 vendor_flags;
static struct debugfs_blob_wrapper vendor_blob;
+static struct debugfs_blob_wrapper vendor_errors;
static char vendor_dev[64];
/*
@@ -182,6 +183,21 @@ static int einj_timedout(u64 *t)
return 0;
}
+static void get_oem_vendor_struct(u64 paddr, int offset,
+ struct vendor_error_type_extension *v)
+{
+ unsigned long vendor_size;
+ u64 target_pa = paddr + offset + sizeof(struct vendor_error_type_extension);
+
+ vendor_size = v->length - sizeof(struct vendor_error_type_extension);
+
+ if (vendor_size)
+ vendor_errors.data = acpi_os_map_memory(target_pa, vendor_size);
+
+ if (vendor_errors.data)
+ vendor_errors.size = vendor_size;
+}
+
static void check_vendor_extension(u64 paddr,
struct set_error_type_with_address *v5param)
{
@@ -194,6 +210,7 @@ static void check_vendor_extension(u64 paddr,
v = acpi_os_map_iomem(paddr + offset, sizeof(*v));
if (!v)
return;
+ get_oem_vendor_struct(paddr, offset, v);
sbdf = v->pcie_sbdf;
sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n",
sbdf >> 24, (sbdf >> 16) & 0xff,
@@ -577,38 +594,40 @@ static u64 error_param2;
static u64 error_param3;
static u64 error_param4;
static struct dentry *einj_debug_dir;
-static const char * const einj_error_type_string[] = {
- "0x00000001\tProcessor Correctable\n",
- "0x00000002\tProcessor Uncorrectable non-fatal\n",
- "0x00000004\tProcessor Uncorrectable fatal\n",
- "0x00000008\tMemory Correctable\n",
- "0x00000010\tMemory Uncorrectable non-fatal\n",
- "0x00000020\tMemory Uncorrectable fatal\n",
- "0x00000040\tPCI Express Correctable\n",
- "0x00000080\tPCI Express Uncorrectable non-fatal\n",
- "0x00000100\tPCI Express Uncorrectable fatal\n",
- "0x00000200\tPlatform Correctable\n",
- "0x00000400\tPlatform Uncorrectable non-fatal\n",
- "0x00000800\tPlatform Uncorrectable fatal\n",
- "0x00001000\tCXL.cache Protocol Correctable\n",
- "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n",
- "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n",
- "0x00008000\tCXL.mem Protocol Correctable\n",
- "0x00010000\tCXL.mem Protocol Uncorrectable non-fatal\n",
- "0x00020000\tCXL.mem Protocol Uncorrectable fatal\n",
+static struct { u32 mask; const char *str; } const einj_error_type_string[] = {
+ { BIT(0), "Processor Correctable" },
+ { BIT(1), "Processor Uncorrectable non-fatal" },
+ { BIT(2), "Processor Uncorrectable fatal" },
+ { BIT(3), "Memory Correctable" },
+ { BIT(4), "Memory Uncorrectable non-fatal" },
+ { BIT(5), "Memory Uncorrectable fatal" },
+ { BIT(6), "PCI Express Correctable" },
+ { BIT(7), "PCI Express Uncorrectable non-fatal" },
+ { BIT(8), "PCI Express Uncorrectable fatal" },
+ { BIT(9), "Platform Correctable" },
+ { BIT(10), "Platform Uncorrectable non-fatal" },
+ { BIT(11), "Platform Uncorrectable fatal"},
+ { BIT(12), "CXL.cache Protocol Correctable" },
+ { BIT(13), "CXL.cache Protocol Uncorrectable non-fatal" },
+ { BIT(14), "CXL.cache Protocol Uncorrectable fatal" },
+ { BIT(15), "CXL.mem Protocol Correctable" },
+ { BIT(16), "CXL.mem Protocol Uncorrectable non-fatal" },
+ { BIT(17), "CXL.mem Protocol Uncorrectable fatal" },
+ { BIT(31), "Vendor Defined Error Types" },
};
static int available_error_type_show(struct seq_file *m, void *v)
{
int rc;
- u32 available_error_type = 0;
+ u32 error_type = 0;
- rc = einj_get_available_error_type(&available_error_type);
+ rc = einj_get_available_error_type(&error_type);
if (rc)
return rc;
for (int pos = 0; pos < ARRAY_SIZE(einj_error_type_string); pos++)
- if (available_error_type & BIT(pos))
- seq_puts(m, einj_error_type_string[pos]);
+ if (error_type & einj_error_type_string[pos].mask)
+ seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask,
+ einj_error_type_string[pos].str);
return 0;
}
@@ -767,6 +786,10 @@ static int __init einj_init(void)
einj_debug_dir, &vendor_flags);
}
+ if (vendor_errors.size)
+ debugfs_create_blob("oem_error", 0600, einj_debug_dir,
+ &vendor_errors);
+
pr_info("Error INJection is initialized.\n");
return 0;
@@ -792,6 +815,8 @@ static void __exit einj_exit(void)
sizeof(struct einj_parameter);
acpi_os_unmap_iomem(einj_param, size);
+ if (vendor_errors.size)
+ acpi_os_unmap_memory(vendor_errors.data, vendor_errors.size);
}
einj_exec_ctx_init(&ctx);
apei_exec_post_unmap_gars(&ctx);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 63ad0541db38..ab2a82cb1b0b 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -102,6 +102,20 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes)
}
/*
+ * A platform may describe one error source for the handling of synchronous
+ * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI
+ * or External Interrupt). On x86, the HEST notifications are always
+ * asynchronous, so only SEA on ARM is delivered as a synchronous
+ * notification.
+ */
+static inline bool is_hest_sync_notify(struct ghes *ghes)
+{
+ u8 notify_type = ghes->generic->notify.type;
+
+ return notify_type == ACPI_HEST_NOTIFY_SEA;
+}
+
+/*
* This driver isn't really modular, however for the time being,
* continuing to use module_param is the easiest way to remain
* compatible with existing boot arg use cases.
@@ -489,7 +503,7 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags)
}
static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
- int sev)
+ int sev, bool sync)
{
int flags = -1;
int sec_sev = ghes_severity(gdata->error_severity);
@@ -503,7 +517,7 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
(gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
flags = MF_SOFT_OFFLINE;
if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
- flags = 0;
+ flags = sync ? MF_ACTION_REQUIRED : 0;
if (flags != -1)
return ghes_do_memory_failure(mem_err->physical_addr, flags);
@@ -511,9 +525,11 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
return false;
}
-static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev)
+static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
+ int sev, bool sync)
{
struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
+ int flags = sync ? MF_ACTION_REQUIRED : 0;
bool queued = false;
int sec_sev, i;
char *p;
@@ -538,7 +554,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s
* and don't filter out 'corrected' error here.
*/
if (is_cache && has_pa) {
- queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0);
+ queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
p += err_info->length;
continue;
}
@@ -666,6 +682,7 @@ static bool ghes_do_proc(struct ghes *ghes,
const guid_t *fru_id = &guid_null;
char *fru_text = "";
bool queued = false;
+ bool sync = is_hest_sync_notify(ghes);
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
@@ -683,13 +700,13 @@ static bool ghes_do_proc(struct ghes *ghes,
atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
arch_apei_report_mem_error(sev, mem_err);
- queued = ghes_handle_memory_failure(gdata, sev);
+ queued = ghes_handle_memory_failure(gdata, sev, sync);
}
else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
ghes_handle_aer(gdata);
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
- queued = ghes_handle_arm_hw_error(gdata, sev);
+ queued = ghes_handle_arm_hw_error(gdata, sev, sync);
} else {
void *err = acpi_hest_get_payload(gdata);