diff options
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r-- | drivers/acpi/apei/Kconfig | 13 | ||||
-rw-r--r-- | drivers/acpi/apei/Makefile | 2 | ||||
-rw-r--r-- | drivers/acpi/apei/apei-base.c | 2 | ||||
-rw-r--r-- | drivers/acpi/apei/apei-internal.h | 18 | ||||
-rw-r--r-- | drivers/acpi/apei/einj-core.c (renamed from drivers/acpi/apei/einj.c) | 130 | ||||
-rw-r--r-- | drivers/acpi/apei/einj-cxl.c | 113 | ||||
-rw-r--r-- | drivers/acpi/apei/erst-dbg.c | 1 | ||||
-rw-r--r-- | drivers/acpi/apei/ghes.c | 109 | ||||
-rw-r--r-- | drivers/acpi/apei/hest.c | 51 |
9 files changed, 405 insertions, 34 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 6b18f8bc7be3..3cfe7e7475f2 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -60,6 +60,19 @@ config ACPI_APEI_EINJ mainly used for debugging and testing the other parts of APEI and some other RAS features. +config ACPI_APEI_EINJ_CXL + bool "CXL Error INJection Support" + default ACPI_APEI_EINJ + depends on ACPI_APEI_EINJ + depends on CXL_BUS && CXL_BUS <= ACPI_APEI_EINJ + help + Support for CXL protocol Error INJection through debugfs/cxl. + Availability and which errors are supported is dependent on + the host platform. Look to ACPI v6.5 section 18.6.4 and kernel + EINJ documentation for more information. + + If unsure say 'n' + config ACPI_APEI_ERST_DEBUG tristate "APEI Error Record Serialization Table (ERST) Debug Support" depends on ACPI_APEI diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile index 4dfac2128737..2c474e6477e1 100644 --- a/drivers/acpi/apei/Makefile +++ b/drivers/acpi/apei/Makefile @@ -2,6 +2,8 @@ obj-$(CONFIG_ACPI_APEI) += apei.o obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o +einj-y := einj-core.o +einj-$(CONFIG_ACPI_APEI_EINJ_CXL) += einj-cxl.o obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o apei-y := apei-base.o hest.o erst.o bert.o diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index c7c26872f4ce..9c84f3da7c09 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -28,7 +28,7 @@ #include <linux/interrupt.h> #include <linux/debugfs.h> #include <acpi/apei.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "apei-internal.h" diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index 67c2c3b959e1..cd2766c69d78 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -130,4 +130,22 @@ static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus) } int apei_osc_setup(void); + +int einj_get_available_error_type(u32 *type); +int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3, + u64 param4); +int einj_cxl_rch_error_inject(u32 type, u32 flags, u64 param1, u64 param2, + u64 param3, u64 param4); +bool einj_is_cxl_error_type(u64 type); +int einj_validate_error_type(u64 type); + +#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12) +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13) +#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14) +#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15) +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16) +#define ACPI_EINJ_CXL_MEM_FATAL BIT(17) +#endif + #endif diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj-core.c index 89fb9331c611..04731a5b01fa 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj-core.c @@ -21,7 +21,8 @@ #include <linux/nmi.h> #include <linux/delay.h> #include <linux/mm.h> -#include <asm/unaligned.h> +#include <linux/platform_device.h> +#include <linux/unaligned.h> #include "apei-internal.h" @@ -36,6 +37,12 @@ #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ ACPI_EINJ_MEMORY_UNCORRECTABLE | \ ACPI_EINJ_MEMORY_FATAL) +#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \ + ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \ + ACPI_EINJ_CXL_CACHE_FATAL | \ + ACPI_EINJ_CXL_MEM_CORRECTABLE | \ + ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \ + ACPI_EINJ_CXL_MEM_FATAL) /* * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. @@ -137,6 +144,11 @@ static struct apei_exec_ins_type einj_ins_type[] = { */ static DEFINE_MUTEX(einj_mutex); +/* + * Exported APIs use this flag to exit early if einj_probe() failed. + */ +bool einj_initialized __ro_after_init; + static void *einj_param; static void einj_exec_ctx_init(struct apei_exec_context *ctx) @@ -160,7 +172,7 @@ static int __einj_get_available_error_type(u32 *type) } /* Get error injection capabilities of the platform */ -static int einj_get_available_error_type(u32 *type) +int einj_get_available_error_type(u32 *type) { int rc; @@ -530,8 +542,8 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, } /* Inject the specified hardware error */ -static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, - u64 param3, u64 param4) +int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3, + u64 param4) { int rc; u64 base_addr, size; @@ -554,8 +566,17 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, if (type & ACPI5_VENDOR_BIT) { if (vendor_flags != SETWA_FLAGS_MEM) goto inject; - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { goto inject; + } + + /* + * Injections targeting a CXL 1.0/1.1 port have to be injected + * via the einj_cxl_rch_error_inject() path as that does the proper + * validation of the given RCRB base (MMIO) address. + */ + if (einj_is_cxl_error_type(type) && (flags & SETWA_FLAGS_MEM)) + return -EINVAL; /* * Disallow crazy address masks that give BIOS leeway to pick @@ -587,6 +608,21 @@ inject: return rc; } +int einj_cxl_rch_error_inject(u32 type, u32 flags, u64 param1, u64 param2, + u64 param3, u64 param4) +{ + int rc; + + if (!(einj_is_cxl_error_type(type) && (flags & SETWA_FLAGS_MEM))) + return -EINVAL; + + mutex_lock(&einj_mutex); + rc = __einj_error_inject(type, flags, param1, param2, param3, param4); + mutex_unlock(&einj_mutex); + + return rc; +} + static u32 error_type; static u32 error_flags; static u64 error_param1; @@ -607,12 +643,6 @@ static struct { u32 mask; const char *str; } const einj_error_type_string[] = { { BIT(9), "Platform Correctable" }, { BIT(10), "Platform Uncorrectable non-fatal" }, { BIT(11), "Platform Uncorrectable fatal"}, - { BIT(12), "CXL.cache Protocol Correctable" }, - { BIT(13), "CXL.cache Protocol Uncorrectable non-fatal" }, - { BIT(14), "CXL.cache Protocol Uncorrectable fatal" }, - { BIT(15), "CXL.mem Protocol Correctable" }, - { BIT(16), "CXL.mem Protocol Uncorrectable non-fatal" }, - { BIT(17), "CXL.mem Protocol Uncorrectable fatal" }, { BIT(31), "Vendor Defined Error Types" }, }; @@ -641,22 +671,26 @@ static int error_type_get(void *data, u64 *val) return 0; } -static int error_type_set(void *data, u64 val) +bool einj_is_cxl_error_type(u64 type) { + return (type & CXL_ERROR_MASK) && (!(type & ACPI5_VENDOR_BIT)); +} + +int einj_validate_error_type(u64 type) +{ + u32 tval, vendor, available_error_type = 0; int rc; - u32 available_error_type = 0; - u32 tval, vendor; /* Only low 32 bits for error type are valid */ - if (val & GENMASK_ULL(63, 32)) + if (type & GENMASK_ULL(63, 32)) return -EINVAL; /* * Vendor defined types have 0x80000000 bit set, and * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE */ - vendor = val & ACPI5_VENDOR_BIT; - tval = val & 0x7fffffff; + vendor = type & ACPI5_VENDOR_BIT; + tval = type & GENMASK(30, 0); /* Only one error type can be specified */ if (tval & (tval - 1)) @@ -665,9 +699,21 @@ static int error_type_set(void *data, u64 val) rc = einj_get_available_error_type(&available_error_type); if (rc) return rc; - if (!(val & available_error_type)) + if (!(type & available_error_type)) return -EINVAL; } + + return 0; +} + +static int error_type_set(void *data, u64 val) +{ + int rc; + + rc = einj_validate_error_type(val); + if (rc) + return rc; + error_type = val; return 0; @@ -703,21 +749,21 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) return 0; } -static int __init einj_init(void) +static int __init einj_probe(struct platform_device *pdev) { int rc; acpi_status status; struct apei_exec_context ctx; if (acpi_disabled) { - pr_info("ACPI disabled.\n"); + pr_debug("ACPI disabled.\n"); return -ENODEV; } status = acpi_get_table(ACPI_SIG_EINJ, 0, (struct acpi_table_header **)&einj_tab); if (status == AE_NOT_FOUND) { - pr_warn("EINJ table not found.\n"); + pr_debug("EINJ table not found.\n"); return -ENODEV; } else if (ACPI_FAILURE(status)) { pr_err("Failed to get EINJ table: %s\n", @@ -805,7 +851,7 @@ err_put_table: return rc; } -static void __exit einj_exit(void) +static void __exit einj_remove(struct platform_device *pdev) { struct apei_exec_context ctx; @@ -826,6 +872,46 @@ static void __exit einj_exit(void) acpi_put_table((struct acpi_table_header *)einj_tab); } +static struct platform_device *einj_dev; +/* + * einj_remove() lives in .exit.text. For drivers registered via + * platform_driver_probe() this is ok because they cannot get unbound at + * runtime. So mark the driver struct with __refdata to prevent modpost + * triggering a section mismatch warning. + */ +static struct platform_driver einj_driver __refdata = { + .remove = __exit_p(einj_remove), + .driver = { + .name = "acpi-einj", + }, +}; + +static int __init einj_init(void) +{ + struct platform_device_info einj_dev_info = { + .name = "acpi-einj", + .id = -1, + }; + int rc; + + einj_dev = platform_device_register_full(&einj_dev_info); + if (IS_ERR(einj_dev)) + return PTR_ERR(einj_dev); + + rc = platform_driver_probe(&einj_driver, einj_probe); + einj_initialized = rc == 0; + + return 0; +} + +static void __exit einj_exit(void) +{ + if (einj_initialized) + platform_driver_unregister(&einj_driver); + + platform_device_unregister(einj_dev); +} + module_init(einj_init); module_exit(einj_exit); diff --git a/drivers/acpi/apei/einj-cxl.c b/drivers/acpi/apei/einj-cxl.c new file mode 100644 index 000000000000..78da9ae543a2 --- /dev/null +++ b/drivers/acpi/apei/einj-cxl.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CXL Error INJection support. Used by CXL core to inject + * protocol errors into CXL ports. + * + * Copyright (C) 2023 Advanced Micro Devices, Inc. + * + * Author: Ben Cheatham <benjamin.cheatham@amd.com> + */ +#include <linux/seq_file.h> +#include <linux/pci.h> +#include <cxl/einj.h> + +#include "apei-internal.h" + +/* Defined in einj-core.c */ +extern bool einj_initialized; + +static struct { u32 mask; const char *str; } const einj_cxl_error_type_string[] = { + { ACPI_EINJ_CXL_CACHE_CORRECTABLE, "CXL.cache Protocol Correctable" }, + { ACPI_EINJ_CXL_CACHE_UNCORRECTABLE, "CXL.cache Protocol Uncorrectable non-fatal" }, + { ACPI_EINJ_CXL_CACHE_FATAL, "CXL.cache Protocol Uncorrectable fatal" }, + { ACPI_EINJ_CXL_MEM_CORRECTABLE, "CXL.mem Protocol Correctable" }, + { ACPI_EINJ_CXL_MEM_UNCORRECTABLE, "CXL.mem Protocol Uncorrectable non-fatal" }, + { ACPI_EINJ_CXL_MEM_FATAL, "CXL.mem Protocol Uncorrectable fatal" }, +}; + +int einj_cxl_available_error_type_show(struct seq_file *m, void *v) +{ + int cxl_err, rc; + u32 available_error_type = 0; + + rc = einj_get_available_error_type(&available_error_type); + if (rc) + return rc; + + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; + + if (available_error_type & cxl_err) + seq_printf(m, "0x%08x\t%s\n", + einj_cxl_error_type_string[pos].mask, + einj_cxl_error_type_string[pos].str); + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, "CXL"); + +static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) +{ + struct pci_bus *pbus; + struct pci_host_bridge *bridge; + u64 seg = 0, bus; + + pbus = dport_dev->bus; + bridge = pci_find_host_bridge(pbus); + + if (!bridge) + return -ENODEV; + + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) + seg = bridge->domain_nr; + + bus = pbus->number; + *sbdf = (seg << 24) | (bus << 16) | (dport_dev->devfn << 8); + + return 0; +} + +int einj_cxl_inject_rch_error(u64 rcrb, u64 type) +{ + int rc; + + /* Only CXL error types can be specified */ + if (!einj_is_cxl_error_type(type)) + return -EINVAL; + + rc = einj_validate_error_type(type); + if (rc) + return rc; + + return einj_cxl_rch_error_inject(type, 0x2, rcrb, GENMASK_ULL(63, 0), + 0, 0); +} +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, "CXL"); + +int einj_cxl_inject_error(struct pci_dev *dport, u64 type) +{ + u64 param4 = 0; + int rc; + + /* Only CXL error types can be specified */ + if (!einj_is_cxl_error_type(type)) + return -EINVAL; + + rc = einj_validate_error_type(type); + if (rc) + return rc; + + rc = cxl_dport_get_sbdf(dport, ¶m4); + if (rc) + return rc; + + return einj_error_inject(type, 0x4, 0, 0, 0, param4); +} +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, "CXL"); + +bool einj_cxl_is_initialized(void) +{ + return einj_initialized; +} +EXPORT_SYMBOL_NS_GPL(einj_cxl_is_initialized, "CXL"); diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index 8bc71cdc2270..246076341e8c 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -199,7 +199,6 @@ static const struct file_operations erst_dbg_ops = { .read = erst_dbg_read, .write = erst_dbg_write, .unlocked_ioctl = erst_dbg_ioctl, - .llseek = no_llseek, }; static struct miscdevice erst_dbg_dev = { diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ab2a82cb1b0b..b72772494655 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -26,6 +26,7 @@ #include <linux/interrupt.h> #include <linux/timer.h> #include <linux/cper.h> +#include <linux/cleanup.h> #include <linux/platform_device.h> #include <linux/mutex.h> #include <linux/ratelimit.h> @@ -33,6 +34,7 @@ #include <linux/irq_work.h> #include <linux/llist.h> #include <linux/genalloc.h> +#include <linux/kfifo.h> #include <linux/pci.h> #include <linux/pfn.h> #include <linux/aer.h> @@ -47,6 +49,7 @@ #include <acpi/apei.h> #include <asm/fixmap.h> #include <asm/tlbflush.h> +#include <cxl/event.h> #include <ras/ras_event.h> #include "apei-internal.h" @@ -170,8 +173,6 @@ static struct gen_pool *ghes_estatus_pool; static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; static atomic_t ghes_estatus_cache_alloced; -static int ghes_panic_timeout __read_mostly = 30; - static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx) { phys_addr_t paddr; @@ -673,6 +674,75 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, schedule_work(&entry->work); } +/* Room for 8 entries for each of the 4 event log queues */ +#define CXL_CPER_FIFO_DEPTH 32 +DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH); + +/* Synchronize schedule_work() with cxl_cper_work changes */ +static DEFINE_SPINLOCK(cxl_cper_work_lock); +struct work_struct *cxl_cper_work; + +static void cxl_cper_post_event(enum cxl_event_type event_type, + struct cxl_cper_event_rec *rec) +{ + struct cxl_cper_work_data wd; + + if (rec->hdr.length <= sizeof(rec->hdr) || + rec->hdr.length > sizeof(*rec)) { + pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n", + rec->hdr.length); + return; + } + + if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) { + pr_err(FW_WARN "CXL CPER invalid event\n"); + return; + } + + guard(spinlock_irqsave)(&cxl_cper_work_lock); + + if (!cxl_cper_work) + return; + + wd.event_type = event_type; + memcpy(&wd.rec, rec, sizeof(wd.rec)); + + if (!kfifo_put(&cxl_cper_fifo, wd)) { + pr_err_ratelimited("CXL CPER kfifo overflow\n"); + return; + } + + schedule_work(cxl_cper_work); +} + +int cxl_cper_register_work(struct work_struct *work) +{ + if (cxl_cper_work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_work_lock); + cxl_cper_work = work; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_register_work, "CXL"); + +int cxl_cper_unregister_work(struct work_struct *work) +{ + if (cxl_cper_work != work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_work_lock); + cxl_cper_work = NULL; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_work, "CXL"); + +int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) +{ + return kfifo_get(&cxl_cper_fifo, wd); +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL"); + static bool ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { @@ -707,6 +777,18 @@ static bool ghes_do_proc(struct ghes *ghes, } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { queued = ghes_handle_arm_hw_error(gdata, sev, sync); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) { + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) { + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec); } else { void *err = acpi_hest_get_payload(gdata); @@ -899,14 +981,16 @@ static void __ghes_panic(struct ghes *ghes, struct acpi_hest_generic_status *estatus, u64 buf_paddr, enum fixed_addresses fixmap_idx) { + const char *msg = GHES_PFX "Fatal hardware error"; + __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); - /* reboot to log the error! */ if (!panic_timeout) - panic_timeout = ghes_panic_timeout; - panic("Fatal hardware error!"); + pr_emerg("%s but panic disabled\n", msg); + + panic(msg); } static int ghes_proc(struct ghes *ghes) @@ -1455,7 +1539,7 @@ err: return rc; } -static int ghes_remove(struct platform_device *ghes_dev) +static void ghes_remove(struct platform_device *ghes_dev) { int rc; struct ghes *ghes; @@ -1492,8 +1576,15 @@ static int ghes_remove(struct platform_device *ghes_dev) break; case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: rc = apei_sdei_unregister_ghes(ghes); - if (rc) - return rc; + if (rc) { + /* + * Returning early results in a resource leak, but we're + * only here if stopping the hardware failed. + */ + dev_err(&ghes_dev->dev, "Failed to unregister ghes (%pe)\n", + ERR_PTR(rc)); + return; + } break; default: BUG(); @@ -1507,8 +1598,6 @@ static int ghes_remove(struct platform_device *ghes_dev) mutex_unlock(&ghes_devs_mutex); kfree(ghes); - - return 0; } static struct platform_driver ghes_platform_driver = { diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 6aef1ee5e1bd..20d757687e3d 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -37,6 +37,20 @@ EXPORT_SYMBOL_GPL(hest_disable); static struct acpi_table_hest *__read_mostly hest_tab; +/* + * Since GHES_ASSIST is not supported, skip initialization of GHES_ASSIST + * structures for MCA. + * During HEST parsing, detected MCA error sources are cached from early + * table entries so that the Flags and Source Id fields from these cached + * values are then referred to in later table entries to determine if the + * encountered GHES_ASSIST structure should be initialized. + */ +static struct { + struct acpi_hest_ia_corrected *cmc; + struct acpi_hest_ia_machine_check *mc; + struct acpi_hest_ia_deferred_check *dmc; +} mces; + static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */ [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1, @@ -70,22 +84,54 @@ static int hest_esrc_len(struct acpi_hest_header *hest_hdr) cmc = (struct acpi_hest_ia_corrected *)hest_hdr; len = sizeof(*cmc) + cmc->num_hardware_banks * sizeof(struct acpi_hest_ia_error_bank); + mces.cmc = cmc; } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) { struct acpi_hest_ia_machine_check *mc; mc = (struct acpi_hest_ia_machine_check *)hest_hdr; len = sizeof(*mc) + mc->num_hardware_banks * sizeof(struct acpi_hest_ia_error_bank); + mces.mc = mc; } else if (hest_type == ACPI_HEST_TYPE_IA32_DEFERRED_CHECK) { struct acpi_hest_ia_deferred_check *mc; mc = (struct acpi_hest_ia_deferred_check *)hest_hdr; len = sizeof(*mc) + mc->num_hardware_banks * sizeof(struct acpi_hest_ia_error_bank); + mces.dmc = mc; } BUG_ON(len == -1); return len; }; +/* + * GHES and GHESv2 structures share the same format, starting from + * Source Id and ending in Error Status Block Length (inclusive). + */ +static bool is_ghes_assist_struct(struct acpi_hest_header *hest_hdr) +{ + struct acpi_hest_generic *ghes; + u16 related_source_id; + + if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR && + hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2) + return false; + + ghes = (struct acpi_hest_generic *)hest_hdr; + related_source_id = ghes->related_source_id; + + if (mces.cmc && mces.cmc->flags & ACPI_HEST_GHES_ASSIST && + related_source_id == mces.cmc->header.source_id) + return true; + if (mces.mc && mces.mc->flags & ACPI_HEST_GHES_ASSIST && + related_source_id == mces.mc->header.source_id) + return true; + if (mces.dmc && mces.dmc->flags & ACPI_HEST_GHES_ASSIST && + related_source_id == mces.dmc->header.source_id) + return true; + + return false; +} + typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data); static int apei_hest_parse(apei_hest_func_t func, void *data) @@ -114,6 +160,11 @@ static int apei_hest_parse(apei_hest_func_t func, void *data) return -EINVAL; } + if (is_ghes_assist_struct(hest_hdr)) { + hest_hdr = (void *)hest_hdr + len; + continue; + } + rc = func(hest_hdr, data); if (rc) return rc; |