diff options
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r-- | drivers/acpi/apei/Kconfig | 1 | ||||
-rw-r--r-- | drivers/acpi/apei/apei-base.c | 2 | ||||
-rw-r--r-- | drivers/acpi/apei/einj-core.c | 73 | ||||
-rw-r--r-- | drivers/acpi/apei/einj-cxl.c | 12 | ||||
-rw-r--r-- | drivers/acpi/apei/erst-dbg.c | 1 | ||||
-rw-r--r-- | drivers/acpi/apei/ghes.c | 203 |
6 files changed, 234 insertions, 58 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 3cfe7e7475f2..070c07d68dfb 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -23,6 +23,7 @@ config ACPI_APEI_GHES select ACPI_HED select IRQ_WORK select GENERIC_ALLOCATOR + select ARM_SDE_INTERFACE if ARM64 help Generic Hardware Error Source provides a way to report platform hardware errors (such as that from chipset). It diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index c7c26872f4ce..9c84f3da7c09 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -28,7 +28,7 @@ #include <linux/interrupt.h> #include <linux/debugfs.h> #include <acpi/apei.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "apei-internal.h" diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index 66e7f529e92f..9b041415a9d0 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -21,8 +21,8 @@ #include <linux/nmi.h> #include <linux/delay.h> #include <linux/mm.h> -#include <linux/platform_device.h> -#include <asm/unaligned.h> +#include <linux/device/faux.h> +#include <linux/unaligned.h> #include "apei-internal.h" @@ -83,6 +83,8 @@ static struct debugfs_blob_wrapper vendor_blob; static struct debugfs_blob_wrapper vendor_errors; static char vendor_dev[64]; +static u32 available_error_type; + /* * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the * EINJ table through an unpublished extension. Use with caution as @@ -648,14 +650,9 @@ static struct { u32 mask; const char *str; } const einj_error_type_string[] = { static int available_error_type_show(struct seq_file *m, void *v) { - int rc; - u32 error_type = 0; - rc = einj_get_available_error_type(&error_type); - if (rc) - return rc; for (int pos = 0; pos < ARRAY_SIZE(einj_error_type_string); pos++) - if (error_type & einj_error_type_string[pos].mask) + if (available_error_type & einj_error_type_string[pos].mask) seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask, einj_error_type_string[pos].str); @@ -678,8 +675,7 @@ bool einj_is_cxl_error_type(u64 type) int einj_validate_error_type(u64 type) { - u32 tval, vendor, available_error_type = 0; - int rc; + u32 tval, vendor; /* Only low 32 bits for error type are valid */ if (type & GENMASK_ULL(63, 32)) @@ -695,13 +691,9 @@ int einj_validate_error_type(u64 type) /* Only one error type can be specified */ if (tval & (tval - 1)) return -EINVAL; - if (!vendor) { - rc = einj_get_available_error_type(&available_error_type); - if (rc) - return rc; + if (!vendor) if (!(type & available_error_type)) return -EINVAL; - } return 0; } @@ -749,17 +741,12 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) return 0; } -static int __init einj_probe(struct platform_device *pdev) +static int __init einj_probe(struct faux_device *fdev) { int rc; acpi_status status; struct apei_exec_context ctx; - if (acpi_disabled) { - pr_debug("ACPI disabled.\n"); - return -ENODEV; - } - status = acpi_get_table(ACPI_SIG_EINJ, 0, (struct acpi_table_header **)&einj_tab); if (status == AE_NOT_FOUND) { @@ -777,6 +764,10 @@ static int __init einj_probe(struct platform_device *pdev) goto err_put_table; } + rc = einj_get_available_error_type(&available_error_type); + if (rc) + goto err_put_table; + rc = -ENOMEM; einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir()); @@ -851,7 +842,7 @@ err_put_table: return rc; } -static void __exit einj_remove(struct platform_device *pdev) +static void __exit einj_remove(struct faux_device *fdev) { struct apei_exec_context ctx; @@ -872,38 +863,36 @@ static void __exit einj_remove(struct platform_device *pdev) acpi_put_table((struct acpi_table_header *)einj_tab); } -static struct platform_device *einj_dev; -static struct platform_driver einj_driver = { - .remove_new = einj_remove, - .driver = { - .name = "acpi-einj", - }, +static struct faux_device *einj_dev; +/* + * einj_remove() lives in .exit.text. For drivers registered via + * platform_driver_probe() this is ok because they cannot get unbound at + * runtime. So mark the driver struct with __refdata to prevent modpost + * triggering a section mismatch warning. + */ +static struct faux_device_ops einj_device_ops __refdata = { + .probe = einj_probe, + .remove = __exit_p(einj_remove), }; static int __init einj_init(void) { - struct platform_device_info einj_dev_info = { - .name = "acpi-einj", - .id = -1, - }; - int rc; + if (acpi_disabled) { + pr_debug("ACPI disabled.\n"); + return -ENODEV; + } - einj_dev = platform_device_register_full(&einj_dev_info); - if (IS_ERR(einj_dev)) - return PTR_ERR(einj_dev); + einj_dev = faux_device_create("acpi-einj", NULL, &einj_device_ops); - rc = platform_driver_probe(&einj_driver, einj_probe); - einj_initialized = rc == 0; + if (einj_dev) + einj_initialized = true; return 0; } static void __exit einj_exit(void) { - if (einj_initialized) - platform_driver_unregister(&einj_driver); - - platform_device_del(einj_dev); + faux_device_destroy(einj_dev); } module_init(einj_init); diff --git a/drivers/acpi/apei/einj-cxl.c b/drivers/acpi/apei/einj-cxl.c index 8b8be0c90709..78da9ae543a2 100644 --- a/drivers/acpi/apei/einj-cxl.c +++ b/drivers/acpi/apei/einj-cxl.c @@ -7,9 +7,9 @@ * * Author: Ben Cheatham <benjamin.cheatham@amd.com> */ -#include <linux/einj-cxl.h> #include <linux/seq_file.h> #include <linux/pci.h> +#include <cxl/einj.h> #include "apei-internal.h" @@ -45,7 +45,7 @@ int einj_cxl_available_error_type_show(struct seq_file *m, void *v) return 0; } -EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL); +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, "CXL"); static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) { @@ -63,7 +63,7 @@ static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) seg = bridge->domain_nr; bus = pbus->number; - *sbdf = (seg << 24) | (bus << 16) | dport_dev->devfn; + *sbdf = (seg << 24) | (bus << 16) | (dport_dev->devfn << 8); return 0; } @@ -83,7 +83,7 @@ int einj_cxl_inject_rch_error(u64 rcrb, u64 type) return einj_cxl_rch_error_inject(type, 0x2, rcrb, GENMASK_ULL(63, 0), 0, 0); } -EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL); +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, "CXL"); int einj_cxl_inject_error(struct pci_dev *dport, u64 type) { @@ -104,10 +104,10 @@ int einj_cxl_inject_error(struct pci_dev *dport, u64 type) return einj_error_inject(type, 0x4, 0, 0, 0, param4); } -EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL); +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, "CXL"); bool einj_cxl_is_initialized(void) { return einj_initialized; } -EXPORT_SYMBOL_NS_GPL(einj_cxl_is_initialized, CXL); +EXPORT_SYMBOL_NS_GPL(einj_cxl_is_initialized, "CXL"); diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index 8bc71cdc2270..246076341e8c 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -199,7 +199,6 @@ static const struct file_operations erst_dbg_ops = { .read = erst_dbg_read, .write = erst_dbg_write, .unlocked_ioctl = erst_dbg_ioctl, - .llseek = no_llseek, }; static struct miscdevice erst_dbg_dev = { diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 512067cac170..f0584ccad451 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -26,6 +26,7 @@ #include <linux/interrupt.h> #include <linux/timer.h> #include <linux/cper.h> +#include <linux/cleanup.h> #include <linux/platform_device.h> #include <linux/mutex.h> #include <linux/ratelimit.h> @@ -33,6 +34,7 @@ #include <linux/irq_work.h> #include <linux/llist.h> #include <linux/genalloc.h> +#include <linux/kfifo.h> #include <linux/pci.h> #include <linux/pfn.h> #include <linux/aer.h> @@ -47,6 +49,7 @@ #include <acpi/apei.h> #include <asm/fixmap.h> #include <asm/tlbflush.h> +#include <cxl/event.h> #include <ras/ras_event.h> #include "apei-internal.h" @@ -170,8 +173,6 @@ static struct gen_pool *ghes_estatus_pool; static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; static atomic_t ghes_estatus_cache_alloced; -static int ghes_panic_timeout __read_mostly = 30; - static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx) { phys_addr_t paddr; @@ -673,6 +674,174 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, schedule_work(&entry->work); } +/* Room for 8 entries */ +#define CXL_CPER_PROT_ERR_FIFO_DEPTH 8 +static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data, + CXL_CPER_PROT_ERR_FIFO_DEPTH); + +/* Synchronize schedule_work() with cxl_cper_prot_err_work changes */ +static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock); +struct work_struct *cxl_cper_prot_err_work; + +static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err, + int severity) +{ +#ifdef CONFIG_ACPI_APEI_PCIEAER + struct cxl_cper_prot_err_work_data wd; + u8 *dvsec_start, *cap_start; + + if (!(prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS)) { + pr_err_ratelimited("CXL CPER invalid agent type\n"); + return; + } + + if (!(prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG)) { + pr_err_ratelimited("CXL CPER invalid protocol error log\n"); + return; + } + + if (prot_err->err_len != sizeof(struct cxl_ras_capability_regs)) { + pr_err_ratelimited("CXL CPER invalid RAS Cap size (%u)\n", + prot_err->err_len); + return; + } + + if (!(prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER)) + pr_warn(FW_WARN "CXL CPER no device serial number\n"); + + guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock); + + if (!cxl_cper_prot_err_work) + return; + + switch (prot_err->agent_type) { + case RCD: + case DEVICE: + case LD: + case FMLD: + case RP: + case DSP: + case USP: + memcpy(&wd.prot_err, prot_err, sizeof(wd.prot_err)); + + dvsec_start = (u8 *)(prot_err + 1); + cap_start = dvsec_start + prot_err->dvsec_len; + + memcpy(&wd.ras_cap, cap_start, sizeof(wd.ras_cap)); + wd.severity = cper_severity_to_aer(severity); + break; + default: + pr_err_ratelimited("CXL CPER invalid agent type: %d\n", + prot_err->agent_type); + return; + } + + if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) { + pr_err_ratelimited("CXL CPER kfifo overflow\n"); + return; + } + + schedule_work(cxl_cper_prot_err_work); +#endif +} + +int cxl_cper_register_prot_err_work(struct work_struct *work) +{ + if (cxl_cper_prot_err_work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_prot_err_work_lock); + cxl_cper_prot_err_work = work; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL"); + +int cxl_cper_unregister_prot_err_work(struct work_struct *work) +{ + if (cxl_cper_prot_err_work != work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_prot_err_work_lock); + cxl_cper_prot_err_work = NULL; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL"); + +int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd) +{ + return kfifo_get(&cxl_cper_prot_err_fifo, wd); +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL"); + +/* Room for 8 entries for each of the 4 event log queues */ +#define CXL_CPER_FIFO_DEPTH 32 +DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH); + +/* Synchronize schedule_work() with cxl_cper_work changes */ +static DEFINE_SPINLOCK(cxl_cper_work_lock); +struct work_struct *cxl_cper_work; + +static void cxl_cper_post_event(enum cxl_event_type event_type, + struct cxl_cper_event_rec *rec) +{ + struct cxl_cper_work_data wd; + + if (rec->hdr.length <= sizeof(rec->hdr) || + rec->hdr.length > sizeof(*rec)) { + pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n", + rec->hdr.length); + return; + } + + if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) { + pr_err(FW_WARN "CXL CPER invalid event\n"); + return; + } + + guard(spinlock_irqsave)(&cxl_cper_work_lock); + + if (!cxl_cper_work) + return; + + wd.event_type = event_type; + memcpy(&wd.rec, rec, sizeof(wd.rec)); + + if (!kfifo_put(&cxl_cper_fifo, wd)) { + pr_err_ratelimited("CXL CPER kfifo overflow\n"); + return; + } + + schedule_work(cxl_cper_work); +} + +int cxl_cper_register_work(struct work_struct *work) +{ + if (cxl_cper_work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_work_lock); + cxl_cper_work = work; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_register_work, "CXL"); + +int cxl_cper_unregister_work(struct work_struct *work) +{ + if (cxl_cper_work != work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_work_lock); + cxl_cper_work = NULL; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_work, "CXL"); + +int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) +{ + return kfifo_get(&cxl_cper_fifo, wd); +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL"); + static bool ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { @@ -707,6 +876,22 @@ static bool ghes_do_proc(struct ghes *ghes, } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { queued = ghes_handle_arm_hw_error(gdata, sev, sync); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) { + struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata); + + cxl_cper_post_prot_err(prot_err, gdata->error_severity); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) { + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) { + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec); } else { void *err = acpi_hest_get_payload(gdata); @@ -899,14 +1084,16 @@ static void __ghes_panic(struct ghes *ghes, struct acpi_hest_generic_status *estatus, u64 buf_paddr, enum fixed_addresses fixmap_idx) { + const char *msg = GHES_PFX "Fatal hardware error"; + __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); - /* reboot to log the error! */ if (!panic_timeout) - panic_timeout = ghes_panic_timeout; - panic("Fatal hardware error!"); + pr_emerg("%s but panic disabled\n", msg); + + panic(msg); } static int ghes_proc(struct ghes *ghes) @@ -951,7 +1138,7 @@ static void ghes_add_timer(struct ghes *ghes) static void ghes_poll_func(struct timer_list *t) { - struct ghes *ghes = from_timer(ghes, t, timer); + struct ghes *ghes = timer_container_of(ghes, t, timer); unsigned long flags; spin_lock_irqsave(&ghes_notify_lock_irq, flags); @@ -1521,14 +1708,14 @@ static struct platform_driver ghes_platform_driver = { .name = "GHES", }, .probe = ghes_probe, - .remove_new = ghes_remove, + .remove = ghes_remove, }; void __init acpi_ghes_init(void) { int rc; - sdei_init(); + acpi_sdei_init(); if (acpi_disabled) return; |