summaryrefslogtreecommitdiff
path: root/drivers/acpi/apei/ghes.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/acpi/apei/ghes.c')
-rw-r--r--drivers/acpi/apei/ghes.c225
1 files changed, 164 insertions, 61 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 7b7c605166e0..f0584ccad451 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -26,7 +26,7 @@
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
-#include <linux/cxl-event.h>
+#include <linux/cleanup.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
@@ -34,6 +34,7 @@
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
+#include <linux/kfifo.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/aer.h>
@@ -48,6 +49,7 @@
#include <acpi/apei.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
+#include <cxl/event.h>
#include <ras/ras_event.h>
#include "apei-internal.h"
@@ -171,8 +173,6 @@ static struct gen_pool *ghes_estatus_pool;
static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;
-static int ghes_panic_timeout __read_mostly = 30;
-
static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
{
phys_addr_t paddr;
@@ -674,41 +674,118 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
schedule_work(&entry->work);
}
-/*
- * Only a single callback can be registered for CXL CPER events.
- */
-static DECLARE_RWSEM(cxl_cper_rw_sem);
-static cxl_cper_callback cper_callback;
+/* Room for 8 entries */
+#define CXL_CPER_PROT_ERR_FIFO_DEPTH 8
+static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data,
+ CXL_CPER_PROT_ERR_FIFO_DEPTH);
-/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */
+/* Synchronize schedule_work() with cxl_cper_prot_err_work changes */
+static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock);
+struct work_struct *cxl_cper_prot_err_work;
-/*
- * General Media Event Record
- * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
- */
-#define CPER_SEC_CXL_GEN_MEDIA_GUID \
- GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \
- 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6)
+static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err,
+ int severity)
+{
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+ struct cxl_cper_prot_err_work_data wd;
+ u8 *dvsec_start, *cap_start;
-/*
- * DRAM Event Record
- * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
- */
-#define CPER_SEC_CXL_DRAM_GUID \
- GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \
- 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24)
+ if (!(prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS)) {
+ pr_err_ratelimited("CXL CPER invalid agent type\n");
+ return;
+ }
-/*
- * Memory Module Event Record
- * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
- */
-#define CPER_SEC_CXL_MEM_MODULE_GUID \
- GUID_INIT(0xfe927475, 0xdd59, 0x4339, \
- 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74)
+ if (!(prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG)) {
+ pr_err_ratelimited("CXL CPER invalid protocol error log\n");
+ return;
+ }
+
+ if (prot_err->err_len != sizeof(struct cxl_ras_capability_regs)) {
+ pr_err_ratelimited("CXL CPER invalid RAS Cap size (%u)\n",
+ prot_err->err_len);
+ return;
+ }
+
+ if (!(prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER))
+ pr_warn(FW_WARN "CXL CPER no device serial number\n");
+
+ guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock);
+
+ if (!cxl_cper_prot_err_work)
+ return;
+
+ switch (prot_err->agent_type) {
+ case RCD:
+ case DEVICE:
+ case LD:
+ case FMLD:
+ case RP:
+ case DSP:
+ case USP:
+ memcpy(&wd.prot_err, prot_err, sizeof(wd.prot_err));
+
+ dvsec_start = (u8 *)(prot_err + 1);
+ cap_start = dvsec_start + prot_err->dvsec_len;
+
+ memcpy(&wd.ras_cap, cap_start, sizeof(wd.ras_cap));
+ wd.severity = cper_severity_to_aer(severity);
+ break;
+ default:
+ pr_err_ratelimited("CXL CPER invalid agent type: %d\n",
+ prot_err->agent_type);
+ return;
+ }
+
+ if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) {
+ pr_err_ratelimited("CXL CPER kfifo overflow\n");
+ return;
+ }
+
+ schedule_work(cxl_cper_prot_err_work);
+#endif
+}
+
+int cxl_cper_register_prot_err_work(struct work_struct *work)
+{
+ if (cxl_cper_prot_err_work)
+ return -EINVAL;
+
+ guard(spinlock)(&cxl_cper_prot_err_work_lock);
+ cxl_cper_prot_err_work = work;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL");
+
+int cxl_cper_unregister_prot_err_work(struct work_struct *work)
+{
+ if (cxl_cper_prot_err_work != work)
+ return -EINVAL;
+
+ guard(spinlock)(&cxl_cper_prot_err_work_lock);
+ cxl_cper_prot_err_work = NULL;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL");
+
+int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd)
+{
+ return kfifo_get(&cxl_cper_prot_err_fifo, wd);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL");
+
+/* Room for 8 entries for each of the 4 event log queues */
+#define CXL_CPER_FIFO_DEPTH 32
+DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH);
+
+/* Synchronize schedule_work() with cxl_cper_work changes */
+static DEFINE_SPINLOCK(cxl_cper_work_lock);
+struct work_struct *cxl_cper_work;
static void cxl_cper_post_event(enum cxl_event_type event_type,
struct cxl_cper_event_rec *rec)
{
+ struct cxl_cper_work_data wd;
+
if (rec->hdr.length <= sizeof(rec->hdr) ||
rec->hdr.length > sizeof(*rec)) {
pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
@@ -721,30 +798,49 @@ static void cxl_cper_post_event(enum cxl_event_type event_type,
return;
}
- guard(rwsem_read)(&cxl_cper_rw_sem);
- if (cper_callback)
- cper_callback(event_type, rec);
+ guard(spinlock_irqsave)(&cxl_cper_work_lock);
+
+ if (!cxl_cper_work)
+ return;
+
+ wd.event_type = event_type;
+ memcpy(&wd.rec, rec, sizeof(wd.rec));
+
+ if (!kfifo_put(&cxl_cper_fifo, wd)) {
+ pr_err_ratelimited("CXL CPER kfifo overflow\n");
+ return;
+ }
+
+ schedule_work(cxl_cper_work);
}
-int cxl_cper_register_callback(cxl_cper_callback callback)
+int cxl_cper_register_work(struct work_struct *work)
{
- guard(rwsem_write)(&cxl_cper_rw_sem);
- if (cper_callback)
+ if (cxl_cper_work)
return -EINVAL;
- cper_callback = callback;
+
+ guard(spinlock)(&cxl_cper_work_lock);
+ cxl_cper_work = work;
return 0;
}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL);
+EXPORT_SYMBOL_NS_GPL(cxl_cper_register_work, "CXL");
-int cxl_cper_unregister_callback(cxl_cper_callback callback)
+int cxl_cper_unregister_work(struct work_struct *work)
{
- guard(rwsem_write)(&cxl_cper_rw_sem);
- if (callback != cper_callback)
+ if (cxl_cper_work != work)
return -EINVAL;
- cper_callback = NULL;
+
+ guard(spinlock)(&cxl_cper_work_lock);
+ cxl_cper_work = NULL;
return 0;
}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL);
+EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_work, "CXL");
+
+int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd)
+{
+ return kfifo_get(&cxl_cper_fifo, wd);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL");
static bool ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
@@ -780,20 +876,20 @@ static bool ghes_do_proc(struct ghes *ghes,
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
queued = ghes_handle_arm_hw_error(gdata, sev, sync);
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
+ struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_prot_err(prot_err, gdata->error_severity);
} else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
- struct cxl_cper_event_rec *rec =
- acpi_hest_get_payload(gdata);
+ struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
} else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
- struct cxl_cper_event_rec *rec =
- acpi_hest_get_payload(gdata);
+ struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
- } else if (guid_equal(sec_type,
- &CPER_SEC_CXL_MEM_MODULE_GUID)) {
- struct cxl_cper_event_rec *rec =
- acpi_hest_get_payload(gdata);
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) {
+ struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
} else {
@@ -988,14 +1084,16 @@ static void __ghes_panic(struct ghes *ghes,
struct acpi_hest_generic_status *estatus,
u64 buf_paddr, enum fixed_addresses fixmap_idx)
{
+ const char *msg = GHES_PFX "Fatal hardware error";
+
__ghes_print_estatus(KERN_EMERG, ghes->generic, estatus);
ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
- /* reboot to log the error! */
if (!panic_timeout)
- panic_timeout = ghes_panic_timeout;
- panic("Fatal hardware error!");
+ pr_emerg("%s but panic disabled\n", msg);
+
+ panic(msg);
}
static int ghes_proc(struct ghes *ghes)
@@ -1040,7 +1138,7 @@ static void ghes_add_timer(struct ghes *ghes)
static void ghes_poll_func(struct timer_list *t)
{
- struct ghes *ghes = from_timer(ghes, t, timer);
+ struct ghes *ghes = timer_container_of(ghes, t, timer);
unsigned long flags;
spin_lock_irqsave(&ghes_notify_lock_irq, flags);
@@ -1544,7 +1642,7 @@ err:
return rc;
}
-static int ghes_remove(struct platform_device *ghes_dev)
+static void ghes_remove(struct platform_device *ghes_dev)
{
int rc;
struct ghes *ghes;
@@ -1581,8 +1679,15 @@ static int ghes_remove(struct platform_device *ghes_dev)
break;
case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
rc = apei_sdei_unregister_ghes(ghes);
- if (rc)
- return rc;
+ if (rc) {
+ /*
+ * Returning early results in a resource leak, but we're
+ * only here if stopping the hardware failed.
+ */
+ dev_err(&ghes_dev->dev, "Failed to unregister ghes (%pe)\n",
+ ERR_PTR(rc));
+ return;
+ }
break;
default:
BUG();
@@ -1596,8 +1701,6 @@ static int ghes_remove(struct platform_device *ghes_dev)
mutex_unlock(&ghes_devs_mutex);
kfree(ghes);
-
- return 0;
}
static struct platform_driver ghes_platform_driver = {
@@ -1612,7 +1715,7 @@ void __init acpi_ghes_init(void)
{
int rc;
- sdei_init();
+ acpi_sdei_init();
if (acpi_disabled)
return;