diff options
Diffstat (limited to 'drivers/acpi/apei/ghes.c')
-rw-r--r-- | drivers/acpi/apei/ghes.c | 90 |
1 files changed, 57 insertions, 33 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index f0584ccad451..a0d54993edb3 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -464,28 +464,41 @@ static void ghes_clear_estatus(struct ghes *ghes, ghes_ack_error(ghes->generic_v2); } -/* - * Called as task_work before returning to user-space. - * Ensure any queued work has been done before we return to the context that - * triggered the notification. +/** + * struct ghes_task_work - for synchronous RAS event + * + * @twork: callback_head for task work + * @pfn: page frame number of corrupted page + * @flags: work control flags + * + * Structure to pass task work to be handled before + * returning to user-space via task_work_add(). */ -static void ghes_kick_task_work(struct callback_head *head) +struct ghes_task_work { + struct callback_head twork; + u64 pfn; + int flags; +}; + +static void memory_failure_cb(struct callback_head *twork) { - struct acpi_hest_generic_status *estatus; - struct ghes_estatus_node *estatus_node; - u32 node_len; + struct ghes_task_work *twcb = container_of(twork, struct ghes_task_work, twork); + int ret; - estatus_node = container_of(head, struct ghes_estatus_node, task_work); - if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) - memory_failure_queue_kick(estatus_node->task_work_cpu); + ret = memory_failure(twcb->pfn, twcb->flags); + gen_pool_free(ghes_estatus_pool, (unsigned long)twcb, sizeof(*twcb)); - estatus = GHES_ESTATUS_FROM_NODE(estatus_node); - node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus)); - gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len); + if (!ret || ret == -EHWPOISON || ret == -EOPNOTSUPP) + return; + + pr_err("%#llx: Sending SIGBUS to %s:%d due to hardware memory corruption\n", + twcb->pfn, current->comm, task_pid_nr(current)); + force_sig(SIGBUS); } static bool ghes_do_memory_failure(u64 physical_addr, int flags) { + struct ghes_task_work *twcb; unsigned long pfn; if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) @@ -499,6 +512,18 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags) return false; } + if (flags == MF_ACTION_REQUIRED && current->mm) { + twcb = (void *)gen_pool_alloc(ghes_estatus_pool, sizeof(*twcb)); + if (!twcb) + return false; + + twcb->pfn = pfn; + twcb->flags = flags; + init_task_work(&twcb->twork, memory_failure_cb); + task_work_add(current, &twcb->twork, TWA_RESUME); + return true; + } + memory_failure_queue(pfn, flags); return true; } @@ -842,7 +867,7 @@ int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) } EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL"); -static bool ghes_do_proc(struct ghes *ghes, +static void ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { int sev, sec_sev; @@ -902,7 +927,16 @@ static bool ghes_do_proc(struct ghes *ghes, } } - return queued; + /* + * If no memory failure work is queued for abnormal synchronous + * errors, do a force kill. + */ + if (sync && !queued) { + dev_err(ghes->dev, + HW_ERR GHES_PFX "%s:%d: synchronous unrecoverable error (SIGBUS)\n", + current->comm, task_pid_nr(current)); + force_sig(SIGBUS); + } } static void __ghes_print_estatus(const char *pfx, @@ -1088,6 +1122,8 @@ static void __ghes_panic(struct ghes *ghes, __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK); + ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); if (!panic_timeout) @@ -1206,9 +1242,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) struct ghes_estatus_node *estatus_node; struct acpi_hest_generic *generic; struct acpi_hest_generic_status *estatus; - bool task_work_pending; u32 len, node_len; - int ret; llnode = llist_del_all(&ghes_estatus_llist); /* @@ -1223,25 +1257,16 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) estatus = GHES_ESTATUS_FROM_NODE(estatus_node); len = cper_estatus_len(estatus); node_len = GHES_ESTATUS_NODE_LEN(len); - task_work_pending = ghes_do_proc(estatus_node->ghes, estatus); + + ghes_do_proc(estatus_node->ghes, estatus); + if (!ghes_estatus_cached(estatus)) { generic = estatus_node->generic; if (ghes_print_estatus(NULL, generic, estatus)) ghes_estatus_cache_add(generic, estatus); } - - if (task_work_pending && current->mm) { - estatus_node->task_work.func = ghes_kick_task_work; - estatus_node->task_work_cpu = smp_processor_id(); - ret = task_work_add(current, &estatus_node->task_work, - TWA_RESUME); - if (ret) - estatus_node->task_work.func = NULL; - } - - if (!estatus_node->task_work.func) - gen_pool_free(ghes_estatus_pool, - (unsigned long)estatus_node, node_len); + gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, + node_len); llnode = next; } @@ -1302,7 +1327,6 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; - estatus_node->task_work.func = NULL; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) { |