Diffstat (limited to 'drivers/acpi/apei/ghes.c')
| -rw-r--r-- | drivers/acpi/apei/ghes.c | 1503 |
1 file changed, 1017 insertions, 486 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index d661d452b238..0dc767392a6c 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * APEI Generic Hardware Error Source support * @@ -14,26 +15,19 @@ * * Copyright 2010,2011 Intel Corp. * Author: Huang Ying <ying.huang@intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation; - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ +#include <linux/arm_sdei.h> #include <linux/kernel.h> #include <linux/moduleparam.h> #include <linux/init.h> #include <linux/acpi.h> +#include <linux/bitfield.h> #include <linux/io.h> #include <linux/interrupt.h> #include <linux/timer.h> #include <linux/cper.h> -#include <linux/kdebug.h> +#include <linux/cleanup.h> #include <linux/platform_device.h> #include <linux/mutex.h> #include <linux/ratelimit.h> @@ -41,17 +35,23 @@ #include <linux/irq_work.h> #include <linux/llist.h> #include <linux/genalloc.h> +#include <linux/kfifo.h> #include <linux/pci.h> +#include <linux/pfn.h> #include <linux/aer.h> #include <linux/nmi.h> #include <linux/sched/clock.h> #include <linux/uuid.h> #include <linux/ras.h> +#include <linux/task_work.h> +#include <linux/vmcore_info.h> #include <acpi/actbl1.h> #include <acpi/ghes.h> #include <acpi/apei.h> +#include <asm/fixmap.h> #include <asm/tlbflush.h> +#include <cxl/event.h> #include <ras/ras_event.h> #include "apei-internal.h" @@ -84,12 +84,43 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) +#define GHES_VENDOR_ENTRY_LEN(gdata_len) \ + (sizeof(struct ghes_vendor_record_entry) + (gdata_len)) +#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \ + ((struct acpi_hest_generic_data *) \ + ((struct ghes_vendor_record_entry *)(vendor_entry) + 1)) + +/* + * NMI-like notifications vary by architecture, before the compiler can prune + * unused static functions it needs a value for these enums. + */ +#ifndef CONFIG_ARM_SDE_INTERFACE +#define FIX_APEI_GHES_SDEI_NORMAL __end_of_fixed_addresses +#define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses +#endif + +static ATOMIC_NOTIFIER_HEAD(ghes_report_chain); + static inline bool is_hest_type_generic_v2(struct ghes *ghes) { return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; } /* + * A platform may describe one error source for the handling of synchronous + * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI + * or External Interrupt). On x86, the HEST notifications are always + * asynchronous, so only SEA on ARM is delivered as a synchronous + * notification. + */ +static inline bool is_hest_sync_notify(struct ghes *ghes) +{ + u8 notify_type = ghes->generic->notify.type; + + return notify_type == ACPI_HEST_NOTIFY_SEA; +} + +/* * This driver isn't really modular, however for the time being, * continuing to use module_param is the easiest way to remain * compatible with existing boot arg use cases. @@ -98,6 +129,13 @@ bool ghes_disable; module_param_named(disable, ghes_disable, bool, 0); /* + * "ghes.edac_force_enable" forcibly enables ghes_edac and skips the platform + * check. 
+ */ +static bool ghes_edac_force_enable; +module_param_named(edac_force_enable, ghes_edac_force_enable, bool, 0); + +/* * All error sources notified with HED (Hardware Error Device) share a * single notifier callback, so they need to be linked and checked one * by one. This holds true for NMI too. @@ -109,147 +147,98 @@ static LIST_HEAD(ghes_hed); static DEFINE_MUTEX(ghes_list_mutex); /* - * Because the memory area used to transfer hardware error information - * from BIOS to Linux can be determined only in NMI, IRQ or timer - * handler, but general ioremap can not be used in atomic context, so - * a special version of atomic ioremap is implemented for that. + * A list of GHES devices which are given to the corresponding EDAC driver + * ghes_edac for further use. */ +static LIST_HEAD(ghes_devs); +static DEFINE_MUTEX(ghes_devs_mutex); /* - * Two virtual pages are used, one for IRQ/PROCESS context, the other for - * NMI context (optionally). + * Because the memory area used to transfer hardware error information + * from BIOS to Linux can be determined only in NMI, IRQ or timer + * handler, but general ioremap can not be used in atomic context, so + * the fixmap is used instead. + * + * This spinlock is used to prevent the fixmap entry from being used + * simultaneously. */ -#define GHES_IOREMAP_PAGES 2 -#define GHES_IOREMAP_IRQ_PAGE(base) (base) -#define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE) +static DEFINE_SPINLOCK(ghes_notify_lock_irq); -/* virtual memory area for atomic ioremap */ -static struct vm_struct *ghes_ioremap_area; -/* - * These 2 spinlock is used to prevent atomic ioremap virtual memory - * area from being mapped simultaneously. - */ -static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); -static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); +struct ghes_vendor_record_entry { + struct work_struct work; + int error_severity; + char vendor_record[]; +}; static struct gen_pool *ghes_estatus_pool; -static unsigned long ghes_estatus_pool_size_request; -static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; +static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; static atomic_t ghes_estatus_cache_alloced; -static int ghes_panic_timeout __read_mostly = 30; - -static int ghes_ioremap_init(void) +static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx) { - ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, - VM_IOREMAP, VMALLOC_START, VMALLOC_END); - if (!ghes_ioremap_area) { - pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n"); - return -ENOMEM; - } - - return 0; -} - -static void ghes_ioremap_exit(void) -{ - free_vm_area(ghes_ioremap_area); -} - -static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) -{ - unsigned long vaddr; phys_addr_t paddr; pgprot_t prot; - vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr); - - paddr = pfn << PAGE_SHIFT; - prot = arch_apei_get_mem_attribute(paddr); - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); - - return (void __iomem *)vaddr; -} - -static void __iomem *ghes_ioremap_pfn_irq(u64 pfn) -{ - unsigned long vaddr, paddr; - pgprot_t prot; - - vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr); - - paddr = pfn << PAGE_SHIFT; + paddr = PFN_PHYS(pfn); prot = arch_apei_get_mem_attribute(paddr); + __set_fixmap(fixmap_idx, paddr, prot); - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); - - return (void __iomem *)vaddr; + return (void __iomem *) __fix_to_virt(fixmap_idx); } -static 
void ghes_iounmap_nmi(void __iomem *vaddr_ptr) +static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx) { - unsigned long vaddr = (unsigned long __force)vaddr_ptr; - void *base = ghes_ioremap_area->addr; + int _idx = virt_to_fix((unsigned long)vaddr); - BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); - unmap_kernel_range_noflush(vaddr, PAGE_SIZE); - arch_apei_flush_tlb_one(vaddr); + WARN_ON_ONCE(fixmap_idx != _idx); + clear_fixmap(fixmap_idx); } -static void ghes_iounmap_irq(void __iomem *vaddr_ptr) +int ghes_estatus_pool_init(unsigned int num_ghes) { - unsigned long vaddr = (unsigned long __force)vaddr_ptr; - void *base = ghes_ioremap_area->addr; - - BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); - unmap_kernel_range_noflush(vaddr, PAGE_SIZE); - arch_apei_flush_tlb_one(vaddr); -} + unsigned long addr, len; + int rc; -static int ghes_estatus_pool_init(void) -{ ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); if (!ghes_estatus_pool) return -ENOMEM; + + len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX; + len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE); + + addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); + if (!addr) + goto err_pool_alloc; + + rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); + if (rc) + goto err_pool_add; + return 0; -} -static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, - struct gen_pool_chunk *chunk, - void *data) -{ - free_page(chunk->start_addr); -} +err_pool_add: + vfree((void *)addr); -static void ghes_estatus_pool_exit(void) -{ - gen_pool_for_each_chunk(ghes_estatus_pool, - ghes_estatus_pool_free_chunk_page, NULL); +err_pool_alloc: gen_pool_destroy(ghes_estatus_pool); + + return -ENOMEM; } -static int ghes_estatus_pool_expand(unsigned long len) +/** + * ghes_estatus_pool_region_free - free previously allocated memory + * from the ghes_estatus_pool. + * @addr: address of memory to free. + * @size: size of memory to free. + * + * Returns none. 
+ */ +void ghes_estatus_pool_region_free(unsigned long addr, u32 size) { - unsigned long i, pages, size, addr; - int ret; - - ghes_estatus_pool_size_request += PAGE_ALIGN(len); - size = gen_pool_size(ghes_estatus_pool); - if (size >= ghes_estatus_pool_size_request) - return 0; - pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; - for (i = 0; i < pages; i++) { - addr = __get_free_page(GFP_KERNEL); - if (!addr) - return -ENOMEM; - ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); - if (ret) - return ret; - } - - return 0; + gen_pool_free(ghes_estatus_pool, addr, size); } +EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free); static int map_gen_v2(struct ghes *ghes) { @@ -261,6 +250,21 @@ static void unmap_gen_v2(struct ghes *ghes) apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); } +static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2) +{ + int rc; + u64 val = 0; + + rc = apei_read(&val, &gv2->read_ack_register); + if (rc) + return; + + val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; + val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; + + apei_write(val, &gv2->read_ack_register); +} + static struct ghes *ghes_new(struct acpi_hest_generic *generic) { struct ghes *ghes; @@ -283,10 +287,10 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) goto err_unmap_read_ack_addr; error_block_length = generic->error_block_length; if (error_block_length > GHES_ESTATUS_MAX_SIZE) { - pr_warning(FW_WARN GHES_PFX - "Error status block length is too long: %u for " - "generic hardware error source: %d.\n", - error_block_length, generic->header.source_id); + pr_warn(FW_WARN GHES_PFX + "Error status block length is too long: %u for " + "generic hardware error source: %d.\n", + error_block_length, generic->header.source_id); error_block_length = GHES_ESTATUS_MAX_SIZE; } ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); @@ -333,23 +337,16 @@ static inline int ghes_severity(int severity) } static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, - int from_phys) + int from_phys, + enum fixed_addresses fixmap_idx) { void __iomem *vaddr; - unsigned long flags = 0; - int in_nmi = in_nmi(); u64 offset; u32 trunk; while (len > 0) { offset = paddr - (paddr & PAGE_MASK); - if (in_nmi) { - raw_spin_lock(&ghes_ioremap_lock_nmi); - vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT); - } else { - spin_lock_irqsave(&ghes_ioremap_lock_irq, flags); - vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT); - } + vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx); trunk = PAGE_SIZE - offset; trunk = min(trunk, len); if (from_phys) @@ -359,113 +356,559 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, len -= trunk; paddr += trunk; buffer += trunk; - if (in_nmi) { - ghes_iounmap_nmi(vaddr); - raw_spin_unlock(&ghes_ioremap_lock_nmi); - } else { - ghes_iounmap_irq(vaddr); - spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags); - } + ghes_unmap(vaddr, fixmap_idx); } } -static int ghes_read_estatus(struct ghes *ghes, int silent) +/* Check the top-level record header has an appropriate size. 
*/ +static int __ghes_check_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus) +{ + u32 len = cper_estatus_len(estatus); + + if (len < sizeof(*estatus)) { + pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n"); + return -EIO; + } + + if (len > ghes->generic->error_block_length) { + pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n"); + return -EIO; + } + + if (cper_estatus_check_header(estatus)) { + pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n"); + return -EIO; + } + + return 0; +} + +/* Read the CPER block, returning its address, and header in estatus. */ +static int __ghes_peek_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 *buf_paddr, enum fixed_addresses fixmap_idx) { struct acpi_hest_generic *g = ghes->generic; - u64 buf_paddr; - u32 len; int rc; - rc = apei_read(&buf_paddr, &g->error_status_address); + rc = apei_read(buf_paddr, &g->error_status_address); if (rc) { - if (!silent && printk_ratelimit()) - pr_warning(FW_WARN GHES_PFX + *buf_paddr = 0; + pr_warn_ratelimited(FW_WARN GHES_PFX "Failed to read error status block address for hardware error source: %d.\n", g->header.source_id); return -EIO; } - if (!buf_paddr) + if (!*buf_paddr) return -ENOENT; - ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, - sizeof(*ghes->estatus), 1); - if (!ghes->estatus->block_status) + ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1, + fixmap_idx); + if (!estatus->block_status) { + *buf_paddr = 0; return -ENOENT; + } - ghes->buffer_paddr = buf_paddr; - ghes->flags |= GHES_TO_CLEAR; + return 0; +} - rc = -EIO; - len = cper_estatus_len(ghes->estatus); - if (len < sizeof(*ghes->estatus)) - goto err_read_block; - if (len > ghes->generic->error_block_length) - goto err_read_block; - if (cper_estatus_check_header(ghes->estatus)) - goto err_read_block; - ghes_copy_tofrom_phys(ghes->estatus + 1, - buf_paddr + sizeof(*ghes->estatus), - len - sizeof(*ghes->estatus), 1); - if (cper_estatus_check(ghes->estatus)) - goto err_read_block; - rc = 0; - -err_read_block: - if (rc && !silent && printk_ratelimit()) - pr_warning(FW_WARN GHES_PFX - "Failed to read error status block!\n"); - return rc; +static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx, + size_t buf_len) +{ + ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx); + if (cper_estatus_check(estatus)) { + pr_warn_ratelimited(FW_WARN GHES_PFX + "Failed to read error status block!\n"); + return -EIO; + } + + return 0; +} + +static int ghes_read_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 *buf_paddr, enum fixed_addresses fixmap_idx) +{ + int rc; + + rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx); + if (rc) + return rc; + + rc = __ghes_check_estatus(ghes, estatus); + if (rc) + return rc; + + return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx, + cper_estatus_len(estatus)); +} + +static void ghes_clear_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx) +{ + estatus->block_status = 0; + + if (!buf_paddr) + return; + + ghes_copy_tofrom_phys(estatus, buf_paddr, + sizeof(estatus->block_status), 0, + fixmap_idx); + + /* + * GHESv2 type HEST entries introduce support for error acknowledgment, + * so only acknowledge the error if this support is present. 
+ */ + if (is_hest_type_generic_v2(ghes)) + ghes_ack_error(ghes->generic_v2); } -static void ghes_clear_estatus(struct ghes *ghes) +/** + * struct ghes_task_work - for synchronous RAS event + * + * @twork: callback_head for task work + * @pfn: page frame number of corrupted page + * @flags: work control flags + * + * Structure to pass task work to be handled before + * returning to user-space via task_work_add(). + */ +struct ghes_task_work { + struct callback_head twork; + u64 pfn; + int flags; +}; + +static void memory_failure_cb(struct callback_head *twork) { - ghes->estatus->block_status = 0; - if (!(ghes->flags & GHES_TO_CLEAR)) + struct ghes_task_work *twcb = container_of(twork, struct ghes_task_work, twork); + int ret; + + ret = memory_failure(twcb->pfn, twcb->flags); + gen_pool_free(ghes_estatus_pool, (unsigned long)twcb, sizeof(*twcb)); + + if (!ret || ret == -EHWPOISON || ret == -EOPNOTSUPP) return; - ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr, - sizeof(ghes->estatus->block_status), 0); - ghes->flags &= ~GHES_TO_CLEAR; + + pr_err("%#llx: Sending SIGBUS to %s:%d due to hardware memory corruption\n", + twcb->pfn, current->comm, task_pid_nr(current)); + force_sig(SIGBUS); } -static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev) +static bool ghes_do_memory_failure(u64 physical_addr, int flags) { -#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE + struct ghes_task_work *twcb; unsigned long pfn; + + if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) + return false; + + pfn = PHYS_PFN(physical_addr); + + if (flags == MF_ACTION_REQUIRED && current->mm) { + twcb = (void *)gen_pool_alloc(ghes_estatus_pool, sizeof(*twcb)); + if (!twcb) + return false; + + twcb->pfn = pfn; + twcb->flags = flags; + init_task_work(&twcb->twork, memory_failure_cb); + task_work_add(current, &twcb->twork, TWA_RESUME); + return true; + } + + memory_failure_queue(pfn, flags); + return true; +} + +static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, + int sev, bool sync) +{ int flags = -1; int sec_sev = ghes_severity(gdata->error_severity); struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) - return; - - pfn = mem_err->physical_addr >> PAGE_SHIFT; - if (!pfn_valid(pfn)) { - pr_warn_ratelimited(FW_WARN GHES_PFX - "Invalid address in generic error data: %#llx\n", - mem_err->physical_addr); - return; - } + return false; /* iff following two events can be handled properly by now */ if (sec_sev == GHES_SEV_CORRECTED && (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) flags = MF_SOFT_OFFLINE; if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) - flags = 0; + flags = sync ? MF_ACTION_REQUIRED : 0; if (flags != -1) - memory_failure_queue(pfn, 0, flags); + return ghes_do_memory_failure(mem_err->physical_addr, flags); + + return false; +} + +static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, + int sev, bool sync) +{ + struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); + int flags = sync ? 
MF_ACTION_REQUIRED : 0; + char error_type[120]; + bool queued = false; + int sec_sev, i; + char *p; + + sec_sev = ghes_severity(gdata->error_severity); + log_arm_hw_error(err, sec_sev); + if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE) + return false; + + p = (char *)(err + 1); + for (i = 0; i < err->err_info_num; i++) { + struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p; + bool is_cache = err_info->type & CPER_ARM_CACHE_ERROR; + bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR); + + /* + * The field (err_info->error_info & BIT(26)) is fixed to set to + * 1 in some old firmware of HiSilicon Kunpeng920. We assume that + * firmware won't mix corrected errors in an uncorrected section, + * and don't filter out 'corrected' error here. + */ + if (is_cache && has_pa) { + queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags); + p += err_info->length; + continue; + } + + cper_bits_to_str(error_type, sizeof(error_type), + FIELD_GET(CPER_ARM_ERR_TYPE_MASK, err_info->type), + cper_proc_error_type_strs, + ARRAY_SIZE(cper_proc_error_type_strs)); + + pr_warn_ratelimited(FW_WARN GHES_PFX + "Unhandled processor error type 0x%02x: %s%s\n", + err_info->type, error_type, + (err_info->type & ~CPER_ARM_ERR_TYPE_MASK) ? " with reserved bit(s)" : ""); + p += err_info->length; + } + + return queued; +} + +/* + * PCIe AER errors need to be sent to the AER driver for reporting and + * recovery. The GHES severities map to the following AER severities and + * require the following handling: + * + * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE + * These need to be reported by the AER driver but no recovery is + * necessary. + * GHES_SEV_RECOVERABLE -> AER_NONFATAL + * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL + * These both need to be reported and recovered from by the AER driver. + * GHES_SEV_PANIC does not make it to this handling since the kernel must + * panic. + */ +static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) +{ +#ifdef CONFIG_ACPI_APEI_PCIEAER + struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); + + if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && + pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { + unsigned int devfn; + int aer_severity; + u8 *aer_info; + + devfn = PCI_DEVFN(pcie_err->device_id.device, + pcie_err->device_id.function); + aer_severity = cper_severity_to_aer(gdata->error_severity); + + /* + * If firmware reset the component to contain + * the error, we must reinitialize it before + * use, so treat it as a fatal AER error. 
+ */ + if (gdata->flags & CPER_SEC_RESET) + aer_severity = AER_FATAL; + + aer_info = (void *)gen_pool_alloc(ghes_estatus_pool, + sizeof(struct aer_capability_regs)); + if (!aer_info) + return; + memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs)); + + aer_recover_queue(pcie_err->device_id.segment, + pcie_err->device_id.bus, + devfn, aer_severity, + (struct aer_capability_regs *) + aer_info); + } +#endif +} + +static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list); + +int ghes_register_vendor_record_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&vendor_record_notify_list, nb); +} +EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier); + +void ghes_unregister_vendor_record_notifier(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&vendor_record_notify_list, nb); +} +EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier); + +static void ghes_vendor_record_work_func(struct work_struct *work) +{ + struct ghes_vendor_record_entry *entry; + struct acpi_hest_generic_data *gdata; + u32 len; + + entry = container_of(work, struct ghes_vendor_record_entry, work); + gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); + + blocking_notifier_call_chain(&vendor_record_notify_list, + entry->error_severity, gdata); + + len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); + gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len); +} + +static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, + int sev) +{ + struct acpi_hest_generic_data *copied_gdata; + struct ghes_vendor_record_entry *entry; + u32 len; + + len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); + entry = (void *)gen_pool_alloc(ghes_estatus_pool, len); + if (!entry) + return; + + copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); + memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata)); + entry->error_severity = sev; + + INIT_WORK(&entry->work, ghes_vendor_record_work_func); + schedule_work(&entry->work); +} + +/* Room for 8 entries */ +#define CXL_CPER_PROT_ERR_FIFO_DEPTH 8 +static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data, + CXL_CPER_PROT_ERR_FIFO_DEPTH); + +/* Synchronize schedule_work() with cxl_cper_prot_err_work changes */ +static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock); +struct work_struct *cxl_cper_prot_err_work; + +static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err, + int severity) +{ +#ifdef CONFIG_ACPI_APEI_PCIEAER + struct cxl_cper_prot_err_work_data wd; + u8 *dvsec_start, *cap_start; + + if (!(prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS)) { + pr_err_ratelimited("CXL CPER invalid agent type\n"); + return; + } + + if (!(prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG)) { + pr_err_ratelimited("CXL CPER invalid protocol error log\n"); + return; + } + + if (prot_err->err_len != sizeof(struct cxl_ras_capability_regs)) { + pr_err_ratelimited("CXL CPER invalid RAS Cap size (%u)\n", + prot_err->err_len); + return; + } + + if (!(prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER)) + pr_warn(FW_WARN "CXL CPER no device serial number\n"); + + guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock); + + if (!cxl_cper_prot_err_work) + return; + + switch (prot_err->agent_type) { + case RCD: + case DEVICE: + case LD: + case FMLD: + case RP: + case DSP: + case USP: + memcpy(&wd.prot_err, prot_err, sizeof(wd.prot_err)); + + dvsec_start = (u8 *)(prot_err + 1); + cap_start = dvsec_start + prot_err->dvsec_len; + + memcpy(&wd.ras_cap, cap_start, 
sizeof(wd.ras_cap)); + wd.severity = cper_severity_to_aer(severity); + break; + default: + pr_err_ratelimited("CXL CPER invalid agent type: %d\n", + prot_err->agent_type); + return; + } + + if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) { + pr_err_ratelimited("CXL CPER kfifo overflow\n"); + return; + } + + schedule_work(cxl_cper_prot_err_work); #endif } +int cxl_cper_register_prot_err_work(struct work_struct *work) +{ + if (cxl_cper_prot_err_work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_prot_err_work_lock); + cxl_cper_prot_err_work = work; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL"); + +int cxl_cper_unregister_prot_err_work(struct work_struct *work) +{ + if (cxl_cper_prot_err_work != work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_prot_err_work_lock); + cxl_cper_prot_err_work = NULL; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL"); + +int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd) +{ + return kfifo_get(&cxl_cper_prot_err_fifo, wd); +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL"); + +/* Room for 8 entries for each of the 4 event log queues */ +#define CXL_CPER_FIFO_DEPTH 32 +DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH); + +/* Synchronize schedule_work() with cxl_cper_work changes */ +static DEFINE_SPINLOCK(cxl_cper_work_lock); +struct work_struct *cxl_cper_work; + +static void cxl_cper_post_event(enum cxl_event_type event_type, + struct cxl_cper_event_rec *rec) +{ + struct cxl_cper_work_data wd; + + if (rec->hdr.length <= sizeof(rec->hdr) || + rec->hdr.length > sizeof(*rec)) { + pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n", + rec->hdr.length); + return; + } + + if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) { + pr_err(FW_WARN "CXL CPER invalid event\n"); + return; + } + + guard(spinlock_irqsave)(&cxl_cper_work_lock); + + if (!cxl_cper_work) + return; + + wd.event_type = event_type; + memcpy(&wd.rec, rec, sizeof(wd.rec)); + + if (!kfifo_put(&cxl_cper_fifo, wd)) { + pr_err_ratelimited("CXL CPER kfifo overflow\n"); + return; + } + + schedule_work(cxl_cper_work); +} + +int cxl_cper_register_work(struct work_struct *work) +{ + if (cxl_cper_work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_work_lock); + cxl_cper_work = work; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_register_work, "CXL"); + +int cxl_cper_unregister_work(struct work_struct *work) +{ + if (cxl_cper_work != work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_work_lock); + cxl_cper_work = NULL; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_work, "CXL"); + +int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) +{ + return kfifo_get(&cxl_cper_fifo, wd); +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL"); + +static void ghes_log_hwerr(int sev, guid_t *sec_type) +{ + if (sev != CPER_SEV_RECOVERABLE) + return; + + if (guid_equal(sec_type, &CPER_SEC_PROC_ARM) || + guid_equal(sec_type, &CPER_SEC_PROC_GENERIC) || + guid_equal(sec_type, &CPER_SEC_PROC_IA)) { + hwerr_log_error_type(HWERR_RECOV_CPU); + return; + } + + if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR) || + guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID) || + guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID) || + guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) { + hwerr_log_error_type(HWERR_RECOV_CXL); + return; + } + + if (guid_equal(sec_type, &CPER_SEC_PCIE) || + guid_equal(sec_type, &CPER_SEC_PCI_X_BUS)) { + hwerr_log_error_type(HWERR_RECOV_PCI); + return; + } + + if 
(guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { + hwerr_log_error_type(HWERR_RECOV_MEMORY); + return; + } + + hwerr_log_error_type(HWERR_RECOV_OTHERS); +} + static void ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { int sev, sec_sev; struct acpi_hest_generic_data *gdata; guid_t *sec_type; - guid_t *fru_id = &NULL_UUID_LE; + const guid_t *fru_id = &guid_null; char *fru_text = ""; + bool queued = false; + bool sync = is_hest_sync_notify(ghes); sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { @@ -477,58 +920,54 @@ static void ghes_do_proc(struct ghes *ghes, if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) fru_text = gdata->fru_text; + ghes_log_hwerr(sev, sec_type); if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); - ghes_edac_report_mem_error(ghes, sev, mem_err); + atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err); arch_apei_report_mem_error(sev, mem_err); - ghes_handle_memory_failure(gdata, sev); - } -#ifdef CONFIG_ACPI_APEI_PCIEAER - else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { - struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); - - if (sev == GHES_SEV_RECOVERABLE && - sec_sev == GHES_SEV_RECOVERABLE && - pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && - pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { - unsigned int devfn; - int aer_severity; - - devfn = PCI_DEVFN(pcie_err->device_id.device, - pcie_err->device_id.function); - aer_severity = cper_severity_to_aer(gdata->error_severity); - - /* - * If firmware reset the component to contain - * the error, we must reinitialize it before - * use, so treat it as a fatal AER error. - */ - if (gdata->flags & CPER_SEC_RESET) - aer_severity = AER_FATAL; - - aer_recover_queue(pcie_err->device_id.segment, - pcie_err->device_id.bus, - devfn, aer_severity, - (struct aer_capability_regs *) - pcie_err->aer_info); - } - - } -#endif - else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { - struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); - - log_arm_hw_error(err); + queued = ghes_handle_memory_failure(gdata, sev, sync); + } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { + ghes_handle_aer(gdata); + } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { + queued = ghes_handle_arm_hw_error(gdata, sev, sync); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) { + struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata); + + cxl_cper_post_prot_err(prot_err, gdata->error_severity); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) { + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) { + struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec); } else { void *err = acpi_hest_get_payload(gdata); + ghes_defer_non_standard_event(gdata, sev); log_non_standard_event(sec_type, fru_id, fru_text, sec_sev, err, gdata->error_data_length); } } + + /* + * If no memory failure work is queued for abnormal synchronous + * errors, do a force kill. 
+ */ + if (sync && !queued) { + dev_err(ghes->dev, + HW_ERR GHES_PFX "%s:%d: synchronous unrecoverable error (SIGBUS)\n", + current->comm, task_pid_nr(current)); + force_sig(SIGBUS); + } } static void __ghes_print_estatus(const char *pfx, @@ -636,48 +1075,42 @@ static struct ghes_estatus_cache *ghes_estatus_cache_alloc( return cache; } -static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache) +static void ghes_estatus_cache_rcu_free(struct rcu_head *head) { + struct ghes_estatus_cache *cache; u32 len; + cache = container_of(head, struct ghes_estatus_cache, rcu); len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); len = GHES_ESTATUS_CACHE_LEN(len); gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); atomic_dec(&ghes_estatus_cache_alloced); } -static void ghes_estatus_cache_rcu_free(struct rcu_head *head) -{ - struct ghes_estatus_cache *cache; - - cache = container_of(head, struct ghes_estatus_cache, rcu); - ghes_estatus_cache_free(cache); -} - -static void ghes_estatus_cache_add( - struct acpi_hest_generic *generic, - struct acpi_hest_generic_status *estatus) +static void +ghes_estatus_cache_add(struct acpi_hest_generic *generic, + struct acpi_hest_generic_status *estatus) { - int i, slot = -1, count; unsigned long long now, duration, period, max_period = 0; - struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache; + struct ghes_estatus_cache *cache, *new_cache; + struct ghes_estatus_cache __rcu *victim; + int i, slot = -1, count; new_cache = ghes_estatus_cache_alloc(generic, estatus); - if (new_cache == NULL) + if (!new_cache) return; + rcu_read_lock(); now = sched_clock(); for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { cache = rcu_dereference(ghes_estatus_caches[i]); if (cache == NULL) { slot = i; - slot_cache = NULL; break; } duration = now - cache->time_in; if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { slot = i; - slot_cache = cache; break; } count = atomic_read(&cache->count); @@ -686,74 +1119,72 @@ static void ghes_estatus_cache_add( if (period > max_period) { max_period = period; slot = i; - slot_cache = cache; } } - /* new_cache must be put into array after its contents are written */ - smp_wmb(); - if (slot != -1 && cmpxchg(ghes_estatus_caches + slot, - slot_cache, new_cache) == slot_cache) { - if (slot_cache) - call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free); - } else - ghes_estatus_cache_free(new_cache); rcu_read_unlock(); + + if (slot != -1) { + /* + * Use release semantics to ensure that ghes_estatus_cached() + * running on another CPU will see the updated cache fields if + * it can see the new value of the pointer. + */ + victim = xchg_release(&ghes_estatus_caches[slot], + RCU_INITIALIZER(new_cache)); + + /* + * At this point, victim may point to a cached item different + * from the one based on which we selected the slot. Instead of + * going to the loop again to pick another slot, let's just + * drop the other item anyway: this may cause a false cache + * miss later on, but that won't cause any problems. 
+ */ + if (victim) + call_rcu(&unrcu_pointer(victim)->rcu, + ghes_estatus_cache_rcu_free); + } } -static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2) +static void __ghes_panic(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx) { - int rc; - u64 val = 0; + const char *msg = GHES_PFX "Fatal hardware error"; - rc = apei_read(&val, &gv2->read_ack_register); - if (rc) - return rc; + __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); - val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; - val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK); - return apei_write(val, &gv2->read_ack_register); -} - -static void __ghes_panic(struct ghes *ghes) -{ - __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); + ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); - /* reboot to log the error! */ if (!panic_timeout) - panic_timeout = ghes_panic_timeout; - panic("Fatal hardware error!"); + pr_emerg("%s but panic disabled\n", msg); + + panic(msg); } static int ghes_proc(struct ghes *ghes) { + struct acpi_hest_generic_status *estatus = ghes->estatus; + u64 buf_paddr; int rc; - rc = ghes_read_estatus(ghes, 0); + rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ); if (rc) goto out; - if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) { - __ghes_panic(ghes); - } + if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC) + __ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); - if (!ghes_estatus_cached(ghes->estatus)) { - if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) - ghes_estatus_cache_add(ghes->generic, ghes->estatus); + if (!ghes_estatus_cached(estatus)) { + if (ghes_print_estatus(NULL, ghes->generic, estatus)) + ghes_estatus_cache_add(ghes->generic, estatus); } - ghes_do_proc(ghes, ghes->estatus); + ghes_do_proc(ghes, estatus); - /* - * GHESv2 type HEST entries introduce support for error acknowledgment, - * so only acknowledge the error if this support is present. 
- */ - if (is_hest_type_generic_v2(ghes)) { - rc = ghes_ack_error(ghes->generic_v2); - if (rc) - return rc; - } out: - ghes_clear_estatus(ghes); + ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); + return rc; } @@ -763,8 +1194,8 @@ static void ghes_add_timer(struct ghes *ghes) unsigned long expire; if (!g->notify.poll_interval) { - pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", - g->header.source_id); + pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", + g->header.source_id); return; } expire = jiffies + msecs_to_jiffies(g->notify.poll_interval); @@ -772,11 +1203,14 @@ static void ghes_add_timer(struct ghes *ghes) add_timer(&ghes->timer); } -static void ghes_poll_func(unsigned long data) +static void ghes_poll_func(struct timer_list *t) { - struct ghes *ghes = (void *)data; + struct ghes *ghes = timer_container_of(ghes, t, timer); + unsigned long flags; + spin_lock_irqsave(&ghes_notify_lock_irq, flags); ghes_proc(ghes); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); if (!(ghes->flags & GHES_EXITING)) ghes_add_timer(ghes); } @@ -784,9 +1218,12 @@ static void ghes_poll_func(unsigned long data) static irqreturn_t ghes_irq_func(int irq, void *data) { struct ghes *ghes = data; + unsigned long flags; int rc; + spin_lock_irqsave(&ghes_notify_lock_irq, flags); rc = ghes_proc(ghes); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); if (rc) return IRQ_NONE; @@ -797,14 +1234,15 @@ static int ghes_notify_hed(struct notifier_block *this, unsigned long event, void *data) { struct ghes *ghes; + unsigned long flags; int ret = NOTIFY_DONE; - rcu_read_lock(); + spin_lock_irqsave(&ghes_notify_lock_irq, flags); list_for_each_entry_rcu(ghes, &ghes_hed, list) { if (!ghes_proc(ghes)) ret = NOTIFY_OK; } - rcu_read_unlock(); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); return ret; } @@ -813,75 +1251,20 @@ static struct notifier_block ghes_notifier_hed = { .notifier_call = ghes_notify_hed, }; -#ifdef CONFIG_ACPI_APEI_SEA -static LIST_HEAD(ghes_sea); - -/* - * Return 0 only if one of the SEA error sources successfully reported an error - * record sent from the firmware. - */ -int ghes_notify_sea(void) -{ - struct ghes *ghes; - int ret = -ENOENT; - - rcu_read_lock(); - list_for_each_entry_rcu(ghes, &ghes_sea, list) { - if (!ghes_proc(ghes)) - ret = 0; - } - rcu_read_unlock(); - return ret; -} - -static void ghes_sea_add(struct ghes *ghes) -{ - mutex_lock(&ghes_list_mutex); - list_add_rcu(&ghes->list, &ghes_sea); - mutex_unlock(&ghes_list_mutex); -} - -static void ghes_sea_remove(struct ghes *ghes) -{ - mutex_lock(&ghes_list_mutex); - list_del_rcu(&ghes->list); - mutex_unlock(&ghes_list_mutex); - synchronize_rcu(); -} -#else /* CONFIG_ACPI_APEI_SEA */ -static inline void ghes_sea_add(struct ghes *ghes) -{ - pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n", - ghes->generic->header.source_id); -} - -static inline void ghes_sea_remove(struct ghes *ghes) -{ - pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n", - ghes->generic->header.source_id); -} -#endif /* CONFIG_ACPI_APEI_SEA */ - -#ifdef CONFIG_HAVE_ACPI_APEI_NMI /* - * printk is not safe in NMI context. So in NMI handler, we allocate - * required memory from lock-less memory allocator - * (ghes_estatus_pool), save estatus into it, put them into lock-less - * list (ghes_estatus_llist), then delay printk into IRQ context via - * irq_work (ghes_proc_irq_work). 
ghes_estatus_size_request record - * required pool size by all NMI error source. + * Handlers for CPER records may not be NMI safe. For example, + * memory_failure_queue() takes spinlocks and calls schedule_work_on(). + * In any NMI-like handler, memory from ghes_estatus_pool is used to save + * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes + * ghes_proc_in_irq() to run in IRQ context where each estatus in + * ghes_estatus_llist is processed. + * + * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache + * to suppress frequent messages. */ static struct llist_head ghes_estatus_llist; static struct irq_work ghes_proc_irq_work; -/* - * NMI may be triggered on any CPU, so ghes_in_nmi is used for - * having only one concurrent reader. - */ -static atomic_t ghes_in_nmi = ATOMIC_INIT(0); - -static LIST_HEAD(ghes_nmi); - static void ghes_proc_in_irq(struct irq_work *irq_work) { struct llist_node *llnode, *next; @@ -903,7 +1286,9 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) estatus = GHES_ESTATUS_FROM_NODE(estatus_node); len = cper_estatus_len(estatus); node_len = GHES_ESTATUS_NODE_LEN(len); + ghes_do_proc(estatus_node->ghes, estatus); + if (!ghes_estatus_cached(estatus)) { generic = estatus_node->generic; if (ghes_print_estatus(NULL, generic, estatus)) @@ -911,6 +1296,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) } gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len); + llnode = next; } } @@ -921,7 +1307,6 @@ static void ghes_print_queued_estatus(void) struct ghes_estatus_node *estatus_node; struct acpi_hest_generic *generic; struct acpi_hest_generic_status *estatus; - u32 len, node_len; llnode = llist_del_all(&ghes_estatus_llist); /* @@ -933,104 +1318,160 @@ static void ghes_print_queued_estatus(void) estatus_node = llist_entry(llnode, struct ghes_estatus_node, llnode); estatus = GHES_ESTATUS_FROM_NODE(estatus_node); - len = cper_estatus_len(estatus); - node_len = GHES_ESTATUS_NODE_LEN(len); generic = estatus_node->generic; ghes_print_estatus(NULL, generic, estatus); llnode = llnode->next; } } -/* Save estatus for further processing in IRQ context */ -static void __process_error(struct ghes *ghes) +static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, + enum fixed_addresses fixmap_idx) { -#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - u32 len, node_len; + struct acpi_hest_generic_status *estatus, tmp_header; struct ghes_estatus_node *estatus_node; - struct acpi_hest_generic_status *estatus; + u32 len, node_len; + u64 buf_paddr; + int sev, rc; - if (ghes_estatus_cached(ghes->estatus)) - return; + if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) + return -EOPNOTSUPP; - len = cper_estatus_len(ghes->estatus); - node_len = GHES_ESTATUS_NODE_LEN(len); + rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx); + if (rc) { + ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); + return rc; + } + + rc = __ghes_check_estatus(ghes, &tmp_header); + if (rc) { + ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); + return rc; + } + len = cper_estatus_len(&tmp_header); + node_len = GHES_ESTATUS_NODE_LEN(len); estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); if (!estatus_node) - return; + return -ENOMEM; estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); - memcpy(estatus, ghes->estatus, len); - llist_add(&estatus_node->llnode, &ghes_estatus_llist); -#endif -} -static int ghes_notify_nmi(unsigned int cmd, 
struct pt_regs *regs) -{ - struct ghes *ghes; - int sev, ret = NMI_DONE; + if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) { + ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); + rc = -ENOENT; + goto no_work; + } - if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) - return ret; + sev = ghes_severity(estatus->error_severity); + if (sev >= GHES_SEV_PANIC) { + ghes_print_queued_estatus(); + __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); + } - list_for_each_entry_rcu(ghes, &ghes_nmi, list) { - if (ghes_read_estatus(ghes, 1)) { - ghes_clear_estatus(ghes); - continue; - } else { - ret = NMI_HANDLED; - } + ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); - sev = ghes_severity(ghes->estatus->error_severity); - if (sev >= GHES_SEV_PANIC) { - oops_begin(); - ghes_print_queued_estatus(); - __ghes_panic(ghes); - } + /* This error has been reported before, don't process it again. */ + if (ghes_estatus_cached(estatus)) + goto no_work; - if (!(ghes->flags & GHES_TO_CLEAR)) - continue; + llist_add(&estatus_node->llnode, &ghes_estatus_llist); + + return rc; - __process_error(ghes); - ghes_clear_estatus(ghes); +no_work: + gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, + node_len); + + return rc; +} + +static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list, + enum fixed_addresses fixmap_idx) +{ + int ret = -ENOENT; + struct ghes *ghes; + + rcu_read_lock(); + list_for_each_entry_rcu(ghes, rcu_list, list) { + if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) + ret = 0; } + rcu_read_unlock(); -#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - if (ret == NMI_HANDLED) + if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret) irq_work_queue(&ghes_proc_irq_work); -#endif - atomic_dec(&ghes_in_nmi); + return ret; } -static unsigned long ghes_esource_prealloc_size( - const struct acpi_hest_generic *generic) +#ifdef CONFIG_ACPI_APEI_SEA +static LIST_HEAD(ghes_sea); + +/* + * Return 0 only if one of the SEA error sources successfully reported an error + * record sent from the firmware. + */ +int ghes_notify_sea(void) { - unsigned long block_length, prealloc_records, prealloc_size; + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea); + int rv; - block_length = min_t(unsigned long, generic->error_block_length, - GHES_ESTATUS_MAX_SIZE); - prealloc_records = max_t(unsigned long, - generic->records_to_preallocate, 1); - prealloc_size = min_t(unsigned long, block_length * prealloc_records, - GHES_ESOURCE_PREALLOC_MAX_SIZE); + raw_spin_lock(&ghes_notify_lock_sea); + rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA); + raw_spin_unlock(&ghes_notify_lock_sea); - return prealloc_size; + return rv; } -static void ghes_estatus_pool_shrink(unsigned long len) +static void ghes_sea_add(struct ghes *ghes) { - ghes_estatus_pool_size_request -= PAGE_ALIGN(len); + mutex_lock(&ghes_list_mutex); + list_add_rcu(&ghes->list, &ghes_sea); + mutex_unlock(&ghes_list_mutex); } -static void ghes_nmi_add(struct ghes *ghes) +static void ghes_sea_remove(struct ghes *ghes) { - unsigned long len; + mutex_lock(&ghes_list_mutex); + list_del_rcu(&ghes->list); + mutex_unlock(&ghes_list_mutex); + synchronize_rcu(); +} +#else /* CONFIG_ACPI_APEI_SEA */ +static inline void ghes_sea_add(struct ghes *ghes) { } +static inline void ghes_sea_remove(struct ghes *ghes) { } +#endif /* CONFIG_ACPI_APEI_SEA */ + +#ifdef CONFIG_HAVE_ACPI_APEI_NMI +/* + * NMI may be triggered on any CPU, so ghes_in_nmi is used for + * having only one concurrent reader. 
+ */ +static atomic_t ghes_in_nmi = ATOMIC_INIT(0); + +static LIST_HEAD(ghes_nmi); + +static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) +{ + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi); + int ret = NMI_DONE; + + if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) + return ret; - len = ghes_esource_prealloc_size(ghes->generic); - ghes_estatus_pool_expand(len); + raw_spin_lock(&ghes_notify_lock_nmi); + if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI)) + ret = NMI_HANDLED; + raw_spin_unlock(&ghes_notify_lock_nmi); + + atomic_dec(&ghes_in_nmi); + return ret; +} + +static void ghes_nmi_add(struct ghes *ghes) +{ mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_nmi)) register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); @@ -1040,8 +1481,6 @@ static void ghes_nmi_add(struct ghes *ghes) static void ghes_nmi_remove(struct ghes *ghes) { - unsigned long len; - mutex_lock(&ghes_list_mutex); list_del_rcu(&ghes->list); if (list_empty(&ghes_nmi)) @@ -1052,38 +1491,79 @@ static void ghes_nmi_remove(struct ghes *ghes) * freed after NMI handler finishes. */ synchronize_rcu(); - len = ghes_esource_prealloc_size(ghes->generic); - ghes_estatus_pool_shrink(len); } +#else /* CONFIG_HAVE_ACPI_APEI_NMI */ +static inline void ghes_nmi_add(struct ghes *ghes) { } +static inline void ghes_nmi_remove(struct ghes *ghes) { } +#endif /* CONFIG_HAVE_ACPI_APEI_NMI */ static void ghes_nmi_init_cxt(void) { init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); } -#else /* CONFIG_HAVE_ACPI_APEI_NMI */ -static inline void ghes_nmi_add(struct ghes *ghes) + +static int __ghes_sdei_callback(struct ghes *ghes, + enum fixed_addresses fixmap_idx) +{ + if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) { + irq_work_queue(&ghes_proc_irq_work); + + return 0; + } + + return -ENOENT; +} + +static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs, + void *arg) { - pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n", - ghes->generic->header.source_id); - BUG(); + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal); + struct ghes *ghes = arg; + int err; + + raw_spin_lock(&ghes_notify_lock_sdei_normal); + err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL); + raw_spin_unlock(&ghes_notify_lock_sdei_normal); + + return err; } -static inline void ghes_nmi_remove(struct ghes *ghes) +static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs, + void *arg) { - pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n", - ghes->generic->header.source_id); - BUG(); + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical); + struct ghes *ghes = arg; + int err; + + raw_spin_lock(&ghes_notify_lock_sdei_critical); + err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL); + raw_spin_unlock(&ghes_notify_lock_sdei_critical); + + return err; } -static inline void ghes_nmi_init_cxt(void) +static int apei_sdei_register_ghes(struct ghes *ghes) { + if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) + return -EOPNOTSUPP; + + return sdei_register_ghes(ghes, ghes_sdei_normal_callback, + ghes_sdei_critical_callback); +} + +static int apei_sdei_unregister_ghes(struct ghes *ghes) +{ + if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) + return -EOPNOTSUPP; + + return sdei_unregister_ghes(ghes); } -#endif /* CONFIG_HAVE_ACPI_APEI_NMI */ static int ghes_probe(struct platform_device *ghes_dev) { struct acpi_hest_generic *generic; struct ghes *ghes = NULL; + unsigned long flags; int rc = -EINVAL; @@ -1114,22 +1594,28 @@ static int 
ghes_probe(struct platform_device *ghes_dev) goto err; } break; + case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: + if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) { + pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n", + generic->header.source_id); + goto err; + } + break; case ACPI_HEST_NOTIFY_LOCAL: - pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", - generic->header.source_id); + pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", + generic->header.source_id); goto err; default: - pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", - generic->notify.type, generic->header.source_id); + pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", + generic->notify.type, generic->header.source_id); goto err; } rc = -EIO; if (generic->error_block_length < sizeof(struct acpi_hest_generic_status)) { - pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", - generic->error_block_length, - generic->header.source_id); + pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", + generic->error_block_length, generic->header.source_id); goto err; } ghes = ghes_new(generic); @@ -1139,14 +1625,9 @@ static int ghes_probe(struct platform_device *ghes_dev) goto err; } - rc = ghes_edac_register(ghes, &ghes_dev->dev); - if (rc < 0) - goto err; - switch (generic->notify.type) { case ACPI_HEST_NOTIFY_POLLED: - setup_deferrable_timer(&ghes->timer, ghes_poll_func, - (unsigned long)ghes); + timer_setup(&ghes->timer, ghes_poll_func, 0); ghes_add_timer(ghes); break; case ACPI_HEST_NOTIFY_EXTERNAL: @@ -1155,13 +1636,14 @@ static int ghes_probe(struct platform_device *ghes_dev) if (rc) { pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err_edac_unreg; + goto err; } - rc = request_irq(ghes->irq, ghes_irq_func, 0, "GHES IRQ", ghes); + rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED, + "GHES IRQ", ghes); if (rc) { pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err_edac_unreg; + goto err; } break; @@ -1181,17 +1663,30 @@ static int ghes_probe(struct platform_device *ghes_dev) case ACPI_HEST_NOTIFY_NMI: ghes_nmi_add(ghes); break; + case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: + rc = apei_sdei_register_ghes(ghes); + if (rc) + goto err; + break; default: BUG(); } + platform_set_drvdata(ghes_dev, ghes); + ghes->dev = &ghes_dev->dev; + + mutex_lock(&ghes_devs_mutex); + list_add_tail(&ghes->elist, &ghes_devs); + mutex_unlock(&ghes_devs_mutex); + /* Handle any pending errors right away */ + spin_lock_irqsave(&ghes_notify_lock_irq, flags); ghes_proc(ghes); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); return 0; -err_edac_unreg: - ghes_edac_unregister(ghes); + err: if (ghes) { ghes_fini(ghes); @@ -1200,8 +1695,9 @@ err: return rc; } -static int ghes_remove(struct platform_device *ghes_dev) +static void ghes_remove(struct platform_device *ghes_dev) { + int rc; struct ghes *ghes; struct acpi_hest_generic *generic; @@ -1211,7 +1707,7 @@ static int ghes_remove(struct platform_device *ghes_dev) ghes->flags |= GHES_EXITING; switch (generic->notify.type) { case ACPI_HEST_NOTIFY_POLLED: - del_timer_sync(&ghes->timer); + timer_shutdown_sync(&ghes->timer); break; case 
ACPI_HEST_NOTIFY_EXTERNAL: free_irq(ghes->irq, ghes); @@ -1234,6 +1730,18 @@ static int ghes_remove(struct platform_device *ghes_dev) case ACPI_HEST_NOTIFY_NMI: ghes_nmi_remove(ghes); break; + case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: + rc = apei_sdei_unregister_ghes(ghes); + if (rc) { + /* + * Returning early results in a resource leak, but we're + * only here if stopping the hardware failed. + */ + dev_err(&ghes_dev->dev, "Failed to unregister ghes (%pe)\n", + ERR_PTR(rc)); + return; + } + break; default: BUG(); break; @@ -1241,13 +1749,11 @@ static int ghes_remove(struct platform_device *ghes_dev) ghes_fini(ghes); - ghes_edac_unregister(ghes); + mutex_lock(&ghes_devs_mutex); + list_del(&ghes->elist); + mutex_unlock(&ghes_devs_mutex); kfree(ghes); - - platform_set_drvdata(ghes_dev, NULL); - - return 0; } static struct platform_driver ghes_platform_driver = { @@ -1258,41 +1764,35 @@ static struct platform_driver ghes_platform_driver = { .remove = ghes_remove, }; -static int __init ghes_init(void) +void __init acpi_ghes_init(void) { int rc; + acpi_sdei_init(); + if (acpi_disabled) - return -ENODEV; + return; - if (hest_disable) { + switch (hest_disable) { + case HEST_NOT_FOUND: + return; + case HEST_DISABLED: pr_info(GHES_PFX "HEST is not enabled!\n"); - return -EINVAL; + return; + default: + break; } if (ghes_disable) { pr_info(GHES_PFX "GHES is not enabled!\n"); - return -EINVAL; + return; } ghes_nmi_init_cxt(); - rc = ghes_ioremap_init(); - if (rc) - goto err; - - rc = ghes_estatus_pool_init(); - if (rc) - goto err_ioremap_exit; - - rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * - GHES_ESTATUS_CACHE_ALLOCED_MAX); - if (rc) - goto err_pool_exit; - rc = platform_driver_register(&ghes_platform_driver); if (rc) - goto err_pool_exit; + return; rc = apei_osc_setup(); if (rc == 0 && osc_sb_apei_support_acked) @@ -1303,13 +1803,44 @@ static int __init ghes_init(void) pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); else pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); +} - return 0; -err_pool_exit: - ghes_estatus_pool_exit(); -err_ioremap_exit: - ghes_ioremap_exit(); -err: - return rc; +/* + * Known x86 systems that prefer GHES error reporting: + */ +static struct acpi_platform_list plat_list[] = { + {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions}, + { } /* End */ +}; + +struct list_head *ghes_get_devices(void) +{ + int idx = -1; + + if (IS_ENABLED(CONFIG_X86)) { + idx = acpi_match_platform_list(plat_list); + if (idx < 0) { + if (!ghes_edac_force_enable) + return NULL; + + pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n"); + } + } else if (list_empty(&ghes_devs)) { + return NULL; + } + + return &ghes_devs; +} +EXPORT_SYMBOL_GPL(ghes_get_devices); + +void ghes_register_report_chain(struct notifier_block *nb) +{ + atomic_notifier_chain_register(&ghes_report_chain, nb); +} +EXPORT_SYMBOL_GPL(ghes_register_report_chain); + +void ghes_unregister_report_chain(struct notifier_block *nb) +{ + atomic_notifier_chain_unregister(&ghes_report_chain, nb); } -device_initcall(ghes_init); +EXPORT_SYMBOL_GPL(ghes_unregister_report_chain); |
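The patch above exports a vendor-record notifier interface, ghes_register_vendor_record_notifier() and ghes_unregister_vendor_record_notifier(), which defers non-standard CPER sections to a workqueue and then hands them to registered drivers. Below is a minimal sketch of a client module; only the two registration calls, the severity passed as the notifier "action", and the struct acpi_hest_generic_data argument come from the patch itself, while the module name and the callback body are hypothetical.

/*
 * Hypothetical client of the GHES vendor-record notifier added by this
 * patch. The callback runs from process context (the record is deferred
 * via schedule_work() and delivered through a blocking notifier chain),
 * so it is allowed to sleep.
 */
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/printk.h>
#include <acpi/ghes.h>

static int my_vendor_record_cb(struct notifier_block *nb,
			       unsigned long severity, void *data)
{
	struct acpi_hest_generic_data *gdata = data;

	/*
	 * "severity" is the GHES-translated severity of the whole error
	 * status block; gdata points at a private copy of the vendor
	 * section, valid only for the duration of this call.
	 */
	pr_info("vendor CPER section, severity %lu, data length %u\n",
		severity, gdata->error_data_length);

	return NOTIFY_OK;
}

static struct notifier_block my_vendor_nb = {
	.notifier_call = my_vendor_record_cb,
};

static int __init my_vendor_init(void)
{
	return ghes_register_vendor_record_notifier(&my_vendor_nb);
}

static void __exit my_vendor_exit(void)
{
	ghes_unregister_vendor_record_notifier(&my_vendor_nb);
}

module_init(my_vendor_init);
module_exit(my_vendor_exit);
MODULE_LICENSE("GPL");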
