diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries/ras.c')
| -rw-r--r-- | arch/powerpc/platforms/pseries/ras.c | 686 |
1 files changed, 580 insertions, 106 deletions
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 7b3cbde8c783..adafd593d9d3 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2001 Dave Engebretsen IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/sched.h> @@ -22,27 +9,143 @@ #include <linux/of.h> #include <linux/fs.h> #include <linux/reboot.h> +#include <linux/irq_work.h> #include <asm/machdep.h> #include <asm/rtas.h> #include <asm/firmware.h> +#include <asm/mce.h> #include "pseries.h" static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; static DEFINE_SPINLOCK(ras_log_buf_lock); -static char global_mce_data_buf[RTAS_ERROR_LOG_MAX]; -static DEFINE_PER_CPU(__u64, mce_data_buf); - static int ras_check_exception_token; #define EPOW_SENSOR_TOKEN 9 #define EPOW_SENSOR_INDEX 0 +/* EPOW events counter variable */ +static int num_epow_events; + +static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id); static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); static irqreturn_t ras_error_interrupt(int irq, void *dev_id); +/* RTAS pseries MCE errorlog section. */ +struct pseries_mc_errorlog { + __be32 fru_id; + __be32 proc_id; + u8 error_type; + /* + * sub_err_type (1 byte). Bit fields depends on error_type + * + * MSB0 + * | + * V + * 01234567 + * XXXXXXXX + * + * For error_type == MC_ERROR_TYPE_UE + * XXXXXXXX + * X 1: Permanent or Transient UE. + * X 1: Effective address provided. + * X 1: Logical address provided. + * XX 2: Reserved. + * XXX 3: Type of UE error. + * + * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB + * XXXXXXXX + * X 1: Effective address provided. + * XXXXX 5: Reserved. + * XX 2: Type of SLB/ERAT/TLB error. + * + * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS + * XXXXXXXX + * X 1: Error causing address provided. + * XXX 3: Type of error. + * XXXX 4: Reserved. + */ + u8 sub_err_type; + u8 reserved_1[6]; + __be64 effective_address; + __be64 logical_address; +} __packed; + +/* RTAS pseries MCE error types */ +#define MC_ERROR_TYPE_UE 0x00 +#define MC_ERROR_TYPE_SLB 0x01 +#define MC_ERROR_TYPE_ERAT 0x02 +#define MC_ERROR_TYPE_UNKNOWN 0x03 +#define MC_ERROR_TYPE_TLB 0x04 +#define MC_ERROR_TYPE_D_CACHE 0x05 +#define MC_ERROR_TYPE_I_CACHE 0x07 +#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08 + +/* RTAS pseries MCE error sub types */ +#define MC_ERROR_UE_INDETERMINATE 0 +#define MC_ERROR_UE_IFETCH 1 +#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2 +#define MC_ERROR_UE_LOAD_STORE 3 +#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4 + +#define UE_EFFECTIVE_ADDR_PROVIDED 0x40 +#define UE_LOGICAL_ADDR_PROVIDED 0x20 +#define MC_EFFECTIVE_ADDR_PROVIDED 0x80 + +#define MC_ERROR_SLB_PARITY 0 +#define MC_ERROR_SLB_MULTIHIT 1 +#define MC_ERROR_SLB_INDETERMINATE 2 + +#define MC_ERROR_ERAT_PARITY 1 +#define MC_ERROR_ERAT_MULTIHIT 2 +#define MC_ERROR_ERAT_INDETERMINATE 3 + +#define MC_ERROR_TLB_PARITY 1 +#define MC_ERROR_TLB_MULTIHIT 2 +#define MC_ERROR_TLB_INDETERMINATE 3 + +#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0 +#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1 + +static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) +{ + switch (mlog->error_type) { + case MC_ERROR_TYPE_UE: + return (mlog->sub_err_type & 0x07); + case MC_ERROR_TYPE_SLB: + case MC_ERROR_TYPE_ERAT: + case MC_ERROR_TYPE_TLB: + return (mlog->sub_err_type & 0x03); + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + return (mlog->sub_err_type & 0x70) >> 4; + default: + return 0; + } +} + +/* + * Enable the hotplug interrupt late because processing them may touch other + * devices or systems (e.g. hugepages) that have not been initialized at the + * subsys stage. + */ +static int __init init_ras_hotplug_IRQ(void) +{ + struct device_node *np; + + /* Hotplug Events */ + np = of_find_node_by_path("/event-sources/hot-plug-events"); + if (np != NULL) { + if (dlpar_workqueue_init() == 0) + request_event_sources_irqs(np, ras_hotplug_interrupt, + "RAS_HOTPLUG"); + of_node_put(np); + } + + return 0; +} +machine_late_initcall(pseries, init_ras_hotplug_IRQ); /* * Initialize handlers for the set of interrupts caused by hardware errors @@ -52,7 +155,7 @@ static int __init init_ras_IRQ(void) { struct device_node *np; - ras_check_exception_token = rtas_token("check-exception"); + ras_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION); /* Internal Errors */ np = of_find_node_by_path("/event-sources/internal-errors"); @@ -71,7 +174,7 @@ static int __init init_ras_IRQ(void) return 0; } -subsys_initcall(init_ras_IRQ); +machine_subsys_initcall(pseries, init_ras_IRQ); #define EPOW_SHUTDOWN_NORMAL 1 #define EPOW_SHUTDOWN_ON_UPS 2 @@ -82,30 +185,29 @@ static void handle_system_shutdown(char event_modifier) { switch (event_modifier) { case EPOW_SHUTDOWN_NORMAL: - pr_emerg("Firmware initiated power off"); + pr_emerg("Power off requested\n"); orderly_poweroff(true); break; case EPOW_SHUTDOWN_ON_UPS: - pr_emerg("Loss of power reported by firmware, system is " - "running on UPS/battery"); + pr_emerg("Loss of system power detected. System is running on" + " UPS/battery. Check RTAS error log for details\n"); break; case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: - pr_emerg("Loss of system critical functions reported by " - "firmware"); - pr_emerg("Check RTAS error log for details"); + pr_emerg("Loss of system critical functions detected. Check" + " RTAS error log for details\n"); orderly_poweroff(true); break; case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: - pr_emerg("Ambient temperature too high reported by firmware"); - pr_emerg("Check RTAS error log for details"); + pr_emerg("High ambient temperature detected. Check RTAS" + " error log for details\n"); orderly_poweroff(true); break; default: - pr_err("Unknown power/cooling shutdown event (modifier %d)", + pr_err("Unknown power/cooling shutdown event (modifier = %d)\n", event_modifier); } } @@ -126,7 +228,7 @@ struct epow_errorlog { #define EPOW_MAIN_ENCLOSURE 5 #define EPOW_POWER_OFF 7 -void rtas_parse_epow_errlog(struct rtas_error_log *log) +static void rtas_parse_epow_errlog(struct rtas_error_log *log) { struct pseries_errorlog *pseries_log; struct epow_errorlog *epow_log; @@ -143,51 +245,88 @@ void rtas_parse_epow_errlog(struct rtas_error_log *log) switch (action_code) { case EPOW_RESET: - pr_err("Non critical power or cooling issue cleared"); + if (num_epow_events) { + pr_info("Non critical power/cooling issue cleared\n"); + num_epow_events--; + } break; case EPOW_WARN_COOLING: - pr_err("Non critical cooling issue reported by firmware"); - pr_err("Check RTAS error log for details"); + pr_info("Non-critical cooling issue detected. Check RTAS error" + " log for details\n"); break; case EPOW_WARN_POWER: - pr_err("Non critical power issue reported by firmware"); - pr_err("Check RTAS error log for details"); + pr_info("Non-critical power issue detected. Check RTAS error" + " log for details\n"); break; case EPOW_SYSTEM_SHUTDOWN: - handle_system_shutdown(epow_log->event_modifier); + handle_system_shutdown(modifier); break; case EPOW_SYSTEM_HALT: - pr_emerg("Firmware initiated power off"); + pr_emerg("Critical power/cooling issue detected. Check RTAS" + " error log for details. Powering off.\n"); orderly_poweroff(true); break; case EPOW_MAIN_ENCLOSURE: case EPOW_POWER_OFF: - pr_emerg("Critical power/cooling issue reported by firmware"); - pr_emerg("Check RTAS error log for details"); - pr_emerg("Immediate power off"); + pr_emerg("System about to lose power. Check RTAS error log " + " for details. Powering off immediately.\n"); emergency_sync(); kernel_power_off(); break; default: - pr_err("Unknown power/cooling event (action code %d)", + pr_err("Unknown power/cooling event (action code = %d)\n", action_code); } + + /* Increment epow events counter variable */ + if (action_code != EPOW_RESET) + num_epow_events++; +} + +static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) +{ + struct pseries_errorlog *pseries_log; + struct pseries_hp_errorlog *hp_elog; + + spin_lock(&ras_log_buf_lock); + + rtas_call(ras_check_exception_token, 6, 1, NULL, + RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq), + RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf), + rtas_get_error_log_max()); + + pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf, + PSERIES_ELOG_SECT_ID_HOTPLUG); + hp_elog = (struct pseries_hp_errorlog *)pseries_log->data; + + /* + * Since PCI hotplug is not currently supported on pseries, put PCI + * hotplug events on the ras_log_buf to be handled by rtas_errd. + */ + if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || + hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU || + hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM) + queue_hotplug_event(hp_elog); + else + log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); + + spin_unlock(&ras_log_buf_lock); + return IRQ_HANDLED; } /* Handle environmental and power warning (EPOW) interrupts. */ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) { - int status; int state; int critical; - status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); + rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); if (state > 3) critical = 1; /* Time Critical */ @@ -196,12 +335,9 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) spin_lock(&ras_log_buf_lock); - status = rtas_call(ras_check_exception_token, 6, 1, NULL, - RTAS_VECTOR_EXTERNAL_INTERRUPT, - virq_to_hw(irq), - RTAS_EPOW_WARNING, - critical, __pa(&ras_log_buf), - rtas_get_error_log_max()); + rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf), + rtas_get_error_log_max()); log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); @@ -236,7 +372,8 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) rtas_elog = (struct rtas_error_log *)ras_log_buf; - if ((status == 0) && (rtas_elog->severity >= RTAS_SEVERITY_ERROR_SYNC)) + if (status == 0 && + rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC) fatal = 1; else fatal = 0; @@ -245,13 +382,12 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); if (fatal) { - pr_emerg("Fatal hardware error reported by firmware"); - pr_emerg("Check RTAS error log for details"); - pr_emerg("Immediate power off"); + pr_emerg("Fatal hardware error detected. Check RTAS error" + " log for details. Powering off immediately\n"); emergency_sync(); kernel_power_off(); } else { - pr_err("Recoverable hardware error reported by firmware"); + pr_err("Recoverable hardware error detected\n"); } spin_unlock(&ras_log_buf_lock); @@ -261,10 +397,30 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) /* * Some versions of FWNMI place the buffer inside the 4kB page starting at * 0x7000. Other versions place it inside the rtas buffer. We check both. + * Minimum size of the buffer is 16 bytes. */ #define VALID_FWNMI_BUFFER(A) \ - ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ - (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) + ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \ + (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16)))) + +static inline struct rtas_error_log *fwnmi_get_errlog(void) +{ + return (struct rtas_error_log *)local_paca->mce_data_buf; +} + +static __be64 *fwnmi_get_savep(struct pt_regs *regs) +{ + unsigned long savep_ra; + + /* Mask top two bits */ + savep_ra = regs->gpr[3] & ~(0x3UL << 62); + if (!VALID_FWNMI_BUFFER(savep_ra)) { + printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); + return NULL; + } + + return __va(savep_ra); +} /* * Get the error information for errors coming through the @@ -272,10 +428,9 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) * the actual r3 if possible, and a ptr to the error log entry * will be returned if found. * - * If the RTAS error is not of the extended type, then we put it in a per - * cpu 64bit buffer. If it is the extended type we use global_mce_data_buf. + * Use one buffer mce_data_buf per cpu to store RTAS error. * - * The global_mce_data_buf does not have any locks or protection around it, + * The mce_data_buf does not have any locks or protection around it, * if a second machine check comes in, or a system reset is done * before we have logged the error, then we will get corruption in the * error log. This is preferable over holding off on calling @@ -284,32 +439,29 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) */ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) { - unsigned long *savep; - struct rtas_error_log *h, *errhdr = NULL; + struct rtas_error_log *h; + __be64 *savep; - if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { - printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); + savep = fwnmi_get_savep(regs); + if (!savep) return NULL; - } - savep = __va(regs->gpr[3]); - regs->gpr[3] = savep[0]; /* restore original r3 */ + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ - /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; - if (!h->extended) { - memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64)); - errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf); + /* Use the per cpu buffer from paca to store rtas error log */ + memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX); + if (!rtas_error_extended(h)) { + memcpy(local_paca->mce_data_buf, h, sizeof(__u64)); } else { - int len; + int len, error_log_length; - len = max_t(int, 8+h->extended_log_length, RTAS_ERROR_LOG_MAX); - memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX); - memcpy(global_mce_data_buf, h, len); - errhdr = (struct rtas_error_log *)global_mce_data_buf; + error_log_length = 8 + rtas_error_extended_log_length(h); + len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); + memcpy(local_paca->mce_data_buf, h, len); } - return errhdr; + return (struct rtas_error_log *)local_paca->mce_data_buf; } /* Call this when done with the data returned by FWNMI_get_errinfo. @@ -318,23 +470,306 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) */ static void fwnmi_release_errinfo(void) { - int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); + struct rtas_args rtas_args; + int ret; + + /* + * On pseries, the machine check stack is limited to under 4GB, so + * args can be on-stack. + */ + rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL); + ret = be32_to_cpu(rtas_args.rets[0]); if (ret != 0) printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret); } int pSeries_system_reset_exception(struct pt_regs *regs) { +#ifdef __LITTLE_ENDIAN__ + /* + * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try + * to detect the bad SRR1 pattern here. Flip the NIP back to correct + * endian for reporting purposes. Unfortunately the MSR can't be fixed, + * so clear it. It will be missing MSR_RI so we won't try to recover. + */ + if ((be64_to_cpu(regs->msr) & + (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| + MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { + regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip)); + regs_set_return_msr(regs, 0); + } +#endif + if (fwnmi_active) { - struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs); - if (errhdr) { - /* XXX Should look at FWNMI information */ - } - fwnmi_release_errinfo(); + __be64 *savep; + + /* + * Firmware (PowerVM and KVM) saves r3 to a save area like + * machine check, which is not exactly what PAPR (2.9) + * suggests but there is no way to detect otherwise, so this + * is the interface now. + * + * System resets do not save any error log or require an + * "ibm,nmi-interlock" rtas call to release. + */ + + savep = fwnmi_get_savep(regs); + if (savep) + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ } + + if (smp_handle_nmi_ipi(regs)) + return 1; + return 0; /* need to perform reset */ } +static int mce_handle_err_realmode(int disposition, u8 error_type) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (disposition == RTAS_DISP_NOT_RECOVERED) { + switch (error_type) { + case MC_ERROR_TYPE_ERAT: + flush_erat(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + case MC_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU + /* + * Store the old slb content in paca before flushing. + * Print this when we go to virtual mode. + * There are chances that we may hit MCE again if there + * is a parity error on the SLB entry we trying to read + * for saving. Hence limit the slb saving to single + * level of recursion. + */ + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + flush_and_reload_slb(); + disposition = RTAS_DISP_FULLY_RECOVERED; +#endif + break; + default: + break; + } + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { + /* Platform corrected itself but could be degraded */ + pr_err("MCE: limited recovery, system may be degraded\n"); + disposition = RTAS_DISP_FULLY_RECOVERED; + } +#endif + return disposition; +} + +static int mce_handle_err_virtmode(struct pt_regs *regs, + struct rtas_error_log *errp, + struct pseries_mc_errorlog *mce_log, + int disposition) +{ + struct mce_error_info mce_err = { 0 }; + int initiator = rtas_error_initiator(errp); + int severity = rtas_error_severity(errp); + unsigned long eaddr = 0, paddr = 0; + u8 error_type, err_sub_type; + + if (!mce_log) + goto out; + + error_type = mce_log->error_type; + err_sub_type = rtas_mc_error_sub_type(mce_log); + + if (initiator == RTAS_INITIATOR_UNKNOWN) + mce_err.initiator = MCE_INITIATOR_UNKNOWN; + else if (initiator == RTAS_INITIATOR_CPU) + mce_err.initiator = MCE_INITIATOR_CPU; + else if (initiator == RTAS_INITIATOR_PCI) + mce_err.initiator = MCE_INITIATOR_PCI; + else if (initiator == RTAS_INITIATOR_ISA) + mce_err.initiator = MCE_INITIATOR_ISA; + else if (initiator == RTAS_INITIATOR_MEMORY) + mce_err.initiator = MCE_INITIATOR_MEMORY; + else if (initiator == RTAS_INITIATOR_POWERMGM) + mce_err.initiator = MCE_INITIATOR_POWERMGM; + else + mce_err.initiator = MCE_INITIATOR_UNKNOWN; + + if (severity == RTAS_SEVERITY_NO_ERROR) + mce_err.severity = MCE_SEV_NO_ERROR; + else if (severity == RTAS_SEVERITY_EVENT) + mce_err.severity = MCE_SEV_WARNING; + else if (severity == RTAS_SEVERITY_WARNING) + mce_err.severity = MCE_SEV_WARNING; + else if (severity == RTAS_SEVERITY_ERROR_SYNC) + mce_err.severity = MCE_SEV_SEVERE; + else if (severity == RTAS_SEVERITY_ERROR) + mce_err.severity = MCE_SEV_SEVERE; + else + mce_err.severity = MCE_SEV_FATAL; + + if (severity <= RTAS_SEVERITY_ERROR_SYNC) + mce_err.sync_error = true; + else + mce_err.sync_error = false; + + mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; + mce_err.error_class = MCE_ECLASS_UNKNOWN; + + switch (error_type) { + case MC_ERROR_TYPE_UE: + mce_err.error_type = MCE_ERROR_TYPE_UE; + mce_common_process_ue(regs, &mce_err); + if (mce_err.ignore_event) + disposition = RTAS_DISP_FULLY_RECOVERED; + switch (err_sub_type) { + case MC_ERROR_UE_IFETCH: + mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH; + break; + case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH: + mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case MC_ERROR_UE_LOAD_STORE: + mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; + break; + case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE: + mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + break; + case MC_ERROR_UE_INDETERMINATE: + default: + mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE; + break; + } + if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + + if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) { + paddr = be64_to_cpu(mce_log->logical_address); + } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) { + unsigned long pfn; + + pfn = addr_to_pfn(regs, eaddr); + if (pfn != ULONG_MAX) + paddr = pfn << PAGE_SHIFT; + } + + break; + case MC_ERROR_TYPE_SLB: + mce_err.error_type = MCE_ERROR_TYPE_SLB; + switch (err_sub_type) { + case MC_ERROR_SLB_PARITY: + mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY; + break; + case MC_ERROR_SLB_MULTIHIT: + mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + break; + case MC_ERROR_SLB_INDETERMINATE: + default: + mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; + case MC_ERROR_TYPE_ERAT: + mce_err.error_type = MCE_ERROR_TYPE_ERAT; + switch (err_sub_type) { + case MC_ERROR_ERAT_PARITY: + mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY; + break; + case MC_ERROR_ERAT_MULTIHIT: + mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + break; + case MC_ERROR_ERAT_INDETERMINATE: + default: + mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; + case MC_ERROR_TYPE_TLB: + mce_err.error_type = MCE_ERROR_TYPE_TLB; + switch (err_sub_type) { + case MC_ERROR_TLB_PARITY: + mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY; + break; + case MC_ERROR_TLB_MULTIHIT: + mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + break; + case MC_ERROR_TLB_INDETERMINATE: + default: + mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; + case MC_ERROR_TYPE_D_CACHE: + mce_err.error_type = MCE_ERROR_TYPE_DCACHE; + break; + case MC_ERROR_TYPE_I_CACHE: + mce_err.error_type = MCE_ERROR_TYPE_ICACHE; + break; + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + mce_err.error_type = MCE_ERROR_TYPE_RA; + switch (err_sub_type) { + case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK: + mce_err.u.ra_error_type = + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + break; + case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS: + mce_err.u.ra_error_type = + MCE_RA_ERROR_LOAD_STORE_FOREIGN; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; + case MC_ERROR_TYPE_UNKNOWN: + default: + mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; + break; + } +out: + save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, + &mce_err, regs->nip, eaddr, paddr); + return disposition; +} + +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +{ + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log = NULL; + int disposition = rtas_error_disposition(errp); + u8 error_type; + + if (!rtas_error_extended(errp)) + goto out; + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (!pseries_log) + goto out; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + error_type = mce_log->error_type; + + disposition = mce_handle_err_realmode(disposition, error_type); +out: + disposition = mce_handle_err_virtmode(regs, errp, mce_log, + disposition); + return disposition; +} + +/* + * Process MCE rtas errlog event. + */ +void pSeries_machine_check_log_err(void) +{ + struct rtas_error_log *err; + + err = fwnmi_get_errlog(); + log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); +} + /* * See if we can recover from a machine check exception. * This is only called on power4 (or above) and only via @@ -344,42 +779,51 @@ int pSeries_system_reset_exception(struct pt_regs *regs) * Return 1 if corrected (or delivered a signal). * Return 0 if there is nothing we can do. */ -static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) +static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) { int recovered = 0; - if (!(regs->msr & MSR_RI)) { + if (regs_is_unrecoverable(regs)) { /* If MSR_RI isn't set, we cannot recover */ + pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); recovered = 0; - - } else if (err->disposition == RTAS_DISP_FULLY_RECOVERED) { + } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { /* Platform corrected itself */ recovered = 1; + } else if (evt->severity == MCE_SEV_FATAL) { + /* Fatal machine check */ + pr_err("Machine check interrupt is fatal\n"); + recovered = 0; + } - } else if (err->disposition == RTAS_DISP_LIMITED_RECOVERY) { - /* Platform corrected itself but could be degraded */ - printk(KERN_ERR "MCE: limited recovery, system may " - "be degraded\n"); - recovered = 1; - - } else if (user_mode(regs) && !is_global_init(current) && - err->severity == RTAS_SEVERITY_ERROR_SYNC) { - + if (!recovered && evt->sync_error) { /* - * If we received a synchronous error when in userspace - * kill the task. Firmware may report details of the fail - * asynchronously, so we can't rely on the target and type - * fields being valid here. + * Try to kill processes if we get a synchronous machine check + * (e.g., one caused by execution of this instruction). This + * will devolve into a panic if we try to kill init or are in + * an interrupt etc. + * + * TODO: Queue up this address for hwpoisioning later. + * TODO: This is not quite right for d-side machine + * checks ->nip is not necessarily the important + * address. */ - printk(KERN_ERR "MCE: uncorrectable error, killing task " - "%s:%d\n", current->comm, current->pid); - - _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); - recovered = 1; + if ((user_mode(regs))) { + _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); + recovered = 1; + } else if (die_will_crash()) { + /* + * die() would kill the kernel, so better to go via + * the platform reboot code that will log the + * machine check. + */ + recovered = 0; + } else { + die_mce("Machine check", regs, SIGBUS); + recovered = 1; + } } - log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); - return recovered; } @@ -395,12 +839,42 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) */ int pSeries_machine_check_exception(struct pt_regs *regs) { + struct machine_check_event evt; + + if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) + return 0; + + /* Print things out */ + if (evt.version != MCE_V1) { + pr_err("Machine Check Exception, Unknown event version %d !\n", + evt.version); + return 0; + } + machine_check_print_event_info(&evt, user_mode(regs), false); + + if (recover_mce(regs, &evt)) + return 1; + + return 0; +} + +long pseries_machine_check_realmode(struct pt_regs *regs) +{ struct rtas_error_log *errp; + int disposition; if (fwnmi_active) { errp = fwnmi_get_errinfo(regs); + /* + * Call to fwnmi_release_errinfo() in real mode causes kernel + * to panic. Hence we will call it as soon as we go into + * virtual mode. + */ + disposition = mce_handle_error(regs, errp); + fwnmi_release_errinfo(); - if (errp && recover_mce(regs, errp)) + + if (disposition == RTAS_DISP_FULLY_RECOVERED) return 1; } |
