diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries/ras.c')
-rw-r--r-- | arch/powerpc/platforms/pseries/ras.c | 262 |
1 files changed, 165 insertions, 97 deletions
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 1d7f973c647b..adafd593d9d3 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -23,11 +23,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock); static int ras_check_exception_token; -static void mce_process_errlog_event(struct irq_work *work); -static struct irq_work mce_errlog_process_work = { - .func = mce_process_errlog_event, -}; - #define EPOW_SENSOR_TOKEN 9 #define EPOW_SENSOR_INDEX 0 @@ -60,11 +55,17 @@ struct pseries_mc_errorlog { * XX 2: Reserved. * XXX 3: Type of UE error. * - * For error_type != MC_ERROR_TYPE_UE + * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB * XXXXXXXX * X 1: Effective address provided. * XXXXX 5: Reserved. * XX 2: Type of SLB/ERAT/TLB error. + * + * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS + * XXXXXXXX + * X 1: Error causing address provided. + * XXX 3: Type of error. + * XXXX 4: Reserved. */ u8 sub_err_type; u8 reserved_1[6]; @@ -80,6 +81,7 @@ struct pseries_mc_errorlog { #define MC_ERROR_TYPE_TLB 0x04 #define MC_ERROR_TYPE_D_CACHE 0x05 #define MC_ERROR_TYPE_I_CACHE 0x07 +#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08 /* RTAS pseries MCE error sub types */ #define MC_ERROR_UE_INDETERMINATE 0 @@ -90,6 +92,7 @@ struct pseries_mc_errorlog { #define UE_EFFECTIVE_ADDR_PROVIDED 0x40 #define UE_LOGICAL_ADDR_PROVIDED 0x20 +#define MC_EFFECTIVE_ADDR_PROVIDED 0x80 #define MC_ERROR_SLB_PARITY 0 #define MC_ERROR_SLB_MULTIHIT 1 @@ -103,6 +106,9 @@ struct pseries_mc_errorlog { #define MC_ERROR_TLB_MULTIHIT 2 #define MC_ERROR_TLB_INDETERMINATE 3 +#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0 +#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1 + static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) { switch (mlog->error_type) { @@ -112,6 +118,8 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) case MC_ERROR_TYPE_ERAT: case MC_ERROR_TYPE_TLB: return (mlog->sub_err_type & 0x03); + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + return (mlog->sub_err_type & 0x70) >> 4; default: return 0; } @@ -122,7 +130,7 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) * devices or systems (e.g. hugepages) that have not been initialized at the * subsys stage. */ -int __init init_ras_hotplug_IRQ(void) +static int __init init_ras_hotplug_IRQ(void) { struct device_node *np; @@ -147,7 +155,7 @@ static int __init init_ras_IRQ(void) { struct device_node *np; - ras_check_exception_token = rtas_token("check-exception"); + ras_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION); /* Internal Errors */ np = of_find_node_by_path("/event-sources/internal-errors"); @@ -184,7 +192,6 @@ static void handle_system_shutdown(char event_modifier) case EPOW_SHUTDOWN_ON_UPS: pr_emerg("Loss of system power detected. System is running on" " UPS/battery. Check RTAS error log for details\n"); - orderly_poweroff(true); break; case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: @@ -316,12 +323,10 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) /* Handle environmental and power warning (EPOW) interrupts. */ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) { - int status; int state; int critical; - status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, - &state); + rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); if (state > 3) critical = 1; /* Time Critical */ @@ -330,12 +335,9 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) spin_lock(&ras_log_buf_lock); - status = rtas_call(ras_check_exception_token, 6, 1, NULL, - RTAS_VECTOR_EXTERNAL_INTERRUPT, - virq_to_hw(irq), - RTAS_EPOW_WARNING, - critical, __pa(&ras_log_buf), - rtas_get_error_log_max()); + rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf), + rtas_get_error_log_max()); log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); @@ -395,16 +397,31 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) /* * Some versions of FWNMI place the buffer inside the 4kB page starting at * 0x7000. Other versions place it inside the rtas buffer. We check both. + * Minimum size of the buffer is 16 bytes. */ #define VALID_FWNMI_BUFFER(A) \ - ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ - (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) + ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \ + (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16)))) static inline struct rtas_error_log *fwnmi_get_errlog(void) { return (struct rtas_error_log *)local_paca->mce_data_buf; } +static __be64 *fwnmi_get_savep(struct pt_regs *regs) +{ + unsigned long savep_ra; + + /* Mask top two bits */ + savep_ra = regs->gpr[3] & ~(0x3UL << 62); + if (!VALID_FWNMI_BUFFER(savep_ra)) { + printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); + return NULL; + } + + return __va(savep_ra); +} + /* * Get the error information for errors coming through the * FWNMI vectors. The pt_regs' r3 will be updated to reflect @@ -422,19 +439,14 @@ static inline struct rtas_error_log *fwnmi_get_errlog(void) */ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) { - unsigned long *savep; struct rtas_error_log *h; + __be64 *savep; - /* Mask top two bits */ - regs->gpr[3] &= ~(0x3UL << 62); - - if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { - printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); + savep = fwnmi_get_savep(regs); + if (!savep) return NULL; - } - savep = __va(regs->gpr[3]); - regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ h = (struct rtas_error_log *)&savep[1]; /* Use the per cpu buffer from paca to store rtas error log */ @@ -458,7 +470,15 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) */ static void fwnmi_release_errinfo(void) { - int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); + struct rtas_args rtas_args; + int ret; + + /* + * On pseries, the machine check stack is limited to under 4GB, so + * args can be on-stack. + */ + rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL); + ret = be32_to_cpu(rtas_args.rets[0]); if (ret != 0) printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret); } @@ -475,17 +495,27 @@ int pSeries_system_reset_exception(struct pt_regs *regs) if ((be64_to_cpu(regs->msr) & (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { - regs->nip = be64_to_cpu((__be64)regs->nip); - regs->msr = 0; + regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip)); + regs_set_return_msr(regs, 0); } #endif if (fwnmi_active) { - struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs); - if (errhdr) { - /* XXX Should look at FWNMI information */ - } - fwnmi_release_errinfo(); + __be64 *savep; + + /* + * Firmware (PowerVM and KVM) saves r3 to a save area like + * machine check, which is not exactly what PAPR (2.9) + * suggests but there is no way to detect otherwise, so this + * is the interface now. + * + * System resets do not save any error log or require an + * "ibm,nmi-interlock" rtas call to release. + */ + + savep = fwnmi_get_savep(regs); + if (savep) + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ } if (smp_handle_nmi_ipi(regs)) @@ -494,18 +524,60 @@ int pSeries_system_reset_exception(struct pt_regs *regs) return 0; /* need to perform reset */ } +static int mce_handle_err_realmode(int disposition, u8 error_type) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (disposition == RTAS_DISP_NOT_RECOVERED) { + switch (error_type) { + case MC_ERROR_TYPE_ERAT: + flush_erat(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + case MC_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU + /* + * Store the old slb content in paca before flushing. + * Print this when we go to virtual mode. + * There are chances that we may hit MCE again if there + * is a parity error on the SLB entry we trying to read + * for saving. Hence limit the slb saving to single + * level of recursion. + */ + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + flush_and_reload_slb(); + disposition = RTAS_DISP_FULLY_RECOVERED; +#endif + break; + default: + break; + } + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { + /* Platform corrected itself but could be degraded */ + pr_err("MCE: limited recovery, system may be degraded\n"); + disposition = RTAS_DISP_FULLY_RECOVERED; + } +#endif + return disposition; +} -static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +static int mce_handle_err_virtmode(struct pt_regs *regs, + struct rtas_error_log *errp, + struct pseries_mc_errorlog *mce_log, + int disposition) { struct mce_error_info mce_err = { 0 }; - unsigned long eaddr = 0, paddr = 0; - struct pseries_errorlog *pseries_log; - struct pseries_mc_errorlog *mce_log; - int disposition = rtas_error_disposition(errp); int initiator = rtas_error_initiator(errp); int severity = rtas_error_severity(errp); + unsigned long eaddr = 0, paddr = 0; u8 error_type, err_sub_type; + if (!mce_log) + goto out; + + error_type = mce_log->error_type; + err_sub_type = rtas_mc_error_sub_type(mce_log); + if (initiator == RTAS_INITIATOR_UNKNOWN) mce_err.initiator = MCE_INITIATOR_UNKNOWN; else if (initiator == RTAS_INITIATOR_CPU) @@ -531,8 +603,6 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.severity = MCE_SEV_SEVERE; else if (severity == RTAS_SEVERITY_ERROR) mce_err.severity = MCE_SEV_SEVERE; - else if (severity == RTAS_SEVERITY_FATAL) - mce_err.severity = MCE_SEV_FATAL; else mce_err.severity = MCE_SEV_FATAL; @@ -544,20 +614,12 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; mce_err.error_class = MCE_ECLASS_UNKNOWN; - if (!rtas_error_extended(errp)) - goto out; - - pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); - if (pseries_log == NULL) - goto out; - - mce_log = (struct pseries_mc_errorlog *)pseries_log->data; - error_type = mce_log->error_type; - err_sub_type = rtas_mc_error_sub_type(mce_log); - - switch (mce_log->error_type) { + switch (error_type) { case MC_ERROR_TYPE_UE: mce_err.error_type = MCE_ERROR_TYPE_UE; + mce_common_process_ue(regs, &mce_err); + if (mce_err.ignore_event) + disposition = RTAS_DISP_FULLY_RECOVERED; switch (err_sub_type) { case MC_ERROR_UE_IFETCH: mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH; @@ -604,7 +666,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_ERAT: @@ -621,7 +683,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_TLB: @@ -638,61 +700,69 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_D_CACHE: mce_err.error_type = MCE_ERROR_TYPE_DCACHE; break; case MC_ERROR_TYPE_I_CACHE: - mce_err.error_type = MCE_ERROR_TYPE_DCACHE; + mce_err.error_type = MCE_ERROR_TYPE_ICACHE; + break; + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + mce_err.error_type = MCE_ERROR_TYPE_RA; + switch (err_sub_type) { + case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK: + mce_err.u.ra_error_type = + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + break; + case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS: + mce_err.u.ra_error_type = + MCE_RA_ERROR_LOAD_STORE_FOREIGN; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_UNKNOWN: default: mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; break; } - -#ifdef CONFIG_PPC_BOOK3S_64 - if (disposition == RTAS_DISP_NOT_RECOVERED) { - switch (error_type) { - case MC_ERROR_TYPE_SLB: - case MC_ERROR_TYPE_ERAT: - /* - * Store the old slb content in paca before flushing. - * Print this when we go to virtual mode. - * There are chances that we may hit MCE again if there - * is a parity error on the SLB entry we trying to read - * for saving. Hence limit the slb saving to single - * level of recursion. - */ - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - flush_and_reload_slb(); - disposition = RTAS_DISP_FULLY_RECOVERED; - break; - default: - break; - } - } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { - /* Platform corrected itself but could be degraded */ - printk(KERN_ERR "MCE: limited recovery, system may " - "be degraded\n"); - disposition = RTAS_DISP_FULLY_RECOVERED; - } -#endif - out: save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, - &mce_err, regs->nip, eaddr, paddr); + &mce_err, regs->nip, eaddr, paddr); + return disposition; +} + +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +{ + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log = NULL; + int disposition = rtas_error_disposition(errp); + u8 error_type; + + if (!rtas_error_extended(errp)) + goto out; + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (!pseries_log) + goto out; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + error_type = mce_log->error_type; + + disposition = mce_handle_err_realmode(disposition, error_type); +out: + disposition = mce_handle_err_virtmode(regs, errp, mce_log, + disposition); return disposition; } /* * Process MCE rtas errlog event. */ -static void mce_process_errlog_event(struct irq_work *work) +void pSeries_machine_check_log_err(void) { struct rtas_error_log *err; @@ -713,7 +783,7 @@ static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) { int recovered = 0; - if (!(regs->msr & MSR_RI)) { + if (regs_is_unrecoverable(regs)) { /* If MSR_RI isn't set, we cannot recover */ pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); recovered = 0; @@ -749,7 +819,7 @@ static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) */ recovered = 0; } else { - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); recovered = 1; } } @@ -801,10 +871,8 @@ long pseries_machine_check_realmode(struct pt_regs *regs) * virtual mode. */ disposition = mce_handle_error(regs, errp); - fwnmi_release_errinfo(); - /* Queue irq work to log this rtas event later. */ - irq_work_queue(&mce_errlog_process_work); + fwnmi_release_errinfo(); if (disposition == RTAS_DISP_FULLY_RECOVERED) return 1; |