summaryrefslogtreecommitdiff
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
authorJames Morse <james.morse@arm.com>2018-01-15 19:38:57 +0000
committerCatalin Marinas <catalin.marinas@arm.com>2018-01-16 15:06:28 +0000
commit6bf0dcfd713563bd2e13ceb53217305c28a8aa5f (patch)
treeccbc78dba6f98df68a57d67ddb6c4ee27ea52017 /arch/arm64/kernel
parent64c02720ea3598bf5143b672274d923a941b8053 (diff)
arm64: kernel: Survive corrected RAS errors notified by SError
Prior to v8.2, SError is an uncontainable fatal exception. The v8.2 RAS extensions use SError to notify software about RAS errors; these can be contained by the Error Synchronization Barrier. An ACPI system with firmware-first may use SError as its 'SEI' notification. Future patches may add code to 'claim' this SError as a notification. Other systems can distinguish these RAS errors from the SError ESR and use the AET bits and additional data from RAS-Error registers to handle the error. Future patches may add this kernel-first handling. Without support for either of these we will panic(), even if we received a corrected error. Add code to decode the severity of RAS errors. We can safely ignore contained errors where the CPU can continue to make progress. For all other errors we continue to panic(). Signed-off-by: James Morse <james.morse@arm.com> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/traps.c51
1 files changed, 46 insertions, 5 deletions
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 3d3588fcd1c7..bbb0fde2780e 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -662,17 +662,58 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
}
#endif
-asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
{
- nmi_enter();
-
console_verbose();
pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
smp_processor_id(), esr, esr_get_class_string(esr));
- __show_regs(regs);
+ if (regs)
+ __show_regs(regs);
+
+ nmi_panic(regs, "Asynchronous SError Interrupt");
+
+ cpu_park_loop();
+ unreachable();
+}
+
+bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
+{
+ u32 aet = arm64_ras_serror_get_severity(esr);
+
+ switch (aet) {
+ case ESR_ELx_AET_CE: /* corrected error */
+ case ESR_ELx_AET_UEO: /* restartable, not yet consumed */
+ /*
+ * The CPU can make progress. We may take UEO again as
+ * a more severe error.
+ */
+ return false;
+
+ case ESR_ELx_AET_UEU: /* Uncorrected Unrecoverable */
+ case ESR_ELx_AET_UER: /* Uncorrected Recoverable */
+ /*
+ * The CPU can't make progress. The exception may have
+ * been imprecise.
+ */
+ return true;
+
+ case ESR_ELx_AET_UC: /* Uncontainable or Uncategorized error */
+ default:
+ /* Error has been silently propagated */
+ arm64_serror_panic(regs, esr);
+ }
+}
+
+asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+{
+ nmi_enter();
+
+ /* non-RAS errors are not containable */
+ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
+ arm64_serror_panic(regs, esr);
- panic("Asynchronous SError Interrupt");
+ nmi_exit();
}
void __pte_error(const char *file, int line, unsigned long val)