diff options
Diffstat (limited to 'arch/powerpc/kernel/mce_power.c')
| -rw-r--r-- | arch/powerpc/kernel/mce_power.c | 763 |
1 files changed, 463 insertions, 300 deletions
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index b76ca198e09c..71e8f2a92e36 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Machine check exception handling CPU-side for power7 and power8 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright 2013 IBM Corporation * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> */ @@ -24,159 +11,101 @@ #include <linux/types.h> #include <linux/ptrace.h> +#include <linux/extable.h> +#include <linux/pgtable.h> #include <asm/mmu.h> #include <asm/mce.h> #include <asm/machdep.h> +#include <asm/pte-walk.h> +#include <asm/sstep.h> +#include <asm/exception-64s.h> +#include <asm/extable.h> +#include <asm/inst.h> -static void flush_tlb_206(unsigned int num_sets, unsigned int action) -{ - unsigned long rb; - unsigned int i; - - switch (action) { - case TLB_INVAL_SCOPE_GLOBAL: - rb = TLBIEL_INVAL_SET; - break; - case TLB_INVAL_SCOPE_LPID: - rb = TLBIEL_INVAL_SET_LPID; - break; - default: - BUG(); - break; - } - - asm volatile("ptesync" : : : "memory"); - for (i = 0; i < num_sets; i++) { - asm volatile("tlbiel %0" : : "r" (rb)); - rb += 1 << TLBIEL_INVAL_SET_SHIFT; - } - asm volatile("ptesync" : : : "memory"); -} - -static void flush_tlb_300(unsigned int num_sets, unsigned int action) +/* + * Convert an address related to an mm to a PFN. NOTE: we are in real + * mode, we could potentially race with page table updates. + */ +unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) { - unsigned long rb; - unsigned int i; - unsigned int r; - - switch (action) { - case TLB_INVAL_SCOPE_GLOBAL: - rb = TLBIEL_INVAL_SET; - break; - case TLB_INVAL_SCOPE_LPID: - rb = TLBIEL_INVAL_SET_LPID; - break; - default: - BUG(); - break; - } + pte_t *ptep, pte; + unsigned int shift; + unsigned long pfn, flags; + struct mm_struct *mm; - asm volatile("ptesync" : : : "memory"); - - if (early_radix_enabled()) - r = 1; + if (user_mode(regs)) + mm = current->mm; else - r = 0; + mm = &init_mm; - /* - * First flush table/PWC caches with set 0, then flush the - * rest of the sets, partition scope. Radix must then do it - * all again with process scope. Hash just has to flush - * process table. - */ - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : : - "r"(rb), "r"(0), "i"(2), "i"(0), "r"(r)); - for (i = 1; i < num_sets; i++) { - unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT); - - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : : - "r"(rb+set), "r"(0), "i"(2), "i"(0), "r"(r)); + local_irq_save(flags); + ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift); + if (!ptep) { + pfn = ULONG_MAX; + goto out; } + pte = READ_ONCE(*ptep); - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : : - "r"(rb), "r"(0), "i"(2), "i"(1), "r"(r)); - if (early_radix_enabled()) { - for (i = 1; i < num_sets; i++) { - unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT); - - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : : - "r"(rb+set), "r"(0), "i"(2), "i"(1), "r"(r)); - } + if (!pte_present(pte) || pte_special(pte)) { + pfn = ULONG_MAX; + goto out; } - asm volatile("ptesync" : : : "memory"); -} - -/* - * Generic routines to flush TLB on POWER processors. These routines - * are used as flush_tlb hook in the cpu_spec. - * - * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs. - * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID. - */ -void __flush_tlb_power7(unsigned int action) -{ - flush_tlb_206(POWER7_TLB_SETS, action); -} - -void __flush_tlb_power8(unsigned int action) -{ - flush_tlb_206(POWER8_TLB_SETS, action); + if (shift <= PAGE_SHIFT) + pfn = pte_pfn(pte); + else { + unsigned long rpnmask = (1ul << shift) - PAGE_SIZE; + pfn = pte_pfn(__pte(pte_val(pte) | (addr & rpnmask))); + } +out: + local_irq_restore(flags); + return pfn; } -void __flush_tlb_power9(unsigned int action) +static bool mce_in_guest(void) { - unsigned int num_sets; - - if (radix_enabled()) - num_sets = POWER9_TLB_SETS_RADIX; - else - num_sets = POWER9_TLB_SETS_HASH; - - flush_tlb_300(num_sets, action); +#ifdef CONFIG_KVM_BOOK3S_HANDLER + /* + * If machine check is hit when in guest context or low level KVM + * code, avoid looking up any translations or making any attempts + * to recover, just record the event and pass to KVM. + */ + if (get_paca()->kvm_hstate.in_guest) + return true; +#endif + return false; } - /* flush SLBs and reload */ -#ifdef CONFIG_PPC_STD_MMU_64 -static void flush_and_reload_slb(void) +#ifdef CONFIG_PPC_64S_HASH_MMU +void flush_and_reload_slb(void) { - struct slb_shadow *slb; - unsigned long i, n; + if (early_radix_enabled()) + return; /* Invalidate all SLBs */ - asm volatile("slbmte %0,%0; slbia" : : "r" (0)); + slb_flush_all_realmode(); -#ifdef CONFIG_KVM_BOOK3S_HANDLER /* - * If machine check is hit when in guest or in transition, we will - * only flush the SLBs and continue. + * This probably shouldn't happen, but it may be possible it's + * called in early boot before SLB shadows are allocated. */ - if (get_paca()->kvm_hstate.in_guest) - return; -#endif - - /* For host kernel, reload the SLBs from shadow SLB buffer. */ - slb = get_slb_shadow(); - if (!slb) + if (!get_slb_shadow()) return; - n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE); - - /* Load up the SLB entries from shadow SLB */ - for (i = 0; i < n; i++) { - unsigned long rb = be64_to_cpu(slb->save_area[i].esid); - unsigned long rs = be64_to_cpu(slb->save_area[i].vsid); - - rb = (rb & ~0xFFFul) | i; - asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); - } + slb_restore_bolted_realmode(); } #endif -static void flush_erat(void) +void flush_erat(void) { - asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); +#ifdef CONFIG_PPC_64S_HASH_MMU + if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { + flush_and_reload_slb(); + return; + } +#endif + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); } #define MCE_FLUSH_SLB 1 @@ -185,7 +114,7 @@ static void flush_erat(void) static int mce_flush(int what) { -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (what == MCE_FLUSH_SLB) { flush_and_reload_slb(); return 1; @@ -196,10 +125,8 @@ static int mce_flush(int what) return 1; } if (what == MCE_FLUSH_TLB) { - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); - return 1; - } + tlbiel_all(); + return 1; } return 0; @@ -213,219 +140,352 @@ struct mce_ierror_table { bool nip_valid; /* nip is a valid indicator of faulting address */ unsigned int error_type; unsigned int error_subtype; + unsigned int error_class; unsigned int initiator; unsigned int severity; + bool sync_error; }; static const struct mce_ierror_table mce_p7_ierror_table[] = { { 0x00000000001c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000100000, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000180000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, 0, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; static const struct mce_ierror_table mce_p8_ierror_table[] = { { 0x00000000081c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000100000, true, - MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000180000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008000000, true, - MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008040000, true, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, 0, 0, 0, 0, 0 } }; + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; static const struct mce_ierror_table mce_p9_ierror_table[] = { { 0x00000000081c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000100000, true, - MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000180000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008000000, true, - MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008040000, true, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000080c0000, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008100000, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008140000, false, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, - MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ { 0x00000000081c0000, 0x0000000008180000, false, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ -{ 0x00000000081c0000, 0x00000000081c0000, true, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, 0, 0, 0, 0, 0 } }; + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; + +static const struct mce_ierror_table mce_p10_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x00000000001c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008080000, true, + MCE_ERROR_TYPE_USER,MCE_USER_ERROR_SCV, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x00000000080c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008100000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008140000, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; struct mce_derror_table { unsigned long dsisr_value; bool dar_valid; /* dar is a valid indicator of faulting address */ unsigned int error_type; unsigned int error_subtype; + unsigned int error_class; unsigned int initiator; unsigned int severity; + bool sync_error; }; static const struct mce_derror_table mce_p7_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000040, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; static const struct mce_derror_table mce_p8_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00002000, true, - MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00001000, true, MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000200, true, MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; static const struct mce_derror_table mce_p9_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00002000, true, - MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00001000, true, MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000200, false, - MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000040, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000020, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000010, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000008, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; + +static const struct mce_derror_table mce_p10_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000200, false, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000040, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000020, false, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000010, false, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000008, false, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; -static int mce_handle_ierror(struct pt_regs *regs, +static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, + uint64_t *phys_addr) +{ + /* + * Carefully look at the NIP to determine + * the instruction to analyse. Reading the NIP + * in real-mode is tricky and can lead to recursive + * faults + */ + ppc_inst_t instr; + unsigned long pfn, instr_addr; + struct instruction_op op; + struct pt_regs tmp = *regs; + + pfn = addr_to_pfn(regs, regs->nip); + if (pfn != ULONG_MAX) { + instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK); + instr = ppc_inst_read((u32 *)instr_addr); + if (!analyse_instr(&op, &tmp, instr)) { + pfn = addr_to_pfn(regs, op.ea); + *addr = op.ea; + *phys_addr = (pfn << PAGE_SHIFT); + return 0; + } + /* + * analyse_instr() might fail if the instruction + * is not a load/store, although this is unexpected + * for load/store errors or if we got the NIP + * wrong + */ + } + *addr = 0; + return -1; +} + +static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1, const struct mce_ierror_table table[], - struct mce_error_info *mce_err, uint64_t *addr) + struct mce_error_info *mce_err, uint64_t *addr, + uint64_t *phys_addr) { - uint64_t srr1 = regs->msr; int handled = 0; int i; @@ -435,21 +495,28 @@ static int mce_handle_ierror(struct pt_regs *regs, if ((srr1 & table[i].srr1_mask) != table[i].srr1_value) continue; - /* attempt to correct the error */ - switch (table[i].error_type) { - case MCE_ERROR_TYPE_SLB: - handled = mce_flush(MCE_FLUSH_SLB); - break; - case MCE_ERROR_TYPE_ERAT: - handled = mce_flush(MCE_FLUSH_ERAT); - break; - case MCE_ERROR_TYPE_TLB: - handled = mce_flush(MCE_FLUSH_TLB); - break; + if (!mce_in_guest()) { + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); +#endif + handled = mce_flush(MCE_FLUSH_SLB); + break; + case MCE_ERROR_TYPE_ERAT: + handled = mce_flush(MCE_FLUSH_ERAT); + break; + case MCE_ERROR_TYPE_TLB: + handled = mce_flush(MCE_FLUSH_TLB); + break; + } } /* now fill in mce_error_info */ mce_err->error_type = table[i].error_type; + mce_err->error_class = table[i].error_class; switch (table[i].error_type) { case MCE_ERROR_TYPE_UE: mce_err->u.ue_error_type = table[i].error_subtype; @@ -473,23 +540,40 @@ static int mce_handle_ierror(struct pt_regs *regs, mce_err->u.link_error_type = table[i].error_subtype; break; } + mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; - if (table[i].nip_valid) + if (table[i].nip_valid && !mce_in_guest()) { *addr = regs->nip; + if (mce_err->sync_error && + table[i].error_type == MCE_ERROR_TYPE_UE) { + unsigned long pfn; + + if (get_paca()->in_mce < MAX_MCE_DEPTH) { + pfn = addr_to_pfn(regs, regs->nip); + if (pfn != ULONG_MAX) { + *phys_addr = + (pfn << PAGE_SHIFT); + } + } + } + } return handled; } mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; - mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->error_class = MCE_ECLASS_UNKNOWN; + mce_err->severity = MCE_SEV_SEVERE; mce_err->initiator = MCE_INITIATOR_CPU; + mce_err->sync_error = true; return 0; } static int mce_handle_derror(struct pt_regs *regs, const struct mce_derror_table table[], - struct mce_error_info *mce_err, uint64_t *addr) + struct mce_error_info *mce_err, uint64_t *addr, + uint64_t *phys_addr) { uint64_t dsisr = regs->dsisr; int handled = 0; @@ -502,20 +586,26 @@ static int mce_handle_derror(struct pt_regs *regs, if (!(dsisr & table[i].dsisr_value)) continue; - /* attempt to correct the error */ - switch (table[i].error_type) { - case MCE_ERROR_TYPE_SLB: - if (mce_flush(MCE_FLUSH_SLB)) - handled = 1; - break; - case MCE_ERROR_TYPE_ERAT: - if (mce_flush(MCE_FLUSH_ERAT)) - handled = 1; - break; - case MCE_ERROR_TYPE_TLB: - if (mce_flush(MCE_FLUSH_TLB)) - handled = 1; - break; + if (!mce_in_guest()) { + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); +#endif + if (mce_flush(MCE_FLUSH_SLB)) + handled = 1; + break; + case MCE_ERROR_TYPE_ERAT: + if (mce_flush(MCE_FLUSH_ERAT)) + handled = 1; + break; + case MCE_ERROR_TYPE_TLB: + if (mce_flush(MCE_FLUSH_TLB)) + handled = 1; + break; + } } /* @@ -528,6 +618,7 @@ static int mce_handle_derror(struct pt_regs *regs, /* now fill in mce_error_info */ mce_err->error_type = table[i].error_type; + mce_err->error_class = table[i].error_class; switch (table[i].error_type) { case MCE_ERROR_TYPE_UE: mce_err->u.ue_error_type = table[i].error_subtype; @@ -551,11 +642,21 @@ static int mce_handle_derror(struct pt_regs *regs, mce_err->u.link_error_type = table[i].error_subtype; break; } + mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; if (table[i].dar_valid) *addr = regs->dar; - + else if (mce_err->sync_error && !mce_in_guest() && + table[i].error_type == MCE_ERROR_TYPE_UE) { + /* + * We do a maximum of 4 nested MCE calls, see + * kernel/exception-64s.h + */ + if (get_paca()->in_mce < MAX_MCE_DEPTH) + mce_find_instr_ea_and_phys(regs, addr, + phys_addr); + } found = 1; } @@ -563,15 +664,23 @@ static int mce_handle_derror(struct pt_regs *regs, return handled; mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; - mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->error_class = MCE_ECLASS_UNKNOWN; + mce_err->severity = MCE_SEV_SEVERE; mce_err->initiator = MCE_INITIATOR_CPU; + mce_err->sync_error = true; return 0; } -static long mce_handle_ue_error(struct pt_regs *regs) +static long mce_handle_ue_error(struct pt_regs *regs, + struct mce_error_info *mce_err) { - long handled = 0; + if (mce_in_guest()) + return 0; + + mce_common_process_ue(regs, mce_err); + if (mce_err->ignore_event) + return 1; /* * On specific SCOM read via MMIO we may get a machine check @@ -582,29 +691,32 @@ static long mce_handle_ue_error(struct pt_regs *regs) if (ppc_md.mce_check_early_recovery) { if (ppc_md.mce_check_early_recovery(regs)) - handled = 1; + return 1; } - return handled; + + return 0; } static long mce_handle_error(struct pt_regs *regs, + unsigned long srr1, const struct mce_derror_table dtable[], const struct mce_ierror_table itable[]) { struct mce_error_info mce_err = { 0 }; - uint64_t addr; - uint64_t srr1 = regs->msr; + uint64_t addr, phys_addr = ULONG_MAX; long handled; if (SRR1_MC_LOADSTORE(srr1)) - handled = mce_handle_derror(regs, dtable, &mce_err, &addr); + handled = mce_handle_derror(regs, dtable, &mce_err, &addr, + &phys_addr); else - handled = mce_handle_ierror(regs, itable, &mce_err, &addr); + handled = mce_handle_ierror(regs, srr1, itable, &mce_err, &addr, + &phys_addr); if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) - handled = mce_handle_ue_error(regs); + handled = mce_handle_ue_error(regs, &mce_err); - save_mce_event(regs, handled, &mce_err, regs->nip, addr); + save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr); return handled; } @@ -614,15 +726,66 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) /* P7 DD1 leaves top bits of DSISR undefined */ regs->dsisr &= 0x0000ffff; - return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table); + return mce_handle_error(regs, regs->msr, + mce_p7_derror_table, mce_p7_ierror_table); } long __machine_check_early_realmode_p8(struct pt_regs *regs) { - return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table); + return mce_handle_error(regs, regs->msr, + mce_p8_derror_table, mce_p8_ierror_table); } long __machine_check_early_realmode_p9(struct pt_regs *regs) { - return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); + unsigned long srr1 = regs->msr; + + /* + * On POWER9 DD2.1 and below, it's possible to get a machine check + * caused by a paste instruction where only DSISR bit 25 is set. This + * will result in the MCE handler seeing an unknown event and the kernel + * crashing. An MCE that occurs like this is spurious, so we don't need + * to do anything in terms of servicing it. If there is something that + * needs to be serviced, the CPU will raise the MCE again with the + * correct DSISR so that it can be serviced properly. So detect this + * case and mark it as handled. + */ + if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000) + return 1; + + /* + * Async machine check due to bad real address from store or foreign + * link time out comes with the load/store bit (PPC bit 42) set in + * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're + * directed to the ierror table so it will find the cause (which + * describes it correctly as a store error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + ((srr1 & 0x081c0000) == 0x08140000 || + (srr1 & 0x081c0000) == 0x08180000)) { + srr1 &= ~PPC_BIT(42); + } + + return mce_handle_error(regs, srr1, + mce_p9_derror_table, mce_p9_ierror_table); +} + +long __machine_check_early_realmode_p10(struct pt_regs *regs) +{ + unsigned long srr1 = regs->msr; + + /* + * Async machine check due to bad real address from store comes with + * the load/store bit (PPC bit 42) set in SRR1, but the cause comes in + * SRR1 not DSISR. Clear bit 42 so we're directed to the ierror table + * so it will find the cause (which describes it correctly as a store + * error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + (srr1 & 0x081c0000) == 0x08140000) { + srr1 &= ~PPC_BIT(42); + } + + return mce_handle_error(regs, srr1, + mce_p10_derror_table, mce_p10_ierror_table); } |
