summaryrefslogtreecommitdiff
path: root/arch/arm64/mm/fault.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/mm/fault.c')
-rw-r--r--arch/arm64/mm/fault.c304
1 files changed, 212 insertions, 92 deletions
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 4bf899fb451b..c7861c9864e6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -31,6 +31,7 @@
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/preempt.h>
+#include <linux/hugetlb.h>
#include <asm/bug.h>
#include <asm/cpufeature.h>
@@ -42,7 +43,22 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
-static const char *fault_name(unsigned int esr);
+#include <acpi/ghes.h>
+
+struct fault_info {
+ int (*fn)(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs);
+ int sig;
+ int code;
+ const char *name;
+};
+
+static const struct fault_info fault_info[];
+
+static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
+{
+ return fault_info + (esr & 63);
+}
#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
@@ -67,18 +83,35 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
#endif
/*
- * Dump out the page tables associated with 'addr' in mm 'mm'.
+ * Dump out the page tables associated with 'addr' in the currently active mm.
*/
-void show_pte(struct mm_struct *mm, unsigned long addr)
+void show_pte(unsigned long addr)
{
+ struct mm_struct *mm;
pgd_t *pgd;
- if (!mm)
+ if (addr < TASK_SIZE) {
+ /* TTBR0 */
+ mm = current->active_mm;
+ if (mm == &init_mm) {
+ pr_alert("[%016lx] user address but active_mm is swapper\n",
+ addr);
+ return;
+ }
+ } else if (addr >= VA_START) {
+ /* TTBR1 */
mm = &init_mm;
+ } else {
+ pr_alert("[%016lx] address between user and kernel address ranges\n",
+ addr);
+ return;
+ }
- pr_alert("pgd = %p\n", mm->pgd);
+ pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
+ mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
+ VA_BITS, mm->pgd);
pgd = pgd_offset(mm, addr);
- pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));
+ pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
do {
pud_t *pud;
@@ -161,12 +194,33 @@ static bool is_el1_instruction_abort(unsigned int esr)
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
}
+static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
+ unsigned long addr)
+{
+ unsigned int ec = ESR_ELx_EC(esr);
+ unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+
+ if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+ return false;
+
+ if (fsc_type == ESR_ELx_FSC_PERM)
+ return true;
+
+ if (addr < USER_DS && system_uses_ttbr0_pan())
+ return fsc_type == ESR_ELx_FSC_FAULT &&
+ (regs->pstate & PSR_PAN_BIT);
+
+ return false;
+}
+
/*
* The kernel tried to access some page that wasn't present.
*/
-static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
- unsigned int esr, struct pt_regs *regs)
+static void __do_kernel_fault(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs)
{
+ const char *msg;
+
/*
* Are we prepared to handle this kernel fault?
* We are almost certainly not prepared to handle instruction faults.
@@ -178,11 +232,22 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
* No handler, we'll have to terminate things with extreme prejudice.
*/
bust_spinlocks(1);
- pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
- (addr < PAGE_SIZE) ? "NULL pointer dereference" :
- "paging request", addr);
- show_pte(mm, addr);
+ if (is_permission_fault(esr, regs, addr)) {
+ if (esr & ESR_ELx_WNR)
+ msg = "write to read-only memory";
+ else
+ msg = "read from unreadable memory";
+ } else if (addr < PAGE_SIZE) {
+ msg = "NULL pointer dereference";
+ } else {
+ msg = "paging request";
+ }
+
+ pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
+ addr);
+
+ show_pte(addr);
die("Oops", regs, esr);
bust_spinlocks(0);
do_exit(SIGKILL);
@@ -194,16 +259,20 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
*/
static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
unsigned int esr, unsigned int sig, int code,
- struct pt_regs *regs)
+ struct pt_regs *regs, int fault)
{
struct siginfo si;
+ const struct fault_info *inf;
+ unsigned int lsb = 0;
if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
- pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
- tsk->comm, task_pid_nr(tsk), fault_name(esr), sig,
+ inf = esr_to_fault_info(esr);
+ pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x",
+ tsk->comm, task_pid_nr(tsk), inf->name, sig,
addr, esr);
- show_pte(tsk->mm, addr);
- show_regs(regs);
+ print_vma_addr(KERN_CONT ", in ", regs->pc);
+ pr_cont("\n");
+ __show_regs(regs);
}
tsk->thread.fault_address = addr;
@@ -212,22 +281,34 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
si.si_errno = 0;
si.si_code = code;
si.si_addr = (void __user *)addr;
+ /*
+ * Either small page or large page may be poisoned.
+ * In other words, VM_FAULT_HWPOISON_LARGE and
+ * VM_FAULT_HWPOISON are mutually exclusive.
+ */
+ if (fault & VM_FAULT_HWPOISON_LARGE)
+ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+ else if (fault & VM_FAULT_HWPOISON)
+ lsb = PAGE_SHIFT;
+ si.si_addr_lsb = lsb;
+
force_sig_info(sig, &si, tsk);
}
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->active_mm;
+ const struct fault_info *inf;
/*
* If we are in kernel mode at this point, we have no context to
* handle this fault with.
*/
- if (user_mode(regs))
- __do_user_fault(tsk, addr, esr, SIGSEGV, SEGV_MAPERR, regs);
- else
- __do_kernel_fault(mm, addr, esr, regs);
+ if (user_mode(regs)) {
+ inf = esr_to_fault_info(esr);
+ __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs, 0);
+ } else
+ __do_kernel_fault(addr, esr, regs);
}
#define VM_FAULT_BADMAP 0x010000
@@ -270,21 +351,6 @@ out:
return fault;
}
-static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs)
-{
- unsigned int ec = ESR_ELx_EC(esr);
- unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
-
- if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
- return false;
-
- if (system_uses_ttbr0_pan())
- return fsc_type == ESR_ELx_FSC_FAULT &&
- (regs->pstate & PSR_PAN_BIT);
- else
- return fsc_type == ESR_ELx_FSC_PERM;
-}
-
static bool is_el0_instruction_abort(unsigned int esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
@@ -295,7 +361,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
{
struct task_struct *tsk;
struct mm_struct *mm;
- int fault, sig, code;
+ int fault, sig, code, major = 0;
unsigned long vm_flags = VM_READ | VM_WRITE;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -322,7 +388,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
mm_flags |= FAULT_FLAG_WRITE;
}
- if (addr < USER_DS && is_permission_fault(esr, regs)) {
+ if (addr < USER_DS && is_permission_fault(esr, regs, addr)) {
/* regs->orig_addr_limit may be 0 if we entered from EL0 */
if (regs->orig_addr_limit == KERNEL_DS)
die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
@@ -334,6 +400,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
die("Accessing user space memory outside uaccess.h routines", regs, esr);
}
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
@@ -357,24 +425,42 @@ retry:
}
fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
+ major |= fault & VM_FAULT_MAJOR;
- /*
- * If we need to retry but a fatal signal is pending, handle the
- * signal first. We do not need to release the mmap_sem because it
- * would already be released in __lock_page_or_retry in mm/filemap.c.
- */
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
- return 0;
+ if (fault & VM_FAULT_RETRY) {
+ /*
+ * If we need to retry but a fatal signal is pending,
+ * handle the signal first. We do not need to release
+ * the mmap_sem because it would already be released
+ * in __lock_page_or_retry in mm/filemap.c.
+ */
+ if (fatal_signal_pending(current))
+ return 0;
+
+ /*
+ * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
+ * starvation.
+ */
+ if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
+ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ mm_flags |= FAULT_FLAG_TRIED;
+ goto retry;
+ }
+ }
+ up_read(&mm->mmap_sem);
/*
- * Major/minor page fault accounting is only done on the initial
- * attempt. If we go through a retry, it is extremely likely that the
- * page will be found in page cache at that point.
+ * Handle the "normal" (no error) case first.
*/
-
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
- if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
+ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
+ VM_FAULT_BADACCESS)))) {
+ /*
+ * Major/minor page fault accounting is only done
+ * once. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at
+ * that point.
+ */
+ if (major) {
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
addr);
@@ -383,25 +469,9 @@ retry:
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
addr);
}
- if (fault & VM_FAULT_RETRY) {
- /*
- * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
- * starvation.
- */
- mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
- mm_flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
- }
- up_read(&mm->mmap_sem);
-
- /*
- * Handle the "normal" case first - VM_FAULT_MAJOR
- */
- if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
- VM_FAULT_BADACCESS))))
return 0;
+ }
/*
* If we are in kernel mode at this point, we have no context to
@@ -427,6 +497,9 @@ retry:
*/
sig = SIGBUS;
code = BUS_ADRERR;
+ } else if (fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) {
+ sig = SIGBUS;
+ code = BUS_MCEERR_AR;
} else {
/*
* Something tried to access memory that isn't in our memory
@@ -437,11 +510,11 @@ retry:
SEGV_ACCERR : SEGV_MAPERR;
}
- __do_user_fault(tsk, addr, esr, sig, code, regs);
+ __do_user_fault(tsk, addr, esr, sig, code, regs, fault);
return 0;
no_context:
- __do_kernel_fault(mm, addr, esr, regs);
+ __do_kernel_fault(addr, esr, regs);
return 0;
}
@@ -488,12 +561,48 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
return 1;
}
-static const struct fault_info {
- int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
- int sig;
- int code;
- const char *name;
-} fault_info[] = {
+/*
+ * This abort handler deals with Synchronous External Abort.
+ * It calls notifiers, and then returns "fault".
+ */
+static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+{
+ struct siginfo info;
+ const struct fault_info *inf;
+ int ret = 0;
+
+ inf = esr_to_fault_info(esr);
+ pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
+ inf->name, esr, addr);
+
+ /*
+ * Synchronous aborts may interrupt code which had interrupts masked.
+ * Before calling out into the wider kernel tell the interested
+ * subsystems.
+ */
+ if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
+ if (interrupts_enabled(regs))
+ nmi_enter();
+
+ ret = ghes_notify_sea();
+
+ if (interrupts_enabled(regs))
+ nmi_exit();
+ }
+
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = 0;
+ if (esr & ESR_ELx_FnV)
+ info.si_addr = NULL;
+ else
+ info.si_addr = (void __user *)addr;
+ arm64_notify_die("", regs, &info, esr);
+
+ return ret;
+}
+
+static const struct fault_info fault_info[] = {
{ do_bad, SIGBUS, 0, "ttbr address size fault" },
{ do_bad, SIGBUS, 0, "level 1 address size fault" },
{ do_bad, SIGBUS, 0, "level 2 address size fault" },
@@ -510,22 +619,22 @@ static const struct fault_info {
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
- { do_bad, SIGBUS, 0, "synchronous external abort" },
+ { do_sea, SIGBUS, 0, "synchronous external abort" },
{ do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error" },
+ { do_sea, SIGBUS, 0, "level 0 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 1 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 2 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 3 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "synchronous parity or ECC error" },
{ do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
{ do_bad, SIGBUS, 0, "unknown 27" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 0 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 1 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 2 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 3 synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "unknown 32" },
{ do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
{ do_bad, SIGBUS, 0, "unknown 34" },
@@ -560,10 +669,21 @@ static const struct fault_info {
{ do_bad, SIGBUS, 0, "unknown 63" },
};
-static const char *fault_name(unsigned int esr)
+/*
+ * Handle Synchronous External Aborts that occur in a guest kernel.
+ *
+ * The return value will be zero if the SEA was successfully handled
+ * and non-zero if there was an error processing the error or there was
+ * no error to process.
+ */
+int handle_guest_sea(phys_addr_t addr, unsigned int esr)
{
- const struct fault_info *inf = fault_info + (esr & 63);
- return inf->name;
+ int ret = -ENOENT;
+
+ if (IS_ENABLED(CONFIG_ACPI_APEI_SEA))
+ ret = ghes_notify_sea();
+
+ return ret;
}
/*
@@ -572,7 +692,7 @@ static const char *fault_name(unsigned int esr)
asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
- const struct fault_info *inf = fault_info + (esr & 63);
+ const struct fault_info *inf = esr_to_fault_info(esr);
struct siginfo info;
if (!inf->fn(addr, esr, regs))