summaryrefslogtreecommitdiff
path: root/arch/arm/mm/fault.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/mm/fault.c')
-rw-r--r--arch/arm/mm/fault.c347
1 files changed, 194 insertions, 153 deletions
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 58f69fa07df9..2bc828a1940c 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/arch/arm/mm/fault.c
*
* Copyright (C) 1995 Linus Torvalds
* Modifications for ARM processor (c) 1995-2004 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/extable.h>
#include <linux/signal.h>
@@ -20,8 +17,8 @@
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
+#include <linux/kfence.h>
-#include <asm/pgtable.h>
#include <asm/system_misc.h>
#include <asm/system_info.h>
#include <asm/tlbflush.h>
@@ -30,58 +27,43 @@
#ifdef CONFIG_MMU
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
+bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
{
- int ret = 0;
-
- if (!user_mode(regs)) {
- /* kprobe_running() needs smp_processor_id() */
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, fsr))
- ret = 1;
- preempt_enable();
- }
+ unsigned long addr = (unsigned long)unsafe_src;
- return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
-{
- return 0;
+ return addr >= TASK_SIZE && ULONG_MAX - addr >= size;
}
-#endif
/*
* This is useful to dump out the page tables associated with
* 'addr' in mm 'mm'.
*/
-void show_pte(struct mm_struct *mm, unsigned long addr)
+void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
if (!mm)
mm = &init_mm;
- pr_alert("pgd = %p\n", mm->pgd);
pgd = pgd_offset(mm, addr);
- pr_alert("[%08lx] *pgd=%08llx",
- addr, (long long)pgd_val(*pgd));
+ printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd));
do {
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- if (pgd_none(*pgd))
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d))
break;
- if (pgd_bad(*pgd)) {
+ if (p4d_bad(*p4d)) {
pr_cont("(bad)");
break;
}
- pud = pud_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
if (PTRS_PER_PUD != 1)
pr_cont(", *pud=%08llx", (long long)pud_val(*pud));
@@ -110,6 +92,9 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
break;
pte = pte_offset_map(pmd, addr);
+ if (!pte)
+ break;
+
pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
#ifndef CONFIG_ARM_LPAE
pr_cont(", *ppte=%08llx",
@@ -121,10 +106,43 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
pr_cont("\n");
}
#else /* CONFIG_MMU */
-void show_pte(struct mm_struct *mm, unsigned long addr)
+void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{ }
#endif /* CONFIG_MMU */
+static inline bool is_write_fault(unsigned int fsr)
+{
+ return (fsr & FSR_WRITE) && !(fsr & FSR_CM);
+}
+
+static inline bool is_translation_fault(unsigned int fsr)
+{
+ int fs = fsr_fs(fsr);
+#ifdef CONFIG_ARM_LPAE
+ if ((fs & FS_MMU_NOLL_MASK) == FS_TRANS_NOLL)
+ return true;
+#else
+ if (fs == FS_L1_TRANS || fs == FS_L2_TRANS)
+ return true;
+#endif
+ return false;
+}
+
+static void die_kernel_fault(const char *msg, struct mm_struct *mm,
+ unsigned long addr, unsigned int fsr,
+ struct pt_regs *regs)
+{
+ bust_spinlocks(1);
+ pr_alert("8<--- cut here ---\n");
+ pr_alert("Unable to handle kernel %s at virtual address %08lx when %s\n",
+ msg, addr, fsr & FSR_LNX_PF ? "execute" : str_write_read(fsr & FSR_WRITE));
+
+ show_pte(KERN_ALERT, mm, addr);
+ die("Oops", regs, fsr);
+ bust_spinlocks(0);
+ make_task_dead(SIGKILL);
+}
+
/*
* Oops. The kernel tried to access some page that wasn't present.
*/
@@ -132,6 +150,7 @@ static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
{
+ const char *msg;
/*
* Are we prepared to handle this kernel fault?
*/
@@ -141,15 +160,17 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
/*
* No handler, we'll have to terminate things with extreme prejudice.
*/
- bust_spinlocks(1);
- pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
- (addr < PAGE_SIZE) ? "NULL pointer dereference" :
- "paging request", addr);
+ if (addr < PAGE_SIZE) {
+ msg = "NULL pointer dereference";
+ } else {
+ if (is_translation_fault(fsr) &&
+ kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
+ return;
- show_pte(mm, addr);
- die("Oops", regs, fsr);
- bust_spinlocks(0);
- do_exit(SIGKILL);
+ msg = "paging request";
+ }
+
+ die_kernel_fault(msg, mm, addr, fsr, regs);
}
/*
@@ -157,19 +178,21 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
* User mode accesses just cause a SIGSEGV
*/
static void
-__do_user_fault(struct task_struct *tsk, unsigned long addr,
- unsigned int fsr, unsigned int sig, int code,
- struct pt_regs *regs)
+__do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
+ int code, struct pt_regs *regs)
{
+ struct task_struct *tsk = current;
+
if (addr > TASK_SIZE)
harden_branch_predictor();
#ifdef CONFIG_DEBUG_USER
if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
((user_debug & UDBG_BUS) && (sig == SIGBUS))) {
- printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
+ pr_err("8<--- cut here ---\n");
+ pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
tsk->comm, sig, addr, fsr);
- show_pte(tsk->mm, addr);
+ show_pte(KERN_ERR, tsk->mm, addr);
show_regs(regs);
}
#endif
@@ -183,7 +206,7 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr,
tsk->thread.address = addr;
tsk->thread.error_code = fsr;
tsk->thread.trap_no = 14;
- force_sig_fault(sig, code, (void __user *)addr, tsk);
+ force_sig_fault(sig, code, (void __user *)addr);
}
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
@@ -196,81 +219,59 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
* have no context to handle this fault with.
*/
if (user_mode(regs))
- __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
+ __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
else
__do_kernel_fault(mm, addr, fsr, regs);
}
#ifdef CONFIG_MMU
-#define VM_FAULT_BADMAP 0x010000
-#define VM_FAULT_BADACCESS 0x020000
-
-/*
- * Check that the permissions on the VMA allow for the fault which occurred.
- * If we encountered a write fault, we must have write permission, otherwise
- * we allow any permission.
- */
-static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
+static inline bool is_permission_fault(unsigned int fsr)
{
- unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
-
- if (fsr & FSR_WRITE)
- mask = VM_WRITE;
- if (fsr & FSR_LNX_PF)
- mask = VM_EXEC;
-
- return vma->vm_flags & mask ? false : true;
+ int fs = fsr_fs(fsr);
+#ifdef CONFIG_ARM_LPAE
+ if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL)
+ return true;
+#else
+ if (fs == FS_L1_PERM || fs == FS_L2_PERM)
+ return true;
+#endif
+ return false;
}
-static vm_fault_t __kprobes
-__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
- unsigned int flags, struct task_struct *tsk)
+#ifdef CONFIG_CPU_TTBR0_PAN
+static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs)
{
- struct vm_area_struct *vma;
- vm_fault_t fault;
-
- vma = find_vma(mm, addr);
- fault = VM_FAULT_BADMAP;
- if (unlikely(!vma))
- goto out;
- if (unlikely(vma->vm_start > addr))
- goto check_stack;
+ struct svc_pt_regs *svcregs;
- /*
- * Ok, we have a good vm_area for this
- * memory access, so we can handle it.
- */
-good_area:
- if (access_error(fsr, vma)) {
- fault = VM_FAULT_BADACCESS;
- goto out;
- }
+ /* If we are in user mode: permission granted */
+ if (user_mode(regs))
+ return true;
- return handle_mm_fault(vma, addr & PAGE_MASK, flags);
+ /* uaccess state saved above pt_regs on SVC exception entry */
+ svcregs = to_svc_pt_regs(regs);
-check_stack:
- /* Don't allow expansion below FIRST_USER_ADDRESS */
- if (vma->vm_flags & VM_GROWSDOWN &&
- addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr))
- goto good_area;
-out:
- return fault;
+ return !(svcregs->ttbcr & TTBCR_EPD0);
+}
+#else
+static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs)
+{
+ return true;
}
+#endif
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
- struct task_struct *tsk;
- struct mm_struct *mm;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
int sig, code;
vm_fault_t fault;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ unsigned int flags = FAULT_FLAG_DEFAULT;
+ vm_flags_t vm_flags = VM_ACCESS_FLAGS;
- if (notify_page_fault(regs, fsr))
+ if (kprobe_page_fault(regs, fsr))
return 0;
- tsk = current;
- mm = tsk->mm;
/* Enable interrupts if they were enabled in the parent context. */
if (interrupts_enabled(regs))
@@ -285,79 +286,115 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
- if (fsr & FSR_WRITE)
+
+ if (is_write_fault(fsr)) {
flags |= FAULT_FLAG_WRITE;
+ vm_flags = VM_WRITE;
+ }
+
+ if (fsr & FSR_LNX_PF) {
+ vm_flags = VM_EXEC;
+
+ if (is_permission_fault(fsr) && !user_mode(regs))
+ die_kernel_fault("execution of memory",
+ mm, addr, fsr, regs);
+ }
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
/*
- * As per x86, we may deadlock here. However, since the kernel only
- * validly references user space from well defined areas of the code,
- * we can bug out early if this is from code which shouldn't.
+ * Privileged access aborts with CONFIG_CPU_TTBR0_PAN enabled are
+ * routed via the translation fault mechanism. Check whether uaccess
+ * is disabled while in kernel mode.
*/
- if (!down_read_trylock(&mm->mmap_sem)) {
- if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
+ if (!ttbr0_usermode_access_allowed(regs))
+ goto no_context;
+
+ if (!(flags & FAULT_FLAG_USER))
+ goto lock_mmap;
+
+ vma = lock_vma_under_rcu(mm, addr);
+ if (!vma)
+ goto lock_mmap;
+
+ if (!(vma->vm_flags & vm_flags)) {
+ vma_end_read(vma);
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ fault = 0;
+ code = SEGV_ACCERR;
+ goto bad_area;
+ }
+ fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
+ if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+ vma_end_read(vma);
+
+ if (!(fault & VM_FAULT_RETRY)) {
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto done;
+ }
+ count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ flags |= FAULT_FLAG_TRIED;
+
+ /* Quick path to respond to signals */
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
goto no_context;
+ return 0;
+ }
+lock_mmap:
+
retry:
- down_read(&mm->mmap_sem);
- } else {
- /*
- * The above down_read_trylock() might have succeeded in
- * which case, we'll have missed the might_sleep() from
- * down_read()
- */
- might_sleep();
-#ifdef CONFIG_DEBUG_VM
- if (!user_mode(regs) &&
- !search_exception_tables(regs->ARM_pc))
- goto no_context;
-#endif
+ vma = lock_mm_and_find_vma(mm, addr, regs);
+ if (unlikely(!vma)) {
+ fault = 0;
+ code = SEGV_MAPERR;
+ goto bad_area;
}
- fault = __do_page_fault(mm, addr, fsr, flags, tsk);
+ /*
+ * ok, we have a good vm_area for this memory access, check the
+ * permissions on the VMA allow for the fault which occurred.
+ */
+ if (!(vma->vm_flags & vm_flags)) {
+ mmap_read_unlock(mm);
+ fault = 0;
+ code = SEGV_ACCERR;
+ goto bad_area;
+ }
+
+ fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
/* If we need to retry but a fatal signal is pending, handle the
- * signal first. We do not need to release the mmap_sem because
+ * signal first. We do not need to release the mmap_lock because
* it would already be released in __lock_page_or_retry in
* mm/filemap.c. */
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+ if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
goto no_context;
return 0;
}
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
+ /* The fault is fully completed (including releasing mmap lock) */
+ if (fault & VM_FAULT_COMPLETED)
+ return 0;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
- if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, addr);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, addr);
- }
+ if (!(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_RETRY) {
- /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
- * of starvation. */
- flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
+done:
- /*
- * Handle the "normal" case first - VM_FAULT_MAJOR
- */
- if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
+ /* Handle the "normal" case first */
+ if (likely(!(fault & VM_FAULT_ERROR)))
return 0;
+ code = SEGV_MAPERR;
+bad_area:
/*
* If we are in kernel mode at this point, we
* have no context to handle this fault with.
@@ -388,11 +425,9 @@ retry:
* isn't in our memory map..
*/
sig = SIGSEGV;
- code = fault == VM_FAULT_BADACCESS ?
- SEGV_ACCERR : SEGV_MAPERR;
}
- __do_user_fault(tsk, addr, fsr, sig, code, regs);
+ __do_user_fault(addr, fsr, sig, code, regs);
return 0;
no_context:
@@ -431,6 +466,7 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
{
unsigned int index;
pgd_t *pgd, *pgd_k;
+ p4d_t *p4d, *p4d_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
@@ -445,13 +481,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
pgd = cpu_get_pgd() + index;
pgd_k = init_mm.pgd + index;
- if (pgd_none(*pgd_k))
+ p4d = p4d_offset(pgd, addr);
+ p4d_k = p4d_offset(pgd_k, addr);
+
+ if (p4d_none(*p4d_k))
goto bad_area;
- if (!pgd_present(*pgd))
- set_pgd(pgd, *pgd_k);
+ if (!p4d_present(*p4d))
+ set_p4d(p4d, *p4d_k);
- pud = pud_offset(pgd, addr);
- pud_k = pud_offset(pgd_k, addr);
+ pud = pud_offset(p4d, addr);
+ pud_k = pud_offset(p4d_k, addr);
if (pud_none(*pud_k))
goto bad_area;
@@ -556,9 +595,10 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
return;
+ pr_alert("8<--- cut here ---\n");
pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
inf->name, fsr, addr);
- show_pte(current->mm, addr);
+ show_pte(KERN_ALERT, current->mm, addr);
arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
fsr, 0);
@@ -585,6 +625,7 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
return;
+ pr_alert("8<--- cut here ---\n");
pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
inf->name, ifsr, addr);