diff options
Diffstat (limited to 'arch/arm/mm/fault.c')
| -rw-r--r-- | arch/arm/mm/fault.c | 347 |
1 files changed, 194 insertions, 153 deletions
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 58f69fa07df9..2bc828a1940c 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/arch/arm/mm/fault.c * * Copyright (C) 1995 Linus Torvalds * Modifications for ARM processor (c) 1995-2004 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/extable.h> #include <linux/signal.h> @@ -20,8 +17,8 @@ #include <linux/sched/debug.h> #include <linux/highmem.h> #include <linux/perf_event.h> +#include <linux/kfence.h> -#include <asm/pgtable.h> #include <asm/system_misc.h> #include <asm/system_info.h> #include <asm/tlbflush.h> @@ -30,58 +27,43 @@ #ifdef CONFIG_MMU -#ifdef CONFIG_KPROBES -static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { - int ret = 0; - - if (!user_mode(regs)) { - /* kprobe_running() needs smp_processor_id() */ - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, fsr)) - ret = 1; - preempt_enable(); - } + unsigned long addr = (unsigned long)unsafe_src; - return ret; -} -#else -static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) -{ - return 0; + return addr >= TASK_SIZE && ULONG_MAX - addr >= size; } -#endif /* * This is useful to dump out the page tables associated with * 'addr' in mm 'mm'. */ -void show_pte(struct mm_struct *mm, unsigned long addr) +void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; if (!mm) mm = &init_mm; - pr_alert("pgd = %p\n", mm->pgd); pgd = pgd_offset(mm, addr); - pr_alert("[%08lx] *pgd=%08llx", - addr, (long long)pgd_val(*pgd)); + printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd)); do { + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; - if (pgd_none(*pgd)) + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) break; - if (pgd_bad(*pgd)) { + if (p4d_bad(*p4d)) { pr_cont("(bad)"); break; } - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); if (PTRS_PER_PUD != 1) pr_cont(", *pud=%08llx", (long long)pud_val(*pud)); @@ -110,6 +92,9 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; pte = pte_offset_map(pmd, addr); + if (!pte) + break; + pr_cont(", *pte=%08llx", (long long)pte_val(*pte)); #ifndef CONFIG_ARM_LPAE pr_cont(", *ppte=%08llx", @@ -121,10 +106,43 @@ void show_pte(struct mm_struct *mm, unsigned long addr) pr_cont("\n"); } #else /* CONFIG_MMU */ -void show_pte(struct mm_struct *mm, unsigned long addr) +void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr) { } #endif /* CONFIG_MMU */ +static inline bool is_write_fault(unsigned int fsr) +{ + return (fsr & FSR_WRITE) && !(fsr & FSR_CM); +} + +static inline bool is_translation_fault(unsigned int fsr) +{ + int fs = fsr_fs(fsr); +#ifdef CONFIG_ARM_LPAE + if ((fs & FS_MMU_NOLL_MASK) == FS_TRANS_NOLL) + return true; +#else + if (fs == FS_L1_TRANS || fs == FS_L2_TRANS) + return true; +#endif + return false; +} + +static void die_kernel_fault(const char *msg, struct mm_struct *mm, + unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + bust_spinlocks(1); + pr_alert("8<--- cut here ---\n"); + pr_alert("Unable to handle kernel %s at virtual address %08lx when %s\n", + msg, addr, fsr & FSR_LNX_PF ? "execute" : str_write_read(fsr & FSR_WRITE)); + + show_pte(KERN_ALERT, mm, addr); + die("Oops", regs, fsr); + bust_spinlocks(0); + make_task_dead(SIGKILL); +} + /* * Oops. The kernel tried to access some page that wasn't present. */ @@ -132,6 +150,7 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + const char *msg; /* * Are we prepared to handle this kernel fault? */ @@ -141,15 +160,17 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, /* * No handler, we'll have to terminate things with extreme prejudice. */ - bust_spinlocks(1); - pr_alert("Unable to handle kernel %s at virtual address %08lx\n", - (addr < PAGE_SIZE) ? "NULL pointer dereference" : - "paging request", addr); + if (addr < PAGE_SIZE) { + msg = "NULL pointer dereference"; + } else { + if (is_translation_fault(fsr) && + kfence_handle_page_fault(addr, is_write_fault(fsr), regs)) + return; - show_pte(mm, addr); - die("Oops", regs, fsr); - bust_spinlocks(0); - do_exit(SIGKILL); + msg = "paging request"; + } + + die_kernel_fault(msg, mm, addr, fsr, regs); } /* @@ -157,19 +178,21 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, * User mode accesses just cause a SIGSEGV */ static void -__do_user_fault(struct task_struct *tsk, unsigned long addr, - unsigned int fsr, unsigned int sig, int code, - struct pt_regs *regs) +__do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig, + int code, struct pt_regs *regs) { + struct task_struct *tsk = current; + if (addr > TASK_SIZE) harden_branch_predictor(); #ifdef CONFIG_DEBUG_USER if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || ((user_debug & UDBG_BUS) && (sig == SIGBUS))) { - printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", + pr_err("8<--- cut here ---\n"); + pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", tsk->comm, sig, addr, fsr); - show_pte(tsk->mm, addr); + show_pte(KERN_ERR, tsk->mm, addr); show_regs(regs); } #endif @@ -183,7 +206,7 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr, tsk->thread.address = addr; tsk->thread.error_code = fsr; tsk->thread.trap_no = 14; - force_sig_fault(sig, code, (void __user *)addr, tsk); + force_sig_fault(sig, code, (void __user *)addr); } void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) @@ -196,81 +219,59 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * have no context to handle this fault with. */ if (user_mode(regs)) - __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs); + __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs); else __do_kernel_fault(mm, addr, fsr, regs); } #ifdef CONFIG_MMU -#define VM_FAULT_BADMAP 0x010000 -#define VM_FAULT_BADACCESS 0x020000 - -/* - * Check that the permissions on the VMA allow for the fault which occurred. - * If we encountered a write fault, we must have write permission, otherwise - * we allow any permission. - */ -static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) +static inline bool is_permission_fault(unsigned int fsr) { - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; - - if (fsr & FSR_WRITE) - mask = VM_WRITE; - if (fsr & FSR_LNX_PF) - mask = VM_EXEC; - - return vma->vm_flags & mask ? false : true; + int fs = fsr_fs(fsr); +#ifdef CONFIG_ARM_LPAE + if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL) + return true; +#else + if (fs == FS_L1_PERM || fs == FS_L2_PERM) + return true; +#endif + return false; } -static vm_fault_t __kprobes -__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, - unsigned int flags, struct task_struct *tsk) +#ifdef CONFIG_CPU_TTBR0_PAN +static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs) { - struct vm_area_struct *vma; - vm_fault_t fault; - - vma = find_vma(mm, addr); - fault = VM_FAULT_BADMAP; - if (unlikely(!vma)) - goto out; - if (unlikely(vma->vm_start > addr)) - goto check_stack; + struct svc_pt_regs *svcregs; - /* - * Ok, we have a good vm_area for this - * memory access, so we can handle it. - */ -good_area: - if (access_error(fsr, vma)) { - fault = VM_FAULT_BADACCESS; - goto out; - } + /* If we are in user mode: permission granted */ + if (user_mode(regs)) + return true; - return handle_mm_fault(vma, addr & PAGE_MASK, flags); + /* uaccess state saved above pt_regs on SVC exception entry */ + svcregs = to_svc_pt_regs(regs); -check_stack: - /* Don't allow expansion below FIRST_USER_ADDRESS */ - if (vma->vm_flags & VM_GROWSDOWN && - addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr)) - goto good_area; -out: - return fault; + return !(svcregs->ttbcr & TTBCR_EPD0); +} +#else +static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs) +{ + return true; } +#endif static int __kprobes do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - struct task_struct *tsk; - struct mm_struct *mm; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; int sig, code; vm_fault_t fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + unsigned int flags = FAULT_FLAG_DEFAULT; + vm_flags_t vm_flags = VM_ACCESS_FLAGS; - if (notify_page_fault(regs, fsr)) + if (kprobe_page_fault(regs, fsr)) return 0; - tsk = current; - mm = tsk->mm; /* Enable interrupts if they were enabled in the parent context. */ if (interrupts_enabled(regs)) @@ -285,79 +286,115 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; - if (fsr & FSR_WRITE) + + if (is_write_fault(fsr)) { flags |= FAULT_FLAG_WRITE; + vm_flags = VM_WRITE; + } + + if (fsr & FSR_LNX_PF) { + vm_flags = VM_EXEC; + + if (is_permission_fault(fsr) && !user_mode(regs)) + die_kernel_fault("execution of memory", + mm, addr, fsr, regs); + } + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); /* - * As per x86, we may deadlock here. However, since the kernel only - * validly references user space from well defined areas of the code, - * we can bug out early if this is from code which shouldn't. + * Privileged access aborts with CONFIG_CPU_TTBR0_PAN enabled are + * routed via the translation fault mechanism. Check whether uaccess + * is disabled while in kernel mode. */ - if (!down_read_trylock(&mm->mmap_sem)) { - if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc)) + if (!ttbr0_usermode_access_allowed(regs)) + goto no_context; + + if (!(flags & FAULT_FLAG_USER)) + goto lock_mmap; + + vma = lock_vma_under_rcu(mm, addr); + if (!vma) + goto lock_mmap; + + if (!(vma->vm_flags & vm_flags)) { + vma_end_read(vma); + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + fault = 0; + code = SEGV_ACCERR; + goto bad_area; + } + fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs); + if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; + + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) goto no_context; + return 0; + } +lock_mmap: + retry: - down_read(&mm->mmap_sem); - } else { - /* - * The above down_read_trylock() might have succeeded in - * which case, we'll have missed the might_sleep() from - * down_read() - */ - might_sleep(); -#ifdef CONFIG_DEBUG_VM - if (!user_mode(regs) && - !search_exception_tables(regs->ARM_pc)) - goto no_context; -#endif + vma = lock_mm_and_find_vma(mm, addr, regs); + if (unlikely(!vma)) { + fault = 0; + code = SEGV_MAPERR; + goto bad_area; } - fault = __do_page_fault(mm, addr, fsr, flags, tsk); + /* + * ok, we have a good vm_area for this memory access, check the + * permissions on the VMA allow for the fault which occurred. + */ + if (!(vma->vm_flags & vm_flags)) { + mmap_read_unlock(mm); + fault = 0; + code = SEGV_ACCERR; + goto bad_area; + } + + fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs); /* If we need to retry but a fatal signal is pending, handle the - * signal first. We do not need to release the mmap_sem because + * signal first. We do not need to release the mmap_lock because * it would already be released in __lock_page_or_retry in * mm/filemap.c. */ - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { + if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) goto no_context; return 0; } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return 0; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); - if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, addr); - } + if (!(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } } - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); +done: - /* - * Handle the "normal" case first - VM_FAULT_MAJOR - */ - if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) + /* Handle the "normal" case first */ + if (likely(!(fault & VM_FAULT_ERROR))) return 0; + code = SEGV_MAPERR; +bad_area: /* * If we are in kernel mode at this point, we * have no context to handle this fault with. @@ -388,11 +425,9 @@ retry: * isn't in our memory map.. */ sig = SIGSEGV; - code = fault == VM_FAULT_BADACCESS ? - SEGV_ACCERR : SEGV_MAPERR; } - __do_user_fault(tsk, addr, fsr, sig, code, regs); + __do_user_fault(addr, fsr, sig, code, regs); return 0; no_context: @@ -431,6 +466,7 @@ do_translation_fault(unsigned long addr, unsigned int fsr, { unsigned int index; pgd_t *pgd, *pgd_k; + p4d_t *p4d, *p4d_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; @@ -445,13 +481,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr, pgd = cpu_get_pgd() + index; pgd_k = init_mm.pgd + index; - if (pgd_none(*pgd_k)) + p4d = p4d_offset(pgd, addr); + p4d_k = p4d_offset(pgd_k, addr); + + if (p4d_none(*p4d_k)) goto bad_area; - if (!pgd_present(*pgd)) - set_pgd(pgd, *pgd_k); + if (!p4d_present(*p4d)) + set_p4d(p4d, *p4d_k); - pud = pud_offset(pgd, addr); - pud_k = pud_offset(pgd_k, addr); + pud = pud_offset(p4d, addr); + pud_k = pud_offset(p4d_k, addr); if (pud_none(*pud_k)) goto bad_area; @@ -556,9 +595,10 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) return; + pr_alert("8<--- cut here ---\n"); pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n", inf->name, fsr, addr); - show_pte(current->mm, addr); + show_pte(KERN_ALERT, current->mm, addr); arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr, fsr, 0); @@ -585,6 +625,7 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) return; + pr_alert("8<--- cut here ---\n"); pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", inf->name, ifsr, addr); |
