Diffstat (limited to 'arch/um/kernel/trap.c')
-rw-r--r--	arch/um/kernel/trap.c	| 245
1 file changed, 172 insertions(+), 73 deletions(-)
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 0e8b6158f224..177615820a4c 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/mm.h>
@@ -10,7 +10,6 @@
 #include <linux/uaccess.h>
 #include <linux/sched/debug.h>
 #include <asm/current.h>
-#include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <arch.h>
 #include <as-layout.h>
@@ -19,6 +18,122 @@
 #include <skas.h>
 
 /*
+ * NOTE: UML does not have exception tables. As such, this is almost a copy
+ * of the code in mm/memory.c, only adjusting the logic to simply check whether
+ * we are coming from the kernel instead of doing an additional lookup in the
+ * exception table.
+ * We can do this simplification because we never get here if the exception was
+ * fixable.
+ */
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+	if (likely(mmap_read_trylock(mm)))
+		return true;
+
+	if (!is_user)
+		return false;
+
+	return !mmap_read_lock_killable(mm);
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+	/*
+	 * We don't have this operation yet.
+	 *
+	 * It should be easy enough to do: it's basically a
+	 *    atomic_long_try_cmpxchg_acquire()
+	 * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+	 * it also needs the proper lockdep magic etc.
+	 */
+	return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+	mmap_read_unlock(mm);
+	if (!is_user)
+		return false;
+
+	return !mmap_write_lock_killable(mm);
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalent to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So this checks the exception tables on kernel faults in
+ * order to only do this all for instructions that are actually
+ * expected to fault.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+static struct vm_area_struct *
+um_lock_mm_and_find_vma(struct mm_struct *mm,
+			unsigned long addr, bool is_user)
+{
+	struct vm_area_struct *vma;
+
+	if (!get_mmap_lock_carefully(mm, is_user))
+		return NULL;
+
+	vma = find_vma(mm, addr);
+	if (likely(vma && (vma->vm_start <= addr)))
+		return vma;
+
+	/*
+	 * Well, dang. We might still be successful, but only
+	 * if we can extend a vma to do so.
+	 */
+	if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+		mmap_read_unlock(mm);
+		return NULL;
+	}
+
+	/*
+	 * We can try to upgrade the mmap lock atomically,
+	 * in which case we can continue to use the vma
+	 * we already looked up.
+	 *
+	 * Otherwise we'll have to drop the mmap lock and
+	 * re-take it, and also look up the vma again,
+	 * re-checking it.
+	 */
+	if (!mmap_upgrade_trylock(mm)) {
+		if (!upgrade_mmap_lock_carefully(mm, is_user))
+			return NULL;
+
+		vma = find_vma(mm, addr);
+		if (!vma)
+			goto fail;
+		if (vma->vm_start <= addr)
+			goto success;
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			goto fail;
+	}
+
+	if (expand_stack_locked(vma, addr))
+		goto fail;
+
+success:
+	mmap_write_downgrade(mm);
+	return vma;
+
+fail:
+	mmap_write_unlock(mm);
+	return NULL;
+}
+
+/*
  * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
  * segv().
  */
@@ -27,12 +142,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	pgd_t *pgd;
-	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	int err = -EFAULT;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	unsigned int flags = FAULT_FLAG_DEFAULT;
 
 	*code_out = SEGV_MAPERR;
 
@@ -46,20 +159,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 	if (is_user)
 		flags |= FAULT_FLAG_USER;
retry:
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, address);
+	vma = um_lock_mm_and_find_vma(mm, address, is_user);
 	if (!vma)
-		goto out;
-	else if (vma->vm_start <= address)
-		goto good_area;
-	else if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto out;
-	else if (is_user && !ARCH_IS_STACKGROW(address))
-		goto out;
-	else if (expand_stack(vma, address))
-		goto out;
+		goto out_nosemaphore;
 
-good_area:
 	*code_out = SEGV_ACCERR;
 	if (is_write) {
 		if (!(vma->vm_flags & VM_WRITE))
@@ -74,11 +177,15 @@ good_area:
 	do {
 		vm_fault_t fault;
 
-		fault = handle_mm_fault(vma, address, flags);
+		fault = handle_mm_fault(vma, address, flags, NULL);
 
 		if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
 			goto out_nosemaphore;
 
+		/* The fault is fully completed (including releasing mmap lock) */
+		if (fault & VM_FAULT_COMPLETED)
+			return 0;
+
 		if (unlikely(fault & VM_FAULT_ERROR)) {
 			if (fault & VM_FAULT_OOM) {
 				goto out_of_memory;
@@ -90,22 +197,13 @@ good_area:
 			}
 			BUG();
 		}
-		if (flags & FAULT_FLAG_ALLOW_RETRY) {
-			if (fault & VM_FAULT_MAJOR)
-				current->maj_flt++;
-			else
-				current->min_flt++;
-			if (fault & VM_FAULT_RETRY) {
-				flags &= ~FAULT_FLAG_ALLOW_RETRY;
-				flags |= FAULT_FLAG_TRIED;
-
-				goto retry;
-			}
+		if (fault & VM_FAULT_RETRY) {
+			flags |= FAULT_FLAG_TRIED;
+
+			goto retry;
 		}
 
-		pgd = pgd_offset(mm, address);
-		pud = pud_offset(pgd, address);
-		pmd = pmd_offset(pud, address);
+		pmd = pmd_off(mm, address);
 		pte = pte_offset_kernel(pmd, address);
 	} while (!pte_present(*pte));
 	err = 0;
@@ -120,9 +218,9 @@ good_area:
 #if 0
 	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
 #endif
-	flush_tlb_page(vma, address);
+
out:
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
out_nosemaphore:
 	return err;
 
@@ -131,13 +229,12 @@ out_of_memory:
 	 * We ran out of memory, call the OOM killer, and return the userspace
 	 * (which will retry the fault, or kill us if we got oom-killed).
 	 */
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 	if (!is_user)
 		goto out_nosemaphore;
 	pagefault_out_of_memory();
 	return 0;
 }
-EXPORT_SYMBOL(handle_page_fault);
 
 static void show_segv_info(struct uml_pt_regs *regs)
 {
@@ -163,13 +260,12 @@ static void show_segv_info(struct uml_pt_regs *regs)
 static void bad_segv(struct faultinfo fi, unsigned long ip)
 {
 	current->thread.arch.faultinfo = fi;
-	force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi),
-			current);
+	force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi));
 }
 
 void fatal_sigsegv(void)
 {
-	force_sigsegv(SIGSEGV, current);
+	force_fatal_sig(SIGSEGV);
 	do_signal(&current->thread.regs);
 	/*
 	 * This is to tell gcc that we're not returning - do_signal
@@ -184,12 +280,14 @@ void fatal_sigsegv(void)
  * @sig: the signal number
  * @unused_si: the signal info struct; unused in this handler
  * @regs: the ptrace register information
+ * @mc: the mcontext of the signal
  *
  * The handler first extracts the faultinfo from the UML ptrace regs struct.
  * If the userfault did not happen in an UML userspace process, bad_segv is called.
 * Otherwise the signal did happen in a cloned userspace process, handle it.
 */
-void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+		  void *mc)
 {
 	struct faultinfo * fi = UPT_FAULTINFO(regs);
 
@@ -198,7 +296,7 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 		bad_segv(*fi, UPT_IP(regs));
 		return;
 	}
-	segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
+	segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs, mc);
 }
 
 /*
@@ -208,9 +306,8 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
  * give us bad data!
  */
 unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
-		   struct uml_pt_regs *regs)
+		   struct uml_pt_regs *regs, void *mc)
 {
-	jmp_buf *catcher;
 	int si_code;
 	int err;
 	int is_write = FAULT_WRITE(fi);
@@ -219,8 +316,30 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 	if (!is_user && regs)
 		current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
 
-	if (!is_user && (address >= start_vm) && (address < end_vm)) {
-		flush_tlb_kernel_vm();
+	if (!is_user && address >= start_vm && address < end_vm) {
+		/*
+		 * Kernel has pending updates from set_ptes that were not
+		 * flushed yet. Syncing them should fix the pagefault (if not
+		 * we'll get here again and panic).
+		 */
+		err = um_tlb_sync(&init_mm);
+		if (err == -ENOMEM)
+			report_enomem();
+		if (err)
+			panic("Failed to sync kernel TLBs: %d", err);
+		goto out;
+	}
+	else if (current->pagefault_disabled) {
+		if (!mc) {
+			show_regs(container_of(regs, struct pt_regs, regs));
+			panic("Segfault with pagefaults disabled but no mcontext");
+		}
+		if (!current->thread.segv_continue) {
+			show_regs(container_of(regs, struct pt_regs, regs));
+			panic("Segfault without recovery target");
+		}
+		mc_set_rip(mc, current->thread.segv_continue);
+		current->thread.segv_continue = NULL;
 		goto out;
 	}
 	else if (current->mm == NULL) {
@@ -246,15 +365,8 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 		address = 0;
 	}
 
-	catcher = current->thread.fault_catcher;
 	if (!err)
 		goto out;
-	else if (catcher != NULL) {
-		current->thread.fault_addr = (void *) address;
-		UML_LONGJMP(catcher, 1);
-	}
-	else if (current->thread.fault_addr != NULL)
-		panic("fault_addr set but no fault catcher");
 	else if (!is_user && arch_fixup(ip, regs))
 		goto out;
 
@@ -268,13 +380,11 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 
 	if (err == -EACCES) {
 		current->thread.arch.faultinfo = fi;
-		force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address,
-				current);
+		force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
 	} else {
 		BUG_ON(err != -EFAULT);
 		current->thread.arch.faultinfo = fi;
-		force_sig_fault(SIGSEGV, si_code, (void __user *) address,
-				current);
+		force_sig_fault(SIGSEGV, si_code, (void __user *) address);
 	}
 
out:
@@ -284,7 +394,8 @@ out:
 	return 0;
 }
 
-void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
+void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs,
+		  void *mc)
 {
 	int code, err;
 	if (!UPT_IS_USER(regs)) {
@@ -304,28 +415,16 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
 	if ((err == 0) && (siginfo_layout(sig, code) == SIL_FAULT)) {
 		struct faultinfo *fi = UPT_FAULTINFO(regs);
 		current->thread.arch.faultinfo = *fi;
-		force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi),
-				current);
+		force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi));
 	} else {
 		printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n",
 		       sig, code, err);
-		force_sig(sig, current);
+		force_sig(sig);
 	}
 }
 
-void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs)
-{
-	if (current->thread.fault_catcher != NULL)
-		UML_LONGJMP(current->thread.fault_catcher, 1);
-	else
-		relay_signal(sig, si, regs);
-}
-
-void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+	   void *mc)
 {
 	do_IRQ(WINCH_IRQ, regs);
 }
-
-void trap_init(void)
-{
-}
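Note on the fault-path rework above: with um_lock_mm_and_find_vma() in place,
handle_page_fault() no longer open-codes the vma lookup, the VM_GROWSDOWN stack
growth, or the retry bookkeeping; and since fault accounting moved into the core
handler (which is also why handle_mm_fault() grew the extra pt_regs argument,
passed as NULL here), the per-task maj_flt/min_flt updates could be dropped too.
The sketch below condenses the post-patch control flow; it trims the protection
checks, the PTE re-check loop, and the OOM path, so treat it as an illustration
of the pattern rather than a drop-in replacement for the function above, and the
function name is ours, not the kernel's:

/*
 * Condensed sketch of the post-patch fault path. Protection checks,
 * the PTE re-check loop, and the OOM path are trimmed for brevity.
 */
static int fault_path_sketch(struct mm_struct *mm, unsigned long address,
			     bool is_user, bool is_write)
{
	struct vm_area_struct *vma;
	unsigned int flags = FAULT_FLAG_DEFAULT;
	vm_fault_t fault;

	if (is_user)
		flags |= FAULT_FLAG_USER;
	if (is_write)
		flags |= FAULT_FLAG_WRITE;
retry:
	/* Takes the mmap lock carefully; may grow a VM_GROWSDOWN vma. */
	vma = um_lock_mm_and_find_vma(mm, address, is_user);
	if (!vma)
		return -EFAULT;		/* lock already dropped on failure */

	fault = handle_mm_fault(vma, address, flags, NULL);

	/* The core handler finished the fault and released the mmap lock. */
	if (fault & VM_FAULT_COMPLETED)
		return 0;

	/* VM_FAULT_RETRY means the lock was dropped for us; try once more. */
	if (fault & VM_FAULT_RETRY) {
		if (fatal_signal_pending(current))
			return -EFAULT;
		flags |= FAULT_FLAG_TRIED;
		goto retry;
	}

	mmap_read_unlock(mm);
	return (fault & VM_FAULT_ERROR) ? -EFAULT : 0;
}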

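One more cleanup worth calling out: the open-coded pgd/pud/pmd walk removed
above still used the pre-p4d calling convention (pud_offset() taking a pgd_t *),
which no longer exists now that the generic page-table API has five levels.
The replacement, pmd_off(), folds the full walk into one helper; roughly the
following, per the generic definition in include/linux/pgtable.h (check the
tree you are building against for the exact form):

static inline pmd_t *pmd_off(struct mm_struct *mm, unsigned long va)
{
	/* Walk pgd -> p4d -> pud -> pmd; folded levels compile away. */
	return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va), va), va);
}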