diff options
Diffstat (limited to 'arch/riscv/mm/fault.c')
| -rw-r--r-- | arch/riscv/mm/fault.c | 193 |
1 files changed, 141 insertions, 52 deletions
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index d86f7cebd4a7..04ed6f8acae4 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -15,12 +15,67 @@ #include <linux/uaccess.h> #include <linux/kprobes.h> #include <linux/kfence.h> +#include <linux/entry-common.h> #include <asm/ptrace.h> #include <asm/tlbflush.h> +#define CREATE_TRACE_POINTS +#include <trace/events/exceptions.h> + #include "../kernel/head.h" +static void show_pte(unsigned long addr) +{ + pgd_t *pgdp, pgd; + p4d_t *p4dp, p4d; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep, pte; + struct mm_struct *mm = current->mm; + + if (!mm) + mm = &init_mm; + + pr_alert("Current %s pgtable: %luK pagesize, %d-bit VAs, pgdp=0x%016llx\n", + current->comm, PAGE_SIZE / SZ_1K, VA_BITS, + mm == &init_mm ? (u64)__pa_symbol(mm->pgd) : virt_to_phys(mm->pgd)); + + pgdp = pgd_offset(mm, addr); + pgd = pgdp_get(pgdp); + pr_alert("[%016lx] pgd=%016lx", addr, pgd_val(pgd)); + if (pgd_none(pgd) || pgd_bad(pgd) || pgd_leaf(pgd)) + goto out; + + p4dp = p4d_offset(pgdp, addr); + p4d = p4dp_get(p4dp); + pr_cont(", p4d=%016lx", p4d_val(p4d)); + if (p4d_none(p4d) || p4d_bad(p4d) || p4d_leaf(p4d)) + goto out; + + pudp = pud_offset(p4dp, addr); + pud = pudp_get(pudp); + pr_cont(", pud=%016lx", pud_val(pud)); + if (pud_none(pud) || pud_bad(pud) || pud_leaf(pud)) + goto out; + + pmdp = pmd_offset(pudp, addr); + pmd = pmdp_get(pmdp); + pr_cont(", pmd=%016lx", pmd_val(pmd)); + if (pmd_none(pmd) || pmd_bad(pmd) || pmd_leaf(pmd)) + goto out; + + ptep = pte_offset_map(pmdp, addr); + if (!ptep) + goto out; + + pte = ptep_get(ptep); + pr_cont(", pte=%016lx", pte_val(pte)); + pte_unmap(ptep); +out: + pr_cont("\n"); +} + static void die_kernel_fault(const char *msg, unsigned long addr, struct pt_regs *regs) { @@ -30,6 +85,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr, addr); bust_spinlocks(0); + show_pte(addr); die(regs, "Oops"); make_task_dead(SIGKILL); } @@ -60,36 +116,37 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) { + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } pagefault_out_of_memory(); return; - } else if (fault & VM_FAULT_SIGBUS) { + } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; + } else if (fault & VM_FAULT_SIGSEGV) { + do_trap(regs, SIGSEGV, SEGV_MAPERR, addr); + return; } + BUG(); } -static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr) +static inline void +bad_area_nosemaphore(struct pt_regs *regs, int code, unsigned long addr) { /* * Something tried to access memory that isn't in our memory map. * Fix it, but check if it's kernel or user first. */ - mmap_read_unlock(mm); /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { do_trap(regs, SIGSEGV, code, addr); @@ -99,6 +156,15 @@ static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code no_context(regs, addr); } +static inline void +bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, + unsigned long addr) +{ + mmap_read_unlock(mm); + + bad_area_nosemaphore(regs, code, addr); +} + static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) { pgd_t *pgd, *pgd_k; @@ -126,33 +192,37 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a pgd = (pgd_t *)pfn_to_virt(pfn) + index; pgd_k = init_mm.pgd + index; - if (!pgd_present(*pgd_k)) { + if (!pgd_present(pgdp_get(pgd_k))) { no_context(regs, addr); return; } - set_pgd(pgd, *pgd_k); + set_pgd(pgd, pgdp_get(pgd_k)); p4d_k = p4d_offset(pgd_k, addr); - if (!p4d_present(*p4d_k)) { + if (!p4d_present(p4dp_get(p4d_k))) { no_context(regs, addr); return; } pud_k = pud_offset(p4d_k, addr); - if (!pud_present(*pud_k)) { + if (!pud_present(pudp_get(pud_k))) { no_context(regs, addr); return; } + if (pud_leaf(pudp_get(pud_k))) + goto flush_tlb; /* * Since the vmalloc area is global, it is unnecessary * to copy individual PTEs */ pmd_k = pmd_offset(pud_k, addr); - if (!pmd_present(*pmd_k)) { + if (!pmd_present(pmdp_get(pmd_k))) { no_context(regs, addr); return; } + if (pmd_leaf(pmdp_get(pmd_k))) + goto flush_tlb; /* * Make sure the actual PTE exists as well to @@ -161,7 +231,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a * silently loop forever. */ pte_k = pte_offset_kernel(pmd_k, addr); - if (!pte_present(*pte_k)) { + if (!pte_present(ptep_get(pte_k))) { no_context(regs, addr); return; } @@ -172,6 +242,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a * ordering constraint, not a cache flush; it is * necessary even after writing invalid entries. */ +flush_tlb: local_flush_tlb_page(addr); } @@ -204,7 +275,7 @@ static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. */ -asmlinkage void do_page_fault(struct pt_regs *regs) +void handle_page_fault(struct pt_regs *regs) { struct task_struct *tsk; struct vm_area_struct *vma; @@ -223,6 +294,11 @@ asmlinkage void do_page_fault(struct pt_regs *regs) if (kprobe_page_fault(regs, cause)) return; + if (user_mode(regs)) + trace_page_fault_user(addr, regs, cause); + else + trace_page_fault_kernel(addr, regs, cause); + /* * Fault-in kernel-space virtual memory on-demand. * The 'reference' page table is init_mm.pgd. @@ -232,26 +308,14 @@ asmlinkage void do_page_fault(struct pt_regs *regs) * only copy the information from the master page table, * nothing more. */ - if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) { + if ((!IS_ENABLED(CONFIG_MMU) || !IS_ENABLED(CONFIG_64BIT)) && + unlikely(addr >= VMALLOC_START && addr < VMALLOC_END)) { vmalloc_fault(regs, code, addr); return; } -#ifdef CONFIG_64BIT - /* - * Modules in 64bit kernels lie in their own virtual region which is not - * in the vmalloc region, but dealing with page faults in this region - * or the vmalloc region amounts to doing the same thing: checking that - * the mapping exists in init_mm.pgd and updating user page table, so - * just use vmalloc_fault. - */ - if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) { - vmalloc_fault(regs, code, addr); - return; - } -#endif /* Enable interrupts if they were enabled in the parent context. */ - if (likely(regs->status & SR_PIE)) + if (!regs_irqs_disabled(regs)) local_irq_enable(); /* @@ -267,10 +331,12 @@ asmlinkage void do_page_fault(struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; - if (!user_mode(regs) && addr < TASK_SIZE && - unlikely(!(regs->status & SR_SUM))) - die_kernel_fault("access to user memory without uaccess routines", - addr, regs); + if (!user_mode(regs) && addr < TASK_SIZE && unlikely(!(regs->status & SR_SUM))) { + if (fixup_exception(regs)) + return; + + die_kernel_fault("access to user memory without uaccess routines", addr, regs); + } perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); @@ -278,24 +344,45 @@ asmlinkage void do_page_fault(struct pt_regs *regs) flags |= FAULT_FLAG_WRITE; else if (cause == EXC_INST_PAGE_FAULT) flags |= FAULT_FLAG_INSTRUCTION; -retry: - mmap_read_lock(mm); - vma = find_vma(mm, addr); - if (unlikely(!vma)) { + if (!(flags & FAULT_FLAG_USER)) + goto lock_mmap; + + vma = lock_vma_under_rcu(mm, addr); + if (!vma) + goto lock_mmap; + + if (unlikely(access_error(cause, vma))) { + vma_end_read(vma); + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); tsk->thread.bad_cause = cause; - bad_area(regs, mm, code, addr); + bad_area_nosemaphore(regs, SEGV_ACCERR, addr); return; } - if (likely(vma->vm_start <= addr)) - goto good_area; - if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { - tsk->thread.bad_cause = cause; - bad_area(regs, mm, code, addr); + + fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs); + if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; + + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + no_context(regs, addr); return; } - if (unlikely(expand_stack(vma, addr))) { +lock_mmap: + +retry: + vma = lock_mm_and_find_vma(mm, addr, regs); + if (unlikely(!vma)) { tsk->thread.bad_cause = cause; - bad_area(regs, mm, code, addr); + bad_area_nosemaphore(regs, code, addr); return; } @@ -303,7 +390,6 @@ retry: * Ok, we have a good vm_area for this memory access, so * we can handle it. */ -good_area: code = SEGV_ACCERR; if (unlikely(access_error(cause, vma))) { @@ -324,8 +410,11 @@ good_area: * signal first. We do not need to release the mmap_lock because it * would already be released in __lock_page_or_retry in mm/filemap.c. */ - if (fault_signal_pending(fault, regs)) + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + no_context(regs, addr); return; + } /* The fault is fully completed (including releasing mmap lock) */ if (fault & VM_FAULT_COMPLETED) @@ -344,6 +433,7 @@ good_area: mmap_read_unlock(mm); +done: if (unlikely(fault & VM_FAULT_ERROR)) { tsk->thread.bad_cause = cause; mm_fault_error(regs, addr, fault); @@ -351,4 +441,3 @@ good_area: } return; } -NOKPROBE_SYMBOL(do_page_fault); |
