Diffstat (limited to 'arch/riscv/mm/fault.c')
-rw-r--r--  arch/riscv/mm/fault.c  193
1 file changed, 141 insertions, 52 deletions
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index d86f7cebd4a7..04ed6f8acae4 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -15,12 +15,67 @@
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/kfence.h>
+#include <linux/entry-common.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/exceptions.h>
+
#include "../kernel/head.h"
+static void show_pte(unsigned long addr)
+{
+ pgd_t *pgdp, pgd;
+ p4d_t *p4dp, p4d;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
+ struct mm_struct *mm = current->mm;
+
+ if (!mm)
+ mm = &init_mm;
+
+ pr_alert("Current %s pgtable: %luK pagesize, %d-bit VAs, pgdp=0x%016llx\n",
+ current->comm, PAGE_SIZE / SZ_1K, VA_BITS,
+ mm == &init_mm ? (u64)__pa_symbol(mm->pgd) : virt_to_phys(mm->pgd));
+
+ pgdp = pgd_offset(mm, addr);
+ pgd = pgdp_get(pgdp);
+ pr_alert("[%016lx] pgd=%016lx", addr, pgd_val(pgd));
+ if (pgd_none(pgd) || pgd_bad(pgd) || pgd_leaf(pgd))
+ goto out;
+
+ p4dp = p4d_offset(pgdp, addr);
+ p4d = p4dp_get(p4dp);
+ pr_cont(", p4d=%016lx", p4d_val(p4d));
+ if (p4d_none(p4d) || p4d_bad(p4d) || p4d_leaf(p4d))
+ goto out;
+
+ pudp = pud_offset(p4dp, addr);
+ pud = pudp_get(pudp);
+ pr_cont(", pud=%016lx", pud_val(pud));
+ if (pud_none(pud) || pud_bad(pud) || pud_leaf(pud))
+ goto out;
+
+ pmdp = pmd_offset(pudp, addr);
+ pmd = pmdp_get(pmdp);
+ pr_cont(", pmd=%016lx", pmd_val(pmd));
+ if (pmd_none(pmd) || pmd_bad(pmd) || pmd_leaf(pmd))
+ goto out;
+
+ ptep = pte_offset_map(pmdp, addr);
+ if (!ptep)
+ goto out;
+
+ pte = ptep_get(ptep);
+ pr_cont(", pte=%016lx", pte_val(pte));
+ pte_unmap(ptep);
+out:
+ pr_cont("\n");
+}
+
static void die_kernel_fault(const char *msg, unsigned long addr,
struct pt_regs *regs)
{
@@ -30,6 +85,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
addr);
bust_spinlocks(0);
+ show_pte(addr);
die(regs, "Oops");
make_task_dead(SIGKILL);
}
@@ -60,36 +116,37 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr)
static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
{
+ if (!user_mode(regs)) {
+ no_context(regs, addr);
+ return;
+ }
+
if (fault & VM_FAULT_OOM) {
/*
* We ran out of memory, call the OOM killer, and return the userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
- if (!user_mode(regs)) {
- no_context(regs, addr);
- return;
- }
pagefault_out_of_memory();
return;
- } else if (fault & VM_FAULT_SIGBUS) {
+ } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) {
/* Kernel mode? Handle exceptions or die */
- if (!user_mode(regs)) {
- no_context(regs, addr);
- return;
- }
do_trap(regs, SIGBUS, BUS_ADRERR, addr);
return;
+ } else if (fault & VM_FAULT_SIGSEGV) {
+ do_trap(regs, SIGSEGV, SEGV_MAPERR, addr);
+ return;
}
+
BUG();
}
-static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+static inline void
+bad_area_nosemaphore(struct pt_regs *regs, int code, unsigned long addr)
{
/*
* Something tried to access memory that isn't in our memory map.
* Fix it, but check if it's kernel or user first.
*/
- mmap_read_unlock(mm);
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
do_trap(regs, SIGSEGV, code, addr);
@@ -99,6 +156,15 @@ static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code
no_context(regs, addr);
}
+static inline void
+bad_area(struct pt_regs *regs, struct mm_struct *mm, int code,
+ unsigned long addr)
+{
+ mmap_read_unlock(mm);
+
+ bad_area_nosemaphore(regs, code, addr);
+}
+
static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
{
pgd_t *pgd, *pgd_k;
@@ -126,33 +192,37 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
pgd = (pgd_t *)pfn_to_virt(pfn) + index;
pgd_k = init_mm.pgd + index;
- if (!pgd_present(*pgd_k)) {
+ if (!pgd_present(pgdp_get(pgd_k))) {
no_context(regs, addr);
return;
}
- set_pgd(pgd, *pgd_k);
+ set_pgd(pgd, pgdp_get(pgd_k));
p4d_k = p4d_offset(pgd_k, addr);
- if (!p4d_present(*p4d_k)) {
+ if (!p4d_present(p4dp_get(p4d_k))) {
no_context(regs, addr);
return;
}
pud_k = pud_offset(p4d_k, addr);
- if (!pud_present(*pud_k)) {
+ if (!pud_present(pudp_get(pud_k))) {
no_context(regs, addr);
return;
}
+ if (pud_leaf(pudp_get(pud_k)))
+ goto flush_tlb;
/*
* Since the vmalloc area is global, it is unnecessary
* to copy individual PTEs
*/
pmd_k = pmd_offset(pud_k, addr);
- if (!pmd_present(*pmd_k)) {
+ if (!pmd_present(pmdp_get(pmd_k))) {
no_context(regs, addr);
return;
}
+ if (pmd_leaf(pmdp_get(pmd_k)))
+ goto flush_tlb;
/*
* Make sure the actual PTE exists as well to
@@ -161,7 +231,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
* silently loop forever.
*/
pte_k = pte_offset_kernel(pmd_k, addr);
- if (!pte_present(*pte_k)) {
+ if (!pte_present(ptep_get(pte_k))) {
no_context(regs, addr);
return;
}
@@ -172,6 +242,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
* ordering constraint, not a cache flush; it is
* necessary even after writing invalid entries.
*/
+flush_tlb:
local_flush_tlb_page(addr);
}
@@ -204,7 +275,7 @@ static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
* This routine handles page faults. It determines the address and the
* problem, and then passes it off to one of the appropriate routines.
*/
-asmlinkage void do_page_fault(struct pt_regs *regs)
+void handle_page_fault(struct pt_regs *regs)
{
struct task_struct *tsk;
struct vm_area_struct *vma;
@@ -223,6 +294,11 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
if (kprobe_page_fault(regs, cause))
return;
+ if (user_mode(regs))
+ trace_page_fault_user(addr, regs, cause);
+ else
+ trace_page_fault_kernel(addr, regs, cause);
+
/*
* Fault-in kernel-space virtual memory on-demand.
* The 'reference' page table is init_mm.pgd.
@@ -232,26 +308,14 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
* only copy the information from the master page table,
* nothing more.
*/
- if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) {
+ if ((!IS_ENABLED(CONFIG_MMU) || !IS_ENABLED(CONFIG_64BIT)) &&
+ unlikely(addr >= VMALLOC_START && addr < VMALLOC_END)) {
vmalloc_fault(regs, code, addr);
return;
}
-#ifdef CONFIG_64BIT
- /*
- * Modules in 64bit kernels lie in their own virtual region which is not
- * in the vmalloc region, but dealing with page faults in this region
- * or the vmalloc region amounts to doing the same thing: checking that
- * the mapping exists in init_mm.pgd and updating user page table, so
- * just use vmalloc_fault.
- */
- if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) {
- vmalloc_fault(regs, code, addr);
- return;
- }
-#endif
/* Enable interrupts if they were enabled in the parent context. */
- if (likely(regs->status & SR_PIE))
+ if (!regs_irqs_disabled(regs))
local_irq_enable();
/*
@@ -267,10 +331,12 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
- if (!user_mode(regs) && addr < TASK_SIZE &&
- unlikely(!(regs->status & SR_SUM)))
- die_kernel_fault("access to user memory without uaccess routines",
- addr, regs);
+ if (!user_mode(regs) && addr < TASK_SIZE && unlikely(!(regs->status & SR_SUM))) {
+ if (fixup_exception(regs))
+ return;
+
+ die_kernel_fault("access to user memory without uaccess routines", addr, regs);
+ }
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
@@ -278,24 +344,45 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
flags |= FAULT_FLAG_WRITE;
else if (cause == EXC_INST_PAGE_FAULT)
flags |= FAULT_FLAG_INSTRUCTION;
-retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, addr);
- if (unlikely(!vma)) {
+ if (!(flags & FAULT_FLAG_USER))
+ goto lock_mmap;
+
+ vma = lock_vma_under_rcu(mm, addr);
+ if (!vma)
+ goto lock_mmap;
+
+ if (unlikely(access_error(cause, vma))) {
+ vma_end_read(vma);
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
tsk->thread.bad_cause = cause;
- bad_area(regs, mm, code, addr);
+ bad_area_nosemaphore(regs, SEGV_ACCERR, addr);
return;
}
- if (likely(vma->vm_start <= addr))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
- tsk->thread.bad_cause = cause;
- bad_area(regs, mm, code, addr);
+
+ fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
+ if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+ vma_end_read(vma);
+
+ if (!(fault & VM_FAULT_RETRY)) {
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto done;
+ }
+ count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ flags |= FAULT_FLAG_TRIED;
+
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ no_context(regs, addr);
return;
}
- if (unlikely(expand_stack(vma, addr))) {
+lock_mmap:
+
+retry:
+ vma = lock_mm_and_find_vma(mm, addr, regs);
+ if (unlikely(!vma)) {
tsk->thread.bad_cause = cause;
- bad_area(regs, mm, code, addr);
+ bad_area_nosemaphore(regs, code, addr);
return;
}
@@ -303,7 +390,6 @@ retry:
* Ok, we have a good vm_area for this memory access, so
* we can handle it.
*/
-good_area:
code = SEGV_ACCERR;
if (unlikely(access_error(cause, vma))) {
@@ -324,8 +410,11 @@ good_area:
* signal first. We do not need to release the mmap_lock because it
* would already be released in __lock_page_or_retry in mm/filemap.c.
*/
- if (fault_signal_pending(fault, regs))
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ no_context(regs, addr);
return;
+ }
/* The fault is fully completed (including releasing mmap lock) */
if (fault & VM_FAULT_COMPLETED)
@@ -344,6 +433,7 @@ good_area:
mmap_read_unlock(mm);
+done:
if (unlikely(fault & VM_FAULT_ERROR)) {
tsk->thread.bad_cause = cause;
mm_fault_error(regs, addr, fault);
@@ -351,4 +441,3 @@ good_area:
}
return;
}
-NOKPROBE_SYMBOL(do_page_fault);
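
Note on the new show_pte() helper added above: it descends the page-table levels for the faulting address, printing each entry and stopping at the first level that is none, bad, or a leaf. The user-space sketch below mirrors that walk on a toy two-level table; every name in it (show_toy_pte, struct level1, and so on) is an illustrative stand-in, not a kernel API.

/*
 * User-space analogy of the show_pte() walk above (illustrative only,
 * not kernel code): descend a toy two-level table, print each entry,
 * and stop at the first level that is absent.
 */
#include <stdio.h>
#include <stdint.h>

#define L1_ENTRIES	4
#define L0_ENTRIES	16
#define L1_SHIFT	4		/* toy: bits [5:4] index level 1 */
#define L0_MASK		0xfu		/* toy: bits [3:0] index level 0 */

struct level0 { uint64_t pte[L0_ENTRIES]; };
struct level1 { struct level0 *next[L1_ENTRIES]; };

static void show_toy_pte(struct level1 *root, unsigned long addr)
{
	unsigned long i1 = (addr >> L1_SHIFT) % L1_ENTRIES;
	unsigned long i0 = addr & L0_MASK;
	struct level0 *l0 = root->next[i1];

	/* Print the top-level entry first, like the pgd line in show_pte(). */
	printf("[%#lx] l1[%lu]=%p", addr, i1, (void *)l0);
	if (!l0)			/* absent level: stop, like pgd_none() */
		goto out;

	printf(", l0[%lu]=%#llx", i0, (unsigned long long)l0->pte[i0]);
out:
	printf("\n");
}

int main(void)
{
	static struct level0 leaf = { .pte = { [2] = 0xdeadbeefULL } };
	struct level1 root = { .next = { [1] = &leaf } };

	show_toy_pte(&root, 0x12);	/* both levels present */
	show_toy_pte(&root, 0x32);	/* level-1 entry absent: stops early */
	return 0;
}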
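
The largest change above reworks handle_page_fault() to try a per-VMA-lock fast path (lock_vma_under_rcu() plus FAULT_FLAG_VMA_LOCK) before falling back to the traditional mmap-lock path via lock_mm_and_find_vma(). The condensed, user-space sketch below only illustrates that try-fast-path-then-fall-back control flow; all helpers in it are hypothetical stand-ins, not the kernel functions used in the patch.

/*
 * Condensed sketch (illustrative only) of the reworked fault flow:
 * try a per-VMA fast path first, fall back to the coarse-lock path
 * on a miss or a retry request.
 */
#include <stdbool.h>
#include <stdio.h>

enum fault_result { FAULT_DONE, FAULT_RETRY };

/* Stand-in for lock_vma_under_rcu(): the fast lookup may simply miss. */
static bool vma_fast_lookup(unsigned long addr)
{
	return addr & 1;
}

/* Stand-in for handle_mm_fault(..., FAULT_FLAG_VMA_LOCK, ...). */
static enum fault_result fault_fast(unsigned long addr)
{
	return (addr & 2) ? FAULT_RETRY : FAULT_DONE;
}

/* Stand-in for lock_mm_and_find_vma() + handle_mm_fault(). */
static enum fault_result fault_locked(unsigned long addr)
{
	(void)addr;
	return FAULT_DONE;
}

static void fault_sketch(unsigned long addr)
{
	if (!vma_fast_lookup(addr))
		goto lock_mmap;			/* no fast path: take the lock */

	if (fault_fast(addr) != FAULT_RETRY) {
		printf("%#lx: done on VMA-lock fast path\n", addr);
		return;				/* never took the coarse lock */
	}
	/* Fast path asked for a retry: fall through to the locked path. */

lock_mmap:
	fault_locked(addr);
	printf("%#lx: done on mmap-lock path\n", addr);
}

int main(void)
{
	fault_sketch(0x1001);	/* fast path handles it */
	fault_sketch(0x1003);	/* fast path retries, falls back */
	fault_sketch(0x1000);	/* fast lookup misses, goes straight to the lock */
	return 0;
}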