1 files changed, 236 insertions, 154 deletions
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index f76618b630f9..a885518ce1dd 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -1,36 +1,18 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
-#include <linux/signal.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/version.h>
-#include <linux/vt_kern.h>
 #include <linux/extable.h>
-#include <linux/uaccess.h>
+#include <linux/kprobes.h>
+#include <linux/mmu_context.h>
 #include <linux/perf_event.h>
 
-#include <asm/hardirq.h>
-#include <asm/mmu_context.h>
-#include <asm/traps.h>
-#include <asm/page.h>
-
 int fixup_exception(struct pt_regs *regs)
 {
 	const struct exception_table_entry *fixup;
 
 	fixup = search_exception_tables(instruction_pointer(regs));
 	if (fixup) {
-		regs->pc = fixup->nextinsn;
+		regs->pc = fixup->fixup;
 
 		return 1;
 	}
@@ -38,179 +20,279 @@ int fixup_exception(struct pt_regs *regs)
 	return 0;
 }
 
-/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- */
-asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
-			      unsigned long mmu_meh)
+static inline bool is_write(struct pt_regs *regs)
 {
-	struct vm_area_struct *vma = NULL;
-	struct task_struct *tsk = current;
-	struct mm_struct *mm = tsk->mm;
-	int si_code;
-	int fault;
-	unsigned long address = mmu_meh & PAGE_MASK;
+	switch (trap_no(regs)) {
+	case VEC_TLBINVALIDS:
+		return true;
+	case VEC_TLBMODIFIED:
+		return true;
+	}
 
-	si_code = SEGV_MAPERR;
+	return false;
+}
 
-#ifndef CONFIG_CPU_HAS_TLBI
-	/*
-	 * We fault-in kernel-space virtual memory on-demand. The
-	 * 'reference' page table is init_mm.pgd.
-	 *
-	 * NOTE! We MUST NOT take any locks for this case. We may
-	 * be in an interrupt or a critical region, and should
-	 * only copy the information from the master page table,
-	 * nothing more.
-	 */
-	if (unlikely(address >= VMALLOC_START) &&
-	    unlikely(address <= VMALLOC_END)) {
-		/*
-		 * Synchronize this task's top level page-table
-		 * with the 'reference' page table.
-		 *
-		 * Do _not_ use "tsk" here. We might be inside
-		 * an interrupt in the middle of a task switch..
-		 */
-		int offset = __pgd_offset(address);
-		pgd_t *pgd, *pgd_k;
-		pud_t *pud, *pud_k;
-		pmd_t *pmd, *pmd_k;
-		pte_t *pte_k;
+#ifdef CONFIG_CPU_HAS_LDSTEX
+static inline void csky_cmpxchg_fixup(struct pt_regs *regs)
+{
+	return;
+}
+#else
+extern unsigned long csky_cmpxchg_ldw;
+extern unsigned long csky_cmpxchg_stw;
+static inline void csky_cmpxchg_fixup(struct pt_regs *regs)
+{
+	if (trap_no(regs) != VEC_TLBMODIFIED)
+		return;
 
-		unsigned long pgd_base;
+	if (instruction_pointer(regs) == csky_cmpxchg_stw)
+		instruction_pointer_set(regs, csky_cmpxchg_ldw);
+	return;
+}
+#endif
 
-		pgd_base = (unsigned long)__va(get_pgd());
-		pgd = (pgd_t *)pgd_base + offset;
-		pgd_k = init_mm.pgd + offset;
+static inline void no_context(struct pt_regs *regs, unsigned long addr)
+{
+	current->thread.trap_no = trap_no(regs);
 
-		if (!pgd_present(*pgd_k))
-			goto no_context;
-		set_pgd(pgd, *pgd_k);
+	/* Are we prepared to handle this kernel fault? */
+	if (fixup_exception(regs))
+		return;
 
-		pud = (pud_t *)pgd;
-		pud_k = (pud_t *)pgd_k;
-		if (!pud_present(*pud_k))
-			goto no_context;
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice.
+	 */
+	bust_spinlocks(1);
+	pr_alert("Unable to handle kernel paging request at virtual "
+		 "addr 0x%08lx, pc: 0x%08lx\n", addr, regs->pc);
+	die(regs, "Oops");
+	make_task_dead(SIGKILL);
+}
 
-		pmd = pmd_offset(pud, address);
-		pmd_k = pmd_offset(pud_k, address);
-		if (!pmd_present(*pmd_k))
-			goto no_context;
-		set_pmd(pmd, *pmd_k);
+static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
+{
+	current->thread.trap_no = trap_no(regs);
 
-		pte_k = pte_offset_kernel(pmd_k, address);
-		if (!pte_present(*pte_k))
-			goto no_context;
+	if (fault & VM_FAULT_OOM) {
+		/*
+		 * We ran out of memory, call the OOM killer, and return the userspace
+		 * (which will retry the fault, or kill us if we got oom-killed).
+		 */
+		if (!user_mode(regs)) {
+			no_context(regs, addr);
+			return;
+		}
+		pagefault_out_of_memory();
+		return;
+	} else if (fault & VM_FAULT_SIGBUS) {
+		/* Kernel mode? Handle exceptions or die */
+		if (!user_mode(regs)) {
+			no_context(regs, addr);
+			return;
+		}
+		do_trap(regs, SIGBUS, BUS_ADRERR, addr);
 		return;
 	}
-#endif
+	BUG();
+}
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+static inline void bad_area_nosemaphore(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+{
 	/*
-	 * If we're in an interrupt or have no user
-	 * context, we must not take the fault..
+	 * Something tried to access memory that isn't in our memory map.
+	 * Fix it, but check if it's kernel or user first.
 	 */
-	if (in_atomic() || !mm)
-		goto bad_area_nosemaphore;
-
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, address);
-	if (!vma)
-		goto bad_area;
-	if (vma->vm_start <= address)
-		goto good_area;
-	if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto bad_area;
-	if (expand_stack(vma, address))
-		goto bad_area;
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs)) {
+		do_trap(regs, SIGSEGV, code, addr);
+		return;
+	}
+
+	no_context(regs, addr);
+}
+
+static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
+{
+	pgd_t *pgd, *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+	pte_t *pte_k;
+	int offset;
+
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs)) {
+		do_trap(regs, SIGSEGV, code, addr);
+		return;
+	}
+
 	/*
-	 * Ok, we have a good vm_area for this memory access, so
-	 * we can handle it..
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "tsk" here. We might be inside
+	 * an interrupt in the middle of a task switch..
 	 */
-good_area:
-	si_code = SEGV_ACCERR;
+	offset = pgd_index(addr);
+
+	pgd = get_pgd() + offset;
+	pgd_k = init_mm.pgd + offset;
+
+	if (!pgd_present(*pgd_k)) {
+		no_context(regs, addr);
+		return;
+	}
+	set_pgd(pgd, *pgd_k);
+
+	pud = (pud_t *)pgd;
+	pud_k = (pud_t *)pgd_k;
+	if (!pud_present(*pud_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	pmd = pmd_offset(pud, addr);
+	pmd_k = pmd_offset(pud_k, addr);
+	if (!pmd_present(*pmd_k)) {
+		no_context(regs, addr);
+		return;
+	}
+	set_pmd(pmd, *pmd_k);
+
+	pte_k = pte_offset_kernel(pmd_k, addr);
+	if (!pte_present(*pte_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	flush_tlb_one(addr);
+}
 
-	if (write) {
+static inline bool access_error(struct pt_regs *regs, struct vm_area_struct *vma)
+{
+	if (is_write(regs)) {
 		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
+			return true;
 	} else {
-		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
-			goto bad_area;
+		if (unlikely(!vma_is_accessible(vma)))
+			return true;
 	}
+	return false;
+}
+
+/*
+ * This routine handles page faults.  It determines the address and the
+ * problem, and then passes it off to one of the appropriate routines.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs)
+{
+	struct task_struct *tsk;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	unsigned long addr = read_mmu_entryhi() & PAGE_MASK;
+	unsigned int flags = FAULT_FLAG_DEFAULT;
+	int code = SEGV_MAPERR;
+	vm_fault_t fault;
+
+	tsk = current;
+	mm = tsk->mm;
+
+	csky_cmpxchg_fixup(regs);
+
+	if (kprobe_page_fault(regs, tsk->thread.trap_no))
+		return;
 
 	/*
-	 * If for any reason at all we couldn't handle the fault,
-	 * make sure we exit gracefully rather than endlessly redo
-	 * the fault.
+	 * Fault-in kernel-space virtual memory on-demand.
+	 * The 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
 	 */
-	fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0);
-	if (unlikely(fault & VM_FAULT_ERROR)) {
-		if (fault & VM_FAULT_OOM)
-			goto out_of_memory;
-		else if (fault & VM_FAULT_SIGBUS)
-			goto do_sigbus;
-		else if (fault & VM_FAULT_SIGSEGV)
-			goto bad_area;
-		BUG();
-	}
-	if (fault & VM_FAULT_MAJOR) {
-		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
-			      address);
-	} else {
-		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
-			      address);
+	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
+		vmalloc_fault(regs, code, addr);
+		return;
 	}
 
-	up_read(&mm->mmap_sem);
-	return;
+	/* Enable interrupts if they were enabled in the parent context. */
+	if (likely(regs->sr & BIT(6)))
+		local_irq_enable();
 
 	/*
-	 * Something tried to access memory that isn't in our memory map..
-	 * Fix it, but check if it's kernel or user first..
+	 * If we're in an interrupt, have no user context, or are running
+	 * in an atomic region, then we must not take the fault.
 	 */
-bad_area:
-	up_read(&mm->mmap_sem);
+	if (unlikely(faulthandler_disabled() || !mm)) {
+		no_context(regs, addr);
+		return;
+	}
 
-bad_area_nosemaphore:
-	/* User mode accesses just cause a SIGSEGV */
-	if (user_mode(regs)) {
-		force_sig_fault(SIGSEGV, si_code, (void __user *)address);
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
+	if (is_write(regs))
+		flags |= FAULT_FLAG_WRITE;
+retry:
+	vma = lock_mm_and_find_vma(mm, addr, regs);
+	if (unlikely(!vma)) {
+		bad_area_nosemaphore(regs, mm, code, addr);
 		return;
 	}
 
-no_context:
-	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs))
+	/*
+	 * Ok, we have a good vm_area for this memory access, so
+	 * we can handle it.
+	 */
+	code = SEGV_ACCERR;
+
+	if (unlikely(access_error(regs, vma))) {
+		mmap_read_unlock(mm);
+		bad_area_nosemaphore(regs, mm, code, addr);
 		return;
+	}
 
 	/*
-	 * Oops. The kernel tried to access some bad page. We'll have to
-	 * terminate things with extreme prejudice.
+	 * If for any reason at all we could not handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
 	 */
-	bust_spinlocks(1);
-	pr_alert("Unable to handle kernel paging request at virtual "
-		 "address 0x%08lx, pc: 0x%08lx\n", address, regs->pc);
-	die_if_kernel("Oops", regs, write);
+	fault = handle_mm_fault(vma, addr, flags, regs);
 
-out_of_memory:
 	/*
-	 * We ran out of memory, call the OOM killer, and return the userspace
-	 * (which will retry the fault, or kill us if we got oom-killed).
+	 * If we need to retry but a fatal signal is pending, handle the
+	 * signal first. We do not need to release the mmap_lock because it
+	 * would already be released in __lock_page_or_retry in mm/filemap.c.
 	 */
-	pagefault_out_of_memory();
-	return;
+	if (fault_signal_pending(fault, regs)) {
+		if (!user_mode(regs))
+			no_context(regs, addr);
+		return;
+	}
 
-do_sigbus:
-	up_read(&mm->mmap_sem);
+	/* The fault is fully completed (including releasing mmap lock) */
+	if (fault & VM_FAULT_COMPLETED)
+		return;
 
-	/* Kernel mode? Handle exceptions or die */
-	if (!user_mode(regs))
-		goto no_context;
+	if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+		flags |= FAULT_FLAG_TRIED;
 
-	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
+		/*
+		 * No need to mmap_read_unlock(mm) as we would
+		 * have already released it in __lock_page_or_retry
+		 * in mm/filemap.c.
+		 */
+		goto retry;
+	}
+
+	mmap_read_unlock(mm);
+
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		mm_fault_error(regs, addr, fault);
+		return;
+	}
+	return;
 }