Diffstat (limited to 'arch/csky/mm')
-rw-r--r--   arch/csky/mm/Makefile      |   6
-rw-r--r--   arch/csky/mm/asid.c        | 188
-rw-r--r--   arch/csky/mm/cachev1.c     |  12
-rw-r--r--   arch/csky/mm/cachev2.c     |  79
-rw-r--r--   arch/csky/mm/context.c     |  46
-rw-r--r--   arch/csky/mm/dma-mapping.c |  95
-rw-r--r--   arch/csky/mm/fault.c       | 389
-rw-r--r--   arch/csky/mm/highmem.c     | 174
-rw-r--r--   arch/csky/mm/init.c        | 161
-rw-r--r--   arch/csky/mm/ioremap.c     |  45
-rw-r--r--   arch/csky/mm/syscache.c    |  18
-rw-r--r--   arch/csky/mm/tcm.c         | 169
-rw-r--r--   arch/csky/mm/tlb.c         | 267
13 files changed, 994 insertions, 655 deletions
diff --git a/arch/csky/mm/Makefile b/arch/csky/mm/Makefile index c870eb36efbc..6e7696e55f71 100644 --- a/arch/csky/mm/Makefile +++ b/arch/csky/mm/Makefile @@ -1,7 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only ifeq ($(CONFIG_CPU_HAS_CACHEV2),y) obj-y += cachev2.o +CFLAGS_REMOVE_cachev2.o = $(CC_FLAGS_FTRACE) else obj-y += cachev1.o +CFLAGS_REMOVE_cachev1.o = $(CC_FLAGS_FTRACE) endif obj-y += dma-mapping.o @@ -11,3 +14,6 @@ obj-y += init.o obj-y += ioremap.o obj-y += syscache.o obj-y += tlb.o +obj-y += asid.o +obj-y += context.o +obj-$(CONFIG_HAVE_TCM) += tcm.o diff --git a/arch/csky/mm/asid.c b/arch/csky/mm/asid.c new file mode 100644 index 000000000000..7fb6c417bbac --- /dev/null +++ b/arch/csky/mm/asid.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic ASID allocator. + * + * Based on arch/arm/mm/context.c + * + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 2012 ARM Ltd. + */ + +#include <linux/slab.h> +#include <linux/mm_types.h> + +#include <asm/asid.h> + +#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu) + +#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0)) +#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits)) + +#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift) +#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info)) + +static void flush_context(struct asid_info *info) +{ + int i; + u64 asid; + + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_zero(info->map, NUM_CTXT_ASIDS(info)); + + for_each_possible_cpu(i) { + asid = atomic64_xchg_relaxed(&active_asid(info, i), 0); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = reserved_asid(info, i); + __set_bit(asid2idx(info, asid), info->map); + reserved_asid(info, i) = asid; + } + + /* + * Queue a TLB invalidation for each CPU to perform on next + * context-switch + */ + cpumask_setall(&info->flush_pending); +} + +static bool check_update_reserved_asid(struct asid_info *info, u64 asid, + u64 newasid) +{ + int cpu; + bool hit = false; + + /* + * Iterate over the set of reserved ASIDs looking for a match. + * If we find one, then we can update our mm to use newasid + * (i.e. the same ASID in the current generation) but we can't + * exit the loop early, since we need to ensure that all copies + * of the old ASID are updated to reflect the mm. Failure to do + * so could result in us missing the reserved ASID in a future + * generation. + */ + for_each_possible_cpu(cpu) { + if (reserved_asid(info, cpu) == asid) { + hit = true; + reserved_asid(info, cpu) = newasid; + } + } + + return hit; +} + +static u64 new_context(struct asid_info *info, atomic64_t *pasid, + struct mm_struct *mm) +{ + static u32 cur_idx = 1; + u64 asid = atomic64_read(pasid); + u64 generation = atomic64_read(&info->generation); + + if (asid != 0) { + u64 newasid = generation | (asid & ~ASID_MASK(info)); + + /* + * If our current ASID was active during a rollover, we + * can continue to use it and this was just a false alarm. + */ + if (check_update_reserved_asid(info, asid, newasid)) + return newasid; + + /* + * We had a valid ASID in a previous life, so try to re-use + * it if possible. 
+ */ + if (!__test_and_set_bit(asid2idx(info, asid), info->map)) + return newasid; + } + + /* + * Allocate a free ASID. If we can't find one, take a note of the + * currently active ASIDs and mark the TLBs as requiring flushes. We + * always count from ASID #2 (index 1), as we use ASID #0 when setting + * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd + * pairs. + */ + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx); + if (asid != NUM_CTXT_ASIDS(info)) + goto set_asid; + + /* We're out of ASIDs, so increment the global generation count */ + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info), + &info->generation); + flush_context(info); + + /* We have more ASIDs than CPUs, so this will always succeed */ + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1); + +set_asid: + __set_bit(asid, info->map); + cur_idx = asid; + cpumask_clear(mm_cpumask(mm)); + return idx2asid(info, asid) | generation; +} + +/* + * Generate a new ASID for the context. + * + * @pasid: Pointer to the current ASID batch allocated. It will be updated + * with the new ASID batch. + * @cpu: current CPU ID. Must have been acquired through get_cpu() + */ +void asid_new_context(struct asid_info *info, atomic64_t *pasid, + unsigned int cpu, struct mm_struct *mm) +{ + unsigned long flags; + u64 asid; + + raw_spin_lock_irqsave(&info->lock, flags); + /* Check that our ASID belongs to the current generation. */ + asid = atomic64_read(pasid); + if ((asid ^ atomic64_read(&info->generation)) >> info->bits) { + asid = new_context(info, pasid, mm); + atomic64_set(pasid, asid); + } + + if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending)) + info->flush_cpu_ctxt_cb(); + + atomic64_set(&active_asid(info, cpu), asid); + cpumask_set_cpu(cpu, mm_cpumask(mm)); + raw_spin_unlock_irqrestore(&info->lock, flags); +} + +/* + * Initialize the ASID allocator + * + * @info: Pointer to the asid allocator structure + * @bits: Number of ASIDs available + * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are + * allocated contiguously for a given context. This value should be a power of + * 2. + */ +int asid_allocator_init(struct asid_info *info, + u32 bits, unsigned int asid_per_ctxt, + void (*flush_cpu_ctxt_cb)(void)) +{ + info->bits = bits; + info->ctxt_shift = ilog2(asid_per_ctxt); + info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb; + /* + * Expect allocation after rollover to fail if we don't have at least + * one more ASID than CPUs. ASID #0 is always reserved. 
+ */ + WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus()); + atomic64_set(&info->generation, ASID_FIRST_VERSION(info)); + info->map = bitmap_zalloc(NUM_CTXT_ASIDS(info), GFP_KERNEL); + if (!info->map) + return -ENOMEM; + + raw_spin_lock_init(&info->lock); + + return 0; +} diff --git a/arch/csky/mm/cachev1.c b/arch/csky/mm/cachev1.c index b8a75cce0b8c..5a5a9804a0e3 100644 --- a/arch/csky/mm/cachev1.c +++ b/arch/csky/mm/cachev1.c @@ -94,6 +94,11 @@ void icache_inv_all(void) cache_op_all(INS_CACHE|CACHE_INV, 0); } +void local_icache_inv_all(void *priv) +{ + cache_op_all(INS_CACHE|CACHE_INV, 0); +} + void dcache_wb_range(unsigned long start, unsigned long end) { cache_op_range(start, end, DATA_CACHE|CACHE_CLR, 0); @@ -120,7 +125,12 @@ void dma_wbinv_range(unsigned long start, unsigned long end) cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); } +void dma_inv_range(unsigned long start, unsigned long end) +{ + cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); +} + void dma_wb_range(unsigned long start, unsigned long end) { - cache_op_range(start, end, DATA_CACHE|CACHE_INV, 1); + cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); } diff --git a/arch/csky/mm/cachev2.c b/arch/csky/mm/cachev2.c index baaf05d69f44..7a9664adce43 100644 --- a/arch/csky/mm/cachev2.c +++ b/arch/csky/mm/cachev2.c @@ -3,15 +3,24 @@ #include <linux/spinlock.h> #include <linux/smp.h> +#include <linux/mm.h> #include <asm/cache.h> #include <asm/barrier.h> -inline void dcache_wb_line(unsigned long start) +/* for L1-cache */ +#define INS_CACHE (1 << 0) +#define DATA_CACHE (1 << 1) +#define CACHE_INV (1 << 4) +#define CACHE_CLR (1 << 5) +#define CACHE_OMS (1 << 6) + +void local_icache_inv_all(void *priv) { - asm volatile("dcache.cval1 %0\n"::"r"(start):"memory"); + mtcr("cr17", INS_CACHE|CACHE_INV); sync_is(); } +#ifdef CONFIG_CPU_HAS_ICACHE_INS void icache_inv_range(unsigned long start, unsigned long end) { unsigned long i = start & ~(L1_CACHE_BYTES - 1); @@ -20,43 +29,66 @@ void icache_inv_range(unsigned long start, unsigned long end) asm volatile("icache.iva %0\n"::"r"(i):"memory"); sync_is(); } +#else +struct cache_range { + unsigned long start; + unsigned long end; +}; -void icache_inv_all(void) +static DEFINE_SPINLOCK(cache_lock); + +static inline void cache_op_line(unsigned long i, unsigned int val) { - asm volatile("icache.ialls\n":::"memory"); - sync_is(); + mtcr("cr22", i); + mtcr("cr17", val); } -void dcache_wb_range(unsigned long start, unsigned long end) +void local_icache_inv_range(void *priv) { - unsigned long i = start & ~(L1_CACHE_BYTES - 1); + struct cache_range *param = priv; + unsigned long i = param->start & ~(L1_CACHE_BYTES - 1); + unsigned long flags; + + spin_lock_irqsave(&cache_lock, flags); + + for (; i < param->end; i += L1_CACHE_BYTES) + cache_op_line(i, INS_CACHE | CACHE_INV | CACHE_OMS); + + spin_unlock_irqrestore(&cache_lock, flags); - for (; i < end; i += L1_CACHE_BYTES) - asm volatile("dcache.cval1 %0\n"::"r"(i):"memory"); sync_is(); } -void dcache_inv_range(unsigned long start, unsigned long end) +void icache_inv_range(unsigned long start, unsigned long end) { - unsigned long i = start & ~(L1_CACHE_BYTES - 1); + struct cache_range param = { start, end }; - for (; i < end; i += L1_CACHE_BYTES) - asm volatile("dcache.civa %0\n"::"r"(i):"memory"); + if (irqs_disabled()) + local_icache_inv_range(¶m); + else + on_each_cpu(local_icache_inv_range, ¶m, 1); +} +#endif + +inline void dcache_wb_line(unsigned long start) +{ + asm volatile("dcache.cval1 
%0\n"::"r"(start):"memory"); sync_is(); } -void cache_wbinv_range(unsigned long start, unsigned long end) +void dcache_wb_range(unsigned long start, unsigned long end) { unsigned long i = start & ~(L1_CACHE_BYTES - 1); for (; i < end; i += L1_CACHE_BYTES) asm volatile("dcache.cval1 %0\n"::"r"(i):"memory"); sync_is(); +} - i = start & ~(L1_CACHE_BYTES - 1); - for (; i < end; i += L1_CACHE_BYTES) - asm volatile("icache.iva %0\n"::"r"(i):"memory"); - sync_is(); +void cache_wbinv_range(unsigned long start, unsigned long end) +{ + dcache_wb_range(start, end); + icache_inv_range(start, end); } EXPORT_SYMBOL(cache_wbinv_range); @@ -69,11 +101,20 @@ void dma_wbinv_range(unsigned long start, unsigned long end) sync_is(); } +void dma_inv_range(unsigned long start, unsigned long end) +{ + unsigned long i = start & ~(L1_CACHE_BYTES - 1); + + for (; i < end; i += L1_CACHE_BYTES) + asm volatile("dcache.iva %0\n"::"r"(i):"memory"); + sync_is(); +} + void dma_wb_range(unsigned long start, unsigned long end) { unsigned long i = start & ~(L1_CACHE_BYTES - 1); for (; i < end; i += L1_CACHE_BYTES) - asm volatile("dcache.civa %0\n"::"r"(i):"memory"); + asm volatile("dcache.cva %0\n"::"r"(i):"memory"); sync_is(); } diff --git a/arch/csky/mm/context.c b/arch/csky/mm/context.c new file mode 100644 index 000000000000..0d95bdd93846 --- /dev/null +++ b/arch/csky/mm/context.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include <linux/bitops.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/mm.h> + +#include <asm/asid.h> +#include <asm/mmu_context.h> +#include <asm/smp.h> +#include <asm/tlbflush.h> + +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); + +struct asid_info asid_info; + +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) +{ + asid_check_context(&asid_info, &mm->context.asid, cpu, mm); +} + +static void asid_flush_cpu_ctxt(void) +{ + local_tlb_invalid_all(); +} + +static int asids_init(void) +{ + BUG_ON(((1 << CONFIG_CPU_ASID_BITS) - 1) <= num_possible_cpus()); + + if (asid_allocator_init(&asid_info, CONFIG_CPU_ASID_BITS, 1, + asid_flush_cpu_ctxt)) + panic("Unable to initialize ASID allocator for %lu ASIDs\n", + NUM_ASIDS(&asid_info)); + + asid_info.active = &active_asids; + asid_info.reserved = &reserved_asids; + + pr_info("ASID allocator initialised with %lu entries\n", + NUM_CTXT_ASIDS(&asid_info)); + + return 0; +} +early_initcall(asids_init); diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index 80783bb71c5c..82447029feb4 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -2,89 +2,61 @@ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. #include <linux/cache.h> -#include <linux/dma-mapping.h> -#include <linux/dma-contiguous.h> -#include <linux/dma-noncoherent.h> +#include <linux/dma-map-ops.h> #include <linux/genalloc.h> #include <linux/highmem.h> #include <linux/io.h> #include <linux/mm.h> #include <linux/scatterlist.h> #include <linux/types.h> -#include <linux/version.h> #include <asm/cache.h> -static int __init atomic_pool_init(void) -{ - return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); -} -postcore_initcall(atomic_pool_init); - -void arch_dma_prep_coherent(struct page *page, size_t size) -{ - if (PageHighMem(page)) { - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - do { - void *ptr = kmap_atomic(page); - size_t _size = (size < PAGE_SIZE) ? 
size : PAGE_SIZE; - - memset(ptr, 0, _size); - dma_wbinv_range((unsigned long)ptr, - (unsigned long)ptr + _size); - - kunmap_atomic(ptr); - - page++; - size -= PAGE_SIZE; - count--; - } while (count); - } else { - void *ptr = page_address(page); - - memset(ptr, 0, size); - dma_wbinv_range((unsigned long)ptr, (unsigned long)ptr + size); - } -} - static inline void cache_op(phys_addr_t paddr, size_t size, void (*fn)(unsigned long start, unsigned long end)) { - struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); - unsigned int offset = paddr & ~PAGE_MASK; - size_t left = size; - unsigned long start; + struct page *page = phys_to_page(paddr); + void *start = __va(page_to_phys(page)); + unsigned long offset = offset_in_page(paddr); + size_t left = size; do { size_t len = left; + if (offset + len > PAGE_SIZE) + len = PAGE_SIZE - offset; + if (PageHighMem(page)) { - void *addr; + start = kmap_atomic(page); - if (offset + len > PAGE_SIZE) { - if (offset >= PAGE_SIZE) { - page += offset >> PAGE_SHIFT; - offset &= ~PAGE_MASK; - } - len = PAGE_SIZE - offset; - } + fn((unsigned long)start + offset, + (unsigned long)start + offset + len); - addr = kmap_atomic(page); - start = (unsigned long)(addr + offset); - fn(start, start + len); - kunmap_atomic(addr); + kunmap_atomic(start); } else { - start = (unsigned long)phys_to_virt(paddr); - fn(start, start + size); + fn((unsigned long)start + offset, + (unsigned long)start + offset + len); } offset = 0; + page++; + start += PAGE_SIZE; left -= len; } while (left); } -void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir) +static void dma_wbinv_set_zero_range(unsigned long start, unsigned long end) +{ + memset((void *)start, 0, end - start); + dma_wbinv_range(start, end); +} + +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + cache_op(page_to_phys(page), size, dma_wbinv_set_zero_range); +} + +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { switch (dir) { case DMA_TO_DEVICE: @@ -99,16 +71,15 @@ void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, } } -void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir) +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { switch (dir) { case DMA_TO_DEVICE: - cache_op(paddr, size, dma_wb_range); - break; + return; case DMA_FROM_DEVICE: case DMA_BIDIRECTIONAL: - cache_op(paddr, size, dma_wbinv_range); + cache_op(paddr, size, dma_inv_range); break; default: BUG(); diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index d6f4b66b93e2..a6ca7dff4215 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -1,28 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 
-#include <linux/signal.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/version.h> -#include <linux/vt_kern.h> -#include <linux/kernel.h> #include <linux/extable.h> -#include <linux/uaccess.h> - -#include <asm/hardirq.h> -#include <asm/mmu_context.h> -#include <asm/traps.h> -#include <asm/page.h> +#include <linux/kprobes.h> +#include <linux/mmu_context.h> +#include <linux/perf_event.h> int fixup_exception(struct pt_regs *regs) { @@ -30,7 +12,7 @@ int fixup_exception(struct pt_regs *regs) fixup = search_exception_tables(instruction_pointer(regs)); if (fixup) { - regs->pc = fixup->nextinsn; + regs->pc = fixup->fixup; return 1; } @@ -38,172 +20,279 @@ int fixup_exception(struct pt_regs *regs) return 0; } +static inline bool is_write(struct pt_regs *regs) +{ + switch (trap_no(regs)) { + case VEC_TLBINVALIDS: + return true; + case VEC_TLBMODIFIED: + return true; + } + + return false; +} + +#ifdef CONFIG_CPU_HAS_LDSTEX +static inline void csky_cmpxchg_fixup(struct pt_regs *regs) +{ + return; +} +#else +extern unsigned long csky_cmpxchg_ldw; +extern unsigned long csky_cmpxchg_stw; +static inline void csky_cmpxchg_fixup(struct pt_regs *regs) +{ + if (trap_no(regs) != VEC_TLBMODIFIED) + return; + + if (instruction_pointer(regs) == csky_cmpxchg_stw) + instruction_pointer_set(regs, csky_cmpxchg_ldw); + return; +} +#endif + +static inline void no_context(struct pt_regs *regs, unsigned long addr) +{ + current->thread.trap_no = trap_no(regs); + + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + bust_spinlocks(1); + pr_alert("Unable to handle kernel paging request at virtual " + "addr 0x%08lx, pc: 0x%08lx\n", addr, regs->pc); + die(regs, "Oops"); + make_task_dead(SIGKILL); +} + +static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) +{ + current->thread.trap_no = trap_no(regs); + + if (fault & VM_FAULT_OOM) { + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + pagefault_out_of_memory(); + return; + } else if (fault & VM_FAULT_SIGBUS) { + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + do_trap(regs, SIGBUS, BUS_ADRERR, addr); + return; + } + BUG(); +} + +static inline void bad_area_nosemaphore(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr) +{ + /* + * Something tried to access memory that isn't in our memory map. + * Fix it, but check if it's kernel or user first. 
+ */ + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) { + do_trap(regs, SIGSEGV, code, addr); + return; + } + + no_context(regs, addr); +} + +static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) +{ + pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + pte_t *pte_k; + int offset; + + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) { + do_trap(regs, SIGSEGV, code, addr); + return; + } + + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "tsk" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + offset = pgd_index(addr); + + pgd = get_pgd() + offset; + pgd_k = init_mm.pgd + offset; + + if (!pgd_present(*pgd_k)) { + no_context(regs, addr); + return; + } + set_pgd(pgd, *pgd_k); + + pud = (pud_t *)pgd; + pud_k = (pud_t *)pgd_k; + if (!pud_present(*pud_k)) { + no_context(regs, addr); + return; + } + + pmd = pmd_offset(pud, addr); + pmd_k = pmd_offset(pud_k, addr); + if (!pmd_present(*pmd_k)) { + no_context(regs, addr); + return; + } + set_pmd(pmd, *pmd_k); + + pte_k = pte_offset_kernel(pmd_k, addr); + if (!pte_present(*pte_k)) { + no_context(regs, addr); + return; + } + + flush_tlb_one(addr); +} + +static inline bool access_error(struct pt_regs *regs, struct vm_area_struct *vma) +{ + if (is_write(regs)) { + if (!(vma->vm_flags & VM_WRITE)) + return true; + } else { + if (unlikely(!vma_is_accessible(vma))) + return true; + } + return false; +} + /* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. + * This routine handles page faults. It determines the address and the + * problem, and then passes it off to one of the appropriate routines. */ -asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, - unsigned long mmu_meh) +asmlinkage void do_page_fault(struct pt_regs *regs) { - struct vm_area_struct *vma = NULL; - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - int si_code; - int fault; - unsigned long address = mmu_meh & PAGE_MASK; + struct task_struct *tsk; + struct vm_area_struct *vma; + struct mm_struct *mm; + unsigned long addr = read_mmu_entryhi() & PAGE_MASK; + unsigned int flags = FAULT_FLAG_DEFAULT; + int code = SEGV_MAPERR; + vm_fault_t fault; + + tsk = current; + mm = tsk->mm; - si_code = SEGV_MAPERR; + csky_cmpxchg_fixup(regs); + + if (kprobe_page_fault(regs, tsk->thread.trap_no)) + return; -#ifndef CONFIG_CPU_HAS_TLBI /* - * We fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. + * Fault-in kernel-space virtual memory on-demand. + * The 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. */ - if (unlikely(address >= VMALLOC_START) && - unlikely(address <= VMALLOC_END)) { - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "tsk" here. We might be inside - * an interrupt in the middle of a task switch.. 
- */ - int offset = __pgd_offset(address); - pgd_t *pgd, *pgd_k; - pud_t *pud, *pud_k; - pmd_t *pmd, *pmd_k; - pte_t *pte_k; - - unsigned long pgd_base; + if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) { + vmalloc_fault(regs, code, addr); + return; + } - pgd_base = tlb_get_pgd(); - pgd = (pgd_t *)pgd_base + offset; - pgd_k = init_mm.pgd + offset; + /* Enable interrupts if they were enabled in the parent context. */ + if (likely(regs->sr & BIT(6))) + local_irq_enable(); - if (!pgd_present(*pgd_k)) - goto no_context; - set_pgd(pgd, *pgd_k); + /* + * If we're in an interrupt, have no user context, or are running + * in an atomic region, then we must not take the fault. + */ + if (unlikely(faulthandler_disabled() || !mm)) { + no_context(regs, addr); + return; + } - pud = (pud_t *)pgd; - pud_k = (pud_t *)pgd_k; - if (!pud_present(*pud_k)) - goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; - pmd = pmd_offset(pud, address); - pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) - goto no_context; - set_pmd(pmd, *pmd_k); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); - pte_k = pte_offset_kernel(pmd_k, address); - if (!pte_present(*pte_k)) - goto no_context; + if (is_write(regs)) + flags |= FAULT_FLAG_WRITE; +retry: + vma = lock_mm_and_find_vma(mm, addr, regs); + if (unlikely(!vma)) { + bad_area_nosemaphore(regs, mm, code, addr); return; } -#endif - /* - * If we're in an interrupt or have no user - * context, we must not take the fault.. - */ - if (in_atomic() || !mm) - goto bad_area_nosemaphore; - - down_read(&mm->mmap_sem); - vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (expand_stack(vma, address)) - goto bad_area; + /* * Ok, we have a good vm_area for this memory access, so - * we can handle it.. + * we can handle it. */ -good_area: - si_code = SEGV_ACCERR; + code = SEGV_ACCERR; - if (write) { - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - } else { - if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) - goto bad_area; + if (unlikely(access_error(regs, vma))) { + mmap_read_unlock(mm); + bad_area_nosemaphore(regs, mm, code, addr); + return; } /* - * If for any reason at all we couldn't handle the fault, + * If for any reason at all we could not handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0); - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - else if (fault & VM_FAULT_SIGSEGV) - goto bad_area; - BUG(); - } - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; - - up_read(&mm->mmap_sem); - return; + fault = handle_mm_fault(vma, addr, flags, regs); /* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. + * If we need to retry but a fatal signal is pending, handle the + * signal first. We do not need to release the mmap_lock because it + * would already be released in __lock_page_or_retry in mm/filemap.c. 
*/ -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (user_mode(regs)) { - force_sig_fault(SIGSEGV, si_code, (void __user *)address, current); + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + no_context(regs, addr); return; } -no_context: - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) return; - /* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ - bust_spinlocks(1); - pr_alert("Unable to handle kernel paging request at virtual " - "address 0x%08lx, pc: 0x%08lx\n", address, regs->pc); - die_if_kernel("Oops", regs, write); - -out_of_memory: - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed). - */ - pagefault_out_of_memory(); - return; + if (unlikely(fault & VM_FAULT_RETRY)) { + flags |= FAULT_FLAG_TRIED; -do_sigbus: - up_read(&mm->mmap_sem); + /* + * No need to mmap_read_unlock(mm) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + goto retry; + } - /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) - goto no_context; + mmap_read_unlock(mm); - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + if (unlikely(fault & VM_FAULT_ERROR)) { + mm_fault_error(regs, addr, fault); + return; + } + return; } diff --git a/arch/csky/mm/highmem.c b/arch/csky/mm/highmem.c index 53b1bfa4c462..4161df3c6c15 100644 --- a/arch/csky/mm/highmem.c +++ b/arch/csky/mm/highmem.c @@ -9,190 +9,28 @@ #include <asm/tlbflush.h> #include <asm/cacheflush.h> -static pte_t *kmap_pte; - unsigned long highstart_pfn, highend_pfn; -void *kmap(struct page *page) -{ - void *addr; - - might_sleep(); - if (!PageHighMem(page)) - return page_address(page); - addr = kmap_high(page); - flush_tlb_one((unsigned long)addr); - - return addr; -} -EXPORT_SYMBOL(kmap); - -void kunmap(struct page *page) -{ - BUG_ON(in_interrupt()); - if (!PageHighMem(page)) - return; - kunmap_high(page); -} -EXPORT_SYMBOL(kunmap); - -void *kmap_atomic(struct page *page) +void kmap_flush_tlb(unsigned long addr) { - unsigned long vaddr; - int idx, type; - - preempt_disable(); - pagefault_disable(); - if (!PageHighMem(page)) - return page_address(page); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte - idx))); -#endif - set_pte(kmap_pte-idx, mk_pte(page, PAGE_KERNEL)); - flush_tlb_one((unsigned long)vaddr); - - return (void *)vaddr; -} -EXPORT_SYMBOL(kmap_atomic); - -void __kunmap_atomic(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int idx; - - if (vaddr < FIXADDR_START) - goto out; - -#ifdef CONFIG_DEBUG_HIGHMEM - idx = KM_TYPE_NR*smp_processor_id() + kmap_atomic_idx(); - - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - - pte_clear(&init_mm, vaddr, kmap_pte - idx); - flush_tlb_one(vaddr); -#else - (void) idx; /* to kill a warning */ -#endif - kmap_atomic_idx_pop(); -out: - pagefault_enable(); - preempt_enable(); -} -EXPORT_SYMBOL(__kunmap_atomic); - -/* - * This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. 
- */ -void *kmap_atomic_pfn(unsigned long pfn) -{ - unsigned long vaddr; - int idx, type; - - pagefault_disable(); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, PAGE_KERNEL)); - flush_tlb_one(vaddr); - - return (void *) vaddr; + flush_tlb_one(addr); } +EXPORT_SYMBOL(kmap_flush_tlb); -struct page *kmap_atomic_to_page(void *ptr) -{ - unsigned long idx, vaddr = (unsigned long)ptr; - pte_t *pte; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); - return pte_page(*pte); -} - -static void __init fixrange_init(unsigned long start, unsigned long end, - pgd_t *pgd_base) -{ -#ifdef CONFIG_HIGHMEM - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int i, j, k; - unsigned long vaddr; - - vaddr = start; - i = __pgd_offset(vaddr); - j = __pud_offset(vaddr); - k = __pmd_offset(vaddr); - pgd = pgd_base + i; - - for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { - pud = (pud_t *)pgd; - for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) { - pmd = (pmd_t *)pud; - for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) { - if (pmd_none(*pmd)) { - pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - set_pmd(pmd, __pmd(__pa(pte))); - BUG_ON(pte != pte_offset_kernel(pmd, 0)); - } - vaddr += PMD_SIZE; - } - k = 0; - } - j = 0; - } -#endif -} - -void __init fixaddr_kmap_pages_init(void) +void __init kmap_init(void) { unsigned long vaddr; - pgd_t *pgd_base; -#ifdef CONFIG_HIGHMEM pgd_t *pgd; pmd_t *pmd; pud_t *pud; pte_t *pte; -#endif - pgd_base = swapper_pg_dir; - /* - * Fixed mappings: - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, 0, pgd_base); - -#ifdef CONFIG_HIGHMEM - /* - * Permanent kmaps: - */ vaddr = PKMAP_BASE; - fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir); - pgd = swapper_pg_dir + __pgd_offset(vaddr); + pgd = swapper_pg_dir + pgd_index(vaddr); pud = (pud_t *)pgd; pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); pkmap_page_table = pte; -#endif -} - -void __init kmap_init(void) -{ - unsigned long vaddr; - - fixaddr_kmap_pages_init(); - - vaddr = __fix_to_virt(FIX_KMAP_BEGIN); - - kmap_pte = pte_offset_kernel((pmd_t *)pgd_offset_k(vaddr), vaddr); } diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index 66e597053488..573da66b2543 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -19,103 +19,134 @@ #include <linux/swap.h> #include <linux/proc_fs.h> #include <linux/pfn.h> +#include <linux/initrd.h> #include <asm/setup.h> #include <asm/cachectl.h> #include <asm/dma.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/mmu_context.h> #include <asm/sections.h> #include <asm/tlb.h> +#include <asm/cacheflush.h> + +#define PTRS_KERN_TABLE \ + ((PTRS_PER_PGD - USER_PTRS_PER_PGD) * PTRS_PER_PTE) pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; +pte_t kernel_pte_tables[PTRS_KERN_TABLE] __page_aligned_bss; + +EXPORT_SYMBOL(invalid_pte_table); unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -void __init mem_init(void) +void free_initmem(void) { -#ifdef CONFIG_HIGHMEM - unsigned long tmp; - - max_mapnr = highend_pfn; -#else - max_mapnr = max_low_pfn; -#endif - high_memory = (void *) 
__va(max_low_pfn << PAGE_SHIFT); + free_initmem_default(-1); +} - memblock_free_all(); +void pgd_init(unsigned long *p) +{ + int i; -#ifdef CONFIG_HIGHMEM - for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); + for (i = 0; i < PTRS_PER_PGD; i++) + p[i] = __pa(invalid_pte_table); - /* FIXME not sure about */ - if (!memblock_is_reserved(tmp << PAGE_SHIFT)) - free_highmem_page(page); - } -#endif - mem_init_print_info(NULL); + flush_tlb_all(); + local_icache_inv_all(NULL); } -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) +void __init mmu_init(unsigned long min_pfn, unsigned long max_pfn) { - if (start < end) - pr_info("Freeing initrd memory: %ldk freed\n", - (end - start) >> 10); - - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages_inc(); - } -} -#endif + int i; -extern char __init_begin[], __init_end[]; + for (i = 0; i < USER_PTRS_PER_PGD; i++) + swapper_pg_dir[i].pgd = __pa(invalid_pte_table); -void free_initmem(void) -{ - unsigned long addr; + for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) + swapper_pg_dir[i].pgd = + __pa(kernel_pte_tables + (PTRS_PER_PTE * (i - USER_PTRS_PER_PGD))); - addr = (unsigned long) &__init_begin; + for (i = 0; i < PTRS_KERN_TABLE; i++) + set_pte(&kernel_pte_tables[i], __pte(_PAGE_GLOBAL)); - while (addr < (unsigned long) &__init_end) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages_inc(); - addr += PAGE_SIZE; - } + for (i = min_pfn; i < max_pfn; i++) + set_pte(&kernel_pte_tables[i - PFN_DOWN(va_pa_offset)], pfn_pte(i, PAGE_KERNEL)); - pr_info("Freeing unused kernel memory: %dk freed\n", - ((unsigned int)&__init_end - (unsigned int)&__init_begin) >> 10); -} + flush_tlb_all(); + local_icache_inv_all(NULL); -void pgd_init(unsigned long *p) -{ - int i; + /* Setup page mask to 4k */ + write_mmu_pagemask(0); - for (i = 0; i < PTRS_PER_PGD; i++) - p[i] = __pa(invalid_pte_table); + setup_pgd(swapper_pg_dir, 0); } -void __init pre_mmu_init(void) +void __init fixrange_init(unsigned long start, unsigned long end, + pgd_t *pgd_base) { - /* - * Setup page-table and enable TLB-hardrefill - */ - flush_tlb_all(); - pgd_init((unsigned long *)swapper_pg_dir); - TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir); - TLBMISS_HANDLER_SETUP_PGD_KERNEL(swapper_pg_dir); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int i, j, k; + unsigned long vaddr; + + vaddr = start; + i = pgd_index(vaddr); + j = pud_index(vaddr); + k = pmd_index(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { + pud = (pud_t *)pgd; + for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) { + pmd = (pmd_t *)pud; + for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) { + if (pmd_none(*pmd)) { + pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!pte) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, PAGE_SIZE, + PAGE_SIZE); + + set_pmd(pmd, __pmd(__pa(pte))); + BUG_ON(pte != pte_offset_kernel(pmd, 0)); + } + vaddr += PMD_SIZE; + } + k = 0; + } + j = 0; + } +} - asid_cache(smp_processor_id()) = ASID_FIRST_VERSION; +void __init fixaddr_init(void) +{ + unsigned long vaddr; - /* Setup page mask to 4k */ - write_mmu_pagemask(0); + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, vaddr + PMD_SIZE, swapper_pg_dir); } + +static const 
pgprot_t protection_map[16] = { + [VM_NONE] = PAGE_NONE, + [VM_READ] = PAGE_READ, + [VM_WRITE] = PAGE_READ, + [VM_WRITE | VM_READ] = PAGE_READ, + [VM_EXEC] = PAGE_READ, + [VM_EXEC | VM_READ] = PAGE_READ, + [VM_EXEC | VM_WRITE] = PAGE_READ, + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_READ, + [VM_SHARED] = PAGE_NONE, + [VM_SHARED | VM_READ] = PAGE_READ, + [VM_SHARED | VM_WRITE] = PAGE_WRITE, + [VM_SHARED | VM_WRITE | VM_READ] = PAGE_WRITE, + [VM_SHARED | VM_EXEC] = PAGE_READ, + [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READ, + [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_WRITE, + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_WRITE +}; +DECLARE_VM_GET_PAGE_PROT diff --git a/arch/csky/mm/ioremap.c b/arch/csky/mm/ioremap.c index cb7c03e5cd21..70c8268d3b2b 100644 --- a/arch/csky/mm/ioremap.c +++ b/arch/csky/mm/ioremap.c @@ -3,46 +3,17 @@ #include <linux/export.h> #include <linux/mm.h> -#include <linux/vmalloc.h> #include <linux/io.h> -#include <asm/pgtable.h> - -void __iomem *ioremap(phys_addr_t addr, size_t size) +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) { - phys_addr_t last_addr; - unsigned long offset, vaddr; - struct vm_struct *area; - pgprot_t prot; - - last_addr = addr + size - 1; - if (!size || last_addr < addr) - return NULL; - - offset = addr & (~PAGE_MASK); - addr &= PAGE_MASK; - size = PAGE_ALIGN(size + offset); - - area = get_vm_area_caller(size, VM_ALLOC, __builtin_return_address(0)); - if (!area) - return NULL; - - vaddr = (unsigned long)area->addr; - - prot = __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | - _PAGE_GLOBAL | _CACHE_UNCACHED | _PAGE_SO); - - if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) { - free_vm_area(area); - return NULL; + if (!pfn_valid(pfn)) { + return pgprot_noncached(vma_prot); + } else if (file->f_flags & O_SYNC) { + return pgprot_writecombine(vma_prot); } - return (void __iomem *)(vaddr + offset); -} -EXPORT_SYMBOL(ioremap); - -void iounmap(void __iomem *addr) -{ - vunmap((void *)((unsigned long)addr & PAGE_MASK)); + return vma_prot; } -EXPORT_SYMBOL(iounmap); +EXPORT_SYMBOL(phys_mem_access_prot); diff --git a/arch/csky/mm/syscache.c b/arch/csky/mm/syscache.c index c4645e4e97f4..cd847ad62c7e 100644 --- a/arch/csky/mm/syscache.c +++ b/arch/csky/mm/syscache.c @@ -3,7 +3,7 @@ #include <linux/syscalls.h> #include <asm/page.h> -#include <asm/cache.h> +#include <asm/cacheflush.h> #include <asm/cachectl.h> SYSCALL_DEFINE3(cacheflush, @@ -12,17 +12,17 @@ SYSCALL_DEFINE3(cacheflush, int, cache) { switch (cache) { - case ICACHE: - icache_inv_range((unsigned long)addr, - (unsigned long)addr + bytes); - break; + case BCACHE: case DCACHE: dcache_wb_range((unsigned long)addr, (unsigned long)addr + bytes); - break; - case BCACHE: - cache_wbinv_range((unsigned long)addr, - (unsigned long)addr + bytes); + if (cache != BCACHE) + break; + fallthrough; + case ICACHE: + flush_icache_mm_range(current->mm, + (unsigned long)addr, + (unsigned long)addr + bytes); break; default: return -EINVAL; diff --git a/arch/csky/mm/tcm.c b/arch/csky/mm/tcm.c new file mode 100644 index 000000000000..ddeb36328819 --- /dev/null +++ b/arch/csky/mm/tcm.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/highmem.h> +#include <linux/genalloc.h> +#include <asm/tlbflush.h> +#include <asm/fixmap.h> + +#if (CONFIG_ITCM_RAM_BASE == 0xffffffff) +#error "You should define ITCM_RAM_BASE" +#endif + +#ifdef CONFIG_HAVE_DTCM +#if (CONFIG_DTCM_RAM_BASE == 0xffffffff) +#error "You should define DTCM_RAM_BASE" +#endif 
+ +#if (CONFIG_DTCM_RAM_BASE == CONFIG_ITCM_RAM_BASE) +#error "You should define correct DTCM_RAM_BASE" +#endif +#endif + +extern char __tcm_start, __tcm_end, __dtcm_start; + +static struct gen_pool *tcm_pool; + +static void __init tcm_mapping_init(void) +{ + pte_t *tcm_pte; + unsigned long vaddr, paddr; + int i; + + paddr = CONFIG_ITCM_RAM_BASE; + + if (pfn_valid(PFN_DOWN(CONFIG_ITCM_RAM_BASE))) + goto panic; + +#ifndef CONFIG_HAVE_DTCM + for (i = 0; i < TCM_NR_PAGES; i++) { +#else + for (i = 0; i < CONFIG_ITCM_NR_PAGES; i++) { +#endif + vaddr = __fix_to_virt(FIX_TCM - i); + + tcm_pte = + pte_offset_kernel((pmd_t *)pgd_offset_k(vaddr), vaddr); + + set_pte(tcm_pte, pfn_pte(__phys_to_pfn(paddr), PAGE_KERNEL)); + + flush_tlb_one(vaddr); + + paddr = paddr + PAGE_SIZE; + } + +#ifdef CONFIG_HAVE_DTCM + if (pfn_valid(PFN_DOWN(CONFIG_DTCM_RAM_BASE))) + goto panic; + + paddr = CONFIG_DTCM_RAM_BASE; + + for (i = 0; i < CONFIG_DTCM_NR_PAGES; i++) { + vaddr = __fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES - i); + + tcm_pte = + pte_offset_kernel((pmd_t *) pgd_offset_k(vaddr), vaddr); + + set_pte(tcm_pte, pfn_pte(__phys_to_pfn(paddr), PAGE_KERNEL)); + + flush_tlb_one(vaddr); + + paddr = paddr + PAGE_SIZE; + } +#endif + +#ifndef CONFIG_HAVE_DTCM + memcpy((void *)__fix_to_virt(FIX_TCM), + &__tcm_start, &__tcm_end - &__tcm_start); + + pr_info("%s: mapping tcm va:0x%08lx to pa:0x%08x\n", + __func__, __fix_to_virt(FIX_TCM), CONFIG_ITCM_RAM_BASE); + + pr_info("%s: __tcm_start va:0x%08lx size:%d\n", + __func__, (unsigned long)&__tcm_start, &__tcm_end - &__tcm_start); +#else + memcpy((void *)__fix_to_virt(FIX_TCM), + &__tcm_start, &__dtcm_start - &__tcm_start); + + pr_info("%s: mapping itcm va:0x%08lx to pa:0x%08x\n", + __func__, __fix_to_virt(FIX_TCM), CONFIG_ITCM_RAM_BASE); + + pr_info("%s: __itcm_start va:0x%08lx size:%d\n", + __func__, (unsigned long)&__tcm_start, &__dtcm_start - &__tcm_start); + + memcpy((void *)__fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES), + &__dtcm_start, &__tcm_end - &__dtcm_start); + + pr_info("%s: mapping dtcm va:0x%08lx to pa:0x%08x\n", + __func__, __fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES), + CONFIG_DTCM_RAM_BASE); + + pr_info("%s: __dtcm_start va:0x%08lx size:%d\n", + __func__, (unsigned long)&__dtcm_start, &__tcm_end - &__dtcm_start); + +#endif + return; +panic: + panic("TCM init error"); +} + +void *tcm_alloc(size_t len) +{ + unsigned long vaddr; + + if (!tcm_pool) + return NULL; + + vaddr = gen_pool_alloc(tcm_pool, len); + if (!vaddr) + return NULL; + + return (void *) vaddr; +} +EXPORT_SYMBOL(tcm_alloc); + +void tcm_free(void *addr, size_t len) +{ + gen_pool_free(tcm_pool, (unsigned long) addr, len); +} +EXPORT_SYMBOL(tcm_free); + +static int __init tcm_setup_pool(void) +{ +#ifndef CONFIG_HAVE_DTCM + u32 pool_size = (u32) (TCM_NR_PAGES * PAGE_SIZE) + - (u32) (&__tcm_end - &__tcm_start); + + u32 tcm_pool_start = __fix_to_virt(FIX_TCM) + + (u32) (&__tcm_end - &__tcm_start); +#else + u32 pool_size = (u32) (CONFIG_DTCM_NR_PAGES * PAGE_SIZE) + - (u32) (&__tcm_end - &__dtcm_start); + + u32 tcm_pool_start = __fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES) + + (u32) (&__tcm_end - &__dtcm_start); +#endif + int ret; + + tcm_pool = gen_pool_create(2, -1); + + ret = gen_pool_add(tcm_pool, tcm_pool_start, pool_size, -1); + if (ret) { + pr_err("%s: gen_pool add failed!\n", __func__); + return ret; + } + + pr_info("%s: Added %d bytes @ 0x%08x to memory pool\n", + __func__, pool_size, tcm_pool_start); + + return 0; +} + +static int __init tcm_init(void) +{ + tcm_mapping_init(); + + 
tcm_setup_pool(); + + return 0; +} +arch_initcall(tcm_init); diff --git a/arch/csky/mm/tlb.c b/arch/csky/mm/tlb.c index 08b8394e5b8f..9234c5e5ceaf 100644 --- a/arch/csky/mm/tlb.c +++ b/arch/csky/mm/tlb.c @@ -7,10 +7,14 @@ #include <linux/sched.h> #include <asm/mmu_context.h> -#include <asm/pgtable.h> #include <asm/setup.h> -#define CSKY_TLB_SIZE CONFIG_CPU_TLB_SIZE +/* + * One C-SKY MMU TLB entry contain two PFN/page entry, ie: + * 1VPN -> 2PFN + */ +#define TLB_ENTRY_SIZE (PAGE_SIZE * 2) +#define TLB_ENTRY_SIZE_MASK (PAGE_MASK << 1) void flush_tlb_all(void) { @@ -19,201 +23,176 @@ void flush_tlb_all(void) void flush_tlb_mm(struct mm_struct *mm) { - int cpu = smp_processor_id(); - - if (cpu_context(cpu, mm) != 0) - drop_mmu_context(mm, cpu); - +#ifdef CONFIG_CPU_HAS_TLBI + sync_is(); + asm volatile( + "tlbi.asids %0 \n" + "sync.i \n" + : + : "r" (cpu_asid(mm)) + : "memory"); +#else tlb_invalid_all(); +#endif } +/* + * MMU operation regs only could invalid tlb entry in jtlb and we + * need change asid field to invalid I-utlb & D-utlb. + */ +#ifndef CONFIG_CPU_HAS_TLBI #define restore_asid_inv_utlb(oldpid, newpid) \ do { \ - if ((oldpid & ASID_MASK) == newpid) \ + if (oldpid == newpid) \ write_mmu_entryhi(oldpid + 1); \ write_mmu_entryhi(oldpid); \ } while (0) +#endif void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) + unsigned long end) { - struct mm_struct *mm = vma->vm_mm; - int cpu = smp_processor_id(); - - if (cpu_context(cpu, mm) != 0) { - unsigned long size, flags; - int newpid = cpu_asid(cpu, mm); - - local_irq_save(flags); - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - size = (size + 1) >> 1; - if (size <= CSKY_TLB_SIZE/2) { - start &= (PAGE_MASK << 1); - end += ((PAGE_SIZE << 1) - 1); - end &= (PAGE_MASK << 1); -#ifdef CONFIG_CPU_HAS_TLBI - while (start < end) { - asm volatile("tlbi.vaas %0" - ::"r"(start | newpid)); - start += (PAGE_SIZE << 1); - } - sync_is(); -#else - { - int oldpid = read_mmu_entryhi(); - - while (start < end) { - int idx; - - write_mmu_entryhi(start | newpid); - start += (PAGE_SIZE << 1); - tlb_probe(); - idx = read_mmu_index(); - if (idx >= 0) - tlb_invalid_indexed(); - } - restore_asid_inv_utlb(oldpid, newpid); - } -#endif - } else { - drop_mmu_context(mm, cpu); - } - local_irq_restore(flags); - } -} + unsigned long newpid = cpu_asid(vma->vm_mm); -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - unsigned long size, flags; + start &= TLB_ENTRY_SIZE_MASK; + end += TLB_ENTRY_SIZE - 1; + end &= TLB_ENTRY_SIZE_MASK; - local_irq_save(flags); - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - if (size <= CSKY_TLB_SIZE) { - start &= (PAGE_MASK << 1); - end += ((PAGE_SIZE << 1) - 1); - end &= (PAGE_MASK << 1); #ifdef CONFIG_CPU_HAS_TLBI - while (start < end) { - asm volatile("tlbi.vaas %0"::"r"(start)); - start += (PAGE_SIZE << 1); - } - sync_is(); -#else - { - int oldpid = read_mmu_entryhi(); - - while (start < end) { - int idx; - - write_mmu_entryhi(start); - start += (PAGE_SIZE << 1); - tlb_probe(); - idx = read_mmu_index(); - if (idx >= 0) - tlb_invalid_indexed(); - } - restore_asid_inv_utlb(oldpid, 0); - } -#endif - } else { - flush_tlb_all(); + sync_is(); + while (start < end) { + asm volatile( + "tlbi.vas %0 \n" + : + : "r" (start | newpid) + : "memory"); + + start += 2*PAGE_SIZE; } + asm volatile("sync.i\n"); +#else + { + unsigned long flags, oldpid; + + local_irq_save(flags); + oldpid = read_mmu_entryhi() & ASID_MASK; + while (start < end) { + int idx; + 
write_mmu_entryhi(start | newpid); + start += 2*PAGE_SIZE; + tlb_probe(); + idx = read_mmu_index(); + if (idx >= 0) + tlb_invalid_indexed(); + } + restore_asid_inv_utlb(oldpid, newpid); local_irq_restore(flags); + } +#endif } -void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - int cpu = smp_processor_id(); - int newpid = cpu_asid(cpu, vma->vm_mm); - - if (!vma || cpu_context(cpu, vma->vm_mm) != 0) { - page &= (PAGE_MASK << 1); + start &= TLB_ENTRY_SIZE_MASK; + end += TLB_ENTRY_SIZE - 1; + end &= TLB_ENTRY_SIZE_MASK; #ifdef CONFIG_CPU_HAS_TLBI - asm volatile("tlbi.vaas %0"::"r"(page | newpid)); - sync_is(); + sync_is(); + while (start < end) { + asm volatile( + "tlbi.vaas %0 \n" + : + : "r" (start) + : "memory"); + + start += 2*PAGE_SIZE; + } + asm volatile("sync.i\n"); #else - { - int oldpid, idx; - unsigned long flags; + { + unsigned long flags, oldpid; - local_irq_save(flags); - oldpid = read_mmu_entryhi(); - write_mmu_entryhi(page | newpid); + local_irq_save(flags); + oldpid = read_mmu_entryhi() & ASID_MASK; + while (start < end) { + int idx; + + write_mmu_entryhi(start | oldpid); + start += 2*PAGE_SIZE; tlb_probe(); idx = read_mmu_index(); if (idx >= 0) tlb_invalid_indexed(); - - restore_asid_inv_utlb(oldpid, newpid); - local_irq_restore(flags); - } -#endif } + restore_asid_inv_utlb(oldpid, oldpid); + local_irq_restore(flags); + } +#endif } -/* - * Remove one kernel space TLB entry. This entry is assumed to be marked - * global so we don't do the ASID thing. - */ -void flush_tlb_one(unsigned long page) +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - int oldpid; + int newpid = cpu_asid(vma->vm_mm); - oldpid = read_mmu_entryhi(); - page &= (PAGE_MASK << 1); + addr &= TLB_ENTRY_SIZE_MASK; #ifdef CONFIG_CPU_HAS_TLBI - page = page | (oldpid & 0xfff); - asm volatile("tlbi.vaas %0"::"r"(page)); sync_is(); + asm volatile( + "tlbi.vas %0 \n" + "sync.i \n" + : + : "r" (addr | newpid) + : "memory"); #else { - int idx; + int oldpid, idx; unsigned long flags; - page = page | (oldpid & 0xff); - local_irq_save(flags); - write_mmu_entryhi(page); + oldpid = read_mmu_entryhi() & ASID_MASK; + write_mmu_entryhi(addr | newpid); tlb_probe(); idx = read_mmu_index(); if (idx >= 0) tlb_invalid_indexed(); - restore_asid_inv_utlb(oldpid, oldpid); + + restore_asid_inv_utlb(oldpid, newpid); local_irq_restore(flags); } #endif } -EXPORT_SYMBOL(flush_tlb_one); -/* show current 32 jtlbs */ -void show_jtlb_table(void) +void flush_tlb_one(unsigned long addr) { + addr &= TLB_ENTRY_SIZE_MASK; + +#ifdef CONFIG_CPU_HAS_TLBI + sync_is(); + asm volatile( + "tlbi.vaas %0 \n" + "sync.i \n" + : + : "r" (addr) + : "memory"); +#else + { + int oldpid, idx; unsigned long flags; - int entryhi, entrylo0, entrylo1; - int entry; - int oldpid; local_irq_save(flags); - entry = 0; - pr_info("\n\n\n"); - - oldpid = read_mmu_entryhi(); - while (entry < CSKY_TLB_SIZE) { - write_mmu_index(entry); - tlb_read(); - entryhi = read_mmu_entryhi(); - entrylo0 = read_mmu_entrylo0(); - entrylo0 = entrylo0; - entrylo1 = read_mmu_entrylo1(); - entrylo1 = entrylo1; - pr_info("jtlb[%d]: entryhi - 0x%x; entrylo0 - 0x%x;" - " entrylo1 - 0x%x\n", - entry, entryhi, entrylo0, entrylo1); - entry++; - } - write_mmu_entryhi(oldpid); + oldpid = read_mmu_entryhi() & ASID_MASK; + write_mmu_entryhi(addr | oldpid); + tlb_probe(); + idx = read_mmu_index(); + if (idx >= 0) + tlb_invalid_indexed(); + + restore_asid_inv_utlb(oldpid, oldpid); 
local_irq_restore(flags); + } +#endif } +EXPORT_SYMBOL(flush_tlb_one); |
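
Note on the syscache.c change above: BCACHE requests now write the D-cache back and then fall through to the I-cache path (flush_icache_mm_range()), so a single call makes freshly written code visible to instruction fetch. Below is a minimal, illustrative user-space sketch of driving that syscall from a JIT-style code patcher. It is not part of the patch; it assumes the csky uapi headers export the ICACHE/DCACHE/BCACHE constants via <asm/cachectl.h> and the syscall number as __NR_cacheflush, as the hunk suggests, and that no libc wrapper exists.

/*
 * Illustrative only: flush freshly written instructions before executing
 * them, using the csky cacheflush(2) syscall reworked in syscache.c above.
 * Assumes <asm/cachectl.h> provides ICACHE/DCACHE/BCACHE and that the
 * syscall number is available as __NR_cacheflush on this architecture.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <asm/cachectl.h>

static int cacheflush(void *addr, unsigned long bytes, int cache)
{
	/* Thin wrapper; glibc may not provide one for this syscall. */
	return syscall(__NR_cacheflush, addr, bytes, cache);
}

int main(void)
{
	size_t len = getpagesize();
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE | PROT_EXEC,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return 1;

	/* ... emit machine code into buf here (placeholder) ... */
	memset(buf, 0, len);

	/*
	 * BCACHE: write the D-cache back and invalidate the I-cache for the
	 * range, so the newly written instructions are seen by fetch.
	 */
	if (cacheflush(buf, len, BCACHE))
		perror("cacheflush");

	munmap(buf, len);
	return 0;
}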
