From 5ea72a90261552ed5fdca35239feb6cba498301e Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Sun, 27 Oct 2013 14:49:02 +0530
Subject: ARC: [SMP] TLB flush

- Add mm_cpumask setting (aggregating only, unlike some other arches)
  used to restrict the TLB flush cross-calling

- cross-calling versions of TLB flush routines (thanks to Noam)

Signed-off-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/mmu_context.h | 17 ++++++++-
 arch/arc/include/asm/tlbflush.h    | 11 ++++--
 arch/arc/kernel/smp.c              |  1 +
 arch/arc/mm/tlb.c                  | 73 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 99 insertions(+), 3 deletions(-)

diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 45f06f566b02..1fd467ef658f 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -80,7 +80,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	/* move to new ASID and handle rollover */
 	if (unlikely(!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK))) {
 
-		flush_tlb_all();
+		local_flush_tlb_all();
 
 		/*
 		 * Above checke for rollover of 8 bit ASID in 32 bit container.
@@ -131,6 +131,21 @@ static inline void destroy_context(struct mm_struct *mm)
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
 {
+	const int cpu = smp_processor_id();
+
+	/*
+	 * Note that the mm_cpumask is "aggregating" only, we don't clear it
+	 * for the switched-out task, unlike some other arches.
+	 * It is used to enlist cpus for sending TLB flush IPIs and not sending
+	 * it to CPUs where a task once ran-on, could cause stale TLB entry
+	 * re-use, specially for a multi-threaded task.
+	 * e.g. T1 runs on C1, migrates to C3. T2 running on C2 munmaps.
+	 *      For a non-aggregating mm_cpumask, IPI not sent C1, and if T1
+	 *      were to re-migrate to C1, it could access the unmapped region
+	 *      via any existing stale TLB entries.
+	 */
+	cpumask_set_cpu(cpu, mm_cpumask(next));
+
 #ifndef CONFIG_SMP
 	/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
 	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h
index b2f9bc7f68c8..71c7b2e4b874 100644
--- a/arch/arc/include/asm/tlbflush.h
+++ b/arch/arc/include/asm/tlbflush.h
@@ -18,11 +18,18 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 void local_flush_tlb_range(struct vm_area_struct *vma,
 			   unsigned long start, unsigned long end);
 
-/* XXX: Revisit for SMP */
+#ifndef CONFIG_SMP
 #define flush_tlb_range(vma, s, e)	local_flush_tlb_range(vma, s, e)
 #define flush_tlb_page(vma, page)	local_flush_tlb_page(vma, page)
 #define flush_tlb_kernel_range(s, e)	local_flush_tlb_kernel_range(s, e)
 #define flush_tlb_all()			local_flush_tlb_all()
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
-
+#else
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+							 unsigned long end);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+#endif /* CONFIG_SMP */
 #endif
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 7f0ab1ecd640..41bc4c703f42 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -128,6 +128,7 @@ void start_kernel_secondary(void)
 	atomic_inc(&mm->mm_users);
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index db0f0f823980..e1acf0ce5647 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -363,6 +363,79 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_SMP
+
+struct tlb_args {
+	struct vm_area_struct *ta_vma;
+	unsigned long ta_start;
+	unsigned long ta_end;
+};
+
+static inline void ipi_flush_tlb_page(void *arg)
+{
+	struct tlb_args *ta = arg;
+
+	local_flush_tlb_page(ta->ta_vma, ta->ta_start);
+}
+
+static inline void ipi_flush_tlb_range(void *arg)
+{
+	struct tlb_args *ta = arg;
+
+	local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
+}
+
+static inline void ipi_flush_tlb_kernel_range(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
+}
+
+void flush_tlb_all(void)
+{
+	on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm,
+			 mm, 1);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+{
+	struct tlb_args ta = {
+		.ta_vma = vma,
+		.ta_start = uaddr
+	};
+
+	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	struct tlb_args ta = {
+		.ta_vma = vma,
+		.ta_start = start,
+		.ta_end = end
+	};
+
+	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1);
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	struct tlb_args ta = {
+		.ta_start = start,
+		.ta_end = end
+	};
+
+	on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+}
+#endif
+
 /*
  * Routine to create a TLB entry
  */
-- 
cgit