Diffstat (limited to 'arch/arc/kernel/smp.c')
| -rw-r--r-- | arch/arc/kernel/smp.c | 351 |
1 file changed, 214 insertions, 137 deletions
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index bca3052c956d..b2f2c59279a6 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * RajeshwarR: Dec 11, 2007
  *   -- Added support for Inter Processor Interrupts
  *
@@ -12,64 +9,98 @@
  *   -- Initial Write (Borrowed heavily from ARM)
  */

-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/spinlock.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/interrupt.h>
 #include <linux/profile.h>
-#include <linux/errno.h>
-#include <linux/err.h>
 #include <linux/mm.h>
 #include <linux/cpu.h>
-#include <linux/smp.h>
 #include <linux/irq.h>
-#include <linux/delay.h>
 #include <linux/atomic.h>
-#include <linux/percpu.h>
 #include <linux/cpumask.h>
-#include <linux/spinlock_types.h>
 #include <linux/reboot.h>
-#include <asm/processor.h>
-#include <asm/setup.h>
+#include <linux/irqdomain.h>
+#include <linux/export.h>
+#include <linux/of_fdt.h>
+
 #include <asm/mach_desc.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/processor.h>

+#ifndef CONFIG_ARC_HAS_LLSC
 arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-struct plat_smp_ops plat_smp_ops;
+EXPORT_SYMBOL_GPL(smp_atomic_ops_lock);
+#endif

-/* XXX: per cpu ? Only needed once in early seconday boot */
+struct plat_smp_ops __weak plat_smp_ops;
+
+/* XXX: per cpu ? Only needed once in early secondary boot */
 struct task_struct *secondary_idle_tsk;

-/* Called from start_kernel */
-void __init smp_prepare_boot_cpu(void)
+static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask)
+{
+        unsigned long dt_root = of_get_flat_dt_root();
+        const char *buf;
+
+        buf = of_get_flat_dt_prop(dt_root, name, NULL);
+        if (!buf)
+                return -EINVAL;
+
+        if (cpulist_parse(buf, cpumask))
+                return -EINVAL;
+
+        return 0;
+}
+
+/*
+ * Read from DeviceTree and setup cpu possible mask. If there is no
+ * "possible-cpus" property in DeviceTree pretend all [0..NR_CPUS-1] exist.
+ */
+static void __init arc_init_cpu_possible(void)
 {
+        struct cpumask cpumask;
+
+        if (arc_get_cpu_map("possible-cpus", &cpumask)) {
+                pr_warn("Failed to get possible-cpus from dtb, pretending all %u cpus exist\n",
+                        NR_CPUS);
+
+                cpumask_setall(&cpumask);
+        }
+
+        if (!cpumask_test_cpu(0, &cpumask))
+                panic("Master cpu (cpu[0]) is missed in cpu possible mask!");
+
+        init_cpu_possible(&cpumask);
 }

 /*
- * Initialise the CPU possible map early - this describes the CPUs
- * which may be present or become present in the system.
+ * Called from setup_arch() before calling setup_processor()
+ *
+ * - Initialise the CPU possible map early - this describes the CPUs
+ *   which may be present or become present in the system.
+ * - Call early smp init hook. This can initialize a specific multi-core
+ *   IP which is say common to several platforms (hence not part of
+ *   platform specific int_early() hook)
  */
 void __init smp_init_cpus(void)
 {
-        unsigned int i;
+        arc_init_cpu_possible();

-        for (i = 0; i < NR_CPUS; i++)
-                set_cpu_possible(i, true);
+        if (plat_smp_ops.init_early_smp)
+                plat_smp_ops.init_early_smp();
 }

 /* called from init ( ) => process 1 */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-        int i;
-
         /*
-         * Initialise the present map, which describes the set of CPUs
-         * actually populated at the present time.
+         * if platform didn't set the present map already, do it now
+         * boot cpu is set to present already by init/main.c
         */
-        for (i = 0; i < max_cpus; i++)
-                set_cpu_present(i, true);
+        if (num_present_cpus() <= 1)
+                init_cpu_present(cpu_possible_mask);
 }

 void __init smp_cpus_done(unsigned int max_cpus)
@@ -78,38 +109,47 @@ void __init smp_cpus_done(unsigned int max_cpus)
 {
 }

 /*
- * After power-up, a non Master CPU needs to wait for Master to kick start it
- *
- * The default implementation halts
- *
- * This relies on platform specific support allowing Master to directly set
- * this CPU's PC (to be @first_lines_of_secondary() and kick start it.
- *
- * In lack of such h/w assist, platforms can override this function
- *   - make this function busy-spin on a token, eventually set by Master
- *     (from arc_platform_smp_wakeup_cpu())
- *   - Once token is available, jump to @first_lines_of_secondary
- *     (using inline asm).
- *
- * Alert: can NOT use stack here as it has not been determined/setup for CPU.
- *        If it turns out to be elaborate, it's better to code it in assembly
- *
+ * Default smp boot helper for Run-on-reset case where all cores start off
+ * together. Non-masters need to wait for Master to start running.
+ * This is implemented using a flag in memory, which Non-masters spin-wait on.
+ * Master sets it to cpu-id of core to "ungate" it.
  */
-void __attribute__((weak)) arc_platform_smp_wait_to_boot(int cpu)
+static volatile int wake_flag;
+
+#ifdef CONFIG_ISA_ARCOMPACT
+
+#define __boot_read(f)          f
+#define __boot_write(f, v)      f = v
+
+#else
+
+#define __boot_read(f)          arc_read_uncached_32(&f)
+#define __boot_write(f, v)      arc_write_uncached_32(&f, v)
+
+#endif
+
+static void arc_default_smp_cpu_kick(int cpu, unsigned long pc)
 {
-        /*
-         * As a hack for debugging - since debugger will single-step over the
-         * FLAG insn - wrap the halt itself it in a self loop
-         */
-        __asm__ __volatile__(
-        "1:             \n"
-        "       flag 1  \n"
-        "       b 1b    \n");
+        BUG_ON(cpu == 0);
+
+        __boot_write(wake_flag, cpu);
+}
+
+void arc_platform_smp_wait_to_boot(int cpu)
+{
+        /* for halt-on-reset, we've waited already */
+        if (IS_ENABLED(CONFIG_ARC_SMP_HALT_ON_RESET))
+                return;
+
+        while (__boot_read(wake_flag) != cpu)
+                ;
+
+        __boot_write(wake_flag, 0);
 }

 const char *arc_platform_smp_cpuinfo(void)
 {
-        return plat_smp_ops.info;
+        return plat_smp_ops.info ? : "";
 }

 /*
@@ -125,23 +165,25 @@ void start_kernel_secondary(void)
         /* MMU, Caches, Vector Table, Interrupts etc */
         setup_processor();

-        atomic_inc(&mm->mm_users);
-        atomic_inc(&mm->mm_count);
+        mmget(mm);
+        mmgrab(mm);
         current->active_mm = mm;
+        cpumask_set_cpu(cpu, mm_cpumask(mm));
+
+        /* Some SMP H/w setup - for each cpu */
+        if (plat_smp_ops.init_per_cpu)
+                plat_smp_ops.init_per_cpu(cpu);
+
+        if (machine_desc->init_per_cpu)
+                machine_desc->init_per_cpu(cpu);

         notify_cpu_starting(cpu);
         set_cpu_online(cpu, true);

         pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);

-        if (machine_desc->init_smp)
-                machine_desc->init_smp(smp_processor_id());
-
-        arc_local_timer_setup(cpu);
-
         local_irq_enable();
-        preempt_disable();
-        cpu_startup_entry(CPUHP_ONLINE);
+        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }

 /*
@@ -166,6 +208,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
         if (plat_smp_ops.cpu_kick)
                 plat_smp_ops.cpu_kick(cpu,
                                 (unsigned long)first_lines_of_secondary);
+        else
+                arc_default_smp_cpu_kick(cpu, (unsigned long)NULL);

         /* wait for 1 sec after kicking the secondary */
         wait_till = jiffies + HZ;
@@ -175,7 +219,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
         }

         if (!cpu_online(cpu)) {
-                pr_info("Timeout: CPU%u FAILED to comeup !!!\n", cpu);
+                pr_info("Timeout: CPU%u FAILED to come up !!!\n", cpu);
                 return -1;
         }

@@ -184,64 +228,69 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
         return 0;
 }

-/*
- * not supported here
- */
-int __init setup_profiling_timer(unsigned int multiplier)
-{
-        return -EINVAL;
-}
-
 /*****************************************************************************/
 /*              Inter Processor Interrupt Handling                           */
 /*****************************************************************************/

-/*
- * structures for inter-processor calls
- * A Collection of single bit ipi messages
- *
- */
-
-/*
- * TODO_rajesh investigate tlb message types.
- * IPI Timer not needed because each ARC has an individual Interrupting Timer
- */
 enum ipi_msg_type {
-        IPI_NOP = 0,
+        IPI_EMPTY = 0,
         IPI_RESCHEDULE = 1,
         IPI_CALL_FUNC,
-        IPI_CALL_FUNC_SINGLE,
-        IPI_CPU_STOP
+        IPI_CPU_STOP,
 };

-struct ipi_data {
-        unsigned long bits;
-};
+/*
+ * In arches with IRQ for each msg type (above), receiver can use IRQ-id to
+ * figure out what msg was sent. For those which don't (ARC has dedicated IPI
+ * IRQ), the msg-type needs to be conveyed via per-cpu data
+ */

-static DEFINE_PER_CPU(struct ipi_data, ipi_data);
+static DEFINE_PER_CPU(unsigned long, ipi_data);

-static void ipi_send_msg(const struct cpumask *callmap, enum ipi_msg_type msg)
+static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
 {
+        unsigned long __percpu *ipi_data_ptr = per_cpu_ptr(&ipi_data, cpu);
+        unsigned long old, new;
         unsigned long flags;
-        unsigned int cpu;
+
+        pr_debug("%d Sending msg [%d] to %d\n", smp_processor_id(), msg, cpu);

         local_irq_save(flags);

-        for_each_cpu(cpu, callmap) {
-                struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
-                set_bit(msg, &ipi->bits);
-        }
+        /*
+         * Atomically write new msg bit (in case others are writing too),
+         * and read back old value
+         */
+        do {
+                new = old = *ipi_data_ptr;
+                new |= 1U << msg;
+        } while (cmpxchg(ipi_data_ptr, old, new) != old);

-        /* Call the platform specific cross-CPU call function  */
-        if (plat_smp_ops.ipi_send)
-                plat_smp_ops.ipi_send((void *)callmap);
+        /*
+         * Call the platform specific IPI kick function, but avoid if possible:
+         * Only do so if there's no pending msg from other concurrent sender(s).
+         * Otherwise, receiver will see this msg as well when it takes the
+         * IPI corresponding to that msg. This is true, even if it is already in
+         * IPI handler, because !@old means it has not yet dequeued the msg(s)
+         * so @new msg can be a free-loader
+         */
+        if (plat_smp_ops.ipi_send && !old)
+                plat_smp_ops.ipi_send(cpu);

         local_irq_restore(flags);
 }

-void smp_send_reschedule(int cpu)
+static void ipi_send_msg(const struct cpumask *callmap, enum ipi_msg_type msg)
 {
-        ipi_send_msg(cpumask_of(cpu), IPI_RESCHEDULE);
+        unsigned int cpu;
+
+        for_each_cpu(cpu, callmap)
+                ipi_send_msg_one(cpu, msg);
+}
+
+void arch_smp_send_reschedule(int cpu)
+{
+        ipi_send_msg_one(cpu, IPI_RESCHEDULE);
 }

 void smp_send_stop(void)
@@ -254,7 +303,7 @@ void smp_send_stop(void)

 void arch_send_call_function_single_ipi(int cpu)
 {
-        ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+        ipi_send_msg_one(cpu, IPI_CALL_FUNC);
 }

 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -265,68 +314,96 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 /*
  * ipi_cpu_stop - handle IPI from smp_send_stop()
  */
-static void ipi_cpu_stop(unsigned int cpu)
+static void ipi_cpu_stop(void)
 {
         machine_halt();
 }

-static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi, int cpu)
+static inline int __do_IPI(unsigned long msg)
 {
-        unsigned long msg = 0;
+        int rc = 0;

-        do {
-                msg = find_next_bit(ops, BITS_PER_LONG, msg+1);
-
-                switch (msg) {
-                case IPI_RESCHEDULE:
-                        scheduler_ipi();
-                        break;
+        switch (msg) {
+        case IPI_RESCHEDULE:
+                scheduler_ipi();
+                break;

-                case IPI_CALL_FUNC:
-                        generic_smp_call_function_interrupt();
-                        break;
+        case IPI_CALL_FUNC:
+                generic_smp_call_function_interrupt();
+                break;

-                case IPI_CALL_FUNC_SINGLE:
-                        generic_smp_call_function_single_interrupt();
-                        break;
+        case IPI_CPU_STOP:
+                ipi_cpu_stop();
+                break;

-                case IPI_CPU_STOP:
-                        ipi_cpu_stop(cpu);
-                        break;
-                }
-        } while (msg < BITS_PER_LONG);
+        default:
+                rc = 1;
+        }
+        return rc;
 }

 /*
  * arch-common ISR to handle for inter-processor interrupts
  * Has hooks for platform specific IPI
  */
-irqreturn_t do_IPI(int irq, void *dev_id)
+static irqreturn_t do_IPI(int irq, void *dev_id)
 {
-        int cpu = smp_processor_id();
-        struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
-        unsigned long ops;
+        unsigned long pending;
+        unsigned long __maybe_unused copy;
+
+        pr_debug("IPI [%ld] received on cpu %d\n",
+                 *this_cpu_ptr(&ipi_data), smp_processor_id());

         if (plat_smp_ops.ipi_clear)
-                plat_smp_ops.ipi_clear(cpu, irq);
+                plat_smp_ops.ipi_clear(irq);

         /*
-         * XXX: is this loop really needed
-         * And do we need to move ipi_clean inside
+         * "dequeue" the msg corresponding to this IPI (and possibly other
+         * piggybacked msg from elided IPIs: see ipi_send_msg_one() above)
          */
-        while ((ops = xchg(&ipi->bits, 0)) != 0)
-                __do_IPI(&ops, ipi, cpu);
+        copy = pending = xchg(this_cpu_ptr(&ipi_data), 0);
+
+        do {
+                unsigned long msg = __ffs(pending);
+                int rc;
+
+                rc = __do_IPI(msg);
+                if (rc)
+                        pr_info("IPI with bogus msg %ld in %ld\n", msg, copy);
+                pending &= ~(1U << msg);
+        } while (pending);

         return IRQ_HANDLED;
 }

 /*
  * API called by platform code to hookup arch-common ISR to their IPI IRQ
+ *
+ * Note: If IPI is provided by platform (vs. say ARC MCIP), their intc setup/map
+ * function needs to call irq_set_percpu_devid() for IPI IRQ, otherwise
+ * request_percpu_irq() below will fail
  */
 static DEFINE_PER_CPU(int, ipi_dev);
-int smp_ipi_irq_setup(int cpu, int irq)
+
+int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq)
 {
-        int *dev_id = &per_cpu(ipi_dev, smp_processor_id());
-        return request_percpu_irq(irq, do_IPI, "IPI Interrupt", dev_id);
+        int *dev = per_cpu_ptr(&ipi_dev, cpu);
+        unsigned int virq = irq_find_mapping(NULL, hwirq);
+
+        if (!virq)
+                panic("Cannot find virq for root domain and hwirq=%lu", hwirq);
+
+        /* Boot cpu calls request, all call enable */
+        if (!cpu) {
+                int rc;
+
+                rc = request_percpu_irq(virq, do_IPI, "IPI Interrupt", dev);
+                if (rc)
+                        panic("Percpu IRQ request failed for %u\n", virq);
+        }
+
+        enable_percpu_irq(virq, 0);
+
+        return 0;
 }
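The run-on-reset handshake added above (wake_flag with __boot_read()/__boot_write(), arc_default_smp_cpu_kick() on the master, arc_platform_smp_wait_to_boot() on the secondaries) can be illustrated outside the kernel. Below is a minimal user-space sketch, assuming pthreads stand in for the secondary cores and C11 atomics stand in for the uncached flag accessors; secondary_start() and the NR_CPUS value here are illustrative only, not part of the patch.

/*
 * Sketch of the run-on-reset "wake_flag" handshake: the master ungates
 * secondaries one at a time by writing their cpu-id into a shared flag;
 * each secondary spin-waits for its own id, then clears the flag so the
 * master can ungate the next one.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS 4

static _Atomic int wake_flag;           /* 0 == nobody is being woken */

static void *secondary_start(void *arg)
{
        int cpu = (int)(long)arg;

        /* arc_platform_smp_wait_to_boot(): spin until master names us */
        while (atomic_load(&wake_flag) != cpu)
                ;

        atomic_store(&wake_flag, 0);    /* ack, so master can kick the next */
        printf("## CPU%d LIVE ##\n", cpu);
        return NULL;
}

int main(void)
{
        pthread_t tid[NR_CPUS];
        int cpu;

        for (cpu = 1; cpu < NR_CPUS; cpu++)
                pthread_create(&tid[cpu], NULL, secondary_start,
                               (void *)(long)cpu);

        for (cpu = 1; cpu < NR_CPUS; cpu++) {
                /* arc_default_smp_cpu_kick(): name the cpu to ungate */
                atomic_store(&wake_flag, cpu);

                /* wait until that cpu has consumed the flag */
                while (atomic_load(&wake_flag) != 0)
                        ;
                pthread_join(tid[cpu], NULL);
        }
        return 0;
}

The master ungates one core at a time and waits for the flag to drop back to 0, mirroring __cpu_up() kicking a single cpu and then polling for it to come online.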

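The per-cpu IPI protocol added above (ipi_send_msg_one() / do_IPI()) only kicks the hardware when no message bit was already pending; any message ORed in after that rides on the IPI still being delivered, and the receiver drains everything with one xchg(). Below is a minimal user-space sketch of that idea, assuming C11 atomics in place of the kernel's cmpxchg()/xchg() and a hypothetical raise_hw_ipi() stub in place of plat_smp_ops.ipi_send().

/*
 * Sketch of the per-cpu IPI message word: senders OR in a msg bit and only
 * raise the hardware IPI if the word was previously empty; the receiver
 * swaps the word to 0 and handles every bit it finds.
 */
#include <stdatomic.h>
#include <stdio.h>

enum ipi_msg_type { IPI_EMPTY, IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP };

static _Atomic unsigned long ipi_data;  /* one word per receiving cpu */

static void raise_hw_ipi(void)          /* hypothetical h/w kick */
{
        puts("hardware IPI raised");
}

static void ipi_send_msg_one(enum ipi_msg_type msg)
{
        unsigned long old, new;

        /* atomically OR in the msg bit, remembering the prior value */
        do {
                old = atomic_load(&ipi_data);
                new = old | (1UL << msg);
        } while (!atomic_compare_exchange_weak(&ipi_data, &old, new));

        /*
         * Kick the h/w only if nothing was pending: the receiver has not
         * dequeued yet, so the new bit rides along with the earlier IPI.
         */
        if (!old)
                raise_hw_ipi();
}

static void do_ipi(void)
{
        /* dequeue everything that piggybacked on this one interrupt */
        unsigned long pending = atomic_exchange(&ipi_data, 0);

        while (pending) {
                unsigned long msg = __builtin_ctzl(pending);

                printf("handling msg %lu\n", msg);
                pending &= ~(1UL << msg);
        }
}

int main(void)
{
        ipi_send_msg_one(IPI_RESCHEDULE);       /* raises the h/w IPI */
        ipi_send_msg_one(IPI_CALL_FUNC);        /* piggybacks, no second kick */
        do_ipi();                               /* drains both messages */
        return 0;
}

Running it raises the hardware kick once, yet do_ipi() handles both messages, which is the "free-loader" case described in the comment in ipi_send_msg_one() above.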