Diffstat (limited to 'arch/um/kernel')
 arch/um/kernel/Makefile       |   1
 arch/um/kernel/irq.c          |  25
 arch/um/kernel/process.c      |   5
 arch/um/kernel/skas/mmu.c     |  33
 arch/um/kernel/skas/process.c |  19
 arch/um/kernel/smp.c          | 242
 arch/um/kernel/tlb.c          |   5
 arch/um/kernel/trap.c         |   2
 arch/um/kernel/um_arch.c      |  25
 9 files changed, 342 insertions(+), 15 deletions(-)
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index b8f4e9281599..be60bc451b3f 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_GPROF) += gprof_syms.o
 obj-$(CONFIG_OF) += dtb.o
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_SMP) += smp.o
 
 USER_OBJS := config.o
 
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index e95f6c5a259d..f4b13f15a9c1 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -22,6 +22,9 @@
 #include <irq_kern.h>
 #include <linux/time-internal.h>
 
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+
+#define irq_stats(x) (&per_cpu(irq_stat, x))
 
 /* When epoll triggers we do not know why it did so
  * we can also have different IRQs for read and write.
@@ -701,3 +704,25 @@ void sigchld_handler(int sig, struct siginfo *unused_si,
 {
         do_IRQ(SIGCHLD_IRQ, regs);
 }
+
+/*
+ * /proc/interrupts printing for arch specific interrupts
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+#if IS_ENABLED(CONFIG_SMP)
+        int cpu;
+
+        seq_printf(p, "%*s: ", prec, "RES");
+        for_each_online_cpu(cpu)
+                seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count);
+        seq_puts(p, "  Rescheduling interrupts\n");
+
+        seq_printf(p, "%*s: ", prec, "CAL");
+        for_each_online_cpu(cpu)
+                seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count);
+        seq_puts(p, "  Function call interrupts\n");
+#endif
+
+        return 0;
+}
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 3b28048f269c..63b38a3f73f7 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -218,6 +218,11 @@ void arch_cpu_idle(void)
         um_idle_sleep();
 }
 
+void arch_cpu_idle_prepare(void)
+{
+        os_idle_prepare();
+}
+
 int __uml_cant_sleep(void) {
         return in_atomic() || irqs_disabled() || in_interrupt();
         /* Is in_interrupt() really needed? */
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index afe9a2f251ef..00957788591b 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -23,17 +23,36 @@ static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
 static spinlock_t mm_list_lock;
 static struct list_head mm_list;
 
+void enter_turnstile(struct mm_id *mm_id) __acquires(turnstile)
+{
+        struct mm_context *ctx = container_of(mm_id, struct mm_context, id);
+
+        mutex_lock(&ctx->turnstile);
+}
+
+void exit_turnstile(struct mm_id *mm_id) __releases(turnstile)
+{
+        struct mm_context *ctx = container_of(mm_id, struct mm_context, id);
+
+        mutex_unlock(&ctx->turnstile);
+}
+
 int init_new_context(struct task_struct *task, struct mm_struct *mm)
 {
         struct mm_id *new_id = &mm->context.id;
         unsigned long stack = 0;
         int ret = -ENOMEM;
 
+        mutex_init(&mm->context.turnstile);
+        spin_lock_init(&mm->context.sync_tlb_lock);
+
         stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO, ilog2(STUB_DATA_PAGES));
         if (stack == 0)
                 goto out;
 
         new_id->stack = stack;
+        new_id->syscall_data_len = 0;
+        new_id->syscall_fd_num = 0;
 
         scoped_guard(spinlock_irqsave, &mm_list_lock) {
                 /* Insert into list, used for lookups when the child dies */
@@ -73,6 +92,9 @@ void destroy_context(struct mm_struct *mm)
                 return;
         }
 
+        scoped_guard(spinlock_irqsave, &mm_list_lock)
+                list_del(&mm->context.list);
+
         if (mmu->id.pid > 0) {
                 os_kill_ptraced_process(mmu->id.pid, 1);
                 mmu->id.pid = -1;
@@ -82,10 +104,6 @@
         os_close_file(mmu->id.sock);
 
         free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
-
-        guard(spinlock_irqsave)(&mm_list_lock);
-
-        list_del(&mm->context.list);
 }
 
 static irqreturn_t mm_sigchld_irq(int irq, void* dev)
@@ -110,12 +128,11 @@
                 /* Marks the MM as dead */
                 mm_context->id.pid = -1;
 
-                /*
-                 * NOTE: If SMP is implemented, a futex_wake
-                 * needs to be added here.
-                 */
                 stub_data = (void *)mm_context->id.stack;
                 stub_data->futex = FUTEX_IN_KERN;
+#if IS_ENABLED(CONFIG_SMP)
+                os_futex_wake(&stub_data->futex);
+#endif
 
                 /*
                  * NOTE: Currently executing syscalls by
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 5881b17eb987..4a7673b0261a 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -7,6 +7,7 @@
 #include <linux/sched/mm.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/task.h>
+#include <linux/smp-internal.h>
 
 #include <asm/tlbflush.h>
 
@@ -26,12 +27,12 @@ static int __init start_kernel_proc(void *unused)
         return 0;
 }
 
-static char cpu0_irqstack[THREAD_SIZE] __aligned(THREAD_SIZE);
+char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE);
 
 int __init start_uml(void)
 {
-        stack_protections((unsigned long) &cpu0_irqstack);
-        set_sigstack(cpu0_irqstack, THREAD_SIZE);
+        stack_protections((unsigned long) &cpu_irqstacks[0]);
+        set_sigstack(cpu_irqstacks[0], THREAD_SIZE);
 
         init_new_thread_signals();
 
@@ -64,3 +65,15 @@ void current_mm_sync(void)
 
         um_tlb_sync(current->mm);
 }
+
+static DEFINE_SPINLOCK(initial_jmpbuf_spinlock);
+
+void initial_jmpbuf_lock(void)
+{
+        spin_lock_irq(&initial_jmpbuf_spinlock);
+}
+
+void initial_jmpbuf_unlock(void)
+{
+        spin_unlock_irq(&initial_jmpbuf_spinlock);
+}
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
new file mode 100644
index 000000000000..f1e52b7348fb
--- /dev/null
+++ b/arch/um/kernel/smp.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ *
+ * Based on the previous implementation in TT mode
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ */
+
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/module.h>
+#include <linux/processor.h>
+#include <linux/threads.h>
+#include <linux/cpu.h>
+#include <linux/hardirq.h>
+#include <linux/smp.h>
+#include <linux/smp-internal.h>
+#include <init.h>
+#include <kern.h>
+#include <os.h>
+#include <smp.h>
+
+enum {
+        UML_IPI_RES = 0,
+        UML_IPI_CALL_SINGLE,
+        UML_IPI_CALL,
+        UML_IPI_STOP,
+};
+
+void arch_smp_send_reschedule(int cpu)
+{
+        os_send_ipi(cpu, UML_IPI_RES);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+        os_send_ipi(cpu, UML_IPI_CALL_SINGLE);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+        int cpu;
+
+        for_each_cpu(cpu, mask)
+                os_send_ipi(cpu, UML_IPI_CALL);
+}
+
+void smp_send_stop(void)
+{
+        int cpu, me = smp_processor_id();
+
+        for_each_online_cpu(cpu) {
+                if (cpu == me)
+                        continue;
+                os_send_ipi(cpu, UML_IPI_STOP);
+        }
+}
+
+static void ipi_handler(int vector, struct uml_pt_regs *regs)
+{
+        struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
+        int cpu = raw_smp_processor_id();
+
+        irq_enter();
+
+        if (current->mm)
+                os_alarm_process(current->mm->context.id.pid);
+
+        switch (vector) {
+        case UML_IPI_RES:
+                inc_irq_stat(irq_resched_count);
+                scheduler_ipi();
+                break;
+
+        case UML_IPI_CALL_SINGLE:
+                inc_irq_stat(irq_call_count);
+                generic_smp_call_function_single_interrupt();
+                break;
+
+        case UML_IPI_CALL:
+                inc_irq_stat(irq_call_count);
+                generic_smp_call_function_interrupt();
+                break;
+
+        case UML_IPI_STOP:
+                set_cpu_online(cpu, false);
+                while (1)
+                        pause();
+                break;
+
+        default:
+                pr_err("CPU#%d received unknown IPI (vector=%d)!\n", cpu, vector);
+                break;
+        }
+
+        irq_exit();
+        set_irq_regs(old_regs);
+}
+
+void uml_ipi_handler(int vector)
+{
+        struct uml_pt_regs r = { .is_user = 0 };
+
+        preempt_disable();
+        ipi_handler(vector, &r);
+        preempt_enable();
+}
+
+/* AP states used only during CPU startup */
+enum {
+        UML_CPU_PAUSED = 0,
+        UML_CPU_RUNNING,
+};
+
+static int cpu_states[NR_CPUS];
+
+static int start_secondary(void *unused)
+{
+        int err, cpu = raw_smp_processor_id();
+
+        notify_cpu_starting(cpu);
+        set_cpu_online(cpu, true);
+
+        err = um_setup_timer();
+        if (err)
+                panic("CPU#%d failed to setup timer, err = %d", cpu, err);
+
+        local_irq_enable();
+
+        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+
+        return 0;
+}
+
+void uml_start_secondary(void *opaque)
+{
+        int cpu = raw_smp_processor_id();
+        struct mm_struct *mm = &init_mm;
+        struct task_struct *idle;
+
+        stack_protections((unsigned long) &cpu_irqstacks[cpu]);
+        set_sigstack(&cpu_irqstacks[cpu], THREAD_SIZE);
+
+        set_cpu_present(cpu, true);
+        os_futex_wait(&cpu_states[cpu], UML_CPU_PAUSED);
+
+        smp_rmb(); /* paired with smp_wmb() in __cpu_up() */
+
+        idle = cpu_tasks[cpu];
+        idle->thread_info.cpu = cpu;
+
+        mmgrab(mm);
+        idle->active_mm = mm;
+
+        idle->thread.request.thread.proc = start_secondary;
+        idle->thread.request.thread.arg = NULL;
+
+        new_thread(task_stack_page(idle), &idle->thread.switch_buf,
+                   new_thread_handler);
+        os_start_secondary(opaque, &idle->thread.switch_buf);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+        int err, cpu, me = smp_processor_id();
+        unsigned long deadline;
+
+        os_init_smp();
+
+        for_each_possible_cpu(cpu) {
+                if (cpu == me)
+                        continue;
+
+                pr_debug("Booting processor %d...\n", cpu);
+                err = os_start_cpu_thread(cpu);
+                if (err) {
+                        pr_crit("CPU#%d failed to start cpu thread, err = %d",
+                                cpu, err);
+                        continue;
+                }
+
+                deadline = jiffies + msecs_to_jiffies(1000);
+                spin_until_cond(cpu_present(cpu) ||
+                                time_is_before_jiffies(deadline));
+
+                if (!cpu_present(cpu))
+                        pr_crit("CPU#%d failed to boot\n", cpu);
+        }
+}
+
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+        cpu_tasks[cpu] = tidle;
+        smp_wmb(); /* paired with smp_rmb() in uml_start_secondary() */
+
+        cpu_states[cpu] = UML_CPU_RUNNING;
+        os_futex_wake(&cpu_states[cpu]);
+
+        spin_until_cond(cpu_online(cpu));
+
+        return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+}
+
+/* Set in uml_ncpus_setup */
+int uml_ncpus = 1;
+
+void __init prefill_possible_map(void)
+{
+        int cpu;
+
+        for (cpu = 0; cpu < uml_ncpus; cpu++)
+                set_cpu_possible(cpu, true);
+        for (; cpu < NR_CPUS; cpu++)
+                set_cpu_possible(cpu, false);
+}
+
+static int __init uml_ncpus_setup(char *line, int *add)
+{
+        *add = 0;
+
+        if (kstrtoint(line, 10, &uml_ncpus)) {
+                os_warn("%s: Couldn't parse '%s'\n", __func__, line);
+                return -1;
+        }
+
+        uml_ncpus = clamp(uml_ncpus, 1, NR_CPUS);
+
+        return 0;
+}
+
+__uml_setup("ncpus=", uml_ncpus_setup,
+"ncpus=<# of desired CPUs>\n"
+"    This tells UML how many virtual processors to start. The maximum\n"
+"    number of supported virtual processors can be obtained by querying\n"
+"    the CONFIG_NR_CPUS option using --showconfig.\n\n"
+);
+
+EXPORT_SYMBOL(uml_curr_cpu);
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index cf7e0d4407f2..39608cccf2c6 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -162,9 +162,11 @@ int um_tlb_sync(struct mm_struct *mm)
 {
         pgd_t *pgd;
         struct vm_ops ops;
-        unsigned long addr = mm->context.sync_tlb_range_from, next;
+        unsigned long addr, next;
         int ret = 0;
 
+        guard(spinlock_irqsave)(&mm->context.sync_tlb_lock);
+
         if (mm->context.sync_tlb_range_to == 0)
                 return 0;
 
@@ -177,6 +179,7 @@ int um_tlb_sync(struct mm_struct *mm)
                 ops.unmap = unmap;
         }
 
+        addr = mm->context.sync_tlb_range_from;
         pgd = pgd_offset(mm, addr);
         do {
                 next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 5b80a3a89c20..177615820a4c 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -316,7 +316,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
         if (!is_user && regs)
                 current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
 
-        if (!is_user && init_mm.context.sync_tlb_range_to) {
+        if (!is_user && address >= start_vm && address < end_vm) {
                 /*
                  * Kernel has pending updates from set_ptes that were not
                  * flushed yet. Syncing them should fix the pagefault (if not
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index cf06bb732ed8..e2b24e1ecfa6 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -19,6 +19,7 @@
 #include <linux/kmsg_dump.h>
 #include <linux/suspend.h>
 #include <linux/random.h>
+#include <linux/smp-internal.h>
 
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
@@ -71,6 +72,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 {
         int i = 0;
 
+#if IS_ENABLED(CONFIG_SMP)
+        i = (uintptr_t) v - 1;
+        if (!cpu_online(i))
+                return 0;
+#endif
+
         seq_printf(m, "processor\t: %d\n", i);
         seq_printf(m, "vendor_id\t: User Mode Linux\n");
         seq_printf(m, "model name\t: UML\n");
@@ -87,13 +94,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
         seq_printf(m, "bogomips\t: %lu.%02lu\n",
                    loops_per_jiffy/(500000/HZ),
                    (loops_per_jiffy/(5000/HZ)) % 100);
-
         return 0;
 }
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-        return *pos < nr_cpu_ids ? &boot_cpu_data + *pos : NULL;
+        if (*pos < nr_cpu_ids)
+                return (void *)(uintptr_t)(*pos + 1);
+        return NULL;
 }
 
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
@@ -409,6 +417,7 @@ void __init setup_arch(char **cmdline_p)
         strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
         *cmdline_p = command_line;
         setup_hostinfo(host_info, sizeof host_info);
+        prefill_possible_map();
 
         if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) {
                 add_bootloader_randomness(rng_seed, sizeof(rng_seed));
@@ -443,6 +452,18 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
 }
 
+#if IS_ENABLED(CONFIG_SMP)
+void alternatives_smp_module_add(struct module *mod, char *name,
+                                 void *locks, void *locks_end,
+                                 void *text, void *text_end)
+{
+}
+
+void alternatives_smp_module_del(struct module *mod)
+{
+}
+#endif
+
 void *text_poke(void *addr, const void *opcode, size_t len)
 {
         /*
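
The AP bring-up in smp.c above rests on a small futex handshake: uml_start_secondary() parks the freshly created CPU thread on its cpu_states[] slot with os_futex_wait(), and __cpu_up() releases it with os_futex_wake() once the idle task has been published. Below is a minimal userspace sketch of that pattern (not part of the patch), assuming os_futex_wait()/os_futex_wake() wrap the raw FUTEX_WAIT/FUTEX_WAKE operations; the names and the pthread framing are illustrative only. Build with: cc -pthread futex-handshake.c

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <pthread.h>
    #include <stdio.h>

    enum { CPU_PAUSED, CPU_RUNNING };      /* mirrors UML_CPU_PAUSED/RUNNING */

    static int cpu_state = CPU_PAUSED;     /* mirrors one cpu_states[] slot */

    static long futex(int *uaddr, int op, int val)
    {
            return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
    }

    /* The "AP": park until the state word stops reading CPU_PAUSED,
     * as uml_start_secondary() does via os_futex_wait(). */
    static void *ap_thread(void *unused)
    {
            while (__atomic_load_n(&cpu_state, __ATOMIC_ACQUIRE) == CPU_PAUSED)
                    futex(&cpu_state, FUTEX_WAIT, CPU_PAUSED);
            printf("AP released, state=%d\n", cpu_state);
            return NULL;
    }

    int main(void)
    {
            pthread_t ap;

            pthread_create(&ap, NULL, ap_thread, NULL);

            /* The "BSP": publish the new state, then wake the parked
             * waiter, as __cpu_up() does via os_futex_wake(). */
            __atomic_store_n(&cpu_state, CPU_RUNNING, __ATOMIC_RELEASE);
            futex(&cpu_state, FUTEX_WAKE, 1);

            pthread_join(ap, NULL);
            return 0;
    }

The ordering is the point of the smp_wmb()/smp_rmb() pair in the patch: __cpu_up() stores cpu_tasks[cpu] before flipping the state word and waking, so the released AP is guaranteed to observe its idle task; the sketch approximates this with release/acquire atomics. FUTEX_WAIT also returns immediately when the word no longer holds the expected value, so a wake that races ahead of the wait is not lost.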

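Usage note (illustrative, not taken from the patch): with this series applied, SMP is requested on the UML command line like any other kernel parameter, e.g. ./linux ncpus=4 mem=256M ubd0=rootfs.img. Per the diff, uml_ncpus_setup() parses and clamps the value to [1, NR_CPUS], prefill_possible_map() marks that many CPUs possible during setup_arch(), and smp_prepare_cpus() then starts one host CPU thread per additional processor.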