diff options
| author | Tiwei Bie <tiwei.btw@antgroup.com> | 2025-10-27 08:18:12 +0800 |
|---|---|---|
| committer | Johannes Berg <johannes.berg@intel.com> | 2025-10-27 16:41:15 +0100 |
| commit | 1e4ee5135d814fe4785890790cec81c3132888fb (patch) | |
| tree | b88d4e37829af3c801081a25d2a08ce23600f59c /arch/um/os-Linux | |
| parent | 9c82de55d4783e906f18219f833ad97fd8d9c5df (diff) | |
um: Add initial SMP support
Add initial symmetric multi-processing (SMP) support to UML. With
this support enabled, users can tell UML to start multiple virtual
processors, each represented as a separate host thread.
In UML, kthreads and normal threads (when running in kernel mode)
can be scheduled and executed simultaneously on different virtual
processors. However, the userspace code of normal threads still
runs within their respective single-threaded stubs.
That is, SMP support is currently available both within the kernel
and across different processes, but still remains limited within
threads of the same process in userspace.
Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20251027001815.1666872-6-tiwei.bie@linux.dev
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Diffstat (limited to 'arch/um/os-Linux')
| -rw-r--r-- | arch/um/os-Linux/Makefile | 4 | ||||
| -rw-r--r-- | arch/um/os-Linux/internal.h | 8 | ||||
| -rw-r--r-- | arch/um/os-Linux/process.c | 20 | ||||
| -rw-r--r-- | arch/um/os-Linux/signal.c | 31 | ||||
| -rw-r--r-- | arch/um/os-Linux/skas/process.c | 39 | ||||
| -rw-r--r-- | arch/um/os-Linux/smp.c | 148 | ||||
| -rw-r--r-- | arch/um/os-Linux/start_up.c | 4 | ||||
| -rw-r--r-- | arch/um/os-Linux/time.c | 38 |
8 files changed, 267 insertions, 25 deletions
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index fae836713487..70c73c22f715 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -16,8 +16,10 @@ CFLAGS_main.o += -Wno-frame-larger-than obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o +obj-$(CONFIG_SMP) += smp.o + USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \ main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ - tty.o umid.o util.o + tty.o umid.o util.o smp.o include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h index c2c7a0dc673c..bac9fcc8c14c 100644 --- a/arch/um/os-Linux/internal.h +++ b/arch/um/os-Linux/internal.h @@ -4,6 +4,7 @@ #include <mm_id.h> #include <stub-data.h> +#include <signal.h> /* * elf_aux.c @@ -18,6 +19,7 @@ void check_tmpexec(void); /* * signal.c */ +extern __thread int signals_enabled; int timer_alarm_pending(void); /* @@ -25,4 +27,10 @@ int timer_alarm_pending(void); */ void wait_stub_done(int pid); void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys); + +/* + * smp.c + */ +#define IPI_SIGNAL SIGRTMIN + #endif /* __UM_OS_LINUX_INTERNAL_H */ diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 00b49e90d05f..3a2a84ab9325 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -10,6 +10,8 @@ #include <errno.h> #include <signal.h> #include <fcntl.h> +#include <limits.h> +#include <linux/futex.h> #include <sys/mman.h> #include <sys/ptrace.h> #include <sys/prctl.h> @@ -189,3 +191,21 @@ void os_set_pdeathsig(void) { prctl(PR_SET_PDEATHSIG, SIGKILL); } + +int os_futex_wait(void *uaddr, unsigned int val) +{ + int r; + + CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAIT, val, + NULL, NULL, 0)); + return r < 0 ? -errno : r; +} + +int os_futex_wake(void *uaddr) +{ + int r; + + CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAKE, INT_MAX, + NULL, NULL, 0)); + return r < 0 ? -errno : r; +} diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 554a87dd32cc..327fb3c52fc7 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -69,7 +69,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGCHLD_BIT 2 #define SIGCHLD_MASK (1 << SIGCHLD_BIT) -static __thread int signals_enabled; +__thread int signals_enabled; #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) static int signals_blocked, signals_blocked_pending; #endif @@ -259,9 +259,29 @@ int change_sig(int signal, int on) return 0; } -void block_signals(void) +static inline void __block_signals(void) { + if (!signals_enabled) + return; + + os_local_ipi_disable(); + barrier(); signals_enabled = 0; +} + +static inline void __unblock_signals(void) +{ + if (signals_enabled) + return; + + signals_enabled = 1; + barrier(); + os_local_ipi_enable(); +} + +void block_signals(void) +{ + __block_signals(); /* * This must return with signals disabled, so this barrier * ensures that writes are flushed out before the return. @@ -278,7 +298,8 @@ void unblock_signals(void) if (signals_enabled == 1) return; - signals_enabled = 1; + __unblock_signals(); + #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) deliver_time_travel_irqs(); #endif @@ -312,7 +333,7 @@ void unblock_signals(void) * tracing that happens inside the handlers we call for the * pending signals will mess up the tracing state. */ - signals_enabled = 0; + __block_signals(); um_trace_signals_off(); /* @@ -344,7 +365,7 @@ void unblock_signals(void) /* Re-enable signals and trace that we're doing so. */ um_trace_signals_on(); - signals_enabled = 1; + __unblock_signals(); } } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 820846ff7179..d6c22f8aa06d 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -546,7 +546,7 @@ extern unsigned long tt_extra_sched_jiffies; void userspace(struct uml_pt_regs *regs) { int err, status, op; - siginfo_t si_ptrace; + siginfo_t si_local; siginfo_t *si; int sig; @@ -557,6 +557,13 @@ void userspace(struct uml_pt_regs *regs) struct mm_id *mm_id = current_mm_id(); /* + * At any given time, only one CPU thread can enter the + * turnstile to operate on the same stub process, including + * executing stub system calls (mmap and munmap). + */ + enter_turnstile(mm_id); + + /* * When we are in time-travel mode, userspace can theoretically * do a *lot* of work without being scheduled. The problem with * this is that it will prevent kernel bookkeeping (primarily @@ -623,9 +630,10 @@ void userspace(struct uml_pt_regs *regs) } if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si)) - panic("%s - Invalid siginfo offset from child", - __func__); - si = (void *)&proc_data->sigstack[proc_data->si_offset]; + panic("%s - Invalid siginfo offset from child", __func__); + + si = &si_local; + memcpy(si, &proc_data->sigstack[proc_data->si_offset], sizeof(*si)); regs->is_user = 1; @@ -721,8 +729,8 @@ void userspace(struct uml_pt_regs *regs) case SIGFPE: case SIGWINCH: ptrace(PTRACE_GETSIGINFO, pid, 0, - (struct siginfo *)&si_ptrace); - si = &si_ptrace; + (struct siginfo *)&si_local); + si = &si_local; break; default: si = NULL; @@ -733,6 +741,8 @@ void userspace(struct uml_pt_regs *regs) } } + exit_turnstile(mm_id); + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ if (sig) { @@ -802,10 +812,9 @@ void switch_threads(jmp_buf *me, jmp_buf *you) static jmp_buf initial_jmpbuf; -/* XXX Make these percpu */ -static void (*cb_proc)(void *arg); -static void *cb_arg; -static jmp_buf *cb_back; +static __thread void (*cb_proc)(void *arg); +static __thread void *cb_arg; +static __thread jmp_buf *cb_back; int start_idle_thread(void *stack, jmp_buf *switch_buf) { @@ -859,10 +868,10 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) cb_arg = arg; cb_back = &here; - block_signals_trace(); + initial_jmpbuf_lock(); if (UML_SETJMP(&here) == 0) UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); - unblock_signals_trace(); + initial_jmpbuf_unlock(); cb_proc = NULL; cb_arg = NULL; @@ -871,8 +880,9 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) void halt_skas(void) { - block_signals_trace(); + initial_jmpbuf_lock(); UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); + /* unreachable */ } static bool noreboot; @@ -892,6 +902,7 @@ __uml_setup("noreboot", noreboot_cmd_param, void reboot_skas(void) { - block_signals_trace(); + initial_jmpbuf_lock(); UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT); + /* unreachable */ } diff --git a/arch/um/os-Linux/smp.c b/arch/um/os-Linux/smp.c new file mode 100644 index 000000000000..18d3858a7cd2 --- /dev/null +++ b/arch/um/os-Linux/smp.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Ant Group + * Author: Tiwei Bie <tiwei.btw@antgroup.com> + */ + +#include <errno.h> +#include <pthread.h> +#include <signal.h> +#include <kern_util.h> +#include <um_malloc.h> +#include <init.h> +#include <os.h> +#include <smp.h> +#include "internal.h" + +struct cpu_thread_data { + int cpu; + sigset_t sigset; +}; + +static __thread int __curr_cpu; + +int uml_curr_cpu(void) +{ + return __curr_cpu; +} + +static pthread_t cpu_threads[CONFIG_NR_CPUS]; + +static void *cpu_thread(void *arg) +{ + struct cpu_thread_data *data = arg; + + __curr_cpu = data->cpu; + + uml_start_secondary(data); + + return NULL; +} + +int os_start_cpu_thread(int cpu) +{ + struct cpu_thread_data *data; + sigset_t sigset, oset; + int err; + + data = uml_kmalloc(sizeof(*data), UM_GFP_ATOMIC); + if (!data) + return -ENOMEM; + + sigfillset(&sigset); + if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) { + err = errno; + goto err; + } + + data->cpu = cpu; + data->sigset = oset; + + err = pthread_create(&cpu_threads[cpu], NULL, cpu_thread, data); + if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0) + panic("Failed to restore the signal mask, errno = %d", errno); + if (err != 0) + goto err; + + return 0; + +err: + kfree(data); + return -err; +} + +void os_start_secondary(void *arg, jmp_buf *switch_buf) +{ + struct cpu_thread_data *data = arg; + + sigaddset(&data->sigset, IPI_SIGNAL); + sigaddset(&data->sigset, SIGIO); + + if (sigprocmask(SIG_SETMASK, &data->sigset, NULL) < 0) + panic("Failed to restore the signal mask, errno = %d", errno); + + kfree(data); + longjmp(*switch_buf, 1); + + /* unreachable */ + printk(UM_KERN_ERR "impossible long jump!"); + fatal_sigsegv(); +} + +int os_send_ipi(int cpu, int vector) +{ + union sigval value = { .sival_int = vector }; + + return pthread_sigqueue(cpu_threads[cpu], IPI_SIGNAL, value); +} + +static void __local_ipi_set(int enable) +{ + sigset_t sigset; + + sigemptyset(&sigset); + sigaddset(&sigset, IPI_SIGNAL); + + if (sigprocmask(enable ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0) + panic("%s: sigprocmask failed, errno = %d", __func__, errno); +} + +void os_local_ipi_enable(void) +{ + __local_ipi_set(1); +} + +void os_local_ipi_disable(void) +{ + __local_ipi_set(0); +} + +static void ipi_sig_handler(int sig, siginfo_t *si, void *uc) +{ + int save_errno = errno; + + signals_enabled = 0; + um_trace_signals_off(); + + uml_ipi_handler(si->si_value.sival_int); + + um_trace_signals_on(); + signals_enabled = 1; + + errno = save_errno; +} + +void __init os_init_smp(void) +{ + struct sigaction action = { + .sa_sigaction = ipi_sig_handler, + .sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART, + }; + + sigfillset(&action.sa_mask); + + if (sigaction(IPI_SIGNAL, &action, NULL) < 0) + panic("%s: sigaction failed, errno = %d", __func__, errno); + + cpu_threads[0] = pthread_self(); +} diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 8b19dca83f71..054ac03bbf5e 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -22,6 +22,7 @@ #include <asm/unistd.h> #include <init.h> #include <os.h> +#include <smp.h> #include <kern_util.h> #include <mem_user.h> #include <ptrace_user.h> @@ -481,6 +482,9 @@ void __init os_early_checks(void) fatal("SECCOMP userspace requested but not functional!\n"); } + if (uml_ncpus > 1) + fatal("SMP is not supported with PTRACE userspace.\n"); + using_seccomp = 0; check_ptrace(); diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index e0197bfe4ac9..13ebc86918d4 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -11,9 +11,11 @@ #include <errno.h> #include <signal.h> #include <time.h> +#include <sys/signalfd.h> #include <sys/time.h> #include <kern_util.h> #include <os.h> +#include <smp.h> #include <string.h> #include "internal.h" @@ -41,7 +43,8 @@ long long os_persistent_clock_emulation(void) */ int os_timer_create(void) { - timer_t *t = &event_high_res_timer[0]; + int cpu = uml_curr_cpu(); + timer_t *t = &event_high_res_timer[cpu]; struct sigevent sev = { .sigev_notify = SIGEV_THREAD_ID, .sigev_signo = SIGALRM, @@ -105,24 +108,49 @@ long long os_nsecs(void) return timespec_to_ns(&ts); } +static __thread int wake_signals; + +void os_idle_prepare(void) +{ + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, SIGALRM); + sigaddset(&set, IPI_SIGNAL); + + /* + * We need to use signalfd rather than sigsuspend in idle sleep + * because the IPI signal is a real-time signal that carries data, + * and unlike handling SIGALRM, we cannot simply flag it in + * signals_pending. + */ + wake_signals = signalfd(-1, &set, SFD_CLOEXEC); + if (wake_signals < 0) + panic("Failed to create signal FD, errno = %d", errno); +} + /** * os_idle_sleep() - sleep until interrupted */ void os_idle_sleep(void) { - sigset_t set, old; + sigset_t set; - /* Block SIGALRM while performing the need_resched check. */ + /* + * Block SIGALRM while performing the need_resched check. + * Note that, because IRQs are disabled, the IPI signal is + * already blocked. + */ sigemptyset(&set); sigaddset(&set, SIGALRM); - sigprocmask(SIG_BLOCK, &set, &old); + sigprocmask(SIG_BLOCK, &set, NULL); /* * Because disabling IRQs does not block SIGALRM, it is also * necessary to check for any pending timer alarms. */ if (!uml_need_resched() && !timer_alarm_pending()) - sigsuspend(&old); + os_poll(1, &wake_signals); /* Restore the signal mask. */ sigprocmask(SIG_UNBLOCK, &set, NULL); |
