diff options
Diffstat (limited to 'arch/um')
50 files changed, 943 insertions, 600 deletions
diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 49781bee7905..8415d39b0d43 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -5,6 +5,7 @@ menu "UML-specific options" config UML bool default y + select ARCH_DISABLE_KASAN_INLINE if STATIC_LINK select ARCH_NEEDS_DEFER_KASAN if STATIC_LINK select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_HAS_CACHE_LINE_SIZE @@ -28,6 +29,7 @@ config UML select OF_EARLY_FLATTREE if OF select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES + select GENERIC_SMP_IDLE_THREAD select HAVE_GCC_PLUGINS select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN @@ -81,10 +83,48 @@ config HZ int default 100 -config NR_CPUS +config UML_SUBARCH_SUPPORTS_SMP + bool + +config SMP + bool "Symmetric multi-processing support" + default n + depends on UML_SUBARCH_SUPPORTS_SMP + help + This option enables UML SMP support. + + With this enabled, users can tell UML to start multiple virtual + processors. Each virtual processor is represented as a separate + host thread. + + In UML, kthreads and normal threads (when running in kernel mode) + can be scheduled and executed simultaneously on different virtual + processors. However, the userspace code of normal threads still + runs within their respective single-threaded stubs. + + That is, SMP support is available both within the kernel and + across different processes, but remains limited within threads + of the same process in userspace. + +config NR_CPUS_RANGE_BEGIN int - range 1 1 - default 1 + default 1 if !SMP + default 2 + +config NR_CPUS_RANGE_END + int + default 1 if !SMP + default 64 + +config NR_CPUS_DEFAULT + int + default 1 if !SMP + default 2 + +config NR_CPUS + int "Maximum number of CPUs" if SMP + range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END + default NR_CPUS_DEFAULT source "arch/$(HEADER_ARCH)/um/Kconfig" @@ -200,12 +240,6 @@ config KERNEL_STACK_ORDER increase in the size of the state which needs to be saved when handling signals. -config MMAPPER - tristate "iomem emulation driver" - help - This driver allows a host file to be used as emulated IO memory inside - UML. - config PGTABLE_LEVELS int default 4 if 64BIT @@ -260,6 +294,7 @@ source "arch/um/drivers/Kconfig" config ARCH_SUSPEND_POSSIBLE def_bool y + depends on !SMP menu "Power management options" diff --git a/arch/um/Makefile b/arch/um/Makefile index 7be0143b5ba3..721b652ffb65 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -46,19 +46,17 @@ ARCH_INCLUDE := -I$(srctree)/$(SHARED_HEADERS) ARCH_INCLUDE += -I$(srctree)/$(HOST_DIR)/um/shared KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/um -# -Dvmap=kernel_vmap prevents anything from referencing the libpcap.o symbol so -# named - it's a common symbol in libpcap, so we get a binary which crashes. -# -# Same things for in6addr_loopback and mktime - found in libc. For these two we -# only get link-time error, luckily. +# -Dstrrchr=kernel_strrchr (as well as the various in6addr symbols) prevents +# anything from referencing +# libc symbols with the same name, which can cause a linker error. # # -Dlongjmp=kernel_longjmp prevents anything from referencing the libpthread.a # embedded copy of longjmp, same thing for setjmp. # -# These apply to USER_CFLAGS to. +# These apply to USER_CFLAGS too. KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ - $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ + $(ARCH_INCLUDE) $(MODE_INCLUDE) \ -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ -Din6addr_loopback=kernel_in6addr_loopback \ -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 6bf8cbf71d3c..36dc57840084 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -29,7 +29,6 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o obj-$(CONFIG_UML_NET_VECTOR) += vector.o obj-$(CONFIG_MCONSOLE) += mconsole.o -obj-$(CONFIG_MMAPPER) += mmapper_kern.o obj-$(CONFIG_BLK_DEV_UBD) += ubd.o obj-$(CONFIG_UML_SOUND) += hostaudio.o obj-$(CONFIG_NULL_CHAN) += null.o diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c deleted file mode 100644 index 807cd3358740..000000000000 --- a/arch/um/drivers/mmapper_kern.c +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * arch/um/drivers/mmapper_kern.c - * - * BRIEF MODULE DESCRIPTION - * - * Copyright (C) 2000 RidgeRun, Inc. - * Author: RidgeRun, Inc. - * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com - * - */ - -#include <linux/stddef.h> -#include <linux/types.h> -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/miscdevice.h> -#include <linux/module.h> -#include <linux/mm.h> - -#include <linux/uaccess.h> -#include <mem_user.h> - -/* These are set in mmapper_init, which is called at boot time */ -static unsigned long mmapper_size; -static unsigned long p_buf; -static char *v_buf; - -static ssize_t mmapper_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) -{ - return simple_read_from_buffer(buf, count, ppos, v_buf, mmapper_size); -} - -static ssize_t mmapper_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - if (*ppos > mmapper_size) - return -EINVAL; - - return simple_write_to_buffer(v_buf, mmapper_size, ppos, buf, count); -} - -static long mmapper_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - return -ENOIOCTLCMD; -} - -static int mmapper_mmap(struct file *file, struct vm_area_struct *vma) -{ - int ret = -EINVAL; - int size; - - if (vma->vm_pgoff != 0) - goto out; - - size = vma->vm_end - vma->vm_start; - if (size > mmapper_size) - return -EFAULT; - - /* - * XXX A comment above remap_pfn_range says it should only be - * called when the mm semaphore is held - */ - if (remap_pfn_range(vma, vma->vm_start, p_buf >> PAGE_SHIFT, size, - vma->vm_page_prot)) - goto out; - ret = 0; -out: - return ret; -} - -static int mmapper_open(struct inode *inode, struct file *file) -{ - return 0; -} - -static int mmapper_release(struct inode *inode, struct file *file) -{ - return 0; -} - -static const struct file_operations mmapper_fops = { - .owner = THIS_MODULE, - .read = mmapper_read, - .write = mmapper_write, - .unlocked_ioctl = mmapper_ioctl, - .mmap = mmapper_mmap, - .open = mmapper_open, - .release = mmapper_release, - .llseek = default_llseek, -}; - -/* - * No locking needed - only used (and modified) by below initcall and exitcall. - */ -static struct miscdevice mmapper_dev = { - .minor = MISC_DYNAMIC_MINOR, - .name = "mmapper", - .fops = &mmapper_fops -}; - -static int __init mmapper_init(void) -{ - int err; - - printk(KERN_INFO "Mapper v0.1\n"); - - v_buf = (char *) find_iomem("mmapper", &mmapper_size); - if (mmapper_size == 0) { - printk(KERN_ERR "mmapper_init - find_iomem failed\n"); - return -ENODEV; - } - p_buf = __pa(v_buf); - - err = misc_register(&mmapper_dev); - if (err) { - printk(KERN_ERR "mmapper - misc_register failed, err = %d\n", - err); - return err; - } - return 0; -} - -static void __exit mmapper_exit(void) -{ - misc_deregister(&mmapper_dev); -} - -module_init(mmapper_init); -module_exit(mmapper_exit); - -MODULE_AUTHOR("Greg Lonnon <glonnon@ridgerun.com>"); -MODULE_DESCRIPTION("DSPLinux simulator mmapper driver"); -MODULE_LICENSE("GPL"); diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index de7867ae220d..6cf1152a1a4e 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -24,6 +24,7 @@ #include <linux/of.h> #include <linux/platform_device.h> #include <linux/slab.h> +#include <linux/string_choices.h> #include <linux/virtio.h> #include <linux/virtio_config.h> #include <linux/virtio_ring.h> @@ -1151,8 +1152,7 @@ void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev, return; vu_dev->no_vq_suspend = no_vq_suspend; - dev_info(&vdev->dev, "%sabled VQ suspend\n", - no_vq_suspend ? "dis" : "en"); + dev_info(&vdev->dev, "%s VQ suspend\n", str_disabled_enabled(no_vq_suspend)); } static void vu_of_conn_broken(struct work_struct *wk) diff --git a/arch/um/include/asm/current.h b/arch/um/include/asm/current.h index 8accc6d6f502..159a29b3d4cc 100644 --- a/arch/um/include/asm/current.h +++ b/arch/um/include/asm/current.h @@ -7,15 +7,16 @@ #ifndef __ASSEMBLER__ +#include <shared/smp.h> + struct task_struct; extern struct task_struct *cpu_tasks[NR_CPUS]; static __always_inline struct task_struct *get_current(void) { - return cpu_tasks[0]; + return cpu_tasks[uml_curr_cpu()]; } - #define current get_current() #endif /* __ASSEMBLER__ */ diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h index 52e2c36267a9..8de71752a9b8 100644 --- a/arch/um/include/asm/hardirq.h +++ b/arch/um/include/asm/hardirq.h @@ -2,8 +2,30 @@ #ifndef __ASM_UM_HARDIRQ_H #define __ASM_UM_HARDIRQ_H -#include <asm-generic/hardirq.h> +#include <linux/cache.h> +#include <linux/threads.h> #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 +typedef struct { + unsigned int __softirq_pending; +#if IS_ENABLED(CONFIG_SMP) + unsigned int irq_resched_count; + unsigned int irq_call_count; +#endif +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); + +#define __ARCH_IRQ_STAT + +#define inc_irq_stat(member) this_cpu_inc(irq_stat.member) + +#include <linux/irq.h> + +static inline void ack_bad_irq(unsigned int irq) +{ + pr_crit("unexpected IRQ trap at vector %02x\n", irq); +} + #endif /* __ASM_UM_HARDIRQ_H */ diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h index 1e69ef5bc35e..31e49e0894c5 100644 --- a/arch/um/include/asm/irqflags.h +++ b/arch/um/include/asm/irqflags.h @@ -2,7 +2,7 @@ #ifndef __UM_IRQFLAGS_H #define __UM_IRQFLAGS_H -extern int signals_enabled; +int um_get_signals(void); int um_set_signals(int enable); void block_signals(void); void unblock_signals(void); @@ -10,7 +10,7 @@ void unblock_signals(void); #define arch_local_save_flags arch_local_save_flags static inline unsigned long arch_local_save_flags(void) { - return signals_enabled; + return um_get_signals(); } #define arch_local_irq_restore arch_local_irq_restore diff --git a/arch/um/include/asm/kasan.h b/arch/um/include/asm/kasan.h index b54a4e937fd1..81bcdc0f962e 100644 --- a/arch/um/include/asm/kasan.h +++ b/arch/um/include/asm/kasan.h @@ -24,10 +24,6 @@ #ifdef CONFIG_KASAN void kasan_init(void); - -#if defined(CONFIG_STATIC_LINK) && defined(CONFIG_KASAN_INLINE) -#error UML does not work in KASAN_INLINE mode with STATIC_LINK enabled! -#endif #else static inline void kasan_init(void) { } #endif /* CONFIG_KASAN */ diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h index 4d0e4239f3cc..07d48738b402 100644 --- a/arch/um/include/asm/mmu.h +++ b/arch/um/include/asm/mmu.h @@ -7,16 +7,26 @@ #define __ARCH_UM_MMU_H #include "linux/types.h" +#include <linux/mutex.h> +#include <linux/spinlock.h> #include <mm_id.h> typedef struct mm_context { struct mm_id id; + struct mutex turnstile; struct list_head list; /* Address range in need of a TLB sync */ + spinlock_t sync_tlb_lock; unsigned long sync_tlb_range_from; unsigned long sync_tlb_range_to; } mm_context_t; +#define INIT_MM_CONTEXT(mm) \ + .context = { \ + .turnstile = __MUTEX_INITIALIZER(mm.context.turnstile), \ + .sync_tlb_lock = __SPIN_LOCK_INITIALIZER(mm.context.sync_tlb_lock), \ + } + #endif diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 6f54254aaf44..2d363460d896 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -96,8 +96,4 @@ extern unsigned long uml_physmem; #endif /* __ASSEMBLER__ */ -#ifdef CONFIG_X86_32 -#define __HAVE_ARCH_GATE_AREA 1 -#endif - #endif /* __UM_PAGE_H */ diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 24fdea6f88c3..3b42b0f45bf6 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -45,10 +45,12 @@ extern unsigned long *empty_zero_page; * area for the same reason. ;) */ -extern unsigned long end_iomem; +#ifndef COMPILE_OFFSETS +#include <as-layout.h> /* for high_physmem */ +#endif #define VMALLOC_OFFSET (__va_space) -#define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) +#define VMALLOC_START ((high_physmem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) #define VMALLOC_END (TASK_SIZE-2*PAGE_SIZE) #define MODULES_VADDR VMALLOC_START #define MODULES_END VMALLOC_END @@ -225,6 +227,8 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start, unsigned long end) { + guard(spinlock_irqsave)(&mm->context.sync_tlb_lock); + if (!mm->context.sync_tlb_range_to) { mm->context.sync_tlb_range_from = start; mm->context.sync_tlb_range_to = end; diff --git a/arch/um/include/asm/smp.h b/arch/um/include/asm/smp.h index a8cc1d46ddcb..be1743a6ff3c 100644 --- a/arch/um/include/asm/smp.h +++ b/arch/um/include/asm/smp.h @@ -2,6 +2,19 @@ #ifndef __UM_SMP_H #define __UM_SMP_H -#define hard_smp_processor_id() 0 +#if IS_ENABLED(CONFIG_SMP) + +#include <linux/cpumask.h> +#include <shared/smp.h> + +#define raw_smp_processor_id() uml_curr_cpu() + +void arch_smp_send_reschedule(int cpu); + +void arch_send_call_function_single_ipi(int cpu); + +void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +#endif /* CONFIG_SMP */ #endif diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 1c6e0ae41b0c..0df9ea4abda8 100644 --- a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -15,11 +15,6 @@ (((unsigned long) (addr) < TASK_SIZE) && \ (((unsigned long) (addr) + (size)) < TASK_SIZE)) -#define __access_ok_vsyscall(addr, size) \ - (((unsigned long) (addr) >= FIXADDR_USER_START) && \ - ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \ - ((unsigned long) (addr) + (size) >= (unsigned long)(addr))) - #define __addr_range_nowrap(addr, size) \ ((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) @@ -40,9 +35,7 @@ static inline int __access_ok(const void __user *ptr, unsigned long size); static inline int __access_ok(const void __user *ptr, unsigned long size) { unsigned long addr = (unsigned long)ptr; - return __addr_range_nowrap(addr, size) && - (__under_task_size(addr, size) || - __access_ok_vsyscall(addr, size)); + return __addr_range_nowrap(addr, size) && __under_task_size(addr, size); } #define __get_kernel_nofault(dst, src, type, err_label) \ diff --git a/arch/um/include/linux/smp-internal.h b/arch/um/include/linux/smp-internal.h new file mode 100644 index 000000000000..1dbcbc23f9c9 --- /dev/null +++ b/arch/um/include/linux/smp-internal.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_SMP_INTERNAL_H +#define __UM_SMP_INTERNAL_H + +#if IS_ENABLED(CONFIG_SMP) + +void prefill_possible_map(void); + +#else /* !CONFIG_SMP */ + +static inline void prefill_possible_map(void) { } + +#endif /* CONFIG_SMP */ + +extern char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE); + +#endif /* __UM_SMP_INTERNAL_H */ diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h index 138908b999d7..c274eb5ad55e 100644 --- a/arch/um/include/linux/time-internal.h +++ b/arch/um/include/linux/time-internal.h @@ -90,4 +90,7 @@ extern unsigned long tt_extra_sched_jiffies; * which is intentional since we really shouldn't link it in that case. */ void time_travel_ndelay(unsigned long nsec); + +int um_setup_timer(void); + #endif /* __TIMER_INTERNAL_H__ */ diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index 7c7e17bce403..02ef258e3395 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -44,7 +44,6 @@ extern unsigned long start_vm; extern unsigned long brk_start; -extern unsigned long host_task_size; extern unsigned long stub_start; extern int linux_main(int argc, char **argv, char **envp); diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h deleted file mode 100644 index 8ca66a1918c3..000000000000 --- a/arch/um/include/shared/common-offsets.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* for use by sys-$SUBARCH/kernel-offsets.c */ - -DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); - -DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); -DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); -DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); - -DEFINE(UM_GFP_KERNEL, GFP_KERNEL); -DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC); - -DEFINE(UM_THREAD_SIZE, THREAD_SIZE); - -DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); -DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); - -DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES); - -DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE); diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index 00ca3e12fd9a..38321188c04c 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -15,9 +15,6 @@ extern int uml_exitcode; extern int kmalloc_ok; -#define UML_ROUND_UP(addr) \ - ((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK) - extern unsigned long alloc_stack(int order, int atomic); extern void free_stack(unsigned long stack, int order); @@ -42,7 +39,6 @@ extern void uml_pm_wake(void); extern int start_uml(void); extern void paging_init(void); -extern int parse_iomem(char *str, int *add); extern void uml_cleanup(void); extern void do_uml_exitcalls(void); @@ -55,6 +51,7 @@ extern int __uml_cant_sleep(void); extern int get_current_pid(void); extern int copy_from_user_proc(void *to, void *from, int size); extern char *uml_strdup(const char *string); +int uml_need_resched(void); extern unsigned long to_irq_stack(unsigned long *mask_out); extern unsigned long from_irq_stack(int nested); diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h index 8863319039f3..c53e43d980c8 100644 --- a/arch/um/include/shared/longjmp.h +++ b/arch/um/include/shared/longjmp.h @@ -5,7 +5,6 @@ #include <sysdep/archsetjmp.h> #include <os.h> -extern int signals_enabled; extern int setjmp(jmp_buf); extern void longjmp(jmp_buf, int); @@ -15,7 +14,7 @@ extern void longjmp(jmp_buf, int); #define UML_SETJMP(buf) ({ \ int n, enable; \ - enable = *(volatile int *)&signals_enabled; \ + enable = um_get_signals(); \ n = setjmp(*buf); \ if(n != 0) \ um_set_signals_trace(enable); \ diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h index d4727efcf23d..8a5b72872ff8 100644 --- a/arch/um/include/shared/mem_user.h +++ b/arch/um/include/shared/mem_user.h @@ -32,21 +32,8 @@ #ifndef _MEM_USER_H #define _MEM_USER_H -struct iomem_region { - struct iomem_region *next; - char *driver; - int fd; - int size; - unsigned long phys; - unsigned long virt; -}; - -extern struct iomem_region *iomem_regions; -extern int iomem_size; - #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) -extern unsigned long find_iomem(char *driver, unsigned long *len_out); extern void setup_physmem(unsigned long start, unsigned long usable, unsigned long len); extern void map_memory(unsigned long virt, unsigned long phys, diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index b35cc8ce333b..b26e94292fc1 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -216,6 +216,9 @@ extern int can_drop_memory(void); void os_set_pdeathsig(void); +int os_futex_wait(void *uaddr, unsigned int val); +int os_futex_wake(void *uaddr); + /* execvp.c */ extern int execvp_noalloc(char *buf, const char *file, char *const argv[]); /* helper.c */ @@ -243,6 +246,7 @@ extern void send_sigio_to_self(void); extern int change_sig(int signal, int on); extern void block_signals(void); extern void unblock_signals(void); +extern int um_get_signals(void); extern int um_set_signals(int enable); extern int um_set_signals_trace(int enable); extern void deliver_alarm(void); @@ -266,11 +270,12 @@ extern void os_warn(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); /* time.c */ +void os_idle_prepare(void); extern void os_idle_sleep(void); extern int os_timer_create(void); -extern int os_timer_set_interval(unsigned long long nsecs); -extern int os_timer_one_shot(unsigned long long nsecs); -extern void os_timer_disable(void); +extern int os_timer_set_interval(int cpu, unsigned long long nsecs); +extern int os_timer_one_shot(int cpu, unsigned long long nsecs); +extern void os_timer_disable(int cpu); extern long long os_persistent_clock_emulation(void); extern long long os_nsecs(void); @@ -338,4 +343,17 @@ extern void um_trace_signals_off(void); /* time-travel */ extern void deliver_time_travel_irqs(void); +/* smp.c */ +#if IS_ENABLED(CONFIG_SMP) +void os_init_smp(void); +int os_start_cpu_thread(int cpu); +void os_start_secondary(void *arg, jmp_buf *switch_buf); +int os_send_ipi(int cpu, int vector); +void os_local_ipi_enable(void); +void os_local_ipi_disable(void); +#else /* !CONFIG_SMP */ +static inline void os_local_ipi_enable(void) { } +static inline void os_local_ipi_disable(void) { } +#endif /* CONFIG_SMP */ + #endif diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h index 4f977ef5dda5..fb96c0bd8222 100644 --- a/arch/um/include/shared/skas/mm_id.h +++ b/arch/um/include/shared/skas/mm_id.h @@ -6,6 +6,8 @@ #ifndef __MM_ID_H #define __MM_ID_H +#include <linux/compiler_types.h> + #define STUB_MAX_FDS 4 struct mm_id { @@ -19,6 +21,9 @@ struct mm_id { int syscall_fd_map[STUB_MAX_FDS]; }; +void enter_turnstile(struct mm_id *mm_id) __acquires(turnstile); +void exit_turnstile(struct mm_id *mm_id) __releases(turnstile); + void notify_mm_kill(int pid); #endif diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h index 807514e10538..2237ffedec75 100644 --- a/arch/um/include/shared/skas/skas.h +++ b/arch/um/include/shared/skas/skas.h @@ -15,5 +15,7 @@ extern void handle_syscall(struct uml_pt_regs *regs); extern unsigned long current_stub_stack(void); extern struct mm_id *current_mm_id(void); extern void current_mm_sync(void); +void initial_jmpbuf_lock(void); +void initial_jmpbuf_unlock(void); #endif diff --git a/arch/um/include/shared/smp.h b/arch/um/include/shared/smp.h new file mode 100644 index 000000000000..06e3faa95091 --- /dev/null +++ b/arch/um/include/shared/smp.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_SHARED_SMP_H +#define __UM_SHARED_SMP_H + +#if IS_ENABLED(CONFIG_SMP) + +extern int uml_ncpus; + +int uml_curr_cpu(void); +void uml_start_secondary(void *opaque); +void uml_ipi_handler(int vector); + +#else /* !CONFIG_SMP */ + +#define uml_ncpus 1 +#define uml_curr_cpu() 0 + +#endif /* CONFIG_SMP */ + +#endif /* __UM_SHARED_SMP_H */ diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index b8f4e9281599..be60bc451b3f 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_OF) += dtb.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_SMP) += smp.o USER_OBJS := config.o diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c index a69873aa697f..d38447e39d5e 100644 --- a/arch/um/kernel/asm-offsets.c +++ b/arch/um/kernel/asm-offsets.c @@ -1,3 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #define COMPILE_OFFSETS +#include <linux/stddef.h> +#include <linux/sched.h> +#include <linux/elf.h> +#include <linux/crypto.h> +#include <linux/kbuild.h> +#include <linux/audit.h> +#include <linux/fs.h> +#include <asm/mman.h> +#include <asm/seccomp.h> -#include <sysdep/kernel-offsets.h> +/* workaround for a warning with -Wmissing-prototypes */ +void foo(void); + +void foo(void) +{ + DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); + + DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); + DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); + DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); + + DEFINE(UM_GFP_KERNEL, GFP_KERNEL); + DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC); + + DEFINE(UM_THREAD_SIZE, THREAD_SIZE); + + DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); + DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); + + DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES); + + DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE); + + DEFINE(HOSTFS_ATTR_MODE, ATTR_MODE); + DEFINE(HOSTFS_ATTR_UID, ATTR_UID); + DEFINE(HOSTFS_ATTR_GID, ATTR_GID); + DEFINE(HOSTFS_ATTR_SIZE, ATTR_SIZE); + DEFINE(HOSTFS_ATTR_ATIME, ATTR_ATIME); + DEFINE(HOSTFS_ATTR_MTIME, ATTR_MTIME); + DEFINE(HOSTFS_ATTR_CTIME, ATTR_CTIME); + DEFINE(HOSTFS_ATTR_ATIME_SET, ATTR_ATIME_SET); + DEFINE(HOSTFS_ATTR_MTIME_SET, ATTR_MTIME_SET); +} diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index d69d137a0334..f4b13f15a9c1 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -22,6 +22,9 @@ #include <irq_kern.h> #include <linux/time-internal.h> +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); + +#define irq_stats(x) (&per_cpu(irq_stat, x)) /* When epoll triggers we do not know why it did so * we can also have different IRQs for read and write. @@ -683,7 +686,7 @@ void __init init_IRQ(void) { int i; - irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq); + irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_percpu_irq); for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++) irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); @@ -701,3 +704,25 @@ void sigchld_handler(int sig, struct siginfo *unused_si, { do_IRQ(SIGCHLD_IRQ, regs); } + +/* + * /proc/interrupts printing for arch specific interrupts + */ +int arch_show_interrupts(struct seq_file *p, int prec) +{ +#if IS_ENABLED(CONFIG_SMP) + int cpu; + + seq_printf(p, "%*s: ", prec, "RES"); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count); + seq_puts(p, " Rescheduling interrupts\n"); + + seq_printf(p, "%*s: ", prec, "CAL"); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count); + seq_puts(p, " Function call interrupts\n"); +#endif + + return 0; +} diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index f2fb77da08cf..96314c31e61c 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -6,8 +6,8 @@ #include <linux/module.h> #include <os.h> +EXPORT_SYMBOL(um_get_signals); EXPORT_SYMBOL(um_set_signals); -EXPORT_SYMBOL(signals_enabled); EXPORT_SYMBOL(os_stat_fd); EXPORT_SYMBOL(os_stat_file); diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 32e3b1972dc1..39c4a7e21c6f 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -71,7 +71,7 @@ void __init arch_mm_preinit(void) /* Map in the area just after the brk now that kmalloc is about * to be turned on. */ - brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); + brk_end = PAGE_ALIGN((unsigned long) sbrk(0)); map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); memblock_free((void *)brk_end, uml_reserved - brk_end); uml_reserved = brk_end; @@ -84,109 +84,6 @@ void __init mem_init(void) kmalloc_ok = 1; } -#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) -/* - * Create a page table and place a pointer to it in a middle page - * directory entry. - */ -static void __init one_page_table_init(pmd_t *pmd) -{ - if (pmd_none(*pmd)) { - pte_t *pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, - PAGE_SIZE); - if (!pte) - panic("%s: Failed to allocate %lu bytes align=%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE); - - set_pmd(pmd, __pmd(_KERNPG_TABLE + - (unsigned long) __pa(pte))); - BUG_ON(pte != pte_offset_kernel(pmd, 0)); - } -} - -static void __init one_md_table_init(pud_t *pud) -{ -#if CONFIG_PGTABLE_LEVELS > 2 - pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - if (!pmd_table) - panic("%s: Failed to allocate %lu bytes align=%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE); - - set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table))); - BUG_ON(pmd_table != pmd_offset(pud, 0)); -#endif -} - -static void __init one_ud_table_init(p4d_t *p4d) -{ -#if CONFIG_PGTABLE_LEVELS > 3 - pud_t *pud_table = (pud_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - if (!pud_table) - panic("%s: Failed to allocate %lu bytes align=%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE); - - set_p4d(p4d, __p4d(_KERNPG_TABLE + (unsigned long) __pa(pud_table))); - BUG_ON(pud_table != pud_offset(p4d, 0)); -#endif -} - -static void __init fixrange_init(unsigned long start, unsigned long end, - pgd_t *pgd_base) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - int i, j; - unsigned long vaddr; - - vaddr = start; - i = pgd_index(vaddr); - j = pmd_index(vaddr); - pgd = pgd_base + i; - - for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { - p4d = p4d_offset(pgd, vaddr); - if (p4d_none(*p4d)) - one_ud_table_init(p4d); - pud = pud_offset(p4d, vaddr); - if (pud_none(*pud)) - one_md_table_init(pud); - pmd = pmd_offset(pud, vaddr); - for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) { - one_page_table_init(pmd); - vaddr += PMD_SIZE; - } - j = 0; - } -} - -static void __init fixaddr_user_init( void) -{ - long size = FIXADDR_USER_END - FIXADDR_USER_START; - pte_t *pte; - phys_t p; - unsigned long v, vaddr = FIXADDR_USER_START; - - if (!size) - return; - - fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir); - v = (unsigned long) memblock_alloc_low(size, PAGE_SIZE); - if (!v) - panic("%s: Failed to allocate %lu bytes align=%lx\n", - __func__, size, PAGE_SIZE); - - memcpy((void *) v , (void *) FIXADDR_USER_START, size); - p = __pa(v); - for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE, - p += PAGE_SIZE) { - pte = virt_to_kpte(vaddr); - pte_set_val(*pte, p, PAGE_READONLY); - } -} -#endif - void __init paging_init(void) { unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 }; @@ -197,12 +94,8 @@ void __init paging_init(void) panic("%s: Failed to allocate %lu bytes align=%lx\n", __func__, PAGE_SIZE, PAGE_SIZE); - max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT; + max_zone_pfn[ZONE_NORMAL] = high_physmem >> PAGE_SHIFT; free_area_init(max_zone_pfn); - -#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) - fixaddr_user_init(); -#endif } /* diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index af02b5f9911d..ae6ca373c261 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -105,19 +105,6 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out) fd = physmem_fd; *offset_out = phys; } - else if (phys < __pa(end_iomem)) { - struct iomem_region *region = iomem_regions; - - while (region != NULL) { - if ((phys >= region->phys) && - (phys < region->phys + region->size)) { - fd = region->fd; - *offset_out = phys - region->phys; - break; - } - region = region->next; - } - } return fd; } @@ -140,61 +127,3 @@ __uml_setup("mem=", uml_mem_setup, " be more, and the excess, if it's ever used, will just be swapped out.\n" " Example: mem=64M\n\n" ); - -__uml_setup("iomem=", parse_iomem, -"iomem=<name>,<file>\n" -" Configure <file> as an IO memory region named <name>.\n\n" -); - -/* - * This list is constructed in parse_iomem and addresses filled in - * setup_iomem, both of which run during early boot. Afterwards, it's - * unchanged. - */ -struct iomem_region *iomem_regions; - -/* Initialized in parse_iomem and unchanged thereafter */ -int iomem_size; - -unsigned long find_iomem(char *driver, unsigned long *len_out) -{ - struct iomem_region *region = iomem_regions; - - while (region != NULL) { - if (!strcmp(region->driver, driver)) { - *len_out = region->size; - return region->virt; - } - - region = region->next; - } - - return 0; -} -EXPORT_SYMBOL(find_iomem); - -static int setup_iomem(void) -{ - struct iomem_region *region = iomem_regions; - unsigned long iomem_start = high_physmem + PAGE_SIZE; - int err; - - while (region != NULL) { - err = os_map_memory((void *) iomem_start, region->fd, 0, - region->size, 1, 1, 0); - if (err) - printk(KERN_ERR "Mapping iomem region for driver '%s' " - "failed, errno = %d\n", region->driver, -err); - else { - region->virt = iomem_start; - region->phys = __pa(region->virt); - } - - iomem_start += region->size + PAGE_SIZE; - region = region->next; - } - - return 0; -} - -__initcall(setup_iomem); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 9c9c66dc45f0..63b38a3f73f7 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -43,7 +43,9 @@ * cares about its entry, so it's OK if another processor is modifying its * entry. */ -struct task_struct *cpu_tasks[NR_CPUS]; +struct task_struct *cpu_tasks[NR_CPUS] = { + [0 ... NR_CPUS - 1] = &init_task, +}; EXPORT_SYMBOL(cpu_tasks); void free_stack(unsigned long stack, int order) @@ -185,11 +187,7 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args) void initial_thread_cb(void (*proc)(void *), void *arg) { - int save_kmalloc_ok = kmalloc_ok; - - kmalloc_ok = 0; initial_thread_cb_skas(proc, arg); - kmalloc_ok = save_kmalloc_ok; } int arch_dup_task_struct(struct task_struct *dst, @@ -220,11 +218,21 @@ void arch_cpu_idle(void) um_idle_sleep(); } +void arch_cpu_idle_prepare(void) +{ + os_idle_prepare(); +} + int __uml_cant_sleep(void) { return in_atomic() || irqs_disabled() || in_interrupt(); /* Is in_interrupt() really needed? */ } +int uml_need_resched(void) +{ + return need_resched(); +} + extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; void do_uml_exitcalls(void) diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index afe9a2f251ef..00957788591b 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -23,17 +23,36 @@ static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); static spinlock_t mm_list_lock; static struct list_head mm_list; +void enter_turnstile(struct mm_id *mm_id) __acquires(turnstile) +{ + struct mm_context *ctx = container_of(mm_id, struct mm_context, id); + + mutex_lock(&ctx->turnstile); +} + +void exit_turnstile(struct mm_id *mm_id) __releases(turnstile) +{ + struct mm_context *ctx = container_of(mm_id, struct mm_context, id); + + mutex_unlock(&ctx->turnstile); +} + int init_new_context(struct task_struct *task, struct mm_struct *mm) { struct mm_id *new_id = &mm->context.id; unsigned long stack = 0; int ret = -ENOMEM; + mutex_init(&mm->context.turnstile); + spin_lock_init(&mm->context.sync_tlb_lock); + stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO, ilog2(STUB_DATA_PAGES)); if (stack == 0) goto out; new_id->stack = stack; + new_id->syscall_data_len = 0; + new_id->syscall_fd_num = 0; scoped_guard(spinlock_irqsave, &mm_list_lock) { /* Insert into list, used for lookups when the child dies */ @@ -73,6 +92,9 @@ void destroy_context(struct mm_struct *mm) return; } + scoped_guard(spinlock_irqsave, &mm_list_lock) + list_del(&mm->context.list); + if (mmu->id.pid > 0) { os_kill_ptraced_process(mmu->id.pid, 1); mmu->id.pid = -1; @@ -82,10 +104,6 @@ void destroy_context(struct mm_struct *mm) os_close_file(mmu->id.sock); free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); - - guard(spinlock_irqsave)(&mm_list_lock); - - list_del(&mm->context.list); } static irqreturn_t mm_sigchld_irq(int irq, void* dev) @@ -110,12 +128,11 @@ static irqreturn_t mm_sigchld_irq(int irq, void* dev) /* Marks the MM as dead */ mm_context->id.pid = -1; - /* - * NOTE: If SMP is implemented, a futex_wake - * needs to be added here. - */ stub_data = (void *)mm_context->id.stack; stub_data->futex = FUTEX_IN_KERN; +#if IS_ENABLED(CONFIG_SMP) + os_futex_wake(&stub_data->futex); +#endif /* * NOTE: Currently executing syscalls by diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 5881b17eb987..4a7673b0261a 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -7,6 +7,7 @@ #include <linux/sched/mm.h> #include <linux/sched/task_stack.h> #include <linux/sched/task.h> +#include <linux/smp-internal.h> #include <asm/tlbflush.h> @@ -26,12 +27,12 @@ static int __init start_kernel_proc(void *unused) return 0; } -static char cpu0_irqstack[THREAD_SIZE] __aligned(THREAD_SIZE); +char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE); int __init start_uml(void) { - stack_protections((unsigned long) &cpu0_irqstack); - set_sigstack(cpu0_irqstack, THREAD_SIZE); + stack_protections((unsigned long) &cpu_irqstacks[0]); + set_sigstack(cpu_irqstacks[0], THREAD_SIZE); init_new_thread_signals(); @@ -64,3 +65,15 @@ void current_mm_sync(void) um_tlb_sync(current->mm); } + +static DEFINE_SPINLOCK(initial_jmpbuf_spinlock); + +void initial_jmpbuf_lock(void) +{ + spin_lock_irq(&initial_jmpbuf_spinlock); +} + +void initial_jmpbuf_unlock(void) +{ + spin_unlock_irq(&initial_jmpbuf_spinlock); +} diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c new file mode 100644 index 000000000000..f1e52b7348fb --- /dev/null +++ b/arch/um/kernel/smp.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Ant Group + * Author: Tiwei Bie <tiwei.btw@antgroup.com> + * + * Based on the previous implementation in TT mode + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include <linux/sched.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/module.h> +#include <linux/processor.h> +#include <linux/threads.h> +#include <linux/cpu.h> +#include <linux/hardirq.h> +#include <linux/smp.h> +#include <linux/smp-internal.h> +#include <init.h> +#include <kern.h> +#include <os.h> +#include <smp.h> + +enum { + UML_IPI_RES = 0, + UML_IPI_CALL_SINGLE, + UML_IPI_CALL, + UML_IPI_STOP, +}; + +void arch_smp_send_reschedule(int cpu) +{ + os_send_ipi(cpu, UML_IPI_RES); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + os_send_ipi(cpu, UML_IPI_CALL_SINGLE); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + int cpu; + + for_each_cpu(cpu, mask) + os_send_ipi(cpu, UML_IPI_CALL); +} + +void smp_send_stop(void) +{ + int cpu, me = smp_processor_id(); + + for_each_online_cpu(cpu) { + if (cpu == me) + continue; + os_send_ipi(cpu, UML_IPI_STOP); + } +} + +static void ipi_handler(int vector, struct uml_pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs); + int cpu = raw_smp_processor_id(); + + irq_enter(); + + if (current->mm) + os_alarm_process(current->mm->context.id.pid); + + switch (vector) { + case UML_IPI_RES: + inc_irq_stat(irq_resched_count); + scheduler_ipi(); + break; + + case UML_IPI_CALL_SINGLE: + inc_irq_stat(irq_call_count); + generic_smp_call_function_single_interrupt(); + break; + + case UML_IPI_CALL: + inc_irq_stat(irq_call_count); + generic_smp_call_function_interrupt(); + break; + + case UML_IPI_STOP: + set_cpu_online(cpu, false); + while (1) + pause(); + break; + + default: + pr_err("CPU#%d received unknown IPI (vector=%d)!\n", cpu, vector); + break; + } + + irq_exit(); + set_irq_regs(old_regs); +} + +void uml_ipi_handler(int vector) +{ + struct uml_pt_regs r = { .is_user = 0 }; + + preempt_disable(); + ipi_handler(vector, &r); + preempt_enable(); +} + +/* AP states used only during CPU startup */ +enum { + UML_CPU_PAUSED = 0, + UML_CPU_RUNNING, +}; + +static int cpu_states[NR_CPUS]; + +static int start_secondary(void *unused) +{ + int err, cpu = raw_smp_processor_id(); + + notify_cpu_starting(cpu); + set_cpu_online(cpu, true); + + err = um_setup_timer(); + if (err) + panic("CPU#%d failed to setup timer, err = %d", cpu, err); + + local_irq_enable(); + + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + + return 0; +} + +void uml_start_secondary(void *opaque) +{ + int cpu = raw_smp_processor_id(); + struct mm_struct *mm = &init_mm; + struct task_struct *idle; + + stack_protections((unsigned long) &cpu_irqstacks[cpu]); + set_sigstack(&cpu_irqstacks[cpu], THREAD_SIZE); + + set_cpu_present(cpu, true); + os_futex_wait(&cpu_states[cpu], UML_CPU_PAUSED); + + smp_rmb(); /* paired with smp_wmb() in __cpu_up() */ + + idle = cpu_tasks[cpu]; + idle->thread_info.cpu = cpu; + + mmgrab(mm); + idle->active_mm = mm; + + idle->thread.request.thread.proc = start_secondary; + idle->thread.request.thread.arg = NULL; + + new_thread(task_stack_page(idle), &idle->thread.switch_buf, + new_thread_handler); + os_start_secondary(opaque, &idle->thread.switch_buf); +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + int err, cpu, me = smp_processor_id(); + unsigned long deadline; + + os_init_smp(); + + for_each_possible_cpu(cpu) { + if (cpu == me) + continue; + + pr_debug("Booting processor %d...\n", cpu); + err = os_start_cpu_thread(cpu); + if (err) { + pr_crit("CPU#%d failed to start cpu thread, err = %d", + cpu, err); + continue; + } + + deadline = jiffies + msecs_to_jiffies(1000); + spin_until_cond(cpu_present(cpu) || + time_is_before_jiffies(deadline)); + + if (!cpu_present(cpu)) + pr_crit("CPU#%d failed to boot\n", cpu); + } +} + +int __cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + cpu_tasks[cpu] = tidle; + smp_wmb(); /* paired with smp_rmb() in uml_start_secondary() */ + cpu_states[cpu] = UML_CPU_RUNNING; + os_futex_wake(&cpu_states[cpu]); + spin_until_cond(cpu_online(cpu)); + + return 0; +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ +} + +/* Set in uml_ncpus_setup */ +int uml_ncpus = 1; + +void __init prefill_possible_map(void) +{ + int cpu; + + for (cpu = 0; cpu < uml_ncpus; cpu++) + set_cpu_possible(cpu, true); + for (; cpu < NR_CPUS; cpu++) + set_cpu_possible(cpu, false); +} + +static int __init uml_ncpus_setup(char *line, int *add) +{ + *add = 0; + + if (kstrtoint(line, 10, ¨_ncpus)) { + os_warn("%s: Couldn't parse '%s'\n", __func__, line); + return -1; + } + + uml_ncpus = clamp(uml_ncpus, 1, NR_CPUS); + + return 0; +} + +__uml_setup("ncpus=", uml_ncpus_setup, +"ncpus=<# of desired CPUs>\n" +" This tells UML how many virtual processors to start. The maximum\n" +" number of supported virtual processors can be obtained by querying\n" +" the CONFIG_NR_CPUS option using --showconfig.\n\n" +); + +EXPORT_SYMBOL(uml_curr_cpu); diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 17da0a870650..b344a36b44eb 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -625,9 +625,10 @@ void time_travel_sleep(void) * controller application. */ unsigned long long next = S64_MAX; + int cpu = raw_smp_processor_id(); if (time_travel_mode == TT_MODE_BASIC) - os_timer_disable(); + os_timer_disable(cpu); time_travel_update_time(next, true); @@ -638,9 +639,9 @@ void time_travel_sleep(void) * This is somewhat wrong - we should get the first * one sooner like the os_timer_one_shot() below... */ - os_timer_set_interval(time_travel_timer_interval); + os_timer_set_interval(cpu, time_travel_timer_interval); } else { - os_timer_one_shot(time_travel_timer_event.time - next); + os_timer_one_shot(cpu, time_travel_timer_event.time - next); } } } @@ -758,6 +759,8 @@ extern u64 time_travel_ext_req(u32 op, u64 time); #define time_travel_del_event(e) do { } while (0) #endif +static struct clock_event_device timer_clockevent[NR_CPUS]; + void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { unsigned long flags; @@ -780,12 +783,14 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) static int itimer_shutdown(struct clock_event_device *evt) { + int cpu = evt - &timer_clockevent[0]; + if (time_travel_mode != TT_MODE_OFF) time_travel_del_event(&time_travel_timer_event); if (time_travel_mode != TT_MODE_INFCPU && time_travel_mode != TT_MODE_EXTERNAL) - os_timer_disable(); + os_timer_disable(cpu); return 0; } @@ -793,6 +798,7 @@ static int itimer_shutdown(struct clock_event_device *evt) static int itimer_set_periodic(struct clock_event_device *evt) { unsigned long long interval = NSEC_PER_SEC / HZ; + int cpu = evt - &timer_clockevent[0]; if (time_travel_mode != TT_MODE_OFF) { time_travel_del_event(&time_travel_timer_event); @@ -805,7 +811,7 @@ static int itimer_set_periodic(struct clock_event_device *evt) if (time_travel_mode != TT_MODE_INFCPU && time_travel_mode != TT_MODE_EXTERNAL) - os_timer_set_interval(interval); + os_timer_set_interval(cpu, interval); return 0; } @@ -825,7 +831,7 @@ static int itimer_next_event(unsigned long delta, if (time_travel_mode != TT_MODE_INFCPU && time_travel_mode != TT_MODE_EXTERNAL) - return os_timer_one_shot(delta); + return os_timer_one_shot(raw_smp_processor_id(), delta); return 0; } @@ -835,10 +841,9 @@ static int itimer_one_shot(struct clock_event_device *evt) return itimer_next_event(0, evt); } -static struct clock_event_device timer_clockevent = { +static struct clock_event_device _timer_clockevent = { .name = "posix-timer", .rating = 250, - .cpumask = cpu_possible_mask, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_state_shutdown = itimer_shutdown, @@ -856,6 +861,9 @@ static struct clock_event_device timer_clockevent = { static irqreturn_t um_timer(int irq, void *dev) { + int cpu = raw_smp_processor_id(); + struct clock_event_device *evt = &timer_clockevent[cpu]; + /* * Interrupt the (possibly) running userspace process, technically this * should only happen if userspace is currently executing. @@ -867,7 +875,7 @@ static irqreturn_t um_timer(int irq, void *dev) get_current()->mm) os_alarm_process(get_current()->mm->context.id.pid); - (*timer_clockevent.event_handler)(&timer_clockevent); + evt->event_handler(evt); return IRQ_HANDLED; } @@ -904,7 +912,24 @@ static struct clocksource timer_clocksource = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init um_timer_setup(void) +int um_setup_timer(void) +{ + int cpu = raw_smp_processor_id(); + struct clock_event_device *evt = &timer_clockevent[cpu]; + int err; + + err = os_timer_create(); + if (err) + return err; + + memcpy(evt, &_timer_clockevent, sizeof(*evt)); + evt->cpumask = cpumask_of(cpu); + clockevents_register_device(evt); + + return 0; +} + +static void __init um_timer_init(void) { int err; @@ -913,8 +938,8 @@ static void __init um_timer_setup(void) printk(KERN_ERR "register_timer : request_irq failed - " "errno = %d\n", -err); - err = os_timer_create(); - if (err != 0) { + err = um_setup_timer(); + if (err) { printk(KERN_ERR "creation of timer failed - errno = %d\n", -err); return; } @@ -924,7 +949,6 @@ static void __init um_timer_setup(void) printk(KERN_ERR "clocksource_register_hz returned %d\n", err); return; } - clockevents_register_device(&timer_clockevent); } void read_persistent_clock64(struct timespec64 *ts) @@ -945,7 +969,7 @@ void read_persistent_clock64(struct timespec64 *ts) void __init time_init(void) { timer_set_signal_handler(); - late_time_init = um_timer_setup; + late_time_init = um_timer_init; } #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT @@ -961,21 +985,21 @@ static int setup_time_travel(char *str) { if (strcmp(str, "=inf-cpu") == 0) { time_travel_mode = TT_MODE_INFCPU; - timer_clockevent.name = "time-travel-timer-infcpu"; + _timer_clockevent.name = "time-travel-timer-infcpu"; timer_clocksource.name = "time-travel-clock"; return 1; } if (strncmp(str, "=ext:", 5) == 0) { time_travel_mode = TT_MODE_EXTERNAL; - timer_clockevent.name = "time-travel-timer-external"; + _timer_clockevent.name = "time-travel-timer-external"; timer_clocksource.name = "time-travel-clock-external"; return time_travel_connect_external(str + 5); } if (!*str) { time_travel_mode = TT_MODE_BASIC; - timer_clockevent.name = "time-travel-timer"; + _timer_clockevent.name = "time-travel-timer"; timer_clocksource.name = "time-travel-clock"; return 1; } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index cf7e0d4407f2..39608cccf2c6 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -162,9 +162,11 @@ int um_tlb_sync(struct mm_struct *mm) { pgd_t *pgd; struct vm_ops ops; - unsigned long addr = mm->context.sync_tlb_range_from, next; + unsigned long addr, next; int ret = 0; + guard(spinlock_irqsave)(&mm->context.sync_tlb_lock); + if (mm->context.sync_tlb_range_to == 0) return 0; @@ -177,6 +179,7 @@ int um_tlb_sync(struct mm_struct *mm) ops.unmap = unmap; } + addr = mm->context.sync_tlb_range_from; pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, mm->context.sync_tlb_range_to); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 5b80a3a89c20..177615820a4c 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -316,7 +316,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, if (!is_user && regs) current->thread.segv_regs = container_of(regs, struct pt_regs, regs); - if (!is_user && init_mm.context.sync_tlb_range_to) { + if (!is_user && address >= start_vm && address < end_vm) { /* * Kernel has pending updates from set_ptes that were not * flushed yet. Syncing them should fix the pagefault (if not diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index cfbbbf8500c3..e2b24e1ecfa6 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -19,6 +19,7 @@ #include <linux/kmsg_dump.h> #include <linux/suspend.h> #include <linux/random.h> +#include <linux/smp-internal.h> #include <asm/processor.h> #include <asm/cpufeature.h> @@ -71,6 +72,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) { int i = 0; +#if IS_ENABLED(CONFIG_SMP) + i = (uintptr_t) v - 1; + if (!cpu_online(i)) + return 0; +#endif + seq_printf(m, "processor\t: %d\n", i); seq_printf(m, "vendor_id\t: User Mode Linux\n"); seq_printf(m, "model name\t: UML\n"); @@ -87,13 +94,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); - return 0; } static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < nr_cpu_ids ? &boot_cpu_data + *pos : NULL; + if (*pos < nr_cpu_ids) + return (void *)(uintptr_t)(*pos + 1); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) @@ -239,8 +247,6 @@ static struct notifier_block panic_exit_notifier = { void uml_finishsetup(void) { - cpu_tasks[0] = &init_task; - atomic_notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); @@ -254,11 +260,7 @@ unsigned long stub_start; unsigned long task_size; EXPORT_SYMBOL(task_size); -unsigned long host_task_size; - unsigned long brk_start; -unsigned long end_iomem; -EXPORT_SYMBOL(end_iomem); #define MIN_VMALLOC (32 * 1024 * 1024) @@ -298,16 +300,14 @@ static unsigned long __init get_top_address(char **envp) top_addr = (unsigned long) envp[i]; } - top_addr &= ~(UM_KERN_PAGE_SIZE - 1); - top_addr += UM_KERN_PAGE_SIZE; - - return top_addr; + return PAGE_ALIGN(top_addr + 1); } int __init linux_main(int argc, char **argv, char **envp) { unsigned long avail, diff; unsigned long virtmem_size, max_physmem; + unsigned long host_task_size; unsigned long stack; unsigned int i; int add; @@ -354,12 +354,11 @@ int __init linux_main(int argc, char **argv, char **envp) * so they actually get what they asked for. This should * add zero for non-exec shield users */ - - diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end); + diff = PAGE_ALIGN(brk_start) - PAGE_ALIGN((unsigned long) &_end); if (diff > 1024 * 1024) { os_info("Adding %ld bytes to physical memory to account for " "exec-shield gap\n", diff); - physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end); + physmem_size += diff; } uml_physmem = (unsigned long) __binary_start & PAGE_MASK; @@ -369,10 +368,8 @@ int __init linux_main(int argc, char **argv, char **envp) setup_machinename(init_utsname()->machine); - physmem_size = (physmem_size + PAGE_SIZE - 1) & PAGE_MASK; - iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; - - max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; + physmem_size = PAGE_ALIGN(physmem_size); + max_physmem = TASK_SIZE - uml_physmem - MIN_VMALLOC; if (physmem_size > max_physmem) { physmem_size = max_physmem; os_info("Physical memory size shrunk to %llu bytes\n", @@ -380,7 +377,6 @@ int __init linux_main(int argc, char **argv, char **envp) } high_physmem = uml_physmem + physmem_size; - end_iomem = high_physmem + iomem_size; start_vm = VMALLOC_START; @@ -421,6 +417,7 @@ void __init setup_arch(char **cmdline_p) strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; setup_hostinfo(host_info, sizeof host_info); + prefill_possible_map(); if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) { add_bootloader_randomness(rng_seed, sizeof(rng_seed)); @@ -455,6 +452,18 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } +#if IS_ENABLED(CONFIG_SMP) +void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) +{ +} + +void alternatives_smp_module_del(struct module *mod) +{ +} +#endif + void *text_poke(void *addr, const void *opcode, size_t len) { /* diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index fae836713487..f8d672d570d9 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -6,7 +6,7 @@ # Don't instrument UML-specific code KCOV_INSTRUMENT := n -obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \ +obj-y = elf_aux.o execvp.o file.o helper.o irq.o main.o mem.o process.o \ registers.o sigio.o signal.o start_up.o time.o tty.o \ umid.o user_syms.o util.o skas/ @@ -14,10 +14,10 @@ CFLAGS_signal.o += -Wframe-larger-than=4096 CFLAGS_main.o += -Wno-frame-larger-than -obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o +obj-$(CONFIG_SMP) += smp.o USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \ main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ - tty.o umid.o util.o + tty.o umid.o util.o smp.o include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index 0a0f91cf4d6d..72f416edf252 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -14,37 +14,26 @@ #include <elf_user.h> #include <mem_user.h> #include "internal.h" +#include <linux/swab.h> +#if __BITS_PER_LONG == 64 +typedef Elf64_auxv_t elf_auxv_t; +#else typedef Elf32_auxv_t elf_auxv_t; +#endif /* These are initialized very early in boot and never changed */ char * elf_aux_platform; -extern long elf_aux_hwcap; -unsigned long vsyscall_ehdr; -unsigned long vsyscall_end; -unsigned long __kernel_vsyscall; +long elf_aux_hwcap; __init void scan_elf_aux( char **envp) { - long page_size = 0; elf_auxv_t * auxv; while ( *envp++ != NULL) ; for ( auxv = (elf_auxv_t *)envp; auxv->a_type != AT_NULL; auxv++) { switch ( auxv->a_type ) { - case AT_SYSINFO: - __kernel_vsyscall = auxv->a_un.a_val; - /* See if the page is under TASK_SIZE */ - if (__kernel_vsyscall < (unsigned long) envp) - __kernel_vsyscall = 0; - break; - case AT_SYSINFO_EHDR: - vsyscall_ehdr = auxv->a_un.a_val; - /* See if the page is under TASK_SIZE */ - if (vsyscall_ehdr < (unsigned long) envp) - vsyscall_ehdr = 0; - break; case AT_HWCAP: elf_aux_hwcap = auxv->a_un.a_val; break; @@ -56,20 +45,6 @@ __init void scan_elf_aux( char **envp) elf_aux_platform = (char *) (long) auxv->a_un.a_val; break; - case AT_PAGESZ: - page_size = auxv->a_un.a_val; - break; } } - if ( ! __kernel_vsyscall || ! vsyscall_ehdr || - ! elf_aux_hwcap || ! elf_aux_platform || - ! page_size || (vsyscall_ehdr % page_size) ) { - __kernel_vsyscall = 0; - vsyscall_ehdr = 0; - elf_aux_hwcap = 0; - elf_aux_platform = "i586"; - } - else { - vsyscall_end = vsyscall_ehdr + page_size; - } } diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h index 5d8d3b0817a9..bac9fcc8c14c 100644 --- a/arch/um/os-Linux/internal.h +++ b/arch/um/os-Linux/internal.h @@ -4,6 +4,7 @@ #include <mm_id.h> #include <stub-data.h> +#include <signal.h> /* * elf_aux.c @@ -16,8 +17,20 @@ void scan_elf_aux(char **envp); void check_tmpexec(void); /* + * signal.c + */ +extern __thread int signals_enabled; +int timer_alarm_pending(void); + +/* * skas/process.c */ void wait_stub_done(int pid); void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys); + +/* + * smp.c + */ +#define IPI_SIGNAL SIGRTMIN + #endif /* __UM_OS_LINUX_INTERNAL_H */ diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 3c63ce19e3bf..7e114862a723 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -21,8 +21,6 @@ #define STACKSIZE (8 * 1024 * 1024) -long elf_aux_hwcap; - static void __init set_stklim(void) { struct rlimit lim; @@ -149,9 +147,7 @@ int __init main(int argc, char **argv, char **envp) install_fatal_handler(SIGINT); install_fatal_handler(SIGTERM); -#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA scan_elf_aux(envp); -#endif change_sig(SIGPIPE, 0); ret = linux_main(argc, argv, envp); @@ -171,7 +167,7 @@ int __init main(int argc, char **argv, char **envp) */ /* stop timers and set timer signal to be ignored */ - os_timer_disable(); + os_timer_disable(0); /* disable SIGIO for the fds and set SIGIO to be ignored */ err = deactivate_all_fds(); diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 00b49e90d05f..3a2a84ab9325 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -10,6 +10,8 @@ #include <errno.h> #include <signal.h> #include <fcntl.h> +#include <limits.h> +#include <linux/futex.h> #include <sys/mman.h> #include <sys/ptrace.h> #include <sys/prctl.h> @@ -189,3 +191,21 @@ void os_set_pdeathsig(void) { prctl(PR_SET_PDEATHSIG, SIGKILL); } + +int os_futex_wait(void *uaddr, unsigned int val) +{ + int r; + + CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAIT, val, + NULL, NULL, 0)); + return r < 0 ? -errno : r; +} + +int os_futex_wake(void *uaddr) +{ + int r; + + CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAKE, INT_MAX, + NULL, NULL, 0)); + return r < 0 ? -errno : r; +} diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 11f07f498270..327fb3c52fc7 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -20,6 +20,7 @@ #include <um_malloc.h> #include <sys/ucontext.h> #include <timetravel.h> +#include "internal.h" void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) = { [SIGTRAP] = relay_signal, @@ -68,12 +69,12 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGCHLD_BIT 2 #define SIGCHLD_MASK (1 << SIGCHLD_BIT) -int signals_enabled; +__thread int signals_enabled; #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) static int signals_blocked, signals_blocked_pending; #endif -static unsigned int signals_pending; -static unsigned int signals_active = 0; +static __thread unsigned int signals_pending; +static __thread unsigned int signals_active; static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { @@ -159,6 +160,11 @@ void timer_set_signal_handler(void) set_handler(SIGALRM); } +int timer_alarm_pending(void) +{ + return !!(signals_pending & SIGALRM_MASK); +} + void set_sigstack(void *sig_stack, int size) { stack_t stack = { @@ -253,9 +259,29 @@ int change_sig(int signal, int on) return 0; } -void block_signals(void) +static inline void __block_signals(void) { + if (!signals_enabled) + return; + + os_local_ipi_disable(); + barrier(); signals_enabled = 0; +} + +static inline void __unblock_signals(void) +{ + if (signals_enabled) + return; + + signals_enabled = 1; + barrier(); + os_local_ipi_enable(); +} + +void block_signals(void) +{ + __block_signals(); /* * This must return with signals disabled, so this barrier * ensures that writes are flushed out before the return. @@ -272,7 +298,8 @@ void unblock_signals(void) if (signals_enabled == 1) return; - signals_enabled = 1; + __unblock_signals(); + #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) deliver_time_travel_irqs(); #endif @@ -306,7 +333,7 @@ void unblock_signals(void) * tracing that happens inside the handlers we call for the * pending signals will mess up the tracing state. */ - signals_enabled = 0; + __block_signals(); um_trace_signals_off(); /* @@ -338,10 +365,15 @@ void unblock_signals(void) /* Re-enable signals and trace that we're doing so. */ um_trace_signals_on(); - signals_enabled = 1; + __unblock_signals(); } } +int um_get_signals(void) +{ + return signals_enabled; +} + int um_set_signals(int enable) { int ret; diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 0bc10cd4cbed..d6c22f8aa06d 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -298,7 +298,6 @@ static int userspace_tramp(void *data) .seccomp = using_seccomp, .stub_start = STUB_START, }; - struct iomem_region *iomem; int ret; if (using_seccomp) { @@ -332,12 +331,6 @@ static int userspace_tramp(void *data) fcntl(init_data.stub_data_fd, F_SETFD, 0); - /* In SECCOMP mode, these FDs are passed when needed */ - if (!using_seccomp) { - for (iomem = iomem_regions; iomem; iomem = iomem->next) - fcntl(iomem->fd, F_SETFD, 0); - } - /* dup2 signaling FD/socket to STDIN */ if (dup2(tramp_data->sockpair[0], 0) < 0) exit(3); @@ -553,7 +546,7 @@ extern unsigned long tt_extra_sched_jiffies; void userspace(struct uml_pt_regs *regs) { int err, status, op; - siginfo_t si_ptrace; + siginfo_t si_local; siginfo_t *si; int sig; @@ -564,6 +557,13 @@ void userspace(struct uml_pt_regs *regs) struct mm_id *mm_id = current_mm_id(); /* + * At any given time, only one CPU thread can enter the + * turnstile to operate on the same stub process, including + * executing stub system calls (mmap and munmap). + */ + enter_turnstile(mm_id); + + /* * When we are in time-travel mode, userspace can theoretically * do a *lot* of work without being scheduled. The problem with * this is that it will prevent kernel bookkeeping (primarily @@ -630,9 +630,10 @@ void userspace(struct uml_pt_regs *regs) } if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si)) - panic("%s - Invalid siginfo offset from child", - __func__); - si = (void *)&proc_data->sigstack[proc_data->si_offset]; + panic("%s - Invalid siginfo offset from child", __func__); + + si = &si_local; + memcpy(si, &proc_data->sigstack[proc_data->si_offset], sizeof(*si)); regs->is_user = 1; @@ -728,8 +729,8 @@ void userspace(struct uml_pt_regs *regs) case SIGFPE: case SIGWINCH: ptrace(PTRACE_GETSIGINFO, pid, 0, - (struct siginfo *)&si_ptrace); - si = &si_ptrace; + (struct siginfo *)&si_local); + si = &si_local; break; default: si = NULL; @@ -740,6 +741,8 @@ void userspace(struct uml_pt_regs *regs) } } + exit_turnstile(mm_id); + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ if (sig) { @@ -809,10 +812,9 @@ void switch_threads(jmp_buf *me, jmp_buf *you) static jmp_buf initial_jmpbuf; -/* XXX Make these percpu */ -static void (*cb_proc)(void *arg); -static void *cb_arg; -static jmp_buf *cb_back; +static __thread void (*cb_proc)(void *arg); +static __thread void *cb_arg; +static __thread jmp_buf *cb_back; int start_idle_thread(void *stack, jmp_buf *switch_buf) { @@ -866,10 +868,10 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) cb_arg = arg; cb_back = &here; - block_signals_trace(); + initial_jmpbuf_lock(); if (UML_SETJMP(&here) == 0) UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); - unblock_signals_trace(); + initial_jmpbuf_unlock(); cb_proc = NULL; cb_arg = NULL; @@ -878,8 +880,9 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) void halt_skas(void) { - block_signals_trace(); + initial_jmpbuf_lock(); UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); + /* unreachable */ } static bool noreboot; @@ -899,6 +902,7 @@ __uml_setup("noreboot", noreboot_cmd_param, void reboot_skas(void) { - block_signals_trace(); + initial_jmpbuf_lock(); UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT); + /* unreachable */ } diff --git a/arch/um/os-Linux/smp.c b/arch/um/os-Linux/smp.c new file mode 100644 index 000000000000..18d3858a7cd2 --- /dev/null +++ b/arch/um/os-Linux/smp.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Ant Group + * Author: Tiwei Bie <tiwei.btw@antgroup.com> + */ + +#include <errno.h> +#include <pthread.h> +#include <signal.h> +#include <kern_util.h> +#include <um_malloc.h> +#include <init.h> +#include <os.h> +#include <smp.h> +#include "internal.h" + +struct cpu_thread_data { + int cpu; + sigset_t sigset; +}; + +static __thread int __curr_cpu; + +int uml_curr_cpu(void) +{ + return __curr_cpu; +} + +static pthread_t cpu_threads[CONFIG_NR_CPUS]; + +static void *cpu_thread(void *arg) +{ + struct cpu_thread_data *data = arg; + + __curr_cpu = data->cpu; + + uml_start_secondary(data); + + return NULL; +} + +int os_start_cpu_thread(int cpu) +{ + struct cpu_thread_data *data; + sigset_t sigset, oset; + int err; + + data = uml_kmalloc(sizeof(*data), UM_GFP_ATOMIC); + if (!data) + return -ENOMEM; + + sigfillset(&sigset); + if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) { + err = errno; + goto err; + } + + data->cpu = cpu; + data->sigset = oset; + + err = pthread_create(&cpu_threads[cpu], NULL, cpu_thread, data); + if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0) + panic("Failed to restore the signal mask, errno = %d", errno); + if (err != 0) + goto err; + + return 0; + +err: + kfree(data); + return -err; +} + +void os_start_secondary(void *arg, jmp_buf *switch_buf) +{ + struct cpu_thread_data *data = arg; + + sigaddset(&data->sigset, IPI_SIGNAL); + sigaddset(&data->sigset, SIGIO); + + if (sigprocmask(SIG_SETMASK, &data->sigset, NULL) < 0) + panic("Failed to restore the signal mask, errno = %d", errno); + + kfree(data); + longjmp(*switch_buf, 1); + + /* unreachable */ + printk(UM_KERN_ERR "impossible long jump!"); + fatal_sigsegv(); +} + +int os_send_ipi(int cpu, int vector) +{ + union sigval value = { .sival_int = vector }; + + return pthread_sigqueue(cpu_threads[cpu], IPI_SIGNAL, value); +} + +static void __local_ipi_set(int enable) +{ + sigset_t sigset; + + sigemptyset(&sigset); + sigaddset(&sigset, IPI_SIGNAL); + + if (sigprocmask(enable ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0) + panic("%s: sigprocmask failed, errno = %d", __func__, errno); +} + +void os_local_ipi_enable(void) +{ + __local_ipi_set(1); +} + +void os_local_ipi_disable(void) +{ + __local_ipi_set(0); +} + +static void ipi_sig_handler(int sig, siginfo_t *si, void *uc) +{ + int save_errno = errno; + + signals_enabled = 0; + um_trace_signals_off(); + + uml_ipi_handler(si->si_value.sival_int); + + um_trace_signals_on(); + signals_enabled = 1; + + errno = save_errno; +} + +void __init os_init_smp(void) +{ + struct sigaction action = { + .sa_sigaction = ipi_sig_handler, + .sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART, + }; + + sigfillset(&action.sa_mask); + + if (sigaction(IPI_SIGNAL, &action, NULL) < 0) + panic("%s: sigaction failed, errno = %d", __func__, errno); + + cpu_threads[0] = pthread_self(); +} diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index a827c2e01aa5..054ac03bbf5e 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -22,6 +22,7 @@ #include <asm/unistd.h> #include <init.h> #include <os.h> +#include <smp.h> #include <kern_util.h> #include <mem_user.h> #include <ptrace_user.h> @@ -481,6 +482,9 @@ void __init os_early_checks(void) fatal("SECCOMP userspace requested but not functional!\n"); } + if (uml_ncpus > 1) + fatal("SMP is not supported with PTRACE userspace.\n"); + using_seccomp = 0; check_ptrace(); @@ -489,53 +493,3 @@ void __init os_early_checks(void) fatal("Failed to initialize default registers"); stop_ptraced_child(pid, 1); } - -int __init parse_iomem(char *str, int *add) -{ - struct iomem_region *new; - struct stat64 buf; - char *file, *driver; - int fd, size; - - driver = str; - file = strchr(str,','); - if (file == NULL) { - os_warn("parse_iomem : failed to parse iomem\n"); - goto out; - } - *file = '\0'; - file++; - fd = open(file, O_RDWR, 0); - if (fd < 0) { - perror("parse_iomem - Couldn't open io file"); - goto out; - } - - if (fstat64(fd, &buf) < 0) { - perror("parse_iomem - cannot stat_fd file"); - goto out_close; - } - - new = malloc(sizeof(*new)); - if (new == NULL) { - perror("Couldn't allocate iomem_region struct"); - goto out_close; - } - - size = (buf.st_size + UM_KERN_PAGE_SIZE) & ~(UM_KERN_PAGE_SIZE - 1); - - *new = ((struct iomem_region) { .next = iomem_regions, - .driver = driver, - .fd = fd, - .size = size, - .phys = 0, - .virt = 0 }); - iomem_regions = new; - iomem_size += new->size + UM_KERN_PAGE_SIZE; - - return 0; - out_close: - close(fd); - out: - return 1; -} diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index 4d5591d96d8c..13ebc86918d4 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -11,12 +11,15 @@ #include <errno.h> #include <signal.h> #include <time.h> +#include <sys/signalfd.h> #include <sys/time.h> #include <kern_util.h> #include <os.h> +#include <smp.h> #include <string.h> +#include "internal.h" -static timer_t event_high_res_timer = 0; +static timer_t event_high_res_timer[CONFIG_NR_CPUS] = { 0 }; static inline long long timespec_to_ns(const struct timespec *ts) { @@ -31,20 +34,31 @@ long long os_persistent_clock_emulation(void) return timespec_to_ns(&realtime_tp); } +#ifndef sigev_notify_thread_id +#define sigev_notify_thread_id _sigev_un._tid +#endif + /** * os_timer_create() - create an new posix (interval) timer */ int os_timer_create(void) { - timer_t *t = &event_high_res_timer; + int cpu = uml_curr_cpu(); + timer_t *t = &event_high_res_timer[cpu]; + struct sigevent sev = { + .sigev_notify = SIGEV_THREAD_ID, + .sigev_signo = SIGALRM, + .sigev_value.sival_ptr = t, + .sigev_notify_thread_id = gettid(), + }; - if (timer_create(CLOCK_MONOTONIC, NULL, t) == -1) + if (timer_create(CLOCK_MONOTONIC, &sev, t) == -1) return -1; return 0; } -int os_timer_set_interval(unsigned long long nsecs) +int os_timer_set_interval(int cpu, unsigned long long nsecs) { struct itimerspec its; @@ -54,13 +68,13 @@ int os_timer_set_interval(unsigned long long nsecs) its.it_interval.tv_sec = nsecs / UM_NSEC_PER_SEC; its.it_interval.tv_nsec = nsecs % UM_NSEC_PER_SEC; - if (timer_settime(event_high_res_timer, 0, &its, NULL) == -1) + if (timer_settime(event_high_res_timer[cpu], 0, &its, NULL) == -1) return -errno; return 0; } -int os_timer_one_shot(unsigned long long nsecs) +int os_timer_one_shot(int cpu, unsigned long long nsecs) { struct itimerspec its = { .it_value.tv_sec = nsecs / UM_NSEC_PER_SEC, @@ -70,19 +84,20 @@ int os_timer_one_shot(unsigned long long nsecs) .it_interval.tv_nsec = 0, // we cheat here }; - timer_settime(event_high_res_timer, 0, &its, NULL); + timer_settime(event_high_res_timer[cpu], 0, &its, NULL); return 0; } /** * os_timer_disable() - disable the posix (interval) timer + * @cpu: the CPU for which the timer is to be disabled */ -void os_timer_disable(void) +void os_timer_disable(int cpu) { struct itimerspec its; memset(&its, 0, sizeof(struct itimerspec)); - timer_settime(event_high_res_timer, 0, &its, NULL); + timer_settime(event_high_res_timer[cpu], 0, &its, NULL); } long long os_nsecs(void) @@ -93,23 +108,50 @@ long long os_nsecs(void) return timespec_to_ns(&ts); } +static __thread int wake_signals; + +void os_idle_prepare(void) +{ + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, SIGALRM); + sigaddset(&set, IPI_SIGNAL); + + /* + * We need to use signalfd rather than sigsuspend in idle sleep + * because the IPI signal is a real-time signal that carries data, + * and unlike handling SIGALRM, we cannot simply flag it in + * signals_pending. + */ + wake_signals = signalfd(-1, &set, SFD_CLOEXEC); + if (wake_signals < 0) + panic("Failed to create signal FD, errno = %d", errno); +} + /** * os_idle_sleep() - sleep until interrupted */ void os_idle_sleep(void) { - struct itimerspec its; - sigset_t set, old; + sigset_t set; - /* block SIGALRM while we analyze the timer state */ + /* + * Block SIGALRM while performing the need_resched check. + * Note that, because IRQs are disabled, the IPI signal is + * already blocked. + */ sigemptyset(&set); sigaddset(&set, SIGALRM); - sigprocmask(SIG_BLOCK, &set, &old); + sigprocmask(SIG_BLOCK, &set, NULL); + + /* + * Because disabling IRQs does not block SIGALRM, it is also + * necessary to check for any pending timer alarms. + */ + if (!uml_need_resched() && !timer_alarm_pending()) + os_poll(1, &wake_signals); - /* check the timer, and if it'll fire then wait for it */ - timer_gettime(event_high_res_timer, &its); - if (its.it_value.tv_sec || its.it_value.tv_nsec) - sigsuspend(&old); - /* either way, restore the signal mask */ + /* Restore the signal mask. */ sigprocmask(SIG_UNBLOCK, &set, NULL); } diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c index a310ae27b479..67f6112318b6 100644 --- a/arch/um/os-Linux/user_syms.c +++ b/arch/um/os-Linux/user_syms.c @@ -31,12 +31,6 @@ extern void *memset(void *, int, size_t); EXPORT_SYMBOL(memset); #endif -#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA -/* needed for __access_ok() */ -EXPORT_SYMBOL(vsyscall_ehdr); -EXPORT_SYMBOL(vsyscall_end); -#endif - #ifdef _FORTIFY_SOURCE extern int __sprintf_chk(char *str, int flag, size_t len, const char *format); EXPORT_SYMBOL(__sprintf_chk); |
