Diffstat (limited to 'arch/um/kernel')
-rw-r--r--  arch/um/kernel/Makefile               |   6
-rw-r--r--  arch/um/kernel/asm-offsets.c          |  50
-rw-r--r--  arch/um/kernel/dtb.c                  |   3
-rw-r--r--  arch/um/kernel/dyn.lds.S              |   5
-rw-r--r--  arch/um/kernel/exec.c                 |   2
-rw-r--r--  arch/um/kernel/initrd.c               |   1
-rw-r--r--  arch/um/kernel/ioport.c               |  13
-rw-r--r--  arch/um/kernel/irq.c                  | 216
-rw-r--r--  arch/um/kernel/kmsg_dump.c            |   2
-rw-r--r--  arch/um/kernel/ksyms.c                |   2
-rw-r--r--  arch/um/kernel/load_file.c            |   4
-rw-r--r--  arch/um/kernel/maccess.c              |  19
-rw-r--r--  arch/um/kernel/mem.c                  | 142
-rw-r--r--  arch/um/kernel/physmem.c              | 112
-rw-r--r--  arch/um/kernel/process.c              |  72
-rw-r--r--  arch/um/kernel/ptrace.c               |   9
-rw-r--r--  arch/um/kernel/sigio.c                |  26
-rw-r--r--  arch/um/kernel/skas/.gitignore        |   2
-rw-r--r--  arch/um/kernel/skas/Makefile          |  38
-rw-r--r--  arch/um/kernel/skas/mmu.c             | 134
-rw-r--r--  arch/um/kernel/skas/process.c         |  23
-rw-r--r--  arch/um/kernel/skas/stub.c            | 140
-rw-r--r--  arch/um/kernel/skas/stub_exe.c        | 230
-rw-r--r--  arch/um/kernel/skas/stub_exe_embed.S  |  11
-rw-r--r--  arch/um/kernel/skas/syscall.c         |  22
-rw-r--r--  arch/um/kernel/smp.c                  | 242
-rw-r--r--  arch/um/kernel/sysrq.c                |   8
-rw-r--r--  arch/um/kernel/time.c                 | 128
-rw-r--r--  arch/um/kernel/tlb.c                  |  79
-rw-r--r--  arch/um/kernel/trap.c                 | 174
-rw-r--r--  arch/um/kernel/um_arch.c              | 137
-rw-r--r--  arch/um/kernel/uml.lds.S              |   2
32 files changed, 1351 insertions(+), 703 deletions(-)
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index f8567b933ffa..be60bc451b3f 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -12,12 +12,12 @@ CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START) \
-DELF_ARCH=$(LDS_ELF_ARCH) \
-DELF_FORMAT=$(LDS_ELF_FORMAT) \
$(LDS_EXTRA)
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
physmem.o process.o ptrace.o reboot.o sigio.o \
signal.o sysrq.o time.o tlb.o trap.o \
- um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
+ um_arch.o umid.o kmsg_dump.o capflags.o skas/
obj-y += load_file.o
obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
@@ -25,7 +25,7 @@ obj-$(CONFIG_GPROF) += gprof_syms.o
obj-$(CONFIG_OF) += dtb.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
+obj-$(CONFIG_SMP) += smp.o
USER_OBJS := config.o
diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
index 1fb12235ab9c..d620b6f6de9b 100644
--- a/arch/um/kernel/asm-offsets.c
+++ b/arch/um/kernel/asm-offsets.c
@@ -1 +1,49 @@
-#include <sysdep/kernel-offsets.h>
+/* SPDX-License-Identifier: GPL-2.0 */
+#define COMPILE_OFFSETS
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/elf.h>
+#include <linux/crypto.h>
+#include <linux/kbuild.h>
+#include <linux/audit.h>
+#include <linux/fs.h>
+#include <asm/mman.h>
+#include <asm/seccomp.h>
+#include <asm/extable.h>
+
+/* workaround for a warning with -Wmissing-prototypes */
+void foo(void);
+
+void foo(void)
+{
+ DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
+
+ DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE);
+ DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK);
+ DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT);
+
+ DEFINE(UM_GFP_KERNEL, GFP_KERNEL);
+ DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC);
+
+ DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
+
+ DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
+ DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
+
+ DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES);
+
+ DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE);
+
+ DEFINE(HOSTFS_ATTR_MODE, ATTR_MODE);
+ DEFINE(HOSTFS_ATTR_UID, ATTR_UID);
+ DEFINE(HOSTFS_ATTR_GID, ATTR_GID);
+ DEFINE(HOSTFS_ATTR_SIZE, ATTR_SIZE);
+ DEFINE(HOSTFS_ATTR_ATIME, ATTR_ATIME);
+ DEFINE(HOSTFS_ATTR_MTIME, ATTR_MTIME);
+ DEFINE(HOSTFS_ATTR_CTIME, ATTR_CTIME);
+ DEFINE(HOSTFS_ATTR_ATIME_SET, ATTR_ATIME_SET);
+ DEFINE(HOSTFS_ATTR_MTIME_SET, ATTR_MTIME_SET);
+
+ DEFINE(ALT_INSTR_SIZE, sizeof(struct alt_instr));
+ DEFINE(EXTABLE_SIZE, sizeof(struct exception_table_entry));
+}
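
For context on the DEFINE() entries above: DEFINE() comes from <linux/kbuild.h> and works by emitting each constant as a marker string in the compiler's assembly output, which Kbuild then post-processes into generated/asm-offsets.h. The macro itself is roughly:

    #define DEFINE(sym, val) \
            asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))
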
diff --git a/arch/um/kernel/dtb.c b/arch/um/kernel/dtb.c
index 8d78ced9e08f..47cd3d869fb2 100644
--- a/arch/um/kernel/dtb.c
+++ b/arch/um/kernel/dtb.c
@@ -31,11 +31,12 @@ void uml_dtb_init(void)
static int __init uml_dtb_setup(char *line, int *add)
{
+ *add = 0;
dtb = line;
return 0;
}
__uml_setup("dtb=", uml_dtb_setup,
"dtb=<file>\n"
-" Boot the kernel with the devicetree blob from the specified file.\n"
+" Boot the kernel with the devicetree blob from the specified file.\n\n"
);
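
The new *add = 0 assignments in this and the following setup handlers change how the option is propagated: uml_checksetup() in um_arch.c passes add by reference and appends the argument to the kernel command line only if it is still non-zero afterwards, so handlers that fully consume their option clear it. A minimal sketch of the contract (example_setup is a hypothetical handler, not from this series):

    static int __init example_setup(char *line, int *add)
    {
            *add = 0;       /* consumed here; keep it off the command line */
            return 0;
    }
    __uml_setup("example=", example_setup, "example=<value>\n");
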
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index 3385d653ebd0..a36b7918a011 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -116,8 +116,6 @@ SECTIONS
.fini_array : { *(.fini_array) }
.data : {
INIT_TASK_DATA(KERNEL_STACK_SIZE)
- . = ALIGN(KERNEL_STACK_SIZE);
- *(.data..init_irqstack)
DATA_DATA
*(.data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
@@ -178,3 +176,6 @@ SECTIONS
DISCARDS
}
+
+ASSERT(__syscall_stub_end - __syscall_stub_start <= PAGE_SIZE,
+ "STUB code must not be larger than one page");
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index cb8b5cd9285c..13812fa97eee 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -26,8 +26,6 @@ void flush_thread(void)
get_safe_registers(current_pt_regs()->regs.gp,
current_pt_regs()->regs.fp);
-
- __switch_mm(&current->mm->context.id);
}
void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 47b8cb1a1156..99dba827461c 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -34,6 +34,7 @@ int __init read_initrd(void)
static int __init uml_initrd_setup(char *line, int *add)
{
+ *add = 0;
initrd = line;
return 0;
}
diff --git a/arch/um/kernel/ioport.c b/arch/um/kernel/ioport.c
deleted file mode 100644
index 7220615b3beb..000000000000
--- a/arch/um/kernel/ioport.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2021 Intel Corporation
- * Author: Johannes Berg <johannes@sipsolutions.net>
- */
-#include <asm/iomap.h>
-#include <asm-generic/pci_iomap.h>
-
-void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port,
- unsigned int nr)
-{
- return NULL;
-}
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 534e91797f89..f4b13f15a9c1 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -22,6 +22,9 @@
#include <irq_kern.h>
#include <linux/time-internal.h>
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+
+#define irq_stats(x) (&per_cpu(irq_stat, x))
/* When epoll triggers we do not know why it did so
* we can also have different IRQs for read and write.
@@ -52,7 +55,7 @@ struct irq_entry {
bool sigio_workaround;
};
-static DEFINE_SPINLOCK(irq_lock);
+static DEFINE_RAW_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds);
static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
static bool irqs_suspended;
@@ -236,7 +239,8 @@ static void _sigio_handler(struct uml_pt_regs *regs,
free_irqs();
}
-void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+ void *mc)
{
preempt_disable();
_sigio_handler(regs, irqs_suspended);
@@ -257,7 +261,7 @@ static struct irq_entry *get_irq_entry_by_fd(int fd)
return NULL;
}
-static void free_irq_entry(struct irq_entry *to_free, bool remove)
+static void remove_irq_entry(struct irq_entry *to_free, bool remove)
{
if (!to_free)
return;
@@ -265,7 +269,6 @@ static void free_irq_entry(struct irq_entry *to_free, bool remove)
if (remove)
os_del_epoll_fd(to_free->fd);
list_del(&to_free->list);
- kfree(to_free);
}
static bool update_irq_entry(struct irq_entry *entry)
@@ -286,17 +289,19 @@ static bool update_irq_entry(struct irq_entry *entry)
return false;
}
-static void update_or_free_irq_entry(struct irq_entry *entry)
+static struct irq_entry *update_or_remove_irq_entry(struct irq_entry *entry)
{
- if (!update_irq_entry(entry))
- free_irq_entry(entry, false);
+ if (update_irq_entry(entry))
+ return NULL;
+ remove_irq_entry(entry, false);
+ return entry;
}
static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
void (*timetravel_handler)(int, int, void *,
struct time_travel_event *))
{
- struct irq_entry *irq_entry;
+ struct irq_entry *irq_entry, *to_free = NULL;
int err, events = os_event_mask(type);
unsigned long flags;
@@ -304,9 +309,10 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
if (err < 0)
goto out;
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
irq_entry = get_irq_entry_by_fd(fd);
if (irq_entry) {
+already:
/* cannot register the same FD twice with the same type */
if (WARN_ON(irq_entry->reg[type].events)) {
err = -EALREADY;
@@ -316,11 +322,22 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
/* temporarily disable to avoid IRQ-side locking */
os_del_epoll_fd(fd);
} else {
- irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
- if (!irq_entry) {
- err = -ENOMEM;
- goto out_unlock;
+ struct irq_entry *new;
+
+ /* don't restore interrupts */
+ raw_spin_unlock(&irq_lock);
+ new = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
+ if (!new) {
+ local_irq_restore(flags);
+ return -ENOMEM;
+ }
+ raw_spin_lock(&irq_lock);
+ irq_entry = get_irq_entry_by_fd(fd);
+ if (irq_entry) {
+ to_free = new;
+ goto already;
}
+ irq_entry = new;
irq_entry->fd = fd;
list_add_tail(&irq_entry->list, &active_fds);
maybe_sigio_broken(fd);
@@ -339,12 +356,11 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
#endif
WARN_ON(!update_irq_entry(irq_entry));
- spin_unlock_irqrestore(&irq_lock, flags);
-
- return 0;
+ err = 0;
out_unlock:
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
out:
+ kfree(to_free);
return err;
}
@@ -358,19 +374,20 @@ void free_irq_by_fd(int fd)
struct irq_entry *to_free;
unsigned long flags;
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
to_free = get_irq_entry_by_fd(fd);
- free_irq_entry(to_free, true);
- spin_unlock_irqrestore(&irq_lock, flags);
+ remove_irq_entry(to_free, true);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
+ kfree(to_free);
}
EXPORT_SYMBOL(free_irq_by_fd);
static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
{
- struct irq_entry *entry;
+ struct irq_entry *entry, *to_free = NULL;
unsigned long flags;
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
list_for_each_entry(entry, &active_fds, list) {
enum um_irq_type i;
@@ -386,12 +403,13 @@ static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
os_del_epoll_fd(entry->fd);
reg->events = 0;
- update_or_free_irq_entry(entry);
+ to_free = update_or_remove_irq_entry(entry);
goto out;
}
}
out:
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
+ kfree(to_free);
}
void deactivate_fd(int fd, int irqnum)
@@ -402,7 +420,7 @@ void deactivate_fd(int fd, int irqnum)
os_del_epoll_fd(fd);
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
entry = get_irq_entry_by_fd(fd);
if (!entry)
goto out;
@@ -414,9 +432,10 @@ void deactivate_fd(int fd, int irqnum)
entry->reg[i].events = 0;
}
- update_or_free_irq_entry(entry);
+ entry = update_or_remove_irq_entry(entry);
out:
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
+ kfree(entry);
ignore_sigio_fd(fd);
}
@@ -546,7 +565,7 @@ void um_irqs_suspend(void)
irqs_suspended = true;
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
list_for_each_entry(entry, &active_fds, list) {
enum um_irq_type t;
bool clear = true;
@@ -579,7 +598,7 @@ void um_irqs_suspend(void)
!__ignore_sigio_fd(entry->fd);
}
}
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
}
void um_irqs_resume(void)
@@ -588,7 +607,7 @@ void um_irqs_resume(void)
unsigned long flags;
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
list_for_each_entry(entry, &active_fds, list) {
if (entry->suspended) {
int err = os_set_fd_async(entry->fd);
@@ -602,7 +621,7 @@ void um_irqs_resume(void)
}
}
}
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
irqs_suspended = false;
send_sigio_to_self();
@@ -613,7 +632,7 @@ static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
struct irq_entry *entry;
unsigned long flags;
- spin_lock_irqsave(&irq_lock, flags);
+ raw_spin_lock_irqsave(&irq_lock, flags);
list_for_each_entry(entry, &active_fds, list) {
enum um_irq_type t;
@@ -628,7 +647,7 @@ static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
}
}
unlock:
- spin_unlock_irqrestore(&irq_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
return 0;
}
#else
@@ -667,7 +686,7 @@ void __init init_IRQ(void)
{
int i;
- irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);
+ irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_percpu_irq);
for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
@@ -675,114 +694,35 @@ void __init init_IRQ(void)
os_setup_epoll();
}
-/*
- * IRQ stack entry and exit:
- *
- * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
- * and switch over to the IRQ stack after some preparation. We use
- * sigaltstack to receive signals on a separate stack from the start.
- * These two functions make sure the rest of the kernel won't be too
- * upset by being on a different stack. The IRQ stack has a
- * thread_info structure at the bottom so that current et al continue
- * to work.
- *
- * to_irq_stack copies the current task's thread_info to the IRQ stack
- * thread_info and sets the tasks's stack to point to the IRQ stack.
- *
- * from_irq_stack copies the thread_info struct back (flags may have
- * been modified) and resets the task's stack pointer.
- *
- * Tricky bits -
- *
- * What happens when two signals race each other? UML doesn't block
- * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
- * could arrive while a previous one is still setting up the
- * thread_info.
- *
- * There are three cases -
- * The first interrupt on the stack - sets up the thread_info and
- * handles the interrupt
- * A nested interrupt interrupting the copying of the thread_info -
- * can't handle the interrupt, as the stack is in an unknown state
- * A nested interrupt not interrupting the copying of the
- * thread_info - doesn't do any setup, just handles the interrupt
- *
- * The first job is to figure out whether we interrupted stack setup.
- * This is done by xchging the signal mask with thread_info->pending.
- * If the value that comes back is zero, then there is no setup in
- * progress, and the interrupt can be handled. If the value is
- * non-zero, then there is stack setup in progress. In order to have
- * the interrupt handled, we leave our signal in the mask, and it will
- * be handled by the upper handler after it has set up the stack.
- *
- * Next is to figure out whether we are the outer handler or a nested
- * one. As part of setting up the stack, thread_info->real_thread is
- * set to non-NULL (and is reset to NULL on exit). This is the
- * nesting indicator. If it is non-NULL, then the stack is already
- * set up and the handler can run.
- */
-
-static unsigned long pending_mask;
-
-unsigned long to_irq_stack(unsigned long *mask_out)
+int __init arch_probe_nr_irqs(void)
{
- struct thread_info *ti;
- unsigned long mask, old;
- int nested;
-
- mask = xchg(&pending_mask, *mask_out);
- if (mask != 0) {
- /*
- * If any interrupts come in at this point, we want to
- * make sure that their bits aren't lost by our
- * putting our bit in. So, this loop accumulates bits
- * until xchg returns the same value that we put in.
- * When that happens, there were no new interrupts,
- * and pending_mask contains a bit for each interrupt
- * that came in.
- */
- old = *mask_out;
- do {
- old |= mask;
- mask = xchg(&pending_mask, old);
- } while (mask != old);
- return 1;
- }
-
- ti = current_thread_info();
- nested = (ti->real_thread != NULL);
- if (!nested) {
- struct task_struct *task;
- struct thread_info *tti;
-
- task = cpu_tasks[ti->cpu].task;
- tti = task_thread_info(task);
-
- *ti = *tti;
- ti->real_thread = tti;
- task->stack = ti;
- }
-
- mask = xchg(&pending_mask, 0);
- *mask_out |= mask | nested;
- return 0;
+ return NR_IRQS;
}
-unsigned long from_irq_stack(int nested)
+void sigchld_handler(int sig, struct siginfo *unused_si,
+ struct uml_pt_regs *regs, void *mc)
{
- struct thread_info *ti, *to;
- unsigned long mask;
-
- ti = current_thread_info();
-
- pending_mask = 1;
+ do_IRQ(SIGCHLD_IRQ, regs);
+}
- to = ti->real_thread;
- current->stack = to;
- ti->real_thread = NULL;
- *to = *ti;
+/*
+ * /proc/interrupts printing for arch specific interrupts
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+#if IS_ENABLED(CONFIG_SMP)
+ int cpu;
+
+ seq_printf(p, "%*s: ", prec, "RES");
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count);
+ seq_puts(p, " Rescheduling interrupts\n");
+
+ seq_printf(p, "%*s: ", prec, "CAL");
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count);
+ seq_puts(p, " Function call interrupts\n");
+#endif
- mask = xchg(&pending_mask, 0);
- return mask & ~1;
+ return 0;
}
-
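
A note on the recurring unlink-then-kfree split introduced above: now that irq_lock is a raw spinlock, kfree() must not be called inside the critical section (on PREEMPT_RT the slab allocator takes sleeping locks, which is forbidden under a raw lock). Entries are therefore only unlinked under the lock and freed after it is dropped, as in this condensed sketch of free_irq_by_fd():

    raw_spin_lock_irqsave(&irq_lock, flags);
    to_free = get_irq_entry_by_fd(fd);
    remove_irq_entry(to_free, true);        /* unlink only, no kfree() */
    raw_spin_unlock_irqrestore(&irq_lock, flags);
    kfree(to_free);                         /* safe outside the raw lock */
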
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
index 419021175272..fc0f543d1d8e 100644
--- a/arch/um/kernel/kmsg_dump.c
+++ b/arch/um/kernel/kmsg_dump.c
@@ -31,7 +31,7 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
* expected to output the crash information.
*/
if (strcmp(con->name, "ttynull") != 0 &&
- (console_srcu_read_flags(con) & CON_ENABLED)) {
+ console_is_usable(con, console_srcu_read_flags(con), true)) {
break;
}
}
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index f2fb77da08cf..96314c31e61c 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -6,8 +6,8 @@
#include <linux/module.h>
#include <os.h>
+EXPORT_SYMBOL(um_get_signals);
EXPORT_SYMBOL(um_set_signals);
-EXPORT_SYMBOL(signals_enabled);
EXPORT_SYMBOL(os_stat_fd);
EXPORT_SYMBOL(os_stat_file);
diff --git a/arch/um/kernel/load_file.c b/arch/um/kernel/load_file.c
index 5cecd0e291fb..cb9d178ab7d8 100644
--- a/arch/um/kernel/load_file.c
+++ b/arch/um/kernel/load_file.c
@@ -48,9 +48,7 @@ void *uml_load_file(const char *filename, unsigned long long *size)
return NULL;
}
- area = memblock_alloc(*size, SMP_CACHE_BYTES);
- if (!area)
- panic("%s: Failed to allocate %llu bytes\n", __func__, *size);
+ area = memblock_alloc_or_panic(*size, SMP_CACHE_BYTES);
if (__uml_load_file(filename, area, *size)) {
memblock_free(area, *size);
diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c
deleted file mode 100644
index 8ccd56813f68..000000000000
--- a/arch/um/kernel/maccess.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2013 Richard Weinberger <richrd@nod.at>
- */
-
-#include <linux/uaccess.h>
-#include <linux/kernel.h>
-#include <os.h>
-
-bool copy_from_kernel_nofault_allowed(const void *src, size_t size)
-{
- void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE);
-
- if ((unsigned long)src < PAGE_SIZE || size <= 0)
- return false;
- if (os_mincore(psrc, size + src - psrc) <= 0)
- return false;
- return true;
-}
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index a5b4fe2ad931..39c4a7e21c6f 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -6,11 +6,11 @@
#include <linux/stddef.h>
#include <linux/module.h>
#include <linux/memblock.h>
-#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
-#include <asm/fixmap.h>
+#include <linux/init.h>
+#include <asm/sections.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <as-layout.h>
@@ -21,10 +21,10 @@
#include <os.h>
#include <um_malloc.h>
#include <linux/sched/task.h>
+#include <linux/kasan.h>
#ifdef CONFIG_KASAN
-int kasan_um_is_ready;
-void kasan_init(void)
+void __init kasan_init(void)
{
/*
* kasan_map_memory will map all of the required address space and
@@ -32,7 +32,11 @@ void kasan_init(void)
*/
kasan_map_memory((void *)KASAN_SHADOW_START, KASAN_SHADOW_SIZE);
init_task.kasan_depth = 0;
- kasan_um_is_ready = true;
+ /*
+ * Since kasan_init() is called before main(),
+ * KASAN is initialized but the enablement is deferred after
+ * jump_label_init(). See arch_mm_preinit().
+ */
}
static void (*kasan_init_ptr)(void)
@@ -51,124 +55,38 @@ EXPORT_SYMBOL(empty_zero_page);
pgd_t swapper_pg_dir[PTRS_PER_PGD];
/* Initialized at boot time, and readonly after that */
-unsigned long long highmem;
-EXPORT_SYMBOL(highmem);
int kmalloc_ok = 0;
/* Used during early boot */
static unsigned long brk_end;
-void __init mem_init(void)
+void __init arch_mm_preinit(void)
{
+ /* Safe to call after jump_label_init(). Enables KASAN. */
+ kasan_init_generic();
+
/* clear the zero-page */
memset(empty_zero_page, 0, PAGE_SIZE);
/* Map in the area just after the brk now that kmalloc is about
* to be turned on.
*/
- brk_end = (unsigned long) UML_ROUND_UP(sbrk(0));
+ brk_end = PAGE_ALIGN((unsigned long) sbrk(0));
map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
memblock_free((void *)brk_end, uml_reserved - brk_end);
uml_reserved = brk_end;
-
- /* this will put all low memory onto the freelists */
- memblock_free_all();
+ min_low_pfn = PFN_UP(__pa(uml_reserved));
max_pfn = max_low_pfn;
- kmalloc_ok = 1;
-}
-
-/*
- * Create a page table and place a pointer to it in a middle page
- * directory entry.
- */
-static void __init one_page_table_init(pmd_t *pmd)
-{
- if (pmd_none(*pmd)) {
- pte_t *pte = (pte_t *) memblock_alloc_low(PAGE_SIZE,
- PAGE_SIZE);
- if (!pte)
- panic("%s: Failed to allocate %lu bytes align=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE);
-
- set_pmd(pmd, __pmd(_KERNPG_TABLE +
- (unsigned long) __pa(pte)));
- BUG_ON(pte != pte_offset_kernel(pmd, 0));
- }
-}
-
-static void __init one_md_table_init(pud_t *pud)
-{
-#ifdef CONFIG_3_LEVEL_PGTABLES
- pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
- if (!pmd_table)
- panic("%s: Failed to allocate %lu bytes align=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE);
-
- set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table)));
- BUG_ON(pmd_table != pmd_offset(pud, 0));
-#endif
}
-static void __init fixrange_init(unsigned long start, unsigned long end,
- pgd_t *pgd_base)
-{
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
- int i, j;
- unsigned long vaddr;
-
- vaddr = start;
- i = pgd_index(vaddr);
- j = pmd_index(vaddr);
- pgd = pgd_base + i;
-
- for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
- p4d = p4d_offset(pgd, vaddr);
- pud = pud_offset(p4d, vaddr);
- if (pud_none(*pud))
- one_md_table_init(pud);
- pmd = pmd_offset(pud, vaddr);
- for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) {
- one_page_table_init(pmd);
- vaddr += PMD_SIZE;
- }
- j = 0;
- }
-}
-
-static void __init fixaddr_user_init( void)
+void __init mem_init(void)
{
-#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
- long size = FIXADDR_USER_END - FIXADDR_USER_START;
- pte_t *pte;
- phys_t p;
- unsigned long v, vaddr = FIXADDR_USER_START;
-
- if (!size)
- return;
-
- fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir);
- v = (unsigned long) memblock_alloc_low(size, PAGE_SIZE);
- if (!v)
- panic("%s: Failed to allocate %lu bytes align=%lx\n",
- __func__, size, PAGE_SIZE);
-
- memcpy((void *) v , (void *) FIXADDR_USER_START, size);
- p = __pa(v);
- for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
- p += PAGE_SIZE) {
- pte = virt_to_kpte(vaddr);
- pte_set_val(*pte, p, PAGE_READONLY);
- }
-#endif
+ kmalloc_ok = 1;
}
void __init paging_init(void)
{
unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
- unsigned long vaddr;
empty_zero_page = (unsigned long *) memblock_alloc_low(PAGE_SIZE,
PAGE_SIZE);
@@ -176,17 +94,8 @@ void __init paging_init(void)
panic("%s: Failed to allocate %lu bytes align=%lx\n",
__func__, PAGE_SIZE, PAGE_SIZE);
- max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT;
+ max_zone_pfn[ZONE_NORMAL] = high_physmem >> PAGE_SHIFT;
free_area_init(max_zone_pfn);
-
- /*
- * Fixed mappings, only the page table structure has to be
- * created - mappings will be set by set_fixmap():
- */
- vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir);
-
- fixaddr_user_init();
}
/*
@@ -202,14 +111,13 @@ void free_initmem(void)
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+ pgd_t *pgd = __pgd_alloc(mm, 0);
- if (pgd) {
- memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+ if (pgd)
memcpy(pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
- }
+
return pgd;
}
@@ -237,3 +145,11 @@ static const pgprot_t protection_map[16] = {
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED
};
DECLARE_VM_GET_PAGE_PROT
+
+void mark_rodata_ro(void)
+{
+ unsigned long rodata_start = PFN_ALIGN(__start_rodata);
+ unsigned long rodata_end = PFN_ALIGN(__end_rodata);
+
+ os_protect_memory((void *)rodata_start, rodata_end - rodata_start, 1, 0, 0);
+}
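
mark_rodata_ro() defers to the host process: os_protect_memory() is UML's os-Linux wrapper around mprotect(), so with (r=1, w=0, x=0) the call above is conceptually:

    /* host-side effect (sketch) */
    mprotect((void *)rodata_start, rodata_end - rodata_start, PROT_READ);
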
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index fb2adfb49945..ae6ca373c261 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -22,23 +22,6 @@ static int physmem_fd = -1;
unsigned long high_physmem;
EXPORT_SYMBOL(high_physmem);
-extern unsigned long long physmem_size;
-
-void __init mem_total_pages(unsigned long physmem, unsigned long iomem,
- unsigned long highmem)
-{
- unsigned long phys_pages, highmem_pages;
- unsigned long iomem_pages, total_pages;
-
- phys_pages = physmem >> PAGE_SHIFT;
- iomem_pages = iomem >> PAGE_SHIFT;
- highmem_pages = highmem >> PAGE_SHIFT;
-
- total_pages = phys_pages + iomem_pages + highmem_pages;
-
- max_mapnr = total_pages;
-}
-
void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
int r, int w, int x)
{
@@ -64,13 +47,12 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
* @reserve_end: end address of the physical kernel memory.
* @len: Length of total physical memory that should be mapped/made
* available, in bytes.
- * @highmem: Number of highmem bytes that should be mapped/made available.
*
- * Creates an unlinked temporary file of size (len + highmem) and memory maps
+ * Creates an unlinked temporary file of size (len) and memory maps
* it on the last executable image address (uml_reserved).
*
* The offset is needed as the length of the total physical memory
- * (len + highmem) includes the size of the memory used be the executable image,
+ * (len) includes the size of the memory used by the executable image,
* but the mapped-to address is the last address of the executable image
* (uml_reserved == end address of executable image).
*
@@ -78,24 +60,24 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
* of all user space processes/kernel tasks.
*/
void __init setup_physmem(unsigned long start, unsigned long reserve_end,
- unsigned long len, unsigned long long highmem)
+ unsigned long len)
{
unsigned long reserve = reserve_end - start;
- long map_size = len - reserve;
+ unsigned long map_size = len - reserve;
int err;
- if(map_size <= 0) {
+ if (len <= reserve) {
os_warn("Too few physical memory! Needed=%lu, given=%lu\n",
reserve, len);
exit(1);
}
- physmem_fd = create_mem_file(len + highmem);
+ physmem_fd = create_mem_file(len);
err = os_map_memory((void *) reserve_end, physmem_fd, reserve,
map_size, 1, 1, 1);
if (err < 0) {
- os_warn("setup_physmem - mapping %ld bytes of memory at 0x%p "
+ os_warn("setup_physmem - mapping %lu bytes of memory at 0x%p "
"failed - errno = %d\n", map_size,
(void *) reserve_end, err);
exit(1);
@@ -107,9 +89,8 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end,
*/
os_seek_file(physmem_fd, __pa(__syscall_stub_start));
os_write_file(physmem_fd, __syscall_stub_start, PAGE_SIZE);
- os_fsync_file(physmem_fd);
- memblock_add(__pa(start), len + highmem);
+ memblock_add(__pa(start), len);
memblock_reserve(__pa(start), reserve);
min_low_pfn = PFN_UP(__pa(reserve_end));
@@ -124,23 +105,6 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
fd = physmem_fd;
*offset_out = phys;
}
- else if (phys < __pa(end_iomem)) {
- struct iomem_region *region = iomem_regions;
-
- while (region != NULL) {
- if ((phys >= region->phys) &&
- (phys < region->phys + region->size)) {
- fd = region->fd;
- *offset_out = phys - region->phys;
- break;
- }
- region = region->next;
- }
- }
- else if (phys < __pa(end_iomem) + highmem) {
- fd = physmem_fd;
- *offset_out = phys - iomem_size;
- }
return fd;
}
@@ -149,6 +113,8 @@ EXPORT_SYMBOL(phys_mapping);
static int __init uml_mem_setup(char *line, int *add)
{
char *retptr;
+
+ *add = 0;
physmem_size = memparse(line,&retptr);
return 0;
}
@@ -161,61 +127,3 @@ __uml_setup("mem=", uml_mem_setup,
" be more, and the excess, if it's ever used, will just be swapped out.\n"
" Example: mem=64M\n\n"
);
-
-__uml_setup("iomem=", parse_iomem,
-"iomem=<name>,<file>\n"
-" Configure <file> as an IO memory region named <name>.\n\n"
-);
-
-/*
- * This list is constructed in parse_iomem and addresses filled in
- * setup_iomem, both of which run during early boot. Afterwards, it's
- * unchanged.
- */
-struct iomem_region *iomem_regions;
-
-/* Initialized in parse_iomem and unchanged thereafter */
-int iomem_size;
-
-unsigned long find_iomem(char *driver, unsigned long *len_out)
-{
- struct iomem_region *region = iomem_regions;
-
- while (region != NULL) {
- if (!strcmp(region->driver, driver)) {
- *len_out = region->size;
- return region->virt;
- }
-
- region = region->next;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(find_iomem);
-
-static int setup_iomem(void)
-{
- struct iomem_region *region = iomem_regions;
- unsigned long iomem_start = high_physmem + PAGE_SIZE;
- int err;
-
- while (region != NULL) {
- err = os_map_memory((void *) iomem_start, region->fd, 0,
- region->size, 1, 1, 0);
- if (err)
- printk(KERN_ERR "Mapping iomem region for driver '%s' "
- "failed, errno = %d\n", region->driver, -err);
- else {
- region->virt = iomem_start;
- region->phys = __pa(region->virt);
- }
-
- iomem_start += region->size + PAGE_SIZE;
- region = region->next;
- }
-
- return 0;
-}
-
-__initcall(setup_iomem);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index be2856af6d4c..63b38a3f73f7 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -43,7 +43,10 @@
* cares about its entry, so it's OK if another processor is modifying its
* entry.
*/
-struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { NULL } };
+struct task_struct *cpu_tasks[NR_CPUS] = {
+ [0 ... NR_CPUS - 1] = &init_task,
+};
+EXPORT_SYMBOL(cpu_tasks);
void free_stack(unsigned long stack, int order)
{
@@ -64,7 +67,7 @@ unsigned long alloc_stack(int order, int atomic)
static inline void set_current(struct task_struct *task)
{
- cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) { task });
+ cpu_tasks[task_thread_info(task)->cpu] = task;
}
struct task_struct *__switch_to(struct task_struct *from, struct task_struct *to)
@@ -81,14 +84,18 @@ struct task_struct *__switch_to(struct task_struct *from, struct task_struct *to
void interrupt_end(void)
{
struct pt_regs *regs = &current->thread.regs;
-
- if (need_resched())
- schedule();
- if (test_thread_flag(TIF_SIGPENDING) ||
- test_thread_flag(TIF_NOTIFY_SIGNAL))
- do_signal(regs);
- if (test_thread_flag(TIF_NOTIFY_RESUME))
- resume_user_mode_work(regs);
+ unsigned long thread_flags;
+
+ thread_flags = read_thread_flags();
+ while (thread_flags & _TIF_WORK_MASK) {
+ if (thread_flags & _TIF_NEED_RESCHED)
+ schedule();
+ if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
+ do_signal(regs);
+ if (thread_flags & _TIF_NOTIFY_RESUME)
+ resume_user_mode_work(regs);
+ thread_flags = read_thread_flags();
+ }
}
int get_current_pid(void)
@@ -116,7 +123,7 @@ void new_thread_handler(void)
* callback returns only if the kernel thread execs a process
*/
fn(arg);
- userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs);
+ userspace(&current->thread.regs.regs);
}
/* Called magically, see new_thread_handler above */
@@ -133,12 +140,12 @@ static void fork_handler(void)
current->thread.prev_sched = NULL;
- userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs);
+ userspace(&current->thread.regs.regs);
}
int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
{
- unsigned long clone_flags = args->flags;
+ u64 clone_flags = args->flags;
unsigned long sp = args->stack;
unsigned long tls = args->tls;
void (*handler)(void);
@@ -180,11 +187,22 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
void initial_thread_cb(void (*proc)(void *), void *arg)
{
- int save_kmalloc_ok = kmalloc_ok;
-
- kmalloc_ok = 0;
initial_thread_cb_skas(proc, arg);
- kmalloc_ok = save_kmalloc_ok;
+}
+
+int arch_dup_task_struct(struct task_struct *dst,
+ struct task_struct *src)
+{
+ /* init_task is not dynamically sized (missing FPU state) */
+ if (unlikely(src == &init_task)) {
+ memcpy(dst, src, sizeof(init_task));
+ memset((void *)dst + sizeof(init_task), 0,
+ arch_task_struct_size - sizeof(init_task));
+ } else {
+ memcpy(dst, src, arch_task_struct_size);
+ }
+
+ return 0;
}
void um_idle_sleep(void)
@@ -200,17 +218,19 @@ void arch_cpu_idle(void)
um_idle_sleep();
}
+void arch_cpu_idle_prepare(void)
+{
+ os_idle_prepare();
+}
+
int __uml_cant_sleep(void) {
return in_atomic() || irqs_disabled() || in_interrupt();
/* Is in_interrupt() really needed? */
}
-int user_context(unsigned long sp)
+int uml_need_resched(void)
{
- unsigned long stack;
-
- stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER);
- return stack != (unsigned long) current_thread_info();
+ return need_resched();
}
extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end;
@@ -287,11 +307,3 @@ unsigned long __get_wchan(struct task_struct *p)
return 0;
}
-
-int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu)
-{
- int cpu = current_thread_info()->cpu;
-
- return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu);
-}
-
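
On the arch_dup_task_struct() special case added above: task_struct is now dynamically sized on UML (an FPU save area is appended, so live objects are arch_task_struct_size bytes), while init_task remains a plain static object of sizeof(init_task). Copying the full arch size out of init_task would read past the static object, hence the copy-then-zero split:

    /* layout assumed by the copy (illustration only):
     *
     *   |--- sizeof(init_task) ---|--- FPU save area ---|
     *   |<---------- arch_task_struct_size ----------->|
     */
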
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 2124624b7817..fdbb37b5c399 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -9,6 +9,9 @@
#include <linux/uaccess.h>
#include <asm/ptrace-abi.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
void user_enable_single_step(struct task_struct *child)
{
set_tsk_thread_flag(child, TIF_SINGLESTEP);
@@ -126,6 +129,9 @@ int syscall_trace_enter(struct pt_regs *regs)
UPT_SYSCALL_ARG3(&regs->regs),
UPT_SYSCALL_ARG4(&regs->regs));
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_enter(regs, UPT_SYSCALL_NR(&regs->regs));
+
if (!test_thread_flag(TIF_SYSCALL_TRACE))
return 0;
@@ -142,6 +148,9 @@ void syscall_trace_leave(struct pt_regs *regs)
if (test_thread_flag(TIF_SINGLESTEP))
send_sigtrap(&regs->regs, 0);
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_exit(regs, PT_REGS_SYSCALL_RET(regs));
+
if (!test_thread_flag(TIF_SYSCALL_TRACE))
return;
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
index 5085a50c3b8c..4fc04742048a 100644
--- a/arch/um/kernel/sigio.c
+++ b/arch/um/kernel/sigio.c
@@ -8,32 +8,6 @@
#include <os.h>
#include <sigio.h>
-/* Protected by sigio_lock() called from write_sigio_workaround */
-static int sigio_irq_fd = -1;
-
-static irqreturn_t sigio_interrupt(int irq, void *data)
-{
- char c;
-
- os_read_file(sigio_irq_fd, &c, sizeof(c));
- return IRQ_HANDLED;
-}
-
-int write_sigio_irq(int fd)
-{
- int err;
-
- err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt,
- 0, "write sigio", NULL);
- if (err < 0) {
- printk(KERN_ERR "write_sigio_irq : um_request_irq failed, "
- "err = %d\n", err);
- return -1;
- }
- sigio_irq_fd = fd;
- return 0;
-}
-
/* These are called from os-Linux/sigio.c to protect its pollfds arrays. */
static DEFINE_MUTEX(sigio_mutex);
diff --git a/arch/um/kernel/skas/.gitignore b/arch/um/kernel/skas/.gitignore
new file mode 100644
index 000000000000..c3409ced0f38
--- /dev/null
+++ b/arch/um/kernel/skas/.gitignore
@@ -0,0 +1,2 @@
+stub_exe
+stub_exe.dbg
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index 6f86d53e3d69..3384be42691f 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -3,14 +3,48 @@
# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
#
-obj-y := stub.o mmu.o process.o syscall.o uaccess.o
+obj-y := stub.o mmu.o process.o syscall.o uaccess.o \
+ stub_exe_embed.o
+
+# Stub executable
+
+stub_exe_objs-y := stub_exe.o
+
+stub_exe_objs := $(foreach F,$(stub_exe_objs-y),$(obj)/$F)
+
+# Object file containing the ELF executable
+$(obj)/stub_exe_embed.o: $(src)/stub_exe_embed.S $(obj)/stub_exe
+
+$(obj)/stub_exe.dbg: $(stub_exe_objs) FORCE
+ $(call if_changed,stub_exe)
+
+$(obj)/stub_exe: OBJCOPYFLAGS := -S
+$(obj)/stub_exe: $(obj)/stub_exe.dbg FORCE
+ $(call if_changed,objcopy)
+
+quiet_cmd_stub_exe = STUB_EXE $@
+ cmd_stub_exe = $(CC) -nostdlib -o $@ \
+ $(filter-out $(UM_GPROF_OPT) $(UM_GCOV_OPT),$(KBUILD_CFLAGS)) $(STUB_EXE_LDFLAGS) \
+ $(filter %.o,$^)
+
+STUB_EXE_LDFLAGS = -Wl,-n -static
+
+targets += stub_exe.dbg stub_exe $(stub_exe_objs-y)
+
+# end
# stub.o is in the stub, so it can't be built with profiling
# GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
# disable it
CFLAGS_stub.o := $(CFLAGS_NO_HARDENING)
-UNPROFILE_OBJS := stub.o
+CFLAGS_stub_exe.o := $(CFLAGS_NO_HARDENING)
+
+# Clang will call memset() from __builtin_alloca() when stack variable
+# initialization is enabled, which is used in stub_exe.c.
+CFLAGS_stub_exe.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)
+
+UNPROFILE_OBJS := stub.o stub_exe.o
KCOV_INSTRUMENT := n
include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 886ed5e65674..00957788591b 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -8,6 +8,7 @@
#include <linux/sched/signal.h>
#include <linux/slab.h>
+#include <shared/irq_kern.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/mmu_context.h>
@@ -19,56 +20,56 @@
/* Ensure the stub_data struct covers the allocated area */
static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
+static spinlock_t mm_list_lock;
+static struct list_head mm_list;
+
+void enter_turnstile(struct mm_id *mm_id) __acquires(turnstile)
+{
+ struct mm_context *ctx = container_of(mm_id, struct mm_context, id);
+
+ mutex_lock(&ctx->turnstile);
+}
+
+void exit_turnstile(struct mm_id *mm_id) __releases(turnstile)
+{
+ struct mm_context *ctx = container_of(mm_id, struct mm_context, id);
+
+ mutex_unlock(&ctx->turnstile);
+}
+
int init_new_context(struct task_struct *task, struct mm_struct *mm)
{
struct mm_id *new_id = &mm->context.id;
unsigned long stack = 0;
int ret = -ENOMEM;
+ mutex_init(&mm->context.turnstile);
+ spin_lock_init(&mm->context.sync_tlb_lock);
+
stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO, ilog2(STUB_DATA_PAGES));
if (stack == 0)
goto out;
new_id->stack = stack;
+ new_id->syscall_data_len = 0;
+ new_id->syscall_fd_num = 0;
- block_signals_trace();
- new_id->pid = start_userspace(stack);
- unblock_signals_trace();
+ scoped_guard(spinlock_irqsave, &mm_list_lock) {
+ /* Insert into list, used for lookups when the child dies */
+ list_add(&mm->context.list, &mm_list);
+ }
- if (new_id->pid < 0) {
- ret = new_id->pid;
+ ret = start_userspace(new_id);
+ if (ret < 0)
goto out_free;
- }
- /*
- * Ensure the new MM is clean and nothing unwanted is mapped.
- *
- * TODO: We should clear the memory up to STUB_START to ensure there is
- * nothing mapped there, i.e. we (currently) have:
- *
- * |- user memory -|- unused -|- stub -|- unused -|
- * ^ TASK_SIZE ^ STUB_START
- *
- * Meaning we have two unused areas where we may still have valid
- * mappings from our internal clone(). That isn't really a problem as
- * userspace is not going to access them, but it is definitely not
- * correct.
- *
- * However, we are "lucky" and if rseq is configured, then on 32 bit
- * it will fall into the first empty range while on 64 bit it is going
- * to use an anonymous mapping in the second range. As such, things
- * continue to work for now as long as we don't start unmapping these
- * areas.
- *
- * Change this to STUB_START once we have a clean userspace.
- */
- unmap(new_id, 0, TASK_SIZE);
+ /* Ensure the new MM is clean and nothing unwanted is mapped */
+ unmap(new_id, 0, STUB_START);
return 0;
out_free:
- if (new_id->stack != 0)
- free_pages(new_id->stack, ilog2(STUB_DATA_PAGES));
+ free_pages(new_id->stack, ilog2(STUB_DATA_PAGES));
out:
return ret;
}
@@ -82,13 +83,80 @@ void destroy_context(struct mm_struct *mm)
* zero, resulting in a kill(0), which will result in the
* whole UML suddenly dying. Also, cover negative and
* 1 cases, since they shouldn't happen either.
+ *
+ * Negative cases happen if the child died unexpectedly.
*/
- if (mmu->id.pid < 2) {
+ if (mmu->id.pid >= 0 && mmu->id.pid < 2) {
printk(KERN_ERR "corrupt mm_context - pid = %d\n",
mmu->id.pid);
return;
}
- os_kill_ptraced_process(mmu->id.pid, 1);
+
+ scoped_guard(spinlock_irqsave, &mm_list_lock)
+ list_del(&mm->context.list);
+
+ if (mmu->id.pid > 0) {
+ os_kill_ptraced_process(mmu->id.pid, 1);
+ mmu->id.pid = -1;
+ }
+
+ if (using_seccomp && mmu->id.sock)
+ os_close_file(mmu->id.sock);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
}
+
+static irqreturn_t mm_sigchld_irq(int irq, void *dev)
+{
+ struct mm_context *mm_context;
+ pid_t pid;
+
+ guard(spinlock)(&mm_list_lock);
+
+ while ((pid = os_reap_child()) > 0) {
+ /*
+ * A child died, check if we have an MM with the PID. This is
+ * only relevant in SECCOMP mode (as ptrace will fail anyway).
+ *
+ * See wait_stub_done_seccomp for more details.
+ */
+ list_for_each_entry(mm_context, &mm_list, list) {
+ if (mm_context->id.pid == pid) {
+ struct stub_data *stub_data;
+
+ printk(KERN_ERR "Unexpectedly lost MM child! Affected tasks will segfault.\n");
+
+ /* Marks the MM as dead */
+ mm_context->id.pid = -1;
+
+ stub_data = (void *)mm_context->id.stack;
+ stub_data->futex = FUTEX_IN_KERN;
+#if IS_ENABLED(CONFIG_SMP)
+ os_futex_wake(&stub_data->futex);
+#endif
+
+ /*
+ * NOTE: Currently executing syscalls by
+ * affected tasks may finish normally.
+ */
+ break;
+ }
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int __init init_child_tracking(void)
+{
+ int err;
+
+ spin_lock_init(&mm_list_lock);
+ INIT_LIST_HEAD(&mm_list);
+
+ err = request_irq(SIGCHLD_IRQ, mm_sigchld_irq, 0, "SIGCHLD", NULL);
+ if (err < 0)
+ panic("Failed to register SIGCHLD IRQ: %d", err);
+
+ return 0;
+}
+early_initcall(init_child_tracking);
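
The reap loop in mm_sigchld_irq() presumably sits on top of a non-blocking waitpid() in the os-Linux layer; a sketch of what os_reap_child() is assumed to look like (only the name comes from the hunk above, the body is illustrative):

    #include <sys/wait.h>

    int os_reap_child(void)
    {
            int status;

            /* reap one exited child without blocking; <= 0 ends the loop */
            return waitpid(-1, &status, WNOHANG | __WALL);
    }
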
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 68657988c8d1..4a7673b0261a 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -7,6 +7,7 @@
#include <linux/sched/mm.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/task.h>
+#include <linux/smp-internal.h>
#include <asm/tlbflush.h>
@@ -22,20 +23,16 @@ static int __init start_kernel_proc(void *unused)
{
block_signals_trace();
- cpu_tasks[0].task = current;
-
start_kernel();
return 0;
}
-extern int userspace_pid[];
-
-extern char cpu0_irqstack[];
+char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE);
int __init start_uml(void)
{
- stack_protections((unsigned long) &cpu0_irqstack);
- set_sigstack(cpu0_irqstack, THREAD_SIZE);
+ stack_protections((unsigned long) &cpu_irqstacks[0]);
+ set_sigstack(cpu_irqstacks[0], THREAD_SIZE);
init_new_thread_signals();
@@ -68,3 +65,15 @@ void current_mm_sync(void)
um_tlb_sync(current->mm);
}
+
+static DEFINE_SPINLOCK(initial_jmpbuf_spinlock);
+
+void initial_jmpbuf_lock(void)
+{
+ spin_lock_irq(&initial_jmpbuf_spinlock);
+}
+
+void initial_jmpbuf_unlock(void)
+{
+ spin_unlock_irq(&initial_jmpbuf_spinlock);
+}
diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
index 5d52ffa682dc..67cab46a602c 100644
--- a/arch/um/kernel/skas/stub.c
+++ b/arch/um/kernel/skas/stub.c
@@ -5,21 +5,54 @@
#include <sysdep/stub.h>
-static __always_inline int syscall_handler(struct stub_data *d)
+#include <linux/futex.h>
+#include <sys/socket.h>
+#include <errno.h>
+
+/*
+ * Known security issues
+ *
+ * Userspace can jump to this address to execute *any* syscall that is
+ * permitted by the stub. As we will return afterwards, it can do
+ * whatever it likes, including:
+ * - Tricking the kernel into handing out the memory FD
+ * - Using this memory FD to read/write all physical memory
+ * - Running in parallel to the kernel processing a syscall
+ * (possibly creating data races?)
+ * - Blocking e.g. SIGALRM to avoid time based scheduling
+ *
+ * To avoid this, the permitted location for each syscall needs to be
+ * checked for in the SECCOMP filter (which is reasonably simple). Also,
+ * more care will need to go into considerations how the code might be
+ * tricked by using a prepared stack (or even modifying the stack from
+ * another thread in case SMP support is added).
+ *
+ * As for the SIGALRM, the best counter measure will be to check in the
+ * kernel that the process is reporting back the SIGALRM in a timely
+ * fashion.
+ */
+static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS])
{
+ struct stub_data *d = get_stub_data();
int i;
unsigned long res;
+ int fd;
for (i = 0; i < d->syscall_data_len; i++) {
struct stub_syscall *sc = &d->syscall_data[i];
switch (sc->syscall) {
case STUB_SYSCALL_MMAP:
+ if (fd_map)
+ fd = fd_map[sc->mem.fd];
+ else
+ fd = sc->mem.fd;
+
res = stub_syscall6(STUB_MMAP_NR,
sc->mem.addr, sc->mem.length,
sc->mem.prot,
MAP_SHARED | MAP_FIXED,
- sc->mem.fd, sc->mem.offset);
+ fd, sc->mem.offset);
if (res != sc->mem.addr) {
d->err = res;
d->syscall_data_len = i;
@@ -35,16 +68,6 @@ static __always_inline int syscall_handler(struct stub_data *d)
return -1;
}
break;
- case STUB_SYSCALL_MPROTECT:
- res = stub_syscall3(__NR_mprotect,
- sc->mem.addr, sc->mem.length,
- sc->mem.prot);
- if (res) {
- d->err = res;
- d->syscall_data_len = i;
- return -1;
- }
- break;
default:
d->err = -95; /* EOPNOTSUPP */
d->syscall_data_len = i;
@@ -61,9 +84,98 @@ static __always_inline int syscall_handler(struct stub_data *d)
void __section(".__syscall_stub")
stub_syscall_handler(void)
{
+ syscall_handler(NULL);
+
+ trap_myself();
+}
+
+void __section(".__syscall_stub")
+stub_signal_interrupt(int sig, siginfo_t *info, void *p)
+{
struct stub_data *d = get_stub_data();
+ char rcv_data;
+ union {
+ char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)];
+ struct cmsghdr align;
+ } ctrl = {};
+ struct iovec iov = {
+ .iov_base = &rcv_data,
+ .iov_len = 1,
+ };
+ struct msghdr msghdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &ctrl,
+ .msg_controllen = sizeof(ctrl),
+ };
+ ucontext_t *uc = p;
+ struct cmsghdr *fd_msg;
+ int *fd_map;
+ int num_fds;
+ long res;
- syscall_handler(d);
+ d->signal = sig;
+ d->si_offset = (unsigned long)info - (unsigned long)&d->sigstack[0];
+ d->mctx_offset = (unsigned long)&uc->uc_mcontext - (unsigned long)&d->sigstack[0];
- trap_myself();
+restart_wait:
+ d->futex = FUTEX_IN_KERN;
+ do {
+ res = stub_syscall3(__NR_futex, (unsigned long)&d->futex,
+ FUTEX_WAKE, 1);
+ } while (res == -EINTR);
+
+ do {
+ res = stub_syscall4(__NR_futex, (unsigned long)&d->futex,
+ FUTEX_WAIT, FUTEX_IN_KERN, 0);
+ } while (res == -EINTR || d->futex == FUTEX_IN_KERN);
+
+ if (res < 0 && res != -EAGAIN)
+ stub_syscall1(__NR_exit_group, 1);
+
+ if (d->syscall_data_len) {
+ /* Read passed FDs (if any) */
+ do {
+ res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0);
+ } while (res == -EINTR);
+
+ /* We should never have a receive error (other than -EAGAIN) */
+ if (res < 0 && res != -EAGAIN)
+ stub_syscall1(__NR_exit_group, 1);
+
+ /* Receive the FDs */
+ num_fds = 0;
+ fd_msg = msghdr.msg_control;
+ fd_map = (void *)&CMSG_DATA(fd_msg);
+ if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr))
+ num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ /* Try running queued syscalls. */
+ res = syscall_handler(fd_map);
+
+ while (num_fds)
+ stub_syscall2(__NR_close, fd_map[--num_fds], 0);
+ } else {
+ res = 0;
+ }
+
+ if (res < 0 || d->restart_wait) {
+ /* Report SIGSYS if we restart. */
+ d->signal = SIGSYS;
+ d->restart_wait = 0;
+
+ goto restart_wait;
+ }
+
+ /* Restore arch dependent state that is not part of the mcontext */
+ stub_seccomp_restore_state(&d->arch_data);
+
+ /* Return so that the host modified mcontext is restored. */
+}
+
+void __section(".__syscall_stub")
+stub_signal_restorer(void)
+{
+ /* We must not have anything on the stack when doing rt_sigreturn */
+ stub_syscall0(__NR_rt_sigreturn);
}
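
The futex sequence in stub_signal_interrupt() is one half of a two-party handshake on d->futex: the stub publishes FUTEX_IN_KERN, wakes the kernel, then parks in FUTEX_WAIT until the word is flipped back (the d->futex == FUTEX_IN_KERN re-check guards against spurious wakeups). The kernel-side release is then, schematically (FUTEX_IN_CHILD is an assumed name for the other state; os_futex_wake() appears in the mmu.c hunk above):

    stub_data->futex = FUTEX_IN_CHILD;      /* hand control back to the stub */
    os_futex_wake(&stub_data->futex);
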
diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c
new file mode 100644
index 000000000000..cbafaa684e66
--- /dev/null
+++ b/arch/um/kernel/skas/stub_exe.c
@@ -0,0 +1,230 @@
+#include <sys/ptrace.h>
+#include <sys/prctl.h>
+#include <sys/fcntl.h>
+#include <asm/unistd.h>
+#include <sysdep/stub.h>
+#include <stub-data.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <generated/asm-offsets.h>
+
+void _start(void);
+
+noinline static void real_init(void)
+{
+ struct stub_init_data init_data;
+ unsigned long res;
+ struct {
+ void *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+ } stack = {
+ .ss_size = STUB_DATA_PAGES * UM_KERN_PAGE_SIZE,
+ };
+ struct {
+ void *sa_handler_;
+ unsigned long sa_flags;
+ void *sa_restorer;
+ unsigned long long sa_mask;
+ } sa = {
+ /* Need to set SA_RESTORER (but the handler never returns) */
+ .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000,
+ };
+
+ /* set a nice name */
+ stub_syscall2(__NR_prctl, PR_SET_NAME, (unsigned long)"uml-userspace");
+
+ /* Make sure this process dies if the kernel dies */
+ stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL);
+
+ /* Needed in SECCOMP mode (and safe to do anyway) */
+ stub_syscall5(__NR_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+
+ /* read information from STDIN and close it */
+ res = stub_syscall3(__NR_read, 0,
+ (unsigned long)&init_data, sizeof(init_data));
+ if (res != sizeof(init_data))
+ stub_syscall1(__NR_exit, 10);
+
+ /* In SECCOMP mode, FD 0 is a socket and is later used for FD passing */
+ if (!init_data.seccomp)
+ stub_syscall1(__NR_close, 0);
+ else
+ stub_syscall3(__NR_fcntl, 0, F_SETFL, O_NONBLOCK);
+
+ /* map stub code + data */
+ res = stub_syscall6(STUB_MMAP_NR,
+ init_data.stub_start, UM_KERN_PAGE_SIZE,
+ PROT_READ | PROT_EXEC, MAP_FIXED | MAP_SHARED,
+ init_data.stub_code_fd, init_data.stub_code_offset);
+ if (res != init_data.stub_start)
+ stub_syscall1(__NR_exit, 11);
+
+ res = stub_syscall6(STUB_MMAP_NR,
+ init_data.stub_start + UM_KERN_PAGE_SIZE,
+ STUB_DATA_PAGES * UM_KERN_PAGE_SIZE,
+ PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
+ init_data.stub_data_fd, init_data.stub_data_offset);
+ if (res != init_data.stub_start + UM_KERN_PAGE_SIZE)
+ stub_syscall1(__NR_exit, 12);
+
+ /* In SECCOMP mode, we only need the signalling FD from now on */
+ if (init_data.seccomp) {
+ res = stub_syscall3(__NR_close_range, 1, ~0U, 0);
+ if (res != 0)
+ stub_syscall1(__NR_exit, 13);
+ }
+
+ /* setup signal stack inside stub data */
+ stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE;
+ stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0);
+
+ /* register signal handlers */
+ sa.sa_handler_ = (void *) init_data.signal_handler;
+ sa.sa_restorer = (void *) init_data.signal_restorer;
+ if (!init_data.seccomp) {
+ /* In ptrace mode, the SIGSEGV handler never returns */
+ sa.sa_mask = 0;
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 14);
+ } else {
+ /* SECCOMP mode uses rt_sigreturn, need to mask all signals */
+ sa.sa_mask = ~0ULL;
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 15);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGSYS,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 16);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGALRM,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 17);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGTRAP,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 18);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGILL,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 19);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGFPE,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 20);
+ }
+
+ /*
+ * If in seccomp mode, install the SECCOMP filter and trigger a syscall.
+ * Otherwise set PTRACE_TRACEME and do a SIGSTOP.
+ */
+ if (init_data.seccomp) {
+ struct sock_filter filter[] = {
+#if __BITS_PER_LONG > 32
+ /* [0] Load upper 32bit of instruction pointer from seccomp_data */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ (offsetof(struct seccomp_data, instruction_pointer) + 4)),
+
+ /* [1] Jump forward 3 instructions if the upper address is not identical */
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) >> 32, 0, 3),
+#endif
+ /* [2] Load lower 32bit of instruction pointer from seccomp_data */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ (offsetof(struct seccomp_data, instruction_pointer))),
+
+ /* [3] Mask out lower bits */
+ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000),
+
+ /* [4] Jump to [6] if the lower bits are not on the expected page */
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) & 0xfffff000, 1, 0),
+
+ /* [5] Trap call, allow */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+
+ /* [6,7] Check architecture */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ offsetof(struct seccomp_data, arch)),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,
+ UM_SECCOMP_ARCH_NATIVE, 1, 0),
+
+ /* [8] Kill (for architecture check) */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
+
+ /* [9] Load syscall number */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+
+ /* [10-16] Check against permitted syscalls */
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex,
+ 7, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_recvmsg,
+ 6, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_close,
+ 5, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR,
+ 4, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap,
+ 3, 0),
+#ifdef __i386__
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area,
+ 2, 0),
+#else
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl,
+ 2, 0),
+#endif
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn,
+ 1, 0),
+
+ /* [17] Not one of the permitted syscalls */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
+
+ /* [18] Permitted call for the stub */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = sizeof(filter) / sizeof(filter[0]),
+ .filter = filter,
+ };
+
+ if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+ SECCOMP_FILTER_FLAG_TSYNC,
+ (unsigned long)&prog) != 0)
+ stub_syscall1(__NR_exit, 21);
+
+ /* Fall through, the exit syscall will cause SIGSYS */
+ } else {
+ stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
+
+ stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
+ }
+
+ stub_syscall1(__NR_exit, 30);
+
+ __builtin_unreachable();
+}
+
+__attribute__((naked)) void _start(void)
+{
+ /*
+ * Since the stack after exec() starts at the top-most address,
+ * but that's exactly where we also want to map the stub data
+ * and code, this must:
+ * - push the stack by 1 code and STUB_DATA_PAGES data pages
+ * - call real_init()
+ * This way, real_init() can use the stack normally, while the
+ * original stack further down (higher address) will become
+ * inaccessible after the mmap() calls above.
+ */
+ stub_start(real_init);
+}
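
A note on reading the classic-BPF filter above: BPF_JUMP(cond, K, jt, jf) offsets are relative, skipping jt instructions on a match and jf on a miss. Worked against the bracketed numbering in the code:

    /* [10] BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, 7, 0)
     *      match: 10 + 1 + 7 = 18  -> [18] SECCOMP_RET_ALLOW
     *      miss:  10 + 1 + 0 = 11  -> [11] next syscall check
     */
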
diff --git a/arch/um/kernel/skas/stub_exe_embed.S b/arch/um/kernel/skas/stub_exe_embed.S
new file mode 100644
index 000000000000..6d8914fbe8f1
--- /dev/null
+++ b/arch/um/kernel/skas/stub_exe_embed.S
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/linkage.h>
+
+__INITDATA
+
+SYM_DATA_START(stub_exe_start)
+ .incbin "arch/um/kernel/skas/stub_exe"
+SYM_DATA_END_LABEL(stub_exe_start, SYM_L_GLOBAL, stub_exe_end)
+
+__FINIT
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index b09e85279d2b..ba7494f9bfe4 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -9,8 +9,8 @@
#include <kern_util.h>
#include <sysdep/ptrace.h>
#include <sysdep/ptrace_user.h>
-#include <sysdep/syscalls.h>
#include <linux/time-internal.h>
+#include <asm/syscall.h>
#include <asm/unistd.h>
#include <asm/delay.h>
@@ -31,8 +31,26 @@ void handle_syscall(struct uml_pt_regs *r)
goto out;
syscall = UPT_SYSCALL_NR(r);
+
+ /*
+ * If no time passes, then sched_yield may not actually yield, causing
+ * broken spinlock implementations in userspace (ASAN) to hang for long
+ * periods of time.
+ */
+ if ((time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL) &&
+ syscall == __NR_sched_yield)
+ tt_extra_sched_jiffies += 1;
+
if (syscall >= 0 && syscall < __NR_syscalls) {
- unsigned long ret = EXECUTE_SYSCALL(syscall, regs);
+ unsigned long ret;
+
+ ret = (*sys_call_table[syscall])(UPT_SYSCALL_ARG1(&regs->regs),
+ UPT_SYSCALL_ARG2(&regs->regs),
+ UPT_SYSCALL_ARG3(&regs->regs),
+ UPT_SYSCALL_ARG4(&regs->regs),
+ UPT_SYSCALL_ARG5(&regs->regs),
+ UPT_SYSCALL_ARG6(&regs->regs));
PT_REGS_SET_SYSCALL_RETURN(regs, ret);
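
With the EXECUTE_SYSCALL macro gone, dispatch now goes straight through sys_call_table: bounds-check the number, then call through a function-pointer table. A freestanding sketch of that pattern (all names here are invented for illustration):

#include <stdio.h>

typedef long (*syscall_fn)(long, long, long, long, long, long);

static long demo_add(long a, long b, long c, long d, long e, long f)
{
	(void)c; (void)d; (void)e; (void)f;
	return a + b;
}

static syscall_fn demo_table[] = { demo_add };

int main(void)
{
	long nr = 0;	/* the "syscall number" */

	/* range-check first, exactly like handle_syscall() does */
	if (nr >= 0 && nr < (long)(sizeof(demo_table) / sizeof(demo_table[0])))
		printf("ret = %ld\n", demo_table[nr](2, 3, 0, 0, 0, 0));
	return 0;
}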
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
new file mode 100644
index 000000000000..f1e52b7348fb
--- /dev/null
+++ b/arch/um/kernel/smp.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ *
+ * Based on the previous implementation in TT mode
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ */
+
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/module.h>
+#include <linux/processor.h>
+#include <linux/threads.h>
+#include <linux/cpu.h>
+#include <linux/hardirq.h>
+#include <linux/smp.h>
+#include <linux/smp-internal.h>
+#include <init.h>
+#include <kern.h>
+#include <os.h>
+#include <smp.h>
+
+enum {
+ UML_IPI_RES = 0,
+ UML_IPI_CALL_SINGLE,
+ UML_IPI_CALL,
+ UML_IPI_STOP,
+};
+
+void arch_smp_send_reschedule(int cpu)
+{
+ os_send_ipi(cpu, UML_IPI_RES);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+ os_send_ipi(cpu, UML_IPI_CALL_SINGLE);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ int cpu;
+
+ for_each_cpu(cpu, mask)
+ os_send_ipi(cpu, UML_IPI_CALL);
+}
+
+void smp_send_stop(void)
+{
+ int cpu, me = smp_processor_id();
+
+ for_each_online_cpu(cpu) {
+ if (cpu == me)
+ continue;
+ os_send_ipi(cpu, UML_IPI_STOP);
+ }
+}
+
+static void ipi_handler(int vector, struct uml_pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
+ int cpu = raw_smp_processor_id();
+
+ irq_enter();
+
+ if (current->mm)
+ os_alarm_process(current->mm->context.id.pid);
+
+ switch (vector) {
+ case UML_IPI_RES:
+ inc_irq_stat(irq_resched_count);
+ scheduler_ipi();
+ break;
+
+ case UML_IPI_CALL_SINGLE:
+ inc_irq_stat(irq_call_count);
+ generic_smp_call_function_single_interrupt();
+ break;
+
+ case UML_IPI_CALL:
+ inc_irq_stat(irq_call_count);
+ generic_smp_call_function_interrupt();
+ break;
+
+ case UML_IPI_STOP:
+ set_cpu_online(cpu, false);
+ while (1)
+ pause();
+ break;
+
+ default:
+ pr_err("CPU#%d received unknown IPI (vector=%d)!\n", cpu, vector);
+ break;
+ }
+
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+void uml_ipi_handler(int vector)
+{
+ struct uml_pt_regs r = { .is_user = 0 };
+
+ preempt_disable();
+ ipi_handler(vector, &r);
+ preempt_enable();
+}
+
+/* AP states used only during CPU startup */
+enum {
+ UML_CPU_PAUSED = 0,
+ UML_CPU_RUNNING,
+};
+
+static int cpu_states[NR_CPUS];
+
+static int start_secondary(void *unused)
+{
+ int err, cpu = raw_smp_processor_id();
+
+ notify_cpu_starting(cpu);
+ set_cpu_online(cpu, true);
+
+ err = um_setup_timer();
+ if (err)
+ panic("CPU#%d failed to setup timer, err = %d", cpu, err);
+
+ local_irq_enable();
+
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+
+ return 0;
+}
+
+void uml_start_secondary(void *opaque)
+{
+ int cpu = raw_smp_processor_id();
+ struct mm_struct *mm = &init_mm;
+ struct task_struct *idle;
+
+ stack_protections((unsigned long) &cpu_irqstacks[cpu]);
+ set_sigstack(&cpu_irqstacks[cpu], THREAD_SIZE);
+
+ set_cpu_present(cpu, true);
+ os_futex_wait(&cpu_states[cpu], UML_CPU_PAUSED);
+
+ smp_rmb(); /* paired with smp_wmb() in __cpu_up() */
+
+ idle = cpu_tasks[cpu];
+ idle->thread_info.cpu = cpu;
+
+ mmgrab(mm);
+ idle->active_mm = mm;
+
+ idle->thread.request.thread.proc = start_secondary;
+ idle->thread.request.thread.arg = NULL;
+
+ new_thread(task_stack_page(idle), &idle->thread.switch_buf,
+ new_thread_handler);
+ os_start_secondary(opaque, &idle->thread.switch_buf);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ int err, cpu, me = smp_processor_id();
+ unsigned long deadline;
+
+ os_init_smp();
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == me)
+ continue;
+
+ pr_debug("Booting processor %d...\n", cpu);
+ err = os_start_cpu_thread(cpu);
+ if (err) {
+			pr_crit("CPU#%d failed to start cpu thread, err = %d\n",
+				cpu, err);
+ continue;
+ }
+
+ deadline = jiffies + msecs_to_jiffies(1000);
+ spin_until_cond(cpu_present(cpu) ||
+ time_is_before_jiffies(deadline));
+
+ if (!cpu_present(cpu))
+ pr_crit("CPU#%d failed to boot\n", cpu);
+ }
+}
+
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+ cpu_tasks[cpu] = tidle;
+ smp_wmb(); /* paired with smp_rmb() in uml_start_secondary() */
+ cpu_states[cpu] = UML_CPU_RUNNING;
+ os_futex_wake(&cpu_states[cpu]);
+ spin_until_cond(cpu_online(cpu));
+
+ return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+}
+
+/* Set in uml_ncpus_setup */
+int uml_ncpus = 1;
+
+void __init prefill_possible_map(void)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < uml_ncpus; cpu++)
+ set_cpu_possible(cpu, true);
+ for (; cpu < NR_CPUS; cpu++)
+ set_cpu_possible(cpu, false);
+}
+
+static int __init uml_ncpus_setup(char *line, int *add)
+{
+ *add = 0;
+
+ if (kstrtoint(line, 10, &uml_ncpus)) {
+ os_warn("%s: Couldn't parse '%s'\n", __func__, line);
+ return -1;
+ }
+
+ uml_ncpus = clamp(uml_ncpus, 1, NR_CPUS);
+
+ return 0;
+}
+
+__uml_setup("ncpus=", uml_ncpus_setup,
+"ncpus=<# of desired CPUs>\n"
+" This tells UML how many virtual processors to start. The maximum\n"
+" number of supported virtual processors can be obtained by querying\n"
+" the CONFIG_NR_CPUS option using --showconfig.\n\n"
+);
+
+EXPORT_SYMBOL(uml_curr_cpu);
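
The __cpu_up()/uml_start_secondary() handshake above parks each secondary on a per-CPU state word with os_futex_wait() and releases it with os_futex_wake() once cpu_tasks[] is published. A hedged userspace sketch of that pattern using the raw futex(2) syscall; os_futex_wait()/os_futex_wake() are assumed to be thin wrappers around something like this:

#include <linux/futex.h>
#include <stddef.h>
#include <sys/syscall.h>
#include <unistd.h>

int state;	/* 0 = PAUSED, 1 = RUNNING */

void secondary_wait(void)
{
	/* sleep while PAUSED; FUTEX_WAIT re-checks, so spurious wakeups are fine */
	while (__atomic_load_n(&state, __ATOMIC_ACQUIRE) == 0)
		syscall(SYS_futex, &state, FUTEX_WAIT, 0, NULL, NULL, 0);
}

void boot_cpu_release(void)
{
	/* publish the data first (the smp_wmb()/smp_rmb() pairing above) */
	__atomic_store_n(&state, 1, __ATOMIC_RELEASE);
	syscall(SYS_futex, &state, FUTEX_WAKE, 1, NULL, NULL, 0);
}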
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index 4bb8622dc512..13ee5666668d 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -32,12 +32,6 @@ void show_stack(struct task_struct *task, unsigned long *stack,
struct pt_regs *segv_regs = current->thread.segv_regs;
int i;
- if (!segv_regs && os_is_signal_stack()) {
- pr_err("Received SIGSEGV in SIGSEGV handler,"
- " aborting stack trace!\n");
- return;
- }
-
if (!stack)
stack = get_stack_pointer(task, segv_regs);
@@ -52,5 +46,5 @@ void show_stack(struct task_struct *task, unsigned long *stack,
}
printk("%sCall Trace:\n", loglvl);
- dump_trace(current, &stackops, (void *)loglvl);
+ dump_trace(task ?: current, &stackops, (void *)loglvl);
}
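
The sysrq change relies on GNU C's binary `?:` operator: `task ?: current` yields `task` when it is non-NULL, evaluating it only once, and falls back to `current` otherwise. Tiny sketch:

#include <stdio.h>

int main(void)
{
	const char *task = NULL;
	const char *fallback = "current";

	/* a ?: b is a ? a : b, but evaluates a only once (GNU extension) */
	printf("%s\n", task ?: fallback);	/* prints "current" */
	return 0;
}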
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 29b27b90581f..b344a36b44eb 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -25,6 +25,8 @@
#include <shared/init.h>
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+#include <linux/sched/clock.h>
+
enum time_travel_mode time_travel_mode;
EXPORT_SYMBOL_GPL(time_travel_mode);
@@ -47,6 +49,15 @@ static u16 time_travel_shm_id;
static struct um_timetravel_schedshm *time_travel_shm;
static union um_timetravel_schedshm_client *time_travel_shm_client;
+unsigned long tt_extra_sched_jiffies;
+
+notrace unsigned long long sched_clock(void)
+{
+ return (unsigned long long)(jiffies - INITIAL_JIFFIES +
+ tt_extra_sched_jiffies)
+ * (NSEC_PER_SEC / HZ);
+}
+
static void time_travel_set_time(unsigned long long ns)
{
if (unlikely(ns < time_travel_time))
@@ -443,6 +454,11 @@ static void time_travel_periodic_timer(struct time_travel_event *e)
{
time_travel_add_event(&time_travel_timer_event,
time_travel_time + time_travel_timer_interval);
+
+	/* clock tick: consume one extra jiffy so sched_clock() stays constant */
+ if (tt_extra_sched_jiffies > 0)
+ tt_extra_sched_jiffies -= 1;
+
deliver_alarm();
}
@@ -594,6 +610,10 @@ EXPORT_SYMBOL_GPL(time_travel_add_irq_event);
static void time_travel_oneshot_timer(struct time_travel_event *e)
{
+	/* clock tick: consume one extra jiffy so sched_clock() stays constant */
+ if (tt_extra_sched_jiffies > 0)
+ tt_extra_sched_jiffies -= 1;
+
deliver_alarm();
}
@@ -605,9 +625,10 @@ void time_travel_sleep(void)
* controller application.
*/
unsigned long long next = S64_MAX;
+ int cpu = raw_smp_processor_id();
if (time_travel_mode == TT_MODE_BASIC)
- os_timer_disable();
+ os_timer_disable(cpu);
time_travel_update_time(next, true);
@@ -618,9 +639,9 @@ void time_travel_sleep(void)
* This is somewhat wrong - we should get the first
* one sooner like the os_timer_one_shot() below...
*/
- os_timer_set_interval(time_travel_timer_interval);
+ os_timer_set_interval(cpu, time_travel_timer_interval);
} else {
- os_timer_one_shot(time_travel_timer_event.time - next);
+ os_timer_one_shot(cpu, time_travel_timer_event.time - next);
}
}
}
@@ -738,6 +759,8 @@ extern u64 time_travel_ext_req(u32 op, u64 time);
#define time_travel_del_event(e) do { } while (0)
#endif
+static struct clock_event_device timer_clockevent[NR_CPUS];
+
void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
unsigned long flags;
@@ -760,12 +783,14 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
static int itimer_shutdown(struct clock_event_device *evt)
{
+ int cpu = evt - &timer_clockevent[0];
+
if (time_travel_mode != TT_MODE_OFF)
time_travel_del_event(&time_travel_timer_event);
if (time_travel_mode != TT_MODE_INFCPU &&
time_travel_mode != TT_MODE_EXTERNAL)
- os_timer_disable();
+ os_timer_disable(cpu);
return 0;
}
@@ -773,6 +798,7 @@ static int itimer_shutdown(struct clock_event_device *evt)
static int itimer_set_periodic(struct clock_event_device *evt)
{
unsigned long long interval = NSEC_PER_SEC / HZ;
+ int cpu = evt - &timer_clockevent[0];
if (time_travel_mode != TT_MODE_OFF) {
time_travel_del_event(&time_travel_timer_event);
@@ -785,7 +811,7 @@ static int itimer_set_periodic(struct clock_event_device *evt)
if (time_travel_mode != TT_MODE_INFCPU &&
time_travel_mode != TT_MODE_EXTERNAL)
- os_timer_set_interval(interval);
+ os_timer_set_interval(cpu, interval);
return 0;
}
@@ -805,7 +831,7 @@ static int itimer_next_event(unsigned long delta,
if (time_travel_mode != TT_MODE_INFCPU &&
time_travel_mode != TT_MODE_EXTERNAL)
- return os_timer_one_shot(delta);
+ return os_timer_one_shot(raw_smp_processor_id(), delta);
return 0;
}
@@ -815,10 +841,9 @@ static int itimer_one_shot(struct clock_event_device *evt)
return itimer_next_event(0, evt);
}
-static struct clock_event_device timer_clockevent = {
+static struct clock_event_device _timer_clockevent = {
.name = "posix-timer",
.rating = 250,
- .cpumask = cpu_possible_mask,
.features = CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_ONESHOT,
.set_state_shutdown = itimer_shutdown,
@@ -836,13 +861,21 @@ static struct clock_event_device timer_clockevent = {
static irqreturn_t um_timer(int irq, void *dev)
{
- if (get_current()->mm != NULL)
- {
- /* userspace - relay signal, results in correct userspace timers */
+ int cpu = raw_smp_processor_id();
+ struct clock_event_device *evt = &timer_clockevent[cpu];
+
+ /*
+	 * Interrupt the (possibly) running userspace process; technically
+	 * this should only happen while userspace is executing.
+	 * With infinite-CPU time travel we can only get here when userspace
+	 * is not executing, so skip the notification to avoid spurious scheduling.
+ */
+ if (time_travel_mode != TT_MODE_INFCPU &&
+ time_travel_mode != TT_MODE_EXTERNAL &&
+ get_current()->mm)
os_alarm_process(get_current()->mm->context.id.pid);
- }
- (*timer_clockevent.event_handler)(&timer_clockevent);
+ evt->event_handler(evt);
return IRQ_HANDLED;
}
@@ -879,7 +912,24 @@ static struct clocksource timer_clocksource = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-static void __init um_timer_setup(void)
+int um_setup_timer(void)
+{
+ int cpu = raw_smp_processor_id();
+ struct clock_event_device *evt = &timer_clockevent[cpu];
+ int err;
+
+ err = os_timer_create();
+ if (err)
+ return err;
+
+ memcpy(evt, &_timer_clockevent, sizeof(*evt));
+ evt->cpumask = cpumask_of(cpu);
+ clockevents_register_device(evt);
+
+ return 0;
+}
+
+static void __init um_timer_init(void)
{
int err;
@@ -888,8 +938,8 @@ static void __init um_timer_setup(void)
printk(KERN_ERR "register_timer : request_irq failed - "
"errno = %d\n", -err);
- err = os_timer_create();
- if (err != 0) {
+ err = um_setup_timer();
+ if (err) {
printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
return;
}
@@ -899,7 +949,6 @@ static void __init um_timer_setup(void)
printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
return;
}
- clockevents_register_device(&timer_clockevent);
}
void read_persistent_clock64(struct timespec64 *ts)
@@ -920,7 +969,7 @@ void read_persistent_clock64(struct timespec64 *ts)
void __init time_init(void)
{
timer_set_signal_handler();
- late_time_init = um_timer_setup;
+ late_time_init = um_timer_init;
}
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
@@ -936,21 +985,21 @@ static int setup_time_travel(char *str)
{
if (strcmp(str, "=inf-cpu") == 0) {
time_travel_mode = TT_MODE_INFCPU;
- timer_clockevent.name = "time-travel-timer-infcpu";
+ _timer_clockevent.name = "time-travel-timer-infcpu";
timer_clocksource.name = "time-travel-clock";
return 1;
}
if (strncmp(str, "=ext:", 5) == 0) {
time_travel_mode = TT_MODE_EXTERNAL;
- timer_clockevent.name = "time-travel-timer-external";
+ _timer_clockevent.name = "time-travel-timer-external";
timer_clocksource.name = "time-travel-clock-external";
return time_travel_connect_external(str + 5);
}
if (!*str) {
time_travel_mode = TT_MODE_BASIC;
- timer_clockevent.name = "time-travel-timer";
+ _timer_clockevent.name = "time-travel-timer";
timer_clocksource.name = "time-travel-clock";
return 1;
}
@@ -961,26 +1010,26 @@ static int setup_time_travel(char *str)
__setup("time-travel", setup_time_travel);
__uml_help(setup_time_travel,
"time-travel\n"
-"This option just enables basic time travel mode, in which the clock/timers\n"
-"inside the UML instance skip forward when there's nothing to do, rather than\n"
-"waiting for real time to elapse. However, instance CPU speed is limited by\n"
-"the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
-"clock (but quicker when there's nothing to do).\n"
+" This option just enables basic time travel mode, in which the clock/timers\n"
+" inside the UML instance skip forward when there's nothing to do, rather than\n"
+" waiting for real time to elapse. However, instance CPU speed is limited by\n"
+" the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
+" clock (but quicker when there's nothing to do).\n"
"\n"
"time-travel=inf-cpu\n"
-"This enables time travel mode with infinite processing power, in which there\n"
-"are no wall clock timers, and any CPU processing happens - as seen from the\n"
-"guest - instantly. This can be useful for accurate simulation regardless of\n"
-"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
-"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
+" This enables time travel mode with infinite processing power, in which there\n"
+" are no wall clock timers, and any CPU processing happens - as seen from the\n"
+" guest - instantly. This can be useful for accurate simulation regardless of\n"
+" debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
+" easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
"\n"
"time-travel=ext:[ID:]/path/to/socket\n"
-"This enables time travel mode similar to =inf-cpu, except the system will\n"
-"use the given socket to coordinate with a central scheduler, in order to\n"
-"have more than one system simultaneously be on simulated time. The virtio\n"
-"driver code in UML knows about this so you can also simulate networks and\n"
-"devices using it, assuming the device has the right capabilities.\n"
-"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");
+" This enables time travel mode similar to =inf-cpu, except the system will\n"
+" use the given socket to coordinate with a central scheduler, in order to\n"
+" have more than one system simultaneously be on simulated time. The virtio\n"
+" driver code in UML knows about this so you can also simulate networks and\n"
+" devices using it, assuming the device has the right capabilities.\n"
+" The optional ID is a 64-bit integer that's sent to the central scheduler.\n\n");
static int setup_time_travel_start(char *str)
{
@@ -997,8 +1046,9 @@ static int setup_time_travel_start(char *str)
__setup("time-travel-start=", setup_time_travel_start);
__uml_help(setup_time_travel_start,
"time-travel-start=<nanoseconds>\n"
-"Configure the UML instance's wall clock to start at this value rather than\n"
-"the host's wall clock at the time of UML boot.\n");
+" Configure the UML instance's wall clock to start at this value rather than\n"
+" the host's wall clock at the time of UML boot.\n\n");
+
static struct kobject *bc_time_kobject;
static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
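
With timer_clockevent now an array, itimer_shutdown() and itimer_set_periodic() recover the CPU number by subtracting the array base from the element pointer; the difference of two element pointers counts elements, not bytes. A minimal sketch of that idiom:

#include <assert.h>

struct demo_evt { int dummy; };
static struct demo_evt demo_evts[4];

static int demo_evt_to_cpu(struct demo_evt *evt)
{
	/* pointer difference is an element count, not a byte count */
	return (int)(evt - &demo_evts[0]);
}

int main(void)
{
	assert(demo_evt_to_cpu(&demo_evts[2]) == 2);
	return 0;
}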
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 548af31d4111..39608cccf2c6 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -23,9 +23,6 @@ struct vm_ops {
int phys_fd, unsigned long long offset);
int (*unmap)(struct mm_id *mm_idp,
unsigned long virt, unsigned long len);
- int (*mprotect)(struct mm_id *mm_idp,
- unsigned long virt, unsigned long len,
- unsigned int prot);
};
static int kern_map(struct mm_id *mm_idp,
@@ -44,15 +41,6 @@ static int kern_unmap(struct mm_id *mm_idp,
return os_unmap_memory((void *)virt, len);
}
-static int kern_mprotect(struct mm_id *mm_idp,
- unsigned long virt, unsigned long len,
- unsigned int prot)
-{
- return os_protect_memory((void *)virt, len,
- prot & UM_PROT_READ, prot & UM_PROT_WRITE,
- 1);
-}
-
void report_enomem(void)
{
printk(KERN_ERR "UML ran out of memory on the host side! "
@@ -65,33 +53,37 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
struct vm_ops *ops)
{
pte_t *pte;
- int r, w, x, prot, ret = 0;
+ int ret = 0;
pte = pte_offset_kernel(pmd, addr);
do {
- r = pte_read(*pte);
- w = pte_write(*pte);
- x = pte_exec(*pte);
- if (!pte_young(*pte)) {
- r = 0;
- w = 0;
- } else if (!pte_dirty(*pte))
- w = 0;
-
- prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
- (x ? UM_PROT_EXEC : 0));
- if (pte_newpage(*pte)) {
- if (pte_present(*pte)) {
- __u64 offset;
- unsigned long phys = pte_val(*pte) & PAGE_MASK;
- int fd = phys_mapping(phys, &offset);
-
- ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE,
- prot, fd, offset);
- } else
- ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
- } else if (pte_newprot(*pte))
- ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot);
+ if (!pte_needsync(*pte))
+ continue;
+
+ if (pte_present(*pte)) {
+ __u64 offset;
+ unsigned long phys = pte_val(*pte) & PAGE_MASK;
+ int fd = phys_mapping(phys, &offset);
+ int r, w, x, prot;
+
+ r = pte_read(*pte);
+ w = pte_write(*pte);
+ x = pte_exec(*pte);
+ if (!pte_young(*pte)) {
+ r = 0;
+ w = 0;
+ } else if (!pte_dirty(*pte))
+ w = 0;
+
+ prot = (r ? UM_PROT_READ : 0) |
+ (w ? UM_PROT_WRITE : 0) |
+ (x ? UM_PROT_EXEC : 0);
+
+ ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE,
+ prot, fd, offset);
+ } else
+ ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
+
*pte = pte_mkuptodate(*pte);
} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
return ret;
@@ -109,7 +101,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
do {
next = pmd_addr_end(addr, end);
if (!pmd_present(*pmd)) {
- if (pmd_newpage(*pmd)) {
+ if (pmd_needsync(*pmd)) {
ret = ops->unmap(ops->mm_idp, addr,
next - addr);
pmd_mkuptodate(*pmd);
@@ -132,7 +124,7 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
do {
next = pud_addr_end(addr, end);
if (!pud_present(*pud)) {
- if (pud_newpage(*pud)) {
+ if (pud_needsync(*pud)) {
ret = ops->unmap(ops->mm_idp, addr,
next - addr);
pud_mkuptodate(*pud);
@@ -155,7 +147,7 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
do {
next = p4d_addr_end(addr, end);
if (!p4d_present(*p4d)) {
- if (p4d_newpage(*p4d)) {
+ if (p4d_needsync(*p4d)) {
ret = ops->unmap(ops->mm_idp, addr,
next - addr);
p4d_mkuptodate(*p4d);
@@ -170,9 +162,11 @@ int um_tlb_sync(struct mm_struct *mm)
{
pgd_t *pgd;
struct vm_ops ops;
- unsigned long addr = mm->context.sync_tlb_range_from, next;
+ unsigned long addr, next;
int ret = 0;
+ guard(spinlock_irqsave)(&mm->context.sync_tlb_lock);
+
if (mm->context.sync_tlb_range_to == 0)
return 0;
@@ -180,18 +174,17 @@ int um_tlb_sync(struct mm_struct *mm)
if (mm == &init_mm) {
ops.mmap = kern_map;
ops.unmap = kern_unmap;
- ops.mprotect = kern_mprotect;
} else {
ops.mmap = map;
ops.unmap = unmap;
- ops.mprotect = protect;
}
+ addr = mm->context.sync_tlb_range_from;
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
if (!pgd_present(*pgd)) {
- if (pgd_newpage(*pgd)) {
+ if (pgd_needsync(*pgd)) {
ret = ops.unmap(ops.mm_idp, addr,
next - addr);
pgd_mkuptodate(*pgd);
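
The rewritten update_pte_range() reduces to one rule: every entry flagged as needing a sync is either (re)mapped on the host when present or unmapped when not, then marked up to date. A reduced model of that loop; the callback names are invented here, while the real code dispatches through struct vm_ops:

#include <stdbool.h>
#include <stdio.h>

struct demo_pte {
	bool needsync;	/* stands in for pte_needsync() */
	bool present;	/* stands in for pte_present() */
};

static void demo_map(int i)   { printf("mmap page %d on the host\n", i); }
static void demo_unmap(int i) { printf("munmap page %d on the host\n", i); }

static void sync_range(struct demo_pte *pte, int n)
{
	for (int i = 0; i < n; i++) {
		if (!pte[i].needsync)
			continue;		/* already in sync */
		if (pte[i].present)
			demo_map(i);		/* ops->mmap() */
		else
			demo_unmap(i);		/* ops->unmap() */
		pte[i].needsync = false;	/* pte_mkuptodate() */
	}
}

int main(void)
{
	struct demo_pte ptes[3] = { { true, true }, { false, true }, { true, false } };

	sync_range(ptes, 3);
	return 0;
}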
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 97c8df9c4401..177615820a4c 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -18,6 +18,122 @@
#include <skas.h>
/*
+ * NOTE: UML does not have exception tables. As such, this is almost a copy
+ * of the code in mm/memory.c, only adjusting the logic to simply check whether
+ * we are coming from the kernel instead of doing an additional lookup in the
+ * exception table.
+ * We can do this simplification because we never get here if the exception was
+ * fixable.
+ */
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+ if (likely(mmap_read_trylock(mm)))
+ return true;
+
+ if (!is_user)
+ return false;
+
+ return !mmap_read_lock_killable(mm);
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+ /*
+ * We don't have this operation yet.
+ *
+ * It should be easy enough to do: it's basically a
+ * atomic_long_try_cmpxchg_acquire()
+ * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+ * it also needs the proper lockdep magic etc.
+ */
+ return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+ mmap_read_unlock(mm);
+ if (!is_user)
+ return false;
+
+ return !mmap_write_lock_killable(mm);
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalent to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So for kernel faults this only ever trylocks: UML has no
+ * exception tables (see the note at the top of this file), and
+ * we only get here for faults that are actually expected.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+static struct vm_area_struct *
+um_lock_mm_and_find_vma(struct mm_struct *mm,
+ unsigned long addr, bool is_user)
+{
+ struct vm_area_struct *vma;
+
+ if (!get_mmap_lock_carefully(mm, is_user))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (likely(vma && (vma->vm_start <= addr)))
+ return vma;
+
+ /*
+ * Well, dang. We might still be successful, but only
+ * if we can extend a vma to do so.
+ */
+ if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+ mmap_read_unlock(mm);
+ return NULL;
+ }
+
+ /*
+ * We can try to upgrade the mmap lock atomically,
+ * in which case we can continue to use the vma
+ * we already looked up.
+ *
+ * Otherwise we'll have to drop the mmap lock and
+ * re-take it, and also look up the vma again,
+ * re-checking it.
+ */
+ if (!mmap_upgrade_trylock(mm)) {
+ if (!upgrade_mmap_lock_carefully(mm, is_user))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (!vma)
+ goto fail;
+ if (vma->vm_start <= addr)
+ goto success;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto fail;
+ }
+
+ if (expand_stack_locked(vma, addr))
+ goto fail;
+
+success:
+ mmap_write_downgrade(mm);
+ return vma;
+
+fail:
+ mmap_write_unlock(mm);
+ return NULL;
+}
+
+/*
* Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
* segv().
*/
@@ -43,21 +159,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
if (is_user)
flags |= FAULT_FLAG_USER;
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
- if (!vma)
- goto out;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto out;
- if (is_user && !ARCH_IS_STACKGROW(address))
- goto out;
- vma = expand_stack(mm, address);
+ vma = um_lock_mm_and_find_vma(mm, address, is_user);
if (!vma)
goto out_nosemaphore;
-good_area:
*code_out = SEGV_ACCERR;
if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
@@ -175,12 +280,14 @@ void fatal_sigsegv(void)
* @sig: the signal number
* @unused_si: the signal info struct; unused in this handler
* @regs: the ptrace register information
+ * @mc: the mcontext of the signal
*
* The handler first extracts the faultinfo from the UML ptrace regs struct.
* If the userfault did not happen in an UML userspace process, bad_segv is called.
* Otherwise the signal did happen in a cloned userspace process, handle it.
*/
-void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+ void *mc)
{
struct faultinfo * fi = UPT_FAULTINFO(regs);
@@ -189,7 +296,7 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
bad_segv(*fi, UPT_IP(regs));
return;
}
- segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
+ segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs, mc);
}
/*
@@ -199,9 +306,8 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
* give us bad data!
*/
unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
- struct uml_pt_regs *regs)
+ struct uml_pt_regs *regs, void *mc)
{
- jmp_buf *catcher;
int si_code;
int err;
int is_write = FAULT_WRITE(fi);
@@ -210,7 +316,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
if (!is_user && regs)
current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
- if (!is_user && init_mm.context.sync_tlb_range_to) {
+ if (!is_user && address >= start_vm && address < end_vm) {
/*
* Kernel has pending updates from set_ptes that were not
* flushed yet. Syncing them should fix the pagefault (if not
@@ -223,6 +329,19 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
panic("Failed to sync kernel TLBs: %d", err);
goto out;
}
+ else if (current->pagefault_disabled) {
+ if (!mc) {
+ show_regs(container_of(regs, struct pt_regs, regs));
+ panic("Segfault with pagefaults disabled but no mcontext");
+ }
+ if (!current->thread.segv_continue) {
+ show_regs(container_of(regs, struct pt_regs, regs));
+ panic("Segfault without recovery target");
+ }
+ mc_set_rip(mc, current->thread.segv_continue);
+ current->thread.segv_continue = NULL;
+ goto out;
+ }
else if (current->mm == NULL) {
show_regs(container_of(regs, struct pt_regs, regs));
panic("Segfault with no mm");
@@ -246,15 +365,8 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
address = 0;
}
- catcher = current->thread.fault_catcher;
if (!err)
goto out;
- else if (catcher != NULL) {
- current->thread.fault_addr = (void *) address;
- UML_LONGJMP(catcher, 1);
- }
- else if (current->thread.fault_addr != NULL)
- panic("fault_addr set but no fault catcher");
else if (!is_user && arch_fixup(ip, regs))
goto out;
@@ -282,7 +394,8 @@ out:
return 0;
}
-void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
+void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs,
+ void *mc)
{
int code, err;
if (!UPT_IS_USER(regs)) {
@@ -310,15 +423,8 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
}
}
-void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs)
-{
- if (current->thread.fault_catcher != NULL)
- UML_LONGJMP(current->thread.fault_catcher, 1);
- else
- relay_signal(sig, si, regs);
-}
-
-void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+ void *mc)
{
do_IRQ(WINCH_IRQ, regs);
}
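
The new pagefault_disabled branch in segv() implements fault recovery by rewriting the saved instruction pointer (mc_set_rip()) to thread.segv_continue. The same mechanism can be sketched in plain userspace; this assumes x86-64/glibc and GCC's label-as-value extension, and illustrates the pattern rather than UML's implementation:

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <ucontext.h>

static volatile unsigned long segv_continue;	/* the registered fixup target */

static void handler(int sig, siginfo_t *si, void *mc)
{
	ucontext_t *uc = mc;

	(void)sig; (void)si;
	/* rewrite the saved IP: resume at the fixup, not the faulting insn */
	uc->uc_mcontext.gregs[REG_RIP] = segv_continue;
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = handler, .sa_flags = SA_SIGINFO };

	sigaction(SIGSEGV, &sa, NULL);
	segv_continue = (unsigned long)&&fixup;	/* GCC label-as-value */
	*(volatile int *)8 = 1;			/* faulting store */
fixup:
	puts("recovered");
	return 0;
}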
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index e8e8b54b3037..e2b24e1ecfa6 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -12,12 +12,14 @@
#include <linux/panic_notifier.h>
#include <linux/seq_file.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/utsname.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/kmsg_dump.h>
#include <linux/suspend.h>
#include <linux/random.h>
+#include <linux/smp-internal.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
@@ -53,21 +55,15 @@ static void __init add_arg(char *arg)
/*
* These fields are initialized at boot time and not changed.
- * XXX This structure is used only in the non-SMP case. Maybe this
- * should be moved to smp.c.
*/
struct cpuinfo_um boot_cpu_data = {
.loops_per_jiffy = 0,
- .ipi_pipe = { -1, -1 },
.cache_alignment = L1_CACHE_BYTES,
.x86_capability = { 0 }
};
EXPORT_SYMBOL(boot_cpu_data);
-union thread_union cpu0_irqstack
- __section(".data..init_irqstack") =
- { .thread_info = INIT_THREAD_INFO(init_task) };
/* Changed in setup_arch, which is called in early boot */
static char host_info[(__NEW_UTS_LEN + 1) * 5];
@@ -76,12 +72,18 @@ static int show_cpuinfo(struct seq_file *m, void *v)
{
int i = 0;
+#if IS_ENABLED(CONFIG_SMP)
+ i = (uintptr_t) v - 1;
+ if (!cpu_online(i))
+ return 0;
+#endif
+
seq_printf(m, "processor\t: %d\n", i);
seq_printf(m, "vendor_id\t: User Mode Linux\n");
seq_printf(m, "model name\t: UML\n");
seq_printf(m, "mode\t\t: skas\n");
seq_printf(m, "host\t\t: %s\n", host_info);
- seq_printf(m, "fpu\t\t: %s\n", cpu_has(&boot_cpu_data, X86_FEATURE_FPU) ? "yes" : "no");
+ seq_printf(m, "fpu\t\t: %s\n", str_yes_no(cpu_has(&boot_cpu_data, X86_FEATURE_FPU)));
seq_printf(m, "flags\t\t:");
for (i = 0; i < 32*NCAPINTS; i++)
if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL))
@@ -92,13 +94,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ)) % 100);
-
return 0;
}
static void *c_start(struct seq_file *m, loff_t *pos)
{
- return *pos < nr_cpu_ids ? &boot_cpu_data + *pos : NULL;
+ if (*pos < nr_cpu_ids)
+ return (void *)(uintptr_t)(*pos + 1);
+ return NULL;
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
@@ -131,7 +134,7 @@ static int have_root __initdata;
static int have_console __initdata;
/* Set in uml_mem_setup and modified in linux_main */
-long long physmem_size = 64 * 1024 * 1024;
+unsigned long long physmem_size = 64 * 1024 * 1024;
EXPORT_SYMBOL(physmem_size);
static const char *usage_string =
@@ -167,19 +170,6 @@ __uml_setup("root=", uml_root_setup,
" root=/dev/ubd5\n\n"
);
-static int __init no_skas_debug_setup(char *line, int *add)
-{
- os_warn("'debug' is not necessary to gdb UML in skas mode - run\n");
- os_warn("'gdb linux'\n");
-
- return 0;
-}
-
-__uml_setup("debug", no_skas_debug_setup,
-"debug\n"
-" this flag is not needed to run gdb on UML in skas mode\n\n"
-);
-
static int __init uml_console_setup(char *line, int *add)
{
have_console = 1;
@@ -270,15 +260,11 @@ unsigned long stub_start;
unsigned long task_size;
EXPORT_SYMBOL(task_size);
-unsigned long host_task_size;
-
unsigned long brk_start;
-unsigned long end_iomem;
-EXPORT_SYMBOL(end_iomem);
#define MIN_VMALLOC (32 * 1024 * 1024)
-static void parse_host_cpu_flags(char *line)
+static void __init parse_host_cpu_flags(char *line)
{
int i;
for (i = 0; i < 32*NCAPINTS; i++) {
@@ -286,7 +272,8 @@ static void parse_host_cpu_flags(char *line)
set_cpu_cap(&boot_cpu_data, i);
}
}
-static void parse_cache_line(char *line)
+
+static void __init parse_cache_line(char *line)
{
long res;
char *to_parse = strstr(line, ":");
@@ -302,10 +289,25 @@ static void parse_cache_line(char *line)
}
}
-int __init linux_main(int argc, char **argv)
+static unsigned long __init get_top_address(char **envp)
+{
+ unsigned long top_addr = (unsigned long) &top_addr;
+ int i;
+
+ /* The earliest variable should be after the program name in ELF */
+ for (i = 0; envp[i]; i++) {
+ if ((unsigned long) envp[i] > top_addr)
+ top_addr = (unsigned long) envp[i];
+ }
+
+ return PAGE_ALIGN(top_addr + 1);
+}
+
+int __init linux_main(int argc, char **argv, char **envp)
{
unsigned long avail, diff;
unsigned long virtmem_size, max_physmem;
+ unsigned long host_task_size;
unsigned long stack;
unsigned int i;
int add;
@@ -324,20 +326,21 @@ int __init linux_main(int argc, char **argv)
if (have_console == 0)
add_arg(DEFAULT_COMMAND_LINE_CONSOLE);
- host_task_size = os_get_top_address();
- /* reserve a few pages for the stubs (taking care of data alignment) */
- /* align the data portion */
- BUILD_BUG_ON(!is_power_of_2(STUB_DATA_PAGES));
- stub_start = (host_task_size - 1) & ~(STUB_DATA_PAGES * PAGE_SIZE - 1);
- /* another page for the code portion */
- stub_start -= PAGE_SIZE;
+ host_task_size = get_top_address(envp);
+ /* reserve a few pages for the stubs */
+ stub_start = host_task_size - STUB_SIZE;
host_task_size = stub_start;
+ /* Limit TASK_SIZE to what is addressable by the page table */
+ task_size = host_task_size;
+ if (task_size > (unsigned long long) PTRS_PER_PGD * PGDIR_SIZE)
+ task_size = PTRS_PER_PGD * PGDIR_SIZE;
+
/*
* TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps
* out
*/
- task_size = host_task_size & PGDIR_MASK;
+ task_size = task_size & PGDIR_MASK;
/* OS sanity checks that need to happen before the kernel runs */
os_early_checks();
@@ -351,12 +354,11 @@ int __init linux_main(int argc, char **argv)
* so they actually get what they asked for. This should
* add zero for non-exec shield users
*/
-
- diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
+ diff = PAGE_ALIGN(brk_start) - PAGE_ALIGN((unsigned long) &_end);
if (diff > 1024 * 1024) {
os_info("Adding %ld bytes to physical memory to account for "
"exec-shield gap\n", diff);
- physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
+ physmem_size += diff;
}
uml_physmem = (unsigned long) __binary_start & PAGE_MASK;
@@ -366,23 +368,15 @@ int __init linux_main(int argc, char **argv)
setup_machinename(init_utsname()->machine);
- highmem = 0;
- iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
- max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;
-
- /*
- * Zones have to begin on a 1 << MAX_PAGE_ORDER page boundary,
- * so this makes sure that's true for highmem
- */
- max_physmem &= ~((1 << (PAGE_SHIFT + MAX_PAGE_ORDER)) - 1);
- if (physmem_size + iomem_size > max_physmem) {
- highmem = physmem_size + iomem_size - max_physmem;
- physmem_size -= highmem;
+ physmem_size = PAGE_ALIGN(physmem_size);
+ max_physmem = TASK_SIZE - uml_physmem - MIN_VMALLOC;
+ if (physmem_size > max_physmem) {
+ physmem_size = max_physmem;
+ os_info("Physical memory size shrunk to %llu bytes\n",
+ physmem_size);
}
high_physmem = uml_physmem + physmem_size;
- end_iomem = high_physmem + iomem_size;
- high_memory = (void *) end_iomem;
start_vm = VMALLOC_START;
@@ -398,6 +392,8 @@ int __init linux_main(int argc, char **argv)
os_info("Kernel virtual memory size shrunk to %lu bytes\n",
virtmem_size);
+ arch_task_struct_size = sizeof(struct task_struct) + host_fp_size;
+
os_flush_stdout();
return start_uml();
@@ -412,9 +408,8 @@ void __init setup_arch(char **cmdline_p)
{
u8 rng_seed[32];
- stack_protections((unsigned long) &init_thread_info);
- setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
- mem_total_pages(physmem_size, iomem_size, highmem);
+ stack_protections((unsigned long) init_task.stack);
+ setup_physmem(uml_physmem, uml_reserved, physmem_size);
uml_dtb_init();
read_initrd();
@@ -422,6 +417,7 @@ void __init setup_arch(char **cmdline_p)
strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
setup_hostinfo(host_info, sizeof host_info);
+ prefill_possible_map();
if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) {
add_bootloader_randomness(rng_seed, sizeof(rng_seed));
@@ -435,28 +431,39 @@ void __init arch_cpu_finalize_init(void)
os_check_bugs();
}
-void apply_seal_endbr(s32 *start, s32 *end, struct module *mod)
+void apply_seal_endbr(s32 *start, s32 *end)
{
}
-void apply_retpolines(s32 *start, s32 *end, struct module *mod)
+void apply_retpolines(s32 *start, s32 *end)
{
}
-void apply_returns(s32 *start, s32 *end, struct module *mod)
+void apply_returns(s32 *start, s32 *end)
{
}
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
- s32 *start_cfi, s32 *end_cfi, struct module *mod)
+ s32 *start_cfi, s32 *end_cfi)
+{
+}
+
+void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
-void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
- struct module *mod)
+#if IS_ENABLED(CONFIG_SMP)
+void alternatives_smp_module_add(struct module *mod, char *name,
+ void *locks, void *locks_end,
+ void *text, void *text_end)
{
}
+void alternatives_smp_module_del(struct module *mod)
+{
+}
+#endif
+
void *text_poke(void *addr, const void *opcode, size_t len)
{
/*
@@ -474,7 +481,7 @@ void *text_poke_copy(void *addr, const void *opcode, size_t len)
return text_poke(addr, opcode, len);
}
-void text_poke_sync(void)
+void smp_text_poke_sync_each_cpu(void)
{
}
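
get_top_address() above exploits the initial process layout: the environment strings sit near the top of the stack, so the highest envp pointer, page-aligned plus one, bounds the host address space usable by the UML kernel. A standalone sketch of that probe; the three-argument main and the 4 KiB page size are assumptions here:

#include <stdio.h>

int main(int argc, char **argv, char **envp)
{
	unsigned long top = (unsigned long)&top;	/* some current stack address */

	(void)argc; (void)argv;
	for (int i = 0; envp[i]; i++)
		if ((unsigned long)envp[i] > top)
			top = (unsigned long)envp[i];

	/* round up to the next page boundary, as PAGE_ALIGN(top + 1) does */
	printf("approximate stack top: %#lx\n", (top + 4096) & ~4095UL);
	return 0;
}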
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index 5c92d58a78e8..a409d4b66114 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -77,8 +77,6 @@ SECTIONS
.data :
{
INIT_TASK_DATA(KERNEL_STACK_SIZE)
- . = ALIGN(KERNEL_STACK_SIZE);
- *(.data..init_irqstack)
DATA_DATA
*(.gnu.linkonce.d*)
CONSTRUCTORS