diff options
Diffstat (limited to 'arch/um/kernel')
-rw-r--r-- | arch/um/kernel/exec.c | 9 | ||||
-rw-r--r-- | arch/um/kernel/irq.c | 80 | ||||
-rw-r--r-- | arch/um/kernel/ksyms.c | 2 | ||||
-rw-r--r-- | arch/um/kernel/mem.c | 1 | ||||
-rw-r--r-- | arch/um/kernel/process.c | 69 | ||||
-rw-r--r-- | arch/um/kernel/reboot.c | 15 | ||||
-rw-r--r-- | arch/um/kernel/skas/Makefile | 9 | ||||
-rw-r--r-- | arch/um/kernel/skas/clone.c | 48 | ||||
-rw-r--r-- | arch/um/kernel/skas/mmu.c | 54 | ||||
-rw-r--r-- | arch/um/kernel/skas/process.c | 18 | ||||
-rw-r--r-- | arch/um/kernel/skas/stub.c | 69 | ||||
-rw-r--r-- | arch/um/kernel/time.c | 187 | ||||
-rw-r--r-- | arch/um/kernel/tlb.c | 545 | ||||
-rw-r--r-- | arch/um/kernel/trap.c | 15 | ||||
-rw-r--r-- | arch/um/kernel/um_arch.c | 3 |
15 files changed, 468 insertions, 656 deletions
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 827a0d3fa589..2c15bb2c104c 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -22,17 +22,8 @@ void flush_thread(void) { - void *data = NULL; - int ret; - arch_flush_thread(¤t->thread.arch); - ret = unmap(¤t->mm->context.id, 0, TASK_SIZE, 1, &data); - if (ret) { - printk(KERN_ERR "%s - clearing address space failed, err = %d\n", - __func__, ret); - force_sig(SIGKILL); - } get_safe_registers(current_pt_regs()->regs.gp, current_pt_regs()->regs.fp); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 635d44606bfe..534e91797f89 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -37,7 +37,7 @@ struct irq_reg { bool pending; bool wakeup; #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT - bool pending_on_resume; + bool pending_event; void (*timetravel_handler)(int, int, void *, struct time_travel_event *); struct time_travel_event event; @@ -56,6 +56,9 @@ static DEFINE_SPINLOCK(irq_lock); static LIST_HEAD(active_fds); static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ); static bool irqs_suspended; +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +static bool irqs_pending; +#endif static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) { @@ -84,9 +87,12 @@ static void irq_event_handler(struct time_travel_event *ev) { struct irq_reg *reg = container_of(ev, struct irq_reg, event); - /* do nothing if suspended - just to cause a wakeup */ - if (irqs_suspended) + /* do nothing if suspended; just cause a wakeup and mark as pending */ + if (irqs_suspended) { + irqs_pending = true; + reg->pending_event = true; return; + } generic_handle_irq(reg->irq); } @@ -110,16 +116,47 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry, if (!reg->event.pending) return false; - if (irqs_suspended) - reg->pending_on_resume = true; return true; } + +static void irq_do_pending_events(bool timetravel_handlers_only) +{ + struct irq_entry *entry; + + if (!irqs_pending || timetravel_handlers_only) + return; + + irqs_pending = false; + + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type t; + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + struct irq_reg *reg = &entry->reg[t]; + + /* + * Any timetravel_handler was invoked already, just + * directly run the IRQ. + */ + if (reg->pending_event) { + irq_enter(); + generic_handle_irq(reg->irq); + irq_exit(); + reg->pending_event = false; + } + } + } +} #else static bool irq_do_timetravel_handler(struct irq_entry *entry, enum um_irq_type t) { return false; } + +static void irq_do_pending_events(bool timetravel_handlers_only) +{ +} #endif static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t, @@ -145,6 +182,8 @@ static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type */ if (timetravel_handlers_only) { #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + reg->pending_event = true; + irqs_pending = true; mark_sigio_pending(); #endif return; @@ -162,6 +201,10 @@ static void _sigio_handler(struct uml_pt_regs *regs, if (timetravel_handlers_only && !um_irq_timetravel_handler_used()) return; + /* Flush out pending events that were ignored due to time-travel. */ + if (!irqs_suspended) + irq_do_pending_events(timetravel_handlers_only); + while (1) { /* This is now lockless - epoll keeps back-referencesto the irqs * which have trigger it so there is no need to walk the irq @@ -195,7 +238,9 @@ static void _sigio_handler(struct uml_pt_regs *regs, void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { + preempt_disable(); _sigio_handler(regs, irqs_suspended); + preempt_enable(); } static struct irq_entry *get_irq_entry_by_fd(int fd) @@ -543,30 +588,7 @@ void um_irqs_resume(void) unsigned long flags; - local_irq_save(flags); -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT - /* - * We don't need to lock anything here since we're in resume - * and nothing else is running, but have disabled IRQs so we - * don't try anything else with the interrupt list from there. - */ - list_for_each_entry(entry, &active_fds, list) { - enum um_irq_type t; - - for (t = 0; t < NUM_IRQ_TYPES; t++) { - struct irq_reg *reg = &entry->reg[t]; - - if (reg->pending_on_resume) { - irq_enter(); - generic_handle_irq(reg->irq); - irq_exit(); - reg->pending_on_resume = false; - } - } - } -#endif - - spin_lock(&irq_lock); + spin_lock_irqsave(&irq_lock, flags); list_for_each_entry(entry, &active_fds, list) { if (entry->suspended) { int err = os_set_fd_async(entry->fd); diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 3a85bde3e173..f2fb77da08cf 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(os_shutdown_socket); EXPORT_SYMBOL(os_create_unix_socket); EXPORT_SYMBOL(os_connect_socket); EXPORT_SYMBOL(os_accept_connection); -EXPORT_SYMBOL(os_rcv_fd); +EXPORT_SYMBOL(os_rcv_fd_msg); EXPORT_SYMBOL(run_helper); EXPORT_SYMBOL(os_major); EXPORT_SYMBOL(os_minor); diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index ca91accd64fc..a5b4fe2ad931 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -73,7 +73,6 @@ void __init mem_init(void) /* this will put all low memory onto the freelists */ memblock_free_all(); - max_low_pfn = totalram_pages(); max_pfn = max_low_pfn; kmalloc_ok = 1; } diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index d2134802f6a8..f36b63f53bab 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -122,8 +122,6 @@ void new_thread_handler(void) /* Called magically, see new_thread_handler above */ static void fork_handler(void) { - force_flush_all(); - schedule_tail(current->thread.prev_sched); /* @@ -237,73 +235,6 @@ int copy_from_user_proc(void *to, void __user *from, int size) return copy_from_user(to, from, size); } -static atomic_t using_sysemu = ATOMIC_INIT(0); -int sysemu_supported; - -static void set_using_sysemu(int value) -{ - if (value > sysemu_supported) - return; - atomic_set(&using_sysemu, value); -} - -static int get_using_sysemu(void) -{ - return atomic_read(&using_sysemu); -} - -static int sysemu_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, "%d\n", get_using_sysemu()); - return 0; -} - -static int sysemu_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, sysemu_proc_show, NULL); -} - -static ssize_t sysemu_proc_write(struct file *file, const char __user *buf, - size_t count, loff_t *pos) -{ - char tmp[2]; - - if (copy_from_user(tmp, buf, 1)) - return -EFAULT; - - if (tmp[0] >= '0' && tmp[0] <= '2') - set_using_sysemu(tmp[0] - '0'); - /* We use the first char, but pretend to write everything */ - return count; -} - -static const struct proc_ops sysemu_proc_ops = { - .proc_open = sysemu_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, - .proc_write = sysemu_proc_write, -}; - -static int __init make_proc_sysemu(void) -{ - struct proc_dir_entry *ent; - if (!sysemu_supported) - return 0; - - ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops); - - if (ent == NULL) - { - printk(KERN_WARNING "Failed to register /proc/sysemu\n"); - return 0; - } - - return 0; -} - -late_initcall(make_proc_sysemu); - int singlestepping(void) { return test_thread_flag(TIF_SINGLESTEP); diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 25840eee1068..3736bca626ba 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -59,3 +59,18 @@ void machine_halt(void) { machine_power_off(); } + +static int sys_power_off_handler(struct sys_off_data *data) +{ + machine_power_off(); + return 0; +} + +static int register_power_off(void) +{ + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_DEFAULT, + sys_power_off_handler, NULL); + return 0; +} +__initcall(register_power_off); diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index f93972a25765..6f86d53e3d69 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -3,15 +3,14 @@ # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) # -obj-y := clone.o mmu.o process.o syscall.o uaccess.o +obj-y := stub.o mmu.o process.o syscall.o uaccess.o -# clone.o is in the stub, so it can't be built with profiling +# stub.o is in the stub, so it can't be built with profiling # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work -> # disable it -CFLAGS_clone.o := $(CFLAGS_NO_HARDENING) -UNPROFILE_OBJS := clone.o - +CFLAGS_stub.o := $(CFLAGS_NO_HARDENING) +UNPROFILE_OBJS := stub.o KCOV_INSTRUMENT := n include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c deleted file mode 100644 index 62435187dda4..000000000000 --- a/arch/um/kernel/skas/clone.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) - * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <signal.h> -#include <sched.h> -#include <asm/unistd.h> -#include <sys/time.h> -#include <as-layout.h> -#include <ptrace_user.h> -#include <stub-data.h> -#include <sysdep/stub.h> - -/* - * This is in a separate file because it needs to be compiled with any - * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled - * - * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize - * on some systems. - */ - -void __attribute__ ((__section__ (".__syscall_stub"))) -stub_clone_handler(void) -{ - struct stub_data *data = get_stub_data(); - long err; - - err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, - (unsigned long)data + - STUB_DATA_PAGES * UM_KERN_PAGE_SIZE / 2); - if (err) { - data->parent_err = err; - goto done; - } - - err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); - if (err) { - data->child_err = err; - goto done; - } - - remap_stack_and_trap(); - - done: - trap_myself(); -} diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index aeed1c2aaf3c..47f98d87ea3c 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -14,11 +14,14 @@ #include <as-layout.h> #include <os.h> #include <skas.h> +#include <stub-data.h> + +/* Ensure the stub_data struct covers the allocated area */ +static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); int init_new_context(struct task_struct *task, struct mm_struct *mm) { - struct mm_context *from_mm = NULL; - struct mm_context *to_mm = &mm->context; + struct mm_id *new_id = &mm->context.id; unsigned long stack = 0; int ret = -ENOMEM; @@ -26,34 +29,46 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) if (stack == 0) goto out; - to_mm->id.stack = stack; - if (current->mm != NULL && current->mm != &init_mm) - from_mm = ¤t->mm->context; + new_id->stack = stack; block_signals_trace(); - if (from_mm) - to_mm->id.u.pid = copy_context_skas0(stack, - from_mm->id.u.pid); - else to_mm->id.u.pid = start_userspace(stack); + new_id->u.pid = start_userspace(stack); unblock_signals_trace(); - if (to_mm->id.u.pid < 0) { - ret = to_mm->id.u.pid; + if (new_id->u.pid < 0) { + ret = new_id->u.pid; goto out_free; } - ret = init_new_ldt(to_mm, from_mm); - if (ret < 0) { - printk(KERN_ERR "init_new_context_skas - init_ldt" - " failed, errno = %d\n", ret); - goto out_free; - } + /* + * Ensure the new MM is clean and nothing unwanted is mapped. + * + * TODO: We should clear the memory up to STUB_START to ensure there is + * nothing mapped there, i.e. we (currently) have: + * + * |- user memory -|- unused -|- stub -|- unused -| + * ^ TASK_SIZE ^ STUB_START + * + * Meaning we have two unused areas where we may still have valid + * mappings from our internal clone(). That isn't really a problem as + * userspace is not going to access them, but it is definitely not + * correct. + * + * However, we are "lucky" and if rseq is configured, then on 32 bit + * it will fall into the first empty range while on 64 bit it is going + * to use an anonymous mapping in the second range. As such, things + * continue to work for now as long as we don't start unmapping these + * areas. + * + * Change this to STUB_START once we have a clean userspace. + */ + unmap(new_id, 0, TASK_SIZE); return 0; out_free: - if (to_mm->id.stack != 0) - free_pages(to_mm->id.stack, ilog2(STUB_DATA_PAGES)); + if (new_id->stack != 0) + free_pages(new_id->stack, ilog2(STUB_DATA_PAGES)); out: return ret; } @@ -76,5 +91,4 @@ void destroy_context(struct mm_struct *mm) os_kill_ptraced_process(mmu->id.u.pid, 1); free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); - free_ldt(mmu); } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 99a5cbb36083..5f9c1c5f36e2 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -8,6 +8,8 @@ #include <linux/sched/task_stack.h> #include <linux/sched/task.h> +#include <asm/tlbflush.h> + #include <as-layout.h> #include <kern.h> #include <os.h> @@ -50,3 +52,19 @@ unsigned long current_stub_stack(void) return current->mm->context.id.stack; } + +struct mm_id *current_mm_id(void) +{ + if (current->mm == NULL) + return NULL; + + return ¤t->mm->context.id; +} + +void current_mm_sync(void) +{ + if (current->mm == NULL) + return; + + um_tlb_sync(current->mm); +} diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c new file mode 100644 index 000000000000..5d52ffa682dc --- /dev/null +++ b/arch/um/kernel/skas/stub.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> + */ + +#include <sysdep/stub.h> + +static __always_inline int syscall_handler(struct stub_data *d) +{ + int i; + unsigned long res; + + for (i = 0; i < d->syscall_data_len; i++) { + struct stub_syscall *sc = &d->syscall_data[i]; + + switch (sc->syscall) { + case STUB_SYSCALL_MMAP: + res = stub_syscall6(STUB_MMAP_NR, + sc->mem.addr, sc->mem.length, + sc->mem.prot, + MAP_SHARED | MAP_FIXED, + sc->mem.fd, sc->mem.offset); + if (res != sc->mem.addr) { + d->err = res; + d->syscall_data_len = i; + return -1; + } + break; + case STUB_SYSCALL_MUNMAP: + res = stub_syscall2(__NR_munmap, + sc->mem.addr, sc->mem.length); + if (res) { + d->err = res; + d->syscall_data_len = i; + return -1; + } + break; + case STUB_SYSCALL_MPROTECT: + res = stub_syscall3(__NR_mprotect, + sc->mem.addr, sc->mem.length, + sc->mem.prot); + if (res) { + d->err = res; + d->syscall_data_len = i; + return -1; + } + break; + default: + d->err = -95; /* EOPNOTSUPP */ + d->syscall_data_len = i; + return -1; + } + } + + d->err = 0; + d->syscall_data_len = 0; + + return 0; +} + +void __section(".__syscall_stub") +stub_syscall_handler(void) +{ + struct stub_data *d = get_stub_data(); + + syscall_handler(d); + + trap_myself(); +} diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index a8bfe8be1526..47b9f5e63566 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -31,6 +31,7 @@ EXPORT_SYMBOL_GPL(time_travel_mode); static bool time_travel_start_set; static unsigned long long time_travel_start; static unsigned long long time_travel_time; +static unsigned long long time_travel_shm_offset; static LIST_HEAD(time_travel_events); static LIST_HEAD(time_travel_irqs); static unsigned long long time_travel_timer_interval; @@ -40,8 +41,11 @@ static int time_travel_ext_fd = -1; static unsigned int time_travel_ext_waiting; static bool time_travel_ext_prev_request_valid; static unsigned long long time_travel_ext_prev_request; -static bool time_travel_ext_free_until_valid; -static unsigned long long time_travel_ext_free_until; +static unsigned long long *time_travel_ext_free_until; +static unsigned long long _time_travel_ext_free_until; +static u16 time_travel_shm_id; +static struct um_timetravel_schedshm *time_travel_shm; +static union um_timetravel_schedshm_client *time_travel_shm_client; static void time_travel_set_time(unsigned long long ns) { @@ -58,8 +62,52 @@ enum time_travel_message_handling { TTMH_IDLE, TTMH_POLL, TTMH_READ, + TTMH_READ_START_ACK, }; +static u64 bc_message; +int time_travel_should_print_bc_msg; + +void _time_travel_print_bc_msg(void) +{ + time_travel_should_print_bc_msg = 0; + printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message); +} + +static void time_travel_setup_shm(int fd, u16 id) +{ + u32 len; + + time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm)); + + if (!time_travel_shm) + goto out; + + len = time_travel_shm->len; + + if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION || + len < struct_size(time_travel_shm, clients, id + 1)) { + os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm)); + time_travel_shm = NULL; + goto out; + } + + time_travel_shm = os_mremap_rw_shared(time_travel_shm, + sizeof(*time_travel_shm), + len); + if (!time_travel_shm) + goto out; + + time_travel_shm_offset = time_travel_shm->current_time; + time_travel_shm_client = &time_travel_shm->clients[id]; + time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE; + time_travel_shm_id = id; + /* always look at that free_until from now on */ + time_travel_ext_free_until = &time_travel_shm->free_until; +out: + os_close_file(fd); +} + static void time_travel_handle_message(struct um_timetravel_msg *msg, enum time_travel_message_handling mode) { @@ -80,7 +128,20 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg, } } - ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); + if (unlikely(mode == TTMH_READ_START_ACK)) { + int fd[UM_TIMETRAVEL_SHARED_MAX_FDS]; + + ret = os_rcv_fd_msg(time_travel_ext_fd, fd, + ARRAY_SIZE(fd), msg, sizeof(*msg)); + if (ret == sizeof(*msg)) { + time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD], + msg->time & UM_TIMETRAVEL_START_ACK_ID); + /* we don't use the logging for now */ + os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]); + } + } else { + ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); + } if (ret == 0) panic("time-travel external link is broken\n"); @@ -96,10 +157,24 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg, return; case UM_TIMETRAVEL_RUN: time_travel_set_time(msg->time); + if (time_travel_shm) { + /* no request right now since we're running */ + time_travel_shm_client->flags &= + ~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN; + /* no ack for shared memory RUN */ + return; + } break; case UM_TIMETRAVEL_FREE_UNTIL: - time_travel_ext_free_until_valid = true; - time_travel_ext_free_until = msg->time; + /* not supposed to get this with shm, but ignore it */ + if (time_travel_shm) + break; + time_travel_ext_free_until = &_time_travel_ext_free_until; + _time_travel_ext_free_until = msg->time; + break; + case UM_TIMETRAVEL_BROADCAST: + bc_message = msg->time; + time_travel_should_print_bc_msg = 1; break; } @@ -136,8 +211,15 @@ static u64 time_travel_ext_req(u32 op, u64 time) block_signals_hard(); os_write_file(time_travel_ext_fd, &msg, sizeof(msg)); + /* no ACK expected for WAIT in shared memory mode */ + if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm) + goto done; + while (msg.op != UM_TIMETRAVEL_ACK) - time_travel_handle_message(&msg, TTMH_READ); + time_travel_handle_message(&msg, + op == UM_TIMETRAVEL_START ? + TTMH_READ_START_ACK : + TTMH_READ); if (msg.seq != mseq) panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n", @@ -145,6 +227,7 @@ static u64 time_travel_ext_req(u32 op, u64 time) if (op == UM_TIMETRAVEL_GET) time_travel_set_time(msg.time); +done: unblock_signals_hard(); return msg.time; @@ -180,13 +263,33 @@ static void time_travel_ext_update_request(unsigned long long time) /* * if we're running and are allowed to run past the request * then we don't need to update it either + * + * Note for shm we ignore FREE_UNTIL messages and leave the pointer + * to shared memory, and for non-shm the offset is 0. */ - if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && - time < time_travel_ext_free_until) + if (!time_travel_ext_waiting && time_travel_ext_free_until && + time < (*time_travel_ext_free_until - time_travel_shm_offset)) return; time_travel_ext_prev_request = time; time_travel_ext_prev_request_valid = true; + + if (time_travel_shm) { + union um_timetravel_schedshm_client *running; + + running = &time_travel_shm->clients[time_travel_shm->running_id]; + + if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) { + time_travel_shm_client->flags |= + UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN; + time += time_travel_shm_offset; + time_travel_shm_client->req_time = time; + if (time < time_travel_shm->free_until) + time_travel_shm->free_until = time; + return; + } + } + time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time); } @@ -194,6 +297,14 @@ void __time_travel_propagate_time(void) { static unsigned long long last_propagated; + if (time_travel_shm) { + if (time_travel_shm->running_id != time_travel_shm_id) + panic("time-travel: setting time while not running\n"); + time_travel_shm->current_time = time_travel_time + + time_travel_shm_offset; + return; + } + if (last_propagated == time_travel_time) return; @@ -209,9 +320,12 @@ static bool time_travel_ext_request(unsigned long long time) * If we received an external sync point ("free until") then we * don't have to request/wait for anything until then, unless * we're already waiting. + * + * Note for shm we ignore FREE_UNTIL messages and leave the pointer + * to shared memory, and for non-shm the offset is 0. */ - if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && - time < time_travel_ext_free_until) + if (!time_travel_ext_waiting && time_travel_ext_free_until && + time < (*time_travel_ext_free_until - time_travel_shm_offset)) return false; time_travel_ext_update_request(time); @@ -225,7 +339,8 @@ static void time_travel_ext_wait(bool idle) }; time_travel_ext_prev_request_valid = false; - time_travel_ext_free_until_valid = false; + if (!time_travel_shm) + time_travel_ext_free_until = NULL; time_travel_ext_waiting++; time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1); @@ -248,7 +363,11 @@ static void time_travel_ext_wait(bool idle) static void time_travel_ext_get_time(void) { - time_travel_ext_req(UM_TIMETRAVEL_GET, -1); + if (time_travel_shm) + time_travel_set_time(time_travel_shm->current_time - + time_travel_shm_offset); + else + time_travel_ext_req(UM_TIMETRAVEL_GET, -1); } static void __time_travel_update_time(unsigned long long ns, bool idle) @@ -875,9 +994,49 @@ static int setup_time_travel_start(char *str) return 1; } -__setup("time-travel-start", setup_time_travel_start); +__setup("time-travel-start=", setup_time_travel_start); __uml_help(setup_time_travel_start, -"time-travel-start=<seconds>\n" +"time-travel-start=<nanoseconds>\n" "Configure the UML instance's wall clock to start at this value rather than\n" "the host's wall clock at the time of UML boot.\n"); +static struct kobject *bc_time_kobject; + +static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "0x%llx", bc_message); +} + +static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret; + u64 user_bc_message; + + ret = kstrtou64(buf, 0, &user_bc_message); + if (ret) + return ret; + + bc_message = user_bc_message; + + time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message); + pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message); + return count; +} + +static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store); + +static int __init um_bc_start(void) +{ + if (time_travel_mode != TT_MODE_EXTERNAL) + return 0; + + bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj); + if (!bc_time_kobject) + return 0; + + if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr)) + pr_debug("failed to create the bc file in /sys/kernel/um_time"); + + return 0; +} +late_initcall(um_bc_start); #endif diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 8784f03fa4a6..44c6fc697f3a 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -15,209 +15,54 @@ #include <skas.h> #include <kern_util.h> -struct host_vm_change { - struct host_vm_op { - enum { NONE, MMAP, MUNMAP, MPROTECT } type; - union { - struct { - unsigned long addr; - unsigned long len; - unsigned int prot; - int fd; - __u64 offset; - } mmap; - struct { - unsigned long addr; - unsigned long len; - } munmap; - struct { - unsigned long addr; - unsigned long len; - unsigned int prot; - } mprotect; - } u; - } ops[1]; - int userspace; - int index; - struct mm_struct *mm; - void *data; - int force; +struct vm_ops { + struct mm_id *mm_idp; + + int (*mmap)(struct mm_id *mm_idp, + unsigned long virt, unsigned long len, int prot, + int phys_fd, unsigned long long offset); + int (*unmap)(struct mm_id *mm_idp, + unsigned long virt, unsigned long len); + int (*mprotect)(struct mm_id *mm_idp, + unsigned long virt, unsigned long len, + unsigned int prot); }; -#define INIT_HVC(mm, force, userspace) \ - ((struct host_vm_change) \ - { .ops = { { .type = NONE } }, \ - .mm = mm, \ - .data = NULL, \ - .userspace = userspace, \ - .index = 0, \ - .force = force }) - -static void report_enomem(void) +static int kern_map(struct mm_id *mm_idp, + unsigned long virt, unsigned long len, int prot, + int phys_fd, unsigned long long offset) { - printk(KERN_ERR "UML ran out of memory on the host side! " - "This can happen due to a memory limitation or " - "vm.max_map_count has been reached.\n"); -} - -static int do_ops(struct host_vm_change *hvc, int end, - int finished) -{ - struct host_vm_op *op; - int i, ret = 0; - - for (i = 0; i < end && !ret; i++) { - op = &hvc->ops[i]; - switch (op->type) { - case MMAP: - if (hvc->userspace) - ret = map(&hvc->mm->context.id, op->u.mmap.addr, - op->u.mmap.len, op->u.mmap.prot, - op->u.mmap.fd, - op->u.mmap.offset, finished, - &hvc->data); - else - map_memory(op->u.mmap.addr, op->u.mmap.offset, - op->u.mmap.len, 1, 1, 1); - break; - case MUNMAP: - if (hvc->userspace) - ret = unmap(&hvc->mm->context.id, - op->u.munmap.addr, - op->u.munmap.len, finished, - &hvc->data); - else - ret = os_unmap_memory( - (void *) op->u.munmap.addr, - op->u.munmap.len); - - break; - case MPROTECT: - if (hvc->userspace) - ret = protect(&hvc->mm->context.id, - op->u.mprotect.addr, - op->u.mprotect.len, - op->u.mprotect.prot, - finished, &hvc->data); - else - ret = os_protect_memory( - (void *) op->u.mprotect.addr, - op->u.mprotect.len, - 1, 1, 1); - break; - default: - printk(KERN_ERR "Unknown op type %d in do_ops\n", - op->type); - BUG(); - break; - } - } - - if (ret == -ENOMEM) - report_enomem(); - - return ret; + /* TODO: Why is executable needed to be always set in the kernel? */ + return os_map_memory((void *)virt, phys_fd, offset, len, + prot & UM_PROT_READ, prot & UM_PROT_WRITE, + 1); } -static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - unsigned int prot, struct host_vm_change *hvc) +static int kern_unmap(struct mm_id *mm_idp, + unsigned long virt, unsigned long len) { - __u64 offset; - struct host_vm_op *last; - int fd = -1, ret = 0; - - if (hvc->userspace) - fd = phys_mapping(phys, &offset); - else - offset = phys; - if (hvc->index != 0) { - last = &hvc->ops[hvc->index - 1]; - if ((last->type == MMAP) && - (last->u.mmap.addr + last->u.mmap.len == virt) && - (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) && - (last->u.mmap.offset + last->u.mmap.len == offset)) { - last->u.mmap.len += len; - return 0; - } - } - - if (hvc->index == ARRAY_SIZE(hvc->ops)) { - ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); - hvc->index = 0; - } - - hvc->ops[hvc->index++] = ((struct host_vm_op) - { .type = MMAP, - .u = { .mmap = { .addr = virt, - .len = len, - .prot = prot, - .fd = fd, - .offset = offset } - } }); - return ret; + return os_unmap_memory((void *)virt, len); } -static int add_munmap(unsigned long addr, unsigned long len, - struct host_vm_change *hvc) +static int kern_mprotect(struct mm_id *mm_idp, + unsigned long virt, unsigned long len, + unsigned int prot) { - struct host_vm_op *last; - int ret = 0; - - if (hvc->index != 0) { - last = &hvc->ops[hvc->index - 1]; - if ((last->type == MUNMAP) && - (last->u.munmap.addr + last->u.mmap.len == addr)) { - last->u.munmap.len += len; - return 0; - } - } - - if (hvc->index == ARRAY_SIZE(hvc->ops)) { - ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); - hvc->index = 0; - } - - hvc->ops[hvc->index++] = ((struct host_vm_op) - { .type = MUNMAP, - .u = { .munmap = { .addr = addr, - .len = len } } }); - return ret; + return os_protect_memory((void *)virt, len, + prot & UM_PROT_READ, prot & UM_PROT_WRITE, + 1); } -static int add_mprotect(unsigned long addr, unsigned long len, - unsigned int prot, struct host_vm_change *hvc) +void report_enomem(void) { - struct host_vm_op *last; - int ret = 0; - - if (hvc->index != 0) { - last = &hvc->ops[hvc->index - 1]; - if ((last->type == MPROTECT) && - (last->u.mprotect.addr + last->u.mprotect.len == addr) && - (last->u.mprotect.prot == prot)) { - last->u.mprotect.len += len; - return 0; - } - } - - if (hvc->index == ARRAY_SIZE(hvc->ops)) { - ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); - hvc->index = 0; - } - - hvc->ops[hvc->index++] = ((struct host_vm_op) - { .type = MPROTECT, - .u = { .mprotect = { .addr = addr, - .len = len, - .prot = prot } } }); - return ret; + printk(KERN_ERR "UML ran out of memory on the host side! " + "This can happen due to a memory limitation or " + "vm.max_map_count has been reached.\n"); } -#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) - static inline int update_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - struct host_vm_change *hvc) + struct vm_ops *ops) { pte_t *pte; int r, w, x, prot, ret = 0; @@ -235,15 +80,22 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | (x ? UM_PROT_EXEC : 0)); - if (hvc->force || pte_newpage(*pte)) { + if (pte_newpage(*pte)) { if (pte_present(*pte)) { - if (pte_newpage(*pte)) - ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK, - PAGE_SIZE, prot, hvc); + if (pte_newpage(*pte)) { + __u64 offset; + unsigned long phys = + pte_val(*pte) & PAGE_MASK; + int fd = phys_mapping(phys, &offset); + + ret = ops->mmap(ops->mm_idp, addr, + PAGE_SIZE, prot, fd, + offset); + } } else - ret = add_munmap(addr, PAGE_SIZE, hvc); + ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); } else if (pte_newprot(*pte)) - ret = add_mprotect(addr, PAGE_SIZE, prot, hvc); + ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot); *pte = pte_mkuptodate(*pte); } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); return ret; @@ -251,7 +103,7 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, static inline int update_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, - struct host_vm_change *hvc) + struct vm_ops *ops) { pmd_t *pmd; unsigned long next; @@ -261,19 +113,20 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end); if (!pmd_present(*pmd)) { - if (hvc->force || pmd_newpage(*pmd)) { - ret = add_munmap(addr, next - addr, hvc); + if (pmd_newpage(*pmd)) { + ret = ops->unmap(ops->mm_idp, addr, + next - addr); pmd_mkuptodate(*pmd); } } - else ret = update_pte_range(pmd, addr, next, hvc); + else ret = update_pte_range(pmd, addr, next, ops); } while (pmd++, addr = next, ((addr < end) && !ret)); return ret; } static inline int update_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, - struct host_vm_change *hvc) + struct vm_ops *ops) { pud_t *pud; unsigned long next; @@ -283,19 +136,20 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr, do { next = pud_addr_end(addr, end); if (!pud_present(*pud)) { - if (hvc->force || pud_newpage(*pud)) { - ret = add_munmap(addr, next - addr, hvc); + if (pud_newpage(*pud)) { + ret = ops->unmap(ops->mm_idp, addr, + next - addr); pud_mkuptodate(*pud); } } - else ret = update_pmd_range(pud, addr, next, hvc); + else ret = update_pmd_range(pud, addr, next, ops); } while (pud++, addr = next, ((addr < end) && !ret)); return ret; } static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, - struct host_vm_change *hvc) + struct vm_ops *ops) { p4d_t *p4d; unsigned long next; @@ -305,227 +159,59 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, do { next = p4d_addr_end(addr, end); if (!p4d_present(*p4d)) { - if (hvc->force || p4d_newpage(*p4d)) { - ret = add_munmap(addr, next - addr, hvc); + if (p4d_newpage(*p4d)) { + ret = ops->unmap(ops->mm_idp, addr, + next - addr); p4d_mkuptodate(*p4d); } } else - ret = update_pud_range(p4d, addr, next, hvc); + ret = update_pud_range(p4d, addr, next, ops); } while (p4d++, addr = next, ((addr < end) && !ret)); return ret; } -static void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force) +int um_tlb_sync(struct mm_struct *mm) { pgd_t *pgd; - struct host_vm_change hvc; - unsigned long addr = start_addr, next; - int ret = 0, userspace = 1; + struct vm_ops ops; + unsigned long addr = mm->context.sync_tlb_range_from, next; + int ret = 0; + + if (mm->context.sync_tlb_range_to == 0) + return 0; + + ops.mm_idp = &mm->context.id; + if (mm == &init_mm) { + ops.mmap = kern_map; + ops.unmap = kern_unmap; + ops.mprotect = kern_mprotect; + } else { + ops.mmap = map; + ops.unmap = unmap; + ops.mprotect = protect; + } - hvc = INIT_HVC(mm, force, userspace); pgd = pgd_offset(mm, addr); do { - next = pgd_addr_end(addr, end_addr); + next = pgd_addr_end(addr, mm->context.sync_tlb_range_to); if (!pgd_present(*pgd)) { - if (force || pgd_newpage(*pgd)) { - ret = add_munmap(addr, next - addr, &hvc); + if (pgd_newpage(*pgd)) { + ret = ops.unmap(ops.mm_idp, addr, + next - addr); pgd_mkuptodate(*pgd); } } else - ret = update_p4d_range(pgd, addr, next, &hvc); - } while (pgd++, addr = next, ((addr < end_addr) && !ret)); + ret = update_p4d_range(pgd, addr, next, &ops); + } while (pgd++, addr = next, + ((addr < mm->context.sync_tlb_range_to) && !ret)); - if (!ret) - ret = do_ops(&hvc, hvc.index, 1); - - /* This is not an else because ret is modified above */ - if (ret) { - struct mm_id *mm_idp = ¤t->mm->context.id; - - printk(KERN_ERR "fix_range_common: failed, killing current " - "process: %d\n", task_tgid_vnr(current)); - mm_idp->kill = 1; - } -} - -static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) -{ - struct mm_struct *mm; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long addr, last; - int updated = 0, err = 0, force = 0, userspace = 0; - struct host_vm_change hvc; - - mm = &init_mm; - hvc = INIT_HVC(mm, force, userspace); - for (addr = start; addr < end;) { - pgd = pgd_offset(mm, addr); - if (!pgd_present(*pgd)) { - last = ADD_ROUND(addr, PGDIR_SIZE); - if (last > end) - last = end; - if (pgd_newpage(*pgd)) { - updated = 1; - err = add_munmap(addr, last - addr, &hvc); - if (err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - p4d = p4d_offset(pgd, addr); - if (!p4d_present(*p4d)) { - last = ADD_ROUND(addr, P4D_SIZE); - if (last > end) - last = end; - if (p4d_newpage(*p4d)) { - updated = 1; - err = add_munmap(addr, last - addr, &hvc); - if (err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pud = pud_offset(p4d, addr); - if (!pud_present(*pud)) { - last = ADD_ROUND(addr, PUD_SIZE); - if (last > end) - last = end; - if (pud_newpage(*pud)) { - updated = 1; - err = add_munmap(addr, last - addr, &hvc); - if (err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - last = ADD_ROUND(addr, PMD_SIZE); - if (last > end) - last = end; - if (pmd_newpage(*pmd)) { - updated = 1; - err = add_munmap(addr, last - addr, &hvc); - if (err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pte = pte_offset_kernel(pmd, addr); - if (!pte_present(*pte) || pte_newpage(*pte)) { - updated = 1; - err = add_munmap(addr, PAGE_SIZE, &hvc); - if (err < 0) - panic("munmap failed, errno = %d\n", - -err); - if (pte_present(*pte)) - err = add_mmap(addr, pte_val(*pte) & PAGE_MASK, - PAGE_SIZE, 0, &hvc); - } - else if (pte_newprot(*pte)) { - updated = 1; - err = add_mprotect(addr, PAGE_SIZE, 0, &hvc); - } - addr += PAGE_SIZE; - } - if (!err) - err = do_ops(&hvc, hvc.index, 1); - - if (err < 0) - panic("flush_tlb_kernel failed, errno = %d\n", err); - return updated; -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - struct mm_struct *mm = vma->vm_mm; - void *flush = NULL; - int r, w, x, prot, err = 0; - struct mm_id *mm_id; - - address &= PAGE_MASK; - - pgd = pgd_offset(mm, address); - if (!pgd_present(*pgd)) - goto kill; - - p4d = p4d_offset(pgd, address); - if (!p4d_present(*p4d)) - goto kill; - - pud = pud_offset(p4d, address); - if (!pud_present(*pud)) - goto kill; - - pmd = pmd_offset(pud, address); - if (!pmd_present(*pmd)) - goto kill; - - pte = pte_offset_kernel(pmd, address); - - r = pte_read(*pte); - w = pte_write(*pte); - x = pte_exec(*pte); - if (!pte_young(*pte)) { - r = 0; - w = 0; - } else if (!pte_dirty(*pte)) { - w = 0; - } - - mm_id = &mm->context.id; - prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | - (x ? UM_PROT_EXEC : 0)); - if (pte_newpage(*pte)) { - if (pte_present(*pte)) { - unsigned long long offset; - int fd; - - fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset); - err = map(mm_id, address, PAGE_SIZE, prot, fd, offset, - 1, &flush); - } - else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush); - } - else if (pte_newprot(*pte)) - err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush); - - if (err) { - if (err == -ENOMEM) - report_enomem(); - - goto kill; - } - - *pte = pte_mkuptodate(*pte); + if (ret == -ENOMEM) + report_enomem(); - return; + mm->context.sync_tlb_range_from = 0; + mm->context.sync_tlb_range_to = 0; -kill: - printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address); - force_sig(SIGKILL); + return ret; } void flush_tlb_all(void) @@ -540,60 +226,11 @@ void flush_tlb_all(void) flush_tlb_mm(current->mm); } -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - flush_tlb_kernel_range_common(start, end); -} - -void flush_tlb_kernel_vm(void) -{ - flush_tlb_kernel_range_common(start_vm, end_vm); -} - -void __flush_tlb_one(unsigned long addr) -{ - flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE); -} - -static void fix_range(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force) -{ - /* - * Don't bother flushing if this address space is about to be - * destroyed. - */ - if (atomic_read(&mm->mm_users) == 0) - return; - - fix_range_common(mm, start_addr, end_addr, force); -} - -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - if (vma->vm_mm == NULL) - flush_tlb_kernel_range_common(start, end); - else fix_range(vma->vm_mm, start, end, 0); -} -EXPORT_SYMBOL(flush_tlb_range); - void flush_tlb_mm(struct mm_struct *mm) { struct vm_area_struct *vma; VMA_ITERATOR(vmi, mm, 0); for_each_vma(vmi, vma) - fix_range(mm, vma->vm_start, vma->vm_end, 0); -} - -void force_flush_all(void) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - - mmap_read_lock(mm); - for_each_vma(vmi, vma) - fix_range(mm, vma->vm_start, vma->vm_end, 1); - mmap_read_unlock(mm); + um_tlb_mark_sync(mm, vma->vm_start, vma->vm_end); } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 6d8ae86ae978..97c8df9c4401 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -113,7 +113,7 @@ good_area: #if 0 WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte))); #endif - flush_tlb_page(vma, address); + out: mmap_read_unlock(mm); out_nosemaphore: @@ -210,8 +210,17 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, if (!is_user && regs) current->thread.segv_regs = container_of(regs, struct pt_regs, regs); - if (!is_user && (address >= start_vm) && (address < end_vm)) { - flush_tlb_kernel_vm(); + if (!is_user && init_mm.context.sync_tlb_range_to) { + /* + * Kernel has pending updates from set_ptes that were not + * flushed yet. Syncing them should fix the pagefault (if not + * we'll get here again and panic). + */ + err = um_tlb_sync(&init_mm); + if (err == -ENOMEM) + report_enomem(); + if (err) + panic("Failed to sync kernel TLBs: %d", err); goto out; } else if (current->mm == NULL) { diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index e95f805e5004..8e594cda6d77 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -126,9 +126,6 @@ unsigned long uml_reserved; /* Also modified in mem_init */ unsigned long start_vm; unsigned long end_vm; -/* Set in uml_ncpus_setup */ -int ncpus = 1; - /* Set in early boot */ static int have_root __initdata; static int have_console __initdata; |