diff options
Diffstat (limited to 'arch/um/kernel/time.c')
| -rw-r--r-- | arch/um/kernel/time.c | 1067 |
1 files changed, 1022 insertions, 45 deletions
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 117568d4f64a..b344a36b44eb 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -1,115 +1,1092 @@ +// SPDX-License-Identifier: GPL-2.0 /* + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) + * Copyright (C) 2012-2014 Cisco Systems * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL + * Copyright (C) 2019 Intel Corporation */ #include <linux/clockchips.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/jiffies.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/spinlock.h> #include <linux/threads.h> #include <asm/irq.h> #include <asm/param.h> #include <kern_util.h> #include <os.h> +#include <linux/delay.h> +#include <linux/time-internal.h> +#include <linux/um_timetravel.h> +#include <shared/init.h> + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +#include <linux/sched/clock.h> + +enum time_travel_mode time_travel_mode; +EXPORT_SYMBOL_GPL(time_travel_mode); + +static bool time_travel_start_set; +static unsigned long long time_travel_start; +static unsigned long long time_travel_time; +static unsigned long long time_travel_shm_offset; +static LIST_HEAD(time_travel_events); +static LIST_HEAD(time_travel_irqs); +static unsigned long long time_travel_timer_interval; +static unsigned long long time_travel_next_event; +static struct time_travel_event time_travel_timer_event; +static int time_travel_ext_fd = -1; +static unsigned int time_travel_ext_waiting; +static bool time_travel_ext_prev_request_valid; +static unsigned long long time_travel_ext_prev_request; +static unsigned long long *time_travel_ext_free_until; +static unsigned long long _time_travel_ext_free_until; +static u16 time_travel_shm_id; +static struct um_timetravel_schedshm *time_travel_shm; +static union um_timetravel_schedshm_client *time_travel_shm_client; + +unsigned long tt_extra_sched_jiffies; + +notrace unsigned long long sched_clock(void) +{ + return (unsigned long long)(jiffies - INITIAL_JIFFIES + + tt_extra_sched_jiffies) + * (NSEC_PER_SEC / HZ); +} + +static void time_travel_set_time(unsigned long long ns) +{ + if (unlikely(ns < time_travel_time)) + panic("time-travel: time goes backwards %lld -> %lld\n", + time_travel_time, ns); + else if (unlikely(ns >= S64_MAX)) + panic("The system was going to sleep forever, aborting"); + + time_travel_time = ns; +} + +enum time_travel_message_handling { + TTMH_IDLE, + TTMH_POLL, + TTMH_READ, + TTMH_READ_START_ACK, +}; + +static u64 bc_message; +int time_travel_should_print_bc_msg; + +void _time_travel_print_bc_msg(void) +{ + time_travel_should_print_bc_msg = 0; + printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message); +} + +static void time_travel_setup_shm(int fd, u16 id) +{ + u32 len; + + time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm)); + + if (!time_travel_shm) + goto out; + + len = time_travel_shm->len; + + if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION || + len < struct_size(time_travel_shm, clients, id + 1)) { + os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm)); + time_travel_shm = NULL; + goto out; + } + + time_travel_shm = os_mremap_rw_shared(time_travel_shm, + sizeof(*time_travel_shm), + len); + if (!time_travel_shm) + goto out; + + time_travel_shm_offset = time_travel_shm->current_time; + time_travel_shm_client = &time_travel_shm->clients[id]; + time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE; + time_travel_shm_id = id; + /* always look at that free_until from now on */ + time_travel_ext_free_until = &time_travel_shm->free_until; +out: + os_close_file(fd); +} + +static void time_travel_handle_message(struct um_timetravel_msg *msg, + enum time_travel_message_handling mode) +{ + struct um_timetravel_msg resp = { + .op = UM_TIMETRAVEL_ACK, + }; + int ret; + + /* + * We can't unlock here, but interrupt signals with a timetravel_handler + * (see um_request_irq_tt) get to the timetravel_handler anyway. + */ + if (mode != TTMH_READ) { + BUG_ON(mode == TTMH_IDLE && !irqs_disabled()); + + while (os_poll(1, &time_travel_ext_fd) != 0) { + /* nothing */ + } + } + + if (unlikely(mode == TTMH_READ_START_ACK)) { + int fd[UM_TIMETRAVEL_SHARED_MAX_FDS]; + + ret = os_rcv_fd_msg(time_travel_ext_fd, fd, + ARRAY_SIZE(fd), msg, sizeof(*msg)); + if (ret == sizeof(*msg)) { + time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD], + msg->time & UM_TIMETRAVEL_START_ACK_ID); + /* we don't use the logging for now */ + os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]); + } + } else { + ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); + } + + if (ret == 0) + panic("time-travel external link is broken\n"); + if (ret != sizeof(*msg)) + panic("invalid time-travel message - %d bytes\n", ret); + + switch (msg->op) { + default: + WARN_ONCE(1, "time-travel: unexpected message %lld\n", + (unsigned long long)msg->op); + break; + case UM_TIMETRAVEL_ACK: + return; + case UM_TIMETRAVEL_RUN: + time_travel_set_time(msg->time); + if (time_travel_shm) { + /* no request right now since we're running */ + time_travel_shm_client->flags &= + ~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN; + /* no ack for shared memory RUN */ + return; + } + break; + case UM_TIMETRAVEL_FREE_UNTIL: + /* not supposed to get this with shm, but ignore it */ + if (time_travel_shm) + break; + time_travel_ext_free_until = &_time_travel_ext_free_until; + _time_travel_ext_free_until = msg->time; + break; + case UM_TIMETRAVEL_BROADCAST: + bc_message = msg->time; + time_travel_should_print_bc_msg = 1; + break; + } + + resp.seq = msg->seq; + os_write_file(time_travel_ext_fd, &resp, sizeof(resp)); +} + +static u64 time_travel_ext_req(u32 op, u64 time) +{ + static int seq; + int mseq = ++seq; + struct um_timetravel_msg msg = { + .op = op, + .time = time, + .seq = mseq, + }; + + /* + * We need to block even the timetravel handlers of SIGIO here and + * only restore their use when we got the ACK - otherwise we may + * (will) get interrupted by that, try to queue the IRQ for future + * processing and thus send another request while we're still waiting + * for an ACK, but the peer doesn't know we got interrupted and will + * send the ACKs in the same order as the message, but we'd need to + * see them in the opposite order ... + * + * This wouldn't matter *too* much, but some ACKs carry the + * current time (for UM_TIMETRAVEL_GET) and getting another + * ACK without a time would confuse us a lot! + * + * The sequence number assignment that happens here lets us + * debug such message handling issues more easily. + */ + block_signals_hard(); + os_write_file(time_travel_ext_fd, &msg, sizeof(msg)); + + /* no ACK expected for WAIT in shared memory mode */ + if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm) + goto done; + + while (msg.op != UM_TIMETRAVEL_ACK) + time_travel_handle_message(&msg, + op == UM_TIMETRAVEL_START ? + TTMH_READ_START_ACK : + TTMH_READ); + + if (msg.seq != mseq) + panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n", + msg.op, msg.seq, mseq, msg.time); + + if (op == UM_TIMETRAVEL_GET) + time_travel_set_time(msg.time); +done: + unblock_signals_hard(); + + return msg.time; +} + +void __time_travel_wait_readable(int fd) +{ + int fds[2] = { fd, time_travel_ext_fd }; + int ret; + + if (time_travel_mode != TT_MODE_EXTERNAL) + return; + + while ((ret = os_poll(2, fds))) { + struct um_timetravel_msg msg; + + if (ret == 1) + time_travel_handle_message(&msg, TTMH_READ); + } +} +EXPORT_SYMBOL_GPL(__time_travel_wait_readable); + +static void time_travel_ext_update_request(unsigned long long time) +{ + if (time_travel_mode != TT_MODE_EXTERNAL) + return; + + /* asked for exactly this time previously */ + if (time_travel_ext_prev_request_valid && + time == time_travel_ext_prev_request) + return; + + /* + * if we're running and are allowed to run past the request + * then we don't need to update it either + * + * Note for shm we ignore FREE_UNTIL messages and leave the pointer + * to shared memory, and for non-shm the offset is 0. + */ + if (!time_travel_ext_waiting && time_travel_ext_free_until && + time < (*time_travel_ext_free_until - time_travel_shm_offset)) + return; + + time_travel_ext_prev_request = time; + time_travel_ext_prev_request_valid = true; + + if (time_travel_shm) { + union um_timetravel_schedshm_client *running; + + running = &time_travel_shm->clients[time_travel_shm->running_id]; + + if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) { + time_travel_shm_client->flags |= + UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN; + time += time_travel_shm_offset; + time_travel_shm_client->req_time = time; + if (time < time_travel_shm->free_until) + time_travel_shm->free_until = time; + return; + } + } + + time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time); +} + +void __time_travel_propagate_time(void) +{ + static unsigned long long last_propagated; + + if (time_travel_shm) { + if (time_travel_shm->running_id != time_travel_shm_id) + panic("time-travel: setting time while not running\n"); + time_travel_shm->current_time = time_travel_time + + time_travel_shm_offset; + return; + } + + if (last_propagated == time_travel_time) + return; + + time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time); + last_propagated = time_travel_time; +} +EXPORT_SYMBOL_GPL(__time_travel_propagate_time); + +/* returns true if we must do a wait to the simtime device */ +static bool time_travel_ext_request(unsigned long long time) +{ + /* + * If we received an external sync point ("free until") then we + * don't have to request/wait for anything until then, unless + * we're already waiting. + * + * Note for shm we ignore FREE_UNTIL messages and leave the pointer + * to shared memory, and for non-shm the offset is 0. + */ + if (!time_travel_ext_waiting && time_travel_ext_free_until && + time < (*time_travel_ext_free_until - time_travel_shm_offset)) + return false; + + time_travel_ext_update_request(time); + return true; +} + +static void time_travel_ext_wait(bool idle) +{ + struct um_timetravel_msg msg = { + .op = UM_TIMETRAVEL_ACK, + }; + + time_travel_ext_prev_request_valid = false; + if (!time_travel_shm) + time_travel_ext_free_until = NULL; + time_travel_ext_waiting++; + + time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1); + + /* + * Here we are deep in the idle loop, so we have to break out of the + * kernel abstraction in a sense and implement this in terms of the + * UML system waiting on the VQ interrupt while sleeping, when we get + * the signal it'll call time_travel_ext_vq_notify_done() completing the + * call. + */ + while (msg.op != UM_TIMETRAVEL_RUN) + time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL); + + time_travel_ext_waiting--; + + /* we might request more stuff while polling - reset when we run */ + time_travel_ext_prev_request_valid = false; +} + +static void time_travel_ext_get_time(void) +{ + if (time_travel_shm) + time_travel_set_time(time_travel_shm->current_time - + time_travel_shm_offset); + else + time_travel_ext_req(UM_TIMETRAVEL_GET, -1); +} + +static void __time_travel_update_time(unsigned long long ns, bool idle) +{ + if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns)) + time_travel_ext_wait(idle); + else + time_travel_set_time(ns); +} + +static struct time_travel_event *time_travel_first_event(void) +{ + return list_first_entry_or_null(&time_travel_events, + struct time_travel_event, + list); +} + +static void __time_travel_add_event(struct time_travel_event *e, + unsigned long long time) +{ + struct time_travel_event *tmp; + bool inserted = false; + unsigned long flags; + + if (e->pending) + return; + + e->pending = true; + e->time = time; + + local_irq_save(flags); + list_for_each_entry(tmp, &time_travel_events, list) { + /* + * Add the new entry before one with higher time, + * or if they're equal and both on stack, because + * in that case we need to unwind the stack in the + * right order, and the later event (timer sleep + * or such) must be dequeued first. + */ + if ((tmp->time > e->time) || + (tmp->time == e->time && tmp->onstack && e->onstack)) { + list_add_tail(&e->list, &tmp->list); + inserted = true; + break; + } + } + + if (!inserted) + list_add_tail(&e->list, &time_travel_events); + + tmp = time_travel_first_event(); + time_travel_ext_update_request(tmp->time); + time_travel_next_event = tmp->time; + local_irq_restore(flags); +} + +static void time_travel_add_event(struct time_travel_event *e, + unsigned long long time) +{ + if (WARN_ON(!e->fn)) + return; + + __time_travel_add_event(e, time); +} + +void time_travel_add_event_rel(struct time_travel_event *e, + unsigned long long delay_ns) +{ + time_travel_add_event(e, time_travel_time + delay_ns); +} + +static void time_travel_periodic_timer(struct time_travel_event *e) +{ + time_travel_add_event(&time_travel_timer_event, + time_travel_time + time_travel_timer_interval); + + /* clock tick; decrease extra jiffies by keeping sched_clock constant */ + if (tt_extra_sched_jiffies > 0) + tt_extra_sched_jiffies -= 1; + + deliver_alarm(); +} + +void deliver_time_travel_irqs(void) +{ + struct time_travel_event *e; + unsigned long flags; + + /* + * Don't do anything for most cases. Note that because here we have + * to disable IRQs (and re-enable later) we'll actually recurse at + * the end of the function, so this is strictly necessary. + */ + if (likely(list_empty(&time_travel_irqs))) + return; + + local_irq_save(flags); + irq_enter(); + while ((e = list_first_entry_or_null(&time_travel_irqs, + struct time_travel_event, + list))) { + list_del(&e->list); + e->pending = false; + e->fn(e); + } + irq_exit(); + local_irq_restore(flags); +} + +static void time_travel_deliver_event(struct time_travel_event *e) +{ + if (e == &time_travel_timer_event) { + /* + * deliver_alarm() does the irq_enter/irq_exit + * by itself, so must handle it specially here + */ + e->fn(e); + } else if (irqs_disabled()) { + list_add_tail(&e->list, &time_travel_irqs); + /* + * set pending again, it was set to false when the + * event was deleted from the original list, but + * now it's still pending until we deliver the IRQ. + */ + e->pending = true; + } else { + unsigned long flags; + + local_irq_save(flags); + irq_enter(); + e->fn(e); + irq_exit(); + local_irq_restore(flags); + } +} + +bool time_travel_del_event(struct time_travel_event *e) +{ + unsigned long flags; + + if (!e->pending) + return false; + local_irq_save(flags); + list_del(&e->list); + e->pending = false; + local_irq_restore(flags); + return true; +} + +static void time_travel_update_time(unsigned long long next, bool idle) +{ + struct time_travel_event ne = { + .onstack = true, + }; + struct time_travel_event *e; + bool finished = idle; + + /* add it without a handler - we deal with that specifically below */ + __time_travel_add_event(&ne, next); + + do { + e = time_travel_first_event(); + + BUG_ON(!e); + __time_travel_update_time(e->time, idle); + + /* new events may have been inserted while we were waiting */ + if (e == time_travel_first_event()) { + BUG_ON(!time_travel_del_event(e)); + BUG_ON(time_travel_time != e->time); + + if (e == &ne) { + finished = true; + } else { + if (e->onstack) + panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n", + time_travel_time, e->time, e); + time_travel_deliver_event(e); + } + } + + e = time_travel_first_event(); + if (e) + time_travel_ext_update_request(e->time); + } while (ne.pending && !finished); + + time_travel_del_event(&ne); +} + +static void time_travel_update_time_rel(unsigned long long offs) +{ + unsigned long flags; + + /* + * Disable interrupts before calculating the new time so + * that a real timer interrupt (signal) can't happen at + * a bad time e.g. after we read time_travel_time but + * before we've completed updating the time. + */ + local_irq_save(flags); + time_travel_update_time(time_travel_time + offs, false); + local_irq_restore(flags); +} + +void time_travel_ndelay(unsigned long nsec) +{ + /* + * Not strictly needed to use _rel() version since this is + * only used in INFCPU/EXT modes, but it doesn't hurt and + * is more readable too. + */ + time_travel_update_time_rel(nsec); +} +EXPORT_SYMBOL(time_travel_ndelay); + +void time_travel_add_irq_event(struct time_travel_event *e) +{ + BUG_ON(time_travel_mode != TT_MODE_EXTERNAL); + + time_travel_ext_get_time(); + /* + * We could model interrupt latency here, for now just + * don't have any latency at all and request the exact + * same time (again) to run the interrupt... + */ + time_travel_add_event(e, time_travel_time); +} +EXPORT_SYMBOL_GPL(time_travel_add_irq_event); + +static void time_travel_oneshot_timer(struct time_travel_event *e) +{ + /* clock tick; decrease extra jiffies by keeping sched_clock constant */ + if (tt_extra_sched_jiffies > 0) + tt_extra_sched_jiffies -= 1; + + deliver_alarm(); +} + +void time_travel_sleep(void) +{ + /* + * Wait "forever" (using S64_MAX because there are some potential + * wrapping issues, especially with the current TT_MODE_EXTERNAL + * controller application. + */ + unsigned long long next = S64_MAX; + int cpu = raw_smp_processor_id(); + + if (time_travel_mode == TT_MODE_BASIC) + os_timer_disable(cpu); + + time_travel_update_time(next, true); + + if (time_travel_mode == TT_MODE_BASIC && + time_travel_timer_event.pending) { + if (time_travel_timer_event.fn == time_travel_periodic_timer) { + /* + * This is somewhat wrong - we should get the first + * one sooner like the os_timer_one_shot() below... + */ + os_timer_set_interval(cpu, time_travel_timer_interval); + } else { + os_timer_one_shot(cpu, time_travel_timer_event.time - next); + } + } +} + +static void time_travel_handle_real_alarm(void) +{ + time_travel_set_time(time_travel_next_event); + + time_travel_del_event(&time_travel_timer_event); + + if (time_travel_timer_event.fn == time_travel_periodic_timer) + time_travel_add_event(&time_travel_timer_event, + time_travel_time + + time_travel_timer_interval); +} + +static void time_travel_set_interval(unsigned long long interval) +{ + time_travel_timer_interval = interval; +} + +static int time_travel_connect_external(const char *socket) +{ + const char *sep; + unsigned long long id = (unsigned long long)-1; + int rc; + + if ((sep = strchr(socket, ':'))) { + char buf[25] = {}; + if (sep - socket > sizeof(buf) - 1) + goto invalid_number; + + memcpy(buf, socket, sep - socket); + if (kstrtoull(buf, 0, &id)) { +invalid_number: + panic("time-travel: invalid external ID in string '%s'\n", + socket); + return -EINVAL; + } + + socket = sep + 1; + } + + rc = os_connect_socket(socket); + if (rc < 0) { + panic("time-travel: failed to connect to external socket %s\n", + socket); + return rc; + } + + time_travel_ext_fd = rc; + + time_travel_ext_req(UM_TIMETRAVEL_START, id); + + return 1; +} + +static void time_travel_set_start(void) +{ + if (time_travel_start_set) + return; + + switch (time_travel_mode) { + case TT_MODE_EXTERNAL: + time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1); + /* controller gave us the *current* time, so adjust by that */ + time_travel_ext_get_time(); + time_travel_start -= time_travel_time; + break; + case TT_MODE_INFCPU: + case TT_MODE_BASIC: + if (!time_travel_start_set) + time_travel_start = os_persistent_clock_emulation(); + break; + case TT_MODE_OFF: + /* we just read the host clock with os_persistent_clock_emulation() */ + break; + } + + time_travel_start_set = true; +} +#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ +#define time_travel_start_set 0 +#define time_travel_start 0 +#define time_travel_time 0 +#define time_travel_ext_waiting 0 + +static inline void time_travel_update_time(unsigned long long ns, bool idle) +{ +} + +static inline void time_travel_update_time_rel(unsigned long long offs) +{ +} + +static inline void time_travel_handle_real_alarm(void) +{ +} + +static void time_travel_set_interval(unsigned long long interval) +{ +} + +static inline void time_travel_set_start(void) +{ +} + +/* fail link if this actually gets used */ +extern u64 time_travel_ext_req(u32 op, u64 time); + +/* these are empty macros so the struct/fn need not exist */ +#define time_travel_add_event(e, time) do { } while (0) +/* externally not usable - redefine here so we can */ +#undef time_travel_del_event +#define time_travel_del_event(e) do { } while (0) +#endif + +static struct clock_event_device timer_clockevent[NR_CPUS]; void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { unsigned long flags; + /* + * In basic time-travel mode we still get real interrupts + * (signals) but since we don't read time from the OS, we + * must update the simulated time here to the expiry when + * we get a signal. + * This is not the case in inf-cpu mode, since there we + * never get any real signals from the OS. + */ + if (time_travel_mode == TT_MODE_BASIC) + time_travel_handle_real_alarm(); + local_irq_save(flags); do_IRQ(TIMER_IRQ, regs); local_irq_restore(flags); } -static void itimer_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) +static int itimer_shutdown(struct clock_event_device *evt) { - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - set_interval(); - break; + int cpu = evt - &timer_clockevent[0]; - case CLOCK_EVT_MODE_SHUTDOWN: - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_ONESHOT: - disable_timer(); - break; + if (time_travel_mode != TT_MODE_OFF) + time_travel_del_event(&time_travel_timer_event); - case CLOCK_EVT_MODE_RESUME: - break; + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) + os_timer_disable(cpu); + + return 0; +} + +static int itimer_set_periodic(struct clock_event_device *evt) +{ + unsigned long long interval = NSEC_PER_SEC / HZ; + int cpu = evt - &timer_clockevent[0]; + + if (time_travel_mode != TT_MODE_OFF) { + time_travel_del_event(&time_travel_timer_event); + time_travel_set_event_fn(&time_travel_timer_event, + time_travel_periodic_timer); + time_travel_set_interval(interval); + time_travel_add_event(&time_travel_timer_event, + time_travel_time + interval); } + + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) + os_timer_set_interval(cpu, interval); + + return 0; } static int itimer_next_event(unsigned long delta, struct clock_event_device *evt) { - return timer_one_shot(delta + 1); + delta += 1; + + if (time_travel_mode != TT_MODE_OFF) { + time_travel_del_event(&time_travel_timer_event); + time_travel_set_event_fn(&time_travel_timer_event, + time_travel_oneshot_timer); + time_travel_add_event(&time_travel_timer_event, + time_travel_time + delta); + } + + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) + return os_timer_one_shot(raw_smp_processor_id(), delta); + + return 0; } -static struct clock_event_device itimer_clockevent = { - .name = "itimer", - .rating = 250, - .cpumask = cpu_all_mask, - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = itimer_set_mode, - .set_next_event = itimer_next_event, - .shift = 32, - .irq = 0, +static int itimer_one_shot(struct clock_event_device *evt) +{ + return itimer_next_event(0, evt); +} + +static struct clock_event_device _timer_clockevent = { + .name = "posix-timer", + .rating = 250, + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT, + .set_state_shutdown = itimer_shutdown, + .set_state_periodic = itimer_set_periodic, + .set_state_oneshot = itimer_one_shot, + .set_next_event = itimer_next_event, + .shift = 0, + .max_delta_ns = 0xffffffff, + .max_delta_ticks = 0xffffffff, + .min_delta_ns = TIMER_MIN_DELTA, + .min_delta_ticks = TIMER_MIN_DELTA, // microsecond resolution should be enough for anyone, same as 640K RAM + .irq = 0, + .mult = 1, }; static irqreturn_t um_timer(int irq, void *dev) { - (*itimer_clockevent.event_handler)(&itimer_clockevent); + int cpu = raw_smp_processor_id(); + struct clock_event_device *evt = &timer_clockevent[cpu]; + + /* + * Interrupt the (possibly) running userspace process, technically this + * should only happen if userspace is currently executing. + * With infinite CPU time-travel, we can only get here when userspace + * is not executing. Do not notify there and avoid spurious scheduling. + */ + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL && + get_current()->mm) + os_alarm_process(get_current()->mm->context.id.pid); + + evt->event_handler(evt); return IRQ_HANDLED; } -static cycle_t itimer_read(struct clocksource *cs) +static u64 timer_read(struct clocksource *cs) { - return os_nsecs() / 1000; + if (time_travel_mode != TT_MODE_OFF) { + /* + * We make reading the timer cost a bit so that we don't get + * stuck in loops that expect time to move more than the + * exact requested sleep amount, e.g. python's socket server, + * see https://bugs.python.org/issue37026. + * + * However, don't do that when we're in interrupt or such as + * then we might recurse into our own processing, and get to + * even more waiting, and that's not good - it messes up the + * "what do I do next" and onstack event we use to know when + * to return from time_travel_update_time(). + */ + if (!irqs_disabled() && !in_interrupt() && !in_softirq() && + !time_travel_ext_waiting) + time_travel_update_time_rel(TIMER_MULTIPLIER); + return time_travel_time / TIMER_MULTIPLIER; + } + + return os_nsecs() / TIMER_MULTIPLIER; } -static struct clocksource itimer_clocksource = { - .name = "itimer", +static struct clocksource timer_clocksource = { + .name = "timer", .rating = 300, - .read = itimer_read, + .read = timer_read, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init setup_itimer(void) +int um_setup_timer(void) { + int cpu = raw_smp_processor_id(); + struct clock_event_device *evt = &timer_clockevent[cpu]; int err; - err = request_irq(TIMER_IRQ, um_timer, 0, "timer", NULL); + err = os_timer_create(); + if (err) + return err; + + memcpy(evt, &_timer_clockevent, sizeof(*evt)); + evt->cpumask = cpumask_of(cpu); + clockevents_register_device(evt); + + return 0; +} + +static void __init um_timer_init(void) +{ + int err; + + err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL); if (err != 0) printk(KERN_ERR "register_timer : request_irq failed - " "errno = %d\n", -err); - itimer_clockevent.mult = div_sc(HZ, NSEC_PER_SEC, 32); - itimer_clockevent.max_delta_ns = - clockevent_delta2ns(60 * HZ, &itimer_clockevent); - itimer_clockevent.min_delta_ns = - clockevent_delta2ns(1, &itimer_clockevent); - err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC); + err = um_setup_timer(); + if (err) { + printk(KERN_ERR "creation of timer failed - errno = %d\n", -err); + return; + } + + err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER); if (err) { printk(KERN_ERR "clocksource_register_hz returned %d\n", err); return; } - clockevents_register_device(&itimer_clockevent); } -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { - long long nsecs = os_nsecs(); + long long nsecs; + + time_travel_set_start(); - set_normalized_timespec(ts, nsecs / NSEC_PER_SEC, - nsecs % NSEC_PER_SEC); + if (time_travel_mode != TT_MODE_OFF) + nsecs = time_travel_start + time_travel_time; + else + nsecs = os_persistent_clock_emulation(); + + set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC, + nsecs % NSEC_PER_SEC); } void __init time_init(void) { - timer_init(); - late_time_init = setup_itimer; + timer_set_signal_handler(); + late_time_init = um_timer_init; +} + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +unsigned long calibrate_delay_is_known(void) +{ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) + return 1; + return 0; +} + +static int setup_time_travel(char *str) +{ + if (strcmp(str, "=inf-cpu") == 0) { + time_travel_mode = TT_MODE_INFCPU; + _timer_clockevent.name = "time-travel-timer-infcpu"; + timer_clocksource.name = "time-travel-clock"; + return 1; + } + + if (strncmp(str, "=ext:", 5) == 0) { + time_travel_mode = TT_MODE_EXTERNAL; + _timer_clockevent.name = "time-travel-timer-external"; + timer_clocksource.name = "time-travel-clock-external"; + return time_travel_connect_external(str + 5); + } + + if (!*str) { + time_travel_mode = TT_MODE_BASIC; + _timer_clockevent.name = "time-travel-timer"; + timer_clocksource.name = "time-travel-clock"; + return 1; + } + + return -EINVAL; +} + +__setup("time-travel", setup_time_travel); +__uml_help(setup_time_travel, +"time-travel\n" +" This option just enables basic time travel mode, in which the clock/timers\n" +" inside the UML instance skip forward when there's nothing to do, rather than\n" +" waiting for real time to elapse. However, instance CPU speed is limited by\n" +" the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n" +" clock (but quicker when there's nothing to do).\n" +"\n" +"time-travel=inf-cpu\n" +" This enables time travel mode with infinite processing power, in which there\n" +" are no wall clock timers, and any CPU processing happens - as seen from the\n" +" guest - instantly. This can be useful for accurate simulation regardless of\n" +" debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n" +" easily lead to getting stuck (e.g. if anything in the system busy loops).\n" +"\n" +"time-travel=ext:[ID:]/path/to/socket\n" +" This enables time travel mode similar to =inf-cpu, except the system will\n" +" use the given socket to coordinate with a central scheduler, in order to\n" +" have more than one system simultaneously be on simulated time. The virtio\n" +" driver code in UML knows about this so you can also simulate networks and\n" +" devices using it, assuming the device has the right capabilities.\n" +" The optional ID is a 64-bit integer that's sent to the central scheduler.\n\n"); + +static int setup_time_travel_start(char *str) +{ + int err; + + err = kstrtoull(str, 0, &time_travel_start); + if (err) + return err; + + time_travel_start_set = 1; + return 1; +} + +__setup("time-travel-start=", setup_time_travel_start); +__uml_help(setup_time_travel_start, +"time-travel-start=<nanoseconds>\n" +" Configure the UML instance's wall clock to start at this value rather than\n" +" the host's wall clock at the time of UML boot.\n\n"); + +static struct kobject *bc_time_kobject; + +static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "0x%llx", bc_message); +} + +static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret; + u64 user_bc_message; + + ret = kstrtou64(buf, 0, &user_bc_message); + if (ret) + return ret; + + bc_message = user_bc_message; + + time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message); + pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message); + return count; +} + +static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store); + +static int __init um_bc_start(void) +{ + if (time_travel_mode != TT_MODE_EXTERNAL) + return 0; + + bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj); + if (!bc_time_kobject) + return 0; + + if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr)) + pr_debug("failed to create the bc file in /sys/kernel/um_time"); + + return 0; } +late_initcall(um_bc_start); +#endif |
