summaryrefslogtreecommitdiff
path: root/arch/x86/xen/time.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen/time.c')
-rw-r--r--arch/x86/xen/time.c559
1 files changed, 327 insertions, 232 deletions
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index ee365895b06b..96521b1874ac 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Xen time implementation.
*
@@ -11,15 +12,15 @@
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
-#include <linux/kernel_stat.h>
-#include <linux/math64.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/pvclock_gtod.h>
+#include <linux/timekeeper_internal.h>
#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
+#include <asm/xen/cpuid.h>
#include <xen/events.h>
#include <xen/features.h>
@@ -28,120 +29,10 @@
#include "xen-ops.h"
-/* Xen may fire a timer up to this many ns early */
-#define TIMER_SLOP 100000
-#define NS_PER_TICK (1000000000LL / HZ)
+/* Minimum amount of time until next clock event fires */
+#define TIMER_SLOP 1
-/* runstate info updated by Xen */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
-
-/* snapshots of runstate info */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
-
-/* unused ns of stolen time */
-static DEFINE_PER_CPU(u64, xen_residual_stolen);
-
-/* return an consistent snapshot of 64-bit time/counter value */
-static u64 get64(const u64 *p)
-{
- u64 ret;
-
- if (BITS_PER_LONG < 64) {
- u32 *p32 = (u32 *)p;
- u32 h, l;
-
- /*
- * Read high then low, and then make sure high is
- * still the same; this will only loop if low wraps
- * and carries into high.
- * XXX some clean way to make this endian-proof?
- */
- do {
- h = p32[1];
- barrier();
- l = p32[0];
- barrier();
- } while (p32[1] != h);
-
- ret = (((u64)h) << 32) | l;
- } else
- ret = *p;
-
- return ret;
-}
-
-/*
- * Runstate accounting
- */
-static void get_runstate_snapshot(struct vcpu_runstate_info *res)
-{
- u64 state_time;
- struct vcpu_runstate_info *state;
-
- BUG_ON(preemptible());
-
- state = &__get_cpu_var(xen_runstate);
-
- /*
- * The runstate info is always updated by the hypervisor on
- * the current CPU, so there's no need to use anything
- * stronger than a compiler barrier when fetching it.
- */
- do {
- state_time = get64(&state->state_entry_time);
- barrier();
- *res = *state;
- barrier();
- } while (get64(&state->state_entry_time) != state_time);
-}
-
-/* return true when a vcpu could run but has no real cpu to run on */
-bool xen_vcpu_stolen(int vcpu)
-{
- return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
-}
-
-void xen_setup_runstate_info(int cpu)
-{
- struct vcpu_register_runstate_memory_area area;
-
- area.addr.v = &per_cpu(xen_runstate, cpu);
-
- if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
- cpu, &area))
- BUG();
-}
-
-static void do_stolen_accounting(void)
-{
- struct vcpu_runstate_info state;
- struct vcpu_runstate_info *snap;
- s64 runnable, offline, stolen;
- cputime_t ticks;
-
- get_runstate_snapshot(&state);
-
- WARN_ON(state.state != RUNSTATE_running);
-
- snap = &__get_cpu_var(xen_runstate_snapshot);
-
- /* work out how much time the VCPU has not been runn*ing* */
- runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
- offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
-
- *snap = state;
-
- /* Add the appropriate number of ticks of stolen time,
- including any left-overs from last time. */
- stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
-
- if (stolen < 0)
- stolen = 0;
-
- ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
- __this_cpu_write(xen_residual_stolen, stolen);
- account_steal_ticks(ticks);
-}
+static u64 xen_sched_clock_offset __read_mostly;
/* Get the TSC speed from Xen */
static unsigned long xen_tsc_khz(void)
@@ -149,27 +40,40 @@ static unsigned long xen_tsc_khz(void)
struct pvclock_vcpu_time_info *info =
&HYPERVISOR_shared_info->vcpu_info[0].time;
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
return pvclock_tsc_khz(info);
}
-cycle_t xen_clocksource_read(void)
+static u64 xen_clocksource_read(void)
{
struct pvclock_vcpu_time_info *src;
- cycle_t ret;
+ u64 ret;
preempt_disable_notrace();
- src = &__get_cpu_var(xen_vcpu)->time;
+ src = &__this_cpu_read(xen_vcpu)->time;
ret = pvclock_clocksource_read(src);
preempt_enable_notrace();
return ret;
}
-static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
+static u64 xen_clocksource_get_cycles(struct clocksource *cs)
{
return xen_clocksource_read();
}
-static void xen_read_wallclock(struct timespec *ts)
+static noinstr u64 xen_sched_clock(void)
+{
+ struct pvclock_vcpu_time_info *src;
+ u64 ret;
+
+ src = &__this_cpu_read(xen_vcpu)->time;
+ ret = pvclock_clocksource_read_nowd(src);
+ ret -= xen_sched_clock_offset;
+
+ return ret;
+}
+
+static void xen_read_wallclock(struct timespec64 *ts)
{
struct shared_info *s = HYPERVISOR_shared_info;
struct pvclock_wall_clock *wall_clock = &(s->wc);
@@ -180,40 +84,60 @@ static void xen_read_wallclock(struct timespec *ts)
put_cpu_var(xen_vcpu);
}
-static void xen_get_wallclock(struct timespec *now)
+static void xen_get_wallclock(struct timespec64 *now)
{
xen_read_wallclock(now);
}
-static int xen_set_wallclock(const struct timespec *now)
+static int xen_set_wallclock(const struct timespec64 *now)
{
- return -1;
+ return -ENODEV;
}
static int xen_pvclock_gtod_notify(struct notifier_block *nb,
unsigned long was_set, void *priv)
{
/* Protected by the calling core code serialization */
- static struct timespec next_sync;
+ static struct timespec64 next_sync;
struct xen_platform_op op;
- struct timespec now;
+ struct timespec64 now;
+ struct timekeeper *tk = priv;
+ static bool settime64_supported = true;
+ int ret;
- now = __current_kernel_time();
+ now.tv_sec = tk->xtime_sec;
+ now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
/*
* We only take the expensive HV call when the clock was set
* or when the 11 minutes RTC synchronization time elapsed.
*/
- if (!was_set && timespec_compare(&now, &next_sync) < 0)
+ if (!was_set && timespec64_compare(&now, &next_sync) < 0)
return NOTIFY_OK;
- op.cmd = XENPF_settime;
- op.u.settime.secs = now.tv_sec;
- op.u.settime.nsecs = now.tv_nsec;
- op.u.settime.system_time = xen_clocksource_read();
+again:
+ if (settime64_supported) {
+ op.cmd = XENPF_settime64;
+ op.u.settime64.mbz = 0;
+ op.u.settime64.secs = now.tv_sec;
+ op.u.settime64.nsecs = now.tv_nsec;
+ op.u.settime64.system_time = xen_clocksource_read();
+ } else {
+ op.cmd = XENPF_settime32;
+ op.u.settime32.secs = now.tv_sec;
+ op.u.settime32.nsecs = now.tv_nsec;
+ op.u.settime32.system_time = xen_clocksource_read();
+ }
- (void)HYPERVISOR_dom0_op(&op);
+ ret = HYPERVISOR_platform_op(&op);
+
+ if (ret == -ENOSYS && settime64_supported) {
+ settime64_supported = false;
+ goto again;
+ }
+ if (ret < 0)
+ return NOTIFY_BAD;
/*
* Move the next drift compensation time 11 minutes
@@ -230,12 +154,19 @@ static struct notifier_block xen_pvclock_gtod_notifier = {
.notifier_call = xen_pvclock_gtod_notify,
};
+static int xen_cs_enable(struct clocksource *cs)
+{
+ vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
+ return 0;
+}
+
static struct clocksource xen_clocksource __read_mostly = {
- .name = "xen",
- .rating = 400,
- .read = xen_clocksource_get_cycles,
- .mask = ~0,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .name = "xen",
+ .rating = 400,
+ .read = xen_clocksource_get_cycles,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .enable = xen_cs_enable,
};
/*
@@ -274,30 +205,18 @@ static s64 get_abs_timeout(unsigned long delta)
return xen_clocksource_read() + delta;
}
-static void xen_timerop_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
+static int xen_timerop_shutdown(struct clock_event_device *evt)
{
- switch (mode) {
- case CLOCK_EVT_MODE_PERIODIC:
- /* unsupported */
- WARN_ON(1);
- break;
-
- case CLOCK_EVT_MODE_ONESHOT:
- case CLOCK_EVT_MODE_RESUME:
- break;
-
- case CLOCK_EVT_MODE_UNUSED:
- case CLOCK_EVT_MODE_SHUTDOWN:
- HYPERVISOR_set_timer_op(0); /* cancel timeout */
- break;
- }
+ /* cancel timeout */
+ HYPERVISOR_set_timer_op(0);
+
+ return 0;
}
static int xen_timerop_set_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
+ WARN_ON(!clockevent_state_oneshot(evt));
if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
BUG();
@@ -309,47 +228,45 @@ static int xen_timerop_set_next_event(unsigned long delta,
return 0;
}
-static const struct clock_event_device xen_timerop_clockevent = {
- .name = "xen",
- .features = CLOCK_EVT_FEAT_ONESHOT,
+static struct clock_event_device xen_timerop_clockevent __ro_after_init = {
+ .name = "xen",
+ .features = CLOCK_EVT_FEAT_ONESHOT,
- .max_delta_ns = 0xffffffff,
- .min_delta_ns = TIMER_SLOP,
+ .max_delta_ns = 0xffffffff,
+ .max_delta_ticks = 0xffffffff,
+ .min_delta_ns = TIMER_SLOP,
+ .min_delta_ticks = TIMER_SLOP,
- .mult = 1,
- .shift = 0,
- .rating = 500,
+ .mult = 1,
+ .shift = 0,
+ .rating = 500,
- .set_mode = xen_timerop_set_mode,
- .set_next_event = xen_timerop_set_next_event,
+ .set_state_shutdown = xen_timerop_shutdown,
+ .set_next_event = xen_timerop_set_next_event,
};
+static int xen_vcpuop_shutdown(struct clock_event_device *evt)
+{
+ int cpu = smp_processor_id();
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu),
+ NULL) ||
+ HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
+ NULL))
+ BUG();
-static void xen_vcpuop_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
+ return 0;
+}
+
+static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
{
int cpu = smp_processor_id();
- switch (mode) {
- case CLOCK_EVT_MODE_PERIODIC:
- WARN_ON(1); /* unsupported */
- break;
-
- case CLOCK_EVT_MODE_ONESHOT:
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
- BUG();
- break;
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
+ NULL))
+ BUG();
- case CLOCK_EVT_MODE_UNUSED:
- case CLOCK_EVT_MODE_SHUTDOWN:
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
- HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
- BUG();
- break;
- case CLOCK_EVT_MODE_RESUME:
- break;
- }
+ return 0;
}
static int xen_vcpuop_set_next_event(unsigned long delta,
@@ -359,30 +276,34 @@ static int xen_vcpuop_set_next_event(unsigned long delta,
struct vcpu_set_singleshot_timer single;
int ret;
- WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
+ WARN_ON(!clockevent_state_oneshot(evt));
single.timeout_abs_ns = get_abs_timeout(delta);
- single.flags = VCPU_SSHOTTMR_future;
-
- ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);
+ /* Get an event anyway, even if the timeout is already expired */
+ single.flags = 0;
- BUG_ON(ret != 0 && ret != -ETIME);
+ ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu),
+ &single);
+ BUG_ON(ret != 0);
return ret;
}
-static const struct clock_event_device xen_vcpuop_clockevent = {
+static struct clock_event_device xen_vcpuop_clockevent __ro_after_init = {
.name = "xen",
.features = CLOCK_EVT_FEAT_ONESHOT,
.max_delta_ns = 0xffffffff,
+ .max_delta_ticks = 0xffffffff,
.min_delta_ns = TIMER_SLOP,
+ .min_delta_ticks = TIMER_SLOP,
.mult = 1,
.shift = 0,
.rating = 500,
- .set_mode = xen_vcpuop_set_mode,
+ .set_state_shutdown = xen_vcpuop_shutdown,
+ .set_state_oneshot = xen_vcpuop_set_oneshot,
.set_next_event = xen_vcpuop_set_next_event,
};
@@ -391,13 +312,13 @@ static const struct clock_event_device *xen_clockevent =
struct xen_clock_event_device {
struct clock_event_device evt;
- char *name;
+ char name[16];
};
static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
- struct clock_event_device *evt = &__get_cpu_var(xen_clock_events).evt;
+ struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);
irqreturn_t ret;
ret = IRQ_NONE;
@@ -406,90 +327,207 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
ret = IRQ_HANDLED;
}
- do_stolen_accounting();
-
return ret;
}
void xen_teardown_timer(int cpu)
{
struct clock_event_device *evt;
- BUG_ON(cpu == 0);
evt = &per_cpu(xen_clock_events, cpu).evt;
if (evt->irq >= 0) {
unbind_from_irqhandler(evt->irq, NULL);
evt->irq = -1;
- kfree(per_cpu(xen_clock_events, cpu).name);
- per_cpu(xen_clock_events, cpu).name = NULL;
}
}
void xen_setup_timer(int cpu)
{
- char *name;
- struct clock_event_device *evt;
+ struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
+ struct clock_event_device *evt = &xevt->evt;
int irq;
- evt = &per_cpu(xen_clock_events, cpu).evt;
WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
if (evt->irq >= 0)
xen_teardown_timer(cpu);
printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
- name = kasprintf(GFP_KERNEL, "timer%d", cpu);
- if (!name)
- name = "<timer kasprintf failed>";
+ snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);
irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|
- IRQF_NOBALANCING|IRQF_TIMER|
- IRQF_FORCE_RESUME,
- name, NULL);
+ IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
+ IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
+ xevt->name, NULL);
+ (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
memcpy(evt, xen_clockevent, sizeof(*evt));
evt->cpumask = cpumask_of(cpu);
evt->irq = irq;
- per_cpu(xen_clock_events, cpu).name = name;
}
void xen_setup_cpu_clockevents(void)
{
- BUG_ON(preemptible());
-
- clockevents_register_device(&__get_cpu_var(xen_clock_events).evt);
+ clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
}
void xen_timer_resume(void)
{
int cpu;
- pvclock_resume();
-
if (xen_clockevent != &xen_vcpuop_clockevent)
return;
for_each_online_cpu(cpu) {
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
+ xen_vcpu_nr(cpu), NULL))
BUG();
}
}
-static const struct pv_time_ops xen_time_ops __initconst = {
- .sched_clock = xen_clocksource_read,
-};
+static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
+static u64 xen_clock_value_saved;
+
+void xen_save_time_memory_area(void)
+{
+ struct vcpu_register_time_memory_area t;
+ int ret;
+
+ xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;
+
+ if (!xen_clock)
+ return;
+
+ t.addr.v = NULL;
+
+ ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
+ if (ret != 0)
+ pr_notice("Cannot save secondary vcpu_time_info (err %d)",
+ ret);
+ else
+ clear_page(xen_clock);
+}
+
+void xen_restore_time_memory_area(void)
+{
+ struct vcpu_register_time_memory_area t;
+ int ret;
+
+ if (!xen_clock)
+ goto out;
+
+ t.addr.v = &xen_clock->pvti;
+
+ ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
+
+ /*
+ * We don't disable VDSO_CLOCKMODE_PVCLOCK entirely if it fails to
+ * register the secondary time info with Xen or if we migrated to a
+ * host without the necessary flags. On both of these cases what
+ * happens is either process seeing a zeroed out pvti or seeing no
+ * PVCLOCK_TSC_STABLE_BIT bit set. Userspace checks the latter and
+ * if 0, it discards the data in pvti and fallbacks to a system
+ * call for a reliable timestamp.
+ */
+ if (ret != 0)
+ pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
+ ret);
+
+out:
+ /* Need pvclock_resume() before using xen_clocksource_read(). */
+ pvclock_resume();
+ xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
+}
+
+static void xen_setup_vsyscall_time_info(void)
+{
+ struct vcpu_register_time_memory_area t;
+ struct pvclock_vsyscall_time_info *ti;
+ int ret;
+
+ ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL);
+ if (!ti)
+ return;
+
+ t.addr.v = &ti->pvti;
+
+ ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
+ if (ret) {
+ pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (err %d)\n", ret);
+ free_page((unsigned long)ti);
+ return;
+ }
+
+ /*
+ * If primary time info had this bit set, secondary should too since
+ * it's the same data on both just different memory regions. But we
+ * still check it in case hypervisor is buggy.
+ */
+ if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) {
+ t.addr.v = NULL;
+ ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
+ 0, &t);
+ if (!ret)
+ free_page((unsigned long)ti);
+
+ pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (tsc unstable)\n");
+ return;
+ }
+
+ xen_clock = ti;
+ pvclock_set_pvti_cpu0_va(xen_clock);
+
+ xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+}
+
+/*
+ * Check if it is possible to safely use the tsc as a clocksource. This is
+ * only true if the hypervisor notifies the guest that its tsc is invariant,
+ * the tsc is stable, and the tsc instruction will never be emulated.
+ */
+static int __init xen_tsc_safe_clocksource(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ if (!(boot_cpu_has(X86_FEATURE_CONSTANT_TSC)))
+ return 0;
+
+ if (!(boot_cpu_has(X86_FEATURE_NONSTOP_TSC)))
+ return 0;
+
+ if (check_tsc_unstable())
+ return 0;
+
+ /* Leaf 4, sub-leaf 0 (0x40000x03) */
+ cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);
+
+ return ebx == XEN_CPUID_TSC_MODE_NEVER_EMULATE;
+}
static void __init xen_time_init(void)
{
+ struct pvclock_vcpu_time_info *pvti;
int cpu = smp_processor_id();
- struct timespec tp;
+ struct timespec64 tp;
+
+ /*
+ * As Dom0 is never moved, no penalty on using TSC there.
+ *
+ * If it is possible for the guest to determine that the tsc is a safe
+ * clocksource, then set xen_clocksource rating below that of the tsc
+ * so that the system prefers tsc instead.
+ */
+ if (xen_initial_domain())
+ xen_clocksource.rating = 275;
+ else if (xen_tsc_safe_clocksource())
+ xen_clocksource.rating = 299;
clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
+ NULL) == 0) {
/* Successfully turned off 100Hz tick, so we have the
vcpuop-based timer interface */
printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
@@ -498,28 +536,48 @@ static void __init xen_time_init(void)
/* Set initial system time with full resolution */
xen_read_wallclock(&tp);
- do_settimeofday(&tp);
+ do_settimeofday64(&tp);
setup_force_cpu_cap(X86_FEATURE_TSC);
+ /*
+ * We check ahead on the primary time info if this
+ * bit is supported hence speeding up Xen clocksource.
+ */
+ pvti = &__this_cpu_read(xen_vcpu)->time;
+ if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) {
+ pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+ xen_setup_vsyscall_time_info();
+ }
+
xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_setup_cpu_clockevents();
+ xen_time_setup_guest();
+
if (xen_initial_domain())
pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}
+static void __init xen_init_time_common(void)
+{
+ xen_sched_clock_offset = xen_clocksource_read();
+ static_call_update(pv_steal_clock, xen_steal_clock);
+ paravirt_set_sched_clock(xen_sched_clock);
+
+ x86_platform.calibrate_tsc = xen_tsc_khz;
+ x86_platform.get_wallclock = xen_get_wallclock;
+}
+
void __init xen_init_time_ops(void)
{
- pv_time_ops = xen_time_ops;
+ xen_init_time_common();
x86_init.timers.timer_init = xen_time_init;
x86_init.timers.setup_percpu_clockev = x86_init_noop;
x86_cpuinit.setup_percpu_clockev = x86_init_noop;
- x86_platform.calibrate_tsc = xen_tsc_khz;
- x86_platform.get_wallclock = xen_get_wallclock;
/* Dom0 uses the native method to set the hardware RTC. */
if (!xen_initial_domain())
x86_platform.set_wallclock = xen_set_wallclock;
@@ -540,23 +598,60 @@ static void xen_hvm_setup_cpu_clockevents(void)
void __init xen_hvm_init_time_ops(void)
{
- /* vector callback is needed otherwise we cannot receive interrupts
+ static bool hvm_time_initialized;
+
+ if (hvm_time_initialized)
+ return;
+
+ /*
+ * vector callback is needed otherwise we cannot receive interrupts
* on cpu > 0 and at this point we don't know how many cpus are
- * available */
+ * available.
+ */
if (!xen_have_vector_callback)
return;
+
if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
- printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
- "disable pv timer\n");
+ pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer");
+ return;
+ }
+
+ /*
+ * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'.
+ * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest
+ * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access
+ * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic.
+ *
+ * The xen_hvm_init_time_ops() should be called again later after
+ * __this_cpu_read(xen_vcpu) is available.
+ */
+ if (!__this_cpu_read(xen_vcpu)) {
+ pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n",
+ xen_vcpu_nr(0));
return;
}
- pv_time_ops = xen_time_ops;
+ xen_init_time_common();
+
x86_init.timers.setup_percpu_clockev = xen_time_init;
x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
- x86_platform.calibrate_tsc = xen_tsc_khz;
- x86_platform.get_wallclock = xen_get_wallclock;
x86_platform.set_wallclock = xen_set_wallclock;
+
+ hvm_time_initialized = true;
}
#endif
+
+/* Kernel parameter to specify Xen timer slop */
+static int __init parse_xen_timer_slop(char *ptr)
+{
+ unsigned long slop = memparse(ptr, NULL);
+
+ xen_timerop_clockevent.min_delta_ns = slop;
+ xen_timerop_clockevent.min_delta_ticks = slop;
+ xen_vcpuop_clockevent.min_delta_ns = slop;
+ xen_vcpuop_clockevent.min_delta_ticks = slop;
+
+ return 0;
+}
+early_param("xen_timer_slop", parse_xen_timer_slop);