diff options
Diffstat (limited to 'arch/x86/kvm/xen.c')
-rw-r--r-- | arch/x86/kvm/xen.c | 449 |
1 files changed, 303 insertions, 146 deletions
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 4b4e738c6f1b..9b029bb29a16 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -10,7 +10,7 @@ #include "x86.h" #include "xen.h" #include "hyperv.h" -#include "lapic.h" +#include "irq.h" #include <linux/eventfd.h> #include <linux/kvm_host.h> @@ -24,6 +24,7 @@ #include <xen/interface/sched.h> #include <asm/xen/cpuid.h> +#include <asm/pvclock.h> #include "cpuid.h" #include "trace.h" @@ -34,41 +35,32 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r); DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ); -static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn) +static int kvm_xen_shared_info_init(struct kvm *kvm) { struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; struct pvclock_wall_clock *wc; - gpa_t gpa = gfn_to_gpa(gfn); u32 *wc_sec_hi; u32 wc_version; u64 wall_nsec; int ret = 0; int idx = srcu_read_lock(&kvm->srcu); - if (gfn == KVM_XEN_INVALID_GFN) { - kvm_gpc_deactivate(gpc); - goto out; - } + read_lock_irq(&gpc->lock); + while (!kvm_gpc_check(gpc, PAGE_SIZE)) { + read_unlock_irq(&gpc->lock); - do { - ret = kvm_gpc_activate(gpc, gpa, PAGE_SIZE); + ret = kvm_gpc_refresh(gpc, PAGE_SIZE); if (ret) goto out; - /* - * This code mirrors kvm_write_wall_clock() except that it writes - * directly through the pfn cache and doesn't mark the page dirty. - */ - wall_nsec = kvm_get_wall_clock_epoch(kvm); - - /* It could be invalid again already, so we need to check */ read_lock_irq(&gpc->lock); + } - if (gpc->valid) - break; - - read_unlock_irq(&gpc->lock); - } while (1); + /* + * This code mirrors kvm_write_wall_clock() except that it writes + * directly through the pfn cache and doesn't mark the page dirty. + */ + wall_nsec = kvm_get_wall_clock_epoch(kvm); /* Paranoia checks on the 32-bit struct layout */ BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900); @@ -158,8 +150,148 @@ static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer) return HRTIMER_NORESTART; } -static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_ns) +static int xen_get_guest_pvclock(struct kvm_vcpu *vcpu, + struct pvclock_vcpu_time_info *hv_clock, + struct gfn_to_pfn_cache *gpc, + unsigned int offset) +{ + unsigned long flags; + int r; + + read_lock_irqsave(&gpc->lock, flags); + while (!kvm_gpc_check(gpc, offset + sizeof(*hv_clock))) { + read_unlock_irqrestore(&gpc->lock, flags); + + r = kvm_gpc_refresh(gpc, offset + sizeof(*hv_clock)); + if (r) + return r; + + read_lock_irqsave(&gpc->lock, flags); + } + + memcpy(hv_clock, gpc->khva + offset, sizeof(*hv_clock)); + read_unlock_irqrestore(&gpc->lock, flags); + + /* + * Sanity check TSC shift+multiplier to verify the guest's view of time + * is more or less consistent. + */ + if (hv_clock->tsc_shift != vcpu->arch.pvclock_tsc_shift || + hv_clock->tsc_to_system_mul != vcpu->arch.pvclock_tsc_mul) + return -EINVAL; + + return 0; +} + +static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, + bool linux_wa) { + struct kvm_vcpu_xen *xen = &vcpu->arch.xen; + int64_t kernel_now, delta; + uint64_t guest_now; + int r = -EOPNOTSUPP; + + /* + * The guest provides the requested timeout in absolute nanoseconds + * of the KVM clock — as *it* sees it, based on the scaled TSC and + * the pvclock information provided by KVM. + * + * The kernel doesn't support hrtimers based on CLOCK_MONOTONIC_RAW + * so use CLOCK_MONOTONIC. In the timescales covered by timers, the + * difference won't matter much as there is no cumulative effect. + * + * Calculate the time for some arbitrary point in time around "now" + * in terms of both kvmclock and CLOCK_MONOTONIC. Calculate the + * delta between the kvmclock "now" value and the guest's requested + * timeout, apply the "Linux workaround" described below, and add + * the resulting delta to the CLOCK_MONOTONIC "now" value, to get + * the absolute CLOCK_MONOTONIC time at which the timer should + * fire. + */ + do { + struct pvclock_vcpu_time_info hv_clock; + uint64_t host_tsc, guest_tsc; + + if (!static_cpu_has(X86_FEATURE_CONSTANT_TSC) || + !vcpu->kvm->arch.use_master_clock) + break; + + /* + * If both Xen PV clocks are active, arbitrarily try to use the + * compat clock first, but also try to use the non-compat clock + * if the compat clock is unusable. The two PV clocks hold the + * same information, but it's possible one (or both) is stale + * and/or currently unreachable. + */ + if (xen->vcpu_info_cache.active) + r = xen_get_guest_pvclock(vcpu, &hv_clock, &xen->vcpu_info_cache, + offsetof(struct compat_vcpu_info, time)); + if (r && xen->vcpu_time_info_cache.active) + r = xen_get_guest_pvclock(vcpu, &hv_clock, &xen->vcpu_time_info_cache, 0); + if (r) + break; + + if (!IS_ENABLED(CONFIG_64BIT) || + !kvm_get_monotonic_and_clockread(&kernel_now, &host_tsc)) { + /* + * Don't fall back to get_kvmclock_ns() because it's + * broken; it has a systemic error in its results + * because it scales directly from host TSC to + * nanoseconds, and doesn't scale first to guest TSC + * and *then* to nanoseconds as the guest does. + * + * There is a small error introduced here because time + * continues to elapse between the ktime_get() and the + * subsequent rdtsc(). But not the systemic drift due + * to get_kvmclock_ns(). + */ + kernel_now = ktime_get(); /* This is CLOCK_MONOTONIC */ + host_tsc = rdtsc(); + } + + /* Calculate the guest kvmclock as the guest would do it. */ + guest_tsc = kvm_read_l1_tsc(vcpu, host_tsc); + guest_now = __pvclock_read_cycles(&hv_clock, guest_tsc); + } while (0); + + if (r) { + /* + * Without CONSTANT_TSC, get_kvmclock_ns() is the only option. + * + * Also if the guest PV clock hasn't been set up yet, as is + * likely to be the case during migration when the vCPU has + * not been run yet. It would be possible to calculate the + * scaling factors properly in that case but there's not much + * point in doing so. The get_kvmclock_ns() drift accumulates + * over time, so it's OK to use it at startup. Besides, on + * migration there's going to be a little bit of skew in the + * precise moment at which timers fire anyway. Often they'll + * be in the "past" by the time the VM is running again after + * migration. + */ + guest_now = get_kvmclock_ns(vcpu->kvm); + kernel_now = ktime_get(); + } + + delta = guest_abs - guest_now; + + /* + * Xen has a 'Linux workaround' in do_set_timer_op() which checks for + * negative absolute timeout values (caused by integer overflow), and + * for values about 13 days in the future (2^50ns) which would be + * caused by jiffies overflow. For those cases, Xen sets the timeout + * 100ms in the future (not *too* soon, since if a guest really did + * set a long timeout on purpose we don't want to keep churning CPU + * time by waking it up). Emulate Xen's workaround when starting the + * timer in response to __HYPERVISOR_set_timer_op. + */ + if (linux_wa && + unlikely((int64_t)guest_abs < 0 || + (delta > 0 && (uint32_t) (delta >> 50) != 0))) { + delta = 100 * NSEC_PER_MSEC; + guest_abs = guest_now + delta; + } + /* * Avoid races with the old timer firing. Checking timer_expires * to avoid calling hrtimer_cancel() will only have false positives @@ -171,14 +303,12 @@ static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_ atomic_set(&vcpu->arch.xen.timer_pending, 0); vcpu->arch.xen.timer_expires = guest_abs; - if (delta_ns <= 0) { + if (delta <= 0) xen_timer_callback(&vcpu->arch.xen.timer); - } else { - ktime_t ktime_now = ktime_get(); + else hrtimer_start(&vcpu->arch.xen.timer, - ktime_add_ns(ktime_now, delta_ns), + ktime_add_ns(kernel_now, delta), HRTIMER_MODE_ABS_HARD); - } } static void kvm_xen_stop_timer(struct kvm_vcpu *vcpu) @@ -188,13 +318,6 @@ static void kvm_xen_stop_timer(struct kvm_vcpu *vcpu) atomic_set(&vcpu->arch.xen.timer_pending, 0); } -static void kvm_xen_init_timer(struct kvm_vcpu *vcpu) -{ - hrtimer_init(&vcpu->arch.xen.timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS_HARD); - vcpu->arch.xen.timer.function = xen_timer_callback; -} - static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) { struct kvm_vcpu_xen *vx = &v->arch.xen; @@ -452,14 +575,13 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) smp_wmb(); } - if (user_len2) + if (user_len2) { + kvm_gpc_mark_dirty_in_slot(gpc2); read_unlock(&gpc2->lock); + } + kvm_gpc_mark_dirty_in_slot(gpc1); read_unlock_irqrestore(&gpc1->lock, flags); - - mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT); - if (user_len2) - mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT); } void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) @@ -493,10 +615,9 @@ void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable); } -static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) +void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) { struct kvm_lapic_irq irq = { }; - int r; irq.dest_id = v->vcpu_id; irq.vector = v->arch.xen.upcall_vector; @@ -505,8 +626,7 @@ static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) irq.delivery_mode = APIC_DM_FIXED; irq.level = 1; - /* The fast version will always work for physical unicast */ - WARN_ON_ONCE(!kvm_irq_delivery_to_apic_fast(v->kvm, NULL, &irq, &r, NULL)); + kvm_irq_delivery_to_apic(v->kvm, NULL, &irq, NULL); } /* @@ -565,13 +685,13 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v) : "0" (evtchn_pending_sel32)); WRITE_ONCE(vi->evtchn_upcall_pending, 1); } + + kvm_gpc_mark_dirty_in_slot(gpc); read_unlock_irqrestore(&gpc->lock, flags); /* For the per-vCPU lapic vector, deliver it as MSI. */ if (v->arch.xen.upcall_vector) kvm_xen_inject_vcpu_vector(v); - - mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT); } int __kvm_xen_has_interrupt(struct kvm_vcpu *v) @@ -635,17 +755,59 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) } else { mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.long_mode = !!data->u.long_mode; + + /* + * Re-initialize shared_info to put the wallclock in the + * correct place. Whilst it's not necessary to do this + * unless the mode is actually changed, it does no harm + * to make the call anyway. + */ + r = kvm->arch.xen.shinfo_cache.active ? + kvm_xen_shared_info_init(kvm) : 0; mutex_unlock(&kvm->arch.xen.xen_lock); - r = 0; } break; case KVM_XEN_ATTR_TYPE_SHARED_INFO: + case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA: { + int idx; + mutex_lock(&kvm->arch.xen.xen_lock); - r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn); + + idx = srcu_read_lock(&kvm->srcu); + + if (data->type == KVM_XEN_ATTR_TYPE_SHARED_INFO) { + gfn_t gfn = data->u.shared_info.gfn; + + if (gfn == KVM_XEN_INVALID_GFN) { + kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); + r = 0; + } else { + r = kvm_gpc_activate(&kvm->arch.xen.shinfo_cache, + gfn_to_gpa(gfn), PAGE_SIZE); + } + } else { + void __user * hva = u64_to_user_ptr(data->u.shared_info.hva); + + if (!PAGE_ALIGNED(hva)) { + r = -EINVAL; + } else if (!hva) { + kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); + r = 0; + } else { + r = kvm_gpc_activate_hva(&kvm->arch.xen.shinfo_cache, + (unsigned long)hva, PAGE_SIZE); + } + } + + srcu_read_unlock(&kvm->srcu, idx); + + if (!r && kvm->arch.xen.shinfo_cache.active) + r = kvm_xen_shared_info_init(kvm); + mutex_unlock(&kvm->arch.xen.xen_lock); break; - + } case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR: if (data->u.vector && data->u.vector < 0x10) r = -EINVAL; @@ -699,13 +861,21 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) break; case KVM_XEN_ATTR_TYPE_SHARED_INFO: - if (kvm->arch.xen.shinfo_cache.active) + if (kvm_gpc_is_gpa_active(&kvm->arch.xen.shinfo_cache)) data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa); else data->u.shared_info.gfn = KVM_XEN_INVALID_GFN; r = 0; break; + case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA: + if (kvm_gpc_is_hva_active(&kvm->arch.xen.shinfo_cache)) + data->u.shared_info.hva = kvm->arch.xen.shinfo_cache.uhva; + else + data->u.shared_info.hva = 0; + r = 0; + break; + case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR: data->u.vector = kvm->arch.xen.upcall_vector; r = 0; @@ -742,20 +912,33 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) switch (data->type) { case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO: + case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA: /* No compat necessary here. */ BUILD_BUG_ON(sizeof(struct vcpu_info) != sizeof(struct compat_vcpu_info)); BUILD_BUG_ON(offsetof(struct vcpu_info, time) != offsetof(struct compat_vcpu_info, time)); - if (data->u.gpa == KVM_XEN_INVALID_GPA) { - kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); - r = 0; - break; + if (data->type == KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO) { + if (data->u.gpa == KVM_XEN_INVALID_GPA) { + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); + r = 0; + break; + } + + r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache, + data->u.gpa, sizeof(struct vcpu_info)); + } else { + if (data->u.hva == 0) { + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); + r = 0; + break; + } + + r = kvm_gpc_activate_hva(&vcpu->arch.xen.vcpu_info_cache, + data->u.hva, sizeof(struct vcpu_info)); } - r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache, - data->u.gpa, sizeof(struct vcpu_info)); if (!r) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -935,18 +1118,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; } - if (!vcpu->arch.xen.timer.function) - kvm_xen_init_timer(vcpu); - /* Stop the timer (if it's running) before changing the vector */ kvm_xen_stop_timer(vcpu); vcpu->arch.xen.timer_virq = data->u.timer.port; /* Start the timer if the new value has a valid vector+expiry. */ if (data->u.timer.port && data->u.timer.expires_ns) - kvm_xen_start_timer(vcpu, data->u.timer.expires_ns, - data->u.timer.expires_ns - - get_kvmclock_ns(vcpu->kvm)); + kvm_xen_start_timer(vcpu, data->u.timer.expires_ns, false); r = 0; break; @@ -977,13 +1155,21 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) switch (data->type) { case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO: - if (vcpu->arch.xen.vcpu_info_cache.active) + if (kvm_gpc_is_gpa_active(&vcpu->arch.xen.vcpu_info_cache)) data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa; else data->u.gpa = KVM_XEN_INVALID_GPA; r = 0; break; + case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA: + if (kvm_gpc_is_hva_active(&vcpu->arch.xen.vcpu_info_cache)) + data->u.hva = vcpu->arch.xen.vcpu_info_cache.uhva; + else + data->u.hva = 0; + r = 0; + break; + case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: if (vcpu->arch.xen.vcpu_time_info_cache.active) data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa; @@ -1093,9 +1279,24 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) u32 page_num = data & ~PAGE_MASK; u64 page_addr = data & PAGE_MASK; bool lm = is_long_mode(vcpu); + int r = 0; + + mutex_lock(&kvm->arch.xen.xen_lock); + if (kvm->arch.xen.long_mode != lm) { + kvm->arch.xen.long_mode = lm; + + /* + * Re-initialize shared_info to put the wallclock in the + * correct place. + */ + if (kvm->arch.xen.shinfo_cache.active && + kvm_xen_shared_info_init(kvm)) + r = 1; + } + mutex_unlock(&kvm->arch.xen.xen_lock); - /* Latch long_mode for shared_info pages etc. */ - vcpu->kvm->arch.xen.long_mode = lm; + if (r) + return r; /* * If Xen hypercall intercept is enabled, fill the hypercall @@ -1114,7 +1315,7 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) instructions[0] = 0xb8; /* vmcall / vmmcall */ - static_call(kvm_x86_patch_hypercall)(vcpu, instructions + 5); + kvm_x86_call(patch_hypercall)(vcpu, instructions + 5); /* ret */ instructions[8] = 0xc3; @@ -1134,10 +1335,10 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) * Note, truncation is a non-issue as 'lm' is guaranteed to be * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes. */ - hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64 - : kvm->arch.xen_hvm_config.blob_addr_32; - u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 - : kvm->arch.xen_hvm_config.blob_size_32; + hva_t blob_addr = lm ? kvm->arch.xen.hvm_config.blob_addr_64 + : kvm->arch.xen.hvm_config.blob_addr_32; + u8 blob_size = lm ? kvm->arch.xen.hvm_config.blob_size_64 + : kvm->arch.xen.hvm_config.blob_size_32; u8 *page; int ret; @@ -1178,15 +1379,24 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) xhc->blob_size_32 || xhc->blob_size_64)) return -EINVAL; + /* + * Restrict the MSR to the range that is unofficially reserved for + * synthetic, virtualization-defined MSRs, e.g. to prevent confusing + * KVM by colliding with a real MSR that requires special handling. + */ + if (xhc->msr && + (xhc->msr < KVM_XEN_MSR_MIN_INDEX || xhc->msr > KVM_XEN_MSR_MAX_INDEX)) + return -EINVAL; + mutex_lock(&kvm->arch.xen.xen_lock); - if (xhc->msr && !kvm->arch.xen_hvm_config.msr) + if (xhc->msr && !kvm->arch.xen.hvm_config.msr) static_branch_inc(&kvm_xen_enabled.key); - else if (!xhc->msr && kvm->arch.xen_hvm_config.msr) + else if (!xhc->msr && kvm->arch.xen.hvm_config.msr) static_branch_slow_dec_deferred(&kvm_xen_enabled); - old_flags = kvm->arch.xen_hvm_config.flags; - memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc)); + old_flags = kvm->arch.xen.hvm_config.flags; + memcpy(&kvm->arch.xen.hvm_config, xhc, sizeof(*xhc)); mutex_unlock(&kvm->arch.xen.xen_lock); @@ -1267,7 +1477,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, int i; if (!lapic_in_kernel(vcpu) || - !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)) + !(vcpu->kvm->arch.xen.hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)) return false; if (IS_ENABLED(CONFIG_64BIT) && !longmode) { @@ -1334,7 +1544,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask); if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) { - vcpu->arch.mp_state = KVM_MP_STATE_HALTED; + kvm_set_mp_state(vcpu, KVM_MP_STATE_HALTED); if (sched_poll.timeout) mod_timer(&vcpu->arch.xen.poll_timer, @@ -1343,9 +1553,9 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, kvm_vcpu_halt(vcpu); if (sched_poll.timeout) - del_timer(&vcpu->arch.xen.poll_timer); + timer_delete(&vcpu->arch.xen.poll_timer); - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); } vcpu->arch.xen.poll_evtchn = 0; @@ -1361,7 +1571,8 @@ out: static void cancel_evtchn_poll(struct timer_list *t) { - struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.xen.poll_timer); + struct kvm_vcpu *vcpu = timer_container_of(vcpu, t, + arch.xen.poll_timer); kvm_make_request(KVM_REQ_UNBLOCK, vcpu); kvm_vcpu_kick(vcpu); @@ -1396,7 +1607,6 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd, { struct vcpu_set_singleshot_timer oneshot; struct x86_exception e; - s64 delta; if (!kvm_xen_timer_enabled(vcpu)) return false; @@ -1430,9 +1640,7 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd, return true; } - /* A delta <= 0 results in an immediate callback, which is what we want */ - delta = oneshot.timeout_abs_ns - get_kvmclock_ns(vcpu->kvm); - kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, delta); + kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, false); *r = 0; return true; @@ -1455,29 +1663,10 @@ static bool kvm_xen_hcall_set_timer_op(struct kvm_vcpu *vcpu, uint64_t timeout, if (!kvm_xen_timer_enabled(vcpu)) return false; - if (timeout) { - uint64_t guest_now = get_kvmclock_ns(vcpu->kvm); - int64_t delta = timeout - guest_now; - - /* Xen has a 'Linux workaround' in do_set_timer_op() which - * checks for negative absolute timeout values (caused by - * integer overflow), and for values about 13 days in the - * future (2^50ns) which would be caused by jiffies - * overflow. For those cases, it sets the timeout 100ms in - * the future (not *too* soon, since if a guest really did - * set a long timeout on purpose we don't want to keep - * churning CPU time by waking it up). - */ - if (unlikely((int64_t)timeout < 0 || - (delta > 0 && (uint32_t) (delta >> 50) != 0))) { - delta = 100 * NSEC_PER_MSEC; - timeout = guest_now + delta; - } - - kvm_xen_start_timer(vcpu, timeout, delta); - } else { + if (timeout) + kvm_xen_start_timer(vcpu, timeout, true); + else kvm_xen_stop_timer(vcpu); - } *r = 0; return true; @@ -1516,7 +1705,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) params[5] = (u64)kvm_r9_read(vcpu); } #endif - cpl = static_call(kvm_x86_get_cpl)(vcpu); + cpl = kvm_x86_call(get_cpl)(vcpu); trace_kvm_xen_hypercall(cpl, input, params[0], params[1], params[2], params[3], params[4], params[5]); @@ -1621,9 +1810,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) WRITE_ONCE(xe->vcpu_idx, vcpu->vcpu_idx); } - if (!vcpu->arch.xen.vcpu_info_cache.active) - return -EINVAL; - if (xe->port >= max_evtchn_port(kvm)) return -EINVAL; @@ -1731,8 +1917,6 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) mm_borrowed = true; } - mutex_lock(&kvm->arch.xen.xen_lock); - /* * It is theoretically possible for the page to be unmapped * and the MMU notifier to invalidate the shared_info before @@ -1760,8 +1944,6 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); } while(!rc); - mutex_unlock(&kvm->arch.xen.xen_lock); - if (mm_borrowed) kthread_unuse_mm(kvm->mm); @@ -2108,15 +2290,13 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.xen.poll_evtchn = 0; timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0); + hrtimer_setup(&vcpu->arch.xen.timer, xen_timer_callback, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_HARD); - kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm, NULL, - KVM_HOST_USES_PFN); - kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm, NULL, - KVM_HOST_USES_PFN); - kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm, NULL, - KVM_HOST_USES_PFN); - kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm, NULL, - KVM_HOST_USES_PFN); + kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm); + kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm); + kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm); + kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm); } void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) @@ -2129,37 +2309,14 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); - del_timer_sync(&vcpu->arch.xen.poll_timer); -} - -void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *entry; - u32 function; - - if (!vcpu->arch.xen.cpuid.base) - return; - - function = vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3); - if (function > vcpu->arch.xen.cpuid.limit) - return; - - entry = kvm_find_cpuid_entry_index(vcpu, function, 1); - if (entry) { - entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul; - entry->edx = vcpu->arch.hv_clock.tsc_shift; - } - - entry = kvm_find_cpuid_entry_index(vcpu, function, 2); - if (entry) - entry->eax = vcpu->arch.hw_tsc_khz; + timer_delete_sync(&vcpu->arch.xen.poll_timer); } void kvm_xen_init_vm(struct kvm *kvm) { mutex_init(&kvm->arch.xen.xen_lock); idr_init(&kvm->arch.xen.evtchn_ports); - kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN); + kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm); } void kvm_xen_destroy_vm(struct kvm *kvm) @@ -2176,6 +2333,6 @@ void kvm_xen_destroy_vm(struct kvm *kvm) } idr_destroy(&kvm->arch.xen.evtchn_ports); - if (kvm->arch.xen_hvm_config.msr) + if (kvm->arch.xen.hvm_config.msr) static_branch_slow_dec_deferred(&kvm_xen_enabled); } |