summary refs log tree commit diff
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- arch/x86/kvm/x86.c 163
1 file changed, 119 insertions, 44 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 04c5d96b1d67..51ccfe08e32f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -434,12 +434,14 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
-void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
+int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
if (err)
kvm_inject_gp(vcpu, 0);
else
- kvm_x86_ops->skip_emulated_instruction(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+
+ return 1;
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
@@ -573,7 +575,7 @@ out:
}
EXPORT_SYMBOL_GPL(load_pdptrs);
-static bool pdptrs_changed(struct kvm_vcpu *vcpu)
+bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
bool changed = true;
@@ -599,6 +601,7 @@ out:
return changed;
}
+EXPORT_SYMBOL_GPL(pdptrs_changed);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
@@ -1128,8 +1131,8 @@ struct pvclock_gtod_data {
struct { /* extract of a clocksource struct */
int vclock_mode;
- cycle_t cycle_last;
- cycle_t mask;
+ u64 cycle_last;
+ u64 mask;
u32 mult;
u32 shift;
} clock;
@@ -1569,9 +1572,9 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
#ifdef CONFIG_X86_64
-static cycle_t read_tsc(void)
+static u64 read_tsc(void)
{
- cycle_t ret = (cycle_t)rdtsc_ordered();
+ u64 ret = (u64)rdtsc_ordered();
u64 last = pvclock_gtod_data.clock.cycle_last;
if (likely(ret >= last))
@@ -1589,7 +1592,7 @@ static cycle_t read_tsc(void)
return last;
}
-static inline u64 vgettsc(cycle_t *cycle_now)
+static inline u64 vgettsc(u64 *cycle_now)
{
long v;
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
@@ -1600,7 +1603,7 @@ static inline u64 vgettsc(cycle_t *cycle_now)
return v * gtod->clock.mult;
}
-static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
+static int do_monotonic_boot(s64 *t, u64 *cycle_now)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
@@ -1621,7 +1624,7 @@ static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
}
/* returns true if host is using tsc clocksource */
-static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
{
/* checked again under seqlock below */
if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
@@ -2071,6 +2074,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
return;
+ vcpu->arch.st.steal.preempted = 0;
+
if (vcpu->arch.st.steal.version & 1)
vcpu->arch.st.steal.version += 1; /* first time write, random junk */
@@ -2176,7 +2181,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_KVM_SYSTEM_TIME_NEW:
case MSR_KVM_SYSTEM_TIME: {
- u64 gpa_offset;
struct kvm_arch *ka = &vcpu->kvm->arch;
kvmclock_reset(vcpu);
@@ -2198,8 +2202,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!(data & 1))
break;
- gpa_offset = data & ~(PAGE_MASK | 1);
-
if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.pv_time, data & ~1ULL,
sizeof(struct pvclock_vcpu_time_info)))
@@ -2294,7 +2296,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
if (!ignore_msrs) {
- vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
+ vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
return 1;
} else {
@@ -2506,7 +2508,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
if (!ignore_msrs) {
- vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index);
+ vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
+ msr_info->index);
return 1;
} else {
vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
@@ -2810,7 +2813,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
if (kvm_lapic_hv_timer_in_use(vcpu) &&
kvm_x86_ops->set_hv_timer(vcpu,
- kvm_get_lapic_tscdeadline_msr(vcpu)))
+ kvm_get_lapic_target_expiration_tsc(vcpu)))
kvm_lapic_switch_to_sw_timer(vcpu);
/*
* On a host with synchronized TSC, there is no need to update
@@ -2826,8 +2829,39 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
}
+static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ return;
+
+ vcpu->arch.st.steal.preempted = 1;
+
+ kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
+ &vcpu->arch.st.steal.preempted,
+ offsetof(struct kvm_steal_time, preempted),
+ sizeof(vcpu->arch.st.steal.preempted));
+}
+
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ int idx;
+ /*
+ * Disable page faults because we're in atomic context here.
+ * kvm_write_guest_offset_cached() would call might_fault()
+ * that relies on pagefault_disable() to tell if there's a
+ * bug. NOTE: the write to guest memory may not go through
+ * during postcopy live migration or if there's heavy guest
+ * paging.
+ */
+ pagefault_disable();
+ /*
+ * kvm_memslots() will be called by
+ * kvm_write_guest_offset_cached() so take the srcu lock.
+ */
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_steal_time_set_preempted(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
@@ -4816,7 +4850,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
}
-int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
+static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
{
if (!need_emulate_wbinvd(vcpu))
return X86EMUL_CONTINUE;
@@ -4836,8 +4870,8 @@ int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
{
- kvm_x86_ops->skip_emulated_instruction(vcpu);
- return kvm_emulate_wbinvd_noskip(vcpu);
+ kvm_emulate_wbinvd_noskip(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
@@ -5081,11 +5115,6 @@ static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
{
preempt_disable();
kvm_load_guest_fpu(emul_to_vcpu(ctxt));
- /*
- * CR0.TS may reference the host fpu state, not the guest fpu state,
- * so it may be clear at this point.
- */
- clts();
}
static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
@@ -5440,7 +5469,6 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
kvm_run->exit_reason = KVM_EXIT_DEBUG;
*r = EMULATE_USER_EXIT;
} else {
- vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
/*
* "Certain debug exceptions may clear bit 0-3. The
* remaining contents of the DR6 register are never
@@ -5453,6 +5481,17 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
}
}
+int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+ int r = EMULATE_DONE;
+
+ kvm_x86_ops->skip_emulated_instruction(vcpu);
+ kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ return r == EMULATE_DONE;
+}
+EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
+
static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
{
if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
@@ -5638,6 +5677,49 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
}
EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
+static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
+{
+ unsigned long val;
+
+ /* We should only ever be called with arch.pio.count equal to 1 */
+ BUG_ON(vcpu->arch.pio.count != 1);
+
+ /* For size less than 4 we merge, else we zero extend */
+ val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX)
+ : 0;
+
+ /*
+ * Since vcpu->arch.pio.count == 1, let emulator_pio_in_emulated() perform
+ * the copy and tracing.
+ */
+ emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
+ vcpu->arch.pio.port, &val, 1);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+
+ return 1;
+}
+
+int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
+{
+ unsigned long val;
+ int ret;
+
+ /* For size less than 4 we merge, else we zero extend */
+ val = (size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0;
+
+ ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
+ &val, 1);
+ if (ret) {
+ kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+ return ret;
+ }
+
+ vcpu->arch.complete_userspace_io = complete_fast_pio_in;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_pio_in);
+
static int kvmclock_cpu_down_prep(unsigned int cpu)
{
__this_cpu_write(cpu_tsc_khz, 0);
@@ -5773,7 +5855,7 @@ static void kvm_timer_init(void)
}
pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
- cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE",
+ cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
kvmclock_cpu_online, kvmclock_cpu_down_prep);
}
@@ -5987,8 +6069,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
- kvm_x86_ops->skip_emulated_instruction(vcpu);
- return kvm_vcpu_halt(vcpu);
+ int ret = kvm_skip_emulated_instruction(vcpu);
+ /*
+ * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
+ * KVM_EXIT_DEBUG here.
+ */
+ return kvm_vcpu_halt(vcpu) && ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
@@ -6019,9 +6105,9 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
- int op_64_bit, r = 1;
+ int op_64_bit, r;
- kvm_x86_ops->skip_emulated_instruction(vcpu);
+ r = kvm_skip_emulated_instruction(vcpu);
if (kvm_hv_hypercall_enabled(vcpu->kvm))
return kvm_hv_hypercall(vcpu);
@@ -7407,25 +7493,13 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- if (!vcpu->guest_fpu_loaded) {
- vcpu->fpu_counter = 0;
+ if (!vcpu->guest_fpu_loaded)
return;
- }
vcpu->guest_fpu_loaded = 0;
copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
__kernel_fpu_end();
++vcpu->stat.fpu_reload;
- /*
- * If using eager FPU mode, or if the guest is a frequent user
- * of the FPU, just leave the FPU active for next time.
- * Every 255 times fpu_counter rolls over to 0; a guest that uses
- * the FPU in bursts will revert to loading it on demand.
- */
- if (!use_eager_fpu()) {
- if (++vcpu->fpu_counter < 5)
- kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
- }
trace_kvm_fpu(0);
}
@@ -7824,6 +7898,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
+ mutex_init(&kvm->arch.hyperv.hv_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
@@ -8176,7 +8251,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
- kvm_mmu_invalidate_zap_all_pages(kvm);
+ kvm_page_track_flush_slot(kvm, slot);
}
static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)