summaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/cpuid.c1
-rw-r--r--arch/x86/kvm/emulate.c5
-rw-r--r--arch/x86/kvm/hyperv.c8
-rw-r--r--arch/x86/kvm/kvm_emulate.h3
-rw-r--r--arch/x86/kvm/lapic.c36
-rw-r--r--arch/x86/kvm/mmu/mmu.c26
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h14
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c6
-rw-r--r--arch/x86/kvm/svm/avic.c12
-rw-r--r--arch/x86/kvm/svm/sev.c26
-rw-r--r--arch/x86/kvm/svm/svm.c4
-rw-r--r--arch/x86/kvm/svm/svm.h2
-rw-r--r--arch/x86/kvm/trace.h6
-rw-r--r--arch/x86/kvm/vmx/capabilities.h3
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c14
-rw-r--r--arch/x86/kvm/vmx/posted_intr.h1
-rw-r--r--arch/x86/kvm/vmx/vmx.c7
-rw-r--r--arch/x86/kvm/x86.c52
18 files changed, 160 insertions, 66 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9a48f138832d..b4da665bb892 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -655,6 +655,7 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
entry->ecx = F(RDPID);
++array->nent;
+ break;
default:
break;
}
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8a0ccdb56076..5e5de05a8fbf 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5111,7 +5111,7 @@ done:
return rc;
}
-int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
+int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
{
int rc = X86EMUL_CONTINUE;
int mode = ctxt->mode;
@@ -5322,7 +5322,8 @@ done_prefixes:
ctxt->execute = opcode.u.execute;
- if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
+ if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
+ likely(!(ctxt->d & EmulateOnUD)))
return EMULATION_FAILED;
if (unlikely(ctxt->d &
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index f98370a39936..f00830e5202f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1172,6 +1172,7 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
{
struct kvm_hv *hv = to_kvm_hv(kvm);
u64 gfn;
+ int idx;
if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET ||
@@ -1190,9 +1191,16 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
hv->tsc_ref.tsc_sequence = 0;
+
+ /*
+ * Take the srcu lock as memslots will be accessed to check the gfn
+ * cache generation against the memslots generation.
+ */
+ idx = srcu_read_lock(&kvm->srcu);
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
+ srcu_read_unlock(&kvm->srcu, idx);
out_unlock:
mutex_unlock(&hv->hv_lock);
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index f016838faedd..3e870bf9ca4d 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -314,7 +314,6 @@ struct x86_emulate_ctxt {
int interruptibility;
bool perm_ok; /* do not check permissions if true */
- bool ud; /* inject an #UD if host doesn't support insn */
bool tf; /* TF value before instruction (after for syscall/sysret) */
bool have_exception;
@@ -491,7 +490,7 @@ enum x86_intercept {
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
#endif
-int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
+int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type);
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
#define EMULATION_FAILED -1
#define EMULATION_OK 0
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c0ebef560bd1..17fa4ab1b834 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1410,6 +1410,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
if (!apic_x2apic_mode(apic))
valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
+ if (alignment + len > 4)
+ return 1;
+
if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
return 1;
@@ -1494,6 +1497,15 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
static void cancel_hv_timer(struct kvm_lapic *apic);
+static void cancel_apic_timer(struct kvm_lapic *apic)
+{
+ hrtimer_cancel(&apic->lapic_timer.timer);
+ preempt_disable();
+ if (apic->lapic_timer.hv_timer_in_use)
+ cancel_hv_timer(apic);
+ preempt_enable();
+}
+
static void apic_update_lvtt(struct kvm_lapic *apic)
{
u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
@@ -1502,11 +1514,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
if (apic->lapic_timer.timer_mode != timer_mode) {
if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
APIC_LVT_TIMER_TSCDEADLINE)) {
- hrtimer_cancel(&apic->lapic_timer.timer);
- preempt_disable();
- if (apic->lapic_timer.hv_timer_in_use)
- cancel_hv_timer(apic);
- preempt_enable();
+ cancel_apic_timer(apic);
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
apic->lapic_timer.period = 0;
apic->lapic_timer.tscdeadline = 0;
@@ -1598,11 +1606,19 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
+ if (lapic_timer_advance_dynamic) {
+ adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
+ /*
+ * If the timer fired early, reread the TSC to account for the
+ * overhead of the above adjustment to avoid waiting longer
+ * than is necessary.
+ */
+ if (guest_tsc < tsc_deadline)
+ guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+ }
+
if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
-
- if (lapic_timer_advance_dynamic)
- adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
}
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
@@ -1661,7 +1677,7 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
}
atomic_inc(&apic->lapic_timer.pending);
- kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
if (from_timer_fn)
kvm_vcpu_kick(vcpu);
}
@@ -2084,7 +2100,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
if (apic_lvtt_tscdeadline(apic))
break;
- hrtimer_cancel(&apic->lapic_timer.timer);
+ cancel_apic_timer(apic);
kvm_lapic_set_reg(apic, APIC_TMICT, val);
start_apic_timer(apic);
break;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0144c40d09c7..8d5876dfc6b7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4739,9 +4739,33 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
context->inject_page_fault = kvm_inject_page_fault;
}
+static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu)
+{
+ union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false);
+
+ /*
+ * Nested MMUs are used only for walking L2's gva->gpa, they never have
+ * shadow pages of their own and so "direct" has no meaning. Set it
+ * to "true" to try to detect bogus usage of the nested MMU.
+ */
+ role.base.direct = true;
+
+ if (!is_paging(vcpu))
+ role.base.level = 0;
+ else if (is_long_mode(vcpu))
+ role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL :
+ PT64_ROOT_4LEVEL;
+ else if (is_pae(vcpu))
+ role.base.level = PT32E_ROOT_LEVEL;
+ else
+ role.base.level = PT32_ROOT_LEVEL;
+
+ return role;
+}
+
static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
{
- union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
+ union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu);
struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
if (new_role.as_u64 == g_context->mmu_role.as_u64)
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 70b7e44e3035..823a5919f9fa 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -90,8 +90,8 @@ struct guest_walker {
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
bool pte_writable[PT_MAX_FULL_LEVELS];
- unsigned pt_access;
- unsigned pte_access;
+ unsigned int pt_access[PT_MAX_FULL_LEVELS];
+ unsigned int pte_access;
gfn_t gfn;
struct x86_exception fault;
};
@@ -418,13 +418,15 @@ retry_walk:
}
walker->ptes[walker->level - 1] = pte;
+
+ /* Convert to ACC_*_MASK flags for struct guest_walker. */
+ walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
} while (!is_last_gpte(mmu, walker->level, pte));
pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
/* Convert to ACC_*_MASK flags for struct guest_walker. */
- walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
if (unlikely(errcode))
@@ -463,7 +465,8 @@ retry_walk:
}
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
- __func__, (u64)pte, walker->pte_access, walker->pt_access);
+ __func__, (u64)pte, walker->pte_access,
+ walker->pt_access[walker->level - 1]);
return 1;
error:
@@ -643,7 +646,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
struct kvm_mmu_page *sp = NULL;
struct kvm_shadow_walk_iterator it;
- unsigned direct_access, access = gw->pt_access;
+ unsigned int direct_access, access;
int top_level, level, req_level, ret;
gfn_t base_gfn = gw->gfn;
@@ -675,6 +678,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
sp = NULL;
if (!is_shadow_present_pte(*it.sptep)) {
table_gfn = gw->table_gfn[it.level - 2];
+ access = gw->pt_access[it.level - 2];
sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
false, access);
}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 95eeb5ac6a8a..237317b1eddd 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1192,9 +1192,9 @@ bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
}
/*
- * Remove write access from all the SPTEs mapping GFNs [start, end). If
- * skip_4k is set, SPTEs that map 4k pages, will not be write-protected.
- * Returns true if an SPTE has been changed and the TLBs need to be flushed.
+ * Remove write access from all SPTEs at or above min_level that map GFNs
+ * [start, end). Returns true if an SPTE has been changed and the TLBs need to
+ * be flushed.
*/
static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end, int min_level)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 712b4e0de481..5e7e920113f3 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -28,10 +28,8 @@
#include "svm.h"
/* enable / disable AVIC */
-int avic;
-#ifdef CONFIG_X86_LOCAL_APIC
-module_param(avic, int, S_IRUGO);
-#endif
+bool avic;
+module_param(avic, bool, S_IRUGO);
#define SVM_AVIC_DOORBELL 0xc001011b
@@ -223,7 +221,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
return &avic_physical_id_table[index];
}
-/**
+/*
* Note:
* AVIC hardware walks the nested page table to check permissions,
* but does not use the SPA address specified in the leaf page
@@ -766,7 +764,7 @@ out:
return ret;
}
-/**
+/*
* Note:
* The HW cannot support posting multicast/broadcast
* interrupts to a vCPU. So, we still use legacy interrupt
@@ -1007,7 +1005,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-/**
+/*
* This function is called during VCPU halt/unhalt.
*/
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 5bc887e9a986..8d36f0c73071 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -199,9 +199,19 @@ static void sev_asid_free(struct kvm_sev_info *sev)
sev->misc_cg = NULL;
}
-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+static void sev_decommission(unsigned int handle)
{
struct sev_data_decommission decommission;
+
+ if (!handle)
+ return;
+
+ decommission.handle = handle;
+ sev_guest_decommission(&decommission, NULL);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
struct sev_data_deactivate deactivate;
if (!handle)
@@ -214,9 +224,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
sev_guest_deactivate(&deactivate, NULL);
up_read(&sev_deactivate_lock);
- /* decommission handle */
- decommission.handle = handle;
- sev_guest_decommission(&decommission, NULL);
+ sev_decommission(handle);
}
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -341,8 +349,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Bind ASID to this guest */
ret = sev_bind_asid(kvm, start.handle, error);
- if (ret)
+ if (ret) {
+ sev_decommission(start.handle);
goto e_free_session;
+ }
/* return handle to userspace */
params.handle = start.handle;
@@ -1103,10 +1113,9 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct sev_data_send_start data;
int ret;
+ memset(&data, 0, sizeof(data));
data.handle = sev->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
- if (ret < 0)
- return ret;
params->session_len = data.session_len;
if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
@@ -1215,10 +1224,9 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct sev_data_send_update_data data;
int ret;
+ memset(&data, 0, sizeof(data));
data.handle = sev->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
- if (ret < 0)
- return ret;
params->hdr_len = data.hdr_len;
params->trans_len = data.trans_len;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 05eca131eaf2..e088086f3de6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1010,9 +1010,7 @@ static __init int svm_hardware_setup(void)
}
if (avic) {
- if (!npt_enabled ||
- !boot_cpu_has(X86_FEATURE_AVIC) ||
- !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
+ if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)) {
avic = false;
} else {
pr_info("AVIC enabled\n");
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 2c9ece618b29..2908c6ab5bb4 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -480,7 +480,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
-extern int avic;
+extern bool avic;
static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
{
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a61c015870e3..4f839148948b 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1550,16 +1550,16 @@ TRACE_EVENT(kvm_nested_vmenter_failed,
TP_ARGS(msg, err),
TP_STRUCT__entry(
- __field(const char *, msg)
+ __string(msg, msg)
__field(u32, err)
),
TP_fast_assign(
- __entry->msg = msg;
+ __assign_str(msg, msg);
__entry->err = err;
),
- TP_printk("%s%s", __entry->msg, !__entry->err ? "" :
+ TP_printk("%s%s", __get_str(msg), !__entry->err ? "" :
__print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
);
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 8dee8a5fbc17..aa0e7872fcc9 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -90,8 +90,7 @@ static inline bool cpu_has_vmx_preemption_timer(void)
static inline bool cpu_has_vmx_posted_intr(void)
{
- return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
- vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
}
static inline bool cpu_has_load_ia32_efer(void)
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 459748680daf..5f81ef092bd4 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -238,6 +238,20 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
/*
+ * Bail out of the block loop if the VM has an assigned
+ * device, but the blocking vCPU didn't reconfigure the
+ * PI.NV to the wakeup vector, i.e. the assigned device
+ * came along after the initial check in pi_pre_block().
+ */
+void vmx_pi_start_assignment(struct kvm *kvm)
+{
+ if (!irq_remapping_cap(IRQ_POSTING_CAP))
+ return;
+
+ kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
+}
+
+/*
* pi_update_irte - set IRTE for Posted-Interrupts
*
* @kvm: kvm
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index 0bdc41391c5b..7f7b2326caf5 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -95,5 +95,6 @@ void __init pi_init_cpu(int cpu);
bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
bool set);
+void vmx_pi_start_assignment(struct kvm *kvm);
#endif /* __KVM_X86_VMX_POSTED_INTR_H */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4bceb5ca3a89..c2a779b688e6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4843,7 +4843,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_run *kvm_run = vcpu->run;
u32 intr_info, ex_no, error_code;
- unsigned long cr2, rip, dr6;
+ unsigned long cr2, dr6;
u32 vect_info;
vect_info = vmx->idt_vectoring_info;
@@ -4933,8 +4933,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.event_exit_inst_len =
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
kvm_run->exit_reason = KVM_EXIT_DEBUG;
- rip = kvm_rip_read(vcpu);
- kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+ kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
kvm_run->debug.arch.exception = ex_no;
break;
case AC_VECTOR:
@@ -6248,6 +6247,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
switch (kvm_get_apic_mode(vcpu)) {
case LAPIC_MODE_INVALID:
WARN_ONCE(true, "Invalid local APIC state");
+ break;
case LAPIC_MODE_DISABLED:
break;
case LAPIC_MODE_XAPIC:
@@ -7721,6 +7721,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.nested_ops = &vmx_nested_ops,
.update_pi_irte = pi_update_irte,
+ .start_assignment = vmx_pi_start_assignment,
#ifdef CONFIG_X86_64
.set_hv_timer = vmx_set_hv_timer,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bbc4e04e67ad..e0f4a46649d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3072,6 +3072,19 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
+
+ if (!tdp_enabled) {
+ /*
+ * A TLB flush on behalf of the guest is equivalent to
+ * INVPCID(all), toggling CR4.PGE, etc., which requires
+ * a forced sync of the shadow page tables. Unload the
+ * entire MMU here and the subsequent load will sync the
+ * shadow page tables, and also flush the TLB.
+ */
+ kvm_mmu_unload(vcpu);
+ return;
+ }
+
static_call(kvm_x86_tlb_flush_guest)(vcpu);
}
@@ -3101,10 +3114,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
* expensive IPIs.
*/
if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+ u8 st_preempted = xchg(&st->preempted, 0);
+
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
- st->preempted & KVM_VCPU_FLUSH_TLB);
- if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ st_preempted & KVM_VCPU_FLUSH_TLB);
+ if (st_preempted & KVM_VCPU_FLUSH_TLB)
kvm_vcpu_flush_tlb_guest(vcpu);
+ } else {
+ st->preempted = 0;
}
vcpu->arch.st.preempted = 0;
@@ -7089,7 +7106,10 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
{
- emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+ vcpu->arch.hflags = emul_flags;
+ kvm_mmu_reset_context(vcpu);
}
static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
@@ -7226,6 +7246,11 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
+ ctxt->interruptibility = 0;
+ ctxt->have_exception = false;
+ ctxt->exception.vector = -1;
+ ctxt->perm_ok = false;
+
init_decode_cache(ctxt);
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
}
@@ -7561,14 +7586,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
kvm_vcpu_check_breakpoint(vcpu, &r))
return r;
- ctxt->interruptibility = 0;
- ctxt->have_exception = false;
- ctxt->exception.vector = -1;
- ctxt->perm_ok = false;
-
- ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
-
- r = x86_decode_insn(ctxt, insn, insn_len);
+ r = x86_decode_insn(ctxt, insn, insn_len, emulation_type);
trace_kvm_emulate_insn_start(vcpu);
++vcpu->stat.insn_emulation;
@@ -8243,6 +8261,7 @@ void kvm_arch_exit(void)
kvm_x86_ops.hardware_enable = NULL;
kvm_mmu_module_exit();
free_percpu(user_return_msrs);
+ kmem_cache_destroy(x86_emulator_cache);
kmem_cache_destroy(x86_fpu_cache);
#ifdef CONFIG_KVM_XEN
static_key_deferred_flush(&kvm_xen_enabled);
@@ -8360,6 +8379,9 @@ static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
vcpu->stat.directed_yield_attempted++;
+ if (single_task_running())
+ goto no_yield;
+
rcu_read_lock();
map = rcu_dereference(vcpu->kvm->arch.apic_map);
@@ -9496,7 +9518,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (r <= 0)
break;
- kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_clear_request(KVM_REQ_UNBLOCK, vcpu);
if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu);
@@ -10115,8 +10137,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
kvm_update_dr7(vcpu);
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
- get_segment_base(vcpu, VCPU_SREG_CS);
+ vcpu->arch.singlestep_rip = kvm_get_linear_rip(vcpu);
/*
* Trigger an rflags update that will inject or remove the trace
@@ -11499,7 +11520,8 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
void kvm_arch_start_assignment(struct kvm *kvm)
{
- atomic_inc(&kvm->arch.assigned_device_count);
+ if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
+ static_call_cond(kvm_x86_start_assignment)(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);