summaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/emulate.c24
-rw-r--r--arch/x86/kvm/hyperv.c2
-rw-r--r--arch/x86/kvm/irq_comm.c2
-rw-r--r--arch/x86/kvm/lapic.c6
-rw-r--r--arch/x86/kvm/mmu/mmu.c7
-rw-r--r--arch/x86/kvm/svm/nested.c7
-rw-r--r--arch/x86/kvm/svm/sev.c1
-rw-r--r--arch/x86/kvm/svm/svm.c46
-rw-r--r--arch/x86/kvm/vmx/nested.c10
-rw-r--r--arch/x86/kvm/vmx/vmx.c54
-rw-r--r--arch/x86/kvm/vmx/vmx.h6
-rw-r--r--arch/x86/kvm/x86.c44
12 files changed, 138 insertions, 71 deletions
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d0e2825ae617..2f6510de6b0c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2505,9 +2505,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
val = GET_SMSTATE(u32, smstate, 0x7fcc);
- ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+
+ if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1))
+ return X86EMUL_UNHANDLEABLE;
+
val = GET_SMSTATE(u32, smstate, 0x7fc8);
- ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+ if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1))
+ return X86EMUL_UNHANDLEABLE;
selector = GET_SMSTATE(u32, smstate, 0x7fc4);
set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
@@ -2560,16 +2565,23 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
val = GET_SMSTATE(u32, smstate, 0x7f68);
- ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+
+ if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1))
+ return X86EMUL_UNHANDLEABLE;
+
val = GET_SMSTATE(u32, smstate, 0x7f60);
- ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+ if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1))
+ return X86EMUL_UNHANDLEABLE;
cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
val = GET_SMSTATE(u64, smstate, 0x7ed0);
- ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
+
+ if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
+ return X86EMUL_UNHANDLEABLE;
selector = GET_SMSTATE(u32, smstate, 0x7e90);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
@@ -3016,7 +3028,7 @@ static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
case 0xa4: /* movsb */
case 0xa5: /* movsd/w */
*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
- /* fall through */
+ fallthrough;
case 0xaa: /* stosb */
case 0xab: /* stosd/w */
*reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 814d3aee5cef..1d330564eed8 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1779,7 +1779,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
if (ret != HV_STATUS_INVALID_PORT_ID)
break;
- /* fall through - maybe userspace knows this conn_id. */
+ fallthrough; /* maybe userspace knows this conn_id */
case HVCALL_POST_MESSAGE:
/* don't bother userspace if it has no way to handle it */
if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index c47d2acec529..4aa1c2e00e2a 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -285,7 +285,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
switch (ue->u.irqchip.irqchip) {
case KVM_IRQCHIP_PIC_SLAVE:
e->irqchip.pin += PIC_NUM_PINS / 2;
- /* fall through */
+ fallthrough;
case KVM_IRQCHIP_PIC_MASTER:
if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2)
return -EINVAL;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5ccbee7165a2..35cca2e0c802 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1053,7 +1053,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
switch (delivery_mode) {
case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
- /* fall through */
+ fallthrough;
case APIC_DM_FIXED:
if (unlikely(trig_mode && !level))
break;
@@ -1341,7 +1341,7 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
break;
case APIC_TASKPRI:
report_tpr_access(apic, false);
- /* fall thru */
+ fallthrough;
default:
val = kvm_lapic_get_reg(apic, offset);
break;
@@ -2027,7 +2027,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_LVT0:
apic_manage_nmi_watchdog(apic, val);
- /* fall through */
+ fallthrough;
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT1:
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4e03841f053d..71aa3da2a0b7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1916,7 +1916,8 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
}
@@ -2468,7 +2469,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
}
if (sp->unsync_children)
- kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
__clear_sp_write_flooding_count(sp);
@@ -4421,7 +4422,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[1][4] =
rsvd_check->rsvd_bits_mask[0][4];
- /* fall through */
+ fallthrough;
case PT64_ROOT_4LEVEL:
rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index fb68467e6049..e90bc436f584 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -586,7 +586,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
/* Give the current vmcb to the guest */
- svm_set_gif(svm, false);
nested_vmcb->save.es = vmcb->save.es;
nested_vmcb->save.cs = vmcb->save.cs;
@@ -632,6 +631,9 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
/* Restore the original control entries */
copy_vmcb_control_area(&vmcb->control, &hsave->control);
+ /* On vmexit the GIF is set to false */
+ svm_set_gif(svm, false);
+
svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
svm->vcpu.arch.l1_tsc_offset;
@@ -1132,6 +1134,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
load_nested_vmcb_control(svm, &ctl);
nested_prepare_vmcb_control(svm);
+ if (!nested_svm_vmrun_msrpm(svm))
+ return -EINVAL;
+
out_set_gif:
svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
return 0;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 402dc4234e39..7bf7bf734979 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1106,6 +1106,7 @@ void sev_vm_destroy(struct kvm *kvm)
list_for_each_safe(pos, q, head) {
__unregister_enc_region_locked(kvm,
list_entry(pos, struct enc_region, list));
+ cond_resched();
}
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 03dd7bac8034..91ea74ae71b8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2183,6 +2183,12 @@ static int iret_interception(struct vcpu_svm *svm)
return 1;
}
+static int invd_interception(struct vcpu_svm *svm)
+{
+ /* Treat an INVD instruction as a NOP and just skip it. */
+ return kvm_skip_emulated_instruction(&svm->vcpu);
+}
+
static int invlpg_interception(struct vcpu_svm *svm)
{
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
@@ -2668,7 +2674,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
- /* Fall through */
+ fallthrough;
default:
return kvm_set_msr_common(vcpu, msr);
}
@@ -2774,7 +2780,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_RDPMC] = rdpmc_interception,
[SVM_EXIT_CPUID] = cpuid_interception,
[SVM_EXIT_IRET] = iret_interception,
- [SVM_EXIT_INVD] = emulate_on_interception,
+ [SVM_EXIT_INVD] = invd_interception,
[SVM_EXIT_PAUSE] = pause_interception,
[SVM_EXIT_HLT] = halt_interception,
[SVM_EXIT_INVLPG] = invlpg_interception,
@@ -2938,8 +2944,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (npt_enabled)
vcpu->arch.cr3 = svm->vmcb->save.cr3;
- svm_complete_interrupts(svm);
-
if (is_guest_mode(vcpu)) {
int vmexit;
@@ -3504,7 +3508,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
stgi();
/* Any pending NMI will happen here */
- exit_fastpath = svm_exit_handlers_fastpath(vcpu);
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_after_interrupt(&svm->vcpu);
@@ -3518,6 +3521,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
}
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
+ vmcb_mark_all_clean(svm->vmcb);
/* if exit due to PF check for async PF */
if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
@@ -3537,7 +3541,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
SVM_EXIT_EXCP_BASE + MC_VECTOR))
svm_handle_mce(svm);
- vmcb_mark_all_clean(svm->vmcb);
+ svm_complete_interrupts(svm);
+ exit_fastpath = svm_exit_handlers_fastpath(vcpu);
return exit_fastpath;
}
@@ -3900,21 +3905,28 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *nested_vmcb;
struct kvm_host_map map;
- u64 guest;
- u64 vmcb;
int ret = 0;
- guest = GET_SMSTATE(u64, smstate, 0x7ed8);
- vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
+ u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
+ u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
+ u64 vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
- if (guest) {
- if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
- return 1;
- nested_vmcb = map.hva;
- ret = enter_svm_guest_mode(svm, vmcb, nested_vmcb);
- kvm_vcpu_unmap(&svm->vcpu, &map, true);
+ if (guest) {
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
+ return 1;
+
+ if (!(saved_efer & EFER_SVME))
+ return 1;
+
+ if (kvm_vcpu_map(&svm->vcpu,
+ gpa_to_gfn(vmcb), &map) == -EINVAL)
+ return 1;
+
+ ret = enter_svm_guest_mode(svm, vmcb, map.hva);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
+ }
}
return ret;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 23b58c28a1c9..1bb6b31eb646 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4404,6 +4404,14 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
kvm_vcpu_flush_tlb_current(vcpu);
+ /*
+ * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
+ * now and the new vmentry. Ensure that the VMCS02 PDPTR fields are
+ * up-to-date before switching to L1.
+ */
+ if (enable_ept && is_pae_paging(vcpu))
+ vmx_ept_load_pdptrs(vcpu);
+
leave_guest_mode(vcpu);
if (nested_cpu_has_preemption_timer(vmcs12))
@@ -4668,7 +4676,7 @@ void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
vmx->nested.msrs.entry_ctls_high &=
~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
vmx->nested.msrs.exit_ctls_high &=
- ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+ ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
}
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 46ba2e03a892..96979c09ebd1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -129,6 +129,9 @@ static bool __read_mostly enable_preemption_timer = 1;
module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#endif
+extern bool __read_mostly allow_smaller_maxphyaddr;
+module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
+
#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
#define KVM_VM_CR0_ALWAYS_ON \
@@ -791,6 +794,18 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu)
*/
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
+ else {
+ /*
+ * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched
+ * between guest and host. In that case we only care about present
+ * faults. For vmcs02, however, PFEC_MASK and PFEC_MATCH are set in
+ * prepare_vmcs02_rare.
+ */
+ bool selective_pf_trap = enable_ept && (eb & (1u << PF_VECTOR));
+ int mask = selective_pf_trap ? PFERR_PRESENT_MASK : 0;
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, mask);
+ }
vmcs_write32(EXCEPTION_BITMAP, eb);
}
@@ -2971,7 +2986,7 @@ static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
vpid_sync_context(to_vmx(vcpu)->vpid);
}
-static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
+void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
@@ -3114,7 +3129,7 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd,
guest_cr3 = vcpu->arch.cr3;
else /* vmcs01.GUEST_CR3 is already up-to-date. */
update_guest_cr3 = false;
- ept_load_pdptrs(vcpu);
+ vmx_ept_load_pdptrs(vcpu);
} else {
guest_cr3 = pgd;
}
@@ -4352,16 +4367,6 @@ static void init_vmcs(struct vcpu_vmx *vmx)
vmx->pt_desc.guest.output_mask = 0x7F;
vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
}
-
- /*
- * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched
- * between guest and host. In that case we only care about present
- * faults.
- */
- if (enable_ept) {
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, PFERR_PRESENT_MASK);
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, PFERR_PRESENT_MASK);
- }
}
static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -4654,7 +4659,7 @@ static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
return false;
- /* fall through */
+ fallthrough;
case DB_VECTOR:
return !(vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP));
@@ -4803,6 +4808,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
* EPT will cause page fault only if we need to
* detect illegal GPAs.
*/
+ WARN_ON_ONCE(!allow_smaller_maxphyaddr);
kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
return 1;
} else
@@ -4827,7 +4833,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
}
kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
- /* fall through */
+ fallthrough;
case BP_VECTOR:
/*
* Update instruction length as we may reinject #BP from
@@ -5257,7 +5263,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
error_code =
vmcs_read32(IDT_VECTORING_ERROR_CODE);
}
- /* fall through */
+ fallthrough;
case INTR_TYPE_SOFT_EXCEPTION:
kvm_clear_exception_queue(vcpu);
break;
@@ -5331,7 +5337,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* would also use advanced VM-exit information for EPT violations to
* reconstruct the page fault error code.
*/
- if (unlikely(kvm_mmu_is_illegal_gpa(vcpu, gpa)))
+ if (unlikely(allow_smaller_maxphyaddr && kvm_mmu_is_illegal_gpa(vcpu, gpa)))
return kvm_emulate_instruction(vcpu, 0);
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
@@ -5610,7 +5616,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
* keeping track of global entries in shadow page tables.
*/
- /* fall-through */
+ fallthrough;
case INVPCID_TYPE_ALL_INCL_GLOBAL:
kvm_mmu_unload(vcpu);
return kvm_skip_emulated_instruction(vcpu);
@@ -6054,6 +6060,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
exit_reason != EXIT_REASON_EPT_VIOLATION &&
exit_reason != EXIT_REASON_PML_FULL &&
+ exit_reason != EXIT_REASON_APIC_ACCESS &&
exit_reason != EXIT_REASON_TASK_SWITCH)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
@@ -6578,7 +6585,7 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
break;
case INTR_TYPE_SOFT_EXCEPTION:
vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
- /* fall through */
+ fallthrough;
case INTR_TYPE_HARD_EXCEPTION:
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
u32 err = vmcs_read32(error_code_field);
@@ -6588,7 +6595,7 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
break;
case INTR_TYPE_SOFT_INTR:
vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
- /* fall through */
+ fallthrough;
case INTR_TYPE_EXT_INTR:
kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
break;
@@ -8304,11 +8311,12 @@ static int __init vmx_init(void)
vmx_check_vmcs12_offsets();
/*
- * Intel processors don't have problems with
- * GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable
- * it for VMX by default
+ * Shadow paging doesn't have a (further) performance penalty
+ * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it
+ * by default
*/
- allow_smaller_maxphyaddr = true;
+ if (!enable_ept)
+ allow_smaller_maxphyaddr = true;
return 0;
}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 26175a4759fa..a0e47720f60c 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -356,6 +356,7 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
int vmx_find_msr_index(struct vmx_msrs *m, u32 msr);
int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
struct x86_exception *e);
+void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
#define POSTED_INTR_ON 0
#define POSTED_INTR_SN 1
@@ -551,7 +552,10 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
{
- return !enable_ept || cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+ if (!enable_ept)
+ return true;
+
+ return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
}
void dump_vmcs(void);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 599d73206299..ce856e0ece84 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -188,7 +188,7 @@ static struct kvm_shared_msrs __percpu *shared_msrs;
u64 __read_mostly host_efer;
EXPORT_SYMBOL_GPL(host_efer);
-bool __read_mostly allow_smaller_maxphyaddr;
+bool __read_mostly allow_smaller_maxphyaddr = 0;
EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
static u64 __read_mostly host_xss;
@@ -975,7 +975,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long old_cr4 = kvm_read_cr4(vcpu);
unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
- X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
+ X86_CR4_SMEP;
+ unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
if (kvm_valid_cr4(vcpu, cr4))
return 1;
@@ -1003,7 +1004,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (kvm_x86_ops.set_cr4(vcpu, cr4))
return 1;
- if (((cr4 ^ old_cr4) & pdptr_bits) ||
+ if (((cr4 ^ old_cr4) & mmu_role_bits) ||
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
@@ -1116,14 +1117,12 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
vcpu->arch.eff_db[dr] = val;
break;
case 4:
- /* fall through */
case 6:
if (!kvm_dr6_valid(val))
return -1; /* #GP */
vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
break;
case 5:
- /* fall through */
default: /* 7 */
if (!kvm_dr7_valid(val))
return -1; /* #GP */
@@ -1154,12 +1153,10 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
*val = vcpu->arch.db[array_index_nospec(dr, size)];
break;
case 4:
- /* fall through */
case 6:
*val = vcpu->arch.dr6;
break;
case 5:
- /* fall through */
default: /* 7 */
*val = vcpu->arch.dr7;
break;
@@ -2735,7 +2732,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
return 1;
if (!lapic_in_kernel(vcpu))
- return 1;
+ return data ? 1 : 0;
vcpu->arch.apf.msr_en_val = data;
@@ -3051,7 +3048,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
- pr = true; /* fall through */
+ pr = true;
+ fallthrough;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
if (kvm_pmu_is_valid_msr(vcpu, msr))
@@ -3224,9 +3222,22 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_POWER_CTL:
msr_info->data = vcpu->arch.msr_ia32_power_ctl;
break;
- case MSR_IA32_TSC:
- msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
+ case MSR_IA32_TSC: {
+ /*
+ * Intel SDM states that MSR_IA32_TSC read adds the TSC offset
+ * even when not intercepted. AMD manual doesn't explicitly
+ * state this but appears to behave the same.
+ *
+ * On userspace reads and writes, however, we unconditionally
+ * operate L1's TSC value to ensure backwards-compatible
+ * behavior for migration.
+ */
+ u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
+ vcpu->arch.tsc_offset;
+
+ msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset;
break;
+ }
case MSR_MTRRcap:
case 0x200 ... 0x2ff:
return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -3581,6 +3592,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SMALLER_MAXPHYADDR:
r = (int) allow_smaller_maxphyaddr;
break;
+ case KVM_CAP_STEAL_TIME:
+ r = sched_info_on();
+ break;
default:
break;
}
@@ -4359,7 +4373,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
case KVM_CAP_HYPERV_SYNIC2:
if (cap->args[0])
return -EINVAL;
- /* fall through */
+ fallthrough;
case KVM_CAP_HYPERV_SYNIC:
if (!irqchip_in_kernel(vcpu->kvm))
@@ -8672,7 +8686,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
- /* fall through */
+ fallthrough;
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
break;
@@ -10751,9 +10765,11 @@ EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
{
struct x86_exception fault;
+ u32 access = error_code &
+ (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);
if (!(error_code & PFERR_PRESENT_MASK) ||
- vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, error_code, &fault) != UNMAPPED_GVA) {
+ vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, &fault) != UNMAPPED_GVA) {
/*
* If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page
* tables probably do not match the TLB. Just proceed