diff options
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 444 |
1 files changed, 274 insertions, 170 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6db3a506b402..0c0ca599a353 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -110,6 +110,8 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; #define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE) +#define KVM_CAP_PMU_VALID_MASK KVM_PMU_CAP_DISABLE + #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) @@ -126,16 +128,15 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); struct kvm_x86_ops kvm_x86_ops __read_mostly; -EXPORT_SYMBOL_GPL(kvm_x86_ops); #define KVM_X86_OP(func) \ DEFINE_STATIC_CALL_NULL(kvm_x86_##func, \ *(((struct kvm_x86_ops *)0)->func)); -#define KVM_X86_OP_NULL KVM_X86_OP +#define KVM_X86_OP_OPTIONAL KVM_X86_OP +#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP #include <asm/kvm-x86-ops.h> EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits); EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg); -EXPORT_STATIC_CALL_GPL(kvm_x86_tlb_flush_current); static bool __read_mostly ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); @@ -194,6 +195,9 @@ bool __read_mostly enable_pmu = true; EXPORT_SYMBOL_GPL(enable_pmu); module_param(enable_pmu, bool, 0444); +bool __read_mostly eager_page_split = true; +module_param(eager_page_split, bool, 0644); + /* * Restoring the host value for MSRs that are only consumed when running in * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU @@ -760,7 +764,7 @@ bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, if ((fault->error_code & PFERR_PRESENT_MASK) && !(fault->error_code & PFERR_RSVD_MASK)) kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address, - fault_mmu->root_hpa); + fault_mmu->root.hpa); fault_mmu->inject_page_fault(vcpu, fault); return fault->nested_page_fault; @@ -853,7 +857,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) * Shadow page roots need to be reconstructed instead. */ if (!tdp_enabled && memcmp(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs))) - kvm_mmu_free_roots(vcpu, mmu, KVM_MMU_ROOT_CURRENT); + kvm_mmu_free_roots(vcpu->kvm, mmu, KVM_MMU_ROOT_CURRENT); memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)); kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); @@ -869,6 +873,13 @@ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned lon if ((cr0 ^ old_cr0) & X86_CR0_PG) { kvm_clear_async_pf_completion_queue(vcpu); kvm_async_pf_hash_reset(vcpu); + + /* + * Clearing CR0.PG is defined to flush the TLB from the guest's + * perspective. + */ + if (!(cr0 & X86_CR0_PG)) + kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); } if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS) @@ -1067,28 +1078,43 @@ EXPORT_SYMBOL_GPL(kvm_is_valid_cr4); void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4) { + if ((cr4 ^ old_cr4) & KVM_MMU_CR4_ROLE_BITS) + kvm_mmu_reset_context(vcpu); + /* - * If any role bit is changed, the MMU needs to be reset. - * - * If CR4.PCIDE is changed 1 -> 0, the guest TLB must be flushed. * If CR4.PCIDE is changed 0 -> 1, there is no need to flush the TLB * according to the SDM; however, stale prev_roots could be reused * incorrectly in the future after a MOV to CR3 with NOFLUSH=1, so we - * free them all. KVM_REQ_MMU_RELOAD is fit for the both cases; it - * is slow, but changing CR4.PCIDE is a rare case. - * - * If CR4.PGE is changed, the guest TLB must be flushed. + * free them all. This is *not* a superset of KVM_REQ_TLB_FLUSH_GUEST + * or KVM_REQ_TLB_FLUSH_CURRENT, because the hardware TLB is not flushed, + * so fall through. + */ + if (!tdp_enabled && + (cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) + kvm_mmu_unload(vcpu); + + /* + * The TLB has to be flushed for all PCIDs if any of the following + * (architecturally required) changes happen: + * - CR4.PCIDE is changed from 1 to 0 + * - CR4.PGE is toggled * - * Note: resetting MMU is a superset of KVM_REQ_MMU_RELOAD and - * KVM_REQ_MMU_RELOAD is a superset of KVM_REQ_TLB_FLUSH_GUEST, hence - * the usage of "else if". + * This is a superset of KVM_REQ_TLB_FLUSH_CURRENT. */ - if ((cr4 ^ old_cr4) & KVM_MMU_CR4_ROLE_BITS) - kvm_mmu_reset_context(vcpu); - else if ((cr4 ^ old_cr4) & X86_CR4_PCIDE) - kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); - else if ((cr4 ^ old_cr4) & X86_CR4_PGE) + if (((cr4 ^ old_cr4) & X86_CR4_PGE) || + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); + + /* + * The TLB has to be flushed for the current PCID if any of the + * following (architecturally required) changes happen: + * - CR4.SMEP is changed from 0 to 1 + * - CR4.PAE is toggled + */ + else if (((cr4 ^ old_cr4) & X86_CR4_PAE) || + ((cr4 & X86_CR4_SMEP) && !(old_cr4 & X86_CR4_SMEP))) + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + } EXPORT_SYMBOL_GPL(kvm_post_set_cr4); @@ -1166,7 +1192,7 @@ static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid) if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); - kvm_mmu_free_roots(vcpu, mmu, roots_to_free); + kvm_mmu_free_roots(vcpu->kvm, mmu, roots_to_free); } int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) @@ -1656,8 +1682,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return r; } - /* Update reserved bits */ - if ((efer ^ old_efer) & EFER_NX) + if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS) kvm_mmu_reset_context(vcpu); return 0; @@ -1723,9 +1748,6 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, { struct msr_data msr; - if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) - return KVM_MSR_RET_FILTERED; - switch (index) { case MSR_FS_BASE: case MSR_GS_BASE: @@ -1807,9 +1829,6 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, struct msr_data msr; int ret; - if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) - return KVM_MSR_RET_FILTERED; - switch (index) { case MSR_TSC_AUX: if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) @@ -1846,6 +1865,20 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, return ret; } +static int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) +{ + if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) + return KVM_MSR_RET_FILTERED; + return kvm_get_msr_ignored_check(vcpu, index, data, false); +} + +static int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data) +{ + if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) + return KVM_MSR_RET_FILTERED; + return kvm_set_msr_ignored_check(vcpu, index, data, false); +} + int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) { return kvm_get_msr_ignored_check(vcpu, index, data, false); @@ -1928,7 +1961,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) u64 data; int r; - r = kvm_get_msr(vcpu, ecx, &data); + r = kvm_get_msr_with_filter(vcpu, ecx, &data); if (!r) { trace_kvm_msr_read(ecx, data); @@ -1953,7 +1986,7 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) u64 data = kvm_read_edx_eax(vcpu); int r; - r = kvm_set_msr(vcpu, ecx, data); + r = kvm_set_msr_with_filter(vcpu, ecx, data); if (!r) { trace_kvm_msr_write(ecx, data); @@ -2026,17 +2059,10 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data return 1; if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) && - ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) && - ((data & APIC_MODE_MASK) == APIC_DM_FIXED) && - ((u32)(data >> 32) != X2APIC_BROADCAST)) { - - data &= ~(1 << 12); - kvm_apic_send_ipi(vcpu->arch.apic, (u32)data, (u32)(data >> 32)); - kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32)); - kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR, (u32)data); - trace_kvm_apic_write(APIC_ICR, (u32)data); - return 0; - } + ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) && + ((data & APIC_MODE_MASK) == APIC_DM_FIXED) && + ((u32)(data >> 32) != X2APIC_BROADCAST)) + return kvm_x2apic_icr_write(vcpu->arch.apic, data); return 1; } @@ -2413,7 +2439,7 @@ static inline u64 __scale_tsc(u64 ratio, u64 tsc) return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits); } -u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio) +u64 kvm_scale_tsc(u64 tsc, u64 ratio) { u64 _tsc = tsc; @@ -2428,7 +2454,7 @@ static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) { u64 tsc; - tsc = kvm_scale_tsc(vcpu, rdtsc(), vcpu->arch.l1_tsc_scaling_ratio); + tsc = kvm_scale_tsc(rdtsc(), vcpu->arch.l1_tsc_scaling_ratio); return target_tsc - tsc; } @@ -2436,7 +2462,7 @@ static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { return vcpu->arch.l1_tsc_offset + - kvm_scale_tsc(vcpu, host_tsc, vcpu->arch.l1_tsc_scaling_ratio); + kvm_scale_tsc(host_tsc, vcpu->arch.l1_tsc_scaling_ratio); } EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); @@ -2639,7 +2665,7 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) { if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) WARN_ON(adjustment < 0); - adjustment = kvm_scale_tsc(vcpu, (u64) adjustment, + adjustment = kvm_scale_tsc((u64) adjustment, vcpu->arch.l1_tsc_scaling_ratio); adjust_tsc_offset_guest(vcpu, adjustment); } @@ -3059,7 +3085,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ if (kvm_has_tsc_control) - tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz, + tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz, v->arch.l1_tsc_scaling_ratio); if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { @@ -3282,7 +3308,7 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu) static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu) { ++vcpu->stat.tlb_flush; - static_call(kvm_x86_tlb_flush_all)(vcpu); + static_call(kvm_x86_flush_tlb_all)(vcpu); } static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) @@ -3300,14 +3326,14 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) kvm_mmu_sync_prev_roots(vcpu); } - static_call(kvm_x86_tlb_flush_guest)(vcpu); + static_call(kvm_x86_flush_tlb_guest)(vcpu); } static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu) { ++vcpu->stat.tlb_flush; - static_call(kvm_x86_tlb_flush_current)(vcpu); + static_call(kvm_x86_flush_tlb_current)(vcpu); } /* @@ -3869,7 +3895,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ratio = vcpu->arch.tsc_scaling_ratio; } - msr_info->data = kvm_scale_tsc(vcpu, rdtsc(), ratio) + offset; + msr_info->data = kvm_scale_tsc(rdtsc(), ratio) + offset; break; } case MSR_MTRRcap: @@ -4245,6 +4271,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_EXIT_ON_EMULATION_FAILURE: case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_SYS_ATTRIBUTES: + case KVM_CAP_VAPIC: case KVM_CAP_ENABLE_CAP: r = 1; break; @@ -4286,9 +4313,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) */ r = static_call(kvm_x86_has_emulated_msr)(kvm, MSR_IA32_SMBASE); break; - case KVM_CAP_VAPIC: - r = !static_call(kvm_x86_cpu_has_accelerated_tpr)(); - break; case KVM_CAP_NR_VCPUS: r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; @@ -4343,7 +4367,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) if (r < sizeof(struct kvm_xsave)) r = sizeof(struct kvm_xsave); break; + case KVM_CAP_PMU_CAPABILITY: + r = enable_pmu ? KVM_CAP_PMU_VALID_MASK : 0; + break; } + case KVM_CAP_DISABLE_QUIRKS2: + r = KVM_X86_VALID_QUIRKS; + break; default: break; } @@ -5145,7 +5175,7 @@ static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu, kvm->arch.last_tsc_khz == vcpu->arch.virtual_tsc_khz && kvm->arch.last_tsc_offset == offset); - tsc = kvm_scale_tsc(vcpu, rdtsc(), vcpu->arch.l1_tsc_scaling_ratio) + offset; + tsc = kvm_scale_tsc(rdtsc(), vcpu->arch.l1_tsc_scaling_ratio) + offset; ns = get_kvmclock_base_ns(); __kvm_synchronize_tsc(vcpu, offset, tsc, ns, matched); @@ -5890,6 +5920,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, return -EINVAL; switch (cap->cap) { + case KVM_CAP_DISABLE_QUIRKS2: + r = -EINVAL; + if (cap->args[0] & ~KVM_X86_VALID_QUIRKS) + break; + fallthrough; case KVM_CAP_DISABLE_QUIRKS: kvm->arch.disabled_quirks = cap->args[0]; r = 0; @@ -5911,7 +5946,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, smp_wmb(); kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT; kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; - kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT); + kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_ABSENT); r = 0; split_irqchip_unlock: mutex_unlock(&kvm->lock); @@ -5990,15 +6025,18 @@ split_irqchip_unlock: #endif case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM: r = -EINVAL; - if (kvm_x86_ops.vm_copy_enc_context_from) - r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]); - return r; + if (!kvm_x86_ops.vm_copy_enc_context_from) + break; + + r = static_call(kvm_x86_vm_copy_enc_context_from)(kvm, cap->args[0]); + break; case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM: r = -EINVAL; - if (kvm_x86_ops.vm_move_enc_context_from) - r = kvm_x86_ops.vm_move_enc_context_from( - kvm, cap->args[0]); - return r; + if (!kvm_x86_ops.vm_move_enc_context_from) + break; + + r = static_call(kvm_x86_vm_move_enc_context_from)(kvm, cap->args[0]); + break; case KVM_CAP_EXIT_HYPERCALL: if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) { r = -EINVAL; @@ -6014,6 +6052,18 @@ split_irqchip_unlock: kvm->arch.exit_on_emulation_error = cap->args[0]; r = 0; break; + case KVM_CAP_PMU_CAPABILITY: + r = -EINVAL; + if (!enable_pmu || (cap->args[0] & ~KVM_CAP_PMU_VALID_MASK)) + break; + + mutex_lock(&kvm->lock); + if (!kvm->created_vcpus) { + kvm->arch.enable_pmu = !(cap->args[0] & KVM_PMU_CAP_DISABLE); + r = 0; + } + mutex_unlock(&kvm->lock); + break; default: r = -EINVAL; break; @@ -6293,7 +6343,7 @@ set_identity_unlock: /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ smp_wmb(); kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL; - kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT); + kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_ABSENT); create_irqchip_unlock: mutex_unlock(&kvm->lock); break; @@ -6473,8 +6523,10 @@ set_pit2_out: break; case KVM_MEMORY_ENCRYPT_OP: { r = -ENOTTY; - if (kvm_x86_ops.mem_enc_op) - r = static_call(kvm_x86_mem_enc_op)(kvm, argp); + if (!kvm_x86_ops.mem_enc_ioctl) + goto out; + + r = static_call(kvm_x86_mem_enc_ioctl)(kvm, argp); break; } case KVM_MEMORY_ENCRYPT_REG_REGION: { @@ -6485,8 +6537,10 @@ set_pit2_out: goto out; r = -ENOTTY; - if (kvm_x86_ops.mem_enc_reg_region) - r = static_call(kvm_x86_mem_enc_reg_region)(kvm, ®ion); + if (!kvm_x86_ops.mem_enc_register_region) + goto out; + + r = static_call(kvm_x86_mem_enc_register_region)(kvm, ®ion); break; } case KVM_MEMORY_ENCRYPT_UNREG_REGION: { @@ -6497,8 +6551,10 @@ set_pit2_out: goto out; r = -ENOTTY; - if (kvm_x86_ops.mem_enc_unreg_region) - r = static_call(kvm_x86_mem_enc_unreg_region)(kvm, ®ion); + if (!kvm_x86_ops.mem_enc_unregister_region) + goto out; + + r = static_call(kvm_x86_mem_enc_unregister_region)(kvm, ®ion); break; } case KVM_HYPERV_EVENTFD: { @@ -6678,7 +6734,7 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, static_call(kvm_x86_get_segment)(vcpu, var, seg); } -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, +gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access, struct x86_exception *exception) { struct kvm_mmu *mmu = vcpu->arch.mmu; @@ -6698,7 +6754,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; + u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read); @@ -6708,7 +6764,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read); { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; + u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; access |= PFERR_FETCH_MASK; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } @@ -6718,7 +6774,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; + u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; access |= PFERR_WRITE_MASK; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } @@ -6734,7 +6790,7 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, } static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, u32 access, + struct kvm_vcpu *vcpu, u64 access, struct x86_exception *exception) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; @@ -6771,7 +6827,7 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; + u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; unsigned offset; int ret; @@ -6796,7 +6852,7 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu, gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { - u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; + u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; /* * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED @@ -6815,9 +6871,11 @@ static int emulator_read_std(struct x86_emulate_ctxt *ctxt, struct x86_exception *exception, bool system) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - u32 access = 0; + u64 access = 0; - if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3) + if (system) + access |= PFERR_IMPLICIT_ACCESS; + else if (static_call(kvm_x86_get_cpl)(vcpu) == 3) access |= PFERR_USER_MASK; return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); @@ -6833,7 +6891,7 @@ static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, } static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, u32 access, + struct kvm_vcpu *vcpu, u64 access, struct x86_exception *exception) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; @@ -6867,9 +6925,11 @@ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *v bool system) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - u32 access = PFERR_WRITE_MASK; + u64 access = PFERR_WRITE_MASK; - if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3) + if (system) + access |= PFERR_IMPLICIT_ACCESS; + else if (static_call(kvm_x86_get_cpl)(vcpu) == 3) access |= PFERR_USER_MASK; return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, @@ -6936,7 +6996,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, bool write) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - u32 access = ((static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0) + u64 access = ((static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0) | (write ? PFERR_WRITE_MASK : 0); /* @@ -7579,13 +7639,13 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, return; } -static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, - u32 msr_index, u64 *pdata) +static int emulator_get_msr_with_filter(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 *pdata) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int r; - r = kvm_get_msr(vcpu, msr_index, pdata); + r = kvm_get_msr_with_filter(vcpu, msr_index, pdata); if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_RDMSR, 0, complete_emulated_rdmsr, r)) { @@ -7596,13 +7656,13 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, return r; } -static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, - u32 msr_index, u64 data) +static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 data) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int r; - r = kvm_set_msr(vcpu, msr_index, data); + r = kvm_set_msr_with_filter(vcpu, msr_index, data); if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_WRMSR, data, complete_emulated_msr_access, r)) { @@ -7613,6 +7673,18 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, return r; } +static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 *pdata) +{ + return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); +} + +static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 data) +{ + return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); +} + static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); @@ -7676,6 +7748,11 @@ static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt) return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR); } +static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt) +{ + return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID); +} + static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) { return kvm_register_read_raw(emul_to_vcpu(ctxt), reg); @@ -7746,6 +7823,8 @@ static const struct x86_emulate_ops emulate_ops = { .set_dr = emulator_set_dr, .get_smbase = emulator_get_smbase, .set_smbase = emulator_set_smbase, + .set_msr_with_filter = emulator_set_msr_with_filter, + .get_msr_with_filter = emulator_get_msr_with_filter, .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, .check_pmc = emulator_check_pmc, @@ -7758,6 +7837,7 @@ static const struct x86_emulate_ops emulate_ops = { .guest_has_long_mode = emulator_guest_has_long_mode, .guest_has_movbe = emulator_guest_has_movbe, .guest_has_fxsr = emulator_guest_has_fxsr, + .guest_has_rdpid = emulator_guest_has_rdpid, .set_nmi_mask = emulator_set_nmi_mask, .get_hflags = emulator_get_hflags, .exiting_smm = emulator_exiting_smm, @@ -8426,8 +8506,7 @@ writeback: kvm_rip_write(vcpu, ctxt->eip); if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) r = kvm_vcpu_do_singlestep(vcpu); - if (kvm_x86_ops.update_emulated_instruction) - static_call(kvm_x86_update_emulated_instruction)(vcpu); + static_call_cond(kvm_x86_update_emulated_instruction)(vcpu); __kvm_set_rflags(vcpu, ctxt->eflags); } @@ -8826,6 +8905,12 @@ int kvm_arch_init(void *opaque) goto out; } + if (IS_ENABLED(CONFIG_PREEMPT_RT) && !boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n"); + r = -EOPNOTSUPP; + goto out; + } + r = -ENOMEM; x86_emulator_cache = kvm_alloc_emulator_cache(); @@ -8985,7 +9070,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, * * @apicid - apicid of vcpu to be kicked. */ -static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) +static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid) { struct kvm_lapic_irq lapic_irq; @@ -9005,15 +9090,29 @@ bool kvm_apicv_activated(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_apicv_activated); + +static void set_or_clear_apicv_inhibit(unsigned long *inhibits, + enum kvm_apicv_inhibit reason, bool set) +{ + if (set) + __set_bit(reason, inhibits); + else + __clear_bit(reason, inhibits); + + trace_kvm_apicv_inhibit_changed(reason, set, *inhibits); +} + static void kvm_apicv_init(struct kvm *kvm) { + unsigned long *inhibits = &kvm->arch.apicv_inhibit_reasons; + init_rwsem(&kvm->arch.apicv_update_lock); - set_bit(APICV_INHIBIT_REASON_ABSENT, - &kvm->arch.apicv_inhibit_reasons); + set_or_clear_apicv_inhibit(inhibits, APICV_INHIBIT_REASON_ABSENT, true); + if (!enable_apicv) - set_bit(APICV_INHIBIT_REASON_DISABLE, - &kvm->arch.apicv_inhibit_reasons); + set_or_clear_apicv_inhibit(inhibits, + APICV_INHIBIT_REASON_ABSENT, true); } static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id) @@ -9104,7 +9203,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT)) break; - kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); + kvm_pv_kick_cpu_op(vcpu->kvm, a1); kvm_sched_yield(vcpu, a1); ret = 0; break; @@ -9268,10 +9367,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) */ else if (!vcpu->arch.exception.pending) { if (vcpu->arch.nmi_injected) { - static_call(kvm_x86_set_nmi)(vcpu); + static_call(kvm_x86_inject_nmi)(vcpu); can_inject = false; } else if (vcpu->arch.interrupt.injected) { - static_call(kvm_x86_set_irq)(vcpu); + static_call(kvm_x86_inject_irq)(vcpu); can_inject = false; } } @@ -9351,7 +9450,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) if (r) { --vcpu->arch.nmi_pending; vcpu->arch.nmi_injected = true; - static_call(kvm_x86_set_nmi)(vcpu); + static_call(kvm_x86_inject_nmi)(vcpu); can_inject = false; WARN_ON(static_call(kvm_x86_nmi_allowed)(vcpu, true) < 0); } @@ -9365,7 +9464,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) goto out; if (r) { kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false); - static_call(kvm_x86_set_irq)(vcpu); + static_call(kvm_x86_inject_irq)(vcpu); WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0); } if (kvm_cpu_has_injectable_intr(vcpu)) @@ -9687,25 +9786,21 @@ out: } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); -void __kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) +void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set) { unsigned long old, new; lockdep_assert_held_write(&kvm->arch.apicv_update_lock); - if (!kvm_x86_ops.check_apicv_inhibit_reasons || - !static_call(kvm_x86_check_apicv_inhibit_reasons)(bit)) + if (!static_call(kvm_x86_check_apicv_inhibit_reasons)(reason)) return; old = new = kvm->arch.apicv_inhibit_reasons; - if (activate) - __clear_bit(bit, &new); - else - __set_bit(bit, &new); + set_or_clear_apicv_inhibit(&new, reason, set); if (!!old != !!new) { - trace_kvm_apicv_update_request(activate, bit); /* * Kick all vCPUs before setting apicv_inhibit_reasons to avoid * false positives in the sanity check WARN in svm_vcpu_run(). @@ -9724,18 +9819,22 @@ void __kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE); kvm_zap_gfn_range(kvm, gfn, gfn+1); } - } else + } else { kvm->arch.apicv_inhibit_reasons = new; + } } -EXPORT_SYMBOL_GPL(__kvm_request_apicv_update); -void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) +void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set) { + if (!enable_apicv) + return; + down_write(&kvm->arch.apicv_update_lock); - __kvm_request_apicv_update(kvm, activate, bit); + __kvm_set_or_clear_apicv_inhibit(kvm, reason, set); up_write(&kvm->arch.apicv_update_lock); } -EXPORT_SYMBOL_GPL(kvm_request_apicv_update); +EXPORT_SYMBOL_GPL(kvm_set_or_clear_apicv_inhibit); static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { @@ -9769,11 +9868,11 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, to_hv_synic(vcpu)->vec_bitmap, 256); - static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap); + static_call_cond(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap); return; } - static_call(kvm_x86_load_eoi_exitmap)( + static_call_cond(kvm_x86_load_eoi_exitmap)( vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors); } @@ -9796,10 +9895,7 @@ static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) if (!lapic_in_kernel(vcpu)) return; - if (!kvm_x86_ops.set_apic_access_page_addr) - return; - - static_call(kvm_x86_set_apic_access_page_addr)(vcpu); + static_call_cond(kvm_x86_set_apic_access_page_addr)(vcpu); } void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) @@ -9844,8 +9940,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto out; } } - if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) - kvm_mmu_unload(vcpu); + if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu)) + kvm_mmu_free_obsolete_roots(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) __kvm_migrate_timers(vcpu); if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu)) @@ -9990,7 +10086,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) preempt_disable(); - static_call(kvm_x86_prepare_guest_switch)(vcpu); + static_call(kvm_x86_prepare_switch_to_guest)(vcpu); /* * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt @@ -10071,7 +10167,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) */ WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu)); - exit_fastpath = static_call(kvm_x86_run)(vcpu); + exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu); if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST)) break; @@ -10373,10 +10469,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - /* - * Exclude PKRU from restore as restored separately in - * kvm_x86_ops.run(). - */ + /* Exclude PKRU, it's restored separately immediately after VM-Exit. */ fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true); trace_kvm_fpu(1); } @@ -10566,16 +10659,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } -void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -{ - struct kvm_segment cs; - - kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); - *db = cs.db; - *l = cs.l; -} -EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); - static void __get_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct desc_ptr dt; @@ -10899,7 +10982,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm) { - bool inhibit = false; + bool set = false; struct kvm_vcpu *vcpu; unsigned long i; @@ -10907,11 +10990,11 @@ static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) { - inhibit = true; + set = true; break; } } - __kvm_request_apicv_update(kvm, !inhibit, APICV_INHIBIT_REASON_BLOCKIRQ); + __kvm_set_or_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_BLOCKIRQ, set); up_write(&kvm->arch.apicv_update_lock); } @@ -11348,15 +11431,17 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static_call(kvm_x86_update_exception_bitmap)(vcpu); /* - * Reset the MMU context if paging was enabled prior to INIT (which is - * implied if CR0.PG=1 as CR0 will be '0' prior to RESET). Unlike the - * standard CR0/CR4/EFER modification paths, only CR0.PG needs to be - * checked because it is unconditionally cleared on INIT and all other - * paging related bits are ignored if paging is disabled, i.e. CR0.WP, - * CR4, and EFER changes are all irrelevant if CR0.PG was '0'. + * On the standard CR0/CR4/EFER modification paths, there are several + * complex conditions determining whether the MMU has to be reset and/or + * which PCIDs have to be flushed. However, CR0.WP and the paging-related + * bits in CR4 and EFER are irrelevant if CR0.PG was '0'; and a reset+flush + * is needed anyway if CR0.PG was '1' (which can only happen for INIT, as + * CR0 will be '0' prior to RESET). So we only need to check CR0.PG here. */ - if (old_cr0 & X86_CR0_PG) + if (old_cr0 & X86_CR0_PG) { + kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); kvm_mmu_reset_context(vcpu); + } /* * Intel's SDM states that all TLB entries are flushed on INIT. AMD's @@ -11517,10 +11602,8 @@ int kvm_arch_hardware_setup(void *opaque) u64 max = min(0x7fffffffULL, __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz)); kvm_max_guest_tsc_khz = max; - - kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; } - + kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; kvm_init_msr_list(); return 0; } @@ -11589,12 +11672,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ret = kvm_page_track_init(kvm); if (ret) - return ret; + goto out; + + ret = kvm_mmu_init_vm(kvm); + if (ret) + goto out_page_track; INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); - INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); - INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); - INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); @@ -11614,6 +11698,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); kvm->arch.guest_can_read_msr_platform_info = true; + kvm->arch.enable_pmu = enable_pmu; #if IS_ENABLED(CONFIG_HYPERV) spin_lock_init(&kvm->arch.hv_root_tdp_lock); @@ -11625,10 +11710,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_apicv_init(kvm); kvm_hv_init_vm(kvm); - kvm_mmu_init_vm(kvm); kvm_xen_init_vm(kvm); return static_call(kvm_x86_vm_init)(kvm); + +out_page_track: + kvm_page_track_cleanup(kvm); +out: + return ret; } int kvm_arch_post_init_vm(struct kvm *kvm) @@ -11811,7 +11900,7 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages) if (slot->arch.rmap[i]) continue; - slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT); + slot->arch.rmap[i] = __vcalloc(lpages, sz, GFP_KERNEL_ACCOUNT); if (!slot->arch.rmap[i]) { memslot_rmap_free(slot); return -ENOMEM; @@ -11848,7 +11937,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm, lpages = __kvm_mmu_slot_lpages(slot, npages, level); - linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT); + linfo = __vcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT); if (!linfo) goto out_free; @@ -11998,6 +12087,9 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, if (kvm_dirty_log_manual_protect_and_init_set(kvm)) return; + if (READ_ONCE(eager_page_split)) + kvm_mmu_slot_try_split_huge_pages(kvm, new, PG_LEVEL_4K); + if (kvm_x86_ops.cpu_dirty_log_size) { kvm_mmu_slot_leaf_clear_dirty(kvm, new); kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M); @@ -12042,8 +12134,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) { return (is_guest_mode(vcpu) && - kvm_x86_ops.guest_apic_has_interrupt && - static_call(kvm_x86_guest_apic_has_interrupt)(vcpu)); + static_call(kvm_x86_guest_apic_has_interrupt)(vcpu)); } static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) @@ -12296,14 +12387,28 @@ static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu) static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu) { - if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) + + if (!kvm_pv_async_pf_enabled(vcpu)) return false; - if (!kvm_pv_async_pf_enabled(vcpu) || - (vcpu->arch.apf.send_user_only && static_call(kvm_x86_get_cpl)(vcpu) == 0)) + if (vcpu->arch.apf.send_user_only && + static_call(kvm_x86_get_cpl)(vcpu) == 0) return false; - return true; + if (is_guest_mode(vcpu)) { + /* + * L1 needs to opt into the special #PF vmexits that are + * used to deliver async page faults. + */ + return vcpu->arch.apf.delivery_as_pf_vmexit; + } else { + /* + * Play it safe in case the guest temporarily disables paging. + * The real mode IDT in particular is unlikely to have a #PF + * exception setup. + */ + return is_paging(vcpu); + } } bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) @@ -12398,7 +12503,7 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) void kvm_arch_start_assignment(struct kvm *kvm) { if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1) - static_call_cond(kvm_x86_start_assignment)(kvm); + static_call_cond(kvm_x86_pi_start_assignment)(kvm); } EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); @@ -12446,7 +12551,7 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, irqfd->producer = prod; kvm_arch_start_assignment(irqfd->kvm); - ret = static_call(kvm_x86_update_pi_irte)(irqfd->kvm, + ret = static_call(kvm_x86_pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 1); if (ret) @@ -12471,7 +12576,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, * when the irq is masked/disabled or the consumer side (KVM * int this case doesn't want to receive the interrupts. */ - ret = static_call(kvm_x86_update_pi_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0); + ret = static_call(kvm_x86_pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0); if (ret) printk(KERN_INFO "irq bypass consumer (token %p) unregistration" " fails: %d\n", irqfd->consumer.token, ret); @@ -12482,7 +12587,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set) { - return static_call(kvm_x86_update_pi_irte)(kvm, host_irq, guest_irq, set); + return static_call(kvm_x86_pi_update_irte)(kvm, host_irq, guest_irq, set); } bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, @@ -12536,7 +12641,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; struct x86_exception fault; - u32 access = error_code & + u64 access = error_code & (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK); if (!(error_code & PFERR_PRESENT_MASK) || @@ -12876,7 +12981,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log); -EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_accept_irq); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); |