summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorJim Mattson <jmattson@google.com>2023-09-25 17:34:46 +0000
committerSean Christopherson <seanjc@google.com>2023-09-25 14:42:52 -0700
commit73554b29bd70546c1a9efc9c160641ef1b849358 (patch)
treea65c69bed2cb04ddc574ecf050d8600e892b9af9 /arch/x86
parenta16eb25b09c02a54c1c1b449d4b6cfa2cf3f013a (diff)
KVM: x86/pmu: Synthesize at most one PMI per VM-exit
When the irq_work callback, kvm_pmi_trigger_fn(), is invoked during a VM-exit that also invokes __kvm_perf_overflow() as a result of instruction emulation, kvm_pmu_deliver_pmi() will be called twice before the next VM-entry. Calling kvm_pmu_deliver_pmi() twice is unlikely to be problematic now that KVM sets the LVTPC mask bit when delivering a PMI. But using IRQ work to trigger the PMI is still broken, albeit very theoretically. E.g. if the self-IPI to trigger IRQ work is be delayed long enough for the vCPU to be migrated to a different pCPU, then it's possible for kvm_pmi_trigger_fn() to race with the kvm_pmu_deliver_pmi() from KVM_REQ_PMI and still generate two PMIs. KVM could set the mask bit using an atomic operation, but that'd just be piling on unnecessary code to workaround what is effectively a hack. The *only* reason KVM uses IRQ work is to ensure the PMI is treated as a wake event, e.g. if the vCPU just executed HLT. Remove the irq_work callback for synthesizing a PMI, and all of the logic for invoking it. Instead, to prevent a vcpu from leaving C0 with a PMI pending, add a check for KVM_REQ_PMI to kvm_vcpu_has_events(). Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions") Signed-off-by: Jim Mattson <jmattson@google.com> Tested-by: Mingwei Zhang <mizhang@google.com> Tested-by: Dapeng Mi <dapeng1.mi@linux.intel.com> Signed-off-by: Mingwei Zhang <mizhang@google.com> Link: https://lore.kernel.org/r/20230925173448.3518223-2-mizhang@google.com [sean: massage changelog] Signed-off-by: Sean Christopherson <seanjc@google.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/kvm/pmu.c27
-rw-r--r--arch/x86/kvm/x86.c3
3 files changed, 4 insertions, 27 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 17715cb8731d..70d139406bc8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -528,7 +528,6 @@ struct kvm_pmu {
u64 raw_event_mask;
struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
- struct irq_work irq_work;
/*
* Overlay the bitmap with a 64-bit atomic so that all bits can be
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index edb89b51b383..9ae07db6f0f6 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -93,14 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}
-static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
-{
- struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
- struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
-
- kvm_pmu_deliver_pmi(vcpu);
-}
-
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -124,20 +116,7 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
}
- if (!pmc->intr || skip_pmi)
- return;
-
- /*
- * Inject PMI. If vcpu was in a guest mode during NMI PMI
- * can be ejected on a guest mode re-entry. Otherwise we can't
- * be sure that vcpu wasn't executing hlt instruction at the
- * time of vmexit and is not going to re-enter guest mode until
- * woken up. So we should wake it, but this is impossible from
- * NMI context. Do it from irq work instead.
- */
- if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
- irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
- else
+ if (pmc->intr && !skip_pmi)
kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}
@@ -675,9 +654,6 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-
- irq_work_sync(&pmu->irq_work);
static_call(kvm_x86_pmu_reset)(vcpu);
}
@@ -687,7 +663,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
memset(pmu, 0, sizeof(*pmu));
static_call(kvm_x86_pmu_init)(vcpu);
- init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
pmu->event_count = 0;
pmu->need_cleanup = false;
kvm_pmu_refresh(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9f18b06bbda6..42a4e8f5e89a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12843,6 +12843,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
return true;
#endif
+ if (kvm_test_request(KVM_REQ_PMI, vcpu))
+ return true;
+
if (kvm_arch_interrupt_allowed(vcpu) &&
(kvm_cpu_has_interrupt(vcpu) ||
kvm_guest_apic_has_interrupt(vcpu)))