 Documentation/virt/kvm/api.rst | 23 +++++++++++++++++++++++
 arch/x86/kvm/svm/nested.c      |  4 +++-
 arch/x86/kvm/svm/svm.c         |  5 +++++
 arch/x86/kvm/vmx/nested.c      |  6 ++++++
 arch/x86/kvm/vmx/vmx.c         |  4 ++++
 arch/x86/kvm/x86.c             |  6 +++++-
 arch/x86/kvm/x86.h             |  5 +++++
 include/uapi/linux/kvm.h       |  1 +
 tools/include/uapi/linux/kvm.h |  1 +
 9 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index f0d961436d0f..6be1ddedec49 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7844,6 +7844,7 @@ Valid bits in args[0] are::
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
+ #define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
Enabling this capability on a VM provides userspace with a way to no
longer intercept some instructions for improved latency in some
@@ -7854,6 +7855,28 @@ all such vmexits.
Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
+Virtualizing the ``IA32_APERF`` and ``IA32_MPERF`` MSRs requires more
+than just disabling APERF/MPERF exits. Both Intel and AMD document
+strict usage conditions for these MSRs, emphasizing that only the
+ratio of their deltas over a time interval (T0 to T1) is
+architecturally defined. Even so, simply passing the MSRs through
+can still produce an incorrect ratio.
+
+An incorrect ratio can result if, between T0 and T1:
+
+1. The vCPU thread migrates between logical processors.
+2. Live migration or suspend/resume operations take place.
+3. Another task shares the vCPU's logical processor.
+4. C-states deeper than C0 are emulated (e.g., via HLT interception).
+5. The guest TSC frequency doesn't match the host TSC frequency.
+
+Due to these complexities, KVM does not automatically associate this
+passthrough capability with the guest CPUID bit,
+``CPUID.6:ECX.APERFMPERF[bit 0]``. Userspace VMMs that deem this
+mechanism adequate for virtualizing the ``IA32_APERF`` and
+``IA32_MPERF`` MSRs must set the guest CPUID bit explicitly.
+
+
7.14 KVM_CAP_S390_HPAGE_1M
--------------------------
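
The api.rst text above stresses that only the ratio of the APERF/MPERF
deltas over an interval is architecturally meaningful. As an illustration
of what a guest measures once the MSRs are passed through, here is a
minimal guest-side sketch. It assumes a Linux guest with the msr driver
loaded; the device path, CPU number, and one-second interval are
illustrative choices, not part of this patch, and error handling is
trimmed for brevity.

    /*
     * Sample IA32_MPERF (0xe7) and IA32_APERF (0xe8) through the Linux
     * msr driver and report the delta ratio, i.e. the average effective
     * frequency relative to the base (TSC) frequency over the interval.
     */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    #define MSR_IA32_MPERF 0xe7
    #define MSR_IA32_APERF 0xe8

    static uint64_t rdmsr_on_cpu0(int fd, uint32_t msr)
    {
    	uint64_t val = 0;

    	/* The msr driver uses the file offset as the MSR index. */
    	pread(fd, &val, sizeof(val), msr);
    	return val;
    }

    int main(void)
    {
    	int fd = open("/dev/cpu/0/msr", O_RDONLY);
    	uint64_t a0, m0, a1, m1;

    	if (fd < 0)
    		return 1;

    	a0 = rdmsr_on_cpu0(fd, MSR_IA32_APERF);	/* T0 */
    	m0 = rdmsr_on_cpu0(fd, MSR_IA32_MPERF);
    	sleep(1);
    	a1 = rdmsr_on_cpu0(fd, MSR_IA32_APERF);	/* T1 */
    	m1 = rdmsr_on_cpu0(fd, MSR_IA32_MPERF);

    	printf("APERF/MPERF delta ratio: %.3f\n",
    	       (double)(a1 - a0) / (double)(m1 - m0));
    	close(fd);
    	return 0;
    }

Each of the five conditions listed in the documentation perturbs exactly
this ratio, which is why passthrough alone is not a complete
virtualization of the MSRs.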
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 749f7b866ac8..b7fd2e869998 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -194,7 +194,7 @@ void recalc_intercepts(struct vcpu_svm *svm)
* Hardcode the capacity of the array based on the maximum number of _offsets_.
* MSRs are batched together, so there are fewer offsets than MSRs.
*/
-static int nested_svm_msrpm_merge_offsets[6] __ro_after_init;
+static int nested_svm_msrpm_merge_offsets[7] __ro_after_init;
static int nested_svm_nr_msrpm_merge_offsets __ro_after_init;
typedef unsigned long nsvm_msrpm_merge_t;
@@ -216,6 +216,8 @@ int __init nested_svm_init_msrpm_merge_offsets(void)
MSR_IA32_SPEC_CTRL,
MSR_IA32_PRED_CMD,
MSR_IA32_FLUSH_CMD,
+ MSR_IA32_APERF,
+ MSR_IA32_MPERF,
MSR_IA32_LASTBRANCHFROMIP,
MSR_IA32_LASTBRANCHTOIP,
MSR_IA32_LASTINTFROMIP,
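
A note on why the merge-offset array above grows by only one slot for two
new MSRs: the SVM MSR permission map dedicates two intercept bits (read
and write) to each MSR, and the nested merge walks the map in
nsvm_msrpm_merge_t (unsigned long) chunks, so adjacent MSRs usually share
a chunk. A standalone sketch of that batching arithmetic follows; the
helper and macro names are hypothetical, and the low-range mapping (bit
number = MSR index times two) is an assumption drawn from the surrounding
code.

    #include <stdio.h>

    #define SVM_MSRPM_BITS_PER_MSR	2	/* one read bit + one write bit */
    #define MERGE_CHUNK_BITS	(8 * sizeof(unsigned long))

    /* Hypothetical: which merge chunk a low-range (0x0-0x1fff) MSR lands in. */
    static unsigned long merge_chunk(unsigned long msr)
    {
    	return msr * SVM_MSRPM_BITS_PER_MSR / MERGE_CHUNK_BITS;
    }

    int main(void)
    {
    	/* On LP64 both print 7: one new offset covers both new MSRs. */
    	printf("MPERF (0xe7) -> chunk %lu\n", merge_chunk(0xe7));
    	printf("APERF (0xe8) -> chunk %lu\n", merge_chunk(0xe8));
    	return 0;
    }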
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1261447ffcdd..fedf68c63318 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -838,6 +838,11 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
svm_set_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW,
guest_cpuid_is_intel_compatible(vcpu));
+ if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
+ svm_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
+ svm_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
+ }
+
if (sev_es_guest(vcpu->kvm))
sev_es_recalc_msr_intercepts(vcpu);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c69df3aba8d1..b8ea1969113d 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -715,6 +715,12 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_APERF, MSR_TYPE_R);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_MPERF, MSR_TYPE_R);
+
kvm_vcpu_unmap(vcpu, &map);
vmx->nested.force_msr_bitmap_recalc = false;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b064e50c6e64..77bbb2b93418 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4084,6 +4084,10 @@ void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
}
+ if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
+ }
/* PT MSRs can be passed through iff PT is exposed to the guest. */
if (vmx_pt_mode_is_host_guest())
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6dda7bf4c44c..912260e3725d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4577,6 +4577,9 @@ static u64 kvm_get_allowed_disable_exits(void)
{
u64 r = KVM_X86_DISABLE_EXITS_PAUSE;
+ if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+ r |= KVM_X86_DISABLE_EXITS_APERFMPERF;
+
if (!mitigate_smt_rsb) {
r |= KVM_X86_DISABLE_EXITS_HLT |
KVM_X86_DISABLE_EXITS_CSTATE;
@@ -6613,7 +6616,8 @@ split_irqchip_unlock:
if (!mitigate_smt_rsb && boot_cpu_has_bug(X86_BUG_SMT_RSB) &&
cpu_smt_possible() &&
- (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
+ (cap->args[0] & ~(KVM_X86_DISABLE_EXITS_PAUSE |
+ KVM_X86_DISABLE_EXITS_APERFMPERF)))
pr_warn_once(SMT_RSB_MSG);
kvm_disable_exits(kvm, cap->args[0]);
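
On the userspace side, the new bit rides the existing
KVM_CAP_X86_DISABLE_EXITS flow that this hunk extends: KVM_CHECK_EXTENSION
returns the allowed args[0] bits (including APERFMPERF only when the host
has X86_FEATURE_APERFMPERF), and KVM_ENABLE_CAP applies the chosen bits
VM-wide. A minimal sketch, assuming an already-created VM fd with no
vCPUs yet (recent kernels reject the cap after vCPU creation); the
function name is illustrative.

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Returns 0 on success, -1 if the host cannot pass the MSRs through. */
    static int disable_aperfmperf_exits(int vm_fd)
    {
    	struct kvm_enable_cap cap;
    	int allowed;

    	/* The extension check returns the mask of valid args[0] bits. */
    	allowed = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X86_DISABLE_EXITS);
    	if (allowed < 0 || !(allowed & KVM_X86_DISABLE_EXITS_APERFMPERF))
    		return -1;

    	memset(&cap, 0, sizeof(cap));
    	cap.cap = KVM_CAP_X86_DISABLE_EXITS;
    	cap.args[0] = KVM_X86_DISABLE_EXITS_APERFMPERF;
    	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }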
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 17ec8436e565..e77281b6e2b2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -524,6 +524,11 @@ static inline bool kvm_cstate_in_guest(struct kvm *kvm)
return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_CSTATE;
}
+static inline bool kvm_aperfmperf_in_guest(struct kvm *kvm)
+{
+ return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_APERFMPERF;
+}
+
static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm)
{
return kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_ENABLED;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7a4c35ff03fe..aeb2ca10b190 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -644,6 +644,7 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
+#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index b6ae8ad8934b..eef57c117140 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -617,6 +617,7 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
+#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
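
Finally, as the documentation change spells out, disabling the exits does
not advertise the feature to the guest: userspace must set CPUID.6:ECX[0]
itself, e.g. via KVM_SET_CPUID2. A hypothetical last-step sketch; it
assumes 'cpuid' was already populated (for instance from
KVM_GET_SUPPORTED_CPUID, which does not set this bit) and sized by the
VMM, and the helper name is invented for illustration.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Set CPUID.6:ECX[0] (APERFMPERF) in a prepared kvm_cpuid2 array. */
    static int advertise_aperfmperf(int vcpu_fd, struct kvm_cpuid2 *cpuid)
    {
    	__u32 i;

    	for (i = 0; i < cpuid->nent; i++) {
    		if (cpuid->entries[i].function == 6) {
    			cpuid->entries[i].ecx |= 1 << 0;
    			break;
    		}
    	}
    	return ioctl(vcpu_fd, KVM_SET_CPUID2, cpuid);
    }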