diff options
Diffstat (limited to 'arch/x86/kvm/vmx/pmu_intel.c')
-rw-r--r-- | arch/x86/kvm/vmx/pmu_intel.c | 272 |
1 files changed, 114 insertions, 158 deletions
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 315c7c2ba89b..77012b2eca0e 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -20,53 +20,19 @@ #include "nested.h" #include "pmu.h" -#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0) - -enum intel_pmu_architectural_events { - /* - * The order of the architectural events matters as support for each - * event is enumerated via CPUID using the index of the event. - */ - INTEL_ARCH_CPU_CYCLES, - INTEL_ARCH_INSTRUCTIONS_RETIRED, - INTEL_ARCH_REFERENCE_CYCLES, - INTEL_ARCH_LLC_REFERENCES, - INTEL_ARCH_LLC_MISSES, - INTEL_ARCH_BRANCHES_RETIRED, - INTEL_ARCH_BRANCHES_MISPREDICTED, - - NR_REAL_INTEL_ARCH_EVENTS, - - /* - * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a. - * TSC reference cycles. The architectural reference cycles event may - * or may not actually use the TSC as the reference, e.g. might use the - * core crystal clock or the bus clock (yeah, "architectural"). - */ - PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS, - NR_INTEL_ARCH_EVENTS, -}; +/* + * Perf's "BASE" is wildly misleading, architectural PMUs use bits 31:16 of ECX + * to encode the "type" of counter to read, i.e. this is not a "base". And to + * further confuse things, non-architectural PMUs use bit 31 as a flag for + * "fast" reads, whereas the "type" is an explicit value. + */ +#define INTEL_RDPMC_GP 0 +#define INTEL_RDPMC_FIXED INTEL_PMC_FIXED_RDPMC_BASE -static struct { - u8 eventsel; - u8 unit_mask; -} const intel_arch_events[] = { - [INTEL_ARCH_CPU_CYCLES] = { 0x3c, 0x00 }, - [INTEL_ARCH_INSTRUCTIONS_RETIRED] = { 0xc0, 0x00 }, - [INTEL_ARCH_REFERENCE_CYCLES] = { 0x3c, 0x01 }, - [INTEL_ARCH_LLC_REFERENCES] = { 0x2e, 0x4f }, - [INTEL_ARCH_LLC_MISSES] = { 0x2e, 0x41 }, - [INTEL_ARCH_BRANCHES_RETIRED] = { 0xc4, 0x00 }, - [INTEL_ARCH_BRANCHES_MISPREDICTED] = { 0xc5, 0x00 }, - [PSEUDO_ARCH_REFERENCE_CYCLES] = { 0x00, 0x03 }, -}; +#define INTEL_RDPMC_TYPE_MASK GENMASK(31, 16) +#define INTEL_RDPMC_INDEX_MASK GENMASK(15, 0) -/* mapping between fixed pmc index and intel_arch_events array */ -static int fixed_pmc_events[] = { - [0] = INTEL_ARCH_INSTRUCTIONS_RETIRED, - [1] = INTEL_ARCH_CPU_CYCLES, - [2] = PSEUDO_ARCH_REFERENCE_CYCLES, -}; +#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0) static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) { @@ -84,83 +50,67 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i); - __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use); + __set_bit(KVM_FIXED_PMC_BASE_IDX + i, pmu->pmc_in_use); kvm_pmu_request_counter_reprogram(pmc); } } -static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) -{ - if (pmc_idx < INTEL_PMC_IDX_FIXED) { - return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx, - MSR_P6_EVNTSEL0); - } else { - u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED; - - return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0); - } -} - -static bool intel_hw_event_available(struct kvm_pmc *pmc) -{ - struct kvm_pmu *pmu = pmc_to_pmu(pmc); - u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT; - u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; - int i; - - BUILD_BUG_ON(ARRAY_SIZE(intel_arch_events) != NR_INTEL_ARCH_EVENTS); - - /* - * Disallow events reported as unavailable in guest CPUID. Note, this - * doesn't apply to pseudo-architectural events. - */ - for (i = 0; i < NR_REAL_INTEL_ARCH_EVENTS; i++) { - if (intel_arch_events[i].eventsel != event_select || - intel_arch_events[i].unit_mask != unit_mask) - continue; - - return pmu->available_event_types & BIT(i); - } - - return true; -} - -static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) -{ - struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - bool fixed = idx & (1u << 30); - - idx &= ~(3u << 30); - - return fixed ? idx < pmu->nr_arch_fixed_counters - : idx < pmu->nr_arch_gp_counters; -} - static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, unsigned int idx, u64 *mask) { + unsigned int type = idx & INTEL_RDPMC_TYPE_MASK; struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - bool fixed = idx & (1u << 30); struct kvm_pmc *counters; unsigned int num_counters; + u64 bitmask; + + /* + * The encoding of ECX for RDPMC is different for architectural versus + * non-architecturals PMUs (PMUs with version '0'). For architectural + * PMUs, bits 31:16 specify the PMC type and bits 15:0 specify the PMC + * index. For non-architectural PMUs, bit 31 is a "fast" flag, and + * bits 30:0 specify the PMC index. + * + * Yell and reject attempts to read PMCs for a non-architectural PMU, + * as KVM doesn't support such PMUs. + */ + if (WARN_ON_ONCE(!pmu->version)) + return NULL; - idx &= ~(3u << 30); - if (fixed) { + /* + * General Purpose (GP) PMCs are supported on all PMUs, and fixed PMCs + * are supported on all architectural PMUs, i.e. on all virtual PMUs + * supported by KVM. Note, KVM only emulates fixed PMCs for PMU v2+, + * but the type itself is still valid, i.e. let RDPMC fail due to + * accessing a non-existent counter. Reject attempts to read all other + * types, which are unknown/unsupported. + */ + switch (type) { + case INTEL_RDPMC_FIXED: counters = pmu->fixed_counters; num_counters = pmu->nr_arch_fixed_counters; - } else { + bitmask = pmu->counter_bitmask[KVM_PMC_FIXED]; + break; + case INTEL_RDPMC_GP: counters = pmu->gp_counters; num_counters = pmu->nr_arch_gp_counters; + bitmask = pmu->counter_bitmask[KVM_PMC_GP]; + break; + default: + return NULL; } + + idx &= INTEL_RDPMC_INDEX_MASK; if (idx >= num_counters) return NULL; - *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP]; + + *mask &= bitmask; return &counters[array_index_nospec(idx, num_counters)]; } static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu) { - if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM)) return 0; return vcpu->arch.perf_capabilities; @@ -210,7 +160,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT; break; case MSR_IA32_DS_AREA: - ret = guest_cpuid_has(vcpu, X86_FEATURE_DS); + ret = guest_cpu_cap_has(vcpu, X86_FEATURE_DS); break; case MSR_PEBS_DATA_CFG: perf_capabilities = vcpu_get_perf_capabilities(vcpu); @@ -398,14 +348,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: - if (data & pmu->fixed_ctr_ctrl_mask) + if (data & pmu->fixed_ctr_ctrl_rsvd) return 1; if (pmu->fixed_ctr_ctrl != data) reprogram_fixed_counters(pmu, data); break; case MSR_IA32_PEBS_ENABLE: - if (data & pmu->pebs_enable_mask) + if (data & pmu->pebs_enable_rsvd) return 1; if (pmu->pebs_enable != data) { @@ -415,13 +365,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; case MSR_IA32_DS_AREA: - if (is_noncanonical_address(data, vcpu)) + if (is_noncanonical_msr_address(data, vcpu)) return 1; pmu->ds_area = data; break; case MSR_PEBS_DATA_CFG: - if (data & pmu->pebs_data_cfg_mask) + if (data & pmu->pebs_data_cfg_rsvd) return 1; pmu->pebs_data_cfg = data; @@ -464,20 +414,46 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 0; } -static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu) +/* + * Map fixed counter events to architectural general purpose event encodings. + * Perf doesn't provide APIs to allow KVM to directly program a fixed counter, + * and so KVM instead programs the architectural event to effectively request + * the fixed counter. Perf isn't guaranteed to use a fixed counter and may + * instead program the encoding into a general purpose counter, e.g. if a + * different perf_event is already utilizing the requested counter, but the end + * result is the same (ignoring the fact that using a general purpose counter + * will likely exacerbate counter contention). + * + * Forcibly inlined to allow asserting on @index at build time, and there should + * never be more than one user. + */ +static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index) { - int i; + const enum perf_hw_id fixed_pmc_perf_ids[] = { + [0] = PERF_COUNT_HW_INSTRUCTIONS, + [1] = PERF_COUNT_HW_CPU_CYCLES, + [2] = PERF_COUNT_HW_REF_CPU_CYCLES, + }; + u64 eventsel; - BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED); + BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUTNERS); + BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUTNERS); - for (i = 0; i < pmu->nr_arch_fixed_counters; i++) { - int index = array_index_nospec(i, KVM_PMC_MAX_FIXED); - struct kvm_pmc *pmc = &pmu->fixed_counters[index]; - u32 event = fixed_pmc_events[index]; + /* + * Yell if perf reports support for a fixed counter but perf doesn't + * have a known encoding for the associated general purpose event. + */ + eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]); + WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed); + return eventsel; +} - pmc->eventsel = (intel_arch_events[event].unit_mask << 8) | - intel_arch_events[event].eventsel; - } +static void intel_pmu_enable_fixed_counter_bits(struct kvm_pmu *pmu, u64 bits) +{ + int i; + + for (i = 0; i < pmu->nr_arch_fixed_counters; i++) + pmu->fixed_ctr_ctrl_rsvd &= ~intel_fixed_bits_by_idx(i, bits); } static void intel_pmu_refresh(struct kvm_vcpu *vcpu) @@ -488,21 +464,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) union cpuid10_eax eax; union cpuid10_edx edx; u64 perf_capabilities; - u64 counter_mask; - int i; - - pmu->nr_arch_gp_counters = 0; - pmu->nr_arch_fixed_counters = 0; - pmu->counter_bitmask[KVM_PMC_GP] = 0; - pmu->counter_bitmask[KVM_PMC_FIXED] = 0; - pmu->version = 0; - pmu->reserved_bits = 0xffffffff00200000ull; - pmu->raw_event_mask = X86_RAW_EVENT_MASK; - pmu->global_ctrl_mask = ~0ull; - pmu->global_status_mask = ~0ull; - pmu->fixed_ctr_ctrl_mask = ~0ull; - pmu->pebs_enable_mask = ~0ull; - pmu->pebs_data_cfg_mask = ~0ull; + u64 counter_rsvd; memset(&lbr_desc->records, 0, sizeof(lbr_desc->records)); @@ -515,8 +477,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) return; entry = kvm_find_cpuid_entry(vcpu, 0xa); - if (!entry || !vcpu->kvm->arch.enable_pmu) + if (!entry) return; + eax.full = entry->eax; edx.full = entry->edx; @@ -543,25 +506,26 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) kvm_pmu_cap.bit_width_fixed); pmu->counter_bitmask[KVM_PMC_FIXED] = ((u64)1 << edx.split.bit_width_fixed) - 1; - setup_fixed_pmc_eventsel(pmu); } - for (i = 0; i < pmu->nr_arch_fixed_counters; i++) - pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4)); - counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) | - (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED)); - pmu->global_ctrl_mask = counter_mask; + intel_pmu_enable_fixed_counter_bits(pmu, INTEL_FIXED_0_KERNEL | + INTEL_FIXED_0_USER | + INTEL_FIXED_0_ENABLE_PMI); + + counter_rsvd = ~(((1ull << pmu->nr_arch_gp_counters) - 1) | + (((1ull << pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX)); + pmu->global_ctrl_rsvd = counter_rsvd; /* * GLOBAL_STATUS and GLOBAL_OVF_CONTROL (a.k.a. GLOBAL_STATUS_RESET) * share reserved bit definitions. The kernel just happens to use * OVF_CTRL for the names. */ - pmu->global_status_mask = pmu->global_ctrl_mask + pmu->global_status_rsvd = pmu->global_ctrl_rsvd & ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF | MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD); if (vmx_pt_mode_is_host_guest()) - pmu->global_status_mask &= + pmu->global_status_rsvd &= ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI; entry = kvm_find_cpuid_entry_index(vcpu, 7, 0); @@ -580,7 +544,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) perf_capabilities = vcpu_get_perf_capabilities(vcpu); if (cpuid_model_is_consistent(vcpu) && (perf_capabilities & PMU_CAP_LBR_FMT)) - x86_perf_get_lbr(&lbr_desc->records); + memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps)); else lbr_desc->records.nr = 0; @@ -589,15 +553,12 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) if (perf_capabilities & PERF_CAP_PEBS_FORMAT) { if (perf_capabilities & PERF_CAP_PEBS_BASELINE) { - pmu->pebs_enable_mask = counter_mask; + pmu->pebs_enable_rsvd = counter_rsvd; pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE; - for (i = 0; i < pmu->nr_arch_fixed_counters; i++) { - pmu->fixed_ctr_ctrl_mask &= - ~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4)); - } - pmu->pebs_data_cfg_mask = ~0xff00000full; + pmu->pebs_data_cfg_rsvd = ~0xff00000full; + intel_pmu_enable_fixed_counter_bits(pmu, ICL_FIXED_0_ADAPTIVE); } else { - pmu->pebs_enable_mask = + pmu->pebs_enable_rsvd = ~((1ull << pmu->nr_arch_gp_counters) - 1); } } @@ -609,18 +570,19 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu); - for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) { + for (i = 0; i < KVM_MAX_NR_INTEL_GP_COUNTERS; i++) { pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; pmu->gp_counters[i].current_config = 0; } - for (i = 0; i < KVM_PMC_MAX_FIXED; i++) { + for (i = 0; i < KVM_MAX_NR_INTEL_FIXED_COUTNERS; i++) { pmu->fixed_counters[i].type = KVM_PMC_FIXED; pmu->fixed_counters[i].vcpu = vcpu; - pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; + pmu->fixed_counters[i].idx = i + KVM_FIXED_PMC_BASE_IDX; pmu->fixed_counters[i].current_config = 0; + pmu->fixed_counters[i].eventsel = intel_get_fixed_pmc_eventsel(i); } lbr_desc->records.nr = 0; @@ -748,11 +710,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu) struct kvm_pmc *pmc = NULL; int bit, hw_idx; - for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl, - X86_PMC_IDX_MAX) { - pmc = intel_pmc_idx_to_pmc(pmu, bit); - - if (!pmc || !pmc_speculative_in_use(pmc) || + kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) { + if (!pmc_speculative_in_use(pmc) || !pmc_is_globally_enabled(pmc) || !pmc->perf_event) continue; @@ -767,11 +726,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu) } struct kvm_pmu_ops intel_pmu_ops __initdata = { - .hw_event_available = intel_hw_event_available, - .pmc_idx_to_pmc = intel_pmc_idx_to_pmc, .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc, .msr_idx_to_pmc = intel_msr_idx_to_pmc, - .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx, .is_valid_msr = intel_is_valid_msr, .get_msr = intel_pmu_get_msr, .set_msr = intel_pmu_set_msr, @@ -781,6 +737,6 @@ struct kvm_pmu_ops intel_pmu_ops __initdata = { .deliver_pmi = intel_pmu_deliver_pmi, .cleanup = intel_pmu_cleanup, .EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT, - .MAX_NR_GP_COUNTERS = KVM_INTEL_PMC_MAX_GENERIC, + .MAX_NR_GP_COUNTERS = KVM_MAX_NR_INTEL_GP_COUNTERS, .MIN_NR_GP_COUNTERS = 1, }; |