-rw-r--r--  arch/arm64/include/asm/apple_m1_pmu.h    |   1
-rw-r--r--  arch/arm64/include/asm/cpucaps.h         |   2
-rw-r--r--  arch/arm64/include/asm/cpufeature.h      |  28
-rw-r--r--  arch/arm64/kernel/cpu_errata.c           |  44
-rw-r--r--  arch/arm64/kernel/cpufeature.c           |  30
-rw-r--r--  arch/arm64/kernel/image-vars.h           |   5
-rw-r--r--  arch/arm64/kvm/arm.c                     |   4
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h  |   4
-rw-r--r--  arch/arm64/kvm/hyp/vhe/switch.c          |  22
-rw-r--r--  arch/arm64/kvm/pmu-emul.c                | 138
-rw-r--r--  arch/arm64/kvm/pmu.c                     |  10
-rw-r--r--  arch/arm64/tools/cpucaps                 |   2
-rw-r--r--  drivers/perf/apple_m1_cpu_pmu.c          | 101
-rw-r--r--  include/kvm/arm_pmu.h                    |  12
-rw-r--r--  include/linux/perf/arm_pmu.h             |   4
15 files changed, 303 insertions, 104 deletions
diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h
index 99483b19b99f..02e05d05851f 100644
--- a/arch/arm64/include/asm/apple_m1_pmu.h
+++ b/arch/arm64/include/asm/apple_m1_pmu.h
@@ -37,6 +37,7 @@
#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44)
#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0)
+#define SYS_IMP_APL_PMCR1_EL12 sys_reg(3, 1, 15, 7, 2)
#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8)
#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16)
#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40)
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 0b5ca6e0eb09..9d769291a306 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -71,6 +71,8 @@ cpucap_is_possible(const unsigned int cap)
* KVM MPAM support doesn't rely on the host kernel supporting MPAM.
*/
return true;
+ case ARM64_HAS_PMUV3:
+ return IS_ENABLED(CONFIG_HW_PERF_EVENTS);
}
return true;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index e0e4478f5fb5..c4326f1cb917 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -525,29 +525,6 @@ cpuid_feature_extract_unsigned_field(u64 features, int field)
return cpuid_feature_extract_unsigned_field_width(features, field, 4);
}
-/*
- * Fields that identify the version of the Performance Monitors Extension do
- * not follow the standard ID scheme. See ARM DDI 0487E.a page D13-2825,
- * "Alternative ID scheme used for the Performance Monitors Extension version".
- */
-static inline u64 __attribute_const__
-cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap)
-{
- u64 val = cpuid_feature_extract_unsigned_field(features, field);
- u64 mask = GENMASK_ULL(field + 3, field);
-
- /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */
- if (val == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
- val = 0;
-
- if (val > cap) {
- features &= ~mask;
- features |= (cap << field) & mask;
- }
-
- return features;
-}
-
static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp)
{
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
@@ -866,6 +843,11 @@ static __always_inline bool system_supports_mpam_hcr(void)
return alternative_has_cap_unlikely(ARM64_MPAM_HCR);
}
+static inline bool system_supports_pmuv3(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_PMUV3);
+}
+
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 66869d81c3d5..b55f5f705750 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -247,6 +247,43 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
return is_midr_in_range(&range) && has_dic;
}
+static const struct midr_range impdef_pmuv3_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
+ {},
+};
+
+static bool has_impdef_pmuv3(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ unsigned int pmuver;
+
+ if (!is_kernel_in_hyp_mode())
+ return false;
+
+ pmuver = cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
+ if (pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ return false;
+
+ return is_midr_in_range_list(impdef_pmuv3_cpus);
+}
+
+static void cpu_enable_impdef_pmuv3_traps(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set_s(SYS_HACR_EL2, 0, BIT(56));
+}
+
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
@@ -848,5 +885,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
})),
},
{
+ .desc = "Apple IMPDEF PMUv3 Traps",
+ .capability = ARM64_WORKAROUND_PMUV3_IMPDEF_TRAPS,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = has_impdef_pmuv3,
+ .cpu_enable = cpu_enable_impdef_pmuv3_traps,
+ },
+ {
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 101668b312b0..9c4d6d552b25 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1900,6 +1900,28 @@ static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
}
#endif
+#ifdef CONFIG_HW_PERF_EVENTS
+static bool has_pmuv3(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ unsigned int pmuver;
+
+ /*
+ * PMUVer follows the standard ID scheme for an unsigned field with the
+ * exception of 0xF (IMP_DEF) which is treated specially and implies
+ * FEAT_PMUv3 is not implemented.
+ *
+ * See DDI0487L.a D24.1.3.2 for more details.
+ */
+ pmuver = cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
+ if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ return false;
+
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP;
+}
+#endif
+
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
@@ -3011,6 +3033,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, GCS, IMP)
},
#endif
+#ifdef CONFIG_HW_PERF_EVENTS
+ {
+ .desc = "PMUv3",
+ .capability = ARM64_HAS_PMUV3,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_pmuv3,
+ },
+#endif
{},
};
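
For reference, the PMUVer handling above reduces to a simple predicate: the field in ID_AA64DFR0_EL1[11:8] follows the normal unsigned ID scheme, except that 0xF (IMPLEMENTATION DEFINED) means FEAT_PMUv3 is absent. A standalone sketch of that check (illustrative names, not the kernel's cpufeature helpers):

#include <stdbool.h>
#include <stdint.h>

#define PMUVER_SHIFT	8	/* PMUVer occupies ID_AA64DFR0_EL1[11:8] */
#define PMUVER_IMP_DEF	0xf	/* IMPDEF PMU: FEAT_PMUv3 not implemented */
#define PMUVER_IMP	0x1	/* baseline FEAT_PMUv3 */

/* Mirrors the logic of has_pmuv3() above, outside the cpufeature framework. */
static bool dfr0_has_pmuv3(uint64_t dfr0)
{
	unsigned int pmuver = (dfr0 >> PMUVER_SHIFT) & 0xf;

	return pmuver != PMUVER_IMP_DEF && pmuver >= PMUVER_IMP;
}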
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index de3d081e5a57..5e3c4b58f279 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -113,11 +113,6 @@ KVM_NVHE_ALIAS(broken_cntvoff_key);
KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
-/* PMU available static key */
-#ifdef CONFIG_HW_PERF_EVENTS
-KVM_NVHE_ALIAS(kvm_arm_pmu_available);
-#endif
-
/* Position-independent library routines */
KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page);
KVM_NVHE_ALIAS_HYP(copy_page, __pi_copy_page);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 3e0a6f3d7370..d6d7c09e56f0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -366,7 +366,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = get_num_wrps();
break;
case KVM_CAP_ARM_PMU_V3:
- r = kvm_arm_support_pmu_v3();
+ r = kvm_supports_guest_pmuv3();
break;
case KVM_CAP_ARM_INJECT_SERROR_ESR:
r = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
@@ -1400,7 +1400,7 @@ static unsigned long system_supported_vcpu_features(void)
if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1))
clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features);
- if (!kvm_arm_support_pmu_v3())
+ if (!kvm_supports_guest_pmuv3())
clear_bit(KVM_ARM_VCPU_PMU_V3, &features);
if (!system_supports_sve())
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 23bbe28eaaf9..b741ea6aefa5 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -244,7 +244,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
* counter, which could make a PMXEVCNTR_EL0 access UNDEF at
* EL1 instead of being trapped to EL2.
*/
- if (kvm_arm_support_pmu_v3()) {
+ if (system_supports_pmuv3()) {
struct kvm_cpu_context *hctxt;
write_sysreg(0, pmselr_el0);
@@ -281,7 +281,7 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
write_sysreg(0, hstr_el2);
- if (kvm_arm_support_pmu_v3()) {
+ if (system_supports_pmuv3()) {
struct kvm_cpu_context *hctxt;
hctxt = host_data_ptr(host_ctxt);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 647737d6e8d0..731a0378ed13 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -527,6 +527,25 @@ static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
return kvm_hyp_handle_sysreg(vcpu, exit_code);
}
+static bool kvm_hyp_handle_impdef(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u64 iss;
+
+ if (!cpus_have_final_cap(ARM64_WORKAROUND_PMUV3_IMPDEF_TRAPS))
+ return false;
+
+ /*
+ * Compute a synthetic ESR for a sysreg trap. Conveniently, AFSR1_EL2
+ * is populated with a correct ISS for a sysreg trap. These fruity
+ * parts are 64bit only, so unconditionally set IL.
+ */
+ iss = ESR_ELx_ISS(read_sysreg_s(SYS_AFSR1_EL2));
+ vcpu->arch.fault.esr_el2 = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SYS64) |
+ FIELD_PREP(ESR_ELx_ISS_MASK, iss) |
+ ESR_ELx_IL;
+ return false;
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
@@ -538,6 +557,9 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_ERET] = kvm_hyp_handle_eret,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
+
+ /* Apple shenanigans */
+ [0x3F] = kvm_hyp_handle_impdef,
};
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
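
The synthetic ESR built in kvm_hyp_handle_impdef() above only needs three architected ESR_ELx fields: EC in bits [31:26] (0x18 for a trapped AArch64 MSR/MRS), IL at bit 25, and the ISS taken from AFSR1_EL2 in bits [24:0]. A minimal standalone sketch of that layout (illustrative macros, not the kernel's ESR_ELx_* definitions):

#include <stdint.h>

#define EC_SYS64	0x18ULL			/* trapped AArch64 MSR/MRS/system instruction */
#define ESR_EC(ec)	((uint64_t)(ec) << 26)	/* EC lives in ESR_ELx[31:26] */
#define ESR_IL		(1ULL << 25)		/* 32-bit instruction length */
#define ESR_ISS(iss)	((uint64_t)(iss) & 0x1ffffffULL)	/* ISS lives in ESR_ELx[24:0] */

/* Same shape as the fault.esr_el2 value composed in kvm_hyp_handle_impdef(). */
static inline uint64_t synth_sysreg_esr(uint64_t afsr1_iss)
{
	return ESR_EC(EC_SYS64) | ESR_IL | ESR_ISS(afsr1_iss);
}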
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 6c5950b9ceac..51fb47e0bf89 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -17,8 +17,6 @@
#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0)
-DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
-
static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);
@@ -26,6 +24,12 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);
static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc);
+bool kvm_supports_guest_pmuv3(void)
+{
+ guard(mutex)(&arm_pmus_lock);
+ return !list_empty(&arm_pmus);
+}
+
static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
{
return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
@@ -673,6 +677,20 @@ static bool kvm_pmc_counts_at_el2(struct kvm_pmc *pmc)
return kvm_pmc_read_evtreg(pmc) & ARMV8_PMU_INCLUDE_EL2;
}
+static int kvm_map_pmu_event(struct kvm *kvm, unsigned int eventsel)
+{
+ struct arm_pmu *pmu = kvm->arch.arm_pmu;
+
+ /*
+ * The CPU PMU likely isn't PMUv3; let the driver provide a mapping
+ * for the guest's PMUv3 event ID.
+ */
+ if (unlikely(pmu->map_pmuv3_event))
+ return pmu->map_pmuv3_event(eventsel);
+
+ return eventsel;
+}
+
/**
* kvm_pmu_create_perf_event - create a perf event for a counter
* @pmc: Counter context
@@ -683,7 +701,8 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
struct perf_event *event;
struct perf_event_attr attr;
- u64 eventsel, evtreg;
+ int eventsel;
+ u64 evtreg;
evtreg = kvm_pmc_read_evtreg(pmc);
@@ -709,6 +728,14 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
!test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
return;
+ /*
+ * Don't create an event if we're running on hardware that requires
+ * PMUv3 event translation and we couldn't find a valid mapping.
+ */
+ eventsel = kvm_map_pmu_event(vcpu->kvm, eventsel);
+ if (eventsel < 0)
+ return;
+
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.type = arm_pmu->pmu.type;
attr.size = sizeof(attr);
@@ -786,29 +813,23 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
return;
- mutex_lock(&arm_pmus_lock);
+ guard(mutex)(&arm_pmus_lock);
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
- goto out_unlock;
+ return;
entry->arm_pmu = pmu;
list_add_tail(&entry->entry, &arm_pmus);
-
- if (list_is_singular(&arm_pmus))
- static_branch_enable(&kvm_arm_pmu_available);
-
-out_unlock:
- mutex_unlock(&arm_pmus_lock);
}
static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
- struct arm_pmu *tmp, *pmu = NULL;
struct arm_pmu_entry *entry;
+ struct arm_pmu *pmu;
int cpu;
- mutex_lock(&arm_pmus_lock);
+ guard(mutex)(&arm_pmus_lock);
/*
* It is safe to use a stale cpu to iterate the list of PMUs so long as
@@ -829,21 +850,53 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void)
*/
cpu = raw_smp_processor_id();
list_for_each_entry(entry, &arm_pmus, entry) {
- tmp = entry->arm_pmu;
+ pmu = entry->arm_pmu;
- if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
- pmu = tmp;
- break;
- }
+ if (cpumask_test_cpu(cpu, &pmu->supported_cpus))
+ return pmu;
}
- mutex_unlock(&arm_pmus_lock);
+ return NULL;
+}
+
+static u64 __compute_pmceid(struct arm_pmu *pmu, bool pmceid1)
+{
+ u32 hi[2], lo[2];
- return pmu;
+ bitmap_to_arr32(lo, pmu->pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+ bitmap_to_arr32(hi, pmu->pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+
+ return ((u64)hi[pmceid1] << 32) | lo[pmceid1];
+}
+
+static u64 compute_pmceid0(struct arm_pmu *pmu)
+{
+ u64 val = __compute_pmceid(pmu, 0);
+
+ /* always support SW_INCR */
+ val |= BIT(ARMV8_PMUV3_PERFCTR_SW_INCR);
+ /* always support CHAIN */
+ val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
+ return val;
+}
+
+static u64 compute_pmceid1(struct arm_pmu *pmu)
+{
+ u64 val = __compute_pmceid(pmu, 1);
+
+ /*
+ * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
+ * as RAZ
+ */
+ val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
+ BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
+ BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
+ return val;
}
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
+ struct arm_pmu *cpu_pmu = vcpu->kvm->arch.arm_pmu;
unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
u64 val, mask = 0;
int base, i, nr_events;
@@ -852,19 +905,10 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
return 0;
if (!pmceid1) {
- val = read_sysreg(pmceid0_el0);
- /* always support CHAIN */
- val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
+ val = compute_pmceid0(cpu_pmu);
base = 0;
} else {
- val = read_sysreg(pmceid1_el0);
- /*
- * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
- * as RAZ
- */
- val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
- BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
- BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
+ val = compute_pmceid1(cpu_pmu);
base = 32;
}
@@ -995,6 +1039,13 @@ u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm)
struct arm_pmu *arm_pmu = kvm->arch.arm_pmu;
/*
+ * PMUv3 requires that all event counters are capable of counting any
+ * event, though the same may not be true of non-PMUv3 hardware.
+ */
+ if (cpus_have_final_cap(ARM64_WORKAROUND_PMUV3_IMPDEF_TRAPS))
+ return 1;
+
+ /*
* The arm_pmu->cntr_mask considers the fixed counter(s) as well.
* Ignore those and return only the general-purpose counters.
*/
@@ -1205,13 +1256,26 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
u8 kvm_arm_pmu_get_pmuver_limit(void)
{
- u64 tmp;
+ unsigned int pmuver;
+
+ pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer,
+ read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1));
+
+ /*
+ * Spoof a barebones PMUv3 implementation if the system supports IMPDEF
+ * traps of the PMUv3 sysregs
+ */
+ if (cpus_have_final_cap(ARM64_WORKAROUND_PMUV3_IMPDEF_TRAPS))
+ return ID_AA64DFR0_EL1_PMUVer_IMP;
+
+ /*
+ * Otherwise, treat IMPLEMENTATION DEFINED functionality as
+ * unimplemented
+ */
+ if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ return 0;
- tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
- tmp = cpuid_feature_cap_perfmon_field(tmp,
- ID_AA64DFR0_EL1_PMUVer_SHIFT,
- ID_AA64DFR0_EL1_PMUVer_V3P5);
- return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
+ return min(pmuver, ID_AA64DFR0_EL1_PMUVer_V3P5);
}
/**
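
A note on the lo[]/hi[] indexing in __compute_pmceid() above: the architected PMCEID layout puts common events 0x0000-0x001F in PMCEID0_EL0[31:0], events 0x0020-0x003F in PMCEID1_EL0[31:0], and the extended 0x4000-0x403F range in the corresponding upper halves, so the pmceid1 flag can simply select which 32-bit word of each bitmap to use. A standalone sketch of that packing (illustrative, mirroring the helper above):

#include <stdbool.h>
#include <stdint.h>

/*
 * lo[0]/lo[1]: common events 0x0000-0x001F / 0x0020-0x003F
 * hi[0]/hi[1]: extended events 0x4000-0x401F / 0x4020-0x403F
 */
static uint64_t pack_pmceid(const uint32_t lo[2], const uint32_t hi[2], bool pmceid1)
{
	/* PMCEIDn_EL0 = { extended-range word, common-range word } for n = pmceid1 */
	return ((uint64_t)hi[pmceid1] << 32) | lo[pmceid1];
}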
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 0b3adf3e17b4..6b48a3d16d0d 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -41,7 +41,7 @@ void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3() || !kvm_pmu_switch_needed(attr))
+ if (!system_supports_pmuv3() || !kvm_pmu_switch_needed(attr))
return;
if (!attr->exclude_host)
@@ -57,7 +57,7 @@ void kvm_clr_pmu_events(u64 clr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3())
+ if (!system_supports_pmuv3())
return;
pmu->events_host &= ~clr;
@@ -133,7 +133,7 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
struct kvm_pmu_events *pmu;
u64 events_guest, events_host;
- if (!kvm_arm_support_pmu_v3() || !has_vhe())
+ if (!system_supports_pmuv3() || !has_vhe())
return;
preempt_disable();
@@ -154,7 +154,7 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
struct kvm_pmu_events *pmu;
u64 events_guest, events_host;
- if (!kvm_arm_support_pmu_v3() || !has_vhe())
+ if (!system_supports_pmuv3() || !has_vhe())
return;
pmu = kvm_get_pmu_events();
@@ -180,7 +180,7 @@ bool kvm_set_pmuserenr(u64 val)
struct kvm_cpu_context *hctxt;
struct kvm_vcpu *vcpu;
- if (!kvm_arm_support_pmu_v3() || !has_vhe())
+ if (!system_supports_pmuv3() || !has_vhe())
return false;
vcpu = kvm_get_running_vcpu();
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 1e65f2fb45bd..772c1b008e43 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -45,6 +45,7 @@ HAS_LSE_ATOMICS
HAS_MOPS
HAS_NESTED_VIRT
HAS_PAN
+HAS_PMUV3
HAS_S1PIE
HAS_S1POE
HAS_RAS_EXTN
@@ -104,6 +105,7 @@ WORKAROUND_CAVIUM_TX2_219_TVM
WORKAROUND_CLEAN_CACHE
WORKAROUND_DEVICE_LOAD_ACQUIRE
WORKAROUND_NVIDIA_CARMEL_CNP
+WORKAROUND_PMUV3_IMPDEF_TRAPS
WORKAROUND_QCOM_FALKOR_E1003
WORKAROUND_QCOM_ORYON_CNTVOFF
WORKAROUND_REPEAT_TLBI
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
index 06fd317529fc..6be703619a97 100644
--- a/drivers/perf/apple_m1_cpu_pmu.c
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -12,6 +12,7 @@
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
+#include <linux/perf/arm_pmuv3.h>
#include <linux/platform_device.h>
#include <asm/apple_m1_pmu.h>
@@ -120,6 +121,8 @@ enum m1_pmu_events {
*/
M1_PMU_CFG_COUNT_USER = BIT(8),
M1_PMU_CFG_COUNT_KERNEL = BIT(9),
+ M1_PMU_CFG_COUNT_HOST = BIT(10),
+ M1_PMU_CFG_COUNT_GUEST = BIT(11),
};
/*
@@ -172,6 +175,17 @@ static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_BRANCH_MISSES] = M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC,
};
+#define M1_PMUV3_EVENT_MAP(pmuv3_event, m1_event) \
+ [ARMV8_PMUV3_PERFCTR_##pmuv3_event] = M1_PMU_PERFCTR_##m1_event
+
+static const u16 m1_pmu_pmceid_map[ARMV8_PMUV3_MAX_COMMON_EVENTS] = {
+ [0 ... ARMV8_PMUV3_MAX_COMMON_EVENTS - 1] = HW_OP_UNSUPPORTED,
+ M1_PMUV3_EVENT_MAP(INST_RETIRED, INST_ALL),
+ M1_PMUV3_EVENT_MAP(CPU_CYCLES, CORE_ACTIVE_CYCLE),
+ M1_PMUV3_EVENT_MAP(BR_RETIRED, INST_BRANCH),
+ M1_PMUV3_EVENT_MAP(BR_MIS_PRED_RETIRED, BRANCH_MISPRED_NONSPEC),
+};
+
/* sysfs definitions */
static ssize_t m1_pmu_events_sysfs_show(struct device *dev,
struct device_attribute *attr,
@@ -327,11 +341,10 @@ static void m1_pmu_disable_counter_interrupt(unsigned int index)
__m1_pmu_enable_counter_interrupt(index, false);
}
-static void m1_pmu_configure_counter(unsigned int index, u8 event,
- bool user, bool kernel)
+static void __m1_pmu_configure_event_filter(unsigned int index, bool user,
+ bool kernel, bool host)
{
- u64 val, user_bit, kernel_bit;
- int shift;
+ u64 clear, set, user_bit, kernel_bit;
switch (index) {
case 0 ... 7:
@@ -346,19 +359,27 @@ static void m1_pmu_configure_counter(unsigned int index, u8 event,
BUG();
}
- val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1);
-
+ clear = set = 0;
if (user)
- val |= user_bit;
+ set |= user_bit;
else
- val &= ~user_bit;
+ clear |= user_bit;
if (kernel)
- val |= kernel_bit;
+ set |= kernel_bit;
else
- val &= ~kernel_bit;
+ clear |= kernel_bit;
+
+ if (host)
+ sysreg_clear_set_s(SYS_IMP_APL_PMCR1_EL1, clear, set);
+ else if (is_kernel_in_hyp_mode())
+ sysreg_clear_set_s(SYS_IMP_APL_PMCR1_EL12, clear, set);
+}
- write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1);
+static void __m1_pmu_configure_eventsel(unsigned int index, u8 event)
+{
+ u64 clear = 0, set = 0;
+ int shift;
/*
* Counters 0 and 1 have fixed events. For anything else,
@@ -371,21 +392,32 @@ static void m1_pmu_configure_counter(unsigned int index, u8 event,
break;
case 2 ... 5:
shift = (index - 2) * 8;
- val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
- val &= ~((u64)0xff << shift);
- val |= (u64)event << shift;
- write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1);
+ clear |= (u64)0xff << shift;
+ set |= (u64)event << shift;
+ sysreg_clear_set_s(SYS_IMP_APL_PMESR0_EL1, clear, set);
break;
case 6 ... 9:
shift = (index - 6) * 8;
- val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
- val &= ~((u64)0xff << shift);
- val |= (u64)event << shift;
- write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1);
+ clear |= (u64)0xff << shift;
+ set |= (u64)event << shift;
+ sysreg_clear_set_s(SYS_IMP_APL_PMESR1_EL1, clear, set);
break;
}
}
+static void m1_pmu_configure_counter(unsigned int index, unsigned long config_base)
+{
+ bool kernel = config_base & M1_PMU_CFG_COUNT_KERNEL;
+ bool guest = config_base & M1_PMU_CFG_COUNT_GUEST;
+ bool host = config_base & M1_PMU_CFG_COUNT_HOST;
+ bool user = config_base & M1_PMU_CFG_COUNT_USER;
+ u8 evt = config_base & M1_PMU_CFG_EVENT;
+
+ __m1_pmu_configure_event_filter(index, user && host, kernel && host, true);
+ __m1_pmu_configure_event_filter(index, user && guest, kernel && guest, false);
+ __m1_pmu_configure_eventsel(index, evt);
+}
+
/* arm_pmu backend */
static void m1_pmu_enable_event(struct perf_event *event)
{
@@ -400,7 +432,7 @@ static void m1_pmu_enable_event(struct perf_event *event)
m1_pmu_disable_counter(event->hw.idx);
isb();
- m1_pmu_configure_counter(event->hw.idx, evt, user, kernel);
+ m1_pmu_configure_counter(event->hw.idx, event->hw.config_base);
m1_pmu_enable_counter(event->hw.idx);
m1_pmu_enable_counter_interrupt(event->hw.idx);
isb();
@@ -538,6 +570,26 @@ static int m2_pmu_map_event(struct perf_event *event)
return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
}
+static int m1_pmu_map_pmuv3_event(unsigned int eventsel)
+{
+ u16 m1_event = HW_OP_UNSUPPORTED;
+
+ if (eventsel < ARMV8_PMUV3_MAX_COMMON_EVENTS)
+ m1_event = m1_pmu_pmceid_map[eventsel];
+
+ return m1_event == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : m1_event;
+}
+
+static void m1_pmu_init_pmceid(struct arm_pmu *pmu)
+{
+ unsigned int event;
+
+ for (event = 0; event < ARMV8_PMUV3_MAX_COMMON_EVENTS; event++) {
+ if (m1_pmu_map_pmuv3_event(event) >= 0)
+ set_bit(event, pmu->pmceid_bitmap);
+ }
+}
+
static void m1_pmu_reset(void *info)
{
int i;
@@ -558,7 +610,7 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event,
{
unsigned long config_base = 0;
- if (!attr->exclude_guest) {
+ if (!attr->exclude_guest && !is_kernel_in_hyp_mode()) {
pr_debug("ARM performance counters do not support mode exclusion\n");
return -EOPNOTSUPP;
}
@@ -566,6 +618,10 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event,
config_base |= M1_PMU_CFG_COUNT_KERNEL;
if (!attr->exclude_user)
config_base |= M1_PMU_CFG_COUNT_USER;
+ if (!attr->exclude_host)
+ config_base |= M1_PMU_CFG_COUNT_HOST;
+ if (!attr->exclude_guest)
+ config_base |= M1_PMU_CFG_COUNT_GUEST;
event->config_base = config_base;
@@ -594,6 +650,9 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu, u32 flags)
cpu_pmu->reset = m1_pmu_reset;
cpu_pmu->set_event_filter = m1_pmu_set_event_filter;
+ cpu_pmu->map_pmuv3_event = m1_pmu_map_pmuv3_event;
+ m1_pmu_init_pmceid(cpu_pmu);
+
bitmap_set(cpu_pmu->cntr_mask, 0, M1_PMU_NR_COUNTERS);
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group;
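
Other non-PMUv3 drivers could plug into the same interface the way the M1 driver does above: keep a table from PMUv3 common event IDs to native event numbers, expose it through the new map_pmuv3_event() hook, and mark the translatable events in pmceid_bitmap at init. A hedged sketch for a hypothetical driver (the xpmu_* names and native event numbers are made up for illustration):

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/perf/arm_pmu.h>
#include <linux/perf/arm_pmuv3.h>

/* Hypothetical native event encodings for an imaginary non-PMUv3 PMU. */
#define XPMU_EVT_CYCLES		0x02
#define XPMU_EVT_INSTRUCTIONS	0x8c

static const u16 xpmu_pmceid_map[ARMV8_PMUV3_MAX_COMMON_EVENTS] = {
	[0 ... ARMV8_PMUV3_MAX_COMMON_EVENTS - 1]	= HW_OP_UNSUPPORTED,
	[ARMV8_PMUV3_PERFCTR_CPU_CYCLES]		= XPMU_EVT_CYCLES,
	[ARMV8_PMUV3_PERFCTR_INST_RETIRED]		= XPMU_EVT_INSTRUCTIONS,
};

/* KVM passes a guest PMUv3 event ID; return the native event or -EOPNOTSUPP. */
static int xpmu_map_pmuv3_event(unsigned int eventsel)
{
	u16 native = HW_OP_UNSUPPORTED;

	if (eventsel < ARMV8_PMUV3_MAX_COMMON_EVENTS)
		native = xpmu_pmceid_map[eventsel];

	return native == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : native;
}

/* Advertise exactly the translatable events through PMCEID0/1. */
static void xpmu_init_pmceid(struct arm_pmu *pmu)
{
	unsigned int event;

	for (event = 0; event < ARMV8_PMUV3_MAX_COMMON_EVENTS; event++) {
		if (xpmu_map_pmuv3_event(event) >= 0)
			set_bit(event, pmu->pmceid_bitmap);
	}
}

At probe time such a driver would set cpu_pmu->map_pmuv3_event = xpmu_map_pmuv3_event and call xpmu_init_pmceid(cpu_pmu), in the same way m1_pmu_init() wires up the Apple parts above.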
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 147bd3ee4f7b..58fc7f932b3f 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -37,13 +37,7 @@ struct arm_pmu_entry {
struct arm_pmu *arm_pmu;
};
-DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
-
-static __always_inline bool kvm_arm_support_pmu_v3(void)
-{
- return static_branch_likely(&kvm_arm_pmu_available);
-}
-
+bool kvm_supports_guest_pmuv3(void);
#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS)
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
@@ -86,7 +80,7 @@ void kvm_vcpu_pmu_resync_el0(void);
*/
#define kvm_pmu_update_vcpu_events(vcpu) \
do { \
- if (!has_vhe() && kvm_arm_support_pmu_v3()) \
+ if (!has_vhe() && system_supports_pmuv3()) \
vcpu->arch.pmu.events = *kvm_get_pmu_events(); \
} while (0)
@@ -102,7 +96,7 @@ void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu);
struct kvm_pmu {
};
-static inline bool kvm_arm_support_pmu_v3(void)
+static inline bool kvm_supports_guest_pmuv3(void)
{
return false;
}
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 4b5b83677e3f..7ce6dea5bfa9 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -100,6 +100,10 @@ struct arm_pmu {
void (*stop)(struct arm_pmu *);
void (*reset)(void *);
int (*map_event)(struct perf_event *event);
+ /*
+ * Called by KVM to map the PMUv3 event space onto non-PMUv3 hardware.
+ */
+ int (*map_pmuv3_event)(unsigned int eventsel);
DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS);
bool secure_access; /* 32-bit ARM only */
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40