diff options
Diffstat (limited to 'drivers/perf/arm_spe_pmu.c')
| -rw-r--r-- | drivers/perf/arm_spe_pmu.c | 411 |
1 files changed, 296 insertions, 115 deletions
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index d44bcc29d99c..4801115f2b54 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -12,6 +12,7 @@ #define DRVNAME PMUNAME "_pmu" #define pr_fmt(fmt) DRVNAME ": " fmt +#include <linux/bitfield.h> #include <linux/bitops.h> #include <linux/bug.h> #include <linux/capability.h> @@ -24,8 +25,7 @@ #include <linux/kernel.h> #include <linux/list.h> #include <linux/module.h> -#include <linux/of_address.h> -#include <linux/of_device.h> +#include <linux/of.h> #include <linux/perf_event.h> #include <linux/perf/arm_pmu.h> #include <linux/platform_device.h> @@ -39,6 +39,26 @@ #include <asm/mmu.h> #include <asm/sysreg.h> +/* + * Cache if the event is allowed to trace Context information. + * This allows us to perform the check, i.e, perf_allow_kernel(), + * in the context of the event owner, once, during the event_init(). + */ +#define SPE_PMU_HW_FLAGS_CX 0x00001 + +static_assert((PERF_EVENT_FLAG_ARCH & SPE_PMU_HW_FLAGS_CX) == SPE_PMU_HW_FLAGS_CX); + +static void set_spe_event_has_cx(struct perf_event *event) +{ + if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && !perf_allow_kernel()) + event->hw.flags |= SPE_PMU_HW_FLAGS_CX; +} + +static bool get_spe_event_has_cx(struct perf_event *event) +{ + return !!(event->hw.flags & SPE_PMU_HW_FLAGS_CX); +} + #define ARM_SPE_BUF_PAD_BYTE 0 struct arm_spe_pmu_buf { @@ -64,9 +84,14 @@ struct arm_spe_pmu { #define SPE_PMU_FEAT_ARCH_INST (1UL << 3) #define SPE_PMU_FEAT_LDS (1UL << 4) #define SPE_PMU_FEAT_ERND (1UL << 5) +#define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6) +#define SPE_PMU_FEAT_DISCARD (1UL << 7) +#define SPE_PMU_FEAT_EFT (1UL << 8) +#define SPE_PMU_FEAT_FDS (1UL << 9) #define SPE_PMU_FEAT_DEV_PROBED (1UL << 63) u64 features; + u64 pmsevfr_res0; u16 max_record_sz; u16 align; struct perf_output_handle __percpu *handle; @@ -75,7 +100,8 @@ struct arm_spe_pmu { #define to_spe_pmu(p) (container_of(p, struct arm_spe_pmu, pmu)) /* Convert a free-running index from perf into an SPE buffer offset */ -#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT)) +#define PERF_IDX2OFF(idx, buf) \ + ((idx) % ((unsigned long)(buf)->nr_pages << PAGE_SHIFT)) /* Keep track of our dynamic hotplug state */ static enum cpuhp_state arm_spe_pmu_online; @@ -93,6 +119,7 @@ enum arm_spe_pmu_capabilities { SPE_PMU_CAP_FEAT_MAX, SPE_PMU_CAP_CNT_SZ = SPE_PMU_CAP_FEAT_MAX, SPE_PMU_CAP_MIN_IVAL, + SPE_PMU_CAP_EVENT_FILTER, }; static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = { @@ -100,7 +127,7 @@ static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = { [SPE_PMU_CAP_ERND] = SPE_PMU_FEAT_ERND, }; -static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap) +static u64 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap) { if (cap < SPE_PMU_CAP_FEAT_MAX) return !!(spe_pmu->features & arm_spe_pmu_feat_caps[cap]); @@ -110,6 +137,8 @@ static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap) return spe_pmu->counter_sz; case SPE_PMU_CAP_MIN_IVAL: return spe_pmu->min_period; + case SPE_PMU_CAP_EVENT_FILTER: + return ~spe_pmu->pmsevfr_res0; default: WARN(1, "unknown cap %d\n", cap); } @@ -126,7 +155,19 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev, container_of(attr, struct dev_ext_attribute, attr); int cap = (long)ea->var; - return sysfs_emit(buf, "%u\n", arm_spe_pmu_cap_get(spe_pmu, cap)); + return sysfs_emit(buf, "%llu\n", arm_spe_pmu_cap_get(spe_pmu, cap)); +} + +static ssize_t arm_spe_pmu_cap_show_hex(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev); + struct dev_ext_attribute *ea = + container_of(attr, struct dev_ext_attribute, attr); + int cap = (long)ea->var; + + return sysfs_emit(buf, "0x%llx\n", arm_spe_pmu_cap_get(spe_pmu, cap)); } #define SPE_EXT_ATTR_ENTRY(_name, _func, _var) \ @@ -136,12 +177,15 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev, #define SPE_CAP_EXT_ATTR_ENTRY(_name, _var) \ SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show, _var) +#define SPE_CAP_EXT_ATTR_ENTRY_HEX(_name, _var) \ + SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show_hex, _var) static struct attribute *arm_spe_pmu_cap_attr[] = { SPE_CAP_EXT_ATTR_ENTRY(arch_inst, SPE_PMU_CAP_ARCH_INST), SPE_CAP_EXT_ATTR_ENTRY(ernd, SPE_PMU_CAP_ERND), SPE_CAP_EXT_ATTR_ENTRY(count_size, SPE_PMU_CAP_CNT_SZ), SPE_CAP_EXT_ATTR_ENTRY(min_interval, SPE_PMU_CAP_MIN_IVAL), + SPE_CAP_EXT_ATTR_ENTRY_HEX(event_filter, SPE_PMU_CAP_EVENT_FILTER), NULL, }; @@ -172,6 +216,30 @@ static const struct attribute_group arm_spe_pmu_cap_group = { #define ATTR_CFG_FLD_store_filter_CFG config /* PMSFCR_EL1.ST */ #define ATTR_CFG_FLD_store_filter_LO 34 #define ATTR_CFG_FLD_store_filter_HI 34 +#define ATTR_CFG_FLD_discard_CFG config /* PMBLIMITR_EL1.FM = DISCARD */ +#define ATTR_CFG_FLD_discard_LO 35 +#define ATTR_CFG_FLD_discard_HI 35 +#define ATTR_CFG_FLD_branch_filter_mask_CFG config /* PMSFCR_EL1.Bm */ +#define ATTR_CFG_FLD_branch_filter_mask_LO 36 +#define ATTR_CFG_FLD_branch_filter_mask_HI 36 +#define ATTR_CFG_FLD_load_filter_mask_CFG config /* PMSFCR_EL1.LDm */ +#define ATTR_CFG_FLD_load_filter_mask_LO 37 +#define ATTR_CFG_FLD_load_filter_mask_HI 37 +#define ATTR_CFG_FLD_store_filter_mask_CFG config /* PMSFCR_EL1.STm */ +#define ATTR_CFG_FLD_store_filter_mask_LO 38 +#define ATTR_CFG_FLD_store_filter_mask_HI 38 +#define ATTR_CFG_FLD_simd_filter_CFG config /* PMSFCR_EL1.SIMD */ +#define ATTR_CFG_FLD_simd_filter_LO 39 +#define ATTR_CFG_FLD_simd_filter_HI 39 +#define ATTR_CFG_FLD_simd_filter_mask_CFG config /* PMSFCR_EL1.SIMDm */ +#define ATTR_CFG_FLD_simd_filter_mask_LO 40 +#define ATTR_CFG_FLD_simd_filter_mask_HI 40 +#define ATTR_CFG_FLD_float_filter_CFG config /* PMSFCR_EL1.FP */ +#define ATTR_CFG_FLD_float_filter_LO 41 +#define ATTR_CFG_FLD_float_filter_HI 41 +#define ATTR_CFG_FLD_float_filter_mask_CFG config /* PMSFCR_EL1.FPm */ +#define ATTR_CFG_FLD_float_filter_mask_LO 42 +#define ATTR_CFG_FLD_float_filter_mask_HI 42 #define ATTR_CFG_FLD_event_filter_CFG config1 /* PMSEVFR_EL1 */ #define ATTR_CFG_FLD_event_filter_LO 0 @@ -181,37 +249,33 @@ static const struct attribute_group arm_spe_pmu_cap_group = { #define ATTR_CFG_FLD_min_latency_LO 0 #define ATTR_CFG_FLD_min_latency_HI 11 -/* Why does everything I do descend into this? */ -#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ - (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi - -#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ - __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) - -#define GEN_PMU_FORMAT_ATTR(name) \ - PMU_FORMAT_ATTR(name, \ - _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \ - ATTR_CFG_FLD_##name##_LO, \ - ATTR_CFG_FLD_##name##_HI)) - -#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \ - ((((attr)->cfg) >> lo) & GENMASK(hi - lo, 0)) +#define ATTR_CFG_FLD_inv_event_filter_CFG config3 /* PMSNEVFR_EL1 */ +#define ATTR_CFG_FLD_inv_event_filter_LO 0 +#define ATTR_CFG_FLD_inv_event_filter_HI 63 -#define ATTR_CFG_GET_FLD(attr, name) \ - _ATTR_CFG_GET_FLD(attr, \ - ATTR_CFG_FLD_##name##_CFG, \ - ATTR_CFG_FLD_##name##_LO, \ - ATTR_CFG_FLD_##name##_HI) +#define ATTR_CFG_FLD_inv_data_src_filter_CFG config4 /* inverse of PMSDSFR_EL1 */ +#define ATTR_CFG_FLD_inv_data_src_filter_LO 0 +#define ATTR_CFG_FLD_inv_data_src_filter_HI 63 GEN_PMU_FORMAT_ATTR(ts_enable); GEN_PMU_FORMAT_ATTR(pa_enable); GEN_PMU_FORMAT_ATTR(pct_enable); GEN_PMU_FORMAT_ATTR(jitter); GEN_PMU_FORMAT_ATTR(branch_filter); +GEN_PMU_FORMAT_ATTR(branch_filter_mask); GEN_PMU_FORMAT_ATTR(load_filter); +GEN_PMU_FORMAT_ATTR(load_filter_mask); GEN_PMU_FORMAT_ATTR(store_filter); +GEN_PMU_FORMAT_ATTR(store_filter_mask); +GEN_PMU_FORMAT_ATTR(simd_filter); +GEN_PMU_FORMAT_ATTR(simd_filter_mask); +GEN_PMU_FORMAT_ATTR(float_filter); +GEN_PMU_FORMAT_ATTR(float_filter_mask); GEN_PMU_FORMAT_ATTR(event_filter); +GEN_PMU_FORMAT_ATTR(inv_event_filter); +GEN_PMU_FORMAT_ATTR(inv_data_src_filter); GEN_PMU_FORMAT_ATTR(min_latency); +GEN_PMU_FORMAT_ATTR(discard); static struct attribute *arm_spe_pmu_formats_attr[] = { &format_attr_ts_enable.attr, @@ -219,15 +283,56 @@ static struct attribute *arm_spe_pmu_formats_attr[] = { &format_attr_pct_enable.attr, &format_attr_jitter.attr, &format_attr_branch_filter.attr, + &format_attr_branch_filter_mask.attr, &format_attr_load_filter.attr, + &format_attr_load_filter_mask.attr, &format_attr_store_filter.attr, + &format_attr_store_filter_mask.attr, + &format_attr_simd_filter.attr, + &format_attr_simd_filter_mask.attr, + &format_attr_float_filter.attr, + &format_attr_float_filter_mask.attr, &format_attr_event_filter.attr, + &format_attr_inv_event_filter.attr, + &format_attr_inv_data_src_filter.attr, &format_attr_min_latency.attr, + &format_attr_discard.attr, NULL, }; +static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj, + struct attribute *attr, + int unused) + { + struct device *dev = kobj_to_dev(kobj); + struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev); + + if (attr == &format_attr_discard.attr && !(spe_pmu->features & SPE_PMU_FEAT_DISCARD)) + return 0; + + if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT)) + return 0; + + if (attr == &format_attr_inv_data_src_filter.attr && + !(spe_pmu->features & SPE_PMU_FEAT_FDS)) + return 0; + + if ((attr == &format_attr_branch_filter_mask.attr || + attr == &format_attr_load_filter_mask.attr || + attr == &format_attr_store_filter_mask.attr || + attr == &format_attr_simd_filter.attr || + attr == &format_attr_simd_filter_mask.attr || + attr == &format_attr_float_filter.attr || + attr == &format_attr_float_filter_mask.attr) && + !(spe_pmu->features & SPE_PMU_FEAT_EFT)) + return 0; + + return attr->mode; +} + static const struct attribute_group arm_spe_pmu_format_group = { .name = "format", + .is_visible = arm_spe_pmu_format_attr_is_visible, .attrs = arm_spe_pmu_formats_attr, }; @@ -262,36 +367,39 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event) struct perf_event_attr *attr = &event->attr; u64 reg = 0; - reg |= ATTR_CFG_GET_FLD(attr, ts_enable) << SYS_PMSCR_EL1_TS_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, pa_enable) << SYS_PMSCR_EL1_PA_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, pct_enable) << SYS_PMSCR_EL1_PCT_SHIFT; + reg |= FIELD_PREP(PMSCR_EL1_TS, ATTR_CFG_GET_FLD(attr, ts_enable)); + reg |= FIELD_PREP(PMSCR_EL1_PA, ATTR_CFG_GET_FLD(attr, pa_enable)); + reg |= FIELD_PREP(PMSCR_EL1_PCT, ATTR_CFG_GET_FLD(attr, pct_enable)); if (!attr->exclude_user) - reg |= BIT(SYS_PMSCR_EL1_E0SPE_SHIFT); + reg |= PMSCR_EL1_E0SPE; if (!attr->exclude_kernel) - reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT); + reg |= PMSCR_EL1_E1SPE; - if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable()) - reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT); + if (get_spe_event_has_cx(event)) + reg |= PMSCR_EL1_CX; return reg; } static void arm_spe_event_sanitise_period(struct perf_event *event) { - struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu); u64 period = event->hw.sample_period; - u64 max_period = SYS_PMSIRR_EL1_INTERVAL_MASK - << SYS_PMSIRR_EL1_INTERVAL_SHIFT; + u64 max_period = PMSIRR_EL1_INTERVAL_MASK; - if (period < spe_pmu->min_period) - period = spe_pmu->min_period; - else if (period > max_period) - period = max_period; - else - period &= max_period; + /* + * The PMSIDR_EL1.Interval field (stored in spe_pmu->min_period) is a + * recommendation for the minimum interval, not a hardware limitation. + * + * According to the Arm ARM (DDI 0487 L.a), section D24.7.12 PMSIRR_EL1, + * Sampling Interval Reload Register, the INTERVAL field (bits [31:8]) + * states: "Software must set this to a nonzero value". Use 1 as the + * minimum value. + */ + u64 min_period = FIELD_PREP(PMSIRR_EL1_INTERVAL_MASK, 1); + period = clamp_t(u64, period, min_period, max_period) & max_period; event->hw.sample_period = period; } @@ -302,7 +410,7 @@ static u64 arm_spe_event_to_pmsirr(struct perf_event *event) arm_spe_event_sanitise_period(event); - reg |= ATTR_CFG_GET_FLD(attr, jitter) << SYS_PMSIRR_EL1_RND_SHIFT; + reg |= FIELD_PREP(PMSIRR_EL1_RND, ATTR_CFG_GET_FLD(attr, jitter)); reg |= event->hw.sample_period; return reg; @@ -313,18 +421,31 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event) struct perf_event_attr *attr = &event->attr; u64 reg = 0; - reg |= ATTR_CFG_GET_FLD(attr, load_filter) << SYS_PMSFCR_EL1_LD_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, store_filter) << SYS_PMSFCR_EL1_ST_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, branch_filter) << SYS_PMSFCR_EL1_B_SHIFT; + reg |= FIELD_PREP(PMSFCR_EL1_LD, ATTR_CFG_GET_FLD(attr, load_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_LDm, ATTR_CFG_GET_FLD(attr, load_filter_mask)); + reg |= FIELD_PREP(PMSFCR_EL1_ST, ATTR_CFG_GET_FLD(attr, store_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_STm, ATTR_CFG_GET_FLD(attr, store_filter_mask)); + reg |= FIELD_PREP(PMSFCR_EL1_B, ATTR_CFG_GET_FLD(attr, branch_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_Bm, ATTR_CFG_GET_FLD(attr, branch_filter_mask)); + reg |= FIELD_PREP(PMSFCR_EL1_SIMD, ATTR_CFG_GET_FLD(attr, simd_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_SIMDm, ATTR_CFG_GET_FLD(attr, simd_filter_mask)); + reg |= FIELD_PREP(PMSFCR_EL1_FP, ATTR_CFG_GET_FLD(attr, float_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_FPm, ATTR_CFG_GET_FLD(attr, float_filter_mask)); if (reg) - reg |= BIT(SYS_PMSFCR_EL1_FT_SHIFT); + reg |= PMSFCR_EL1_FT; if (ATTR_CFG_GET_FLD(attr, event_filter)) - reg |= BIT(SYS_PMSFCR_EL1_FE_SHIFT); + reg |= PMSFCR_EL1_FE; + + if (ATTR_CFG_GET_FLD(attr, inv_event_filter)) + reg |= PMSFCR_EL1_FnE; + + if (ATTR_CFG_GET_FLD(attr, inv_data_src_filter)) + reg |= PMSFCR_EL1_FDS; if (ATTR_CFG_GET_FLD(attr, min_latency)) - reg |= BIT(SYS_PMSFCR_EL1_FL_SHIFT); + reg |= PMSFCR_EL1_FL; return reg; } @@ -335,11 +456,27 @@ static u64 arm_spe_event_to_pmsevfr(struct perf_event *event) return ATTR_CFG_GET_FLD(attr, event_filter); } +static u64 arm_spe_event_to_pmsnevfr(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + return ATTR_CFG_GET_FLD(attr, inv_event_filter); +} + static u64 arm_spe_event_to_pmslatfr(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - return ATTR_CFG_GET_FLD(attr, min_latency) - << SYS_PMSLATFR_EL1_MINLAT_SHIFT; + return FIELD_PREP(PMSLATFR_EL1_MINLAT, ATTR_CFG_GET_FLD(attr, min_latency)); +} + +static u64 arm_spe_event_to_pmsdsfr(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + /* + * Data src filter is inverted so that the default value of 0 is + * equivalent to no filtering. + */ + return ~ATTR_CFG_GET_FLD(attr, inv_data_src_filter); } static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len) @@ -476,6 +613,12 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle, u64 base, limit; struct arm_spe_pmu_buf *buf; + if (ATTR_CFG_GET_FLD(&event->attr, discard)) { + limit = FIELD_PREP(PMBLIMITR_EL1_FM, PMBLIMITR_EL1_FM_DISCARD); + limit |= PMBLIMITR_EL1_E; + goto out_write_limit; + } + /* Start a new aux session */ buf = perf_aux_output_begin(handle, event); if (!buf) { @@ -491,7 +634,7 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle, limit = buf->snapshot ? arm_spe_pmu_next_snapshot_off(handle) : arm_spe_pmu_next_off(handle); if (limit) - limit |= BIT(SYS_PMBLIMITR_EL1_E_SHIFT); + limit |= PMBLIMITR_EL1_E; limit += (u64)buf->base; base = (u64)buf->base + PERF_IDX2OFF(handle->head, buf); @@ -550,28 +693,28 @@ arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle) /* Service required? */ pmbsr = read_sysreg_s(SYS_PMBSR_EL1); - if (!(pmbsr & BIT(SYS_PMBSR_EL1_S_SHIFT))) + if (!FIELD_GET(PMBSR_EL1_S, pmbsr)) return SPE_PMU_BUF_FAULT_ACT_SPURIOUS; /* * If we've lost data, disable profiling and also set the PARTIAL * flag to indicate that the last record is corrupted. */ - if (pmbsr & BIT(SYS_PMBSR_EL1_DL_SHIFT)) + if (FIELD_GET(PMBSR_EL1_DL, pmbsr)) perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED | PERF_AUX_FLAG_PARTIAL); /* Report collisions to userspace so that it can up the period */ - if (pmbsr & BIT(SYS_PMBSR_EL1_COLL_SHIFT)) + if (FIELD_GET(PMBSR_EL1_COLL, pmbsr)) perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION); /* We only expect buffer management events */ - switch (pmbsr & (SYS_PMBSR_EL1_EC_MASK << SYS_PMBSR_EL1_EC_SHIFT)) { - case SYS_PMBSR_EL1_EC_BUF: + switch (FIELD_GET(PMBSR_EL1_EC, pmbsr)) { + case PMBSR_EL1_EC_BUF: /* Handled below */ break; - case SYS_PMBSR_EL1_EC_FAULT_S1: - case SYS_PMBSR_EL1_EC_FAULT_S2: + case PMBSR_EL1_EC_FAULT_S1: + case PMBSR_EL1_EC_FAULT_S2: err_str = "Unexpected buffer fault"; goto out_err; default: @@ -580,9 +723,8 @@ arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle) } /* Buffer management event */ - switch (pmbsr & - (SYS_PMBSR_EL1_BUF_BSC_MASK << SYS_PMBSR_EL1_BUF_BSC_SHIFT)) { - case SYS_PMBSR_EL1_BUF_BSC_FULL: + switch (FIELD_GET(PMBSR_EL1_BUF_BSC_MASK, pmbsr)) { + case PMBSR_EL1_BUF_BSC_FULL: ret = SPE_PMU_BUF_FAULT_ACT_OK; goto out_stop; default: @@ -653,18 +795,6 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev) return IRQ_HANDLED; } -static u64 arm_spe_pmsevfr_res0(u16 pmsver) -{ - switch (pmsver) { - case ID_AA64DFR0_PMSVER_8_2: - return SYS_PMSEVFR_EL1_RES0_8_2; - case ID_AA64DFR0_PMSVER_8_3: - /* Return the highest version we support in default */ - default: - return SYS_PMSEVFR_EL1_RES0_8_3; - } -} - /* Perf callbacks */ static int arm_spe_pmu_event_init(struct perf_event *event) { @@ -680,7 +810,14 @@ static int arm_spe_pmu_event_init(struct perf_event *event) !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus)) return -ENOENT; - if (arm_spe_event_to_pmsevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver)) + if (arm_spe_event_to_pmsevfr(event) & spe_pmu->pmsevfr_res0) + return -EOPNOTSUPP; + + if (arm_spe_event_to_pmsnevfr(event) & spe_pmu->pmsevfr_res0) + return -EOPNOTSUPP; + + if (arm_spe_event_to_pmsdsfr(event) != U64_MAX && + !(spe_pmu->features & SPE_PMU_FEAT_FDS)) return -EOPNOTSUPP; if (attr->exclude_idle) @@ -697,24 +834,40 @@ static int arm_spe_pmu_event_init(struct perf_event *event) return -EINVAL; reg = arm_spe_event_to_pmsfcr(event); - if ((reg & BIT(SYS_PMSFCR_EL1_FE_SHIFT)) && + if ((FIELD_GET(PMSFCR_EL1_FE, reg)) && !(spe_pmu->features & SPE_PMU_FEAT_FILT_EVT)) return -EOPNOTSUPP; - if ((reg & BIT(SYS_PMSFCR_EL1_FT_SHIFT)) && + if ((FIELD_GET(PMSFCR_EL1_FnE, reg)) && + !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT)) + return -EOPNOTSUPP; + + if ((FIELD_GET(PMSFCR_EL1_FT, reg)) && !(spe_pmu->features & SPE_PMU_FEAT_FILT_TYP)) return -EOPNOTSUPP; - if ((reg & BIT(SYS_PMSFCR_EL1_FL_SHIFT)) && + if ((FIELD_GET(PMSFCR_EL1_FL, reg)) && !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT)) return -EOPNOTSUPP; + if ((FIELD_GET(PMSFCR_EL1_LDm, reg) || + FIELD_GET(PMSFCR_EL1_STm, reg) || + FIELD_GET(PMSFCR_EL1_Bm, reg) || + FIELD_GET(PMSFCR_EL1_SIMD, reg) || + FIELD_GET(PMSFCR_EL1_SIMDm, reg) || + FIELD_GET(PMSFCR_EL1_FP, reg) || + FIELD_GET(PMSFCR_EL1_FPm, reg)) && + !(spe_pmu->features & SPE_PMU_FEAT_EFT)) + return -EOPNOTSUPP; + + if (ATTR_CFG_GET_FLD(&event->attr, discard) && + !(spe_pmu->features & SPE_PMU_FEAT_DISCARD)) + return -EOPNOTSUPP; + + set_spe_event_has_cx(event); reg = arm_spe_event_to_pmscr(event); - if (!perfmon_capable() && - (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) | - BIT(SYS_PMSCR_EL1_CX_SHIFT) | - BIT(SYS_PMSCR_EL1_PCT_SHIFT)))) - return -EACCES; + if (reg & (PMSCR_EL1_PA | PMSCR_EL1_PCT)) + return perf_allow_kernel(); return 0; } @@ -737,6 +890,16 @@ static void arm_spe_pmu_start(struct perf_event *event, int flags) reg = arm_spe_event_to_pmsevfr(event); write_sysreg_s(reg, SYS_PMSEVFR_EL1); + if (spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT) { + reg = arm_spe_event_to_pmsnevfr(event); + write_sysreg_s(reg, SYS_PMSNEVFR_EL1); + } + + if (spe_pmu->features & SPE_PMU_FEAT_FDS) { + reg = arm_spe_event_to_pmsdsfr(event); + write_sysreg_s(reg, SYS_PMSDSFR_EL1); + } + reg = arm_spe_event_to_pmslatfr(event); write_sysreg_s(reg, SYS_PMSLATFR_EL1); @@ -894,6 +1057,7 @@ static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu) spe_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = &spe_pmu->pdev->dev, .capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE, .attr_groups = arm_spe_pmu_attr_groups, /* @@ -940,7 +1104,7 @@ static void __arm_spe_pmu_dev_probe(void *info) struct device *dev = &spe_pmu->pdev->dev; fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64DFR0_EL1), - ID_AA64DFR0_PMSVER_SHIFT); + ID_AA64DFR0_EL1_PMSVer_SHIFT); if (!fld) { dev_err(dev, "unsupported ID_AA64DFR0_EL1.PMSVer [%d] on CPU %d\n", @@ -951,14 +1115,14 @@ static void __arm_spe_pmu_dev_probe(void *info) /* Read PMBIDR first to determine whether or not we have access */ reg = read_sysreg_s(SYS_PMBIDR_EL1); - if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) { + if (FIELD_GET(PMBIDR_EL1_P, reg)) { dev_err(dev, "profiling buffer owned by higher exception level\n"); return; } /* Minimum alignment. If it's out-of-range, then fail the probe */ - fld = reg >> SYS_PMBIDR_EL1_ALIGN_SHIFT & SYS_PMBIDR_EL1_ALIGN_MASK; + fld = FIELD_GET(PMBIDR_EL1_ALIGN, reg); spe_pmu->align = 1 << fld; if (spe_pmu->align > SZ_2K) { dev_err(dev, "unsupported PMBIDR.Align [%d] on CPU %d\n", @@ -968,58 +1132,70 @@ static void __arm_spe_pmu_dev_probe(void *info) /* It's now safe to read PMSIDR and figure out what we've got */ reg = read_sysreg_s(SYS_PMSIDR_EL1); - if (reg & BIT(SYS_PMSIDR_EL1_FE_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_FE, reg)) spe_pmu->features |= SPE_PMU_FEAT_FILT_EVT; - if (reg & BIT(SYS_PMSIDR_EL1_FT_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_FnE, reg)) + spe_pmu->features |= SPE_PMU_FEAT_INV_FILT_EVT; + + if (FIELD_GET(PMSIDR_EL1_FT, reg)) spe_pmu->features |= SPE_PMU_FEAT_FILT_TYP; - if (reg & BIT(SYS_PMSIDR_EL1_FL_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_FL, reg)) spe_pmu->features |= SPE_PMU_FEAT_FILT_LAT; - if (reg & BIT(SYS_PMSIDR_EL1_ARCHINST_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_ARCHINST, reg)) spe_pmu->features |= SPE_PMU_FEAT_ARCH_INST; - if (reg & BIT(SYS_PMSIDR_EL1_LDS_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_LDS, reg)) spe_pmu->features |= SPE_PMU_FEAT_LDS; - if (reg & BIT(SYS_PMSIDR_EL1_ERND_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_ERND, reg)) spe_pmu->features |= SPE_PMU_FEAT_ERND; + if (spe_pmu->pmsver >= ID_AA64DFR0_EL1_PMSVer_V1P2) + spe_pmu->features |= SPE_PMU_FEAT_DISCARD; + + if (FIELD_GET(PMSIDR_EL1_EFT, reg)) + spe_pmu->features |= SPE_PMU_FEAT_EFT; + + if (FIELD_GET(PMSIDR_EL1_FDS, reg)) + spe_pmu->features |= SPE_PMU_FEAT_FDS; + /* This field has a spaced out encoding, so just use a look-up */ - fld = reg >> SYS_PMSIDR_EL1_INTERVAL_SHIFT & SYS_PMSIDR_EL1_INTERVAL_MASK; + fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg); switch (fld) { - case 0: + case PMSIDR_EL1_INTERVAL_256: spe_pmu->min_period = 256; break; - case 2: + case PMSIDR_EL1_INTERVAL_512: spe_pmu->min_period = 512; break; - case 3: + case PMSIDR_EL1_INTERVAL_768: spe_pmu->min_period = 768; break; - case 4: + case PMSIDR_EL1_INTERVAL_1024: spe_pmu->min_period = 1024; break; - case 5: + case PMSIDR_EL1_INTERVAL_1536: spe_pmu->min_period = 1536; break; - case 6: + case PMSIDR_EL1_INTERVAL_2048: spe_pmu->min_period = 2048; break; - case 7: + case PMSIDR_EL1_INTERVAL_3072: spe_pmu->min_period = 3072; break; default: dev_warn(dev, "unknown PMSIDR_EL1.Interval [%d]; assuming 8\n", fld); fallthrough; - case 8: + case PMSIDR_EL1_INTERVAL_4096: spe_pmu->min_period = 4096; } /* Maximum record size. If it's out-of-range, then fail the probe */ - fld = reg >> SYS_PMSIDR_EL1_MAXSIZE_SHIFT & SYS_PMSIDR_EL1_MAXSIZE_MASK; + fld = FIELD_GET(PMSIDR_EL1_MAXSIZE, reg); spe_pmu->max_record_sz = 1 << fld; if (spe_pmu->max_record_sz > SZ_2K || spe_pmu->max_record_sz < 16) { dev_err(dev, "unsupported PMSIDR_EL1.MaxSize [%d] on CPU %d\n", @@ -1027,19 +1203,26 @@ static void __arm_spe_pmu_dev_probe(void *info) return; } - fld = reg >> SYS_PMSIDR_EL1_COUNTSIZE_SHIFT & SYS_PMSIDR_EL1_COUNTSIZE_MASK; + fld = FIELD_GET(PMSIDR_EL1_COUNTSIZE, reg); switch (fld) { default: dev_warn(dev, "unknown PMSIDR_EL1.CountSize [%d]; assuming 2\n", fld); fallthrough; - case 2: + case PMSIDR_EL1_COUNTSIZE_12_BIT_SAT: spe_pmu->counter_sz = 12; + break; + case PMSIDR_EL1_COUNTSIZE_16_BIT_SAT: + spe_pmu->counter_sz = 16; } + /* Write all 1s and then read back. Unsupported filter bits are RAZ/WI. */ + write_sysreg_s(U64_MAX, SYS_PMSEVFR_EL1); + spe_pmu->pmsevfr_res0 = ~read_sysreg_s(SYS_PMSEVFR_EL1); + dev_info(dev, - "probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n", - cpumask_pr_args(&spe_pmu->supported_cpus), + "probed SPEv1.%d for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n", + spe_pmu->pmsver - 1, cpumask_pr_args(&spe_pmu->supported_cpus), spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features); spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED; @@ -1113,8 +1296,8 @@ static int arm_spe_pmu_dev_init(struct arm_spe_pmu *spe_pmu) return -ENXIO; /* Request our PPIs (note that the IRQ is still disabled) */ - ret = request_percpu_irq(spe_pmu->irq, arm_spe_pmu_irq_handler, DRVNAME, - spe_pmu->handle); + ret = request_percpu_irq_affinity(spe_pmu->irq, arm_spe_pmu_irq_handler, + DRVNAME, mask, spe_pmu->handle); if (ret) return ret; @@ -1141,8 +1324,10 @@ static void arm_spe_pmu_dev_teardown(struct arm_spe_pmu *spe_pmu) static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu) { struct platform_device *pdev = spe_pmu->pdev; - int irq = platform_get_irq(pdev, 0); + const struct cpumask *affinity; + int irq; + irq = platform_get_irq_affinity(pdev, 0, &affinity); if (irq < 0) return -ENXIO; @@ -1151,10 +1336,7 @@ static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu) return -EINVAL; } - if (irq_get_percpu_devid_partition(irq, &spe_pmu->supported_cpus)) { - dev_err(&pdev->dev, "failed to get PPI partition (%d)\n", irq); - return -EINVAL; - } + cpumask_copy(&spe_pmu->supported_cpus, affinity); spe_pmu->irq = irq; return 0; @@ -1219,14 +1401,13 @@ out_free_handle: return ret; } -static int arm_spe_pmu_device_remove(struct platform_device *pdev) +static void arm_spe_pmu_device_remove(struct platform_device *pdev) { struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev); arm_spe_pmu_perf_destroy(spe_pmu); arm_spe_pmu_dev_teardown(spe_pmu); free_percpu(spe_pmu->handle); - return 0; } static struct platform_driver arm_spe_pmu_driver = { @@ -1237,7 +1418,7 @@ static struct platform_driver arm_spe_pmu_driver = { .suppress_bind_attrs = true, }, .probe = arm_spe_pmu_device_probe, - .remove = arm_spe_pmu_device_remove, + .remove = arm_spe_pmu_device_remove, }; static int __init arm_spe_pmu_init(void) |
