Diffstat (limited to 'drivers/perf')
43 files changed, 5387 insertions, 1408 deletions
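The largest addition in the diff below is the new Branch Record Buffer Extension driver (drivers/perf/arm_brbe.c, enabled by CONFIG_ARM64_BRBE), which services perf branch-stack sampling on Arm64. As a point of reference only, here is a minimal user-space sketch of how such branch records would be requested through the standard perf_event_open() ABI. The constants are existing perf UAPI; the choice of event, sample period and branch filters is illustrative, and the program assumes a kernel built with this support running on BRBE-capable hardware.

/* Hedged sketch: request branch-stack samples that BRBE can provide. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	/* No glibc wrapper exists; invoke the raw syscall. */
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;	/* illustrative sampling event */
	attr.sample_period = 100000;		/* illustrative period */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/* Filters accepted by the driver: user-space branches of any type. */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;

	fd = perf_event_open(&attr, 0 /* self */, -1 /* any CPU */, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* In a real consumer, samples (including the branch entries) would be
	 * read from the mmap'd perf ring buffer associated with this fd. */
	close(fd);
	return 0;
}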
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 4e268de351c4..638321fc9800 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -178,6 +178,15 @@ config FSL_IMX9_DDR_PMU can give information about memory throughput and other related events. +config FUJITSU_UNCORE_PMU + tristate "Fujitsu Uncore PMU" + depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT) + help + Provides support for the Uncore performance monitor unit (PMU) + in Fujitsu processors. + Adds the Uncore PMU into the perf events subsystem for + monitoring Uncore events. + config QCOM_L2_PMU bool "Qualcomm Technologies L2-cache PMU" depends on ARCH_QCOM && ARM64 && ACPI @@ -202,7 +211,7 @@ config THUNDERX2_PMU tristate "Cavium ThunderX2 SoC PMU UNCORE" depends on ARCH_THUNDER2 || COMPILE_TEST depends on NUMA && ACPI - default m + default m if ARCH_THUNDER2 help Provides support for ThunderX2 UNCORE events. The SoC has PMU support in its L3 cache controller (L3C) and @@ -223,6 +232,17 @@ config ARM_SPE_PMU Extension, which provides periodic sampling of operations in the CPU pipeline and reports this via the perf AUX interface. +config ARM64_BRBE + bool "Enable support for branch stack sampling using FEAT_BRBE" + depends on ARM_PMUV3 && ARM64 + default y + help + Enable perf support for Branch Record Buffer Extension (BRBE) which + records all branches taken in an execution path. This supports some + branch types and privilege based filtering. It captures additional + relevant information such as cycle count, misprediction and branch + type, branch privilege level etc. + config ARM_DMC620_PMU tristate "Enable PMU support for the ARM DMC-620 memory controller" depends on (ARM64 && ACPI) || COMPILE_TEST diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index de71d2574857..ea52711a87e3 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_ARM_XSCALE_PMU) += arm_xscale_pmu.o obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o +obj-$(CONFIG_FUJITSU_UNCORE_PMU) += fujitsu_uncore_pmu.o obj-$(CONFIG_HISI_PMU) += hisilicon/ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o @@ -23,6 +24,7 @@ obj-$(CONFIG_STARFIVE_STARLINK_PMU) += starfive_starlink_pmu.o obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o +obj-$(CONFIG_ARM64_BRBE) += arm_brbe.o obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c index 07446d784a1a..c1e755c356a3 100644 --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -511,7 +511,7 @@ int meson_ddr_pmu_create(struct platform_device *pdev) fmt_attr_fill(pmu->info.hw_info->fmt_attr); - pmu->cpu = smp_processor_id(); + pmu->cpu = raw_smp_processor_id(); name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME); if (!name) diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index 1d4d01e1275e..81b6f1a62349 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -12,6 +12,7 @@ #include <linux/of.h> #include <linux/perf/arm_pmu.h> +#include <linux/perf/arm_pmuv3.h> #include <linux/platform_device.h> #include 
<asm/apple_m1_pmu.h> @@ -120,6 +121,8 @@ enum m1_pmu_events { */ M1_PMU_CFG_COUNT_USER = BIT(8), M1_PMU_CFG_COUNT_KERNEL = BIT(9), + M1_PMU_CFG_COUNT_HOST = BIT(10), + M1_PMU_CFG_COUNT_GUEST = BIT(11), }; /* @@ -168,6 +171,19 @@ static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE, [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_ALL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC, +}; + +#define M1_PMUV3_EVENT_MAP(pmuv3_event, m1_event) \ + [ARMV8_PMUV3_PERFCTR_##pmuv3_event] = M1_PMU_PERFCTR_##m1_event + +static const u16 m1_pmu_pmceid_map[ARMV8_PMUV3_MAX_COMMON_EVENTS] = { + [0 ... ARMV8_PMUV3_MAX_COMMON_EVENTS - 1] = HW_OP_UNSUPPORTED, + M1_PMUV3_EVENT_MAP(INST_RETIRED, INST_ALL), + M1_PMUV3_EVENT_MAP(CPU_CYCLES, CORE_ACTIVE_CYCLE), + M1_PMUV3_EVENT_MAP(BR_RETIRED, INST_BRANCH), + M1_PMUV3_EVENT_MAP(BR_MIS_PRED_RETIRED, BRANCH_MISPRED_NONSPEC), }; /* sysfs definitions */ @@ -325,11 +341,10 @@ static void m1_pmu_disable_counter_interrupt(unsigned int index) __m1_pmu_enable_counter_interrupt(index, false); } -static void m1_pmu_configure_counter(unsigned int index, u8 event, - bool user, bool kernel) +static void __m1_pmu_configure_event_filter(unsigned int index, bool user, + bool kernel, bool host) { - u64 val, user_bit, kernel_bit; - int shift; + u64 clear, set, user_bit, kernel_bit; switch (index) { case 0 ... 7: @@ -344,19 +359,27 @@ static void m1_pmu_configure_counter(unsigned int index, u8 event, BUG(); } - val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1); - + clear = set = 0; if (user) - val |= user_bit; + set |= user_bit; else - val &= ~user_bit; + clear |= user_bit; if (kernel) - val |= kernel_bit; + set |= kernel_bit; else - val &= ~kernel_bit; + clear |= kernel_bit; + + if (host) + sysreg_clear_set_s(SYS_IMP_APL_PMCR1_EL1, clear, set); + else if (is_kernel_in_hyp_mode()) + sysreg_clear_set_s(SYS_IMP_APL_PMCR1_EL12, clear, set); +} - write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1); +static void __m1_pmu_configure_eventsel(unsigned int index, u8 event) +{ + u64 clear = 0, set = 0; + int shift; /* * Counters 0 and 1 have fixed events. For anything else, @@ -369,21 +392,32 @@ static void m1_pmu_configure_counter(unsigned int index, u8 event, break; case 2 ... 5: shift = (index - 2) * 8; - val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1); - val &= ~((u64)0xff << shift); - val |= (u64)event << shift; - write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1); + clear |= (u64)0xff << shift; + set |= (u64)event << shift; + sysreg_clear_set_s(SYS_IMP_APL_PMESR0_EL1, clear, set); break; case 6 ... 
9: shift = (index - 6) * 8; - val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1); - val &= ~((u64)0xff << shift); - val |= (u64)event << shift; - write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1); + clear |= (u64)0xff << shift; + set |= (u64)event << shift; + sysreg_clear_set_s(SYS_IMP_APL_PMESR1_EL1, clear, set); break; } } +static void m1_pmu_configure_counter(unsigned int index, unsigned long config_base) +{ + bool kernel = config_base & M1_PMU_CFG_COUNT_KERNEL; + bool guest = config_base & M1_PMU_CFG_COUNT_GUEST; + bool host = config_base & M1_PMU_CFG_COUNT_HOST; + bool user = config_base & M1_PMU_CFG_COUNT_USER; + u8 evt = config_base & M1_PMU_CFG_EVENT; + + __m1_pmu_configure_event_filter(index, user && host, kernel && host, true); + __m1_pmu_configure_event_filter(index, user && guest, kernel && guest, false); + __m1_pmu_configure_eventsel(index, evt); +} + /* arm_pmu backend */ static void m1_pmu_enable_event(struct perf_event *event) { @@ -394,11 +428,7 @@ static void m1_pmu_enable_event(struct perf_event *event) user = event->hw.config_base & M1_PMU_CFG_COUNT_USER; kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL; - m1_pmu_disable_counter_interrupt(event->hw.idx); - m1_pmu_disable_counter(event->hw.idx); - isb(); - - m1_pmu_configure_counter(event->hw.idx, evt, user, kernel); + m1_pmu_configure_counter(event->hw.idx, event->hw.config_base); m1_pmu_enable_counter(event->hw.idx); m1_pmu_enable_counter_interrupt(event->hw.idx); isb(); @@ -444,8 +474,7 @@ static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; - if (perf_event_overflow(event, &data, regs)) - m1_pmu_disable_event(event); + perf_event_overflow(event, &data, regs); } cpu_pmu->start(cpu_pmu); @@ -536,6 +565,26 @@ static int m2_pmu_map_event(struct perf_event *event) return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT); } +static int m1_pmu_map_pmuv3_event(unsigned int eventsel) +{ + u16 m1_event = HW_OP_UNSUPPORTED; + + if (eventsel < ARMV8_PMUV3_MAX_COMMON_EVENTS) + m1_event = m1_pmu_pmceid_map[eventsel]; + + return m1_event == HW_OP_UNSUPPORTED ? 
-EOPNOTSUPP : m1_event; +} + +static void m1_pmu_init_pmceid(struct arm_pmu *pmu) +{ + unsigned int event; + + for (event = 0; event < ARMV8_PMUV3_MAX_COMMON_EVENTS; event++) { + if (m1_pmu_map_pmuv3_event(event) >= 0) + set_bit(event, pmu->pmceid_bitmap); + } +} + static void m1_pmu_reset(void *info) { int i; @@ -556,7 +605,7 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event, { unsigned long config_base = 0; - if (!attr->exclude_guest) { + if (!attr->exclude_guest && !is_kernel_in_hyp_mode()) { pr_debug("ARM performance counters do not support mode exclusion\n"); return -EOPNOTSUPP; } @@ -564,6 +613,10 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event, config_base |= M1_PMU_CFG_COUNT_KERNEL; if (!attr->exclude_user) config_base |= M1_PMU_CFG_COUNT_USER; + if (!attr->exclude_host) + config_base |= M1_PMU_CFG_COUNT_HOST; + if (!attr->exclude_guest) + config_base |= M1_PMU_CFG_COUNT_GUEST; event->config_base = config_base; @@ -592,6 +645,9 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu, u32 flags) cpu_pmu->reset = m1_pmu_reset; cpu_pmu->set_event_filter = m1_pmu_set_event_filter; + cpu_pmu->map_pmuv3_event = m1_pmu_map_pmuv3_event; + m1_pmu_init_pmceid(cpu_pmu); + bitmap_set(cpu_pmu->cntr_mask, 0, M1_PMU_NR_COUNTERS); cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group; diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index d5fcea3d4328..8af3563fdf60 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -565,7 +565,7 @@ module_param_named(pmu_poll_period_us, arm_ccn_pmu_poll_period_us, uint, static ktime_t arm_ccn_pmu_timer_period(void) { - return ns_to_ktime((u64)arm_ccn_pmu_poll_period_us * 1000); + return us_to_ktime((u64)arm_ccn_pmu_poll_period_us); } @@ -1273,9 +1273,8 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) /* No overflow interrupt? Have to use a timer instead. */ if (!ccn->irq) { dev_info(ccn->dev, "No access to interrupts, using timer.\n"); - hrtimer_init(&ccn->dt.hrtimer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - ccn->dt.hrtimer.function = arm_ccn_pmu_timer_handler; + hrtimer_setup(&ccn->dt.hrtimer, arm_ccn_pmu_timer_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); } /* Pick one CPU which we will use to collect data from CCN... 
*/ diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 49bd811c6fd6..23245352a3fc 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2016-2020 Arm Limited -// CMN-600 Coherent Mesh Network PMU driver +// ARM CMN/CI interconnect PMU driver #include <linux/acpi.h> #include <linux/bitfield.h> @@ -65,7 +65,7 @@ /* PMU registers occupy the 3rd 4KB page of each node's region */ #define CMN_PMU_OFFSET 0x2000 /* ...except when they don't :( */ -#define CMN_S3_DTM_OFFSET 0xa000 +#define CMN_S3_R1_DTM_OFFSET 0xa000 #define CMN_S3_PMU_OFFSET 0xd900 /* For most nodes, this is all there is */ @@ -233,6 +233,9 @@ enum cmn_revision { REV_CMN700_R1P0, REV_CMN700_R2P0, REV_CMN700_R3P0, + REV_CMNS3_R0P0 = 0, + REV_CMNS3_R0P1, + REV_CMNS3_R1P0, REV_CI700_R0P0 = 0, REV_CI700_R1P0, REV_CI700_R2P0, @@ -425,8 +428,8 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn) static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn) { if (cmn->part == PART_CMN_S3) { - if (dn->type == CMN_TYPE_XP) - return CMN_S3_DTM_OFFSET; + if (cmn->rev >= REV_CMNS3_R1P0 && dn->type == CMN_TYPE_XP) + return CMN_S3_R1_DTM_OFFSET; return CMN_S3_PMU_OFFSET; } return CMN_PMU_OFFSET; @@ -727,8 +730,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if ((chan == 5 && cmn->rsp_vc_num < 2) || (chan == 6 && cmn->dat_vc_num < 2) || - (chan == 7 && cmn->snp_vc_num < 2) || - (chan == 8 && cmn->req_vc_num < 2)) + (chan == 7 && cmn->req_vc_num < 2) || + (chan == 8 && cmn->snp_vc_num < 2)) return 0; } @@ -802,8 +805,6 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, CMN_EVENT_ATTR(_model, ccha_##_name, CMN_TYPE_CCHA, _event) #define CMN_EVENT_CCLA(_name, _event) \ CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event) -#define CMN_EVENT_CCLA_RNI(_name, _event) \ - CMN_EVENT_ATTR(CMN_ANY, ccla_rni_##_name, CMN_TYPE_CCLA_RNI, _event) #define CMN_EVENT_HNS(_name, _event) \ CMN_EVENT_ATTR(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event) @@ -884,8 +885,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, _CMN_EVENT_XP(pub_##_name, (_event) | (4 << 5)), \ _CMN_EVENT_XP(rsp2_##_name, (_event) | (5 << 5)), \ _CMN_EVENT_XP(dat2_##_name, (_event) | (6 << 5)), \ - _CMN_EVENT_XP(snp2_##_name, (_event) | (7 << 5)), \ - _CMN_EVENT_XP(req2_##_name, (_event) | (8 << 5)) + _CMN_EVENT_XP(req2_##_name, (_event) | (7 << 5)), \ + _CMN_EVENT_XP(snp2_##_name, (_event) | (8 << 5)) #define CMN_EVENT_XP_DAT(_name, _event) \ _CMN_EVENT_XP_PORT(dat_##_name, (_event) | (3 << 5)), \ @@ -1713,8 +1714,8 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event) goto done; } - for (i = 0; i < CMN_MAX_DTCS; i++) - if (val->dtc_count[i] == CMN_DT_NUM_COUNTERS) + for_each_hw_dtc_idx(hw, dtc, idx) + if (val->dtc_count[dtc] == CMN_DT_NUM_COUNTERS) goto done; for_each_hw_dn(hw, dn, i) { @@ -1798,6 +1799,9 @@ static int arm_cmn_event_init(struct perf_event *event) } else if (type == CMN_TYPE_XP && (cmn->part == PART_CMN700 || cmn->part == PART_CMN_S3)) { hw->wide_sel = true; + } else if (type == CMN_TYPE_RND) { + /* Secretly permit this as an alias for "rnid" events */ + type = CMN_TYPE_RNI; } /* This is sufficiently annoying to recalculate, so cache it */ @@ -2166,20 +2170,11 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP); - if (cmn->part == PART_CMN600 && cmn->num_dtcs > 1) { - /* We do at least know that a DTC's XP 
must be in that DTC's domain */ - dn = arm_cmn_node(cmn, CMN_TYPE_DTC); - for (int i = 0; i < cmn->num_dtcs; i++) - arm_cmn_node_to_xp(cmn, dn + i)->dtc = i; - } - for (dn = cmn->dns; dn->type; dn++) { if (dn->type == CMN_TYPE_XP) continue; xp = arm_cmn_node_to_xp(cmn, dn); - dn->portid_bits = xp->portid_bits; - dn->deviceid_bits = xp->deviceid_bits; dn->dtc = xp->dtc; dn->dtm = xp->dtm; if (cmn->multi_dtm) @@ -2253,12 +2248,11 @@ static enum cmn_node_type arm_cmn_subtype(enum cmn_node_type type) static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) { - void __iomem *cfg_region; + void __iomem *cfg_region, __iomem *xp_region; struct arm_cmn_node cfg, *dn; struct arm_cmn_dtm *dtm; enum cmn_part part; u16 child_count, child_poff; - u32 xp_offset[CMN_MAX_XPS]; u64 reg; int i, j; size_t sz; @@ -2310,11 +2304,12 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) cmn->num_dns = cmn->num_xps; /* Pass 1: visit the XPs, enumerate their children */ + cfg_region += child_poff; for (i = 0; i < cmn->num_xps; i++) { - reg = readq_relaxed(cfg_region + child_poff + i * 8); - xp_offset[i] = reg & CMN_CHILD_NODE_ADDR; + reg = readq_relaxed(cfg_region + i * 8); + xp_region = cmn->base + (reg & CMN_CHILD_NODE_ADDR); - reg = readq_relaxed(cmn->base + xp_offset[i] + CMN_CHILD_INFO); + reg = readq_relaxed(xp_region + CMN_CHILD_INFO); cmn->num_dns += FIELD_GET(CMN_CI_CHILD_COUNT, reg); } @@ -2340,11 +2335,12 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) cmn->dns = dn; cmn->dtms = dtm; for (i = 0; i < cmn->num_xps; i++) { - void __iomem *xp_region = cmn->base + xp_offset[i]; struct arm_cmn_node *xp = dn++; unsigned int xp_ports = 0; - arm_cmn_init_node_info(cmn, xp_offset[i], xp); + reg = readq_relaxed(cfg_region + i * 8); + xp_region = cmn->base + (reg & CMN_CHILD_NODE_ADDR); + arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, xp); /* * Thanks to the order in which XP logical IDs seem to be * assigned, we can handily infer the mesh X dimension by @@ -2420,6 +2416,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) } arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); + dn->portid_bits = xp->portid_bits; + dn->deviceid_bits = xp->deviceid_bits; switch (dn->type) { case CMN_TYPE_DTC: @@ -2557,6 +2555,7 @@ static int arm_cmn_probe(struct platform_device *pdev) cmn->dev = &pdev->dev; cmn->part = (unsigned long)device_get_match_data(cmn->dev); + cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev)); platform_set_drvdata(pdev, cmn); if (cmn->part == PART_CMN600 && has_acpi_companion(cmn->dev)) { @@ -2584,7 +2583,6 @@ static int arm_cmn_probe(struct platform_device *pdev) if (err) return err; - cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev)); cmn->pmu = (struct pmu) { .module = THIS_MODULE, .parent = cmn->dev, @@ -2650,6 +2648,7 @@ static const struct acpi_device_id arm_cmn_acpi_match[] = { { "ARMHC600", PART_CMN600 }, { "ARMHC650" }, { "ARMHC700" }, + { "ARMHC003" }, {} }; MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match); @@ -2660,6 +2659,7 @@ static struct platform_driver arm_cmn_driver = { .name = "arm-cmn", .of_match_table = of_match_ptr(arm_cmn_of_match), .acpi_match_table = ACPI_PTR(arm_cmn_acpi_match), + .suppress_bind_attrs = true, }, .probe = arm_cmn_probe, .remove = arm_cmn_remove, @@ -2698,5 +2698,5 @@ module_init(arm_cmn_init); module_exit(arm_cmn_exit); MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>"); -MODULE_DESCRIPTION("Arm CMN-600 PMU driver"); +MODULE_DESCRIPTION("Arm 
CMN/CI interconnect PMU driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c index 90fcfe693439..66858c65215d 100644 --- a/drivers/perf/arm-ni.c +++ b/drivers/perf/arm-ni.c @@ -21,6 +21,11 @@ #define NI_CHILD_NODE_INFO 0x004 #define NI_CHILD_PTR(n) (0x008 + (n) * 4) +#define NI_NUM_SUB_FEATURES 0x100 +#define NI_SUB_FEATURE_TYPE(n) (0x108 + (n) * 8) +#define NI_SUB_FEATURE_PTR(n) (0x10c + (n) * 8) + +#define NI_SUB_FEATURE_TYPE_FCU 0x2 #define NI700_PMUSELA 0x00c @@ -33,9 +38,10 @@ #define NI_PIDR2_VERSION GENMASK(7, 4) /* PMU node */ -#define NI_PMEVCNTR(n) (0x008 + (n) * 8) -#define NI_PMCCNTR_L 0x0f8 -#define NI_PMCCNTR_U 0x0fc +#define NI700_PMEVCNTR(n) (0x008 + (n) * 8) +#define NI700_PMCCNTR_L 0x0f8 +#define NI_PMEVCNTR(n) (0x200 + (n) * 8) +#define NI_PMCCNTR_L 0x2f8 #define NI_PMEVTYPER(n) (0x400 + (n) * 4) #define NI_PMEVTYPER_NODE_TYPE GENMASK(12, 9) #define NI_PMEVTYPER_NODE_ID GENMASK(8, 0) @@ -66,6 +72,8 @@ enum ni_part { PART_NI_700 = 0x43b, PART_NI_710AE = 0x43d, + PART_NOC_S3 = 0x43f, + PART_SI_L1 = 0x455, }; enum ni_node_type { @@ -79,6 +87,10 @@ enum ni_node_type { NI_HSNI, NI_HMNI, NI_PMNI, + NI_TSNI, + NI_TMNI, + NI_CMNI = 0x0e, + NI_MCN = 0x63, }; struct arm_ni_node { @@ -102,10 +114,9 @@ struct arm_ni_unit { struct arm_ni_cd { void __iomem *pmu_base; u16 id; + s8 irq_friend; int num_units; int irq; - int cpu; - struct hlist_node cpuhp_node; struct pmu pmu; struct arm_ni_unit *units; struct perf_event *evcnt[NI_NUM_COUNTERS]; @@ -117,13 +128,18 @@ struct arm_ni { void __iomem *base; enum ni_part part; int id; + int cpu; int num_cds; + struct hlist_node cpuhp_node; struct arm_ni_cd cds[] __counted_by(num_cds); }; #define cd_to_ni(cd) container_of((cd), struct arm_ni, cds[(cd)->id]) #define pmu_to_cd(p) container_of((p), struct arm_ni_cd, pmu) +#define ni_for_each_cd(n, c) \ + for (struct arm_ni_cd *c = n->cds; c < n->cds + n->num_cds; c++) if (c->pmu_base) + #define cd_for_each_unit(cd, u) \ for (struct arm_ni_unit *u = cd->units; u < cd->units + cd->num_units; u++) @@ -175,6 +191,9 @@ static struct attribute *arm_ni_event_attrs[] = { NI_EVENT_ATTR(hsni, NI_HSNI), NI_EVENT_ATTR(hmni, NI_HMNI), NI_EVENT_ATTR(pmni, NI_PMNI), + NI_EVENT_ATTR(tsni, NI_TSNI), + NI_EVENT_ATTR(tmni, NI_TMNI), + NI_EVENT_ATTR(cmni, NI_CMNI), NULL }; @@ -218,9 +237,9 @@ static const struct attribute_group arm_ni_format_attrs_group = { static ssize_t arm_ni_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct arm_ni_cd *cd = pmu_to_cd(dev_get_drvdata(dev)); + struct arm_ni *ni = cd_to_ni(pmu_to_cd(dev_get_drvdata(dev))); - return cpumap_print_to_pagebuf(true, buf, cpumask_of(cd->cpu)); + return cpumap_print_to_pagebuf(true, buf, cpumask_of(ni->cpu)); } static struct device_attribute arm_ni_cpumask_attr = @@ -247,7 +266,6 @@ static struct attribute *arm_ni_other_attrs[] = { static const struct attribute_group arm_ni_other_attr_group = { .attrs = arm_ni_other_attrs, - NULL }; static const struct attribute_group *arm_ni_attr_groups[] = { @@ -305,9 +323,15 @@ static int arm_ni_validate_group(struct perf_event *event) return 0; } +static bool arm_ni_is_7xx(const struct arm_ni *ni) +{ + return ni->part == PART_NI_700 || ni->part == PART_NI_710AE; +} + static int arm_ni_event_init(struct perf_event *event) { struct arm_ni_cd *cd = pmu_to_cd(event->pmu); + struct arm_ni *ni; if (event->attr.type != event->pmu->type) return -ENOENT; @@ -315,7 +339,10 @@ static int arm_ni_event_init(struct perf_event *event) if 
(is_sampling_event(event)) return -EINVAL; - event->cpu = cd->cpu; + ni = cd_to_ni(cd); + event->cpu = ni->cpu; + event->hw.flags = arm_ni_is_7xx(ni); + if (NI_EVENT_TYPE(event) == NI_PMU) return arm_ni_validate_group(event); @@ -329,16 +356,16 @@ static int arm_ni_event_init(struct perf_event *event) return -EINVAL; } -static u64 arm_ni_read_ccnt(struct arm_ni_cd *cd) +static u64 arm_ni_read_ccnt(void __iomem *pmccntr) { u64 l, u_old, u_new; int retries = 3; /* 1st time unlucky, 2nd improbable, 3rd just broken */ - u_new = readl_relaxed(cd->pmu_base + NI_PMCCNTR_U); + u_new = readl_relaxed(pmccntr + 4); do { u_old = u_new; - l = readl_relaxed(cd->pmu_base + NI_PMCCNTR_L); - u_new = readl_relaxed(cd->pmu_base + NI_PMCCNTR_U); + l = readl_relaxed(pmccntr); + u_new = readl_relaxed(pmccntr + 4); } while (u_new != u_old && --retries); WARN_ON(!retries); @@ -347,7 +374,6 @@ static u64 arm_ni_read_ccnt(struct arm_ni_cd *cd) static void arm_ni_event_read(struct perf_event *event) { - struct arm_ni_cd *cd = pmu_to_cd(event->pmu); struct hw_perf_event *hw = &event->hw; u64 count, prev; bool ccnt = hw->idx == NI_CCNT_IDX; @@ -355,9 +381,9 @@ static void arm_ni_event_read(struct perf_event *event) do { prev = local64_read(&hw->prev_count); if (ccnt) - count = arm_ni_read_ccnt(cd); + count = arm_ni_read_ccnt((void __iomem *)event->hw.event_base); else - count = readl_relaxed(cd->pmu_base + NI_PMEVCNTR(hw->idx)); + count = readl_relaxed((void __iomem *)event->hw.event_base); } while (local64_cmpxchg(&hw->prev_count, prev, count) != prev); count -= prev; @@ -382,16 +408,16 @@ static void arm_ni_event_stop(struct perf_event *event, int flags) arm_ni_event_read(event); } -static void arm_ni_init_ccnt(struct arm_ni_cd *cd) +static void arm_ni_init_ccnt(struct hw_perf_event *hw) { - local64_set(&cd->ccnt->hw.prev_count, S64_MIN); - lo_hi_writeq_relaxed(S64_MIN, cd->pmu_base + NI_PMCCNTR_L); + local64_set(&hw->prev_count, S64_MIN); + lo_hi_writeq_relaxed(S64_MIN, (void __iomem *)hw->event_base); } -static void arm_ni_init_evcnt(struct arm_ni_cd *cd, int idx) +static void arm_ni_init_evcnt(struct hw_perf_event *hw) { - local64_set(&cd->evcnt[idx]->hw.prev_count, S32_MIN); - writel_relaxed(S32_MIN, cd->pmu_base + NI_PMEVCNTR(idx)); + local64_set(&hw->prev_count, S32_MIN); + writel_relaxed(S32_MIN, (void __iomem *)hw->event_base); } static int arm_ni_event_add(struct perf_event *event, int flags) @@ -406,8 +432,10 @@ static int arm_ni_event_add(struct perf_event *event, int flags) if (cd->ccnt) return -ENOSPC; hw->idx = NI_CCNT_IDX; + hw->event_base = (unsigned long)cd->pmu_base + + (hw->flags ? NI700_PMCCNTR_L : NI_PMCCNTR_L); cd->ccnt = event; - arm_ni_init_ccnt(cd); + arm_ni_init_ccnt(hw); } else { hw->idx = 0; while (cd->evcnt[hw->idx]) { @@ -417,7 +445,9 @@ static int arm_ni_event_add(struct perf_event *event, int flags) cd->evcnt[hw->idx] = event; unit = (void *)hw->config_base; unit->event[hw->idx] = NI_EVENT_EVENTID(event); - arm_ni_init_evcnt(cd, hw->idx); + hw->event_base = (unsigned long)cd->pmu_base + + (hw->flags ? 
NI700_PMEVCNTR(hw->idx) : NI_PMEVCNTR(hw->idx)); + arm_ni_init_evcnt(hw); lo_hi_writeq_relaxed(le64_to_cpu(unit->pmusel), unit->pmusela); reg = FIELD_PREP(NI_PMEVTYPER_NODE_TYPE, type) | @@ -446,33 +476,56 @@ static irqreturn_t arm_ni_handle_irq(int irq, void *dev_id) { struct arm_ni_cd *cd = dev_id; irqreturn_t ret = IRQ_NONE; - u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR); - if (reg & (1U << NI_CCNT_IDX)) { - ret = IRQ_HANDLED; - if (!(WARN_ON(!cd->ccnt))) { - arm_ni_event_read(cd->ccnt); - arm_ni_init_ccnt(cd); + for (;;) { + u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR); + + if (reg & (1U << NI_CCNT_IDX)) { + ret = IRQ_HANDLED; + if (!(WARN_ON(!cd->ccnt))) { + arm_ni_event_read(cd->ccnt); + arm_ni_init_ccnt(&cd->ccnt->hw); + } } + for (int i = 0; i < NI_NUM_COUNTERS; i++) { + if (!(reg & (1U << i))) + continue; + ret = IRQ_HANDLED; + if (!(WARN_ON(!cd->evcnt[i]))) { + arm_ni_event_read(cd->evcnt[i]); + arm_ni_init_evcnt(&cd->evcnt[i]->hw); + } + } + writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR); + if (!cd->irq_friend) + return ret; + cd += cd->irq_friend; } - for (int i = 0; i < NI_NUM_COUNTERS; i++) { - if (!(reg & (1U << i))) +} + +static void __iomem *arm_ni_get_pmusel(struct arm_ni *ni, void __iomem *unit_base) +{ + u32 type, ptr, num; + + if (arm_ni_is_7xx(ni)) + return unit_base + NI700_PMUSELA; + + num = readl_relaxed(unit_base + NI_NUM_SUB_FEATURES); + for (int i = 0; i < num; i++) { + type = readl_relaxed(unit_base + NI_SUB_FEATURE_TYPE(i)); + if (type != NI_SUB_FEATURE_TYPE_FCU) continue; - ret = IRQ_HANDLED; - if (!(WARN_ON(!cd->evcnt[i]))) { - arm_ni_event_read(cd->evcnt[i]); - arm_ni_init_evcnt(cd, i); - } + ptr = readl_relaxed(unit_base + NI_SUB_FEATURE_PTR(i)); + return ni->base + ptr; } - writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR); - return ret; + /* Should be impossible */ + return NULL; } static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_start) { struct arm_ni_cd *cd = ni->cds + node->id; const char *name; - int err; cd->id = node->id; cd->num_units = node->num_components; @@ -505,13 +558,18 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s case NI_HSNI: case NI_HMNI: case NI_PMNI: - unit->pmusela = unit_base + NI700_PMUSELA; + case NI_TSNI: + case NI_TMNI: + case NI_CMNI: + unit->pmusela = arm_ni_get_pmusel(ni, unit_base); writel_relaxed(1, unit->pmusela); if (readl_relaxed(unit->pmusela) != 1) dev_info(ni->dev, "No access to node 0x%04x%04x\n", unit->id, unit->type); else unit->ns = true; break; + case NI_MCN: + break; default: /* * e.g. 
FMU - thankfully bits 3:2 of FMU_ERR_FR0 are RES0 so @@ -532,19 +590,11 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s cd->pmu_base + NI_PMCR); writel_relaxed(U32_MAX, cd->pmu_base + NI_PMCNTENCLR); writel_relaxed(U32_MAX, cd->pmu_base + NI_PMOVSCLR); - writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENSET); cd->irq = platform_get_irq(to_platform_device(ni->dev), cd->id); if (cd->irq < 0) return cd->irq; - err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq, - IRQF_NOBALANCING | IRQF_NO_THREAD, - dev_name(ni->dev), cd); - if (err) - return err; - - cd->cpu = cpumask_local_spread(0, dev_to_node(ni->dev)); cd->pmu = (struct pmu) { .module = THIS_MODULE, .parent = ni->dev, @@ -565,15 +615,19 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s if (!name) return -ENOMEM; - err = cpuhp_state_add_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); - if (err) - return err; + return perf_pmu_register(&cd->pmu, name, -1); +} - err = perf_pmu_register(&cd->pmu, name, -1); - if (err) - cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); +static void arm_ni_remove(struct platform_device *pdev) +{ + struct arm_ni *ni = platform_get_drvdata(pdev); - return err; + ni_for_each_cd(ni, cd) { + writel_relaxed(0, cd->pmu_base + NI_PMCR); + writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR); + perf_pmu_unregister(&cd->pmu); + } + cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &ni->cpuhp_node); } static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node) @@ -586,6 +640,34 @@ static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node) node->num_components = readl_relaxed(base + NI_CHILD_NODE_INFO); } +static int arm_ni_init_irqs(struct arm_ni *ni) +{ + int err; + + ni_for_each_cd(ni, cd) { + for (struct arm_ni_cd *prev = cd; prev-- > ni->cds; ) { + if (prev->irq == cd->irq) { + prev->irq_friend = cd - prev; + goto set_inten; + } + } + err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq, + IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_AUTOEN, + dev_name(ni->dev), cd); + if (err) + return err; + + irq_set_affinity(cd->irq, cpumask_of(ni->cpu)); +set_inten: + writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENSET); + } + + ni_for_each_cd(ni, cd) + if (!cd->irq_friend) + enable_irq(cd->irq); + return 0; +} + static int arm_ni_probe(struct platform_device *pdev) { struct arm_ni_node cfg, vd, pd, cd; @@ -593,7 +675,7 @@ static int arm_ni_probe(struct platform_device *pdev) struct resource *res; void __iomem *base; static atomic_t id; - int num_cds; + int ret, num_cds; u32 reg, part; /* @@ -618,6 +700,8 @@ static int arm_ni_probe(struct platform_device *pdev) switch (part) { case PART_NI_700: case PART_NI_710AE: + case PART_NOC_S3: + case PART_SI_L1: break; default: dev_WARN(&pdev->dev, "Unknown part number: 0x%03x, this may go badly\n", part); @@ -644,6 +728,12 @@ static int arm_ni_probe(struct platform_device *pdev) ni->num_cds = num_cds; ni->part = part; ni->id = atomic_fetch_inc(&id); + ni->cpu = cpumask_local_spread(0, dev_to_node(ni->dev)); + platform_set_drvdata(pdev, ni); + + ret = cpuhp_state_add_instance_nocalls(arm_ni_hp_state, &ni->cpuhp_node); + if (ret) + return ret; for (int v = 0; v < cfg.num_components; v++) { reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v)); @@ -652,35 +742,23 @@ static int arm_ni_probe(struct platform_device *pdev) reg = readl_relaxed(vd.base + NI_CHILD_PTR(p)); arm_ni_probe_domain(base + reg, &pd); for (int c = 0; c < pd.num_components; c++) { 
- int ret; - reg = readl_relaxed(pd.base + NI_CHILD_PTR(c)); arm_ni_probe_domain(base + reg, &cd); ret = arm_ni_init_cd(ni, &cd, res->start); - if (ret) + if (ret) { + ni->cds[cd.id].pmu_base = NULL; + arm_ni_remove(pdev); return ret; + } } } } - return 0; -} - -static void arm_ni_remove(struct platform_device *pdev) -{ - struct arm_ni *ni = platform_get_drvdata(pdev); - - for (int i = 0; i < ni->num_cds; i++) { - struct arm_ni_cd *cd = ni->cds + i; - - if (!cd->pmu_base) - continue; + ret = arm_ni_init_irqs(ni); + if (ret) + arm_ni_remove(pdev); - writel_relaxed(0, cd->pmu_base + NI_PMCR); - writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR); - perf_pmu_unregister(&cd->pmu); - cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); - } + return ret; } #ifdef CONFIG_OF @@ -704,47 +782,50 @@ static struct platform_driver arm_ni_driver = { .name = "arm-ni", .of_match_table = of_match_ptr(arm_ni_of_match), .acpi_match_table = ACPI_PTR(arm_ni_acpi_match), + .suppress_bind_attrs = true, }, .probe = arm_ni_probe, .remove = arm_ni_remove, }; -static void arm_ni_pmu_migrate(struct arm_ni_cd *cd, unsigned int cpu) +static void arm_ni_pmu_migrate(struct arm_ni *ni, unsigned int cpu) { - perf_pmu_migrate_context(&cd->pmu, cd->cpu, cpu); - irq_set_affinity(cd->irq, cpumask_of(cpu)); - cd->cpu = cpu; + ni_for_each_cd(ni, cd) { + perf_pmu_migrate_context(&cd->pmu, ni->cpu, cpu); + irq_set_affinity(cd->irq, cpumask_of(cpu)); + } + ni->cpu = cpu; } static int arm_ni_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) { - struct arm_ni_cd *cd; + struct arm_ni *ni; int node; - cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node); - node = dev_to_node(cd_to_ni(cd)->dev); - if (cpu_to_node(cd->cpu) != node && cpu_to_node(cpu) == node) - arm_ni_pmu_migrate(cd, cpu); + ni = hlist_entry_safe(cpuhp_node, struct arm_ni, cpuhp_node); + node = dev_to_node(ni->dev); + if (cpu_to_node(ni->cpu) != node && cpu_to_node(cpu) == node) + arm_ni_pmu_migrate(ni, cpu); return 0; } static int arm_ni_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) { - struct arm_ni_cd *cd; + struct arm_ni *ni; unsigned int target; int node; - cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node); - if (cpu != cd->cpu) + ni = hlist_entry_safe(cpuhp_node, struct arm_ni, cpuhp_node); + if (cpu != ni->cpu) return 0; - node = dev_to_node(cd_to_ni(cd)->dev); + node = dev_to_node(ni->dev); target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu); if (target >= nr_cpu_ids) target = cpumask_any_but(cpu_online_mask, cpu); if (target < nr_cpu_ids) - arm_ni_pmu_migrate(cd, target); + arm_ni_pmu_migrate(ni, target); return 0; } diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c new file mode 100644 index 000000000000..ba554e0c846c --- /dev/null +++ b/drivers/perf/arm_brbe.c @@ -0,0 +1,805 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Branch Record Buffer Extension Driver. + * + * Copyright (C) 2022-2025 ARM Limited + * + * Author: Anshuman Khandual <anshuman.khandual@arm.com> + */ +#include <linux/types.h> +#include <linux/bitmap.h> +#include <linux/perf/arm_pmu.h> +#include "arm_brbe.h" + +#define BRBFCR_EL1_BRANCH_FILTERS (BRBFCR_EL1_DIRECT | \ + BRBFCR_EL1_INDIRECT | \ + BRBFCR_EL1_RTN | \ + BRBFCR_EL1_INDCALL | \ + BRBFCR_EL1_DIRCALL | \ + BRBFCR_EL1_CONDDIR) + +/* + * BRBTS_EL1 is currently not used for branch stack implementation + * purpose but BRBCR_ELx.TS needs to have a valid value from all + * available options. 
BRBCR_ELx_TS_VIRTUAL is selected for this. + */ +#define BRBCR_ELx_DEFAULT_TS FIELD_PREP(BRBCR_ELx_TS_MASK, BRBCR_ELx_TS_VIRTUAL) + +/* + * BRBE Buffer Organization + * + * BRBE buffer is arranged as multiple banks of 32 branch record + * entries each. An individual branch record in a given bank could + * be accessed, after selecting the bank in BRBFCR_EL1.BANK and + * accessing the registers i.e [BRBSRC, BRBTGT, BRBINF] set with + * indices [0..31]. + * + * Bank 0 + * + * --------------------------------- ------ + * | 00 | BRBSRC | BRBTGT | BRBINF | | 00 | + * --------------------------------- ------ + * | 01 | BRBSRC | BRBTGT | BRBINF | | 01 | + * --------------------------------- ------ + * | .. | BRBSRC | BRBTGT | BRBINF | | .. | + * --------------------------------- ------ + * | 31 | BRBSRC | BRBTGT | BRBINF | | 31 | + * --------------------------------- ------ + * + * Bank 1 + * + * --------------------------------- ------ + * | 32 | BRBSRC | BRBTGT | BRBINF | | 00 | + * --------------------------------- ------ + * | 33 | BRBSRC | BRBTGT | BRBINF | | 01 | + * --------------------------------- ------ + * | .. | BRBSRC | BRBTGT | BRBINF | | .. | + * --------------------------------- ------ + * | 63 | BRBSRC | BRBTGT | BRBINF | | 31 | + * --------------------------------- ------ + */ +#define BRBE_BANK_MAX_ENTRIES 32 + +struct brbe_regset { + u64 brbsrc; + u64 brbtgt; + u64 brbinf; +}; + +#define PERF_BR_ARM64_MAX (PERF_BR_MAX + PERF_BR_NEW_MAX) + +struct brbe_hw_attr { + int brbe_version; + int brbe_cc; + int brbe_nr; + int brbe_format; +}; + +#define BRBE_REGN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define BRBE_REGN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + BRBE_REGN_CASE(0, case_macro); \ + BRBE_REGN_CASE(1, case_macro); \ + BRBE_REGN_CASE(2, case_macro); \ + BRBE_REGN_CASE(3, case_macro); \ + BRBE_REGN_CASE(4, case_macro); \ + BRBE_REGN_CASE(5, case_macro); \ + BRBE_REGN_CASE(6, case_macro); \ + BRBE_REGN_CASE(7, case_macro); \ + BRBE_REGN_CASE(8, case_macro); \ + BRBE_REGN_CASE(9, case_macro); \ + BRBE_REGN_CASE(10, case_macro); \ + BRBE_REGN_CASE(11, case_macro); \ + BRBE_REGN_CASE(12, case_macro); \ + BRBE_REGN_CASE(13, case_macro); \ + BRBE_REGN_CASE(14, case_macro); \ + BRBE_REGN_CASE(15, case_macro); \ + BRBE_REGN_CASE(16, case_macro); \ + BRBE_REGN_CASE(17, case_macro); \ + BRBE_REGN_CASE(18, case_macro); \ + BRBE_REGN_CASE(19, case_macro); \ + BRBE_REGN_CASE(20, case_macro); \ + BRBE_REGN_CASE(21, case_macro); \ + BRBE_REGN_CASE(22, case_macro); \ + BRBE_REGN_CASE(23, case_macro); \ + BRBE_REGN_CASE(24, case_macro); \ + BRBE_REGN_CASE(25, case_macro); \ + BRBE_REGN_CASE(26, case_macro); \ + BRBE_REGN_CASE(27, case_macro); \ + BRBE_REGN_CASE(28, case_macro); \ + BRBE_REGN_CASE(29, case_macro); \ + BRBE_REGN_CASE(30, case_macro); \ + BRBE_REGN_CASE(31, case_macro); \ + default: WARN(1, "Invalid BRB* index %d\n", x); \ + } \ + } while (0) + +#define RETURN_READ_BRBSRCN(n) \ + return read_sysreg_s(SYS_BRBSRC_EL1(n)) +static inline u64 get_brbsrc_reg(int idx) +{ + BRBE_REGN_SWITCH(idx, RETURN_READ_BRBSRCN); + return 0; +} + +#define RETURN_READ_BRBTGTN(n) \ + return read_sysreg_s(SYS_BRBTGT_EL1(n)) +static u64 get_brbtgt_reg(int idx) +{ + BRBE_REGN_SWITCH(idx, RETURN_READ_BRBTGTN); + return 0; +} + +#define RETURN_READ_BRBINFN(n) \ + return read_sysreg_s(SYS_BRBINF_EL1(n)) +static u64 get_brbinf_reg(int idx) +{ + BRBE_REGN_SWITCH(idx, RETURN_READ_BRBINFN); + return 0; +} + +static u64 brbe_record_valid(u64 brbinf) +{ + return 
FIELD_GET(BRBINFx_EL1_VALID_MASK, brbinf); +} + +static bool brbe_invalid(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_NONE; +} + +static bool brbe_record_is_complete(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_FULL; +} + +static bool brbe_record_is_source_only(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_SOURCE; +} + +static bool brbe_record_is_target_only(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_TARGET; +} + +static int brbinf_get_in_tx(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_T_MASK, brbinf); +} + +static int brbinf_get_mispredict(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_MPRED_MASK, brbinf); +} + +static int brbinf_get_lastfailed(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_LASTFAILED_MASK, brbinf); +} + +static u16 brbinf_get_cycles(u64 brbinf) +{ + u32 exp, mant, cycles; + /* + * Captured cycle count is unknown and hence + * should not be passed on to userspace. + */ + if (brbinf & BRBINFx_EL1_CCU) + return 0; + + exp = FIELD_GET(BRBINFx_EL1_CC_EXP_MASK, brbinf); + mant = FIELD_GET(BRBINFx_EL1_CC_MANT_MASK, brbinf); + + if (!exp) + return mant; + + cycles = (mant | 0x100) << (exp - 1); + + return min(cycles, U16_MAX); +} + +static int brbinf_get_type(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_TYPE_MASK, brbinf); +} + +static int brbinf_get_el(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_EL_MASK, brbinf); +} + +void brbe_invalidate(void) +{ + /* Ensure all branches before this point are recorded */ + isb(); + asm volatile(BRB_IALL_INSN); + /* Ensure all branch records are invalidated after this point */ + isb(); +} + +static bool valid_brbe_nr(int brbe_nr) +{ + return brbe_nr == BRBIDR0_EL1_NUMREC_8 || + brbe_nr == BRBIDR0_EL1_NUMREC_16 || + brbe_nr == BRBIDR0_EL1_NUMREC_32 || + brbe_nr == BRBIDR0_EL1_NUMREC_64; +} + +static bool valid_brbe_cc(int brbe_cc) +{ + return brbe_cc == BRBIDR0_EL1_CC_20_BIT; +} + +static bool valid_brbe_format(int brbe_format) +{ + return brbe_format == BRBIDR0_EL1_FORMAT_FORMAT_0; +} + +static bool valid_brbidr(u64 brbidr) +{ + int brbe_format, brbe_cc, brbe_nr; + + brbe_format = FIELD_GET(BRBIDR0_EL1_FORMAT_MASK, brbidr); + brbe_cc = FIELD_GET(BRBIDR0_EL1_CC_MASK, brbidr); + brbe_nr = FIELD_GET(BRBIDR0_EL1_NUMREC_MASK, brbidr); + + return valid_brbe_format(brbe_format) && valid_brbe_cc(brbe_cc) && valid_brbe_nr(brbe_nr); +} + +static bool valid_brbe_version(int brbe_version) +{ + return brbe_version == ID_AA64DFR0_EL1_BRBE_IMP || + brbe_version == ID_AA64DFR0_EL1_BRBE_BRBE_V1P1; +} + +static void select_brbe_bank(int bank) +{ + u64 brbfcr; + + brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + brbfcr &= ~BRBFCR_EL1_BANK_MASK; + brbfcr |= SYS_FIELD_PREP(BRBFCR_EL1, BANK, bank); + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); + /* + * Arm ARM (DDI 0487K.a) D.18.4 rule PPBZP requires explicit sync + * between setting BANK and accessing branch records. + */ + isb(); +} + +static bool __read_brbe_regset(struct brbe_regset *entry, int idx) +{ + entry->brbinf = get_brbinf_reg(idx); + + if (brbe_invalid(entry->brbinf)) + return false; + + entry->brbsrc = get_brbsrc_reg(idx); + entry->brbtgt = get_brbtgt_reg(idx); + return true; +} + +/* + * Generic perf branch filters supported on BRBE + * + * New branch filters need to be evaluated whether they could be supported on + * BRBE. This ensures that such branch filters would not just be accepted, to + * fail silently. 
PERF_SAMPLE_BRANCH_HV is a special case that is selectively + * supported only on platforms where kernel is in hyp mode. + */ +#define BRBE_EXCLUDE_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_ABORT_TX | \ + PERF_SAMPLE_BRANCH_IN_TX | \ + PERF_SAMPLE_BRANCH_NO_TX | \ + PERF_SAMPLE_BRANCH_CALL_STACK | \ + PERF_SAMPLE_BRANCH_COUNTERS) + +#define BRBE_ALLOWED_BRANCH_TYPES (PERF_SAMPLE_BRANCH_ANY | \ + PERF_SAMPLE_BRANCH_ANY_CALL | \ + PERF_SAMPLE_BRANCH_ANY_RETURN | \ + PERF_SAMPLE_BRANCH_IND_CALL | \ + PERF_SAMPLE_BRANCH_COND | \ + PERF_SAMPLE_BRANCH_IND_JUMP | \ + PERF_SAMPLE_BRANCH_CALL) + + +#define BRBE_ALLOWED_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_USER | \ + PERF_SAMPLE_BRANCH_KERNEL | \ + PERF_SAMPLE_BRANCH_HV | \ + BRBE_ALLOWED_BRANCH_TYPES | \ + PERF_SAMPLE_BRANCH_NO_FLAGS | \ + PERF_SAMPLE_BRANCH_NO_CYCLES | \ + PERF_SAMPLE_BRANCH_TYPE_SAVE | \ + PERF_SAMPLE_BRANCH_HW_INDEX | \ + PERF_SAMPLE_BRANCH_PRIV_SAVE) + +#define BRBE_PERF_BRANCH_FILTERS (BRBE_ALLOWED_BRANCH_FILTERS | \ + BRBE_EXCLUDE_BRANCH_FILTERS) + +/* + * BRBE supports the following functional branch type filters while + * generating branch records. These branch filters can be enabled, + * either individually or as a group i.e ORing multiple filters + * with each other. + * + * BRBFCR_EL1_CONDDIR - Conditional direct branch + * BRBFCR_EL1_DIRCALL - Direct call + * BRBFCR_EL1_INDCALL - Indirect call + * BRBFCR_EL1_INDIRECT - Indirect branch + * BRBFCR_EL1_DIRECT - Direct branch + * BRBFCR_EL1_RTN - Subroutine return + */ +static u64 branch_type_to_brbfcr(int branch_type) +{ + u64 brbfcr = 0; + + if (branch_type & PERF_SAMPLE_BRANCH_ANY) { + brbfcr |= BRBFCR_EL1_BRANCH_FILTERS; + return brbfcr; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) { + brbfcr |= BRBFCR_EL1_INDCALL; + brbfcr |= BRBFCR_EL1_DIRCALL; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + brbfcr |= BRBFCR_EL1_RTN; + + if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL) + brbfcr |= BRBFCR_EL1_INDCALL; + + if (branch_type & PERF_SAMPLE_BRANCH_COND) + brbfcr |= BRBFCR_EL1_CONDDIR; + + if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP) + brbfcr |= BRBFCR_EL1_INDIRECT; + + if (branch_type & PERF_SAMPLE_BRANCH_CALL) + brbfcr |= BRBFCR_EL1_DIRCALL; + + return brbfcr; +} + +/* + * BRBE supports the following privilege mode filters while generating + * branch records. + * + * BRBCR_ELx_E0BRE - EL0 branch records + * BRBCR_ELx_ExBRE - EL1/EL2 branch records + * + * BRBE also supports the following additional functional branch type + * filters while generating branch records. + * + * BRBCR_ELx_EXCEPTION - Exception + * BRBCR_ELx_ERTN - Exception return + */ +static u64 branch_type_to_brbcr(int branch_type) +{ + u64 brbcr = BRBCR_ELx_FZP | BRBCR_ELx_DEFAULT_TS; + + if (branch_type & PERF_SAMPLE_BRANCH_USER) + brbcr |= BRBCR_ELx_E0BRE; + + /* + * When running in the hyp mode, writing into BRBCR_EL1 + * actually writes into BRBCR_EL2 instead. Field E2BRE + * is also at the same position as E1BRE. + */ + if (branch_type & PERF_SAMPLE_BRANCH_KERNEL) + brbcr |= BRBCR_ELx_ExBRE; + + if (branch_type & PERF_SAMPLE_BRANCH_HV) { + if (is_kernel_in_hyp_mode()) + brbcr |= BRBCR_ELx_ExBRE; + } + + if (!(branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES)) + brbcr |= BRBCR_ELx_CC; + + if (!(branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS)) + brbcr |= BRBCR_ELx_MPRED; + + /* + * The exception and exception return branches could be + * captured, irrespective of the perf event's privilege. 
+ * If the perf event does not have enough privilege for + * a given exception level, then addresses which falls + * under that exception level will be reported as zero + * for the captured branch record, creating source only + * or target only records. + */ + if (branch_type & PERF_SAMPLE_BRANCH_KERNEL) { + if (branch_type & PERF_SAMPLE_BRANCH_ANY) { + brbcr |= BRBCR_ELx_EXCEPTION; + brbcr |= BRBCR_ELx_ERTN; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) + brbcr |= BRBCR_ELx_EXCEPTION; + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + brbcr |= BRBCR_ELx_ERTN; + } + return brbcr; +} + +bool brbe_branch_attr_valid(struct perf_event *event) +{ + u64 branch_type = event->attr.branch_sample_type; + + /* + * Ensure both perf branch filter allowed and exclude + * masks are always in sync with the generic perf ABI. + */ + BUILD_BUG_ON(BRBE_PERF_BRANCH_FILTERS != (PERF_SAMPLE_BRANCH_MAX - 1)); + + if (branch_type & BRBE_EXCLUDE_BRANCH_FILTERS) { + pr_debug("requested branch filter not supported 0x%llx\n", branch_type); + return false; + } + + /* Ensure at least 1 branch type is enabled */ + if (!(branch_type & BRBE_ALLOWED_BRANCH_TYPES)) { + pr_debug("no branch type enabled 0x%llx\n", branch_type); + return false; + } + + /* + * No branches are recorded in guests nor nVHE hypervisors, so + * excluding the host or both kernel and user is invalid. + * + * Ideally we'd just require exclude_guest and exclude_hv, but setting + * event filters with perf for kernel or user don't set exclude_guest. + * So effectively, exclude_guest and exclude_hv are ignored. + */ + if (event->attr.exclude_host || (event->attr.exclude_user && event->attr.exclude_kernel)) { + pr_debug("branch filter in hypervisor or guest only not supported 0x%llx\n", branch_type); + return false; + } + + event->hw.branch_reg.config = branch_type_to_brbfcr(event->attr.branch_sample_type); + event->hw.extra_reg.config = branch_type_to_brbcr(event->attr.branch_sample_type); + + return true; +} + +unsigned int brbe_num_branch_records(const struct arm_pmu *armpmu) +{ + return FIELD_GET(BRBIDR0_EL1_NUMREC_MASK, armpmu->reg_brbidr); +} + +void brbe_probe(struct arm_pmu *armpmu) +{ + u64 brbidr, aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1); + u32 brbe; + + brbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT); + if (!valid_brbe_version(brbe)) + return; + + brbidr = read_sysreg_s(SYS_BRBIDR0_EL1); + if (!valid_brbidr(brbidr)) + return; + + armpmu->reg_brbidr = brbidr; +} + +/* + * BRBE is assumed to be disabled/paused on entry + */ +void brbe_enable(const struct arm_pmu *arm_pmu) +{ + struct pmu_hw_events *cpuc = this_cpu_ptr(arm_pmu->hw_events); + u64 brbfcr = 0, brbcr = 0; + + /* + * Discard existing records to avoid a discontinuity, e.g. records + * missed during handling an overflow. + */ + brbe_invalidate(); + + /* + * Merge the permitted branch filters of all events. + */ + for (int i = 0; i < ARMPMU_MAX_HWEVENTS; i++) { + struct perf_event *event = cpuc->events[i]; + + if (event && has_branch_stack(event)) { + brbfcr |= event->hw.branch_reg.config; + brbcr |= event->hw.extra_reg.config; + } + } + + /* + * In VHE mode with MDCR_EL2.HPMN equal to PMCR_EL0.N, BRBCR_EL1.FZP + * controls freezing the branch records on counter overflow rather than + * BRBCR_EL2.FZP (which writes to BRBCR_EL1 are redirected to). + * The exception levels are enabled/disabled in BRBCR_EL2, so keep EL1 + * and EL0 recording disabled for guests. 
+ * + * As BRBCR_EL1 CC and MPRED bits also need to match, use the same + * value for both registers just masking the exception levels. + */ + if (is_kernel_in_hyp_mode()) + write_sysreg_s(brbcr & ~(BRBCR_ELx_ExBRE | BRBCR_ELx_E0BRE), SYS_BRBCR_EL12); + write_sysreg_s(brbcr, SYS_BRBCR_EL1); + /* Ensure BRBCR_ELx settings take effect before unpausing */ + isb(); + + /* Finally write SYS_BRBFCR_EL to unpause BRBE */ + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); + /* Synchronization in PMCR write ensures ordering WRT PMU enabling */ +} + +void brbe_disable(void) +{ + /* + * No need for synchronization here as synchronization in PMCR write + * ensures ordering and in the interrupt handler this is a NOP as + * we're already paused. + */ + write_sysreg_s(BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); + write_sysreg_s(0, SYS_BRBCR_EL1); +} + +static const int brbe_type_to_perf_type_map[BRBINFx_EL1_TYPE_DEBUG_EXIT + 1][2] = { + [BRBINFx_EL1_TYPE_DIRECT_UNCOND] = { PERF_BR_UNCOND, 0 }, + [BRBINFx_EL1_TYPE_INDIRECT] = { PERF_BR_IND, 0 }, + [BRBINFx_EL1_TYPE_DIRECT_LINK] = { PERF_BR_CALL, 0 }, + [BRBINFx_EL1_TYPE_INDIRECT_LINK] = { PERF_BR_IND_CALL, 0 }, + [BRBINFx_EL1_TYPE_RET] = { PERF_BR_RET, 0 }, + [BRBINFx_EL1_TYPE_DIRECT_COND] = { PERF_BR_COND, 0 }, + [BRBINFx_EL1_TYPE_CALL] = { PERF_BR_SYSCALL, 0 }, + [BRBINFx_EL1_TYPE_ERET] = { PERF_BR_ERET, 0 }, + [BRBINFx_EL1_TYPE_IRQ] = { PERF_BR_IRQ, 0 }, + [BRBINFx_EL1_TYPE_TRAP] = { PERF_BR_IRQ, 0 }, + [BRBINFx_EL1_TYPE_SERROR] = { PERF_BR_SERROR, 0 }, + [BRBINFx_EL1_TYPE_ALIGN_FAULT] = { PERF_BR_EXTEND_ABI, PERF_BR_NEW_FAULT_ALGN }, + [BRBINFx_EL1_TYPE_INSN_FAULT] = { PERF_BR_EXTEND_ABI, PERF_BR_NEW_FAULT_INST }, + [BRBINFx_EL1_TYPE_DATA_FAULT] = { PERF_BR_EXTEND_ABI, PERF_BR_NEW_FAULT_DATA }, +}; + +static void brbe_set_perf_entry_type(struct perf_branch_entry *entry, u64 brbinf) +{ + int brbe_type = brbinf_get_type(brbinf); + + if (brbe_type <= BRBINFx_EL1_TYPE_DEBUG_EXIT) { + const int *br_type = brbe_type_to_perf_type_map[brbe_type]; + + entry->type = br_type[0]; + entry->new_type = br_type[1]; + } +} + +static int brbinf_get_perf_priv(u64 brbinf) +{ + int brbe_el = brbinf_get_el(brbinf); + + switch (brbe_el) { + case BRBINFx_EL1_EL_EL0: + return PERF_BR_PRIV_USER; + case BRBINFx_EL1_EL_EL1: + return PERF_BR_PRIV_KERNEL; + case BRBINFx_EL1_EL_EL2: + if (is_kernel_in_hyp_mode()) + return PERF_BR_PRIV_KERNEL; + return PERF_BR_PRIV_HV; + default: + pr_warn_once("%d - unknown branch privilege captured\n", brbe_el); + return PERF_BR_PRIV_UNKNOWN; + } +} + +static bool perf_entry_from_brbe_regset(int index, struct perf_branch_entry *entry, + const struct perf_event *event) +{ + struct brbe_regset bregs; + u64 brbinf; + + if (!__read_brbe_regset(&bregs, index)) + return false; + + brbinf = bregs.brbinf; + perf_clear_branch_entry_bitfields(entry); + if (brbe_record_is_complete(brbinf)) { + entry->from = bregs.brbsrc; + entry->to = bregs.brbtgt; + } else if (brbe_record_is_source_only(brbinf)) { + entry->from = bregs.brbsrc; + entry->to = 0; + } else if (brbe_record_is_target_only(brbinf)) { + entry->from = 0; + entry->to = bregs.brbtgt; + } + + brbe_set_perf_entry_type(entry, brbinf); + + if (!branch_sample_no_cycles(event)) + entry->cycles = brbinf_get_cycles(brbinf); + + if (!branch_sample_no_flags(event)) { + /* Mispredict info is available for source only and complete branch records. 
*/ + if (!brbe_record_is_target_only(brbinf)) { + entry->mispred = brbinf_get_mispredict(brbinf); + entry->predicted = !entry->mispred; + } + + /* + * Currently TME feature is neither implemented in any hardware + * nor it is being supported in the kernel. Just warn here once + * if TME related information shows up rather unexpectedly. + */ + if (brbinf_get_lastfailed(brbinf) || brbinf_get_in_tx(brbinf)) + pr_warn_once("Unknown transaction states\n"); + } + + /* + * Branch privilege level is available for target only and complete + * branch records. + */ + if (!brbe_record_is_source_only(brbinf)) + entry->priv = brbinf_get_perf_priv(brbinf); + + return true; +} + +#define PERF_BR_ARM64_ALL ( \ + BIT(PERF_BR_COND) | \ + BIT(PERF_BR_UNCOND) | \ + BIT(PERF_BR_IND) | \ + BIT(PERF_BR_CALL) | \ + BIT(PERF_BR_IND_CALL) | \ + BIT(PERF_BR_RET)) + +#define PERF_BR_ARM64_ALL_KERNEL ( \ + BIT(PERF_BR_SYSCALL) | \ + BIT(PERF_BR_IRQ) | \ + BIT(PERF_BR_SERROR) | \ + BIT(PERF_BR_MAX + PERF_BR_NEW_FAULT_ALGN) | \ + BIT(PERF_BR_MAX + PERF_BR_NEW_FAULT_DATA) | \ + BIT(PERF_BR_MAX + PERF_BR_NEW_FAULT_INST)) + +static void prepare_event_branch_type_mask(u64 branch_sample, + unsigned long *event_type_mask) +{ + if (branch_sample & PERF_SAMPLE_BRANCH_ANY) { + if (branch_sample & PERF_SAMPLE_BRANCH_KERNEL) + bitmap_from_u64(event_type_mask, + BIT(PERF_BR_ERET) | PERF_BR_ARM64_ALL | + PERF_BR_ARM64_ALL_KERNEL); + else + bitmap_from_u64(event_type_mask, PERF_BR_ARM64_ALL); + return; + } + + bitmap_zero(event_type_mask, PERF_BR_ARM64_MAX); + + if (branch_sample & PERF_SAMPLE_BRANCH_ANY_CALL) { + if (branch_sample & PERF_SAMPLE_BRANCH_KERNEL) + bitmap_from_u64(event_type_mask, PERF_BR_ARM64_ALL_KERNEL); + + set_bit(PERF_BR_CALL, event_type_mask); + set_bit(PERF_BR_IND_CALL, event_type_mask); + } + + if (branch_sample & PERF_SAMPLE_BRANCH_IND_JUMP) + set_bit(PERF_BR_IND, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_COND) + set_bit(PERF_BR_COND, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_CALL) + set_bit(PERF_BR_CALL, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_IND_CALL) + set_bit(PERF_BR_IND_CALL, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_ANY_RETURN) { + set_bit(PERF_BR_RET, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_KERNEL) + set_bit(PERF_BR_ERET, event_type_mask); + } +} + +/* + * BRBE is configured with an OR of permissions from all events, so there may + * be events which have to be dropped or events where just the source or target + * address has to be zeroed. + */ +static bool filter_branch_privilege(struct perf_branch_entry *entry, u64 branch_sample_type) +{ + bool from_user = access_ok((void __user *)(unsigned long)entry->from, 4); + bool to_user = access_ok((void __user *)(unsigned long)entry->to, 4); + bool exclude_kernel = !((branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL) || + (is_kernel_in_hyp_mode() && (branch_sample_type & PERF_SAMPLE_BRANCH_HV))); + + /* We can only have a half record if permissions have not been expanded */ + if (!entry->from || !entry->to) + return true; + + /* + * If record is within a single exception level, just need to either + * drop or keep the entire record. + */ + if (from_user == to_user) + return ((entry->priv == PERF_BR_PRIV_KERNEL) && !exclude_kernel) || + ((entry->priv == PERF_BR_PRIV_USER) && + (branch_sample_type & PERF_SAMPLE_BRANCH_USER)); + + /* + * Record is across exception levels, mask addresses for the exception + * level we're not capturing. 
+ */ + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_USER)) { + if (from_user) + entry->from = 0; + if (to_user) + entry->to = 0; + } + + if (exclude_kernel) { + if (!from_user) + entry->from = 0; + if (!to_user) + entry->to = 0; + } + + return true; +} + +static bool filter_branch_type(struct perf_branch_entry *entry, + const unsigned long *event_type_mask) +{ + if (entry->type == PERF_BR_EXTEND_ABI) + return test_bit(PERF_BR_MAX + entry->new_type, event_type_mask); + else + return test_bit(entry->type, event_type_mask); +} + +static bool filter_branch_record(struct perf_branch_entry *entry, + u64 branch_sample, + const unsigned long *event_type_mask) +{ + return filter_branch_type(entry, event_type_mask) && + filter_branch_privilege(entry, branch_sample); +} + +void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack, + const struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + int nr_hw = brbe_num_branch_records(cpu_pmu); + int nr_banks = DIV_ROUND_UP(nr_hw, BRBE_BANK_MAX_ENTRIES); + int nr_filtered = 0; + u64 branch_sample_type = event->attr.branch_sample_type; + DECLARE_BITMAP(event_type_mask, PERF_BR_ARM64_MAX); + + prepare_event_branch_type_mask(branch_sample_type, event_type_mask); + + for (int bank = 0; bank < nr_banks; bank++) { + int nr_remaining = nr_hw - (bank * BRBE_BANK_MAX_ENTRIES); + int nr_this_bank = min(nr_remaining, BRBE_BANK_MAX_ENTRIES); + + select_brbe_bank(bank); + + for (int i = 0; i < nr_this_bank; i++) { + struct perf_branch_entry *pbe = &branch_stack->entries[nr_filtered]; + + if (!perf_entry_from_brbe_regset(i, pbe, event)) + goto done; + + if (!filter_branch_record(pbe, branch_sample_type, event_type_mask)) + continue; + + nr_filtered++; + } + } + +done: + branch_stack->nr = nr_filtered; +} diff --git a/drivers/perf/arm_brbe.h b/drivers/perf/arm_brbe.h new file mode 100644 index 000000000000..b7c7d8796c86 --- /dev/null +++ b/drivers/perf/arm_brbe.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Branch Record Buffer Extension Helpers. 
+ *
+ * Copyright (C) 2022-2025 ARM Limited
+ *
+ * Author: Anshuman Khandual <anshuman.khandual@arm.com>
+ */
+
+struct arm_pmu;
+struct perf_branch_stack;
+struct perf_event;
+
+#ifdef CONFIG_ARM64_BRBE
+void brbe_probe(struct arm_pmu *arm_pmu);
+unsigned int brbe_num_branch_records(const struct arm_pmu *armpmu);
+void brbe_invalidate(void);
+
+void brbe_enable(const struct arm_pmu *arm_pmu);
+void brbe_disable(void);
+
+bool brbe_branch_attr_valid(struct perf_event *event);
+void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack,
+ const struct perf_event *event);
+#else
+static inline void brbe_probe(struct arm_pmu *arm_pmu) { }
+static inline unsigned int brbe_num_branch_records(const struct arm_pmu *armpmu)
+{
+ return 0;
+}
+
+static inline void brbe_invalidate(void) { }
+
+static inline void brbe_enable(const struct arm_pmu *arm_pmu) { }
+static inline void brbe_disable(void) { }
+
+static inline bool brbe_branch_attr_valid(struct perf_event *event)
+{
+ WARN_ON_ONCE(!has_branch_stack(event));
+ return false;
+}
+
+static inline void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack,
+ const struct perf_event *event)
+{
+}
+#endif
diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c
index f72f5689923c..b8ca69fd9d1d 100644
--- a/drivers/perf/arm_cspmu/ampere_cspmu.c
+++ b/drivers/perf/arm_cspmu/ampere_cspmu.c
@@ -10,10 +10,10 @@ #include "arm_cspmu.h"
-#define PMAUXR0 0xD80
-#define PMAUXR1 0xD84
-#define PMAUXR2 0xD88
-#define PMAUXR3 0xD8C
+#define PMAUXR0 PMIMPDEF
+#define PMAUXR1 (PMIMPDEF + 0x4)
+#define PMAUXR2 (PMIMPDEF + 0x8)
+#define PMAUXR3 (PMIMPDEF + 0xC)
 #define to_ampere_cspmu_ctx(cspmu) ((struct ampere_cspmu_ctx *)(cspmu->impl.ctx))
@@ -132,32 +132,20 @@ ampere_cspmu_get_name(const struct arm_cspmu *cspmu) return ctx->name; }
-static u32 ampere_cspmu_event_filter(const struct perf_event *event)
+static void ampere_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event)
 { /*
- * PMEVFILTR or PMCCFILTR aren't used in Ampere SoC PMU but are marked
- * as RES0. Make sure, PMCCFILTR is written zero.
+ * PMCCFILTR is RES0, so this is just a dummy callback to override
+ * the default implementation and avoid writing to it.
 */
- return 0;
 } static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
- struct hw_perf_event *hwc,
- u32 filter)
+ const struct perf_event *event)
 {
- struct perf_event *event;
- unsigned int idx;
 u32 threshold, rank, bank;
- /*
- * At this point, all the events have the same filter settings.
- * Therefore, take the first event and use its configuration.
- */ - idx = find_first_bit(cspmu->hw_events.used_ctrs, - cspmu->cycle_counter_logical_idx); - - event = cspmu->hw_events.events[idx]; - threshold = get_threshold(event); rank = get_rank(event); bank = get_bank(event); @@ -233,7 +221,7 @@ static int ampere_cspmu_init_ops(struct arm_cspmu *cspmu) cspmu->impl.ctx = ctx; - impl_ops->event_filter = ampere_cspmu_event_filter; + impl_ops->set_cc_filter = ampere_cspmu_set_cc_filter; impl_ops->set_ev_filter = ampere_cspmu_set_ev_filter; impl_ops->validate_event = ampere_cspmu_validate_event; impl_ops->get_name = ampere_cspmu_get_name; diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 81e8b97e9353..34430b68f602 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -40,51 +40,6 @@ ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_cpumask_show, \ (unsigned long)_config) -/* - * CoreSight PMU Arch register offsets. - */ -#define PMEVCNTR_LO 0x0 -#define PMEVCNTR_HI 0x4 -#define PMEVTYPER 0x400 -#define PMCCFILTR 0x47C -#define PMEVFILTR 0xA00 -#define PMCNTENSET 0xC00 -#define PMCNTENCLR 0xC20 -#define PMINTENSET 0xC40 -#define PMINTENCLR 0xC60 -#define PMOVSCLR 0xC80 -#define PMOVSSET 0xCC0 -#define PMCFGR 0xE00 -#define PMCR 0xE04 -#define PMIIDR 0xE08 - -/* PMCFGR register field */ -#define PMCFGR_NCG GENMASK(31, 28) -#define PMCFGR_HDBG BIT(24) -#define PMCFGR_TRO BIT(23) -#define PMCFGR_SS BIT(22) -#define PMCFGR_FZO BIT(21) -#define PMCFGR_MSI BIT(20) -#define PMCFGR_UEN BIT(19) -#define PMCFGR_NA BIT(17) -#define PMCFGR_EX BIT(16) -#define PMCFGR_CCD BIT(15) -#define PMCFGR_CC BIT(14) -#define PMCFGR_SIZE GENMASK(13, 8) -#define PMCFGR_N GENMASK(7, 0) - -/* PMCR register field */ -#define PMCR_TRO BIT(11) -#define PMCR_HDBG BIT(10) -#define PMCR_FZO BIT(9) -#define PMCR_NA BIT(8) -#define PMCR_DP BIT(5) -#define PMCR_X BIT(4) -#define PMCR_D BIT(3) -#define PMCR_C BIT(2) -#define PMCR_P BIT(1) -#define PMCR_E BIT(0) - /* Each SET/CLR register supports up to 32 counters. 
*/ #define ARM_CSPMU_SET_CLR_COUNTER_SHIFT 5 #define ARM_CSPMU_SET_CLR_COUNTER_NUM \ @@ -111,7 +66,9 @@ static unsigned long arm_cspmu_cpuhp_state; static DEFINE_MUTEX(arm_cspmu_lock); static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu, - struct hw_perf_event *hwc, u32 filter); + const struct perf_event *event); +static void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu, + const struct perf_event *event); static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev) { @@ -226,6 +183,7 @@ arm_cspmu_event_attr_is_visible(struct kobject *kobj, static struct attribute *arm_cspmu_format_attrs[] = { ARM_CSPMU_FORMAT_EVENT_ATTR, ARM_CSPMU_FORMAT_FILTER_ATTR, + ARM_CSPMU_FORMAT_FILTER2_ATTR, NULL, }; @@ -250,11 +208,6 @@ static bool arm_cspmu_is_cycle_counter_event(const struct perf_event *event) return (event->attr.config == ARM_CSPMU_EVT_CYCLES_DEFAULT); } -static u32 arm_cspmu_event_filter(const struct perf_event *event) -{ - return event->attr.config1 & ARM_CSPMU_FILTER_MASK; -} - static ssize_t arm_cspmu_identifier_show(struct device *dev, struct device_attribute *attr, char *page) @@ -369,14 +322,14 @@ static struct arm_cspmu_impl_match impl_match[] = { { .module_name = "nvidia_cspmu", .pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA, - .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, + .pmiidr_mask = PMIIDR_IMPLEMENTER, .module = NULL, .impl_init_ops = NULL, }, { .module_name = "ampere_cspmu", .pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE, - .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, + .pmiidr_mask = PMIIDR_IMPLEMENTER, .module = NULL, .impl_init_ops = NULL, }, @@ -398,6 +351,44 @@ static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr) return NULL; } +static u32 arm_cspmu_get_pmiidr(struct arm_cspmu *cspmu) +{ + u32 pmiidr, pmpidr; + + pmiidr = readl(cspmu->base0 + PMIIDR); + + if (pmiidr != 0) + return pmiidr; + + /* Construct PMIIDR value from PMPIDRs. 
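+ * PMIIDR read back as zero above, so derive an equivalent value from
+ * the PMPIDR0-4 peripheral ID registers instead.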
*/ + + pmpidr = readl(cspmu->base0 + PMPIDR0); + pmiidr |= FIELD_PREP(PMIIDR_PRODUCTID_PART_0, + FIELD_GET(PMPIDR0_PART_0, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR1); + pmiidr |= FIELD_PREP(PMIIDR_PRODUCTID_PART_1, + FIELD_GET(PMPIDR1_PART_1, pmpidr)); + pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_0, + FIELD_GET(PMPIDR1_DES_0, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR2); + pmiidr |= FIELD_PREP(PMIIDR_VARIANT, + FIELD_GET(PMPIDR2_REVISION, pmpidr)); + pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_1, + FIELD_GET(PMPIDR2_DES_1, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR3); + pmiidr |= FIELD_PREP(PMIIDR_REVISION, + FIELD_GET(PMPIDR3_REVAND, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR4); + pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_2, + FIELD_GET(PMPIDR4_DES_2, pmpidr)); + + return pmiidr; +} + #define DEFAULT_IMPL_OP(name) .name = arm_cspmu_##name static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) @@ -408,7 +399,7 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) /* Start with a default PMU implementation */ cspmu->impl.module = THIS_MODULE; - cspmu->impl.pmiidr = readl(cspmu->base0 + PMIIDR); + cspmu->impl.pmiidr = arm_cspmu_get_pmiidr(cspmu); cspmu->impl.ops = (struct arm_cspmu_impl_ops) { DEFAULT_IMPL_OP(get_event_attrs), DEFAULT_IMPL_OP(get_format_attrs), @@ -416,7 +407,7 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) DEFAULT_IMPL_OP(get_name), DEFAULT_IMPL_OP(is_cycle_counter_event), DEFAULT_IMPL_OP(event_type), - DEFAULT_IMPL_OP(event_filter), + DEFAULT_IMPL_OP(set_cc_filter), DEFAULT_IMPL_OP(set_ev_filter), DEFAULT_IMPL_OP(event_attr_is_visible), }; @@ -812,26 +803,28 @@ static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu, } static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu, - struct hw_perf_event *hwc, - u32 filter) + const struct perf_event *event) { - u32 offset = PMEVFILTR + (4 * hwc->idx); + u32 filter = event->attr.config1 & ARM_CSPMU_FILTER_MASK; + u32 filter2 = event->attr.config2 & ARM_CSPMU_FILTER_MASK; + u32 offset = 4 * event->hw.idx; - writel(filter, cspmu->base0 + offset); + writel(filter, cspmu->base0 + PMEVFILTR + offset); + writel(filter2, cspmu->base0 + PMEVFILT2R + offset); } -static inline void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu, u32 filter) +static void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu, + const struct perf_event *event) { - u32 offset = PMCCFILTR; + u32 filter = event->attr.config1 & ARM_CSPMU_FILTER_MASK; - writel(filter, cspmu->base0 + offset); + writel(filter, cspmu->base0 + PMCCFILTR); } static void arm_cspmu_start(struct perf_event *event, int pmu_flags) { struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - u32 filter; /* We always reprogram the counter */ if (pmu_flags & PERF_EF_RELOAD) @@ -839,13 +832,11 @@ static void arm_cspmu_start(struct perf_event *event, int pmu_flags) arm_cspmu_set_event_period(event); - filter = cspmu->impl.ops.event_filter(event); - if (event->hw.extra_reg.idx == cspmu->cycle_counter_logical_idx) { - arm_cspmu_set_cc_filter(cspmu, filter); + cspmu->impl.ops.set_cc_filter(cspmu, event); } else { arm_cspmu_set_event(cspmu, hwc); - cspmu->impl.ops.set_ev_filter(cspmu, hwc, filter); + cspmu->impl.ops.set_ev_filter(cspmu, event); } hwc->state = 0; @@ -862,6 +853,10 @@ static void arm_cspmu_stop(struct perf_event *event, int pmu_flags) return; arm_cspmu_disable_counter(cspmu, hwc->idx); + + if (cspmu->impl.ops.reset_ev_filter) + cspmu->impl.ops.reset_ev_filter(cspmu, event); + 
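+ /* The counter was stopped above; read back its final count. */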
arm_cspmu_event_update(event); hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; @@ -1412,8 +1407,10 @@ void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match) /* Unbind the driver from all matching backend devices. */ while ((dev = driver_find_device(&arm_cspmu_driver.driver, NULL, - match, arm_cspmu_match_device))) + match, arm_cspmu_match_device))) { device_release_driver(dev); + put_device(dev); + } mutex_lock(&arm_cspmu_lock); diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index 2621f3111148..cd65a58dbd88 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -47,6 +47,8 @@ /* Default filter format */ #define ARM_CSPMU_FORMAT_FILTER_ATTR \ ARM_CSPMU_FORMAT_ATTR(filter, "config1:0-31") +#define ARM_CSPMU_FORMAT_FILTER2_ATTR \ + ARM_CSPMU_FORMAT_ATTR(filter2, "config2:0-31") /* * This is the default event number for cycle count, if supported, since the @@ -65,9 +67,87 @@ /* The cycle counter, if implemented, is located at counter[31]. */ #define ARM_CSPMU_CYCLE_CNTR_IDX 31 +/* + * CoreSight PMU Arch register offsets. + */ +#define PMEVCNTR_LO 0x0 +#define PMEVCNTR_HI 0x4 +#define PMEVTYPER 0x400 +#define PMCCFILTR 0x47C +#define PMEVFILT2R 0x800 +#define PMEVFILTR 0xA00 +#define PMCNTENSET 0xC00 +#define PMCNTENCLR 0xC20 +#define PMINTENSET 0xC40 +#define PMINTENCLR 0xC60 +#define PMOVSCLR 0xC80 +#define PMOVSSET 0xCC0 +#define PMIMPDEF 0xD80 +#define PMCFGR 0xE00 +#define PMCR 0xE04 +#define PMIIDR 0xE08 +#define PMPIDR0 0xFE0 +#define PMPIDR1 0xFE4 +#define PMPIDR2 0xFE8 +#define PMPIDR3 0xFEC +#define PMPIDR4 0xFD0 + +/* PMCFGR register field */ +#define PMCFGR_NCG GENMASK(31, 28) +#define PMCFGR_HDBG BIT(24) +#define PMCFGR_TRO BIT(23) +#define PMCFGR_SS BIT(22) +#define PMCFGR_FZO BIT(21) +#define PMCFGR_MSI BIT(20) +#define PMCFGR_UEN BIT(19) +#define PMCFGR_NA BIT(17) +#define PMCFGR_EX BIT(16) +#define PMCFGR_CCD BIT(15) +#define PMCFGR_CC BIT(14) +#define PMCFGR_SIZE GENMASK(13, 8) +#define PMCFGR_N GENMASK(7, 0) + +/* PMCR register field */ +#define PMCR_TRO BIT(11) +#define PMCR_HDBG BIT(10) +#define PMCR_FZO BIT(9) +#define PMCR_NA BIT(8) +#define PMCR_DP BIT(5) +#define PMCR_X BIT(4) +#define PMCR_D BIT(3) +#define PMCR_C BIT(2) +#define PMCR_P BIT(1) +#define PMCR_E BIT(0) + /* PMIIDR register field */ -#define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0) -#define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20) +#define PMIIDR_IMPLEMENTER GENMASK(11, 0) +#define PMIIDR_IMPLEMENTER_DES_0 GENMASK(3, 0) +#define PMIIDR_IMPLEMENTER_DES_1 GENMASK(6, 4) +#define PMIIDR_IMPLEMENTER_DES_2 GENMASK(11, 8) +#define PMIIDR_REVISION GENMASK(15, 12) +#define PMIIDR_VARIANT GENMASK(19, 16) +#define PMIIDR_PRODUCTID GENMASK(31, 20) +#define PMIIDR_PRODUCTID_PART_0 GENMASK(27, 20) +#define PMIIDR_PRODUCTID_PART_1 GENMASK(31, 28) + +/* PMPIDR0 register field */ +#define PMPIDR0_PART_0 GENMASK(7, 0) + +/* PMPIDR1 register field */ +#define PMPIDR1_DES_0 GENMASK(7, 4) +#define PMPIDR1_PART_1 GENMASK(3, 0) + +/* PMPIDR2 register field */ +#define PMPIDR2_REVISION GENMASK(7, 4) +#define PMPIDR2_DES_1 GENMASK(2, 0) + +/* PMPIDR3 register field */ +#define PMPIDR3_REVAND GENMASK(7, 4) +#define PMPIDR3_CMOD GENMASK(3, 0) + +/* PMPIDR4 register field */ +#define PMPIDR4_SIZE GENMASK(7, 4) +#define PMPIDR4_DES_2 GENMASK(3, 0) /* JEDEC-assigned JEP106 identification code */ #define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B @@ -103,11 +183,13 @@ struct arm_cspmu_impl_ops { bool (*is_cycle_counter_event)(const struct 
perf_event *event); /* Decode event type/id from configs */ u32 (*event_type)(const struct perf_event *event); - /* Decode filter value from configs */ - u32 (*event_filter)(const struct perf_event *event); - /* Set event filter */ + /* Set/reset event filters */ + void (*set_cc_filter)(struct arm_cspmu *cspmu, + const struct perf_event *event); void (*set_ev_filter)(struct arm_cspmu *cspmu, - struct hw_perf_event *hwc, u32 filter); + const struct perf_event *event); + void (*reset_ev_filter)(struct arm_cspmu *cspmu, + const struct perf_event *event); /* Implementation specific event validation */ int (*validate_event)(struct arm_cspmu *cspmu, struct perf_event *event); diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index d0ef611240aa..e06a06d3407b 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -6,6 +6,7 @@ /* Support for NVIDIA specific attributes. */ +#include <linux/io.h> #include <linux/module.h> #include <linux/topology.h> @@ -22,7 +23,7 @@ #define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0) -#define NV_PRODID_MASK GENMASK(31, 0) +#define NV_PRODID_MASK (PMIIDR_PRODUCTID | PMIIDR_VARIANT | PMIIDR_REVISION) #define NV_FORMAT_NAME_GENERIC 0 @@ -39,10 +40,21 @@ struct nv_cspmu_ctx { const char *name; - u32 filter_mask; - u32 filter_default_val; + struct attribute **event_attr; struct attribute **format_attr; + + u32 filter_mask; + u32 filter_default_val; + u32 filter2_mask; + u32 filter2_default_val; + + u32 (*get_filter)(const struct perf_event *event); + u32 (*get_filter2)(const struct perf_event *event); + + void *data; + + int (*init_data)(struct arm_cspmu *cspmu); }; static struct attribute *scf_pmu_event_attrs[] = { @@ -54,65 +66,24 @@ static struct attribute *scf_pmu_event_attrs[] = { ARM_CSPMU_EVENT_ATTR(scf_cache_wb, 0xF3), NV_CSPMU_EVENT_ATTR_4(socket, rd_data, 0x101), - NV_CSPMU_EVENT_ATTR_4(socket, dl_rsp, 0x105), NV_CSPMU_EVENT_ATTR_4(socket, wb_data, 0x109), - NV_CSPMU_EVENT_ATTR_4(socket, ev_rsp, 0x10d), - NV_CSPMU_EVENT_ATTR_4(socket, prb_data, 0x111), NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding, 0x115), - NV_CSPMU_EVENT_ATTR_4(socket, dl_outstanding, 0x119), - NV_CSPMU_EVENT_ATTR_4(socket, wb_outstanding, 0x11d), - NV_CSPMU_EVENT_ATTR_4(socket, wr_outstanding, 0x121), - NV_CSPMU_EVENT_ATTR_4(socket, ev_outstanding, 0x125), - NV_CSPMU_EVENT_ATTR_4(socket, prb_outstanding, 0x129), NV_CSPMU_EVENT_ATTR_4(socket, rd_access, 0x12d), - NV_CSPMU_EVENT_ATTR_4(socket, dl_access, 0x131), NV_CSPMU_EVENT_ATTR_4(socket, wb_access, 0x135), NV_CSPMU_EVENT_ATTR_4(socket, wr_access, 0x139), - NV_CSPMU_EVENT_ATTR_4(socket, ev_access, 0x13d), - NV_CSPMU_EVENT_ATTR_4(socket, prb_access, 0x141), - - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_data, 0x145), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_access, 0x149), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_access, 0x14d), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_outstanding, 0x151), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_outstanding, 0x155), - - NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_data, 0x159), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_access, 0x15d), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_access, 0x161), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_outstanding, 0x165), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_outstanding, 0x169), ARM_CSPMU_EVENT_ATTR(gmem_rd_data, 0x16d), ARM_CSPMU_EVENT_ATTR(gmem_rd_access, 0x16e), ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding, 0x16f), - ARM_CSPMU_EVENT_ATTR(gmem_dl_rsp, 0x170), - ARM_CSPMU_EVENT_ATTR(gmem_dl_access, 0x171), - ARM_CSPMU_EVENT_ATTR(gmem_dl_outstanding, 
0x172), ARM_CSPMU_EVENT_ATTR(gmem_wb_data, 0x173), ARM_CSPMU_EVENT_ATTR(gmem_wb_access, 0x174), - ARM_CSPMU_EVENT_ATTR(gmem_wb_outstanding, 0x175), - ARM_CSPMU_EVENT_ATTR(gmem_ev_rsp, 0x176), - ARM_CSPMU_EVENT_ATTR(gmem_ev_access, 0x177), - ARM_CSPMU_EVENT_ATTR(gmem_ev_outstanding, 0x178), ARM_CSPMU_EVENT_ATTR(gmem_wr_data, 0x179), - ARM_CSPMU_EVENT_ATTR(gmem_wr_outstanding, 0x17a), ARM_CSPMU_EVENT_ATTR(gmem_wr_access, 0x17b), NV_CSPMU_EVENT_ATTR_4(socket, wr_data, 0x17c), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_data, 0x180), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_data, 0x184), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_access, 0x188), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_outstanding, 0x18c), - - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_data, 0x190), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_data, 0x194), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_access, 0x198), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_outstanding, 0x19c), - ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes, 0x1a0), ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes, 0x1a1), ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data, 0x1a2), @@ -122,35 +93,12 @@ static struct attribute *scf_pmu_event_attrs[] = { ARM_CSPMU_EVENT_ATTR(cmem_rd_data, 0x1a5), ARM_CSPMU_EVENT_ATTR(cmem_rd_access, 0x1a6), ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding, 0x1a7), - ARM_CSPMU_EVENT_ATTR(cmem_dl_rsp, 0x1a8), - ARM_CSPMU_EVENT_ATTR(cmem_dl_access, 0x1a9), - ARM_CSPMU_EVENT_ATTR(cmem_dl_outstanding, 0x1aa), ARM_CSPMU_EVENT_ATTR(cmem_wb_data, 0x1ab), ARM_CSPMU_EVENT_ATTR(cmem_wb_access, 0x1ac), - ARM_CSPMU_EVENT_ATTR(cmem_wb_outstanding, 0x1ad), - ARM_CSPMU_EVENT_ATTR(cmem_ev_rsp, 0x1ae), - ARM_CSPMU_EVENT_ATTR(cmem_ev_access, 0x1af), - ARM_CSPMU_EVENT_ATTR(cmem_ev_outstanding, 0x1b0), ARM_CSPMU_EVENT_ATTR(cmem_wr_data, 0x1b1), - ARM_CSPMU_EVENT_ATTR(cmem_wr_outstanding, 0x1b2), - - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_data, 0x1b3), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_access, 0x1b7), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_access, 0x1bb), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_outstanding, 0x1bf), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_outstanding, 0x1c3), - - ARM_CSPMU_EVENT_ATTR(ocu_prb_access, 0x1c7), - ARM_CSPMU_EVENT_ATTR(ocu_prb_data, 0x1c8), - ARM_CSPMU_EVENT_ATTR(ocu_prb_outstanding, 0x1c9), ARM_CSPMU_EVENT_ATTR(cmem_wr_access, 0x1ca), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_access, 0x1cb), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_data, 0x1cf), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_data, 0x1d3), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_outstanding, 0x1d7), - ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes, 0x1db), ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), @@ -194,6 +142,7 @@ static struct attribute *pcie_pmu_format_attrs[] = { static struct attribute *nvlink_c2c_pmu_format_attrs[] = { ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_ATTR(port, "config1:0-1"), NULL, }; @@ -206,6 +155,7 @@ static struct attribute *cnvlink_pmu_format_attrs[] = { static struct attribute *generic_pmu_format_attrs[] = { ARM_CSPMU_FORMAT_EVENT_ATTR, ARM_CSPMU_FORMAT_FILTER_ATTR, + ARM_CSPMU_FORMAT_FILTER2_ATTR, NULL, }; @@ -238,12 +188,55 @@ static u32 nv_cspmu_event_filter(const struct perf_event *event) const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(to_arm_cspmu(event->pmu)); - if (ctx->filter_mask == 0) + const u32 filter_val = event->attr.config1 & ctx->filter_mask; + + if (filter_val == 0) return ctx->filter_default_val; - return event->attr.config1 & ctx->filter_mask; + return filter_val; } +static u32 nv_cspmu_event_filter2(const struct perf_event *event) +{ + const struct nv_cspmu_ctx *ctx = + 
to_nv_cspmu_ctx(to_arm_cspmu(event->pmu)); + + const u32 filter_val = event->attr.config2 & ctx->filter2_mask; + + if (filter_val == 0) + return ctx->filter2_default_val; + + return filter_val; +} + +static void nv_cspmu_set_ev_filter(struct arm_cspmu *cspmu, + const struct perf_event *event) +{ + u32 filter, offset; + const struct nv_cspmu_ctx *ctx = + to_nv_cspmu_ctx(to_arm_cspmu(event->pmu)); + offset = 4 * event->hw.idx; + + if (ctx->get_filter) { + filter = ctx->get_filter(event); + writel(filter, cspmu->base0 + PMEVFILTR + offset); + } + + if (ctx->get_filter2) { + filter = ctx->get_filter2(event); + writel(filter, cspmu->base0 + PMEVFILT2R + offset); + } +} + +static void nv_cspmu_set_cc_filter(struct arm_cspmu *cspmu, + const struct perf_event *event) +{ + u32 filter = nv_cspmu_event_filter(event); + + writel(filter, cspmu->base0 + PMCCFILTR); +} + + enum nv_cspmu_name_fmt { NAME_FMT_GENERIC, NAME_FMT_SOCKET @@ -252,74 +245,120 @@ enum nv_cspmu_name_fmt { struct nv_cspmu_match { u32 prodid; u32 prodid_mask; - u64 filter_mask; - u32 filter_default_val; const char *name_pattern; enum nv_cspmu_name_fmt name_fmt; - struct attribute **event_attr; - struct attribute **format_attr; + struct nv_cspmu_ctx template_ctx; + struct arm_cspmu_impl_ops ops; }; static const struct nv_cspmu_match nv_cspmu_match[] = { { - .prodid = 0x103, + .prodid = 0x10300000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = NV_PCIE_FILTER_ID_MASK, - .filter_default_val = NV_PCIE_FILTER_ID_MASK, .name_pattern = "nvidia_pcie_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = mcf_pmu_event_attrs, - .format_attr = pcie_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = pcie_pmu_format_attrs, + .filter_mask = NV_PCIE_FILTER_ID_MASK, + .filter_default_val = NV_PCIE_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { - .prodid = 0x104, + .prodid = 0x10400000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = 0x0, - .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c1_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = mcf_pmu_event_attrs, - .format_attr = nvlink_c2c_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = nvlink_c2c_pmu_format_attrs, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, + .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { - .prodid = 0x105, + .prodid = 0x10500000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = 0x0, - .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c0_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = mcf_pmu_event_attrs, - .format_attr = nvlink_c2c_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = nvlink_c2c_pmu_format_attrs, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, + .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { - .prodid = 0x106, + .prodid = 0x10600000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = NV_CNVL_FILTER_ID_MASK, - .filter_default_val = NV_CNVL_FILTER_ID_MASK, .name_pattern = "nvidia_cnvlink_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - 
.event_attr = mcf_pmu_event_attrs, - .format_attr = cnvlink_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = cnvlink_pmu_format_attrs, + .filter_mask = NV_CNVL_FILTER_ID_MASK, + .filter_default_val = NV_CNVL_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { - .prodid = 0x2CF, + .prodid = 0x2CF00000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = 0x0, - .filter_default_val = 0x0, .name_pattern = "nvidia_scf_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = scf_pmu_event_attrs, - .format_attr = scf_pmu_format_attrs + .template_ctx = { + .event_attr = scf_pmu_event_attrs, + .format_attr = scf_pmu_format_attrs, + .filter_mask = 0x0, + .filter_default_val = 0x0, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { .prodid = 0, .prodid_mask = 0, - .filter_mask = NV_GENERIC_FILTER_ID_MASK, - .filter_default_val = NV_GENERIC_FILTER_ID_MASK, .name_pattern = "nvidia_uncore_pmu_%u", .name_fmt = NAME_FMT_GENERIC, - .event_attr = generic_pmu_event_attrs, - .format_attr = generic_pmu_format_attrs + .template_ctx = { + .event_attr = generic_pmu_event_attrs, + .format_attr = generic_pmu_format_attrs, + .filter_mask = NV_GENERIC_FILTER_ID_MASK, + .filter_default_val = NV_GENERIC_FILTER_ID_MASK, + .filter2_mask = NV_GENERIC_FILTER_ID_MASK, + .filter2_default_val = NV_GENERIC_FILTER_ID_MASK, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = nv_cspmu_event_filter2, + .data = NULL, + .init_data = NULL + }, }, }; @@ -352,9 +391,16 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu, return name; } +#define SET_OP(name, impl, match, default_op) \ + do { \ + if (match->ops.name) \ + impl->name = match->ops.name; \ + else if (default_op != NULL) \ + impl->name = default_op; \ + } while (false) + static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) { - u32 prodid; struct nv_cspmu_ctx *ctx; struct device *dev = cspmu->dev; struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; @@ -364,29 +410,30 @@ static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) if (!ctx) return -ENOMEM; - prodid = FIELD_GET(ARM_CSPMU_PMIIDR_PRODUCTID, cspmu->impl.pmiidr); - /* Find matching PMU. */ for (; match->prodid; match++) { const u32 prodid_mask = match->prodid_mask; - if ((match->prodid & prodid_mask) == (prodid & prodid_mask)) + if ((match->prodid & prodid_mask) == + (cspmu->impl.pmiidr & prodid_mask)) break; } - ctx->name = nv_cspmu_format_name(cspmu, match); - ctx->filter_mask = match->filter_mask; - ctx->filter_default_val = match->filter_default_val; - ctx->event_attr = match->event_attr; - ctx->format_attr = match->format_attr; + /* Initialize the context with the matched template. */ + memcpy(ctx, &match->template_ctx, sizeof(struct nv_cspmu_ctx)); + ctx->name = nv_cspmu_format_name(cspmu, match); cspmu->impl.ctx = ctx; /* NVIDIA specific callbacks. 
*/ - impl_ops->event_filter = nv_cspmu_event_filter; - impl_ops->get_event_attrs = nv_cspmu_get_event_attrs; - impl_ops->get_format_attrs = nv_cspmu_get_format_attrs; - impl_ops->get_name = nv_cspmu_get_name; + SET_OP(set_cc_filter, impl_ops, match, nv_cspmu_set_cc_filter); + SET_OP(set_ev_filter, impl_ops, match, nv_cspmu_set_ev_filter); + SET_OP(get_event_attrs, impl_ops, match, nv_cspmu_get_event_attrs); + SET_OP(get_format_attrs, impl_ops, match, nv_cspmu_get_format_attrs); + SET_OP(get_name, impl_ops, match, nv_cspmu_get_name); + + if (ctx->init_data) + return ctx->init_data(cspmu); return 0; } diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 398cce3d76fc..973a027d9063 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -26,7 +26,8 @@ #include <asm/irq_regs.h> -static int armpmu_count_irq_users(const int irq); +static int armpmu_count_irq_users(const struct cpumask *affinity, + const int irq); struct pmu_irq_ops { void (*enable_pmuirq)(unsigned int irq); @@ -64,7 +65,9 @@ static void armpmu_enable_percpu_pmuirq(unsigned int irq) static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu, void __percpu *devid) { - if (armpmu_count_irq_users(irq) == 1) + struct arm_pmu *armpmu = *per_cpu_ptr((void * __percpu *)devid, cpu); + + if (armpmu_count_irq_users(&armpmu->supported_cpus, irq) == 1) free_percpu_irq(irq, devid); } @@ -89,7 +92,9 @@ static void armpmu_disable_percpu_pmunmi(unsigned int irq) static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu, void __percpu *devid) { - if (armpmu_count_irq_users(irq) == 1) + struct arm_pmu *armpmu = *per_cpu_ptr((void * __percpu *)devid, cpu); + + if (armpmu_count_irq_users(&armpmu->supported_cpus, irq) == 1) free_percpu_nmi(irq, devid); } @@ -99,7 +104,6 @@ static const struct pmu_irq_ops percpu_pmunmi_ops = { .free_pmuirq = armpmu_free_percpu_pmunmi }; -static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops); @@ -318,6 +322,12 @@ armpmu_del(struct perf_event *event, int flags) int idx = hwc->idx; armpmu_stop(event, PERF_EF_UPDATE); + + if (has_branch_stack(event)) { + hw_events->branch_users--; + perf_sched_cb_dec(event->pmu); + } + hw_events->events[idx] = NULL; armpmu->clear_event_idx(hw_events, event); perf_event_update_userpage(event); @@ -342,12 +352,15 @@ armpmu_add(struct perf_event *event, int flags) if (idx < 0) return idx; - /* - * If there is an event in the counter we are going to use then make - * sure it is disabled. 
- */ + /* The newly-allocated counter should be empty */ + WARN_ON_ONCE(hw_events->events[idx]); + + if (has_branch_stack(event)) { + hw_events->branch_users++; + perf_sched_cb_inc(event->pmu); + } + event->hw.idx = idx; - armpmu->disable(event); hw_events->events[idx] = event; hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; @@ -511,8 +524,7 @@ static int armpmu_event_init(struct perf_event *event) !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) return -ENOENT; - /* does not support taken branch sampling */ - if (has_branch_stack(event)) + if (has_branch_stack(event) && !armpmu->reg_brbidr) return -EOPNOTSUPP; return __hw_perf_event_init(event); @@ -572,11 +584,11 @@ static const struct attribute_group armpmu_common_attr_group = { .attrs = armpmu_common_attrs, }; -static int armpmu_count_irq_users(const int irq) +static int armpmu_count_irq_users(const struct cpumask *affinity, const int irq) { int cpu, count = 0; - for_each_possible_cpu(cpu) { + for_each_cpu(cpu, affinity) { if (per_cpu(cpu_irq, cpu) == irq) count++; } @@ -584,12 +596,13 @@ static int armpmu_count_irq_users(const int irq) return count; } -static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq) +static const struct pmu_irq_ops * +armpmu_find_irq_ops(const struct cpumask *affinity, int irq) { const struct pmu_irq_ops *ops = NULL; int cpu; - for_each_possible_cpu(cpu) { + for_each_cpu(cpu, affinity) { if (per_cpu(cpu_irq, cpu) != irq) continue; @@ -601,22 +614,25 @@ static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq) return ops; } -void armpmu_free_irq(int irq, int cpu) +void armpmu_free_irq(struct arm_pmu * __percpu *armpmu, int irq, int cpu) { if (per_cpu(cpu_irq, cpu) == 0) return; if (WARN_ON(irq != per_cpu(cpu_irq, cpu))) return; - per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, &cpu_armpmu); + per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, armpmu); per_cpu(cpu_irq, cpu) = 0; per_cpu(cpu_irq_ops, cpu) = NULL; } -int armpmu_request_irq(int irq, int cpu) +int armpmu_request_irq(struct arm_pmu * __percpu *pcpu_armpmu, int irq, int cpu) { int err = 0; + struct arm_pmu **armpmu = per_cpu_ptr(pcpu_armpmu, cpu); + const struct cpumask *affinity = *armpmu ? 
&(*armpmu)->supported_cpus : + cpu_possible_mask; /* ACPI */ const irq_handler_t handler = armpmu_dispatch_irq; const struct pmu_irq_ops *irq_ops; @@ -638,25 +654,24 @@ int armpmu_request_irq(int irq, int cpu) IRQF_NOBALANCING | IRQF_NO_AUTOEN | IRQF_NO_THREAD; - err = request_nmi(irq, handler, irq_flags, "arm-pmu", - per_cpu_ptr(&cpu_armpmu, cpu)); + err = request_nmi(irq, handler, irq_flags, "arm-pmu", armpmu); /* If cannot get an NMI, get a normal interrupt */ if (err) { err = request_irq(irq, handler, irq_flags, "arm-pmu", - per_cpu_ptr(&cpu_armpmu, cpu)); + armpmu); irq_ops = &pmuirq_ops; } else { has_nmi = true; irq_ops = &pmunmi_ops; } - } else if (armpmu_count_irq_users(irq) == 0) { - err = request_percpu_nmi(irq, handler, "arm-pmu", &cpu_armpmu); + } else if (armpmu_count_irq_users(affinity, irq) == 0) { + err = request_percpu_nmi(irq, handler, "arm-pmu", affinity, pcpu_armpmu); /* If cannot get an NMI, get a normal interrupt */ if (err) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &cpu_armpmu); + err = request_percpu_irq_affinity(irq, handler, "arm-pmu", + affinity, pcpu_armpmu); irq_ops = &percpu_pmuirq_ops; } else { has_nmi = true; @@ -664,7 +679,7 @@ int armpmu_request_irq(int irq, int cpu) } } else { /* Per cpudevid irq was already requested by another CPU */ - irq_ops = armpmu_find_irq_ops(irq); + irq_ops = armpmu_find_irq_ops(affinity, irq); if (WARN_ON(!irq_ops)) err = -EINVAL; @@ -709,8 +724,6 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) if (pmu->reset) pmu->reset(pmu); - per_cpu(cpu_armpmu, cpu) = pmu; - irq = armpmu_get_cpu_irq(pmu, cpu); if (irq) per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq); @@ -730,8 +743,6 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) if (irq) per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq); - per_cpu(cpu_armpmu, cpu) = NULL; - return 0; } @@ -917,6 +928,12 @@ int armpmu_register(struct arm_pmu *pmu) if (ret) return ret; + /* + * By this stage we know our supported CPUs on either DT/ACPI platforms, + * detect the SMT implementation. + */ + pmu->has_smt = topology_core_has_smt(cpumask_first(&pmu->supported_cpus)); + if (!pmu->set_event_filter) pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 05dda19c5359..e80f76d95e68 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -218,7 +218,7 @@ static int arm_pmu_acpi_parse_irqs(void) * them with their PMUs. 
*/ per_cpu(pmu_irqs, cpu) = irq; - err = armpmu_request_irq(irq, cpu); + err = armpmu_request_irq(&probed_pmus, irq, cpu); if (err) goto out_err; } diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 118170a5cede..1c9e50a13201 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -42,14 +42,13 @@ static int probe_current_pmu(struct arm_pmu *pmu, return ret; } -static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq) +static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq, + const struct cpumask *affinity) { - int cpu, ret; struct pmu_hw_events __percpu *hw_events = pmu->hw_events; + int cpu; - ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus); - if (ret) - return ret; + cpumask_copy(&pmu->supported_cpus, affinity); for_each_cpu(cpu, &pmu->supported_cpus) per_cpu(hw_events->irq, cpu) = irq; @@ -115,9 +114,12 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) } if (num_irqs == 1) { - int irq = platform_get_irq(pdev, 0); + const struct cpumask *affinity; + int irq; + + irq = platform_get_irq_affinity(pdev, 0, &affinity); if ((irq > 0) && irq_is_percpu_devid(irq)) - return pmu_parse_percpu_irq(pmu, irq); + return pmu_parse_percpu_irq(pmu, irq, affinity); } if (nr_cpu_ids != 1 && !pmu_has_irq_affinity(dev->of_node)) @@ -163,7 +165,7 @@ static int armpmu_request_irqs(struct arm_pmu *armpmu) if (!irq) continue; - err = armpmu_request_irq(irq, cpu); + err = armpmu_request_irq(&hw_events->percpu_pmu, irq, cpu); if (err) break; } @@ -179,7 +181,7 @@ static void armpmu_free_irqs(struct arm_pmu *armpmu) for_each_cpu(cpu, &armpmu->supported_cpus) { int irq = per_cpu(hw_events->irq, cpu); - armpmu_free_irq(irq, cpu); + armpmu_free_irq(&hw_events->percpu_pmu, irq, cpu); } } diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index b5cc11abc962..8014ff766cff 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -25,6 +25,8 @@ #include <linux/smp.h> #include <linux/nmi.h> +#include "arm_brbe.h" + /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -438,7 +440,19 @@ static ssize_t threshold_max_show(struct device *dev, static DEVICE_ATTR_RO(threshold_max); +static ssize_t branches_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + + return sysfs_emit(page, "%d\n", brbe_num_branch_records(cpu_pmu)); +} + +static DEVICE_ATTR_RO(branches); + static struct attribute *armv8_pmuv3_caps_attrs[] = { + &dev_attr_branches.attr, &dev_attr_slots.attr, &dev_attr_bus_slots.attr, &dev_attr_bus_width.attr, @@ -446,9 +460,22 @@ static struct attribute *armv8_pmuv3_caps_attrs[] = { NULL, }; +static umode_t caps_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + + if (i == 0) + return brbe_num_branch_records(cpu_pmu) ? attr->mode : 0; + + return attr->mode; +} + static const struct attribute_group armv8_pmuv3_caps_attr_group = { .name = "caps", .attrs = armv8_pmuv3_caps_attrs, + .is_visible = caps_is_visible, }; /* @@ -795,11 +822,6 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) static void armv8pmu_enable_event(struct perf_event *event) { - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. 
- */ - armv8pmu_disable_event_counter(event); armv8pmu_write_event_type(event); armv8pmu_enable_event_irq(event); armv8pmu_enable_event_counter(event); @@ -814,6 +836,7 @@ static void armv8pmu_disable_event(struct perf_event *event) static void armv8pmu_start(struct arm_pmu *cpu_pmu) { struct perf_event_context *ctx; + struct pmu_hw_events *hw_events = this_cpu_ptr(cpu_pmu->hw_events); int nr_user = 0; ctx = perf_cpu_task_ctx(); @@ -825,18 +848,36 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu) else armv8pmu_disable_user_access(); + kvm_vcpu_pmu_resync_el0(); + + if (hw_events->branch_users) + brbe_enable(cpu_pmu); + /* Enable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); - - kvm_vcpu_pmu_resync_el0(); } static void armv8pmu_stop(struct arm_pmu *cpu_pmu) { + struct pmu_hw_events *hw_events = this_cpu_ptr(cpu_pmu->hw_events); + + if (hw_events->branch_users) + brbe_disable(); + /* Disable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); } +static void read_branch_records(struct pmu_hw_events *cpuc, + struct perf_event *event, + struct perf_sample_data *data) +{ + struct perf_branch_stack *branch_stack = cpuc->branch_stack; + + brbe_read_filtered_entries(branch_stack, event); + perf_sample_save_brstack(data, event, branch_stack, NULL); +} + static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) { u64 pmovsr; @@ -887,13 +928,15 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; + if (has_branch_stack(event)) + read_branch_records(cpuc, event, &data); + /* * Perf event overflow will queue the processing of the event as * an irq_work which will be taken care of in the handling of * IPI_IRQ_WORK. */ - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); + perf_event_overflow(event, &data, regs); } armv8pmu_start(cpu_pmu); @@ -935,6 +978,42 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, return -EAGAIN; } +static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; + + if (evtype != ARMV8_PMUV3_PERFCTR_CPU_CYCLES) + return false; + + /* + * A CPU_CYCLES event with threshold counting cannot use PMCCNTR_EL0 + * since it lacks threshold support. + */ + if (armv8pmu_event_get_threshold(&event->attr)) + return false; + + /* + * PMCCNTR_EL0 is not affected by BRBE controls like BRBCR_ELx.FZP. + * So don't use it for branch events. + */ + if (has_branch_stack(event)) + return false; + + /* + * The PMCCNTR_EL0 increments from the processor clock rather than + * the PE clock (ARM DDI0487 L.b D13.1.3) which means it'll continue + * counting on a WFI PE if one of its SMT sibling is not idle on a + * multi-threaded implementation. So don't use it on SMT cores. + */ + if (cpu_pmu->has_smt) + return false; + + return true; +} + static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct perf_event *event) { @@ -943,8 +1022,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; /* Always prefer to place a cycle counter into the cycle counter. 
*/ - if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && - !armv8pmu_event_get_threshold(&event->attr)) { + if (armv8pmu_can_use_pmccntr(cpuc, event)) { if (!test_and_set_bit(ARMV8_PMU_CYCLE_IDX, cpuc->used_mask)) return ARMV8_PMU_CYCLE_IDX; else if (armv8pmu_event_is_64bit(event) && @@ -993,6 +1071,19 @@ static int armv8pmu_user_event_idx(struct perf_event *event) return event->hw.idx + 1; } +static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + + if (!hw_events->branch_users) + return; + + if (sched_in) + brbe_invalidate(); +} + /* * Add an event filter to a given event. */ @@ -1010,6 +1101,13 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return -EOPNOTSUPP; } + if (has_branch_stack(perf_event)) { + if (!brbe_num_branch_records(cpu_pmu) || !brbe_branch_attr_valid(perf_event)) + return -EOPNOTSUPP; + + perf_event->attach_state |= PERF_ATTACH_SCHED_CB; + } + /* * If we're running in hyp mode, then we *are* the hypervisor. * Therefore we ignore exclude_hv in this configuration, since @@ -1076,6 +1174,11 @@ static void armv8pmu_reset(void *info) /* Clear the counters we flip at guest entry/exit */ kvm_clr_pmu_events(mask); + if (brbe_num_branch_records(cpu_pmu)) { + brbe_disable(); + brbe_invalidate(); + } + /* * Initialize & Reset PMNC. Request overflow interrupt for * 64 bit cycle counter but cheat in armv8pmu_write_counter(). @@ -1244,6 +1347,25 @@ static void __armv8pmu_probe_pmu(void *info) cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; + + brbe_probe(cpu_pmu); +} + +static int branch_records_alloc(struct arm_pmu *armpmu) +{ + size_t size = struct_size_t(struct perf_branch_stack, entries, + brbe_num_branch_records(armpmu)); + int cpu; + + for_each_cpu(cpu, &armpmu->supported_cpus) { + struct pmu_hw_events *events_cpu; + + events_cpu = per_cpu_ptr(armpmu->hw_events, cpu); + events_cpu->branch_stack = kmalloc(size, GFP_KERNEL); + if (!events_cpu->branch_stack) + return -ENOMEM; + } + return 0; } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) @@ -1260,7 +1382,15 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) if (ret) return ret; - return probe.present ? 
0 : -ENODEV; + if (!probe.present) + return -ENODEV; + + if (brbe_num_branch_records(cpu_pmu)) { + ret = branch_records_alloc(cpu_pmu); + if (ret) + return ret; + } + return 0; } static void armv8pmu_disable_user_access_ipi(void *unused) @@ -1279,7 +1409,7 @@ static int armv8pmu_proc_user_access_handler(const struct ctl_table *table, int return 0; } -static struct ctl_table armv8_pmu_sysctl_table[] = { +static const struct ctl_table armv8_pmu_sysctl_table[] = { { .procname = "perf_user_access", .data = &sysctl_perf_user_access, @@ -1319,6 +1449,8 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx; + if (brbe_num_branch_records(cpu_pmu)) + cpu_pmu->pmu.sched_task = armv8pmu_sched_task; cpu_pmu->name = name; cpu_pmu->map_event = map_event; @@ -1343,6 +1475,10 @@ static int name##_pmu_init(struct arm_pmu *cpu_pmu) \ PMUV3_INIT_SIMPLE(armv8_pmuv3) +PMUV3_INIT_SIMPLE(armv8_c1_nano) +PMUV3_INIT_SIMPLE(armv8_c1_premium) +PMUV3_INIT_SIMPLE(armv8_c1_pro) +PMUV3_INIT_SIMPLE(armv8_c1_ultra) PMUV3_INIT_SIMPLE(armv8_cortex_a34) PMUV3_INIT_SIMPLE(armv8_cortex_a55) PMUV3_INIT_SIMPLE(armv8_cortex_a65) @@ -1350,11 +1486,14 @@ PMUV3_INIT_SIMPLE(armv8_cortex_a75) PMUV3_INIT_SIMPLE(armv8_cortex_a76) PMUV3_INIT_SIMPLE(armv8_cortex_a77) PMUV3_INIT_SIMPLE(armv8_cortex_a78) +PMUV3_INIT_SIMPLE(armv9_cortex_a320) PMUV3_INIT_SIMPLE(armv9_cortex_a510) PMUV3_INIT_SIMPLE(armv9_cortex_a520) +PMUV3_INIT_SIMPLE(armv9_cortex_a520ae) PMUV3_INIT_SIMPLE(armv9_cortex_a710) PMUV3_INIT_SIMPLE(armv9_cortex_a715) PMUV3_INIT_SIMPLE(armv9_cortex_a720) +PMUV3_INIT_SIMPLE(armv9_cortex_a720ae) PMUV3_INIT_SIMPLE(armv9_cortex_a725) PMUV3_INIT_SIMPLE(armv8_cortex_x1) PMUV3_INIT_SIMPLE(armv9_cortex_x2) @@ -1369,6 +1508,7 @@ PMUV3_INIT_SIMPLE(armv8_neoverse_v1) PMUV3_INIT_SIMPLE(armv8_neoverse_v2) PMUV3_INIT_SIMPLE(armv8_neoverse_v3) PMUV3_INIT_SIMPLE(armv8_neoverse_v3ae) +PMUV3_INIT_SIMPLE(armv8_rainier) PMUV3_INIT_SIMPLE(armv8_nvidia_carmel) PMUV3_INIT_SIMPLE(armv8_nvidia_denver) @@ -1385,6 +1525,10 @@ PMUV3_INIT_MAP_EVENT(armv8_brcm_vulcan, armv8_vulcan_map_event) static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_pmu_init}, + {.compatible = "arm,c1-nano-pmu", .data = armv8_c1_nano_pmu_init}, + {.compatible = "arm,c1-premium-pmu", .data = armv8_c1_premium_pmu_init}, + {.compatible = "arm,c1-pro-pmu", .data = armv8_c1_pro_pmu_init}, + {.compatible = "arm,c1-ultra-pmu", .data = armv8_c1_ultra_pmu_init}, {.compatible = "arm,cortex-a34-pmu", .data = armv8_cortex_a34_pmu_init}, {.compatible = "arm,cortex-a35-pmu", .data = armv8_cortex_a35_pmu_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_cortex_a53_pmu_init}, @@ -1397,11 +1541,14 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a76-pmu", .data = armv8_cortex_a76_pmu_init}, {.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init}, {.compatible = "arm,cortex-a78-pmu", .data = armv8_cortex_a78_pmu_init}, + {.compatible = "arm,cortex-a320-pmu", .data = armv9_cortex_a320_pmu_init}, {.compatible = "arm,cortex-a510-pmu", .data = armv9_cortex_a510_pmu_init}, {.compatible = "arm,cortex-a520-pmu", .data = armv9_cortex_a520_pmu_init}, + {.compatible = "arm,cortex-a520ae-pmu", .data = armv9_cortex_a520ae_pmu_init}, {.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init}, {.compatible = "arm,cortex-a715-pmu", .data = 
armv9_cortex_a715_pmu_init}, {.compatible = "arm,cortex-a720-pmu", .data = armv9_cortex_a720_pmu_init}, + {.compatible = "arm,cortex-a720ae-pmu", .data = armv9_cortex_a720ae_pmu_init}, {.compatible = "arm,cortex-a725-pmu", .data = armv9_cortex_a725_pmu_init}, {.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init}, {.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init}, @@ -1416,6 +1563,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,neoverse-v2-pmu", .data = armv8_neoverse_v2_pmu_init}, {.compatible = "arm,neoverse-v3-pmu", .data = armv8_neoverse_v3_pmu_init}, {.compatible = "arm,neoverse-v3ae-pmu", .data = armv8_neoverse_v3ae_pmu_init}, + {.compatible = "arm,rainier-pmu", .data = armv8_rainier_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_cavium_thunder_pmu_init}, {.compatible = "brcm,vulcan-pmu", .data = armv8_brcm_vulcan_pmu_init}, {.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init}, diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index b1510f660c7a..621f02a7f43b 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -431,6 +431,17 @@ static int smmu_pmu_event_init(struct perf_event *event) return -EINVAL; } + /* + * Ensure all events are on the same cpu so all events are in the + * same cpu context, to avoid races on pmu_enable etc. + */ + event->cpu = smmu_pmu->on_cpu; + + hwc->idx = -1; + + if (event->group_leader == event) + return 0; + for_each_sibling_event(sibling, event->group_leader) { if (is_software_event(sibling)) continue; @@ -442,14 +453,6 @@ static int smmu_pmu_event_init(struct perf_event *event) return -EINVAL; } - hwc->idx = -1; - - /* - * Ensure all events are on the same cpu so all events are in the - * same cpu context, to avoid races on pmu_enable etc. 
- */ - event->cpu = smmu_pmu->on_cpu; - return 0; } diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index fd5b78732603..4801115f2b54 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -50,7 +50,7 @@ static_assert((PERF_EVENT_FLAG_ARCH & SPE_PMU_HW_FLAGS_CX) == SPE_PMU_HW_FLAGS_C static void set_spe_event_has_cx(struct perf_event *event) { - if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && !perf_allow_kernel(&event->attr)) + if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && !perf_allow_kernel()) event->hw.flags |= SPE_PMU_HW_FLAGS_CX; } @@ -85,9 +85,13 @@ struct arm_spe_pmu { #define SPE_PMU_FEAT_LDS (1UL << 4) #define SPE_PMU_FEAT_ERND (1UL << 5) #define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6) +#define SPE_PMU_FEAT_DISCARD (1UL << 7) +#define SPE_PMU_FEAT_EFT (1UL << 8) +#define SPE_PMU_FEAT_FDS (1UL << 9) #define SPE_PMU_FEAT_DEV_PROBED (1UL << 63) u64 features; + u64 pmsevfr_res0; u16 max_record_sz; u16 align; struct perf_output_handle __percpu *handle; @@ -96,7 +100,8 @@ struct arm_spe_pmu { #define to_spe_pmu(p) (container_of(p, struct arm_spe_pmu, pmu)) /* Convert a free-running index from perf into an SPE buffer offset */ -#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT)) +#define PERF_IDX2OFF(idx, buf) \ + ((idx) % ((unsigned long)(buf)->nr_pages << PAGE_SHIFT)) /* Keep track of our dynamic hotplug state */ static enum cpuhp_state arm_spe_pmu_online; @@ -114,6 +119,7 @@ enum arm_spe_pmu_capabilities { SPE_PMU_CAP_FEAT_MAX, SPE_PMU_CAP_CNT_SZ = SPE_PMU_CAP_FEAT_MAX, SPE_PMU_CAP_MIN_IVAL, + SPE_PMU_CAP_EVENT_FILTER, }; static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = { @@ -121,7 +127,7 @@ static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = { [SPE_PMU_CAP_ERND] = SPE_PMU_FEAT_ERND, }; -static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap) +static u64 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap) { if (cap < SPE_PMU_CAP_FEAT_MAX) return !!(spe_pmu->features & arm_spe_pmu_feat_caps[cap]); @@ -131,6 +137,8 @@ static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap) return spe_pmu->counter_sz; case SPE_PMU_CAP_MIN_IVAL: return spe_pmu->min_period; + case SPE_PMU_CAP_EVENT_FILTER: + return ~spe_pmu->pmsevfr_res0; default: WARN(1, "unknown cap %d\n", cap); } @@ -147,7 +155,19 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev, container_of(attr, struct dev_ext_attribute, attr); int cap = (long)ea->var; - return sysfs_emit(buf, "%u\n", arm_spe_pmu_cap_get(spe_pmu, cap)); + return sysfs_emit(buf, "%llu\n", arm_spe_pmu_cap_get(spe_pmu, cap)); +} + +static ssize_t arm_spe_pmu_cap_show_hex(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev); + struct dev_ext_attribute *ea = + container_of(attr, struct dev_ext_attribute, attr); + int cap = (long)ea->var; + + return sysfs_emit(buf, "0x%llx\n", arm_spe_pmu_cap_get(spe_pmu, cap)); } #define SPE_EXT_ATTR_ENTRY(_name, _func, _var) \ @@ -157,12 +177,15 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev, #define SPE_CAP_EXT_ATTR_ENTRY(_name, _var) \ SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show, _var) +#define SPE_CAP_EXT_ATTR_ENTRY_HEX(_name, _var) \ + SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show_hex, _var) static struct attribute *arm_spe_pmu_cap_attr[] = { SPE_CAP_EXT_ATTR_ENTRY(arch_inst, SPE_PMU_CAP_ARCH_INST), SPE_CAP_EXT_ATTR_ENTRY(ernd, SPE_PMU_CAP_ERND), SPE_CAP_EXT_ATTR_ENTRY(count_size, SPE_PMU_CAP_CNT_SZ), SPE_CAP_EXT_ATTR_ENTRY(min_interval, 
SPE_PMU_CAP_MIN_IVAL), + SPE_CAP_EXT_ATTR_ENTRY_HEX(event_filter, SPE_PMU_CAP_EVENT_FILTER), NULL, }; @@ -193,6 +216,30 @@ static const struct attribute_group arm_spe_pmu_cap_group = { #define ATTR_CFG_FLD_store_filter_CFG config /* PMSFCR_EL1.ST */ #define ATTR_CFG_FLD_store_filter_LO 34 #define ATTR_CFG_FLD_store_filter_HI 34 +#define ATTR_CFG_FLD_discard_CFG config /* PMBLIMITR_EL1.FM = DISCARD */ +#define ATTR_CFG_FLD_discard_LO 35 +#define ATTR_CFG_FLD_discard_HI 35 +#define ATTR_CFG_FLD_branch_filter_mask_CFG config /* PMSFCR_EL1.Bm */ +#define ATTR_CFG_FLD_branch_filter_mask_LO 36 +#define ATTR_CFG_FLD_branch_filter_mask_HI 36 +#define ATTR_CFG_FLD_load_filter_mask_CFG config /* PMSFCR_EL1.LDm */ +#define ATTR_CFG_FLD_load_filter_mask_LO 37 +#define ATTR_CFG_FLD_load_filter_mask_HI 37 +#define ATTR_CFG_FLD_store_filter_mask_CFG config /* PMSFCR_EL1.STm */ +#define ATTR_CFG_FLD_store_filter_mask_LO 38 +#define ATTR_CFG_FLD_store_filter_mask_HI 38 +#define ATTR_CFG_FLD_simd_filter_CFG config /* PMSFCR_EL1.SIMD */ +#define ATTR_CFG_FLD_simd_filter_LO 39 +#define ATTR_CFG_FLD_simd_filter_HI 39 +#define ATTR_CFG_FLD_simd_filter_mask_CFG config /* PMSFCR_EL1.SIMDm */ +#define ATTR_CFG_FLD_simd_filter_mask_LO 40 +#define ATTR_CFG_FLD_simd_filter_mask_HI 40 +#define ATTR_CFG_FLD_float_filter_CFG config /* PMSFCR_EL1.FP */ +#define ATTR_CFG_FLD_float_filter_LO 41 +#define ATTR_CFG_FLD_float_filter_HI 41 +#define ATTR_CFG_FLD_float_filter_mask_CFG config /* PMSFCR_EL1.FPm */ +#define ATTR_CFG_FLD_float_filter_mask_LO 42 +#define ATTR_CFG_FLD_float_filter_mask_HI 42 #define ATTR_CFG_FLD_event_filter_CFG config1 /* PMSEVFR_EL1 */ #define ATTR_CFG_FLD_event_filter_LO 0 @@ -206,16 +253,29 @@ static const struct attribute_group arm_spe_pmu_cap_group = { #define ATTR_CFG_FLD_inv_event_filter_LO 0 #define ATTR_CFG_FLD_inv_event_filter_HI 63 +#define ATTR_CFG_FLD_inv_data_src_filter_CFG config4 /* inverse of PMSDSFR_EL1 */ +#define ATTR_CFG_FLD_inv_data_src_filter_LO 0 +#define ATTR_CFG_FLD_inv_data_src_filter_HI 63 + GEN_PMU_FORMAT_ATTR(ts_enable); GEN_PMU_FORMAT_ATTR(pa_enable); GEN_PMU_FORMAT_ATTR(pct_enable); GEN_PMU_FORMAT_ATTR(jitter); GEN_PMU_FORMAT_ATTR(branch_filter); +GEN_PMU_FORMAT_ATTR(branch_filter_mask); GEN_PMU_FORMAT_ATTR(load_filter); +GEN_PMU_FORMAT_ATTR(load_filter_mask); GEN_PMU_FORMAT_ATTR(store_filter); +GEN_PMU_FORMAT_ATTR(store_filter_mask); +GEN_PMU_FORMAT_ATTR(simd_filter); +GEN_PMU_FORMAT_ATTR(simd_filter_mask); +GEN_PMU_FORMAT_ATTR(float_filter); +GEN_PMU_FORMAT_ATTR(float_filter_mask); GEN_PMU_FORMAT_ATTR(event_filter); GEN_PMU_FORMAT_ATTR(inv_event_filter); +GEN_PMU_FORMAT_ATTR(inv_data_src_filter); GEN_PMU_FORMAT_ATTR(min_latency); +GEN_PMU_FORMAT_ATTR(discard); static struct attribute *arm_spe_pmu_formats_attr[] = { &format_attr_ts_enable.attr, @@ -223,11 +283,20 @@ static struct attribute *arm_spe_pmu_formats_attr[] = { &format_attr_pct_enable.attr, &format_attr_jitter.attr, &format_attr_branch_filter.attr, + &format_attr_branch_filter_mask.attr, &format_attr_load_filter.attr, + &format_attr_load_filter_mask.attr, &format_attr_store_filter.attr, + &format_attr_store_filter_mask.attr, + &format_attr_simd_filter.attr, + &format_attr_simd_filter_mask.attr, + &format_attr_float_filter.attr, + &format_attr_float_filter_mask.attr, &format_attr_event_filter.attr, &format_attr_inv_event_filter.attr, + &format_attr_inv_data_src_filter.attr, &format_attr_min_latency.attr, + &format_attr_discard.attr, NULL, }; @@ -238,9 +307,26 @@ static umode_t 
arm_spe_pmu_format_attr_is_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev); + if (attr == &format_attr_discard.attr && !(spe_pmu->features & SPE_PMU_FEAT_DISCARD)) + return 0; + if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT)) return 0; + if (attr == &format_attr_inv_data_src_filter.attr && + !(spe_pmu->features & SPE_PMU_FEAT_FDS)) + return 0; + + if ((attr == &format_attr_branch_filter_mask.attr || + attr == &format_attr_load_filter_mask.attr || + attr == &format_attr_store_filter_mask.attr || + attr == &format_attr_simd_filter.attr || + attr == &format_attr_simd_filter_mask.attr || + attr == &format_attr_float_filter.attr || + attr == &format_attr_float_filter_mask.attr) && + !(spe_pmu->features & SPE_PMU_FEAT_EFT)) + return 0; + return attr->mode; } @@ -299,17 +385,21 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event) static void arm_spe_event_sanitise_period(struct perf_event *event) { - struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu); u64 period = event->hw.sample_period; u64 max_period = PMSIRR_EL1_INTERVAL_MASK; - if (period < spe_pmu->min_period) - period = spe_pmu->min_period; - else if (period > max_period) - period = max_period; - else - period &= max_period; + /* + * The PMSIDR_EL1.Interval field (stored in spe_pmu->min_period) is a + * recommendation for the minimum interval, not a hardware limitation. + * + * According to the Arm ARM (DDI 0487 L.a), section D24.7.12 PMSIRR_EL1, + * Sampling Interval Reload Register, the INTERVAL field (bits [31:8]) + * states: "Software must set this to a nonzero value". Use 1 as the + * minimum value. + */ + u64 min_period = FIELD_PREP(PMSIRR_EL1_INTERVAL_MASK, 1); + period = clamp_t(u64, period, min_period, max_period) & max_period; event->hw.sample_period = period; } @@ -332,8 +422,15 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event) u64 reg = 0; reg |= FIELD_PREP(PMSFCR_EL1_LD, ATTR_CFG_GET_FLD(attr, load_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_LDm, ATTR_CFG_GET_FLD(attr, load_filter_mask)); reg |= FIELD_PREP(PMSFCR_EL1_ST, ATTR_CFG_GET_FLD(attr, store_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_STm, ATTR_CFG_GET_FLD(attr, store_filter_mask)); reg |= FIELD_PREP(PMSFCR_EL1_B, ATTR_CFG_GET_FLD(attr, branch_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_Bm, ATTR_CFG_GET_FLD(attr, branch_filter_mask)); + reg |= FIELD_PREP(PMSFCR_EL1_SIMD, ATTR_CFG_GET_FLD(attr, simd_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_SIMDm, ATTR_CFG_GET_FLD(attr, simd_filter_mask)); + reg |= FIELD_PREP(PMSFCR_EL1_FP, ATTR_CFG_GET_FLD(attr, float_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_FPm, ATTR_CFG_GET_FLD(attr, float_filter_mask)); if (reg) reg |= PMSFCR_EL1_FT; @@ -344,6 +441,9 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event) if (ATTR_CFG_GET_FLD(attr, inv_event_filter)) reg |= PMSFCR_EL1_FnE; + if (ATTR_CFG_GET_FLD(attr, inv_data_src_filter)) + reg |= PMSFCR_EL1_FDS; + if (ATTR_CFG_GET_FLD(attr, min_latency)) reg |= PMSFCR_EL1_FL; @@ -368,6 +468,17 @@ static u64 arm_spe_event_to_pmslatfr(struct perf_event *event) return FIELD_PREP(PMSLATFR_EL1_MINLAT, ATTR_CFG_GET_FLD(attr, min_latency)); } +static u64 arm_spe_event_to_pmsdsfr(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + /* + * Data src filter is inverted so that the default value of 0 is + * equivalent to no filtering. 
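+ * For example, leaving config4 at 0 programs PMSDSFR_EL1 with all bits
+ * set (record every data source), while each bit set in config4 clears
+ * the corresponding data-source bit.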
+ */ + return ~ATTR_CFG_GET_FLD(attr, inv_data_src_filter); +} + static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len) { struct arm_spe_pmu_buf *buf = perf_get_aux(handle); @@ -502,6 +613,12 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle, u64 base, limit; struct arm_spe_pmu_buf *buf; + if (ATTR_CFG_GET_FLD(&event->attr, discard)) { + limit = FIELD_PREP(PMBLIMITR_EL1_FM, PMBLIMITR_EL1_FM_DISCARD); + limit |= PMBLIMITR_EL1_E; + goto out_write_limit; + } + /* Start a new aux session */ buf = perf_aux_output_begin(handle, event); if (!buf) { @@ -678,20 +795,6 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev) return IRQ_HANDLED; } -static u64 arm_spe_pmsevfr_res0(u16 pmsver) -{ - switch (pmsver) { - case ID_AA64DFR0_EL1_PMSVer_IMP: - return PMSEVFR_EL1_RES0_IMP; - case ID_AA64DFR0_EL1_PMSVer_V1P1: - return PMSEVFR_EL1_RES0_V1P1; - case ID_AA64DFR0_EL1_PMSVer_V1P2: - /* Return the highest version we support in default */ - default: - return PMSEVFR_EL1_RES0_V1P2; - } -} - /* Perf callbacks */ static int arm_spe_pmu_event_init(struct perf_event *event) { @@ -707,10 +810,14 @@ static int arm_spe_pmu_event_init(struct perf_event *event) !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus)) return -ENOENT; - if (arm_spe_event_to_pmsevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver)) + if (arm_spe_event_to_pmsevfr(event) & spe_pmu->pmsevfr_res0) + return -EOPNOTSUPP; + + if (arm_spe_event_to_pmsnevfr(event) & spe_pmu->pmsevfr_res0) return -EOPNOTSUPP; - if (arm_spe_event_to_pmsnevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver)) + if (arm_spe_event_to_pmsdsfr(event) != U64_MAX && + !(spe_pmu->features & SPE_PMU_FEAT_FDS)) return -EOPNOTSUPP; if (attr->exclude_idle) @@ -743,10 +850,24 @@ static int arm_spe_pmu_event_init(struct perf_event *event) !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT)) return -EOPNOTSUPP; + if ((FIELD_GET(PMSFCR_EL1_LDm, reg) || + FIELD_GET(PMSFCR_EL1_STm, reg) || + FIELD_GET(PMSFCR_EL1_Bm, reg) || + FIELD_GET(PMSFCR_EL1_SIMD, reg) || + FIELD_GET(PMSFCR_EL1_SIMDm, reg) || + FIELD_GET(PMSFCR_EL1_FP, reg) || + FIELD_GET(PMSFCR_EL1_FPm, reg)) && + !(spe_pmu->features & SPE_PMU_FEAT_EFT)) + return -EOPNOTSUPP; + + if (ATTR_CFG_GET_FLD(&event->attr, discard) && + !(spe_pmu->features & SPE_PMU_FEAT_DISCARD)) + return -EOPNOTSUPP; + set_spe_event_has_cx(event); reg = arm_spe_event_to_pmscr(event); if (reg & (PMSCR_EL1_PA | PMSCR_EL1_PCT)) - return perf_allow_kernel(&event->attr); + return perf_allow_kernel(); return 0; } @@ -774,6 +895,11 @@ static void arm_spe_pmu_start(struct perf_event *event, int flags) write_sysreg_s(reg, SYS_PMSNEVFR_EL1); } + if (spe_pmu->features & SPE_PMU_FEAT_FDS) { + reg = arm_spe_event_to_pmsdsfr(event); + write_sysreg_s(reg, SYS_PMSDSFR_EL1); + } + reg = arm_spe_event_to_pmslatfr(event); write_sysreg_s(reg, SYS_PMSLATFR_EL1); @@ -1027,6 +1153,15 @@ static void __arm_spe_pmu_dev_probe(void *info) if (FIELD_GET(PMSIDR_EL1_ERND, reg)) spe_pmu->features |= SPE_PMU_FEAT_ERND; + if (spe_pmu->pmsver >= ID_AA64DFR0_EL1_PMSVer_V1P2) + spe_pmu->features |= SPE_PMU_FEAT_DISCARD; + + if (FIELD_GET(PMSIDR_EL1_EFT, reg)) + spe_pmu->features |= SPE_PMU_FEAT_EFT; + + if (FIELD_GET(PMSIDR_EL1_FDS, reg)) + spe_pmu->features |= SPE_PMU_FEAT_FDS; + /* This field has a spaced out encoding, so just use a look-up */ fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg); switch (fld) { @@ -1081,6 +1216,10 @@ static void __arm_spe_pmu_dev_probe(void *info) spe_pmu->counter_sz = 16; } + /* Write all 1s and then read 
back. Unsupported filter bits are RAZ/WI. */ + write_sysreg_s(U64_MAX, SYS_PMSEVFR_EL1); + spe_pmu->pmsevfr_res0 = ~read_sysreg_s(SYS_PMSEVFR_EL1); + dev_info(dev, "probed SPEv1.%d for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n", spe_pmu->pmsver - 1, cpumask_pr_args(&spe_pmu->supported_cpus), @@ -1157,8 +1296,8 @@ static int arm_spe_pmu_dev_init(struct arm_spe_pmu *spe_pmu) return -ENXIO; /* Request our PPIs (note that the IRQ is still disabled) */ - ret = request_percpu_irq(spe_pmu->irq, arm_spe_pmu_irq_handler, DRVNAME, - spe_pmu->handle); + ret = request_percpu_irq_affinity(spe_pmu->irq, arm_spe_pmu_irq_handler, + DRVNAME, mask, spe_pmu->handle); if (ret) return ret; @@ -1185,8 +1324,10 @@ static void arm_spe_pmu_dev_teardown(struct arm_spe_pmu *spe_pmu) static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu) { struct platform_device *pdev = spe_pmu->pdev; - int irq = platform_get_irq(pdev, 0); + const struct cpumask *affinity; + int irq; + irq = platform_get_irq_affinity(pdev, 0, &affinity); if (irq < 0) return -ENXIO; @@ -1195,10 +1336,7 @@ static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu) return -EINVAL; } - if (irq_get_percpu_devid_partition(irq, &spe_pmu->supported_cpus)) { - dev_err(&pdev->dev, "failed to get PPI partition (%d)\n", irq); - return -EINVAL; - } + cpumask_copy(&spe_pmu->supported_cpus, affinity); spe_pmu->irq = irq; return 0; diff --git a/drivers/perf/arm_v6_pmu.c b/drivers/perf/arm_v6_pmu.c index b09615bb2bb2..7cb12c8e06c7 100644 --- a/drivers/perf/arm_v6_pmu.c +++ b/drivers/perf/arm_v6_pmu.c @@ -276,8 +276,7 @@ armv6pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); + perf_event_overflow(event, &data, regs); } /* diff --git a/drivers/perf/arm_v7_pmu.c b/drivers/perf/arm_v7_pmu.c index 420cadd108e7..a1e438101114 100644 --- a/drivers/perf/arm_v7_pmu.c +++ b/drivers/perf/arm_v7_pmu.c @@ -858,16 +858,6 @@ static void armv7pmu_enable_event(struct perf_event *event) } /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* * Set event (if destined for PMNx counters) * We only need to set the event for the cycle counter if we * have the ability to perform event filtering. @@ -875,14 +865,7 @@ static void armv7pmu_enable_event(struct perf_event *event) if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER) armv7_pmnc_write_evtsel(idx, hwc->config_base); - /* - * Enable interrupt for this counter - */ armv7_pmnc_enable_intens(idx); - - /* - * Enable counter - */ armv7_pmnc_enable_counter(idx); } @@ -898,18 +881,7 @@ static void armv7pmu_disable_event(struct perf_event *event) return; } - /* - * Disable counter and interrupt - */ - - /* - * Disable counter - */ armv7_pmnc_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ armv7_pmnc_disable_intens(idx); } @@ -958,8 +930,7 @@ static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); + perf_event_overflow(event, &data, regs); } /* @@ -1477,14 +1448,6 @@ static void krait_pmu_enable_event(struct perf_event *event) int idx = hwc->idx; /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. 
- */ - - /* Disable counter */ - armv7_pmnc_disable_counter(idx); - - /* * Set event (if destined for PMNx counters) * We set the event for the cycle counter because we * have the ability to perform event filtering. @@ -1494,10 +1457,7 @@ static void krait_pmu_enable_event(struct perf_event *event) else armv7_pmnc_write_evtsel(idx, hwc->config_base); - /* Enable interrupt for this counter */ armv7_pmnc_enable_intens(idx); - - /* Enable counter */ armv7_pmnc_enable_counter(idx); } @@ -1798,14 +1758,6 @@ static void scorpion_pmu_enable_event(struct perf_event *event) int idx = hwc->idx; /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - - /* Disable counter */ - armv7_pmnc_disable_counter(idx); - - /* * Set event (if destined for PMNx counters) * We don't set the event for the cycle counter because we * don't have the ability to perform event filtering. @@ -1815,10 +1767,7 @@ static void scorpion_pmu_enable_event(struct perf_event *event) else if (idx != ARMV7_IDX_CYCLE_COUNTER) armv7_pmnc_write_evtsel(idx, hwc->config_base); - /* Enable interrupt for this counter */ armv7_pmnc_enable_intens(idx); - - /* Enable counter */ armv7_pmnc_enable_counter(idx); } diff --git a/drivers/perf/arm_xscale_pmu.c b/drivers/perf/arm_xscale_pmu.c index 638fea9b1263..c2ac41dd9e19 100644 --- a/drivers/perf/arm_xscale_pmu.c +++ b/drivers/perf/arm_xscale_pmu.c @@ -186,8 +186,7 @@ xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); + perf_event_overflow(event, &data, regs); } irq_work_run(); @@ -519,8 +518,7 @@ xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); + perf_event_overflow(event, &data, regs); } irq_work_run(); diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c index bee4b5b52ec6..d094030220bf 100644 --- a/drivers/perf/cxl_pmu.c +++ b/drivers/perf/cxl_pmu.c @@ -113,7 +113,7 @@ struct cxl_pmu_info { /* * All CPMU counters are discoverable via the Event Capabilities Registers. - * Each Event Capability register contains a a VID / GroupID. + * Each Event Capability register contains a VID / GroupID. * A counter may then count any combination (by summing) of events in * that group which are in the Supported Events Bitmask. * However, there are some complexities to the scheme. @@ -406,7 +406,7 @@ static struct attribute *cxl_pmu_event_attrs[] = { CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk, CXL_PMU_GID_S2M_BISNP, BIT(4)), CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk, CXL_PMU_GID_S2M_BISNP, BIT(5)), CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk, CXL_PMU_GID_S2M_BISNP, BIT(6)), - /* CXL rev 3.1 Table 3-50 S2M NDR Opcopdes */ + /* CXL rev 3.1 Table 3-50 S2M NDR Opcodes */ CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp, CXL_PMU_GID_S2M_NDR, BIT(0)), CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps, CXL_PMU_GID_S2M_NDR, BIT(1)), CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe, CXL_PMU_GID_S2M_NDR, BIT(2)), @@ -627,7 +627,7 @@ static void cxl_pmu_event_start(struct perf_event *event, int flags) hwc->state = 0; /* - * Currently only hdm filter control is implemnted, this code will + * Currently only hdm filter control is implemented, this code will * want generalizing when more filters are added. 
*/ if (info->filter_hdm) { @@ -834,8 +834,8 @@ static int cxl_pmu_probe(struct device *dev) if (rc) return rc; - info->hw_events = devm_kcalloc(dev, sizeof(*info->hw_events), - info->num_counters, GFP_KERNEL); + info->hw_events = devm_kcalloc(dev, info->num_counters, + sizeof(*info->hw_events), GFP_KERNEL); if (!info->hw_events) return -ENOMEM; @@ -873,7 +873,7 @@ static int cxl_pmu_probe(struct device *dev) return rc; irq = rc; - irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_overflow\n", dev_name); + irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_overflow", dev_name); if (!irq_name) return -ENOMEM; @@ -977,7 +977,7 @@ static __exit void cxl_pmu_exit(void) MODULE_DESCRIPTION("CXL Performance Monitor Driver"); MODULE_LICENSE("GPL"); -MODULE_IMPORT_NS(CXL); +MODULE_IMPORT_NS("CXL"); module_init(cxl_pmu_init); module_exit(cxl_pmu_exit); MODULE_ALIAS_CXL(CXL_DEVICE_PMU); diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c index 9cbea9675e21..22f73ac894e9 100644 --- a/drivers/perf/dwc_pcie_pmu.c +++ b/drivers/perf/dwc_pcie_pmu.c @@ -13,6 +13,7 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/list.h> +#include <linux/pcie-dwc.h> #include <linux/perf_event.h> #include <linux/pci.h> #include <linux/platform_device.h> @@ -20,7 +21,6 @@ #include <linux/sysfs.h> #include <linux/types.h> -#define DWC_PCIE_VSEC_RAS_DES_ID 0x02 #define DWC_PCIE_EVENT_CNT_CTL 0x8 /* @@ -39,6 +39,10 @@ #define DWC_PCIE_EVENT_CLEAR GENMASK(1, 0) #define DWC_PCIE_EVENT_PER_CLEAR 0x1 +/* Event Selection Field has two subfields */ +#define DWC_PCIE_CNT_EVENT_SEL_GROUP GENMASK(11, 8) +#define DWC_PCIE_CNT_EVENT_SEL_EVID GENMASK(7, 0) + #define DWC_PCIE_EVENT_CNT_DATA 0xC #define DWC_PCIE_TIME_BASED_ANAL_CTL 0x10 @@ -73,6 +77,10 @@ enum dwc_pcie_event_type { DWC_PCIE_EVENT_TYPE_MAX, }; +#define DWC_PCIE_LANE_GROUP_6 6 +#define DWC_PCIE_LANE_GROUP_7 7 +#define DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP 256 + #define DWC_PCIE_LANE_EVENT_MAX_PERIOD GENMASK_ULL(31, 0) #define DWC_PCIE_MAX_PERIOD GENMASK_ULL(63, 0) @@ -82,8 +90,11 @@ struct dwc_pcie_pmu { u16 ras_des_offset; u32 nr_lanes; + /* Groups #6 and #7 */ + DECLARE_BITMAP(lane_events, 2 * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP); + struct perf_event *time_based_event; + struct hlist_node cpuhp_node; - struct perf_event *event[DWC_PCIE_EVENT_TYPE_MAX]; int on_cpu; }; @@ -100,17 +111,6 @@ struct dwc_pcie_dev_info { struct list_head dev_node; }; -struct dwc_pcie_vendor_id { - int vendor_id; -}; - -static const struct dwc_pcie_vendor_id dwc_pcie_vendor_ids[] = { - {.vendor_id = PCI_VENDOR_ID_ALIBABA }, - {.vendor_id = PCI_VENDOR_ID_AMPERE }, - {.vendor_id = PCI_VENDOR_ID_QCOM }, - {} /* terminator */ -}; - static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -199,8 +199,8 @@ static struct attribute *dwc_pcie_pmu_time_event_attrs[] = { DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_1, 0x05), DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_2, 0x06), DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(CFG_RCVRY, 0x07), - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x08), - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x09), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x08), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x09), /* Group #1 */ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(tx_pcie_tlp_data_payload, 0x20), @@ -257,19 +257,26 @@ static const struct attribute_group *dwc_pcie_attr_groups[] = { }; static void dwc_pcie_pmu_lane_event_enable(struct dwc_pcie_pmu *pcie_pmu, + struct perf_event *event, bool enable) { struct pci_dev *pdev = pcie_pmu->pdev; u16 
ras_des_offset = pcie_pmu->ras_des_offset; + int event_id = DWC_PCIE_EVENT_ID(event); + int lane = DWC_PCIE_EVENT_LANE(event); + u32 ctrl; + + ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) | + FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) | + FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR); if (enable) - pci_clear_and_set_config_dword(pdev, - ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, - DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON); + ctrl |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON); else - pci_clear_and_set_config_dword(pdev, - ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, - DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF); + ctrl |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF); + + pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, + ctrl); } static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu, @@ -287,11 +294,22 @@ static u64 dwc_pcie_pmu_read_lane_event_counter(struct perf_event *event) { struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); struct pci_dev *pdev = pcie_pmu->pdev; + int event_id = DWC_PCIE_EVENT_ID(event); + int lane = DWC_PCIE_EVENT_LANE(event); u16 ras_des_offset = pcie_pmu->ras_des_offset; - u32 val; + u32 val, ctrl; + ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) | + FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) | + FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON); + pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, + ctrl); pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_DATA, &val); + ctrl |= FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR); + pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, + ctrl); + return val; } @@ -340,26 +358,77 @@ static void dwc_pcie_pmu_event_update(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); - u64 delta, prev, now = 0; + u64 delta, prev, now; + + if (type == DWC_PCIE_LANE_EVENT) { + now = dwc_pcie_pmu_read_lane_event_counter(event) & + DWC_PCIE_LANE_EVENT_MAX_PERIOD; + local64_add(now, &event->count); + return; + } do { prev = local64_read(&hwc->prev_count); - - if (type == DWC_PCIE_LANE_EVENT) - now = dwc_pcie_pmu_read_lane_event_counter(event); - else if (type == DWC_PCIE_TIME_BASE_EVENT) - now = dwc_pcie_pmu_read_time_based_counter(event); + now = dwc_pcie_pmu_read_time_based_counter(event); } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev); delta = (now - prev) & DWC_PCIE_MAX_PERIOD; - /* 32-bit counter for Lane Event Counting */ - if (type == DWC_PCIE_LANE_EVENT) - delta &= DWC_PCIE_LANE_EVENT_MAX_PERIOD; - local64_add(delta, &event->count); } +static int dwc_pcie_pmu_validate_add_lane_event(struct perf_event *event, + unsigned long val_lane_events[]) +{ + int event_id, event_nr, group; + + event_id = DWC_PCIE_EVENT_ID(event); + event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id); + group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id); + + if (group != DWC_PCIE_LANE_GROUP_6 && group != DWC_PCIE_LANE_GROUP_7) + return -EINVAL; + + group -= DWC_PCIE_LANE_GROUP_6; + + if (test_and_set_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr, + val_lane_events)) + return -EINVAL; + + return 0; +} + +static int dwc_pcie_pmu_validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + DECLARE_BITMAP(val_lane_events, 2 * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP); + bool time_event = false; + int type; + + type = DWC_PCIE_EVENT_TYPE(leader); + if 
(type == DWC_PCIE_TIME_BASE_EVENT) + time_event = true; + else + if (dwc_pcie_pmu_validate_add_lane_event(leader, val_lane_events)) + return -ENOSPC; + + for_each_sibling_event(sibling, leader) { + type = DWC_PCIE_EVENT_TYPE(sibling); + if (type == DWC_PCIE_TIME_BASE_EVENT) { + if (time_event) + return -ENOSPC; + + time_event = true; + continue; + } + + if (dwc_pcie_pmu_validate_add_lane_event(sibling, val_lane_events)) + return -ENOSPC; + } + + return 0; +} + static int dwc_pcie_pmu_event_init(struct perf_event *event) { struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); @@ -378,10 +447,6 @@ static int dwc_pcie_pmu_event_init(struct perf_event *event) if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) return -EINVAL; - if (event->group_leader != event && - !is_software_event(event->group_leader)) - return -EINVAL; - for_each_sibling_event(sibling, event->group_leader) { if (sibling->pmu != event->pmu && !is_software_event(sibling)) return -EINVAL; @@ -396,6 +461,9 @@ static int dwc_pcie_pmu_event_init(struct perf_event *event) return -EINVAL; } + if (dwc_pcie_pmu_validate_group(event)) + return -ENOSPC; + event->cpu = pcie_pmu->on_cpu; return 0; @@ -411,7 +479,7 @@ static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags) local64_set(&hwc->prev_count, 0); if (type == DWC_PCIE_LANE_EVENT) - dwc_pcie_pmu_lane_event_enable(pcie_pmu, true); + dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, true); else if (type == DWC_PCIE_TIME_BASE_EVENT) dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true); } @@ -425,12 +493,13 @@ static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags) if (event->hw.state & PERF_HES_STOPPED) return; + dwc_pcie_pmu_event_update(event); + if (type == DWC_PCIE_LANE_EVENT) - dwc_pcie_pmu_lane_event_enable(pcie_pmu, false); + dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, false); else if (type == DWC_PCIE_TIME_BASE_EVENT) dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false); - dwc_pcie_pmu_event_update(event); hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; } @@ -445,14 +514,17 @@ static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags) u16 ras_des_offset = pcie_pmu->ras_des_offset; u32 ctrl; - /* one counter for each type and it is in use */ - if (pcie_pmu->event[type]) - return -ENOSPC; - - pcie_pmu->event[type] = event; hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; if (type == DWC_PCIE_LANE_EVENT) { + int event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id); + int group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id) - + DWC_PCIE_LANE_GROUP_6; + + if (test_and_set_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr, + pcie_pmu->lane_events)) + return -ENOSPC; + /* EVENT_COUNTER_DATA_REG needs clear manually */ ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) | FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) | @@ -461,6 +533,11 @@ static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags) pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, ctrl); } else if (type == DWC_PCIE_TIME_BASE_EVENT) { + if (pcie_pmu->time_based_event) + return -ENOSPC; + + pcie_pmu->time_based_event = event; + /* * TIME_BASED_ANAL_DATA_REG is a 64 bit register, we can safely * use it with any manually controlled duration. 
And it is @@ -489,7 +566,18 @@ static void dwc_pcie_pmu_event_del(struct perf_event *event, int flags) dwc_pcie_pmu_event_stop(event, flags | PERF_EF_UPDATE); perf_event_update_userpage(event); - pcie_pmu->event[type] = NULL; + + if (type == DWC_PCIE_TIME_BASE_EVENT) { + pcie_pmu->time_based_event = NULL; + } else { + int event_id = DWC_PCIE_EVENT_ID(event); + int event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id); + int group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id) - + DWC_PCIE_LANE_GROUP_6; + + clear_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr, + pcie_pmu->lane_events); + } } static void dwc_pcie_pmu_remove_cpuhp_instance(void *hotplug_node) @@ -519,31 +607,28 @@ static void dwc_pcie_unregister_pmu(void *data) perf_pmu_unregister(&pcie_pmu->pmu); } -static bool dwc_pcie_match_des_cap(struct pci_dev *pdev) +static u16 dwc_pcie_des_cap(struct pci_dev *pdev) { - const struct dwc_pcie_vendor_id *vid; - u16 vsec = 0; + const struct dwc_pcie_vsec_id *vid; + u16 vsec; u32 val; if (!pci_is_pcie(pdev) || !(pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)) - return false; + return 0; - for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) { + for (vid = dwc_pcie_rasdes_vsec_ids; vid->vendor_id; vid++) { vsec = pci_find_vsec_capability(pdev, vid->vendor_id, - DWC_PCIE_VSEC_RAS_DES_ID); - if (vsec) - break; + vid->vsec_id); + if (vsec) { + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, + &val); + if (PCI_VNDR_HEADER_REV(val) == vid->vsec_rev) { + pci_dbg(pdev, "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); + return vsec; + } + } } - if (!vsec) - return false; - - pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); - if (PCI_VNDR_HEADER_REV(val) != 0x04) - return false; - - pci_dbg(pdev, - "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); - return true; + return 0; } static void dwc_pcie_unregister_dev(struct dwc_pcie_dev_info *dev_info) @@ -560,15 +645,15 @@ static int dwc_pcie_register_dev(struct pci_dev *pdev) u32 sbdf; sbdf = (pci_domain_nr(pdev->bus) << 16) | PCI_DEVID(pdev->bus->number, pdev->devfn); - plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", sbdf, - pdev, sizeof(*pdev)); - + plat_dev = platform_device_register_simple("dwc_pcie_pmu", sbdf, NULL, 0); if (IS_ERR(plat_dev)) return PTR_ERR(plat_dev); dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL); - if (!dev_info) + if (!dev_info) { + platform_device_unregister(plat_dev); return -ENOMEM; + } /* Cache platform device to handle pci device hotplug */ dev_info->plat_dev = plat_dev; @@ -587,7 +672,7 @@ static int dwc_pcie_pmu_notifier(struct notifier_block *nb, switch (action) { case BUS_NOTIFY_ADD_DEVICE: - if (!dwc_pcie_match_des_cap(pdev)) + if (!dwc_pcie_des_cap(pdev)) return NOTIFY_DONE; if (dwc_pcie_register_dev(pdev)) return NOTIFY_BAD; @@ -609,17 +694,26 @@ static struct notifier_block dwc_pcie_pmu_nb = { static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) { - struct pci_dev *pdev = plat_dev->dev.platform_data; + struct pci_dev *pdev; struct dwc_pcie_pmu *pcie_pmu; char *name; - u32 sbdf, val; + u32 sbdf; u16 vsec; int ret; - vsec = pci_find_vsec_capability(pdev, pdev->vendor, - DWC_PCIE_VSEC_RAS_DES_ID); - pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); sbdf = plat_dev->id; + pdev = pci_get_domain_bus_and_slot(sbdf >> 16, PCI_BUS_NUM(sbdf & 0xffff), + sbdf & 0xff); + if (!pdev) { + pr_err("No pdev found for the sbdf 0x%x\n", sbdf); + return -ENODEV; + } + + vsec = dwc_pcie_des_cap(pdev); + if (!vsec) + return -ENODEV; + + 
pci_dev_put(pdev); name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", sbdf); if (!name) return -ENOMEM; @@ -634,7 +728,7 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) pcie_pmu->on_cpu = -1; pcie_pmu->pmu = (struct pmu){ .name = name, - .parent = &pdev->dev, + .parent = &plat_dev->dev, .module = THIS_MODULE, .attr_groups = dwc_pcie_attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, @@ -724,19 +818,28 @@ static struct platform_driver dwc_pcie_pmu_driver = { .driver = {.name = "dwc_pcie_pmu",}, }; +static void dwc_pcie_cleanup_devices(void) +{ + struct dwc_pcie_dev_info *dev_info, *tmp; + + list_for_each_entry_safe(dev_info, tmp, &dwc_pcie_dev_info_head, dev_node) { + dwc_pcie_unregister_dev(dev_info); + } +} + static int __init dwc_pcie_pmu_init(void) { struct pci_dev *pdev = NULL; int ret; for_each_pci_dev(pdev) { - if (!dwc_pcie_match_des_cap(pdev)) + if (!dwc_pcie_des_cap(pdev)) continue; ret = dwc_pcie_register_dev(pdev); if (ret) { pci_dev_put(pdev); - return ret; + goto err_cleanup; } } @@ -745,35 +848,35 @@ static int __init dwc_pcie_pmu_init(void) dwc_pcie_pmu_online_cpu, dwc_pcie_pmu_offline_cpu); if (ret < 0) - return ret; + goto err_cleanup; dwc_pcie_pmu_hp_state = ret; ret = platform_driver_register(&dwc_pcie_pmu_driver); if (ret) - goto platform_driver_register_err; + goto err_remove_cpuhp; ret = bus_register_notifier(&pci_bus_type, &dwc_pcie_pmu_nb); if (ret) - goto platform_driver_register_err; + goto err_unregister_driver; notify = true; return 0; -platform_driver_register_err: +err_unregister_driver: + platform_driver_unregister(&dwc_pcie_pmu_driver); +err_remove_cpuhp: cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state); - +err_cleanup: + dwc_pcie_cleanup_devices(); return ret; } static void __exit dwc_pcie_pmu_exit(void) { - struct dwc_pcie_dev_info *dev_info, *tmp; - if (notify) bus_unregister_notifier(&pci_bus_type, &dwc_pcie_pmu_nb); - list_for_each_entry_safe(dev_info, tmp, &dwc_pcie_dev_info_head, dev_node) - dwc_pcie_unregister_dev(dev_info); + dwc_pcie_cleanup_devices(); platform_driver_unregister(&dwc_pcie_pmu_driver); cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state); } diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index b989ffa95d69..bcdf5575d71c 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -5,6 +5,7 @@ */ #include <linux/bitfield.h> +#include <linux/clk.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -52,18 +53,27 @@ #define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu) #define DDR_PERF_DEV_NAME "imx8_ddr" +#define DB_PERF_DEV_NAME "imx8_db" #define DDR_CPUHP_CB_NAME DDR_PERF_DEV_NAME "_perf_pmu" static DEFINE_IDA(ddr_ida); +static DEFINE_IDA(db_ida); /* DDR Perf hardware feature */ #define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */ #define DDR_CAP_AXI_ID_FILTER_ENHANCED 0x3 /* support enhanced AXI ID filter */ #define DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER 0x4 /* support AXI ID PORT CHANNEL filter */ +/* Perf type */ +enum fsl_ddr_type { + DDR_PERF_TYPE = 0, /* ddr Perf (default) */ + DB_PERF_TYPE, /* db Perf */ +}; + struct fsl_ddr_devtype_data { unsigned int quirks; /* quirks needed for different DDR Perf core */ const char *identifier; /* system PMU identifier for userspace */ + enum fsl_ddr_type type; /* types of Perf, ddr or db */ }; static const struct fsl_ddr_devtype_data imx8_devtype_data; @@ -97,6 +107,12 @@ static const struct fsl_ddr_devtype_data imx8dxl_devtype_data = { .identifier = 
"i.MX8DXL", }; +static const struct fsl_ddr_devtype_data imx8dxl_db_devtype_data = { + .quirks = DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER, + .identifier = "i.MX8DXL", + .type = DB_PERF_TYPE, +}; + static const struct of_device_id imx_ddr_pmu_dt_ids[] = { { .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data}, { .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data}, @@ -105,6 +121,7 @@ static const struct of_device_id imx_ddr_pmu_dt_ids[] = { { .compatible = "fsl,imx8mn-ddr-pmu", .data = &imx8mn_devtype_data}, { .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data}, { .compatible = "fsl,imx8dxl-ddr-pmu", .data = &imx8dxl_devtype_data}, + { .compatible = "fsl,imx8dxl-db-pmu", .data = &imx8dxl_db_devtype_data}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids); @@ -284,9 +301,37 @@ static struct attribute *ddr_perf_events_attrs[] = { NULL, }; +static const int ddr_perf_db_visible_event_list[] = { + EVENT_CYCLES_ID, + 0x41, + 0x42, +}; + +static umode_t ddr_perf_events_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct ddr_pmu *pmu = dev_get_drvdata(dev); + struct perf_pmu_events_attr *pmu_attr; + unsigned int i; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + + if (pmu->devtype_data->type == DDR_PERF_TYPE) + return attr->mode; + + /* DB Type */ + for (i = 0; i < ARRAY_SIZE(ddr_perf_db_visible_event_list); i++) + if (pmu_attr->id == ddr_perf_db_visible_event_list[i]) + return attr->mode; + + return 0; +} + static const struct attribute_group ddr_perf_events_attr_group = { .name = "events", .attrs = ddr_perf_events_attrs, + .is_visible = ddr_perf_events_attrs_is_visible, }; PMU_FORMAT_ATTR(event, "config:0-7"); @@ -645,8 +690,8 @@ static void ddr_perf_pmu_disable(struct pmu *pmu) { } -static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, - struct device *dev) +static void ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, + struct device *dev) { *pmu = (struct ddr_pmu) { .pmu = (struct pmu) { @@ -667,9 +712,6 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, .base = base, .dev = dev, }; - - pmu->id = ida_alloc(&ddr_ida, GFP_KERNEL); - return pmu->id; } static irqreturn_t ddr_perf_irq_handler(int irq, void *p) @@ -735,10 +777,13 @@ static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node) static int ddr_perf_probe(struct platform_device *pdev) { + struct clk_bulk_data *clks; struct ddr_pmu *pmu; struct device_node *np; void __iomem *base; + struct ida *ida; char *name; + int nclks; int num; int ret; int irq; @@ -753,19 +798,33 @@ static int ddr_perf_probe(struct platform_device *pdev) if (!pmu) return -ENOMEM; - num = ddr_perf_init(pmu, base, &pdev->dev); + ddr_perf_init(pmu, base, &pdev->dev); platform_set_drvdata(pdev, pmu); - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", - num); + nclks = devm_clk_bulk_get_all_enabled(&pdev->dev, &clks); + if (nclks < 0) + return dev_err_probe(&pdev->dev, nclks, "Failure get clks\n"); + + pmu->devtype_data = of_device_get_match_data(&pdev->dev); + + ida = pmu->devtype_data->type == DDR_PERF_TYPE ? 
&ddr_ida : &db_ida; + num = ida_alloc(ida, GFP_KERNEL); + if (num < 0) + return num; + + pmu->id = num; + + if (pmu->devtype_data->type == DDR_PERF_TYPE) + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", num); + else + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DB_PERF_DEV_NAME "%d", num); + if (!name) { ret = -ENOMEM; - goto cpuhp_state_err; + goto idr_free; } - pmu->devtype_data = of_device_get_match_data(&pdev->dev); - pmu->cpu = raw_smp_processor_id(); ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DDR_CPUHP_CB_NAME, @@ -774,7 +833,7 @@ static int ddr_perf_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "cpuhp_setup_state_multi failed\n"); - goto cpuhp_state_err; + goto idr_free; } pmu->cpuhp_state = ret; @@ -821,8 +880,8 @@ ddr_perf_err: cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); cpuhp_instance_err: cpuhp_remove_multi_state(pmu->cpuhp_state); -cpuhp_state_err: - ida_free(&ddr_ida, pmu->id); +idr_free: + ida_free(ida, pmu->id); dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret); return ret; } @@ -836,7 +895,11 @@ static void ddr_perf_remove(struct platform_device *pdev) perf_pmu_unregister(&pmu->pmu); - ida_free(&ddr_ida, pmu->id); + if (pmu->devtype_data->type == DDR_PERF_TYPE) + ida_free(&ddr_ida, pmu->id); + else + ida_free(&db_ida, pmu->id); + } static struct platform_driver imx_ddr_pmu_driver = { diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c index 3c856d9a4e97..7050b48c0467 100644 --- a/drivers/perf/fsl_imx9_ddr_perf.c +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -63,8 +63,21 @@ static DEFINE_IDA(ddr_ida); +/* + * V1 supports 1 read transaction, 1 write transaction and 1 read beats + * event, corresponding respectively to counters 2, 3 and 4. + */ +#define DDR_PERF_AXI_FILTER_V1 0x1 + +/* + * V2 supports 1 read beats and 3 write beats events, corresponding + * respectively to counters 2-5. 
+ */ +#define DDR_PERF_AXI_FILTER_V2 0x2 + struct imx_ddr_devtype_data { const char *identifier; /* system PMU identifier for userspace */ + unsigned int filter_ver; /* AXI filter version */ }; struct ddr_pmu { @@ -83,29 +96,38 @@ struct ddr_pmu { static const struct imx_ddr_devtype_data imx91_devtype_data = { .identifier = "imx91", + .filter_ver = DDR_PERF_AXI_FILTER_V1 }; static const struct imx_ddr_devtype_data imx93_devtype_data = { .identifier = "imx93", + .filter_ver = DDR_PERF_AXI_FILTER_V1 +}; + +static const struct imx_ddr_devtype_data imx94_devtype_data = { + .identifier = "imx94", + .filter_ver = DDR_PERF_AXI_FILTER_V2 }; static const struct imx_ddr_devtype_data imx95_devtype_data = { .identifier = "imx95", + .filter_ver = DDR_PERF_AXI_FILTER_V2 }; -static inline bool is_imx93(struct ddr_pmu *pmu) +static inline bool axi_filter_v1(struct ddr_pmu *pmu) { - return pmu->devtype_data == &imx93_devtype_data; + return pmu->devtype_data->filter_ver == DDR_PERF_AXI_FILTER_V1; } -static inline bool is_imx95(struct ddr_pmu *pmu) +static inline bool axi_filter_v2(struct ddr_pmu *pmu) { - return pmu->devtype_data == &imx95_devtype_data; + return pmu->devtype_data->filter_ver == DDR_PERF_AXI_FILTER_V2; } static const struct of_device_id imx_ddr_pmu_dt_ids[] = { { .compatible = "fsl,imx91-ddr-pmu", .data = &imx91_devtype_data }, { .compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data }, + { .compatible = "fsl,imx94-ddr-pmu", .data = &imx94_devtype_data }, { .compatible = "fsl,imx95-ddr-pmu", .data = &imx95_devtype_data }, { /* sentinel */ } }; @@ -155,7 +177,7 @@ static const struct attribute_group ddr_perf_cpumask_attr_group = { struct imx9_pmu_events_attr { struct device_attribute attr; u64 id; - const void *devtype_data; + const struct imx_ddr_devtype_data *devtype_data; }; static ssize_t ddr_pmu_event_show(struct device *dev, @@ -307,7 +329,8 @@ ddr_perf_events_attrs_is_visible(struct kobject *kobj, if (!eattr->devtype_data) return attr->mode; - if (eattr->devtype_data != ddr_pmu->devtype_data) + if (eattr->devtype_data != ddr_pmu->devtype_data && + eattr->devtype_data->filter_ver != ddr_pmu->devtype_data->filter_ver) return 0; return attr->mode; @@ -444,9 +467,11 @@ static void imx93_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event, int counter, int axi_id, int axi_mask) { u32 pmcfg1, pmcfg2; - u32 mask[] = { MX93_PMCFG1_RD_TRANS_FILT_EN, - MX93_PMCFG1_WR_TRANS_FILT_EN, - MX93_PMCFG1_RD_BT_FILT_EN }; + static const u32 mask[] = { + MX93_PMCFG1_RD_TRANS_FILT_EN, + MX93_PMCFG1_WR_TRANS_FILT_EN, + MX93_PMCFG1_RD_BT_FILT_EN + }; pmcfg1 = readl_relaxed(pmu->base + PMCFG1); @@ -624,11 +649,11 @@ static int ddr_perf_event_add(struct perf_event *event, int flags) hwc->idx = counter; hwc->state |= PERF_HES_STOPPED; - if (is_imx93(pmu)) + if (axi_filter_v1(pmu)) /* read trans, write trans, read beat */ imx93_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2); - if (is_imx95(pmu)) + if (axi_filter_v2(pmu)) /* write beat, read beat2, read beat1, read beat */ imx95_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2); diff --git a/drivers/perf/fujitsu_uncore_pmu.c b/drivers/perf/fujitsu_uncore_pmu.c new file mode 100644 index 000000000000..c3c6f56474ad --- /dev/null +++ b/drivers/perf/fujitsu_uncore_pmu.c @@ -0,0 +1,613 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for the Uncore PMUs in Fujitsu chips. + * + * See Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst for more details. + * + * Copyright (c) 2025 Fujitsu. All rights reserved. 
+ */ + +#include <linux/acpi.h> +#include <linux/bitfield.h> +#include <linux/bitops.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/list.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/perf_event.h> +#include <linux/platform_device.h> + +/* Number of counters on each PMU */ +#define MAC_NUM_COUNTERS 8 +#define PCI_NUM_COUNTERS 8 +/* Mask for the event type field within perf_event_attr.config and EVTYPE reg */ +#define UNCORE_EVTYPE_MASK 0xFF + +/* Perfmon registers */ +#define PM_EVCNTR(__cntr) (0x000 + (__cntr) * 8) +#define PM_CNTCTL(__cntr) (0x100 + (__cntr) * 8) +#define PM_CNTCTL_RESET 0 +#define PM_EVTYPE(__cntr) (0x200 + (__cntr) * 8) +#define PM_EVTYPE_EVSEL(__val) FIELD_GET(UNCORE_EVTYPE_MASK, __val) +#define PM_CR 0x400 +#define PM_CR_RESET BIT(1) +#define PM_CR_ENABLE BIT(0) +#define PM_CNTENSET 0x410 +#define PM_CNTENSET_IDX(__cntr) BIT(__cntr) +#define PM_CNTENCLR 0x418 +#define PM_CNTENCLR_IDX(__cntr) BIT(__cntr) +#define PM_CNTENCLR_RESET 0xFF +#define PM_INTENSET 0x420 +#define PM_INTENSET_IDX(__cntr) BIT(__cntr) +#define PM_INTENCLR 0x428 +#define PM_INTENCLR_IDX(__cntr) BIT(__cntr) +#define PM_INTENCLR_RESET 0xFF +#define PM_OVSR 0x440 +#define PM_OVSR_OVSRCLR_RESET 0xFF + +enum fujitsu_uncore_pmu { + FUJITSU_UNCORE_PMU_MAC = 1, + FUJITSU_UNCORE_PMU_PCI = 2, +}; + +struct uncore_pmu { + int num_counters; + struct pmu pmu; + struct hlist_node node; + void __iomem *regs; + struct perf_event **events; + unsigned long *used_mask; + int cpu; + int irq; + struct device *dev; +}; + +#define to_uncore_pmu(p) (container_of(p, struct uncore_pmu, pmu)) + +static int uncore_pmu_cpuhp_state; + +static void fujitsu_uncore_counter_start(struct perf_event *event) +{ + struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu); + int idx = event->hw.idx; + + /* Initialize the hardware counter and reset prev_count*/ + local64_set(&event->hw.prev_count, 0); + writeq_relaxed(0, uncorepmu->regs + PM_EVCNTR(idx)); + + /* Set the event type */ + writeq_relaxed(PM_EVTYPE_EVSEL(event->attr.config), uncorepmu->regs + PM_EVTYPE(idx)); + + /* Enable interrupt generation by this counter */ + writeq_relaxed(PM_INTENSET_IDX(idx), uncorepmu->regs + PM_INTENSET); + + /* Finally, enable the counter */ + writeq_relaxed(PM_CNTCTL_RESET, uncorepmu->regs + PM_CNTCTL(idx)); + writeq_relaxed(PM_CNTENSET_IDX(idx), uncorepmu->regs + PM_CNTENSET); +} + +static void fujitsu_uncore_counter_stop(struct perf_event *event) +{ + struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu); + int idx = event->hw.idx; + + /* Disable the counter */ + writeq_relaxed(PM_CNTENCLR_IDX(idx), uncorepmu->regs + PM_CNTENCLR); + + /* Disable interrupt generation by this counter */ + writeq_relaxed(PM_INTENCLR_IDX(idx), uncorepmu->regs + PM_INTENCLR); +} + +static void fujitsu_uncore_counter_update(struct perf_event *event) +{ + struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu); + int idx = event->hw.idx; + u64 prev, new; + + do { + prev = local64_read(&event->hw.prev_count); + new = readq_relaxed(uncorepmu->regs + PM_EVCNTR(idx)); + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + local64_add(new - prev, &event->count); +} + +static inline void fujitsu_uncore_init(struct uncore_pmu *uncorepmu) +{ + int i; + + writeq_relaxed(PM_CR_RESET, uncorepmu->regs + PM_CR); + + writeq_relaxed(PM_CNTENCLR_RESET, uncorepmu->regs + PM_CNTENCLR); + writeq_relaxed(PM_INTENCLR_RESET, uncorepmu->regs + PM_INTENCLR); + writeq_relaxed(PM_OVSR_OVSRCLR_RESET, 
uncorepmu->regs + PM_OVSR); + + for (i = 0; i < uncorepmu->num_counters; ++i) { + writeq_relaxed(PM_CNTCTL_RESET, uncorepmu->regs + PM_CNTCTL(i)); + writeq_relaxed(PM_EVTYPE_EVSEL(0), uncorepmu->regs + PM_EVTYPE(i)); + } + writeq_relaxed(PM_CR_ENABLE, uncorepmu->regs + PM_CR); +} + +static irqreturn_t fujitsu_uncore_handle_irq(int irq_num, void *data) +{ + struct uncore_pmu *uncorepmu = data; + /* Read the overflow status register */ + long status = readq_relaxed(uncorepmu->regs + PM_OVSR); + int idx; + + if (status == 0) + return IRQ_NONE; + + /* Clear the bits we read on the overflow status register */ + writeq_relaxed(status, uncorepmu->regs + PM_OVSR); + + for_each_set_bit(idx, &status, uncorepmu->num_counters) { + struct perf_event *event; + + event = uncorepmu->events[idx]; + if (!event) + continue; + + fujitsu_uncore_counter_update(event); + } + + return IRQ_HANDLED; +} + +static void fujitsu_uncore_pmu_enable(struct pmu *pmu) +{ + writeq_relaxed(PM_CR_ENABLE, to_uncore_pmu(pmu)->regs + PM_CR); +} + +static void fujitsu_uncore_pmu_disable(struct pmu *pmu) +{ + writeq_relaxed(0, to_uncore_pmu(pmu)->regs + PM_CR); +} + +static bool fujitsu_uncore_validate_event_group(struct perf_event *event) +{ + struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu); + struct perf_event *leader = event->group_leader; + struct perf_event *sibling; + int counters = 1; + + if (leader == event) + return true; + + if (leader->pmu == event->pmu) + counters++; + + for_each_sibling_event(sibling, leader) { + if (sibling->pmu == event->pmu) + counters++; + } + + /* + * If the group requires more counters than the HW has, it + * cannot ever be scheduled. + */ + return counters <= uncorepmu->num_counters; +} + +static int fujitsu_uncore_event_init(struct perf_event *event) +{ + struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + /* Is the event for this PMU? */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* + * Sampling not supported since these events are not + * core-attributable. + */ + if (is_sampling_event(event)) + return -EINVAL; + + /* + * Task mode not available, we run the counters as socket counters, + * not attributable to any CPU and therefore cannot attribute per-task. + */ + if (event->cpu < 0) + return -EINVAL; + + /* Validate the group */ + if (!fujitsu_uncore_validate_event_group(event)) + return -EINVAL; + + hwc->idx = -1; + + event->cpu = uncorepmu->cpu; + + return 0; +} + +static void fujitsu_uncore_event_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + hwc->state = 0; + fujitsu_uncore_counter_start(event); +} + +static void fujitsu_uncore_event_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->state & PERF_HES_STOPPED) + return; + + fujitsu_uncore_counter_stop(event); + if (flags & PERF_EF_UPDATE) + fujitsu_uncore_counter_update(event); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; +} + +static int fujitsu_uncore_event_add(struct perf_event *event, int flags) +{ + struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx; + + /* Try to allocate a counter. */ + idx = bitmap_find_free_region(uncorepmu->used_mask, uncorepmu->num_counters, 0); + if (idx < 0) + /* The counters are all in use. 
*/ + return -EAGAIN; + + hwc->idx = idx; + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + uncorepmu->events[idx] = event; + + if (flags & PERF_EF_START) + fujitsu_uncore_event_start(event, 0); + + /* Propagate changes to the userspace mapping. */ + perf_event_update_userpage(event); + + return 0; +} + +static void fujitsu_uncore_event_del(struct perf_event *event, int flags) +{ + struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + /* Stop and clean up */ + fujitsu_uncore_event_stop(event, flags | PERF_EF_UPDATE); + uncorepmu->events[hwc->idx] = NULL; + bitmap_release_region(uncorepmu->used_mask, hwc->idx, 0); + + /* Propagate changes to the userspace mapping. */ + perf_event_update_userpage(event); +} + +static void fujitsu_uncore_event_read(struct perf_event *event) +{ + fujitsu_uncore_counter_update(event); +} + +#define UNCORE_PMU_FORMAT_ATTR(_name, _config) \ + (&((struct dev_ext_attribute[]) { \ + { .attr = __ATTR(_name, 0444, device_show_string, NULL), \ + .var = (void *)_config, } \ + })[0].attr.attr) + +static struct attribute *fujitsu_uncore_pmu_formats[] = { + UNCORE_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL +}; + +static const struct attribute_group fujitsu_uncore_pmu_format_group = { + .name = "format", + .attrs = fujitsu_uncore_pmu_formats, +}; + +static ssize_t fujitsu_uncore_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); +} + +#define MAC_EVENT_ATTR(_name, _id) \ + PMU_EVENT_ATTR_ID(_name, fujitsu_uncore_pmu_event_show, _id) + +static struct attribute *fujitsu_uncore_mac_pmu_events[] = { + MAC_EVENT_ATTR(cycles, 0x00), + MAC_EVENT_ATTR(read-count, 0x10), + MAC_EVENT_ATTR(read-count-request, 0x11), + MAC_EVENT_ATTR(read-count-return, 0x12), + MAC_EVENT_ATTR(read-count-request-pftgt, 0x13), + MAC_EVENT_ATTR(read-count-request-normal, 0x14), + MAC_EVENT_ATTR(read-count-return-pftgt-hit, 0x15), + MAC_EVENT_ATTR(read-count-return-pftgt-miss, 0x16), + MAC_EVENT_ATTR(read-wait, 0x17), + MAC_EVENT_ATTR(write-count, 0x20), + MAC_EVENT_ATTR(write-count-write, 0x21), + MAC_EVENT_ATTR(write-count-pwrite, 0x22), + MAC_EVENT_ATTR(memory-read-count, 0x40), + MAC_EVENT_ATTR(memory-write-count, 0x50), + MAC_EVENT_ATTR(memory-pwrite-count, 0x60), + MAC_EVENT_ATTR(ea-mac, 0x80), + MAC_EVENT_ATTR(ea-memory, 0x90), + MAC_EVENT_ATTR(ea-memory-mac-write, 0x92), + MAC_EVENT_ATTR(ea-ha, 0xa0), + NULL +}; + +#define PCI_EVENT_ATTR(_name, _id) \ + PMU_EVENT_ATTR_ID(_name, fujitsu_uncore_pmu_event_show, _id) + +static struct attribute *fujitsu_uncore_pci_pmu_events[] = { + PCI_EVENT_ATTR(pci-port0-cycles, 0x00), + PCI_EVENT_ATTR(pci-port0-read-count, 0x10), + PCI_EVENT_ATTR(pci-port0-read-count-bus, 0x14), + PCI_EVENT_ATTR(pci-port0-write-count, 0x20), + PCI_EVENT_ATTR(pci-port0-write-count-bus, 0x24), + PCI_EVENT_ATTR(pci-port1-cycles, 0x40), + PCI_EVENT_ATTR(pci-port1-read-count, 0x50), + PCI_EVENT_ATTR(pci-port1-read-count-bus, 0x54), + PCI_EVENT_ATTR(pci-port1-write-count, 0x60), + PCI_EVENT_ATTR(pci-port1-write-count-bus, 0x64), + PCI_EVENT_ATTR(ea-pci, 0x80), + NULL +}; + +static const struct attribute_group fujitsu_uncore_mac_pmu_events_group = { + .name = "events", + .attrs = fujitsu_uncore_mac_pmu_events, +}; + +static const struct attribute_group fujitsu_uncore_pci_pmu_events_group = { + .name = "events", + .attrs = 
fujitsu_uncore_pci_pmu_events, +}; + +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uncore_pmu *uncorepmu = to_uncore_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(uncorepmu->cpu)); +} +static DEVICE_ATTR_RO(cpumask); + +static struct attribute *fujitsu_uncore_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static const struct attribute_group fujitsu_uncore_pmu_cpumask_attr_group = { + .attrs = fujitsu_uncore_pmu_cpumask_attrs, +}; + +static const struct attribute_group *fujitsu_uncore_mac_pmu_attr_grps[] = { + &fujitsu_uncore_pmu_format_group, + &fujitsu_uncore_mac_pmu_events_group, + &fujitsu_uncore_pmu_cpumask_attr_group, + NULL +}; + +static const struct attribute_group *fujitsu_uncore_pci_pmu_attr_grps[] = { + &fujitsu_uncore_pmu_format_group, + &fujitsu_uncore_pci_pmu_events_group, + &fujitsu_uncore_pmu_cpumask_attr_group, + NULL +}; + +static void fujitsu_uncore_pmu_migrate(struct uncore_pmu *uncorepmu, unsigned int cpu) +{ + perf_pmu_migrate_context(&uncorepmu->pmu, uncorepmu->cpu, cpu); + irq_set_affinity(uncorepmu->irq, cpumask_of(cpu)); + uncorepmu->cpu = cpu; +} + +static int fujitsu_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) +{ + struct uncore_pmu *uncorepmu; + int node; + + uncorepmu = hlist_entry_safe(cpuhp_node, struct uncore_pmu, node); + node = dev_to_node(uncorepmu->dev); + if (cpu_to_node(uncorepmu->cpu) != node && cpu_to_node(cpu) == node) + fujitsu_uncore_pmu_migrate(uncorepmu, cpu); + + return 0; +} + +static int fujitsu_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) +{ + struct uncore_pmu *uncorepmu; + unsigned int target; + int node; + + uncorepmu = hlist_entry_safe(cpuhp_node, struct uncore_pmu, node); + if (cpu != uncorepmu->cpu) + return 0; + + node = dev_to_node(uncorepmu->dev); + target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + target = cpumask_any_but(cpu_online_mask, cpu); + + if (target < nr_cpu_ids) + fujitsu_uncore_pmu_migrate(uncorepmu, target); + + return 0; +} + +static int fujitsu_uncore_pmu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + unsigned long device_type = (unsigned long)device_get_match_data(dev); + const struct attribute_group **attr_groups; + struct uncore_pmu *uncorepmu; + struct resource *memrc; + size_t alloc_size; + char *name; + int ret; + int irq; + u64 uid; + + ret = acpi_dev_uid_to_integer(ACPI_COMPANION(dev), &uid); + if (ret) + return dev_err_probe(dev, ret, "unable to read ACPI uid\n"); + + uncorepmu = devm_kzalloc(dev, sizeof(*uncorepmu), GFP_KERNEL); + if (!uncorepmu) + return -ENOMEM; + uncorepmu->dev = dev; + uncorepmu->cpu = cpumask_local_spread(0, dev_to_node(dev)); + platform_set_drvdata(pdev, uncorepmu); + + switch (device_type) { + case FUJITSU_UNCORE_PMU_MAC: + uncorepmu->num_counters = MAC_NUM_COUNTERS; + attr_groups = fujitsu_uncore_mac_pmu_attr_grps; + name = devm_kasprintf(dev, GFP_KERNEL, "mac_iod%llu_mac%llu_ch%llu", + (uid >> 8) & 0xF, (uid >> 4) & 0xF, uid & 0xF); + break; + case FUJITSU_UNCORE_PMU_PCI: + uncorepmu->num_counters = PCI_NUM_COUNTERS; + attr_groups = fujitsu_uncore_pci_pmu_attr_grps; + name = devm_kasprintf(dev, GFP_KERNEL, "pci_iod%llu_pci%llu", + (uid >> 4) & 0xF, uid & 0xF); + break; + default: + return dev_err_probe(dev, -EINVAL, "illegal device type: %lu\n", device_type); + } + if (!name) + return -ENOMEM; + + uncorepmu->pmu = (struct pmu) { + 
.parent = dev, + .task_ctx_nr = perf_invalid_context, + + .attr_groups = attr_groups, + + .pmu_enable = fujitsu_uncore_pmu_enable, + .pmu_disable = fujitsu_uncore_pmu_disable, + .event_init = fujitsu_uncore_event_init, + .add = fujitsu_uncore_event_add, + .del = fujitsu_uncore_event_del, + .start = fujitsu_uncore_event_start, + .stop = fujitsu_uncore_event_stop, + .read = fujitsu_uncore_event_read, + + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + }; + + alloc_size = sizeof(uncorepmu->events[0]) * uncorepmu->num_counters; + uncorepmu->events = devm_kzalloc(dev, alloc_size, GFP_KERNEL); + if (!uncorepmu->events) + return -ENOMEM; + + alloc_size = sizeof(uncorepmu->used_mask[0]) * BITS_TO_LONGS(uncorepmu->num_counters); + uncorepmu->used_mask = devm_kzalloc(dev, alloc_size, GFP_KERNEL); + if (!uncorepmu->used_mask) + return -ENOMEM; + + uncorepmu->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &memrc); + if (IS_ERR(uncorepmu->regs)) + return PTR_ERR(uncorepmu->regs); + + fujitsu_uncore_init(uncorepmu); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = devm_request_irq(dev, irq, fujitsu_uncore_handle_irq, + IRQF_NOBALANCING | IRQF_NO_THREAD, + name, uncorepmu); + if (ret) + return dev_err_probe(dev, ret, "Failed to request IRQ:%d\n", irq); + + ret = irq_set_affinity(irq, cpumask_of(uncorepmu->cpu)); + if (ret) + return dev_err_probe(dev, ret, "Failed to set irq affinity:%d\n", irq); + + uncorepmu->irq = irq; + + /* Add this instance to the list used by the offline callback */ + ret = cpuhp_state_add_instance(uncore_pmu_cpuhp_state, &uncorepmu->node); + if (ret) + return dev_err_probe(dev, ret, "Error registering hotplug"); + + ret = perf_pmu_register(&uncorepmu->pmu, name, -1); + if (ret < 0) { + cpuhp_state_remove_instance_nocalls(uncore_pmu_cpuhp_state, &uncorepmu->node); + return dev_err_probe(dev, ret, "Failed to register %s PMU\n", name); + } + + dev_dbg(dev, "Registered %s, type: %d\n", name, uncorepmu->pmu.type); + + return 0; +} + +static void fujitsu_uncore_pmu_remove(struct platform_device *pdev) +{ + struct uncore_pmu *uncorepmu = platform_get_drvdata(pdev); + + writeq_relaxed(0, uncorepmu->regs + PM_CR); + + perf_pmu_unregister(&uncorepmu->pmu); + cpuhp_state_remove_instance_nocalls(uncore_pmu_cpuhp_state, &uncorepmu->node); +} + +static const struct acpi_device_id fujitsu_uncore_pmu_acpi_match[] = { + { "FUJI200C", FUJITSU_UNCORE_PMU_MAC }, + { "FUJI200D", FUJITSU_UNCORE_PMU_PCI }, + { } +}; +MODULE_DEVICE_TABLE(acpi, fujitsu_uncore_pmu_acpi_match); + +static struct platform_driver fujitsu_uncore_pmu_driver = { + .driver = { + .name = "fujitsu-uncore-pmu", + .acpi_match_table = fujitsu_uncore_pmu_acpi_match, + .suppress_bind_attrs = true, + }, + .probe = fujitsu_uncore_pmu_probe, + .remove = fujitsu_uncore_pmu_remove, +}; + +static int __init fujitsu_uncore_pmu_init(void) +{ + int ret; + + /* Install a hook to update the reader CPU in case it goes offline */ + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/fujitsu/uncore:online", + fujitsu_uncore_pmu_online_cpu, + fujitsu_uncore_pmu_offline_cpu); + if (ret < 0) + return ret; + + uncore_pmu_cpuhp_state = ret; + + ret = platform_driver_register(&fujitsu_uncore_pmu_driver); + if (ret) + cpuhp_remove_multi_state(uncore_pmu_cpuhp_state); + + return ret; +} + +static void __exit fujitsu_uncore_pmu_exit(void) +{ + platform_driver_unregister(&fujitsu_uncore_pmu_driver); + cpuhp_remove_multi_state(uncore_pmu_cpuhp_state); +} + +module_init(fujitsu_uncore_pmu_init); 
+module_exit(fujitsu_uncore_pmu_exit); + +MODULE_AUTHOR("Koichi Okuno <fj2767dz@fujitsu.com>"); +MODULE_DESCRIPTION("Fujitsu Uncore PMU driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 48dcc8381ea7..186be3d02238 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ - hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o + hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o \ + hisi_uncore_noc_pmu.o hisi_uncore_mn_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 3f3fb1de11f5..b879b81adfdd 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -180,20 +180,18 @@ MODULE_DEVICE_TABLE(acpi, hisi_cpa_pmu_acpi_match); static int hisi_cpa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *cpa_pmu) { - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &cpa_pmu->sicl_id)) { + hisi_uncore_pmu_init_topology(cpa_pmu, &pdev->dev); + + if (cpa_pmu->topo.sicl_id < 0) { dev_err(&pdev->dev, "Can not read sicl-id\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &cpa_pmu->index_id)) { + if (cpa_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id\n"); return -EINVAL; } - cpa_pmu->ccl_id = -1; - cpa_pmu->sccl_id = -1; cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cpa_pmu->base)) return PTR_ERR(cpa_pmu->base); @@ -227,34 +225,11 @@ static const struct attribute_group hisi_cpa_pmu_events_group = { .attrs = hisi_cpa_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_cpa_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_cpa_pmu_cpumask_attr_group = { - .attrs = hisi_cpa_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_cpa_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_cpa_pmu_identifier_attrs[] = { - &hisi_cpa_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_cpa_pmu_identifier_group = { - .attrs = hisi_cpa_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_cpa_pmu_attr_groups[] = { &hisi_cpa_pmu_format_group, &hisi_cpa_pmu_events_group, - &hisi_cpa_pmu_cpumask_attr_group, - &hisi_cpa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -311,8 +286,8 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%u", - cpa_pmu->sicl_id, cpa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%d", + cpa_pmu->topo.sicl_id, cpa_pmu->topo.index_id); if (!name) return -ENOMEM; @@ -389,6 +364,7 @@ static void __exit hisi_cpa_pmu_module_exit(void) } module_exit(hisi_cpa_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c 
b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index a6ebf2ec99d3..21c494881ca0 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -43,12 +43,21 @@ #define DDRC_V2_EVENT_TYPE 0xe74 #define DDRC_V2_PERF_CTRL 0xeA0 +/* DDRC interrupt registers definition in v3 */ +#define DDRC_V3_INT_MASK 0x534 +#define DDRC_V3_INT_STATUS 0x538 +#define DDRC_V3_INT_CLEAR 0x53C + /* DDRC has 8-counters */ #define DDRC_NR_COUNTERS 0x8 #define DDRC_V1_PERF_CTRL_EN 0x2 #define DDRC_V2_PERF_CTRL_EN 0x1 #define DDRC_V1_NR_EVENTS 0x7 -#define DDRC_V2_NR_EVENTS 0x90 +#define DDRC_V2_NR_EVENTS 0xFF + +#define DDRC_EVENT_CNTn(base, n) ((base) + (n) * 8) +#define DDRC_EVENT_TYPEn(base, n) ((base) + (n) * 4) +#define DDRC_UNIMPLEMENTED_REG GENMASK(31, 0) /* * For PMU v1, there are eight-events and every event has been mapped @@ -63,47 +72,37 @@ static const u32 ddrc_reg_off[] = { DDRC_PRE_CMD, DDRC_ACT_CMD, DDRC_RNK_CHG, DDRC_RW_CHG }; -/* - * Select the counter register offset using the counter index. - * In PMU v1, there are no programmable counter, the count - * is read form the statistics counter register itself. - */ -static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx) -{ - return ddrc_reg_off[cntr_idx]; -} - -static u32 hisi_ddrc_pmu_v2_get_counter_offset(int cntr_idx) -{ - return DDRC_V2_EVENT_CNT + cntr_idx * 8; -} +struct hisi_ddrc_pmu_regs { + u32 event_cnt; + u32 event_ctrl; + u32 event_type; + u32 perf_ctrl; + u32 perf_ctrl_en; + u32 int_mask; + u32 int_clear; + u32 int_status; +}; -static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu, +static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { - return readl(ddrc_pmu->base + - hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc, u64 val) -{ - writel((u32)val, - ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); -} + if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG) + return readl(ddrc_pmu->base + ddrc_reg_off[hwc->idx]); -static u64 hisi_ddrc_pmu_v2_read_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - return readq(ddrc_pmu->base + - hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); + return readq(ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx)); } -static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc, u64 val) +static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc, u64 val) { - writeq(val, - ddrc_pmu->base + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; + + if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG) + writel((u32)val, ddrc_pmu->base + ddrc_reg_off[hwc->idx]); + else + writeq(val, ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx)); } /* @@ -111,57 +110,15 @@ static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, * so there is no need to write event type, while it is programmable counter in * PMU v2. 
*/ -static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, +static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx, u32 type) { - u32 offset; + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; - if (hha_pmu->identifier >= HISI_PMU_V2) { - offset = DDRC_V2_EVENT_TYPE + 4 * idx; - writel(type, hha_pmu->base + offset); - } -} + if (regs->event_type == DDRC_UNIMPLEMENTED_REG) + return; -static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu) -{ - u32 val; - - /* Set perf_enable in DDRC_PERF_CTRL to start event counting */ - val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val |= DDRC_V1_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); -} - -static void hisi_ddrc_pmu_v1_stop_counters(struct hisi_pmu *ddrc_pmu) -{ - u32 val; - - /* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */ - val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val &= ~DDRC_V1_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); -} - -static void hisi_ddrc_pmu_v1_enable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Set counter index(event code) in DDRC_EVENT_CTRL register */ - val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL); - val |= (1 << GET_DDRC_EVENTID(hwc)); - writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); -} - -static void hisi_ddrc_pmu_v1_disable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Clear counter index(event code) in DDRC_EVENT_CTRL register */ - val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL); - val &= ~(1 << GET_DDRC_EVENTID(hwc)); - writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); + writel(type, ddrc_pmu->base + DDRC_EVENT_TYPEn(regs->event_type, idx)); } static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event) @@ -180,140 +137,119 @@ static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event) return idx; } -static int hisi_ddrc_pmu_v2_get_event_idx(struct perf_event *event) +static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event) { + struct hisi_pmu *ddrc_pmu = to_hisi_pmu(event->pmu); + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; + + if (regs->event_type == DDRC_UNIMPLEMENTED_REG) + return hisi_ddrc_pmu_v1_get_event_idx(event); + return hisi_uncore_pmu_get_event_idx(event); } -static void hisi_ddrc_pmu_v2_start_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_start_counters(struct hisi_pmu *ddrc_pmu) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); - val |= DDRC_V2_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val = readl(ddrc_pmu->base + regs->perf_ctrl); + val |= regs->perf_ctrl_en; + writel(val, ddrc_pmu->base + regs->perf_ctrl); } -static void hisi_ddrc_pmu_v2_stop_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_stop_counters(struct hisi_pmu *ddrc_pmu) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); - val &= ~DDRC_V2_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val = readl(ddrc_pmu->base + regs->perf_ctrl); + val &= ~regs->perf_ctrl_en; + writel(val, ddrc_pmu->base + regs->perf_ctrl); } -static void hisi_ddrc_pmu_v2_enable_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + 
DDRC_V2_EVENT_CTRL); - val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); + val = readl(ddrc_pmu->base + regs->event_ctrl); + val |= BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->event_ctrl); } -static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL); - val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); -} - -static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Write 0 to enable interrupt */ - val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_INT_MASK); + val = readl(ddrc_pmu->base + regs->event_ctrl); + val &= ~BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->event_ctrl); } -static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Write 1 to mask interrupt */ - val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_INT_MASK); -} - -static void hisi_ddrc_pmu_v2_enable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); - val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK); + val = readl(ddrc_pmu->base + regs->int_mask); + val &= ~BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->int_mask); } -static void hisi_ddrc_pmu_v2_disable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); - val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK); + val = readl(ddrc_pmu->base + regs->int_mask); + val |= BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->int_mask); } -static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu) +static u32 hisi_ddrc_pmu_get_int_status(struct hisi_pmu *ddrc_pmu) { - return readl(ddrc_pmu->base + DDRC_INT_STATUS); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu, - int idx) -{ - writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR); + return readl(ddrc_pmu->base + regs->int_status); } -static u32 hisi_ddrc_pmu_v2_get_int_status(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_clear_int_status(struct hisi_pmu *ddrc_pmu, + int idx) { - return readl(ddrc_pmu->base + DDRC_V2_INT_STATUS); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v2_clear_int_status(struct hisi_pmu *ddrc_pmu, - int idx) -{ - writel(1 << idx, ddrc_pmu->base + DDRC_V2_INT_CLEAR); + writel(1 << idx, ddrc_pmu->base + regs->int_clear); } -static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { - { "HISI0233", }, - { "HISI0234", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); - static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu 
*ddrc_pmu) { + hisi_uncore_pmu_init_topology(ddrc_pmu, &pdev->dev); + /* * Use the SCCL_ID and DDRC channel ID to identify the * DDRC PMU, while SCCL_ID is in MPIDR[aff2]. */ if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id", - &ddrc_pmu->index_id)) { + &ddrc_pmu->topo.index_id)) { dev_err(&pdev->dev, "Can not read ddrc channel-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &ddrc_pmu->sccl_id)) { + if (ddrc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n"); return -EINVAL; } - /* DDRC PMUs only share the same SCCL */ - ddrc_pmu->ccl_id = -1; + + ddrc_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!ddrc_pmu->dev_info) + return -ENODEV; ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ddrc_pmu->base)) { @@ -323,8 +259,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION); if (ddrc_pmu->identifier >= HISI_PMU_V2) { - if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &ddrc_pmu->sub_id)) { + if (ddrc_pmu->topo.sub_id < 0) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } @@ -382,73 +317,35 @@ static const struct attribute_group hisi_ddrc_pmu_v2_events_group = { .attrs = hisi_ddrc_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = { - .attrs = hisi_ddrc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_ddrc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = { - &hisi_ddrc_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_ddrc_pmu_identifier_group = { - .attrs = hisi_ddrc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = { &hisi_ddrc_pmu_v1_format_group, &hisi_ddrc_pmu_v1_events_group, - &hisi_ddrc_pmu_cpumask_attr_group, - &hisi_ddrc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = { &hisi_ddrc_pmu_v2_format_group, &hisi_ddrc_pmu_v2_events_group, - &hisi_ddrc_pmu_cpumask_attr_group, - &hisi_ddrc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; -static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { - .write_evtype = hisi_ddrc_pmu_write_evtype, - .get_event_idx = hisi_ddrc_pmu_v1_get_event_idx, - .start_counters = hisi_ddrc_pmu_v1_start_counters, - .stop_counters = hisi_ddrc_pmu_v1_stop_counters, - .enable_counter = hisi_ddrc_pmu_v1_enable_counter, - .disable_counter = hisi_ddrc_pmu_v1_disable_counter, - .enable_counter_int = hisi_ddrc_pmu_v1_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_v1_disable_counter_int, - .write_counter = hisi_ddrc_pmu_v1_write_counter, - .read_counter = hisi_ddrc_pmu_v1_read_counter, - .get_int_status = hisi_ddrc_pmu_v1_get_int_status, - .clear_int_status = hisi_ddrc_pmu_v1_clear_int_status, -}; - -static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = { +static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = { .write_evtype = hisi_ddrc_pmu_write_evtype, - .get_event_idx = hisi_ddrc_pmu_v2_get_event_idx, - .start_counters = hisi_ddrc_pmu_v2_start_counters, - 
.stop_counters = hisi_ddrc_pmu_v2_stop_counters, - .enable_counter = hisi_ddrc_pmu_v2_enable_counter, - .disable_counter = hisi_ddrc_pmu_v2_disable_counter, - .enable_counter_int = hisi_ddrc_pmu_v2_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_v2_disable_counter_int, - .write_counter = hisi_ddrc_pmu_v2_write_counter, - .read_counter = hisi_ddrc_pmu_v2_read_counter, - .get_int_status = hisi_ddrc_pmu_v2_get_int_status, - .clear_int_status = hisi_ddrc_pmu_v2_clear_int_status, + .get_event_idx = hisi_ddrc_pmu_get_event_idx, + .start_counters = hisi_ddrc_pmu_start_counters, + .stop_counters = hisi_ddrc_pmu_stop_counters, + .enable_counter = hisi_ddrc_pmu_enable_counter, + .disable_counter = hisi_ddrc_pmu_disable_counter, + .enable_counter_int = hisi_ddrc_pmu_enable_counter_int, + .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, + .write_counter = hisi_ddrc_pmu_write_counter, + .read_counter = hisi_ddrc_pmu_read_counter, + .get_int_status = hisi_ddrc_pmu_get_int_status, + .clear_int_status = hisi_ddrc_pmu_clear_int_status, }; static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, @@ -464,18 +361,10 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - if (ddrc_pmu->identifier >= HISI_PMU_V2) { - ddrc_pmu->counter_bits = 48; - ddrc_pmu->check_event = DDRC_V2_NR_EVENTS; - ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v2_attr_groups; - ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops; - } else { - ddrc_pmu->counter_bits = 32; - ddrc_pmu->check_event = DDRC_V1_NR_EVENTS; - ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v1_attr_groups; - ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops; - } - + ddrc_pmu->pmu_events.attr_groups = ddrc_pmu->dev_info->attr_groups; + ddrc_pmu->counter_bits = ddrc_pmu->dev_info->counter_bits; + ddrc_pmu->check_event = ddrc_pmu->dev_info->check_event; + ddrc_pmu->ops = &hisi_uncore_ddrc_ops; ddrc_pmu->num_counters = DDRC_NR_COUNTERS; ddrc_pmu->dev = &pdev->dev; ddrc_pmu->on_cpu = -1; @@ -501,13 +390,13 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) if (ddrc_pmu->identifier >= HISI_PMU_V2) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "hisi_sccl%u_ddrc%u_%u", - ddrc_pmu->sccl_id, ddrc_pmu->index_id, - ddrc_pmu->sub_id); + "hisi_sccl%d_ddrc%d_%d", + ddrc_pmu->topo.sccl_id, ddrc_pmu->topo.index_id, + ddrc_pmu->topo.sub_id); else name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, - ddrc_pmu->index_id); + "hisi_sccl%d_ddrc%d", ddrc_pmu->topo.sccl_id, + ddrc_pmu->topo.index_id); if (!name) return -ENOMEM; @@ -540,6 +429,68 @@ static void hisi_ddrc_pmu_remove(struct platform_device *pdev) &ddrc_pmu->node); } +static struct hisi_ddrc_pmu_regs hisi_ddrc_v1_pmu_regs = { + .event_cnt = DDRC_UNIMPLEMENTED_REG, + .event_ctrl = DDRC_EVENT_CTRL, + .event_type = DDRC_UNIMPLEMENTED_REG, + .perf_ctrl = DDRC_PERF_CTRL, + .perf_ctrl_en = DDRC_V1_PERF_CTRL_EN, + .int_mask = DDRC_INT_MASK, + .int_clear = DDRC_INT_CLEAR, + .int_status = DDRC_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v1 = { + .counter_bits = 32, + .check_event = DDRC_V1_NR_EVENTS, + .attr_groups = hisi_ddrc_pmu_v1_attr_groups, + .private = &hisi_ddrc_v1_pmu_regs, +}; + +static struct hisi_ddrc_pmu_regs hisi_ddrc_v2_pmu_regs = { + .event_cnt = DDRC_V2_EVENT_CNT, + .event_ctrl = DDRC_V2_EVENT_CTRL, + .event_type = DDRC_V2_EVENT_TYPE, + .perf_ctrl = DDRC_V2_PERF_CTRL, + .perf_ctrl_en = DDRC_V2_PERF_CTRL_EN, + .int_mask = DDRC_V2_INT_MASK, + .int_clear = DDRC_V2_INT_CLEAR, + .int_status = 
DDRC_V2_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v2 = { + .counter_bits = 48, + .check_event = DDRC_V2_NR_EVENTS, + .attr_groups = hisi_ddrc_pmu_v2_attr_groups, + .private = &hisi_ddrc_v2_pmu_regs, +}; + +static struct hisi_ddrc_pmu_regs hisi_ddrc_v3_pmu_regs = { + .event_cnt = DDRC_V2_EVENT_CNT, + .event_ctrl = DDRC_V2_EVENT_CTRL, + .event_type = DDRC_V2_EVENT_TYPE, + .perf_ctrl = DDRC_V2_PERF_CTRL, + .perf_ctrl_en = DDRC_V2_PERF_CTRL_EN, + .int_mask = DDRC_V3_INT_MASK, + .int_clear = DDRC_V3_INT_CLEAR, + .int_status = DDRC_V3_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v3 = { + .counter_bits = 48, + .check_event = DDRC_V2_NR_EVENTS, + .attr_groups = hisi_ddrc_pmu_v2_attr_groups, + .private = &hisi_ddrc_v3_pmu_regs, +}; + +static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { + { "HISI0233", (kernel_ulong_t)&hisi_ddrc_v1 }, + { "HISI0234", (kernel_ulong_t)&hisi_ddrc_v2 }, + { "HISI0235", (kernel_ulong_t)&hisi_ddrc_v3 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); + static struct platform_driver hisi_ddrc_pmu_driver = { .driver = { .name = "hisi_ddrc_pmu", @@ -575,10 +526,10 @@ static void __exit hisi_ddrc_pmu_module_exit(void) { platform_driver_unregister(&hisi_ddrc_pmu_driver); cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE); - } module_exit(hisi_ddrc_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 32624872596f..97cfaa586a87 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -47,9 +47,9 @@ #define HHA_SRCID_CMD GENMASK(16, 6) #define HHA_SRCID_MSK GENMASK(30, 20) #define HHA_DATSRC_SKT_EN BIT(23) -#define HHA_EVTYPE_NONE 0xff +#define HHA_EVTYPE_MASK GENMASK(7, 0) #define HHA_V1_NR_EVENT 0x65 -#define HHA_V2_NR_EVENT 0xCE +#define HHA_V2_NR_EVENT 0xFF HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 10, 0); HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 21, 11); @@ -197,7 +197,7 @@ static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, /* Write event code to HHA_EVENT_TYPEx register */ val = readl(hha_pmu->base + reg); - val &= ~(HHA_EVTYPE_NONE << shift); + val &= ~(HHA_EVTYPE_MASK << shift); val |= (type << shift); writel(val, hha_pmu->base + reg); } @@ -295,12 +295,13 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, unsigned long long id; acpi_status status; + hisi_uncore_pmu_init_topology(hha_pmu, &pdev->dev); + /* * Use SCCL_ID and UID to identify the HHA PMU, while * SCCL_ID is in MPIDR[aff2]. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &hha_pmu->sccl_id)) { + if (hha_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read hha sccl-id!\n"); return -EINVAL; } @@ -309,8 +310,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, * Early versions of BIOS support _UID by mistake, so we support * both "hisilicon, idx-id" as preference, if available. 
*/ - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &hha_pmu->index_id)) { + if (hha_pmu->topo.index_id < 0) { status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), "_UID", NULL, &id); if (ACPI_FAILURE(status)) { @@ -318,10 +318,8 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, return -EINVAL; } - hha_pmu->index_id = id; + hha_pmu->topo.index_id = id; } - /* HHA PMUs only share the same SCCL */ - hha_pmu->ccl_id = -1; hha_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hha_pmu->base)) { @@ -407,42 +405,19 @@ static const struct attribute_group hisi_hha_pmu_v2_events_group = { .attrs = hisi_hha_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_hha_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = { - .attrs = hisi_hha_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_hha_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_hha_pmu_identifier_attrs[] = { - &hisi_hha_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_hha_pmu_identifier_group = { - .attrs = hisi_hha_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = { &hisi_hha_pmu_v1_format_group, &hisi_hha_pmu_v1_events_group, - &hisi_hha_pmu_cpumask_attr_group, - &hisi_hha_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_hha_pmu_v2_attr_groups[] = { &hisi_hha_pmu_v2_format_group, &hisi_hha_pmu_v2_events_group, - &hisi_hha_pmu_cpumask_attr_group, - &hisi_hha_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -510,8 +485,8 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", - hha_pmu->sccl_id, hha_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_hha%d", + hha_pmu->topo.sccl_id, hha_pmu->topo.index_id); if (!name) return -ENOMEM; @@ -581,6 +556,7 @@ static void __exit hisi_hha_pmu_module_exit(void) } module_exit(hisi_hha_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index c235b46ce873..f963e4f9e552 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -39,6 +39,7 @@ /* L3C has 8-counters */ #define L3C_NR_COUNTERS 0x8 +#define L3C_MAX_EXT 2 #define L3C_PERF_CTRL_EN 0x10000 #define L3C_TRACETAG_EN BIT(31) @@ -55,59 +56,172 @@ #define L3C_V1_NR_EVENTS 0x59 #define L3C_V2_NR_EVENTS 0xFF -HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(ext, config, 17, 16); +/* + * Remain the config1:0-7 for backward compatibility if some existing users + * hardcode the config1:0-7 directly without parsing the sysfs attribute. 
+ */ +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core_deprecated, config1, 7, 0); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0); -static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) +struct hisi_l3c_pmu { + struct hisi_pmu l3c_pmu; + + /* MMIO and IRQ resources for extension events */ + void __iomem *ext_base[L3C_MAX_EXT]; + int ext_irq[L3C_MAX_EXT]; + int ext_num; +}; + +#define to_hisi_l3c_pmu(_l3c_pmu) \ + container_of(_l3c_pmu, struct hisi_l3c_pmu, l3c_pmu) + +/* + * The hardware counter idx used in counter enable/disable, + * interrupt enable/disable and status check, etc. + */ +#define L3C_HW_IDX(_cntr_idx) ((_cntr_idx) % L3C_NR_COUNTERS) + +/* Range of ext counters in used mask. */ +#define L3C_CNTR_EXT_L(_ext) (((_ext) + 1) * L3C_NR_COUNTERS) +#define L3C_CNTR_EXT_H(_ext) (((_ext) + 2) * L3C_NR_COUNTERS) + +struct hisi_l3c_pmu_ext { + bool support_ext; +}; + +static bool support_ext(struct hisi_l3c_pmu *pmu) +{ + struct hisi_l3c_pmu_ext *l3c_pmu_ext = pmu->l3c_pmu.dev_info->private; + + return l3c_pmu_ext->support_ext; +} + +/* + * tt_core was extended to cover all the CPUs sharing the L3 and was moved from + * config1:0-7 to config2:0-*. Try it first and fallback to tt_core_deprecated + * if user's still using the deprecated one. + */ +static u32 hisi_l3c_pmu_get_tt_core(struct perf_event *event) +{ + u32 core = hisi_get_tt_core(event); + + if (core) + return core; + + return hisi_get_tt_core_deprecated(event); +} + +static int hisi_l3c_pmu_get_event_idx(struct perf_event *event) { struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; + int ext = hisi_get_ext(event); + int idx; + + /* + * For an L3C PMU that supports extension events, we can monitor + * maximum 2 * num_counters to 3 * num_counters events, depending on + * the number of ext regions supported by hardware. Thus use bit + * [0, num_counters - 1] for normal events and bit + * [ext * num_counters, (ext + 1) * num_counters - 1] for extension + * events. The idx allocation will keep unchanged for normal events and + * we can also use the idx to distinguish whether it's an extension + * event or not. + * + * Since normal events and extension events locates on the different + * address space, save the base address to the event->hw.event_base. 
+ */ + if (ext && !support_ext(hisi_l3c_pmu)) + return -EOPNOTSUPP; + + if (ext) + event->hw.event_base = (unsigned long)hisi_l3c_pmu->ext_base[ext - 1]; + else + event->hw.event_base = (unsigned long)l3c_pmu->base; + + ext -= 1; + idx = find_next_zero_bit(used_mask, L3C_CNTR_EXT_H(ext), L3C_CNTR_EXT_L(ext)); + + if (idx >= L3C_CNTR_EXT_H(ext)) + return -EAGAIN; + + set_bit(idx, used_mask); + + return idx; +} + +static u32 hisi_l3c_pmu_event_readl(struct hw_perf_event *hwc, u32 reg) +{ + return readl((void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_event_writel(struct hw_perf_event *hwc, u32 reg, u32 val) +{ + writel(val, (void __iomem *)hwc->event_base + reg); +} + +static u64 hisi_l3c_pmu_event_readq(struct hw_perf_event *hwc, u32 reg) +{ + return readq((void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_event_writeq(struct hw_perf_event *hwc, u32 reg, u64 val) +{ + writeq(val, (void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; u32 tt_req = hisi_get_tt_req(event); if (tt_req) { u32 val; /* Set request-type for tracetag */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val |= tt_req << L3C_TRACETAG_REQ_SHIFT; val |= L3C_TRACETAG_REQ_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); /* Enable request-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val |= L3C_TRACETAG_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); } } static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 tt_req = hisi_get_tt_req(event); if (tt_req) { u32 val; /* Clear request-type */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT); val &= ~L3C_TRACETAG_REQ_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); /* Disable request-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val &= ~L3C_TRACETAG_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); } } static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; u32 reg, reg_idx, shift, val; - int idx = hwc->idx; + int idx = L3C_HW_IDX(hwc->idx); /* * Select the appropriate datasource register(L3C_DATSRC_TYPE0/1). 
@@ -120,15 +234,15 @@ static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) reg_idx = idx % 4; shift = 8 * reg_idx; - val = readl(l3c_pmu->base + reg); + val = hisi_l3c_pmu_event_readl(hwc, reg); val &= ~(L3C_DATSRC_MASK << shift); val |= ds_cfg << shift; - writel(val, l3c_pmu->base + reg); + hisi_l3c_pmu_event_writel(hwc, reg, val); } static void hisi_l3c_pmu_config_ds(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 ds_cfg = hisi_get_datasrc_cfg(event); u32 ds_skt = hisi_get_datasrc_skt(event); @@ -138,15 +252,15 @@ static void hisi_l3c_pmu_config_ds(struct perf_event *event) if (ds_skt) { u32 val; - val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL); val |= L3C_DATSRC_SKT_EN; - writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val); } } static void hisi_l3c_pmu_clear_ds(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 ds_cfg = hisi_get_datasrc_cfg(event); u32 ds_skt = hisi_get_datasrc_skt(event); @@ -156,57 +270,63 @@ static void hisi_l3c_pmu_clear_ds(struct perf_event *event) if (ds_skt) { u32 val; - val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL); val &= ~L3C_DATSRC_SKT_EN; - writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val); } } static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); - u32 core = hisi_get_tt_core(event); + struct hw_perf_event *hwc = &event->hw; + u32 core = hisi_l3c_pmu_get_tt_core(event); if (core) { u32 val; /* Config and enable core information */ - writel(core, l3c_pmu->base + L3C_CORE_CTRL); - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, core); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val |= L3C_CORE_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); /* Enable core-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val |= L3C_TRACETAG_CORE_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); } } static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); - u32 core = hisi_get_tt_core(event); + struct hw_perf_event *hwc = &event->hw; + u32 core = hisi_l3c_pmu_get_tt_core(event); if (core) { u32 val; /* Clear core information */ - writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL); - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, L3C_COER_NONE); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val &= ~L3C_CORE_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); /* Disable core-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val &= ~L3C_TRACETAG_CORE_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); } } +static bool hisi_l3c_pmu_have_filter(struct perf_event *event) +{ + return hisi_get_tt_req(event) || hisi_l3c_pmu_get_tt_core(event) || + hisi_get_datasrc_cfg(event) 
|| hisi_get_datasrc_skt(event); +} + static void hisi_l3c_pmu_enable_filter(struct perf_event *event) { - if (event->attr.config1 != 0x0) { + if (hisi_l3c_pmu_have_filter(event)) { hisi_l3c_pmu_config_req_tracetag(event); hisi_l3c_pmu_config_core_tracetag(event); hisi_l3c_pmu_config_ds(event); @@ -215,38 +335,56 @@ static void hisi_l3c_pmu_enable_filter(struct perf_event *event) static void hisi_l3c_pmu_disable_filter(struct perf_event *event) { - if (event->attr.config1 != 0x0) { + if (hisi_l3c_pmu_have_filter(event)) { hisi_l3c_pmu_clear_ds(event); hisi_l3c_pmu_clear_core_tracetag(event); hisi_l3c_pmu_clear_req_tracetag(event); } } +static int hisi_l3c_pmu_check_filter(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + int ext = hisi_get_ext(event); + + if (ext < 0 || ext > hisi_l3c_pmu->ext_num) + return -EINVAL; + + if (hisi_get_tt_core(event) && hisi_get_tt_core_deprecated(event)) + return -EINVAL; + + return 0; +} + /* * Select the counter register offset using the counter index */ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx) { - return (L3C_CNTR0_LOWER + (cntr_idx * 8)); + return L3C_CNTR0_LOWER + L3C_HW_IDX(cntr_idx) * 8; } static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc) { - return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); + return hisi_l3c_pmu_event_readq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx)); } static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc, u64 val) { - writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); + hisi_l3c_pmu_event_writeq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx), val); } static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, u32 type) { + struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; u32 reg, reg_idx, shift, val; + idx = L3C_HW_IDX(idx); + /* * Select the appropriate event select register(L3C_EVENT_TYPE0/1). * There are 2 event select registers for the 8 hardware counters. @@ -259,36 +397,72 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, shift = 8 * reg_idx; /* Write event code to L3C_EVENT_TYPEx Register */ - val = readl(l3c_pmu->base + reg); + val = hisi_l3c_pmu_event_readl(hwc, reg); val &= ~(L3C_EVTYPE_NONE << shift); - val |= (type << shift); - writel(val, l3c_pmu->base + reg); + val |= type << shift; + hisi_l3c_pmu_event_writel(hwc, reg, val); } static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; + unsigned long used_cntr = find_first_bit(used_mask, l3c_pmu->num_counters); u32 val; + int i; /* - * Set perf_enable bit in L3C_PERF_CTRL register to start counting - * for all enabled counters. + * Check if any counter belongs to the normal range (instead of ext + * range). If so, enable it. */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); - val |= L3C_PERF_CTRL_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + if (used_cntr < L3C_NR_COUNTERS) { + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val |= L3C_PERF_CTRL_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } + + /* If not, do enable it on ext ranges. */ + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) { + /* Find used counter in this ext range, skip the range if not. 
*/ + used_cntr = find_next_bit(used_mask, L3C_CNTR_EXT_H(i), L3C_CNTR_EXT_L(i)); + if (used_cntr >= L3C_CNTR_EXT_H(i)) + continue; + + val = readl(hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL); + val |= L3C_PERF_CTRL_EN; + writel(val, hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL); + } } static void hisi_l3c_pmu_stop_counters(struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; + unsigned long used_cntr = find_first_bit(used_mask, l3c_pmu->num_counters); u32 val; + int i; /* - * Clear perf_enable bit in L3C_PERF_CTRL register to stop counting - * for all enabled counters. + * Check if any counter belongs to the normal range (instead of ext + * range). If so, stop it. */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); - val &= ~(L3C_PERF_CTRL_EN); - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + if (used_cntr < L3C_NR_COUNTERS) { + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val &= ~L3C_PERF_CTRL_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } + + /* If not, do stop it on ext ranges. */ + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) { + /* Find used counter in this ext range, skip the range if not. */ + used_cntr = find_next_bit(used_mask, L3C_CNTR_EXT_H(i), L3C_CNTR_EXT_L(i)); + if (used_cntr >= L3C_CNTR_EXT_H(i)) + continue; + + val = readl(hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL); + val &= ~L3C_PERF_CTRL_EN; + writel(val, hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL); + } } static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu, @@ -297,9 +471,9 @@ static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu, u32 val; /* Enable counter index in L3C_EVENT_CTRL register */ - val = readl(l3c_pmu->base + L3C_EVENT_CTRL); - val |= (1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_EVENT_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); + val |= 1 << L3C_HW_IDX(hwc->idx); + hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu, @@ -308,9 +482,9 @@ static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu, u32 val; /* Clear counter index in L3C_EVENT_CTRL register */ - val = readl(l3c_pmu->base + L3C_EVENT_CTRL); - val &= ~(1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_EVENT_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); + val &= ~(1 << L3C_HW_IDX(hwc->idx)); + hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu, @@ -318,10 +492,10 @@ static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu, { u32 val; - val = readl(l3c_pmu->base + L3C_INT_MASK); + val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 0 to enable interrupt */ - val &= ~(1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_INT_MASK); + val &= ~(1 << L3C_HW_IDX(hwc->idx)); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, @@ -329,48 +503,61 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, { u32 val; - val = readl(l3c_pmu->base + L3C_INT_MASK); + val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 1 to mask interrupt */ - val |= (1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_INT_MASK); + val |= 1 << L3C_HW_IDX(hwc->idx); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) { - return readl(l3c_pmu->base + L3C_INT_STATUS); + struct hisi_l3c_pmu *hisi_l3c_pmu 
= to_hisi_l3c_pmu(l3c_pmu); + u32 ext_int, status, status_ext = 0; + int i; + + status = readl(l3c_pmu->base + L3C_INT_STATUS); + + if (!support_ext(hisi_l3c_pmu)) + return status; + + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) { + ext_int = readl(hisi_l3c_pmu->ext_base[i] + L3C_INT_STATUS); + status_ext |= ext_int << (L3C_NR_COUNTERS * i); + } + + return status | (status_ext << L3C_NR_COUNTERS); } static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) { - writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR); -} + struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; -static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { - { "HISI0213", }, - { "HISI0214", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << L3C_HW_IDX(idx)); +} static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { + hisi_uncore_pmu_init_topology(l3c_pmu, &pdev->dev); + /* * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &l3c_pmu->sccl_id)) { + if (l3c_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read l3c sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &l3c_pmu->ccl_id)) { + if (l3c_pmu->topo.ccl_id < 0) { dev_err(&pdev->dev, "Can not read l3c ccl-id!\n"); return -EINVAL; } + l3c_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!l3c_pmu->dev_info) + return -ENODEV; + l3c_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(l3c_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n"); @@ -382,6 +569,50 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return 0; } +static int hisi_l3c_pmu_init_ext(struct hisi_pmu *l3c_pmu, struct platform_device *pdev) +{ + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + int ret, irq, ext_num, i; + char *irqname; + + /* HiSilicon L3C PMU supporting ext should have more than 1 irq resources. */ + ext_num = platform_irq_count(pdev); + if (ext_num < L3C_MAX_EXT) + return -ENODEV; + + /* + * The number of ext supported equals the number of irq - 1, since one + * of the irqs belongs to the normal part of PMU. 
+ */ + hisi_l3c_pmu->ext_num = ext_num - 1; + + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) { + hisi_l3c_pmu->ext_base[i] = devm_platform_ioremap_resource(pdev, i + 1); + if (IS_ERR(hisi_l3c_pmu->ext_base[i])) + return PTR_ERR(hisi_l3c_pmu->ext_base[i]); + + irq = platform_get_irq(pdev, i + 1); + if (irq < 0) + return irq; + + irqname = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s ext%d", + dev_name(&pdev->dev), i + 1); + if (!irqname) + return -ENOMEM; + + ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr, + IRQF_NOBALANCING | IRQF_NO_THREAD, + irqname, l3c_pmu); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Fail to request EXT IRQ: %d.\n", irq); + + hisi_l3c_pmu->ext_irq[i] = irq; + } + + return 0; +} + static struct attribute *hisi_l3c_pmu_v1_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), NULL, @@ -394,10 +625,11 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = { static struct attribute *hisi_l3c_pmu_v2_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), - HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"), + HISI_PMU_FORMAT_ATTR(tt_core_deprecated, "config1:0-7"), HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"), HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"), + HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"), NULL }; @@ -406,6 +638,20 @@ static const struct attribute_group hisi_l3c_pmu_v2_format_group = { .attrs = hisi_l3c_pmu_v2_format_attr, }; +static struct attribute *hisi_l3c_pmu_v3_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(ext, "config:16-17"), + HISI_PMU_FORMAT_ATTR(tt_core_deprecated, "config1:0-7"), + HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), + HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"), + NULL +}; + +static const struct attribute_group hisi_l3c_pmu_v3_format_group = { + .name = "format", + .attrs = hisi_l3c_pmu_v3_format_attr, +}; + static struct attribute *hisi_l3c_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00), HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01), @@ -441,48 +687,82 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = { .attrs = hisi_l3c_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = { - .attrs = hisi_l3c_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_l3c_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_l3c_pmu_identifier_attrs[] = { - &hisi_l3c_pmu_identifier_attr.attr, +static struct attribute *hisi_l3c_pmu_v3_events_attr[] = { + HISI_PMU_EVENT_ATTR(rd_spipe, 0x18), + HISI_PMU_EVENT_ATTR(rd_hit_spipe, 0x19), + HISI_PMU_EVENT_ATTR(wr_spipe, 0x1a), + HISI_PMU_EVENT_ATTR(wr_hit_spipe, 0x1b), + HISI_PMU_EVENT_ATTR(io_rd_spipe, 0x1c), + HISI_PMU_EVENT_ATTR(io_rd_hit_spipe, 0x1d), + HISI_PMU_EVENT_ATTR(io_wr_spipe, 0x1e), + HISI_PMU_EVENT_ATTR(io_wr_hit_spipe, 0x1f), + HISI_PMU_EVENT_ATTR(cycles, 0x7f), + HISI_PMU_EVENT_ATTR(l3c_ref, 0xbc), + HISI_PMU_EVENT_ATTR(l3c2ring, 0xbd), NULL }; -static const struct attribute_group hisi_l3c_pmu_identifier_group = { - .attrs = hisi_l3c_pmu_identifier_attrs, +static const struct attribute_group hisi_l3c_pmu_v3_events_group = { + .name = "events", + .attrs = hisi_l3c_pmu_v3_events_attr, }; static const struct attribute_group 
*hisi_l3c_pmu_v1_attr_groups[] = { &hisi_l3c_pmu_v1_format_group, &hisi_l3c_pmu_v1_events_group, - &hisi_l3c_pmu_cpumask_attr_group, - &hisi_l3c_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { &hisi_l3c_pmu_v2_format_group, &hisi_l3c_pmu_v2_events_group, - &hisi_l3c_pmu_cpumask_attr_group, - &hisi_l3c_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + +static const struct attribute_group *hisi_l3c_pmu_v3_attr_groups[] = { + &hisi_l3c_pmu_v3_format_group, + &hisi_l3c_pmu_v3_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; +static struct hisi_l3c_pmu_ext hisi_l3c_pmu_support_ext = { + .support_ext = true, +}; + +static struct hisi_l3c_pmu_ext hisi_l3c_pmu_not_support_ext = { + .support_ext = false, +}; + +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v1 = { + .attr_groups = hisi_l3c_pmu_v1_attr_groups, + .counter_bits = 48, + .check_event = L3C_V1_NR_EVENTS, + .private = &hisi_l3c_pmu_not_support_ext, +}; + +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = { + .attr_groups = hisi_l3c_pmu_v2_attr_groups, + .counter_bits = 64, + .check_event = L3C_V2_NR_EVENTS, + .private = &hisi_l3c_pmu_not_support_ext, +}; + +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v3 = { + .attr_groups = hisi_l3c_pmu_v3_attr_groups, + .counter_bits = 64, + .check_event = L3C_V2_NR_EVENTS, + .private = &hisi_l3c_pmu_support_ext, +}; + static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .write_evtype = hisi_l3c_pmu_write_evtype, - .get_event_idx = hisi_uncore_pmu_get_event_idx, + .get_event_idx = hisi_l3c_pmu_get_event_idx, .start_counters = hisi_l3c_pmu_start_counters, .stop_counters = hisi_l3c_pmu_stop_counters, .enable_counter = hisi_l3c_pmu_enable_counter, @@ -495,11 +775,14 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .clear_int_status = hisi_l3c_pmu_clear_int_status, .enable_filter = hisi_l3c_pmu_enable_filter, .disable_filter = hisi_l3c_pmu_disable_filter, + .check_filter = hisi_l3c_pmu_check_filter, }; static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + struct hisi_l3c_pmu_ext *l3c_pmu_dev_ext; int ret; ret = hisi_l3c_pmu_init_data(pdev, l3c_pmu); @@ -510,42 +793,55 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - if (l3c_pmu->identifier >= HISI_PMU_V2) { - l3c_pmu->counter_bits = 64; - l3c_pmu->check_event = L3C_V2_NR_EVENTS; - l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups; - } else { - l3c_pmu->counter_bits = 48; - l3c_pmu->check_event = L3C_V1_NR_EVENTS; - l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups; - } - + l3c_pmu->pmu_events.attr_groups = l3c_pmu->dev_info->attr_groups; + l3c_pmu->counter_bits = l3c_pmu->dev_info->counter_bits; + l3c_pmu->check_event = l3c_pmu->dev_info->check_event; l3c_pmu->num_counters = L3C_NR_COUNTERS; l3c_pmu->ops = &hisi_uncore_l3c_ops; l3c_pmu->dev = &pdev->dev; l3c_pmu->on_cpu = -1; + l3c_pmu_dev_ext = l3c_pmu->dev_info->private; + if (l3c_pmu_dev_ext->support_ext) { + ret = hisi_l3c_pmu_init_ext(l3c_pmu, pdev); + if (ret) + return ret; + /* + * The extension events have their own counters with the + * same number of the normal events counters. So we can + * have at maximum num_counters * ext events monitored. 
+ */ + l3c_pmu->num_counters += hisi_l3c_pmu->ext_num * L3C_NR_COUNTERS; + } + return 0; } static int hisi_l3c_pmu_probe(struct platform_device *pdev) { + struct hisi_l3c_pmu *hisi_l3c_pmu; struct hisi_pmu *l3c_pmu; char *name; int ret; - l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu), GFP_KERNEL); - if (!l3c_pmu) + hisi_l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*hisi_l3c_pmu), GFP_KERNEL); + if (!hisi_l3c_pmu) return -ENOMEM; + l3c_pmu = &hisi_l3c_pmu->l3c_pmu; platform_set_drvdata(pdev, l3c_pmu); ret = hisi_l3c_pmu_dev_probe(pdev, l3c_pmu); if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", - l3c_pmu->sccl_id, l3c_pmu->ccl_id); + if (l3c_pmu->topo.sub_id >= 0) + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d_%d", + l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id, + l3c_pmu->topo.sub_id); + else + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d", + l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id); if (!name) return -ENOMEM; @@ -577,6 +873,14 @@ static void hisi_l3c_pmu_remove(struct platform_device *pdev) &l3c_pmu->node); } +static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { + { "HISI0213", (kernel_ulong_t)&hisi_l3c_pmu_v1 }, + { "HISI0214", (kernel_ulong_t)&hisi_l3c_pmu_v2 }, + { "HISI0215", (kernel_ulong_t)&hisi_l3c_pmu_v3 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); + static struct platform_driver hisi_l3c_pmu_driver = { .driver = { .name = "hisi_l3c_pmu", @@ -587,14 +891,60 @@ static struct platform_driver hisi_l3c_pmu_driver = { .remove = hisi_l3c_pmu_remove, }; +static int hisi_l3c_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + int ret, i; + + ret = hisi_uncore_pmu_online_cpu(cpu, node); + if (ret) + return ret; + + /* Avoid L3C pmu not supporting ext from ext irq migrating. */ + if (!support_ext(hisi_l3c_pmu)) + return 0; + + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) + WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq[i], + cpumask_of(l3c_pmu->on_cpu))); + + return 0; +} + +static int hisi_l3c_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + int ret, i; + + ret = hisi_uncore_pmu_offline_cpu(cpu, node); + if (ret) + return ret; + + /* If failed to find any available CPU, skip irq migration. */ + if (l3c_pmu->on_cpu < 0) + return 0; + + /* Avoid L3C pmu not supporting ext from ext irq migrating. 
*/ + if (!support_ext(hisi_l3c_pmu)) + return 0; + + for (i = 0; i < hisi_l3c_pmu->ext_num; i++) + WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq[i], + cpumask_of(l3c_pmu->on_cpu))); + + return 0; +} + static int __init hisi_l3c_pmu_module_init(void) { int ret; ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, "AP_PERF_ARM_HISI_L3_ONLINE", - hisi_uncore_pmu_online_cpu, - hisi_uncore_pmu_offline_cpu); + hisi_l3c_pmu_online_cpu, + hisi_l3c_pmu_offline_cpu); if (ret) { pr_err("L3C PMU: Error setup hotplug, ret = %d\n", ret); return ret; @@ -615,6 +965,7 @@ static void __exit hisi_l3c_pmu_module_exit(void) } module_exit(hisi_l3c_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c new file mode 100644 index 000000000000..4df4eebe243e --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC MN uncore Hardware event counters support + * + * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. + */ +#include <linux/cpuhotplug.h> +#include <linux/interrupt.h> +#include <linux/iopoll.h> +#include <linux/irq.h> +#include <linux/list.h> +#include <linux/mod_devicetable.h> +#include <linux/property.h> + +#include "hisi_uncore_pmu.h" + +/* Dynamic CPU hotplug state used by MN PMU */ +static enum cpuhp_state hisi_mn_pmu_online; + +/* MN register definition */ +#define HISI_MN_DYNAMIC_CTRL_REG 0x400 +#define HISI_MN_DYNAMIC_CTRL_EN BIT(0) +#define HISI_MN_PERF_CTRL_REG 0x408 +#define HISI_MN_PERF_CTRL_EN BIT(6) +#define HISI_MN_INT_MASK_REG 0x800 +#define HISI_MN_INT_STATUS_REG 0x808 +#define HISI_MN_INT_CLEAR_REG 0x80C +#define HISI_MN_EVENT_CTRL_REG 0x1C00 +#define HISI_MN_VERSION_REG 0x1C04 +#define HISI_MN_EVTYPE0_REG 0x1d00 +#define HISI_MN_EVTYPE_MASK GENMASK(7, 0) +#define HISI_MN_CNTR0_REG 0x1e00 +#define HISI_MN_EVTYPE_REGn(evtype0, n) ((evtype0) + (n) * 4) +#define HISI_MN_CNTR_REGn(cntr0, n) ((cntr0) + (n) * 8) + +#define HISI_MN_NR_COUNTERS 4 +#define HISI_MN_TIMEOUT_US 500U + +struct hisi_mn_pmu_regs { + u32 version; + u32 dyn_ctrl; + u32 perf_ctrl; + u32 int_mask; + u32 int_clear; + u32 int_status; + u32 event_ctrl; + u32 event_type0; + u32 event_cntr0; +}; + +/* + * Each event request takes a certain amount of time to complete. If + * we counting the latency related event, we need to wait for the all + * requests complete. Otherwise, the value of counter is slightly larger. 
+ */ +static void hisi_mn_pmu_counter_flush(struct hisi_pmu *mn_pmu) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + int ret; + u32 val; + + val = readl(mn_pmu->base + reg_info->dyn_ctrl); + val |= HISI_MN_DYNAMIC_CTRL_EN; + writel(val, mn_pmu->base + reg_info->dyn_ctrl); + + ret = readl_poll_timeout_atomic(mn_pmu->base + reg_info->dyn_ctrl, + val, !(val & HISI_MN_DYNAMIC_CTRL_EN), + 1, HISI_MN_TIMEOUT_US); + if (ret) + dev_warn(mn_pmu->dev, "Counter flush timeout\n"); +} + +static u64 hisi_mn_pmu_read_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + + return readq(mn_pmu->base + HISI_MN_CNTR_REGn(reg_info->event_cntr0, hwc->idx)); +} + +static void hisi_mn_pmu_write_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc, u64 val) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + + writeq(val, mn_pmu->base + HISI_MN_CNTR_REGn(reg_info->event_cntr0, hwc->idx)); +} + +static void hisi_mn_pmu_write_evtype(struct hisi_pmu *mn_pmu, int idx, u32 type) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + /* + * Select the appropriate event select register. + * There are 2 32-bit event select registers for the + * 8 hardware counters, each event code is 8-bit wide. + */ + val = readl(mn_pmu->base + HISI_MN_EVTYPE_REGn(reg_info->event_type0, idx / 4)); + val &= ~(HISI_MN_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); + writel(val, mn_pmu->base + HISI_MN_EVTYPE_REGn(reg_info->event_type0, idx / 4)); +} + +static void hisi_mn_pmu_start_counters(struct hisi_pmu *mn_pmu) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->perf_ctrl); + val |= HISI_MN_PERF_CTRL_EN; + writel(val, mn_pmu->base + reg_info->perf_ctrl); +} + +static void hisi_mn_pmu_stop_counters(struct hisi_pmu *mn_pmu) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->perf_ctrl); + val &= ~HISI_MN_PERF_CTRL_EN; + writel(val, mn_pmu->base + reg_info->perf_ctrl); + + hisi_mn_pmu_counter_flush(mn_pmu); +} + +static void hisi_mn_pmu_enable_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->event_ctrl); + val |= BIT(hwc->idx); + writel(val, mn_pmu->base + reg_info->event_ctrl); +} + +static void hisi_mn_pmu_disable_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->event_ctrl); + val &= ~BIT(hwc->idx); + writel(val, mn_pmu->base + reg_info->event_ctrl); +} + +static void hisi_mn_pmu_enable_counter_int(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->int_mask); + val &= ~BIT(hwc->idx); + writel(val, mn_pmu->base + reg_info->int_mask); +} + +static void hisi_mn_pmu_disable_counter_int(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + u32 val; + + val = readl(mn_pmu->base + reg_info->int_mask); + val |= BIT(hwc->idx); + writel(val, mn_pmu->base + reg_info->int_mask); +} + +static u32 hisi_mn_pmu_get_int_status(struct hisi_pmu *mn_pmu) +{ + struct 
hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + + return readl(mn_pmu->base + reg_info->int_status); +} + +static void hisi_mn_pmu_clear_int_status(struct hisi_pmu *mn_pmu, int idx) +{ + struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private; + + writel(BIT(idx), mn_pmu->base + reg_info->int_clear); +} + +static struct attribute *hisi_mn_pmu_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL +}; + +static const struct attribute_group hisi_mn_pmu_format_group = { + .name = "format", + .attrs = hisi_mn_pmu_format_attr, +}; + +static struct attribute *hisi_mn_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(req_eobarrier_num, 0x00), + HISI_PMU_EVENT_ATTR(req_ecbarrier_num, 0x01), + HISI_PMU_EVENT_ATTR(req_dvmop_num, 0x02), + HISI_PMU_EVENT_ATTR(req_dvmsync_num, 0x03), + HISI_PMU_EVENT_ATTR(req_retry_num, 0x04), + HISI_PMU_EVENT_ATTR(req_writenosnp_num, 0x05), + HISI_PMU_EVENT_ATTR(req_readnosnp_num, 0x06), + HISI_PMU_EVENT_ATTR(snp_dvm_num, 0x07), + HISI_PMU_EVENT_ATTR(snp_dvmsync_num, 0x08), + HISI_PMU_EVENT_ATTR(l3t_req_dvm_num, 0x09), + HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_num, 0x0A), + HISI_PMU_EVENT_ATTR(mn_req_dvm_num, 0x0B), + HISI_PMU_EVENT_ATTR(mn_req_dvmsync_num, 0x0C), + HISI_PMU_EVENT_ATTR(pa_req_dvm_num, 0x0D), + HISI_PMU_EVENT_ATTR(pa_req_dvmsync_num, 0x0E), + HISI_PMU_EVENT_ATTR(snp_dvm_latency, 0x80), + HISI_PMU_EVENT_ATTR(snp_dvmsync_latency, 0x81), + HISI_PMU_EVENT_ATTR(l3t_req_dvm_latency, 0x82), + HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_latency, 0x83), + HISI_PMU_EVENT_ATTR(mn_req_dvm_latency, 0x84), + HISI_PMU_EVENT_ATTR(mn_req_dvmsync_latency, 0x85), + HISI_PMU_EVENT_ATTR(pa_req_dvm_latency, 0x86), + HISI_PMU_EVENT_ATTR(pa_req_dvmsync_latency, 0x87), + NULL +}; + +static const struct attribute_group hisi_mn_pmu_events_group = { + .name = "events", + .attrs = hisi_mn_pmu_events_attr, +}; + +static const struct attribute_group *hisi_mn_pmu_attr_groups[] = { + &hisi_mn_pmu_format_group, + &hisi_mn_pmu_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_mn_ops = { + .write_evtype = hisi_mn_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_mn_pmu_start_counters, + .stop_counters = hisi_mn_pmu_stop_counters, + .enable_counter = hisi_mn_pmu_enable_counter, + .disable_counter = hisi_mn_pmu_disable_counter, + .enable_counter_int = hisi_mn_pmu_enable_counter_int, + .disable_counter_int = hisi_mn_pmu_disable_counter_int, + .write_counter = hisi_mn_pmu_write_counter, + .read_counter = hisi_mn_pmu_read_counter, + .get_int_status = hisi_mn_pmu_get_int_status, + .clear_int_status = hisi_mn_pmu_clear_int_status, +}; + +static int hisi_mn_pmu_dev_init(struct platform_device *pdev, + struct hisi_pmu *mn_pmu) +{ + struct hisi_mn_pmu_regs *reg_info; + int ret; + + hisi_uncore_pmu_init_topology(mn_pmu, &pdev->dev); + + if (mn_pmu->topo.scl_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, + "Failed to read MN scl id\n"); + + if (mn_pmu->topo.index_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, + "Failed to read MN index id\n"); + + mn_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mn_pmu->base)) + return dev_err_probe(&pdev->dev, PTR_ERR(mn_pmu->base), + "Failed to ioremap resource\n"); + + ret = hisi_uncore_pmu_init_irq(mn_pmu, pdev); + if (ret) + return ret; + + mn_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!mn_pmu->dev_info) + return -ENODEV; + + mn_pmu->pmu_events.attr_groups = 
mn_pmu->dev_info->attr_groups; + mn_pmu->counter_bits = mn_pmu->dev_info->counter_bits; + mn_pmu->check_event = mn_pmu->dev_info->check_event; + mn_pmu->num_counters = HISI_MN_NR_COUNTERS; + mn_pmu->ops = &hisi_uncore_mn_ops; + mn_pmu->dev = &pdev->dev; + mn_pmu->on_cpu = -1; + + reg_info = mn_pmu->dev_info->private; + mn_pmu->identifier = readl(mn_pmu->base + reg_info->version); + + return 0; +} + +static void hisi_mn_pmu_remove_cpuhp(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(hisi_mn_pmu_online, hotplug_node); +} + +static void hisi_mn_pmu_unregister(void *pmu) +{ + perf_pmu_unregister(pmu); +} + +static int hisi_mn_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *mn_pmu; + char *name; + int ret; + + mn_pmu = devm_kzalloc(&pdev->dev, sizeof(*mn_pmu), GFP_KERNEL); + if (!mn_pmu) + return -ENOMEM; + + platform_set_drvdata(pdev, mn_pmu); + + ret = hisi_mn_pmu_dev_init(pdev, mn_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_scl%d_mn%d", + mn_pmu->topo.scl_id, mn_pmu->topo.index_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(hisi_mn_pmu_online, &mn_pmu->node); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to register cpu hotplug\n"); + + ret = devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_remove_cpuhp, &mn_pmu->node); + if (ret) + return ret; + + hisi_pmu_init(mn_pmu, THIS_MODULE); + + ret = perf_pmu_register(&mn_pmu->pmu, name, -1); + if (ret) + return dev_err_probe(mn_pmu->dev, ret, "Failed to register MN PMU\n"); + + return devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_unregister, &mn_pmu->pmu); +} + +static struct hisi_mn_pmu_regs hisi_mn_v1_pmu_regs = { + .version = HISI_MN_VERSION_REG, + .dyn_ctrl = HISI_MN_DYNAMIC_CTRL_REG, + .perf_ctrl = HISI_MN_PERF_CTRL_REG, + .int_mask = HISI_MN_INT_MASK_REG, + .int_clear = HISI_MN_INT_CLEAR_REG, + .int_status = HISI_MN_INT_STATUS_REG, + .event_ctrl = HISI_MN_EVENT_CTRL_REG, + .event_type0 = HISI_MN_EVTYPE0_REG, + .event_cntr0 = HISI_MN_CNTR0_REG, +}; + +static const struct hisi_pmu_dev_info hisi_mn_v1 = { + .attr_groups = hisi_mn_pmu_attr_groups, + .counter_bits = 48, + .check_event = HISI_MN_EVTYPE_MASK, + .private = &hisi_mn_v1_pmu_regs, +}; + +static const struct acpi_device_id hisi_mn_pmu_acpi_match[] = { + { "HISI0222", (kernel_ulong_t) &hisi_mn_v1 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, hisi_mn_pmu_acpi_match); + +static struct platform_driver hisi_mn_pmu_driver = { + .driver = { + .name = "hisi_mn_pmu", + .acpi_match_table = hisi_mn_pmu_acpi_match, + /* + * We have not worked out a safe bind/unbind process, + * Forcefully unbinding during sampling will lead to a + * kernel panic, so this is not supported yet. 
+ */ + .suppress_bind_attrs = true, + }, + .probe = hisi_mn_pmu_probe, +}; + +static int __init hisi_mn_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/mn:online", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret < 0) { + pr_err("hisi_mn_pmu: Failed to setup MN PMU hotplug: %d\n", ret); + return ret; + } + hisi_mn_pmu_online = ret; + + ret = platform_driver_register(&hisi_mn_pmu_driver); + if (ret) + cpuhp_remove_multi_state(hisi_mn_pmu_online); + + return ret; +} +module_init(hisi_mn_pmu_module_init); + +static void __exit hisi_mn_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_mn_pmu_driver); + cpuhp_remove_multi_state(hisi_mn_pmu_online); +} +module_exit(hisi_mn_pmu_module_exit); + +MODULE_IMPORT_NS("HISI_PMU"); +MODULE_DESCRIPTION("HiSilicon SoC MN uncore PMU driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Junhao He <hejunhao3@huawei.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c new file mode 100644 index 000000000000..de3b9cc7aada --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c @@ -0,0 +1,443 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for HiSilicon Uncore NoC (Network on Chip) PMU device + * + * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. + * Author: Yicong Yang <yangyicong@hisilicon.com> + */ +#include <linux/bitops.h> +#include <linux/cpuhotplug.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/property.h> +#include <linux/sysfs.h> + +#include "hisi_uncore_pmu.h" + +#define NOC_PMU_VERSION 0x1e00 +#define NOC_PMU_GLOBAL_CTRL 0x1e04 +#define NOC_PMU_GLOBAL_CTRL_PMU_EN BIT(0) +#define NOC_PMU_GLOBAL_CTRL_TT_EN BIT(1) +#define NOC_PMU_CNT_INFO 0x1e08 +#define NOC_PMU_CNT_INFO_OVERFLOW(n) BIT(n) +#define NOC_PMU_EVENT_CTRL0 0x1e20 +#define NOC_PMU_EVENT_CTRL_TYPE GENMASK(4, 0) +/* + * Note that a channel of 0x0 will reset the counter value, so don't do it before + * we read out the counter. + */ +#define NOC_PMU_EVENT_CTRL_CHANNEL GENMASK(10, 8) +#define NOC_PMU_EVENT_CTRL_EN BIT(11) +#define NOC_PMU_EVENT_COUNTER0 0x1e80 + +#define NOC_PMU_NR_COUNTERS 4 +#define NOC_PMU_CH_DEFAULT 0x7 + +#define NOC_PMU_EVENT_CTRLn(ctrl0, n) ((ctrl0) + 4 * (n)) +#define NOC_PMU_EVENT_CNTRn(cntr0, n) ((cntr0) + 8 * (n)) + +HISI_PMU_EVENT_ATTR_EXTRACTOR(ch, config1, 2, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_en, config1, 3, 3); + +/* Dynamic CPU hotplug state used by this PMU driver */ +static enum cpuhp_state hisi_noc_pmu_cpuhp_state; + +struct hisi_noc_pmu_regs { + u32 version; + u32 pmu_ctrl; + u32 event_ctrl0; + u32 event_cntr0; + u32 overflow_status; +}; + +/* + * Tracetag filtering is not per event; all the events should be configured + * consistently. Return true if the newcomer doesn't match the + * tracetag filtering configuration of the currently scheduled events.
+ */ +static bool hisi_noc_pmu_check_global_filter(struct perf_event *curr, + struct perf_event *new) +{ + return hisi_get_tt_en(curr) == hisi_get_tt_en(new); +} + +static void hisi_noc_pmu_write_evtype(struct hisi_pmu *noc_pmu, int idx, u32 type) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, idx)); + reg &= ~NOC_PMU_EVENT_CTRL_TYPE; + reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_TYPE, type); + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, idx)); +} + +static int hisi_noc_pmu_get_event_idx(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + struct hisi_pmu_hwevents *pmu_events = &noc_pmu->pmu_events; + int cur_idx; + + cur_idx = find_first_bit(pmu_events->used_mask, noc_pmu->num_counters); + if (cur_idx != noc_pmu->num_counters && + !hisi_noc_pmu_check_global_filter(pmu_events->hw_events[cur_idx], event)) + return -EAGAIN; + + return hisi_uncore_pmu_get_event_idx(event); +} + +static u64 hisi_noc_pmu_read_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + + return readq(noc_pmu->base + NOC_PMU_EVENT_CNTRn(reg_info->event_cntr0, hwc->idx)); +} + +static void hisi_noc_pmu_write_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + + writeq(val, noc_pmu->base + NOC_PMU_EVENT_CNTRn(reg_info->event_cntr0, hwc->idx)); +} + +static void hisi_noc_pmu_enable_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); + reg |= NOC_PMU_EVENT_CTRL_EN; + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); +} + +static void hisi_noc_pmu_disable_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); + reg &= ~NOC_PMU_EVENT_CTRL_EN; + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); +} + +static void hisi_noc_pmu_enable_counter_int(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + /* We don't support interrupt, so a stub here. 
*/ +} + +static void hisi_noc_pmu_disable_counter_int(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ +} + +static void hisi_noc_pmu_start_counters(struct hisi_pmu *noc_pmu) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + reg_info->pmu_ctrl); + reg |= NOC_PMU_GLOBAL_CTRL_PMU_EN; + writel(reg, noc_pmu->base + reg_info->pmu_ctrl); +} + +static void hisi_noc_pmu_stop_counters(struct hisi_pmu *noc_pmu) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + reg_info->pmu_ctrl); + reg &= ~NOC_PMU_GLOBAL_CTRL_PMU_EN; + writel(reg, noc_pmu->base + reg_info->pmu_ctrl); +} + +static u32 hisi_noc_pmu_get_int_status(struct hisi_pmu *noc_pmu) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + + return readl(noc_pmu->base + reg_info->overflow_status); +} + +static void hisi_noc_pmu_clear_int_status(struct hisi_pmu *noc_pmu, int idx) +{ + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 reg; + + reg = readl(noc_pmu->base + reg_info->overflow_status); + reg &= ~NOC_PMU_CNT_INFO_OVERFLOW(idx); + writel(reg, noc_pmu->base + reg_info->overflow_status); +} + +static void hisi_noc_pmu_enable_filter(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + struct hw_perf_event *hwc = &event->hw; + u32 tt_en = hisi_get_tt_en(event); + u32 ch = hisi_get_ch(event); + u32 reg; + + if (!ch) + ch = NOC_PMU_CH_DEFAULT; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); + reg &= ~NOC_PMU_EVENT_CTRL_CHANNEL; + reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_CHANNEL, ch); + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx)); + + /* + * Since tracetag filter applies to all the counters, don't touch it + * if user doesn't specify it explicitly. + */ + if (tt_en) { + reg = readl(noc_pmu->base + reg_info->pmu_ctrl); + reg |= NOC_PMU_GLOBAL_CTRL_TT_EN; + writel(reg, noc_pmu->base + reg_info->pmu_ctrl); + } +} + +static void hisi_noc_pmu_disable_filter(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private; + u32 tt_en = hisi_get_tt_en(event); + u32 reg; + + /* + * If we're not the last counter, don't touch the global tracetag + * configuration. 
+ */ + if (bitmap_weight(noc_pmu->pmu_events.used_mask, noc_pmu->num_counters) > 1) + return; + + if (tt_en) { + reg = readl(noc_pmu->base + reg_info->pmu_ctrl); + reg &= ~NOC_PMU_GLOBAL_CTRL_TT_EN; + writel(reg, noc_pmu->base + reg_info->pmu_ctrl); + } +} + +static const struct hisi_uncore_ops hisi_uncore_noc_ops = { + .write_evtype = hisi_noc_pmu_write_evtype, + .get_event_idx = hisi_noc_pmu_get_event_idx, + .read_counter = hisi_noc_pmu_read_counter, + .write_counter = hisi_noc_pmu_write_counter, + .enable_counter = hisi_noc_pmu_enable_counter, + .disable_counter = hisi_noc_pmu_disable_counter, + .enable_counter_int = hisi_noc_pmu_enable_counter_int, + .disable_counter_int = hisi_noc_pmu_disable_counter_int, + .start_counters = hisi_noc_pmu_start_counters, + .stop_counters = hisi_noc_pmu_stop_counters, + .get_int_status = hisi_noc_pmu_get_int_status, + .clear_int_status = hisi_noc_pmu_clear_int_status, + .enable_filter = hisi_noc_pmu_enable_filter, + .disable_filter = hisi_noc_pmu_disable_filter, +}; + +static struct attribute *hisi_noc_pmu_format_attrs[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(ch, "config1:0-2"), + HISI_PMU_FORMAT_ATTR(tt_en, "config1:3"), + NULL +}; + +static const struct attribute_group hisi_noc_pmu_format_group = { + .name = "format", + .attrs = hisi_noc_pmu_format_attrs, +}; + +static struct attribute *hisi_noc_pmu_events_attrs[] = { + HISI_PMU_EVENT_ATTR(cycles, 0x0e), + /* Flux on/off the ring */ + HISI_PMU_EVENT_ATTR(ingress_flow_sum, 0x1a), + HISI_PMU_EVENT_ATTR(egress_flow_sum, 0x17), + /* Buffer full duration on/off the ring */ + HISI_PMU_EVENT_ATTR(ingress_buf_full, 0x19), + HISI_PMU_EVENT_ATTR(egress_buf_full, 0x12), + /* Failure packets count on/off the ring */ + HISI_PMU_EVENT_ATTR(cw_ingress_fail, 0x01), + HISI_PMU_EVENT_ATTR(cc_ingress_fail, 0x09), + HISI_PMU_EVENT_ATTR(cw_egress_fail, 0x03), + HISI_PMU_EVENT_ATTR(cc_egress_fail, 0x0b), + /* Flux of the ring */ + HISI_PMU_EVENT_ATTR(cw_main_flow_sum, 0x05), + HISI_PMU_EVENT_ATTR(cc_main_flow_sum, 0x0d), + NULL +}; + +static const struct attribute_group hisi_noc_pmu_events_group = { + .name = "events", + .attrs = hisi_noc_pmu_events_attrs, +}; + +static const struct attribute_group *hisi_noc_pmu_attr_groups[] = { + &hisi_noc_pmu_format_group, + &hisi_noc_pmu_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + +static int hisi_noc_pmu_dev_init(struct platform_device *pdev, struct hisi_pmu *noc_pmu) +{ + struct hisi_noc_pmu_regs *reg_info; + + hisi_uncore_pmu_init_topology(noc_pmu, &pdev->dev); + + if (noc_pmu->topo.scl_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get scl-id\n"); + + if (noc_pmu->topo.index_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get idx-id\n"); + + if (noc_pmu->topo.sub_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get sub-id\n"); + + noc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(noc_pmu->base)) + return dev_err_probe(&pdev->dev, PTR_ERR(noc_pmu->base), + "fail to remap io memory\n"); + + noc_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!noc_pmu->dev_info) + return -ENODEV; + + noc_pmu->pmu_events.attr_groups = noc_pmu->dev_info->attr_groups; + noc_pmu->counter_bits = noc_pmu->dev_info->counter_bits; + noc_pmu->check_event = noc_pmu->dev_info->check_event; + noc_pmu->num_counters = NOC_PMU_NR_COUNTERS; + noc_pmu->ops = &hisi_uncore_noc_ops; + noc_pmu->dev = &pdev->dev; + noc_pmu->on_cpu = -1; + + reg_info = 
noc_pmu->dev_info->private; + noc_pmu->identifier = readl(noc_pmu->base + reg_info->version); + + return 0; +} + +static void hisi_noc_pmu_remove_cpuhp_instance(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(hisi_noc_pmu_cpuhp_state, hotplug_node); +} + +static void hisi_noc_pmu_unregister_pmu(void *pmu) +{ + perf_pmu_unregister(pmu); +} + +static int hisi_noc_pmu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct hisi_pmu *noc_pmu; + char *name; + int ret; + + noc_pmu = devm_kzalloc(dev, sizeof(*noc_pmu), GFP_KERNEL); + if (!noc_pmu) + return -ENOMEM; + + /* + * HiSilicon Uncore PMU framework needs to get common hisi_pmu device + * from device's drvdata. + */ + platform_set_drvdata(pdev, noc_pmu); + + ret = hisi_noc_pmu_dev_init(pdev, noc_pmu); + if (ret) + return ret; + + ret = cpuhp_state_add_instance(hisi_noc_pmu_cpuhp_state, &noc_pmu->node); + if (ret) + return dev_err_probe(dev, ret, "Fail to register cpuhp instance\n"); + + ret = devm_add_action_or_reset(dev, hisi_noc_pmu_remove_cpuhp_instance, + &noc_pmu->node); + if (ret) + return ret; + + hisi_pmu_init(noc_pmu, THIS_MODULE); + + name = devm_kasprintf(dev, GFP_KERNEL, "hisi_scl%d_noc%d_%d", + noc_pmu->topo.scl_id, noc_pmu->topo.index_id, + noc_pmu->topo.sub_id); + if (!name) + return -ENOMEM; + + ret = perf_pmu_register(&noc_pmu->pmu, name, -1); + if (ret) + return dev_err_probe(dev, ret, "Fail to register PMU\n"); + + return devm_add_action_or_reset(dev, hisi_noc_pmu_unregister_pmu, + &noc_pmu->pmu); +} + +static struct hisi_noc_pmu_regs hisi_noc_v1_pmu_regs = { + .version = NOC_PMU_VERSION, + .pmu_ctrl = NOC_PMU_GLOBAL_CTRL, + .event_ctrl0 = NOC_PMU_EVENT_CTRL0, + .event_cntr0 = NOC_PMU_EVENT_COUNTER0, + .overflow_status = NOC_PMU_CNT_INFO, +}; + +static const struct hisi_pmu_dev_info hisi_noc_v1 = { + .attr_groups = hisi_noc_pmu_attr_groups, + .counter_bits = 64, + .check_event = NOC_PMU_EVENT_CTRL_TYPE, + .private = &hisi_noc_v1_pmu_regs, +}; + +static const struct acpi_device_id hisi_noc_pmu_ids[] = { + { "HISI04E0", (kernel_ulong_t) &hisi_noc_v1 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, hisi_noc_pmu_ids); + +static struct platform_driver hisi_noc_pmu_driver = { + .driver = { + .name = "hisi_noc_pmu", + .acpi_match_table = hisi_noc_pmu_ids, + .suppress_bind_attrs = true, + }, + .probe = hisi_noc_pmu_probe, +}; + +static int __init hisi_noc_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/noc:online", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret < 0) { + pr_err("hisi_noc_pmu: Fail to setup cpuhp callbacks, ret = %d\n", ret); + return ret; + } + hisi_noc_pmu_cpuhp_state = ret; + + ret = platform_driver_register(&hisi_noc_pmu_driver); + if (ret) + cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state); + + return ret; +} +module_init(hisi_noc_pmu_module_init); + +static void __exit hisi_noc_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_noc_pmu_driver); + cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state); +} +module_exit(hisi_noc_pmu_module_exit); + +MODULE_IMPORT_NS("HISI_PMU"); +MODULE_DESCRIPTION("HiSilicon SoC Uncore NoC PMU driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index c0f5d7c73e06..80108c63cb60 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -269,25 +269,22 @@ static void 
hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx) static int hisi_pa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *pa_pmu) { + hisi_uncore_pmu_init_topology(pa_pmu, &pdev->dev); + /* * As PA PMU is in a SICL, use the SICL_ID and the index ID * to identify the PA PMU. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &pa_pmu->sicl_id)) { + if (pa_pmu->topo.sicl_id < 0) { dev_err(&pdev->dev, "Cannot read sicl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &pa_pmu->index_id)) { + if (pa_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } - pa_pmu->ccl_id = -1; - pa_pmu->sccl_id = -1; - pa_pmu->dev_info = device_get_match_data(&pdev->dev); if (!pa_pmu->dev_info) return -ENODEV; @@ -356,29 +353,6 @@ static const struct attribute_group hisi_h60pa_pmu_events_group = { .attrs = hisi_h60pa_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_pa_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_pa_pmu_cpumask_attr_group = { - .attrs = hisi_pa_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_pa_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_pa_pmu_identifier_attrs[] = { - &hisi_pa_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_pa_pmu_identifier_group = { - .attrs = hisi_pa_pmu_identifier_attrs, -}; - static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { .mask_offset = PA_INT_MASK, .clear_offset = PA_INT_CLEAR, @@ -388,8 +362,8 @@ static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v2_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -402,8 +376,8 @@ static const struct hisi_pmu_dev_info hisi_h32pa_v2 = { static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v3_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -422,8 +396,8 @@ static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = { static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_h60pa_pmu_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -466,7 +440,7 @@ static int hisi_pa_pmu_dev_probe(struct platform_device *pdev, pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups; pa_pmu->num_counters = PA_NR_COUNTERS; pa_pmu->ops = &hisi_uncore_pa_ops; - pa_pmu->check_event = 0xB0; + pa_pmu->check_event = PA_EVTYPE_MASK; pa_pmu->counter_bits = 64; pa_pmu->dev = &pdev->dev; pa_pmu->on_cpu = -1; @@ -488,9 +462,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u", - pa_pmu->sicl_id, pa_pmu->dev_info->name, - pa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%d", + pa_pmu->topo.sicl_id, pa_pmu->dev_info->name, + pa_pmu->topo.index_id); if (!name) return -ENOMEM; @@ -569,6 +543,7 @@ static void __exit 
hisi_pa_pmu_module_exit(void) } module_exit(hisi_pa_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 918cdc31de57..de71dcf11653 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -14,6 +14,7 @@ #include <linux/err.h> #include <linux/errno.h> #include <linux/interrupt.h> +#include <linux/property.h> #include <asm/cputype.h> #include <asm/local64.h> @@ -34,7 +35,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev, return sysfs_emit(page, "config=0x%lx\n", (unsigned long)eattr->var); } -EXPORT_SYMBOL_GPL(hisi_event_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_event_sysfs_show, "HISI_PMU"); /* * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show @@ -46,7 +47,52 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, return sysfs_emit(buf, "%d\n", hisi_pmu->on_cpu); } -EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, "HISI_PMU"); + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static ssize_t hisi_associated_cpus_sysfs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, &hisi_pmu->associated_cpus); +} +static DEVICE_ATTR(associated_cpus, 0444, hisi_associated_cpus_sysfs_show, NULL); + +static struct attribute *hisi_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + &dev_attr_associated_cpus.attr, + NULL +}; + +const struct attribute_group hisi_pmu_cpumask_attr_group = { + .attrs = hisi_pmu_cpumask_attrs, +}; +EXPORT_SYMBOL_NS_GPL(hisi_pmu_cpumask_attr_group, "HISI_PMU"); + +ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier); +} +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, "HISI_PMU"); + +static struct device_attribute hisi_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_pmu_identifier_attrs[] = { + &hisi_pmu_identifier_attr.attr, + NULL +}; + +const struct attribute_group hisi_pmu_identifier_group = { + .attrs = hisi_pmu_identifier_attrs, +}; +EXPORT_SYMBOL_NS_GPL(hisi_pmu_identifier_group, "HISI_PMU"); static bool hisi_validate_event_group(struct perf_event *event) { @@ -96,24 +142,14 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event) return idx; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx); - -ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, - struct device_attribute *attr, - char *page) -{ - struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); - - return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier); -} -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, "HISI_PMU"); static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { clear_bit(idx, hisi_pmu->pmu_events.used_mask); } -static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data) +irqreturn_t hisi_uncore_pmu_isr(int irq, void *data) { struct hisi_pmu *hisi_pmu = data; struct perf_event *event; @@ -142,6 +178,7 
@@ static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data) return IRQ_HANDLED; } +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_isr, "HISI_PMU"); int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev) @@ -165,7 +202,7 @@ int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_irq, "HISI_PMU"); int hisi_uncore_pmu_event_init(struct perf_event *event) { @@ -198,7 +235,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) return -EINVAL; hisi_pmu = to_hisi_pmu(event->pmu); - if (event->attr.config > hisi_pmu->check_event) + if ((event->attr.config & HISI_EVENTID_MASK) > hisi_pmu->check_event) return -EINVAL; if (hisi_pmu->on_cpu == -1) @@ -219,7 +256,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_init, "HISI_PMU"); /* * Set the counter to count the event that we're interested in, @@ -273,7 +310,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event) /* Write start value to the hardware event counter */ hisi_pmu->ops->write_counter(hisi_pmu, hwc, val); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_set_event_period, "HISI_PMU"); void hisi_uncore_pmu_event_update(struct perf_event *event) { @@ -294,7 +331,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event) HISI_MAX_PERIOD(hisi_pmu->counter_bits); local64_add(delta, &event->count); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_update, "HISI_PMU"); void hisi_uncore_pmu_start(struct perf_event *event, int flags) { @@ -317,7 +354,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags) hisi_uncore_pmu_enable_event(event); perf_event_update_userpage(event); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_start, "HISI_PMU"); void hisi_uncore_pmu_stop(struct perf_event *event, int flags) { @@ -334,7 +371,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags) hisi_uncore_pmu_event_update(event); hwc->state |= PERF_HES_UPTODATE; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_stop, "HISI_PMU"); int hisi_uncore_pmu_add(struct perf_event *event, int flags) { @@ -357,7 +394,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_add, "HISI_PMU"); void hisi_uncore_pmu_del(struct perf_event *event, int flags) { @@ -369,14 +406,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_del, "HISI_PMU"); void hisi_uncore_pmu_read(struct perf_event *event) { /* Read hardware counter and update the perf counter statistics */ hisi_uncore_pmu_event_update(event); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_read, "HISI_PMU"); void hisi_uncore_pmu_enable(struct pmu *pmu) { @@ -389,7 +426,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu) hisi_pmu->ops->start_counters(hisi_pmu); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_enable, "HISI_PMU"); void hisi_uncore_pmu_disable(struct pmu *pmu) { @@ -397,7 +434,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) 
hisi_pmu->ops->stop_counters(hisi_pmu); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_disable, "HISI_PMU"); /* @@ -444,22 +481,19 @@ static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp) */ static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu) { + struct hisi_pmu_topology *topo = &hisi_pmu->topo; int sccl_id, ccl_id; - /* If SCCL_ID is -1, the PMU is in a SICL and has no CPU affinity */ - if (hisi_pmu->sccl_id == -1) - return true; - - if (hisi_pmu->ccl_id == -1) { + if (topo->ccl_id == -1) { /* If CCL_ID is -1, the PMU only shares the same SCCL */ hisi_read_sccl_and_ccl_id(&sccl_id, NULL); - return sccl_id == hisi_pmu->sccl_id; + return sccl_id == topo->sccl_id; } hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id); - return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id; + return sccl_id == topo->sccl_id && ccl_id == topo->ccl_id; } int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) @@ -467,24 +501,39 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu, node); - if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu)) + /* + * If the CPU is not associated with the PMU, initialize hisi_pmu->on_cpu + * based on locality if it hasn't been initialized yet. For PMUs that + * do have associated CPUs, it'll be updated later. + */ + if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu)) { + if (hisi_pmu->on_cpu != -1) + return 0; + + hisi_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(hisi_pmu->dev)); + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, + cpumask_of(hisi_pmu->on_cpu))); return 0; + } cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus); - /* If another CPU is already managing this PMU, simply return. */ - if (hisi_pmu->on_cpu != -1) + /* If another associated CPU is already managing this PMU, simply return. */ + if (hisi_pmu->on_cpu != -1 && + cpumask_test_cpu(hisi_pmu->on_cpu, &hisi_pmu->associated_cpus)) return 0; /* Use this CPU in cpumask for event counting */ hisi_pmu->on_cpu = cpu; /* Overflow interrupt also should use the same CPU */ - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_online_cpu, "HISI_PMU"); int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { @@ -492,9 +541,6 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) node); unsigned int target; - if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus)) - return 0; - /* Nothing to do if this CPU doesn't own the PMU */ if (hisi_pmu->on_cpu != cpu) return 0; @@ -502,20 +548,58 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) /* Give up ownership of the PMU */ hisi_pmu->on_cpu = -1; - /* Choose a new CPU to migrate ownership of the PMU to */ + /* + * Migrate ownership of the PMU to a new CPU chosen from the PMU's online + * associated CPUs if possible; if no associated CPU is online, then + * migrate to any online CPU.
+ */ target = cpumask_any_and_but(&hisi_pmu->associated_cpus, cpu_online_mask, cpu); if (target >= nr_cpu_ids) + target = cpumask_any_but(cpu_online_mask, cpu); + + if (target >= nr_cpu_ids) return 0; perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target); /* Use this CPU for event counting */ hisi_pmu->on_cpu = target; - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target))); + + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target))); return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, "HISI_PMU"); + +/* + * Retrieve the topology information from the firmware for the hisi_pmu device. + * A topology ID will be -1 if we cannot initialize it, either because the PMU + * is not located at that level of the topology or because the firmware needs + * to be fixed. + */ +void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev) +{ + struct hisi_pmu_topology *topo = &hisi_pmu->topo; + + topo->sccl_id = -1; + topo->ccl_id = -1; + topo->index_id = -1; + topo->sub_id = -1; + + if (device_property_read_u32(dev, "hisilicon,scl-id", &topo->sccl_id)) + dev_dbg(dev, "no scl-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,ccl-id", &topo->ccl_id)) + dev_dbg(dev, "no ccl-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,idx-id", &topo->index_id)) + dev_dbg(dev, "no idx-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,sub-id", &topo->sub_id)) + dev_dbg(dev, "no sub-id present\n"); +} +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_topology, "HISI_PMU"); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { @@ -535,7 +619,7 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) pmu->attr_groups = hisi_pmu->pmu_events.attr_groups; pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE; } -EXPORT_SYMBOL_GPL(hisi_pmu_init); +EXPORT_SYMBOL_NS_GPL(hisi_pmu_init, "HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC uncore Performance Monitor driver framework"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 25b2d43b72bf..3ffe6acda653 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -24,7 +24,7 @@ #define pr_fmt(fmt) "hisi_pmu: " fmt #define HISI_PMU_V2 0x30 -#define HISI_MAX_COUNTERS 0x10 +#define HISI_MAX_COUNTERS 0x18 #define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu)) #define HISI_PMU_ATTR(_name, _func, _config) \ @@ -43,7 +43,8 @@ return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \ } -#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff) +#define HISI_EVENTID_MASK GENMASK(7, 0) +#define HISI_GET_EVENTID(ev) ((ev)->hw.config_base & HISI_EVENTID_MASK) #define HISI_PMU_EVTYPE_BITS 8 #define HISI_PMU_EVTYPE_SHIFT(idx) ((idx) % 4 * HISI_PMU_EVTYPE_BITS) @@ -72,6 +73,8 @@ struct hisi_uncore_ops { struct hisi_pmu_dev_info { const char *name; const struct attribute_group **attr_groups; + u32 counter_bits; + u32 check_event; void *private; }; @@ -81,27 +84,55 @@ struct hisi_pmu_hwevents { const struct attribute_group **attr_groups; }; +/** + * struct hisi_pmu_topology - Describe the topology hierarchy on which the PMU + * is located. + * @sccl_id: ID of the SCCL on which the PMU is located. + * @sicl_id: ID of the SICL on which the PMU is located. + * @scl_id: ID used by the core which is unaware of the SCCL/SICL.
+ * @ccl_id: ID of the CCL (CPU cluster) on which the PMU is located. + * @index_id: the ID of the PMU module if there are several PMUs at a + * particular location in the topology. + * @sub_id: submodule ID of the PMU. For example, we use this for DDRC PMU v2 + * since each DDRC has more than one DMC. + * + * An ID will be -1 if the PMU isn't located at that level of the topology. + */ +struct hisi_pmu_topology { + /* + * SCCL (Super CPU Cluster) and SICL (Super I/O Cluster) are parallel, + * so a PMU cannot be located on both an SCCL and a SICL. If the SCCL/SICL + * distinction is not relevant, use scl_id instead. + */ + union { + int sccl_id; + int sicl_id; + int scl_id; + }; + int ccl_id; + int index_id; + int sub_id; +}; + /* Generic pmu struct for different pmu types */ struct hisi_pmu { struct pmu pmu; const struct hisi_uncore_ops *ops; const struct hisi_pmu_dev_info *dev_info; struct hisi_pmu_hwevents pmu_events; - /* associated_cpus: All CPUs associated with the PMU */ + struct hisi_pmu_topology topo; + /* + * CPUs associated with the PMU which are preferred for counting. + * Could be empty if the PMU has no association (e.g. a PMU on a SICL), in + * which case any online CPU will be used. + */ cpumask_t associated_cpus; /* CPU used for counting */ int on_cpu; int irq; struct device *dev; struct hlist_node node; - int sccl_id; - int sicl_id; - int ccl_id; void __iomem *base; - /* the ID of the PMU modules */ - u32 index_id; - /* For DDRC PMU v2: each DDRC has more than one DMC */ - u32 sub_id; int num_counters; int counter_bits; /* check event code range */ @@ -109,6 +140,10 @@ struct hisi_pmu { u32 identifier; }; +/* Generic implementation of cpumask/identifier group */ +extern const struct attribute_group hisi_pmu_cpumask_attr_group; +extern const struct attribute_group hisi_pmu_identifier_group; + int hisi_uncore_pmu_get_event_idx(struct perf_event *event); void hisi_uncore_pmu_read(struct perf_event *event); int hisi_uncore_pmu_add(struct perf_event *event, int flags); @@ -130,8 +165,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node); ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, struct device_attribute *attr, char *page); +irqreturn_t hisi_uncore_pmu_isr(int irq, void *data); int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); +void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index c5f4764ee888..cd32d606df05 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -28,6 +28,18 @@ #define SLLC_VERSION 0x1cf0 #define SLLC_EVENT_CNT0_L 0x1d00 +/* SLLC registers definition in v3 */ +#define SLLC_V3_INT_MASK 0x6834 +#define SLLC_V3_INT_STATUS 0x6838 +#define SLLC_V3_INT_CLEAR 0x683c +#define SLLC_V3_VERSION 0x6c00 +#define SLLC_V3_PERF_CTRL 0x6d00 +#define SLLC_V3_SRCID_CTRL 0x6d04 +#define SLLC_V3_TGTID_CTRL 0x6d08 +#define SLLC_V3_EVENT_CTRL 0x6d14 +#define SLLC_V3_EVENT_TYPE0 0x6d18 +#define SLLC_V3_EVENT_CNT0_L 0x6e00 + +#define SLLC_EVTYPE_MASK 0xff #define SLLC_PERF_CTRL_EN BIT(0) #define SLLC_FILT_EN BIT(1) @@ -40,7 +52,14 @@ #define SLLC_TGTID_MAX_SHIFT 12 #define SLLC_SRCID_CMD_SHIFT 1 #define SLLC_SRCID_MSK_SHIFT 12 -#define SLLC_NR_EVENTS 0x80 + +#define SLLC_V3_TGTID_MIN_SHIFT 1 +#define SLLC_V3_TGTID_MAX_SHIFT 10
+#define SLLC_V3_SRCID_CMD_SHIFT 1 +#define SLLC_V3_SRCID_MSK_SHIFT 10 + +#define SLLC_NR_EVENTS 0xff +#define SLLC_EVENT_CNTn(cnt0, n) ((cnt0) + (n) * 8) HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0); HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_max, config1, 21, 11); @@ -48,6 +67,23 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); +struct hisi_sllc_pmu_regs { + u32 int_mask; + u32 int_clear; + u32 int_status; + u32 perf_ctrl; + u32 srcid_ctrl; + u32 srcid_cmd_shift; + u32 srcid_mask_shift; + u32 tgtid_ctrl; + u32 tgtid_min_shift; + u32 tgtid_max_shift; + u32 event_ctrl; + u32 event_type0; + u32 version; + u32 event_cnt0; +}; + static bool tgtid_is_valid(u32 max, u32 min) { return max > 0 && max >= min; @@ -56,96 +92,104 @@ static bool tgtid_is_valid(u32 max, u32 min) static void hisi_sllc_pmu_enable_tracetag(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 tt_en = hisi_get_tracetag_en(event); if (tt_en) { u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_TRACETAG_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_disable_tracetag(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 tt_en = hisi_get_tracetag_en(event); if (tt_en) { u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_TRACETAG_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_config_tgtid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 min = hisi_get_tgtid_min(event); u32 max = hisi_get_tgtid_max(event); if (tgtid_is_valid(max, min)) { - u32 val = (max << SLLC_TGTID_MAX_SHIFT) | (min << SLLC_TGTID_MIN_SHIFT); + u32 val = (max << regs->tgtid_max_shift) | + (min << regs->tgtid_min_shift); - writel(val, sllc_pmu->base + SLLC_TGTID_CTRL); + writel(val, sllc_pmu->base + regs->tgtid_ctrl); /* Enable the tgtid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_TGTID_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_clear_tgtid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 min = hisi_get_tgtid_min(event); u32 max = hisi_get_tgtid_max(event); if (tgtid_is_valid(max, min)) { u32 val; - writel(SLLC_TGTID_NONE, sllc_pmu->base + SLLC_TGTID_CTRL); + writel(SLLC_TGTID_NONE, sllc_pmu->base + regs->tgtid_ctrl); /* Disable the tgtid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_TGTID_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_config_srcid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = 
sllc_pmu->dev_info->private; u32 cmd = hisi_get_srcid_cmd(event); if (cmd) { u32 val, msk; msk = hisi_get_srcid_msk(event); - val = (cmd << SLLC_SRCID_CMD_SHIFT) | (msk << SLLC_SRCID_MSK_SHIFT); - writel(val, sllc_pmu->base + SLLC_SRCID_CTRL); + val = (cmd << regs->srcid_cmd_shift) | + (msk << regs->srcid_mask_shift); + writel(val, sllc_pmu->base + regs->srcid_ctrl); /* Enable the srcid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_SRCID_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_clear_srcid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 cmd = hisi_get_srcid_cmd(event); if (cmd) { u32 val; - writel(SLLC_SRCID_NONE, sllc_pmu->base + SLLC_SRCID_CTRL); + writel(SLLC_SRCID_NONE, sllc_pmu->base + regs->srcid_ctrl); /* Disable the srcid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_SRCID_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } @@ -167,29 +211,27 @@ static void hisi_sllc_pmu_clear_filter(struct perf_event *event) } } -static u32 hisi_sllc_pmu_get_counter_offset(int idx) -{ - return (SLLC_EVENT_CNT0_L + idx * 8); -} - static u64 hisi_sllc_pmu_read_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { - return readq(sllc_pmu->base + - hisi_sllc_pmu_get_counter_offset(hwc->idx)); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + return readq(sllc_pmu->base + SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx)); } static void hisi_sllc_pmu_write_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc, u64 val) { - writeq(val, sllc_pmu->base + - hisi_sllc_pmu_get_counter_offset(hwc->idx)); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + writeq(val, sllc_pmu->base + SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx)); } static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx, u32 type) { - u32 reg, reg_idx, shift, val; + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + u32 reg, val; /* * Select the appropriate event select register(SLLC_EVENT_TYPE0/1). @@ -198,114 +240,117 @@ static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx, * SLLC_EVENT_TYPE0 is chosen. For the latter 4 hardware counters, * SLLC_EVENT_TYPE1 is chosen. 
*/ - reg = SLLC_EVENT_TYPE0 + (idx / 4) * 4; - reg_idx = idx % 4; - shift = 8 * reg_idx; + reg = regs->event_type0 + (idx / 4) * 4; /* Write event code to SLLC_EVENT_TYPEx Register */ val = readl(sllc_pmu->base + reg); - val &= ~(SLLC_EVTYPE_MASK << shift); - val |= (type << shift); + val &= ~(SLLC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); writel(val, sllc_pmu->base + reg); } static void hisi_sllc_pmu_start_counters(struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_PERF_CTRL_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } static void hisi_sllc_pmu_stop_counters(struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_PERF_CTRL_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } static void hisi_sllc_pmu_enable_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); - val |= 1 << hwc->idx; - writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); + val = readl(sllc_pmu->base + regs->event_ctrl); + val |= BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->event_ctrl); } static void hisi_sllc_pmu_disable_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); - val &= ~(1 << hwc->idx); - writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); + val = readl(sllc_pmu->base + regs->event_ctrl); + val &= ~BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->event_ctrl); } static void hisi_sllc_pmu_enable_counter_int(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_INT_MASK); - /* Write 0 to enable interrupt */ - val &= ~(1 << hwc->idx); - writel(val, sllc_pmu->base + SLLC_INT_MASK); + val = readl(sllc_pmu->base + regs->int_mask); + val &= ~BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->int_mask); } static void hisi_sllc_pmu_disable_counter_int(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_INT_MASK); - /* Write 1 to mask interrupt */ - val |= 1 << hwc->idx; - writel(val, sllc_pmu->base + SLLC_INT_MASK); + val = readl(sllc_pmu->base + regs->int_mask); + val |= BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->int_mask); } static u32 hisi_sllc_pmu_get_int_status(struct hisi_pmu *sllc_pmu) { - return readl(sllc_pmu->base + SLLC_INT_STATUS); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + return readl(sllc_pmu->base + regs->int_status); } static void hisi_sllc_pmu_clear_int_status(struct hisi_pmu *sllc_pmu, int idx) { - writel(1 << idx, sllc_pmu->base + SLLC_INT_CLEAR); -} + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; -static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { - { "HISI0263", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); + writel(BIT_ULL(idx), sllc_pmu->base 
+ regs->int_clear); +} static int hisi_sllc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs; + + hisi_uncore_pmu_init_topology(sllc_pmu, &pdev->dev); + /* * Use the SCCL_ID and the index ID to identify the SLLC PMU, * while SCCL_ID is from MPIDR_EL1 by CPU. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &sllc_pmu->sccl_id)) { + if (sllc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Cannot read sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &sllc_pmu->index_id)) { + if (sllc_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } - /* SLLC PMUs only share the same SCCL */ - sllc_pmu->ccl_id = -1; + sllc_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!sllc_pmu->dev_info) + return -ENODEV; sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sllc_pmu->base)) { @@ -313,7 +358,8 @@ static int hisi_sllc_pmu_init_data(struct platform_device *pdev, return PTR_ERR(sllc_pmu->base); } - sllc_pmu->identifier = readl(sllc_pmu->base + SLLC_VERSION); + regs = sllc_pmu->dev_info->private; + sllc_pmu->identifier = readl(sllc_pmu->base + regs->version); return 0; } @@ -347,35 +393,54 @@ static const struct attribute_group hisi_sllc_pmu_v2_events_group = { .attrs = hisi_sllc_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_sllc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, +static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = { + &hisi_sllc_pmu_v2_format_group, + &hisi_sllc_pmu_v2_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; -static const struct attribute_group hisi_sllc_pmu_cpumask_attr_group = { - .attrs = hisi_sllc_pmu_cpumask_attrs, +static struct hisi_sllc_pmu_regs hisi_sllc_v2_pmu_regs = { + .int_mask = SLLC_INT_MASK, + .int_clear = SLLC_INT_CLEAR, + .int_status = SLLC_INT_STATUS, + .perf_ctrl = SLLC_PERF_CTRL, + .srcid_ctrl = SLLC_SRCID_CTRL, + .srcid_cmd_shift = SLLC_SRCID_CMD_SHIFT, + .srcid_mask_shift = SLLC_SRCID_MSK_SHIFT, + .tgtid_ctrl = SLLC_TGTID_CTRL, + .tgtid_min_shift = SLLC_TGTID_MIN_SHIFT, + .tgtid_max_shift = SLLC_TGTID_MAX_SHIFT, + .event_ctrl = SLLC_EVENT_CTRL, + .event_type0 = SLLC_EVENT_TYPE0, + .version = SLLC_VERSION, + .event_cnt0 = SLLC_EVENT_CNT0_L, }; -static struct device_attribute hisi_sllc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_sllc_pmu_identifier_attrs[] = { - &hisi_sllc_pmu_identifier_attr.attr, - NULL +static const struct hisi_pmu_dev_info hisi_sllc_v2 = { + .private = &hisi_sllc_v2_pmu_regs, }; -static const struct attribute_group hisi_sllc_pmu_identifier_group = { - .attrs = hisi_sllc_pmu_identifier_attrs, +static struct hisi_sllc_pmu_regs hisi_sllc_v3_pmu_regs = { + .int_mask = SLLC_V3_INT_MASK, + .int_clear = SLLC_V3_INT_CLEAR, + .int_status = SLLC_V3_INT_STATUS, + .perf_ctrl = SLLC_V3_PERF_CTRL, + .srcid_ctrl = SLLC_V3_SRCID_CTRL, + .srcid_cmd_shift = SLLC_V3_SRCID_CMD_SHIFT, + .srcid_mask_shift = SLLC_V3_SRCID_MSK_SHIFT, + .tgtid_ctrl = SLLC_V3_TGTID_CTRL, + .tgtid_min_shift = SLLC_V3_TGTID_MIN_SHIFT, + .tgtid_max_shift = SLLC_V3_TGTID_MAX_SHIFT, + .event_ctrl = SLLC_V3_EVENT_CTRL, + .event_type0 = SLLC_V3_EVENT_TYPE0, + .version = SLLC_V3_VERSION, + .event_cnt0 = SLLC_V3_EVENT_CNT0_L, }; -static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = { - 
&hisi_sllc_pmu_v2_format_group, - &hisi_sllc_pmu_v2_events_group, - &hisi_sllc_pmu_cpumask_attr_group, - &hisi_sllc_pmu_identifier_group, - NULL +static const struct hisi_pmu_dev_info hisi_sllc_v3 = { + .private = &hisi_sllc_v3_pmu_regs, }; static const struct hisi_uncore_ops hisi_uncore_sllc_ops = { @@ -433,8 +498,8 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_sllc%u", - sllc_pmu->sccl_id, sllc_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_sllc%d", + sllc_pmu->topo.sccl_id, sllc_pmu->topo.index_id); if (!name) return -ENOMEM; @@ -469,6 +534,13 @@ static void hisi_sllc_pmu_remove(struct platform_device *pdev) &sllc_pmu->node); } +static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { + { "HISI0263", (kernel_ulong_t)&hisi_sllc_v2 }, + { "HISI0264", (kernel_ulong_t)&hisi_sllc_v3 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); + static struct platform_driver hisi_sllc_pmu_driver = { .driver = { .name = "hisi_sllc_pmu", @@ -507,6 +579,7 @@ static void __exit hisi_sllc_pmu_module_exit(void) } module_exit(hisi_sllc_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index 481dcc9e8fbf..03cb9b564b99 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -11,7 +11,6 @@ #include <linux/irq.h> #include <linux/list.h> #include <linux/mod_devicetable.h> -#include <linux/property.h> #include "hisi_uncore_pmu.h" @@ -366,25 +365,24 @@ static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx) static int hisi_uc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *uc_pmu) { + hisi_uncore_pmu_init_topology(uc_pmu, &pdev->dev); + /* * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to * identify the topology information of UC PMU devices in the chip. * They have some CCLs per SCCL and then 4 UC PMU per CCL. 
*/ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &uc_pmu->sccl_id)) { + if (uc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &uc_pmu->ccl_id)) { + if (uc_pmu->topo.ccl_id < 0) { dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &uc_pmu->sub_id)) { + if (uc_pmu->topo.sub_id < 0) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } @@ -439,34 +437,11 @@ static const struct attribute_group hisi_uc_pmu_events_group = { .attrs = hisi_uc_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_uc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_uc_pmu_cpumask_attr_group = { - .attrs = hisi_uc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_uc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_uc_pmu_identifier_attrs[] = { - &hisi_uc_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_uc_pmu_identifier_group = { - .attrs = hisi_uc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_uc_pmu_attr_groups[] = { &hisi_uc_pmu_format_group, &hisi_uc_pmu_events_group, - &hisi_uc_pmu_cpumask_attr_group, - &hisi_uc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -538,8 +513,9 @@ static int hisi_uc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u", - uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%d", + uc_pmu->topo.sccl_id, uc_pmu->topo.ccl_id, + uc_pmu->topo.sub_id); if (!name) return -ENOMEM; @@ -613,6 +589,7 @@ static void __exit hisi_uc_pmu_module_exit(void) } module_exit(hisi_uc_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Junhao He <hejunhao3@huawei.com>"); diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c index 8860d9f687ae..72ac17efd846 100644 --- a/drivers/perf/marvell_cn10k_ddr_pmu.c +++ b/drivers/perf/marvell_cn10k_ddr_pmu.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -/* Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver +/* + * Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver * - * Copyright (C) 2021 Marvell. + * Copyright (C) 2021-2024 Marvell. 
*/ #include <linux/init.h> @@ -14,24 +15,29 @@ #include <linux/platform_device.h> /* Performance Counters Operating Mode Control Registers */ -#define DDRC_PERF_CNT_OP_MODE_CTRL 0x8020 -#define OP_MODE_CTRL_VAL_MANNUAL 0x1 +#define CN10K_DDRC_PERF_CNT_OP_MODE_CTRL 0x8020 +#define ODY_DDRC_PERF_CNT_OP_MODE_CTRL 0x20020 +#define OP_MODE_CTRL_VAL_MANUAL 0x1 /* Performance Counters Start Operation Control Registers */ -#define DDRC_PERF_CNT_START_OP_CTRL 0x8028 +#define CN10K_DDRC_PERF_CNT_START_OP_CTRL 0x8028 +#define ODY_DDRC_PERF_CNT_START_OP_CTRL 0x200A0 #define START_OP_CTRL_VAL_START 0x1ULL #define START_OP_CTRL_VAL_ACTIVE 0x2 /* Performance Counters End Operation Control Registers */ -#define DDRC_PERF_CNT_END_OP_CTRL 0x8030 +#define CN10K_DDRC_PERF_CNT_END_OP_CTRL 0x8030 +#define ODY_DDRC_PERF_CNT_END_OP_CTRL 0x200E0 #define END_OP_CTRL_VAL_END 0x1ULL /* Performance Counters End Status Registers */ -#define DDRC_PERF_CNT_END_STATUS 0x8038 +#define CN10K_DDRC_PERF_CNT_END_STATUS 0x8038 +#define ODY_DDRC_PERF_CNT_END_STATUS 0x20120 #define END_STATUS_VAL_END_TIMER_MODE_END 0x1 /* Performance Counters Configuration Registers */ -#define DDRC_PERF_CFG_BASE 0x8040 +#define CN10K_DDRC_PERF_CFG_BASE 0x8040 +#define ODY_DDRC_PERF_CFG_BASE 0x20160 /* 8 Generic event counter + 2 fixed event counters */ #define DDRC_PERF_NUM_GEN_COUNTERS 8 @@ -42,18 +48,28 @@ DDRC_PERF_NUM_FIX_COUNTERS) /* Generic event counter registers */ -#define DDRC_PERF_CFG(n) (DDRC_PERF_CFG_BASE + 8 * (n)) +#define DDRC_PERF_CFG(base, n) ((base) + 8 * (n)) #define EVENT_ENABLE BIT_ULL(63) /* Two dedicated event counters for DDR reads and writes */ #define EVENT_DDR_READS 101 #define EVENT_DDR_WRITES 100 +#define DDRC_PERF_REG(base, n) ((base) + 8 * (n)) /* * programmable events IDs in programmable event counters. * DO NOT change these event-id numbers, they are used to * program event bitmap in h/w. 
*/ +#define EVENT_DFI_CMD_IS_RETRY 61 +#define EVENT_RD_UC_ECC_ERROR 60 +#define EVENT_RD_CRC_ERROR 59 +#define EVENT_CAPAR_ERROR 58 +#define EVENT_WR_CRC_ERROR 57 +#define EVENT_DFI_PARITY_POISON 56 +#define EVENT_RETRY_FIFO_FULL 46 +#define EVENT_DFI_CYCLES 45 + #define EVENT_OP_IS_ZQLATCH 55 #define EVENT_OP_IS_ZQSTART 54 #define EVENT_OP_IS_TCR_MRR 53 @@ -102,28 +118,37 @@ #define EVENT_HIF_RD_OR_WR 1 /* Event counter value registers */ -#define DDRC_PERF_CNT_VALUE_BASE 0x8080 -#define DDRC_PERF_CNT_VALUE(n) (DDRC_PERF_CNT_VALUE_BASE + 8 * (n)) +#define CN10K_DDRC_PERF_CNT_VALUE_BASE 0x8080 +#define ODY_DDRC_PERF_CNT_VALUE_BASE 0x201C0 /* Fixed event counter enable/disable register */ -#define DDRC_PERF_CNT_FREERUN_EN 0x80C0 +#define CN10K_DDRC_PERF_CNT_FREERUN_EN 0x80C0 #define DDRC_PERF_FREERUN_WRITE_EN 0x1 #define DDRC_PERF_FREERUN_READ_EN 0x2 /* Fixed event counter control register */ -#define DDRC_PERF_CNT_FREERUN_CTRL 0x80C8 +#define CN10K_DDRC_PERF_CNT_FREERUN_CTRL 0x80C8 +#define ODY_DDRC_PERF_CNT_FREERUN_CTRL 0x20240 #define DDRC_FREERUN_WRITE_CNT_CLR 0x1 #define DDRC_FREERUN_READ_CNT_CLR 0x2 -/* Fixed event counter value register */ -#define DDRC_PERF_CNT_VALUE_WR_OP 0x80D0 -#define DDRC_PERF_CNT_VALUE_RD_OP 0x80D8 +/* Fixed event counter clear register, defined only for Odyssey */ +#define ODY_DDRC_PERF_CNT_FREERUN_CLR 0x20248 + #define DDRC_PERF_CNT_VALUE_OVERFLOW BIT_ULL(48) #define DDRC_PERF_CNT_MAX_VALUE GENMASK_ULL(48, 0) +/* Fixed event counter value register */ +#define CN10K_DDRC_PERF_CNT_VALUE_WR_OP 0x80D0 +#define CN10K_DDRC_PERF_CNT_VALUE_RD_OP 0x80D8 +#define ODY_DDRC_PERF_CNT_VALUE_WR_OP 0x20250 +#define ODY_DDRC_PERF_CNT_VALUE_RD_OP 0x20258 + struct cn10k_ddr_pmu { struct pmu pmu; void __iomem *base; + const struct ddr_pmu_platform_data *p_data; + const struct ddr_pmu_ops *ops; unsigned int cpu; struct device *dev; int active_events; @@ -132,8 +157,36 @@ struct cn10k_ddr_pmu { struct hlist_node node; }; +struct ddr_pmu_ops { + void (*enable_read_freerun_counter)(struct cn10k_ddr_pmu *pmu, + bool enable); + void (*enable_write_freerun_counter)(struct cn10k_ddr_pmu *pmu, + bool enable); + void (*clear_read_freerun_counter)(struct cn10k_ddr_pmu *pmu); + void (*clear_write_freerun_counter)(struct cn10k_ddr_pmu *pmu); + void (*pmu_overflow_handler)(struct cn10k_ddr_pmu *pmu, int evt_idx); +}; + #define to_cn10k_ddr_pmu(p) container_of(p, struct cn10k_ddr_pmu, pmu) +struct ddr_pmu_platform_data { + u64 counter_overflow_val; + u64 counter_max_val; + u64 cnt_base; + u64 cfg_base; + u64 cnt_op_mode_ctrl; + u64 cnt_start_op_ctrl; + u64 cnt_end_op_ctrl; + u64 cnt_end_status; + u64 cnt_freerun_en; + u64 cnt_freerun_ctrl; + u64 cnt_freerun_clr; + u64 cnt_value_wr_op; + u64 cnt_value_rd_op; + bool is_cn10k; + bool is_ody; +}; + static ssize_t cn10k_ddr_pmu_event_show(struct device *dev, struct device_attribute *attr, char *page) @@ -209,6 +262,85 @@ static struct attribute *cn10k_ddr_perf_events_attrs[] = { NULL }; +static struct attribute *odyssey_ddr_perf_events_attrs[] = { + /* Programmable */ + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_or_wr_access, EVENT_HIF_RD_OR_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_wr_access, EVENT_HIF_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_access, EVENT_HIF_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rmw_access, EVENT_HIF_RMW), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_pri_rdaccess, EVENT_HIF_HI_PRI_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_bypass_access, EVENT_READ_BYPASS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_act_bypass_access, EVENT_ACT_BYPASS), + 
CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_wr_data_access, + EVENT_DFI_WR_DATA_CYCLES), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_rd_data_access, + EVENT_DFI_RD_DATA_CYCLES), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hpri_sched_rd_crit_access, + EVENT_HPR_XACT_WHEN_CRITICAL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_lpri_sched_rd_crit_access, + EVENT_LPR_XACT_WHEN_CRITICAL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_trxn_crit_access, + EVENT_WR_XACT_WHEN_CRITICAL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_active_access, EVENT_OP_IS_ACTIVATE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_or_wr_access, + EVENT_OP_IS_RD_OR_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_active_access, + EVENT_OP_IS_RD_ACTIVATE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_read, EVENT_OP_IS_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_write, EVENT_OP_IS_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_mwr, EVENT_OP_IS_MWR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge, EVENT_OP_IS_PRECHARGE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_rdwr, + EVENT_PRECHARGE_FOR_RDWR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_other, + EVENT_PRECHARGE_FOR_OTHER), + CN10K_DDR_PMU_EVENT_ATTR(ddr_rdwr_transitions, EVENT_RDWR_TRANSITIONS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_write_combine, EVENT_WRITE_COMBINE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_war_hazard, EVENT_WAR_HAZARD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_raw_hazard, EVENT_RAW_HAZARD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_waw_hazard, EVENT_WAW_HAZARD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_selfref, EVENT_OP_IS_ENTER_SELFREF), + CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_powerdown, + EVENT_OP_IS_ENTER_POWERDOWN), + CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_mpsm, EVENT_OP_IS_ENTER_MPSM), + CN10K_DDR_PMU_EVENT_ATTR(ddr_refresh, EVENT_OP_IS_REFRESH), + CN10K_DDR_PMU_EVENT_ATTR(ddr_crit_ref, EVENT_OP_IS_CRIT_REF), + CN10K_DDR_PMU_EVENT_ATTR(ddr_spec_ref, EVENT_OP_IS_SPEC_REF), + CN10K_DDR_PMU_EVENT_ATTR(ddr_load_mode, EVENT_OP_IS_LOAD_MODE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_cycles, EVENT_DFI_CYCLES), + CN10K_DDR_PMU_EVENT_ATTR(ddr_retry_fifo_full, + EVENT_RETRY_FIFO_FULL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC), + CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION), + CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd, + EVENT_VISIBLE_WIN_LIMIT_REACHED_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_wr, + EVENT_VISIBLE_WIN_LIMIT_REACHED_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mpc, EVENT_OP_IS_DQSOSC_MPC), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mrr, EVENT_OP_IS_DQSOSC_MRR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_tcr_mrr, EVENT_OP_IS_TCR_MRR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_zqstart, EVENT_OP_IS_ZQSTART), + CN10K_DDR_PMU_EVENT_ATTR(ddr_zqlatch, EVENT_OP_IS_ZQLATCH), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_parity_poison, + EVENT_DFI_PARITY_POISON), + CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_crc_error, EVENT_WR_CRC_ERROR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_capar_error, EVENT_CAPAR_ERROR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_crc_error, EVENT_RD_CRC_ERROR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_uc_ecc_error, EVENT_RD_UC_ECC_ERROR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_cmd_is_retry, EVENT_DFI_CMD_IS_RETRY), + /* Free run event counters */ + CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_reads, EVENT_DDR_READS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_writes, EVENT_DDR_WRITES), + NULL +}; + +static struct attribute_group odyssey_ddr_perf_events_attr_group = { + .name = "events", + .attrs = odyssey_ddr_perf_events_attrs, +}; + static struct attribute_group cn10k_ddr_perf_events_attr_group = { .name 
= "events", .attrs = cn10k_ddr_perf_events_attrs, @@ -254,6 +386,13 @@ static const struct attribute_group *cn10k_attr_groups[] = { NULL, }; +static const struct attribute_group *odyssey_attr_groups[] = { + &odyssey_ddr_perf_events_attr_group, + &cn10k_ddr_perf_format_attr_group, + &cn10k_ddr_perf_cpumask_attr_group, + NULL +}; + /* Default poll timeout is 100 sec, which is very sufficient for * 48 bit counter incremented max at 5.6 GT/s, which may take many * hours to overflow. @@ -266,9 +405,18 @@ static ktime_t cn10k_ddr_pmu_timer_period(void) return ms_to_ktime((u64)cn10k_ddr_pmu_poll_period_sec * USEC_PER_SEC); } -static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap) +static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap, + struct cn10k_ddr_pmu *ddr_pmu) { + int err = 0; + switch (eventid) { + case EVENT_DFI_PARITY_POISON ...EVENT_DFI_CMD_IS_RETRY: + if (!ddr_pmu->p_data->is_ody) { + err = -EINVAL; + break; + } + fallthrough; case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD: case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH: *event_bitmap = (1ULL << (eventid - 1)); @@ -279,11 +427,12 @@ static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap) *event_bitmap = (0xFULL << (eventid - 1)); break; default: - pr_err("%s Invalid eventid %d\n", __func__, eventid); - return -EINVAL; + err = -EINVAL; } - return 0; + if (err) + pr_err("%s Invalid eventid %d\n", __func__, eventid); + return err; } static int cn10k_ddr_perf_alloc_counter(struct cn10k_ddr_pmu *pmu, @@ -351,9 +500,33 @@ static int cn10k_ddr_perf_event_init(struct perf_event *event) return 0; } +static void cn10k_ddr_perf_counter_start(struct cn10k_ddr_pmu *ddr_pmu, + int counter) +{ + const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data; + u64 ctrl_reg = p_data->cnt_start_op_ctrl; + + writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base + + DDRC_PERF_REG(ctrl_reg, counter)); +} + +static void cn10k_ddr_perf_counter_stop(struct cn10k_ddr_pmu *ddr_pmu, + int counter) +{ + const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data; + u64 ctrl_reg = p_data->cnt_end_op_ctrl; + + writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base + + DDRC_PERF_REG(ctrl_reg, counter)); +} + static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu, int counter, bool enable) { + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 ctrl_reg = pmu->p_data->cnt_op_mode_ctrl; + const struct ddr_pmu_ops *ops = pmu->ops; + bool is_ody = pmu->p_data->is_ody; u32 reg; u64 val; @@ -363,7 +536,7 @@ static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu, } if (counter < DDRC_PERF_NUM_GEN_COUNTERS) { - reg = DDRC_PERF_CFG(counter); + reg = DDRC_PERF_CFG(p_data->cfg_base, counter); val = readq_relaxed(pmu->base + reg); if (enable) @@ -372,40 +545,52 @@ static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu, val &= ~EVENT_ENABLE; writeq_relaxed(val, pmu->base + reg); - } else { - val = readq_relaxed(pmu->base + DDRC_PERF_CNT_FREERUN_EN); - if (enable) { - if (counter == DDRC_PERF_READ_COUNTER_IDX) - val |= DDRC_PERF_FREERUN_READ_EN; - else - val |= DDRC_PERF_FREERUN_WRITE_EN; - } else { - if (counter == DDRC_PERF_READ_COUNTER_IDX) - val &= ~DDRC_PERF_FREERUN_READ_EN; - else - val &= ~DDRC_PERF_FREERUN_WRITE_EN; + + if (is_ody) { + if (enable) { + /* + * Setup the PMU counter to work in + * manual mode + */ + reg = DDRC_PERF_REG(ctrl_reg, counter); + writeq_relaxed(OP_MODE_CTRL_VAL_MANUAL, + pmu->base + reg); + + cn10k_ddr_perf_counter_start(pmu, counter); + } else { + 
cn10k_ddr_perf_counter_stop(pmu, counter); + } } - writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_EN); + } else { + if (counter == DDRC_PERF_READ_COUNTER_IDX) + ops->enable_read_freerun_counter(pmu, enable); + else + ops->enable_write_freerun_counter(pmu, enable); } } static u64 cn10k_ddr_perf_read_counter(struct cn10k_ddr_pmu *pmu, int counter) { + const struct ddr_pmu_platform_data *p_data = pmu->p_data; u64 val; if (counter == DDRC_PERF_READ_COUNTER_IDX) - return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_RD_OP); + return readq_relaxed(pmu->base + + p_data->cnt_value_rd_op); if (counter == DDRC_PERF_WRITE_COUNTER_IDX) - return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_WR_OP); + return readq_relaxed(pmu->base + + p_data->cnt_value_wr_op); - val = readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE(counter)); + val = readq_relaxed(pmu->base + + DDRC_PERF_REG(p_data->cnt_base, counter)); return val; } static void cn10k_ddr_perf_event_update(struct perf_event *event) { struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu); + const struct ddr_pmu_platform_data *p_data = pmu->p_data; struct hw_perf_event *hwc = &event->hw; u64 prev_count, new_count, mask; @@ -414,7 +599,7 @@ static void cn10k_ddr_perf_event_update(struct perf_event *event) new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx); } while (local64_xchg(&hwc->prev_count, new_count) != prev_count); - mask = DDRC_PERF_CNT_MAX_VALUE; + mask = p_data->counter_max_val; local64_add((new_count - prev_count) & mask, &event->count); } @@ -435,6 +620,8 @@ static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags) static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags) { struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu); + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + const struct ddr_pmu_ops *ops = pmu->ops; struct hw_perf_event *hwc = &event->hw; u8 config = event->attr.config; int counter, ret; @@ -454,8 +641,8 @@ static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags) if (counter < DDRC_PERF_NUM_GEN_COUNTERS) { /* Generic counters, configure event id */ - reg_offset = DDRC_PERF_CFG(counter); - ret = ddr_perf_get_event_bitmap(config, &val); + reg_offset = DDRC_PERF_CFG(p_data->cfg_base, counter); + ret = ddr_perf_get_event_bitmap(config, &val, pmu); if (ret) return ret; @@ -463,11 +650,9 @@ static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags) } else { /* fixed event counter, clear counter value */ if (counter == DDRC_PERF_READ_COUNTER_IDX) - val = DDRC_FREERUN_READ_CNT_CLR; + ops->clear_read_freerun_counter(pmu); else - val = DDRC_FREERUN_WRITE_CNT_CLR; - - writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_CTRL); + ops->clear_write_freerun_counter(pmu); } hwc->state |= PERF_HES_STOPPED; @@ -512,17 +697,19 @@ static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags) static void cn10k_ddr_perf_pmu_enable(struct pmu *pmu) { struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu); + const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data; writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base + - DDRC_PERF_CNT_START_OP_CTRL); + p_data->cnt_start_op_ctrl); } static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu) { struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu); + const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data; writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base + - DDRC_PERF_CNT_END_OP_CTRL); + p_data->cnt_end_op_ctrl); } static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu) @@ 
-547,8 +734,123 @@ static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu) } } +static void ddr_pmu_enable_read_freerun(struct cn10k_ddr_pmu *pmu, bool enable) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = readq_relaxed(pmu->base + p_data->cnt_freerun_en); + if (enable) + val |= DDRC_PERF_FREERUN_READ_EN; + else + val &= ~DDRC_PERF_FREERUN_READ_EN; + + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_en); +} + +static void ddr_pmu_enable_write_freerun(struct cn10k_ddr_pmu *pmu, bool enable) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = readq_relaxed(pmu->base + p_data->cnt_freerun_en); + if (enable) + val |= DDRC_PERF_FREERUN_WRITE_EN; + else + val &= ~DDRC_PERF_FREERUN_WRITE_EN; + + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_en); +} + +static void ddr_pmu_read_clear_freerun(struct cn10k_ddr_pmu *pmu) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = DDRC_FREERUN_READ_CNT_CLR; + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl); +} + +static void ddr_pmu_write_clear_freerun(struct cn10k_ddr_pmu *pmu) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = DDRC_FREERUN_WRITE_CNT_CLR; + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl); +} + +static void ddr_pmu_overflow_hander(struct cn10k_ddr_pmu *pmu, int evt_idx) +{ + cn10k_ddr_perf_event_update_all(pmu); + cn10k_ddr_perf_pmu_disable(&pmu->pmu); + cn10k_ddr_perf_pmu_enable(&pmu->pmu); +} + +static void ddr_pmu_ody_enable_read_freerun(struct cn10k_ddr_pmu *pmu, + bool enable) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = readq_relaxed(pmu->base + p_data->cnt_freerun_ctrl); + if (enable) + val |= DDRC_PERF_FREERUN_READ_EN; + else + val &= ~DDRC_PERF_FREERUN_READ_EN; + + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl); +} + +static void ddr_pmu_ody_enable_write_freerun(struct cn10k_ddr_pmu *pmu, + bool enable) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = readq_relaxed(pmu->base + p_data->cnt_freerun_ctrl); + if (enable) + val |= DDRC_PERF_FREERUN_WRITE_EN; + else + val &= ~DDRC_PERF_FREERUN_WRITE_EN; + + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl); +} + +static void ddr_pmu_ody_read_clear_freerun(struct cn10k_ddr_pmu *pmu) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = DDRC_FREERUN_READ_CNT_CLR; + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_clr); +} + +static void ddr_pmu_ody_write_clear_freerun(struct cn10k_ddr_pmu *pmu) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = DDRC_FREERUN_WRITE_CNT_CLR; + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_clr); +} + +static void ddr_pmu_ody_overflow_hander(struct cn10k_ddr_pmu *pmu, int evt_idx) +{ + /* + * On reaching the maximum value of the counter, the counter freezes + * there. 
The particular event is updated and the respective counter + * is stopped and started again so that it starts counting from zero + */ + cn10k_ddr_perf_event_update(pmu->events[evt_idx]); + cn10k_ddr_perf_counter_stop(pmu, evt_idx); + cn10k_ddr_perf_counter_start(pmu, evt_idx); +} + static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu) { + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + const struct ddr_pmu_ops *ops = pmu->ops; struct perf_event *event; struct hw_perf_event *hwc; u64 prev_count, new_count; @@ -586,11 +888,9 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu) continue; value = cn10k_ddr_perf_read_counter(pmu, i); - if (value == DDRC_PERF_CNT_MAX_VALUE) { + if (value == p_data->counter_max_val) { pr_info("Counter-(%d) reached max value\n", i); - cn10k_ddr_perf_event_update_all(pmu); - cn10k_ddr_perf_pmu_disable(&pmu->pmu); - cn10k_ddr_perf_pmu_enable(&pmu->pmu); + ops->pmu_overflow_handler(pmu, i); } } @@ -629,11 +929,68 @@ static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; } +static const struct ddr_pmu_ops ddr_pmu_ops = { + .enable_read_freerun_counter = ddr_pmu_enable_read_freerun, + .enable_write_freerun_counter = ddr_pmu_enable_write_freerun, + .clear_read_freerun_counter = ddr_pmu_read_clear_freerun, + .clear_write_freerun_counter = ddr_pmu_write_clear_freerun, + .pmu_overflow_handler = ddr_pmu_overflow_hander, +}; + +#if defined(CONFIG_ACPI) || defined(CONFIG_OF) +static const struct ddr_pmu_platform_data cn10k_ddr_pmu_pdata = { + .counter_overflow_val = BIT_ULL(48), + .counter_max_val = GENMASK_ULL(48, 0), + .cnt_base = CN10K_DDRC_PERF_CNT_VALUE_BASE, + .cfg_base = CN10K_DDRC_PERF_CFG_BASE, + .cnt_op_mode_ctrl = CN10K_DDRC_PERF_CNT_OP_MODE_CTRL, + .cnt_start_op_ctrl = CN10K_DDRC_PERF_CNT_START_OP_CTRL, + .cnt_end_op_ctrl = CN10K_DDRC_PERF_CNT_END_OP_CTRL, + .cnt_end_status = CN10K_DDRC_PERF_CNT_END_STATUS, + .cnt_freerun_en = CN10K_DDRC_PERF_CNT_FREERUN_EN, + .cnt_freerun_ctrl = CN10K_DDRC_PERF_CNT_FREERUN_CTRL, + .cnt_freerun_clr = 0, + .cnt_value_wr_op = CN10K_DDRC_PERF_CNT_VALUE_WR_OP, + .cnt_value_rd_op = CN10K_DDRC_PERF_CNT_VALUE_RD_OP, + .is_cn10k = TRUE, +}; +#endif + +static const struct ddr_pmu_ops ddr_pmu_ody_ops = { + .enable_read_freerun_counter = ddr_pmu_ody_enable_read_freerun, + .enable_write_freerun_counter = ddr_pmu_ody_enable_write_freerun, + .clear_read_freerun_counter = ddr_pmu_ody_read_clear_freerun, + .clear_write_freerun_counter = ddr_pmu_ody_write_clear_freerun, + .pmu_overflow_handler = ddr_pmu_ody_overflow_hander, +}; + +#ifdef CONFIG_ACPI +static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = { + .counter_overflow_val = 0, + .counter_max_val = GENMASK_ULL(63, 0), + .cnt_base = ODY_DDRC_PERF_CNT_VALUE_BASE, + .cfg_base = ODY_DDRC_PERF_CFG_BASE, + .cnt_op_mode_ctrl = ODY_DDRC_PERF_CNT_OP_MODE_CTRL, + .cnt_start_op_ctrl = ODY_DDRC_PERF_CNT_START_OP_CTRL, + .cnt_end_op_ctrl = ODY_DDRC_PERF_CNT_END_OP_CTRL, + .cnt_end_status = ODY_DDRC_PERF_CNT_END_STATUS, + .cnt_freerun_en = 0, + .cnt_freerun_ctrl = ODY_DDRC_PERF_CNT_FREERUN_CTRL, + .cnt_freerun_clr = ODY_DDRC_PERF_CNT_FREERUN_CLR, + .cnt_value_wr_op = ODY_DDRC_PERF_CNT_VALUE_WR_OP, + .cnt_value_rd_op = ODY_DDRC_PERF_CNT_VALUE_RD_OP, + .is_ody = TRUE, +}; +#endif + static int cn10k_ddr_perf_probe(struct platform_device *pdev) { + const struct ddr_pmu_platform_data *dev_data; struct cn10k_ddr_pmu *ddr_pmu; struct resource *res; void __iomem *base; + bool is_cn10k; + bool is_ody; char 
*name; int ret; @@ -644,30 +1001,60 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev) ddr_pmu->dev = &pdev->dev; platform_set_drvdata(pdev, ddr_pmu); + dev_data = device_get_match_data(&pdev->dev); + if (!dev_data) { + dev_err(&pdev->dev, "Error: No device match data found\n"); + return -ENODEV; + } + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(base)) return PTR_ERR(base); ddr_pmu->base = base; - /* Setup the PMU counter to work in manual mode */ - writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base + - DDRC_PERF_CNT_OP_MODE_CTRL); - - ddr_pmu->pmu = (struct pmu) { - .module = THIS_MODULE, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - .task_ctx_nr = perf_invalid_context, - .attr_groups = cn10k_attr_groups, - .event_init = cn10k_ddr_perf_event_init, - .add = cn10k_ddr_perf_event_add, - .del = cn10k_ddr_perf_event_del, - .start = cn10k_ddr_perf_event_start, - .stop = cn10k_ddr_perf_event_stop, - .read = cn10k_ddr_perf_event_update, - .pmu_enable = cn10k_ddr_perf_pmu_enable, - .pmu_disable = cn10k_ddr_perf_pmu_disable, - }; + ddr_pmu->p_data = dev_data; + is_cn10k = ddr_pmu->p_data->is_cn10k; + is_ody = ddr_pmu->p_data->is_ody; + + if (is_cn10k) { + ddr_pmu->ops = &ddr_pmu_ops; + /* Setup the PMU counter to work in manual mode */ + writeq_relaxed(OP_MODE_CTRL_VAL_MANUAL, ddr_pmu->base + + ddr_pmu->p_data->cnt_op_mode_ctrl); + + ddr_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .attr_groups = cn10k_attr_groups, + .event_init = cn10k_ddr_perf_event_init, + .add = cn10k_ddr_perf_event_add, + .del = cn10k_ddr_perf_event_del, + .start = cn10k_ddr_perf_event_start, + .stop = cn10k_ddr_perf_event_stop, + .read = cn10k_ddr_perf_event_update, + .pmu_enable = cn10k_ddr_perf_pmu_enable, + .pmu_disable = cn10k_ddr_perf_pmu_disable, + }; + } + + if (is_ody) { + ddr_pmu->ops = &ddr_pmu_ody_ops; + + ddr_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .attr_groups = odyssey_attr_groups, + .event_init = cn10k_ddr_perf_event_init, + .add = cn10k_ddr_perf_event_add, + .del = cn10k_ddr_perf_event_del, + .start = cn10k_ddr_perf_event_start, + .stop = cn10k_ddr_perf_event_stop, + .read = cn10k_ddr_perf_event_update, + }; + } /* Choose this cpu to collect perf data */ ddr_pmu->cpu = raw_smp_processor_id(); @@ -677,8 +1064,8 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev) if (!name) return -ENOMEM; - hrtimer_init(&ddr_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - ddr_pmu->hrtimer.function = cn10k_ddr_pmu_timer_handler; + hrtimer_setup(&ddr_pmu->hrtimer, cn10k_ddr_pmu_timer_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); cpuhp_state_add_instance_nocalls( CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE, @@ -688,7 +1075,7 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev) if (ret) goto error; - pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start); + pr_info("DDR PMU Driver for ddrc@%llx\n", res->start); return 0; error: cpuhp_state_remove_instance_nocalls( @@ -710,7 +1097,7 @@ static void cn10k_ddr_perf_remove(struct platform_device *pdev) #ifdef CONFIG_OF static const struct of_device_id cn10k_ddr_pmu_of_match[] = { - { .compatible = "marvell,cn10k-ddr-pmu", }, + { .compatible = "marvell,cn10k-ddr-pmu", .data = &cn10k_ddr_pmu_pdata }, { }, }; MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match); @@ -718,7 +1105,8 @@ MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match); 
#ifdef CONFIG_ACPI static const struct acpi_device_id cn10k_ddr_pmu_acpi_match[] = { - {"MRVL000A", 0}, + {"MRVL000A", (kernel_ulong_t)&cn10k_ddr_pmu_pdata }, + {"MRVL000C", (kernel_ulong_t)&odyssey_ddr_pmu_pdata}, {}, }; MODULE_DEVICE_TABLE(acpi, cn10k_ddr_pmu_acpi_match); diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c index cda55ee35eee..51ccb0befa05 100644 --- a/drivers/perf/marvell_cn10k_tad_pmu.c +++ b/drivers/perf/marvell_cn10k_tad_pmu.c @@ -37,6 +37,15 @@ struct tad_pmu { DECLARE_BITMAP(counters_map, TAD_MAX_COUNTERS); }; +enum mrvl_tad_pmu_version { + TAD_PMU_V1 = 1, + TAD_PMU_V2, +}; + +struct tad_pmu_data { + int id; +}; + static int tad_pmu_cpuhp_state; static void tad_pmu_event_counter_read(struct perf_event *event) @@ -214,6 +223,24 @@ static const struct attribute_group tad_pmu_events_attr_group = { .attrs = tad_pmu_event_attrs, }; +static struct attribute *ody_tad_pmu_event_attrs[] = { + TAD_PMU_EVENT_ATTR(tad_req_msh_in_exlmn, 0x3), + TAD_PMU_EVENT_ATTR(tad_alloc_dtg, 0x1a), + TAD_PMU_EVENT_ATTR(tad_alloc_ltg, 0x1b), + TAD_PMU_EVENT_ATTR(tad_alloc_any, 0x1c), + TAD_PMU_EVENT_ATTR(tad_hit_dtg, 0x1d), + TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e), + TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f), + TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20), + TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xFF), + NULL +}; + +static const struct attribute_group ody_tad_pmu_events_attr_group = { + .name = "events", + .attrs = ody_tad_pmu_event_attrs, +}; + PMU_FORMAT_ATTR(event, "config:0-7"); static struct attribute *tad_pmu_format_attrs[] = { @@ -252,8 +279,16 @@ static const struct attribute_group *tad_pmu_attr_groups[] = { NULL }; +static const struct attribute_group *ody_tad_pmu_attr_groups[] = { + &ody_tad_pmu_events_attr_group, + &tad_pmu_format_attr_group, + &tad_pmu_cpumask_attr_group, + NULL +}; + static int tad_pmu_probe(struct platform_device *pdev) { + const struct tad_pmu_data *dev_data; struct device *dev = &pdev->dev; struct tad_region *regions; struct tad_pmu *tad_pmu; @@ -261,6 +296,7 @@ static int tad_pmu_probe(struct platform_device *pdev) u32 tad_pmu_page_size; u32 tad_page_size; u32 tad_cnt; + int version; int i, ret; char *name; @@ -270,6 +306,13 @@ static int tad_pmu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, tad_pmu); + dev_data = device_get_match_data(&pdev->dev); + if (!dev_data) { + dev_err(&pdev->dev, "Error: No device match data found\n"); + return -ENODEV; + } + version = dev_data->id; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(&pdev->dev, "Mem resource not found\n"); @@ -319,7 +362,6 @@ static int tad_pmu_probe(struct platform_device *pdev) tad_pmu->pmu = (struct pmu) { .module = THIS_MODULE, - .attr_groups = tad_pmu_attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, .task_ctx_nr = perf_invalid_context, @@ -332,6 +374,11 @@ static int tad_pmu_probe(struct platform_device *pdev) .read = tad_pmu_event_counter_read, }; + if (version == TAD_PMU_V1) + tad_pmu->pmu.attr_groups = tad_pmu_attr_groups; + else + tad_pmu->pmu.attr_groups = ody_tad_pmu_attr_groups; + tad_pmu->cpu = raw_smp_processor_id(); /* Register pmu instance for cpu hotplug */ @@ -360,16 +407,29 @@ static void tad_pmu_remove(struct platform_device *pdev) perf_pmu_unregister(&pmu->pmu); } +#if defined(CONFIG_OF) || defined(CONFIG_ACPI) +static const struct tad_pmu_data tad_pmu_data = { + .id = TAD_PMU_V1, +}; +#endif + +#ifdef CONFIG_ACPI +static const struct tad_pmu_data tad_pmu_v2_data = { + .id = TAD_PMU_V2, 
+}; +#endif + #ifdef CONFIG_OF static const struct of_device_id tad_pmu_of_match[] = { - { .compatible = "marvell,cn10k-tad-pmu", }, + { .compatible = "marvell,cn10k-tad-pmu", .data = &tad_pmu_data }, {}, }; #endif #ifdef CONFIG_ACPI static const struct acpi_device_id tad_pmu_acpi_match[] = { - {"MRVL000B", 0}, + {"MRVL000B", (kernel_ulong_t)&tad_pmu_data}, + {"MRVL000D", (kernel_ulong_t)&tad_pmu_v2_data}, {}, }; MODULE_DEVICE_TABLE(acpi, tad_pmu_acpi_match); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 1aa303f76cc7..7dd282da67ce 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -59,10 +59,11 @@ asm volatile(ALTERNATIVE( \ #define PERF_EVENT_FLAG_USER_ACCESS BIT(SYSCTL_USER_ACCESS) #define PERF_EVENT_FLAG_LEGACY BIT(SYSCTL_LEGACY) -PMU_FORMAT_ATTR(event, "config:0-47"); +PMU_FORMAT_ATTR(event, "config:0-55"); PMU_FORMAT_ATTR(firmware, "config:62-63"); static bool sbi_v2_available; +static bool sbi_v3_available; static DEFINE_STATIC_KEY_FALSE(sbi_pmu_snapshot_available); #define sbi_pmu_snapshot_available() \ static_branch_unlikely(&sbi_pmu_snapshot_available) @@ -99,6 +100,7 @@ static unsigned int riscv_pmu_irq; /* Cache the available counters in a bitmask */ static unsigned long cmask; +static int pmu_event_find_cache(u64 config); struct sbi_pmu_event_data { union { union { @@ -298,6 +300,66 @@ static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] }, }; +static int pmu_sbi_check_event_info(void) +{ + int num_events = ARRAY_SIZE(pmu_hw_event_map) + PERF_COUNT_HW_CACHE_MAX * + PERF_COUNT_HW_CACHE_OP_MAX * PERF_COUNT_HW_CACHE_RESULT_MAX; + struct riscv_pmu_event_info *event_info_shmem; + phys_addr_t base_addr; + int i, j, k, result = 0, count = 0; + struct sbiret ret; + + event_info_shmem = kcalloc(num_events, sizeof(*event_info_shmem), GFP_KERNEL); + if (!event_info_shmem) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) + event_info_shmem[count++].event_idx = pmu_hw_event_map[i].event_idx; + + for (i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) { + for (j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) { + for (k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) + event_info_shmem[count++].event_idx = + pmu_cache_event_map[i][j][k].event_idx; + } + } + + base_addr = __pa(event_info_shmem); + if (IS_ENABLED(CONFIG_32BIT)) + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_EVENT_GET_INFO, lower_32_bits(base_addr), + upper_32_bits(base_addr), count, 0, 0, 0); + else + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_EVENT_GET_INFO, base_addr, 0, + count, 0, 0, 0); + if (ret.error) { + result = -EOPNOTSUPP; + goto free_mem; + } + + for (i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) { + if (!(event_info_shmem[i].output & RISCV_PMU_EVENT_INFO_OUTPUT_MASK)) + pmu_hw_event_map[i].event_idx = -ENOENT; + } + + count = ARRAY_SIZE(pmu_hw_event_map); + + for (i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) { + for (j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) { + for (k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) { + if (!(event_info_shmem[count].output & + RISCV_PMU_EVENT_INFO_OUTPUT_MASK)) + pmu_cache_event_map[i][j][k].event_idx = -ENOENT; + count++; + } + } + } + +free_mem: + kfree(event_info_shmem); + + return result; +} + static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) { struct sbiret ret; @@ -315,6 +377,15 @@ static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) static void pmu_sbi_check_std_events(struct work_struct *work) { + int ret; + + if 
(sbi_v3_available) { + ret = pmu_sbi_check_event_info(); + if (ret) + pr_err("pmu_sbi_check_event_info failed with error %d\n", ret); + return; + } + for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) pmu_sbi_check_event(&pmu_hw_event_map[i]); @@ -339,8 +410,73 @@ static bool pmu_sbi_ctr_is_fw(int cidx) if (!info) return false; - return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false; + return info->type == SBI_PMU_CTR_TYPE_FW; +} + +int riscv_pmu_get_event_info(u32 type, u64 config, u64 *econfig) +{ + int ret = -ENOENT; + + switch (type) { + case PERF_TYPE_HARDWARE: + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + ret = pmu_hw_event_map[config].event_idx; + break; + case PERF_TYPE_HW_CACHE: + ret = pmu_event_find_cache(config); + break; + case PERF_TYPE_RAW: + /* + * As per SBI v0.3 specification, + * -- the upper 16 bits must be unused for a hardware raw event. + * As per SBI v2.0 specification, + * -- the upper 8 bits must be unused for a hardware raw event. + * Bits 63:62 are used to distinguish between raw events + * 00 - Hardware raw event + * 10 - SBI firmware events + * 11 - Risc-V platform specific firmware event + */ + switch (config >> 62) { + case 0: + if (sbi_v3_available) { + /* Return error any bits [56-63] is set as it is not allowed by the spec */ + if (!(config & ~RISCV_PMU_RAW_EVENT_V2_MASK)) { + if (econfig) + *econfig = config & RISCV_PMU_RAW_EVENT_V2_MASK; + ret = RISCV_PMU_RAW_EVENT_V2_IDX; + } + /* Return error any bits [48-63] is set as it is not allowed by the spec */ + } else if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) { + if (econfig) + *econfig = config & RISCV_PMU_RAW_EVENT_MASK; + ret = RISCV_PMU_RAW_EVENT_IDX; + } + break; + case 2: + ret = (config & 0xFFFF) | (SBI_PMU_EVENT_TYPE_FW << 16); + break; + case 3: + /* + * For Risc-V platform specific firmware events + * Event code - 0xFFFF + * Event data - raw event encoding + */ + ret = SBI_PMU_EVENT_TYPE_FW << 16 | RISCV_PLAT_FW_EVENT; + if (econfig) + *econfig = config & RISCV_PMU_PLAT_FW_EVENT_MASK; + break; + default: + break; + } + break; + default: + break; + } + + return ret; } +EXPORT_SYMBOL_GPL(riscv_pmu_get_event_info); /* * Returns the counter width of a programmable counter and number of hardware @@ -507,8 +643,6 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) { u32 type = event->attr.type; u64 config = event->attr.config; - u64 raw_config_val; - int ret; /* * Ensure we are finished checking standard hardware events for @@ -516,51 +650,7 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) */ flush_work(&check_std_events_work); - switch (type) { - case PERF_TYPE_HARDWARE: - if (config >= PERF_COUNT_HW_MAX) - return -EINVAL; - ret = pmu_hw_event_map[event->attr.config].event_idx; - break; - case PERF_TYPE_HW_CACHE: - ret = pmu_event_find_cache(config); - break; - case PERF_TYPE_RAW: - /* - * As per SBI specification, the upper 16 bits must be unused - * for a raw event. 
- * Bits 63:62 are used to distinguish between raw events - * 00 - Hardware raw event - * 10 - SBI firmware events - * 11 - Risc-V platform specific firmware event - */ - raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK; - switch (config >> 62) { - case 0: - ret = RISCV_PMU_RAW_EVENT_IDX; - *econfig = raw_config_val; - break; - case 2: - ret = (raw_config_val & 0xFFFF) | - (SBI_PMU_EVENT_TYPE_FW << 16); - break; - case 3: - /* - * For Risc-V platform specific firmware events - * Event code - 0xFFFF - * Event data - raw event encoding - */ - ret = SBI_PMU_EVENT_TYPE_FW << 16 | RISCV_PLAT_FW_EVENT; - *econfig = raw_config_val; - break; - } - break; - default: - ret = -ENOENT; - break; - } - - return ret; + return riscv_pmu_get_event_info(type, config, econfig); } static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu) @@ -875,8 +965,10 @@ static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt, for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) { ctr_start_mask = cpu_hw_evt->used_hw_ctrs[i] & ~ctr_ovf_mask; /* Start all the counters that did not overflow in a single shot */ - sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG, ctr_start_mask, - 0, 0, 0, 0); + if (ctr_start_mask) { + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG, + ctr_start_mask, 0, 0, 0, 0); + } } /* Reinitialize and start all the counter that overflowed */ @@ -1017,7 +1109,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) /* compute hardware counter index */ hidx = info->csr - CSR_CYCLE; - /* check if the corresponding bit is set in sscountovf or overflow mask in shmem */ + /* check if the corresponding bit is set in scountovf or overflow mask in shmem */ if (!(overflow & BIT(hidx))) continue; @@ -1315,7 +1407,7 @@ static int riscv_pmu_proc_user_access_handler(const struct ctl_table *table, return 0; } -static struct ctl_table sbi_pmu_sysctl_table[] = { +static const struct ctl_table sbi_pmu_sysctl_table[] = { { .procname = "perf_user_access", .data = &sysctl_perf_user_access, @@ -1450,6 +1542,9 @@ static int __init pmu_sbi_devinit(void) if (sbi_spec_version >= sbi_mk_version(2, 0)) sbi_v2_available = true; + if (sbi_spec_version >= sbi_mk_version(3, 0)) + sbi_v3_available = true; + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING, "perf/riscv/pmu:starting", pmu_sbi_starting_cpu, pmu_sbi_dying_cpu); diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index cadd60221b8f..6ed4707bd6bb 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -752,9 +752,8 @@ static int tx2_uncore_pmu_add_dev(struct tx2_uncore_pmu *tx2_pmu) tx2_pmu->cpu = cpu; if (tx2_pmu->hrtimer_callback) { - hrtimer_init(&tx2_pmu->hrtimer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); - tx2_pmu->hrtimer.function = tx2_pmu->hrtimer_callback; + hrtimer_setup(&tx2_pmu->hrtimer, tx2_pmu->hrtimer_callback, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); } ret = tx2_uncore_pmu_register(tx2_pmu); |
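Note on the hrtimer conversion: the thunderx2 hunk above, like the cn10k DDR PMU hunk earlier in this diff, replaces the two-step hrtimer_init() plus manual .function assignment with a single hrtimer_setup() call that takes the callback up front. Below is a minimal sketch of the converted pattern, using a hypothetical demo_pmu context rather than either driver's real structures; only the hrtimer calls themselves are taken from the diff.

#include <linux/hrtimer.h>
#include <linux/ktime.h>

/* Hypothetical PMU context standing in for tx2_uncore_pmu / cn10k_ddr_pmu. */
struct demo_pmu {
	struct hrtimer hrtimer;
};

static enum hrtimer_restart demo_pmu_timer_handler(struct hrtimer *t)
{
	/* Poll or accumulate counters here, then re-arm for another 100 ms. */
	hrtimer_forward_now(t, ms_to_ktime(100));
	return HRTIMER_RESTART;
}

static void demo_pmu_timer_init(struct demo_pmu *pmu)
{
	/*
	 * Previously:
	 *   hrtimer_init(&pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	 *   pmu->hrtimer.function = demo_pmu_timer_handler;
	 *
	 * hrtimer_setup() supplies the callback at initialisation time, so
	 * the timer is never observable in a half-initialised state.
	 */
	hrtimer_setup(&pmu->hrtimer, demo_pmu_timer_handler, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL);
}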

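Note on the match-data dispatch pattern: several drivers touched in this diff (HiSilicon SLLC and UC, Marvell cn10k DDR and TAD) move per-variant register layouts into data attached to the OF/ACPI ID tables and fetch it in probe with device_get_match_data(). Below is a minimal, self-contained sketch of that pattern using hypothetical names (demo_pmu, "DEMO0001", and the field layout are illustrative); only device_get_match_data() and the kernel_ulong_t cast in the ACPI table are taken from the diff.

#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/property.h>

/* Hypothetical per-variant data; the real drivers keep register offsets here. */
struct demo_pmu_pdata {
	u64 cnt_base;
	bool is_v2;
};

static const struct demo_pmu_pdata demo_v1_pdata = { .cnt_base = 0x8080 };
static const struct demo_pmu_pdata demo_v2_pdata = { .cnt_base = 0x20000, .is_v2 = true };

static int demo_pmu_probe(struct platform_device *pdev)
{
	const struct demo_pmu_pdata *pdata;

	/* Works for both OF and ACPI matches; returns NULL if nothing matched. */
	pdata = device_get_match_data(&pdev->dev);
	if (!pdata)
		return -ENODEV;

	dev_info(&pdev->dev, "counter base %#llx\n", pdata->cnt_base);
	return 0;
}

static const struct of_device_id demo_pmu_of_match[] = {
	{ .compatible = "vendor,demo-pmu", .data = &demo_v1_pdata },
	{ }
};
MODULE_DEVICE_TABLE(of, demo_pmu_of_match);

static const struct acpi_device_id demo_pmu_acpi_match[] = {
	/* ACPI ID tables carry the pointer as a kernel_ulong_t. */
	{ "DEMO0001", (kernel_ulong_t)&demo_v1_pdata },
	{ "DEMO0002", (kernel_ulong_t)&demo_v2_pdata },
	{ }
};
MODULE_DEVICE_TABLE(acpi, demo_pmu_acpi_match);

static struct platform_driver demo_pmu_driver = {
	.driver = {
		.name = "demo_pmu",
		.of_match_table = demo_pmu_of_match,
		.acpi_match_table = demo_pmu_acpi_match,
	},
	.probe = demo_pmu_probe,
};
module_platform_driver(demo_pmu_driver);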