diff options
Diffstat (limited to 'drivers/perf')
50 files changed, 7476 insertions, 1404 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index ec6e0d9194a1..4e268de351c4 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -48,6 +48,13 @@ config ARM_CMN Support for PMU events monitoring on the Arm CMN-600 Coherent Mesh Network interconnect. +config ARM_NI + tristate "Arm NI-700 PMU support" + depends on ARM64 || COMPILE_TEST + help + Support for PMU events monitoring on the Arm NI-700 Network-on-Chip + interconnect and family. + config ARM_PMU depends on ARM || ARM64 bool "ARM PMU framework" @@ -56,6 +63,18 @@ config ARM_PMU Say y if you want to use CPU performance monitors on ARM-based systems. +config ARM_V6_PMU + depends on ARM_PMU && (CPU_V6 || CPU_V6K) + def_bool y + +config ARM_V7_PMU + depends on ARM_PMU && CPU_V7 + def_bool y + +config ARM_XSCALE_PMU + depends on ARM_PMU && CPU_XSCALE + def_bool y + config RISCV_PMU depends on RISCV bool "RISC-V PMU framework" @@ -86,6 +105,30 @@ config RISCV_PMU_SBI full perf feature support i.e. counter overflow, privilege mode filtering, counter configuration. +config STARFIVE_STARLINK_PMU + depends on ARCH_STARFIVE || COMPILE_TEST + depends on 64BIT + bool "StarFive StarLink PMU" + help + Provide support for StarLink Performance Monitor Unit. + StarLink Performance Monitor Unit integrates one or more cores with + an L3 memory system. The L3 cache events are added into perf event + subsystem, allowing monitoring of various L3 cache perf events. + +config ANDES_CUSTOM_PMU + bool "Andes custom PMU support" + depends on ARCH_RENESAS && RISCV_ALTERNATIVE && RISCV_PMU_SBI + default y + help + The Andes cores implement the PMU overflow extension very + similar to the standard Sscofpmf and Smcntrpmf extension. + + This will patch the overflow and pending CSRs and handle the + non-standard behaviour via the regular SBI PMU driver and + interface. + + If you don't know what to do here, say "Y". 
+ config ARM_PMU_ACPI depends on ARM_PMU && ACPI def_bool y @@ -241,4 +284,11 @@ config CXL_PMU If unsure say 'm'. +config MARVELL_PEM_PMU + tristate "MARVELL PEM PMU Support" + depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) + help + Enable support for PCIe Interface performance monitoring + on Marvell platform. + endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index a06338e3401c..de71d2574857 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -3,9 +3,13 @@ obj-$(CONFIG_ARM_CCI_PMU) += arm-cci.o obj-$(CONFIG_ARM_CCN) += arm-ccn.o obj-$(CONFIG_ARM_CMN) += arm-cmn.o obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o +obj-$(CONFIG_ARM_NI) += arm-ni.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o +obj-$(CONFIG_ARM_V6_PMU) += arm_v6_pmu.o +obj-$(CONFIG_ARM_V7_PMU) += arm_v7_pmu.o +obj-$(CONFIG_ARM_XSCALE_PMU) += arm_xscale_pmu.o obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o @@ -15,12 +19,14 @@ obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o +obj-$(CONFIG_STARFIVE_STARLINK_PMU) += starfive_starlink_pmu.o obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o +obj-$(CONFIG_MARVELL_PEM_PMU) += marvell_pem_pmu.o obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o obj-$(CONFIG_DWC_PCIE_PMU) += dwc_pcie_pmu.o diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c index 
19d459a36be5..99a0ef9817e0 100644 --- a/drivers/perf/alibaba_uncore_drw_pmu.c +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -236,24 +236,16 @@ static const struct attribute_group ali_drw_pmu_cpumask_attr_group = { .attrs = ali_drw_pmu_cpumask_attrs, }; -static ssize_t ali_drw_pmu_identifier_show(struct device *dev, - struct device_attribute *attr, - char *page) -{ - return sysfs_emit(page, "%s\n", "ali_drw_pmu"); -} - static umode_t ali_drw_pmu_identifier_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { return attr->mode; } -static struct device_attribute ali_drw_pmu_identifier_attr = - __ATTR(identifier, 0444, ali_drw_pmu_identifier_show, NULL); +static DEVICE_STRING_ATTR_RO(ali_drw_pmu_identifier, 0444, "ali_drw_pmu"); static struct attribute *ali_drw_pmu_identifier_attrs[] = { - &ali_drw_pmu_identifier_attr.attr, + &dev_attr_ali_drw_pmu_identifier.attr.attr, NULL }; @@ -408,7 +400,7 @@ static irqreturn_t ali_drw_pmu_isr(int irq_num, void *data) } /* clear common counter intr status */ - clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, 1); + clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, status); writel(clr_status, drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR); } @@ -709,6 +701,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev) drw_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = &pdev->dev, .task_ctx_nr = perf_invalid_context, .event_init = ali_drw_pmu_event_init, .add = ali_drw_pmu_add, @@ -729,7 +722,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev) return ret; } -static int ali_drw_pmu_remove(struct platform_device *pdev) +static void ali_drw_pmu_remove(struct platform_device *pdev) { struct ali_drw_pmu *drw_pmu = platform_get_drvdata(pdev); @@ -739,8 +732,6 @@ static int ali_drw_pmu_remove(struct platform_device *pdev) ali_drw_pmu_uninit_irq(drw_pmu); perf_pmu_unregister(&drw_pmu->pmu); - - return 0; } static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) @@ 
-748,18 +739,14 @@ static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) struct ali_drw_pmu_irq *irq; struct ali_drw_pmu *drw_pmu; unsigned int target; - int ret; - cpumask_t node_online_cpus; irq = hlist_entry_safe(node, struct ali_drw_pmu_irq, node); if (cpu != irq->cpu) return 0; - ret = cpumask_and(&node_online_cpus, - cpumask_of_node(cpu_to_node(cpu)), cpu_online_mask); - if (ret) - target = cpumask_any_but(&node_online_cpus, cpu); - else + target = cpumask_any_and_but(cpumask_of_node(cpu_to_node(cpu)), + cpu_online_mask, cpu); + if (target >= nr_cpu_ids) target = cpumask_any_but(cpu_online_mask, cpu); if (target >= nr_cpu_ids) diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c index bbc7285fd934..07446d784a1a 100644 --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -492,6 +492,7 @@ int meson_ddr_pmu_create(struct platform_device *pdev) *pmu = (struct ddr_pmu) { .pmu = { .module = THIS_MODULE, + .parent = &pdev->dev, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, .task_ctx_nr = perf_invalid_context, .attr_groups = attr_groups, diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c index 15d52ab3276a..f33e9a456e85 100644 --- a/drivers/perf/amlogic/meson_g12_ddr_pmu.c +++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c @@ -355,11 +355,9 @@ static int g12_ddr_pmu_probe(struct platform_device *pdev) return meson_ddr_pmu_create(pdev); } -static int g12_ddr_pmu_remove(struct platform_device *pdev) +static void g12_ddr_pmu_remove(struct platform_device *pdev) { meson_ddr_pmu_remove(pdev); - - return 0; } static const struct of_device_id meson_ddr_pmu_dt_match[] = { diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index f322e5ca1114..06fd317529fc 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -47,46 +47,79 @@ * implementations, we'll have to introduce 
per cpu-type tables. */ enum m1_pmu_events { - M1_PMU_PERFCTR_UNKNOWN_01 = 0x01, - M1_PMU_PERFCTR_CPU_CYCLES = 0x02, - M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c, - M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d, - M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e, - M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f, - M1_PMU_PERFCTR_UNKNOWN_90 = 0x90, - M1_PMU_PERFCTR_UNKNOWN_93 = 0x93, - M1_PMU_PERFCTR_UNKNOWN_94 = 0x94, - M1_PMU_PERFCTR_UNKNOWN_95 = 0x95, - M1_PMU_PERFCTR_UNKNOWN_96 = 0x96, - M1_PMU_PERFCTR_UNKNOWN_97 = 0x97, - M1_PMU_PERFCTR_UNKNOWN_98 = 0x98, - M1_PMU_PERFCTR_UNKNOWN_99 = 0x99, - M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a, - M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b, - M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c, - M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, - M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf, - M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0, - M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1, - M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4, - M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5, - M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6, - M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8, - M1_PMU_PERFCTR_UNKNOWN_ca = 0xca, - M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb, - M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, - M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, - M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, - M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, - M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, - M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, + M1_PMU_PERFCTR_RETIRE_UOP = 0x1, + M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE = 0x2, + M1_PMU_PERFCTR_L1I_TLB_FILL = 0x4, + M1_PMU_PERFCTR_L1D_TLB_FILL = 0x5, + M1_PMU_PERFCTR_MMU_TABLE_WALK_INSTRUCTION = 0x7, + M1_PMU_PERFCTR_MMU_TABLE_WALK_DATA = 0x8, + M1_PMU_PERFCTR_L2_TLB_MISS_INSTRUCTION = 0xa, + M1_PMU_PERFCTR_L2_TLB_MISS_DATA = 0xb, + M1_PMU_PERFCTR_MMU_VIRTUAL_MEMORY_FAULT_NONSPEC = 0xd, + M1_PMU_PERFCTR_SCHEDULE_UOP = 0x52, + M1_PMU_PERFCTR_INTERRUPT_PENDING = 0x6c, + M1_PMU_PERFCTR_MAP_STALL_DISPATCH = 0x70, + M1_PMU_PERFCTR_MAP_REWIND = 0x75, + M1_PMU_PERFCTR_MAP_STALL = 0x76, + M1_PMU_PERFCTR_MAP_INT_UOP = 0x7c, + M1_PMU_PERFCTR_MAP_LDST_UOP = 0x7d, + M1_PMU_PERFCTR_MAP_SIMD_UOP = 0x7e, + M1_PMU_PERFCTR_FLUSH_RESTART_OTHER_NONSPEC = 0x84, + 
M1_PMU_PERFCTR_INST_ALL = 0x8c, + M1_PMU_PERFCTR_INST_BRANCH = 0x8d, + M1_PMU_PERFCTR_INST_BRANCH_CALL = 0x8e, + M1_PMU_PERFCTR_INST_BRANCH_RET = 0x8f, + M1_PMU_PERFCTR_INST_BRANCH_TAKEN = 0x90, + M1_PMU_PERFCTR_INST_BRANCH_INDIR = 0x93, + M1_PMU_PERFCTR_INST_BRANCH_COND = 0x94, + M1_PMU_PERFCTR_INST_INT_LD = 0x95, + M1_PMU_PERFCTR_INST_INT_ST = 0x96, + M1_PMU_PERFCTR_INST_INT_ALU = 0x97, + M1_PMU_PERFCTR_INST_SIMD_LD = 0x98, + M1_PMU_PERFCTR_INST_SIMD_ST = 0x99, + M1_PMU_PERFCTR_INST_SIMD_ALU = 0x9a, + M1_PMU_PERFCTR_INST_LDST = 0x9b, + M1_PMU_PERFCTR_INST_BARRIER = 0x9c, + M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, + M1_PMU_PERFCTR_L1D_TLB_ACCESS = 0xa0, + M1_PMU_PERFCTR_L1D_TLB_MISS = 0xa1, + M1_PMU_PERFCTR_L1D_CACHE_MISS_ST = 0xa2, + M1_PMU_PERFCTR_L1D_CACHE_MISS_LD = 0xa3, + M1_PMU_PERFCTR_LD_UNIT_UOP = 0xa6, + M1_PMU_PERFCTR_ST_UNIT_UOP = 0xa7, + M1_PMU_PERFCTR_L1D_CACHE_WRITEBACK = 0xa8, + M1_PMU_PERFCTR_LDST_X64_UOP = 0xb1, + M1_PMU_PERFCTR_LDST_XPG_UOP = 0xb2, + M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_SUCC = 0xb3, + M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_FAIL = 0xb4, + M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC = 0xbf, + M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC = 0xc0, + M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC = 0xc1, + M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC = 0xc4, + M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC = 0xc5, + M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC = 0xc6, + M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC = 0xc8, + M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC = 0xca, + M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC = 0xcb, + M1_PMU_PERFCTR_L1I_TLB_MISS_DEMAND = 0xd4, + M1_PMU_PERFCTR_MAP_DISPATCH_BUBBLE = 0xd6, + M1_PMU_PERFCTR_L1I_CACHE_MISS_DEMAND = 0xdb, + M1_PMU_PERFCTR_FETCH_RESTART = 0xde, + M1_PMU_PERFCTR_ST_NT_UOP = 0xe5, + M1_PMU_PERFCTR_LD_NT_UOP = 0xe6, + M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, + M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, + M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, + M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, + M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, + 
M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, /* * From this point onwards, these are not actual HW events, * but attributes that get stored in hw->config_base. */ - M1_PMU_CFG_COUNT_USER = BIT(8), - M1_PMU_CFG_COUNT_KERNEL = BIT(9), + M1_PMU_CFG_COUNT_USER = BIT(8), + M1_PMU_CFG_COUNT_KERNEL = BIT(9), }; /* @@ -96,46 +129,47 @@ enum m1_pmu_events { * counters had strange affinities. */ static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = { - [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, - [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7), - [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0), - [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1), - [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7), - [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7), - [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), - [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, - [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, - [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, - [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, - [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, + [0 ... 
M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, + [M1_PMU_PERFCTR_RETIRE_UOP] = BIT(7), + [M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE] = ANY_BUT_0_1 | BIT(0), + [M1_PMU_PERFCTR_INST_ALL] = BIT(7) | BIT(1), + [M1_PMU_PERFCTR_INST_BRANCH] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_CALL] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_RET] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_TAKEN] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_INDIR] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_COND] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_INT_LD] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_INT_ST] = BIT(7), + [M1_PMU_PERFCTR_INST_INT_ALU] = BIT(7), + [M1_PMU_PERFCTR_INST_SIMD_LD] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_SIMD_ST] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_SIMD_ALU] = BIT(7), + [M1_PMU_PERFCTR_INST_LDST] = BIT(7), + [M1_PMU_PERFCTR_INST_BARRIER] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), + [M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, + [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, }; static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS, - /* No idea about the rest yet */ + [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE, + [PERF_COUNT_HW_INSTRUCTIONS] = 
M1_PMU_PERFCTR_INST_ALL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC, }; /* sysfs definitions */ @@ -154,8 +188,8 @@ static ssize_t m1_pmu_events_sysfs_show(struct device *dev, PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config) static struct attribute *m1_pmu_event_attrs[] = { - M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES), - M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS), + M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE), + M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INST_ALL), NULL, }; @@ -400,7 +434,7 @@ static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu) regs = get_irq_regs(); - for (idx = 0; idx < cpu_pmu->num_events; idx++) { + for_each_set_bit(idx, cpu_pmu->cntr_mask, M1_PMU_NR_COUNTERS) { struct perf_event *event = cpuc->events[idx]; struct perf_sample_data data; @@ -560,7 +594,7 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu, u32 flags) cpu_pmu->reset = m1_pmu_reset; cpu_pmu->set_event_filter = m1_pmu_set_event_filter; - cpu_pmu->num_events = M1_PMU_NR_COUNTERS; + bitmap_set(cpu_pmu->cntr_mask, 0, M1_PMU_NR_COUNTERS); cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group; return 0; diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 61de861eaf91..1cc3214d6b6d 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -127,8 +127,6 @@ enum cci_models { static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask); -static ssize_t __maybe_unused cci_pmu_format_show(struct device *dev, - struct device_attribute *attr, char *buf); static ssize_t __maybe_unused cci_pmu_event_show(struct device *dev, struct device_attribute *attr, char *buf); @@ -138,7 +136,7 @@ static ssize_t __maybe_unused cci_pmu_event_show(struct device *dev, })[0].attr.attr #define 
CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \ - CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config) + CCI_EXT_ATTR_ENTRY(_name, device_show_string, _config) #define CCI_EVENT_EXT_ATTR_ENTRY(_name, _config) \ CCI_EXT_ATTR_ENTRY(_name, cci_pmu_event_show, (unsigned long)_config) @@ -688,14 +686,6 @@ static void __cci_pmu_disable(struct cci_pmu *cci_pmu) writel(val, cci_pmu->ctrl_base + CCI_PMCR); } -static ssize_t cci_pmu_format_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dev_ext_attribute *eattr = container_of(attr, - struct dev_ext_attribute, attr); - return sysfs_emit(buf, "%s\n", (char *)eattr->var); -} - static ssize_t cci_pmu_event_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1409,6 +1399,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) cci_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = &pdev->dev, .name = cci_pmu->model->name, .task_ctx_nr = perf_invalid_context, .pmu_enable = cci_pmu_enable, @@ -1697,16 +1688,14 @@ error_pmu_init: return ret; } -static int cci_pmu_remove(struct platform_device *pdev) +static void cci_pmu_remove(struct platform_device *pdev) { if (!g_cci_pmu) - return 0; + return; cpuhp_remove_state(CPUHP_AP_PERF_ARM_CCI_ONLINE); perf_pmu_unregister(&g_cci_pmu->pmu); g_cci_pmu = NULL; - - return 0; } static struct platform_driver cci_pmu_driver = { diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 728d13d8e98a..d5fcea3d4328 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -215,18 +215,9 @@ static void arm_ccn_pmu_config_set(u64 *config, u32 node_xp, u32 type, u32 port) *config |= (node_xp << 0) | (type << 8) | (port << 24); } -static ssize_t arm_ccn_pmu_format_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dev_ext_attribute *ea = container_of(attr, - struct dev_ext_attribute, attr); - - return sysfs_emit(buf, "%s\n", (char *)ea->var); -} - #define 
CCN_FORMAT_ATTR(_name, _config) \ struct dev_ext_attribute arm_ccn_pmu_format_attr_##_name = \ - { __ATTR(_name, S_IRUGO, arm_ccn_pmu_format_show, \ + { __ATTR(_name, S_IRUGO, device_show_string, \ NULL), _config } static CCN_FORMAT_ATTR(node, "config:0-7"); @@ -1265,6 +1256,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) /* Perf driver registration */ ccn->dt.pmu = (struct pmu) { .module = THIS_MODULE, + .parent = ccn->dev, .attr_groups = arm_ccn_pmu_attr_groups, .task_ctx_nr = perf_invalid_context, .event_init = arm_ccn_pmu_event_init, @@ -1515,13 +1507,11 @@ static int arm_ccn_probe(struct platform_device *pdev) return arm_ccn_pmu_init(ccn); } -static int arm_ccn_remove(struct platform_device *pdev) +static void arm_ccn_remove(struct platform_device *pdev) { struct arm_ccn *ccn = platform_get_drvdata(pdev); arm_ccn_pmu_cleanup(ccn); - - return 0; } static const struct of_device_id arm_ccn_match[] = { @@ -1571,4 +1561,5 @@ module_init(arm_ccn_init); module_exit(arm_ccn_exit); MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>"); +MODULE_DESCRIPTION("ARM CCN (Cache Coherent Network) Performance Monitor Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 7e3aa7e2345f..ef959e66db7c 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -24,14 +24,6 @@ #define CMN_NI_NODE_ID GENMASK_ULL(31, 16) #define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32) -#define CMN_NODEID_DEVID(reg) ((reg) & 3) -#define CMN_NODEID_EXT_DEVID(reg) ((reg) & 1) -#define CMN_NODEID_PID(reg) (((reg) >> 2) & 1) -#define CMN_NODEID_EXT_PID(reg) (((reg) >> 1) & 3) -#define CMN_NODEID_1x1_PID(reg) (((reg) >> 2) & 7) -#define CMN_NODEID_X(reg, bits) ((reg) >> (3 + (bits))) -#define CMN_NODEID_Y(reg, bits) (((reg) >> 3) & ((1U << (bits)) - 1)) - #define CMN_CHILD_INFO 0x0080 #define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0) #define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16) @@ -43,6 +35,9 @@ #define CMN_MAX_XPS (CMN_MAX_DIMENSION * 
CMN_MAX_DIMENSION) #define CMN_MAX_DTMS (CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4) +/* Currently XPs are the node type we can have most of; others top out at 128 */ +#define CMN_MAX_NODES_PER_EVENT CMN_MAX_XPS + /* The CFG node has various info besides the discovery tree */ #define CMN_CFGM_PERIPH_ID_01 0x0008 #define CMN_CFGM_PID0_PART_0 GENMASK_ULL(7, 0) @@ -50,24 +45,28 @@ #define CMN_CFGM_PERIPH_ID_23 0x0010 #define CMN_CFGM_PID2_REVISION GENMASK_ULL(7, 4) -#define CMN_CFGM_INFO_GLOBAL 0x900 +#define CMN_CFGM_INFO_GLOBAL 0x0900 #define CMN_INFO_MULTIPLE_DTM_EN BIT_ULL(63) #define CMN_INFO_RSP_VC_NUM GENMASK_ULL(53, 52) #define CMN_INFO_DAT_VC_NUM GENMASK_ULL(51, 50) +#define CMN_INFO_DEVICE_ISO_ENABLE BIT_ULL(44) -#define CMN_CFGM_INFO_GLOBAL_1 0x908 +#define CMN_CFGM_INFO_GLOBAL_1 0x0908 #define CMN_INFO_SNP_VC_NUM GENMASK_ULL(3, 2) #define CMN_INFO_REQ_VC_NUM GENMASK_ULL(1, 0) /* XPs also have some local topology info which has uses too */ #define CMN_MXP__CONNECT_INFO(p) (0x0008 + 8 * (p)) -#define CMN__CONNECT_INFO_DEVICE_TYPE GENMASK_ULL(4, 0) +#define CMN__CONNECT_INFO_DEVICE_TYPE GENMASK_ULL(5, 0) #define CMN_MAX_PORTS 6 #define CI700_CONNECT_INFO_P2_5_OFFSET 0x10 /* PMU registers occupy the 3rd 4KB page of each node's region */ #define CMN_PMU_OFFSET 0x2000 +/* ...except when they don't :( */ +#define CMN_S3_DTM_OFFSET 0xa000 +#define CMN_S3_PMU_OFFSET 0xd900 /* For most nodes, this is all there is */ #define CMN_PMU_EVENT_SEL 0x000 @@ -78,7 +77,8 @@ /* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */ #define CMN__PMU_OCCUP1_ID GENMASK_ULL(34, 32) -/* HN-Ps are weird... 
*/ +/* Some types are designed to coexist with another device in the same node */ +#define CMN_CCLA_PMU_EVENT_SEL 0x008 #define CMN_HNP_PMU_EVENT_SEL 0x008 /* DTMs live in the PMU space of XP registers */ @@ -123,27 +123,28 @@ /* The DTC node is where the magic happens */ #define CMN_DT_DTC_CTL 0x0a00 #define CMN_DT_DTC_CTL_DT_EN BIT(0) +#define CMN_DT_DTC_CTL_CG_DISABLE BIT(10) /* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */ #define _CMN_DT_CNT_REG(n) ((((n) / 2) * 4 + (n) % 2) * 4) -#define CMN_DT_PMEVCNT(n) (CMN_PMU_OFFSET + _CMN_DT_CNT_REG(n)) -#define CMN_DT_PMCCNTR (CMN_PMU_OFFSET + 0x40) +#define CMN_DT_PMEVCNT(dtc, n) ((dtc)->pmu_base + _CMN_DT_CNT_REG(n)) +#define CMN_DT_PMCCNTR(dtc) ((dtc)->pmu_base + 0x40) -#define CMN_DT_PMEVCNTSR(n) (CMN_PMU_OFFSET + 0x50 + _CMN_DT_CNT_REG(n)) -#define CMN_DT_PMCCNTRSR (CMN_PMU_OFFSET + 0x90) +#define CMN_DT_PMEVCNTSR(dtc, n) ((dtc)->pmu_base + 0x50 + _CMN_DT_CNT_REG(n)) +#define CMN_DT_PMCCNTRSR(dtc) ((dtc)->pmu_base + 0x90) -#define CMN_DT_PMCR (CMN_PMU_OFFSET + 0x100) +#define CMN_DT_PMCR(dtc) ((dtc)->pmu_base + 0x100) #define CMN_DT_PMCR_PMU_EN BIT(0) #define CMN_DT_PMCR_CNTR_RST BIT(5) #define CMN_DT_PMCR_OVFL_INTR_EN BIT(6) -#define CMN_DT_PMOVSR (CMN_PMU_OFFSET + 0x118) -#define CMN_DT_PMOVSR_CLR (CMN_PMU_OFFSET + 0x120) +#define CMN_DT_PMOVSR(dtc) ((dtc)->pmu_base + 0x118) +#define CMN_DT_PMOVSR_CLR(dtc) ((dtc)->pmu_base + 0x120) -#define CMN_DT_PMSSR (CMN_PMU_OFFSET + 0x128) +#define CMN_DT_PMSSR(dtc) ((dtc)->pmu_base + 0x128) #define CMN_DT_PMSSR_SS_STATUS(n) BIT(n) -#define CMN_DT_PMSRR (CMN_PMU_OFFSET + 0x130) +#define CMN_DT_PMSRR(dtc) ((dtc)->pmu_base + 0x130) #define CMN_DT_PMSRR_SS_REQ BIT(0) #define CMN_DT_NUM_COUNTERS 8 @@ -174,9 +175,8 @@ #define CMN_CONFIG_WP_COMBINE GENMASK_ULL(30, 27) #define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48) #define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51) -/* Note that we don't yet support the tertiary match group on newer IPs */ -#define 
CMN_CONFIG_WP_GRP BIT_ULL(56) -#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57) +#define CMN_CONFIG_WP_GRP GENMASK_ULL(57, 56) +#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(58) #define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0) #define CMN_CONFIG2_WP_MASK GENMASK_ULL(63, 0) @@ -199,10 +199,11 @@ enum cmn_model { CMN650 = 2, CMN700 = 4, CI700 = 8, + CMNS3 = 16, /* ...and then we can use bitmap tricks for commonality */ CMN_ANY = -1, NOT_CMN600 = -2, - CMN_650ON = CMN650 | CMN700, + CMN_650ON = CMN650 | CMN700 | CMNS3, }; /* Actual part numbers and revision IDs defined by the hardware */ @@ -211,6 +212,7 @@ enum cmn_part { PART_CMN650 = 0x436, PART_CMN700 = 0x43c, PART_CI700 = 0x43a, + PART_CMN_S3 = 0x43e, }; /* CMN-600 r0px shouldn't exist in silicon, thankfully */ @@ -262,6 +264,7 @@ enum cmn_node_type { CMN_TYPE_HNS = 0x200, CMN_TYPE_HNS_MPAM_S, CMN_TYPE_HNS_MPAM_NS, + CMN_TYPE_APB = 0x1000, /* Not a real node type */ CMN_TYPE_WP = 0x7770 }; @@ -281,8 +284,11 @@ struct arm_cmn_node { u16 id, logid; enum cmn_node_type type; + /* XP properties really, but replicated to children for convenience */ u8 dtm; s8 dtc; + u8 portid_bits:4; + u8 deviceid_bits:4; /* DN/HN-F/CXHA */ struct { u8 val : 4; @@ -308,8 +314,9 @@ struct arm_cmn_dtm { struct arm_cmn_dtc { void __iomem *base; + void __iomem *pmu_base; int irq; - int irq_friend; + s8 irq_friend; bool cc_active; struct perf_event *counters[CMN_DT_NUM_COUNTERS]; @@ -358,49 +365,33 @@ struct arm_cmn { static int arm_cmn_hp_state; struct arm_cmn_nodeid { - u8 x; - u8 y; u8 port; u8 dev; }; static int arm_cmn_xyidbits(const struct arm_cmn *cmn) { - return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2); + return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1)); } -static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id) +static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn_node *dn) { struct arm_cmn_nodeid nid; - if (cmn->num_xps == 1) { - nid.x = 0; - nid.y = 0; - nid.port = CMN_NODEID_1x1_PID(id); - nid.dev = 
CMN_NODEID_DEVID(id); - } else { - int bits = arm_cmn_xyidbits(cmn); - - nid.x = CMN_NODEID_X(id, bits); - nid.y = CMN_NODEID_Y(id, bits); - if (cmn->ports_used & 0xc) { - nid.port = CMN_NODEID_EXT_PID(id); - nid.dev = CMN_NODEID_EXT_DEVID(id); - } else { - nid.port = CMN_NODEID_PID(id); - nid.dev = CMN_NODEID_DEVID(id); - } - } + nid.dev = dn->id & ((1U << dn->deviceid_bits) - 1); + nid.port = (dn->id >> dn->deviceid_bits) & ((1U << dn->portid_bits) - 1); return nid; } static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn, const struct arm_cmn_node *dn) { - struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); - int xp_idx = cmn->mesh_x * nid.y + nid.x; + int id = dn->id >> (dn->portid_bits + dn->deviceid_bits); + int bits = arm_cmn_xyidbits(cmn); + int x = id >> bits; + int y = id & ((1U << bits) - 1); - return cmn->xps + xp_idx; + return cmn->xps + cmn->mesh_x * y + x; } static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, enum cmn_node_type type) @@ -424,15 +415,27 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn) return CMN700; case PART_CI700: return CI700; + case PART_CMN_S3: + return CMNS3; default: return 0; }; } +static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn) +{ + if (cmn->part == PART_CMN_S3) { + if (dn->type == CMN_TYPE_XP) + return CMN_S3_DTM_OFFSET; + return CMN_S3_PMU_OFFSET; + } + return CMN_PMU_OFFSET; +} + static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn, const struct arm_cmn_node *xp, int port) { - int offset = CMN_MXP__CONNECT_INFO(port); + int offset = CMN_MXP__CONNECT_INFO(port) - arm_cmn_pmu_offset(cmn, xp); if (port >= 2) { if (cmn->part == PART_CMN600 || cmn->part == PART_CMN650) @@ -445,7 +448,7 @@ static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn, offset += CI700_CONNECT_INFO_P2_5_OFFSET; } - return readl_relaxed(xp->pmu_base - CMN_PMU_OFFSET + offset); + return readl_relaxed(xp->pmu_base + offset); } static struct 
dentry *arm_cmn_debugfs; @@ -479,20 +482,26 @@ static const char *arm_cmn_device_type(u8 type) case 0x17: return "RN-F_C_E|"; case 0x18: return " RN-F_E |"; case 0x19: return "RN-F_E_E|"; + case 0x1a: return " HN-S |"; + case 0x1b: return " LCN |"; case 0x1c: return " MTSX |"; case 0x1d: return " HN-V |"; case 0x1e: return " CCG |"; + case 0x20: return " RN-F_F |"; + case 0x21: return "RN-F_F_E|"; + case 0x22: return " SN-F_F |"; default: return " ???? |"; } } -static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d) +static void arm_cmn_show_logid(struct seq_file *s, const struct arm_cmn_node *xp, int p, int d) { struct arm_cmn *cmn = s->private; struct arm_cmn_node *dn; + u16 id = xp->id | d | (p << xp->deviceid_bits); for (dn = cmn->dns; dn->type; dn++) { - struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); + int pad = dn->logid < 10; if (dn->type == CMN_TYPE_XP) continue; @@ -500,10 +509,10 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d) if (dn->type < CMN_TYPE_HNI) continue; - if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d) + if (dn->id != id) continue; - seq_printf(s, " #%-2d |", dn->logid); + seq_printf(s, " %*c#%-*d |", pad + 1, ' ', 3 - pad, dn->logid); return; } seq_puts(s, " |"); @@ -516,28 +525,27 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) seq_puts(s, " X"); for (x = 0; x < cmn->mesh_x; x++) - seq_printf(s, " %d ", x); + seq_printf(s, " %-2d ", x); seq_puts(s, "\nY P D+"); y = cmn->mesh_y; while (y--) { int xp_base = cmn->mesh_x * y; + struct arm_cmn_node *xp = cmn->xps + xp_base; u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION]; for (x = 0; x < cmn->mesh_x; x++) seq_puts(s, "--------+"); - seq_printf(s, "\n%d |", y); + seq_printf(s, "\n%-2d |", y); for (x = 0; x < cmn->mesh_x; x++) { - struct arm_cmn_node *xp = cmn->xps + xp_base + x; - for (p = 0; p < CMN_MAX_PORTS; p++) - port[p][x] = arm_cmn_device_connect_info(cmn, xp, p); - seq_printf(s, " XP #%-2d |", xp_base + 
x); + port[p][x] = arm_cmn_device_connect_info(cmn, xp + x, p); + seq_printf(s, " XP #%-3d|", xp_base + x); } seq_puts(s, "\n |"); for (x = 0; x < cmn->mesh_x; x++) { - s8 dtc = cmn->xps[xp_base + x].dtc; + s8 dtc = xp[x].dtc; if (dtc < 0) seq_puts(s, " DTC ?? |"); @@ -554,10 +562,10 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) seq_puts(s, arm_cmn_device_type(port[p][x])); seq_puts(s, "\n 0|"); for (x = 0; x < cmn->mesh_x; x++) - arm_cmn_show_logid(s, x, y, p, 0); + arm_cmn_show_logid(s, xp + x, p, 0); seq_puts(s, "\n 1|"); for (x = 0; x < cmn->mesh_x; x++) - arm_cmn_show_logid(s, x, y, p, 1); + arm_cmn_show_logid(s, xp + x, p, 1); } seq_puts(s, "\n-----+"); } @@ -585,13 +593,21 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {} struct arm_cmn_hw_event { struct arm_cmn_node *dn; - u64 dtm_idx[4]; + u64 dtm_idx[DIV_ROUND_UP(CMN_MAX_NODES_PER_EVENT * 2, 64)]; s8 dtc_idx[CMN_MAX_DTCS]; u8 num_dns; u8 dtm_offset; + + /* + * WP config registers are divided to UP and DOWN events. We need to + * keep to track only one of them. 
+ */ + DECLARE_BITMAP(wp_idx, CMN_MAX_XPS); + bool wide_sel; enum cmn_filter_select filter_sel; }; +static_assert(sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event, target)); #define for_each_hw_dn(hw, dn, i) \ for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++) @@ -602,7 +618,6 @@ struct arm_cmn_hw_event { static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event) { - BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target)); return (struct arm_cmn_hw_event *)&event->hw; } @@ -616,6 +631,17 @@ static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos) return (x[pos / 32] >> ((pos % 32) * 2)) & 3; } +static void arm_cmn_set_wp_idx(unsigned long *wp_idx, unsigned int pos, bool val) +{ + if (val) + set_bit(pos, wp_idx); +} + +static unsigned int arm_cmn_get_wp_idx(unsigned long *wp_idx, unsigned int pos) +{ + return test_bit(pos, wp_idx); +} + struct arm_cmn_event_attr { struct device_attribute attr; enum cmn_model model; @@ -772,8 +798,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, CMN_EVENT_ATTR(CMN_ANY, cxha_##_name, CMN_TYPE_CXHA, _event) #define CMN_EVENT_CCRA(_name, _event) \ CMN_EVENT_ATTR(CMN_ANY, ccra_##_name, CMN_TYPE_CCRA, _event) -#define CMN_EVENT_CCHA(_name, _event) \ - CMN_EVENT_ATTR(CMN_ANY, ccha_##_name, CMN_TYPE_CCHA, _event) +#define CMN_EVENT_CCHA(_model, _name, _event) \ + CMN_EVENT_ATTR(_model, ccha_##_name, CMN_TYPE_CCHA, _event) #define CMN_EVENT_CCLA(_name, _event) \ CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event) #define CMN_EVENT_CCLA_RNI(_name, _event) \ @@ -1131,42 +1157,43 @@ static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_CCRA(wdb_alloc, 0x59), CMN_EVENT_CCRA(ssb_alloc, 0x5a), - CMN_EVENT_CCHA(rddatbyp, 0x61), - CMN_EVENT_CCHA(chirsp_up_stall, 0x62), - CMN_EVENT_CCHA(chidat_up_stall, 0x63), - CMN_EVENT_CCHA(snppcrd_link0_stall, 0x64), - CMN_EVENT_CCHA(snppcrd_link1_stall, 0x65), - CMN_EVENT_CCHA(snppcrd_link2_stall, 0x66), - 
CMN_EVENT_CCHA(reqtrk_occ, 0x67), - CMN_EVENT_CCHA(rdb_occ, 0x68), - CMN_EVENT_CCHA(rdbyp_occ, 0x69), - CMN_EVENT_CCHA(wdb_occ, 0x6a), - CMN_EVENT_CCHA(snptrk_occ, 0x6b), - CMN_EVENT_CCHA(sdb_occ, 0x6c), - CMN_EVENT_CCHA(snphaz_occ, 0x6d), - CMN_EVENT_CCHA(reqtrk_alloc, 0x6e), - CMN_EVENT_CCHA(rdb_alloc, 0x6f), - CMN_EVENT_CCHA(rdbyp_alloc, 0x70), - CMN_EVENT_CCHA(wdb_alloc, 0x71), - CMN_EVENT_CCHA(snptrk_alloc, 0x72), - CMN_EVENT_CCHA(sdb_alloc, 0x73), - CMN_EVENT_CCHA(snphaz_alloc, 0x74), - CMN_EVENT_CCHA(pb_rhu_req_occ, 0x75), - CMN_EVENT_CCHA(pb_rhu_req_alloc, 0x76), - CMN_EVENT_CCHA(pb_rhu_pcie_req_occ, 0x77), - CMN_EVENT_CCHA(pb_rhu_pcie_req_alloc, 0x78), - CMN_EVENT_CCHA(pb_pcie_wr_req_occ, 0x79), - CMN_EVENT_CCHA(pb_pcie_wr_req_alloc, 0x7a), - CMN_EVENT_CCHA(pb_pcie_reg_req_occ, 0x7b), - CMN_EVENT_CCHA(pb_pcie_reg_req_alloc, 0x7c), - CMN_EVENT_CCHA(pb_pcie_rsvd_req_occ, 0x7d), - CMN_EVENT_CCHA(pb_pcie_rsvd_req_alloc, 0x7e), - CMN_EVENT_CCHA(pb_rhu_dat_occ, 0x7f), - CMN_EVENT_CCHA(pb_rhu_dat_alloc, 0x80), - CMN_EVENT_CCHA(pb_rhu_pcie_dat_occ, 0x81), - CMN_EVENT_CCHA(pb_rhu_pcie_dat_alloc, 0x82), - CMN_EVENT_CCHA(pb_pcie_wr_dat_occ, 0x83), - CMN_EVENT_CCHA(pb_pcie_wr_dat_alloc, 0x84), + CMN_EVENT_CCHA(CMN_ANY, rddatbyp, 0x61), + CMN_EVENT_CCHA(CMN_ANY, chirsp_up_stall, 0x62), + CMN_EVENT_CCHA(CMN_ANY, chidat_up_stall, 0x63), + CMN_EVENT_CCHA(CMN_ANY, snppcrd_link0_stall, 0x64), + CMN_EVENT_CCHA(CMN_ANY, snppcrd_link1_stall, 0x65), + CMN_EVENT_CCHA(CMN_ANY, snppcrd_link2_stall, 0x66), + CMN_EVENT_CCHA(CMN_ANY, reqtrk_occ, 0x67), + CMN_EVENT_CCHA(CMN_ANY, rdb_occ, 0x68), + CMN_EVENT_CCHA(CMN_ANY, rdbyp_occ, 0x69), + CMN_EVENT_CCHA(CMN_ANY, wdb_occ, 0x6a), + CMN_EVENT_CCHA(CMN_ANY, snptrk_occ, 0x6b), + CMN_EVENT_CCHA(CMN_ANY, sdb_occ, 0x6c), + CMN_EVENT_CCHA(CMN_ANY, snphaz_occ, 0x6d), + CMN_EVENT_CCHA(CMN_ANY, reqtrk_alloc, 0x6e), + CMN_EVENT_CCHA(CMN_ANY, rdb_alloc, 0x6f), + CMN_EVENT_CCHA(CMN_ANY, rdbyp_alloc, 0x70), + CMN_EVENT_CCHA(CMN_ANY, wdb_alloc, 
0x71), + CMN_EVENT_CCHA(CMN_ANY, snptrk_alloc, 0x72), + CMN_EVENT_CCHA(CMN_ANY, db_alloc, 0x73), + CMN_EVENT_CCHA(CMN_ANY, snphaz_alloc, 0x74), + CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_occ, 0x75), + CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_alloc, 0x76), + CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_occ, 0x77), + CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_alloc, 0x78), + CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_occ, 0x79), + CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_alloc, 0x7a), + CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_occ, 0x7b), + CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_alloc, 0x7c), + CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_occ, 0x7d), + CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_alloc, 0x7e), + CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_occ, 0x7f), + CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_alloc, 0x80), + CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_occ, 0x81), + CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_alloc, 0x82), + CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_occ, 0x83), + CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_alloc, 0x84), + CMN_EVENT_CCHA(CMNS3, chirsp1_up_stall, 0x85), CMN_EVENT_CCLA(rx_cxs, 0x21), CMN_EVENT_CCLA(tx_cxs, 0x22), @@ -1253,15 +1280,11 @@ static ssize_t arm_cmn_format_show(struct device *dev, struct device_attribute *attr, char *buf) { struct arm_cmn_format_attr *fmt = container_of(attr, typeof(*fmt), attr); - int lo = __ffs(fmt->field), hi = __fls(fmt->field); - - if (lo == hi) - return sysfs_emit(buf, "config:%d\n", lo); if (!fmt->config) - return sysfs_emit(buf, "config:%d-%d\n", lo, hi); + return sysfs_emit(buf, "config:%*pbl\n", 64, &fmt->field); - return sysfs_emit(buf, "config%d:%d-%d\n", fmt->config, lo, hi); + return sysfs_emit(buf, "config%d:%*pbl\n", fmt->config, 64, &fmt->field); } #define _CMN_FORMAT_ATTR(_name, _cfg, _fld) \ @@ -1335,12 +1358,37 @@ static const struct attribute_group *arm_cmn_attr_groups[] = { NULL }; -static int arm_cmn_wp_idx(struct perf_event *event) +static int arm_cmn_find_free_wp_idx(struct arm_cmn_dtm *dtm, + struct perf_event *event) { - return 
CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event); + int wp_idx = CMN_EVENT_EVENTID(event); + + if (dtm->wp_event[wp_idx] >= 0) + if (dtm->wp_event[++wp_idx] >= 0) + return -ENOSPC; + + return wp_idx; +} + +static int arm_cmn_get_assigned_wp_idx(struct perf_event *event, + struct arm_cmn_hw_event *hw, + unsigned int pos) +{ + return CMN_EVENT_EVENTID(event) + arm_cmn_get_wp_idx(hw->wp_idx, pos); +} + +static void arm_cmn_claim_wp_idx(struct arm_cmn_dtm *dtm, + struct perf_event *event, + unsigned int dtc, int wp_idx, + unsigned int pos) +{ + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + + dtm->wp_event[wp_idx] = hw->dtc_idx[dtc]; + arm_cmn_set_wp_idx(hw->wp_idx, pos, wp_idx - CMN_EVENT_EVENTID(event)); } -static u32 arm_cmn_wp_config(struct perf_event *event) +static u32 arm_cmn_wp_config(struct perf_event *event, int wp_idx) { u32 config; u32 dev = CMN_EVENT_WP_DEV_SEL(event); @@ -1350,6 +1398,10 @@ static u32 arm_cmn_wp_config(struct perf_event *event) u32 combine = CMN_EVENT_WP_COMBINE(event); bool is_cmn600 = to_cmn(event->pmu)->part == PART_CMN600; + /* CMN-600 supports only primary and secondary matching groups */ + if (is_cmn600) + grp &= 1; + config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) | @@ -1357,7 +1409,9 @@ static u32 arm_cmn_wp_config(struct perf_event *event) if (exc) config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE : CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE; - if (combine && !grp) + + /* wp_combine is available only on WP0 and WP2 */ + if (combine && !(wp_idx & 0x1)) config |= is_cmn600 ? 
CMN600_WPn_CONFIG_WP_COMBINE : CMN_DTM_WPn_CONFIG_WP_COMBINE; return config; @@ -1366,7 +1420,7 @@ static u32 arm_cmn_wp_config(struct perf_event *event) static void arm_cmn_set_state(struct arm_cmn *cmn, u32 state) { if (!cmn->state) - writel_relaxed(0, cmn->dtc[0].base + CMN_DT_PMCR); + writel_relaxed(0, CMN_DT_PMCR(&cmn->dtc[0])); cmn->state |= state; } @@ -1375,7 +1429,7 @@ static void arm_cmn_clear_state(struct arm_cmn *cmn, u32 state) cmn->state &= ~state; if (!cmn->state) writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, - cmn->dtc[0].base + CMN_DT_PMCR); + CMN_DT_PMCR(&cmn->dtc[0])); } static void arm_cmn_pmu_enable(struct pmu *pmu) @@ -1410,18 +1464,19 @@ static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw, static u64 arm_cmn_read_cc(struct arm_cmn_dtc *dtc) { - u64 val = readq_relaxed(dtc->base + CMN_DT_PMCCNTR); + void __iomem *pmccntr = CMN_DT_PMCCNTR(dtc); + u64 val = readq_relaxed(pmccntr); - writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR); + writeq_relaxed(CMN_CC_INIT, pmccntr); return (val - CMN_CC_INIT) & ((CMN_CC_INIT << 1) - 1); } static u32 arm_cmn_read_counter(struct arm_cmn_dtc *dtc, int idx) { - u32 val, pmevcnt = CMN_DT_PMEVCNT(idx); + void __iomem *pmevcnt = CMN_DT_PMEVCNT(dtc, idx); + u32 val = readl_relaxed(pmevcnt); - val = readl_relaxed(dtc->base + pmevcnt); - writel_relaxed(CMN_COUNTER_INIT, dtc->base + pmevcnt); + writel_relaxed(CMN_COUNTER_INIT, pmevcnt); return val - CMN_COUNTER_INIT; } @@ -1432,7 +1487,7 @@ static void arm_cmn_init_counter(struct perf_event *event) u64 count; for_each_hw_dtc_idx(hw, i, idx) { - writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + CMN_DT_PMEVCNT(idx)); + writel_relaxed(CMN_COUNTER_INIT, CMN_DT_PMEVCNT(&cmn->dtc[i], idx)); cmn->dtc[i].counters[idx] = event; } @@ -1515,16 +1570,19 @@ static void arm_cmn_event_start(struct perf_event *event, int flags) int i; if (type == CMN_TYPE_DTC) { - i = hw->dtc_idx[0]; - writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + 
CMN_DT_PMCCNTR); - cmn->dtc[i].cc_active = true; + struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0]; + + writel_relaxed(CMN_DT_DTC_CTL_DT_EN | CMN_DT_DTC_CTL_CG_DISABLE, + dtc->base + CMN_DT_DTC_CTL); + writeq_relaxed(CMN_CC_INIT, CMN_DT_PMCCNTR(dtc)); + dtc->cc_active = true; } else if (type == CMN_TYPE_WP) { - int wp_idx = arm_cmn_wp_idx(event); u64 val = CMN_EVENT_WP_VAL(event); u64 mask = CMN_EVENT_WP_MASK(event); for_each_hw_dn(hw, dn, i) { void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset); + int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i); writeq_relaxed(val, base + CMN_DTM_WPn_VAL(wp_idx)); writeq_relaxed(mask, base + CMN_DTM_WPn_MASK(wp_idx)); @@ -1546,13 +1604,14 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags) int i; if (type == CMN_TYPE_DTC) { - i = hw->dtc_idx[0]; - cmn->dtc[i].cc_active = false; - } else if (type == CMN_TYPE_WP) { - int wp_idx = arm_cmn_wp_idx(event); + struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0]; + dtc->cc_active = false; + writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL); + } else if (type == CMN_TYPE_WP) { for_each_hw_dn(hw, dn, i) { void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset); + int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i); writeq_relaxed(0, base + CMN_DTM_WPn_MASK(wp_idx)); writeq_relaxed(~0ULL, base + CMN_DTM_WPn_VAL(wp_idx)); @@ -1570,10 +1629,23 @@ struct arm_cmn_val { u8 dtm_count[CMN_MAX_DTMS]; u8 occupid[CMN_MAX_DTMS][SEL_MAX]; u8 wp[CMN_MAX_DTMS][4]; + u8 wp_combine[CMN_MAX_DTMS][2]; int dtc_count[CMN_MAX_DTCS]; bool cycles; }; +static int arm_cmn_val_find_free_wp_config(struct perf_event *event, + struct arm_cmn_val *val, int dtm) +{ + int wp_idx = CMN_EVENT_EVENTID(event); + + if (val->wp[dtm][wp_idx]) + if (val->wp[dtm][++wp_idx]) + return -ENOSPC; + + return wp_idx; +} + static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val, struct perf_event *event) { @@ -1605,8 +1677,9 @@ static void 
arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val, if (type != CMN_TYPE_WP) continue; - wp_idx = arm_cmn_wp_idx(event); - val->wp[dtm][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1; + wp_idx = arm_cmn_val_find_free_wp_config(event, val, dtm); + val->wp[dtm][wp_idx] = 1; + val->wp_combine[dtm][wp_idx >> 1] += !!CMN_EVENT_WP_COMBINE(event); } } @@ -1630,6 +1703,7 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event) return -ENOMEM; arm_cmn_val_add_event(cmn, val, leader); + for_each_sibling_event(sibling, leader) arm_cmn_val_add_event(cmn, val, sibling); @@ -1639,12 +1713,12 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event) goto done; } - for (i = 0; i < CMN_MAX_DTCS; i++) - if (val->dtc_count[i] == CMN_DT_NUM_COUNTERS) + for_each_hw_dtc_idx(hw, dtc, idx) + if (val->dtc_count[dtc] == CMN_DT_NUM_COUNTERS) goto done; for_each_hw_dn(hw, dn, i) { - int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel; + int wp_idx, dtm = dn->dtm, sel = hw->filter_sel; if (val->dtm_count[dtm] == CMN_DTM_NUM_COUNTERS) goto done; @@ -1656,12 +1730,12 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event) if (type != CMN_TYPE_WP) continue; - wp_idx = arm_cmn_wp_idx(event); - if (val->wp[dtm][wp_idx]) + wp_idx = arm_cmn_val_find_free_wp_config(event, val, dtm); + if (wp_idx < 0) goto done; - wp_cmb = val->wp[dtm][wp_idx ^ 1]; - if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1) + if (wp_idx & 1 && + val->wp_combine[dtm][wp_idx >> 1] != !!CMN_EVENT_WP_COMBINE(event)) goto done; } @@ -1721,7 +1795,8 @@ static int arm_cmn_event_init(struct perf_event *event) /* ...but the DTM may depend on which port we're watching */ if (cmn->multi_dtm) hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2; - } else if (type == CMN_TYPE_XP && cmn->part == PART_CMN700) { + } else if (type == CMN_TYPE_XP && + (cmn->part == PART_CMN700 || cmn->part == PART_CMN_S3)) { hw->wide_sel = true; } @@ -1752,10 
+1827,7 @@ static int arm_cmn_event_init(struct perf_event *event) } if (!hw->num_dns) { - struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid); - - dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n", - nodeid, nid.x, nid.y, nid.port, nid.dev, type); + dev_dbg(cmn->dev, "invalid node 0x%x type 0x%x\n", nodeid, type); return -EINVAL; } @@ -1772,8 +1844,11 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, struct arm_cmn_dtm *dtm = &cmn->dtms[hw->dn[i].dtm] + hw->dtm_offset; unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); - if (type == CMN_TYPE_WP) - dtm->wp_event[arm_cmn_wp_idx(event)] = -1; + if (type == CMN_TYPE_WP) { + int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i); + + dtm->wp_event[wp_idx] = -1; + } if (hw->filter_sel > SEL_NONE) hw->dn[i].occupid[hw->filter_sel].count--; @@ -1782,6 +1857,7 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, writel_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG); } memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx)); + memset(hw->wp_idx, 0, sizeof(hw->wp_idx)); for_each_hw_dtc_idx(hw, j, idx) cmn->dtc[j].counters[idx] = NULL; @@ -1835,22 +1911,26 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) if (type == CMN_TYPE_XP) { input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx; } else if (type == CMN_TYPE_WP) { - int tmp, wp_idx = arm_cmn_wp_idx(event); - u32 cfg = arm_cmn_wp_config(event); + int tmp, wp_idx; + u32 cfg; - if (dtm->wp_event[wp_idx] >= 0) + wp_idx = arm_cmn_find_free_wp_idx(dtm, event); + if (wp_idx < 0) goto free_dtms; + cfg = arm_cmn_wp_config(event, wp_idx); + tmp = dtm->wp_event[wp_idx ^ 1]; if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) != CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp])) goto free_dtms; input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx; - dtm->wp_event[wp_idx] = hw->dtc_idx[d]; + + arm_cmn_claim_wp_idx(dtm, event, d, wp_idx, i); writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx)); 
} else { - struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); + struct arm_cmn_nodeid nid = arm_cmn_nid(dn); if (cmn->multi_dtm) nid.port %= 2; @@ -1939,7 +2019,7 @@ static int arm_cmn_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_nod cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node); node = dev_to_node(cmn->dev); - if (node != NUMA_NO_NODE && cpu_to_node(cmn->cpu) != node && cpu_to_node(cpu) == node) + if (cpu_to_node(cmn->cpu) != node && cpu_to_node(cpu) == node) arm_cmn_migrate(cmn, cpu); return 0; } @@ -1949,20 +2029,20 @@ static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_no struct arm_cmn *cmn; unsigned int target; int node; - cpumask_t mask; cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node); if (cpu != cmn->cpu) return 0; node = dev_to_node(cmn->dev); - if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) && - cpumask_andnot(&mask, &mask, cpumask_of(cpu))) - target = cpumask_any(&mask); - else + + target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu); + if (target >= nr_cpu_ids) target = cpumask_any_but(cpu_online_mask, cpu); + if (target < nr_cpu_ids) arm_cmn_migrate(cmn, target); + return 0; } @@ -1972,7 +2052,7 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id) irqreturn_t ret = IRQ_NONE; for (;;) { - u32 status = readl_relaxed(dtc->base + CMN_DT_PMOVSR); + u32 status = readl_relaxed(CMN_DT_PMOVSR(dtc)); u64 delta; int i; @@ -1994,7 +2074,7 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id) } } - writel_relaxed(status, dtc->base + CMN_DT_PMOVSR_CLR); + writel_relaxed(status, CMN_DT_PMOVSR_CLR(dtc)); if (!dtc->irq_friend) return ret; @@ -2048,15 +2128,16 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id { struct arm_cmn_dtc *dtc = cmn->dtc + idx; - dtc->base = dn->pmu_base - CMN_PMU_OFFSET; + dtc->pmu_base = dn->pmu_base; + dtc->base = dtc->pmu_base - arm_cmn_pmu_offset(cmn, dn); dtc->irq = 
platform_get_irq(to_platform_device(cmn->dev), idx); if (dtc->irq < 0) return dtc->irq; writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL); - writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR); - writeq_relaxed(0, dtc->base + CMN_DT_PMCCNTR); - writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR); + writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, CMN_DT_PMCR(dtc)); + writeq_relaxed(0, CMN_DT_PMCCNTR(dtc)); + writel_relaxed(0x1ff, CMN_DT_PMOVSR_CLR(dtc)); return 0; } @@ -2100,7 +2181,7 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) dn->dtc = xp->dtc; dn->dtm = xp->dtm; if (cmn->multi_dtm) - dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2; + dn->dtm += arm_cmn_nid(dn).port / 2; if (dn->type == CMN_TYPE_DTC) { int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++); @@ -2142,7 +2223,7 @@ static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_c node->id = FIELD_GET(CMN_NI_NODE_ID, reg); node->logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg); - node->pmu_base = cmn->base + offset + CMN_PMU_OFFSET; + node->pmu_base = cmn->base + offset + arm_cmn_pmu_offset(cmn, node); if (node->type == CMN_TYPE_CFG) level = 0; @@ -2200,7 +2281,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_23); cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg); + /* + * With the device isolation feature, if firmware has neglected to enable + * an XP port then we risk locking up if we try to access anything behind + * it; however we also have no way to tell from Non-Secure whether any + * given port is disabled or not, so the only way to win is not to play... 
+ */ reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL); + if (reg & CMN_INFO_DEVICE_ISO_ENABLE) { + dev_err(cmn->dev, "Device isolation enabled, not continuing due to risk of lockup\n"); + return -ENODEV; + } cmn->multi_dtm = reg & CMN_INFO_MULTIPLE_DTM_EN; cmn->rsp_vc_num = FIELD_GET(CMN_INFO_RSP_VC_NUM, reg); cmn->dat_vc_num = FIELD_GET(CMN_INFO_DAT_VC_NUM, reg); @@ -2270,18 +2361,27 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) arm_cmn_init_dtm(dtm++, xp, 0); /* * Keeping track of connected ports will let us filter out - * unnecessary XP events easily. We can also reliably infer the - * "extra device ports" configuration for the node ID format - * from this, since in that case we will see at least one XP - * with port 2 connected, for the HN-D. + * unnecessary XP events easily, and also infer the per-XP + * part of the node ID format. */ for (int p = 0; p < CMN_MAX_PORTS; p++) if (arm_cmn_device_connect_info(cmn, xp, p)) xp_ports |= BIT(p); - if (cmn->multi_dtm && (xp_ports & 0xc)) + if (cmn->num_xps == 1) { + xp->portid_bits = 3; + xp->deviceid_bits = 2; + } else if (xp_ports > 0x3) { + xp->portid_bits = 2; + xp->deviceid_bits = 1; + } else { + xp->portid_bits = 1; + xp->deviceid_bits = 2; + } + + if (cmn->multi_dtm && (xp_ports > 0x3)) arm_cmn_init_dtm(dtm++, xp, 1); - if (cmn->multi_dtm && (xp_ports & 0x30)) + if (cmn->multi_dtm && (xp_ports > 0xf)) arm_cmn_init_dtm(dtm++, xp, 2); cmn->ports_used |= xp_ports; @@ -2318,6 +2418,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) } arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); + dn->portid_bits = xp->portid_bits; + dn->deviceid_bits = xp->deviceid_bits; switch (dn->type) { case CMN_TYPE_DTC: @@ -2336,10 +2438,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) case CMN_TYPE_CXHA: case CMN_TYPE_CCRA: case CMN_TYPE_CCHA: - case CMN_TYPE_CCLA: case CMN_TYPE_HNS: dn++; break; + case CMN_TYPE_CCLA: + dn->pmu_base 
+= CMN_CCLA_PMU_EVENT_SEL; + dn++; + break; /* Nothing to see here */ case CMN_TYPE_MPAM_S: case CMN_TYPE_MPAM_NS: @@ -2347,6 +2452,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) case CMN_TYPE_CXLA: case CMN_TYPE_HNS_MPAM_S: case CMN_TYPE_HNS_MPAM_NS: + case CMN_TYPE_APB: break; /* * Split "optimised" combination nodes into separate @@ -2357,7 +2463,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) case CMN_TYPE_HNP: case CMN_TYPE_CCLA_RNI: dn[1] = dn[0]; - dn[0].pmu_base += CMN_HNP_PMU_EVENT_SEL; + dn[0].pmu_base += CMN_CCLA_PMU_EVENT_SEL; dn[1].type = arm_cmn_subtype(dn->type); dn += 2; break; @@ -2481,6 +2587,7 @@ static int arm_cmn_probe(struct platform_device *pdev) cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev)); cmn->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = cmn->dev, .attr_groups = arm_cmn_attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, .task_ctx_nr = perf_invalid_context, @@ -2515,7 +2622,7 @@ static int arm_cmn_probe(struct platform_device *pdev) return err; } -static int arm_cmn_remove(struct platform_device *pdev) +static void arm_cmn_remove(struct platform_device *pdev) { struct arm_cmn *cmn = platform_get_drvdata(pdev); @@ -2524,7 +2631,6 @@ static int arm_cmn_remove(struct platform_device *pdev) perf_pmu_unregister(&cmn->pmu); cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node); debugfs_remove(cmn->debug); - return 0; } #ifdef CONFIG_OF @@ -2532,6 +2638,7 @@ static const struct of_device_id arm_cmn_of_match[] = { { .compatible = "arm,cmn-600", .data = (void *)PART_CMN600 }, { .compatible = "arm,cmn-650" }, { .compatible = "arm,cmn-700" }, + { .compatible = "arm,cmn-s3" }, { .compatible = "arm,ci-700" }, {} }; diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c new file mode 100644 index 000000000000..fd7a5e60e963 --- /dev/null +++ b/drivers/perf/arm-ni.c @@ -0,0 +1,780 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 
2022-2024 Arm Limited +// NI-700 Network-on-Chip PMU driver + +#include <linux/acpi.h> +#include <linux/bitfield.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/perf_event.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +/* Common registers */ +#define NI_NODE_TYPE 0x000 +#define NI_NODE_TYPE_NODE_ID GENMASK(31, 16) +#define NI_NODE_TYPE_NODE_TYPE GENMASK(15, 0) + +#define NI_CHILD_NODE_INFO 0x004 +#define NI_CHILD_PTR(n) (0x008 + (n) * 4) + +#define NI700_PMUSELA 0x00c + +/* Config node */ +#define NI_PERIPHERAL_ID0 0xfe0 +#define NI_PIDR0_PART_7_0 GENMASK(7, 0) +#define NI_PERIPHERAL_ID1 0xfe4 +#define NI_PIDR1_PART_11_8 GENMASK(3, 0) +#define NI_PERIPHERAL_ID2 0xfe8 +#define NI_PIDR2_VERSION GENMASK(7, 4) + +/* PMU node */ +#define NI_PMEVCNTR(n) (0x008 + (n) * 8) +#define NI_PMCCNTR_L 0x0f8 +#define NI_PMCCNTR_U 0x0fc +#define NI_PMEVTYPER(n) (0x400 + (n) * 4) +#define NI_PMEVTYPER_NODE_TYPE GENMASK(12, 9) +#define NI_PMEVTYPER_NODE_ID GENMASK(8, 0) +#define NI_PMCNTENSET 0xc00 +#define NI_PMCNTENCLR 0xc20 +#define NI_PMINTENSET 0xc40 +#define NI_PMINTENCLR 0xc60 +#define NI_PMOVSCLR 0xc80 +#define NI_PMOVSSET 0xcc0 +#define NI_PMCFGR 0xe00 +#define NI_PMCR 0xe04 +#define NI_PMCR_RESET_CCNT BIT(2) +#define NI_PMCR_RESET_EVCNT BIT(1) +#define NI_PMCR_ENABLE BIT(0) + +#define NI_NUM_COUNTERS 8 +#define NI_CCNT_IDX 31 + +/* Event attributes */ +#define NI_CONFIG_TYPE GENMASK_ULL(15, 0) +#define NI_CONFIG_NODEID GENMASK_ULL(31, 16) +#define NI_CONFIG_EVENTID GENMASK_ULL(47, 32) + +#define NI_EVENT_TYPE(event) FIELD_GET(NI_CONFIG_TYPE, (event)->attr.config) +#define NI_EVENT_NODEID(event) FIELD_GET(NI_CONFIG_NODEID, (event)->attr.config) +#define NI_EVENT_EVENTID(event) FIELD_GET(NI_CONFIG_EVENTID, (event)->attr.config) + +enum ni_part { + PART_NI_700 = 0x43b, + PART_NI_710AE = 0x43d, +}; + +enum 
ni_node_type { + NI_GLOBAL, + NI_VOLTAGE, + NI_POWER, + NI_CLOCK, + NI_ASNI, + NI_AMNI, + NI_PMU, + NI_HSNI, + NI_HMNI, + NI_PMNI, +}; + +struct arm_ni_node { + void __iomem *base; + enum ni_node_type type; + u16 id; + u32 num_components; +}; + +struct arm_ni_unit { + void __iomem *pmusela; + enum ni_node_type type; + u16 id; + bool ns; + union { + __le64 pmusel; + u8 event[8]; + }; +}; + +struct arm_ni_cd { + void __iomem *pmu_base; + u16 id; + int num_units; + int irq; + int cpu; + struct hlist_node cpuhp_node; + struct pmu pmu; + struct arm_ni_unit *units; + struct perf_event *evcnt[NI_NUM_COUNTERS]; + struct perf_event *ccnt; +}; + +struct arm_ni { + struct device *dev; + void __iomem *base; + enum ni_part part; + int id; + int num_cds; + struct arm_ni_cd cds[] __counted_by(num_cds); +}; + +#define cd_to_ni(cd) container_of((cd), struct arm_ni, cds[(cd)->id]) +#define pmu_to_cd(p) container_of((p), struct arm_ni_cd, pmu) + +#define cd_for_each_unit(cd, u) \ + for (struct arm_ni_unit *u = cd->units; u < cd->units + cd->num_units; u++) + +static int arm_ni_hp_state; + +struct arm_ni_event_attr { + struct device_attribute attr; + enum ni_node_type type; +}; + +#define NI_EVENT_ATTR(_name, _type) \ + (&((struct arm_ni_event_attr[]) {{ \ + .attr = __ATTR(_name, 0444, arm_ni_event_show, NULL), \ + .type = _type, \ + }})[0].attr.attr) + +static ssize_t arm_ni_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_ni_event_attr *eattr = container_of(attr, typeof(*eattr), attr); + + if (eattr->type == NI_PMU) + return sysfs_emit(buf, "type=0x%x\n", eattr->type); + + return sysfs_emit(buf, "type=0x%x,eventid=?,nodeid=?\n", eattr->type); +} + +static umode_t arm_ni_event_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct arm_ni_cd *cd = pmu_to_cd(dev_get_drvdata(dev)); + struct arm_ni_event_attr *eattr; + + eattr = container_of(attr, typeof(*eattr), 
attr.attr); + + cd_for_each_unit(cd, unit) { + if (unit->type == eattr->type && unit->ns) + return attr->mode; + } + + return 0; +} + +static struct attribute *arm_ni_event_attrs[] = { + NI_EVENT_ATTR(asni, NI_ASNI), + NI_EVENT_ATTR(amni, NI_AMNI), + NI_EVENT_ATTR(cycles, NI_PMU), + NI_EVENT_ATTR(hsni, NI_HSNI), + NI_EVENT_ATTR(hmni, NI_HMNI), + NI_EVENT_ATTR(pmni, NI_PMNI), + NULL +}; + +static const struct attribute_group arm_ni_event_attrs_group = { + .name = "events", + .attrs = arm_ni_event_attrs, + .is_visible = arm_ni_event_attr_is_visible, +}; + +struct arm_ni_format_attr { + struct device_attribute attr; + u64 field; +}; + +#define NI_FORMAT_ATTR(_name, _fld) \ + (&((struct arm_ni_format_attr[]) {{ \ + .attr = __ATTR(_name, 0444, arm_ni_format_show, NULL), \ + .field = _fld, \ + }})[0].attr.attr) + +static ssize_t arm_ni_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_ni_format_attr *fmt = container_of(attr, typeof(*fmt), attr); + + return sysfs_emit(buf, "config:%*pbl\n", 64, &fmt->field); +} + +static struct attribute *arm_ni_format_attrs[] = { + NI_FORMAT_ATTR(type, NI_CONFIG_TYPE), + NI_FORMAT_ATTR(nodeid, NI_CONFIG_NODEID), + NI_FORMAT_ATTR(eventid, NI_CONFIG_EVENTID), + NULL +}; + +static const struct attribute_group arm_ni_format_attrs_group = { + .name = "format", + .attrs = arm_ni_format_attrs, +}; + +static ssize_t arm_ni_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_ni_cd *cd = pmu_to_cd(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(cd->cpu)); +} + +static struct device_attribute arm_ni_cpumask_attr = + __ATTR(cpumask, 0444, arm_ni_cpumask_show, NULL); + +static ssize_t arm_ni_identifier_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_ni *ni = cd_to_ni(pmu_to_cd(dev_get_drvdata(dev))); + u32 reg = readl_relaxed(ni->base + NI_PERIPHERAL_ID2); + int version = FIELD_GET(NI_PIDR2_VERSION, 
reg); + + return sysfs_emit(buf, "%03x%02x\n", ni->part, version); +} + +static struct device_attribute arm_ni_identifier_attr = + __ATTR(identifier, 0444, arm_ni_identifier_show, NULL); + +static struct attribute *arm_ni_other_attrs[] = { + &arm_ni_cpumask_attr.attr, + &arm_ni_identifier_attr.attr, + NULL +}; + +static const struct attribute_group arm_ni_other_attr_group = { + .attrs = arm_ni_other_attrs, +}; + +static const struct attribute_group *arm_ni_attr_groups[] = { + &arm_ni_event_attrs_group, + &arm_ni_format_attrs_group, + &arm_ni_other_attr_group, + NULL +}; + +static void arm_ni_pmu_enable(struct pmu *pmu) +{ + writel_relaxed(NI_PMCR_ENABLE, pmu_to_cd(pmu)->pmu_base + NI_PMCR); +} + +static void arm_ni_pmu_disable(struct pmu *pmu) +{ + writel_relaxed(0, pmu_to_cd(pmu)->pmu_base + NI_PMCR); +} + +struct arm_ni_val { + unsigned int evcnt; + unsigned int ccnt; +}; + +static bool arm_ni_val_count_event(struct perf_event *evt, struct arm_ni_val *val) +{ + if (is_software_event(evt)) + return true; + + if (NI_EVENT_TYPE(evt) == NI_PMU) { + val->ccnt++; + return val->ccnt <= 1; + } + + val->evcnt++; + return val->evcnt <= NI_NUM_COUNTERS; +} + +static int arm_ni_validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct arm_ni_val val = { 0 }; + + if (leader == event) + return 0; + + arm_ni_val_count_event(event, &val); + if (!arm_ni_val_count_event(leader, &val)) + return -EINVAL; + + for_each_sibling_event(sibling, leader) { + if (!arm_ni_val_count_event(sibling, &val)) + return -EINVAL; + } + return 0; +} + +static int arm_ni_event_init(struct perf_event *event) +{ + struct arm_ni_cd *cd = pmu_to_cd(event->pmu); + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (is_sampling_event(event)) + return -EINVAL; + + event->cpu = cd->cpu; + if (NI_EVENT_TYPE(event) == NI_PMU) + return arm_ni_validate_group(event); + + cd_for_each_unit(cd, unit) { + if (unit->type == NI_EVENT_TYPE(event) && 
+ unit->id == NI_EVENT_NODEID(event) && unit->ns) { + event->hw.config_base = (unsigned long)unit; + return arm_ni_validate_group(event); + } + } + return -EINVAL; +} + +static u64 arm_ni_read_ccnt(struct arm_ni_cd *cd) +{ + u64 l, u_old, u_new; + int retries = 3; /* 1st time unlucky, 2nd improbable, 3rd just broken */ + + u_new = readl_relaxed(cd->pmu_base + NI_PMCCNTR_U); + do { + u_old = u_new; + l = readl_relaxed(cd->pmu_base + NI_PMCCNTR_L); + u_new = readl_relaxed(cd->pmu_base + NI_PMCCNTR_U); + } while (u_new != u_old && --retries); + WARN_ON(!retries); + + return (u_new << 32) | l; +} + +static void arm_ni_event_read(struct perf_event *event) +{ + struct arm_ni_cd *cd = pmu_to_cd(event->pmu); + struct hw_perf_event *hw = &event->hw; + u64 count, prev; + bool ccnt = hw->idx == NI_CCNT_IDX; + + do { + prev = local64_read(&hw->prev_count); + if (ccnt) + count = arm_ni_read_ccnt(cd); + else + count = readl_relaxed(cd->pmu_base + NI_PMEVCNTR(hw->idx)); + } while (local64_cmpxchg(&hw->prev_count, prev, count) != prev); + + count -= prev; + if (!ccnt) + count = (u32)count; + local64_add(count, &event->count); +} + +static void arm_ni_event_start(struct perf_event *event, int flags) +{ + struct arm_ni_cd *cd = pmu_to_cd(event->pmu); + + writel_relaxed(1U << event->hw.idx, cd->pmu_base + NI_PMCNTENSET); +} + +static void arm_ni_event_stop(struct perf_event *event, int flags) +{ + struct arm_ni_cd *cd = pmu_to_cd(event->pmu); + + writel_relaxed(1U << event->hw.idx, cd->pmu_base + NI_PMCNTENCLR); + if (flags & PERF_EF_UPDATE) + arm_ni_event_read(event); +} + +static void arm_ni_init_ccnt(struct arm_ni_cd *cd) +{ + local64_set(&cd->ccnt->hw.prev_count, S64_MIN); + lo_hi_writeq_relaxed(S64_MIN, cd->pmu_base + NI_PMCCNTR_L); +} + +static void arm_ni_init_evcnt(struct arm_ni_cd *cd, int idx) +{ + local64_set(&cd->evcnt[idx]->hw.prev_count, S32_MIN); + writel_relaxed(S32_MIN, cd->pmu_base + NI_PMEVCNTR(idx)); +} + +static int arm_ni_event_add(struct perf_event *event, int 
flags) +{ + struct arm_ni_cd *cd = pmu_to_cd(event->pmu); + struct hw_perf_event *hw = &event->hw; + struct arm_ni_unit *unit; + enum ni_node_type type = NI_EVENT_TYPE(event); + u32 reg; + + if (type == NI_PMU) { + if (cd->ccnt) + return -ENOSPC; + hw->idx = NI_CCNT_IDX; + cd->ccnt = event; + arm_ni_init_ccnt(cd); + } else { + hw->idx = 0; + while (cd->evcnt[hw->idx]) { + if (++hw->idx == NI_NUM_COUNTERS) + return -ENOSPC; + } + cd->evcnt[hw->idx] = event; + unit = (void *)hw->config_base; + unit->event[hw->idx] = NI_EVENT_EVENTID(event); + arm_ni_init_evcnt(cd, hw->idx); + lo_hi_writeq_relaxed(le64_to_cpu(unit->pmusel), unit->pmusela); + + reg = FIELD_PREP(NI_PMEVTYPER_NODE_TYPE, type) | + FIELD_PREP(NI_PMEVTYPER_NODE_ID, NI_EVENT_NODEID(event)); + writel_relaxed(reg, cd->pmu_base + NI_PMEVTYPER(hw->idx)); + } + if (flags & PERF_EF_START) + arm_ni_event_start(event, 0); + return 0; +} + +static void arm_ni_event_del(struct perf_event *event, int flags) +{ + struct arm_ni_cd *cd = pmu_to_cd(event->pmu); + struct hw_perf_event *hw = &event->hw; + + arm_ni_event_stop(event, PERF_EF_UPDATE); + + if (hw->idx == NI_CCNT_IDX) + cd->ccnt = NULL; + else + cd->evcnt[hw->idx] = NULL; +} + +static irqreturn_t arm_ni_handle_irq(int irq, void *dev_id) +{ + struct arm_ni_cd *cd = dev_id; + irqreturn_t ret = IRQ_NONE; + u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR); + + if (reg & (1U << NI_CCNT_IDX)) { + ret = IRQ_HANDLED; + if (!(WARN_ON(!cd->ccnt))) { + arm_ni_event_read(cd->ccnt); + arm_ni_init_ccnt(cd); + } + } + for (int i = 0; i < NI_NUM_COUNTERS; i++) { + if (!(reg & (1U << i))) + continue; + ret = IRQ_HANDLED; + if (!(WARN_ON(!cd->evcnt[i]))) { + arm_ni_event_read(cd->evcnt[i]); + arm_ni_init_evcnt(cd, i); + } + } + writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR); + return ret; +} + +/* + * Set up one clock domain: enumerate its child units, claim the PMU page, + * reset the PMU, request its overflow IRQ and register a perf PMU for it. + * + * @ni: owning NI instance; the domain state lives in ni->cds[node->id] + * @node: discovery info for the clock domain node + * @res_start: physical start of the whole NI MMIO resource, used to compute + * the physical address of the PMU page that gets requested + * + * Returns 0 on success, and also when the PMU is not accessible (the domain + * is then skipped with cd->pmu_base left NULL), or a negative errno. + */ +static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_start) +{ + struct arm_ni_cd *cd = ni->cds + node->id; + const char *name; + int err; + + cd->id = 
node->id; + cd->num_units = node->num_components; + cd->units = devm_kcalloc(ni->dev, cd->num_units, sizeof(*(cd->units)), GFP_KERNEL); + if (!cd->units) + return -ENOMEM; + + for (int i = 0; i < cd->num_units; i++) { + u32 reg = readl_relaxed(node->base + NI_CHILD_PTR(i)); + void __iomem *unit_base = ni->base + reg; + struct arm_ni_unit *unit = cd->units + i; + + reg = readl_relaxed(unit_base + NI_NODE_TYPE); + unit->type = FIELD_GET(NI_NODE_TYPE_NODE_TYPE, reg); + unit->id = FIELD_GET(NI_NODE_TYPE_NODE_ID, reg); + + switch (unit->type) { + case NI_PMU: + reg = readl_relaxed(unit_base + NI_PMCFGR); + if (!reg) { + dev_info(ni->dev, "No access to PMU %d\n", cd->id); + devm_kfree(ni->dev, cd->units); + return 0; + } + unit->ns = true; + cd->pmu_base = unit_base; + break; + case NI_ASNI: + case NI_AMNI: + case NI_HSNI: + case NI_HMNI: + case NI_PMNI: + unit->pmusela = unit_base + NI700_PMUSELA; + writel_relaxed(1, unit->pmusela); + if (readl_relaxed(unit->pmusela) != 1) + dev_info(ni->dev, "No access to node 0x%04x%04x\n", unit->id, unit->type); + else + unit->ns = true; + break; + default: + /* + * e.g. FMU - thankfully bits 3:2 of FMU_ERR_FR0 are RES0 so + * can't alias any of the leaf node types we're looking for. 
+ */ + dev_dbg(ni->dev, "Mystery node 0x%04x%04x\n", unit->id, unit->type); + break; + } + } + + res_start += cd->pmu_base - ni->base; + if (!devm_request_mem_region(ni->dev, res_start, SZ_4K, dev_name(ni->dev))) { + dev_err(ni->dev, "Failed to request PMU region 0x%llx\n", res_start); + return -EBUSY; + } + + writel_relaxed(NI_PMCR_RESET_CCNT | NI_PMCR_RESET_EVCNT, + cd->pmu_base + NI_PMCR); + writel_relaxed(U32_MAX, cd->pmu_base + NI_PMCNTENCLR); + writel_relaxed(U32_MAX, cd->pmu_base + NI_PMOVSCLR); + writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENSET); + + cd->irq = platform_get_irq(to_platform_device(ni->dev), cd->id); + if (cd->irq < 0) + return cd->irq; + + err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq, + IRQF_NOBALANCING | IRQF_NO_THREAD, + dev_name(ni->dev), cd); + if (err) + return err; + + cd->cpu = cpumask_local_spread(0, dev_to_node(ni->dev)); + /* + * IRQF_NOBALANCING only stops the affinity from being changed later; + * it still has to be pointed at the CPU that owns the perf context, + * exactly as arm_ni_pmu_migrate() does on hotplug. + */ + irq_set_affinity(cd->irq, cpumask_of(cd->cpu)); + cd->pmu = (struct pmu) { + .module = THIS_MODULE, + .parent = ni->dev, + .attr_groups = arm_ni_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .pmu_enable = arm_ni_pmu_enable, + .pmu_disable = arm_ni_pmu_disable, + .event_init = arm_ni_event_init, + .add = arm_ni_event_add, + .del = arm_ni_event_del, + .start = arm_ni_event_start, + .stop = arm_ni_event_stop, + .read = arm_ni_event_read, + }; + + name = devm_kasprintf(ni->dev, GFP_KERNEL, "arm_ni_%d_cd_%d", ni->id, cd->id); + if (!name) + return -ENOMEM; + + err = cpuhp_state_add_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); + if (err) + return err; + + err = perf_pmu_register(&cd->pmu, name, -1); + if (err) + cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); + + return err; +} + +static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node) +{ + u32 reg = readl_relaxed(base + NI_NODE_TYPE); + + node->base = base; + node->type = FIELD_GET(NI_NODE_TYPE_NODE_TYPE, reg); + node->id = FIELD_GET(NI_NODE_TYPE_NODE_ID, reg); + node->num_components = 
readl_relaxed(base + NI_CHILD_NODE_INFO); +} + +static void arm_ni_remove(struct platform_device *pdev); + +/* + * Probe one NI instance: map the configuration space, check the part + * number, walk the global -> voltage -> power -> clock domain tree to count + * the clock domains, then set up one PMU per clock domain. + * + * Returns 0 on success or a negative errno. On failure, any PMUs already + * registered for earlier clock domains are unregistered before returning. + */ +static int arm_ni_probe(struct platform_device *pdev) +{ + struct arm_ni_node cfg, vd, pd, cd; + struct arm_ni *ni; + struct resource *res; + void __iomem *base; + static atomic_t id; + int num_cds; + u32 reg, part; + + /* + * We want to map the whole configuration space for ease of discovery, + * but the PMU pages are the only ones for which we can honestly claim + * exclusive ownership, so we'll request them explicitly once found. + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + + base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!base) + return -ENOMEM; + + arm_ni_probe_domain(base, &cfg); + if (cfg.type != NI_GLOBAL) + return -ENODEV; + + reg = readl_relaxed(cfg.base + NI_PERIPHERAL_ID0); + part = FIELD_GET(NI_PIDR0_PART_7_0, reg); + reg = readl_relaxed(cfg.base + NI_PERIPHERAL_ID1); + part |= FIELD_GET(NI_PIDR1_PART_11_8, reg) << 8; + + switch (part) { + case PART_NI_700: + case PART_NI_710AE: + break; + default: + dev_WARN(&pdev->dev, "Unknown part number: 0x%03x, this may go badly\n", part); + break; + } + + num_cds = 0; + for (int v = 0; v < cfg.num_components; v++) { + reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v)); + arm_ni_probe_domain(base + reg, &vd); + for (int p = 0; p < vd.num_components; p++) { + reg = readl_relaxed(vd.base + NI_CHILD_PTR(p)); + arm_ni_probe_domain(base + reg, &pd); + num_cds += pd.num_components; + } + } + + ni = devm_kzalloc(&pdev->dev, struct_size(ni, cds, num_cds), GFP_KERNEL); + if (!ni) + return -ENOMEM; + + ni->dev = &pdev->dev; + ni->base = base; + ni->num_cds = num_cds; + ni->part = part; + ni->id = atomic_fetch_inc(&id); + /* arm_ni_remove() looks the instance up through drvdata */ + platform_set_drvdata(pdev, ni); + + for (int v = 0; v < cfg.num_components; v++) { + reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v)); + arm_ni_probe_domain(base + reg, &vd); + for (int p = 0; p < vd.num_components; p++) { + reg = readl_relaxed(vd.base + NI_CHILD_PTR(p)); + arm_ni_probe_domain(base + reg, &pd); + for (int c = 0; c < pd.num_components; c++) 
{ + int ret; + + reg = readl_relaxed(pd.base + NI_CHILD_PTR(c)); + arm_ni_probe_domain(base + reg, &cd); + ret = arm_ni_init_cd(ni, &cd, res->start); + if (ret) { + /* + * PMUs registered for earlier domains must + * not outlive 'ni', which devres frees once + * probe fails. The failing domain never got + * registered, so hide it from the teardown. + */ + ni->cds[cd.id].pmu_base = NULL; + arm_ni_remove(pdev); + return ret; + } + } + } + } + + return 0; +} + +static void arm_ni_remove(struct platform_device *pdev) +{ + struct arm_ni *ni = platform_get_drvdata(pdev); + + for (int i = 0; i < ni->num_cds; i++) { + struct arm_ni_cd *cd = ni->cds + i; + + if (!cd->pmu_base) + continue; + + writel_relaxed(0, cd->pmu_base + NI_PMCR); + writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR); + perf_pmu_unregister(&cd->pmu); + cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); + } +} + +#ifdef CONFIG_OF +static const struct of_device_id arm_ni_of_match[] = { + { .compatible = "arm,ni-700" }, + {} +}; +MODULE_DEVICE_TABLE(of, arm_ni_of_match); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id arm_ni_acpi_match[] = { + { "ARMHCB70" }, + {} +}; +MODULE_DEVICE_TABLE(acpi, arm_ni_acpi_match); +#endif + +static struct platform_driver arm_ni_driver = { + .driver = { + .name = "arm-ni", + .of_match_table = of_match_ptr(arm_ni_of_match), + .acpi_match_table = ACPI_PTR(arm_ni_acpi_match), + }, + .probe = arm_ni_probe, + .remove = arm_ni_remove, +}; + +static void arm_ni_pmu_migrate(struct arm_ni_cd *cd, unsigned int cpu) +{ + perf_pmu_migrate_context(&cd->pmu, cd->cpu, cpu); + irq_set_affinity(cd->irq, cpumask_of(cpu)); + cd->cpu = cpu; +} + +static int arm_ni_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) +{ + struct arm_ni_cd *cd; + int node; + + cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node); + node = dev_to_node(cd_to_ni(cd)->dev); + if (cpu_to_node(cd->cpu) != node && cpu_to_node(cpu) == node) + arm_ni_pmu_migrate(cd, cpu); + return 0; +} + +static int arm_ni_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) +{ + struct arm_ni_cd *cd; + unsigned int target; + int node; + + cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node); + if (cpu 
!= cd->cpu) + return 0; + + node = dev_to_node(cd_to_ni(cd)->dev); + target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + target = cpumask_any_but(cpu_online_mask, cpu); + + if (target < nr_cpu_ids) + arm_ni_pmu_migrate(cd, target); + return 0; +} + +static int __init arm_ni_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/arm/ni:online", + arm_ni_pmu_online_cpu, + arm_ni_pmu_offline_cpu); + if (ret < 0) + return ret; + + arm_ni_hp_state = ret; + + ret = platform_driver_register(&arm_ni_driver); + if (ret) + cpuhp_remove_multi_state(arm_ni_hp_state); + return ret; +} + +static void __exit arm_ni_exit(void) +{ + platform_driver_unregister(&arm_ni_driver); + cpuhp_remove_multi_state(arm_ni_hp_state); +} + +module_init(arm_ni_init); +module_exit(arm_ni_exit); + +MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>"); +MODULE_DESCRIPTION("Arm NI-700 PMU driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c index f146a455e838..f72f5689923c 100644 --- a/drivers/perf/arm_cspmu/ampere_cspmu.c +++ b/drivers/perf/arm_cspmu/ampere_cspmu.c @@ -269,4 +269,5 @@ static void __exit ampere_cspmu_exit(void) module_init(ampere_cspmu_init); module_exit(ampere_cspmu_exit); +MODULE_DESCRIPTION("Ampere SoC Performance Monitor Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 50b89b989ce7..81e8b97e9353 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -27,6 +27,7 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/of.h> #include <linux/perf_event.h> #include <linux/platform_device.h> @@ -100,13 +101,6 @@ #define ARM_CSPMU_ACTIVE_CPU_MASK 0x0 #define ARM_CSPMU_ASSOCIATED_CPU_MASK 0x1 -/* Check and use default if implementer doesn't provide attribute 
callback */ -#define CHECK_DEFAULT_IMPL_OPS(ops, callback) \ - do { \ - if (!ops->callback) \ - ops->callback = arm_cspmu_ ## callback; \ - } while (0) - /* * Maximum poll count for reading counter value using high-low-high sequence. */ @@ -121,7 +115,9 @@ static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu, static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev) { - return *(struct acpi_apmt_node **)dev_get_platdata(dev); + struct acpi_apmt_node **ptr = dev_get_platdata(dev); + + return ptr ? *ptr : NULL; } /* @@ -227,16 +223,6 @@ arm_cspmu_event_attr_is_visible(struct kobject *kobj, return attr->mode; } -ssize_t arm_cspmu_sysfs_format_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct dev_ext_attribute *eattr = - container_of(attr, struct dev_ext_attribute, attr); - return sysfs_emit(buf, "%s\n", (char *)eattr->var); -} -EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_format_show); - static struct attribute *arm_cspmu_format_attrs[] = { ARM_CSPMU_FORMAT_EVENT_ATTR, ARM_CSPMU_FORMAT_FILTER_ATTR, @@ -317,6 +303,10 @@ static const char *arm_cspmu_get_name(const struct arm_cspmu *cspmu) dev = cspmu->dev; apmt_node = arm_cspmu_apmt_node(dev); + if (!apmt_node) + return devm_kasprintf(dev, GFP_KERNEL, PMUNAME "_%u", + atomic_fetch_inc(&pmu_idx[0])); + pmu_type = apmt_node->type; if (pmu_type >= ACPI_APMT_NODE_TYPE_COUNT) { @@ -408,21 +398,32 @@ static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr) return NULL; } +#define DEFAULT_IMPL_OP(name) .name = arm_cspmu_##name + static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) { int ret = 0; - struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev); struct arm_cspmu_impl_match *match; - /* - * Get PMU implementer and product id from APMT node. - * If APMT node doesn't have implementer/product id, try get it - * from PMIIDR. - */ - cspmu->impl.pmiidr = - (apmt_node->impl_id) ? 
apmt_node->impl_id : - readl(cspmu->base0 + PMIIDR); + /* Start with a default PMU implementation */ + cspmu->impl.module = THIS_MODULE; + cspmu->impl.pmiidr = readl(cspmu->base0 + PMIIDR); + cspmu->impl.ops = (struct arm_cspmu_impl_ops) { + DEFAULT_IMPL_OP(get_event_attrs), + DEFAULT_IMPL_OP(get_format_attrs), + DEFAULT_IMPL_OP(get_identifier), + DEFAULT_IMPL_OP(get_name), + DEFAULT_IMPL_OP(is_cycle_counter_event), + DEFAULT_IMPL_OP(event_type), + DEFAULT_IMPL_OP(event_filter), + DEFAULT_IMPL_OP(set_ev_filter), + DEFAULT_IMPL_OP(event_attr_is_visible), + }; + + /* Firmware may override implementer/product ID from PMIIDR */ + if (apmt_node && apmt_node->impl_id) + cspmu->impl.pmiidr = apmt_node->impl_id; /* Find implementer specific attribute ops. */ match = arm_cspmu_impl_match_get(cspmu->impl.pmiidr); @@ -450,24 +451,9 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) } mutex_unlock(&arm_cspmu_lock); + } - if (ret) - return ret; - } else - cspmu->impl.module = THIS_MODULE; - - /* Use default callbacks if implementer doesn't provide one. 
*/ - CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs); - CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs); - CHECK_DEFAULT_IMPL_OPS(impl_ops, get_identifier); - CHECK_DEFAULT_IMPL_OPS(impl_ops, get_name); - CHECK_DEFAULT_IMPL_OPS(impl_ops, is_cycle_counter_event); - CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type); - CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter); - CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible); - CHECK_DEFAULT_IMPL_OPS(impl_ops, set_ev_filter); - - return 0; + return ret; } static struct attribute_group * @@ -512,23 +498,16 @@ arm_cspmu_alloc_format_attr_group(struct arm_cspmu *cspmu) return format_group; } -static struct attribute_group ** -arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu) +static int arm_cspmu_alloc_attr_groups(struct arm_cspmu *cspmu) { - struct attribute_group **attr_groups = NULL; - struct device *dev = cspmu->dev; + const struct attribute_group **attr_groups = cspmu->attr_groups; const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; cspmu->identifier = impl_ops->get_identifier(cspmu); cspmu->name = impl_ops->get_name(cspmu); if (!cspmu->identifier || !cspmu->name) - return NULL; - - attr_groups = devm_kcalloc(dev, 5, sizeof(struct attribute_group *), - GFP_KERNEL); - if (!attr_groups) - return NULL; + return -ENOMEM; attr_groups[0] = arm_cspmu_alloc_event_attr_group(cspmu); attr_groups[1] = arm_cspmu_alloc_format_attr_group(cspmu); @@ -536,18 +515,14 @@ arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu) attr_groups[3] = &arm_cspmu_cpumask_attr_group; if (!attr_groups[0] || !attr_groups[1]) - return NULL; + return -ENOMEM; - return attr_groups; + return 0; } static inline void arm_cspmu_reset_counters(struct arm_cspmu *cspmu) { - u32 pmcr = 0; - - pmcr |= PMCR_P; - pmcr |= PMCR_C; - writel(pmcr, cspmu->base0 + PMCR); + writel(PMCR_C | PMCR_P, cspmu->base0 + PMCR); } static inline void arm_cspmu_start_counters(struct arm_cspmu *cspmu) @@ -962,7 +937,14 @@ static struct arm_cspmu *arm_cspmu_alloc(struct 
platform_device *pdev) platform_set_drvdata(pdev, cspmu); apmt_node = arm_cspmu_apmt_node(dev); - cspmu->has_atomic_dword = apmt_node->flags & ACPI_APMT_FLAGS_ATOMIC; + if (apmt_node) { + cspmu->has_atomic_dword = apmt_node->flags & ACPI_APMT_FLAGS_ATOMIC; + } else { + u32 width = 0; + + device_property_read_u32(dev, "reg-io-width", &width); + cspmu->has_atomic_dword = (width == 8); + } return cspmu; } @@ -1153,11 +1135,6 @@ static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu) } } - if (cpumask_empty(&cspmu->associated_cpus)) { - dev_dbg(cspmu->dev, "No cpu associated with the PMU\n"); - return -ENODEV; - } - return 0; } #else @@ -1167,19 +1144,45 @@ static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu) } #endif +static int arm_cspmu_of_get_cpus(struct arm_cspmu *cspmu) +{ + struct of_phandle_iterator it; + int ret, cpu; + + of_for_each_phandle(&it, ret, dev_of_node(cspmu->dev), "cpus", NULL, 0) { + cpu = of_cpu_node_to_id(it.node); + if (cpu < 0) + continue; + cpumask_set_cpu(cpu, &cspmu->associated_cpus); + } + return ret == -ENOENT ? 
0 : ret; +} + static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu) { - return arm_cspmu_acpi_get_cpus(cspmu); + int ret = 0; + + if (arm_cspmu_apmt_node(cspmu->dev)) + ret = arm_cspmu_acpi_get_cpus(cspmu); + else if (device_property_present(cspmu->dev, "cpus")) + ret = arm_cspmu_of_get_cpus(cspmu); + else + cpumask_copy(&cspmu->associated_cpus, cpu_possible_mask); + + if (!ret && cpumask_empty(&cspmu->associated_cpus)) { + dev_dbg(cspmu->dev, "No cpu associated with the PMU\n"); + ret = -ENODEV; + } + return ret; } static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu) { int ret, capabilities; - struct attribute_group **attr_groups; - attr_groups = arm_cspmu_alloc_attr_group(cspmu); - if (!attr_groups) - return -ENOMEM; + ret = arm_cspmu_alloc_attr_groups(cspmu); + if (ret) + return ret; ret = cpuhp_state_add_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node); @@ -1193,6 +1196,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu) cspmu->pmu = (struct pmu){ .task_ctx_nr = perf_invalid_context, .module = cspmu->impl.module, + .parent = cspmu->dev, .pmu_enable = arm_cspmu_enable, .pmu_disable = arm_cspmu_disable, .event_init = arm_cspmu_event_init, @@ -1201,12 +1205,11 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu) .start = arm_cspmu_start, .stop = arm_cspmu_stop, .read = arm_cspmu_read, - .attr_groups = (const struct attribute_group **)attr_groups, + .attr_groups = cspmu->attr_groups, .capabilities = capabilities, }; /* Hardware counter init */ - arm_cspmu_stop_counters(cspmu); arm_cspmu_reset_counters(cspmu); ret = perf_pmu_register(&cspmu->pmu, cspmu->name, -1); @@ -1252,14 +1255,12 @@ static int arm_cspmu_device_probe(struct platform_device *pdev) return ret; } -static int arm_cspmu_device_remove(struct platform_device *pdev) +static void arm_cspmu_device_remove(struct platform_device *pdev) { struct arm_cspmu *cspmu = platform_get_drvdata(pdev); perf_pmu_unregister(&cspmu->pmu); cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, 
&cspmu->cpuhp_node); - - return 0; } static const struct platform_device_id arm_cspmu_id[] = { @@ -1268,11 +1269,18 @@ static const struct platform_device_id arm_cspmu_id[] = { }; MODULE_DEVICE_TABLE(platform, arm_cspmu_id); +static const struct of_device_id arm_cspmu_of_match[] = { + { .compatible = "arm,coresight-pmu" }, + {} +}; +MODULE_DEVICE_TABLE(of, arm_cspmu_of_match); + static struct platform_driver arm_cspmu_driver = { .driver = { - .name = DRVNAME, - .suppress_bind_attrs = true, - }, + .name = DRVNAME, + .of_match_table = arm_cspmu_of_match, + .suppress_bind_attrs = true, + }, .probe = arm_cspmu_device_probe, .remove = arm_cspmu_device_remove, .id_table = arm_cspmu_id, @@ -1305,8 +1313,7 @@ static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node) static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node) { - int dst; - struct cpumask online_supported; + unsigned int dst; struct arm_cspmu *cspmu = hlist_entry_safe(node, struct arm_cspmu, cpuhp_node); @@ -1316,9 +1323,8 @@ static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node) return 0; /* Choose a new CPU to migrate ownership of the PMU to */ - cpumask_and(&online_supported, &cspmu->associated_cpus, - cpu_online_mask); - dst = cpumask_any_but(&online_supported, cpu); + dst = cpumask_any_and_but(&cspmu->associated_cpus, + cpu_online_mask, cpu); if (dst >= nr_cpu_ids) return 0; @@ -1421,4 +1427,5 @@ EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister); module_init(arm_cspmu_init); module_exit(arm_cspmu_exit); +MODULE_DESCRIPTION("ARM CoreSight Architecture Performance Monitor Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index 2fe723555a6b..2621f3111148 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -28,7 +28,7 @@ })[0].attr.attr) #define ARM_CSPMU_FORMAT_ATTR(_name, _config) \ - ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_sysfs_format_show, (char 
*)_config) + ARM_CSPMU_EXT_ATTR(_name, device_show_string, _config) #define ARM_CSPMU_EVENT_ATTR(_name, _config) \ PMU_EVENT_ATTR_ID(_name, arm_cspmu_sysfs_event_show, _config) @@ -157,6 +157,7 @@ struct arm_cspmu { int cycle_counter_logical_idx; struct arm_cspmu_hw_events hw_events; + const struct attribute_group *attr_groups[5]; struct arm_cspmu_impl impl; }; @@ -166,11 +167,6 @@ ssize_t arm_cspmu_sysfs_event_show(struct device *dev, struct device_attribute *attr, char *buf); -/* Default function to show format attribute in sysfs. */ -ssize_t arm_cspmu_sysfs_format_show(struct device *dev, - struct device_attribute *attr, - char *buf); - /* Register vendor backend. */ int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match); diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index 0382b702f092..8116c7846a46 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -54,65 +54,24 @@ static struct attribute *scf_pmu_event_attrs[] = { ARM_CSPMU_EVENT_ATTR(scf_cache_wb, 0xF3), NV_CSPMU_EVENT_ATTR_4(socket, rd_data, 0x101), - NV_CSPMU_EVENT_ATTR_4(socket, dl_rsp, 0x105), NV_CSPMU_EVENT_ATTR_4(socket, wb_data, 0x109), - NV_CSPMU_EVENT_ATTR_4(socket, ev_rsp, 0x10d), - NV_CSPMU_EVENT_ATTR_4(socket, prb_data, 0x111), NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding, 0x115), - NV_CSPMU_EVENT_ATTR_4(socket, dl_outstanding, 0x119), - NV_CSPMU_EVENT_ATTR_4(socket, wb_outstanding, 0x11d), - NV_CSPMU_EVENT_ATTR_4(socket, wr_outstanding, 0x121), - NV_CSPMU_EVENT_ATTR_4(socket, ev_outstanding, 0x125), - NV_CSPMU_EVENT_ATTR_4(socket, prb_outstanding, 0x129), NV_CSPMU_EVENT_ATTR_4(socket, rd_access, 0x12d), - NV_CSPMU_EVENT_ATTR_4(socket, dl_access, 0x131), NV_CSPMU_EVENT_ATTR_4(socket, wb_access, 0x135), NV_CSPMU_EVENT_ATTR_4(socket, wr_access, 0x139), - NV_CSPMU_EVENT_ATTR_4(socket, ev_access, 0x13d), - NV_CSPMU_EVENT_ATTR_4(socket, prb_access, 0x141), - - NV_CSPMU_EVENT_ATTR_4(ocu, 
gmem_rd_data, 0x145), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_access, 0x149), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_access, 0x14d), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_outstanding, 0x151), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_outstanding, 0x155), - - NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_data, 0x159), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_access, 0x15d), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_access, 0x161), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_outstanding, 0x165), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_outstanding, 0x169), ARM_CSPMU_EVENT_ATTR(gmem_rd_data, 0x16d), ARM_CSPMU_EVENT_ATTR(gmem_rd_access, 0x16e), ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding, 0x16f), - ARM_CSPMU_EVENT_ATTR(gmem_dl_rsp, 0x170), - ARM_CSPMU_EVENT_ATTR(gmem_dl_access, 0x171), - ARM_CSPMU_EVENT_ATTR(gmem_dl_outstanding, 0x172), ARM_CSPMU_EVENT_ATTR(gmem_wb_data, 0x173), ARM_CSPMU_EVENT_ATTR(gmem_wb_access, 0x174), - ARM_CSPMU_EVENT_ATTR(gmem_wb_outstanding, 0x175), - ARM_CSPMU_EVENT_ATTR(gmem_ev_rsp, 0x176), - ARM_CSPMU_EVENT_ATTR(gmem_ev_access, 0x177), - ARM_CSPMU_EVENT_ATTR(gmem_ev_outstanding, 0x178), ARM_CSPMU_EVENT_ATTR(gmem_wr_data, 0x179), - ARM_CSPMU_EVENT_ATTR(gmem_wr_outstanding, 0x17a), ARM_CSPMU_EVENT_ATTR(gmem_wr_access, 0x17b), NV_CSPMU_EVENT_ATTR_4(socket, wr_data, 0x17c), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_data, 0x180), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_data, 0x184), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_access, 0x188), - NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_outstanding, 0x18c), - - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_data, 0x190), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_data, 0x194), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_access, 0x198), - NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_outstanding, 0x19c), - ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes, 0x1a0), ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes, 0x1a1), ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data, 0x1a2), @@ -122,35 +81,12 @@ static struct attribute *scf_pmu_event_attrs[] = { ARM_CSPMU_EVENT_ATTR(cmem_rd_data, 0x1a5), 
ARM_CSPMU_EVENT_ATTR(cmem_rd_access, 0x1a6), ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding, 0x1a7), - ARM_CSPMU_EVENT_ATTR(cmem_dl_rsp, 0x1a8), - ARM_CSPMU_EVENT_ATTR(cmem_dl_access, 0x1a9), - ARM_CSPMU_EVENT_ATTR(cmem_dl_outstanding, 0x1aa), ARM_CSPMU_EVENT_ATTR(cmem_wb_data, 0x1ab), ARM_CSPMU_EVENT_ATTR(cmem_wb_access, 0x1ac), - ARM_CSPMU_EVENT_ATTR(cmem_wb_outstanding, 0x1ad), - ARM_CSPMU_EVENT_ATTR(cmem_ev_rsp, 0x1ae), - ARM_CSPMU_EVENT_ATTR(cmem_ev_access, 0x1af), - ARM_CSPMU_EVENT_ATTR(cmem_ev_outstanding, 0x1b0), ARM_CSPMU_EVENT_ATTR(cmem_wr_data, 0x1b1), - ARM_CSPMU_EVENT_ATTR(cmem_wr_outstanding, 0x1b2), - - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_data, 0x1b3), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_access, 0x1b7), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_access, 0x1bb), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_outstanding, 0x1bf), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_outstanding, 0x1c3), - - ARM_CSPMU_EVENT_ATTR(ocu_prb_access, 0x1c7), - ARM_CSPMU_EVENT_ATTR(ocu_prb_data, 0x1c8), - ARM_CSPMU_EVENT_ATTR(ocu_prb_outstanding, 0x1c9), ARM_CSPMU_EVENT_ATTR(cmem_wr_access, 0x1ca), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_access, 0x1cb), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_data, 0x1cf), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_data, 0x1d3), - NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_outstanding, 0x1d7), - ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes, 0x1db), ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), @@ -194,6 +130,7 @@ static struct attribute *pcie_pmu_format_attrs[] = { static struct attribute *nvlink_c2c_pmu_format_attrs[] = { ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_ATTR(port, "config1:0-1"), NULL, }; @@ -238,10 +175,12 @@ static u32 nv_cspmu_event_filter(const struct perf_event *event) const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(to_arm_cspmu(event->pmu)); - if (ctx->filter_mask == 0) + const u32 filter_val = event->attr.config1 & ctx->filter_mask; + + if (filter_val == 0) return ctx->filter_default_val; - return event->attr.config1 & ctx->filter_mask; + return 
filter_val; } enum nv_cspmu_name_fmt { @@ -274,7 +213,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = { { .prodid = 0x104, .prodid_mask = NV_PRODID_MASK, - .filter_mask = 0x0, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c1_pmu_%u", .name_fmt = NAME_FMT_SOCKET, @@ -284,7 +223,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = { { .prodid = 0x105, .prodid_mask = NV_PRODID_MASK, - .filter_mask = 0x0, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c0_pmu_%u", .name_fmt = NAME_FMT_SOCKET, @@ -388,12 +327,6 @@ static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) impl_ops->get_format_attrs = nv_cspmu_get_format_attrs; impl_ops->get_name = nv_cspmu_get_name; - /* Set others to NULL to use default callback. */ - impl_ops->event_type = NULL; - impl_ops->event_attr_is_visible = NULL; - impl_ops->get_identifier = NULL; - impl_ops->is_cycle_counter_event = NULL; - return 0; } @@ -423,4 +356,5 @@ static void __exit nvidia_cspmu_exit(void) module_init(nvidia_cspmu_init); module_exit(nvidia_cspmu_exit); +MODULE_DESCRIPTION("NVIDIA Coresight Architecture Performance Monitor Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 30cea6859574..619cf937602f 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -542,12 +542,16 @@ static int dmc620_pmu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; + hwc->idx = -1; + + if (event->group_leader == event) + return 0; + /* * We can't atomically disable all HW counters so only one event allowed, * although software events are acceptable. 
*/ - if (event->group_leader != event && - !is_software_event(event->group_leader)) + if (!is_software_event(event->group_leader)) return -EINVAL; for_each_sibling_event(sibling, event->group_leader) { @@ -556,7 +560,6 @@ static int dmc620_pmu_event_init(struct perf_event *event) return -EINVAL; } - hwc->idx = -1; return 0; } @@ -673,6 +676,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev) dmc620_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = &pdev->dev, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, .task_ctx_nr = perf_invalid_context, .event_init = dmc620_pmu_event_init, @@ -724,7 +728,7 @@ out_teardown_dev: return ret; } -static int dmc620_pmu_device_remove(struct platform_device *pdev) +static void dmc620_pmu_device_remove(struct platform_device *pdev) { struct dmc620_pmu *dmc620_pmu = platform_get_drvdata(pdev); @@ -732,8 +736,6 @@ static int dmc620_pmu_device_remove(struct platform_device *pdev) /* perf will synchronise RCU before devres can free dmc620_pmu */ perf_pmu_unregister(&dmc620_pmu->pmu); - - return 0; } static const struct acpi_device_id dmc620_acpi_match[] = { diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index 7ec4498e312f..cb4fb59fe04b 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -85,7 +85,7 @@ DSU_EXT_ATTR(_name, dsu_pmu_sysfs_event_show, (unsigned long)_config) #define DSU_FORMAT_ATTR(_name, _config) \ - DSU_EXT_ATTR(_name, dsu_pmu_sysfs_format_show, (char *)_config) + DSU_EXT_ATTR(_name, device_show_string, _config) #define DSU_CPUMASK_ATTR(_name, _config) \ DSU_EXT_ATTR(_name, dsu_pmu_cpumask_show, (unsigned long)_config) @@ -139,15 +139,6 @@ static ssize_t dsu_pmu_sysfs_event_show(struct device *dev, return sysfs_emit(buf, "event=0x%lx\n", (unsigned long)eattr->var); } -static ssize_t dsu_pmu_sysfs_format_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct dev_ext_attribute *eattr = container_of(attr, - struct dev_ext_attribute, 
attr); - return sysfs_emit(buf, "%s\n", (char *)eattr->var); -} - static ssize_t dsu_pmu_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -230,15 +221,6 @@ static const struct attribute_group *dsu_pmu_attr_groups[] = { NULL, }; -static int dsu_pmu_get_online_cpu_any_but(struct dsu_pmu *dsu_pmu, int cpu) -{ - struct cpumask online_supported; - - cpumask_and(&online_supported, - &dsu_pmu->associated_cpus, cpu_online_mask); - return cpumask_any_but(&online_supported, cpu); -} - static inline bool dsu_pmu_counter_valid(struct dsu_pmu *dsu_pmu, u32 idx) { return (idx < dsu_pmu->num_counters) || @@ -751,6 +733,7 @@ static int dsu_pmu_device_probe(struct platform_device *pdev) dsu_pmu->pmu = (struct pmu) { .task_ctx_nr = perf_invalid_context, + .parent = &pdev->dev, .module = THIS_MODULE, .pmu_enable = dsu_pmu_enable, .pmu_disable = dsu_pmu_disable, @@ -774,14 +757,12 @@ static int dsu_pmu_device_probe(struct platform_device *pdev) return rc; } -static int dsu_pmu_device_remove(struct platform_device *pdev) +static void dsu_pmu_device_remove(struct platform_device *pdev) { struct dsu_pmu *dsu_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&dsu_pmu->pmu); cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, &dsu_pmu->cpuhp_node); - - return 0; } static const struct of_device_id dsu_pmu_of_match[] = { @@ -829,14 +810,16 @@ static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node) { - int dst; - struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu, - cpuhp_node); + struct dsu_pmu *dsu_pmu; + unsigned int dst; + + dsu_pmu = hlist_entry_safe(node, struct dsu_pmu, cpuhp_node); if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu)) return 0; - dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu); + dst = cpumask_any_and_but(&dsu_pmu->associated_cpus, + cpu_online_mask, cpu); /* If there are no active CPUs in the DSU, leave IRQ disabled */ if 
(dst >= nr_cpu_ids) return 0; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 8458fe2cebb4..398cce3d76fc 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -522,7 +522,7 @@ static void armpmu_enable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events); + bool enabled = !bitmap_empty(hw_events->used_mask, ARMPMU_MAX_HWEVENTS); /* For task-bound events we may be called on other CPUs */ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) @@ -742,7 +742,7 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) struct perf_event *event; int idx; - for (idx = 0; idx < armpmu->num_events; idx++) { + for_each_set_bit(idx, armpmu->cntr_mask, ARMPMU_MAX_HWEVENTS) { event = hw_events->events[idx]; if (!event) continue; @@ -772,7 +772,7 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, { struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb); struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events); + bool enabled = !bitmap_empty(hw_events->used_mask, ARMPMU_MAX_HWEVENTS); if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) return NOTIFY_DONE; @@ -924,8 +924,9 @@ int armpmu_register(struct arm_pmu *pmu) if (ret) goto out_destroy; - pr_info("enabled with %s PMU driver, %d counters available%s\n", - pmu->name, pmu->num_events, + pr_info("enabled with %s PMU driver, %d (%*pb) counters available%s\n", + pmu->name, bitmap_weight(pmu->cntr_mask, ARMPMU_MAX_HWEVENTS), + ARMPMU_MAX_HWEVENTS, &pmu->cntr_mask, has_nmi ? 
", using NMIs" : ""); kvm_host_pmu_init(pmu); diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 3596db36cbff..118170a5cede 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -59,7 +59,7 @@ static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq) static bool pmu_has_irq_affinity(struct device_node *node) { - return !!of_find_property(node, "interrupt-affinity", NULL); + return of_property_present(node, "interrupt-affinity"); } static int pmu_parse_irq_affinity(struct device *dev, int i) @@ -196,6 +196,7 @@ int arm_pmu_device_probe(struct platform_device *pdev, if (!pmu) return -ENOMEM; + pmu->pmu.parent = &pdev->dev; pmu->plat_device = pdev; ret = pmu_parse_irqs(pmu); diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 23fa6c5da82c..0e360feb3432 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -25,8 +25,6 @@ #include <linux/smp.h> #include <linux/nmi.h> -#include <asm/arm_pmuv3.h> - /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -338,6 +336,11 @@ static bool armv8pmu_event_want_user_access(struct perf_event *event) return ATTR_CFG_GET_FLD(&event->attr, rdpmc); } +static u32 armv8pmu_event_get_threshold(struct perf_event_attr *attr) +{ + return ATTR_CFG_GET_FLD(attr, threshold); +} + static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr) { u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare); @@ -449,13 +452,6 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = { }; /* - * Perf Events' indices - */ -#define ARMV8_IDX_CYCLE_COUNTER 0 -#define ARMV8_IDX_COUNTER0 1 -#define ARMV8_IDX_CYCLE_COUNTER_USER 32 - -/* * We unconditionally enable ARMv8.5-PMU long event counter support * (64-bit events) where supported. Indicate if this arm_pmu has long * event counter support. 
@@ -486,19 +482,12 @@ static bool armv8pmu_event_is_chained(struct perf_event *event) return !armv8pmu_event_has_user_read(event) && armv8pmu_event_is_64bit(event) && !armv8pmu_has_long_event(cpu_pmu) && - (idx != ARMV8_IDX_CYCLE_COUNTER); + (idx < ARMV8_PMU_MAX_GENERAL_COUNTERS); } /* * ARMv8 low level PMU access */ - -/* - * Perf Event to low level counters mapping - */ -#define ARMV8_IDX_TO_COUNTER(x) \ - (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) - static u64 armv8pmu_pmcr_read(void) { return read_pmcr(); @@ -511,21 +500,19 @@ static void armv8pmu_pmcr_write(u64 val) write_pmcr(val); } -static int armv8pmu_has_overflowed(u32 pmovsr) +static int armv8pmu_has_overflowed(u64 pmovsr) { - return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; + return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK); } -static int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) +static int armv8pmu_counter_has_overflowed(u64 pmnc, int idx) { - return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); + return !!(pmnc & BIT(idx)); } static u64 armv8pmu_read_evcntr(int idx) { - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - - return read_pmevcntrn(counter); + return read_pmevcntrn(idx); } static u64 armv8pmu_read_hw_counter(struct perf_event *event) @@ -554,7 +541,7 @@ static bool armv8pmu_event_needs_bias(struct perf_event *event) return false; if (armv8pmu_has_long_event(cpu_pmu) || - idx == ARMV8_IDX_CYCLE_COUNTER) + idx >= ARMV8_PMU_MAX_GENERAL_COUNTERS) return true; return false; @@ -582,8 +569,10 @@ static u64 armv8pmu_read_counter(struct perf_event *event) int idx = hwc->idx; u64 value; - if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_PMU_CYCLE_IDX) value = read_pmccntr(); + else if (idx == ARMV8_PMU_INSTR_IDX) + value = read_pmicntr(); else value = armv8pmu_read_hw_counter(event); @@ -592,9 +581,7 @@ static u64 armv8pmu_read_counter(struct perf_event *event) static void armv8pmu_write_evcntr(int idx, u64 value) { - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - - write_pmevcntrn(counter, value); 
+ write_pmevcntrn(idx, value); } static void armv8pmu_write_hw_counter(struct perf_event *event, @@ -617,15 +604,16 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) value = armv8pmu_bias_long_counter(event, value); - if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_PMU_CYCLE_IDX) write_pmccntr(value); + else if (idx == ARMV8_PMU_INSTR_IDX) + write_pmicntr(value); else armv8pmu_write_hw_counter(event, value); } static void armv8pmu_write_evtype(int idx, unsigned long val) { - u32 counter = ARMV8_IDX_TO_COUNTER(idx); unsigned long mask = ARMV8_PMU_EVTYPE_EVENT | ARMV8_PMU_INCLUDE_EL2 | ARMV8_PMU_EXCLUDE_EL0 | @@ -635,7 +623,7 @@ static void armv8pmu_write_evtype(int idx, unsigned long val) mask |= ARMV8_PMU_EVTYPE_TC | ARMV8_PMU_EVTYPE_TH; val &= mask; - write_pmevtypern(counter, val); + write_pmevtypern(idx, val); } static void armv8pmu_write_event_type(struct perf_event *event) @@ -655,24 +643,26 @@ static void armv8pmu_write_event_type(struct perf_event *event) armv8pmu_write_evtype(idx - 1, hwc->config_base); armv8pmu_write_evtype(idx, chain_evt); } else { - if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_PMU_CYCLE_IDX) write_pmccfiltr(hwc->config_base); + else if (idx == ARMV8_PMU_INSTR_IDX) + write_pmicfiltr(hwc->config_base); else armv8pmu_write_evtype(idx, hwc->config_base); } } -static u32 armv8pmu_event_cnten_mask(struct perf_event *event) +static u64 armv8pmu_event_cnten_mask(struct perf_event *event) { - int counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); - u32 mask = BIT(counter); + int counter = event->hw.idx; + u64 mask = BIT(counter); if (armv8pmu_event_is_chained(event)) mask |= BIT(counter - 1); return mask; } -static void armv8pmu_enable_counter(u32 mask) +static void armv8pmu_enable_counter(u64 mask) { /* * Make sure event configuration register writes are visible before we @@ -685,7 +675,7 @@ static void armv8pmu_enable_counter(u32 mask) static void armv8pmu_enable_event_counter(struct perf_event *event) { 
struct perf_event_attr *attr = &event->attr; - u32 mask = armv8pmu_event_cnten_mask(event); + u64 mask = armv8pmu_event_cnten_mask(event); kvm_set_pmu_events(mask, attr); @@ -694,7 +684,7 @@ static void armv8pmu_enable_event_counter(struct perf_event *event) armv8pmu_enable_counter(mask); } -static void armv8pmu_disable_counter(u32 mask) +static void armv8pmu_disable_counter(u64 mask) { write_pmcntenclr(mask); /* @@ -707,7 +697,7 @@ static void armv8pmu_disable_counter(u32 mask) static void armv8pmu_disable_event_counter(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - u32 mask = armv8pmu_event_cnten_mask(event); + u64 mask = armv8pmu_event_cnten_mask(event); kvm_clr_pmu_events(mask); @@ -716,18 +706,17 @@ static void armv8pmu_disable_event_counter(struct perf_event *event) armv8pmu_disable_counter(mask); } -static void armv8pmu_enable_intens(u32 mask) +static void armv8pmu_enable_intens(u64 mask) { write_pmintenset(mask); } static void armv8pmu_enable_event_irq(struct perf_event *event) { - u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); - armv8pmu_enable_intens(BIT(counter)); + armv8pmu_enable_intens(BIT(event->hw.idx)); } -static void armv8pmu_disable_intens(u32 mask) +static void armv8pmu_disable_intens(u64 mask) { write_pmintenclr(mask); isb(); @@ -738,13 +727,12 @@ static void armv8pmu_disable_intens(u32 mask) static void armv8pmu_disable_event_irq(struct perf_event *event) { - u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); - armv8pmu_disable_intens(BIT(counter)); + armv8pmu_disable_intens(BIT(event->hw.idx)); } -static u32 armv8pmu_getreset_flags(void) +static u64 armv8pmu_getreset_flags(void) { - u32 value; + u64 value; /* Read */ value = read_pmovsclr(); @@ -782,15 +770,27 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) int i; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - /* Clear any unused counters to avoid leaking their contents */ - for_each_clear_bit(i, cpuc->used_mask, 
cpu_pmu->num_events) { - if (i == ARMV8_IDX_CYCLE_COUNTER) - write_pmccntr(0); - else - armv8pmu_write_evcntr(i, 0); + if (is_pmuv3p9(cpu_pmu->pmuver)) { + u64 mask = 0; + for_each_set_bit(i, cpuc->used_mask, ARMPMU_MAX_HWEVENTS) { + if (armv8pmu_event_has_user_read(cpuc->events[i])) + mask |= BIT(i); + } + write_pmuacr(mask); + } else { + /* Clear any unused counters to avoid leaking their contents */ + for_each_andnot_bit(i, cpu_pmu->cntr_mask, cpuc->used_mask, + ARMPMU_MAX_HWEVENTS) { + if (i == ARMV8_PMU_CYCLE_IDX) + write_pmccntr(0); + else if (i == ARMV8_PMU_INSTR_IDX) + write_pmicntr(0); + else + armv8pmu_write_evcntr(i, 0); + } } - update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR); + update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_UEN); } static void armv8pmu_enable_event(struct perf_event *event) @@ -839,7 +839,7 @@ static void armv8pmu_stop(struct arm_pmu *cpu_pmu) static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) { - u32 pmovsr; + u64 pmovsr; struct perf_sample_data data; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; @@ -866,7 +866,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) * to prevent skews in group events. */ armv8pmu_stop(cpu_pmu); - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -905,7 +905,7 @@ static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, { int idx; - for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) { + for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) { if (!test_and_set_bit(idx, cpuc->used_mask)) return idx; } @@ -921,7 +921,9 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, * Chaining requires two consecutive event counters, where * the lower idx must be even. 
*/ - for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { + for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) { + if (!(idx & 0x1)) + continue; if (!test_and_set_bit(idx, cpuc->used_mask)) { /* Check if the preceding even counter is available */ if (!test_and_set_bit(idx - 1, cpuc->used_mask)) @@ -941,9 +943,10 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; /* Always prefer to place a cycle counter into the cycle counter. */ - if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { - if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) - return ARMV8_IDX_CYCLE_COUNTER; + if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && + !armv8pmu_event_get_threshold(&event->attr)) { + if (!test_and_set_bit(ARMV8_PMU_CYCLE_IDX, cpuc->used_mask)) + return ARMV8_PMU_CYCLE_IDX; else if (armv8pmu_event_is_64bit(event) && armv8pmu_event_want_user_access(event) && !armv8pmu_has_long_event(cpu_pmu)) @@ -951,6 +954,19 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, } /* + * Always prefer to place a instruction counter into the instruction counter, + * but don't expose the instruction counter to userspace access as userspace + * may not know how to handle it. + */ + if ((evtype == ARMV8_PMUV3_PERFCTR_INST_RETIRED) && + !armv8pmu_event_get_threshold(&event->attr) && + test_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask) && + !armv8pmu_event_want_user_access(event)) { + if (!test_and_set_bit(ARMV8_PMU_INSTR_IDX, cpuc->used_mask)) + return ARMV8_PMU_INSTR_IDX; + } + + /* * Otherwise use events counters */ if (armv8pmu_event_is_chained(event)) @@ -974,15 +990,7 @@ static int armv8pmu_user_event_idx(struct perf_event *event) if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event)) return 0; - /* - * We remap the cycle counter index to 32 to - * match the offset applied to the rest of - * the counter indices. 
- */ - if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER) - return ARMV8_IDX_CYCLE_COUNTER_USER; - - return event->hw.idx; + return event->hw.idx + 1; } /* @@ -1033,13 +1041,13 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, * If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will * be 0 and will also trigger this check, preventing it from being used. */ - th = ATTR_CFG_GET_FLD(attr, threshold); + th = armv8pmu_event_get_threshold(attr); if (th > threshold_max(cpu_pmu)) { pr_debug("PMU event threshold exceeds max value\n"); return -EINVAL; } - if (IS_ENABLED(CONFIG_ARM64) && th) { + if (th) { config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th); config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC, armv8pmu_event_threshold_control(attr)); @@ -1057,14 +1065,16 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u64 pmcr; + u64 pmcr, mask; + + bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS); /* The counter and interrupt enable registers are unknown at reset. */ - armv8pmu_disable_counter(U32_MAX); - armv8pmu_disable_intens(U32_MAX); + armv8pmu_disable_counter(mask); + armv8pmu_disable_intens(mask); /* Clear the counters we flip at guest entry/exit */ - kvm_clr_pmu_events(U32_MAX); + kvm_clr_pmu_events(mask); /* * Initialize & Reset PMNC. 
Request overflow interrupt for @@ -1085,14 +1095,14 @@ static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu, if (event->attr.type == PERF_TYPE_HARDWARE && event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) { - if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, - armpmu->pmceid_bitmap)) - return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED; - if (test_bit(ARMV8_PMUV3_PERFCTR_BR_RETIRED, armpmu->pmceid_bitmap)) return ARMV8_PMUV3_PERFCTR_BR_RETIRED; + if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, + armpmu->pmceid_bitmap)) + return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED; + return HW_OP_UNSUPPORTED; } @@ -1207,10 +1217,15 @@ static void __armv8pmu_probe_pmu(void *info) probe->present = true; /* Read the nb of CNTx counters supported from PMNC */ - cpu_pmu->num_events = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()); + bitmap_set(cpu_pmu->cntr_mask, + 0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read())); /* Add the CPU cycles counter */ - cpu_pmu->num_events += 1; + set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask); + + /* Add the CPU instructions counter */ + if (pmuv3_has_icntr()) + set_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask); pmceid[0] = pmceid_raw[0] = read_pmceid0(); pmceid[1] = pmceid_raw[1] = read_pmceid1(); @@ -1253,7 +1268,7 @@ static void armv8pmu_disable_user_access_ipi(void *unused) armv8pmu_disable_user_access(); } -static int armv8pmu_proc_user_access_handler(struct ctl_table *table, int write, +static int armv8pmu_proc_user_access_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); @@ -1264,7 +1279,7 @@ static int armv8pmu_proc_user_access_handler(struct ctl_table *table, int write, return 0; } -static struct ctl_table armv8_pmu_sysctl_table[] = { +static const struct ctl_table armv8_pmu_sysctl_table[] = { { .procname = "perf_user_access", .data = &sysctl_perf_user_access, @@ -1340,18 +1355,26 @@ PMUV3_INIT_SIMPLE(armv9_cortex_a520) 
PMUV3_INIT_SIMPLE(armv9_cortex_a710) PMUV3_INIT_SIMPLE(armv9_cortex_a715) PMUV3_INIT_SIMPLE(armv9_cortex_a720) +PMUV3_INIT_SIMPLE(armv9_cortex_a725) PMUV3_INIT_SIMPLE(armv8_cortex_x1) PMUV3_INIT_SIMPLE(armv9_cortex_x2) PMUV3_INIT_SIMPLE(armv9_cortex_x3) PMUV3_INIT_SIMPLE(armv9_cortex_x4) +PMUV3_INIT_SIMPLE(armv9_cortex_x925) PMUV3_INIT_SIMPLE(armv8_neoverse_e1) PMUV3_INIT_SIMPLE(armv8_neoverse_n1) PMUV3_INIT_SIMPLE(armv9_neoverse_n2) +PMUV3_INIT_SIMPLE(armv9_neoverse_n3) PMUV3_INIT_SIMPLE(armv8_neoverse_v1) +PMUV3_INIT_SIMPLE(armv8_neoverse_v2) +PMUV3_INIT_SIMPLE(armv8_neoverse_v3) +PMUV3_INIT_SIMPLE(armv8_neoverse_v3ae) PMUV3_INIT_SIMPLE(armv8_nvidia_carmel) PMUV3_INIT_SIMPLE(armv8_nvidia_denver) +PMUV3_INIT_SIMPLE(armv8_samsung_mongoose) + PMUV3_INIT_MAP_EVENT(armv8_cortex_a35, armv8_a53_map_event) PMUV3_INIT_MAP_EVENT(armv8_cortex_a53, armv8_a53_map_event) PMUV3_INIT_MAP_EVENT(armv8_cortex_a57, armv8_a57_map_event) @@ -1379,18 +1402,25 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init}, {.compatible = "arm,cortex-a715-pmu", .data = armv9_cortex_a715_pmu_init}, {.compatible = "arm,cortex-a720-pmu", .data = armv9_cortex_a720_pmu_init}, + {.compatible = "arm,cortex-a725-pmu", .data = armv9_cortex_a725_pmu_init}, {.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init}, {.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init}, {.compatible = "arm,cortex-x3-pmu", .data = armv9_cortex_x3_pmu_init}, {.compatible = "arm,cortex-x4-pmu", .data = armv9_cortex_x4_pmu_init}, + {.compatible = "arm,cortex-x925-pmu", .data = armv9_cortex_x925_pmu_init}, {.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init}, {.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init}, {.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init}, + {.compatible = "arm,neoverse-n3-pmu", .data = armv9_neoverse_n3_pmu_init}, {.compatible = 
"arm,neoverse-v1-pmu", .data = armv8_neoverse_v1_pmu_init}, + {.compatible = "arm,neoverse-v2-pmu", .data = armv8_neoverse_v2_pmu_init}, + {.compatible = "arm,neoverse-v3-pmu", .data = armv8_neoverse_v3_pmu_init}, + {.compatible = "arm,neoverse-v3ae-pmu", .data = armv8_neoverse_v3ae_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_cavium_thunder_pmu_init}, {.compatible = "brcm,vulcan-pmu", .data = armv8_brcm_vulcan_pmu_init}, {.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init}, {.compatible = "nvidia,denver-pmu", .data = armv8_nvidia_denver_pmu_init}, + {.compatible = "samsung,mongoose-pmu", .data = armv8_samsung_mongoose_pmu_init}, {}, }; diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 6303b82566f9..621f02a7f43b 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -431,6 +431,17 @@ static int smmu_pmu_event_init(struct perf_event *event) return -EINVAL; } + /* + * Ensure all events are on the same cpu so all events are in the + * same cpu context, to avoid races on pmu_enable etc. + */ + event->cpu = smmu_pmu->on_cpu; + + hwc->idx = -1; + + if (event->group_leader == event) + return 0; + for_each_sibling_event(sibling, event->group_leader) { if (is_software_event(sibling)) continue; @@ -442,14 +453,6 @@ static int smmu_pmu_event_init(struct perf_event *event) return -EINVAL; } - hwc->idx = -1; - - /* - * Ensure all events are on the same cpu so all events are in the - * same cpu context, to avoid races on pmu_enable etc. 
- */ - event->cpu = smmu_pmu->on_cpu; - return 0; } @@ -716,7 +719,7 @@ static void smmu_pmu_free_msis(void *data) { struct device *dev = data; - platform_msi_domain_free_irqs(dev); + platform_device_msi_free_irqs_all(dev); } static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) @@ -746,7 +749,7 @@ static void smmu_pmu_setup_msi(struct smmu_pmu *pmu) if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI)) return; - ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg); + ret = platform_device_msi_init_and_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg); if (ret) { dev_warn(dev, "failed to allocate MSIs\n"); return; @@ -860,6 +863,7 @@ static int smmu_pmu_probe(struct platform_device *pdev) smmu_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = &pdev->dev, .task_ctx_nr = perf_invalid_context, .pmu_enable = smmu_pmu_enable, .pmu_disable = smmu_pmu_disable, @@ -965,14 +969,12 @@ out_unregister: return err; } -static int smmu_pmu_remove(struct platform_device *pdev) +static void smmu_pmu_remove(struct platform_device *pdev) { struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&smmu_pmu->pmu); cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node); - - return 0; } static void smmu_pmu_shutdown(struct platform_device *pdev) diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index b622d75d8c9e..f5e6878db9d6 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -41,7 +41,7 @@ /* * Cache if the event is allowed to trace Context information. - * This allows us to perform the check, i.e, perfmon_capable(), + * This allows us to perform the check, i.e, perf_allow_kernel(), * in the context of the event owner, once, during the event_init(). 
*/ #define SPE_PMU_HW_FLAGS_CX 0x00001 @@ -50,7 +50,7 @@ static_assert((PERF_EVENT_FLAG_ARCH & SPE_PMU_HW_FLAGS_CX) == SPE_PMU_HW_FLAGS_C static void set_spe_event_has_cx(struct perf_event *event) { - if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable()) + if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && !perf_allow_kernel(&event->attr)) event->hw.flags |= SPE_PMU_HW_FLAGS_CX; } @@ -85,6 +85,7 @@ struct arm_spe_pmu { #define SPE_PMU_FEAT_LDS (1UL << 4) #define SPE_PMU_FEAT_ERND (1UL << 5) #define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6) +#define SPE_PMU_FEAT_DISCARD (1UL << 7) #define SPE_PMU_FEAT_DEV_PROBED (1UL << 63) u64 features; @@ -193,6 +194,9 @@ static const struct attribute_group arm_spe_pmu_cap_group = { #define ATTR_CFG_FLD_store_filter_CFG config /* PMSFCR_EL1.ST */ #define ATTR_CFG_FLD_store_filter_LO 34 #define ATTR_CFG_FLD_store_filter_HI 34 +#define ATTR_CFG_FLD_discard_CFG config /* PMBLIMITR_EL1.FM = DISCARD */ +#define ATTR_CFG_FLD_discard_LO 35 +#define ATTR_CFG_FLD_discard_HI 35 #define ATTR_CFG_FLD_event_filter_CFG config1 /* PMSEVFR_EL1 */ #define ATTR_CFG_FLD_event_filter_LO 0 @@ -216,6 +220,7 @@ GEN_PMU_FORMAT_ATTR(store_filter); GEN_PMU_FORMAT_ATTR(event_filter); GEN_PMU_FORMAT_ATTR(inv_event_filter); GEN_PMU_FORMAT_ATTR(min_latency); +GEN_PMU_FORMAT_ATTR(discard); static struct attribute *arm_spe_pmu_formats_attr[] = { &format_attr_ts_enable.attr, @@ -228,6 +233,7 @@ static struct attribute *arm_spe_pmu_formats_attr[] = { &format_attr_event_filter.attr, &format_attr_inv_event_filter.attr, &format_attr_min_latency.attr, + &format_attr_discard.attr, NULL, }; @@ -238,6 +244,9 @@ static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev); + if (attr == &format_attr_discard.attr && !(spe_pmu->features & SPE_PMU_FEAT_DISCARD)) + return 0; + if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT)) 
return 0; @@ -502,6 +511,12 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle, u64 base, limit; struct arm_spe_pmu_buf *buf; + if (ATTR_CFG_GET_FLD(&event->attr, discard)) { + limit = FIELD_PREP(PMBLIMITR_EL1_FM, PMBLIMITR_EL1_FM_DISCARD); + limit |= PMBLIMITR_EL1_E; + goto out_write_limit; + } + /* Start a new aux session */ buf = perf_aux_output_begin(handle, event); if (!buf) { @@ -743,11 +758,14 @@ static int arm_spe_pmu_event_init(struct perf_event *event) !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT)) return -EOPNOTSUPP; + if (ATTR_CFG_GET_FLD(&event->attr, discard) && + !(spe_pmu->features & SPE_PMU_FEAT_DISCARD)) + return -EOPNOTSUPP; + set_spe_event_has_cx(event); reg = arm_spe_event_to_pmscr(event); - if (!perfmon_capable() && - (reg & (PMSCR_EL1_PA | PMSCR_EL1_PCT))) - return -EACCES; + if (reg & (PMSCR_EL1_PA | PMSCR_EL1_PCT)) + return perf_allow_kernel(&event->attr); return 0; } @@ -932,6 +950,7 @@ static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu) spe_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = &spe_pmu->pdev->dev, .capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE, .attr_groups = arm_spe_pmu_attr_groups, /* @@ -1027,6 +1046,9 @@ static void __arm_spe_pmu_dev_probe(void *info) if (FIELD_GET(PMSIDR_EL1_ERND, reg)) spe_pmu->features |= SPE_PMU_FEAT_ERND; + if (spe_pmu->pmsver >= ID_AA64DFR0_EL1_PMSVer_V1P2) + spe_pmu->features |= SPE_PMU_FEAT_DISCARD; + /* This field has a spaced out encoding, so just use a look-up */ fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg); switch (fld) { @@ -1263,14 +1285,13 @@ out_free_handle: return ret; } -static int arm_spe_pmu_device_remove(struct platform_device *pdev) +static void arm_spe_pmu_device_remove(struct platform_device *pdev) { struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev); arm_spe_pmu_perf_destroy(spe_pmu); arm_spe_pmu_dev_teardown(spe_pmu); free_percpu(spe_pmu->handle); - return 0; } static struct platform_driver arm_spe_pmu_driver = { @@ 
-1281,7 +1302,7 @@ static struct platform_driver arm_spe_pmu_driver = { .suppress_bind_attrs = true, }, .probe = arm_spe_pmu_device_probe, - .remove = arm_spe_pmu_device_remove, + .remove = arm_spe_pmu_device_remove, }; static int __init arm_spe_pmu_init(void) diff --git a/drivers/perf/arm_v6_pmu.c b/drivers/perf/arm_v6_pmu.c new file mode 100644 index 000000000000..b09615bb2bb2 --- /dev/null +++ b/drivers/perf/arm_v6_pmu.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARMv6 Performance counter handling code. + * + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles + * + * ARMv6 has 2 configurable performance counters and a single cycle counter. + * They all share a single reset bit but can be written to zero so we can use + * that for a reset. + * + * The counters can't be individually enabled or disabled so when we remove + * one event and replace it with another we could get spurious counts from the + * wrong event. However, we can take advantage of the fact that the + * performance counters can export events to the event bus, and the event bus + * itself can be monitored. This requires that we *don't* export the events to + * the event bus. The procedure for disabling a configurable counter is: + * - change the counter to count the ETMEXTOUT[0] signal (0x20). This + * effectively stops the counter from counting. + * - disable the counter's interrupt generation (each counter has it's + * own interrupt enable bit). + * Once stopped, the counter value can be written as 0 to reset. + * + * To enable a counter: + * - enable the counter's interrupt generation. + * - set the new event type. + * + * Note: the dedicated cycle counter only counts cycles and can't be + * enabled/disabled independently of the others. When we want to disable the + * cycle counter, we have to just disable the interrupt reporting and start + * ignoring that counter. When re-enabling, we have to reset the value and + * enable the interrupt. 
+ */ + +#include <asm/cputype.h> +#include <asm/irq_regs.h> + +#include <linux/of.h> +#include <linux/perf/arm_pmu.h> +#include <linux/platform_device.h> + +enum armv6_perf_types { + ARMV6_PERFCTR_ICACHE_MISS = 0x0, + ARMV6_PERFCTR_IBUF_STALL = 0x1, + ARMV6_PERFCTR_DDEP_STALL = 0x2, + ARMV6_PERFCTR_ITLB_MISS = 0x3, + ARMV6_PERFCTR_DTLB_MISS = 0x4, + ARMV6_PERFCTR_BR_EXEC = 0x5, + ARMV6_PERFCTR_BR_MISPREDICT = 0x6, + ARMV6_PERFCTR_INSTR_EXEC = 0x7, + ARMV6_PERFCTR_DCACHE_HIT = 0x9, + ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, + ARMV6_PERFCTR_DCACHE_MISS = 0xB, + ARMV6_PERFCTR_DCACHE_WBACK = 0xC, + ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, + ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, + ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, + ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, + ARMV6_PERFCTR_WBUF_DRAINED = 0x12, + ARMV6_PERFCTR_CPU_CYCLES = 0xFF, + ARMV6_PERFCTR_NOP = 0x20, +}; + +enum armv6_counters { + ARMV6_CYCLE_COUNTER = 0, + ARMV6_COUNTER0, + ARMV6_COUNTER1, + ARMV6_NUM_COUNTERS +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL, +}; + +static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. 
+ */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + + /* + * The ARM performance counters can count micro DTLB misses, micro ITLB + * misses and main TLB misses. There isn't an event for TLB misses, so + * use the micro misses here and if users want the main TLB misses they + * can use a raw counter. + */ + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, +}; + +static inline unsigned long +armv6_pmcr_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); + return val; +} + +static inline void +armv6_pmcr_write(unsigned long val) +{ + asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); +} + +#define ARMV6_PMCR_ENABLE (1 << 0) +#define ARMV6_PMCR_CTR01_RESET (1 << 1) +#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) +#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) +#define ARMV6_PMCR_COUNT0_IEN (1 << 4) +#define ARMV6_PMCR_COUNT1_IEN (1 << 5) +#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) +#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) +#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) +#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) +#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 +#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) +#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 +#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) + +#define ARMV6_PMCR_OVERFLOWED_MASK \ + (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ + ARMV6_PMCR_CCOUNT_OVERFLOW) + +static inline int +armv6_pmcr_has_overflowed(unsigned long 
pmcr) +{ + return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; +} + +static inline int +armv6_pmcr_counter_has_overflowed(unsigned long pmcr, + enum armv6_counters counter) +{ + int ret = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; + else if (ARMV6_COUNTER0 == counter) + ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; + else if (ARMV6_COUNTER1 == counter) + ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return ret; +} + +static inline u64 armv6pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + unsigned long value = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return value; +} + +static inline void armv6pmu_write_counter(struct perf_event *event, u64 value) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); +} + +static void armv6pmu_enable_event(struct perf_event *event) +{ + unsigned long val, mask, evt; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = 0; + evt = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_EVT_COUNT0_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | + ARMV6_PMCR_COUNT0_IEN; + } else if 
(ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_EVT_COUNT1_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | + ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the event + * that we're interested in. + */ + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); +} + +static irqreturn_t +armv6pmu_handle_irq(struct arm_pmu *cpu_pmu) +{ + unsigned long pmcr = armv6_pmcr_read(); + struct perf_sample_data data; + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + struct pt_regs *regs; + int idx; + + if (!armv6_pmcr_has_overflowed(pmcr)) + return IRQ_NONE; + + regs = get_irq_regs(); + + /* + * The interrupts are cleared by writing the overflow flags back to + * the control register. All of the other bits don't have any effect + * if they are rewritten, so write the whole value back. + */ + armv6_pmcr_write(pmcr); + + for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV6_NUM_COUNTERS) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. */ + if (!event) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. 
+ */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv6pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long val; + + val = armv6_pmcr_read(); + val |= ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); +} + +static void armv6pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long val; + + val = armv6_pmcr_read(); + val &= ~ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); +} + +static int +armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + /* Always place a cycle counter into the cycle counter. */ + if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) { + if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV6_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * counter0 and counter1. + */ + if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) + return ARMV6_COUNTER1; + + if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) + return ARMV6_COUNTER0; + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + +static void armv6pmu_disable_event(struct perf_event *event) +{ + unsigned long val, mask, evt; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + evt = 0; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the number + * of ETM bus signal assertion cycles. 
The external reporting should + * be disabled and so this should never increment. + */ + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); +} + +static int armv6_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv6_perf_map, + &armv6_perf_cache_map, 0xFF); +} + +static void armv6pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->handle_irq = armv6pmu_handle_irq; + cpu_pmu->enable = armv6pmu_enable_event; + cpu_pmu->disable = armv6pmu_disable_event; + cpu_pmu->read_counter = armv6pmu_read_counter; + cpu_pmu->write_counter = armv6pmu_write_counter; + cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; + cpu_pmu->start = armv6pmu_start; + cpu_pmu->stop = armv6pmu_stop; + cpu_pmu->map_event = armv6_map_event; + + bitmap_set(cpu_pmu->cntr_mask, 0, ARMV6_NUM_COUNTERS); +} + +static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv6pmu_init(cpu_pmu); + cpu_pmu->name = "armv6_1136"; + return 0; +} + +static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv6pmu_init(cpu_pmu); + cpu_pmu->name = "armv6_1176"; + return 0; +} + +static const struct of_device_id armv6_pmu_of_device_ids[] = { + {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, + {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, + { /* sentinel value */ } +}; + +static int armv6_pmu_device_probe(struct platform_device *pdev) +{ + return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, NULL); +} + +static struct platform_driver armv6_pmu_driver = { + .driver = { + .name = "armv6-pmu", + .of_match_table = armv6_pmu_of_device_ids, + }, + .probe = armv6_pmu_device_probe, +}; + +builtin_platform_driver(armv6_pmu_driver); diff --git a/drivers/perf/arm_v7_pmu.c b/drivers/perf/arm_v7_pmu.c new file mode 100644 index 000000000000..420cadd108e7 --- /dev/null +++ b/drivers/perf/arm_v7_pmu.c @@ -0,0 +1,1975 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARMv7 Cortex-A8 and 
Cortex-A9 Performance Events handling code. + * + * ARMv7 support: Jean Pihet <jpihet@mvista.com> + * 2010 (c) MontaVista Software, LLC. + * + * Copied from ARMv6 code, with the low level code inspired + * by the ARMv7 Oprofile code. + * + * Cortex-A8 has up to 4 configurable performance counters and + * a single cycle counter. + * Cortex-A9 has up to 31 configurable performance counters and + * a single cycle counter. + * + * All counters can be enabled/disabled and IRQ masked separately. The cycle + * counter and all 4 performance counters together can be reset separately. + */ + +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/vfp.h> +#include "../vfp/vfpinstr.h" + +#include <linux/of.h> +#include <linux/perf/arm_pmu.h> +#include <linux/platform_device.h> + +/* + * Common ARMv7 event types + * + * Note: An implementation may not be able to count all of these events + * but the encodings are considered to be `reserved' in the case that + * they are not available. + */ +#define ARMV7_PERFCTR_PMNC_SW_INCR 0x00 +#define ARMV7_PERFCTR_L1_ICACHE_REFILL 0x01 +#define ARMV7_PERFCTR_ITLB_REFILL 0x02 +#define ARMV7_PERFCTR_L1_DCACHE_REFILL 0x03 +#define ARMV7_PERFCTR_L1_DCACHE_ACCESS 0x04 +#define ARMV7_PERFCTR_DTLB_REFILL 0x05 +#define ARMV7_PERFCTR_MEM_READ 0x06 +#define ARMV7_PERFCTR_MEM_WRITE 0x07 +#define ARMV7_PERFCTR_INSTR_EXECUTED 0x08 +#define ARMV7_PERFCTR_EXC_TAKEN 0x09 +#define ARMV7_PERFCTR_EXC_EXECUTED 0x0A +#define ARMV7_PERFCTR_CID_WRITE 0x0B + +/* + * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. + * It counts: + * - all (taken) branch instructions, + * - instructions that explicitly write the PC, + * - exception generating instructions. 
+ */ +#define ARMV7_PERFCTR_PC_WRITE 0x0C +#define ARMV7_PERFCTR_PC_IMM_BRANCH 0x0D +#define ARMV7_PERFCTR_PC_PROC_RETURN 0x0E +#define ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F +#define ARMV7_PERFCTR_PC_BRANCH_MIS_PRED 0x10 +#define ARMV7_PERFCTR_CLOCK_CYCLES 0x11 +#define ARMV7_PERFCTR_PC_BRANCH_PRED 0x12 + +/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */ +#define ARMV7_PERFCTR_MEM_ACCESS 0x13 +#define ARMV7_PERFCTR_L1_ICACHE_ACCESS 0x14 +#define ARMV7_PERFCTR_L1_DCACHE_WB 0x15 +#define ARMV7_PERFCTR_L2_CACHE_ACCESS 0x16 +#define ARMV7_PERFCTR_L2_CACHE_REFILL 0x17 +#define ARMV7_PERFCTR_L2_CACHE_WB 0x18 +#define ARMV7_PERFCTR_BUS_ACCESS 0x19 +#define ARMV7_PERFCTR_MEM_ERROR 0x1A +#define ARMV7_PERFCTR_INSTR_SPEC 0x1B +#define ARMV7_PERFCTR_TTBR_WRITE 0x1C +#define ARMV7_PERFCTR_BUS_CYCLES 0x1D + +#define ARMV7_PERFCTR_CPU_CYCLES 0xFF + +/* ARMv7 Cortex-A8 specific event types */ +#define ARMV7_A8_PERFCTR_L2_CACHE_ACCESS 0x43 +#define ARMV7_A8_PERFCTR_L2_CACHE_REFILL 0x44 +#define ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS 0x50 +#define ARMV7_A8_PERFCTR_STALL_ISIDE 0x56 + +/* ARMv7 Cortex-A9 specific event types */ +#define ARMV7_A9_PERFCTR_INSTR_CORE_RENAME 0x68 +#define ARMV7_A9_PERFCTR_STALL_ICACHE 0x60 +#define ARMV7_A9_PERFCTR_STALL_DISPATCH 0x66 + +/* ARMv7 Cortex-A5 specific event types */ +#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL 0xc2 +#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP 0xc3 + +/* ARMv7 Cortex-A15 specific event types */ +#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ 0x40 +#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 +#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ 0x42 +#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE 0x43 + +#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ 0x4C +#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE 0x4D + +#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ 0x50 +#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 +#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ 0x52 +#define 
ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE 0x53 + +#define ARMV7_A15_PERFCTR_PC_WRITE_SPEC 0x76 + +/* ARMv7 Cortex-A12 specific event types */ +#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ 0x40 +#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 + +#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ 0x50 +#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 + +#define ARMV7_A12_PERFCTR_PC_WRITE_SPEC 0x76 + +#define ARMV7_A12_PERFCTR_PF_TLB_REFILL 0xe7 + +/* ARMv7 Krait specific event types */ +#define KRAIT_PMRESR0_GROUP0 0xcc +#define KRAIT_PMRESR1_GROUP0 0xd0 +#define KRAIT_PMRESR2_GROUP0 0xd4 +#define KRAIT_VPMRESR0_GROUP0 0xd8 + +#define KRAIT_PERFCTR_L1_ICACHE_ACCESS 0x10011 +#define KRAIT_PERFCTR_L1_ICACHE_MISS 0x10010 + +#define KRAIT_PERFCTR_L1_ITLB_ACCESS 0x12222 +#define KRAIT_PERFCTR_L1_DTLB_ACCESS 0x12210 + +/* ARMv7 Scorpion specific event types */ +#define SCORPION_LPM0_GROUP0 0x4c +#define SCORPION_LPM1_GROUP0 0x50 +#define SCORPION_LPM2_GROUP0 0x54 +#define SCORPION_L2LPM_GROUP0 0x58 +#define SCORPION_VLPM_GROUP0 0x5c + +#define SCORPION_ICACHE_ACCESS 0x10053 +#define SCORPION_ICACHE_MISS 0x10052 + +#define SCORPION_DTLB_ACCESS 0x12013 +#define SCORPION_DTLB_MISS 0x12012 + +#define SCORPION_ITLB_MISS 0x12021 + +/* + * Cortex-A8 HW events mapping + * + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. 
+ */ +static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE, +}; + +static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + 
[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Cortex-A9 HW events mapping + */ +static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH, +}; + +static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. 
+ */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Cortex-A5 HW events mapping + */ +static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + 
[C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + /* + * The prefetch counters don't differentiate between the I side and the + * D side. + */ + [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, + [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Cortex-A15 HW events mapping + */ +static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, +}; + +static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = 
ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE, + + /* + * Not all performance counters differentiate between read and write + * accesses/misses so we're not always strictly correct, but it's the + * best we can do. Writes and reads get combined in these cases. + */ + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Cortex-A7 HW events mapping + */ +static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + 
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, +}; + +static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Cortex-A12 HW events mapping + */ +static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] 
= { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, +}; + +static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + /* + * Not all performance counters differentiate between read and write + * accesses/misses so we're not always strictly correct, but it's the + * best we can do. Writes and reads get combined in these cases. 
+ */ + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Krait HW events mapping + */ +static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + 
[PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS, + + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, + + [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Scorpion HW events mapping + */ +static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + /* + * The 
performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS, + /* + * Only ITLB misses and DTLB refills are supported. If users want the + * DTLB refills misses a raw counter must be used. + */ + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); + +static struct attribute *armv7_pmu_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group armv7_pmu_format_attr_group = { + .name = "format", + .attrs = armv7_pmu_format_attrs, +}; + +#define ARMV7_EVENT_ATTR_RESOLVE(m) #m +#define ARMV7_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR_STRING(name, armv7_event_attr_##name, \ + "event=" ARMV7_EVENT_ATTR_RESOLVE(config)) + +ARMV7_EVENT_ATTR(sw_incr, ARMV7_PERFCTR_PMNC_SW_INCR); +ARMV7_EVENT_ATTR(l1i_cache_refill, 
ARMV7_PERFCTR_L1_ICACHE_REFILL); +ARMV7_EVENT_ATTR(l1i_tlb_refill, ARMV7_PERFCTR_ITLB_REFILL); +ARMV7_EVENT_ATTR(l1d_cache_refill, ARMV7_PERFCTR_L1_DCACHE_REFILL); +ARMV7_EVENT_ATTR(l1d_cache, ARMV7_PERFCTR_L1_DCACHE_ACCESS); +ARMV7_EVENT_ATTR(l1d_tlb_refill, ARMV7_PERFCTR_DTLB_REFILL); +ARMV7_EVENT_ATTR(ld_retired, ARMV7_PERFCTR_MEM_READ); +ARMV7_EVENT_ATTR(st_retired, ARMV7_PERFCTR_MEM_WRITE); +ARMV7_EVENT_ATTR(inst_retired, ARMV7_PERFCTR_INSTR_EXECUTED); +ARMV7_EVENT_ATTR(exc_taken, ARMV7_PERFCTR_EXC_TAKEN); +ARMV7_EVENT_ATTR(exc_return, ARMV7_PERFCTR_EXC_EXECUTED); +ARMV7_EVENT_ATTR(cid_write_retired, ARMV7_PERFCTR_CID_WRITE); +ARMV7_EVENT_ATTR(pc_write_retired, ARMV7_PERFCTR_PC_WRITE); +ARMV7_EVENT_ATTR(br_immed_retired, ARMV7_PERFCTR_PC_IMM_BRANCH); +ARMV7_EVENT_ATTR(br_return_retired, ARMV7_PERFCTR_PC_PROC_RETURN); +ARMV7_EVENT_ATTR(unaligned_ldst_retired, ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS); +ARMV7_EVENT_ATTR(br_mis_pred, ARMV7_PERFCTR_PC_BRANCH_MIS_PRED); +ARMV7_EVENT_ATTR(cpu_cycles, ARMV7_PERFCTR_CLOCK_CYCLES); +ARMV7_EVENT_ATTR(br_pred, ARMV7_PERFCTR_PC_BRANCH_PRED); + +static struct attribute *armv7_pmuv1_event_attrs[] = { + &armv7_event_attr_sw_incr.attr.attr, + &armv7_event_attr_l1i_cache_refill.attr.attr, + &armv7_event_attr_l1i_tlb_refill.attr.attr, + &armv7_event_attr_l1d_cache_refill.attr.attr, + &armv7_event_attr_l1d_cache.attr.attr, + &armv7_event_attr_l1d_tlb_refill.attr.attr, + &armv7_event_attr_ld_retired.attr.attr, + &armv7_event_attr_st_retired.attr.attr, + &armv7_event_attr_inst_retired.attr.attr, + &armv7_event_attr_exc_taken.attr.attr, + &armv7_event_attr_exc_return.attr.attr, + &armv7_event_attr_cid_write_retired.attr.attr, + &armv7_event_attr_pc_write_retired.attr.attr, + &armv7_event_attr_br_immed_retired.attr.attr, + &armv7_event_attr_br_return_retired.attr.attr, + &armv7_event_attr_unaligned_ldst_retired.attr.attr, + &armv7_event_attr_br_mis_pred.attr.attr, + &armv7_event_attr_cpu_cycles.attr.attr, + 
&armv7_event_attr_br_pred.attr.attr, + NULL, +}; + +static struct attribute_group armv7_pmuv1_events_attr_group = { + .name = "events", + .attrs = armv7_pmuv1_event_attrs, +}; + +ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS); +ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS); +ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB); +ARMV7_EVENT_ATTR(l2d_cache, ARMV7_PERFCTR_L2_CACHE_ACCESS); +ARMV7_EVENT_ATTR(l2d_cache_refill, ARMV7_PERFCTR_L2_CACHE_REFILL); +ARMV7_EVENT_ATTR(l2d_cache_wb, ARMV7_PERFCTR_L2_CACHE_WB); +ARMV7_EVENT_ATTR(bus_access, ARMV7_PERFCTR_BUS_ACCESS); +ARMV7_EVENT_ATTR(memory_error, ARMV7_PERFCTR_MEM_ERROR); +ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC); +ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE); +ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES); + +static struct attribute *armv7_pmuv2_event_attrs[] = { + &armv7_event_attr_sw_incr.attr.attr, + &armv7_event_attr_l1i_cache_refill.attr.attr, + &armv7_event_attr_l1i_tlb_refill.attr.attr, + &armv7_event_attr_l1d_cache_refill.attr.attr, + &armv7_event_attr_l1d_cache.attr.attr, + &armv7_event_attr_l1d_tlb_refill.attr.attr, + &armv7_event_attr_ld_retired.attr.attr, + &armv7_event_attr_st_retired.attr.attr, + &armv7_event_attr_inst_retired.attr.attr, + &armv7_event_attr_exc_taken.attr.attr, + &armv7_event_attr_exc_return.attr.attr, + &armv7_event_attr_cid_write_retired.attr.attr, + &armv7_event_attr_pc_write_retired.attr.attr, + &armv7_event_attr_br_immed_retired.attr.attr, + &armv7_event_attr_br_return_retired.attr.attr, + &armv7_event_attr_unaligned_ldst_retired.attr.attr, + &armv7_event_attr_br_mis_pred.attr.attr, + &armv7_event_attr_cpu_cycles.attr.attr, + &armv7_event_attr_br_pred.attr.attr, + &armv7_event_attr_mem_access.attr.attr, + &armv7_event_attr_l1i_cache.attr.attr, + &armv7_event_attr_l1d_cache_wb.attr.attr, + &armv7_event_attr_l2d_cache.attr.attr, + &armv7_event_attr_l2d_cache_refill.attr.attr, + 
&armv7_event_attr_l2d_cache_wb.attr.attr, + &armv7_event_attr_bus_access.attr.attr, + &armv7_event_attr_memory_error.attr.attr, + &armv7_event_attr_inst_spec.attr.attr, + &armv7_event_attr_ttbr_write_retired.attr.attr, + &armv7_event_attr_bus_cycles.attr.attr, + NULL, +}; + +static struct attribute_group armv7_pmuv2_events_attr_group = { + .name = "events", + .attrs = armv7_pmuv2_event_attrs, +}; + +/* + * Perf Events' indices + */ +#define ARMV7_IDX_CYCLE_COUNTER 31 +#define ARMV7_IDX_COUNTER_MAX 31 +/* + * ARMv7 low level PMNC access + */ + +/* + * Per-CPU PMNC: config reg + */ +#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ +#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ +#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ +#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ +#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ +#define ARMV7_PMNC_N_MASK 0x1f +#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ + +/* + * FLAG: counters overflow flag status reg + */ +#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK + +/* + * PMXEVTYPER: Event selection reg + */ +#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ +#define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ + +/* + * Event filters for PMUv2 + */ +#define ARMV7_EXCLUDE_PL1 BIT(31) +#define ARMV7_EXCLUDE_USER BIT(30) +#define ARMV7_INCLUDE_HYP BIT(27) + +/* + * Secure debug enable reg + */ +#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */ + +static inline u32 armv7_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); + return val; +} + +static inline void armv7_pmnc_write(u32 val) +{ + val &= ARMV7_PMNC_MASK; + isb(); + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); +} + +static inline int 
armv7_pmnc_has_overflowed(u32 pmnc) +{ + return pmnc & ARMV7_OVERFLOWED_MASK; +} + +static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx) +{ + return test_bit(idx, cpu_pmu->cntr_mask); +} + +static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx) +{ + return pmnc & BIT(idx); +} + +static inline void armv7_pmnc_select_counter(int idx) +{ + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (idx)); + isb(); +} + +static inline u64 armv7pmu_read_counter(struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u32 value = 0; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u reading wrong counter %d\n", + smp_processor_id(), idx); + } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); + } else { + armv7_pmnc_select_counter(idx); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value)); + } + + return value; +} + +static inline void armv7pmu_write_counter(struct perf_event *event, u64 value) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u writing wrong counter %d\n", + smp_processor_id(), idx); + } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value)); + } else { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value)); + } +} + +static inline void armv7_pmnc_write_evtsel(int idx, u32 val) +{ + armv7_pmnc_select_counter(idx); + val &= ARMV7_EVTYPE_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); +} + +static inline void armv7_pmnc_enable_counter(int idx) +{ + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(idx))); +} + +static inline void armv7_pmnc_disable_counter(int idx) +{ + asm volatile("mcr p15, 0, %0, c9, c12, 2" 
: : "r" (BIT(idx))); +} + +static inline void armv7_pmnc_enable_intens(int idx) +{ + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(idx))); +} + +static inline void armv7_pmnc_disable_intens(int idx) +{ + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(idx))); + isb(); + /* Clear the overflow flag in case an interrupt is pending. */ + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(idx))); + isb(); +} + +static inline u32 armv7_pmnc_getreset_flags(void) +{ + u32 val; + + /* Read */ + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + + /* Write to clear flags */ + val &= ARMV7_FLAG_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); + + return val; +} + +#ifdef DEBUG +static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) +{ + u32 val; + unsigned int cnt; + + pr_info("PMNC registers dump:\n"); + + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + pr_info("PMNC =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); + pr_info("CNTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); + pr_info("INTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + pr_info("FLAGS =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); + pr_info("SELECT=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + pr_info("CCNT =0x%08x\n", val); + + for_each_set_bit(cnt, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + pr_info("CNT[%d] count =0x%08x\n", cnt, val); + asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); + pr_info("CNT[%d] evtsel=0x%08x\n", cnt, val); + } +} +#endif + +static void armv7pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + 
pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return; + } + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We only need to set the event for the cycle counter if we + * have the ability to perform event filtering. + */ + if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* + * Enable interrupt for this counter + */ + armv7_pmnc_enable_intens(idx); + + /* + * Enable counter + */ + armv7_pmnc_enable_counter(idx); +} + +static void armv7pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return; + } + + /* + * Disable counter and interrupt + */ + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + armv7_pmnc_disable_intens(idx); +} + +static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu) +{ + u32 pmnc; + struct perf_sample_data data; + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmnc = armv7_pmnc_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv7_pmnc_has_overflowed(pmnc)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. 
*/ + if (!event) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv7pmu_start(struct arm_pmu *cpu_pmu) +{ + /* Enable all counters */ + armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); +} + +static void armv7pmu_stop(struct arm_pmu *cpu_pmu) +{ + /* Disable all counters */ + armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); +} + +static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT; + + /* Always place a cycle counter into the cycle counter. */ + if (evtype == ARMV7_PERFCTR_CPU_CYCLES) { + if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV7_IDX_CYCLE_COUNTER; + } + + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EAGAIN; +} + +static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + +/* + * Add an event filter to a given event. 
This will only work for PMUv2 PMUs. + */ +static int armv7pmu_set_event_filter(struct hw_perf_event *event, + struct perf_event_attr *attr) +{ + unsigned long config_base = 0; + + if (attr->exclude_idle) { + pr_debug("ARM performance counters do not support mode exclusion\n"); + return -EOPNOTSUPP; + } + if (attr->exclude_user) + config_base |= ARMV7_EXCLUDE_USER; + if (attr->exclude_kernel) + config_base |= ARMV7_EXCLUDE_PL1; + if (!attr->exclude_hv) + config_base |= ARMV7_INCLUDE_HYP; + + /* + * Install the filter into config_base as this is used to + * construct the event type. + */ + event->config_base = config_base; + + return 0; +} + +static void armv7pmu_reset(void *info) +{ + struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; + u32 idx, val; + + if (cpu_pmu->secure_access) { + asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val)); + val |= ARMV7_SDER_SUNIDEN; + asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val)); + } + + /* The counter and interrupt enable registers are unknown at reset. 
*/ + for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS) { + armv7_pmnc_disable_counter(idx); + armv7_pmnc_disable_intens(idx); + } + + /* Initialize & Reset PMNC: C and P bits */ + armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); +} + +static int armv7_a8_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a8_perf_map, + &armv7_a8_perf_cache_map, 0xFF); +} + +static int armv7_a9_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a9_perf_map, + &armv7_a9_perf_cache_map, 0xFF); +} + +static int armv7_a5_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a5_perf_map, + &armv7_a5_perf_cache_map, 0xFF); +} + +static int armv7_a15_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a15_perf_map, + &armv7_a15_perf_cache_map, 0xFF); +} + +static int armv7_a7_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a7_perf_map, + &armv7_a7_perf_cache_map, 0xFF); +} + +static int armv7_a12_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a12_perf_map, + &armv7_a12_perf_cache_map, 0xFF); +} + +static int krait_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &krait_perf_map, + &krait_perf_cache_map, 0xFFFFF); +} + +static int krait_map_event_no_branch(struct perf_event *event) +{ + return armpmu_map_event(event, &krait_perf_map_no_branch, + &krait_perf_cache_map, 0xFFFFF); +} + +static int scorpion_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &scorpion_perf_map, + &scorpion_perf_cache_map, 0xFFFFF); +} + +static void armv7pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->handle_irq = armv7pmu_handle_irq; + cpu_pmu->enable = armv7pmu_enable_event; + cpu_pmu->disable = armv7pmu_disable_event; + cpu_pmu->read_counter = armv7pmu_read_counter; + cpu_pmu->write_counter = armv7pmu_write_counter; + cpu_pmu->get_event_idx = armv7pmu_get_event_idx; + 
cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx; + cpu_pmu->start = armv7pmu_start; + cpu_pmu->stop = armv7pmu_stop; + cpu_pmu->reset = armv7pmu_reset; +}; + +static void armv7_read_num_pmnc_events(void *info) +{ + int nb_cnt; + struct arm_pmu *cpu_pmu = info; + + /* Read the nb of CNTx counters supported from PMNC */ + nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + bitmap_set(cpu_pmu->cntr_mask, 0, nb_cnt); + + /* Add the CPU cycles counter */ + set_bit(ARMV7_IDX_CYCLE_COUNTER, cpu_pmu->cntr_mask); +} + +static int armv7_probe_num_events(struct arm_pmu *arm_pmu) +{ + return smp_call_function_any(&arm_pmu->supported_cpus, + armv7_read_num_pmnc_events, + arm_pmu, 1); +} + +static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a8"; + cpu_pmu->map_event = armv7_a8_map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a9"; + cpu_pmu->map_event = armv7_a9_map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a5"; + cpu_pmu->map_event = armv7_a5_map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a15"; + cpu_pmu->map_event = 
armv7_a15_map_event; + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a7"; + cpu_pmu->map_event = armv7_a7_map_event; + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a12"; + cpu_pmu->map_event = armv7_a12_map_event; + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) +{ + int ret = armv7_a12_pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a17"; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return ret; +} + +/* + * Krait Performance Monitor Region Event Selection Register (PMRESRn) + * + * 31 30 24 16 8 0 + * +--------------------------------+ + * PMRESR0 | EN | CC | CC | CC | CC | N = 1, R = 0 + * +--------------------------------+ + * PMRESR1 | EN | CC | CC | CC | CC | N = 1, R = 1 + * +--------------------------------+ + * PMRESR2 | EN | CC | CC | CC | CC | N = 1, R = 2 + * +--------------------------------+ + * VPMRESR0 | EN | CC | CC | CC | CC | N = 2, R = ? 
+ * +--------------------------------+ + * EN | G=3 | G=2 | G=1 | G=0 + * + * Event Encoding: + * + * hwc->config_base = 0xNRCCG + * + * N = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR) + * R = region register + * CC = class of events the group G is choosing from + * G = group or particular event + * + * Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2 + * + * A region (R) corresponds to a piece of the CPU (execution unit, instruction + * unit, etc.) while the event code (CC) corresponds to a particular class of + * events (interrupts for example). An event code is broken down into + * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for + * example). + */ + +#define KRAIT_EVENT (1 << 16) +#define VENUM_EVENT (2 << 16) +#define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT) +#define PMRESRn_EN BIT(31) + +#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */ +#define EVENT_GROUP(event) ((event) & 0xf) /* G */ +#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */ +#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */ +#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */ + +static u32 krait_read_pmresrn(int n) +{ + u32 val; + + switch (n) { + case 0: + asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val)); + break; + case 1: + asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val)); + break; + case 2: + asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val)); + break; + default: + BUG(); /* Should be validated in krait_pmu_get_event_idx() */ + } + + return val; +} + +static void krait_write_pmresrn(int n, u32 val) +{ + switch (n) { + case 0: + asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val)); + break; + case 1: + asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val)); + break; + case 2: + asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val)); + break; + default: + BUG(); /* Should be validated in krait_pmu_get_event_idx() */ + } +} + +static u32 venum_read_pmresr(void) 
+{ + u32 val; + asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); + return val; +} + +static void venum_write_pmresr(u32 val) +{ + asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); +} + +static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val) +{ + u32 venum_new_val; + u32 fp_new_val; + + BUG_ON(preemptible()); + /* CPACR Enable CP10 and CP11 access */ + *venum_orig_val = get_copro_access(); + venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11); + set_copro_access(venum_new_val); + + /* Enable FPEXC */ + *fp_orig_val = fmrx(FPEXC); + fp_new_val = *fp_orig_val | FPEXC_EN; + fmxr(FPEXC, fp_new_val); +} + +static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val) +{ + BUG_ON(preemptible()); + /* Restore FPEXC */ + fmxr(FPEXC, fp_orig_val); + isb(); + /* Restore CPACR */ + set_copro_access(venum_orig_val); +} + +static u32 krait_get_pmresrn_event(unsigned int region) +{ + static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0, + KRAIT_PMRESR1_GROUP0, + KRAIT_PMRESR2_GROUP0 }; + return pmresrn_table[region]; +} + +static void krait_evt_setup(int idx, u32 config_base) +{ + u32 val; + u32 mask; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + unsigned int code = EVENT_CODE(config_base); + unsigned int group_shift; + bool venum_event = EVENT_VENUM(config_base); + + group_shift = group * 8; + mask = 0xff << group_shift; + + /* Configure evtsel for the region and group */ + if (venum_event) + val = KRAIT_VPMRESR0_GROUP0; + else + val = krait_get_pmresrn_event(region); + val += group; + /* Mix in mode-exclusion bits */ + val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); + armv7_pmnc_write_evtsel(idx, val); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = 
krait_read_pmresrn(region); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + krait_write_pmresrn(region, val); + } +} + +static u32 clear_pmresrn_group(u32 val, int group) +{ + u32 mask; + int group_shift; + + group_shift = group * 8; + mask = 0xff << group_shift; + val &= ~mask; + + /* Don't clear enable bit if entire region isn't disabled */ + if (val & ~PMRESRn_EN) + return val |= PMRESRn_EN; + + return 0; +} + +static void krait_clearpmu(u32 config_base) +{ + u32 val; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + bool venum_event = EVENT_VENUM(config_base); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val = clear_pmresrn_group(val, group); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = krait_read_pmresrn(region); + val = clear_pmresrn_group(val, group); + krait_write_pmresrn(region, val); + } +} + +static void krait_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* Disable counter and interrupt */ + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Clear pmresr code (if destined for PMNx counters) + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + krait_clearpmu(hwc->config_base); + + /* Disable interrupt for this counter */ + armv7_pmnc_disable_intens(idx); +} + +static void krait_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We set the event for the cycle counter because we + * have the ability to perform event filtering. 
+ */ + if (hwc->config_base & KRAIT_EVENT_MASK) + krait_evt_setup(idx, hwc->config_base); + else + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* Enable interrupt for this counter */ + armv7_pmnc_enable_intens(idx); + + /* Enable counter */ + armv7_pmnc_enable_counter(idx); +} + +static void krait_pmu_reset(void *info) +{ + u32 vval, fval; + struct arm_pmu *cpu_pmu = info; + u32 idx; + + armv7pmu_reset(info); + + /* Clear all pmresrs */ + krait_write_pmresrn(0, 0); + krait_write_pmresrn(1, 0); + krait_write_pmresrn(2, 0); + + venum_pre_pmresr(&vval, &fval); + venum_write_pmresr(0); + venum_post_pmresr(vval, fval); + + /* Reset PMxEVNCTCR to sane default */ + for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + } + +} + +static int krait_event_to_bit(struct perf_event *event, unsigned int region, + unsigned int group) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + + if (hwc->config_base & VENUM_EVENT) + bit = KRAIT_VPMRESR0_GROUP0; + else + bit = krait_get_pmresrn_event(region); + bit -= krait_get_pmresrn_event(0); + bit += group; + /* + * Lower bits are reserved for use by the counters (see + * armv7pmu_get_event_idx() for more info) + */ + bit += bitmap_weight(cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX); + + return bit; +} + +/* + * We check for column exclusion constraints here. + * Two events cant use the same group within a pmresr register. 
+ */ +static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + int bit = -1; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int code = EVENT_CODE(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); + + if (venum_event || krait_event) { + /* Ignore invalid events */ + if (group > 3 || region > 2) + return -EINVAL; + if (venum_event && (code & 0xe0)) + return -EINVAL; + + bit = krait_event_to_bit(event, region, group); + if (test_and_set_bit(bit, cpuc->used_mask)) + return -EAGAIN; + } + + idx = armv7pmu_get_event_idx(cpuc, event); + if (idx < 0 && bit >= 0) + clear_bit(bit, cpuc->used_mask); + + return idx; +} + +static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); + + armv7pmu_clear_event_idx(cpuc, event); + if (venum_event || krait_event) { + bit = krait_event_to_bit(event, region, group); + clear_bit(bit, cpuc->used_mask); + } +} + +static int krait_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_krait"; + /* Some early versions of Krait don't support PC write events */ + if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node, + "qcom,no-pc-write")) + cpu_pmu->map_event = krait_map_event_no_branch; + else + cpu_pmu->map_event = krait_map_event; + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->reset = krait_pmu_reset; + cpu_pmu->enable = krait_pmu_enable_event; + cpu_pmu->disable = krait_pmu_disable_event; + cpu_pmu->get_event_idx = krait_pmu_get_event_idx; + 
cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; + return armv7_probe_num_events(cpu_pmu); +} + +/* + * Scorpion Local Performance Monitor Register (LPMn) + * + * 31 30 24 16 8 0 + * +--------------------------------+ + * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0 + * +--------------------------------+ + * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1 + * +--------------------------------+ + * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2 + * +--------------------------------+ + * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3 + * +--------------------------------+ + * VLPM | EN | CC | CC | CC | CC | N = 2, R = ? + * +--------------------------------+ + * EN | G=3 | G=2 | G=1 | G=0 + * + * + * Event Encoding: + * + * hwc->config_base = 0xNRCCG + * + * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM) + * R = region register + * CC = class of events the group G is choosing from + * G = group or particular event + * + * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2 + * + * A region (R) corresponds to a piece of the CPU (execution unit, instruction + * unit, etc.) while the event code (CC) corresponds to a particular class of + * events (interrupts for example). An event code is broken down into + * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for + * example). 
+ */
+
 /*
  * Read PMRESR<n>. Each n in 0..3 has its own CP15 mrc encoding; any other
  * n hits BUG().
  */
+static u32 scorpion_read_pmresrn(int n)
+{
+	u32 val;
+
+	switch (n) {
+	case 0:
+		asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 1:
+		asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 2:
+		asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 3:
+		asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+	}
+
+	return val;
+}
+
 /*
  * Write @val to PMRESR<n>. Mirrors scorpion_read_pmresrn() using mcr and
  * likewise hits BUG() for n outside 0..3.
  */
+static void scorpion_write_pmresrn(int n, u32 val)
+{
+	switch (n) {
+	case 0:
+		asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 1:
+		asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 2:
+		asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 3:
+		asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+	}
+}
+
 /*
  * Return the GROUP0 event-selector base for @region. @region indexes a
  * four-entry table with no bounds check here, so callers must pass 0..3.
  */
+static u32 scorpion_get_pmresrn_event(unsigned int region)
+{
+	static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0,
+					     SCORPION_LPM1_GROUP0,
+					     SCORPION_LPM2_GROUP0,
+					     SCORPION_L2LPM_GROUP0 };
+	return pmresrn_table[region];
+}
+
 /*
  * Program counter @idx for a PMRESR-based event described by @config_base.
  *
  * Steps, in order:
  *  1. write the event selector: region base (or SCORPION_VLPM_GROUP0 for
  *     VeNum events) plus group, OR-ed with the user/PL1 exclusion bits
  *     taken from @config_base;
  *  2. write 0 through the "c9, c15, 0" mcr (the same write that
  *     scorpion_pmu_reset() labels as resetting PMxEVNCTCR);
  *  3. read-modify-write the owning PMRESR so that byte number `group`
  *     holds the event code and PMRESRn_EN is set.
  */
+static void scorpion_evt_setup(int idx, u32 config_base)
+{
+	u32 val;
+	u32 mask;
+	u32 vval, fval;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	unsigned int code = EVENT_CODE(config_base);
+	unsigned int group_shift;
+	bool venum_event = EVENT_VENUM(config_base);
+
 	/* Each group owns one byte of the 32-bit PMRESR value. */
+	group_shift = group * 8;
+	mask = 0xff << group_shift;
+
+	/* Configure evtsel for the region and group */
+	if (venum_event)
+		val = SCORPION_VLPM_GROUP0;
+	else
+		val = scorpion_get_pmresrn_event(region);
+	val += group;
+	/* Mix in mode-exclusion bits */
+	val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+	armv7_pmnc_write_evtsel(idx, val);
+
+	asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+
+	if (venum_event) {
 		/* VeNum PMRESR accesses are bracketed by the pre/post helpers. */
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
+	} else {
+		val = scorpion_read_pmresrn(region);
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		scorpion_write_pmresrn(region, val);
+	}
+}
+
 /*
  * Remove one event's group from its PMRESR: read the owning register
  * (VeNum or PMRESR<region>), pass it through clear_pmresrn_group() for the
  * event's group and write the result back.
  */
+static void scorpion_clearpmu(u32 config_base)
+{
+	u32 val;
+	u32 vval, fval;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	bool venum_event = EVENT_VENUM(config_base);
+
+	if (venum_event) {
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val = clear_pmresrn_group(val, group);
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
+	} else {
+		val = scorpion_read_pmresrn(region);
+		val = clear_pmresrn_group(val, group);
+		scorpion_write_pmresrn(region, val);
+	}
+}
+
 /*
  * Disable callback: stop counter hwc->idx, clear its PMRESR group when
  * config_base carries any KRAIT_EVENT_MASK bit, then mask the counter's
  * interrupt.
  */
+static void scorpion_pmu_disable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	/* Disable counter and interrupt */
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Clear pmresr code (if destined for PMNx counters)
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		scorpion_clearpmu(hwc->config_base);
+
+	/* Disable interrupt for this counter */
+	armv7_pmnc_disable_intens(idx);
+}
+
 /*
  * Enable callback: the counter is first disabled, the event is programmed
  * (through scorpion_evt_setup() for PMRESR-based events, or a plain evtsel
  * write for any other non-cycle counter), and only then are the interrupt
  * and the counter enabled.
  */
+static void scorpion_pmu_enable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Set event (if destined for PMNx counters)
+	 * We don't set the event for the cycle counter because we
+	 * don't have the ability to perform event filtering.
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		scorpion_evt_setup(idx, hwc->config_base);
+	else if (idx != ARMV7_IDX_CYCLE_COUNTER)
+		armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+	/* Enable interrupt for this counter */
+	armv7_pmnc_enable_intens(idx);
+
+	/* Enable counter */
+	armv7_pmnc_enable_counter(idx);
+}
+
 /*
  * Reset callback (@info is the struct arm_pmu): run armv7pmu_reset(), zero
  * PMRESR0..3 and the VeNum PMRESR, then for every counter set in cntr_mask
  * select it and write 0 through the "c9, c15, 0" mcr.
  */
+static void scorpion_pmu_reset(void *info)
+{
+	u32 vval, fval;
+	struct arm_pmu *cpu_pmu = info;
+	u32 idx;
+
+	armv7pmu_reset(info);
+
+	/* Clear all pmresrs */
+	scorpion_write_pmresrn(0, 0);
+	scorpion_write_pmresrn(1, 0);
+	scorpion_write_pmresrn(2, 0);
+	scorpion_write_pmresrn(3, 0);
+
+	venum_pre_pmresr(&vval, &fval);
+	venum_write_pmresr(0);
+	venum_post_pmresr(vval, fval);
+
+	/* Reset PMxEVNCTCR to sane default */
+	for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) {
+		armv7_pmnc_select_counter(idx);
+		asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+	}
+}
+
 /*
  * Compute the used_mask bit that stands for the (@region, @group) pair of
  * @event: the distance of the region's GROUP0 selector from region 0's
  * (SCORPION_VLPM_GROUP0 is used instead when config_base has VENUM_EVENT
  * set), plus @group, plus the number of counters set in cntr_mask.
  */
+static int scorpion_event_to_bit(struct perf_event *event, unsigned int region,
+				 unsigned int group)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+	if (hwc->config_base & VENUM_EVENT)
+		bit = SCORPION_VLPM_GROUP0;
+	else
+		bit = scorpion_get_pmresrn_event(region);
+	bit -= scorpion_get_pmresrn_event(0);
+	bit += group;
+	/*
+	 * Lower bits are reserved for use by the counters (see
+	 * armv7pmu_get_event_idx() for more info)
+	 */
+	bit += bitmap_weight(cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX);
+
+	return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
 /*
  * Returns the counter index chosen by armv7pmu_get_event_idx(). For
  * Scorpion/VeNum events it first claims the (region, group) bit in
  * used_mask: -EINVAL if group or region exceeds 3, -EAGAIN if the bit is
  * already taken. The bit is released again when no counter was obtained.
  */
+static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				      struct perf_event *event)
+{
+	int idx;
+	int bit = -1;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+	if (venum_event || scorpion_event) {
+		/* Ignore invalid events */
+		if (group > 3 || region > 3)
+			return -EINVAL;
+
+		bit = scorpion_event_to_bit(event, region, group);
+		if (test_and_set_bit(bit, cpuc->used_mask))
+			return -EAGAIN;
+	}
+
+	idx = armv7pmu_get_event_idx(cpuc, event);
 	/* bit stays -1 for events that never claimed an exclusion bit. */
+	if (idx < 0 && bit >= 0)
+		clear_bit(bit, cpuc->used_mask);
+
+	return idx;
+}
+
 /*
  * Release the event's counter via armv7pmu_clear_event_idx() and, for
  * Scorpion/VeNum events, also clear its (region, group) bit in used_mask.
  */
+static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+					 struct perf_event *event)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+	armv7pmu_clear_event_idx(cpuc, event);
+	if (venum_event || scorpion_event) {
+		bit = scorpion_event_to_bit(event, region, group);
+		clear_bit(bit, cpuc->used_mask);
+	}
+}
+
 /*
  * Init for "qcom,scorpion-pmu": start from armv7pmu_init() and override
  * the name and the Scorpion-specific callbacks. Returns the result of
  * armv7_probe_num_events().
  */
+static int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_scorpion";
+	cpu_pmu->map_event	= scorpion_map_event;
+	cpu_pmu->reset		= scorpion_pmu_reset;
+	cpu_pmu->enable		= scorpion_pmu_enable_event;
+	cpu_pmu->disable	= scorpion_pmu_disable_event;
+	cpu_pmu->get_event_idx	= scorpion_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+	return armv7_probe_num_events(cpu_pmu);
+}
+
 /*
  * Init for "qcom,scorpion-mp-pmu": installs the same callbacks as
  * scorpion_pmu_init(); only the PMU name differs.
  */
+static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_scorpion_mp";
+	cpu_pmu->map_event	= scorpion_map_event;
+	cpu_pmu->reset		= scorpion_pmu_reset;
+	cpu_pmu->enable		= scorpion_pmu_enable_event;
+	cpu_pmu->disable	= scorpion_pmu_disable_event;
+	cpu_pmu->get_event_idx	= scorpion_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+	return armv7_probe_num_events(cpu_pmu);
+}
+
 /* Device-tree compatible string -> PMU init function, passed to the probe. */
+static const struct of_device_id armv7_pmu_of_device_ids[] = {
+	{.compatible = "arm,cortex-a17-pmu",	.data = armv7_a17_pmu_init},
+	{.compatible = "arm,cortex-a15-pmu",	.data = armv7_a15_pmu_init},
+	{.compatible = "arm,cortex-a12-pmu",	.data = armv7_a12_pmu_init},
+	{.compatible = "arm,cortex-a9-pmu",	.data = armv7_a9_pmu_init},
+	{.compatible = "arm,cortex-a8-pmu",	.data = armv7_a8_pmu_init},
+	{.compatible = "arm,cortex-a7-pmu",	.data = armv7_a7_pmu_init},
+	{.compatible = "arm,cortex-a5-pmu",	.data = armv7_a5_pmu_init},
+	{.compatible = "qcom,krait-pmu",	.data = krait_pmu_init},
+	{.compatible = "qcom,scorpion-pmu",	.data = scorpion_pmu_init},
+	{.compatible = "qcom,scorpion-mp-pmu",	.data = scorpion_mp_pmu_init},
+	{},
+};
+
 /* Probe: hand the OF match table to the shared arm_pmu probe (no CPUID table). */
+static int armv7_pmu_device_probe(struct platform_device *pdev)
+{
+	return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, NULL);
+}
+
 /* Built-in platform driver; bind/unbind through sysfs is suppressed. */
+static struct platform_driver armv7_pmu_driver = {
+	.driver		= {
+		.name	= "armv7-pmu",
+		.of_match_table = armv7_pmu_of_device_ids,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= armv7_pmu_device_probe,
+};
+
+builtin_platform_driver(armv7_pmu_driver);
diff --git a/drivers/perf/arm_xscale_pmu.c b/drivers/perf/arm_xscale_pmu.c new file mode 100644 index 000000000000..638fea9b1263 --- /dev/null +++ b/drivers/perf/arm_xscale_pmu.c @@ -0,0 +1,749 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARMv5 [xscale] Performance counter handling code. + * + * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com> + * + * Based on the previous xscale OProfile code. 
+ *
+ * There are two variants of the xscale PMU that we support:
+ * 	- xscale1pmu: 2 event counters and a cycle counter
+ * 	- xscale2pmu: 4 event counters and a cycle counter
+ * The two variants share event definitions, but have different
+ * PMU structures.
+ */
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
 /* Event numbers written into the event-select fields of either variant. */
+enum xscale_perf_types {
+	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
+	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
+	XSCALE_PERFCTR_DATA_STALL		= 0x02,
+	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
+	XSCALE_PERFCTR_DTLB_MISS		= 0x04,
+	XSCALE_PERFCTR_BRANCH			= 0x05,
+	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
+	XSCALE_PERFCTR_INSTRUCTION		= 0x07,
+	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
+	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
+	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0A,
+	XSCALE_PERFCTR_DCACHE_MISS		= 0x0B,
+	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0C,
+	XSCALE_PERFCTR_PC_CHANGED		= 0x0D,
+	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
+	XSCALE_PERFCTR_BCU_FULL			= 0x11,
+	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
+	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
+	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
+	XSCALE_PERFCTR_RMW			= 0x16,
+	/* XSCALE_PERFCTR_CCNT is not hardware defined */
+	XSCALE_PERFCTR_CCNT			= 0xFE,
+	XSCALE_PERFCTR_UNUSED			= 0xFF,
+};
+
 /*
  * Counter indices used as hw_perf_event::idx and as bit numbers in
  * used_mask/cntr_mask. xscale1 uses the first three, xscale2 all five.
  */
+enum xscale_counters {
+	XSCALE_CYCLE_COUNTER	= 0,
+	XSCALE_COUNTER0,
+	XSCALE_COUNTER1,
+	XSCALE_COUNTER2,
+	XSCALE_COUNTER3,
+};
+#define XSCALE1_NUM_COUNTERS	3
+#define XSCALE2_NUM_COUNTERS	5
+
 /* Generic perf hardware events -> xscale event numbers. */
+static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= XSCALE_PERFCTR_CCNT,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= XSCALE_PERFCTR_INSTRUCTION,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XSCALE_PERFCTR_BRANCH,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= XSCALE_PERFCTR_BRANCH_MISS,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XSCALE_PERFCTR_ICACHE_NO_DELIVER,
+};
+
 /*
  * Generic perf cache events -> xscale event numbers. Read and write
  * variants of an entry map to the same hardware event.
  */
+static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					   [PERF_COUNT_HW_CACHE_OP_MAX]
+					   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
+};
+
 /*
  * PMNC control bits shared by both variants.
  * XSCALE_PMU_RESET previously expanded to (CCNT_RESET | PMN_RESET), names
  * that are not defined in this file; it now uses the XSCALE_-prefixed
  * macros defined right here so that the macro is usable.
  */
+#define	XSCALE_PMU_ENABLE	0x001
+#define XSCALE_PMN_RESET	0x002
+#define	XSCALE_CCNT_RESET	0x004
+#define	XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
+#define XSCALE_PMU_CNT64	0x008
+
 /* xscale1: overflow flags, interrupt enables and event fields all live in PMNC. */
+#define XSCALE1_OVERFLOWED_MASK	0x700
+#define XSCALE1_CCOUNT_OVERFLOW	0x400
+#define XSCALE1_COUNT0_OVERFLOW	0x100
+#define XSCALE1_COUNT1_OVERFLOW	0x200
+#define XSCALE1_CCOUNT_INT_EN	0x040
+#define XSCALE1_COUNT0_INT_EN	0x010
+#define XSCALE1_COUNT1_INT_EN	0x020
+#define XSCALE1_COUNT0_EVT_SHFT	12
+#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
+#define XSCALE1_COUNT1_EVT_SHFT	20
+#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)
+
 /* Read the xscale1 PMNC (CP14 c0, c0). */
+static inline u32
+xscale1pmu_read_pmnc(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
+	return val;
+}
+
 /* Write the xscale1 PMNC after forcing the write-as-0 bits to zero. */
+static inline void
+xscale1pmu_write_pmnc(u32 val)
+{
+	/* upper 4bits and 7, 11 are write-as-0 */
+	val &= 0xffff77f;
+	asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
+}
+
 /*
  * Return non-zero if @counter's overflow flag is set in the PMNC snapshot
  * @pmnc. Counters other than the cycle counter, COUNTER0 and COUNTER1
  * trigger a one-time warning and return 0.
  */
+static inline int
+xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
+					enum xscale_counters counter)
+{
+	int ret = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
+		break;
+	case XSCALE_COUNTER0:
+		ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
+		break;
+	case XSCALE_COUNTER1:
+		ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+	}
+
+	return ret;
+}
+
 /*
  * xscale1 overflow interrupt handler. Snapshots PMNC, writes it back with
  * the enable bit cleared, then for every active event whose overflow flag
  * is set in the snapshot updates the count, re-arms the period and reports
  * the overflow to perf (disabling the event if perf asks for it). Returns
  * IRQ_NONE when no overflow flag was set, IRQ_HANDLED otherwise.
  */
+static irqreturn_t
+xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+	unsigned long pmnc;
+	struct perf_sample_data data;
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	struct pt_regs *regs;
+	int idx;
+
+	/*
+	 * NOTE: there's an A stepping erratum that states if an overflow
+	 *       bit already exists and another occurs, the previous
+	 *       Overflow bit gets cleared. There's no workaround.
+	 *	 Fixed in B stepping or later.
+	 */
+	pmnc = xscale1pmu_read_pmnc();
+
+	/*
+	 * Write the value back to clear the overflow flags. Overflow
+	 * flags remain in pmnc for use below. We also disable the PMU
+	 * while we process the interrupt.
+	 */
+	xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
 	/*
 	 * NOTE(review): on this early return the enable bit cleared just
 	 * above is not set again by this function - confirm intended.
 	 */
+	if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
+		return IRQ_NONE;
+
+	regs = get_irq_regs();
+
+	for_each_set_bit(idx, cpu_pmu->cntr_mask, XSCALE1_NUM_COUNTERS) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!event)
+			continue;
+
+		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			cpu_pmu->disable(event);
+	}
+
+	irq_work_run();
+
+	/*
+	 * Re-enable the PMU.
+	 */
+	pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(pmnc);
+
+	return IRQ_HANDLED;
+}
+
 /*
  * Enable callback (xscale1): read-modify-write PMNC so that the counter's
  * interrupt-enable bit is set and, for COUNTER0/1, its event field holds
  * hwc->config_base. Unknown indices warn once and change nothing.
  */
+static void xscale1pmu_enable_event(struct perf_event *event)
+{
+	unsigned long val, mask, evt;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		mask = 0;
+		evt = XSCALE1_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		mask = XSCALE1_COUNT0_EVT_MASK;
+		evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
+			XSCALE1_COUNT0_INT_EN;
+		break;
+	case XSCALE_COUNTER1:
+		mask = XSCALE1_COUNT1_EVT_MASK;
+		evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
+			XSCALE1_COUNT1_INT_EN;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	val = xscale1pmu_read_pmnc();
+	val &= ~mask;
+	val |= evt;
+	xscale1pmu_write_pmnc(val);
+}
+
 /*
  * Disable callback (xscale1): clear the counter's interrupt-enable bit
  * and, for COUNTER0/1, replace its event field with
  * XSCALE_PERFCTR_UNUSED. Unknown indices warn once and change nothing.
  */
+static void xscale1pmu_disable_event(struct perf_event *event)
+{
+	unsigned long val, mask, evt;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		mask = XSCALE1_CCOUNT_INT_EN;
+		evt = 0;
+		break;
+	case XSCALE_COUNTER0:
+		mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
+		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
+		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	val = xscale1pmu_read_pmnc();
+	val &= ~mask;
+	val |= evt;
+	xscale1pmu_write_pmnc(val);
+}
+
 /*
  * Allocate a counter for @event. CCNT events may only take the cycle
  * counter; every other event takes COUNTER1 if free, else COUNTER0.
  * Returns the counter index, or -EAGAIN when nothing suitable is free.
  */
+static int
+xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	if (XSCALE_PERFCTR_CCNT == hwc->config_base) {
+		if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
+			return -EAGAIN;
+
+		return XSCALE_CYCLE_COUNTER;
+	} else {
+		if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
+			return XSCALE_COUNTER1;
+
+		if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
+			return XSCALE_COUNTER0;
+
+		return -EAGAIN;
+	}
+}
+
 /* Release the counter held by @event (shared by both variants). */
+static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc,
+				     struct perf_event *event)
+{
+	clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
 /* Start callback (xscale1): set the PMNC enable bit. */
+static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
+{
+	unsigned long val;
+
+	val = xscale1pmu_read_pmnc();
+	val |= XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(val);
+}
+
 /* Stop callback (xscale1): clear the PMNC enable bit. */
+static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
+{
+	unsigned long val;
+
+	val = xscale1pmu_read_pmnc();
+	val &= ~XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(val);
+}
+
 /*
  * Read the 32-bit hardware counter selected by hwc->idx. Indices other
  * than the cycle counter, COUNTER0 and COUNTER1 return 0.
  */
+static inline u64 xscale1pmu_read_counter(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+	u32 val = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
+		break;
+	}
+
+	return val;
+}
+
 /*
  * Write @val to the hardware counter selected by hwc->idx. Indices other
  * than the cycle counter, COUNTER0 and COUNTER1 are silently ignored.
  */
+static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
+		break;
+	}
+}
+
 /* Map a perf event through the two tables above; raw events are masked to 0xFF. */
+static int xscale_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &xscale_perf_map,
+				&xscale_perf_cache_map, 0xFF);
+}
+
 /* Fill in @cpu_pmu for the xscale1 variant (3 counters). Always returns 0. */
+static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name		= "armv5_xscale1";
+	cpu_pmu->handle_irq	= xscale1pmu_handle_irq;
+	cpu_pmu->enable		= xscale1pmu_enable_event;
+	cpu_pmu->disable	= xscale1pmu_disable_event;
+	cpu_pmu->read_counter	= xscale1pmu_read_counter;
+	cpu_pmu->write_counter	= xscale1pmu_write_counter;
+	cpu_pmu->get_event_idx	= xscale1pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx;
+	cpu_pmu->start		= xscale1pmu_start;
+	cpu_pmu->stop		= xscale1pmu_stop;
+	cpu_pmu->map_event	= xscale_map_event;
+
+	bitmap_set(cpu_pmu->cntr_mask, 0, XSCALE1_NUM_COUNTERS);
+
+	return 0;
+}
+
 /*
  * xscale2: overflow flags, interrupt enables and event selects are held in
  * separate registers (see the accessors that use these masks).
  */
+#define XSCALE2_OVERFLOWED_MASK	0x01f
+#define XSCALE2_CCOUNT_OVERFLOW	0x001
+#define XSCALE2_COUNT0_OVERFLOW	0x002
+#define XSCALE2_COUNT1_OVERFLOW	0x004
+#define XSCALE2_COUNT2_OVERFLOW	0x008
+#define XSCALE2_COUNT3_OVERFLOW	0x010
+#define XSCALE2_CCOUNT_INT_EN	0x001
+#define XSCALE2_COUNT0_INT_EN	0x002
+#define XSCALE2_COUNT1_INT_EN	0x004
+#define XSCALE2_COUNT2_INT_EN	0x008
+#define XSCALE2_COUNT3_INT_EN	0x010
+#define XSCALE2_COUNT0_EVT_SHFT	0
+#define XSCALE2_COUNT0_EVT_MASK	(0xff << XSCALE2_COUNT0_EVT_SHFT)
+#define XSCALE2_COUNT1_EVT_SHFT	8
+#define XSCALE2_COUNT1_EVT_MASK	(0xff << XSCALE2_COUNT1_EVT_SHFT)
+#define XSCALE2_COUNT2_EVT_SHFT	16
+#define XSCALE2_COUNT2_EVT_MASK	(0xff << XSCALE2_COUNT2_EVT_SHFT)
+#define XSCALE2_COUNT3_EVT_SHFT	24
+#define XSCALE2_COUNT3_EVT_MASK	(0xff << XSCALE2_COUNT3_EVT_SHFT)
+
 /* Read the xscale2 PMNC, keeping only the bits that read back predictably. */
+static inline u32
+xscale2pmu_read_pmnc(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
+	/* bits 1-2 and 4-23 are read-unpredictable */
+	return val & 0xff000009;
+}
+
 /* Write the xscale2 PMNC; only the low four bits are passed through. */
+static inline void
+xscale2pmu_write_pmnc(u32 val)
+{
+	/* bits 4-23 are write-as-0, 24-31 are write ignored */
+	val &= 0xf;
+	asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
+}
+
 /* Read the xscale2 overflow flag register. */
+static inline u32
+xscale2pmu_read_overflow_flags(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
+	return val;
+}
+
 /* Write the xscale2 overflow flag register (used to clear flags). */
+static inline void
+xscale2pmu_write_overflow_flags(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
+}
+
 /* Read the xscale2 event select register (one byte per event counter). */
+static inline u32
+xscale2pmu_read_event_select(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
+	return val;
+}
+
 /* Write the xscale2 event select register. */
+static inline void
+xscale2pmu_write_event_select(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
+}
+
 /* Read the xscale2 interrupt enable register. */
+static inline u32
+xscale2pmu_read_int_enable(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
+	return val;
+}
+
 /* Write the xscale2 interrupt enable register. */
+static void
+xscale2pmu_write_int_enable(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
+}
+
 /*
  * Return non-zero if @counter's bit is set in the overflow-flag snapshot
  * @of_flags. An index outside the five known counters warns once and
  * returns 0.
  */
+static inline int
+xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
+					enum xscale_counters counter)
+{
+	int ret = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
+		break;
+	case XSCALE_COUNTER0:
+		ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
+		break;
+	case XSCALE_COUNTER1:
+		ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
+		break;
+	case XSCALE_COUNTER2:
+		ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
+		break;
+	case XSCALE_COUNTER3:
+		ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+	}
+
+	return ret;
+}
+
 /*
  * xscale2 overflow interrupt handler. Clears the PMNC enable bit, reads
  * the overflow flag register and writes the same value back to clear it,
  * then for every active event whose flag was set updates the count,
  * re-arms the period and reports the overflow to perf (disabling the event
  * if perf asks for it). Returns IRQ_NONE when no flag was set, IRQ_HANDLED
  * otherwise.
  */
+static irqreturn_t
+xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+	unsigned long pmnc, of_flags;
+	struct perf_sample_data data;
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	struct pt_regs *regs;
+	int idx;
+
+	/* Disable the PMU. */
+	pmnc = xscale2pmu_read_pmnc();
+	xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+	/* Check the overflow flag register. */
+	of_flags = xscale2pmu_read_overflow_flags();
 	/*
 	 * NOTE(review): on this early return the enable bit cleared just
 	 * above is not set again by this function - confirm intended.
 	 */
+	if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
+		return IRQ_NONE;
+
+	/* Clear the overflow bits. */
+	xscale2pmu_write_overflow_flags(of_flags);
+
+	regs = get_irq_regs();
+
+	for_each_set_bit(idx, cpu_pmu->cntr_mask, XSCALE2_NUM_COUNTERS) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!event)
+			continue;
+
+		if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			cpu_pmu->disable(event);
+	}
+
+	irq_work_run();
+
+	/*
+	 * Re-enable the PMU.
+	 */
+	pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(pmnc);
+
+	return IRQ_HANDLED;
+}
+
 /*
  * Enable callback (xscale2): set the counter's interrupt-enable bit and,
  * for COUNTER0..3, store hwc->config_base in that counter's byte of the
  * event select register. The event select register is written before the
  * interrupt enable register. Unknown indices warn once and write nothing.
  */
+static void xscale2pmu_enable_event(struct perf_event *event)
+{
+	unsigned long ien, evtsel;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	ien = xscale2pmu_read_int_enable();
+	evtsel = xscale2pmu_read_event_select();
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		ien |= XSCALE2_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		ien |= XSCALE2_COUNT0_INT_EN;
+		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		ien |= XSCALE2_COUNT1_INT_EN;
+		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER2:
+		ien |= XSCALE2_COUNT2_INT_EN;
+		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER3:
+		ien |= XSCALE2_COUNT3_INT_EN;
+		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	xscale2pmu_write_event_select(evtsel);
+	xscale2pmu_write_int_enable(ien);
+}
+
 /*
  * Disable callback (xscale2): clear the counter's interrupt-enable bit,
  * set its event byte (COUNTER0..3 only) to XSCALE_PERFCTR_UNUSED, and
  * finally write the counter's overflow bit to the overflow flag register.
  * Unknown indices warn once and write nothing.
  */
+static void xscale2pmu_disable_event(struct perf_event *event)
+{
+	unsigned long ien, evtsel, of_flags;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	ien = xscale2pmu_read_int_enable();
+	evtsel = xscale2pmu_read_event_select();
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		ien &= ~XSCALE2_CCOUNT_INT_EN;
+		of_flags = XSCALE2_CCOUNT_OVERFLOW;
+		break;
+	case XSCALE_COUNTER0:
+		ien &= ~XSCALE2_COUNT0_INT_EN;
+		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
+		of_flags = XSCALE2_COUNT0_OVERFLOW;
+		break;
+	case XSCALE_COUNTER1:
+		ien &= ~XSCALE2_COUNT1_INT_EN;
+		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
+		of_flags = XSCALE2_COUNT1_OVERFLOW;
+		break;
+	case XSCALE_COUNTER2:
+		ien &= ~XSCALE2_COUNT2_INT_EN;
+		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
+		of_flags = XSCALE2_COUNT2_OVERFLOW;
+		break;
+	case XSCALE_COUNTER3:
+		ien &= ~XSCALE2_COUNT3_INT_EN;
+		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
+		of_flags = XSCALE2_COUNT3_OVERFLOW;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	xscale2pmu_write_event_select(evtsel);
+	xscale2pmu_write_int_enable(ien);
+	xscale2pmu_write_overflow_flags(of_flags);
+}
+
 /*
  * Allocate a counter for @event on xscale2: try the xscale1 policy first
  * (cycle counter for CCNT events, else COUNTER1 then COUNTER0) and fall
  * back to COUNTER3, then COUNTER2. Returns the index, or the negative
  * value from xscale1pmu_get_event_idx() when nothing is free.
  *
  * NOTE(review): the fallback is also reached when a CCNT event got -EAGAIN
  * because the cycle counter is busy, which would place that event on
  * COUNTER3/COUNTER2 - confirm intended.
  */
+static int
+xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	int idx = xscale1pmu_get_event_idx(cpuc, event);
+	if (idx >= 0)
+		goto out;
+
+	if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
+		idx = XSCALE_COUNTER3;
+	else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
+		idx = XSCALE_COUNTER2;
+out:
+	return idx;
+}
+
 /* Start callback (xscale2): clear XSCALE_PMU_CNT64 and set the enable bit. */
+static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
+{
+	unsigned long val;
+
+	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
+	val |= XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(val);
+}
+
 /* Stop callback (xscale2): clear the PMNC enable bit. */
+static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
+{
+	unsigned long val;
+
+	val = xscale2pmu_read_pmnc();
+	val &= ~XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(val);
+}
+
 /*
  * Read the 32-bit hardware counter selected by hwc->idx; an index outside
  * the five known counters returns 0.
  */
+static inline u64 xscale2pmu_read_counter(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+	u32 val = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER2:
+		asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER3:
+		asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
+		break;
+	}
+
+	return val;
+}
+
 /*
  * Write @val to the hardware counter selected by hwc->idx; an index
  * outside the five known counters is silently ignored.
  */
+static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER2:
+		asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER3:
+		asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
+		break;
+	}
+}
+
 /* Fill in @cpu_pmu for the xscale2 variant (5 counters). Always returns 0. */
+static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name		= "armv5_xscale2";
+	cpu_pmu->handle_irq	= xscale2pmu_handle_irq;
+	cpu_pmu->enable		= xscale2pmu_enable_event;
+	cpu_pmu->disable	= xscale2pmu_disable_event;
+	cpu_pmu->read_counter	= xscale2pmu_read_counter;
+	cpu_pmu->write_counter	= xscale2pmu_write_counter;
+	cpu_pmu->get_event_idx	= xscale2pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx;
+	cpu_pmu->start		= xscale2pmu_start;
+	cpu_pmu->stop		= xscale2pmu_stop;
+	cpu_pmu->map_event	= xscale_map_event;
+
+	bitmap_set(cpu_pmu->cntr_mask, 0, XSCALE2_NUM_COUNTERS);
+
+	return 0;
+}
+
 /* CPU-ID based probe table: XScale architecture version -> init function. */
+static const struct pmu_probe_info xscale_pmu_probe_table[] = {
+	XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
+	XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
+	{ /* sentinel value */ }
+};
+
 /* Probe: no OF match table, only the CPU-ID probe table above. */
+static int xscale_pmu_device_probe(struct platform_device *pdev)
+{
+	return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table);
+}
+
 /* Built-in platform driver matched by the name "xscale-pmu". */
+static struct platform_driver xscale_pmu_driver = {
+	.driver		= {
+		.name	= "xscale-pmu",
+	},
+	.probe		= xscale_pmu_device_probe,
+};
+
+builtin_platform_driver(xscale_pmu_driver);
diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c index 308c9969642e..d6693519eaee 100644 --- a/drivers/perf/cxl_pmu.c +++ b/drivers/perf/cxl_pmu.c @@ -208,21 +208,10 @@ static int cxl_pmu_parse_caps(struct device *dev, struct cxl_pmu_info *info) return 0; } -static ssize_t cxl_pmu_format_sysfs_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dev_ext_attribute *eattr; - - eattr = container_of(attr, struct dev_ext_attribute, attr); - - return sysfs_emit(buf, "%s\n", (char *)eattr->var); -} - #define CXL_PMU_FORMAT_ATTR(_name, _format)\ (&((struct dev_ext_attribute[]) { \ { \ - .attr = __ATTR(_name, 0444, \ - cxl_pmu_format_sysfs_show, NULL), \ + .attr = __ATTR(_name, 0444, device_show_string, NULL), \ .var = (void *)_format \ } \ })[0].attr.attr) @@ -345,7 +334,7 @@ static ssize_t cxl_pmu_event_sysfs_show(struct device *dev, /* For CXL spec defined events */ #define CXL_PMU_EVENT_CXL_ATTR(_name, _gid, _msk) \ - CXL_PMU_EVENT_ATTR(_name, PCI_DVSEC_VENDOR_ID_CXL, _gid, _msk) + CXL_PMU_EVENT_ATTR(_name, PCI_VENDOR_ID_CXL, _gid, _msk) static struct attribute *cxl_pmu_event_attrs[] = { CXL_PMU_EVENT_CXL_ATTR(clock_ticks, CXL_PMU_GID_CLOCK_TICKS, BIT(0)), @@ -365,7 +354,7 @@ static struct attribute *cxl_pmu_event_attrs[] = { CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf, CXL_PMU_GID_D2H_REQ, BIT(13)), CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv, CXL_PMU_GID_D2H_REQ, BIT(14)), CXL_PMU_EVENT_CXL_ATTR(d2h_req_cacheflushed, 
CXL_PMU_GID_D2H_REQ, BIT(16)), - /* CXL rev 3.0 Table 3-20 - D2H Repsonse Encodings */ + /* CXL rev 3.0 Table 3-20 - D2H Response Encodings */ CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihiti, CXL_PMU_GID_D2H_RSP, BIT(4)), CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvhitv, CXL_PMU_GID_D2H_RSP, BIT(6)), CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihitse, CXL_PMU_GID_D2H_RSP, BIT(5)), @@ -388,12 +377,14 @@ static struct attribute *cxl_pmu_event_attrs[] = { /* CXL rev 3.0 Table 13-5 directly lists these */ CXL_PMU_EVENT_CXL_ATTR(cachedata_d2h_data, CXL_PMU_GID_CACHE_DATA, BIT(0)), CXL_PMU_EVENT_CXL_ATTR(cachedata_h2d_data, CXL_PMU_GID_CACHE_DATA, BIT(1)), - /* CXL rev 3.0 Table 3-29 M2S Req Memory Opcodes */ + /* CXL rev 3.1 Table 3-35 M2S Req Memory Opcodes */ CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminv, CXL_PMU_GID_M2S_REQ, BIT(0)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrd, CXL_PMU_GID_M2S_REQ, BIT(1)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddata, CXL_PMU_GID_M2S_REQ, BIT(2)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdfwd, CXL_PMU_GID_M2S_REQ, BIT(3)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memwrfwd, CXL_PMU_GID_M2S_REQ, BIT(4)), + CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdtee, CXL_PMU_GID_M2S_REQ, BIT(5)), + CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddatatee, CXL_PMU_GID_M2S_REQ, BIT(6)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memspecrd, CXL_PMU_GID_M2S_REQ, BIT(8)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminvnt, CXL_PMU_GID_M2S_REQ, BIT(9)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memcleanevict, CXL_PMU_GID_M2S_REQ, BIT(10)), @@ -415,10 +406,11 @@ static struct attribute *cxl_pmu_event_attrs[] = { CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk, CXL_PMU_GID_S2M_BISNP, BIT(4)), CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk, CXL_PMU_GID_S2M_BISNP, BIT(5)), CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk, CXL_PMU_GID_S2M_BISNP, BIT(6)), - /* CXL rev 3.0 Table 3-43 S2M NDR Opcopdes */ + /* CXL rev 3.1 Table 3-50 S2M NDR Opcopdes */ CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp, CXL_PMU_GID_S2M_NDR, BIT(0)), CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps, CXL_PMU_GID_S2M_NDR, BIT(1)), 
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe, CXL_PMU_GID_S2M_NDR, BIT(2)), + CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpm, CXL_PMU_GID_S2M_NDR, BIT(3)), CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(4)), /* CXL rev 3.0 Table 3-46 S2M DRS opcodes */ CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata, CXL_PMU_GID_S2M_DRS, BIT(0)), @@ -983,8 +975,9 @@ static __exit void cxl_pmu_exit(void) cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num); } +MODULE_DESCRIPTION("CXL Performance Monitor Driver"); MODULE_LICENSE("GPL"); -MODULE_IMPORT_NS(CXL); +MODULE_IMPORT_NS("CXL"); module_init(cxl_pmu_init); module_exit(cxl_pmu_exit); MODULE_ALIAS_CXL(CXL_DEVICE_PMU); diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c index 957058ad0099..cccecae9823f 100644 --- a/drivers/perf/dwc_pcie_pmu.c +++ b/drivers/perf/dwc_pcie_pmu.c @@ -20,7 +20,6 @@ #include <linux/sysfs.h> #include <linux/types.h> -#define DWC_PCIE_VSEC_RAS_DES_ID 0x02 #define DWC_PCIE_EVENT_CNT_CTL 0x8 /* @@ -82,7 +81,6 @@ struct dwc_pcie_pmu { u16 ras_des_offset; u32 nr_lanes; - struct list_head pmu_node; struct hlist_node cpuhp_node; struct perf_event *event[DWC_PCIE_EVENT_TYPE_MAX]; int on_cpu; @@ -101,12 +99,23 @@ struct dwc_pcie_dev_info { struct list_head dev_node; }; -struct dwc_pcie_vendor_id { - int vendor_id; +struct dwc_pcie_pmu_vsec_id { + u16 vendor_id; + u16 vsec_id; + u8 vsec_rev; }; -static const struct dwc_pcie_vendor_id dwc_pcie_vendor_ids[] = { - {.vendor_id = PCI_VENDOR_ID_ALIBABA }, +/* + * VSEC IDs are allocated by the vendor, so a given ID may mean different + * things to different vendors. See PCIe r6.0, sec 7.9.5.2. 
+ */ +static const struct dwc_pcie_pmu_vsec_id dwc_pcie_pmu_vsec_ids[] = { + { .vendor_id = PCI_VENDOR_ID_ALIBABA, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_AMPERE, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_QCOM, + .vsec_id = 0x02, .vsec_rev = 0x4 }, {} /* terminator */ }; @@ -198,14 +207,14 @@ static struct attribute *dwc_pcie_pmu_time_event_attrs[] = { DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_1, 0x05), DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_2, 0x06), DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(CFG_RCVRY, 0x07), - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x08), - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x09), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x08), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x09), /* Group #1 */ - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_PCIe_TLP_Data_Payload, 0x20), - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_PCIe_TLP_Data_Payload, 0x21), - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_CCIX_TLP_Data_Payload, 0x22), - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_CCIX_TLP_Data_Payload, 0x23), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(tx_pcie_tlp_data_payload, 0x20), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(rx_pcie_tlp_data_payload, 0x21), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(tx_ccix_tlp_data_payload, 0x22), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(rx_ccix_tlp_data_payload, 0x23), /* * Leave it to the user to specify the lane ID to avoid generating @@ -215,9 +224,9 @@ static struct attribute *dwc_pcie_pmu_time_event_attrs[] = { DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_update_fc_dllp, 0x601), DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ack_dllp, 0x602), DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_update_fc_dllp, 0x603), - DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nulified_tlp, 0x604), - DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nulified_tlp, 0x605), - DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tl, 0x606), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nullified_tlp, 0x604), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nullified_tlp, 0x605), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tlp, 0x606), 
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_write, 0x700), DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_read, 0x701), DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_write, 0x702), @@ -518,31 +527,28 @@ static void dwc_pcie_unregister_pmu(void *data) perf_pmu_unregister(&pcie_pmu->pmu); } -static bool dwc_pcie_match_des_cap(struct pci_dev *pdev) +static u16 dwc_pcie_des_cap(struct pci_dev *pdev) { - const struct dwc_pcie_vendor_id *vid; - u16 vsec = 0; + const struct dwc_pcie_pmu_vsec_id *vid; + u16 vsec; u32 val; if (!pci_is_pcie(pdev) || !(pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)) - return false; + return 0; - for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) { + for (vid = dwc_pcie_pmu_vsec_ids; vid->vendor_id; vid++) { vsec = pci_find_vsec_capability(pdev, vid->vendor_id, - DWC_PCIE_VSEC_RAS_DES_ID); - if (vsec) - break; + vid->vsec_id); + if (vsec) { + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, + &val); + if (PCI_VNDR_HEADER_REV(val) == vid->vsec_rev) { + pci_dbg(pdev, "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); + return vsec; + } + } } - if (!vsec) - return false; - - pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); - if (PCI_VNDR_HEADER_REV(val) != 0x04) - return false; - - pci_dbg(pdev, - "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); - return true; + return 0; } static void dwc_pcie_unregister_dev(struct dwc_pcie_dev_info *dev_info) @@ -556,10 +562,10 @@ static int dwc_pcie_register_dev(struct pci_dev *pdev) { struct platform_device *plat_dev; struct dwc_pcie_dev_info *dev_info; - u32 bdf; + u32 sbdf; - bdf = PCI_DEVID(pdev->bus->number, pdev->devfn); - plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", bdf, + sbdf = (pci_domain_nr(pdev->bus) << 16) | PCI_DEVID(pdev->bus->number, pdev->devfn); + plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", sbdf, pdev, sizeof(*pdev)); if (IS_ERR(plat_dev)) @@ -586,7 +592,7 @@ static int dwc_pcie_pmu_notifier(struct notifier_block *nb, switch 
(action) { case BUS_NOTIFY_ADD_DEVICE: - if (!dwc_pcie_match_des_cap(pdev)) + if (!dwc_pcie_des_cap(pdev)) return NOTIFY_DONE; if (dwc_pcie_register_dev(pdev)) return NOTIFY_BAD; @@ -611,15 +617,16 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) struct pci_dev *pdev = plat_dev->dev.platform_data; struct dwc_pcie_pmu *pcie_pmu; char *name; - u32 bdf, val; + u32 sbdf; u16 vsec; int ret; - vsec = pci_find_vsec_capability(pdev, pdev->vendor, - DWC_PCIE_VSEC_RAS_DES_ID); - pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); - bdf = PCI_DEVID(pdev->bus->number, pdev->devfn); - name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", bdf); + vsec = dwc_pcie_des_cap(pdev); + if (!vsec) + return -ENODEV; + + sbdf = plat_dev->id; + name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", sbdf); if (!name) return -ENOMEM; @@ -650,7 +657,7 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state, &pcie_pmu->cpuhp_node); if (ret) { - pci_err(pdev, "Error %d registering hotplug @%x\n", ret, bdf); + pci_err(pdev, "Error %d registering hotplug @%x\n", ret, sbdf); return ret; } @@ -663,7 +670,7 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) ret = perf_pmu_register(&pcie_pmu->pmu, name, -1); if (ret) { - pci_err(pdev, "Error %d registering PMU @%x\n", ret, bdf); + pci_err(pdev, "Error %d registering PMU @%x\n", ret, sbdf); return ret; } ret = devm_add_action_or_reset(&plat_dev->dev, dwc_pcie_unregister_pmu, @@ -690,9 +697,8 @@ static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_n { struct dwc_pcie_pmu *pcie_pmu; struct pci_dev *pdev; - int node; - cpumask_t mask; unsigned int target; + int node; pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node); /* Nothing to do if this CPU doesn't own the PMU */ @@ -702,10 +708,9 @@ static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_n 
pcie_pmu->on_cpu = -1; pdev = pcie_pmu->pdev; node = dev_to_node(&pdev->dev); - if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) && - cpumask_andnot(&mask, &mask, cpumask_of(cpu))) - target = cpumask_any(&mask); - else + + target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu); + if (target >= nr_cpu_ids) target = cpumask_any_but(cpu_online_mask, cpu); if (target >= nr_cpu_ids) { @@ -728,11 +733,10 @@ static struct platform_driver dwc_pcie_pmu_driver = { static int __init dwc_pcie_pmu_init(void) { struct pci_dev *pdev = NULL; - bool found = false; int ret; for_each_pci_dev(pdev) { - if (!dwc_pcie_match_des_cap(pdev)) + if (!dwc_pcie_des_cap(pdev)) continue; ret = dwc_pcie_register_dev(pdev); @@ -740,11 +744,7 @@ static int __init dwc_pcie_pmu_init(void) pci_dev_put(pdev); return ret; } - - found = true; } - if (!found) - return -ENODEV; ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/dwc_pcie_pmu:online", diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 7dbfaee372c7..b989ffa95d69 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -651,6 +651,7 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, *pmu = (struct ddr_pmu) { .pmu = (struct pmu) { .module = THIS_MODULE, + .parent = dev, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, .task_ctx_nr = perf_invalid_context, .attr_groups = attr_groups, @@ -826,7 +827,7 @@ cpuhp_state_err: return ret; } -static int ddr_perf_remove(struct platform_device *pdev) +static void ddr_perf_remove(struct platform_device *pdev) { struct ddr_pmu *pmu = platform_get_drvdata(pdev); @@ -836,7 +837,6 @@ static int ddr_perf_remove(struct platform_device *pdev) perf_pmu_unregister(&pmu->pmu); ida_free(&ddr_ida, pmu->id); - return 0; } static struct platform_driver imx_ddr_pmu_driver = { @@ -850,4 +850,5 @@ static struct platform_driver imx_ddr_pmu_driver = { }; module_platform_driver(imx_ddr_pmu_driver); 
+MODULE_DESCRIPTION("Freescale i.MX8 DDR Performance Monitor Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c index 9685645bfe04..843f163e6c33 100644 --- a/drivers/perf/fsl_imx9_ddr_perf.c +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -11,14 +11,24 @@ #include <linux/perf_event.h> /* Performance monitor configuration */ -#define PMCFG1 0x00 -#define PMCFG1_RD_TRANS_FILT_EN BIT(31) -#define PMCFG1_WR_TRANS_FILT_EN BIT(30) -#define PMCFG1_RD_BT_FILT_EN BIT(29) -#define PMCFG1_ID_MASK GENMASK(17, 0) +#define PMCFG1 0x00 +#define MX93_PMCFG1_RD_TRANS_FILT_EN BIT(31) +#define MX93_PMCFG1_WR_TRANS_FILT_EN BIT(30) +#define MX93_PMCFG1_RD_BT_FILT_EN BIT(29) +#define MX93_PMCFG1_ID_MASK GENMASK(17, 0) -#define PMCFG2 0x04 -#define PMCFG2_ID GENMASK(17, 0) +#define MX95_PMCFG1_WR_BEAT_FILT_EN BIT(31) +#define MX95_PMCFG1_RD_BEAT_FILT_EN BIT(30) + +#define PMCFG2 0x04 +#define MX93_PMCFG2_ID GENMASK(17, 0) + +#define PMCFG3 0x08 +#define PMCFG4 0x0C +#define PMCFG5 0x10 +#define PMCFG6 0x14 +#define MX95_PMCFG_ID_MASK GENMASK(9, 0) +#define MX95_PMCFG_ID GENMASK(25, 16) /* Global control register affects all counters and takes priority over local control registers */ #define PMGC0 0x40 @@ -41,6 +51,10 @@ #define NUM_COUNTERS 11 #define CYCLES_COUNTER 0 +#define CYCLES_EVENT_ID 0 + +#define CONFIG_EVENT_MASK GENMASK(7, 0) +#define CONFIG_COUNTER_MASK GENMASK(23, 16) #define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu) @@ -49,8 +63,21 @@ static DEFINE_IDA(ddr_ida); +/* + * V1 support 1 read transaction, 1 write transaction and 1 read beats + * event which corresponding respecitively to counter 2, 3 and 4. + */ +#define DDR_PERF_AXI_FILTER_V1 0x1 + +/* + * V2 support 1 read beats and 3 write beats events which corresponding + * respecitively to counter 2-5. 
+ */ +#define DDR_PERF_AXI_FILTER_V2 0x2 + struct imx_ddr_devtype_data { const char *identifier; /* system PMU identifier for userspace */ + unsigned int filter_ver; /* AXI filter version */ }; struct ddr_pmu { @@ -67,12 +94,35 @@ struct ddr_pmu { int id; }; +static const struct imx_ddr_devtype_data imx91_devtype_data = { + .identifier = "imx91", + .filter_ver = DDR_PERF_AXI_FILTER_V1 +}; + static const struct imx_ddr_devtype_data imx93_devtype_data = { .identifier = "imx93", + .filter_ver = DDR_PERF_AXI_FILTER_V1 +}; + +static const struct imx_ddr_devtype_data imx95_devtype_data = { + .identifier = "imx95", + .filter_ver = DDR_PERF_AXI_FILTER_V2 }; +static inline bool axi_filter_v1(struct ddr_pmu *pmu) +{ + return pmu->devtype_data->filter_ver == DDR_PERF_AXI_FILTER_V1; +} + +static inline bool axi_filter_v2(struct ddr_pmu *pmu) +{ + return pmu->devtype_data->filter_ver == DDR_PERF_AXI_FILTER_V2; +} + static const struct of_device_id imx_ddr_pmu_dt_ids[] = { - {.compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data}, + { .compatible = "fsl,imx91-ddr-pmu", .data = &imx91_devtype_data }, + { .compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data }, + { .compatible = "fsl,imx95-ddr-pmu", .data = &imx95_devtype_data }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids); @@ -118,21 +168,40 @@ static const struct attribute_group ddr_perf_cpumask_attr_group = { .attrs = ddr_perf_cpumask_attrs, }; +struct imx9_pmu_events_attr { + struct device_attribute attr; + u64 id; + const struct imx_ddr_devtype_data *devtype_data; +}; + static ssize_t ddr_pmu_event_show(struct device *dev, struct device_attribute *attr, char *page) { - struct perf_pmu_events_attr *pmu_attr; + struct imx9_pmu_events_attr *pmu_attr; - pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + pmu_attr = container_of(attr, struct imx9_pmu_events_attr, attr); return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); } -#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \ 
- (&((struct perf_pmu_events_attr[]) { \ +#define COUNTER_OFFSET_IN_EVENT 8 +#define ID(counter, id) ((counter << COUNTER_OFFSET_IN_EVENT) | id) + +#define DDR_PMU_EVENT_ATTR_COMM(_name, _id, _data) \ + (&((struct imx9_pmu_events_attr[]) { \ { .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\ - .id = _id, } \ + .id = _id, \ + .devtype_data = _data, } \ })[0].attr.attr) +#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \ + DDR_PMU_EVENT_ATTR_COMM(_name, _id, NULL) + +#define IMX93_DDR_PMU_EVENT_ATTR(_name, _id) \ + DDR_PMU_EVENT_ATTR_COMM(_name, _id, &imx93_devtype_data) + +#define IMX95_DDR_PMU_EVENT_ATTR(_name, _id) \ + DDR_PMU_EVENT_ATTR_COMM(_name, _id, &imx95_devtype_data) + static struct attribute *ddr_perf_events_attrs[] = { /* counter0 cycles event */ IMX9_DDR_PMU_EVENT_ATTR(cycles, 0), @@ -159,90 +228,115 @@ static struct attribute *ddr_perf_events_attrs[] = { IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_29, 63), /* counter1 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, 64), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, 65), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, 66), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, 67), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, 68), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, 69), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, 70), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, 71), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, ID(1, 64)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, ID(1, 65)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, ID(1, 66)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, ID(1, 67)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, ID(1, 68)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, ID(1, 69)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, ID(1, 70)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, ID(1, 71)), /* counter2 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, 64), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, 65), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, 66), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, 67), - 
IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, 68), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, 69), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, 70), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, 71), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, 72), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, 73), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, ID(2, 64)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, ID(2, 65)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, ID(2, 66)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, ID(2, 67)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, ID(2, 68)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, ID(2, 69)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, ID(2, 70)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, ID(2, 71)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, ID(2, 72)), + IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, ID(2, 73)), /* imx93 specific*/ + IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_beat_filt, ID(2, 73)), /* imx95 specific*/ /* counter3 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, 64), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, 65), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, 66), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, 67), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, 68), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, 69), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, 70), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, 71), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, 72), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, 73), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, ID(3, 64)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, ID(3, 65)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, ID(3, 66)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, ID(3, 67)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, ID(3, 68)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, ID(3, 69)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, ID(3, 70)), + 
IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, ID(3, 71)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, ID(3, 72)), + IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, ID(3, 73)), /* imx93 specific*/ + IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt2, ID(3, 73)), /* imx95 specific*/ /* counter4 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, 64), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, 65), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, 66), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, 67), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, 68), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, 69), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, 70), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, 71), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, 72), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, 73), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, ID(4, 64)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, ID(4, 65)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, ID(4, 66)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, ID(4, 67)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, ID(4, 68)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, ID(4, 69)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, ID(4, 70)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, ID(4, 71)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, ID(4, 72)), + IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, ID(4, 73)), /* imx93 specific*/ + IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt1, ID(4, 73)), /* imx95 specific*/ /* counter5 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, 64), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, 65), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, 66), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, 67), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, 68), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, 69), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, 70), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, 71), - 
IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, 72), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, ID(5, 64)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, ID(5, 65)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, ID(5, 66)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, ID(5, 67)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, ID(5, 68)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, ID(5, 69)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, ID(5, 70)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, ID(5, 71)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, ID(5, 72)), + IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt0, ID(5, 73)), /* imx95 specific*/ /* counter6 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, 64), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, 72), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, ID(6, 64)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, ID(6, 72)), /* counter7 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, 64), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, 65), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, ID(7, 64)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, ID(7, 65)), /* counter8 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, 64), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, 65), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, ID(8, 64)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, ID(8, 65)), /* counter9 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, 65), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, 66), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, ID(9, 65)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, ID(9, 66)), /* counter10 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, 65), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, 66), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, ID(10, 65)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, ID(10, 66)), NULL, }; +static umode_t 
+ddr_perf_events_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj)); + struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); + struct imx9_pmu_events_attr *eattr; + + eattr = container_of(attr, typeof(*eattr), attr.attr); + + if (!eattr->devtype_data) + return attr->mode; + + if (eattr->devtype_data != ddr_pmu->devtype_data && + eattr->devtype_data->filter_ver != ddr_pmu->devtype_data->filter_ver) + return 0; + + return attr->mode; +} + static const struct attribute_group ddr_perf_events_attr_group = { .name = "events", .attrs = ddr_perf_events_attrs, + .is_visible = ddr_perf_events_attrs_is_visible, }; -PMU_FORMAT_ATTR(event, "config:0-7"); +PMU_FORMAT_ATTR(event, "config:0-7,16-23"); PMU_FORMAT_ATTR(counter, "config:8-15"); PMU_FORMAT_ATTR(axi_id, "config1:0-17"); PMU_FORMAT_ATTR(axi_mask, "config2:0-17"); @@ -339,8 +433,10 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config, int counter, bool enable) { u32 ctrl_a; + int event; ctrl_a = readl_relaxed(pmu->base + PMLCA(counter)); + event = FIELD_GET(CONFIG_EVENT_MASK, config); if (enable) { ctrl_a |= PMLCA_FC; @@ -352,7 +448,7 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config, ctrl_a &= ~PMLCA_FC; ctrl_a |= PMLCA_CE; ctrl_a &= ~FIELD_PREP(PMLCA_EVENT, 0x7F); - ctrl_a |= FIELD_PREP(PMLCA_EVENT, (config & 0x000000FF)); + ctrl_a |= FIELD_PREP(PMLCA_EVENT, event); writel(ctrl_a, pmu->base + PMLCA(counter)); } else { /* Freeze counter. 
*/ @@ -361,39 +457,79 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config, } } -static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int cfg, int cfg1, int cfg2) +static void imx93_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event, + int counter, int axi_id, int axi_mask) { u32 pmcfg1, pmcfg2; - int event, counter; - - event = cfg & 0x000000FF; - counter = (cfg & 0x0000FF00) >> 8; + u32 mask[] = { MX93_PMCFG1_RD_TRANS_FILT_EN, + MX93_PMCFG1_WR_TRANS_FILT_EN, + MX93_PMCFG1_RD_BT_FILT_EN }; pmcfg1 = readl_relaxed(pmu->base + PMCFG1); - if (counter == 2 && event == 73) - pmcfg1 |= PMCFG1_RD_TRANS_FILT_EN; - else if (counter == 2 && event != 73) - pmcfg1 &= ~PMCFG1_RD_TRANS_FILT_EN; + if (counter >= 2 && counter <= 4) + pmcfg1 = event == 73 ? pmcfg1 | mask[counter - 2] : + pmcfg1 & ~mask[counter - 2]; - if (counter == 3 && event == 73) - pmcfg1 |= PMCFG1_WR_TRANS_FILT_EN; - else if (counter == 3 && event != 73) - pmcfg1 &= ~PMCFG1_WR_TRANS_FILT_EN; + pmcfg1 &= ~FIELD_PREP(MX93_PMCFG1_ID_MASK, 0x3FFFF); + pmcfg1 |= FIELD_PREP(MX93_PMCFG1_ID_MASK, axi_mask); + writel_relaxed(pmcfg1, pmu->base + PMCFG1); - if (counter == 4 && event == 73) - pmcfg1 |= PMCFG1_RD_BT_FILT_EN; - else if (counter == 4 && event != 73) - pmcfg1 &= ~PMCFG1_RD_BT_FILT_EN; + pmcfg2 = readl_relaxed(pmu->base + PMCFG2); + pmcfg2 &= ~FIELD_PREP(MX93_PMCFG2_ID, 0x3FFFF); + pmcfg2 |= FIELD_PREP(MX93_PMCFG2_ID, axi_id); + writel_relaxed(pmcfg2, pmu->base + PMCFG2); +} - pmcfg1 &= ~FIELD_PREP(PMCFG1_ID_MASK, 0x3FFFF); - pmcfg1 |= FIELD_PREP(PMCFG1_ID_MASK, cfg2); - writel(pmcfg1, pmu->base + PMCFG1); +static void imx95_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event, + int counter, int axi_id, int axi_mask) +{ + u32 pmcfg1, pmcfg, offset = 0; - pmcfg2 = readl_relaxed(pmu->base + PMCFG2); - pmcfg2 &= ~FIELD_PREP(PMCFG2_ID, 0x3FFFF); - pmcfg2 |= FIELD_PREP(PMCFG2_ID, cfg1); - writel(pmcfg2, pmu->base + PMCFG2); + pmcfg1 = readl_relaxed(pmu->base + PMCFG1); + + if 
(event == 73) { + switch (counter) { + case 2: + pmcfg1 |= MX95_PMCFG1_WR_BEAT_FILT_EN; + offset = PMCFG3; + break; + case 3: + pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN; + offset = PMCFG4; + break; + case 4: + pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN; + offset = PMCFG5; + break; + case 5: + pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN; + offset = PMCFG6; + break; + } + } else { + switch (counter) { + case 2: + pmcfg1 &= ~MX95_PMCFG1_WR_BEAT_FILT_EN; + break; + case 3: + case 4: + case 5: + pmcfg1 &= ~MX95_PMCFG1_RD_BEAT_FILT_EN; + break; + } + } + + writel_relaxed(pmcfg1, pmu->base + PMCFG1); + + if (offset) { + pmcfg = readl_relaxed(pmu->base + offset); + pmcfg &= ~(FIELD_PREP(MX95_PMCFG_ID_MASK, 0x3FF) | + FIELD_PREP(MX95_PMCFG_ID, 0x3FF)); + pmcfg |= (FIELD_PREP(MX95_PMCFG_ID_MASK, axi_mask) | + FIELD_PREP(MX95_PMCFG_ID, axi_id)); + writel_relaxed(pmcfg, pmu->base + offset); + } } static void ddr_perf_event_update(struct perf_event *event) @@ -460,6 +596,28 @@ static void ddr_perf_event_start(struct perf_event *event, int flags) hwc->state = 0; } +static int ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event, int counter) +{ + int i; + + if (event == CYCLES_EVENT_ID) { + // Cycles counter is dedicated for cycle event. + if (pmu->events[CYCLES_COUNTER] == NULL) + return CYCLES_COUNTER; + } else if (counter != 0) { + // Counter specific event use specific counter. + if (pmu->events[counter] == NULL) + return counter; + } else { + // Auto allocate counter for referene event. 
+ for (i = 1; i < NUM_COUNTERS; i++) + if (pmu->events[i] == NULL) + return i; + } + + return -ENOENT; +} + static int ddr_perf_event_add(struct perf_event *event, int flags) { struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); @@ -467,21 +625,33 @@ static int ddr_perf_event_add(struct perf_event *event, int flags) int cfg = event->attr.config; int cfg1 = event->attr.config1; int cfg2 = event->attr.config2; - int counter; + int event_id, counter; + + event_id = FIELD_GET(CONFIG_EVENT_MASK, cfg); + counter = FIELD_GET(CONFIG_COUNTER_MASK, cfg); - counter = (cfg & 0x0000FF00) >> 8; + counter = ddr_perf_alloc_counter(pmu, event_id, counter); + if (counter < 0) { + dev_dbg(pmu->dev, "There are not enough counters\n"); + return -EOPNOTSUPP; + } pmu->events[counter] = event; pmu->active_events++; hwc->idx = counter; hwc->state |= PERF_HES_STOPPED; + if (axi_filter_v1(pmu)) + /* read trans, write trans, read beat */ + imx93_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2); + + if (axi_filter_v2(pmu)) + /* write beat, read beat2, read beat1, read beat */ + imx95_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2); + if (flags & PERF_EF_START) ddr_perf_event_start(event, flags); - /* read trans, write trans, read beat */ - ddr_perf_monitor_config(pmu, cfg, cfg1, cfg2); - return 0; } @@ -501,9 +671,11 @@ static void ddr_perf_event_del(struct perf_event *event, int flags) { struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; ddr_perf_event_stop(event, PERF_EF_UPDATE); + pmu->events[counter] = NULL; pmu->active_events--; hwc->idx = -1; } @@ -679,7 +851,7 @@ format_string_err: return ret; } -static int ddr_perf_remove(struct platform_device *pdev) +static void ddr_perf_remove(struct platform_device *pdev) { struct ddr_pmu *pmu = platform_get_drvdata(pdev); @@ -689,8 +861,6 @@ static int ddr_perf_remove(struct platform_device *pdev) perf_pmu_unregister(&pmu->pmu); ida_free(&ddr_ida, pmu->id); - - return 0; 
} static struct platform_driver imx_ddr_pmu_driver = { diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index b90ba8aca3fa..c5394d007b61 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -99,16 +99,6 @@ HISI_PCIE_PMU_FILTER_ATTR(len_mode, config1, 11, 10); HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0); HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16); -static ssize_t hisi_pcie_format_sysfs_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct dev_ext_attribute *eattr; - - eattr = container_of(attr, struct dev_ext_attribute, attr); - - return sysfs_emit(buf, "%s\n", (char *)eattr->var); -} - static ssize_t hisi_pcie_event_sysfs_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -120,8 +110,7 @@ static ssize_t hisi_pcie_event_sysfs_show(struct device *dev, struct device_attr #define HISI_PCIE_PMU_FORMAT_ATTR(_name, _format) \ (&((struct dev_ext_attribute[]){ \ - { .attr = __ATTR(_name, 0444, hisi_pcie_format_sysfs_show, \ - NULL), \ + { .attr = __ATTR(_name, 0444, device_show_string, NULL), \ .var = (void *)_format } \ })[0].attr.attr) @@ -152,6 +141,22 @@ static ssize_t bus_show(struct device *dev, struct device_attribute *attr, char } static DEVICE_ATTR_RO(bus); +static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_min); +} +static DEVICE_ATTR_RO(bdf_min); + +static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_max); +} +static DEVICE_ATTR_RO(bdf_max); + static struct hisi_pcie_reg_pair hisi_pcie_parse_reg_value(struct hisi_pcie_pmu *pcie_pmu, u32 reg_off) { @@ -216,12 +221,10 @@ static void 
hisi_pcie_pmu_writeq(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, writeq_relaxed(val, pcie_pmu->base + offset); } -static void hisi_pcie_pmu_config_filter(struct perf_event *event) +static u64 hisi_pcie_pmu_get_event_ctrl_val(struct perf_event *event) { - struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; u64 port, trig_len, thr_len, len_mode; - u64 reg = HISI_PCIE_INIT_SET; + u64 reg = 0; /* Config HISI_PCIE_EVENT_CTRL according to event. */ reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event)); @@ -256,10 +259,19 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event) else reg |= FIELD_PREP(HISI_PCIE_LEN_M, HISI_PCIE_LEN_M_DEFAULT); + return reg; +} + +static void hisi_pcie_pmu_config_event_ctrl(struct perf_event *event) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 reg = hisi_pcie_pmu_get_event_ctrl_val(event); + hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg); } -static void hisi_pcie_pmu_clear_filter(struct perf_event *event) +static void hisi_pcie_pmu_clear_event_ctrl(struct perf_event *event) { struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -299,18 +311,24 @@ static bool hisi_pcie_pmu_valid_filter(struct perf_event *event, if (hisi_pcie_get_trig_len(event) > HISI_PCIE_TRIG_MAX_VAL) return false; - if (requester_id) { - if (!hisi_pcie_pmu_valid_requester_id(pcie_pmu, requester_id)) - return false; - } + /* Need to explicitly set filter of "port" or "bdf" */ + if (!hisi_pcie_get_port(event) && + !hisi_pcie_pmu_valid_requester_id(pcie_pmu, requester_id)) + return false; return true; } +/* + * Check Whether two events share the same config. The same config means not + * only the event code, but also the filter settings of the two events are + * the same. 
+ */ static bool hisi_pcie_pmu_cmp_event(struct perf_event *target, struct perf_event *event) { - return hisi_pcie_get_real_event(target) == hisi_pcie_get_real_event(event); + return hisi_pcie_pmu_get_event_ctrl_val(target) == + hisi_pcie_pmu_get_event_ctrl_val(event); } static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event) @@ -337,15 +355,27 @@ static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event) return false; for (num = 0; num < counters; num++) { + /* + * If we find a related event, then it's a valid group + * since we don't need to allocate a new counter for it. + */ if (hisi_pcie_pmu_cmp_event(event_group[num], sibling)) break; } + /* + * Otherwise it's a new event but if there's no available counter, + * fail the check since we cannot schedule all the events in + * the group simultaneously. + */ + if (num == HISI_PCIE_MAX_COUNTERS) + return false; + if (num == counters) event_group[counters++] = sibling; } - return counters <= HISI_PCIE_MAX_COUNTERS; + return true; } static int hisi_pcie_pmu_event_init(struct perf_event *event) @@ -385,40 +415,32 @@ static u64 hisi_pcie_pmu_read_counter(struct perf_event *event) return hisi_pcie_pmu_readq(pcie_pmu, event->hw.event_base, idx); } -static int hisi_pcie_pmu_find_related_event(struct hisi_pcie_pmu *pcie_pmu, - struct perf_event *event) +/* + * Check all work events, if a relevant event is found then we return it + * first, otherwise return the first idle counter (need to reset). 
+ */ +static int hisi_pcie_pmu_get_event_idx(struct hisi_pcie_pmu *pcie_pmu, + struct perf_event *event) { + int first_idle = -EAGAIN; struct perf_event *sibling; int idx; for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) { sibling = pcie_pmu->hw_events[idx]; - if (!sibling) - continue; - - if (!hisi_pcie_pmu_cmp_event(sibling, event)) + if (!sibling) { + if (first_idle == -EAGAIN) + first_idle = idx; continue; + } /* Related events must be used in group */ - if (sibling->group_leader == event->group_leader) - return idx; - else - return -EINVAL; - } - - return idx; -} - -static int hisi_pcie_pmu_get_event_idx(struct hisi_pcie_pmu *pcie_pmu) -{ - int idx; - - for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) { - if (!pcie_pmu->hw_events[idx]) + if (hisi_pcie_pmu_cmp_event(sibling, event) && + sibling->group_leader == event->group_leader) return idx; } - return -EINVAL; + return first_idle; } static void hisi_pcie_pmu_event_update(struct perf_event *event) @@ -446,10 +468,24 @@ static void hisi_pcie_pmu_set_period(struct perf_event *event) struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + u64 orig_cnt, cnt; + + orig_cnt = hisi_pcie_pmu_read_counter(event); local64_set(&hwc->prev_count, HISI_PCIE_INIT_VAL); hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_CNT, idx, HISI_PCIE_INIT_VAL); hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EXT_CNT, idx, HISI_PCIE_INIT_VAL); + + /* + * The counter maybe unwritable if the target event is unsupported. + * Check this by comparing the counts after setting the period. If + * the counts stay unchanged after setting the period then update + * the hwc->prev_count correctly. Otherwise the final counts user + * get maybe totally wrong. 
+ */ + cnt = hisi_pcie_pmu_read_counter(event); + if (orig_cnt == cnt) + local64_set(&hwc->prev_count, cnt); } static void hisi_pcie_pmu_enable_counter(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc) @@ -505,7 +541,7 @@ static void hisi_pcie_pmu_start(struct perf_event *event, int flags) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; - hisi_pcie_pmu_config_filter(event); + hisi_pcie_pmu_config_event_ctrl(event); hisi_pcie_pmu_enable_counter(pcie_pmu, hwc); hisi_pcie_pmu_enable_int(pcie_pmu, hwc); hisi_pcie_pmu_set_period(event); @@ -526,7 +562,7 @@ static void hisi_pcie_pmu_stop(struct perf_event *event, int flags) hisi_pcie_pmu_event_update(event); hisi_pcie_pmu_disable_int(pcie_pmu, hwc); hisi_pcie_pmu_disable_counter(pcie_pmu, hwc); - hisi_pcie_pmu_clear_filter(event); + hisi_pcie_pmu_clear_event_ctrl(event); WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; @@ -544,27 +580,18 @@ static int hisi_pcie_pmu_add(struct perf_event *event, int flags) hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; - /* Check all working events to find a related event. */ - idx = hisi_pcie_pmu_find_related_event(pcie_pmu, event); - if (idx < 0) - return idx; - - /* Current event shares an enabled counter with the related event */ - if (idx < HISI_PCIE_MAX_COUNTERS) { - hwc->idx = idx; - goto start_count; - } - - idx = hisi_pcie_pmu_get_event_idx(pcie_pmu); + idx = hisi_pcie_pmu_get_event_idx(pcie_pmu, event); if (idx < 0) return idx; hwc->idx = idx; - pcie_pmu->hw_events[idx] = event; - /* Reset Counter to avoid previous statistic interference. 
*/ - hisi_pcie_pmu_reset_counter(pcie_pmu, idx); -start_count: + /* No enabled counter found with related event, reset it */ + if (!pcie_pmu->hw_events[idx]) { + hisi_pcie_pmu_reset_counter(pcie_pmu, idx); + pcie_pmu->hw_events[idx] = event; + } + if (flags & PERF_EF_START) hisi_pcie_pmu_start(event, PERF_EF_RELOAD); @@ -677,7 +704,6 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node); unsigned int target; - cpumask_t mask; int numa_node; /* Nothing to do if this CPU doesn't own the PMU */ @@ -688,10 +714,10 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) /* Choose a local CPU from all online cpus. */ numa_node = dev_to_node(&pcie_pmu->pdev->dev); - if (cpumask_and(&mask, cpumask_of_node(numa_node), cpu_online_mask) && - cpumask_andnot(&mask, &mask, cpumask_of(cpu))) - target = cpumask_any(&mask); - else + + target = cpumask_any_and_but(cpumask_of_node(numa_node), + cpu_online_mask, cpu); + if (target >= nr_cpu_ids) target = cpumask_any_but(cpu_online_mask, cpu); if (target >= nr_cpu_ids) { @@ -714,10 +740,18 @@ static struct attribute *hisi_pcie_pmu_events_attr[] = { HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011), + HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_flux, 0x0104), + HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_time, 0x10104), HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x0804), HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x10804), + HISI_PCIE_PMU_EVENT_ATTR(rx_cpl_flux, 0x2004), + HISI_PCIE_PMU_EVENT_ATTR(rx_cpl_time, 0x12004), + HISI_PCIE_PMU_EVENT_ATTR(tx_mwr_flux, 0x0105), + HISI_PCIE_PMU_EVENT_ATTR(tx_mwr_time, 0x10105), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x0405), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x10405), + HISI_PCIE_PMU_EVENT_ATTR(tx_cpl_flux, 0x1005), + HISI_PCIE_PMU_EVENT_ATTR(tx_cpl_time, 0x11005), NULL }; @@ -745,6 +779,8 @@ 
static const struct attribute_group hisi_pcie_pmu_format_group = { static struct attribute *hisi_pcie_pmu_bus_attrs[] = { &dev_attr_bus.attr, + &dev_attr_bdf_max.attr, + &dev_attr_bdf_min.attr, NULL }; @@ -803,6 +839,7 @@ static int hisi_pcie_alloc_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_ pcie_pmu->pmu = (struct pmu) { .name = name, .module = THIS_MODULE, + .parent = &pdev->dev, .event_init = hisi_pcie_pmu_event_init, .pmu_enable = hisi_pcie_pmu_enable, .pmu_disable = hisi_pcie_pmu_disable, diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 40f1bc9f9b91..b879b81adfdd 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -180,20 +180,18 @@ MODULE_DEVICE_TABLE(acpi, hisi_cpa_pmu_acpi_match); static int hisi_cpa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *cpa_pmu) { - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &cpa_pmu->sicl_id)) { + hisi_uncore_pmu_init_topology(cpa_pmu, &pdev->dev); + + if (cpa_pmu->topo.sicl_id < 0) { dev_err(&pdev->dev, "Can not read sicl-id\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &cpa_pmu->index_id)) { + if (cpa_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id\n"); return -EINVAL; } - cpa_pmu->ccl_id = -1; - cpa_pmu->sccl_id = -1; cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cpa_pmu->base)) return PTR_ERR(cpa_pmu->base); @@ -227,34 +225,11 @@ static const struct attribute_group hisi_cpa_pmu_events_group = { .attrs = hisi_cpa_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_cpa_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_cpa_pmu_cpumask_attr_group = { - .attrs = hisi_cpa_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_cpa_pmu_identifier_attr = - 
__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_cpa_pmu_identifier_attrs[] = { - &hisi_cpa_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_cpa_pmu_identifier_group = { - .attrs = hisi_cpa_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_cpa_pmu_attr_groups[] = { &hisi_cpa_pmu_format_group, &hisi_cpa_pmu_events_group, - &hisi_cpa_pmu_cpumask_attr_group, - &hisi_cpa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -311,8 +286,8 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%u", - cpa_pmu->sicl_id, cpa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%d", + cpa_pmu->topo.sicl_id, cpa_pmu->topo.index_id); if (!name) return -ENOMEM; @@ -341,7 +316,7 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev) return ret; } -static int hisi_cpa_pmu_remove(struct platform_device *pdev) +static void hisi_cpa_pmu_remove(struct platform_device *pdev) { struct hisi_pmu *cpa_pmu = platform_get_drvdata(pdev); @@ -349,7 +324,6 @@ static int hisi_cpa_pmu_remove(struct platform_device *pdev) cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, &cpa_pmu->node); hisi_cpa_pmu_enable_pm(cpa_pmu); - return 0; } static struct platform_driver hisi_cpa_pmu_driver = { @@ -390,6 +364,7 @@ static void __exit hisi_cpa_pmu_module_exit(void) } module_exit(hisi_cpa_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index ffb039d05d07..7e490f8868f2 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -111,14 
+111,14 @@ static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, * so there is no need to write event type, while it is programmable counter in * PMU v2. */ -static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, +static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx, u32 type) { u32 offset; - if (hha_pmu->identifier >= HISI_PMU_V2) { + if (ddrc_pmu->identifier >= HISI_PMU_V2) { offset = DDRC_V2_EVENT_TYPE + 4 * idx; - writel(type, hha_pmu->base + offset); + writel(type, ddrc_pmu->base + offset); } } @@ -297,23 +297,22 @@ MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *ddrc_pmu) { + hisi_uncore_pmu_init_topology(ddrc_pmu, &pdev->dev); + /* * Use the SCCL_ID and DDRC channel ID to identify the * DDRC PMU, while SCCL_ID is in MPIDR[aff2]. */ if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id", - &ddrc_pmu->index_id)) { + &ddrc_pmu->topo.index_id)) { dev_err(&pdev->dev, "Can not read ddrc channel-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &ddrc_pmu->sccl_id)) { + if (ddrc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n"); return -EINVAL; } - /* DDRC PMUs only share the same SCCL */ - ddrc_pmu->ccl_id = -1; ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ddrc_pmu->base)) { @@ -323,8 +322,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION); if (ddrc_pmu->identifier >= HISI_PMU_V2) { - if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &ddrc_pmu->sub_id)) { + if (ddrc_pmu->topo.sub_id < 0) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } @@ -382,42 +380,19 @@ static const struct attribute_group hisi_ddrc_pmu_v2_events_group = { .attrs = hisi_ddrc_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, 
hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = { - .attrs = hisi_ddrc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_ddrc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = { - &hisi_ddrc_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_ddrc_pmu_identifier_group = { - .attrs = hisi_ddrc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = { &hisi_ddrc_pmu_v1_format_group, &hisi_ddrc_pmu_v1_events_group, - &hisi_ddrc_pmu_cpumask_attr_group, - &hisi_ddrc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = { &hisi_ddrc_pmu_v2_format_group, &hisi_ddrc_pmu_v2_events_group, - &hisi_ddrc_pmu_cpumask_attr_group, - &hisi_ddrc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -501,13 +476,13 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) if (ddrc_pmu->identifier >= HISI_PMU_V2) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "hisi_sccl%u_ddrc%u_%u", - ddrc_pmu->sccl_id, ddrc_pmu->index_id, - ddrc_pmu->sub_id); + "hisi_sccl%d_ddrc%d_%d", + ddrc_pmu->topo.sccl_id, ddrc_pmu->topo.index_id, + ddrc_pmu->topo.sub_id); else name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, - ddrc_pmu->index_id); + "hisi_sccl%d_ddrc%d", ddrc_pmu->topo.sccl_id, + ddrc_pmu->topo.index_id); if (!name) return -ENOMEM; @@ -531,14 +506,13 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) return ret; } -static int hisi_ddrc_pmu_remove(struct platform_device *pdev) +static void hisi_ddrc_pmu_remove(struct platform_device *pdev) { 
struct hisi_pmu *ddrc_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&ddrc_pmu->pmu); cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, &ddrc_pmu->node); - return 0; } static struct platform_driver hisi_ddrc_pmu_driver = { @@ -576,10 +550,10 @@ static void __exit hisi_ddrc_pmu_module_exit(void) { platform_driver_unregister(&hisi_ddrc_pmu_driver); cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE); - } module_exit(hisi_ddrc_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 15caf99e1eef..ca609db86046 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -295,12 +295,13 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, unsigned long long id; acpi_status status; + hisi_uncore_pmu_init_topology(hha_pmu, &pdev->dev); + /* * Use SCCL_ID and UID to identify the HHA PMU, while * SCCL_ID is in MPIDR[aff2]. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &hha_pmu->sccl_id)) { + if (hha_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read hha sccl-id!\n"); return -EINVAL; } @@ -309,8 +310,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, * Early versions of BIOS support _UID by mistake, so we support * both "hisilicon, idx-id" as preference, if available. 
*/ - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &hha_pmu->index_id)) { + if (hha_pmu->topo.index_id < 0) { status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), "_UID", NULL, &id); if (ACPI_FAILURE(status)) { @@ -318,10 +318,8 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, return -EINVAL; } - hha_pmu->index_id = id; + hha_pmu->topo.index_id = id; } - /* HHA PMUs only share the same SCCL */ - hha_pmu->ccl_id = -1; hha_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hha_pmu->base)) { @@ -407,42 +405,19 @@ static const struct attribute_group hisi_hha_pmu_v2_events_group = { .attrs = hisi_hha_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_hha_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = { - .attrs = hisi_hha_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_hha_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_hha_pmu_identifier_attrs[] = { - &hisi_hha_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_hha_pmu_identifier_group = { - .attrs = hisi_hha_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = { &hisi_hha_pmu_v1_format_group, &hisi_hha_pmu_v1_events_group, - &hisi_hha_pmu_cpumask_attr_group, - &hisi_hha_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_hha_pmu_v2_attr_groups[] = { &hisi_hha_pmu_v2_format_group, &hisi_hha_pmu_v2_events_group, - &hisi_hha_pmu_cpumask_attr_group, - &hisi_hha_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -510,8 +485,8 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = 
devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", - hha_pmu->sccl_id, hha_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_hha%d", + hha_pmu->topo.sccl_id, hha_pmu->topo.index_id); if (!name) return -ENOMEM; @@ -534,14 +509,13 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) return ret; } -static int hisi_hha_pmu_remove(struct platform_device *pdev) +static void hisi_hha_pmu_remove(struct platform_device *pdev) { struct hisi_pmu *hha_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&hha_pmu->pmu); cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, &hha_pmu->node); - return 0; } static struct platform_driver hisi_hha_pmu_driver = { @@ -582,6 +556,7 @@ static void __exit hisi_hha_pmu_module_exit(void) } module_exit(hisi_hha_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 794dbcd19b7a..412fc3a97963 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -355,18 +355,18 @@ MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { + hisi_uncore_pmu_init_topology(l3c_pmu, &pdev->dev); + /* * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. 
*/ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &l3c_pmu->sccl_id)) { + if (l3c_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read l3c sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &l3c_pmu->ccl_id)) { + if (l3c_pmu->topo.ccl_id < 0) { dev_err(&pdev->dev, "Can not read l3c ccl-id!\n"); return -EINVAL; } @@ -441,42 +441,19 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = { .attrs = hisi_l3c_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = { - .attrs = hisi_l3c_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_l3c_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_l3c_pmu_identifier_attrs[] = { - &hisi_l3c_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_l3c_pmu_identifier_group = { - .attrs = hisi_l3c_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = { &hisi_l3c_pmu_v1_format_group, &hisi_l3c_pmu_v1_events_group, - &hisi_l3c_pmu_cpumask_attr_group, - &hisi_l3c_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { &hisi_l3c_pmu_v2_format_group, &hisi_l3c_pmu_v2_events_group, - &hisi_l3c_pmu_cpumask_attr_group, - &hisi_l3c_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -544,8 +521,8 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", - l3c_pmu->sccl_id, l3c_pmu->ccl_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d", + 
l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id); if (!name) return -ENOMEM; @@ -568,14 +545,13 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) return ret; } -static int hisi_l3c_pmu_remove(struct platform_device *pdev) +static void hisi_l3c_pmu_remove(struct platform_device *pdev) { struct hisi_pmu *l3c_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&l3c_pmu->pmu); cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, &l3c_pmu->node); - return 0; } static struct platform_driver hisi_l3c_pmu_driver = { @@ -616,6 +592,7 @@ static void __exit hisi_l3c_pmu_module_exit(void) } module_exit(hisi_l3c_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 797cf201996a..a0142684e379 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -269,25 +269,22 @@ static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx) static int hisi_pa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *pa_pmu) { + hisi_uncore_pmu_init_topology(pa_pmu, &pdev->dev); + /* * As PA PMU is in a SICL, use the SICL_ID and the index ID * to identify the PA PMU. 
*/ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &pa_pmu->sicl_id)) { + if (pa_pmu->topo.sicl_id < 0) { dev_err(&pdev->dev, "Cannot read sicl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &pa_pmu->index_id)) { + if (pa_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } - pa_pmu->ccl_id = -1; - pa_pmu->sccl_id = -1; - pa_pmu->dev_info = device_get_match_data(&pdev->dev); if (!pa_pmu->dev_info) return -ENODEV; @@ -356,29 +353,6 @@ static const struct attribute_group hisi_h60pa_pmu_events_group = { .attrs = hisi_h60pa_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_pa_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_pa_pmu_cpumask_attr_group = { - .attrs = hisi_pa_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_pa_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_pa_pmu_identifier_attrs[] = { - &hisi_pa_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_pa_pmu_identifier_group = { - .attrs = hisi_pa_pmu_identifier_attrs, -}; - static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { .mask_offset = PA_INT_MASK, .clear_offset = PA_INT_CLEAR, @@ -388,8 +362,8 @@ static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v2_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -402,8 +376,8 @@ static const struct hisi_pmu_dev_info hisi_h32pa_v2 = { static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v3_events_group, - &hisi_pa_pmu_cpumask_attr_group, - 
&hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -422,8 +396,8 @@ static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = { static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_h60pa_pmu_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -488,9 +462,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u", - pa_pmu->sicl_id, pa_pmu->dev_info->name, - pa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%d", + pa_pmu->topo.sicl_id, pa_pmu->dev_info->name, + pa_pmu->topo.index_id); if (!name) return -ENOMEM; @@ -514,14 +488,13 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) return ret; } -static int hisi_pa_pmu_remove(struct platform_device *pdev) +static void hisi_pa_pmu_remove(struct platform_device *pdev) { struct hisi_pmu *pa_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&pa_pmu->pmu); cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, &pa_pmu->node); - return 0; } static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = { @@ -570,6 +543,7 @@ static void __exit hisi_pa_pmu_module_exit(void) } module_exit(hisi_pa_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 04031450d5fe..ef058b1dd509 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -14,6 +14,7 @@ #include <linux/err.h> #include <linux/errno.h> #include <linux/interrupt.h> +#include <linux/property.h> #include <asm/cputype.h> 
#include <asm/local64.h> @@ -23,20 +24,6 @@ #define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0)) /* - * PMU format attributes - */ -ssize_t hisi_format_sysfs_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dev_ext_attribute *eattr; - - eattr = container_of(attr, struct dev_ext_attribute, attr); - - return sysfs_emit(buf, "%s\n", (char *)eattr->var); -} -EXPORT_SYMBOL_GPL(hisi_format_sysfs_show); - -/* * PMU event attributes */ ssize_t hisi_event_sysfs_show(struct device *dev, @@ -48,7 +35,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev, return sysfs_emit(page, "config=0x%lx\n", (unsigned long)eattr->var); } -EXPORT_SYMBOL_GPL(hisi_event_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_event_sysfs_show, "HISI_PMU"); /* * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show @@ -60,7 +47,52 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, return sysfs_emit(buf, "%d\n", hisi_pmu->on_cpu); } -EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, "HISI_PMU"); + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static ssize_t hisi_associated_cpus_sysfs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, &hisi_pmu->associated_cpus); +} +static DEVICE_ATTR(associated_cpus, 0444, hisi_associated_cpus_sysfs_show, NULL); + +static struct attribute *hisi_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + &dev_attr_associated_cpus.attr, + NULL +}; + +const struct attribute_group hisi_pmu_cpumask_attr_group = { + .attrs = hisi_pmu_cpumask_attrs, +}; +EXPORT_SYMBOL_NS_GPL(hisi_pmu_cpumask_attr_group, "HISI_PMU"); + +ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(page, "0x%08x\n", 
hisi_pmu->identifier); +} +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, "HISI_PMU"); + +static struct device_attribute hisi_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_pmu_identifier_attrs[] = { + &hisi_pmu_identifier_attr.attr, + NULL +}; + +const struct attribute_group hisi_pmu_identifier_group = { + .attrs = hisi_pmu_identifier_attrs, +}; +EXPORT_SYMBOL_NS_GPL(hisi_pmu_identifier_group, "HISI_PMU"); static bool hisi_validate_event_group(struct perf_event *event) { @@ -110,17 +142,7 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event) return idx; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx); - -ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, - struct device_attribute *attr, - char *page) -{ - struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); - - return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier); -} -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, "HISI_PMU"); static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { @@ -179,7 +201,7 @@ int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_irq, "HISI_PMU"); int hisi_uncore_pmu_event_init(struct perf_event *event) { @@ -233,7 +255,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_init, "HISI_PMU"); /* * Set the counter to count the event that we're interested in, @@ -287,7 +309,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event) /* Write start value to the hardware event counter */ hisi_pmu->ops->write_counter(hisi_pmu, hwc, val); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_set_event_period, 
"HISI_PMU"); void hisi_uncore_pmu_event_update(struct perf_event *event) { @@ -308,7 +330,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event) HISI_MAX_PERIOD(hisi_pmu->counter_bits); local64_add(delta, &event->count); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_update, "HISI_PMU"); void hisi_uncore_pmu_start(struct perf_event *event, int flags) { @@ -331,7 +353,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags) hisi_uncore_pmu_enable_event(event); perf_event_update_userpage(event); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_start, "HISI_PMU"); void hisi_uncore_pmu_stop(struct perf_event *event, int flags) { @@ -348,7 +370,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags) hisi_uncore_pmu_event_update(event); hwc->state |= PERF_HES_UPTODATE; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_stop, "HISI_PMU"); int hisi_uncore_pmu_add(struct perf_event *event, int flags) { @@ -371,7 +393,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_add, "HISI_PMU"); void hisi_uncore_pmu_del(struct perf_event *event, int flags) { @@ -383,14 +405,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_del, "HISI_PMU"); void hisi_uncore_pmu_read(struct perf_event *event) { /* Read hardware counter and update the perf counter statistics */ hisi_uncore_pmu_event_update(event); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_read, "HISI_PMU"); void hisi_uncore_pmu_enable(struct pmu *pmu) { @@ -403,7 +425,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu) hisi_pmu->ops->start_counters(hisi_pmu); } 
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_enable, "HISI_PMU"); void hisi_uncore_pmu_disable(struct pmu *pmu) { @@ -411,7 +433,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) hisi_pmu->ops->stop_counters(hisi_pmu); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_disable, "HISI_PMU"); /* @@ -458,22 +480,19 @@ static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp) */ static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu) { + struct hisi_pmu_topology *topo = &hisi_pmu->topo; int sccl_id, ccl_id; - /* If SCCL_ID is -1, the PMU is in a SICL and has no CPU affinity */ - if (hisi_pmu->sccl_id == -1) - return true; - - if (hisi_pmu->ccl_id == -1) { + if (topo->ccl_id == -1) { /* If CCL_ID is -1, the PMU only shares the same SCCL */ hisi_read_sccl_and_ccl_id(&sccl_id, NULL); - return sccl_id == hisi_pmu->sccl_id; + return sccl_id == topo->sccl_id; } hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id); - return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id; + return sccl_id == topo->sccl_id && ccl_id == topo->ccl_id; } int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) @@ -481,13 +500,25 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu, node); - if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu)) + /* + * If the CPU is not associated to PMU, initialize the hisi_pmu->on_cpu + * based on the locality if it hasn't been initialized yet. For PMUs + * do have associated CPUs, it'll be updated later. 
+ */ + if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu)) { + if (hisi_pmu->on_cpu != -1) + return 0; + + hisi_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(hisi_pmu->dev)); + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(hisi_pmu->on_cpu))); return 0; + } cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus); - /* If another CPU is already managing this PMU, simply return. */ - if (hisi_pmu->on_cpu != -1) + /* If another associated CPU is already managing this PMU, simply return. */ + if (hisi_pmu->on_cpu != -1 && + cpumask_test_cpu(hisi_pmu->on_cpu, &hisi_pmu->associated_cpus)) return 0; /* Use this CPU in cpumask for event counting */ @@ -498,18 +529,14 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_online_cpu, "HISI_PMU"); int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu, node); - cpumask_t pmu_online_cpus; unsigned int target; - if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus)) - return 0; - /* Nothing to do if this CPU doesn't own the PMU */ if (hisi_pmu->on_cpu != cpu) return 0; @@ -517,10 +544,16 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) /* Give up ownership of the PMU */ hisi_pmu->on_cpu = -1; - /* Choose a new CPU to migrate ownership of the PMU to */ - cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus, - cpu_online_mask); - target = cpumask_any_but(&pmu_online_cpus, cpu); + /* + * Migrate ownership of the PMU to a new CPU chosen from PMU's online + * associated CPUs if possible, if no associated CPU online then + * migrate to one online CPU. 
+ */ + target = cpumask_any_and_but(&hisi_pmu->associated_cpus, + cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) return 0; @@ -531,13 +564,43 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, "HISI_PMU"); + +/* + * Retrieve the topology information from the firmware for the hisi_pmu device. + * The topology ID will be -1 if we cannot initialize it, it may either due to + * the PMU doesn't locate on this certain topology or the firmware needs to be + * fixed. + */ +void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev) +{ + struct hisi_pmu_topology *topo = &hisi_pmu->topo; + + topo->sccl_id = -1; + topo->ccl_id = -1; + topo->index_id = -1; + topo->sub_id = -1; + + if (device_property_read_u32(dev, "hisilicon,scl-id", &topo->sccl_id)) + dev_dbg(dev, "no scl-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,ccl-id", &topo->ccl_id)) + dev_dbg(dev, "no ccl-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,idx-id", &topo->index_id)) + dev_dbg(dev, "no idx-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,sub-id", &topo->sub_id)) + dev_dbg(dev, "no sub-id present\n"); +} +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_topology, "HISI_PMU"); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { struct pmu *pmu = &hisi_pmu->pmu; pmu->module = module; + pmu->parent = hisi_pmu->dev; pmu->task_ctx_nr = perf_invalid_context; pmu->event_init = hisi_uncore_pmu_event_init; pmu->pmu_enable = hisi_uncore_pmu_enable; @@ -550,6 +613,7 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) pmu->attr_groups = hisi_pmu->pmu_events.attr_groups; pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE; } -EXPORT_SYMBOL_GPL(hisi_pmu_init); +EXPORT_SYMBOL_NS_GPL(hisi_pmu_init, 
"HISI_PMU"); +MODULE_DESCRIPTION("HiSilicon SoC uncore Performance Monitor driver framework"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 92402aa69d70..f4fed2544877 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -33,7 +33,7 @@ })[0].attr.attr) #define HISI_PMU_FORMAT_ATTR(_name, _config) \ - HISI_PMU_ATTR(_name, hisi_format_sysfs_show, (void *)_config) + HISI_PMU_ATTR(_name, device_show_string, _config) #define HISI_PMU_EVENT_ATTR(_name, _config) \ HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config) @@ -81,27 +81,55 @@ struct hisi_pmu_hwevents { const struct attribute_group **attr_groups; }; +/** + * struct hisi_pmu_topology - Describe the topology hierarchy on which the PMU + * is located. + * @sccl_id: ID of the SCCL on which the PMU locate is located. + * @sicl_id: ID of the SICL on which the PMU locate is located. + * @scl_id: ID used by the core which is unaware of the SCCL/SICL. + * @ccl_id: ID of the CCL (CPU cluster) on which the PMU is located. + * @index_id: the ID of the PMU module if there're several PMUs at a + * particularly location in the topology. + * @sub_id: submodule ID of the PMU. For example we use this for DDRC PMU v2 + * since each DDRC has more than one DMC + * + * The ID will be -1 if the PMU isn't located on a certain topology. + */ +struct hisi_pmu_topology { + /* + * SCCL (Super CPU CLuster) and SICL (Super I/O Cluster) are parallel + * so a PMU cannot locate on a SCCL and a SICL. If the SCCL/SICL + * distinction is not relevant, use scl_id instead. 
+ */ + union { + int sccl_id; + int sicl_id; + int scl_id; + }; + int ccl_id; + int index_id; + int sub_id; +}; + /* Generic pmu struct for different pmu types */ struct hisi_pmu { struct pmu pmu; const struct hisi_uncore_ops *ops; const struct hisi_pmu_dev_info *dev_info; struct hisi_pmu_hwevents pmu_events; - /* associated_cpus: All CPUs associated with the PMU */ + struct hisi_pmu_topology topo; + /* + * CPUs associated to the PMU and are preferred to use for counting. + * Could be empty if PMU has no association (e.g. PMU on SICL), in + * which case any online CPU will be used. + */ cpumask_t associated_cpus; /* CPU used for counting */ int on_cpu; int irq; struct device *dev; struct hlist_node node; - int sccl_id; - int sicl_id; - int ccl_id; void __iomem *base; - /* the ID of the PMU modules */ - u32 index_id; - /* For DDRC PMU v2: each DDRC has more than one DMC */ - u32 sub_id; int num_counters; int counter_bits; /* check event code range */ @@ -109,6 +137,10 @@ struct hisi_pmu { u32 identifier; }; +/* Generic implementation of cpumask/identifier group */ +extern const struct attribute_group hisi_pmu_cpumask_attr_group; +extern const struct attribute_group hisi_pmu_identifier_group; + int hisi_uncore_pmu_get_event_idx(struct perf_event *event); void hisi_uncore_pmu_read(struct perf_event *event); int hisi_uncore_pmu_add(struct perf_event *event, int flags); @@ -122,8 +154,6 @@ void hisi_uncore_pmu_enable(struct pmu *pmu); void hisi_uncore_pmu_disable(struct pmu *pmu); ssize_t hisi_event_sysfs_show(struct device *dev, struct device_attribute *attr, char *buf); -ssize_t hisi_format_sysfs_show(struct device *dev, - struct device_attribute *attr, char *buf); ssize_t hisi_cpumask_sysfs_show(struct device *dev, struct device_attribute *attr, char *buf); int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node); @@ -134,6 +164,7 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, char *page); int hisi_uncore_pmu_init_irq(struct 
hisi_pmu *hisi_pmu, struct platform_device *pdev); +void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index e706ca567676..dbd079016fc4 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -288,25 +288,22 @@ MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); static int hisi_sllc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *sllc_pmu) { + hisi_uncore_pmu_init_topology(sllc_pmu, &pdev->dev); + /* * Use the SCCL_ID and the index ID to identify the SLLC PMU, * while SCCL_ID is from MPIDR_EL1 by CPU. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &sllc_pmu->sccl_id)) { + if (sllc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Cannot read sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &sllc_pmu->index_id)) { + if (sllc_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } - /* SLLC PMUs only share the same SCCL */ - sllc_pmu->ccl_id = -1; - sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sllc_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n"); @@ -347,34 +344,11 @@ static const struct attribute_group hisi_sllc_pmu_v2_events_group = { .attrs = hisi_sllc_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_sllc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_sllc_pmu_cpumask_attr_group = { - .attrs = hisi_sllc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_sllc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static 
struct attribute *hisi_sllc_pmu_identifier_attrs[] = { - &hisi_sllc_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_sllc_pmu_identifier_group = { - .attrs = hisi_sllc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = { &hisi_sllc_pmu_v2_format_group, &hisi_sllc_pmu_v2_events_group, - &hisi_sllc_pmu_cpumask_attr_group, - &hisi_sllc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -433,8 +407,8 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_sllc%u", - sllc_pmu->sccl_id, sllc_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_sllc%d", + sllc_pmu->topo.sccl_id, sllc_pmu->topo.index_id); if (!name) return -ENOMEM; @@ -460,14 +434,13 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) return ret; } -static int hisi_sllc_pmu_remove(struct platform_device *pdev) +static void hisi_sllc_pmu_remove(struct platform_device *pdev) { struct hisi_pmu *sllc_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&sllc_pmu->pmu); cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, &sllc_pmu->node); - return 0; } static struct platform_driver hisi_sllc_pmu_driver = { @@ -508,6 +481,7 @@ static void __exit hisi_sllc_pmu_module_exit(void) } module_exit(hisi_sllc_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>"); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index 636fb79647c8..03cb9b564b99 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -11,7 +11,6 @@ #include <linux/irq.h> #include <linux/list.h> #include <linux/mod_devicetable.h> -#include 
<linux/property.h> #include "hisi_uncore_pmu.h" @@ -287,12 +286,52 @@ static u64 hisi_uc_pmu_read_counter(struct hisi_pmu *uc_pmu, return readq(uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx)); } -static void hisi_uc_pmu_write_counter(struct hisi_pmu *uc_pmu, +static bool hisi_uc_pmu_get_glb_en_state(struct hisi_pmu *uc_pmu) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + return !!FIELD_GET(HISI_UC_EVENT_GLB_EN, val); +} + +static void hisi_uc_pmu_write_counter_normal(struct hisi_pmu *uc_pmu, struct hw_perf_event *hwc, u64 val) { writeq(val, uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx)); } +static void hisi_uc_pmu_write_counter_quirk_v2(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + hisi_uc_pmu_start_counters(uc_pmu); + hisi_uc_pmu_write_counter_normal(uc_pmu, hwc, val); + hisi_uc_pmu_stop_counters(uc_pmu); +} + +static void hisi_uc_pmu_write_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + bool enable = hisi_uc_pmu_get_glb_en_state(uc_pmu); + bool erratum = uc_pmu->identifier == HISI_PMU_V2; + + /* + * HiSilicon UC PMU v2 suffers the erratum 162700402 that the + * PMU counter cannot be set due to the lack of clock under power + * saving mode. This will lead to error or inaccurate counts. + * The clock can be enabled by the PMU global enabling control. + * The irq handler and pmu_start() will call the function to set + * period. If the function under irq context, the PMU has been + * enabled therefore we set counter directly. Other situations + * the PMU is disabled, we need to enable it to turn on the + * counter clock to set period, and then restore PMU enable + * status, the counter can hold its value without a clock. 
+ */ + if (enable || !erratum) + hisi_uc_pmu_write_counter_normal(uc_pmu, hwc, val); + else + hisi_uc_pmu_write_counter_quirk_v2(uc_pmu, hwc, val); +} + static void hisi_uc_pmu_enable_counter_int(struct hisi_pmu *uc_pmu, struct hw_perf_event *hwc) { @@ -326,25 +365,24 @@ static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx) static int hisi_uc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *uc_pmu) { + hisi_uncore_pmu_init_topology(uc_pmu, &pdev->dev); + /* * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to * identify the topology information of UC PMU devices in the chip. * They have some CCLs per SCCL and then 4 UC PMU per CCL. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &uc_pmu->sccl_id)) { + if (uc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &uc_pmu->ccl_id)) { + if (uc_pmu->topo.ccl_id < 0) { dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &uc_pmu->sub_id)) { + if (uc_pmu->topo.sub_id < 0) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } @@ -399,34 +437,11 @@ static const struct attribute_group hisi_uc_pmu_events_group = { .attrs = hisi_uc_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_uc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_uc_pmu_cpumask_attr_group = { - .attrs = hisi_uc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_uc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_uc_pmu_identifier_attrs[] = { - &hisi_uc_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_uc_pmu_identifier_group = { - .attrs = 
hisi_uc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_uc_pmu_attr_groups[] = { &hisi_uc_pmu_format_group, &hisi_uc_pmu_events_group, - &hisi_uc_pmu_cpumask_attr_group, - &hisi_uc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -498,8 +513,9 @@ static int hisi_uc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u", - uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%d", + uc_pmu->topo.sccl_id, uc_pmu->topo.ccl_id, + uc_pmu->topo.sub_id); if (!name) return -ENOMEM; @@ -573,6 +589,7 @@ static void __exit hisi_uc_pmu_module_exit(void) } module_exit(hisi_uc_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Junhao He <hejunhao3@huawei.com>"); diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c index 16869bf5bf4c..c157f3572cae 100644 --- a/drivers/perf/hisilicon/hns3_pmu.c +++ b/drivers/perf/hisilicon/hns3_pmu.c @@ -363,16 +363,6 @@ HNS3_PMU_FILTER_ATTR(global, config1, 52, 52); HNS3_PMU_EVT_PPS_##_name##_TIME, \ HNS3_PMU_FILTER_INTR_##_name}) -static ssize_t hns3_pmu_format_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dev_ext_attribute *eattr; - - eattr = container_of(attr, struct dev_ext_attribute, attr); - - return sysfs_emit(buf, "%s\n", (char *)eattr->var); -} - static ssize_t hns3_pmu_event_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -421,7 +411,7 @@ static ssize_t hns3_pmu_filter_mode_show(struct device *dev, })[0].attr.attr) #define HNS3_PMU_FORMAT_ATTR(_name, _format) \ - HNS3_PMU_ATTR(_name, hns3_pmu_format_show, (void *)_format) + HNS3_PMU_ATTR(_name, device_show_string, _format) #define HNS3_PMU_EVENT_ATTR(_name, _event) \ HNS3_PMU_ATTR(_name, 
hns3_pmu_event_show, (void *)_event) #define HNS3_PMU_FLT_MODE_ATTR(_name, _event) \ @@ -1085,15 +1075,27 @@ static bool hns3_pmu_validate_event_group(struct perf_event *event) return false; for (num = 0; num < counters; num++) { + /* + * If we find a related event, then it's a valid group + * since we don't need to allocate a new counter for it. + */ if (hns3_pmu_cmp_event(event_group[num], sibling)) break; } + /* + * Otherwise it's a new event but if there's no available counter, + * fail the check since we cannot schedule all the events in + * the group simultaneously. + */ + if (num == HNS3_PMU_MAX_HW_EVENTS) + return false; + if (num == counters) event_group[counters++] = sibling; } - return counters <= HNS3_PMU_MAX_HW_EVENTS; + return true; } static u32 hns3_pmu_get_filter_condition(struct perf_event *event) @@ -1419,6 +1421,7 @@ static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu) hns3_pmu->pmu = (struct pmu) { .name = name, .module = THIS_MODULE, + .parent = &pdev->dev, .event_init = hns3_pmu_event_init, .pmu_enable = hns3_pmu_enable, .pmu_disable = hns3_pmu_disable, @@ -1515,7 +1518,7 @@ static int hns3_pmu_irq_register(struct pci_dev *pdev, return ret; } - ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev); + ret = devm_add_action_or_reset(&pdev->dev, hns3_pmu_free_irq, pdev); if (ret) { pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret); return ret; diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c index 524ba82bfce2..039feded9152 100644 --- a/drivers/perf/marvell_cn10k_ddr_pmu.c +++ b/drivers/perf/marvell_cn10k_ddr_pmu.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -/* Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver +/* + * Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver * - * Copyright (C) 2021 Marvell. + * Copyright (C) 2021-2024 Marvell. 
*/ #include <linux/init.h> @@ -14,24 +15,29 @@ #include <linux/platform_device.h> /* Performance Counters Operating Mode Control Registers */ -#define DDRC_PERF_CNT_OP_MODE_CTRL 0x8020 -#define OP_MODE_CTRL_VAL_MANNUAL 0x1 +#define CN10K_DDRC_PERF_CNT_OP_MODE_CTRL 0x8020 +#define ODY_DDRC_PERF_CNT_OP_MODE_CTRL 0x20020 +#define OP_MODE_CTRL_VAL_MANUAL 0x1 /* Performance Counters Start Operation Control Registers */ -#define DDRC_PERF_CNT_START_OP_CTRL 0x8028 +#define CN10K_DDRC_PERF_CNT_START_OP_CTRL 0x8028 +#define ODY_DDRC_PERF_CNT_START_OP_CTRL 0x200A0 #define START_OP_CTRL_VAL_START 0x1ULL #define START_OP_CTRL_VAL_ACTIVE 0x2 /* Performance Counters End Operation Control Registers */ -#define DDRC_PERF_CNT_END_OP_CTRL 0x8030 +#define CN10K_DDRC_PERF_CNT_END_OP_CTRL 0x8030 +#define ODY_DDRC_PERF_CNT_END_OP_CTRL 0x200E0 #define END_OP_CTRL_VAL_END 0x1ULL /* Performance Counters End Status Registers */ -#define DDRC_PERF_CNT_END_STATUS 0x8038 +#define CN10K_DDRC_PERF_CNT_END_STATUS 0x8038 +#define ODY_DDRC_PERF_CNT_END_STATUS 0x20120 #define END_STATUS_VAL_END_TIMER_MODE_END 0x1 /* Performance Counters Configuration Registers */ -#define DDRC_PERF_CFG_BASE 0x8040 +#define CN10K_DDRC_PERF_CFG_BASE 0x8040 +#define ODY_DDRC_PERF_CFG_BASE 0x20160 /* 8 Generic event counter + 2 fixed event counters */ #define DDRC_PERF_NUM_GEN_COUNTERS 8 @@ -42,18 +48,28 @@ DDRC_PERF_NUM_FIX_COUNTERS) /* Generic event counter registers */ -#define DDRC_PERF_CFG(n) (DDRC_PERF_CFG_BASE + 8 * (n)) +#define DDRC_PERF_CFG(base, n) ((base) + 8 * (n)) #define EVENT_ENABLE BIT_ULL(63) /* Two dedicated event counters for DDR reads and writes */ #define EVENT_DDR_READS 101 #define EVENT_DDR_WRITES 100 +#define DDRC_PERF_REG(base, n) ((base) + 8 * (n)) /* * programmable events IDs in programmable event counters. * DO NOT change these event-id numbers, they are used to * program event bitmap in h/w. 
*/ +#define EVENT_DFI_CMD_IS_RETRY 61 +#define EVENT_RD_UC_ECC_ERROR 60 +#define EVENT_RD_CRC_ERROR 59 +#define EVENT_CAPAR_ERROR 58 +#define EVENT_WR_CRC_ERROR 57 +#define EVENT_DFI_PARITY_POISON 56 +#define EVENT_RETRY_FIFO_FULL 46 +#define EVENT_DFI_CYCLES 45 + #define EVENT_OP_IS_ZQLATCH 55 #define EVENT_OP_IS_ZQSTART 54 #define EVENT_OP_IS_TCR_MRR 53 @@ -102,28 +118,37 @@ #define EVENT_HIF_RD_OR_WR 1 /* Event counter value registers */ -#define DDRC_PERF_CNT_VALUE_BASE 0x8080 -#define DDRC_PERF_CNT_VALUE(n) (DDRC_PERF_CNT_VALUE_BASE + 8 * (n)) +#define CN10K_DDRC_PERF_CNT_VALUE_BASE 0x8080 +#define ODY_DDRC_PERF_CNT_VALUE_BASE 0x201C0 /* Fixed event counter enable/disable register */ -#define DDRC_PERF_CNT_FREERUN_EN 0x80C0 +#define CN10K_DDRC_PERF_CNT_FREERUN_EN 0x80C0 #define DDRC_PERF_FREERUN_WRITE_EN 0x1 #define DDRC_PERF_FREERUN_READ_EN 0x2 /* Fixed event counter control register */ -#define DDRC_PERF_CNT_FREERUN_CTRL 0x80C8 +#define CN10K_DDRC_PERF_CNT_FREERUN_CTRL 0x80C8 +#define ODY_DDRC_PERF_CNT_FREERUN_CTRL 0x20240 #define DDRC_FREERUN_WRITE_CNT_CLR 0x1 #define DDRC_FREERUN_READ_CNT_CLR 0x2 -/* Fixed event counter value register */ -#define DDRC_PERF_CNT_VALUE_WR_OP 0x80D0 -#define DDRC_PERF_CNT_VALUE_RD_OP 0x80D8 +/* Fixed event counter clear register, defined only for Odyssey */ +#define ODY_DDRC_PERF_CNT_FREERUN_CLR 0x20248 + #define DDRC_PERF_CNT_VALUE_OVERFLOW BIT_ULL(48) #define DDRC_PERF_CNT_MAX_VALUE GENMASK_ULL(48, 0) +/* Fixed event counter value register */ +#define CN10K_DDRC_PERF_CNT_VALUE_WR_OP 0x80D0 +#define CN10K_DDRC_PERF_CNT_VALUE_RD_OP 0x80D8 +#define ODY_DDRC_PERF_CNT_VALUE_WR_OP 0x20250 +#define ODY_DDRC_PERF_CNT_VALUE_RD_OP 0x20258 + struct cn10k_ddr_pmu { struct pmu pmu; void __iomem *base; + const struct ddr_pmu_platform_data *p_data; + const struct ddr_pmu_ops *ops; unsigned int cpu; struct device *dev; int active_events; @@ -132,8 +157,36 @@ struct cn10k_ddr_pmu { struct hlist_node node; }; +struct ddr_pmu_ops { + void 
(*enable_read_freerun_counter)(struct cn10k_ddr_pmu *pmu, + bool enable); + void (*enable_write_freerun_counter)(struct cn10k_ddr_pmu *pmu, + bool enable); + void (*clear_read_freerun_counter)(struct cn10k_ddr_pmu *pmu); + void (*clear_write_freerun_counter)(struct cn10k_ddr_pmu *pmu); + void (*pmu_overflow_handler)(struct cn10k_ddr_pmu *pmu, int evt_idx); +}; + #define to_cn10k_ddr_pmu(p) container_of(p, struct cn10k_ddr_pmu, pmu) +struct ddr_pmu_platform_data { + u64 counter_overflow_val; + u64 counter_max_val; + u64 cnt_base; + u64 cfg_base; + u64 cnt_op_mode_ctrl; + u64 cnt_start_op_ctrl; + u64 cnt_end_op_ctrl; + u64 cnt_end_status; + u64 cnt_freerun_en; + u64 cnt_freerun_ctrl; + u64 cnt_freerun_clr; + u64 cnt_value_wr_op; + u64 cnt_value_rd_op; + bool is_cn10k; + bool is_ody; +}; + static ssize_t cn10k_ddr_pmu_event_show(struct device *dev, struct device_attribute *attr, char *page) @@ -209,6 +262,85 @@ static struct attribute *cn10k_ddr_perf_events_attrs[] = { NULL }; +static struct attribute *odyssey_ddr_perf_events_attrs[] = { + /* Programmable */ + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_or_wr_access, EVENT_HIF_RD_OR_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_wr_access, EVENT_HIF_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_access, EVENT_HIF_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rmw_access, EVENT_HIF_RMW), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_pri_rdaccess, EVENT_HIF_HI_PRI_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_bypass_access, EVENT_READ_BYPASS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_act_bypass_access, EVENT_ACT_BYPASS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_wr_data_access, + EVENT_DFI_WR_DATA_CYCLES), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_rd_data_access, + EVENT_DFI_RD_DATA_CYCLES), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hpri_sched_rd_crit_access, + EVENT_HPR_XACT_WHEN_CRITICAL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_lpri_sched_rd_crit_access, + EVENT_LPR_XACT_WHEN_CRITICAL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_trxn_crit_access, + EVENT_WR_XACT_WHEN_CRITICAL), + 
CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_active_access, EVENT_OP_IS_ACTIVATE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_or_wr_access, + EVENT_OP_IS_RD_OR_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_active_access, + EVENT_OP_IS_RD_ACTIVATE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_read, EVENT_OP_IS_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_write, EVENT_OP_IS_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_mwr, EVENT_OP_IS_MWR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge, EVENT_OP_IS_PRECHARGE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_rdwr, + EVENT_PRECHARGE_FOR_RDWR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_other, + EVENT_PRECHARGE_FOR_OTHER), + CN10K_DDR_PMU_EVENT_ATTR(ddr_rdwr_transitions, EVENT_RDWR_TRANSITIONS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_write_combine, EVENT_WRITE_COMBINE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_war_hazard, EVENT_WAR_HAZARD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_raw_hazard, EVENT_RAW_HAZARD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_waw_hazard, EVENT_WAW_HAZARD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_selfref, EVENT_OP_IS_ENTER_SELFREF), + CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_powerdown, + EVENT_OP_IS_ENTER_POWERDOWN), + CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_mpsm, EVENT_OP_IS_ENTER_MPSM), + CN10K_DDR_PMU_EVENT_ATTR(ddr_refresh, EVENT_OP_IS_REFRESH), + CN10K_DDR_PMU_EVENT_ATTR(ddr_crit_ref, EVENT_OP_IS_CRIT_REF), + CN10K_DDR_PMU_EVENT_ATTR(ddr_spec_ref, EVENT_OP_IS_SPEC_REF), + CN10K_DDR_PMU_EVENT_ATTR(ddr_load_mode, EVENT_OP_IS_LOAD_MODE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_cycles, EVENT_DFI_CYCLES), + CN10K_DDR_PMU_EVENT_ATTR(ddr_retry_fifo_full, + EVENT_RETRY_FIFO_FULL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC), + CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION), + CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd, + EVENT_VISIBLE_WIN_LIMIT_REACHED_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_wr, + 
EVENT_VISIBLE_WIN_LIMIT_REACHED_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mpc, EVENT_OP_IS_DQSOSC_MPC), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mrr, EVENT_OP_IS_DQSOSC_MRR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_tcr_mrr, EVENT_OP_IS_TCR_MRR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_zqstart, EVENT_OP_IS_ZQSTART), + CN10K_DDR_PMU_EVENT_ATTR(ddr_zqlatch, EVENT_OP_IS_ZQLATCH), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_parity_poison, + EVENT_DFI_PARITY_POISON), + CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_crc_error, EVENT_WR_CRC_ERROR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_capar_error, EVENT_CAPAR_ERROR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_crc_error, EVENT_RD_CRC_ERROR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_uc_ecc_error, EVENT_RD_UC_ECC_ERROR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_cmd_is_retry, EVENT_DFI_CMD_IS_RETRY), + /* Free run event counters */ + CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_reads, EVENT_DDR_READS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_writes, EVENT_DDR_WRITES), + NULL +}; + +static struct attribute_group odyssey_ddr_perf_events_attr_group = { + .name = "events", + .attrs = odyssey_ddr_perf_events_attrs, +}; + static struct attribute_group cn10k_ddr_perf_events_attr_group = { .name = "events", .attrs = cn10k_ddr_perf_events_attrs, @@ -254,6 +386,13 @@ static const struct attribute_group *cn10k_attr_groups[] = { NULL, }; +static const struct attribute_group *odyssey_attr_groups[] = { + &odyssey_ddr_perf_events_attr_group, + &cn10k_ddr_perf_format_attr_group, + &cn10k_ddr_perf_cpumask_attr_group, + NULL +}; + /* Default poll timeout is 100 sec, which is very sufficient for * 48 bit counter incremented max at 5.6 GT/s, which may take many * hours to overflow. 
@@ -266,9 +405,18 @@ static ktime_t cn10k_ddr_pmu_timer_period(void) return ms_to_ktime((u64)cn10k_ddr_pmu_poll_period_sec * USEC_PER_SEC); } -static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap) +static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap, + struct cn10k_ddr_pmu *ddr_pmu) { + int err = 0; + switch (eventid) { + case EVENT_DFI_PARITY_POISON ...EVENT_DFI_CMD_IS_RETRY: + if (!ddr_pmu->p_data->is_ody) { + err = -EINVAL; + break; + } + fallthrough; case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD: case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH: *event_bitmap = (1ULL << (eventid - 1)); @@ -279,11 +427,12 @@ static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap) *event_bitmap = (0xFULL << (eventid - 1)); break; default: - pr_err("%s Invalid eventid %d\n", __func__, eventid); - return -EINVAL; + err = -EINVAL; } - return 0; + if (err) + pr_err("%s Invalid eventid %d\n", __func__, eventid); + return err; } static int cn10k_ddr_perf_alloc_counter(struct cn10k_ddr_pmu *pmu, @@ -351,9 +500,33 @@ static int cn10k_ddr_perf_event_init(struct perf_event *event) return 0; } +static void cn10k_ddr_perf_counter_start(struct cn10k_ddr_pmu *ddr_pmu, + int counter) +{ + const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data; + u64 ctrl_reg = p_data->cnt_start_op_ctrl; + + writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base + + DDRC_PERF_REG(ctrl_reg, counter)); +} + +static void cn10k_ddr_perf_counter_stop(struct cn10k_ddr_pmu *ddr_pmu, + int counter) +{ + const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data; + u64 ctrl_reg = p_data->cnt_end_op_ctrl; + + writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base + + DDRC_PERF_REG(ctrl_reg, counter)); +} + static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu, int counter, bool enable) { + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 ctrl_reg = pmu->p_data->cnt_op_mode_ctrl; + const struct ddr_pmu_ops *ops = pmu->ops; + bool is_ody = 
pmu->p_data->is_ody; u32 reg; u64 val; @@ -363,7 +536,7 @@ static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu, } if (counter < DDRC_PERF_NUM_GEN_COUNTERS) { - reg = DDRC_PERF_CFG(counter); + reg = DDRC_PERF_CFG(p_data->cfg_base, counter); val = readq_relaxed(pmu->base + reg); if (enable) @@ -372,40 +545,52 @@ static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu, val &= ~EVENT_ENABLE; writeq_relaxed(val, pmu->base + reg); - } else { - val = readq_relaxed(pmu->base + DDRC_PERF_CNT_FREERUN_EN); - if (enable) { - if (counter == DDRC_PERF_READ_COUNTER_IDX) - val |= DDRC_PERF_FREERUN_READ_EN; - else - val |= DDRC_PERF_FREERUN_WRITE_EN; - } else { - if (counter == DDRC_PERF_READ_COUNTER_IDX) - val &= ~DDRC_PERF_FREERUN_READ_EN; - else - val &= ~DDRC_PERF_FREERUN_WRITE_EN; + + if (is_ody) { + if (enable) { + /* + * Setup the PMU counter to work in + * manual mode + */ + reg = DDRC_PERF_REG(ctrl_reg, counter); + writeq_relaxed(OP_MODE_CTRL_VAL_MANUAL, + pmu->base + reg); + + cn10k_ddr_perf_counter_start(pmu, counter); + } else { + cn10k_ddr_perf_counter_stop(pmu, counter); + } } - writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_EN); + } else { + if (counter == DDRC_PERF_READ_COUNTER_IDX) + ops->enable_read_freerun_counter(pmu, enable); + else + ops->enable_write_freerun_counter(pmu, enable); } } static u64 cn10k_ddr_perf_read_counter(struct cn10k_ddr_pmu *pmu, int counter) { + const struct ddr_pmu_platform_data *p_data = pmu->p_data; u64 val; if (counter == DDRC_PERF_READ_COUNTER_IDX) - return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_RD_OP); + return readq_relaxed(pmu->base + + p_data->cnt_value_rd_op); if (counter == DDRC_PERF_WRITE_COUNTER_IDX) - return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_WR_OP); + return readq_relaxed(pmu->base + + p_data->cnt_value_wr_op); - val = readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE(counter)); + val = readq_relaxed(pmu->base + + DDRC_PERF_REG(p_data->cnt_base, counter)); return val; } static 
void cn10k_ddr_perf_event_update(struct perf_event *event) { struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu); + const struct ddr_pmu_platform_data *p_data = pmu->p_data; struct hw_perf_event *hwc = &event->hw; u64 prev_count, new_count, mask; @@ -414,7 +599,7 @@ static void cn10k_ddr_perf_event_update(struct perf_event *event) new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx); } while (local64_xchg(&hwc->prev_count, new_count) != prev_count); - mask = DDRC_PERF_CNT_MAX_VALUE; + mask = p_data->counter_max_val; local64_add((new_count - prev_count) & mask, &event->count); } @@ -435,6 +620,8 @@ static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags) static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags) { struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu); + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + const struct ddr_pmu_ops *ops = pmu->ops; struct hw_perf_event *hwc = &event->hw; u8 config = event->attr.config; int counter, ret; @@ -454,8 +641,8 @@ static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags) if (counter < DDRC_PERF_NUM_GEN_COUNTERS) { /* Generic counters, configure event id */ - reg_offset = DDRC_PERF_CFG(counter); - ret = ddr_perf_get_event_bitmap(config, &val); + reg_offset = DDRC_PERF_CFG(p_data->cfg_base, counter); + ret = ddr_perf_get_event_bitmap(config, &val, pmu); if (ret) return ret; @@ -463,11 +650,9 @@ static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags) } else { /* fixed event counter, clear counter value */ if (counter == DDRC_PERF_READ_COUNTER_IDX) - val = DDRC_FREERUN_READ_CNT_CLR; + ops->clear_read_freerun_counter(pmu); else - val = DDRC_FREERUN_WRITE_CNT_CLR; - - writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_CTRL); + ops->clear_write_freerun_counter(pmu); } hwc->state |= PERF_HES_STOPPED; @@ -512,17 +697,19 @@ static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags) static void 
cn10k_ddr_perf_pmu_enable(struct pmu *pmu) { struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu); + const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data; writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base + - DDRC_PERF_CNT_START_OP_CTRL); + p_data->cnt_start_op_ctrl); } static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu) { struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu); + const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data; writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base + - DDRC_PERF_CNT_END_OP_CTRL); + p_data->cnt_end_op_ctrl); } static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu) @@ -547,8 +734,123 @@ static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu) } } +static void ddr_pmu_enable_read_freerun(struct cn10k_ddr_pmu *pmu, bool enable) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = readq_relaxed(pmu->base + p_data->cnt_freerun_en); + if (enable) + val |= DDRC_PERF_FREERUN_READ_EN; + else + val &= ~DDRC_PERF_FREERUN_READ_EN; + + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_en); +} + +static void ddr_pmu_enable_write_freerun(struct cn10k_ddr_pmu *pmu, bool enable) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = readq_relaxed(pmu->base + p_data->cnt_freerun_en); + if (enable) + val |= DDRC_PERF_FREERUN_WRITE_EN; + else + val &= ~DDRC_PERF_FREERUN_WRITE_EN; + + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_en); +} + +static void ddr_pmu_read_clear_freerun(struct cn10k_ddr_pmu *pmu) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = DDRC_FREERUN_READ_CNT_CLR; + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl); +} + +static void ddr_pmu_write_clear_freerun(struct cn10k_ddr_pmu *pmu) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = DDRC_FREERUN_WRITE_CNT_CLR; + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl); +} + +static void 
ddr_pmu_overflow_hander(struct cn10k_ddr_pmu *pmu, int evt_idx) +{ + cn10k_ddr_perf_event_update_all(pmu); + cn10k_ddr_perf_pmu_disable(&pmu->pmu); + cn10k_ddr_perf_pmu_enable(&pmu->pmu); +} + +static void ddr_pmu_ody_enable_read_freerun(struct cn10k_ddr_pmu *pmu, + bool enable) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = readq_relaxed(pmu->base + p_data->cnt_freerun_ctrl); + if (enable) + val |= DDRC_PERF_FREERUN_READ_EN; + else + val &= ~DDRC_PERF_FREERUN_READ_EN; + + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl); +} + +static void ddr_pmu_ody_enable_write_freerun(struct cn10k_ddr_pmu *pmu, + bool enable) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = readq_relaxed(pmu->base + p_data->cnt_freerun_ctrl); + if (enable) + val |= DDRC_PERF_FREERUN_WRITE_EN; + else + val &= ~DDRC_PERF_FREERUN_WRITE_EN; + + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl); +} + +static void ddr_pmu_ody_read_clear_freerun(struct cn10k_ddr_pmu *pmu) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = DDRC_FREERUN_READ_CNT_CLR; + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_clr); +} + +static void ddr_pmu_ody_write_clear_freerun(struct cn10k_ddr_pmu *pmu) +{ + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + u64 val; + + val = DDRC_FREERUN_WRITE_CNT_CLR; + writeq_relaxed(val, pmu->base + p_data->cnt_freerun_clr); +} + +static void ddr_pmu_ody_overflow_hander(struct cn10k_ddr_pmu *pmu, int evt_idx) +{ + /* + * On reaching the maximum value of the counter, the counter freezes + * there. 
The particular event is updated and the respective counter + * is stopped and started again so that it starts counting from zero + */ + cn10k_ddr_perf_event_update(pmu->events[evt_idx]); + cn10k_ddr_perf_counter_stop(pmu, evt_idx); + cn10k_ddr_perf_counter_start(pmu, evt_idx); +} + static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu) { + const struct ddr_pmu_platform_data *p_data = pmu->p_data; + const struct ddr_pmu_ops *ops = pmu->ops; struct perf_event *event; struct hw_perf_event *hwc; u64 prev_count, new_count; @@ -586,11 +888,9 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu) continue; value = cn10k_ddr_perf_read_counter(pmu, i); - if (value == DDRC_PERF_CNT_MAX_VALUE) { + if (value == p_data->counter_max_val) { pr_info("Counter-(%d) reached max value\n", i); - cn10k_ddr_perf_event_update_all(pmu); - cn10k_ddr_perf_pmu_disable(&pmu->pmu); - cn10k_ddr_perf_pmu_enable(&pmu->pmu); + ops->pmu_overflow_handler(pmu, i); } } @@ -629,11 +929,68 @@ static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; } +static const struct ddr_pmu_ops ddr_pmu_ops = { + .enable_read_freerun_counter = ddr_pmu_enable_read_freerun, + .enable_write_freerun_counter = ddr_pmu_enable_write_freerun, + .clear_read_freerun_counter = ddr_pmu_read_clear_freerun, + .clear_write_freerun_counter = ddr_pmu_write_clear_freerun, + .pmu_overflow_handler = ddr_pmu_overflow_hander, +}; + +#if defined(CONFIG_ACPI) || defined(CONFIG_OF) +static const struct ddr_pmu_platform_data cn10k_ddr_pmu_pdata = { + .counter_overflow_val = BIT_ULL(48), + .counter_max_val = GENMASK_ULL(48, 0), + .cnt_base = CN10K_DDRC_PERF_CNT_VALUE_BASE, + .cfg_base = CN10K_DDRC_PERF_CFG_BASE, + .cnt_op_mode_ctrl = CN10K_DDRC_PERF_CNT_OP_MODE_CTRL, + .cnt_start_op_ctrl = CN10K_DDRC_PERF_CNT_START_OP_CTRL, + .cnt_end_op_ctrl = CN10K_DDRC_PERF_CNT_END_OP_CTRL, + .cnt_end_status = CN10K_DDRC_PERF_CNT_END_STATUS, + .cnt_freerun_en = 
CN10K_DDRC_PERF_CNT_FREERUN_EN, + .cnt_freerun_ctrl = CN10K_DDRC_PERF_CNT_FREERUN_CTRL, + .cnt_freerun_clr = 0, + .cnt_value_wr_op = CN10K_DDRC_PERF_CNT_VALUE_WR_OP, + .cnt_value_rd_op = CN10K_DDRC_PERF_CNT_VALUE_RD_OP, + .is_cn10k = TRUE, +}; +#endif + +static const struct ddr_pmu_ops ddr_pmu_ody_ops = { + .enable_read_freerun_counter = ddr_pmu_ody_enable_read_freerun, + .enable_write_freerun_counter = ddr_pmu_ody_enable_write_freerun, + .clear_read_freerun_counter = ddr_pmu_ody_read_clear_freerun, + .clear_write_freerun_counter = ddr_pmu_ody_write_clear_freerun, + .pmu_overflow_handler = ddr_pmu_ody_overflow_hander, +}; + +#ifdef CONFIG_ACPI +static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = { + .counter_overflow_val = 0, + .counter_max_val = GENMASK_ULL(63, 0), + .cnt_base = ODY_DDRC_PERF_CNT_VALUE_BASE, + .cfg_base = ODY_DDRC_PERF_CFG_BASE, + .cnt_op_mode_ctrl = ODY_DDRC_PERF_CNT_OP_MODE_CTRL, + .cnt_start_op_ctrl = ODY_DDRC_PERF_CNT_START_OP_CTRL, + .cnt_end_op_ctrl = ODY_DDRC_PERF_CNT_END_OP_CTRL, + .cnt_end_status = ODY_DDRC_PERF_CNT_END_STATUS, + .cnt_freerun_en = 0, + .cnt_freerun_ctrl = ODY_DDRC_PERF_CNT_FREERUN_CTRL, + .cnt_freerun_clr = ODY_DDRC_PERF_CNT_FREERUN_CLR, + .cnt_value_wr_op = ODY_DDRC_PERF_CNT_VALUE_WR_OP, + .cnt_value_rd_op = ODY_DDRC_PERF_CNT_VALUE_RD_OP, + .is_ody = TRUE, +}; +#endif + static int cn10k_ddr_perf_probe(struct platform_device *pdev) { + const struct ddr_pmu_platform_data *dev_data; struct cn10k_ddr_pmu *ddr_pmu; struct resource *res; void __iomem *base; + bool is_cn10k; + bool is_ody; char *name; int ret; @@ -644,30 +1001,60 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev) ddr_pmu->dev = &pdev->dev; platform_set_drvdata(pdev, ddr_pmu); + dev_data = device_get_match_data(&pdev->dev); + if (!dev_data) { + dev_err(&pdev->dev, "Error: No device match data found\n"); + return -ENODEV; + } + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(base)) return PTR_ERR(base); 
ddr_pmu->base = base; - /* Setup the PMU counter to work in manual mode */ - writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base + - DDRC_PERF_CNT_OP_MODE_CTRL); - - ddr_pmu->pmu = (struct pmu) { - .module = THIS_MODULE, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - .task_ctx_nr = perf_invalid_context, - .attr_groups = cn10k_attr_groups, - .event_init = cn10k_ddr_perf_event_init, - .add = cn10k_ddr_perf_event_add, - .del = cn10k_ddr_perf_event_del, - .start = cn10k_ddr_perf_event_start, - .stop = cn10k_ddr_perf_event_stop, - .read = cn10k_ddr_perf_event_update, - .pmu_enable = cn10k_ddr_perf_pmu_enable, - .pmu_disable = cn10k_ddr_perf_pmu_disable, - }; + ddr_pmu->p_data = dev_data; + is_cn10k = ddr_pmu->p_data->is_cn10k; + is_ody = ddr_pmu->p_data->is_ody; + + if (is_cn10k) { + ddr_pmu->ops = &ddr_pmu_ops; + /* Setup the PMU counter to work in manual mode */ + writeq_relaxed(OP_MODE_CTRL_VAL_MANUAL, ddr_pmu->base + + ddr_pmu->p_data->cnt_op_mode_ctrl); + + ddr_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .attr_groups = cn10k_attr_groups, + .event_init = cn10k_ddr_perf_event_init, + .add = cn10k_ddr_perf_event_add, + .del = cn10k_ddr_perf_event_del, + .start = cn10k_ddr_perf_event_start, + .stop = cn10k_ddr_perf_event_stop, + .read = cn10k_ddr_perf_event_update, + .pmu_enable = cn10k_ddr_perf_pmu_enable, + .pmu_disable = cn10k_ddr_perf_pmu_disable, + }; + } + + if (is_ody) { + ddr_pmu->ops = &ddr_pmu_ody_ops; + + ddr_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .attr_groups = odyssey_attr_groups, + .event_init = cn10k_ddr_perf_event_init, + .add = cn10k_ddr_perf_event_add, + .del = cn10k_ddr_perf_event_del, + .start = cn10k_ddr_perf_event_start, + .stop = cn10k_ddr_perf_event_stop, + .read = cn10k_ddr_perf_event_update, + }; + } /* Choose this cpu to collect perf data */ ddr_pmu->cpu = 
raw_smp_processor_id(); @@ -688,7 +1075,7 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev) if (ret) goto error; - pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start); + pr_info("DDR PMU Driver for ddrc@%llx\n", res->start); return 0; error: cpuhp_state_remove_instance_nocalls( @@ -697,7 +1084,7 @@ error: return ret; } -static int cn10k_ddr_perf_remove(struct platform_device *pdev) +static void cn10k_ddr_perf_remove(struct platform_device *pdev) { struct cn10k_ddr_pmu *ddr_pmu = platform_get_drvdata(pdev); @@ -706,12 +1093,11 @@ static int cn10k_ddr_perf_remove(struct platform_device *pdev) &ddr_pmu->node); perf_pmu_unregister(&ddr_pmu->pmu); - return 0; } #ifdef CONFIG_OF static const struct of_device_id cn10k_ddr_pmu_of_match[] = { - { .compatible = "marvell,cn10k-ddr-pmu", }, + { .compatible = "marvell,cn10k-ddr-pmu", .data = &cn10k_ddr_pmu_pdata }, { }, }; MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match); @@ -719,7 +1105,8 @@ MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match); #ifdef CONFIG_ACPI static const struct acpi_device_id cn10k_ddr_pmu_acpi_match[] = { - {"MRVL000A", 0}, + {"MRVL000A", (kernel_ulong_t)&cn10k_ddr_pmu_pdata }, + {"MRVL000C", (kernel_ulong_t)&odyssey_ddr_pmu_pdata}, {}, }; MODULE_DEVICE_TABLE(acpi, cn10k_ddr_pmu_acpi_match); @@ -764,4 +1151,5 @@ module_init(cn10k_ddr_pmu_init); module_exit(cn10k_ddr_pmu_exit); MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>"); +MODULE_DESCRIPTION("Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c index fec8e82edb95..51ccb0befa05 100644 --- a/drivers/perf/marvell_cn10k_tad_pmu.c +++ b/drivers/perf/marvell_cn10k_tad_pmu.c @@ -37,6 +37,15 @@ struct tad_pmu { DECLARE_BITMAP(counters_map, TAD_MAX_COUNTERS); }; +enum mrvl_tad_pmu_version { + TAD_PMU_V1 = 1, + TAD_PMU_V2, +}; + +struct tad_pmu_data { + int id; +}; + static int tad_pmu_cpuhp_state; static void 
tad_pmu_event_counter_read(struct perf_event *event) @@ -214,6 +223,24 @@ static const struct attribute_group tad_pmu_events_attr_group = { .attrs = tad_pmu_event_attrs, }; +static struct attribute *ody_tad_pmu_event_attrs[] = { + TAD_PMU_EVENT_ATTR(tad_req_msh_in_exlmn, 0x3), + TAD_PMU_EVENT_ATTR(tad_alloc_dtg, 0x1a), + TAD_PMU_EVENT_ATTR(tad_alloc_ltg, 0x1b), + TAD_PMU_EVENT_ATTR(tad_alloc_any, 0x1c), + TAD_PMU_EVENT_ATTR(tad_hit_dtg, 0x1d), + TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e), + TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f), + TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20), + TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xFF), + NULL +}; + +static const struct attribute_group ody_tad_pmu_events_attr_group = { + .name = "events", + .attrs = ody_tad_pmu_event_attrs, +}; + PMU_FORMAT_ATTR(event, "config:0-7"); static struct attribute *tad_pmu_format_attrs[] = { @@ -252,8 +279,16 @@ static const struct attribute_group *tad_pmu_attr_groups[] = { NULL }; +static const struct attribute_group *ody_tad_pmu_attr_groups[] = { + &ody_tad_pmu_events_attr_group, + &tad_pmu_format_attr_group, + &tad_pmu_cpumask_attr_group, + NULL +}; + static int tad_pmu_probe(struct platform_device *pdev) { + const struct tad_pmu_data *dev_data; struct device *dev = &pdev->dev; struct tad_region *regions; struct tad_pmu *tad_pmu; @@ -261,6 +296,7 @@ static int tad_pmu_probe(struct platform_device *pdev) u32 tad_pmu_page_size; u32 tad_page_size; u32 tad_cnt; + int version; int i, ret; char *name; @@ -270,6 +306,13 @@ static int tad_pmu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, tad_pmu); + dev_data = device_get_match_data(&pdev->dev); + if (!dev_data) { + dev_err(&pdev->dev, "Error: No device match data found\n"); + return -ENODEV; + } + version = dev_data->id; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(&pdev->dev, "Mem resource not found\n"); @@ -319,7 +362,6 @@ static int tad_pmu_probe(struct platform_device *pdev) tad_pmu->pmu = (struct pmu) { .module = 
THIS_MODULE, - .attr_groups = tad_pmu_attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, .task_ctx_nr = perf_invalid_context, @@ -332,6 +374,11 @@ static int tad_pmu_probe(struct platform_device *pdev) .read = tad_pmu_event_counter_read, }; + if (version == TAD_PMU_V1) + tad_pmu->pmu.attr_groups = tad_pmu_attr_groups; + else + tad_pmu->pmu.attr_groups = ody_tad_pmu_attr_groups; + tad_pmu->cpu = raw_smp_processor_id(); /* Register pmu instance for cpu hotplug */ @@ -351,27 +398,38 @@ static int tad_pmu_probe(struct platform_device *pdev) return ret; } -static int tad_pmu_remove(struct platform_device *pdev) +static void tad_pmu_remove(struct platform_device *pdev) { struct tad_pmu *pmu = platform_get_drvdata(pdev); cpuhp_state_remove_instance_nocalls(tad_pmu_cpuhp_state, &pmu->node); perf_pmu_unregister(&pmu->pmu); - - return 0; } +#if defined(CONFIG_OF) || defined(CONFIG_ACPI) +static const struct tad_pmu_data tad_pmu_data = { + .id = TAD_PMU_V1, +}; +#endif + +#ifdef CONFIG_ACPI +static const struct tad_pmu_data tad_pmu_v2_data = { + .id = TAD_PMU_V2, +}; +#endif + #ifdef CONFIG_OF static const struct of_device_id tad_pmu_of_match[] = { - { .compatible = "marvell,cn10k-tad-pmu", }, + { .compatible = "marvell,cn10k-tad-pmu", .data = &tad_pmu_data }, {}, }; #endif #ifdef CONFIG_ACPI static const struct acpi_device_id tad_pmu_acpi_match[] = { - {"MRVL000B", 0}, + {"MRVL000B", (kernel_ulong_t)&tad_pmu_data}, + {"MRVL000D", (kernel_ulong_t)&tad_pmu_v2_data}, {}, }; MODULE_DEVICE_TABLE(acpi, tad_pmu_acpi_match); diff --git a/drivers/perf/marvell_pem_pmu.c b/drivers/perf/marvell_pem_pmu.c new file mode 100644 index 000000000000..29fbcd1848e4 --- /dev/null +++ b/drivers/perf/marvell_pem_pmu.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Marvell PEM(PCIe RC) Performance Monitor Driver + * + * Copyright (C) 2024 Marvell. 
+ */ + +#include <linux/acpi.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/perf_event.h> +#include <linux/platform_device.h> + +/* + * Each of these events maps to a free running 64 bit counter + * with no event control, but can be reset. + */ +enum pem_events { + IB_TLP_NPR, + IB_TLP_PR, + IB_TLP_CPL, + IB_TLP_DWORDS_NPR, + IB_TLP_DWORDS_PR, + IB_TLP_DWORDS_CPL, + IB_INFLIGHT, + IB_READS, + IB_REQ_NO_RO_NCB, + IB_REQ_NO_RO_EBUS, + OB_TLP_NPR, + OB_TLP_PR, + OB_TLP_CPL, + OB_TLP_DWORDS_NPR, + OB_TLP_DWORDS_PR, + OB_TLP_DWORDS_CPL, + OB_INFLIGHT, + OB_READS, + OB_MERGES_NPR, + OB_MERGES_PR, + OB_MERGES_CPL, + ATS_TRANS, + ATS_TRANS_LATENCY, + ATS_PRI, + ATS_PRI_LATENCY, + ATS_INV, + ATS_INV_LATENCY, + PEM_EVENTIDS_MAX +}; + +static u64 eventid_to_offset_table[] = { + [IB_TLP_NPR] = 0x0, + [IB_TLP_PR] = 0x8, + [IB_TLP_CPL] = 0x10, + [IB_TLP_DWORDS_NPR] = 0x100, + [IB_TLP_DWORDS_PR] = 0x108, + [IB_TLP_DWORDS_CPL] = 0x110, + [IB_INFLIGHT] = 0x200, + [IB_READS] = 0x300, + [IB_REQ_NO_RO_NCB] = 0x400, + [IB_REQ_NO_RO_EBUS] = 0x408, + [OB_TLP_NPR] = 0x500, + [OB_TLP_PR] = 0x508, + [OB_TLP_CPL] = 0x510, + [OB_TLP_DWORDS_NPR] = 0x600, + [OB_TLP_DWORDS_PR] = 0x608, + [OB_TLP_DWORDS_CPL] = 0x610, + [OB_INFLIGHT] = 0x700, + [OB_READS] = 0x800, + [OB_MERGES_NPR] = 0x900, + [OB_MERGES_PR] = 0x908, + [OB_MERGES_CPL] = 0x910, + [ATS_TRANS] = 0x2D18, + [ATS_TRANS_LATENCY] = 0x2D20, + [ATS_PRI] = 0x2D28, + [ATS_PRI_LATENCY] = 0x2D30, + [ATS_INV] = 0x2D38, + [ATS_INV_LATENCY] = 0x2D40, +}; + +struct pem_pmu { + struct pmu pmu; + void __iomem *base; + unsigned int cpu; + struct device *dev; + struct hlist_node node; +}; + +#define to_pem_pmu(p) container_of(p, struct pem_pmu, pmu) + +static int eventid_to_offset(int eventid) +{ + return eventid_to_offset_table[eventid]; +} + +/* Events */ +static ssize_t pem_pmu_event_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + 
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); +} + +#define PEM_EVENT_ATTR(_name, _id) \ + (&((struct perf_pmu_events_attr[]) { \ + { .attr = __ATTR(_name, 0444, pem_pmu_event_show, NULL), \ + .id = _id, } \ + })[0].attr.attr) + +static struct attribute *pem_perf_events_attrs[] = { + PEM_EVENT_ATTR(ib_tlp_npr, IB_TLP_NPR), + PEM_EVENT_ATTR(ib_tlp_pr, IB_TLP_PR), + PEM_EVENT_ATTR(ib_tlp_cpl_partid, IB_TLP_CPL), + PEM_EVENT_ATTR(ib_tlp_dwords_npr, IB_TLP_DWORDS_NPR), + PEM_EVENT_ATTR(ib_tlp_dwords_pr, IB_TLP_DWORDS_PR), + PEM_EVENT_ATTR(ib_tlp_dwords_cpl_partid, IB_TLP_DWORDS_CPL), + PEM_EVENT_ATTR(ib_inflight, IB_INFLIGHT), + PEM_EVENT_ATTR(ib_reads, IB_READS), + PEM_EVENT_ATTR(ib_req_no_ro_ncb, IB_REQ_NO_RO_NCB), + PEM_EVENT_ATTR(ib_req_no_ro_ebus, IB_REQ_NO_RO_EBUS), + PEM_EVENT_ATTR(ob_tlp_npr_partid, OB_TLP_NPR), + PEM_EVENT_ATTR(ob_tlp_pr_partid, OB_TLP_PR), + PEM_EVENT_ATTR(ob_tlp_cpl_partid, OB_TLP_CPL), + PEM_EVENT_ATTR(ob_tlp_dwords_npr_partid, OB_TLP_DWORDS_NPR), + PEM_EVENT_ATTR(ob_tlp_dwords_pr_partid, OB_TLP_DWORDS_PR), + PEM_EVENT_ATTR(ob_tlp_dwords_cpl_partid, OB_TLP_DWORDS_CPL), + PEM_EVENT_ATTR(ob_inflight_partid, OB_INFLIGHT), + PEM_EVENT_ATTR(ob_reads_partid, OB_READS), + PEM_EVENT_ATTR(ob_merges_npr_partid, OB_MERGES_NPR), + PEM_EVENT_ATTR(ob_merges_pr_partid, OB_MERGES_PR), + PEM_EVENT_ATTR(ob_merges_cpl_partid, OB_MERGES_CPL), + PEM_EVENT_ATTR(ats_trans, ATS_TRANS), + PEM_EVENT_ATTR(ats_trans_latency, ATS_TRANS_LATENCY), + PEM_EVENT_ATTR(ats_pri, ATS_PRI), + PEM_EVENT_ATTR(ats_pri_latency, ATS_PRI_LATENCY), + PEM_EVENT_ATTR(ats_inv, ATS_INV), + PEM_EVENT_ATTR(ats_inv_latency, ATS_INV_LATENCY), + NULL +}; + +static struct attribute_group pem_perf_events_attr_group = { + .name = "events", + .attrs = pem_perf_events_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-5"); + +static struct attribute *pem_perf_format_attrs[] = { + &format_attr_event.attr, + NULL +}; + 
+static struct attribute_group pem_perf_format_attr_group = { + .name = "format", + .attrs = pem_perf_format_attrs, +}; + +/* cpumask */ +static ssize_t pem_perf_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pem_pmu *pmu = dev_get_drvdata(dev); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu)); +} + +static struct device_attribute pem_perf_cpumask_attr = + __ATTR(cpumask, 0444, pem_perf_cpumask_show, NULL); + +static struct attribute *pem_perf_cpumask_attrs[] = { + &pem_perf_cpumask_attr.attr, + NULL +}; + +static struct attribute_group pem_perf_cpumask_attr_group = { + .attrs = pem_perf_cpumask_attrs, +}; + +static const struct attribute_group *pem_perf_attr_groups[] = { + &pem_perf_events_attr_group, + &pem_perf_cpumask_attr_group, + &pem_perf_format_attr_group, + NULL +}; + +static int pem_perf_event_init(struct perf_event *event) +{ + struct pem_pmu *pmu = to_pem_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct perf_event *sibling; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (event->attr.config >= PEM_EVENTIDS_MAX) + return -EINVAL; + + if (is_sampling_event(event) || + event->attach_state & PERF_ATTACH_TASK) { + return -EOPNOTSUPP; + } + + if (event->cpu < 0) + return -EOPNOTSUPP; + + /* We must NOT create groups containing mixed PMUs */ + if (event->group_leader->pmu != event->pmu && + !is_software_event(event->group_leader)) + return -EINVAL; + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling->pmu != event->pmu && + !is_software_event(sibling)) + return -EINVAL; + } + /* + * Set ownership of event to one CPU, same event can not be observed + * on multiple cpus at same time. 
+ */ + event->cpu = pmu->cpu; + hwc->idx = -1; + return 0; +} + +static u64 pem_perf_read_counter(struct pem_pmu *pmu, + struct perf_event *event, int eventid) +{ + return readq_relaxed(pmu->base + eventid_to_offset(eventid)); +} + +static void pem_perf_event_update(struct perf_event *event) +{ + struct pem_pmu *pmu = to_pem_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 prev_count, new_count; + + do { + prev_count = local64_read(&hwc->prev_count); + new_count = pem_perf_read_counter(pmu, event, hwc->idx); + } while (local64_xchg(&hwc->prev_count, new_count) != prev_count); + + local64_add((new_count - prev_count), &event->count); +} + +static void pem_perf_event_start(struct perf_event *event, int flags) +{ + struct pem_pmu *pmu = to_pem_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int eventid = hwc->idx; + + /* + * All counters are free-running and associated with + * a fixed event to track in Hardware + */ + local64_set(&hwc->prev_count, + pem_perf_read_counter(pmu, event, eventid)); + + hwc->state = 0; +} + +static int pem_perf_event_add(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + hwc->idx = event->attr.config; + if (WARN_ON_ONCE(hwc->idx >= PEM_EVENTIDS_MAX)) + return -EINVAL; + hwc->state |= PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + pem_perf_event_start(event, flags); + + return 0; +} + +static void pem_perf_event_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (flags & PERF_EF_UPDATE) + pem_perf_event_update(event); + + hwc->state |= PERF_HES_STOPPED; +} + +static void pem_perf_event_del(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + pem_perf_event_stop(event, PERF_EF_UPDATE); + hwc->idx = -1; +} + +static int pem_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct pem_pmu *pmu = hlist_entry_safe(node, struct pem_pmu, node); + unsigned int target; + + if (cpu != pmu->cpu) 
+ return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&pmu->pmu, cpu, target); + pmu->cpu = target; + return 0; +} + +static int pem_perf_probe(struct platform_device *pdev) +{ + struct pem_pmu *pem_pmu; + struct resource *res; + void __iomem *base; + char *name; + int ret; + + pem_pmu = devm_kzalloc(&pdev->dev, sizeof(*pem_pmu), GFP_KERNEL); + if (!pem_pmu) + return -ENOMEM; + + pem_pmu->dev = &pdev->dev; + platform_set_drvdata(pdev, pem_pmu); + + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(base)) + return PTR_ERR(base); + + pem_pmu->base = base; + + pem_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .attr_groups = pem_perf_attr_groups, + .event_init = pem_perf_event_init, + .add = pem_perf_event_add, + .del = pem_perf_event_del, + .start = pem_perf_event_start, + .stop = pem_perf_event_stop, + .read = pem_perf_event_update, + }; + + /* Choose this cpu to collect perf data */ + pem_pmu->cpu = raw_smp_processor_id(); + + name = devm_kasprintf(pem_pmu->dev, GFP_KERNEL, "mrvl_pcie_rc_pmu_%llx", + res->start); + if (!name) + return -ENOMEM; + + cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE, + &pem_pmu->node); + + ret = perf_pmu_register(&pem_pmu->pmu, name, -1); + if (ret) + goto error; + + return 0; +error: + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE, + &pem_pmu->node); + return ret; +} + +static void pem_perf_remove(struct platform_device *pdev) +{ + struct pem_pmu *pem_pmu = platform_get_drvdata(pdev); + + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE, + &pem_pmu->node); + + perf_pmu_unregister(&pem_pmu->pmu); +} + +#ifdef CONFIG_ACPI +static const struct acpi_device_id pem_pmu_acpi_match[] = { + {"MRVL000E", 0}, + {} +}; +MODULE_DEVICE_TABLE(acpi, pem_pmu_acpi_match); +#endif + +static struct 
platform_driver pem_pmu_driver = { + .driver = { + .name = "pem-pmu", + .acpi_match_table = ACPI_PTR(pem_pmu_acpi_match), + .suppress_bind_attrs = true, + }, + .probe = pem_perf_probe, + .remove = pem_perf_remove, +}; + +static int __init pem_pmu_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE, + "perf/marvell/pem:online", NULL, + pem_pmu_offline_cpu); + if (ret) + return ret; + + ret = platform_driver_register(&pem_pmu_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE); + return ret; +} + +static void __exit pem_pmu_exit(void) +{ + platform_driver_unregister(&pem_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE); +} + +module_init(pem_pmu_init); +module_exit(pem_pmu_exit); + +MODULE_DESCRIPTION("Marvell PEM Perf driver"); +MODULE_AUTHOR("Gowthami Thiagarajan <gthiagarajan@marvell.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 3f9a98c17a89..ea8c85729937 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -801,9 +801,8 @@ static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { - struct cluster_pmu *cluster; struct l2cache_pmu *l2cache_pmu; - cpumask_t cluster_online_cpus; + struct cluster_pmu *cluster; unsigned int target; l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node); @@ -820,9 +819,8 @@ static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) cluster->on_cpu = -1; /* Any other CPU for this cluster which is still online */ - cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus, - cpu_online_mask); - target = cpumask_any_but(&cluster_online_cpus, cpu); + target = cpumask_any_and_but(&cluster->cluster_cpus, + cpu_online_mask, cpu); if (target >= nr_cpu_ids) { disable_irq(cluster->irq); return 0; @@ -904,6 +902,7 @@ static int 
l2_cache_pmu_probe(struct platform_device *pdev) l2cache_pmu->pmu = (struct pmu) { /* suffix is instance id for future use with multiple sockets */ .name = "l2cache_0", + .parent = &pdev->dev, .task_ctx_nr = perf_invalid_context, .pmu_enable = l2_cache_pmu_enable, .pmu_disable = l2_cache_pmu_disable, @@ -965,7 +964,7 @@ out_unregister: return err; } -static int l2_cache_pmu_remove(struct platform_device *pdev) +static void l2_cache_pmu_remove(struct platform_device *pdev) { struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(platform_get_drvdata(pdev)); @@ -973,7 +972,6 @@ static int l2_cache_pmu_remove(struct platform_device *pdev) perf_pmu_unregister(&l2cache_pmu->pmu); cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, &l2cache_pmu->node); - return 0; } static struct platform_driver l2_cache_pmu_driver = { diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index f16783d03db7..66e6cabd6fff 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -609,18 +609,9 @@ static void qcom_l3_cache__event_read(struct perf_event *event) /* formats */ -static ssize_t l3cache_pmu_format_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dev_ext_attribute *eattr; - - eattr = container_of(attr, struct dev_ext_attribute, attr); - return sysfs_emit(buf, "%s\n", (char *) eattr->var); -} - #define L3CACHE_PMU_FORMAT_ATTR(_name, _config) \ (&((struct dev_ext_attribute[]) { \ - { .attr = __ATTR(_name, 0444, l3cache_pmu_format_show, NULL), \ + { .attr = __ATTR(_name, 0444, device_show_string, NULL), \ .var = (void *) _config, } \ })[0].attr.attr) @@ -748,6 +739,7 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev) return -ENOMEM; l3pmu->pmu = (struct pmu) { + .parent = &pdev->dev, .task_ctx_nr = perf_invalid_context, .pmu_enable = qcom_l3_cache__pmu_enable, diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index c78a6fd6c57f..7644147d50b4 100644 --- a/drivers/perf/riscv_pmu.c +++ 
b/drivers/perf/riscv_pmu.c @@ -39,7 +39,6 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time_short = 0; userpg->cap_user_rdpmc = riscv_perf_user_access(event); -#ifdef CONFIG_RISCV_PMU /* * The counters are 64-bit but the priv spec doesn't mandate all the * bits to be implemented: that's why, counter width can vary based on @@ -47,7 +46,6 @@ void arch_perf_update_userpage(struct perf_event *event, */ if (userpg->cap_user_rdpmc) userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1; -#endif do { rd = sched_clock_read_begin(&seq); @@ -167,7 +165,7 @@ u64 riscv_pmu_event_update(struct perf_event *event) unsigned long cmask; u64 oldval, delta; - if (!rvpmu->ctr_read) + if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE)) return 0; cmask = riscv_pmu_ctr_get_width_mask(event); @@ -191,8 +189,6 @@ void riscv_pmu_stop(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); - WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); - if (!(hwc->state & PERF_HES_STOPPED)) { if (rvpmu->ctr_stop) { rvpmu->ctr_stop(event, 0); @@ -313,6 +309,10 @@ static int riscv_pmu_event_init(struct perf_event *event) u64 event_config = 0; uint64_t cmask; + /* driver does not support branch stack sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + hwc->flags = 0; mapped_event = rvpmu->event_map(event, &event_config); if (mapped_event < 0) { @@ -404,6 +404,7 @@ struct riscv_pmu *riscv_pmu_alloc(void) cpuc->n_events = 0; for (i = 0; i < RISCV_MAX_COUNTERS; i++) cpuc->events[i] = NULL; + cpuc->snapshot_addr = NULL; } pmu->pmu = (struct pmu) { .event_init = riscv_pmu_event_init, diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c index fa0bccf4edf2..93c8e0fdb589 100644 --- a/drivers/perf/riscv_pmu_legacy.c +++ b/drivers/perf/riscv_pmu_legacy.c @@ -22,13 +22,13 @@ static int pmu_legacy_ctr_get_idx(struct perf_event *event) struct 
perf_event_attr *attr = &event->attr; if (event->attr.type != PERF_TYPE_HARDWARE) - return -EOPNOTSUPP; + return -ENOENT; if (attr->config == PERF_COUNT_HW_CPU_CYCLES) return RISCV_PMU_LEGACY_CYCLE; else if (attr->config == PERF_COUNT_HW_INSTRUCTIONS) return RISCV_PMU_LEGACY_INSTRET; else - return -EOPNOTSUPP; + return -ENOENT; } /* For legacy config & counter index are same */ @@ -136,6 +136,7 @@ static int pmu_legacy_device_probe(struct platform_device *pdev) pmu = riscv_pmu_alloc(); if (!pmu) return -ENOMEM; + pmu->pmu.parent = &pdev->dev; pmu_legacy_init(pmu); return 0; diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 452aab49db1e..698de8ddf895 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -19,10 +19,37 @@ #include <linux/of.h> #include <linux/cpu_pm.h> #include <linux/sched/clock.h> +#include <linux/soc/andes/irq.h> +#include <linux/workqueue.h> #include <asm/errata_list.h> #include <asm/sbi.h> #include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> + +#define ALT_SBI_PMU_OVERFLOW(__ovl) \ +asm volatile(ALTERNATIVE_2( \ + "csrr %0, " __stringify(CSR_SCOUNTOVF), \ + "csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \ + THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \ + CONFIG_ERRATA_THEAD_PMU, \ + "csrr %0, " __stringify(ANDES_CSR_SCOUNTEROF), \ + ANDES_VENDOR_ID, \ + RISCV_ISA_VENDOR_EXT_XANDESPMU + RISCV_VENDOR_EXT_ALTERNATIVES_BASE, \ + CONFIG_ANDES_CUSTOM_PMU) \ + : "=r" (__ovl) : \ + : "memory") + +#define ALT_SBI_PMU_OVF_CLEAR_PENDING(__irq_mask) \ +asm volatile(ALTERNATIVE( \ + "csrc " __stringify(CSR_IP) ", %0\n\t", \ + "csrc " __stringify(ANDES_CSR_SLIP) ", %0\n\t", \ + ANDES_VENDOR_ID, \ + RISCV_ISA_VENDOR_EXT_XANDESPMU + RISCV_VENDOR_EXT_ALTERNATIVES_BASE, \ + CONFIG_ANDES_CUSTOM_PMU) \ + : : "r"(__irq_mask) \ + : "memory") #define SYSCTL_NO_USER_ACCESS 0 #define SYSCTL_USER_ACCESS 1 @@ -33,7 +60,12 @@ #define PERF_EVENT_FLAG_LEGACY 
BIT(SYSCTL_LEGACY) PMU_FORMAT_ATTR(event, "config:0-47"); -PMU_FORMAT_ATTR(firmware, "config:63"); +PMU_FORMAT_ATTR(firmware, "config:62-63"); + +static bool sbi_v2_available; +static DEFINE_STATIC_KEY_FALSE(sbi_pmu_snapshot_available); +#define sbi_pmu_snapshot_available() \ + static_branch_unlikely(&sbi_pmu_snapshot_available) static struct attribute *riscv_arch_formats_attr[] = { &format_attr_event.attr, @@ -61,6 +93,7 @@ static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS; static union sbi_pmu_ctr_info *pmu_ctr_list; static bool riscv_pmu_use_irq; static unsigned int riscv_pmu_irq_num; +static unsigned int riscv_pmu_irq_mask; static unsigned int riscv_pmu_irq; /* Cache the available counters in a bitmask */ @@ -86,7 +119,7 @@ struct sbi_pmu_event_data { }; }; -static const struct sbi_pmu_event_data pmu_hw_event_map[] = { +static struct sbi_pmu_event_data pmu_hw_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = { SBI_PMU_HW_CPU_CYCLES, SBI_PMU_EVENT_TYPE_HW, 0}}, @@ -120,7 +153,7 @@ static const struct sbi_pmu_event_data pmu_hw_event_map[] = { }; #define C(x) PERF_COUNT_HW_CACHE_##x -static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] +static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { [C(L1D)] = { @@ -265,6 +298,34 @@ static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_M }, }; +static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, + 0, cmask, 0, edata->event_idx, 0, 0); + if (!ret.error) { + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, + ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); + } else if (ret.error == SBI_ERR_NOT_SUPPORTED) { + /* This event cannot be monitored by any counter */ + edata->event_idx = -ENOENT; + } +} + +static void pmu_sbi_check_std_events(struct 
work_struct *work) +{ + for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) + pmu_sbi_check_event(&pmu_hw_event_map[i]); + + for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) + for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) + for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) + pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]); +} + +static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events); + static int pmu_sbi_ctr_get_width(int idx) { return pmu_ctr_list[idx].width; @@ -355,13 +416,13 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event) * but not in the user access mode as we want to use the other counters * that support sampling/filtering. */ - if (hwc->flags & PERF_EVENT_FLAG_LEGACY) { + if ((hwc->flags & PERF_EVENT_FLAG_LEGACY) && (event->attr.type == PERF_TYPE_HARDWARE)) { if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) { cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH; cmask = 1; } else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) { cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH; - cmask = 1UL << (CSR_INSTRET - CSR_CYCLE); + cmask = BIT(CSR_INSTRET - CSR_CYCLE); } } @@ -446,9 +507,13 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) { u32 type = event->attr.type; u64 config = event->attr.config; - int bSoftware; - u64 raw_config_val; - int ret; + int ret = -ENOENT; + + /* + * Ensure we are finished checking standard hardware events for + * validity before allowing userspace to configure any events. + */ + flush_work(&check_std_events_work); switch (type) { case PERF_TYPE_HARDWARE: @@ -461,46 +526,165 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) break; case PERF_TYPE_RAW: /* - * As per SBI specification, the upper 16 bits must be unused for - * a raw event. Use the MSB (63b) to distinguish between hardware - * raw event and firmware events. + * As per SBI specification, the upper 16 bits must be unused + * for a hardware raw event. 
+ * Bits 63:62 are used to distinguish between raw events + * 00 - Hardware raw event + * 10 - SBI firmware events + * 11 - Risc-V platform specific firmware event */ - bSoftware = config >> 63; - raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK; - if (bSoftware) { - ret = (raw_config_val & 0xFFFF) | - (SBI_PMU_EVENT_TYPE_FW << 16); - } else { - ret = RISCV_PMU_RAW_EVENT_IDX; - *econfig = raw_config_val; + + switch (config >> 62) { + case 0: + /* Return error any bits [48-63] is set as it is not allowed by the spec */ + if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) { + *econfig = config & RISCV_PMU_RAW_EVENT_MASK; + ret = RISCV_PMU_RAW_EVENT_IDX; + } + break; + case 2: + ret = (config & 0xFFFF) | (SBI_PMU_EVENT_TYPE_FW << 16); + break; + case 3: + /* + * For Risc-V platform specific firmware events + * Event code - 0xFFFF + * Event data - raw event encoding + */ + ret = SBI_PMU_EVENT_TYPE_FW << 16 | RISCV_PLAT_FW_EVENT; + *econfig = config & RISCV_PMU_PLAT_FW_EVENT_MASK; + break; + default: + break; } break; default: - ret = -EINVAL; break; } return ret; } +static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu); + + if (!cpu_hw_evt->snapshot_addr) + continue; + + free_page((unsigned long)cpu_hw_evt->snapshot_addr); + cpu_hw_evt->snapshot_addr = NULL; + cpu_hw_evt->snapshot_addr_phys = 0; + } +} + +static int pmu_sbi_snapshot_alloc(struct riscv_pmu *pmu) +{ + int cpu; + struct page *snapshot_page; + + for_each_possible_cpu(cpu) { + struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu); + + snapshot_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + if (!snapshot_page) { + pmu_sbi_snapshot_free(pmu); + return -ENOMEM; + } + cpu_hw_evt->snapshot_addr = page_to_virt(snapshot_page); + cpu_hw_evt->snapshot_addr_phys = page_to_phys(snapshot_page); + } + + return 0; +} + +static int pmu_sbi_snapshot_disable(void) +{ + struct sbiret ret; + + ret = 
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, SBI_SHMEM_DISABLE, + SBI_SHMEM_DISABLE, 0, 0, 0, 0); + if (ret.error) { + pr_warn("failed to disable snapshot shared memory\n"); + return sbi_err_map_linux_errno(ret.error); + } + + return 0; +} + +static int pmu_sbi_snapshot_setup(struct riscv_pmu *pmu, int cpu) +{ + struct cpu_hw_events *cpu_hw_evt; + struct sbiret ret = {0}; + + cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu); + if (!cpu_hw_evt->snapshot_addr_phys) + return -EINVAL; + + if (cpu_hw_evt->snapshot_set_done) + return 0; + + if (IS_ENABLED(CONFIG_32BIT)) + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, + cpu_hw_evt->snapshot_addr_phys, + (u64)(cpu_hw_evt->snapshot_addr_phys) >> 32, 0, 0, 0, 0); + else + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, + cpu_hw_evt->snapshot_addr_phys, 0, 0, 0, 0, 0); + + /* Free up the snapshot area memory and fall back to SBI PMU calls without snapshot */ + if (ret.error) { + if (ret.error != SBI_ERR_NOT_SUPPORTED) + pr_warn("pmu snapshot setup failed with error %ld\n", ret.error); + return sbi_err_map_linux_errno(ret.error); + } + + memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS); + cpu_hw_evt->snapshot_set_done = true; + + return 0; +} + static u64 pmu_sbi_ctr_read(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; struct sbiret ret; - union sbi_pmu_ctr_info info; u64 val = 0; + struct riscv_pmu *pmu = to_riscv_pmu(event->pmu); + struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); + struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr; + union sbi_pmu_ctr_info info = pmu_ctr_list[idx]; + + /* Read the value from the shared memory directly only if counter is stopped */ + if (sbi_pmu_snapshot_available() && (hwc->state & PERF_HES_STOPPED)) { + val = sdata->ctr_values[idx]; + return val; + } if (pmu_sbi_is_fw_event(event)) { ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, hwc->idx, 0, 0, 
0, 0, 0); - if (!ret.error) - val = ret.value; + if (ret.error) + return 0; + + val = ret.value; + if (IS_ENABLED(CONFIG_32BIT) && sbi_v2_available && info.width >= 32) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ_HI, + hwc->idx, 0, 0, 0, 0, 0); + if (!ret.error) + val |= ((u64)ret.value << 32); + else + WARN_ONCE(1, "Unable to read upper 32 bits of firmware counter error: %ld\n", + ret.error); + } } else { - info = pmu_ctr_list[idx]; val = riscv_pmu_ctr_read_csr(info.csr); if (IS_ENABLED(CONFIG_32BIT)) - val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val; + val |= ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 32; } return val; @@ -530,6 +714,7 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) struct hw_perf_event *hwc = &event->hw; unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE; + /* There is no benefit setting SNAPSHOT FLAG for a single counter */ #if defined(CONFIG_32BIT) ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx, 1, flag, ival, ival >> 32, 0); @@ -550,16 +735,36 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) { struct sbiret ret; struct hw_perf_event *hwc = &event->hw; + struct riscv_pmu *pmu = to_riscv_pmu(event->pmu); + struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); + struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr; if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) pmu_sbi_reset_scounteren((void *)event); + if (sbi_pmu_snapshot_available()) + flag |= SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT; + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0); - if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) && - flag != SBI_PMU_STOP_FLAG_RESET) + if (!ret.error && sbi_pmu_snapshot_available()) { + /* + * The counter snapshot is based on the index base specified by hwc->idx. 
+ * The actual counter value is updated in shared memory at index 0 when counter + * mask is 0x01. To ensure accurate counter values, it's necessary to transfer + * the counter value to shared memory. However, if hwc->idx is zero, the counter + * value is already correctly updated in shared memory, requiring no further + * adjustment. + */ + if (hwc->idx > 0) { + sdata->ctr_values[hwc->idx] = sdata->ctr_values[0]; + sdata->ctr_values[0] = 0; + } + } else if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) && + flag != SBI_PMU_STOP_FLAG_RESET) { pr_err("Stopping counter idx %d failed with error %d\n", hwc->idx, sbi_err_map_linux_errno(ret.error)); + } } static int pmu_sbi_find_num_ctrs(void) @@ -611,16 +816,45 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu) * which may include counters that are not enabled yet. */ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, - 0, pmu->cmask, 0, 0, 0, 0); + 0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); } static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu) { struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); + struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr; + unsigned long flag = 0; + int i, idx; + struct sbiret ret; + u64 temp_ctr_overflow_mask = 0; + + if (sbi_pmu_snapshot_available()) + flag = SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT; + + /* Reset the shadow copy to avoid save/restore any value from previous overflow */ + memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS); + + for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) { + /* No need to check the error here as we can't do anything about the error */ + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, i * BITS_PER_LONG, + cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0); + if (!ret.error && sbi_pmu_snapshot_available()) { + /* Save the counter values to avoid clobbering */ + for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG) + cpu_hw_evt->snapshot_cval_shcopy[i * 
BITS_PER_LONG + idx] = + sdata->ctr_values[idx]; + /* Save the overflow mask to avoid clobbering */ + temp_ctr_overflow_mask |= sdata->ctr_overflow_mask << (i * BITS_PER_LONG); + } + } - /* No need to check the error here as we can't do anything about the error */ - sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0, - cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0); + /* Restore the counter values to the shared memory for used hw counters */ + if (sbi_pmu_snapshot_available()) { + for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) + sdata->ctr_values[idx] = cpu_hw_evt->snapshot_cval_shcopy[idx]; + if (temp_ctr_overflow_mask) + sdata->ctr_overflow_mask = temp_ctr_overflow_mask; + } } /* @@ -629,11 +863,10 @@ static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu) * while the overflowed counters need to be started with updated initialization * value. */ -static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu, - unsigned long ctr_ovf_mask) +static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt, + u64 ctr_ovf_mask) { - int idx = 0; - struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); + int idx = 0, i; struct perf_event *event; unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE; unsigned long ctr_start_mask = 0; @@ -641,11 +874,12 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu, struct hw_perf_event *hwc; u64 init_val = 0; - ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask; - - /* Start all the counters that did not overflow in a single shot */ - sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask, - 0, 0, 0, 0); + for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) { + ctr_start_mask = cpu_hw_evt->used_hw_ctrs[i] & ~ctr_ovf_mask; + /* Start all the counters that did not overflow in a single shot */ + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG, ctr_start_mask, + 0, 0, 0, 0); + } /* Reinitialize and start all the 
counter that overflowed */ while (ctr_ovf_mask) { @@ -668,6 +902,52 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu, } } +static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_evt, + u64 ctr_ovf_mask) +{ + int i, idx = 0; + struct perf_event *event; + unsigned long flag = SBI_PMU_START_FLAG_INIT_SNAPSHOT; + u64 max_period, init_val = 0; + struct hw_perf_event *hwc; + struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr; + + for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) { + if (ctr_ovf_mask & BIT(idx)) { + event = cpu_hw_evt->events[idx]; + hwc = &event->hw; + max_period = riscv_pmu_ctr_get_width_mask(event); + init_val = local64_read(&hwc->prev_count) & max_period; + cpu_hw_evt->snapshot_cval_shcopy[idx] = init_val; + } + /* + * We do not need to update the non-overflow counters the previous + * value should have been there already. + */ + } + + for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) { + /* Restore the counter values to relative indices for used hw counters */ + for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG) + sdata->ctr_values[idx] = + cpu_hw_evt->snapshot_cval_shcopy[idx + i * BITS_PER_LONG]; + /* Start all the counters in a single shot */ + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx * BITS_PER_LONG, + cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0); + } +} + +static void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu, + u64 ctr_ovf_mask) +{ + struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); + + if (sbi_pmu_snapshot_available()) + pmu_sbi_start_ovf_ctrs_snapshot(cpu_hw_evt, ctr_ovf_mask); + else + pmu_sbi_start_ovf_ctrs_sbi(cpu_hw_evt, ctr_ovf_mask); +} + static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) { struct perf_sample_data data; @@ -677,10 +957,11 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) int lidx, hidx, fidx; struct riscv_pmu *pmu; struct perf_event *event; - unsigned long 
overflow; - unsigned long overflowed_ctrs = 0; + u64 overflow; + u64 overflowed_ctrs = 0; struct cpu_hw_events *cpu_hw_evt = dev; u64 start_clock = sched_clock(); + struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr; if (WARN_ON_ONCE(!cpu_hw_evt)) return IRQ_NONE; @@ -694,7 +975,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) event = cpu_hw_evt->events[fidx]; if (!event) { - csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); + ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask); return IRQ_NONE; } @@ -702,13 +983,16 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) pmu_sbi_stop_hw_ctrs(pmu); /* Overflow status register should only be read after counter are stopped */ - ALT_SBI_PMU_OVERFLOW(overflow); + if (sbi_pmu_snapshot_available()) + overflow = sdata->ctr_overflow_mask; + else + ALT_SBI_PMU_OVERFLOW(overflow); /* * Overflow interrupt pending bit should only be cleared after stopping * all the counters to avoid any race condition. */ - csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); + ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask); /* No overflow bit is set */ if (!overflow) @@ -728,9 +1012,14 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) if (!info || info->type != SBI_PMU_CTR_TYPE_HW) continue; - /* compute hardware counter index */ - hidx = info->csr - CSR_CYCLE; - /* check if the corresponding bit is set in sscountovf */ + if (sbi_pmu_snapshot_available()) + /* SBI implementation already updated the logical indicies */ + hidx = lidx; + else + /* compute hardware counter index */ + hidx = info->csr - CSR_CYCLE; + + /* check if the corresponding bit is set in sscountovf or overflow mask in shmem */ if (!(overflow & BIT(hidx))) continue; @@ -740,7 +1029,10 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) */ overflowed_ctrs |= BIT(lidx); hw_evt = &event->hw; + /* Update the event states here so that we know the state while reading */ + hw_evt->state |= PERF_HES_STOPPED; riscv_pmu_event_update(event); + 
hw_evt->state |= PERF_HES_UPTODATE; perf_sample_data_init(&data, 0, hw_evt->last_period); if (riscv_pmu_event_set_period(event)) { /* @@ -753,6 +1045,8 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) */ perf_event_overflow(event, &data, regs); } + /* Reset the state as we are going to start the counter after the loop */ + hw_evt->state = 0; } pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs); @@ -780,11 +1074,13 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) if (riscv_pmu_use_irq) { cpu_hw_evt->irq = riscv_pmu_irq; - csr_clear(CSR_IP, BIT(riscv_pmu_irq_num)); - csr_set(CSR_IE, BIT(riscv_pmu_irq_num)); + ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask); enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE); } + if (sbi_pmu_snapshot_available()) + return pmu_sbi_snapshot_setup(pmu, cpu); + return 0; } @@ -792,12 +1088,14 @@ static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node) { if (riscv_pmu_use_irq) { disable_percpu_irq(riscv_pmu_irq); - csr_clear(CSR_IE, BIT(riscv_pmu_irq_num)); } /* Disable all counters access for user mode now */ csr_write(CSR_SCOUNTEREN, 0x0); + if (sbi_pmu_snapshot_available()) + return pmu_sbi_snapshot_disable(); + return 0; } @@ -816,8 +1114,15 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde riscv_cached_mimpid(0) == 0) { riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU; riscv_pmu_use_irq = true; + } else if (riscv_has_vendor_extension_unlikely(ANDES_VENDOR_ID, + RISCV_ISA_VENDOR_EXT_XANDESPMU) && + IS_ENABLED(CONFIG_ANDES_CUSTOM_PMU)) { + riscv_pmu_irq_num = ANDES_SLI_CAUSE_BASE + ANDES_RV_IRQ_PMOVI; + riscv_pmu_use_irq = true; } + riscv_pmu_irq_mask = BIT(riscv_pmu_irq_num % BITS_PER_LONG); + if (!riscv_pmu_use_irq) return -EOPNOTSUPP; @@ -900,6 +1205,12 @@ static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { } static void riscv_pmu_destroy(struct riscv_pmu *pmu) { + if (sbi_v2_available) { + if (sbi_pmu_snapshot_available()) { + 
pmu_sbi_snapshot_disable(); + pmu_sbi_snapshot_free(pmu); + } + } riscv_pm_pmu_unregister(pmu); cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); } @@ -986,7 +1297,7 @@ static void riscv_pmu_update_counter_access(void *info) csr_write(CSR_SCOUNTEREN, 0x2); } -static int riscv_pmu_proc_user_access_handler(struct ctl_table *table, +static int riscv_pmu_proc_user_access_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -1006,7 +1317,7 @@ static int riscv_pmu_proc_user_access_handler(struct ctl_table *table, return 0; } -static struct ctl_table sbi_pmu_sysctl_table[] = { +static const struct ctl_table sbi_pmu_sysctl_table[] = { { .procname = "perf_user_access", .data = &sysctl_perf_user_access, @@ -1016,7 +1327,6 @@ static struct ctl_table sbi_pmu_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, - { } }; static int pmu_sbi_device_probe(struct platform_device *pdev) @@ -1054,6 +1364,7 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) } pmu->pmu.attr_groups = riscv_pmu_attr_groups; + pmu->pmu.parent = &pdev->dev; pmu->cmask = cmask; pmu->ctr_start = pmu_sbi_ctr_start; pmu->ctr_stop = pmu_sbi_ctr_stop; @@ -1067,10 +1378,6 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) pmu->event_unmapped = pmu_sbi_event_unmapped; pmu->csr_index = pmu_sbi_csr_index; - ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); - if (ret) - return ret; - ret = riscv_pm_pmu_register(pmu); if (ret) goto out_unregister; @@ -1079,8 +1386,42 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) if (ret) goto out_unregister; + /* SBI PMU Snapsphot is only available in SBI v2.0 */ + if (sbi_v2_available) { + int cpu; + + ret = pmu_sbi_snapshot_alloc(pmu); + if (ret) + goto out_unregister; + + cpu = get_cpu(); + ret = pmu_sbi_snapshot_setup(pmu, cpu); + put_cpu(); + + if (ret) { + /* Snapshot is an optional feature. 
Continue if not available */ + pmu_sbi_snapshot_free(pmu); + } else { + pr_info("SBI PMU snapshot detected\n"); + /* + * We enable it once here for the boot cpu. If snapshot shmem setup + * fails during cpu hotplug process, it will fail to start the cpu + * as we can not handle hetergenous PMUs with different snapshot + * capability. + */ + static_branch_enable(&sbi_pmu_snapshot_available); + } + } + register_sysctl("kernel", sbi_pmu_sysctl_table); + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); + if (ret) + goto out_unregister; + + /* Asynchronously check which standard events are available */ + schedule_work(&check_std_events_work); + return 0; out_unregister: @@ -1108,6 +1449,9 @@ static int __init pmu_sbi_devinit(void) return 0; } + if (sbi_spec_version >= sbi_mk_version(2, 0)) + sbi_v2_available = true; + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING, "perf/riscv/pmu:starting", pmu_sbi_starting_cpu, pmu_sbi_dying_cpu); diff --git a/drivers/perf/starfive_starlink_pmu.c b/drivers/perf/starfive_starlink_pmu.c new file mode 100644 index 000000000000..5e5a672b4229 --- /dev/null +++ b/drivers/perf/starfive_starlink_pmu.c @@ -0,0 +1,642 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * StarFive's StarLink PMU driver + * + * Copyright (C) 2023 StarFive Technology Co., Ltd. 
+ * + * Author: Ji Sheng Teoh <jisheng.teoh@starfivetech.com> + * + */ + +#define STARLINK_PMU_PDEV_NAME "starfive_starlink_pmu" +#define pr_fmt(fmt) STARLINK_PMU_PDEV_NAME ": " fmt + +#include <linux/bitmap.h> +#include <linux/cpu_pm.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/perf_event.h> +#include <linux/platform_device.h> +#include <linux/sysfs.h> + +#define STARLINK_PMU_MAX_COUNTERS 64 +#define STARLINK_PMU_NUM_COUNTERS 16 +#define STARLINK_PMU_IDX_CYCLE_COUNTER 63 + +#define STARLINK_PMU_EVENT_SELECT 0x060 +#define STARLINK_PMU_EVENT_COUNTER 0x160 +#define STARLINK_PMU_COUNTER_MASK GENMASK_ULL(63, 0) +#define STARLINK_PMU_CYCLE_COUNTER 0x058 + +#define STARLINK_PMU_CONTROL 0x040 +#define STARLINK_PMU_GLOBAL_ENABLE BIT_ULL(0) + +#define STARLINK_PMU_INTERRUPT_ENABLE 0x050 +#define STARLINK_PMU_COUNTER_OVERFLOW_STATUS 0x048 +#define STARLINK_PMU_CYCLE_OVERFLOW_MASK BIT_ULL(63) + +#define STARLINK_CYCLES 0x058 +#define CACHE_READ_REQUEST 0x04000701 +#define CACHE_WRITE_REQUEST 0x03000001 +#define CACHE_RELEASE_REQUEST 0x0003e001 +#define CACHE_READ_HIT 0x00901202 +#define CACHE_READ_MISS 0x04008002 +#define CACHE_WRITE_HIT 0x006c0002 +#define CACHE_WRITE_MISS 0x03000002 +#define CACHE_WRITEBACK 0x00000403 + +#define to_starlink_pmu(p) (container_of(p, struct starlink_pmu, pmu)) + +#define STARLINK_FORMAT_ATTR(_name, _config) \ + (&((struct dev_ext_attribute[]) { \ + { .attr = __ATTR(_name, 0444, starlink_pmu_sysfs_format_show, NULL), \ + .var = (void *)_config, } \ + })[0].attr.attr) + +#define STARLINK_EVENT_ATTR(_name, _id) \ + PMU_EVENT_ATTR_ID(_name, starlink_pmu_sysfs_event_show, _id) + +static int starlink_pmu_cpuhp_state; + +struct starlink_hw_events { + struct perf_event *events[STARLINK_PMU_MAX_COUNTERS]; + DECLARE_BITMAP(used_mask, STARLINK_PMU_MAX_COUNTERS); +}; + +struct starlink_pmu { + struct pmu pmu; + struct starlink_hw_events 
__percpu *hw_events; + struct hlist_node node; + struct notifier_block starlink_pmu_pm_nb; + void __iomem *pmu_base; + cpumask_t cpumask; + int irq; +}; + +static ssize_t +starlink_pmu_sysfs_format_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct dev_ext_attribute *eattr = container_of(attr, + struct dev_ext_attribute, attr); + + return sysfs_emit(buf, "%s\n", (char *)eattr->var); +} + +static struct attribute *starlink_pmu_format_attrs[] = { + STARLINK_FORMAT_ATTR(event, "config:0-31"), + NULL +}; + +static const struct attribute_group starlink_pmu_format_attr_group = { + .name = "format", + .attrs = starlink_pmu_format_attrs, +}; + +static ssize_t +starlink_pmu_sysfs_event_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct perf_pmu_events_attr *eattr = container_of(attr, + struct perf_pmu_events_attr, attr); + + return sysfs_emit(buf, "event=0x%02llx\n", eattr->id); +} + +static struct attribute *starlink_pmu_event_attrs[] = { + STARLINK_EVENT_ATTR(cycles, STARLINK_CYCLES), + STARLINK_EVENT_ATTR(read_request, CACHE_READ_REQUEST), + STARLINK_EVENT_ATTR(write_request, CACHE_WRITE_REQUEST), + STARLINK_EVENT_ATTR(release_request, CACHE_RELEASE_REQUEST), + STARLINK_EVENT_ATTR(read_hit, CACHE_READ_HIT), + STARLINK_EVENT_ATTR(read_miss, CACHE_READ_MISS), + STARLINK_EVENT_ATTR(write_hit, CACHE_WRITE_HIT), + STARLINK_EVENT_ATTR(write_miss, CACHE_WRITE_MISS), + STARLINK_EVENT_ATTR(writeback, CACHE_WRITEBACK), + NULL +}; + +static const struct attribute_group starlink_pmu_events_attr_group = { + .name = "events", + .attrs = starlink_pmu_event_attrs, +}; + +static ssize_t +cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct starlink_pmu *starlink_pmu = to_starlink_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, &starlink_pmu->cpumask); +} + +static DEVICE_ATTR_RO(cpumask); + +static struct attribute *starlink_pmu_cpumask_attrs[] = { + 
&dev_attr_cpumask.attr,
+	NULL
+};
+
+static const struct attribute_group starlink_pmu_cpumask_attr_group = {
+	.attrs = starlink_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *starlink_pmu_attr_groups[] = {
+	&starlink_pmu_format_attr_group,
+	&starlink_pmu_events_attr_group,
+	&starlink_pmu_cpumask_attr_group,
+	NULL
+};
+
+/*
+ * Preload the counter backing @event and record the same value in
+ * hwc->prev_count so that starlink_pmu_update() computes deltas from it.
+ */
+static void starlink_pmu_set_event_period(struct perf_event *event)
+{
+	struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = event->hw.idx;
+
+	/*
+	 * Program counter to half of its max count to handle
+	 * cases of extreme interrupt latency.
+	 */
+	u64 val = STARLINK_PMU_COUNTER_MASK >> 1;
+
+	local64_set(&hwc->prev_count, val);
+	if (hwc->config == STARLINK_CYCLES)
+		writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_CYCLE_COUNTER);
+	else
+		writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_EVENT_COUNTER +
+		       idx * sizeof(u64));
+}
+
+/*
+ * Select the event (non-cycle events only), enable the overflow interrupt
+ * of the counter used by @event and write the global enable bit.
+ */
+static void starlink_pmu_counter_start(struct perf_event *event,
+				       struct starlink_pmu *starlink_pmu)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = event->hw.idx;
+	u64 val;
+
+	/*
+	 * Enable counter overflow interrupt[63:0],
+	 * which is mapped as follows:
+	 *
+	 * event counter 0	- Bit [0]
+	 * event counter 1	- Bit [1]
+	 * ...
+	 * cycle counter	- Bit [63]
+	 */
+	val = readq(starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+
+	if (hwc->config == STARLINK_CYCLES) {
+		/*
+		 * Cycle count has its dedicated register, and it starts
+		 * counting as soon as STARLINK_PMU_GLOBAL_ENABLE is set.
+		 */
+		val |= STARLINK_PMU_CYCLE_OVERFLOW_MASK;
+	} else {
+		writeq(event->hw.config, starlink_pmu->pmu_base +
+		       STARLINK_PMU_EVENT_SELECT + idx * sizeof(u64));
+
+		val |= BIT_ULL(idx);
+	}
+
+	writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+
+	writeq(STARLINK_PMU_GLOBAL_ENABLE, starlink_pmu->pmu_base +
+	       STARLINK_PMU_CONTROL);
+}
+
+/*
+ * Clear STARLINK_PMU_GLOBAL_ENABLE in the control register, then disable
+ * the overflow interrupt of the counter used by @event.
+ */
+static void starlink_pmu_counter_stop(struct perf_event *event,
+				      struct starlink_pmu *starlink_pmu)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = event->hw.idx;
+	u64 val;
+
+	val = readq(starlink_pmu->pmu_base + STARLINK_PMU_CONTROL);
+	val &= ~STARLINK_PMU_GLOBAL_ENABLE;
+	writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_CONTROL);
+
+	val = readq(starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+	if (hwc->config == STARLINK_CYCLES)
+		val &= ~STARLINK_PMU_CYCLE_OVERFLOW_MASK;
+	else
+		val &= ~BIT_ULL(idx);
+
+	writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+}
+
+/*
+ * Read the hardware counter of @event and add the delta since the last
+ * read to event->count. Also used as the pmu ->read() callback.
+ */
+static void starlink_pmu_update(struct perf_event *event)
+{
+	struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	u64 prev_raw_count, new_raw_count;
+	u64 oldval;
+	u64 delta;
+
+	/* Retry until prev_count is swapped without a concurrent update. */
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		if (hwc->config == STARLINK_CYCLES)
+			new_raw_count = readq(starlink_pmu->pmu_base +
+					      STARLINK_PMU_CYCLE_COUNTER);
+		else
+			new_raw_count = readq(starlink_pmu->pmu_base +
+					      STARLINK_PMU_EVENT_COUNTER +
+					      idx * sizeof(u64));
+		oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					 new_raw_count);
+	} while (oldval != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) & STARLINK_PMU_COUNTER_MASK;
+	local64_add(delta, &event->count);
+}
+
+/* pmu ->start(): re-arm the counter of a stopped @event and enable it. */
+static void starlink_pmu_start(struct perf_event *event, int flags)
+{
+	struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+
+	starlink_pmu_set_event_period(event);
+	starlink_pmu_counter_start(event, starlink_pmu);
+
+	perf_event_update_userpage(event);
+}
+
+/* pmu ->stop(): disable the counter of @event and fold in its final count. */
+static void starlink_pmu_stop(struct perf_event *event, int flags)
+{
+	struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	starlink_pmu_counter_stop(event, starlink_pmu);
+	starlink_pmu_update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+/*
+ * pmu ->add(): assign a counter index to @event, record it in this CPU's
+ * event table and start it when PERF_EF_START is set.
+ *
+ * Returns 0 on success, or -EAGAIN when no event counter is free.
+ */
+static int starlink_pmu_add(struct perf_event *event, int flags)
+{
+	struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+	struct starlink_hw_events *hw_events =
+					this_cpu_ptr(starlink_pmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long *used_mask = hw_events->used_mask;
+	u32 n_events = STARLINK_PMU_NUM_COUNTERS;
+	int idx;
+
+	/*
+	 * Cycle counter has dedicated register to hold counter value.
+	 * Event other than cycle count has to be enabled through
+	 * event select register, and assigned with independent counter
+	 * as they appear.
+	 */
+
+	if (hwc->config == STARLINK_CYCLES) {
+		idx = STARLINK_PMU_IDX_CYCLE_COUNTER;
+	} else {
+		idx = find_first_zero_bit(used_mask, n_events);
+		/*
+		 * All counters are in use. find_first_zero_bit() reports
+		 * this by returning the bitmap size, never a negative value.
+		 */
+		if (idx >= n_events)
+			return -EAGAIN;
+
+		set_bit(idx, used_mask);
+	}
+
+	hwc->idx = idx;
+	hw_events->events[idx] = event;
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		starlink_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+/* pmu ->del(): stop @event and release its slot in this CPU's event table. */
+static void starlink_pmu_del(struct perf_event *event, int flags)
+{
+	struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+	struct starlink_hw_events *hw_events =
+					this_cpu_ptr(starlink_pmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	starlink_pmu_stop(event, PERF_EF_UPDATE);
+	hw_events->events[hwc->idx] = NULL;
+	clear_bit(hwc->idx, hw_events->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+/*
+ * Check that the group @event belongs to can be scheduled: every member
+ * must be on this PMU or be a software event, and the leader plus all
+ * siblings must not exceed STARLINK_PMU_NUM_COUNTERS.
+ */
+static bool starlink_pmu_validate_event_group(struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader;
+	struct perf_event *sibling;
+	int counter = 1;
+
+	/*
+	 * Ensure hardware events in the group are on the same PMU,
+	 * software events are acceptable.
+	 */
+	if (event->group_leader->pmu != event->pmu &&
+	    !is_software_event(event->group_leader))
+		return false;
+
+	for_each_sibling_event(sibling, leader) {
+		if (sibling->pmu != event->pmu && !is_software_event(sibling))
+			return false;
+
+		counter++;
+	}
+
+	return counter <= STARLINK_PMU_NUM_COUNTERS;
+}
+
+/*
+ * pmu ->event_init(): validate @event and bind it to the PMU's designated
+ * CPU.
+ *
+ * Returns 0 on success, -ENOENT when the event is for another PMU,
+ * -EOPNOTSUPP for sampling or task-bound events, -EINVAL for an invalid
+ * group.
+ */
+static int starlink_pmu_event_init(struct perf_event *event)
+{
+	struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Let the perf core offer events of other types to other PMUs. */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * Sampling is not supported, as counters are shared
+	 * by all CPUs.
+	 */
+	if (hwc->sample_period)
+		return -EOPNOTSUPP;
+
+	/*
+	 * Per-task and attach to a task are not supported,
+	 * as uncore events are not specific to any CPU.
+	 */
+	if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	if (!starlink_pmu_validate_event_group(event))
+		return -EINVAL;
+
+	hwc->idx = -1;
+	hwc->config = event->attr.config;
+	event->cpu = cpumask_first(&starlink_pmu->cpumask);
+
+	return 0;
+}
+
+/*
+ * Overflow interrupt handler. For every event in this CPU's event table
+ * whose bit is set in the overflow status register, write that bit back
+ * to the status register (presumably write-one-to-clear), fold in the
+ * count and re-arm the counter.
+ */
+static irqreturn_t starlink_pmu_handle_irq(int irq_num, void *data)
+{
+	struct starlink_pmu *starlink_pmu = data;
+	struct starlink_hw_events *hw_events =
+			this_cpu_ptr(starlink_pmu->hw_events);
+	bool handled = false;
+	int idx;
+	u64 overflow_status;
+
+	for (idx = 0; idx < STARLINK_PMU_MAX_COUNTERS; idx++) {
+		struct perf_event *event = hw_events->events[idx];
+
+		if (!event)
+			continue;
+
+		overflow_status = readq(starlink_pmu->pmu_base +
+					STARLINK_PMU_COUNTER_OVERFLOW_STATUS);
+		if (!(overflow_status & BIT_ULL(idx)))
+			continue;
+
+		writeq(BIT_ULL(idx), starlink_pmu->pmu_base +
+		       STARLINK_PMU_COUNTER_OVERFLOW_STATUS);
+
+		starlink_pmu_update(event);
+		starlink_pmu_set_event_period(event);
+		handled = true;
+	}
+	return IRQ_RETVAL(handled);
+}
+
+/*
+ * Look up interrupt 0 of @pdev and install the overflow handler.
+ *
+ * Returns 0 on success or a negative errno from platform_get_irq() or
+ * devm_request_irq().
+ */
+static int starlink_setup_irqs(struct starlink_pmu *starlink_pmu,
+			       struct platform_device *pdev)
+{
+	int ret, irq;
+
+	irq = platform_get_irq(pdev, 0);
+	/* Pass the real error on so that -EPROBE_DEFER is not lost. */
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(&pdev->dev, irq, starlink_pmu_handle_irq,
+			       0, STARLINK_PMU_PDEV_NAME, starlink_pmu);
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret, "Failed to request IRQ\n");
+
+	starlink_pmu->irq = irq;
+
+	return 0;
+}
+
+/*
+ * CPU PM notifier: stop the events of this CPU on CPU_PM_ENTER and start
+ * them again on CPU_PM_EXIT or CPU_PM_ENTER_FAILED.
+ */
+static int starlink_pmu_pm_notify(struct notifier_block *b,
+				  unsigned long cmd, void *v)
+{
+	struct starlink_pmu *starlink_pmu = container_of(b, struct starlink_pmu,
+							 starlink_pmu_pm_nb);
+	struct starlink_hw_events *hw_events =
+					this_cpu_ptr(starlink_pmu->hw_events);
+	int enabled = bitmap_weight(hw_events->used_mask,
+				    STARLINK_PMU_MAX_COUNTERS);
+	struct perf_event *event;
+	int idx;
+
+	if (!enabled)
+		return NOTIFY_OK;
+
+	for (idx = 0; idx < STARLINK_PMU_MAX_COUNTERS; idx++) {
+		event = hw_events->events[idx];
+		if (!event)
+			continue;
+
+		switch (cmd) {
+		case CPU_PM_ENTER:
+			/* Stop and update the counter */
+			starlink_pmu_stop(event, PERF_EF_UPDATE);
+			break;
+		case CPU_PM_EXIT:
+		case CPU_PM_ENTER_FAILED:
+			/* Restore and enable the counter */
+			starlink_pmu_start(event, PERF_EF_RELOAD);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return NOTIFY_OK;
+}
+
+/* Register the CPU PM notifier; does nothing without CONFIG_CPU_PM. */
+static int starlink_pmu_pm_register(struct starlink_pmu *starlink_pmu)
+{
+	if (!IS_ENABLED(CONFIG_CPU_PM))
+		return 0;
+
+	starlink_pmu->starlink_pmu_pm_nb.notifier_call = starlink_pmu_pm_notify;
+	return cpu_pm_register_notifier(&starlink_pmu->starlink_pmu_pm_nb);
+}
+
+/* Unregister the CPU PM notifier; does nothing without CONFIG_CPU_PM. */
+static void starlink_pmu_pm_unregister(struct starlink_pmu *starlink_pmu)
+{
+	if (!IS_ENABLED(CONFIG_CPU_PM))
+		return;
+
+	cpu_pm_unregister_notifier(&starlink_pmu->starlink_pmu_pm_nb);
+}
+
+/* Undo the CPU PM and CPU hotplug registrations made by probe. */
+static void starlink_pmu_destroy(struct starlink_pmu *starlink_pmu)
+{
+	starlink_pmu_pm_unregister(starlink_pmu);
+	cpuhp_state_remove_instance(starlink_pmu_cpuhp_state,
+				    &starlink_pmu->node);
+}
+
+/*
+ * Map the registers, allocate the per-CPU event tables, install the IRQ
+ * handler, hook into CPU hotplug and CPU PM, then register the PMU with
+ * the perf core. Returns 0 on success or a negative errno.
+ */
+static int starlink_pmu_probe(struct platform_device *pdev)
+{
+	struct starlink_pmu *starlink_pmu;
+	struct starlink_hw_events *hw_events;
+	struct resource *res;
+	int cpuid, i, ret;
+
+	starlink_pmu = devm_kzalloc(&pdev->dev, sizeof(*starlink_pmu), GFP_KERNEL);
+	if (!starlink_pmu)
+		return -ENOMEM;
+
+	starlink_pmu->pmu_base =
+			devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(starlink_pmu->pmu_base))
+		return PTR_ERR(starlink_pmu->pmu_base);
+
+	/*
+	 * Device-managed so that the tables are released on every probe
+	 * failure path, and only after the IRQ requested below is freed.
+	 */
+	starlink_pmu->hw_events = devm_alloc_percpu(&pdev->dev,
+						    struct starlink_hw_events);
+	if (!starlink_pmu->hw_events) {
+		dev_err(&pdev->dev, "Failed to allocate per-cpu PMU data\n");
+		return -ENOMEM;
+	}
+
+	for_each_possible_cpu(cpuid) {
+		hw_events = per_cpu_ptr(starlink_pmu->hw_events, cpuid);
+		for (i = 0; i < STARLINK_PMU_MAX_COUNTERS; i++)
+			hw_events->events[i] = NULL;
+	}
+
+	ret = starlink_setup_irqs(starlink_pmu, pdev);
+	if (ret)
+		return ret;
+
+	ret = cpuhp_state_add_instance(starlink_pmu_cpuhp_state,
+				       &starlink_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to register hotplug\n");
+		return ret;
+	}
+
+	ret = starlink_pmu_pm_register(starlink_pmu);
+	if (ret) {
+		cpuhp_state_remove_instance(starlink_pmu_cpuhp_state,
+					    &starlink_pmu->node);
+		return ret;
+	}
+
+	starlink_pmu->pmu = (struct pmu) {
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= starlink_pmu_event_init,
+		.add		= starlink_pmu_add,
+		.del		= starlink_pmu_del,
+		.start		= starlink_pmu_start,
+		.stop		= starlink_pmu_stop,
+		.read		= starlink_pmu_update,
+		.attr_groups	= starlink_pmu_attr_groups,
+	};
+
+	ret = perf_pmu_register(&starlink_pmu->pmu, STARLINK_PMU_PDEV_NAME, -1);
+	if (ret)
+		starlink_pmu_destroy(starlink_pmu);
+
+	return ret;
+}
+
+static const struct of_device_id starlink_pmu_of_match[] = {
+	{ .compatible = "starfive,jh8100-starlink-pmu" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, starlink_pmu_of_match);
+
+static struct platform_driver starlink_pmu_driver = {
+	.driver = {
+		.name	= STARLINK_PMU_PDEV_NAME,
+		.of_match_table = starlink_pmu_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = starlink_pmu_probe,
+};
+
+/*
+ * CPU hotplug online callback: the first CPU seen becomes the PMU's
+ * designated CPU and takes the overflow interrupt.
+ */
+static int
+starlink_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct starlink_pmu *starlink_pmu = hlist_entry_safe(node,
+							     struct starlink_pmu,
+							     node);
+
+	/*
+	 * A designated CPU already exists. The interrupt has to stay on it,
+	 * because the handler looks up events in its own per-CPU table.
+	 */
+	if (!cpumask_empty(&starlink_pmu->cpumask))
+		return 0;
+
+	cpumask_set_cpu(cpu, &starlink_pmu->cpumask);
+	WARN_ON(irq_set_affinity(starlink_pmu->irq, cpumask_of(cpu)));
+
+	return 0;
+}
+
+/*
+ * CPU hotplug offline callback: when the designated CPU goes away, move
+ * the perf context and the overflow interrupt to another online CPU.
+ */
+static int
+starlink_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct starlink_pmu *starlink_pmu = hlist_entry_safe(node,
+							     struct starlink_pmu,
+							     node);
+	unsigned int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &starlink_pmu->cpumask))
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&starlink_pmu->pmu, cpu, target);
+
+	cpumask_set_cpu(target, &starlink_pmu->cpumask);
+	WARN_ON(irq_set_affinity(starlink_pmu->irq, cpumask_of(target)));
+
+	return 0;
+}
+
+/* Set up the dynamic CPU hotplug state, then register the platform driver. */
+static int __init starlink_pmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "soc/starfive/starlink_pmu:online",
+				      starlink_pmu_online_cpu,
+				      starlink_pmu_offline_cpu);
+	if (ret < 0)
+		return ret;
+
+	starlink_pmu_cpuhp_state = ret;
+
+	return platform_driver_register(&starlink_pmu_driver);
+}
+
+device_initcall(starlink_pmu_init);
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index 1edb9c03704f..cadd60221b8f 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -504,24 +504,19 @@ static void tx2_uncore_event_update(struct perf_event *event)
 
 static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev)
 {
-	int i = 0;
-	struct acpi_tx2_pmu_device {
-		__u8 id[ACPI_ID_LEN];
-		enum tx2_uncore_type type;
-	} devices[] = {
+	struct acpi_device_id devices[] = {
 		{"CAV901D", PMU_TYPE_L3C},
 		{"CAV901F", PMU_TYPE_DMC},
 		{"CAV901E", PMU_TYPE_CCPI2},
-		{"", PMU_TYPE_INVALID}
+		{}
 	};
+	const struct acpi_device_id *id;
 
-	while (devices[i].type != PMU_TYPE_INVALID) {
-		if (!strcmp(acpi_device_hid(adev), devices[i].id))
-			break;
-		i++;
-	}
+	id = acpi_match_acpi_device(devices, adev);
+	if (!id)
+		return PMU_TYPE_INVALID;
 
-	return devices[i].type;
+	return (enum tx2_uncore_type)id->driver_data;
 }
 
 static bool tx2_uncore_validate_event(struct pmu *pmu,
@@ -729,6 +724,7 @@ static int tx2_uncore_pmu_register(
 	/* Perf event registration */
 	tx2_pmu->pmu = (struct pmu) {
 		.module = THIS_MODULE,
+		.parent = tx2_pmu->dev,
 		.attr_groups = tx2_pmu->attr_groups,
 		.task_ctx_nr = perf_invalid_context,
 		.event_init = tx2_uncore_event_init,
@@ -932,9 +928,8 @@ static int tx2_uncore_pmu_online_cpu(unsigned int cpu,
 static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
 		struct hlist_node *hpnode)
 {
-	int new_cpu;
 	struct tx2_uncore_pmu *tx2_pmu;
-	struct cpumask cpu_online_mask_temp;
+	unsigned int new_cpu;
 
 	tx2_pmu = hlist_entry_safe(hpnode,
 			struct tx2_uncore_pmu, hpnode);
@@ -945,11 +940,8 @@ static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
 	if (tx2_pmu->hrtimer_callback)
 		hrtimer_cancel(&tx2_pmu->hrtimer);
 
-	cpumask_copy(&cpu_online_mask_temp, cpu_online_mask);
-	cpumask_clear_cpu(cpu, &cpu_online_mask_temp);
-	new_cpu = cpumask_any_and(
-			cpumask_of_node(tx2_pmu->node),
-			&cpu_online_mask_temp);
+	new_cpu = cpumask_any_and_but(cpumask_of_node(tx2_pmu->node),
+				      cpu_online_mask, cpu);
 
 	tx2_pmu->cpu = new_cpu;
 	if (new_cpu >= nr_cpu_ids)
@@ -993,7 +985,7 @@ static int tx2_uncore_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int tx2_uncore_remove(struct platform_device *pdev)
+static void tx2_uncore_remove(struct platform_device *pdev)
 {
 	struct tx2_uncore_pmu *tx2_pmu, *temp;
 	struct device *dev = &pdev->dev;
@@ -1009,7 +1001,6 @@ static int tx2_uncore_remove(struct platform_device *pdev)
 			}
 		}
 	}
-	return 0;
 }
 
 static struct platform_driver tx2_uncore_driver = {
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 7ce344248dda..33b5497bdc06 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -162,18 +162,9 @@ enum xgene_pmu_dev_type {
 /*
  * sysfs format attributes
  */
-static ssize_t xgene_pmu_format_show(struct device *dev,
-				     struct device_attribute *attr, char *buf)
-{
-	struct dev_ext_attribute *eattr;
-
-	eattr = container_of(attr, struct dev_ext_attribute, attr);
-	return sysfs_emit(buf, "%s\n", (char *) eattr->var);
-}
-
 #define XGENE_PMU_FORMAT_ATTR(_name, _config) \
 	(&((struct dev_ext_attribute[]) { \
-		{ .attr = __ATTR(_name, S_IRUGO, xgene_pmu_format_show, NULL), \
+		{ .attr = __ATTR(_name, S_IRUGO, device_show_string, NULL), \
 		  .var = (void *) _config, } \
 	})[0].attr.attr)
 
@@ -1102,6 +1093,7 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
 
 	/* Perf driver registration */
 	pmu_dev->pmu = (struct pmu) {
+		.parent = pmu_dev->parent->dev,
 		.attr_groups = pmu_dev->attr_groups,
 		.task_ctx_nr = perf_invalid_context,
 		.pmu_enable = xgene_perf_pmu_enable,
@@ -1937,7 +1929,7 @@ xgene_pmu_dev_cleanup(struct xgene_pmu *xgene_pmu, struct list_head *pmus)
 	}
 }
 
-static int xgene_pmu_remove(struct platform_device *pdev)
+static void xgene_pmu_remove(struct platform_device *pdev)
 {
 	struct xgene_pmu *xgene_pmu = dev_get_drvdata(&pdev->dev);
 
@@ -1947,8 +1939,6 @@ static int xgene_pmu_remove(struct platform_device *pdev)
 	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcpmus);
 	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
 				    &xgene_pmu->node);
-
-	return 0;
 }
 
 static struct platform_driver xgene_pmu_driver = {