summaryrefslogtreecommitdiff
path: root/drivers/perf
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/perf')
-rw-r--r--drivers/perf/Kconfig50
-rw-r--r--drivers/perf/Makefile6
-rw-r--r--drivers/perf/alibaba_uncore_drw_pmu.c29
-rw-r--r--drivers/perf/amlogic/meson_ddr_pmu_core.c1
-rw-r--r--drivers/perf/amlogic/meson_g12_ddr_pmu.c4
-rw-r--r--drivers/perf/apple_m1_cpu_pmu.c184
-rw-r--r--drivers/perf/arm-cci.c19
-rw-r--r--drivers/perf/arm-ccn.c17
-rw-r--r--drivers/perf/arm-cmn.c461
-rw-r--r--drivers/perf/arm-ni.c780
-rw-r--r--drivers/perf/arm_cspmu/ampere_cspmu.c1
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.c177
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.h8
-rw-r--r--drivers/perf/arm_cspmu/nvidia_cspmu.c82
-rw-r--r--drivers/perf/arm_dmc620_pmu.c14
-rw-r--r--drivers/perf/arm_dsu_pmu.c35
-rw-r--r--drivers/perf/arm_pmu.c11
-rw-r--r--drivers/perf/arm_pmu_platform.c3
-rw-r--r--drivers/perf/arm_pmuv3.c200
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c28
-rw-r--r--drivers/perf/arm_spe_pmu.c37
-rw-r--r--drivers/perf/arm_v6_pmu.c432
-rw-r--r--drivers/perf/arm_v7_pmu.c1975
-rw-r--r--drivers/perf/arm_xscale_pmu.c749
-rw-r--r--drivers/perf/cxl_pmu.c27
-rw-r--r--drivers/perf/dwc_pcie_pmu.c114
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c5
-rw-r--r--drivers/perf/fsl_imx9_ddr_perf.c378
-rw-r--r--drivers/perf/hisilicon/hisi_pcie_pmu.c173
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c45
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c64
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_hha_pmu.c51
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c47
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pa_pmu.c56
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c182
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.h53
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c46
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_uc_pmu.c87
-rw-r--r--drivers/perf/hisilicon/hns3_pmu.c29
-rw-r--r--drivers/perf/marvell_cn10k_ddr_pmu.c534
-rw-r--r--drivers/perf/marvell_cn10k_tad_pmu.c70
-rw-r--r--drivers/perf/marvell_pem_pmu.c425
-rw-r--r--drivers/perf/qcom_l2_pmu.c12
-rw-r--r--drivers/perf/qcom_l3_pmu.c12
-rw-r--r--drivers/perf/riscv_pmu.c11
-rw-r--r--drivers/perf/riscv_pmu_legacy.c5
-rw-r--r--drivers/perf/riscv_pmu_sbi.c460
-rw-r--r--drivers/perf/starfive_starlink_pmu.c642
-rw-r--r--drivers/perf/thunderx2_pmu.c33
-rw-r--r--drivers/perf/xgene_pmu.c16
50 files changed, 7476 insertions, 1404 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index ec6e0d9194a1..4e268de351c4 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -48,6 +48,13 @@ config ARM_CMN
Support for PMU events monitoring on the Arm CMN-600 Coherent Mesh
Network interconnect.
+config ARM_NI
+ tristate "Arm NI-700 PMU support"
+ depends on ARM64 || COMPILE_TEST
+ help
+ Support for PMU events monitoring on the Arm NI-700 Network-on-Chip
+ interconnect and family.
+
config ARM_PMU
depends on ARM || ARM64
bool "ARM PMU framework"
@@ -56,6 +63,18 @@ config ARM_PMU
Say y if you want to use CPU performance monitors on ARM-based
systems.
+config ARM_V6_PMU
+ depends on ARM_PMU && (CPU_V6 || CPU_V6K)
+ def_bool y
+
+config ARM_V7_PMU
+ depends on ARM_PMU && CPU_V7
+ def_bool y
+
+config ARM_XSCALE_PMU
+ depends on ARM_PMU && CPU_XSCALE
+ def_bool y
+
config RISCV_PMU
depends on RISCV
bool "RISC-V PMU framework"
@@ -86,6 +105,30 @@ config RISCV_PMU_SBI
full perf feature support i.e. counter overflow, privilege mode
filtering, counter configuration.
+config STARFIVE_STARLINK_PMU
+ depends on ARCH_STARFIVE || COMPILE_TEST
+ depends on 64BIT
+ bool "StarFive StarLink PMU"
+ help
+ Provide support for StarLink Performance Monitor Unit.
+ StarLink Performance Monitor Unit integrates one or more cores with
+ an L3 memory system. The L3 cache events are added into perf event
+ subsystem, allowing monitoring of various L3 cache perf events.
+
+config ANDES_CUSTOM_PMU
+ bool "Andes custom PMU support"
+ depends on ARCH_RENESAS && RISCV_ALTERNATIVE && RISCV_PMU_SBI
+ default y
+ help
+ The Andes cores implement the PMU overflow extension very
+ similar to the standard Sscofpmf and Smcntrpmf extension.
+
+ This will patch the overflow and pending CSRs and handle the
+ non-standard behaviour via the regular SBI PMU driver and
+ interface.
+
+ If you don't know what to do here, say "Y".
+
config ARM_PMU_ACPI
depends on ARM_PMU && ACPI
def_bool y
@@ -241,4 +284,11 @@ config CXL_PMU
If unsure say 'm'.
+config MARVELL_PEM_PMU
+ tristate "MARVELL PEM PMU Support"
+ depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
+ help
+ Enable support for PCIe Interface performance monitoring
+ on Marvell platform.
+
endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index a06338e3401c..de71d2574857 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -3,9 +3,13 @@ obj-$(CONFIG_ARM_CCI_PMU) += arm-cci.o
obj-$(CONFIG_ARM_CCN) += arm-ccn.o
obj-$(CONFIG_ARM_CMN) += arm-cmn.o
obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
+obj-$(CONFIG_ARM_NI) += arm-ni.o
obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o
+obj-$(CONFIG_ARM_V6_PMU) += arm_v6_pmu.o
+obj-$(CONFIG_ARM_V7_PMU) += arm_v7_pmu.o
+obj-$(CONFIG_ARM_XSCALE_PMU) += arm_xscale_pmu.o
obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o
@@ -15,12 +19,14 @@ obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o
obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o
obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o
+obj-$(CONFIG_STARFIVE_STARLINK_PMU) += starfive_starlink_pmu.o
obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
+obj-$(CONFIG_MARVELL_PEM_PMU) += marvell_pem_pmu.o
obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
obj-$(CONFIG_DWC_PCIE_PMU) += dwc_pcie_pmu.o
diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c
index 19d459a36be5..99a0ef9817e0 100644
--- a/drivers/perf/alibaba_uncore_drw_pmu.c
+++ b/drivers/perf/alibaba_uncore_drw_pmu.c
@@ -236,24 +236,16 @@ static const struct attribute_group ali_drw_pmu_cpumask_attr_group = {
.attrs = ali_drw_pmu_cpumask_attrs,
};
-static ssize_t ali_drw_pmu_identifier_show(struct device *dev,
- struct device_attribute *attr,
- char *page)
-{
- return sysfs_emit(page, "%s\n", "ali_drw_pmu");
-}
-
static umode_t ali_drw_pmu_identifier_attr_visible(struct kobject *kobj,
struct attribute *attr, int n)
{
return attr->mode;
}
-static struct device_attribute ali_drw_pmu_identifier_attr =
- __ATTR(identifier, 0444, ali_drw_pmu_identifier_show, NULL);
+static DEVICE_STRING_ATTR_RO(ali_drw_pmu_identifier, 0444, "ali_drw_pmu");
static struct attribute *ali_drw_pmu_identifier_attrs[] = {
- &ali_drw_pmu_identifier_attr.attr,
+ &dev_attr_ali_drw_pmu_identifier.attr.attr,
NULL
};
@@ -408,7 +400,7 @@ static irqreturn_t ali_drw_pmu_isr(int irq_num, void *data)
}
/* clear common counter intr status */
- clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, 1);
+ clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, status);
writel(clr_status,
drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR);
}
@@ -709,6 +701,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev)
drw_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.event_init = ali_drw_pmu_event_init,
.add = ali_drw_pmu_add,
@@ -729,7 +722,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int ali_drw_pmu_remove(struct platform_device *pdev)
+static void ali_drw_pmu_remove(struct platform_device *pdev)
{
struct ali_drw_pmu *drw_pmu = platform_get_drvdata(pdev);
@@ -739,8 +732,6 @@ static int ali_drw_pmu_remove(struct platform_device *pdev)
ali_drw_pmu_uninit_irq(drw_pmu);
perf_pmu_unregister(&drw_pmu->pmu);
-
- return 0;
}
static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
@@ -748,18 +739,14 @@ static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
struct ali_drw_pmu_irq *irq;
struct ali_drw_pmu *drw_pmu;
unsigned int target;
- int ret;
- cpumask_t node_online_cpus;
irq = hlist_entry_safe(node, struct ali_drw_pmu_irq, node);
if (cpu != irq->cpu)
return 0;
- ret = cpumask_and(&node_online_cpus,
- cpumask_of_node(cpu_to_node(cpu)), cpu_online_mask);
- if (ret)
- target = cpumask_any_but(&node_online_cpus, cpu);
- else
+ target = cpumask_any_and_but(cpumask_of_node(cpu_to_node(cpu)),
+ cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
index bbc7285fd934..07446d784a1a 100644
--- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -492,6 +492,7 @@ int meson_ddr_pmu_create(struct platform_device *pdev)
*pmu = (struct ddr_pmu) {
.pmu = {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.attr_groups = attr_groups,
diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
index 15d52ab3276a..f33e9a456e85 100644
--- a/drivers/perf/amlogic/meson_g12_ddr_pmu.c
+++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
@@ -355,11 +355,9 @@ static int g12_ddr_pmu_probe(struct platform_device *pdev)
return meson_ddr_pmu_create(pdev);
}
-static int g12_ddr_pmu_remove(struct platform_device *pdev)
+static void g12_ddr_pmu_remove(struct platform_device *pdev)
{
meson_ddr_pmu_remove(pdev);
-
- return 0;
}
static const struct of_device_id meson_ddr_pmu_dt_match[] = {
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
index f322e5ca1114..06fd317529fc 100644
--- a/drivers/perf/apple_m1_cpu_pmu.c
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -47,46 +47,79 @@
* implementations, we'll have to introduce per cpu-type tables.
*/
enum m1_pmu_events {
- M1_PMU_PERFCTR_UNKNOWN_01 = 0x01,
- M1_PMU_PERFCTR_CPU_CYCLES = 0x02,
- M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c,
- M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d,
- M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e,
- M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f,
- M1_PMU_PERFCTR_UNKNOWN_90 = 0x90,
- M1_PMU_PERFCTR_UNKNOWN_93 = 0x93,
- M1_PMU_PERFCTR_UNKNOWN_94 = 0x94,
- M1_PMU_PERFCTR_UNKNOWN_95 = 0x95,
- M1_PMU_PERFCTR_UNKNOWN_96 = 0x96,
- M1_PMU_PERFCTR_UNKNOWN_97 = 0x97,
- M1_PMU_PERFCTR_UNKNOWN_98 = 0x98,
- M1_PMU_PERFCTR_UNKNOWN_99 = 0x99,
- M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a,
- M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b,
- M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c,
- M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f,
- M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf,
- M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0,
- M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1,
- M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4,
- M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5,
- M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6,
- M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8,
- M1_PMU_PERFCTR_UNKNOWN_ca = 0xca,
- M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb,
- M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5,
- M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6,
- M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7,
- M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8,
- M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd,
- M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT,
+ M1_PMU_PERFCTR_RETIRE_UOP = 0x1,
+ M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE = 0x2,
+ M1_PMU_PERFCTR_L1I_TLB_FILL = 0x4,
+ M1_PMU_PERFCTR_L1D_TLB_FILL = 0x5,
+ M1_PMU_PERFCTR_MMU_TABLE_WALK_INSTRUCTION = 0x7,
+ M1_PMU_PERFCTR_MMU_TABLE_WALK_DATA = 0x8,
+ M1_PMU_PERFCTR_L2_TLB_MISS_INSTRUCTION = 0xa,
+ M1_PMU_PERFCTR_L2_TLB_MISS_DATA = 0xb,
+ M1_PMU_PERFCTR_MMU_VIRTUAL_MEMORY_FAULT_NONSPEC = 0xd,
+ M1_PMU_PERFCTR_SCHEDULE_UOP = 0x52,
+ M1_PMU_PERFCTR_INTERRUPT_PENDING = 0x6c,
+ M1_PMU_PERFCTR_MAP_STALL_DISPATCH = 0x70,
+ M1_PMU_PERFCTR_MAP_REWIND = 0x75,
+ M1_PMU_PERFCTR_MAP_STALL = 0x76,
+ M1_PMU_PERFCTR_MAP_INT_UOP = 0x7c,
+ M1_PMU_PERFCTR_MAP_LDST_UOP = 0x7d,
+ M1_PMU_PERFCTR_MAP_SIMD_UOP = 0x7e,
+ M1_PMU_PERFCTR_FLUSH_RESTART_OTHER_NONSPEC = 0x84,
+ M1_PMU_PERFCTR_INST_ALL = 0x8c,
+ M1_PMU_PERFCTR_INST_BRANCH = 0x8d,
+ M1_PMU_PERFCTR_INST_BRANCH_CALL = 0x8e,
+ M1_PMU_PERFCTR_INST_BRANCH_RET = 0x8f,
+ M1_PMU_PERFCTR_INST_BRANCH_TAKEN = 0x90,
+ M1_PMU_PERFCTR_INST_BRANCH_INDIR = 0x93,
+ M1_PMU_PERFCTR_INST_BRANCH_COND = 0x94,
+ M1_PMU_PERFCTR_INST_INT_LD = 0x95,
+ M1_PMU_PERFCTR_INST_INT_ST = 0x96,
+ M1_PMU_PERFCTR_INST_INT_ALU = 0x97,
+ M1_PMU_PERFCTR_INST_SIMD_LD = 0x98,
+ M1_PMU_PERFCTR_INST_SIMD_ST = 0x99,
+ M1_PMU_PERFCTR_INST_SIMD_ALU = 0x9a,
+ M1_PMU_PERFCTR_INST_LDST = 0x9b,
+ M1_PMU_PERFCTR_INST_BARRIER = 0x9c,
+ M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f,
+ M1_PMU_PERFCTR_L1D_TLB_ACCESS = 0xa0,
+ M1_PMU_PERFCTR_L1D_TLB_MISS = 0xa1,
+ M1_PMU_PERFCTR_L1D_CACHE_MISS_ST = 0xa2,
+ M1_PMU_PERFCTR_L1D_CACHE_MISS_LD = 0xa3,
+ M1_PMU_PERFCTR_LD_UNIT_UOP = 0xa6,
+ M1_PMU_PERFCTR_ST_UNIT_UOP = 0xa7,
+ M1_PMU_PERFCTR_L1D_CACHE_WRITEBACK = 0xa8,
+ M1_PMU_PERFCTR_LDST_X64_UOP = 0xb1,
+ M1_PMU_PERFCTR_LDST_XPG_UOP = 0xb2,
+ M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_SUCC = 0xb3,
+ M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_FAIL = 0xb4,
+ M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC = 0xbf,
+ M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC = 0xc0,
+ M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC = 0xc1,
+ M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC = 0xc4,
+ M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC = 0xc5,
+ M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC = 0xc6,
+ M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC = 0xc8,
+ M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC = 0xca,
+ M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC = 0xcb,
+ M1_PMU_PERFCTR_L1I_TLB_MISS_DEMAND = 0xd4,
+ M1_PMU_PERFCTR_MAP_DISPATCH_BUBBLE = 0xd6,
+ M1_PMU_PERFCTR_L1I_CACHE_MISS_DEMAND = 0xdb,
+ M1_PMU_PERFCTR_FETCH_RESTART = 0xde,
+ M1_PMU_PERFCTR_ST_NT_UOP = 0xe5,
+ M1_PMU_PERFCTR_LD_NT_UOP = 0xe6,
+ M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5,
+ M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6,
+ M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7,
+ M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8,
+ M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd,
+ M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT,
/*
* From this point onwards, these are not actual HW events,
* but attributes that get stored in hw->config_base.
*/
- M1_PMU_CFG_COUNT_USER = BIT(8),
- M1_PMU_CFG_COUNT_KERNEL = BIT(9),
+ M1_PMU_CFG_COUNT_USER = BIT(8),
+ M1_PMU_CFG_COUNT_KERNEL = BIT(9),
};
/*
@@ -96,46 +129,47 @@ enum m1_pmu_events {
* counters had strange affinities.
*/
static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = {
- [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1,
- [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7),
- [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0),
- [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1),
- [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7),
- [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7),
- [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7),
- [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6,
- [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6,
- [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6,
- [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7,
- [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6,
+ [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1,
+ [M1_PMU_PERFCTR_RETIRE_UOP] = BIT(7),
+ [M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE] = ANY_BUT_0_1 | BIT(0),
+ [M1_PMU_PERFCTR_INST_ALL] = BIT(7) | BIT(1),
+ [M1_PMU_PERFCTR_INST_BRANCH] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_BRANCH_CALL] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_BRANCH_RET] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_BRANCH_TAKEN] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_BRANCH_INDIR] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_BRANCH_COND] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_INT_LD] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_INT_ST] = BIT(7),
+ [M1_PMU_PERFCTR_INST_INT_ALU] = BIT(7),
+ [M1_PMU_PERFCTR_INST_SIMD_LD] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_SIMD_ST] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_SIMD_ALU] = BIT(7),
+ [M1_PMU_PERFCTR_INST_LDST] = BIT(7),
+ [M1_PMU_PERFCTR_INST_BARRIER] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7),
+ [M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6,
+ [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6,
+ [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6,
+ [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7,
+ [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6,
};
static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS,
- /* No idea about the rest yet */
+ [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE,
+ [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_ALL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_BRANCH,
+ [PERF_COUNT_HW_BRANCH_MISSES] = M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC,
};
/* sysfs definitions */
@@ -154,8 +188,8 @@ static ssize_t m1_pmu_events_sysfs_show(struct device *dev,
PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config)
static struct attribute *m1_pmu_event_attrs[] = {
- M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES),
- M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS),
+ M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE),
+ M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INST_ALL),
NULL,
};
@@ -400,7 +434,7 @@ static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu)
regs = get_irq_regs();
- for (idx = 0; idx < cpu_pmu->num_events; idx++) {
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, M1_PMU_NR_COUNTERS) {
struct perf_event *event = cpuc->events[idx];
struct perf_sample_data data;
@@ -560,7 +594,7 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu, u32 flags)
cpu_pmu->reset = m1_pmu_reset;
cpu_pmu->set_event_filter = m1_pmu_set_event_filter;
- cpu_pmu->num_events = M1_PMU_NR_COUNTERS;
+ bitmap_set(cpu_pmu->cntr_mask, 0, M1_PMU_NR_COUNTERS);
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group;
return 0;
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 61de861eaf91..1cc3214d6b6d 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -127,8 +127,6 @@ enum cci_models {
static void pmu_write_counters(struct cci_pmu *cci_pmu,
unsigned long *mask);
-static ssize_t __maybe_unused cci_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf);
static ssize_t __maybe_unused cci_pmu_event_show(struct device *dev,
struct device_attribute *attr, char *buf);
@@ -138,7 +136,7 @@ static ssize_t __maybe_unused cci_pmu_event_show(struct device *dev,
})[0].attr.attr
#define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \
- CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config)
+ CCI_EXT_ATTR_ENTRY(_name, device_show_string, _config)
#define CCI_EVENT_EXT_ATTR_ENTRY(_name, _config) \
CCI_EXT_ATTR_ENTRY(_name, cci_pmu_event_show, (unsigned long)_config)
@@ -688,14 +686,6 @@ static void __cci_pmu_disable(struct cci_pmu *cci_pmu)
writel(val, cci_pmu->ctrl_base + CCI_PMCR);
}
-static ssize_t cci_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *eattr = container_of(attr,
- struct dev_ext_attribute, attr);
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-
static ssize_t cci_pmu_event_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1409,6 +1399,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
cci_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.name = cci_pmu->model->name,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = cci_pmu_enable,
@@ -1697,16 +1688,14 @@ error_pmu_init:
return ret;
}
-static int cci_pmu_remove(struct platform_device *pdev)
+static void cci_pmu_remove(struct platform_device *pdev)
{
if (!g_cci_pmu)
- return 0;
+ return;
cpuhp_remove_state(CPUHP_AP_PERF_ARM_CCI_ONLINE);
perf_pmu_unregister(&g_cci_pmu->pmu);
g_cci_pmu = NULL;
-
- return 0;
}
static struct platform_driver cci_pmu_driver = {
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 728d13d8e98a..d5fcea3d4328 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -215,18 +215,9 @@ static void arm_ccn_pmu_config_set(u64 *config, u32 node_xp, u32 type, u32 port)
*config |= (node_xp << 0) | (type << 8) | (port << 24);
}
-static ssize_t arm_ccn_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *ea = container_of(attr,
- struct dev_ext_attribute, attr);
-
- return sysfs_emit(buf, "%s\n", (char *)ea->var);
-}
-
#define CCN_FORMAT_ATTR(_name, _config) \
struct dev_ext_attribute arm_ccn_pmu_format_attr_##_name = \
- { __ATTR(_name, S_IRUGO, arm_ccn_pmu_format_show, \
+ { __ATTR(_name, S_IRUGO, device_show_string, \
NULL), _config }
static CCN_FORMAT_ATTR(node, "config:0-7");
@@ -1265,6 +1256,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
/* Perf driver registration */
ccn->dt.pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = ccn->dev,
.attr_groups = arm_ccn_pmu_attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = arm_ccn_pmu_event_init,
@@ -1515,13 +1507,11 @@ static int arm_ccn_probe(struct platform_device *pdev)
return arm_ccn_pmu_init(ccn);
}
-static int arm_ccn_remove(struct platform_device *pdev)
+static void arm_ccn_remove(struct platform_device *pdev)
{
struct arm_ccn *ccn = platform_get_drvdata(pdev);
arm_ccn_pmu_cleanup(ccn);
-
- return 0;
}
static const struct of_device_id arm_ccn_match[] = {
@@ -1571,4 +1561,5 @@ module_init(arm_ccn_init);
module_exit(arm_ccn_exit);
MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>");
+MODULE_DESCRIPTION("ARM CCN (Cache Coherent Network) Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 7e3aa7e2345f..ef959e66db7c 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -24,14 +24,6 @@
#define CMN_NI_NODE_ID GENMASK_ULL(31, 16)
#define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32)
-#define CMN_NODEID_DEVID(reg) ((reg) & 3)
-#define CMN_NODEID_EXT_DEVID(reg) ((reg) & 1)
-#define CMN_NODEID_PID(reg) (((reg) >> 2) & 1)
-#define CMN_NODEID_EXT_PID(reg) (((reg) >> 1) & 3)
-#define CMN_NODEID_1x1_PID(reg) (((reg) >> 2) & 7)
-#define CMN_NODEID_X(reg, bits) ((reg) >> (3 + (bits)))
-#define CMN_NODEID_Y(reg, bits) (((reg) >> 3) & ((1U << (bits)) - 1))
-
#define CMN_CHILD_INFO 0x0080
#define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0)
#define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16)
@@ -43,6 +35,9 @@
#define CMN_MAX_XPS (CMN_MAX_DIMENSION * CMN_MAX_DIMENSION)
#define CMN_MAX_DTMS (CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4)
+/* Currently XPs are the node type we can have most of; others top out at 128 */
+#define CMN_MAX_NODES_PER_EVENT CMN_MAX_XPS
+
/* The CFG node has various info besides the discovery tree */
#define CMN_CFGM_PERIPH_ID_01 0x0008
#define CMN_CFGM_PID0_PART_0 GENMASK_ULL(7, 0)
@@ -50,24 +45,28 @@
#define CMN_CFGM_PERIPH_ID_23 0x0010
#define CMN_CFGM_PID2_REVISION GENMASK_ULL(7, 4)
-#define CMN_CFGM_INFO_GLOBAL 0x900
+#define CMN_CFGM_INFO_GLOBAL 0x0900
#define CMN_INFO_MULTIPLE_DTM_EN BIT_ULL(63)
#define CMN_INFO_RSP_VC_NUM GENMASK_ULL(53, 52)
#define CMN_INFO_DAT_VC_NUM GENMASK_ULL(51, 50)
+#define CMN_INFO_DEVICE_ISO_ENABLE BIT_ULL(44)
-#define CMN_CFGM_INFO_GLOBAL_1 0x908
+#define CMN_CFGM_INFO_GLOBAL_1 0x0908
#define CMN_INFO_SNP_VC_NUM GENMASK_ULL(3, 2)
#define CMN_INFO_REQ_VC_NUM GENMASK_ULL(1, 0)
/* XPs also have some local topology info which has uses too */
#define CMN_MXP__CONNECT_INFO(p) (0x0008 + 8 * (p))
-#define CMN__CONNECT_INFO_DEVICE_TYPE GENMASK_ULL(4, 0)
+#define CMN__CONNECT_INFO_DEVICE_TYPE GENMASK_ULL(5, 0)
#define CMN_MAX_PORTS 6
#define CI700_CONNECT_INFO_P2_5_OFFSET 0x10
/* PMU registers occupy the 3rd 4KB page of each node's region */
#define CMN_PMU_OFFSET 0x2000
+/* ...except when they don't :( */
+#define CMN_S3_DTM_OFFSET 0xa000
+#define CMN_S3_PMU_OFFSET 0xd900
/* For most nodes, this is all there is */
#define CMN_PMU_EVENT_SEL 0x000
@@ -78,7 +77,8 @@
/* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */
#define CMN__PMU_OCCUP1_ID GENMASK_ULL(34, 32)
-/* HN-Ps are weird... */
+/* Some types are designed to coexist with another device in the same node */
+#define CMN_CCLA_PMU_EVENT_SEL 0x008
#define CMN_HNP_PMU_EVENT_SEL 0x008
/* DTMs live in the PMU space of XP registers */
@@ -123,27 +123,28 @@
/* The DTC node is where the magic happens */
#define CMN_DT_DTC_CTL 0x0a00
#define CMN_DT_DTC_CTL_DT_EN BIT(0)
+#define CMN_DT_DTC_CTL_CG_DISABLE BIT(10)
/* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */
#define _CMN_DT_CNT_REG(n) ((((n) / 2) * 4 + (n) % 2) * 4)
-#define CMN_DT_PMEVCNT(n) (CMN_PMU_OFFSET + _CMN_DT_CNT_REG(n))
-#define CMN_DT_PMCCNTR (CMN_PMU_OFFSET + 0x40)
+#define CMN_DT_PMEVCNT(dtc, n) ((dtc)->pmu_base + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTR(dtc) ((dtc)->pmu_base + 0x40)
-#define CMN_DT_PMEVCNTSR(n) (CMN_PMU_OFFSET + 0x50 + _CMN_DT_CNT_REG(n))
-#define CMN_DT_PMCCNTRSR (CMN_PMU_OFFSET + 0x90)
+#define CMN_DT_PMEVCNTSR(dtc, n) ((dtc)->pmu_base + 0x50 + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTRSR(dtc) ((dtc)->pmu_base + 0x90)
-#define CMN_DT_PMCR (CMN_PMU_OFFSET + 0x100)
+#define CMN_DT_PMCR(dtc) ((dtc)->pmu_base + 0x100)
#define CMN_DT_PMCR_PMU_EN BIT(0)
#define CMN_DT_PMCR_CNTR_RST BIT(5)
#define CMN_DT_PMCR_OVFL_INTR_EN BIT(6)
-#define CMN_DT_PMOVSR (CMN_PMU_OFFSET + 0x118)
-#define CMN_DT_PMOVSR_CLR (CMN_PMU_OFFSET + 0x120)
+#define CMN_DT_PMOVSR(dtc) ((dtc)->pmu_base + 0x118)
+#define CMN_DT_PMOVSR_CLR(dtc) ((dtc)->pmu_base + 0x120)
-#define CMN_DT_PMSSR (CMN_PMU_OFFSET + 0x128)
+#define CMN_DT_PMSSR(dtc) ((dtc)->pmu_base + 0x128)
#define CMN_DT_PMSSR_SS_STATUS(n) BIT(n)
-#define CMN_DT_PMSRR (CMN_PMU_OFFSET + 0x130)
+#define CMN_DT_PMSRR(dtc) ((dtc)->pmu_base + 0x130)
#define CMN_DT_PMSRR_SS_REQ BIT(0)
#define CMN_DT_NUM_COUNTERS 8
@@ -174,9 +175,8 @@
#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(30, 27)
#define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48)
#define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51)
-/* Note that we don't yet support the tertiary match group on newer IPs */
-#define CMN_CONFIG_WP_GRP BIT_ULL(56)
-#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57)
+#define CMN_CONFIG_WP_GRP GENMASK_ULL(57, 56)
+#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(58)
#define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0)
#define CMN_CONFIG2_WP_MASK GENMASK_ULL(63, 0)
@@ -199,10 +199,11 @@ enum cmn_model {
CMN650 = 2,
CMN700 = 4,
CI700 = 8,
+ CMNS3 = 16,
/* ...and then we can use bitmap tricks for commonality */
CMN_ANY = -1,
NOT_CMN600 = -2,
- CMN_650ON = CMN650 | CMN700,
+ CMN_650ON = CMN650 | CMN700 | CMNS3,
};
/* Actual part numbers and revision IDs defined by the hardware */
@@ -211,6 +212,7 @@ enum cmn_part {
PART_CMN650 = 0x436,
PART_CMN700 = 0x43c,
PART_CI700 = 0x43a,
+ PART_CMN_S3 = 0x43e,
};
/* CMN-600 r0px shouldn't exist in silicon, thankfully */
@@ -262,6 +264,7 @@ enum cmn_node_type {
CMN_TYPE_HNS = 0x200,
CMN_TYPE_HNS_MPAM_S,
CMN_TYPE_HNS_MPAM_NS,
+ CMN_TYPE_APB = 0x1000,
/* Not a real node type */
CMN_TYPE_WP = 0x7770
};
@@ -281,8 +284,11 @@ struct arm_cmn_node {
u16 id, logid;
enum cmn_node_type type;
+ /* XP properties really, but replicated to children for convenience */
u8 dtm;
s8 dtc;
+ u8 portid_bits:4;
+ u8 deviceid_bits:4;
/* DN/HN-F/CXHA */
struct {
u8 val : 4;
@@ -308,8 +314,9 @@ struct arm_cmn_dtm {
struct arm_cmn_dtc {
void __iomem *base;
+ void __iomem *pmu_base;
int irq;
- int irq_friend;
+ s8 irq_friend;
bool cc_active;
struct perf_event *counters[CMN_DT_NUM_COUNTERS];
@@ -358,49 +365,33 @@ struct arm_cmn {
static int arm_cmn_hp_state;
struct arm_cmn_nodeid {
- u8 x;
- u8 y;
u8 port;
u8 dev;
};
static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
{
- return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2);
+ return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1));
}
-static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
+static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn_node *dn)
{
struct arm_cmn_nodeid nid;
- if (cmn->num_xps == 1) {
- nid.x = 0;
- nid.y = 0;
- nid.port = CMN_NODEID_1x1_PID(id);
- nid.dev = CMN_NODEID_DEVID(id);
- } else {
- int bits = arm_cmn_xyidbits(cmn);
-
- nid.x = CMN_NODEID_X(id, bits);
- nid.y = CMN_NODEID_Y(id, bits);
- if (cmn->ports_used & 0xc) {
- nid.port = CMN_NODEID_EXT_PID(id);
- nid.dev = CMN_NODEID_EXT_DEVID(id);
- } else {
- nid.port = CMN_NODEID_PID(id);
- nid.dev = CMN_NODEID_DEVID(id);
- }
- }
+ nid.dev = dn->id & ((1U << dn->deviceid_bits) - 1);
+ nid.port = (dn->id >> dn->deviceid_bits) & ((1U << dn->portid_bits) - 1);
return nid;
}
static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
const struct arm_cmn_node *dn)
{
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
- int xp_idx = cmn->mesh_x * nid.y + nid.x;
+ int id = dn->id >> (dn->portid_bits + dn->deviceid_bits);
+ int bits = arm_cmn_xyidbits(cmn);
+ int x = id >> bits;
+ int y = id & ((1U << bits) - 1);
- return cmn->xps + xp_idx;
+ return cmn->xps + cmn->mesh_x * y + x;
}
static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
enum cmn_node_type type)
@@ -424,15 +415,27 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
return CMN700;
case PART_CI700:
return CI700;
+ case PART_CMN_S3:
+ return CMNS3;
default:
return 0;
};
}
+static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn)
+{
+ if (cmn->part == PART_CMN_S3) {
+ if (dn->type == CMN_TYPE_XP)
+ return CMN_S3_DTM_OFFSET;
+ return CMN_S3_PMU_OFFSET;
+ }
+ return CMN_PMU_OFFSET;
+}
+
static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn,
const struct arm_cmn_node *xp, int port)
{
- int offset = CMN_MXP__CONNECT_INFO(port);
+ int offset = CMN_MXP__CONNECT_INFO(port) - arm_cmn_pmu_offset(cmn, xp);
if (port >= 2) {
if (cmn->part == PART_CMN600 || cmn->part == PART_CMN650)
@@ -445,7 +448,7 @@ static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn,
offset += CI700_CONNECT_INFO_P2_5_OFFSET;
}
- return readl_relaxed(xp->pmu_base - CMN_PMU_OFFSET + offset);
+ return readl_relaxed(xp->pmu_base + offset);
}
static struct dentry *arm_cmn_debugfs;
@@ -479,20 +482,26 @@ static const char *arm_cmn_device_type(u8 type)
case 0x17: return "RN-F_C_E|";
case 0x18: return " RN-F_E |";
case 0x19: return "RN-F_E_E|";
+ case 0x1a: return " HN-S |";
+ case 0x1b: return " LCN |";
case 0x1c: return " MTSX |";
case 0x1d: return " HN-V |";
case 0x1e: return " CCG |";
+ case 0x20: return " RN-F_F |";
+ case 0x21: return "RN-F_F_E|";
+ case 0x22: return " SN-F_F |";
default: return " ???? |";
}
}
-static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
+static void arm_cmn_show_logid(struct seq_file *s, const struct arm_cmn_node *xp, int p, int d)
{
struct arm_cmn *cmn = s->private;
struct arm_cmn_node *dn;
+ u16 id = xp->id | d | (p << xp->deviceid_bits);
for (dn = cmn->dns; dn->type; dn++) {
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+ int pad = dn->logid < 10;
if (dn->type == CMN_TYPE_XP)
continue;
@@ -500,10 +509,10 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
if (dn->type < CMN_TYPE_HNI)
continue;
- if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d)
+ if (dn->id != id)
continue;
- seq_printf(s, " #%-2d |", dn->logid);
+ seq_printf(s, " %*c#%-*d |", pad + 1, ' ', 3 - pad, dn->logid);
return;
}
seq_puts(s, " |");
@@ -516,28 +525,27 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
seq_puts(s, " X");
for (x = 0; x < cmn->mesh_x; x++)
- seq_printf(s, " %d ", x);
+ seq_printf(s, " %-2d ", x);
seq_puts(s, "\nY P D+");
y = cmn->mesh_y;
while (y--) {
int xp_base = cmn->mesh_x * y;
+ struct arm_cmn_node *xp = cmn->xps + xp_base;
u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION];
for (x = 0; x < cmn->mesh_x; x++)
seq_puts(s, "--------+");
- seq_printf(s, "\n%d |", y);
+ seq_printf(s, "\n%-2d |", y);
for (x = 0; x < cmn->mesh_x; x++) {
- struct arm_cmn_node *xp = cmn->xps + xp_base + x;
-
for (p = 0; p < CMN_MAX_PORTS; p++)
- port[p][x] = arm_cmn_device_connect_info(cmn, xp, p);
- seq_printf(s, " XP #%-2d |", xp_base + x);
+ port[p][x] = arm_cmn_device_connect_info(cmn, xp + x, p);
+ seq_printf(s, " XP #%-3d|", xp_base + x);
}
seq_puts(s, "\n |");
for (x = 0; x < cmn->mesh_x; x++) {
- s8 dtc = cmn->xps[xp_base + x].dtc;
+ s8 dtc = xp[x].dtc;
if (dtc < 0)
seq_puts(s, " DTC ?? |");
@@ -554,10 +562,10 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
seq_puts(s, arm_cmn_device_type(port[p][x]));
seq_puts(s, "\n 0|");
for (x = 0; x < cmn->mesh_x; x++)
- arm_cmn_show_logid(s, x, y, p, 0);
+ arm_cmn_show_logid(s, xp + x, p, 0);
seq_puts(s, "\n 1|");
for (x = 0; x < cmn->mesh_x; x++)
- arm_cmn_show_logid(s, x, y, p, 1);
+ arm_cmn_show_logid(s, xp + x, p, 1);
}
seq_puts(s, "\n-----+");
}
@@ -585,13 +593,21 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
struct arm_cmn_hw_event {
struct arm_cmn_node *dn;
- u64 dtm_idx[4];
+ u64 dtm_idx[DIV_ROUND_UP(CMN_MAX_NODES_PER_EVENT * 2, 64)];
s8 dtc_idx[CMN_MAX_DTCS];
u8 num_dns;
u8 dtm_offset;
+
+ /*
+ * WP config registers are divided to UP and DOWN events. We need to
+ * keep to track only one of them.
+ */
+ DECLARE_BITMAP(wp_idx, CMN_MAX_XPS);
+
bool wide_sel;
enum cmn_filter_select filter_sel;
};
+static_assert(sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event, target));
#define for_each_hw_dn(hw, dn, i) \
for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
@@ -602,7 +618,6 @@ struct arm_cmn_hw_event {
static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
{
- BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
return (struct arm_cmn_hw_event *)&event->hw;
}
@@ -616,6 +631,17 @@ static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos)
return (x[pos / 32] >> ((pos % 32) * 2)) & 3;
}
+static void arm_cmn_set_wp_idx(unsigned long *wp_idx, unsigned int pos, bool val)
+{
+ if (val)
+ set_bit(pos, wp_idx);
+}
+
+static unsigned int arm_cmn_get_wp_idx(unsigned long *wp_idx, unsigned int pos)
+{
+ return test_bit(pos, wp_idx);
+}
+
struct arm_cmn_event_attr {
struct device_attribute attr;
enum cmn_model model;
@@ -772,8 +798,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
CMN_EVENT_ATTR(CMN_ANY, cxha_##_name, CMN_TYPE_CXHA, _event)
#define CMN_EVENT_CCRA(_name, _event) \
CMN_EVENT_ATTR(CMN_ANY, ccra_##_name, CMN_TYPE_CCRA, _event)
-#define CMN_EVENT_CCHA(_name, _event) \
- CMN_EVENT_ATTR(CMN_ANY, ccha_##_name, CMN_TYPE_CCHA, _event)
+#define CMN_EVENT_CCHA(_model, _name, _event) \
+ CMN_EVENT_ATTR(_model, ccha_##_name, CMN_TYPE_CCHA, _event)
#define CMN_EVENT_CCLA(_name, _event) \
CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
#define CMN_EVENT_CCLA_RNI(_name, _event) \
@@ -1131,42 +1157,43 @@ static struct attribute *arm_cmn_event_attrs[] = {
CMN_EVENT_CCRA(wdb_alloc, 0x59),
CMN_EVENT_CCRA(ssb_alloc, 0x5a),
- CMN_EVENT_CCHA(rddatbyp, 0x61),
- CMN_EVENT_CCHA(chirsp_up_stall, 0x62),
- CMN_EVENT_CCHA(chidat_up_stall, 0x63),
- CMN_EVENT_CCHA(snppcrd_link0_stall, 0x64),
- CMN_EVENT_CCHA(snppcrd_link1_stall, 0x65),
- CMN_EVENT_CCHA(snppcrd_link2_stall, 0x66),
- CMN_EVENT_CCHA(reqtrk_occ, 0x67),
- CMN_EVENT_CCHA(rdb_occ, 0x68),
- CMN_EVENT_CCHA(rdbyp_occ, 0x69),
- CMN_EVENT_CCHA(wdb_occ, 0x6a),
- CMN_EVENT_CCHA(snptrk_occ, 0x6b),
- CMN_EVENT_CCHA(sdb_occ, 0x6c),
- CMN_EVENT_CCHA(snphaz_occ, 0x6d),
- CMN_EVENT_CCHA(reqtrk_alloc, 0x6e),
- CMN_EVENT_CCHA(rdb_alloc, 0x6f),
- CMN_EVENT_CCHA(rdbyp_alloc, 0x70),
- CMN_EVENT_CCHA(wdb_alloc, 0x71),
- CMN_EVENT_CCHA(snptrk_alloc, 0x72),
- CMN_EVENT_CCHA(sdb_alloc, 0x73),
- CMN_EVENT_CCHA(snphaz_alloc, 0x74),
- CMN_EVENT_CCHA(pb_rhu_req_occ, 0x75),
- CMN_EVENT_CCHA(pb_rhu_req_alloc, 0x76),
- CMN_EVENT_CCHA(pb_rhu_pcie_req_occ, 0x77),
- CMN_EVENT_CCHA(pb_rhu_pcie_req_alloc, 0x78),
- CMN_EVENT_CCHA(pb_pcie_wr_req_occ, 0x79),
- CMN_EVENT_CCHA(pb_pcie_wr_req_alloc, 0x7a),
- CMN_EVENT_CCHA(pb_pcie_reg_req_occ, 0x7b),
- CMN_EVENT_CCHA(pb_pcie_reg_req_alloc, 0x7c),
- CMN_EVENT_CCHA(pb_pcie_rsvd_req_occ, 0x7d),
- CMN_EVENT_CCHA(pb_pcie_rsvd_req_alloc, 0x7e),
- CMN_EVENT_CCHA(pb_rhu_dat_occ, 0x7f),
- CMN_EVENT_CCHA(pb_rhu_dat_alloc, 0x80),
- CMN_EVENT_CCHA(pb_rhu_pcie_dat_occ, 0x81),
- CMN_EVENT_CCHA(pb_rhu_pcie_dat_alloc, 0x82),
- CMN_EVENT_CCHA(pb_pcie_wr_dat_occ, 0x83),
- CMN_EVENT_CCHA(pb_pcie_wr_dat_alloc, 0x84),
+ CMN_EVENT_CCHA(CMN_ANY, rddatbyp, 0x61),
+ CMN_EVENT_CCHA(CMN_ANY, chirsp_up_stall, 0x62),
+ CMN_EVENT_CCHA(CMN_ANY, chidat_up_stall, 0x63),
+ CMN_EVENT_CCHA(CMN_ANY, snppcrd_link0_stall, 0x64),
+ CMN_EVENT_CCHA(CMN_ANY, snppcrd_link1_stall, 0x65),
+ CMN_EVENT_CCHA(CMN_ANY, snppcrd_link2_stall, 0x66),
+ CMN_EVENT_CCHA(CMN_ANY, reqtrk_occ, 0x67),
+ CMN_EVENT_CCHA(CMN_ANY, rdb_occ, 0x68),
+ CMN_EVENT_CCHA(CMN_ANY, rdbyp_occ, 0x69),
+ CMN_EVENT_CCHA(CMN_ANY, wdb_occ, 0x6a),
+ CMN_EVENT_CCHA(CMN_ANY, snptrk_occ, 0x6b),
+ CMN_EVENT_CCHA(CMN_ANY, sdb_occ, 0x6c),
+ CMN_EVENT_CCHA(CMN_ANY, snphaz_occ, 0x6d),
+ CMN_EVENT_CCHA(CMN_ANY, reqtrk_alloc, 0x6e),
+ CMN_EVENT_CCHA(CMN_ANY, rdb_alloc, 0x6f),
+ CMN_EVENT_CCHA(CMN_ANY, rdbyp_alloc, 0x70),
+ CMN_EVENT_CCHA(CMN_ANY, wdb_alloc, 0x71),
+ CMN_EVENT_CCHA(CMN_ANY, snptrk_alloc, 0x72),
+ CMN_EVENT_CCHA(CMN_ANY, db_alloc, 0x73),
+ CMN_EVENT_CCHA(CMN_ANY, snphaz_alloc, 0x74),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_occ, 0x75),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_alloc, 0x76),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_occ, 0x77),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_alloc, 0x78),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_occ, 0x79),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_alloc, 0x7a),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_occ, 0x7b),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_alloc, 0x7c),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_occ, 0x7d),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_alloc, 0x7e),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_occ, 0x7f),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_alloc, 0x80),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_occ, 0x81),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_alloc, 0x82),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_occ, 0x83),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_alloc, 0x84),
+ CMN_EVENT_CCHA(CMNS3, chirsp1_up_stall, 0x85),
CMN_EVENT_CCLA(rx_cxs, 0x21),
CMN_EVENT_CCLA(tx_cxs, 0x22),
@@ -1253,15 +1280,11 @@ static ssize_t arm_cmn_format_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct arm_cmn_format_attr *fmt = container_of(attr, typeof(*fmt), attr);
- int lo = __ffs(fmt->field), hi = __fls(fmt->field);
-
- if (lo == hi)
- return sysfs_emit(buf, "config:%d\n", lo);
if (!fmt->config)
- return sysfs_emit(buf, "config:%d-%d\n", lo, hi);
+ return sysfs_emit(buf, "config:%*pbl\n", 64, &fmt->field);
- return sysfs_emit(buf, "config%d:%d-%d\n", fmt->config, lo, hi);
+ return sysfs_emit(buf, "config%d:%*pbl\n", fmt->config, 64, &fmt->field);
}
#define _CMN_FORMAT_ATTR(_name, _cfg, _fld) \
@@ -1335,12 +1358,37 @@ static const struct attribute_group *arm_cmn_attr_groups[] = {
NULL
};
-static int arm_cmn_wp_idx(struct perf_event *event)
+static int arm_cmn_find_free_wp_idx(struct arm_cmn_dtm *dtm,
+ struct perf_event *event)
{
- return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event);
+ int wp_idx = CMN_EVENT_EVENTID(event);
+
+ if (dtm->wp_event[wp_idx] >= 0)
+ if (dtm->wp_event[++wp_idx] >= 0)
+ return -ENOSPC;
+
+ return wp_idx;
+}
+
+static int arm_cmn_get_assigned_wp_idx(struct perf_event *event,
+ struct arm_cmn_hw_event *hw,
+ unsigned int pos)
+{
+ return CMN_EVENT_EVENTID(event) + arm_cmn_get_wp_idx(hw->wp_idx, pos);
+}
+
+static void arm_cmn_claim_wp_idx(struct arm_cmn_dtm *dtm,
+ struct perf_event *event,
+ unsigned int dtc, int wp_idx,
+ unsigned int pos)
+{
+ struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+
+ dtm->wp_event[wp_idx] = hw->dtc_idx[dtc];
+ arm_cmn_set_wp_idx(hw->wp_idx, pos, wp_idx - CMN_EVENT_EVENTID(event));
}
-static u32 arm_cmn_wp_config(struct perf_event *event)
+static u32 arm_cmn_wp_config(struct perf_event *event, int wp_idx)
{
u32 config;
u32 dev = CMN_EVENT_WP_DEV_SEL(event);
@@ -1350,6 +1398,10 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
u32 combine = CMN_EVENT_WP_COMBINE(event);
bool is_cmn600 = to_cmn(event->pmu)->part == PART_CMN600;
+ /* CMN-600 supports only primary and secondary matching groups */
+ if (is_cmn600)
+ grp &= 1;
+
config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) |
@@ -1357,7 +1409,9 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
if (exc)
config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE :
CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE;
- if (combine && !grp)
+
+ /* wp_combine is available only on WP0 and WP2 */
+ if (combine && !(wp_idx & 0x1))
config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_COMBINE :
CMN_DTM_WPn_CONFIG_WP_COMBINE;
return config;
@@ -1366,7 +1420,7 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
static void arm_cmn_set_state(struct arm_cmn *cmn, u32 state)
{
if (!cmn->state)
- writel_relaxed(0, cmn->dtc[0].base + CMN_DT_PMCR);
+ writel_relaxed(0, CMN_DT_PMCR(&cmn->dtc[0]));
cmn->state |= state;
}
@@ -1375,7 +1429,7 @@ static void arm_cmn_clear_state(struct arm_cmn *cmn, u32 state)
cmn->state &= ~state;
if (!cmn->state)
writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN,
- cmn->dtc[0].base + CMN_DT_PMCR);
+ CMN_DT_PMCR(&cmn->dtc[0]));
}
static void arm_cmn_pmu_enable(struct pmu *pmu)
@@ -1410,18 +1464,19 @@ static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw,
static u64 arm_cmn_read_cc(struct arm_cmn_dtc *dtc)
{
- u64 val = readq_relaxed(dtc->base + CMN_DT_PMCCNTR);
+ void __iomem *pmccntr = CMN_DT_PMCCNTR(dtc);
+ u64 val = readq_relaxed(pmccntr);
- writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
+ writeq_relaxed(CMN_CC_INIT, pmccntr);
return (val - CMN_CC_INIT) & ((CMN_CC_INIT << 1) - 1);
}
static u32 arm_cmn_read_counter(struct arm_cmn_dtc *dtc, int idx)
{
- u32 val, pmevcnt = CMN_DT_PMEVCNT(idx);
+ void __iomem *pmevcnt = CMN_DT_PMEVCNT(dtc, idx);
+ u32 val = readl_relaxed(pmevcnt);
- val = readl_relaxed(dtc->base + pmevcnt);
- writel_relaxed(CMN_COUNTER_INIT, dtc->base + pmevcnt);
+ writel_relaxed(CMN_COUNTER_INIT, pmevcnt);
return val - CMN_COUNTER_INIT;
}
@@ -1432,7 +1487,7 @@ static void arm_cmn_init_counter(struct perf_event *event)
u64 count;
for_each_hw_dtc_idx(hw, i, idx) {
- writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + CMN_DT_PMEVCNT(idx));
+ writel_relaxed(CMN_COUNTER_INIT, CMN_DT_PMEVCNT(&cmn->dtc[i], idx));
cmn->dtc[i].counters[idx] = event;
}
@@ -1515,16 +1570,19 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
int i;
if (type == CMN_TYPE_DTC) {
- i = hw->dtc_idx[0];
- writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
- cmn->dtc[i].cc_active = true;
+ struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
+
+ writel_relaxed(CMN_DT_DTC_CTL_DT_EN | CMN_DT_DTC_CTL_CG_DISABLE,
+ dtc->base + CMN_DT_DTC_CTL);
+ writeq_relaxed(CMN_CC_INIT, CMN_DT_PMCCNTR(dtc));
+ dtc->cc_active = true;
} else if (type == CMN_TYPE_WP) {
- int wp_idx = arm_cmn_wp_idx(event);
u64 val = CMN_EVENT_WP_VAL(event);
u64 mask = CMN_EVENT_WP_MASK(event);
for_each_hw_dn(hw, dn, i) {
void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
+ int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i);
writeq_relaxed(val, base + CMN_DTM_WPn_VAL(wp_idx));
writeq_relaxed(mask, base + CMN_DTM_WPn_MASK(wp_idx));
@@ -1546,13 +1604,14 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
int i;
if (type == CMN_TYPE_DTC) {
- i = hw->dtc_idx[0];
- cmn->dtc[i].cc_active = false;
- } else if (type == CMN_TYPE_WP) {
- int wp_idx = arm_cmn_wp_idx(event);
+ struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
+ dtc->cc_active = false;
+ writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
+ } else if (type == CMN_TYPE_WP) {
for_each_hw_dn(hw, dn, i) {
void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
+ int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i);
writeq_relaxed(0, base + CMN_DTM_WPn_MASK(wp_idx));
writeq_relaxed(~0ULL, base + CMN_DTM_WPn_VAL(wp_idx));
@@ -1570,10 +1629,23 @@ struct arm_cmn_val {
u8 dtm_count[CMN_MAX_DTMS];
u8 occupid[CMN_MAX_DTMS][SEL_MAX];
u8 wp[CMN_MAX_DTMS][4];
+ u8 wp_combine[CMN_MAX_DTMS][2];
int dtc_count[CMN_MAX_DTCS];
bool cycles;
};
+static int arm_cmn_val_find_free_wp_config(struct perf_event *event,
+ struct arm_cmn_val *val, int dtm)
+{
+ int wp_idx = CMN_EVENT_EVENTID(event);
+
+ if (val->wp[dtm][wp_idx])
+ if (val->wp[dtm][++wp_idx])
+ return -ENOSPC;
+
+ return wp_idx;
+}
+
static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
struct perf_event *event)
{
@@ -1605,8 +1677,9 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
if (type != CMN_TYPE_WP)
continue;
- wp_idx = arm_cmn_wp_idx(event);
- val->wp[dtm][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1;
+ wp_idx = arm_cmn_val_find_free_wp_config(event, val, dtm);
+ val->wp[dtm][wp_idx] = 1;
+ val->wp_combine[dtm][wp_idx >> 1] += !!CMN_EVENT_WP_COMBINE(event);
}
}
@@ -1630,6 +1703,7 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
return -ENOMEM;
arm_cmn_val_add_event(cmn, val, leader);
+
for_each_sibling_event(sibling, leader)
arm_cmn_val_add_event(cmn, val, sibling);
@@ -1639,12 +1713,12 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
goto done;
}
- for (i = 0; i < CMN_MAX_DTCS; i++)
- if (val->dtc_count[i] == CMN_DT_NUM_COUNTERS)
+ for_each_hw_dtc_idx(hw, dtc, idx)
+ if (val->dtc_count[dtc] == CMN_DT_NUM_COUNTERS)
goto done;
for_each_hw_dn(hw, dn, i) {
- int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel;
+ int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
if (val->dtm_count[dtm] == CMN_DTM_NUM_COUNTERS)
goto done;
@@ -1656,12 +1730,12 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
if (type != CMN_TYPE_WP)
continue;
- wp_idx = arm_cmn_wp_idx(event);
- if (val->wp[dtm][wp_idx])
+ wp_idx = arm_cmn_val_find_free_wp_config(event, val, dtm);
+ if (wp_idx < 0)
goto done;
- wp_cmb = val->wp[dtm][wp_idx ^ 1];
- if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1)
+ if (wp_idx & 1 &&
+ val->wp_combine[dtm][wp_idx >> 1] != !!CMN_EVENT_WP_COMBINE(event))
goto done;
}
@@ -1721,7 +1795,8 @@ static int arm_cmn_event_init(struct perf_event *event)
/* ...but the DTM may depend on which port we're watching */
if (cmn->multi_dtm)
hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
- } else if (type == CMN_TYPE_XP && cmn->part == PART_CMN700) {
+ } else if (type == CMN_TYPE_XP &&
+ (cmn->part == PART_CMN700 || cmn->part == PART_CMN_S3)) {
hw->wide_sel = true;
}
@@ -1752,10 +1827,7 @@ static int arm_cmn_event_init(struct perf_event *event)
}
if (!hw->num_dns) {
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid);
-
- dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n",
- nodeid, nid.x, nid.y, nid.port, nid.dev, type);
+ dev_dbg(cmn->dev, "invalid node 0x%x type 0x%x\n", nodeid, type);
return -EINVAL;
}
@@ -1772,8 +1844,11 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
struct arm_cmn_dtm *dtm = &cmn->dtms[hw->dn[i].dtm] + hw->dtm_offset;
unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
- if (type == CMN_TYPE_WP)
- dtm->wp_event[arm_cmn_wp_idx(event)] = -1;
+ if (type == CMN_TYPE_WP) {
+ int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i);
+
+ dtm->wp_event[wp_idx] = -1;
+ }
if (hw->filter_sel > SEL_NONE)
hw->dn[i].occupid[hw->filter_sel].count--;
@@ -1782,6 +1857,7 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
writel_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG);
}
memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
+ memset(hw->wp_idx, 0, sizeof(hw->wp_idx));
for_each_hw_dtc_idx(hw, j, idx)
cmn->dtc[j].counters[idx] = NULL;
@@ -1835,22 +1911,26 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
if (type == CMN_TYPE_XP) {
input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx;
} else if (type == CMN_TYPE_WP) {
- int tmp, wp_idx = arm_cmn_wp_idx(event);
- u32 cfg = arm_cmn_wp_config(event);
+ int tmp, wp_idx;
+ u32 cfg;
- if (dtm->wp_event[wp_idx] >= 0)
+ wp_idx = arm_cmn_find_free_wp_idx(dtm, event);
+ if (wp_idx < 0)
goto free_dtms;
+ cfg = arm_cmn_wp_config(event, wp_idx);
+
tmp = dtm->wp_event[wp_idx ^ 1];
if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
goto free_dtms;
input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
- dtm->wp_event[wp_idx] = hw->dtc_idx[d];
+
+ arm_cmn_claim_wp_idx(dtm, event, d, wp_idx, i);
writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
} else {
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+ struct arm_cmn_nodeid nid = arm_cmn_nid(dn);
if (cmn->multi_dtm)
nid.port %= 2;
@@ -1939,7 +2019,7 @@ static int arm_cmn_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_nod
cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node);
node = dev_to_node(cmn->dev);
- if (node != NUMA_NO_NODE && cpu_to_node(cmn->cpu) != node && cpu_to_node(cpu) == node)
+ if (cpu_to_node(cmn->cpu) != node && cpu_to_node(cpu) == node)
arm_cmn_migrate(cmn, cpu);
return 0;
}
@@ -1949,20 +2029,20 @@ static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_no
struct arm_cmn *cmn;
unsigned int target;
int node;
- cpumask_t mask;
cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node);
if (cpu != cmn->cpu)
return 0;
node = dev_to_node(cmn->dev);
- if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
- cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
- target = cpumask_any(&mask);
- else
+
+ target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
+
if (target < nr_cpu_ids)
arm_cmn_migrate(cmn, target);
+
return 0;
}
@@ -1972,7 +2052,7 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id)
irqreturn_t ret = IRQ_NONE;
for (;;) {
- u32 status = readl_relaxed(dtc->base + CMN_DT_PMOVSR);
+ u32 status = readl_relaxed(CMN_DT_PMOVSR(dtc));
u64 delta;
int i;
@@ -1994,7 +2074,7 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id)
}
}
- writel_relaxed(status, dtc->base + CMN_DT_PMOVSR_CLR);
+ writel_relaxed(status, CMN_DT_PMOVSR_CLR(dtc));
if (!dtc->irq_friend)
return ret;
@@ -2048,15 +2128,16 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id
{
struct arm_cmn_dtc *dtc = cmn->dtc + idx;
- dtc->base = dn->pmu_base - CMN_PMU_OFFSET;
+ dtc->pmu_base = dn->pmu_base;
+ dtc->base = dtc->pmu_base - arm_cmn_pmu_offset(cmn, dn);
dtc->irq = platform_get_irq(to_platform_device(cmn->dev), idx);
if (dtc->irq < 0)
return dtc->irq;
writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
- writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR);
- writeq_relaxed(0, dtc->base + CMN_DT_PMCCNTR);
- writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR);
+ writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, CMN_DT_PMCR(dtc));
+ writeq_relaxed(0, CMN_DT_PMCCNTR(dtc));
+ writel_relaxed(0x1ff, CMN_DT_PMOVSR_CLR(dtc));
return 0;
}
@@ -2100,7 +2181,7 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
dn->dtc = xp->dtc;
dn->dtm = xp->dtm;
if (cmn->multi_dtm)
- dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
+ dn->dtm += arm_cmn_nid(dn).port / 2;
if (dn->type == CMN_TYPE_DTC) {
int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
@@ -2142,7 +2223,7 @@ static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_c
node->id = FIELD_GET(CMN_NI_NODE_ID, reg);
node->logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg);
- node->pmu_base = cmn->base + offset + CMN_PMU_OFFSET;
+ node->pmu_base = cmn->base + offset + arm_cmn_pmu_offset(cmn, node);
if (node->type == CMN_TYPE_CFG)
level = 0;
@@ -2200,7 +2281,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_23);
cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg);
+ /*
+ * With the device isolation feature, if firmware has neglected to enable
+ * an XP port then we risk locking up if we try to access anything behind
+ * it; however we also have no way to tell from Non-Secure whether any
+ * given port is disabled or not, so the only way to win is not to play...
+ */
reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL);
+ if (reg & CMN_INFO_DEVICE_ISO_ENABLE) {
+ dev_err(cmn->dev, "Device isolation enabled, not continuing due to risk of lockup\n");
+ return -ENODEV;
+ }
cmn->multi_dtm = reg & CMN_INFO_MULTIPLE_DTM_EN;
cmn->rsp_vc_num = FIELD_GET(CMN_INFO_RSP_VC_NUM, reg);
cmn->dat_vc_num = FIELD_GET(CMN_INFO_DAT_VC_NUM, reg);
@@ -2270,18 +2361,27 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
arm_cmn_init_dtm(dtm++, xp, 0);
/*
* Keeping track of connected ports will let us filter out
- * unnecessary XP events easily. We can also reliably infer the
- * "extra device ports" configuration for the node ID format
- * from this, since in that case we will see at least one XP
- * with port 2 connected, for the HN-D.
+ * unnecessary XP events easily, and also infer the per-XP
+ * part of the node ID format.
*/
for (int p = 0; p < CMN_MAX_PORTS; p++)
if (arm_cmn_device_connect_info(cmn, xp, p))
xp_ports |= BIT(p);
- if (cmn->multi_dtm && (xp_ports & 0xc))
+ if (cmn->num_xps == 1) {
+ xp->portid_bits = 3;
+ xp->deviceid_bits = 2;
+ } else if (xp_ports > 0x3) {
+ xp->portid_bits = 2;
+ xp->deviceid_bits = 1;
+ } else {
+ xp->portid_bits = 1;
+ xp->deviceid_bits = 2;
+ }
+
+ if (cmn->multi_dtm && (xp_ports > 0x3))
arm_cmn_init_dtm(dtm++, xp, 1);
- if (cmn->multi_dtm && (xp_ports & 0x30))
+ if (cmn->multi_dtm && (xp_ports > 0xf))
arm_cmn_init_dtm(dtm++, xp, 2);
cmn->ports_used |= xp_ports;
@@ -2318,6 +2418,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
}
arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn);
+ dn->portid_bits = xp->portid_bits;
+ dn->deviceid_bits = xp->deviceid_bits;
switch (dn->type) {
case CMN_TYPE_DTC:
@@ -2336,10 +2438,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_CXHA:
case CMN_TYPE_CCRA:
case CMN_TYPE_CCHA:
- case CMN_TYPE_CCLA:
case CMN_TYPE_HNS:
dn++;
break;
+ case CMN_TYPE_CCLA:
+ dn->pmu_base += CMN_CCLA_PMU_EVENT_SEL;
+ dn++;
+ break;
/* Nothing to see here */
case CMN_TYPE_MPAM_S:
case CMN_TYPE_MPAM_NS:
@@ -2347,6 +2452,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_CXLA:
case CMN_TYPE_HNS_MPAM_S:
case CMN_TYPE_HNS_MPAM_NS:
+ case CMN_TYPE_APB:
break;
/*
* Split "optimised" combination nodes into separate
@@ -2357,7 +2463,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_HNP:
case CMN_TYPE_CCLA_RNI:
dn[1] = dn[0];
- dn[0].pmu_base += CMN_HNP_PMU_EVENT_SEL;
+ dn[0].pmu_base += CMN_CCLA_PMU_EVENT_SEL;
dn[1].type = arm_cmn_subtype(dn->type);
dn += 2;
break;
@@ -2481,6 +2587,7 @@ static int arm_cmn_probe(struct platform_device *pdev)
cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev));
cmn->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = cmn->dev,
.attr_groups = arm_cmn_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
@@ -2515,7 +2622,7 @@ static int arm_cmn_probe(struct platform_device *pdev)
return err;
}
-static int arm_cmn_remove(struct platform_device *pdev)
+static void arm_cmn_remove(struct platform_device *pdev)
{
struct arm_cmn *cmn = platform_get_drvdata(pdev);
@@ -2524,7 +2631,6 @@ static int arm_cmn_remove(struct platform_device *pdev)
perf_pmu_unregister(&cmn->pmu);
cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node);
debugfs_remove(cmn->debug);
- return 0;
}
#ifdef CONFIG_OF
@@ -2532,6 +2638,7 @@ static const struct of_device_id arm_cmn_of_match[] = {
{ .compatible = "arm,cmn-600", .data = (void *)PART_CMN600 },
{ .compatible = "arm,cmn-650" },
{ .compatible = "arm,cmn-700" },
+ { .compatible = "arm,cmn-s3" },
{ .compatible = "arm,ci-700" },
{}
};
diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c
new file mode 100644
index 000000000000..fd7a5e60e963
--- /dev/null
+++ b/drivers/perf/arm-ni.c
@@ -0,0 +1,780 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2022-2024 Arm Limited
+// NI-700 Network-on-Chip PMU driver
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/* Common registers */
+#define NI_NODE_TYPE 0x000
+#define NI_NODE_TYPE_NODE_ID GENMASK(31, 16)
+#define NI_NODE_TYPE_NODE_TYPE GENMASK(15, 0)
+
+#define NI_CHILD_NODE_INFO 0x004
+#define NI_CHILD_PTR(n) (0x008 + (n) * 4)
+
+#define NI700_PMUSELA 0x00c
+
+/* Config node */
+#define NI_PERIPHERAL_ID0 0xfe0
+#define NI_PIDR0_PART_7_0 GENMASK(7, 0)
+#define NI_PERIPHERAL_ID1 0xfe4
+#define NI_PIDR1_PART_11_8 GENMASK(3, 0)
+#define NI_PERIPHERAL_ID2 0xfe8
+#define NI_PIDR2_VERSION GENMASK(7, 4)
+
+/* PMU node */
+#define NI_PMEVCNTR(n) (0x008 + (n) * 8)
+#define NI_PMCCNTR_L 0x0f8
+#define NI_PMCCNTR_U 0x0fc
+#define NI_PMEVTYPER(n) (0x400 + (n) * 4)
+#define NI_PMEVTYPER_NODE_TYPE GENMASK(12, 9)
+#define NI_PMEVTYPER_NODE_ID GENMASK(8, 0)
+#define NI_PMCNTENSET 0xc00
+#define NI_PMCNTENCLR 0xc20
+#define NI_PMINTENSET 0xc40
+#define NI_PMINTENCLR 0xc60
+#define NI_PMOVSCLR 0xc80
+#define NI_PMOVSSET 0xcc0
+#define NI_PMCFGR 0xe00
+#define NI_PMCR 0xe04
+#define NI_PMCR_RESET_CCNT BIT(2)
+#define NI_PMCR_RESET_EVCNT BIT(1)
+#define NI_PMCR_ENABLE BIT(0)
+
+#define NI_NUM_COUNTERS 8
+#define NI_CCNT_IDX 31
+
+/* Event attributes */
+#define NI_CONFIG_TYPE GENMASK_ULL(15, 0)
+#define NI_CONFIG_NODEID GENMASK_ULL(31, 16)
+#define NI_CONFIG_EVENTID GENMASK_ULL(47, 32)
+
+#define NI_EVENT_TYPE(event) FIELD_GET(NI_CONFIG_TYPE, (event)->attr.config)
+#define NI_EVENT_NODEID(event) FIELD_GET(NI_CONFIG_NODEID, (event)->attr.config)
+#define NI_EVENT_EVENTID(event) FIELD_GET(NI_CONFIG_EVENTID, (event)->attr.config)
+
+enum ni_part {
+ PART_NI_700 = 0x43b,
+ PART_NI_710AE = 0x43d,
+};
+
+enum ni_node_type {
+ NI_GLOBAL,
+ NI_VOLTAGE,
+ NI_POWER,
+ NI_CLOCK,
+ NI_ASNI,
+ NI_AMNI,
+ NI_PMU,
+ NI_HSNI,
+ NI_HMNI,
+ NI_PMNI,
+};
+
+struct arm_ni_node {
+ void __iomem *base;
+ enum ni_node_type type;
+ u16 id;
+ u32 num_components;
+};
+
+struct arm_ni_unit {
+ void __iomem *pmusela;
+ enum ni_node_type type;
+ u16 id;
+ bool ns;
+ union {
+ __le64 pmusel;
+ u8 event[8];
+ };
+};
+
+struct arm_ni_cd {
+ void __iomem *pmu_base;
+ u16 id;
+ int num_units;
+ int irq;
+ int cpu;
+ struct hlist_node cpuhp_node;
+ struct pmu pmu;
+ struct arm_ni_unit *units;
+ struct perf_event *evcnt[NI_NUM_COUNTERS];
+ struct perf_event *ccnt;
+};
+
+struct arm_ni {
+ struct device *dev;
+ void __iomem *base;
+ enum ni_part part;
+ int id;
+ int num_cds;
+ struct arm_ni_cd cds[] __counted_by(num_cds);
+};
+
+#define cd_to_ni(cd) container_of((cd), struct arm_ni, cds[(cd)->id])
+#define pmu_to_cd(p) container_of((p), struct arm_ni_cd, pmu)
+
+#define cd_for_each_unit(cd, u) \
+ for (struct arm_ni_unit *u = cd->units; u < cd->units + cd->num_units; u++)
+
+static int arm_ni_hp_state;
+
+struct arm_ni_event_attr {
+ struct device_attribute attr;
+ enum ni_node_type type;
+};
+
+#define NI_EVENT_ATTR(_name, _type) \
+ (&((struct arm_ni_event_attr[]) {{ \
+ .attr = __ATTR(_name, 0444, arm_ni_event_show, NULL), \
+ .type = _type, \
+ }})[0].attr.attr)
+
+static ssize_t arm_ni_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct arm_ni_event_attr *eattr = container_of(attr, typeof(*eattr), attr);
+
+ if (eattr->type == NI_PMU)
+ return sysfs_emit(buf, "type=0x%x\n", eattr->type);
+
+ return sysfs_emit(buf, "type=0x%x,eventid=?,nodeid=?\n", eattr->type);
+}
+
+static umode_t arm_ni_event_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int unused)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct arm_ni_cd *cd = pmu_to_cd(dev_get_drvdata(dev));
+ struct arm_ni_event_attr *eattr;
+
+ eattr = container_of(attr, typeof(*eattr), attr.attr);
+
+ cd_for_each_unit(cd, unit) {
+ if (unit->type == eattr->type && unit->ns)
+ return attr->mode;
+ }
+
+ return 0;
+}
+
+static struct attribute *arm_ni_event_attrs[] = {
+ NI_EVENT_ATTR(asni, NI_ASNI),
+ NI_EVENT_ATTR(amni, NI_AMNI),
+ NI_EVENT_ATTR(cycles, NI_PMU),
+ NI_EVENT_ATTR(hsni, NI_HSNI),
+ NI_EVENT_ATTR(hmni, NI_HMNI),
+ NI_EVENT_ATTR(pmni, NI_PMNI),
+ NULL
+};
+
+static const struct attribute_group arm_ni_event_attrs_group = {
+ .name = "events",
+ .attrs = arm_ni_event_attrs,
+ .is_visible = arm_ni_event_attr_is_visible,
+};
+
+struct arm_ni_format_attr {
+ struct device_attribute attr;
+ u64 field;
+};
+
+#define NI_FORMAT_ATTR(_name, _fld) \
+ (&((struct arm_ni_format_attr[]) {{ \
+ .attr = __ATTR(_name, 0444, arm_ni_format_show, NULL), \
+ .field = _fld, \
+ }})[0].attr.attr)
+
+static ssize_t arm_ni_format_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct arm_ni_format_attr *fmt = container_of(attr, typeof(*fmt), attr);
+
+ return sysfs_emit(buf, "config:%*pbl\n", 64, &fmt->field);
+}
+
+static struct attribute *arm_ni_format_attrs[] = {
+ NI_FORMAT_ATTR(type, NI_CONFIG_TYPE),
+ NI_FORMAT_ATTR(nodeid, NI_CONFIG_NODEID),
+ NI_FORMAT_ATTR(eventid, NI_CONFIG_EVENTID),
+ NULL
+};
+
+static const struct attribute_group arm_ni_format_attrs_group = {
+ .name = "format",
+ .attrs = arm_ni_format_attrs,
+};
+
+static ssize_t arm_ni_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(cd->cpu));
+}
+
+static struct device_attribute arm_ni_cpumask_attr =
+ __ATTR(cpumask, 0444, arm_ni_cpumask_show, NULL);
+
+static ssize_t arm_ni_identifier_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct arm_ni *ni = cd_to_ni(pmu_to_cd(dev_get_drvdata(dev)));
+ u32 reg = readl_relaxed(ni->base + NI_PERIPHERAL_ID2);
+ int version = FIELD_GET(NI_PIDR2_VERSION, reg);
+
+ return sysfs_emit(buf, "%03x%02x\n", ni->part, version);
+}
+
+static struct device_attribute arm_ni_identifier_attr =
+ __ATTR(identifier, 0444, arm_ni_identifier_show, NULL);
+
+static struct attribute *arm_ni_other_attrs[] = {
+ &arm_ni_cpumask_attr.attr,
+ &arm_ni_identifier_attr.attr,
+ NULL
+};
+
+static const struct attribute_group arm_ni_other_attr_group = {
+ .attrs = arm_ni_other_attrs,
+};
+
+static const struct attribute_group *arm_ni_attr_groups[] = {
+ &arm_ni_event_attrs_group,
+ &arm_ni_format_attrs_group,
+ &arm_ni_other_attr_group,
+ NULL
+};
+
+static void arm_ni_pmu_enable(struct pmu *pmu)
+{
+ writel_relaxed(NI_PMCR_ENABLE, pmu_to_cd(pmu)->pmu_base + NI_PMCR);
+}
+
+static void arm_ni_pmu_disable(struct pmu *pmu)
+{
+ writel_relaxed(0, pmu_to_cd(pmu)->pmu_base + NI_PMCR);
+}
+
+struct arm_ni_val {
+ unsigned int evcnt;
+ unsigned int ccnt;
+};
+
+static bool arm_ni_val_count_event(struct perf_event *evt, struct arm_ni_val *val)
+{
+ if (is_software_event(evt))
+ return true;
+
+ if (NI_EVENT_TYPE(evt) == NI_PMU) {
+ val->ccnt++;
+ return val->ccnt <= 1;
+ }
+
+ val->evcnt++;
+ return val->evcnt <= NI_NUM_COUNTERS;
+}
+
+static int arm_ni_validate_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct arm_ni_val val = { 0 };
+
+ if (leader == event)
+ return 0;
+
+ arm_ni_val_count_event(event, &val);
+ if (!arm_ni_val_count_event(leader, &val))
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, leader) {
+ if (!arm_ni_val_count_event(sibling, &val))
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int arm_ni_event_init(struct perf_event *event)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ event->cpu = cd->cpu;
+ if (NI_EVENT_TYPE(event) == NI_PMU)
+ return arm_ni_validate_group(event);
+
+ cd_for_each_unit(cd, unit) {
+ if (unit->type == NI_EVENT_TYPE(event) &&
+ unit->id == NI_EVENT_NODEID(event) && unit->ns) {
+ event->hw.config_base = (unsigned long)unit;
+ return arm_ni_validate_group(event);
+ }
+ }
+ return -EINVAL;
+}
+
+static u64 arm_ni_read_ccnt(struct arm_ni_cd *cd)
+{
+ u64 l, u_old, u_new;
+ int retries = 3; /* 1st time unlucky, 2nd improbable, 3rd just broken */
+
+ u_new = readl_relaxed(cd->pmu_base + NI_PMCCNTR_U);
+ do {
+ u_old = u_new;
+ l = readl_relaxed(cd->pmu_base + NI_PMCCNTR_L);
+ u_new = readl_relaxed(cd->pmu_base + NI_PMCCNTR_U);
+ } while (u_new != u_old && --retries);
+ WARN_ON(!retries);
+
+ return (u_new << 32) | l;
+}
+
+static void arm_ni_event_read(struct perf_event *event)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
+ struct hw_perf_event *hw = &event->hw;
+ u64 count, prev;
+ bool ccnt = hw->idx == NI_CCNT_IDX;
+
+ do {
+ prev = local64_read(&hw->prev_count);
+ if (ccnt)
+ count = arm_ni_read_ccnt(cd);
+ else
+ count = readl_relaxed(cd->pmu_base + NI_PMEVCNTR(hw->idx));
+ } while (local64_cmpxchg(&hw->prev_count, prev, count) != prev);
+
+ count -= prev;
+ if (!ccnt)
+ count = (u32)count;
+ local64_add(count, &event->count);
+}
+
+static void arm_ni_event_start(struct perf_event *event, int flags)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
+
+ writel_relaxed(1U << event->hw.idx, cd->pmu_base + NI_PMCNTENSET);
+}
+
+static void arm_ni_event_stop(struct perf_event *event, int flags)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
+
+ writel_relaxed(1U << event->hw.idx, cd->pmu_base + NI_PMCNTENCLR);
+ if (flags & PERF_EF_UPDATE)
+ arm_ni_event_read(event);
+}
+
+static void arm_ni_init_ccnt(struct arm_ni_cd *cd)
+{
+ local64_set(&cd->ccnt->hw.prev_count, S64_MIN);
+ lo_hi_writeq_relaxed(S64_MIN, cd->pmu_base + NI_PMCCNTR_L);
+}
+
+static void arm_ni_init_evcnt(struct arm_ni_cd *cd, int idx)
+{
+ local64_set(&cd->evcnt[idx]->hw.prev_count, S32_MIN);
+ writel_relaxed(S32_MIN, cd->pmu_base + NI_PMEVCNTR(idx));
+}
+
+static int arm_ni_event_add(struct perf_event *event, int flags)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
+ struct hw_perf_event *hw = &event->hw;
+ struct arm_ni_unit *unit;
+ enum ni_node_type type = NI_EVENT_TYPE(event);
+ u32 reg;
+
+ if (type == NI_PMU) {
+ if (cd->ccnt)
+ return -ENOSPC;
+ hw->idx = NI_CCNT_IDX;
+ cd->ccnt = event;
+ arm_ni_init_ccnt(cd);
+ } else {
+ hw->idx = 0;
+ while (cd->evcnt[hw->idx]) {
+ if (++hw->idx == NI_NUM_COUNTERS)
+ return -ENOSPC;
+ }
+ cd->evcnt[hw->idx] = event;
+ unit = (void *)hw->config_base;
+ unit->event[hw->idx] = NI_EVENT_EVENTID(event);
+ arm_ni_init_evcnt(cd, hw->idx);
+ lo_hi_writeq_relaxed(le64_to_cpu(unit->pmusel), unit->pmusela);
+
+ reg = FIELD_PREP(NI_PMEVTYPER_NODE_TYPE, type) |
+ FIELD_PREP(NI_PMEVTYPER_NODE_ID, NI_EVENT_NODEID(event));
+ writel_relaxed(reg, cd->pmu_base + NI_PMEVTYPER(hw->idx));
+ }
+ if (flags & PERF_EF_START)
+ arm_ni_event_start(event, 0);
+ return 0;
+}
+
+static void arm_ni_event_del(struct perf_event *event, int flags)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
+ struct hw_perf_event *hw = &event->hw;
+
+ arm_ni_event_stop(event, PERF_EF_UPDATE);
+
+ if (hw->idx == NI_CCNT_IDX)
+ cd->ccnt = NULL;
+ else
+ cd->evcnt[hw->idx] = NULL;
+}
+
+static irqreturn_t arm_ni_handle_irq(int irq, void *dev_id)
+{
+ struct arm_ni_cd *cd = dev_id;
+ irqreturn_t ret = IRQ_NONE;
+ u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR);
+
+ if (reg & (1U << NI_CCNT_IDX)) {
+ ret = IRQ_HANDLED;
+ if (!(WARN_ON(!cd->ccnt))) {
+ arm_ni_event_read(cd->ccnt);
+ arm_ni_init_ccnt(cd);
+ }
+ }
+ for (int i = 0; i < NI_NUM_COUNTERS; i++) {
+ if (!(reg & (1U << i)))
+ continue;
+ ret = IRQ_HANDLED;
+ if (!(WARN_ON(!cd->evcnt[i]))) {
+ arm_ni_event_read(cd->evcnt[i]);
+ arm_ni_init_evcnt(cd, i);
+ }
+ }
+ writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR);
+ return ret;
+}
+
+static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_start)
+{
+ struct arm_ni_cd *cd = ni->cds + node->id;
+ const char *name;
+ int err;
+
+ cd->id = node->id;
+ cd->num_units = node->num_components;
+ cd->units = devm_kcalloc(ni->dev, cd->num_units, sizeof(*(cd->units)), GFP_KERNEL);
+ if (!cd->units)
+ return -ENOMEM;
+
+ for (int i = 0; i < cd->num_units; i++) {
+ u32 reg = readl_relaxed(node->base + NI_CHILD_PTR(i));
+ void __iomem *unit_base = ni->base + reg;
+ struct arm_ni_unit *unit = cd->units + i;
+
+ reg = readl_relaxed(unit_base + NI_NODE_TYPE);
+ unit->type = FIELD_GET(NI_NODE_TYPE_NODE_TYPE, reg);
+ unit->id = FIELD_GET(NI_NODE_TYPE_NODE_ID, reg);
+
+ switch (unit->type) {
+ case NI_PMU:
+ reg = readl_relaxed(unit_base + NI_PMCFGR);
+ if (!reg) {
+ dev_info(ni->dev, "No access to PMU %d\n", cd->id);
+ devm_kfree(ni->dev, cd->units);
+ return 0;
+ }
+ unit->ns = true;
+ cd->pmu_base = unit_base;
+ break;
+ case NI_ASNI:
+ case NI_AMNI:
+ case NI_HSNI:
+ case NI_HMNI:
+ case NI_PMNI:
+ unit->pmusela = unit_base + NI700_PMUSELA;
+ writel_relaxed(1, unit->pmusela);
+ if (readl_relaxed(unit->pmusela) != 1)
+ dev_info(ni->dev, "No access to node 0x%04x%04x\n", unit->id, unit->type);
+ else
+ unit->ns = true;
+ break;
+ default:
+ /*
+ * e.g. FMU - thankfully bits 3:2 of FMU_ERR_FR0 are RES0 so
+ * can't alias any of the leaf node types we're looking for.
+ */
+ dev_dbg(ni->dev, "Mystery node 0x%04x%04x\n", unit->id, unit->type);
+ break;
+ }
+ }
+
+ res_start += cd->pmu_base - ni->base;
+ if (!devm_request_mem_region(ni->dev, res_start, SZ_4K, dev_name(ni->dev))) {
+ dev_err(ni->dev, "Failed to request PMU region 0x%llx\n", res_start);
+ return -EBUSY;
+ }
+
+ writel_relaxed(NI_PMCR_RESET_CCNT | NI_PMCR_RESET_EVCNT,
+ cd->pmu_base + NI_PMCR);
+ writel_relaxed(U32_MAX, cd->pmu_base + NI_PMCNTENCLR);
+ writel_relaxed(U32_MAX, cd->pmu_base + NI_PMOVSCLR);
+ writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENSET);
+
+ cd->irq = platform_get_irq(to_platform_device(ni->dev), cd->id);
+ if (cd->irq < 0)
+ return cd->irq;
+
+ err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ dev_name(ni->dev), cd);
+ if (err)
+ return err;
+
+ cd->cpu = cpumask_local_spread(0, dev_to_node(ni->dev));
+ cd->pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .parent = ni->dev,
+ .attr_groups = arm_ni_attr_groups,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .pmu_enable = arm_ni_pmu_enable,
+ .pmu_disable = arm_ni_pmu_disable,
+ .event_init = arm_ni_event_init,
+ .add = arm_ni_event_add,
+ .del = arm_ni_event_del,
+ .start = arm_ni_event_start,
+ .stop = arm_ni_event_stop,
+ .read = arm_ni_event_read,
+ };
+
+ name = devm_kasprintf(ni->dev, GFP_KERNEL, "arm_ni_%d_cd_%d", ni->id, cd->id);
+ if (!name)
+ return -ENOMEM;
+
+ err = cpuhp_state_add_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node);
+ if (err)
+ return err;
+
+ err = perf_pmu_register(&cd->pmu, name, -1);
+ if (err)
+ cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node);
+
+ return err;
+}
+
+static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node)
+{
+ u32 reg = readl_relaxed(base + NI_NODE_TYPE);
+
+ node->base = base;
+ node->type = FIELD_GET(NI_NODE_TYPE_NODE_TYPE, reg);
+ node->id = FIELD_GET(NI_NODE_TYPE_NODE_ID, reg);
+ node->num_components = readl_relaxed(base + NI_CHILD_NODE_INFO);
+}
+
+static int arm_ni_probe(struct platform_device *pdev)
+{
+ struct arm_ni_node cfg, vd, pd, cd;
+ struct arm_ni *ni;
+ struct resource *res;
+ void __iomem *base;
+ static atomic_t id;
+ int num_cds;
+ u32 reg, part;
+
+ /*
+ * We want to map the whole configuration space for ease of discovery,
+ * but the PMU pages are the only ones for which we can honestly claim
+ * exclusive ownership, so we'll request them explicitly once found.
+ */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+ if (!base)
+ return -ENOMEM;
+
+ arm_ni_probe_domain(base, &cfg);
+ if (cfg.type != NI_GLOBAL)
+ return -ENODEV;
+
+ reg = readl_relaxed(cfg.base + NI_PERIPHERAL_ID0);
+ part = FIELD_GET(NI_PIDR0_PART_7_0, reg);
+ reg = readl_relaxed(cfg.base + NI_PERIPHERAL_ID1);
+ part |= FIELD_GET(NI_PIDR1_PART_11_8, reg) << 8;
+
+ switch (part) {
+ case PART_NI_700:
+ case PART_NI_710AE:
+ break;
+ default:
+ dev_WARN(&pdev->dev, "Unknown part number: 0x%03x, this may go badly\n", part);
+ break;
+ }
+
+ num_cds = 0;
+ for (int v = 0; v < cfg.num_components; v++) {
+ reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v));
+ arm_ni_probe_domain(base + reg, &vd);
+ for (int p = 0; p < vd.num_components; p++) {
+ reg = readl_relaxed(vd.base + NI_CHILD_PTR(p));
+ arm_ni_probe_domain(base + reg, &pd);
+ num_cds += pd.num_components;
+ }
+ }
+
+ ni = devm_kzalloc(&pdev->dev, struct_size(ni, cds, num_cds), GFP_KERNEL);
+ if (!ni)
+ return -ENOMEM;
+
+ ni->dev = &pdev->dev;
+ ni->base = base;
+ ni->num_cds = num_cds;
+ ni->part = part;
+ ni->id = atomic_fetch_inc(&id);
+
+ for (int v = 0; v < cfg.num_components; v++) {
+ reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v));
+ arm_ni_probe_domain(base + reg, &vd);
+ for (int p = 0; p < vd.num_components; p++) {
+ reg = readl_relaxed(vd.base + NI_CHILD_PTR(p));
+ arm_ni_probe_domain(base + reg, &pd);
+ for (int c = 0; c < pd.num_components; c++) {
+ int ret;
+
+ reg = readl_relaxed(pd.base + NI_CHILD_PTR(c));
+ arm_ni_probe_domain(base + reg, &cd);
+ ret = arm_ni_init_cd(ni, &cd, res->start);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void arm_ni_remove(struct platform_device *pdev)
+{
+ struct arm_ni *ni = platform_get_drvdata(pdev);
+
+ for (int i = 0; i < ni->num_cds; i++) {
+ struct arm_ni_cd *cd = ni->cds + i;
+
+ if (!cd->pmu_base)
+ continue;
+
+ writel_relaxed(0, cd->pmu_base + NI_PMCR);
+ writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR);
+ perf_pmu_unregister(&cd->pmu);
+ cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node);
+ }
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id arm_ni_of_match[] = {
+ { .compatible = "arm,ni-700" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, arm_ni_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id arm_ni_acpi_match[] = {
+ { "ARMHCB70" },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, arm_ni_acpi_match);
+#endif
+
+static struct platform_driver arm_ni_driver = {
+ .driver = {
+ .name = "arm-ni",
+ .of_match_table = of_match_ptr(arm_ni_of_match),
+ .acpi_match_table = ACPI_PTR(arm_ni_acpi_match),
+ },
+ .probe = arm_ni_probe,
+ .remove = arm_ni_remove,
+};
+
+static void arm_ni_pmu_migrate(struct arm_ni_cd *cd, unsigned int cpu)
+{
+ perf_pmu_migrate_context(&cd->pmu, cd->cpu, cpu);
+ irq_set_affinity(cd->irq, cpumask_of(cpu));
+ cd->cpu = cpu;
+}
+
+static int arm_ni_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct arm_ni_cd *cd;
+ int node;
+
+ cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node);
+ node = dev_to_node(cd_to_ni(cd)->dev);
+ if (cpu_to_node(cd->cpu) != node && cpu_to_node(cpu) == node)
+ arm_ni_pmu_migrate(cd, cpu);
+ return 0;
+}
+
+static int arm_ni_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct arm_ni_cd *cd;
+ unsigned int target;
+ int node;
+
+ cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node);
+ if (cpu != cd->cpu)
+ return 0;
+
+ node = dev_to_node(cd_to_ni(cd)->dev);
+ target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ if (target < nr_cpu_ids)
+ arm_ni_pmu_migrate(cd, target);
+ return 0;
+}
+
+static int __init arm_ni_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "perf/arm/ni:online",
+ arm_ni_pmu_online_cpu,
+ arm_ni_pmu_offline_cpu);
+ if (ret < 0)
+ return ret;
+
+ arm_ni_hp_state = ret;
+
+ ret = platform_driver_register(&arm_ni_driver);
+ if (ret)
+ cpuhp_remove_multi_state(arm_ni_hp_state);
+ return ret;
+}
+
+static void __exit arm_ni_exit(void)
+{
+ platform_driver_unregister(&arm_ni_driver);
+ cpuhp_remove_multi_state(arm_ni_hp_state);
+}
+
+module_init(arm_ni_init);
+module_exit(arm_ni_exit);
+
+MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>");
+MODULE_DESCRIPTION("Arm NI-700 PMU driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c
index f146a455e838..f72f5689923c 100644
--- a/drivers/perf/arm_cspmu/ampere_cspmu.c
+++ b/drivers/perf/arm_cspmu/ampere_cspmu.c
@@ -269,4 +269,5 @@ static void __exit ampere_cspmu_exit(void)
module_init(ampere_cspmu_init);
module_exit(ampere_cspmu_exit);
+MODULE_DESCRIPTION("Ampere SoC Performance Monitor Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index 50b89b989ce7..81e8b97e9353 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -27,6 +27,7 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
@@ -100,13 +101,6 @@
#define ARM_CSPMU_ACTIVE_CPU_MASK 0x0
#define ARM_CSPMU_ASSOCIATED_CPU_MASK 0x1
-/* Check and use default if implementer doesn't provide attribute callback */
-#define CHECK_DEFAULT_IMPL_OPS(ops, callback) \
- do { \
- if (!ops->callback) \
- ops->callback = arm_cspmu_ ## callback; \
- } while (0)
-
/*
* Maximum poll count for reading counter value using high-low-high sequence.
*/
@@ -121,7 +115,9 @@ static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev)
{
- return *(struct acpi_apmt_node **)dev_get_platdata(dev);
+ struct acpi_apmt_node **ptr = dev_get_platdata(dev);
+
+ return ptr ? *ptr : NULL;
}
/*
@@ -227,16 +223,6 @@ arm_cspmu_event_attr_is_visible(struct kobject *kobj,
return attr->mode;
}
-ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct dev_ext_attribute *eattr =
- container_of(attr, struct dev_ext_attribute, attr);
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_format_show);
-
static struct attribute *arm_cspmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_FILTER_ATTR,
@@ -317,6 +303,10 @@ static const char *arm_cspmu_get_name(const struct arm_cspmu *cspmu)
dev = cspmu->dev;
apmt_node = arm_cspmu_apmt_node(dev);
+ if (!apmt_node)
+ return devm_kasprintf(dev, GFP_KERNEL, PMUNAME "_%u",
+ atomic_fetch_inc(&pmu_idx[0]));
+
pmu_type = apmt_node->type;
if (pmu_type >= ACPI_APMT_NODE_TYPE_COUNT) {
@@ -408,21 +398,32 @@ static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr)
return NULL;
}
+#define DEFAULT_IMPL_OP(name) .name = arm_cspmu_##name
+
static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
{
int ret = 0;
- struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev);
struct arm_cspmu_impl_match *match;
- /*
- * Get PMU implementer and product id from APMT node.
- * If APMT node doesn't have implementer/product id, try get it
- * from PMIIDR.
- */
- cspmu->impl.pmiidr =
- (apmt_node->impl_id) ? apmt_node->impl_id :
- readl(cspmu->base0 + PMIIDR);
+ /* Start with a default PMU implementation */
+ cspmu->impl.module = THIS_MODULE;
+ cspmu->impl.pmiidr = readl(cspmu->base0 + PMIIDR);
+ cspmu->impl.ops = (struct arm_cspmu_impl_ops) {
+ DEFAULT_IMPL_OP(get_event_attrs),
+ DEFAULT_IMPL_OP(get_format_attrs),
+ DEFAULT_IMPL_OP(get_identifier),
+ DEFAULT_IMPL_OP(get_name),
+ DEFAULT_IMPL_OP(is_cycle_counter_event),
+ DEFAULT_IMPL_OP(event_type),
+ DEFAULT_IMPL_OP(event_filter),
+ DEFAULT_IMPL_OP(set_ev_filter),
+ DEFAULT_IMPL_OP(event_attr_is_visible),
+ };
+
+ /* Firmware may override implementer/product ID from PMIIDR */
+ if (apmt_node && apmt_node->impl_id)
+ cspmu->impl.pmiidr = apmt_node->impl_id;
/* Find implementer specific attribute ops. */
match = arm_cspmu_impl_match_get(cspmu->impl.pmiidr);
@@ -450,24 +451,9 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
}
mutex_unlock(&arm_cspmu_lock);
+ }
- if (ret)
- return ret;
- } else
- cspmu->impl.module = THIS_MODULE;
-
- /* Use default callbacks if implementer doesn't provide one. */
- CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, get_identifier);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, get_name);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, is_cycle_counter_event);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, set_ev_filter);
-
- return 0;
+ return ret;
}
static struct attribute_group *
@@ -512,23 +498,16 @@ arm_cspmu_alloc_format_attr_group(struct arm_cspmu *cspmu)
return format_group;
}
-static struct attribute_group **
-arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu)
+static int arm_cspmu_alloc_attr_groups(struct arm_cspmu *cspmu)
{
- struct attribute_group **attr_groups = NULL;
- struct device *dev = cspmu->dev;
+ const struct attribute_group **attr_groups = cspmu->attr_groups;
const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
cspmu->identifier = impl_ops->get_identifier(cspmu);
cspmu->name = impl_ops->get_name(cspmu);
if (!cspmu->identifier || !cspmu->name)
- return NULL;
-
- attr_groups = devm_kcalloc(dev, 5, sizeof(struct attribute_group *),
- GFP_KERNEL);
- if (!attr_groups)
- return NULL;
+ return -ENOMEM;
attr_groups[0] = arm_cspmu_alloc_event_attr_group(cspmu);
attr_groups[1] = arm_cspmu_alloc_format_attr_group(cspmu);
@@ -536,18 +515,14 @@ arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu)
attr_groups[3] = &arm_cspmu_cpumask_attr_group;
if (!attr_groups[0] || !attr_groups[1])
- return NULL;
+ return -ENOMEM;
- return attr_groups;
+ return 0;
}
static inline void arm_cspmu_reset_counters(struct arm_cspmu *cspmu)
{
- u32 pmcr = 0;
-
- pmcr |= PMCR_P;
- pmcr |= PMCR_C;
- writel(pmcr, cspmu->base0 + PMCR);
+ writel(PMCR_C | PMCR_P, cspmu->base0 + PMCR);
}
static inline void arm_cspmu_start_counters(struct arm_cspmu *cspmu)
@@ -962,7 +937,14 @@ static struct arm_cspmu *arm_cspmu_alloc(struct platform_device *pdev)
platform_set_drvdata(pdev, cspmu);
apmt_node = arm_cspmu_apmt_node(dev);
- cspmu->has_atomic_dword = apmt_node->flags & ACPI_APMT_FLAGS_ATOMIC;
+ if (apmt_node) {
+ cspmu->has_atomic_dword = apmt_node->flags & ACPI_APMT_FLAGS_ATOMIC;
+ } else {
+ u32 width = 0;
+
+ device_property_read_u32(dev, "reg-io-width", &width);
+ cspmu->has_atomic_dword = (width == 8);
+ }
return cspmu;
}
@@ -1153,11 +1135,6 @@ static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
}
}
- if (cpumask_empty(&cspmu->associated_cpus)) {
- dev_dbg(cspmu->dev, "No cpu associated with the PMU\n");
- return -ENODEV;
- }
-
return 0;
}
#else
@@ -1167,19 +1144,45 @@ static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
}
#endif
+static int arm_cspmu_of_get_cpus(struct arm_cspmu *cspmu)
+{
+ struct of_phandle_iterator it;
+ int ret, cpu;
+
+ of_for_each_phandle(&it, ret, dev_of_node(cspmu->dev), "cpus", NULL, 0) {
+ cpu = of_cpu_node_to_id(it.node);
+ if (cpu < 0)
+ continue;
+ cpumask_set_cpu(cpu, &cspmu->associated_cpus);
+ }
+ return ret == -ENOENT ? 0 : ret;
+}
+
static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu)
{
- return arm_cspmu_acpi_get_cpus(cspmu);
+ int ret = 0;
+
+ if (arm_cspmu_apmt_node(cspmu->dev))
+ ret = arm_cspmu_acpi_get_cpus(cspmu);
+ else if (device_property_present(cspmu->dev, "cpus"))
+ ret = arm_cspmu_of_get_cpus(cspmu);
+ else
+ cpumask_copy(&cspmu->associated_cpus, cpu_possible_mask);
+
+ if (!ret && cpumask_empty(&cspmu->associated_cpus)) {
+ dev_dbg(cspmu->dev, "No cpu associated with the PMU\n");
+ ret = -ENODEV;
+ }
+ return ret;
}
static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
{
int ret, capabilities;
- struct attribute_group **attr_groups;
- attr_groups = arm_cspmu_alloc_attr_group(cspmu);
- if (!attr_groups)
- return -ENOMEM;
+ ret = arm_cspmu_alloc_attr_groups(cspmu);
+ if (ret)
+ return ret;
ret = cpuhp_state_add_instance(arm_cspmu_cpuhp_state,
&cspmu->cpuhp_node);
@@ -1193,6 +1196,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
cspmu->pmu = (struct pmu){
.task_ctx_nr = perf_invalid_context,
.module = cspmu->impl.module,
+ .parent = cspmu->dev,
.pmu_enable = arm_cspmu_enable,
.pmu_disable = arm_cspmu_disable,
.event_init = arm_cspmu_event_init,
@@ -1201,12 +1205,11 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
.start = arm_cspmu_start,
.stop = arm_cspmu_stop,
.read = arm_cspmu_read,
- .attr_groups = (const struct attribute_group **)attr_groups,
+ .attr_groups = cspmu->attr_groups,
.capabilities = capabilities,
};
/* Hardware counter init */
- arm_cspmu_stop_counters(cspmu);
arm_cspmu_reset_counters(cspmu);
ret = perf_pmu_register(&cspmu->pmu, cspmu->name, -1);
@@ -1252,14 +1255,12 @@ static int arm_cspmu_device_probe(struct platform_device *pdev)
return ret;
}
-static int arm_cspmu_device_remove(struct platform_device *pdev)
+static void arm_cspmu_device_remove(struct platform_device *pdev)
{
struct arm_cspmu *cspmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&cspmu->pmu);
cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node);
-
- return 0;
}
static const struct platform_device_id arm_cspmu_id[] = {
@@ -1268,11 +1269,18 @@ static const struct platform_device_id arm_cspmu_id[] = {
};
MODULE_DEVICE_TABLE(platform, arm_cspmu_id);
+static const struct of_device_id arm_cspmu_of_match[] = {
+ { .compatible = "arm,coresight-pmu" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, arm_cspmu_of_match);
+
static struct platform_driver arm_cspmu_driver = {
.driver = {
- .name = DRVNAME,
- .suppress_bind_attrs = true,
- },
+ .name = DRVNAME,
+ .of_match_table = arm_cspmu_of_match,
+ .suppress_bind_attrs = true,
+ },
.probe = arm_cspmu_device_probe,
.remove = arm_cspmu_device_remove,
.id_table = arm_cspmu_id,
@@ -1305,8 +1313,7 @@ static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
- int dst;
- struct cpumask online_supported;
+ unsigned int dst;
struct arm_cspmu *cspmu =
hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);
@@ -1316,9 +1323,8 @@ static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
return 0;
/* Choose a new CPU to migrate ownership of the PMU to */
- cpumask_and(&online_supported, &cspmu->associated_cpus,
- cpu_online_mask);
- dst = cpumask_any_but(&online_supported, cpu);
+ dst = cpumask_any_and_but(&cspmu->associated_cpus,
+ cpu_online_mask, cpu);
if (dst >= nr_cpu_ids)
return 0;
@@ -1421,4 +1427,5 @@ EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
module_init(arm_cspmu_init);
module_exit(arm_cspmu_exit);
+MODULE_DESCRIPTION("ARM CoreSight Architecture Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
index 2fe723555a6b..2621f3111148 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.h
+++ b/drivers/perf/arm_cspmu/arm_cspmu.h
@@ -28,7 +28,7 @@
})[0].attr.attr)
#define ARM_CSPMU_FORMAT_ATTR(_name, _config) \
- ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_sysfs_format_show, (char *)_config)
+ ARM_CSPMU_EXT_ATTR(_name, device_show_string, _config)
#define ARM_CSPMU_EVENT_ATTR(_name, _config) \
PMU_EVENT_ATTR_ID(_name, arm_cspmu_sysfs_event_show, _config)
@@ -157,6 +157,7 @@ struct arm_cspmu {
int cycle_counter_logical_idx;
struct arm_cspmu_hw_events hw_events;
+ const struct attribute_group *attr_groups[5];
struct arm_cspmu_impl impl;
};
@@ -166,11 +167,6 @@ ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
struct device_attribute *attr,
char *buf);
-/* Default function to show format attribute in sysfs. */
-ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
- struct device_attribute *attr,
- char *buf);
-
/* Register vendor backend. */
int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match);
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
index 0382b702f092..8116c7846a46 100644
--- a/drivers/perf/arm_cspmu/nvidia_cspmu.c
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
@@ -54,65 +54,24 @@ static struct attribute *scf_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(scf_cache_wb, 0xF3),
NV_CSPMU_EVENT_ATTR_4(socket, rd_data, 0x101),
- NV_CSPMU_EVENT_ATTR_4(socket, dl_rsp, 0x105),
NV_CSPMU_EVENT_ATTR_4(socket, wb_data, 0x109),
- NV_CSPMU_EVENT_ATTR_4(socket, ev_rsp, 0x10d),
- NV_CSPMU_EVENT_ATTR_4(socket, prb_data, 0x111),
NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding, 0x115),
- NV_CSPMU_EVENT_ATTR_4(socket, dl_outstanding, 0x119),
- NV_CSPMU_EVENT_ATTR_4(socket, wb_outstanding, 0x11d),
- NV_CSPMU_EVENT_ATTR_4(socket, wr_outstanding, 0x121),
- NV_CSPMU_EVENT_ATTR_4(socket, ev_outstanding, 0x125),
- NV_CSPMU_EVENT_ATTR_4(socket, prb_outstanding, 0x129),
NV_CSPMU_EVENT_ATTR_4(socket, rd_access, 0x12d),
- NV_CSPMU_EVENT_ATTR_4(socket, dl_access, 0x131),
NV_CSPMU_EVENT_ATTR_4(socket, wb_access, 0x135),
NV_CSPMU_EVENT_ATTR_4(socket, wr_access, 0x139),
- NV_CSPMU_EVENT_ATTR_4(socket, ev_access, 0x13d),
- NV_CSPMU_EVENT_ATTR_4(socket, prb_access, 0x141),
-
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_data, 0x145),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_access, 0x149),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_access, 0x14d),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_outstanding, 0x151),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_outstanding, 0x155),
-
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_data, 0x159),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_access, 0x15d),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_access, 0x161),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_outstanding, 0x165),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_outstanding, 0x169),
ARM_CSPMU_EVENT_ATTR(gmem_rd_data, 0x16d),
ARM_CSPMU_EVENT_ATTR(gmem_rd_access, 0x16e),
ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding, 0x16f),
- ARM_CSPMU_EVENT_ATTR(gmem_dl_rsp, 0x170),
- ARM_CSPMU_EVENT_ATTR(gmem_dl_access, 0x171),
- ARM_CSPMU_EVENT_ATTR(gmem_dl_outstanding, 0x172),
ARM_CSPMU_EVENT_ATTR(gmem_wb_data, 0x173),
ARM_CSPMU_EVENT_ATTR(gmem_wb_access, 0x174),
- ARM_CSPMU_EVENT_ATTR(gmem_wb_outstanding, 0x175),
- ARM_CSPMU_EVENT_ATTR(gmem_ev_rsp, 0x176),
- ARM_CSPMU_EVENT_ATTR(gmem_ev_access, 0x177),
- ARM_CSPMU_EVENT_ATTR(gmem_ev_outstanding, 0x178),
ARM_CSPMU_EVENT_ATTR(gmem_wr_data, 0x179),
- ARM_CSPMU_EVENT_ATTR(gmem_wr_outstanding, 0x17a),
ARM_CSPMU_EVENT_ATTR(gmem_wr_access, 0x17b),
NV_CSPMU_EVENT_ATTR_4(socket, wr_data, 0x17c),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_data, 0x180),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_data, 0x184),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_access, 0x188),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_outstanding, 0x18c),
-
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_data, 0x190),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_data, 0x194),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_access, 0x198),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_outstanding, 0x19c),
-
ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes, 0x1a0),
ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes, 0x1a1),
ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data, 0x1a2),
@@ -122,35 +81,12 @@ static struct attribute *scf_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(cmem_rd_data, 0x1a5),
ARM_CSPMU_EVENT_ATTR(cmem_rd_access, 0x1a6),
ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding, 0x1a7),
- ARM_CSPMU_EVENT_ATTR(cmem_dl_rsp, 0x1a8),
- ARM_CSPMU_EVENT_ATTR(cmem_dl_access, 0x1a9),
- ARM_CSPMU_EVENT_ATTR(cmem_dl_outstanding, 0x1aa),
ARM_CSPMU_EVENT_ATTR(cmem_wb_data, 0x1ab),
ARM_CSPMU_EVENT_ATTR(cmem_wb_access, 0x1ac),
- ARM_CSPMU_EVENT_ATTR(cmem_wb_outstanding, 0x1ad),
- ARM_CSPMU_EVENT_ATTR(cmem_ev_rsp, 0x1ae),
- ARM_CSPMU_EVENT_ATTR(cmem_ev_access, 0x1af),
- ARM_CSPMU_EVENT_ATTR(cmem_ev_outstanding, 0x1b0),
ARM_CSPMU_EVENT_ATTR(cmem_wr_data, 0x1b1),
- ARM_CSPMU_EVENT_ATTR(cmem_wr_outstanding, 0x1b2),
-
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_data, 0x1b3),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_access, 0x1b7),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_access, 0x1bb),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_outstanding, 0x1bf),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_outstanding, 0x1c3),
-
- ARM_CSPMU_EVENT_ATTR(ocu_prb_access, 0x1c7),
- ARM_CSPMU_EVENT_ATTR(ocu_prb_data, 0x1c8),
- ARM_CSPMU_EVENT_ATTR(ocu_prb_outstanding, 0x1c9),
ARM_CSPMU_EVENT_ATTR(cmem_wr_access, 0x1ca),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_access, 0x1cb),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_data, 0x1cf),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_data, 0x1d3),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_outstanding, 0x1d7),
-
ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes, 0x1db),
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
@@ -194,6 +130,7 @@ static struct attribute *pcie_pmu_format_attrs[] = {
static struct attribute *nvlink_c2c_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
+ ARM_CSPMU_FORMAT_ATTR(port, "config1:0-1"),
NULL,
};
@@ -238,10 +175,12 @@ static u32 nv_cspmu_event_filter(const struct perf_event *event)
const struct nv_cspmu_ctx *ctx =
to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
- if (ctx->filter_mask == 0)
+ const u32 filter_val = event->attr.config1 & ctx->filter_mask;
+
+ if (filter_val == 0)
return ctx->filter_default_val;
- return event->attr.config1 & ctx->filter_mask;
+ return filter_val;
}
enum nv_cspmu_name_fmt {
@@ -274,7 +213,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = {
{
.prodid = 0x104,
.prodid_mask = NV_PRODID_MASK,
- .filter_mask = 0x0,
+ .filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
.filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
.name_pattern = "nvidia_nvlink_c2c1_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
@@ -284,7 +223,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = {
{
.prodid = 0x105,
.prodid_mask = NV_PRODID_MASK,
- .filter_mask = 0x0,
+ .filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
.filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
.name_pattern = "nvidia_nvlink_c2c0_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
@@ -388,12 +327,6 @@ static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
impl_ops->get_format_attrs = nv_cspmu_get_format_attrs;
impl_ops->get_name = nv_cspmu_get_name;
- /* Set others to NULL to use default callback. */
- impl_ops->event_type = NULL;
- impl_ops->event_attr_is_visible = NULL;
- impl_ops->get_identifier = NULL;
- impl_ops->is_cycle_counter_event = NULL;
-
return 0;
}
@@ -423,4 +356,5 @@ static void __exit nvidia_cspmu_exit(void)
module_init(nvidia_cspmu_init);
module_exit(nvidia_cspmu_exit);
+MODULE_DESCRIPTION("NVIDIA Coresight Architecture Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 30cea6859574..619cf937602f 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -542,12 +542,16 @@ static int dmc620_pmu_event_init(struct perf_event *event)
if (event->cpu < 0)
return -EINVAL;
+ hwc->idx = -1;
+
+ if (event->group_leader == event)
+ return 0;
+
/*
* We can't atomically disable all HW counters so only one event allowed,
* although software events are acceptable.
*/
- if (event->group_leader != event &&
- !is_software_event(event->group_leader))
+ if (!is_software_event(event->group_leader))
return -EINVAL;
for_each_sibling_event(sibling, event->group_leader) {
@@ -556,7 +560,6 @@ static int dmc620_pmu_event_init(struct perf_event *event)
return -EINVAL;
}
- hwc->idx = -1;
return 0;
}
@@ -673,6 +676,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev)
dmc620_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.event_init = dmc620_pmu_event_init,
@@ -724,7 +728,7 @@ out_teardown_dev:
return ret;
}
-static int dmc620_pmu_device_remove(struct platform_device *pdev)
+static void dmc620_pmu_device_remove(struct platform_device *pdev)
{
struct dmc620_pmu *dmc620_pmu = platform_get_drvdata(pdev);
@@ -732,8 +736,6 @@ static int dmc620_pmu_device_remove(struct platform_device *pdev)
/* perf will synchronise RCU before devres can free dmc620_pmu */
perf_pmu_unregister(&dmc620_pmu->pmu);
-
- return 0;
}
static const struct acpi_device_id dmc620_acpi_match[] = {
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index 7ec4498e312f..cb4fb59fe04b 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -85,7 +85,7 @@
DSU_EXT_ATTR(_name, dsu_pmu_sysfs_event_show, (unsigned long)_config)
#define DSU_FORMAT_ATTR(_name, _config) \
- DSU_EXT_ATTR(_name, dsu_pmu_sysfs_format_show, (char *)_config)
+ DSU_EXT_ATTR(_name, device_show_string, _config)
#define DSU_CPUMASK_ATTR(_name, _config) \
DSU_EXT_ATTR(_name, dsu_pmu_cpumask_show, (unsigned long)_config)
@@ -139,15 +139,6 @@ static ssize_t dsu_pmu_sysfs_event_show(struct device *dev,
return sysfs_emit(buf, "event=0x%lx\n", (unsigned long)eattr->var);
}
-static ssize_t dsu_pmu_sysfs_format_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct dev_ext_attribute *eattr = container_of(attr,
- struct dev_ext_attribute, attr);
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-
static ssize_t dsu_pmu_cpumask_show(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -230,15 +221,6 @@ static const struct attribute_group *dsu_pmu_attr_groups[] = {
NULL,
};
-static int dsu_pmu_get_online_cpu_any_but(struct dsu_pmu *dsu_pmu, int cpu)
-{
- struct cpumask online_supported;
-
- cpumask_and(&online_supported,
- &dsu_pmu->associated_cpus, cpu_online_mask);
- return cpumask_any_but(&online_supported, cpu);
-}
-
static inline bool dsu_pmu_counter_valid(struct dsu_pmu *dsu_pmu, u32 idx)
{
return (idx < dsu_pmu->num_counters) ||
@@ -751,6 +733,7 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
dsu_pmu->pmu = (struct pmu) {
.task_ctx_nr = perf_invalid_context,
+ .parent = &pdev->dev,
.module = THIS_MODULE,
.pmu_enable = dsu_pmu_enable,
.pmu_disable = dsu_pmu_disable,
@@ -774,14 +757,12 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
return rc;
}
-static int dsu_pmu_device_remove(struct platform_device *pdev)
+static void dsu_pmu_device_remove(struct platform_device *pdev)
{
struct dsu_pmu *dsu_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&dsu_pmu->pmu);
cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, &dsu_pmu->cpuhp_node);
-
- return 0;
}
static const struct of_device_id dsu_pmu_of_match[] = {
@@ -829,14 +810,16 @@ static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
- int dst;
- struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
- cpuhp_node);
+ struct dsu_pmu *dsu_pmu;
+ unsigned int dst;
+
+ dsu_pmu = hlist_entry_safe(node, struct dsu_pmu, cpuhp_node);
if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu))
return 0;
- dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu);
+ dst = cpumask_any_and_but(&dsu_pmu->associated_cpus,
+ cpu_online_mask, cpu);
/* If there are no active CPUs in the DSU, leave IRQ disabled */
if (dst >= nr_cpu_ids)
return 0;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 8458fe2cebb4..398cce3d76fc 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -522,7 +522,7 @@ static void armpmu_enable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
- bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
+ bool enabled = !bitmap_empty(hw_events->used_mask, ARMPMU_MAX_HWEVENTS);
/* For task-bound events we may be called on other CPUs */
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
@@ -742,7 +742,7 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
struct perf_event *event;
int idx;
- for (idx = 0; idx < armpmu->num_events; idx++) {
+ for_each_set_bit(idx, armpmu->cntr_mask, ARMPMU_MAX_HWEVENTS) {
event = hw_events->events[idx];
if (!event)
continue;
@@ -772,7 +772,7 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
{
struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
- bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
+ bool enabled = !bitmap_empty(hw_events->used_mask, ARMPMU_MAX_HWEVENTS);
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
return NOTIFY_DONE;
@@ -924,8 +924,9 @@ int armpmu_register(struct arm_pmu *pmu)
if (ret)
goto out_destroy;
- pr_info("enabled with %s PMU driver, %d counters available%s\n",
- pmu->name, pmu->num_events,
+ pr_info("enabled with %s PMU driver, %d (%*pb) counters available%s\n",
+ pmu->name, bitmap_weight(pmu->cntr_mask, ARMPMU_MAX_HWEVENTS),
+ ARMPMU_MAX_HWEVENTS, &pmu->cntr_mask,
has_nmi ? ", using NMIs" : "");
kvm_host_pmu_init(pmu);
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 3596db36cbff..118170a5cede 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -59,7 +59,7 @@ static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq)
static bool pmu_has_irq_affinity(struct device_node *node)
{
- return !!of_find_property(node, "interrupt-affinity", NULL);
+ return of_property_present(node, "interrupt-affinity");
}
static int pmu_parse_irq_affinity(struct device *dev, int i)
@@ -196,6 +196,7 @@ int arm_pmu_device_probe(struct platform_device *pdev,
if (!pmu)
return -ENOMEM;
+ pmu->pmu.parent = &pdev->dev;
pmu->plat_device = pdev;
ret = pmu_parse_irqs(pmu);
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 23fa6c5da82c..0e360feb3432 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -25,8 +25,6 @@
#include <linux/smp.h>
#include <linux/nmi.h>
-#include <asm/arm_pmuv3.h>
-
/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
@@ -338,6 +336,11 @@ static bool armv8pmu_event_want_user_access(struct perf_event *event)
return ATTR_CFG_GET_FLD(&event->attr, rdpmc);
}
+static u32 armv8pmu_event_get_threshold(struct perf_event_attr *attr)
+{
+ return ATTR_CFG_GET_FLD(attr, threshold);
+}
+
static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
{
u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare);
@@ -449,13 +452,6 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = {
};
/*
- * Perf Events' indices
- */
-#define ARMV8_IDX_CYCLE_COUNTER 0
-#define ARMV8_IDX_COUNTER0 1
-#define ARMV8_IDX_CYCLE_COUNTER_USER 32
-
-/*
* We unconditionally enable ARMv8.5-PMU long event counter support
* (64-bit events) where supported. Indicate if this arm_pmu has long
* event counter support.
@@ -486,19 +482,12 @@ static bool armv8pmu_event_is_chained(struct perf_event *event)
return !armv8pmu_event_has_user_read(event) &&
armv8pmu_event_is_64bit(event) &&
!armv8pmu_has_long_event(cpu_pmu) &&
- (idx != ARMV8_IDX_CYCLE_COUNTER);
+ (idx < ARMV8_PMU_MAX_GENERAL_COUNTERS);
}
/*
* ARMv8 low level PMU access
*/
-
-/*
- * Perf Event to low level counters mapping
- */
-#define ARMV8_IDX_TO_COUNTER(x) \
- (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK)
-
static u64 armv8pmu_pmcr_read(void)
{
return read_pmcr();
@@ -511,21 +500,19 @@ static void armv8pmu_pmcr_write(u64 val)
write_pmcr(val);
}
-static int armv8pmu_has_overflowed(u32 pmovsr)
+static int armv8pmu_has_overflowed(u64 pmovsr)
{
- return pmovsr & ARMV8_PMU_OVERFLOWED_MASK;
+ return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK);
}
-static int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
+static int armv8pmu_counter_has_overflowed(u64 pmnc, int idx)
{
- return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
+ return !!(pmnc & BIT(idx));
}
static u64 armv8pmu_read_evcntr(int idx)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
-
- return read_pmevcntrn(counter);
+ return read_pmevcntrn(idx);
}
static u64 armv8pmu_read_hw_counter(struct perf_event *event)
@@ -554,7 +541,7 @@ static bool armv8pmu_event_needs_bias(struct perf_event *event)
return false;
if (armv8pmu_has_long_event(cpu_pmu) ||
- idx == ARMV8_IDX_CYCLE_COUNTER)
+ idx >= ARMV8_PMU_MAX_GENERAL_COUNTERS)
return true;
return false;
@@ -582,8 +569,10 @@ static u64 armv8pmu_read_counter(struct perf_event *event)
int idx = hwc->idx;
u64 value;
- if (idx == ARMV8_IDX_CYCLE_COUNTER)
+ if (idx == ARMV8_PMU_CYCLE_IDX)
value = read_pmccntr();
+ else if (idx == ARMV8_PMU_INSTR_IDX)
+ value = read_pmicntr();
else
value = armv8pmu_read_hw_counter(event);
@@ -592,9 +581,7 @@ static u64 armv8pmu_read_counter(struct perf_event *event)
static void armv8pmu_write_evcntr(int idx, u64 value)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
-
- write_pmevcntrn(counter, value);
+ write_pmevcntrn(idx, value);
}
static void armv8pmu_write_hw_counter(struct perf_event *event,
@@ -617,15 +604,16 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
value = armv8pmu_bias_long_counter(event, value);
- if (idx == ARMV8_IDX_CYCLE_COUNTER)
+ if (idx == ARMV8_PMU_CYCLE_IDX)
write_pmccntr(value);
+ else if (idx == ARMV8_PMU_INSTR_IDX)
+ write_pmicntr(value);
else
armv8pmu_write_hw_counter(event, value);
}
static void armv8pmu_write_evtype(int idx, unsigned long val)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
unsigned long mask = ARMV8_PMU_EVTYPE_EVENT |
ARMV8_PMU_INCLUDE_EL2 |
ARMV8_PMU_EXCLUDE_EL0 |
@@ -635,7 +623,7 @@ static void armv8pmu_write_evtype(int idx, unsigned long val)
mask |= ARMV8_PMU_EVTYPE_TC | ARMV8_PMU_EVTYPE_TH;
val &= mask;
- write_pmevtypern(counter, val);
+ write_pmevtypern(idx, val);
}
static void armv8pmu_write_event_type(struct perf_event *event)
@@ -655,24 +643,26 @@ static void armv8pmu_write_event_type(struct perf_event *event)
armv8pmu_write_evtype(idx - 1, hwc->config_base);
armv8pmu_write_evtype(idx, chain_evt);
} else {
- if (idx == ARMV8_IDX_CYCLE_COUNTER)
+ if (idx == ARMV8_PMU_CYCLE_IDX)
write_pmccfiltr(hwc->config_base);
+ else if (idx == ARMV8_PMU_INSTR_IDX)
+ write_pmicfiltr(hwc->config_base);
else
armv8pmu_write_evtype(idx, hwc->config_base);
}
}
-static u32 armv8pmu_event_cnten_mask(struct perf_event *event)
+static u64 armv8pmu_event_cnten_mask(struct perf_event *event)
{
- int counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
- u32 mask = BIT(counter);
+ int counter = event->hw.idx;
+ u64 mask = BIT(counter);
if (armv8pmu_event_is_chained(event))
mask |= BIT(counter - 1);
return mask;
}
-static void armv8pmu_enable_counter(u32 mask)
+static void armv8pmu_enable_counter(u64 mask)
{
/*
* Make sure event configuration register writes are visible before we
@@ -685,7 +675,7 @@ static void armv8pmu_enable_counter(u32 mask)
static void armv8pmu_enable_event_counter(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
- u32 mask = armv8pmu_event_cnten_mask(event);
+ u64 mask = armv8pmu_event_cnten_mask(event);
kvm_set_pmu_events(mask, attr);
@@ -694,7 +684,7 @@ static void armv8pmu_enable_event_counter(struct perf_event *event)
armv8pmu_enable_counter(mask);
}
-static void armv8pmu_disable_counter(u32 mask)
+static void armv8pmu_disable_counter(u64 mask)
{
write_pmcntenclr(mask);
/*
@@ -707,7 +697,7 @@ static void armv8pmu_disable_counter(u32 mask)
static void armv8pmu_disable_event_counter(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
- u32 mask = armv8pmu_event_cnten_mask(event);
+ u64 mask = armv8pmu_event_cnten_mask(event);
kvm_clr_pmu_events(mask);
@@ -716,18 +706,17 @@ static void armv8pmu_disable_event_counter(struct perf_event *event)
armv8pmu_disable_counter(mask);
}
-static void armv8pmu_enable_intens(u32 mask)
+static void armv8pmu_enable_intens(u64 mask)
{
write_pmintenset(mask);
}
static void armv8pmu_enable_event_irq(struct perf_event *event)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
- armv8pmu_enable_intens(BIT(counter));
+ armv8pmu_enable_intens(BIT(event->hw.idx));
}
-static void armv8pmu_disable_intens(u32 mask)
+static void armv8pmu_disable_intens(u64 mask)
{
write_pmintenclr(mask);
isb();
@@ -738,13 +727,12 @@ static void armv8pmu_disable_intens(u32 mask)
static void armv8pmu_disable_event_irq(struct perf_event *event)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
- armv8pmu_disable_intens(BIT(counter));
+ armv8pmu_disable_intens(BIT(event->hw.idx));
}
-static u32 armv8pmu_getreset_flags(void)
+static u64 armv8pmu_getreset_flags(void)
{
- u32 value;
+ u64 value;
/* Read */
value = read_pmovsclr();
@@ -782,15 +770,27 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
int i;
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
- /* Clear any unused counters to avoid leaking their contents */
- for_each_clear_bit(i, cpuc->used_mask, cpu_pmu->num_events) {
- if (i == ARMV8_IDX_CYCLE_COUNTER)
- write_pmccntr(0);
- else
- armv8pmu_write_evcntr(i, 0);
+ if (is_pmuv3p9(cpu_pmu->pmuver)) {
+ u64 mask = 0;
+ for_each_set_bit(i, cpuc->used_mask, ARMPMU_MAX_HWEVENTS) {
+ if (armv8pmu_event_has_user_read(cpuc->events[i]))
+ mask |= BIT(i);
+ }
+ write_pmuacr(mask);
+ } else {
+ /* Clear any unused counters to avoid leaking their contents */
+ for_each_andnot_bit(i, cpu_pmu->cntr_mask, cpuc->used_mask,
+ ARMPMU_MAX_HWEVENTS) {
+ if (i == ARMV8_PMU_CYCLE_IDX)
+ write_pmccntr(0);
+ else if (i == ARMV8_PMU_INSTR_IDX)
+ write_pmicntr(0);
+ else
+ armv8pmu_write_evcntr(i, 0);
+ }
}
- update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
+ update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_UEN);
}
static void armv8pmu_enable_event(struct perf_event *event)
@@ -839,7 +839,7 @@ static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
{
- u32 pmovsr;
+ u64 pmovsr;
struct perf_sample_data data;
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
struct pt_regs *regs;
@@ -866,7 +866,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
* to prevent skews in group events.
*/
armv8pmu_stop(cpu_pmu);
- for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
@@ -905,7 +905,7 @@ static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
{
int idx;
- for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) {
if (!test_and_set_bit(idx, cpuc->used_mask))
return idx;
}
@@ -921,7 +921,9 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
* Chaining requires two consecutive event counters, where
* the lower idx must be even.
*/
- for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) {
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) {
+ if (!(idx & 0x1))
+ continue;
if (!test_and_set_bit(idx, cpuc->used_mask)) {
/* Check if the preceding even counter is available */
if (!test_and_set_bit(idx - 1, cpuc->used_mask))
@@ -941,9 +943,10 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
/* Always prefer to place a cycle counter into the cycle counter. */
- if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
- if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
- return ARMV8_IDX_CYCLE_COUNTER;
+ if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) &&
+ !armv8pmu_event_get_threshold(&event->attr)) {
+ if (!test_and_set_bit(ARMV8_PMU_CYCLE_IDX, cpuc->used_mask))
+ return ARMV8_PMU_CYCLE_IDX;
else if (armv8pmu_event_is_64bit(event) &&
armv8pmu_event_want_user_access(event) &&
!armv8pmu_has_long_event(cpu_pmu))
@@ -951,6 +954,19 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
}
/*
+ * Always prefer to place a instruction counter into the instruction counter,
+ * but don't expose the instruction counter to userspace access as userspace
+ * may not know how to handle it.
+ */
+ if ((evtype == ARMV8_PMUV3_PERFCTR_INST_RETIRED) &&
+ !armv8pmu_event_get_threshold(&event->attr) &&
+ test_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask) &&
+ !armv8pmu_event_want_user_access(event)) {
+ if (!test_and_set_bit(ARMV8_PMU_INSTR_IDX, cpuc->used_mask))
+ return ARMV8_PMU_INSTR_IDX;
+ }
+
+ /*
* Otherwise use events counters
*/
if (armv8pmu_event_is_chained(event))
@@ -974,15 +990,7 @@ static int armv8pmu_user_event_idx(struct perf_event *event)
if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event))
return 0;
- /*
- * We remap the cycle counter index to 32 to
- * match the offset applied to the rest of
- * the counter indices.
- */
- if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER)
- return ARMV8_IDX_CYCLE_COUNTER_USER;
-
- return event->hw.idx;
+ return event->hw.idx + 1;
}
/*
@@ -1033,13 +1041,13 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
* If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will
* be 0 and will also trigger this check, preventing it from being used.
*/
- th = ATTR_CFG_GET_FLD(attr, threshold);
+ th = armv8pmu_event_get_threshold(attr);
if (th > threshold_max(cpu_pmu)) {
pr_debug("PMU event threshold exceeds max value\n");
return -EINVAL;
}
- if (IS_ENABLED(CONFIG_ARM64) && th) {
+ if (th) {
config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
armv8pmu_event_threshold_control(attr));
@@ -1057,14 +1065,16 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
static void armv8pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
- u64 pmcr;
+ u64 pmcr, mask;
+
+ bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
/* The counter and interrupt enable registers are unknown at reset. */
- armv8pmu_disable_counter(U32_MAX);
- armv8pmu_disable_intens(U32_MAX);
+ armv8pmu_disable_counter(mask);
+ armv8pmu_disable_intens(mask);
/* Clear the counters we flip at guest entry/exit */
- kvm_clr_pmu_events(U32_MAX);
+ kvm_clr_pmu_events(mask);
/*
* Initialize & Reset PMNC. Request overflow interrupt for
@@ -1085,14 +1095,14 @@ static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu,
if (event->attr.type == PERF_TYPE_HARDWARE &&
event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) {
- if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
- armpmu->pmceid_bitmap))
- return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED;
-
if (test_bit(ARMV8_PMUV3_PERFCTR_BR_RETIRED,
armpmu->pmceid_bitmap))
return ARMV8_PMUV3_PERFCTR_BR_RETIRED;
+ if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
+ armpmu->pmceid_bitmap))
+ return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED;
+
return HW_OP_UNSUPPORTED;
}
@@ -1207,10 +1217,15 @@ static void __armv8pmu_probe_pmu(void *info)
probe->present = true;
/* Read the nb of CNTx counters supported from PMNC */
- cpu_pmu->num_events = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
+ bitmap_set(cpu_pmu->cntr_mask,
+ 0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
/* Add the CPU cycles counter */
- cpu_pmu->num_events += 1;
+ set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask);
+
+ /* Add the CPU instructions counter */
+ if (pmuv3_has_icntr())
+ set_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask);
pmceid[0] = pmceid_raw[0] = read_pmceid0();
pmceid[1] = pmceid_raw[1] = read_pmceid1();
@@ -1253,7 +1268,7 @@ static void armv8pmu_disable_user_access_ipi(void *unused)
armv8pmu_disable_user_access();
}
-static int armv8pmu_proc_user_access_handler(struct ctl_table *table, int write,
+static int armv8pmu_proc_user_access_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
@@ -1264,7 +1279,7 @@ static int armv8pmu_proc_user_access_handler(struct ctl_table *table, int write,
return 0;
}
-static struct ctl_table armv8_pmu_sysctl_table[] = {
+static const struct ctl_table armv8_pmu_sysctl_table[] = {
{
.procname = "perf_user_access",
.data = &sysctl_perf_user_access,
@@ -1340,18 +1355,26 @@ PMUV3_INIT_SIMPLE(armv9_cortex_a520)
PMUV3_INIT_SIMPLE(armv9_cortex_a710)
PMUV3_INIT_SIMPLE(armv9_cortex_a715)
PMUV3_INIT_SIMPLE(armv9_cortex_a720)
+PMUV3_INIT_SIMPLE(armv9_cortex_a725)
PMUV3_INIT_SIMPLE(armv8_cortex_x1)
PMUV3_INIT_SIMPLE(armv9_cortex_x2)
PMUV3_INIT_SIMPLE(armv9_cortex_x3)
PMUV3_INIT_SIMPLE(armv9_cortex_x4)
+PMUV3_INIT_SIMPLE(armv9_cortex_x925)
PMUV3_INIT_SIMPLE(armv8_neoverse_e1)
PMUV3_INIT_SIMPLE(armv8_neoverse_n1)
PMUV3_INIT_SIMPLE(armv9_neoverse_n2)
+PMUV3_INIT_SIMPLE(armv9_neoverse_n3)
PMUV3_INIT_SIMPLE(armv8_neoverse_v1)
+PMUV3_INIT_SIMPLE(armv8_neoverse_v2)
+PMUV3_INIT_SIMPLE(armv8_neoverse_v3)
+PMUV3_INIT_SIMPLE(armv8_neoverse_v3ae)
PMUV3_INIT_SIMPLE(armv8_nvidia_carmel)
PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
+PMUV3_INIT_SIMPLE(armv8_samsung_mongoose)
+
PMUV3_INIT_MAP_EVENT(armv8_cortex_a35, armv8_a53_map_event)
PMUV3_INIT_MAP_EVENT(armv8_cortex_a53, armv8_a53_map_event)
PMUV3_INIT_MAP_EVENT(armv8_cortex_a57, armv8_a57_map_event)
@@ -1379,18 +1402,25 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init},
{.compatible = "arm,cortex-a715-pmu", .data = armv9_cortex_a715_pmu_init},
{.compatible = "arm,cortex-a720-pmu", .data = armv9_cortex_a720_pmu_init},
+ {.compatible = "arm,cortex-a725-pmu", .data = armv9_cortex_a725_pmu_init},
{.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init},
{.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init},
{.compatible = "arm,cortex-x3-pmu", .data = armv9_cortex_x3_pmu_init},
{.compatible = "arm,cortex-x4-pmu", .data = armv9_cortex_x4_pmu_init},
+ {.compatible = "arm,cortex-x925-pmu", .data = armv9_cortex_x925_pmu_init},
{.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init},
{.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init},
{.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init},
+ {.compatible = "arm,neoverse-n3-pmu", .data = armv9_neoverse_n3_pmu_init},
{.compatible = "arm,neoverse-v1-pmu", .data = armv8_neoverse_v1_pmu_init},
+ {.compatible = "arm,neoverse-v2-pmu", .data = armv8_neoverse_v2_pmu_init},
+ {.compatible = "arm,neoverse-v3-pmu", .data = armv8_neoverse_v3_pmu_init},
+ {.compatible = "arm,neoverse-v3ae-pmu", .data = armv8_neoverse_v3ae_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_cavium_thunder_pmu_init},
{.compatible = "brcm,vulcan-pmu", .data = armv8_brcm_vulcan_pmu_init},
{.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init},
{.compatible = "nvidia,denver-pmu", .data = armv8_nvidia_denver_pmu_init},
+ {.compatible = "samsung,mongoose-pmu", .data = armv8_samsung_mongoose_pmu_init},
{},
};
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 6303b82566f9..621f02a7f43b 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -431,6 +431,17 @@ static int smmu_pmu_event_init(struct perf_event *event)
return -EINVAL;
}
+ /*
+ * Ensure all events are on the same cpu so all events are in the
+ * same cpu context, to avoid races on pmu_enable etc.
+ */
+ event->cpu = smmu_pmu->on_cpu;
+
+ hwc->idx = -1;
+
+ if (event->group_leader == event)
+ return 0;
+
for_each_sibling_event(sibling, event->group_leader) {
if (is_software_event(sibling))
continue;
@@ -442,14 +453,6 @@ static int smmu_pmu_event_init(struct perf_event *event)
return -EINVAL;
}
- hwc->idx = -1;
-
- /*
- * Ensure all events are on the same cpu so all events are in the
- * same cpu context, to avoid races on pmu_enable etc.
- */
- event->cpu = smmu_pmu->on_cpu;
-
return 0;
}
@@ -716,7 +719,7 @@ static void smmu_pmu_free_msis(void *data)
{
struct device *dev = data;
- platform_msi_domain_free_irqs(dev);
+ platform_device_msi_free_irqs_all(dev);
}
static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
@@ -746,7 +749,7 @@ static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI))
return;
- ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
+ ret = platform_device_msi_init_and_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
if (ret) {
dev_warn(dev, "failed to allocate MSIs\n");
return;
@@ -860,6 +863,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
smmu_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = smmu_pmu_enable,
.pmu_disable = smmu_pmu_disable,
@@ -965,14 +969,12 @@ out_unregister:
return err;
}
-static int smmu_pmu_remove(struct platform_device *pdev)
+static void smmu_pmu_remove(struct platform_device *pdev)
{
struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&smmu_pmu->pmu);
cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
-
- return 0;
}
static void smmu_pmu_shutdown(struct platform_device *pdev)
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index b622d75d8c9e..f5e6878db9d6 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -41,7 +41,7 @@
/*
* Cache if the event is allowed to trace Context information.
- * This allows us to perform the check, i.e, perfmon_capable(),
+ * This allows us to perform the check, i.e, perf_allow_kernel(),
* in the context of the event owner, once, during the event_init().
*/
#define SPE_PMU_HW_FLAGS_CX 0x00001
@@ -50,7 +50,7 @@ static_assert((PERF_EVENT_FLAG_ARCH & SPE_PMU_HW_FLAGS_CX) == SPE_PMU_HW_FLAGS_C
static void set_spe_event_has_cx(struct perf_event *event)
{
- if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable())
+ if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && !perf_allow_kernel(&event->attr))
event->hw.flags |= SPE_PMU_HW_FLAGS_CX;
}
@@ -85,6 +85,7 @@ struct arm_spe_pmu {
#define SPE_PMU_FEAT_LDS (1UL << 4)
#define SPE_PMU_FEAT_ERND (1UL << 5)
#define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6)
+#define SPE_PMU_FEAT_DISCARD (1UL << 7)
#define SPE_PMU_FEAT_DEV_PROBED (1UL << 63)
u64 features;
@@ -193,6 +194,9 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
#define ATTR_CFG_FLD_store_filter_CFG config /* PMSFCR_EL1.ST */
#define ATTR_CFG_FLD_store_filter_LO 34
#define ATTR_CFG_FLD_store_filter_HI 34
+#define ATTR_CFG_FLD_discard_CFG config /* PMBLIMITR_EL1.FM = DISCARD */
+#define ATTR_CFG_FLD_discard_LO 35
+#define ATTR_CFG_FLD_discard_HI 35
#define ATTR_CFG_FLD_event_filter_CFG config1 /* PMSEVFR_EL1 */
#define ATTR_CFG_FLD_event_filter_LO 0
@@ -216,6 +220,7 @@ GEN_PMU_FORMAT_ATTR(store_filter);
GEN_PMU_FORMAT_ATTR(event_filter);
GEN_PMU_FORMAT_ATTR(inv_event_filter);
GEN_PMU_FORMAT_ATTR(min_latency);
+GEN_PMU_FORMAT_ATTR(discard);
static struct attribute *arm_spe_pmu_formats_attr[] = {
&format_attr_ts_enable.attr,
@@ -228,6 +233,7 @@ static struct attribute *arm_spe_pmu_formats_attr[] = {
&format_attr_event_filter.attr,
&format_attr_inv_event_filter.attr,
&format_attr_min_latency.attr,
+ &format_attr_discard.attr,
NULL,
};
@@ -238,6 +244,9 @@ static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj,
struct device *dev = kobj_to_dev(kobj);
struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
+ if (attr == &format_attr_discard.attr && !(spe_pmu->features & SPE_PMU_FEAT_DISCARD))
+ return 0;
+
if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
return 0;
@@ -502,6 +511,12 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
u64 base, limit;
struct arm_spe_pmu_buf *buf;
+ if (ATTR_CFG_GET_FLD(&event->attr, discard)) {
+ limit = FIELD_PREP(PMBLIMITR_EL1_FM, PMBLIMITR_EL1_FM_DISCARD);
+ limit |= PMBLIMITR_EL1_E;
+ goto out_write_limit;
+ }
+
/* Start a new aux session */
buf = perf_aux_output_begin(handle, event);
if (!buf) {
@@ -743,11 +758,14 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
!(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
return -EOPNOTSUPP;
+ if (ATTR_CFG_GET_FLD(&event->attr, discard) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_DISCARD))
+ return -EOPNOTSUPP;
+
set_spe_event_has_cx(event);
reg = arm_spe_event_to_pmscr(event);
- if (!perfmon_capable() &&
- (reg & (PMSCR_EL1_PA | PMSCR_EL1_PCT)))
- return -EACCES;
+ if (reg & (PMSCR_EL1_PA | PMSCR_EL1_PCT))
+ return perf_allow_kernel(&event->attr);
return 0;
}
@@ -932,6 +950,7 @@ static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)
spe_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &spe_pmu->pdev->dev,
.capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
.attr_groups = arm_spe_pmu_attr_groups,
/*
@@ -1027,6 +1046,9 @@ static void __arm_spe_pmu_dev_probe(void *info)
if (FIELD_GET(PMSIDR_EL1_ERND, reg))
spe_pmu->features |= SPE_PMU_FEAT_ERND;
+ if (spe_pmu->pmsver >= ID_AA64DFR0_EL1_PMSVer_V1P2)
+ spe_pmu->features |= SPE_PMU_FEAT_DISCARD;
+
/* This field has a spaced out encoding, so just use a look-up */
fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg);
switch (fld) {
@@ -1263,14 +1285,13 @@ out_free_handle:
return ret;
}
-static int arm_spe_pmu_device_remove(struct platform_device *pdev)
+static void arm_spe_pmu_device_remove(struct platform_device *pdev)
{
struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
arm_spe_pmu_perf_destroy(spe_pmu);
arm_spe_pmu_dev_teardown(spe_pmu);
free_percpu(spe_pmu->handle);
- return 0;
}
static struct platform_driver arm_spe_pmu_driver = {
@@ -1281,7 +1302,7 @@ static struct platform_driver arm_spe_pmu_driver = {
.suppress_bind_attrs = true,
},
.probe = arm_spe_pmu_device_probe,
- .remove = arm_spe_pmu_device_remove,
+ .remove = arm_spe_pmu_device_remove,
};
static int __init arm_spe_pmu_init(void)
diff --git a/drivers/perf/arm_v6_pmu.c b/drivers/perf/arm_v6_pmu.c
new file mode 100644
index 000000000000..b09615bb2bb2
--- /dev/null
+++ b/drivers/perf/arm_v6_pmu.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARMv6 Performance counter handling code.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ *
+ * ARMv6 has 2 configurable performance counters and a single cycle counter.
+ * They all share a single reset bit but can be written to zero so we can use
+ * that for a reset.
+ *
+ * The counters can't be individually enabled or disabled so when we remove
+ * one event and replace it with another we could get spurious counts from the
+ * wrong event. However, we can take advantage of the fact that the
+ * performance counters can export events to the event bus, and the event bus
+ * itself can be monitored. This requires that we *don't* export the events to
+ * the event bus. The procedure for disabling a configurable counter is:
+ * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
+ * effectively stops the counter from counting.
+ * - disable the counter's interrupt generation (each counter has it's
+ * own interrupt enable bit).
+ * Once stopped, the counter value can be written as 0 to reset.
+ *
+ * To enable a counter:
+ * - enable the counter's interrupt generation.
+ * - set the new event type.
+ *
+ * Note: the dedicated cycle counter only counts cycles and can't be
+ * enabled/disabled independently of the others. When we want to disable the
+ * cycle counter, we have to just disable the interrupt reporting and start
+ * ignoring that counter. When re-enabling, we have to reset the value and
+ * enable the interrupt.
+ */
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+enum armv6_perf_types {
+ ARMV6_PERFCTR_ICACHE_MISS = 0x0,
+ ARMV6_PERFCTR_IBUF_STALL = 0x1,
+ ARMV6_PERFCTR_DDEP_STALL = 0x2,
+ ARMV6_PERFCTR_ITLB_MISS = 0x3,
+ ARMV6_PERFCTR_DTLB_MISS = 0x4,
+ ARMV6_PERFCTR_BR_EXEC = 0x5,
+ ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
+ ARMV6_PERFCTR_INSTR_EXEC = 0x7,
+ ARMV6_PERFCTR_DCACHE_HIT = 0x9,
+ ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
+ ARMV6_PERFCTR_DCACHE_MISS = 0xB,
+ ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
+ ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
+ ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
+ ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
+ ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
+ ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
+ ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
+ ARMV6_PERFCTR_NOP = 0x20,
+};
+
+enum armv6_counters {
+ ARMV6_CYCLE_COUNTER = 0,
+ ARMV6_COUNTER0,
+ ARMV6_COUNTER1,
+ ARMV6_NUM_COUNTERS
+};
+
+/*
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL,
+};
+
+static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
+
+ /*
+ * The ARM performance counters can count micro DTLB misses, micro ITLB
+ * misses and main TLB misses. There isn't an event for TLB misses, so
+ * use the micro misses here and if users want the main TLB misses they
+ * can use a raw counter.
+ */
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
+};
+
+static inline unsigned long
+armv6_pmcr_read(void)
+{
+ u32 val;
+ asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
+ return val;
+}
+
+static inline void
+armv6_pmcr_write(unsigned long val)
+{
+ asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
+}
+
+#define ARMV6_PMCR_ENABLE (1 << 0)
+#define ARMV6_PMCR_CTR01_RESET (1 << 1)
+#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
+#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
+#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
+#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
+#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
+#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
+#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
+#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
+#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
+#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
+#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
+#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
+
+#define ARMV6_PMCR_OVERFLOWED_MASK \
+ (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
+ ARMV6_PMCR_CCOUNT_OVERFLOW)
+
+static inline int
+armv6_pmcr_has_overflowed(unsigned long pmcr)
+{
+ return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
+}
+
+static inline int
+armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
+ enum armv6_counters counter)
+{
+ int ret = 0;
+
+ if (ARMV6_CYCLE_COUNTER == counter)
+ ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
+ else if (ARMV6_COUNTER0 == counter)
+ ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
+ else if (ARMV6_COUNTER1 == counter)
+ ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
+ else
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+ return ret;
+}
+
+static inline u64 armv6pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+ unsigned long value = 0;
+
+ if (ARMV6_CYCLE_COUNTER == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
+ else if (ARMV6_COUNTER0 == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
+ else if (ARMV6_COUNTER1 == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
+ else
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+ return value;
+}
+
+static inline void armv6pmu_write_counter(struct perf_event *event, u64 value)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ if (ARMV6_CYCLE_COUNTER == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
+ else if (ARMV6_COUNTER0 == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
+ else if (ARMV6_COUNTER1 == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
+ else
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+}
+
+static void armv6pmu_enable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (ARMV6_CYCLE_COUNTER == idx) {
+ mask = 0;
+ evt = ARMV6_PMCR_CCOUNT_IEN;
+ } else if (ARMV6_COUNTER0 == idx) {
+ mask = ARMV6_PMCR_EVT_COUNT0_MASK;
+ evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
+ ARMV6_PMCR_COUNT0_IEN;
+ } else if (ARMV6_COUNTER1 == idx) {
+ mask = ARMV6_PMCR_EVT_COUNT1_MASK;
+ evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
+ ARMV6_PMCR_COUNT1_IEN;
+ } else {
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ /*
+ * Mask out the current event and set the counter to count the event
+ * that we're interested in.
+ */
+ val = armv6_pmcr_read();
+ val &= ~mask;
+ val |= evt;
+ armv6_pmcr_write(val);
+}
+
+static irqreturn_t
+armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ unsigned long pmcr = armv6_pmcr_read();
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ if (!armv6_pmcr_has_overflowed(pmcr))
+ return IRQ_NONE;
+
+ regs = get_irq_regs();
+
+ /*
+ * The interrupts are cleared by writing the overflow flags back to
+ * the control register. All of the other bits don't have any effect
+ * if they are rewritten, so write the whole value back.
+ */
+ armv6_pmcr_write(pmcr);
+
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV6_NUM_COUNTERS) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ /* Ignore if we don't have an event. */
+ if (!event)
+ continue;
+
+ /*
+ * We have a single interrupt for all counters. Check that
+ * each counter has overflowed before we process it.
+ */
+ if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cpu_pmu->disable(event);
+ }
+
+ /*
+ * Handle the pending perf events.
+ *
+ * Note: this call *must* be run with interrupts disabled. For
+ * platforms that can have the PMU interrupts raised as an NMI, this
+ * will not work.
+ */
+ irq_work_run();
+
+ return IRQ_HANDLED;
+}
+
+static void armv6pmu_start(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = armv6_pmcr_read();
+ val |= ARMV6_PMCR_ENABLE;
+ armv6_pmcr_write(val);
+}
+
+static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = armv6_pmcr_read();
+ val &= ~ARMV6_PMCR_ENABLE;
+ armv6_pmcr_write(val);
+}
+
+static int
+armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ /* Always place a cycle counter into the cycle counter. */
+ if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) {
+ if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
+ return -EAGAIN;
+
+ return ARMV6_CYCLE_COUNTER;
+ } else {
+ /*
+ * For anything other than a cycle counter, try and use
+ * counter0 and counter1.
+ */
+ if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
+ return ARMV6_COUNTER1;
+
+ if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
+ return ARMV6_COUNTER0;
+
+ /* The counters are all in use. */
+ return -EAGAIN;
+ }
+}
+
+static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+static void armv6pmu_disable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (ARMV6_CYCLE_COUNTER == idx) {
+ mask = ARMV6_PMCR_CCOUNT_IEN;
+ evt = 0;
+ } else if (ARMV6_COUNTER0 == idx) {
+ mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
+ evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
+ } else if (ARMV6_COUNTER1 == idx) {
+ mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
+ evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
+ } else {
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ /*
+ * Mask out the current event and set the counter to count the number
+ * of ETM bus signal assertion cycles. The external reporting should
+ * be disabled and so this should never increment.
+ */
+ val = armv6_pmcr_read();
+ val &= ~mask;
+ val |= evt;
+ armv6_pmcr_write(val);
+}
+
+static int armv6_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv6_perf_map,
+ &armv6_perf_cache_map, 0xFF);
+}
+
+static void armv6pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->handle_irq = armv6pmu_handle_irq;
+ cpu_pmu->enable = armv6pmu_enable_event;
+ cpu_pmu->disable = armv6pmu_disable_event;
+ cpu_pmu->read_counter = armv6pmu_read_counter;
+ cpu_pmu->write_counter = armv6pmu_write_counter;
+ cpu_pmu->get_event_idx = armv6pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx;
+ cpu_pmu->start = armv6pmu_start;
+ cpu_pmu->stop = armv6pmu_stop;
+ cpu_pmu->map_event = armv6_map_event;
+
+ bitmap_set(cpu_pmu->cntr_mask, 0, ARMV6_NUM_COUNTERS);
+}
+
+static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1136";
+ return 0;
+}
+
+static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1176";
+ return 0;
+}
+
+static const struct of_device_id armv6_pmu_of_device_ids[] = {
+ {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
+ {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
+ { /* sentinel value */ }
+};
+
+static int armv6_pmu_device_probe(struct platform_device *pdev)
+{
+ return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver armv6_pmu_driver = {
+ .driver = {
+ .name = "armv6-pmu",
+ .of_match_table = armv6_pmu_of_device_ids,
+ },
+ .probe = armv6_pmu_device_probe,
+};
+
+builtin_platform_driver(armv6_pmu_driver);
diff --git a/drivers/perf/arm_v7_pmu.c b/drivers/perf/arm_v7_pmu.c
new file mode 100644
index 000000000000..420cadd108e7
--- /dev/null
+++ b/drivers/perf/arm_v7_pmu.c
@@ -0,0 +1,1975 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
+ *
+ * ARMv7 support: Jean Pihet <jpihet@mvista.com>
+ * 2010 (c) MontaVista Software, LLC.
+ *
+ * Copied from ARMv6 code, with the low level code inspired
+ * by the ARMv7 Oprofile code.
+ *
+ * Cortex-A8 has up to 4 configurable performance counters and
+ * a single cycle counter.
+ * Cortex-A9 has up to 31 configurable performance counters and
+ * a single cycle counter.
+ *
+ * All counters can be enabled/disabled and IRQ masked separately. The cycle
+ * counter and all 4 performance counters together can be reset separately.
+ */
+
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+#include <asm/vfp.h>
+#include "../vfp/vfpinstr.h"
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+/*
+ * Common ARMv7 event types
+ *
+ * Note: An implementation may not be able to count all of these events
+ * but the encodings are considered to be `reserved' in the case that
+ * they are not available.
+ */
+#define ARMV7_PERFCTR_PMNC_SW_INCR 0x00
+#define ARMV7_PERFCTR_L1_ICACHE_REFILL 0x01
+#define ARMV7_PERFCTR_ITLB_REFILL 0x02
+#define ARMV7_PERFCTR_L1_DCACHE_REFILL 0x03
+#define ARMV7_PERFCTR_L1_DCACHE_ACCESS 0x04
+#define ARMV7_PERFCTR_DTLB_REFILL 0x05
+#define ARMV7_PERFCTR_MEM_READ 0x06
+#define ARMV7_PERFCTR_MEM_WRITE 0x07
+#define ARMV7_PERFCTR_INSTR_EXECUTED 0x08
+#define ARMV7_PERFCTR_EXC_TAKEN 0x09
+#define ARMV7_PERFCTR_EXC_EXECUTED 0x0A
+#define ARMV7_PERFCTR_CID_WRITE 0x0B
+
+/*
+ * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
+ * It counts:
+ * - all (taken) branch instructions,
+ * - instructions that explicitly write the PC,
+ * - exception generating instructions.
+ */
+#define ARMV7_PERFCTR_PC_WRITE 0x0C
+#define ARMV7_PERFCTR_PC_IMM_BRANCH 0x0D
+#define ARMV7_PERFCTR_PC_PROC_RETURN 0x0E
+#define ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F
+#define ARMV7_PERFCTR_PC_BRANCH_MIS_PRED 0x10
+#define ARMV7_PERFCTR_CLOCK_CYCLES 0x11
+#define ARMV7_PERFCTR_PC_BRANCH_PRED 0x12
+
+/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */
+#define ARMV7_PERFCTR_MEM_ACCESS 0x13
+#define ARMV7_PERFCTR_L1_ICACHE_ACCESS 0x14
+#define ARMV7_PERFCTR_L1_DCACHE_WB 0x15
+#define ARMV7_PERFCTR_L2_CACHE_ACCESS 0x16
+#define ARMV7_PERFCTR_L2_CACHE_REFILL 0x17
+#define ARMV7_PERFCTR_L2_CACHE_WB 0x18
+#define ARMV7_PERFCTR_BUS_ACCESS 0x19
+#define ARMV7_PERFCTR_MEM_ERROR 0x1A
+#define ARMV7_PERFCTR_INSTR_SPEC 0x1B
+#define ARMV7_PERFCTR_TTBR_WRITE 0x1C
+#define ARMV7_PERFCTR_BUS_CYCLES 0x1D
+
+#define ARMV7_PERFCTR_CPU_CYCLES 0xFF
+
+/* ARMv7 Cortex-A8 specific event types */
+#define ARMV7_A8_PERFCTR_L2_CACHE_ACCESS 0x43
+#define ARMV7_A8_PERFCTR_L2_CACHE_REFILL 0x44
+#define ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS 0x50
+#define ARMV7_A8_PERFCTR_STALL_ISIDE 0x56
+
+/* ARMv7 Cortex-A9 specific event types */
+#define ARMV7_A9_PERFCTR_INSTR_CORE_RENAME 0x68
+#define ARMV7_A9_PERFCTR_STALL_ICACHE 0x60
+#define ARMV7_A9_PERFCTR_STALL_DISPATCH 0x66
+
+/* ARMv7 Cortex-A5 specific event types */
+#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL 0xc2
+#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP 0xc3
+
+/* ARMv7 Cortex-A15 specific event types */
+#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ 0x40
+#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41
+#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ 0x42
+#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE 0x43
+
+#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ 0x4C
+#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE 0x4D
+
+#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ 0x50
+#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51
+#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ 0x52
+#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE 0x53
+
+#define ARMV7_A15_PERFCTR_PC_WRITE_SPEC 0x76
+
+/* ARMv7 Cortex-A12 specific event types */
+#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ 0x40
+#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41
+
+#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ 0x50
+#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51
+
+#define ARMV7_A12_PERFCTR_PC_WRITE_SPEC 0x76
+
+#define ARMV7_A12_PERFCTR_PF_TLB_REFILL 0xe7
+
+/* ARMv7 Krait specific event types */
+#define KRAIT_PMRESR0_GROUP0 0xcc
+#define KRAIT_PMRESR1_GROUP0 0xd0
+#define KRAIT_PMRESR2_GROUP0 0xd4
+#define KRAIT_VPMRESR0_GROUP0 0xd8
+
+#define KRAIT_PERFCTR_L1_ICACHE_ACCESS 0x10011
+#define KRAIT_PERFCTR_L1_ICACHE_MISS 0x10010
+
+#define KRAIT_PERFCTR_L1_ITLB_ACCESS 0x12222
+#define KRAIT_PERFCTR_L1_DTLB_ACCESS 0x12210
+
+/* ARMv7 Scorpion specific event types */
+#define SCORPION_LPM0_GROUP0 0x4c
+#define SCORPION_LPM1_GROUP0 0x50
+#define SCORPION_LPM2_GROUP0 0x54
+#define SCORPION_L2LPM_GROUP0 0x58
+#define SCORPION_VLPM_GROUP0 0x5c
+
+#define SCORPION_ICACHE_ACCESS 0x10053
+#define SCORPION_ICACHE_MISS 0x10052
+
+#define SCORPION_DTLB_ACCESS 0x12013
+#define SCORPION_DTLB_MISS 0x12012
+
+#define SCORPION_ITLB_MISS 0x12021
+
+/*
+ * Cortex-A8 HW events mapping
+ *
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE,
+};
+
+static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A9 HW events mapping
+ */
+static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH,
+};
+
+static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A5 HW events mapping
+ */
+static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+ /*
+ * The prefetch counters don't differentiate between the I side and the
+ * D side.
+ */
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A15 HW events mapping
+ */
+static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE,
+
+ /*
+ * Not all performance counters differentiate between read and write
+ * accesses/misses so we're not always strictly correct, but it's the
+ * best we can do. Writes and reads get combined in these cases.
+ */
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A7 HW events mapping
+ */
+static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A12 HW events mapping
+ */
+static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ /*
+ * Not all performance counters differentiate between read and write
+ * accesses/misses so we're not always strictly correct, but it's the
+ * best we can do. Writes and reads get combined in these cases.
+ */
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Krait HW events mapping
+ */
+static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Scorpion HW events mapping
+ */
+static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS,
+ /*
+ * Only ITLB misses and DTLB refills are supported. If users want the
+ * DTLB refills misses a raw counter must be used.
+ */
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *armv7_pmu_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group armv7_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = armv7_pmu_format_attrs,
+};
+
+#define ARMV7_EVENT_ATTR_RESOLVE(m) #m
+#define ARMV7_EVENT_ATTR(name, config) \
+ PMU_EVENT_ATTR_STRING(name, armv7_event_attr_##name, \
+ "event=" ARMV7_EVENT_ATTR_RESOLVE(config))
+
+ARMV7_EVENT_ATTR(sw_incr, ARMV7_PERFCTR_PMNC_SW_INCR);
+ARMV7_EVENT_ATTR(l1i_cache_refill, ARMV7_PERFCTR_L1_ICACHE_REFILL);
+ARMV7_EVENT_ATTR(l1i_tlb_refill, ARMV7_PERFCTR_ITLB_REFILL);
+ARMV7_EVENT_ATTR(l1d_cache_refill, ARMV7_PERFCTR_L1_DCACHE_REFILL);
+ARMV7_EVENT_ATTR(l1d_cache, ARMV7_PERFCTR_L1_DCACHE_ACCESS);
+ARMV7_EVENT_ATTR(l1d_tlb_refill, ARMV7_PERFCTR_DTLB_REFILL);
+ARMV7_EVENT_ATTR(ld_retired, ARMV7_PERFCTR_MEM_READ);
+ARMV7_EVENT_ATTR(st_retired, ARMV7_PERFCTR_MEM_WRITE);
+ARMV7_EVENT_ATTR(inst_retired, ARMV7_PERFCTR_INSTR_EXECUTED);
+ARMV7_EVENT_ATTR(exc_taken, ARMV7_PERFCTR_EXC_TAKEN);
+ARMV7_EVENT_ATTR(exc_return, ARMV7_PERFCTR_EXC_EXECUTED);
+ARMV7_EVENT_ATTR(cid_write_retired, ARMV7_PERFCTR_CID_WRITE);
+ARMV7_EVENT_ATTR(pc_write_retired, ARMV7_PERFCTR_PC_WRITE);
+ARMV7_EVENT_ATTR(br_immed_retired, ARMV7_PERFCTR_PC_IMM_BRANCH);
+ARMV7_EVENT_ATTR(br_return_retired, ARMV7_PERFCTR_PC_PROC_RETURN);
+ARMV7_EVENT_ATTR(unaligned_ldst_retired, ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS);
+ARMV7_EVENT_ATTR(br_mis_pred, ARMV7_PERFCTR_PC_BRANCH_MIS_PRED);
+ARMV7_EVENT_ATTR(cpu_cycles, ARMV7_PERFCTR_CLOCK_CYCLES);
+ARMV7_EVENT_ATTR(br_pred, ARMV7_PERFCTR_PC_BRANCH_PRED);
+
+static struct attribute *armv7_pmuv1_event_attrs[] = {
+ &armv7_event_attr_sw_incr.attr.attr,
+ &armv7_event_attr_l1i_cache_refill.attr.attr,
+ &armv7_event_attr_l1i_tlb_refill.attr.attr,
+ &armv7_event_attr_l1d_cache_refill.attr.attr,
+ &armv7_event_attr_l1d_cache.attr.attr,
+ &armv7_event_attr_l1d_tlb_refill.attr.attr,
+ &armv7_event_attr_ld_retired.attr.attr,
+ &armv7_event_attr_st_retired.attr.attr,
+ &armv7_event_attr_inst_retired.attr.attr,
+ &armv7_event_attr_exc_taken.attr.attr,
+ &armv7_event_attr_exc_return.attr.attr,
+ &armv7_event_attr_cid_write_retired.attr.attr,
+ &armv7_event_attr_pc_write_retired.attr.attr,
+ &armv7_event_attr_br_immed_retired.attr.attr,
+ &armv7_event_attr_br_return_retired.attr.attr,
+ &armv7_event_attr_unaligned_ldst_retired.attr.attr,
+ &armv7_event_attr_br_mis_pred.attr.attr,
+ &armv7_event_attr_cpu_cycles.attr.attr,
+ &armv7_event_attr_br_pred.attr.attr,
+ NULL,
+};
+
+static struct attribute_group armv7_pmuv1_events_attr_group = {
+ .name = "events",
+ .attrs = armv7_pmuv1_event_attrs,
+};
+
+ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS);
+ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS);
+ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB);
+ARMV7_EVENT_ATTR(l2d_cache, ARMV7_PERFCTR_L2_CACHE_ACCESS);
+ARMV7_EVENT_ATTR(l2d_cache_refill, ARMV7_PERFCTR_L2_CACHE_REFILL);
+ARMV7_EVENT_ATTR(l2d_cache_wb, ARMV7_PERFCTR_L2_CACHE_WB);
+ARMV7_EVENT_ATTR(bus_access, ARMV7_PERFCTR_BUS_ACCESS);
+ARMV7_EVENT_ATTR(memory_error, ARMV7_PERFCTR_MEM_ERROR);
+ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC);
+ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE);
+ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES);
+
+static struct attribute *armv7_pmuv2_event_attrs[] = {
+ &armv7_event_attr_sw_incr.attr.attr,
+ &armv7_event_attr_l1i_cache_refill.attr.attr,
+ &armv7_event_attr_l1i_tlb_refill.attr.attr,
+ &armv7_event_attr_l1d_cache_refill.attr.attr,
+ &armv7_event_attr_l1d_cache.attr.attr,
+ &armv7_event_attr_l1d_tlb_refill.attr.attr,
+ &armv7_event_attr_ld_retired.attr.attr,
+ &armv7_event_attr_st_retired.attr.attr,
+ &armv7_event_attr_inst_retired.attr.attr,
+ &armv7_event_attr_exc_taken.attr.attr,
+ &armv7_event_attr_exc_return.attr.attr,
+ &armv7_event_attr_cid_write_retired.attr.attr,
+ &armv7_event_attr_pc_write_retired.attr.attr,
+ &armv7_event_attr_br_immed_retired.attr.attr,
+ &armv7_event_attr_br_return_retired.attr.attr,
+ &armv7_event_attr_unaligned_ldst_retired.attr.attr,
+ &armv7_event_attr_br_mis_pred.attr.attr,
+ &armv7_event_attr_cpu_cycles.attr.attr,
+ &armv7_event_attr_br_pred.attr.attr,
+ &armv7_event_attr_mem_access.attr.attr,
+ &armv7_event_attr_l1i_cache.attr.attr,
+ &armv7_event_attr_l1d_cache_wb.attr.attr,
+ &armv7_event_attr_l2d_cache.attr.attr,
+ &armv7_event_attr_l2d_cache_refill.attr.attr,
+ &armv7_event_attr_l2d_cache_wb.attr.attr,
+ &armv7_event_attr_bus_access.attr.attr,
+ &armv7_event_attr_memory_error.attr.attr,
+ &armv7_event_attr_inst_spec.attr.attr,
+ &armv7_event_attr_ttbr_write_retired.attr.attr,
+ &armv7_event_attr_bus_cycles.attr.attr,
+ NULL,
+};
+
+static struct attribute_group armv7_pmuv2_events_attr_group = {
+ .name = "events",
+ .attrs = armv7_pmuv2_event_attrs,
+};
+
+/*
+ * Perf Events' indices
+ */
+#define ARMV7_IDX_CYCLE_COUNTER 31
+#define ARMV7_IDX_COUNTER_MAX 31
+/*
+ * ARMv7 low level PMNC access
+ */
+
+/*
+ * Per-CPU PMNC: config reg
+ */
+#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
+#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
+#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
+#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
+#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
+#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
+#define ARMV7_PMNC_N_MASK 0x1f
+#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
+
+/*
+ * FLAG: counters overflow flag status reg
+ */
+#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
+#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
+
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */
+#define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */
+
+/*
+ * Event filters for PMUv2
+ */
+#define ARMV7_EXCLUDE_PL1 BIT(31)
+#define ARMV7_EXCLUDE_USER BIT(30)
+#define ARMV7_INCLUDE_HYP BIT(27)
+
+/*
+ * Secure debug enable reg
+ */
+#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */
+
+static inline u32 armv7_pmnc_read(void)
+{
+ u32 val;
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
+ return val;
+}
+
+static inline void armv7_pmnc_write(u32 val)
+{
+ val &= ARMV7_PMNC_MASK;
+ isb();
+ asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
+}
+
+static inline int armv7_pmnc_has_overflowed(u32 pmnc)
+{
+ return pmnc & ARMV7_OVERFLOWED_MASK;
+}
+
+static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx)
+{
+ return test_bit(idx, cpu_pmu->cntr_mask);
+}
+
+static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
+{
+ return pmnc & BIT(idx);
+}
+
+static inline void armv7_pmnc_select_counter(int idx)
+{
+ asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (idx));
+ isb();
+}
+
+static inline u64 armv7pmu_read_counter(struct perf_event *event)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u32 value = 0;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u reading wrong counter %d\n",
+ smp_processor_id(), idx);
+ } else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
+ } else {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
+ }
+
+ return value;
+}
+
+static inline void armv7pmu_write_counter(struct perf_event *event, u64 value)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u writing wrong counter %d\n",
+ smp_processor_id(), idx);
+ } else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
+ asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value));
+ } else {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value));
+ }
+}
+
+static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
+{
+ armv7_pmnc_select_counter(idx);
+ val &= ARMV7_EVTYPE_MASK;
+ asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+}
+
+static inline void armv7_pmnc_enable_counter(int idx)
+{
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(idx)));
+}
+
+static inline void armv7_pmnc_disable_counter(int idx)
+{
+ asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(idx)));
+}
+
+static inline void armv7_pmnc_enable_intens(int idx)
+{
+ asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(idx)));
+}
+
+static inline void armv7_pmnc_disable_intens(int idx)
+{
+ asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(idx)));
+ isb();
+ /* Clear the overflow flag in case an interrupt is pending. */
+ asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(idx)));
+ isb();
+}
+
+static inline u32 armv7_pmnc_getreset_flags(void)
+{
+ u32 val;
+
+ /* Read */
+ asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+
+ /* Write to clear flags */
+ val &= ARMV7_FLAG_MASK;
+ asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
+
+ return val;
+}
+
+#ifdef DEBUG
+static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
+{
+ u32 val;
+ unsigned int cnt;
+
+ pr_info("PMNC registers dump:\n");
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+ pr_info("PMNC =0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
+ pr_info("CNTENS=0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
+ pr_info("INTENS=0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+ pr_info("FLAGS =0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
+ pr_info("SELECT=0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
+ pr_info("CCNT =0x%08x\n", val);
+
+ for_each_set_bit(cnt, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) {
+ armv7_pmnc_select_counter(cnt);
+ asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
+ pr_info("CNT[%d] count =0x%08x\n", cnt, val);
+ asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
+ pr_info("CNT[%d] evtsel=0x%08x\n", cnt, val);
+ }
+}
+#endif
+
+static void armv7pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ int idx = hwc->idx;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
+ smp_processor_id(), idx);
+ return;
+ }
+
+ /*
+ * Enable counter and interrupt, and set the counter to count
+ * the event that we're interested in.
+ */
+
+ /*
+ * Disable counter
+ */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Set event (if destined for PMNx counters)
+ * We only need to set the event for the cycle counter if we
+ * have the ability to perform event filtering.
+ */
+ if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
+ armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+ /*
+ * Enable interrupt for this counter
+ */
+ armv7_pmnc_enable_intens(idx);
+
+ /*
+ * Enable counter
+ */
+ armv7_pmnc_enable_counter(idx);
+}
+
+static void armv7pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ int idx = hwc->idx;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
+ smp_processor_id(), idx);
+ return;
+ }
+
+ /*
+ * Disable counter and interrupt
+ */
+
+ /*
+ * Disable counter
+ */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Disable interrupt for this counter
+ */
+ armv7_pmnc_disable_intens(idx);
+}
+
+static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ u32 pmnc;
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ /*
+ * Get and reset the IRQ flags
+ */
+ pmnc = armv7_pmnc_getreset_flags();
+
+ /*
+ * Did an overflow occur?
+ */
+ if (!armv7_pmnc_has_overflowed(pmnc))
+ return IRQ_NONE;
+
+ /*
+ * Handle the counter(s) overflow(s)
+ */
+ regs = get_irq_regs();
+
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ /* Ignore if we don't have an event. */
+ if (!event)
+ continue;
+
+ /*
+ * We have a single interrupt for all counters. Check that
+ * each counter has overflowed before we process it.
+ */
+ if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cpu_pmu->disable(event);
+ }
+
+ /*
+ * Handle the pending perf events.
+ *
+ * Note: this call *must* be run with interrupts disabled. For
+ * platforms that can have the PMU interrupts raised as an NMI, this
+ * will not work.
+ */
+ irq_work_run();
+
+ return IRQ_HANDLED;
+}
+
+static void armv7pmu_start(struct arm_pmu *cpu_pmu)
+{
+ /* Enable all counters */
+ armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
+}
+
+static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ /* Disable all counters */
+ armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
+}
+
+static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT;
+
+ /* Always place a cycle counter into the cycle counter. */
+ if (evtype == ARMV7_PERFCTR_CPU_CYCLES) {
+ if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask))
+ return -EAGAIN;
+
+ return ARMV7_IDX_CYCLE_COUNTER;
+ }
+
+ /*
+ * For anything other than a cycle counter, try and use
+ * the events counters
+ */
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
+ }
+
+ /* The counters are all in use. */
+ return -EAGAIN;
+}
+
+static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+/*
+ * Add an event filter to a given event. This will only work for PMUv2 PMUs.
+ */
+static int armv7pmu_set_event_filter(struct hw_perf_event *event,
+ struct perf_event_attr *attr)
+{
+ unsigned long config_base = 0;
+
+ if (attr->exclude_idle) {
+ pr_debug("ARM performance counters do not support mode exclusion\n");
+ return -EOPNOTSUPP;
+ }
+ if (attr->exclude_user)
+ config_base |= ARMV7_EXCLUDE_USER;
+ if (attr->exclude_kernel)
+ config_base |= ARMV7_EXCLUDE_PL1;
+ if (!attr->exclude_hv)
+ config_base |= ARMV7_INCLUDE_HYP;
+
+ /*
+ * Install the filter into config_base as this is used to
+ * construct the event type.
+ */
+ event->config_base = config_base;
+
+ return 0;
+}
+
+static void armv7pmu_reset(void *info)
+{
+ struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
+ u32 idx, val;
+
+ if (cpu_pmu->secure_access) {
+ asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val));
+ val |= ARMV7_SDER_SUNIDEN;
+ asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val));
+ }
+
+ /* The counter and interrupt enable registers are unknown at reset. */
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS) {
+ armv7_pmnc_disable_counter(idx);
+ armv7_pmnc_disable_intens(idx);
+ }
+
+ /* Initialize & Reset PMNC: C and P bits */
+ armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
+}
+
+static int armv7_a8_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a8_perf_map,
+ &armv7_a8_perf_cache_map, 0xFF);
+}
+
+static int armv7_a9_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a9_perf_map,
+ &armv7_a9_perf_cache_map, 0xFF);
+}
+
+static int armv7_a5_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a5_perf_map,
+ &armv7_a5_perf_cache_map, 0xFF);
+}
+
+static int armv7_a15_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a15_perf_map,
+ &armv7_a15_perf_cache_map, 0xFF);
+}
+
+static int armv7_a7_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a7_perf_map,
+ &armv7_a7_perf_cache_map, 0xFF);
+}
+
+static int armv7_a12_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a12_perf_map,
+ &armv7_a12_perf_cache_map, 0xFF);
+}
+
+static int krait_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &krait_perf_map,
+ &krait_perf_cache_map, 0xFFFFF);
+}
+
+static int krait_map_event_no_branch(struct perf_event *event)
+{
+ return armpmu_map_event(event, &krait_perf_map_no_branch,
+ &krait_perf_cache_map, 0xFFFFF);
+}
+
+static int scorpion_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &scorpion_perf_map,
+ &scorpion_perf_cache_map, 0xFFFFF);
+}
+
+static void armv7pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->handle_irq = armv7pmu_handle_irq;
+ cpu_pmu->enable = armv7pmu_enable_event;
+ cpu_pmu->disable = armv7pmu_disable_event;
+ cpu_pmu->read_counter = armv7pmu_read_counter;
+ cpu_pmu->write_counter = armv7pmu_write_counter;
+ cpu_pmu->get_event_idx = armv7pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx;
+ cpu_pmu->start = armv7pmu_start;
+ cpu_pmu->stop = armv7pmu_stop;
+ cpu_pmu->reset = armv7pmu_reset;
+};
+
+static void armv7_read_num_pmnc_events(void *info)
+{
+ int nb_cnt;
+ struct arm_pmu *cpu_pmu = info;
+
+ /* Read the nb of CNTx counters supported from PMNC */
+ nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
+ bitmap_set(cpu_pmu->cntr_mask, 0, nb_cnt);
+
+ /* Add the CPU cycles counter */
+ set_bit(ARMV7_IDX_CYCLE_COUNTER, cpu_pmu->cntr_mask);
+}
+
+static int armv7_probe_num_events(struct arm_pmu *arm_pmu)
+{
+ return smp_call_function_any(&arm_pmu->supported_cpus,
+ armv7_read_num_pmnc_events,
+ arm_pmu, 1);
+}
+
+static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a8";
+ cpu_pmu->map_event = armv7_a8_map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a9";
+ cpu_pmu->map_event = armv7_a9_map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a5";
+ cpu_pmu->map_event = armv7_a5_map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a15";
+ cpu_pmu->map_event = armv7_a15_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a7";
+ cpu_pmu->map_event = armv7_a7_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a12";
+ cpu_pmu->map_event = armv7_a12_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ int ret = armv7_a12_pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a17";
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return ret;
+}
+
+/*
+ * Krait Performance Monitor Region Event Selection Register (PMRESRn)
+ *
+ * 31 30 24 16 8 0
+ * +--------------------------------+
+ * PMRESR0 | EN | CC | CC | CC | CC | N = 1, R = 0
+ * +--------------------------------+
+ * PMRESR1 | EN | CC | CC | CC | CC | N = 1, R = 1
+ * +--------------------------------+
+ * PMRESR2 | EN | CC | CC | CC | CC | N = 1, R = 2
+ * +--------------------------------+
+ * VPMRESR0 | EN | CC | CC | CC | CC | N = 2, R = ?
+ * +--------------------------------+
+ * EN | G=3 | G=2 | G=1 | G=0
+ *
+ * Event Encoding:
+ *
+ * hwc->config_base = 0xNRCCG
+ *
+ * N = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR)
+ * R = region register
+ * CC = class of events the group G is choosing from
+ * G = group or particular event
+ *
+ * Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2
+ *
+ * A region (R) corresponds to a piece of the CPU (execution unit, instruction
+ * unit, etc.) while the event code (CC) corresponds to a particular class of
+ * events (interrupts for example). An event code is broken down into
+ * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
+ * example).
+ */
+
+#define KRAIT_EVENT (1 << 16)
+#define VENUM_EVENT (2 << 16)
+#define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT)
+#define PMRESRn_EN BIT(31)
+
+#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */
+#define EVENT_GROUP(event) ((event) & 0xf) /* G */
+#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */
+#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */
+#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */
+
+static u32 krait_read_pmresrn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val));
+ break;
+ case 1:
+ asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val));
+ break;
+ case 2:
+ asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in krait_pmu_get_event_idx() */
+ }
+
+ return val;
+}
+
+static void krait_write_pmresrn(int n, u32 val)
+{
+ switch (n) {
+ case 0:
+ asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val));
+ break;
+ case 1:
+ asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val));
+ break;
+ case 2:
+ asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in krait_pmu_get_event_idx() */
+ }
+}
+
+static u32 venum_read_pmresr(void)
+{
+ u32 val;
+ asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
+ return val;
+}
+
+static void venum_write_pmresr(u32 val)
+{
+ asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val));
+}
+
+static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val)
+{
+ u32 venum_new_val;
+ u32 fp_new_val;
+
+ BUG_ON(preemptible());
+ /* CPACR Enable CP10 and CP11 access */
+ *venum_orig_val = get_copro_access();
+ venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11);
+ set_copro_access(venum_new_val);
+
+ /* Enable FPEXC */
+ *fp_orig_val = fmrx(FPEXC);
+ fp_new_val = *fp_orig_val | FPEXC_EN;
+ fmxr(FPEXC, fp_new_val);
+}
+
+static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val)
+{
+ BUG_ON(preemptible());
+ /* Restore FPEXC */
+ fmxr(FPEXC, fp_orig_val);
+ isb();
+ /* Restore CPACR */
+ set_copro_access(venum_orig_val);
+}
+
+static u32 krait_get_pmresrn_event(unsigned int region)
+{
+ static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0,
+ KRAIT_PMRESR1_GROUP0,
+ KRAIT_PMRESR2_GROUP0 };
+ return pmresrn_table[region];
+}
+
+static void krait_evt_setup(int idx, u32 config_base)
+{
+ u32 val;
+ u32 mask;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ unsigned int code = EVENT_CODE(config_base);
+ unsigned int group_shift;
+ bool venum_event = EVENT_VENUM(config_base);
+
+ group_shift = group * 8;
+ mask = 0xff << group_shift;
+
+ /* Configure evtsel for the region and group */
+ if (venum_event)
+ val = KRAIT_VPMRESR0_GROUP0;
+ else
+ val = krait_get_pmresrn_event(region);
+ val += group;
+ /* Mix in mode-exclusion bits */
+ val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+ armv7_pmnc_write_evtsel(idx, val);
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = krait_read_pmresrn(region);
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ krait_write_pmresrn(region, val);
+ }
+}
+
+static u32 clear_pmresrn_group(u32 val, int group)
+{
+ u32 mask;
+ int group_shift;
+
+ group_shift = group * 8;
+ mask = 0xff << group_shift;
+ val &= ~mask;
+
+ /* Don't clear enable bit if entire region isn't disabled */
+ if (val & ~PMRESRn_EN)
+ return val |= PMRESRn_EN;
+
+ return 0;
+}
+
+static void krait_clearpmu(u32 config_base)
+{
+ u32 val;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ bool venum_event = EVENT_VENUM(config_base);
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val = clear_pmresrn_group(val, group);
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = krait_read_pmresrn(region);
+ val = clear_pmresrn_group(val, group);
+ krait_write_pmresrn(region, val);
+ }
+}
+
+static void krait_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /* Disable counter and interrupt */
+
+ /* Disable counter */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Clear pmresr code (if destined for PMNx counters)
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ krait_clearpmu(hwc->config_base);
+
+ /* Disable interrupt for this counter */
+ armv7_pmnc_disable_intens(idx);
+}
+
+static void krait_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /*
+ * Enable counter and interrupt, and set the counter to count
+ * the event that we're interested in.
+ */
+
+ /* Disable counter */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Set event (if destined for PMNx counters)
+ * We set the event for the cycle counter because we
+ * have the ability to perform event filtering.
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ krait_evt_setup(idx, hwc->config_base);
+ else
+ armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+ /* Enable interrupt for this counter */
+ armv7_pmnc_enable_intens(idx);
+
+ /* Enable counter */
+ armv7_pmnc_enable_counter(idx);
+}
+
+static void krait_pmu_reset(void *info)
+{
+ u32 vval, fval;
+ struct arm_pmu *cpu_pmu = info;
+ u32 idx;
+
+ armv7pmu_reset(info);
+
+ /* Clear all pmresrs */
+ krait_write_pmresrn(0, 0);
+ krait_write_pmresrn(1, 0);
+ krait_write_pmresrn(2, 0);
+
+ venum_pre_pmresr(&vval, &fval);
+ venum_write_pmresr(0);
+ venum_post_pmresr(vval, fval);
+
+ /* Reset PMxEVNCTCR to sane default */
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+ }
+
+}
+
+static int krait_event_to_bit(struct perf_event *event, unsigned int region,
+ unsigned int group)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+ if (hwc->config_base & VENUM_EVENT)
+ bit = KRAIT_VPMRESR0_GROUP0;
+ else
+ bit = krait_get_pmresrn_event(region);
+ bit -= krait_get_pmresrn_event(0);
+ bit += group;
+ /*
+ * Lower bits are reserved for use by the counters (see
+ * armv7pmu_get_event_idx() for more info)
+ */
+ bit += bitmap_weight(cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX);
+
+ return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
+static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx;
+ int bit = -1;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int code = EVENT_CODE(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool krait_event = EVENT_CPU(hwc->config_base);
+
+ if (venum_event || krait_event) {
+ /* Ignore invalid events */
+ if (group > 3 || region > 2)
+ return -EINVAL;
+ if (venum_event && (code & 0xe0))
+ return -EINVAL;
+
+ bit = krait_event_to_bit(event, region, group);
+ if (test_and_set_bit(bit, cpuc->used_mask))
+ return -EAGAIN;
+ }
+
+ idx = armv7pmu_get_event_idx(cpuc, event);
+ if (idx < 0 && bit >= 0)
+ clear_bit(bit, cpuc->used_mask);
+
+ return idx;
+}
+
+static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool krait_event = EVENT_CPU(hwc->config_base);
+
+ armv7pmu_clear_event_idx(cpuc, event);
+ if (venum_event || krait_event) {
+ bit = krait_event_to_bit(event, region, group);
+ clear_bit(bit, cpuc->used_mask);
+ }
+}
+
+static int krait_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_krait";
+ /* Some early versions of Krait don't support PC write events */
+ if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node,
+ "qcom,no-pc-write"))
+ cpu_pmu->map_event = krait_map_event_no_branch;
+ else
+ cpu_pmu->map_event = krait_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->reset = krait_pmu_reset;
+ cpu_pmu->enable = krait_pmu_enable_event;
+ cpu_pmu->disable = krait_pmu_disable_event;
+ cpu_pmu->get_event_idx = krait_pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+/*
+ * Scorpion Local Performance Monitor Register (LPMn)
+ *
+ * 31 30 24 16 8 0
+ * +--------------------------------+
+ * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0
+ * +--------------------------------+
+ * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1
+ * +--------------------------------+
+ * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2
+ * +--------------------------------+
+ * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3
+ * +--------------------------------+
+ * VLPM | EN | CC | CC | CC | CC | N = 2, R = ?
+ * +--------------------------------+
+ * EN | G=3 | G=2 | G=1 | G=0
+ *
+ *
+ * Event Encoding:
+ *
+ * hwc->config_base = 0xNRCCG
+ *
+ * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM)
+ * R = region register
+ * CC = class of events the group G is choosing from
+ * G = group or particular event
+ *
+ * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2
+ *
+ * A region (R) corresponds to a piece of the CPU (execution unit, instruction
+ * unit, etc.) while the event code (CC) corresponds to a particular class of
+ * events (interrupts for example). An event code is broken down into
+ * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
+ * example).
+ */
+
+static u32 scorpion_read_pmresrn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
+ break;
+ case 1:
+ asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
+ break;
+ case 2:
+ asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
+ break;
+ case 3:
+ asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+ }
+
+ return val;
+}
+
+static void scorpion_write_pmresrn(int n, u32 val)
+{
+ switch (n) {
+ case 0:
+ asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val));
+ break;
+ case 1:
+ asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val));
+ break;
+ case 2:
+ asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val));
+ break;
+ case 3:
+ asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+ }
+}
+
+static u32 scorpion_get_pmresrn_event(unsigned int region)
+{
+ static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0,
+ SCORPION_LPM1_GROUP0,
+ SCORPION_LPM2_GROUP0,
+ SCORPION_L2LPM_GROUP0 };
+ return pmresrn_table[region];
+}
+
+static void scorpion_evt_setup(int idx, u32 config_base)
+{
+ u32 val;
+ u32 mask;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ unsigned int code = EVENT_CODE(config_base);
+ unsigned int group_shift;
+ bool venum_event = EVENT_VENUM(config_base);
+
+ group_shift = group * 8;
+ mask = 0xff << group_shift;
+
+ /* Configure evtsel for the region and group */
+ if (venum_event)
+ val = SCORPION_VLPM_GROUP0;
+ else
+ val = scorpion_get_pmresrn_event(region);
+ val += group;
+ /* Mix in mode-exclusion bits */
+ val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+ armv7_pmnc_write_evtsel(idx, val);
+
+ asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = scorpion_read_pmresrn(region);
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ scorpion_write_pmresrn(region, val);
+ }
+}
+
+static void scorpion_clearpmu(u32 config_base)
+{
+ u32 val;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ bool venum_event = EVENT_VENUM(config_base);
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val = clear_pmresrn_group(val, group);
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = scorpion_read_pmresrn(region);
+ val = clear_pmresrn_group(val, group);
+ scorpion_write_pmresrn(region, val);
+ }
+}
+
+static void scorpion_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /* Disable counter and interrupt */
+
+ /* Disable counter */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Clear pmresr code (if destined for PMNx counters)
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ scorpion_clearpmu(hwc->config_base);
+
+ /* Disable interrupt for this counter */
+ armv7_pmnc_disable_intens(idx);
+}
+
+static void scorpion_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /*
+ * Enable counter and interrupt, and set the counter to count
+ * the event that we're interested in.
+ */
+
+ /* Disable counter */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Set event (if destined for PMNx counters)
+ * We don't set the event for the cycle counter because we
+ * don't have the ability to perform event filtering.
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ scorpion_evt_setup(idx, hwc->config_base);
+ else if (idx != ARMV7_IDX_CYCLE_COUNTER)
+ armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+ /* Enable interrupt for this counter */
+ armv7_pmnc_enable_intens(idx);
+
+ /* Enable counter */
+ armv7_pmnc_enable_counter(idx);
+}
+
+static void scorpion_pmu_reset(void *info)
+{
+ u32 vval, fval;
+ struct arm_pmu *cpu_pmu = info;
+ u32 idx;
+
+ armv7pmu_reset(info);
+
+ /* Clear all pmresrs */
+ scorpion_write_pmresrn(0, 0);
+ scorpion_write_pmresrn(1, 0);
+ scorpion_write_pmresrn(2, 0);
+ scorpion_write_pmresrn(3, 0);
+
+ venum_pre_pmresr(&vval, &fval);
+ venum_write_pmresr(0);
+ venum_post_pmresr(vval, fval);
+
+ /* Reset PMxEVNCTCR to sane default */
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+ }
+}
+
+static int scorpion_event_to_bit(struct perf_event *event, unsigned int region,
+ unsigned int group)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+ if (hwc->config_base & VENUM_EVENT)
+ bit = SCORPION_VLPM_GROUP0;
+ else
+ bit = scorpion_get_pmresrn_event(region);
+ bit -= scorpion_get_pmresrn_event(0);
+ bit += group;
+ /*
+ * Lower bits are reserved for use by the counters (see
+ * armv7pmu_get_event_idx() for more info)
+ */
+ bit += bitmap_weight(cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX);
+
+ return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
+static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx;
+ int bit = -1;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+ if (venum_event || scorpion_event) {
+ /* Ignore invalid events */
+ if (group > 3 || region > 3)
+ return -EINVAL;
+
+ bit = scorpion_event_to_bit(event, region, group);
+ if (test_and_set_bit(bit, cpuc->used_mask))
+ return -EAGAIN;
+ }
+
+ idx = armv7pmu_get_event_idx(cpuc, event);
+ if (idx < 0 && bit >= 0)
+ clear_bit(bit, cpuc->used_mask);
+
+ return idx;
+}
+
+static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+ armv7pmu_clear_event_idx(cpuc, event);
+ if (venum_event || scorpion_event) {
+ bit = scorpion_event_to_bit(event, region, group);
+ clear_bit(bit, cpuc->used_mask);
+ }
+}
+
+static int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_scorpion";
+ cpu_pmu->map_event = scorpion_map_event;
+ cpu_pmu->reset = scorpion_pmu_reset;
+ cpu_pmu->enable = scorpion_pmu_enable_event;
+ cpu_pmu->disable = scorpion_pmu_disable_event;
+ cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_scorpion_mp";
+ cpu_pmu->map_event = scorpion_map_event;
+ cpu_pmu->reset = scorpion_pmu_reset;
+ cpu_pmu->enable = scorpion_pmu_enable_event;
+ cpu_pmu->disable = scorpion_pmu_disable_event;
+ cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static const struct of_device_id armv7_pmu_of_device_ids[] = {
+ {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init},
+ {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init},
+ {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init},
+ {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init},
+ {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init},
+ {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init},
+ {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init},
+ {.compatible = "qcom,krait-pmu", .data = krait_pmu_init},
+ {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init},
+ {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init},
+ {},
+};
+
+static int armv7_pmu_device_probe(struct platform_device *pdev)
+{
+ return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver armv7_pmu_driver = {
+ .driver = {
+ .name = "armv7-pmu",
+ .of_match_table = armv7_pmu_of_device_ids,
+ .suppress_bind_attrs = true,
+ },
+ .probe = armv7_pmu_device_probe,
+};
+
+builtin_platform_driver(armv7_pmu_driver);
diff --git a/drivers/perf/arm_xscale_pmu.c b/drivers/perf/arm_xscale_pmu.c
new file mode 100644
index 000000000000..638fea9b1263
--- /dev/null
+++ b/drivers/perf/arm_xscale_pmu.c
@@ -0,0 +1,749 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARMv5 [xscale] Performance counter handling code.
+ *
+ * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * Based on the previous xscale OProfile code.
+ *
+ * There are two variants of the xscale PMU that we support:
+ * - xscale1pmu: 2 event counters and a cycle counter
+ * - xscale2pmu: 4 event counters and a cycle counter
+ * The two variants share event definitions, but have different
+ * PMU structures.
+ */
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+enum xscale_perf_types {
+ XSCALE_PERFCTR_ICACHE_MISS = 0x00,
+ XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
+ XSCALE_PERFCTR_DATA_STALL = 0x02,
+ XSCALE_PERFCTR_ITLB_MISS = 0x03,
+ XSCALE_PERFCTR_DTLB_MISS = 0x04,
+ XSCALE_PERFCTR_BRANCH = 0x05,
+ XSCALE_PERFCTR_BRANCH_MISS = 0x06,
+ XSCALE_PERFCTR_INSTRUCTION = 0x07,
+ XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
+ XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
+ XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
+ XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
+ XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
+ XSCALE_PERFCTR_PC_CHANGED = 0x0D,
+ XSCALE_PERFCTR_BCU_REQUEST = 0x10,
+ XSCALE_PERFCTR_BCU_FULL = 0x11,
+ XSCALE_PERFCTR_BCU_DRAIN = 0x12,
+ XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
+ XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
+ XSCALE_PERFCTR_RMW = 0x16,
+ /* XSCALE_PERFCTR_CCNT is not hardware defined */
+ XSCALE_PERFCTR_CCNT = 0xFE,
+ XSCALE_PERFCTR_UNUSED = 0xFF,
+};
+
+enum xscale_counters {
+ XSCALE_CYCLE_COUNTER = 0,
+ XSCALE_COUNTER0,
+ XSCALE_COUNTER1,
+ XSCALE_COUNTER2,
+ XSCALE_COUNTER3,
+};
+#define XSCALE1_NUM_COUNTERS 3
+#define XSCALE2_NUM_COUNTERS 5
+
+static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
+ [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
+ [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER,
+};
+
+static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
+};
+
+#define XSCALE_PMU_ENABLE 0x001
+#define XSCALE_PMN_RESET 0x002
+#define XSCALE_CCNT_RESET 0x004
+#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET)
+#define XSCALE_PMU_CNT64 0x008
+
+#define XSCALE1_OVERFLOWED_MASK 0x700
+#define XSCALE1_CCOUNT_OVERFLOW 0x400
+#define XSCALE1_COUNT0_OVERFLOW 0x100
+#define XSCALE1_COUNT1_OVERFLOW 0x200
+#define XSCALE1_CCOUNT_INT_EN 0x040
+#define XSCALE1_COUNT0_INT_EN 0x010
+#define XSCALE1_COUNT1_INT_EN 0x020
+#define XSCALE1_COUNT0_EVT_SHFT 12
+#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
+#define XSCALE1_COUNT1_EVT_SHFT 20
+#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
+
+static inline u32
+xscale1pmu_read_pmnc(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
+ return val;
+}
+
+static inline void
+xscale1pmu_write_pmnc(u32 val)
+{
+ /* upper 4bits and 7, 11 are write-as-0 */
+ val &= 0xffff77f;
+ asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
+}
+
+static inline int
+xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
+ enum xscale_counters counter)
+{
+ int ret = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
+ break;
+ case XSCALE_COUNTER0:
+ ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
+ break;
+ case XSCALE_COUNTER1:
+ ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+ }
+
+ return ret;
+}
+
+static irqreturn_t
+xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ unsigned long pmnc;
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ /*
+ * NOTE: there's an A stepping erratum that states if an overflow
+ * bit already exists and another occurs, the previous
+ * Overflow bit gets cleared. There's no workaround.
+ * Fixed in B stepping or later.
+ */
+ pmnc = xscale1pmu_read_pmnc();
+
+ /*
+ * Write the value back to clear the overflow flags. Overflow
+ * flags remain in pmnc for use below. We also disable the PMU
+ * while we process the interrupt.
+ */
+ xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+ if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
+ return IRQ_NONE;
+
+ regs = get_irq_regs();
+
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, XSCALE1_NUM_COUNTERS) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ if (!event)
+ continue;
+
+ if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cpu_pmu->disable(event);
+ }
+
+ irq_work_run();
+
+ /*
+ * Re-enable the PMU.
+ */
+ pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+ xscale1pmu_write_pmnc(pmnc);
+
+ return IRQ_HANDLED;
+}
+
+static void xscale1pmu_enable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ mask = 0;
+ evt = XSCALE1_CCOUNT_INT_EN;
+ break;
+ case XSCALE_COUNTER0:
+ mask = XSCALE1_COUNT0_EVT_MASK;
+ evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
+ XSCALE1_COUNT0_INT_EN;
+ break;
+ case XSCALE_COUNTER1:
+ mask = XSCALE1_COUNT1_EVT_MASK;
+ evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
+ XSCALE1_COUNT1_INT_EN;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ val = xscale1pmu_read_pmnc();
+ val &= ~mask;
+ val |= evt;
+ xscale1pmu_write_pmnc(val);
+}
+
+static void xscale1pmu_disable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ mask = XSCALE1_CCOUNT_INT_EN;
+ evt = 0;
+ break;
+ case XSCALE_COUNTER0:
+ mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
+ evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER1:
+ mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
+ evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ val = xscale1pmu_read_pmnc();
+ val &= ~mask;
+ val |= evt;
+ xscale1pmu_write_pmnc(val);
+}
+
+static int
+xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ if (XSCALE_PERFCTR_CCNT == hwc->config_base) {
+ if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
+ return -EAGAIN;
+
+ return XSCALE_CYCLE_COUNTER;
+ } else {
+ if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
+ return XSCALE_COUNTER1;
+
+ if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
+ return XSCALE_COUNTER0;
+
+ return -EAGAIN;
+ }
+}
+
+static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale1pmu_read_pmnc();
+ val |= XSCALE_PMU_ENABLE;
+ xscale1pmu_write_pmnc(val);
+}
+
+static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale1pmu_read_pmnc();
+ val &= ~XSCALE_PMU_ENABLE;
+ xscale1pmu_write_pmnc(val);
+}
+
+static inline u64 xscale1pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+ u32 val = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
+ break;
+ }
+
+ return val;
+}
+
+static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
+ break;
+ }
+}
+
+static int xscale_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &xscale_perf_map,
+ &xscale_perf_cache_map, 0xFF);
+}
+
+static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->name = "armv5_xscale1";
+ cpu_pmu->handle_irq = xscale1pmu_handle_irq;
+ cpu_pmu->enable = xscale1pmu_enable_event;
+ cpu_pmu->disable = xscale1pmu_disable_event;
+ cpu_pmu->read_counter = xscale1pmu_read_counter;
+ cpu_pmu->write_counter = xscale1pmu_write_counter;
+ cpu_pmu->get_event_idx = xscale1pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx;
+ cpu_pmu->start = xscale1pmu_start;
+ cpu_pmu->stop = xscale1pmu_stop;
+ cpu_pmu->map_event = xscale_map_event;
+
+ bitmap_set(cpu_pmu->cntr_mask, 0, XSCALE1_NUM_COUNTERS);
+
+ return 0;
+}
+
+#define XSCALE2_OVERFLOWED_MASK 0x01f
+#define XSCALE2_CCOUNT_OVERFLOW 0x001
+#define XSCALE2_COUNT0_OVERFLOW 0x002
+#define XSCALE2_COUNT1_OVERFLOW 0x004
+#define XSCALE2_COUNT2_OVERFLOW 0x008
+#define XSCALE2_COUNT3_OVERFLOW 0x010
+#define XSCALE2_CCOUNT_INT_EN 0x001
+#define XSCALE2_COUNT0_INT_EN 0x002
+#define XSCALE2_COUNT1_INT_EN 0x004
+#define XSCALE2_COUNT2_INT_EN 0x008
+#define XSCALE2_COUNT3_INT_EN 0x010
+#define XSCALE2_COUNT0_EVT_SHFT 0
+#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
+#define XSCALE2_COUNT1_EVT_SHFT 8
+#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
+#define XSCALE2_COUNT2_EVT_SHFT 16
+#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
+#define XSCALE2_COUNT3_EVT_SHFT 24
+#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
+
+static inline u32
+xscale2pmu_read_pmnc(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
+ /* bits 1-2 and 4-23 are read-unpredictable */
+ return val & 0xff000009;
+}
+
+static inline void
+xscale2pmu_write_pmnc(u32 val)
+{
+ /* bits 4-23 are write-as-0, 24-31 are write ignored */
+ val &= 0xf;
+ asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_overflow_flags(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
+ return val;
+}
+
+static inline void
+xscale2pmu_write_overflow_flags(u32 val)
+{
+ asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_event_select(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
+ return val;
+}
+
+static inline void
+xscale2pmu_write_event_select(u32 val)
+{
+ asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
+}
+
+static inline u32
+xscale2pmu_read_int_enable(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
+ return val;
+}
+
+static void
+xscale2pmu_write_int_enable(u32 val)
+{
+ asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
+}
+
+static inline int
+xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
+ enum xscale_counters counter)
+{
+ int ret = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
+ break;
+ case XSCALE_COUNTER0:
+ ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
+ break;
+ case XSCALE_COUNTER1:
+ ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
+ break;
+ case XSCALE_COUNTER2:
+ ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
+ break;
+ case XSCALE_COUNTER3:
+ ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+ }
+
+ return ret;
+}
+
+static irqreturn_t
+xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ unsigned long pmnc, of_flags;
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ /* Disable the PMU. */
+ pmnc = xscale2pmu_read_pmnc();
+ xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+ /* Check the overflow flag register. */
+ of_flags = xscale2pmu_read_overflow_flags();
+ if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
+ return IRQ_NONE;
+
+ /* Clear the overflow bits. */
+ xscale2pmu_write_overflow_flags(of_flags);
+
+ regs = get_irq_regs();
+
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, XSCALE2_NUM_COUNTERS) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ if (!event)
+ continue;
+
+ if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cpu_pmu->disable(event);
+ }
+
+ irq_work_run();
+
+ /*
+ * Re-enable the PMU.
+ */
+ pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+ xscale2pmu_write_pmnc(pmnc);
+
+ return IRQ_HANDLED;
+}
+
+static void xscale2pmu_enable_event(struct perf_event *event)
+{
+ unsigned long ien, evtsel;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ ien = xscale2pmu_read_int_enable();
+ evtsel = xscale2pmu_read_event_select();
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ ien |= XSCALE2_CCOUNT_INT_EN;
+ break;
+ case XSCALE_COUNTER0:
+ ien |= XSCALE2_COUNT0_INT_EN;
+ evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER1:
+ ien |= XSCALE2_COUNT1_INT_EN;
+ evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER2:
+ ien |= XSCALE2_COUNT2_INT_EN;
+ evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER3:
+ ien |= XSCALE2_COUNT3_INT_EN;
+ evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ xscale2pmu_write_event_select(evtsel);
+ xscale2pmu_write_int_enable(ien);
+}
+
+static void xscale2pmu_disable_event(struct perf_event *event)
+{
+ unsigned long ien, evtsel, of_flags;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ ien = xscale2pmu_read_int_enable();
+ evtsel = xscale2pmu_read_event_select();
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ ien &= ~XSCALE2_CCOUNT_INT_EN;
+ of_flags = XSCALE2_CCOUNT_OVERFLOW;
+ break;
+ case XSCALE_COUNTER0:
+ ien &= ~XSCALE2_COUNT0_INT_EN;
+ evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
+ of_flags = XSCALE2_COUNT0_OVERFLOW;
+ break;
+ case XSCALE_COUNTER1:
+ ien &= ~XSCALE2_COUNT1_INT_EN;
+ evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
+ of_flags = XSCALE2_COUNT1_OVERFLOW;
+ break;
+ case XSCALE_COUNTER2:
+ ien &= ~XSCALE2_COUNT2_INT_EN;
+ evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
+ of_flags = XSCALE2_COUNT2_OVERFLOW;
+ break;
+ case XSCALE_COUNTER3:
+ ien &= ~XSCALE2_COUNT3_INT_EN;
+ evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
+ of_flags = XSCALE2_COUNT3_OVERFLOW;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ xscale2pmu_write_event_select(evtsel);
+ xscale2pmu_write_int_enable(ien);
+ xscale2pmu_write_overflow_flags(of_flags);
+}
+
+static int
+xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx = xscale1pmu_get_event_idx(cpuc, event);
+ if (idx >= 0)
+ goto out;
+
+ if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
+ idx = XSCALE_COUNTER3;
+ else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
+ idx = XSCALE_COUNTER2;
+out:
+ return idx;
+}
+
+static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
+ val |= XSCALE_PMU_ENABLE;
+ xscale2pmu_write_pmnc(val);
+}
+
+static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale2pmu_read_pmnc();
+ val &= ~XSCALE_PMU_ENABLE;
+ xscale2pmu_write_pmnc(val);
+}
+
+static inline u64 xscale2pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+ u32 val = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER2:
+ asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER3:
+ asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
+ break;
+ }
+
+ return val;
+}
+
+static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER2:
+ asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER3:
+ asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
+ break;
+ }
+}
+
+static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->name = "armv5_xscale2";
+ cpu_pmu->handle_irq = xscale2pmu_handle_irq;
+ cpu_pmu->enable = xscale2pmu_enable_event;
+ cpu_pmu->disable = xscale2pmu_disable_event;
+ cpu_pmu->read_counter = xscale2pmu_read_counter;
+ cpu_pmu->write_counter = xscale2pmu_write_counter;
+ cpu_pmu->get_event_idx = xscale2pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx;
+ cpu_pmu->start = xscale2pmu_start;
+ cpu_pmu->stop = xscale2pmu_stop;
+ cpu_pmu->map_event = xscale_map_event;
+
+ bitmap_set(cpu_pmu->cntr_mask, 0, XSCALE2_NUM_COUNTERS);
+
+ return 0;
+}
+
+static const struct pmu_probe_info xscale_pmu_probe_table[] = {
+ XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
+ XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
+ { /* sentinel value */ }
+};
+
+static int xscale_pmu_device_probe(struct platform_device *pdev)
+{
+ return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table);
+}
+
+static struct platform_driver xscale_pmu_driver = {
+ .driver = {
+ .name = "xscale-pmu",
+ },
+ .probe = xscale_pmu_device_probe,
+};
+
+builtin_platform_driver(xscale_pmu_driver);
diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c
index 308c9969642e..d6693519eaee 100644
--- a/drivers/perf/cxl_pmu.c
+++ b/drivers/perf/cxl_pmu.c
@@ -208,21 +208,10 @@ static int cxl_pmu_parse_caps(struct device *dev, struct cxl_pmu_info *info)
return 0;
}
-static ssize_t cxl_pmu_format_sysfs_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *eattr;
-
- eattr = container_of(attr, struct dev_ext_attribute, attr);
-
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-
#define CXL_PMU_FORMAT_ATTR(_name, _format)\
(&((struct dev_ext_attribute[]) { \
{ \
- .attr = __ATTR(_name, 0444, \
- cxl_pmu_format_sysfs_show, NULL), \
+ .attr = __ATTR(_name, 0444, device_show_string, NULL), \
.var = (void *)_format \
} \
})[0].attr.attr)
@@ -345,7 +334,7 @@ static ssize_t cxl_pmu_event_sysfs_show(struct device *dev,
/* For CXL spec defined events */
#define CXL_PMU_EVENT_CXL_ATTR(_name, _gid, _msk) \
- CXL_PMU_EVENT_ATTR(_name, PCI_DVSEC_VENDOR_ID_CXL, _gid, _msk)
+ CXL_PMU_EVENT_ATTR(_name, PCI_VENDOR_ID_CXL, _gid, _msk)
static struct attribute *cxl_pmu_event_attrs[] = {
CXL_PMU_EVENT_CXL_ATTR(clock_ticks, CXL_PMU_GID_CLOCK_TICKS, BIT(0)),
@@ -365,7 +354,7 @@ static struct attribute *cxl_pmu_event_attrs[] = {
CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf, CXL_PMU_GID_D2H_REQ, BIT(13)),
CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv, CXL_PMU_GID_D2H_REQ, BIT(14)),
CXL_PMU_EVENT_CXL_ATTR(d2h_req_cacheflushed, CXL_PMU_GID_D2H_REQ, BIT(16)),
- /* CXL rev 3.0 Table 3-20 - D2H Repsonse Encodings */
+ /* CXL rev 3.0 Table 3-20 - D2H Response Encodings */
CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihiti, CXL_PMU_GID_D2H_RSP, BIT(4)),
CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvhitv, CXL_PMU_GID_D2H_RSP, BIT(6)),
CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihitse, CXL_PMU_GID_D2H_RSP, BIT(5)),
@@ -388,12 +377,14 @@ static struct attribute *cxl_pmu_event_attrs[] = {
/* CXL rev 3.0 Table 13-5 directly lists these */
CXL_PMU_EVENT_CXL_ATTR(cachedata_d2h_data, CXL_PMU_GID_CACHE_DATA, BIT(0)),
CXL_PMU_EVENT_CXL_ATTR(cachedata_h2d_data, CXL_PMU_GID_CACHE_DATA, BIT(1)),
- /* CXL rev 3.0 Table 3-29 M2S Req Memory Opcodes */
+ /* CXL rev 3.1 Table 3-35 M2S Req Memory Opcodes */
CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminv, CXL_PMU_GID_M2S_REQ, BIT(0)),
CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrd, CXL_PMU_GID_M2S_REQ, BIT(1)),
CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddata, CXL_PMU_GID_M2S_REQ, BIT(2)),
CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdfwd, CXL_PMU_GID_M2S_REQ, BIT(3)),
CXL_PMU_EVENT_CXL_ATTR(m2s_req_memwrfwd, CXL_PMU_GID_M2S_REQ, BIT(4)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdtee, CXL_PMU_GID_M2S_REQ, BIT(5)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddatatee, CXL_PMU_GID_M2S_REQ, BIT(6)),
CXL_PMU_EVENT_CXL_ATTR(m2s_req_memspecrd, CXL_PMU_GID_M2S_REQ, BIT(8)),
CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminvnt, CXL_PMU_GID_M2S_REQ, BIT(9)),
CXL_PMU_EVENT_CXL_ATTR(m2s_req_memcleanevict, CXL_PMU_GID_M2S_REQ, BIT(10)),
@@ -415,10 +406,11 @@ static struct attribute *cxl_pmu_event_attrs[] = {
CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk, CXL_PMU_GID_S2M_BISNP, BIT(4)),
CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk, CXL_PMU_GID_S2M_BISNP, BIT(5)),
CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk, CXL_PMU_GID_S2M_BISNP, BIT(6)),
- /* CXL rev 3.0 Table 3-43 S2M NDR Opcopdes */
+ /* CXL rev 3.1 Table 3-50 S2M NDR Opcopdes */
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp, CXL_PMU_GID_S2M_NDR, BIT(0)),
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps, CXL_PMU_GID_S2M_NDR, BIT(1)),
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe, CXL_PMU_GID_S2M_NDR, BIT(2)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpm, CXL_PMU_GID_S2M_NDR, BIT(3)),
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(4)),
/* CXL rev 3.0 Table 3-46 S2M DRS opcodes */
CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata, CXL_PMU_GID_S2M_DRS, BIT(0)),
@@ -983,8 +975,9 @@ static __exit void cxl_pmu_exit(void)
cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
}
+MODULE_DESCRIPTION("CXL Performance Monitor Driver");
MODULE_LICENSE("GPL");
-MODULE_IMPORT_NS(CXL);
+MODULE_IMPORT_NS("CXL");
module_init(cxl_pmu_init);
module_exit(cxl_pmu_exit);
MODULE_ALIAS_CXL(CXL_DEVICE_PMU);
diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c
index 957058ad0099..cccecae9823f 100644
--- a/drivers/perf/dwc_pcie_pmu.c
+++ b/drivers/perf/dwc_pcie_pmu.c
@@ -20,7 +20,6 @@
#include <linux/sysfs.h>
#include <linux/types.h>
-#define DWC_PCIE_VSEC_RAS_DES_ID 0x02
#define DWC_PCIE_EVENT_CNT_CTL 0x8
/*
@@ -82,7 +81,6 @@ struct dwc_pcie_pmu {
u16 ras_des_offset;
u32 nr_lanes;
- struct list_head pmu_node;
struct hlist_node cpuhp_node;
struct perf_event *event[DWC_PCIE_EVENT_TYPE_MAX];
int on_cpu;
@@ -101,12 +99,23 @@ struct dwc_pcie_dev_info {
struct list_head dev_node;
};
-struct dwc_pcie_vendor_id {
- int vendor_id;
+struct dwc_pcie_pmu_vsec_id {
+ u16 vendor_id;
+ u16 vsec_id;
+ u8 vsec_rev;
};
-static const struct dwc_pcie_vendor_id dwc_pcie_vendor_ids[] = {
- {.vendor_id = PCI_VENDOR_ID_ALIBABA },
+/*
+ * VSEC IDs are allocated by the vendor, so a given ID may mean different
+ * things to different vendors. See PCIe r6.0, sec 7.9.5.2.
+ */
+static const struct dwc_pcie_pmu_vsec_id dwc_pcie_pmu_vsec_ids[] = {
+ { .vendor_id = PCI_VENDOR_ID_ALIBABA,
+ .vsec_id = 0x02, .vsec_rev = 0x4 },
+ { .vendor_id = PCI_VENDOR_ID_AMPERE,
+ .vsec_id = 0x02, .vsec_rev = 0x4 },
+ { .vendor_id = PCI_VENDOR_ID_QCOM,
+ .vsec_id = 0x02, .vsec_rev = 0x4 },
{} /* terminator */
};
@@ -198,14 +207,14 @@ static struct attribute *dwc_pcie_pmu_time_event_attrs[] = {
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_1, 0x05),
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_2, 0x06),
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(CFG_RCVRY, 0x07),
- DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x08),
- DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x09),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x08),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x09),
/* Group #1 */
- DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_PCIe_TLP_Data_Payload, 0x20),
- DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_PCIe_TLP_Data_Payload, 0x21),
- DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_CCIX_TLP_Data_Payload, 0x22),
- DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_CCIX_TLP_Data_Payload, 0x23),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(tx_pcie_tlp_data_payload, 0x20),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(rx_pcie_tlp_data_payload, 0x21),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(tx_ccix_tlp_data_payload, 0x22),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(rx_ccix_tlp_data_payload, 0x23),
/*
* Leave it to the user to specify the lane ID to avoid generating
@@ -215,9 +224,9 @@ static struct attribute *dwc_pcie_pmu_time_event_attrs[] = {
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_update_fc_dllp, 0x601),
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ack_dllp, 0x602),
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_update_fc_dllp, 0x603),
- DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nulified_tlp, 0x604),
- DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nulified_tlp, 0x605),
- DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tl, 0x606),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nullified_tlp, 0x604),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nullified_tlp, 0x605),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tlp, 0x606),
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_write, 0x700),
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_read, 0x701),
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_write, 0x702),
@@ -518,31 +527,28 @@ static void dwc_pcie_unregister_pmu(void *data)
perf_pmu_unregister(&pcie_pmu->pmu);
}
-static bool dwc_pcie_match_des_cap(struct pci_dev *pdev)
+static u16 dwc_pcie_des_cap(struct pci_dev *pdev)
{
- const struct dwc_pcie_vendor_id *vid;
- u16 vsec = 0;
+ const struct dwc_pcie_pmu_vsec_id *vid;
+ u16 vsec;
u32 val;
if (!pci_is_pcie(pdev) || !(pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT))
- return false;
+ return 0;
- for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) {
+ for (vid = dwc_pcie_pmu_vsec_ids; vid->vendor_id; vid++) {
vsec = pci_find_vsec_capability(pdev, vid->vendor_id,
- DWC_PCIE_VSEC_RAS_DES_ID);
- if (vsec)
- break;
+ vid->vsec_id);
+ if (vsec) {
+ pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER,
+ &val);
+ if (PCI_VNDR_HEADER_REV(val) == vid->vsec_rev) {
+ pci_dbg(pdev, "Detected PCIe Vendor-Specific Extended Capability RAS DES\n");
+ return vsec;
+ }
+ }
}
- if (!vsec)
- return false;
-
- pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
- if (PCI_VNDR_HEADER_REV(val) != 0x04)
- return false;
-
- pci_dbg(pdev,
- "Detected PCIe Vendor-Specific Extended Capability RAS DES\n");
- return true;
+ return 0;
}
static void dwc_pcie_unregister_dev(struct dwc_pcie_dev_info *dev_info)
@@ -556,10 +562,10 @@ static int dwc_pcie_register_dev(struct pci_dev *pdev)
{
struct platform_device *plat_dev;
struct dwc_pcie_dev_info *dev_info;
- u32 bdf;
+ u32 sbdf;
- bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
- plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", bdf,
+ sbdf = (pci_domain_nr(pdev->bus) << 16) | PCI_DEVID(pdev->bus->number, pdev->devfn);
+ plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", sbdf,
pdev, sizeof(*pdev));
if (IS_ERR(plat_dev))
@@ -586,7 +592,7 @@ static int dwc_pcie_pmu_notifier(struct notifier_block *nb,
switch (action) {
case BUS_NOTIFY_ADD_DEVICE:
- if (!dwc_pcie_match_des_cap(pdev))
+ if (!dwc_pcie_des_cap(pdev))
return NOTIFY_DONE;
if (dwc_pcie_register_dev(pdev))
return NOTIFY_BAD;
@@ -611,15 +617,16 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev)
struct pci_dev *pdev = plat_dev->dev.platform_data;
struct dwc_pcie_pmu *pcie_pmu;
char *name;
- u32 bdf, val;
+ u32 sbdf;
u16 vsec;
int ret;
- vsec = pci_find_vsec_capability(pdev, pdev->vendor,
- DWC_PCIE_VSEC_RAS_DES_ID);
- pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
- bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
- name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", bdf);
+ vsec = dwc_pcie_des_cap(pdev);
+ if (!vsec)
+ return -ENODEV;
+
+ sbdf = plat_dev->id;
+ name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", sbdf);
if (!name)
return -ENOMEM;
@@ -650,7 +657,7 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev)
ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state,
&pcie_pmu->cpuhp_node);
if (ret) {
- pci_err(pdev, "Error %d registering hotplug @%x\n", ret, bdf);
+ pci_err(pdev, "Error %d registering hotplug @%x\n", ret, sbdf);
return ret;
}
@@ -663,7 +670,7 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev)
ret = perf_pmu_register(&pcie_pmu->pmu, name, -1);
if (ret) {
- pci_err(pdev, "Error %d registering PMU @%x\n", ret, bdf);
+ pci_err(pdev, "Error %d registering PMU @%x\n", ret, sbdf);
return ret;
}
ret = devm_add_action_or_reset(&plat_dev->dev, dwc_pcie_unregister_pmu,
@@ -690,9 +697,8 @@ static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_n
{
struct dwc_pcie_pmu *pcie_pmu;
struct pci_dev *pdev;
- int node;
- cpumask_t mask;
unsigned int target;
+ int node;
pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
/* Nothing to do if this CPU doesn't own the PMU */
@@ -702,10 +708,9 @@ static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_n
pcie_pmu->on_cpu = -1;
pdev = pcie_pmu->pdev;
node = dev_to_node(&pdev->dev);
- if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
- cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
- target = cpumask_any(&mask);
- else
+
+ target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target >= nr_cpu_ids) {
@@ -728,11 +733,10 @@ static struct platform_driver dwc_pcie_pmu_driver = {
static int __init dwc_pcie_pmu_init(void)
{
struct pci_dev *pdev = NULL;
- bool found = false;
int ret;
for_each_pci_dev(pdev) {
- if (!dwc_pcie_match_des_cap(pdev))
+ if (!dwc_pcie_des_cap(pdev))
continue;
ret = dwc_pcie_register_dev(pdev);
@@ -740,11 +744,7 @@ static int __init dwc_pcie_pmu_init(void)
pci_dev_put(pdev);
return ret;
}
-
- found = true;
}
- if (!found)
- return -ENODEV;
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
"perf/dwc_pcie_pmu:online",
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 7dbfaee372c7..b989ffa95d69 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -651,6 +651,7 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
*pmu = (struct ddr_pmu) {
.pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.attr_groups = attr_groups,
@@ -826,7 +827,7 @@ cpuhp_state_err:
return ret;
}
-static int ddr_perf_remove(struct platform_device *pdev)
+static void ddr_perf_remove(struct platform_device *pdev)
{
struct ddr_pmu *pmu = platform_get_drvdata(pdev);
@@ -836,7 +837,6 @@ static int ddr_perf_remove(struct platform_device *pdev)
perf_pmu_unregister(&pmu->pmu);
ida_free(&ddr_ida, pmu->id);
- return 0;
}
static struct platform_driver imx_ddr_pmu_driver = {
@@ -850,4 +850,5 @@ static struct platform_driver imx_ddr_pmu_driver = {
};
module_platform_driver(imx_ddr_pmu_driver);
+MODULE_DESCRIPTION("Freescale i.MX8 DDR Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
index 9685645bfe04..843f163e6c33 100644
--- a/drivers/perf/fsl_imx9_ddr_perf.c
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -11,14 +11,24 @@
#include <linux/perf_event.h>
/* Performance monitor configuration */
-#define PMCFG1 0x00
-#define PMCFG1_RD_TRANS_FILT_EN BIT(31)
-#define PMCFG1_WR_TRANS_FILT_EN BIT(30)
-#define PMCFG1_RD_BT_FILT_EN BIT(29)
-#define PMCFG1_ID_MASK GENMASK(17, 0)
+#define PMCFG1 0x00
+#define MX93_PMCFG1_RD_TRANS_FILT_EN BIT(31)
+#define MX93_PMCFG1_WR_TRANS_FILT_EN BIT(30)
+#define MX93_PMCFG1_RD_BT_FILT_EN BIT(29)
+#define MX93_PMCFG1_ID_MASK GENMASK(17, 0)
-#define PMCFG2 0x04
-#define PMCFG2_ID GENMASK(17, 0)
+#define MX95_PMCFG1_WR_BEAT_FILT_EN BIT(31)
+#define MX95_PMCFG1_RD_BEAT_FILT_EN BIT(30)
+
+#define PMCFG2 0x04
+#define MX93_PMCFG2_ID GENMASK(17, 0)
+
+#define PMCFG3 0x08
+#define PMCFG4 0x0C
+#define PMCFG5 0x10
+#define PMCFG6 0x14
+#define MX95_PMCFG_ID_MASK GENMASK(9, 0)
+#define MX95_PMCFG_ID GENMASK(25, 16)
/* Global control register affects all counters and takes priority over local control registers */
#define PMGC0 0x40
@@ -41,6 +51,10 @@
#define NUM_COUNTERS 11
#define CYCLES_COUNTER 0
+#define CYCLES_EVENT_ID 0
+
+#define CONFIG_EVENT_MASK GENMASK(7, 0)
+#define CONFIG_COUNTER_MASK GENMASK(23, 16)
#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu)
@@ -49,8 +63,21 @@
static DEFINE_IDA(ddr_ida);
+/*
+ * V1 support 1 read transaction, 1 write transaction and 1 read beats
+ * event which corresponding respecitively to counter 2, 3 and 4.
+ */
+#define DDR_PERF_AXI_FILTER_V1 0x1
+
+/*
+ * V2 support 1 read beats and 3 write beats events which corresponding
+ * respecitively to counter 2-5.
+ */
+#define DDR_PERF_AXI_FILTER_V2 0x2
+
struct imx_ddr_devtype_data {
const char *identifier; /* system PMU identifier for userspace */
+ unsigned int filter_ver; /* AXI filter version */
};
struct ddr_pmu {
@@ -67,12 +94,35 @@ struct ddr_pmu {
int id;
};
+static const struct imx_ddr_devtype_data imx91_devtype_data = {
+ .identifier = "imx91",
+ .filter_ver = DDR_PERF_AXI_FILTER_V1
+};
+
static const struct imx_ddr_devtype_data imx93_devtype_data = {
.identifier = "imx93",
+ .filter_ver = DDR_PERF_AXI_FILTER_V1
+};
+
+static const struct imx_ddr_devtype_data imx95_devtype_data = {
+ .identifier = "imx95",
+ .filter_ver = DDR_PERF_AXI_FILTER_V2
};
+static inline bool axi_filter_v1(struct ddr_pmu *pmu)
+{
+ return pmu->devtype_data->filter_ver == DDR_PERF_AXI_FILTER_V1;
+}
+
+static inline bool axi_filter_v2(struct ddr_pmu *pmu)
+{
+ return pmu->devtype_data->filter_ver == DDR_PERF_AXI_FILTER_V2;
+}
+
static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
- {.compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data},
+ { .compatible = "fsl,imx91-ddr-pmu", .data = &imx91_devtype_data },
+ { .compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data },
+ { .compatible = "fsl,imx95-ddr-pmu", .data = &imx95_devtype_data },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
@@ -118,21 +168,40 @@ static const struct attribute_group ddr_perf_cpumask_attr_group = {
.attrs = ddr_perf_cpumask_attrs,
};
+struct imx9_pmu_events_attr {
+ struct device_attribute attr;
+ u64 id;
+ const struct imx_ddr_devtype_data *devtype_data;
+};
+
static ssize_t ddr_pmu_event_show(struct device *dev,
struct device_attribute *attr, char *page)
{
- struct perf_pmu_events_attr *pmu_attr;
+ struct imx9_pmu_events_attr *pmu_attr;
- pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ pmu_attr = container_of(attr, struct imx9_pmu_events_attr, attr);
return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
}
-#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \
- (&((struct perf_pmu_events_attr[]) { \
+#define COUNTER_OFFSET_IN_EVENT 8
+#define ID(counter, id) ((counter << COUNTER_OFFSET_IN_EVENT) | id)
+
+#define DDR_PMU_EVENT_ATTR_COMM(_name, _id, _data) \
+ (&((struct imx9_pmu_events_attr[]) { \
{ .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\
- .id = _id, } \
+ .id = _id, \
+ .devtype_data = _data, } \
})[0].attr.attr)
+#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \
+ DDR_PMU_EVENT_ATTR_COMM(_name, _id, NULL)
+
+#define IMX93_DDR_PMU_EVENT_ATTR(_name, _id) \
+ DDR_PMU_EVENT_ATTR_COMM(_name, _id, &imx93_devtype_data)
+
+#define IMX95_DDR_PMU_EVENT_ATTR(_name, _id) \
+ DDR_PMU_EVENT_ATTR_COMM(_name, _id, &imx95_devtype_data)
+
static struct attribute *ddr_perf_events_attrs[] = {
/* counter0 cycles event */
IMX9_DDR_PMU_EVENT_ATTR(cycles, 0),
@@ -159,90 +228,115 @@ static struct attribute *ddr_perf_events_attrs[] = {
IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_29, 63),
/* counter1 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, 66),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, 67),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, 68),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, 69),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, 70),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, 71),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, ID(1, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, ID(1, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, ID(1, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, ID(1, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, ID(1, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, ID(1, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, ID(1, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, ID(1, 71)),
/* counter2 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, 66),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, 67),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, 68),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, 69),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, 70),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, 71),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, 72),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, 73),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, ID(2, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, ID(2, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, ID(2, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, ID(2, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, ID(2, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, ID(2, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, ID(2, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, ID(2, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, ID(2, 72)),
+ IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, ID(2, 73)), /* imx93 specific*/
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_beat_filt, ID(2, 73)), /* imx95 specific*/
/* counter3 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, 66),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, 67),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, 68),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, 69),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, 70),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, 71),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, 72),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, 73),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, ID(3, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, ID(3, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, ID(3, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, ID(3, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, ID(3, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, ID(3, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, ID(3, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, ID(3, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, ID(3, 72)),
+ IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, ID(3, 73)), /* imx93 specific*/
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt2, ID(3, 73)), /* imx95 specific*/
/* counter4 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, 66),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, 67),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, 68),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, 69),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, 70),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, 71),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, 72),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, 73),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, ID(4, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, ID(4, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, ID(4, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, ID(4, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, ID(4, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, ID(4, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, ID(4, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, ID(4, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, ID(4, 72)),
+ IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, ID(4, 73)), /* imx93 specific*/
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt1, ID(4, 73)), /* imx95 specific*/
/* counter5 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, 66),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, 67),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, 68),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, 69),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, 70),
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, 71),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, 72),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, ID(5, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, ID(5, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, ID(5, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, ID(5, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, ID(5, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, ID(5, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, ID(5, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, ID(5, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, ID(5, 72)),
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt0, ID(5, 73)), /* imx95 specific*/
/* counter6 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, 64),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, 72),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, ID(6, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, ID(6, 72)),
/* counter7 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, 64),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, 65),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, ID(7, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, ID(7, 65)),
/* counter8 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, 64),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, 65),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, ID(8, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, ID(8, 65)),
/* counter9 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, 65),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, 66),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, ID(9, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, ID(9, 66)),
/* counter10 specific events */
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, 65),
- IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, 66),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, ID(10, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, ID(10, 66)),
NULL,
};
+static umode_t
+ddr_perf_events_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int unused)
+{
+ struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj));
+ struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+ struct imx9_pmu_events_attr *eattr;
+
+ eattr = container_of(attr, typeof(*eattr), attr.attr);
+
+ if (!eattr->devtype_data)
+ return attr->mode;
+
+ if (eattr->devtype_data != ddr_pmu->devtype_data &&
+ eattr->devtype_data->filter_ver != ddr_pmu->devtype_data->filter_ver)
+ return 0;
+
+ return attr->mode;
+}
+
static const struct attribute_group ddr_perf_events_attr_group = {
.name = "events",
.attrs = ddr_perf_events_attrs,
+ .is_visible = ddr_perf_events_attrs_is_visible,
};
-PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(event, "config:0-7,16-23");
PMU_FORMAT_ATTR(counter, "config:8-15");
PMU_FORMAT_ATTR(axi_id, "config1:0-17");
PMU_FORMAT_ATTR(axi_mask, "config2:0-17");
@@ -339,8 +433,10 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config,
int counter, bool enable)
{
u32 ctrl_a;
+ int event;
ctrl_a = readl_relaxed(pmu->base + PMLCA(counter));
+ event = FIELD_GET(CONFIG_EVENT_MASK, config);
if (enable) {
ctrl_a |= PMLCA_FC;
@@ -352,7 +448,7 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config,
ctrl_a &= ~PMLCA_FC;
ctrl_a |= PMLCA_CE;
ctrl_a &= ~FIELD_PREP(PMLCA_EVENT, 0x7F);
- ctrl_a |= FIELD_PREP(PMLCA_EVENT, (config & 0x000000FF));
+ ctrl_a |= FIELD_PREP(PMLCA_EVENT, event);
writel(ctrl_a, pmu->base + PMLCA(counter));
} else {
/* Freeze counter. */
@@ -361,39 +457,79 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config,
}
}
-static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int cfg, int cfg1, int cfg2)
+static void imx93_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event,
+ int counter, int axi_id, int axi_mask)
{
u32 pmcfg1, pmcfg2;
- int event, counter;
-
- event = cfg & 0x000000FF;
- counter = (cfg & 0x0000FF00) >> 8;
+ u32 mask[] = { MX93_PMCFG1_RD_TRANS_FILT_EN,
+ MX93_PMCFG1_WR_TRANS_FILT_EN,
+ MX93_PMCFG1_RD_BT_FILT_EN };
pmcfg1 = readl_relaxed(pmu->base + PMCFG1);
- if (counter == 2 && event == 73)
- pmcfg1 |= PMCFG1_RD_TRANS_FILT_EN;
- else if (counter == 2 && event != 73)
- pmcfg1 &= ~PMCFG1_RD_TRANS_FILT_EN;
+ if (counter >= 2 && counter <= 4)
+ pmcfg1 = event == 73 ? pmcfg1 | mask[counter - 2] :
+ pmcfg1 & ~mask[counter - 2];
- if (counter == 3 && event == 73)
- pmcfg1 |= PMCFG1_WR_TRANS_FILT_EN;
- else if (counter == 3 && event != 73)
- pmcfg1 &= ~PMCFG1_WR_TRANS_FILT_EN;
+ pmcfg1 &= ~FIELD_PREP(MX93_PMCFG1_ID_MASK, 0x3FFFF);
+ pmcfg1 |= FIELD_PREP(MX93_PMCFG1_ID_MASK, axi_mask);
+ writel_relaxed(pmcfg1, pmu->base + PMCFG1);
- if (counter == 4 && event == 73)
- pmcfg1 |= PMCFG1_RD_BT_FILT_EN;
- else if (counter == 4 && event != 73)
- pmcfg1 &= ~PMCFG1_RD_BT_FILT_EN;
+ pmcfg2 = readl_relaxed(pmu->base + PMCFG2);
+ pmcfg2 &= ~FIELD_PREP(MX93_PMCFG2_ID, 0x3FFFF);
+ pmcfg2 |= FIELD_PREP(MX93_PMCFG2_ID, axi_id);
+ writel_relaxed(pmcfg2, pmu->base + PMCFG2);
+}
- pmcfg1 &= ~FIELD_PREP(PMCFG1_ID_MASK, 0x3FFFF);
- pmcfg1 |= FIELD_PREP(PMCFG1_ID_MASK, cfg2);
- writel(pmcfg1, pmu->base + PMCFG1);
+static void imx95_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event,
+ int counter, int axi_id, int axi_mask)
+{
+ u32 pmcfg1, pmcfg, offset = 0;
- pmcfg2 = readl_relaxed(pmu->base + PMCFG2);
- pmcfg2 &= ~FIELD_PREP(PMCFG2_ID, 0x3FFFF);
- pmcfg2 |= FIELD_PREP(PMCFG2_ID, cfg1);
- writel(pmcfg2, pmu->base + PMCFG2);
+ pmcfg1 = readl_relaxed(pmu->base + PMCFG1);
+
+ if (event == 73) {
+ switch (counter) {
+ case 2:
+ pmcfg1 |= MX95_PMCFG1_WR_BEAT_FILT_EN;
+ offset = PMCFG3;
+ break;
+ case 3:
+ pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN;
+ offset = PMCFG4;
+ break;
+ case 4:
+ pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN;
+ offset = PMCFG5;
+ break;
+ case 5:
+ pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN;
+ offset = PMCFG6;
+ break;
+ }
+ } else {
+ switch (counter) {
+ case 2:
+ pmcfg1 &= ~MX95_PMCFG1_WR_BEAT_FILT_EN;
+ break;
+ case 3:
+ case 4:
+ case 5:
+ pmcfg1 &= ~MX95_PMCFG1_RD_BEAT_FILT_EN;
+ break;
+ }
+ }
+
+ writel_relaxed(pmcfg1, pmu->base + PMCFG1);
+
+ if (offset) {
+ pmcfg = readl_relaxed(pmu->base + offset);
+ pmcfg &= ~(FIELD_PREP(MX95_PMCFG_ID_MASK, 0x3FF) |
+ FIELD_PREP(MX95_PMCFG_ID, 0x3FF));
+ pmcfg |= (FIELD_PREP(MX95_PMCFG_ID_MASK, axi_mask) |
+ FIELD_PREP(MX95_PMCFG_ID, axi_id));
+ writel_relaxed(pmcfg, pmu->base + offset);
+ }
}
static void ddr_perf_event_update(struct perf_event *event)
@@ -460,6 +596,28 @@ static void ddr_perf_event_start(struct perf_event *event, int flags)
hwc->state = 0;
}
+static int ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event, int counter)
+{
+ int i;
+
+ if (event == CYCLES_EVENT_ID) {
+ // Cycles counter is dedicated for cycle event.
+ if (pmu->events[CYCLES_COUNTER] == NULL)
+ return CYCLES_COUNTER;
+ } else if (counter != 0) {
+ // Counter specific event use specific counter.
+ if (pmu->events[counter] == NULL)
+ return counter;
+ } else {
+ // Auto allocate counter for referene event.
+ for (i = 1; i < NUM_COUNTERS; i++)
+ if (pmu->events[i] == NULL)
+ return i;
+ }
+
+ return -ENOENT;
+}
+
static int ddr_perf_event_add(struct perf_event *event, int flags)
{
struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
@@ -467,21 +625,33 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
int cfg = event->attr.config;
int cfg1 = event->attr.config1;
int cfg2 = event->attr.config2;
- int counter;
+ int event_id, counter;
+
+ event_id = FIELD_GET(CONFIG_EVENT_MASK, cfg);
+ counter = FIELD_GET(CONFIG_COUNTER_MASK, cfg);
- counter = (cfg & 0x0000FF00) >> 8;
+ counter = ddr_perf_alloc_counter(pmu, event_id, counter);
+ if (counter < 0) {
+ dev_dbg(pmu->dev, "There are not enough counters\n");
+ return -EOPNOTSUPP;
+ }
pmu->events[counter] = event;
pmu->active_events++;
hwc->idx = counter;
hwc->state |= PERF_HES_STOPPED;
+ if (axi_filter_v1(pmu))
+ /* read trans, write trans, read beat */
+ imx93_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2);
+
+ if (axi_filter_v2(pmu))
+ /* write beat, read beat2, read beat1, read beat */
+ imx95_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2);
+
if (flags & PERF_EF_START)
ddr_perf_event_start(event, flags);
- /* read trans, write trans, read beat */
- ddr_perf_monitor_config(pmu, cfg, cfg1, cfg2);
-
return 0;
}
@@ -501,9 +671,11 @@ static void ddr_perf_event_del(struct perf_event *event, int flags)
{
struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
ddr_perf_event_stop(event, PERF_EF_UPDATE);
+ pmu->events[counter] = NULL;
pmu->active_events--;
hwc->idx = -1;
}
@@ -679,7 +851,7 @@ format_string_err:
return ret;
}
-static int ddr_perf_remove(struct platform_device *pdev)
+static void ddr_perf_remove(struct platform_device *pdev)
{
struct ddr_pmu *pmu = platform_get_drvdata(pdev);
@@ -689,8 +861,6 @@ static int ddr_perf_remove(struct platform_device *pdev)
perf_pmu_unregister(&pmu->pmu);
ida_free(&ddr_ida, pmu->id);
-
- return 0;
}
static struct platform_driver imx_ddr_pmu_driver = {
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index b90ba8aca3fa..c5394d007b61 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -99,16 +99,6 @@ HISI_PCIE_PMU_FILTER_ATTR(len_mode, config1, 11, 10);
HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0);
HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16);
-static ssize_t hisi_pcie_format_sysfs_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct dev_ext_attribute *eattr;
-
- eattr = container_of(attr, struct dev_ext_attribute, attr);
-
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-
static ssize_t hisi_pcie_event_sysfs_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -120,8 +110,7 @@ static ssize_t hisi_pcie_event_sysfs_show(struct device *dev, struct device_attr
#define HISI_PCIE_PMU_FORMAT_ATTR(_name, _format) \
(&((struct dev_ext_attribute[]){ \
- { .attr = __ATTR(_name, 0444, hisi_pcie_format_sysfs_show, \
- NULL), \
+ { .attr = __ATTR(_name, 0444, device_show_string, NULL), \
.var = (void *)_format } \
})[0].attr.attr)
@@ -152,6 +141,22 @@ static ssize_t bus_show(struct device *dev, struct device_attribute *attr, char
}
static DEVICE_ATTR_RO(bus);
+static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_min);
+}
+static DEVICE_ATTR_RO(bdf_min);
+
+static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_max);
+}
+static DEVICE_ATTR_RO(bdf_max);
+
static struct hisi_pcie_reg_pair
hisi_pcie_parse_reg_value(struct hisi_pcie_pmu *pcie_pmu, u32 reg_off)
{
@@ -216,12 +221,10 @@ static void hisi_pcie_pmu_writeq(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset,
writeq_relaxed(val, pcie_pmu->base + offset);
}
-static void hisi_pcie_pmu_config_filter(struct perf_event *event)
+static u64 hisi_pcie_pmu_get_event_ctrl_val(struct perf_event *event)
{
- struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
u64 port, trig_len, thr_len, len_mode;
- u64 reg = HISI_PCIE_INIT_SET;
+ u64 reg = 0;
/* Config HISI_PCIE_EVENT_CTRL according to event. */
reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event));
@@ -256,10 +259,19 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
else
reg |= FIELD_PREP(HISI_PCIE_LEN_M, HISI_PCIE_LEN_M_DEFAULT);
+ return reg;
+}
+
+static void hisi_pcie_pmu_config_event_ctrl(struct perf_event *event)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 reg = hisi_pcie_pmu_get_event_ctrl_val(event);
+
hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg);
}
-static void hisi_pcie_pmu_clear_filter(struct perf_event *event)
+static void hisi_pcie_pmu_clear_event_ctrl(struct perf_event *event)
{
struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
@@ -299,18 +311,24 @@ static bool hisi_pcie_pmu_valid_filter(struct perf_event *event,
if (hisi_pcie_get_trig_len(event) > HISI_PCIE_TRIG_MAX_VAL)
return false;
- if (requester_id) {
- if (!hisi_pcie_pmu_valid_requester_id(pcie_pmu, requester_id))
- return false;
- }
+ /* Need to explicitly set filter of "port" or "bdf" */
+ if (!hisi_pcie_get_port(event) &&
+ !hisi_pcie_pmu_valid_requester_id(pcie_pmu, requester_id))
+ return false;
return true;
}
+/*
+ * Check Whether two events share the same config. The same config means not
+ * only the event code, but also the filter settings of the two events are
+ * the same.
+ */
static bool hisi_pcie_pmu_cmp_event(struct perf_event *target,
struct perf_event *event)
{
- return hisi_pcie_get_real_event(target) == hisi_pcie_get_real_event(event);
+ return hisi_pcie_pmu_get_event_ctrl_val(target) ==
+ hisi_pcie_pmu_get_event_ctrl_val(event);
}
static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event)
@@ -337,15 +355,27 @@ static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event)
return false;
for (num = 0; num < counters; num++) {
+ /*
+ * If we find a related event, then it's a valid group
+ * since we don't need to allocate a new counter for it.
+ */
if (hisi_pcie_pmu_cmp_event(event_group[num], sibling))
break;
}
+ /*
+ * Otherwise it's a new event but if there's no available counter,
+ * fail the check since we cannot schedule all the events in
+ * the group simultaneously.
+ */
+ if (num == HISI_PCIE_MAX_COUNTERS)
+ return false;
+
if (num == counters)
event_group[counters++] = sibling;
}
- return counters <= HISI_PCIE_MAX_COUNTERS;
+ return true;
}
static int hisi_pcie_pmu_event_init(struct perf_event *event)
@@ -385,40 +415,32 @@ static u64 hisi_pcie_pmu_read_counter(struct perf_event *event)
return hisi_pcie_pmu_readq(pcie_pmu, event->hw.event_base, idx);
}
-static int hisi_pcie_pmu_find_related_event(struct hisi_pcie_pmu *pcie_pmu,
- struct perf_event *event)
+/*
+ * Check all work events, if a relevant event is found then we return it
+ * first, otherwise return the first idle counter (need to reset).
+ */
+static int hisi_pcie_pmu_get_event_idx(struct hisi_pcie_pmu *pcie_pmu,
+ struct perf_event *event)
{
+ int first_idle = -EAGAIN;
struct perf_event *sibling;
int idx;
for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) {
sibling = pcie_pmu->hw_events[idx];
- if (!sibling)
- continue;
-
- if (!hisi_pcie_pmu_cmp_event(sibling, event))
+ if (!sibling) {
+ if (first_idle == -EAGAIN)
+ first_idle = idx;
continue;
+ }
/* Related events must be used in group */
- if (sibling->group_leader == event->group_leader)
- return idx;
- else
- return -EINVAL;
- }
-
- return idx;
-}
-
-static int hisi_pcie_pmu_get_event_idx(struct hisi_pcie_pmu *pcie_pmu)
-{
- int idx;
-
- for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) {
- if (!pcie_pmu->hw_events[idx])
+ if (hisi_pcie_pmu_cmp_event(sibling, event) &&
+ sibling->group_leader == event->group_leader)
return idx;
}
- return -EINVAL;
+ return first_idle;
}
static void hisi_pcie_pmu_event_update(struct perf_event *event)
@@ -446,10 +468,24 @@ static void hisi_pcie_pmu_set_period(struct perf_event *event)
struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ u64 orig_cnt, cnt;
+
+ orig_cnt = hisi_pcie_pmu_read_counter(event);
local64_set(&hwc->prev_count, HISI_PCIE_INIT_VAL);
hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_CNT, idx, HISI_PCIE_INIT_VAL);
hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EXT_CNT, idx, HISI_PCIE_INIT_VAL);
+
+ /*
+ * The counter maybe unwritable if the target event is unsupported.
+ * Check this by comparing the counts after setting the period. If
+ * the counts stay unchanged after setting the period then update
+ * the hwc->prev_count correctly. Otherwise the final counts user
+ * get maybe totally wrong.
+ */
+ cnt = hisi_pcie_pmu_read_counter(event);
+ if (orig_cnt == cnt)
+ local64_set(&hwc->prev_count, cnt);
}
static void hisi_pcie_pmu_enable_counter(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc)
@@ -505,7 +541,7 @@ static void hisi_pcie_pmu_start(struct perf_event *event, int flags)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
- hisi_pcie_pmu_config_filter(event);
+ hisi_pcie_pmu_config_event_ctrl(event);
hisi_pcie_pmu_enable_counter(pcie_pmu, hwc);
hisi_pcie_pmu_enable_int(pcie_pmu, hwc);
hisi_pcie_pmu_set_period(event);
@@ -526,7 +562,7 @@ static void hisi_pcie_pmu_stop(struct perf_event *event, int flags)
hisi_pcie_pmu_event_update(event);
hisi_pcie_pmu_disable_int(pcie_pmu, hwc);
hisi_pcie_pmu_disable_counter(pcie_pmu, hwc);
- hisi_pcie_pmu_clear_filter(event);
+ hisi_pcie_pmu_clear_event_ctrl(event);
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
@@ -544,27 +580,18 @@ static int hisi_pcie_pmu_add(struct perf_event *event, int flags)
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
- /* Check all working events to find a related event. */
- idx = hisi_pcie_pmu_find_related_event(pcie_pmu, event);
- if (idx < 0)
- return idx;
-
- /* Current event shares an enabled counter with the related event */
- if (idx < HISI_PCIE_MAX_COUNTERS) {
- hwc->idx = idx;
- goto start_count;
- }
-
- idx = hisi_pcie_pmu_get_event_idx(pcie_pmu);
+ idx = hisi_pcie_pmu_get_event_idx(pcie_pmu, event);
if (idx < 0)
return idx;
hwc->idx = idx;
- pcie_pmu->hw_events[idx] = event;
- /* Reset Counter to avoid previous statistic interference. */
- hisi_pcie_pmu_reset_counter(pcie_pmu, idx);
-start_count:
+ /* No enabled counter found with related event, reset it */
+ if (!pcie_pmu->hw_events[idx]) {
+ hisi_pcie_pmu_reset_counter(pcie_pmu, idx);
+ pcie_pmu->hw_events[idx] = event;
+ }
+
if (flags & PERF_EF_START)
hisi_pcie_pmu_start(event, PERF_EF_RELOAD);
@@ -677,7 +704,6 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
unsigned int target;
- cpumask_t mask;
int numa_node;
/* Nothing to do if this CPU doesn't own the PMU */
@@ -688,10 +714,10 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
/* Choose a local CPU from all online cpus. */
numa_node = dev_to_node(&pcie_pmu->pdev->dev);
- if (cpumask_and(&mask, cpumask_of_node(numa_node), cpu_online_mask) &&
- cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
- target = cpumask_any(&mask);
- else
+
+ target = cpumask_any_and_but(cpumask_of_node(numa_node),
+ cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target >= nr_cpu_ids) {
@@ -714,10 +740,18 @@ static struct attribute *hisi_pcie_pmu_events_attr[] = {
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210),
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011),
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_flux, 0x0104),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_time, 0x10104),
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x0804),
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x10804),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_cpl_flux, 0x2004),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_cpl_time, 0x12004),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_mwr_flux, 0x0105),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_mwr_time, 0x10105),
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x0405),
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x10405),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_cpl_flux, 0x1005),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_cpl_time, 0x11005),
NULL
};
@@ -745,6 +779,8 @@ static const struct attribute_group hisi_pcie_pmu_format_group = {
static struct attribute *hisi_pcie_pmu_bus_attrs[] = {
&dev_attr_bus.attr,
+ &dev_attr_bdf_max.attr,
+ &dev_attr_bdf_min.attr,
NULL
};
@@ -803,6 +839,7 @@ static int hisi_pcie_alloc_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_
pcie_pmu->pmu = (struct pmu) {
.name = name,
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.event_init = hisi_pcie_pmu_event_init,
.pmu_enable = hisi_pcie_pmu_enable,
.pmu_disable = hisi_pcie_pmu_disable,
diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
index 40f1bc9f9b91..b879b81adfdd 100644
--- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
@@ -180,20 +180,18 @@ MODULE_DEVICE_TABLE(acpi, hisi_cpa_pmu_acpi_match);
static int hisi_cpa_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *cpa_pmu)
{
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &cpa_pmu->sicl_id)) {
+ hisi_uncore_pmu_init_topology(cpa_pmu, &pdev->dev);
+
+ if (cpa_pmu->topo.sicl_id < 0) {
dev_err(&pdev->dev, "Can not read sicl-id\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
- &cpa_pmu->index_id)) {
+ if (cpa_pmu->topo.index_id < 0) {
dev_err(&pdev->dev, "Cannot read idx-id\n");
return -EINVAL;
}
- cpa_pmu->ccl_id = -1;
- cpa_pmu->sccl_id = -1;
cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(cpa_pmu->base))
return PTR_ERR(cpa_pmu->base);
@@ -227,34 +225,11 @@ static const struct attribute_group hisi_cpa_pmu_events_group = {
.attrs = hisi_cpa_pmu_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_cpa_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL
-};
-
-static const struct attribute_group hisi_cpa_pmu_cpumask_attr_group = {
- .attrs = hisi_cpa_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_cpa_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_cpa_pmu_identifier_attrs[] = {
- &hisi_cpa_pmu_identifier_attr.attr,
- NULL
-};
-
-static const struct attribute_group hisi_cpa_pmu_identifier_group = {
- .attrs = hisi_cpa_pmu_identifier_attrs,
-};
-
static const struct attribute_group *hisi_cpa_pmu_attr_groups[] = {
&hisi_cpa_pmu_format_group,
&hisi_cpa_pmu_events_group,
- &hisi_cpa_pmu_cpumask_attr_group,
- &hisi_cpa_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
@@ -311,8 +286,8 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev)
if (ret)
return ret;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%u",
- cpa_pmu->sicl_id, cpa_pmu->index_id);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%d",
+ cpa_pmu->topo.sicl_id, cpa_pmu->topo.index_id);
if (!name)
return -ENOMEM;
@@ -341,7 +316,7 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_cpa_pmu_remove(struct platform_device *pdev)
+static void hisi_cpa_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *cpa_pmu = platform_get_drvdata(pdev);
@@ -349,7 +324,6 @@ static int hisi_cpa_pmu_remove(struct platform_device *pdev)
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
&cpa_pmu->node);
hisi_cpa_pmu_enable_pm(cpa_pmu);
- return 0;
}
static struct platform_driver hisi_cpa_pmu_driver = {
@@ -390,6 +364,7 @@ static void __exit hisi_cpa_pmu_module_exit(void)
}
module_exit(hisi_cpa_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index ffb039d05d07..7e490f8868f2 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -111,14 +111,14 @@ static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu,
* so there is no need to write event type, while it is programmable counter in
* PMU v2.
*/
-static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
+static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx,
u32 type)
{
u32 offset;
- if (hha_pmu->identifier >= HISI_PMU_V2) {
+ if (ddrc_pmu->identifier >= HISI_PMU_V2) {
offset = DDRC_V2_EVENT_TYPE + 4 * idx;
- writel(type, hha_pmu->base + offset);
+ writel(type, ddrc_pmu->base + offset);
}
}
@@ -297,23 +297,22 @@ MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match);
static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *ddrc_pmu)
{
+ hisi_uncore_pmu_init_topology(ddrc_pmu, &pdev->dev);
+
/*
* Use the SCCL_ID and DDRC channel ID to identify the
* DDRC PMU, while SCCL_ID is in MPIDR[aff2].
*/
if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id",
- &ddrc_pmu->index_id)) {
+ &ddrc_pmu->topo.index_id)) {
dev_err(&pdev->dev, "Can not read ddrc channel-id!\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &ddrc_pmu->sccl_id)) {
+ if (ddrc_pmu->topo.sccl_id < 0) {
dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n");
return -EINVAL;
}
- /* DDRC PMUs only share the same SCCL */
- ddrc_pmu->ccl_id = -1;
ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(ddrc_pmu->base)) {
@@ -323,8 +322,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION);
if (ddrc_pmu->identifier >= HISI_PMU_V2) {
- if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id",
- &ddrc_pmu->sub_id)) {
+ if (ddrc_pmu->topo.sub_id < 0) {
dev_err(&pdev->dev, "Can not read sub-id!\n");
return -EINVAL;
}
@@ -382,42 +380,19 @@ static const struct attribute_group hisi_ddrc_pmu_v2_events_group = {
.attrs = hisi_ddrc_pmu_v2_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = {
- .attrs = hisi_ddrc_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_ddrc_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = {
- &hisi_ddrc_pmu_identifier_attr.attr,
- NULL
-};
-
-static const struct attribute_group hisi_ddrc_pmu_identifier_group = {
- .attrs = hisi_ddrc_pmu_identifier_attrs,
-};
-
static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = {
&hisi_ddrc_pmu_v1_format_group,
&hisi_ddrc_pmu_v1_events_group,
- &hisi_ddrc_pmu_cpumask_attr_group,
- &hisi_ddrc_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL,
};
static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = {
&hisi_ddrc_pmu_v2_format_group,
&hisi_ddrc_pmu_v2_events_group,
- &hisi_ddrc_pmu_cpumask_attr_group,
- &hisi_ddrc_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
@@ -501,13 +476,13 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
if (ddrc_pmu->identifier >= HISI_PMU_V2)
name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
- "hisi_sccl%u_ddrc%u_%u",
- ddrc_pmu->sccl_id, ddrc_pmu->index_id,
- ddrc_pmu->sub_id);
+ "hisi_sccl%d_ddrc%d_%d",
+ ddrc_pmu->topo.sccl_id, ddrc_pmu->topo.index_id,
+ ddrc_pmu->topo.sub_id);
else
name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
- "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id,
- ddrc_pmu->index_id);
+ "hisi_sccl%d_ddrc%d", ddrc_pmu->topo.sccl_id,
+ ddrc_pmu->topo.index_id);
if (!name)
return -ENOMEM;
@@ -531,14 +506,13 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_ddrc_pmu_remove(struct platform_device *pdev)
+static void hisi_ddrc_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *ddrc_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&ddrc_pmu->pmu);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
&ddrc_pmu->node);
- return 0;
}
static struct platform_driver hisi_ddrc_pmu_driver = {
@@ -576,10 +550,10 @@ static void __exit hisi_ddrc_pmu_module_exit(void)
{
platform_driver_unregister(&hisi_ddrc_pmu_driver);
cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE);
-
}
module_exit(hisi_ddrc_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index 15caf99e1eef..ca609db86046 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -295,12 +295,13 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
unsigned long long id;
acpi_status status;
+ hisi_uncore_pmu_init_topology(hha_pmu, &pdev->dev);
+
/*
* Use SCCL_ID and UID to identify the HHA PMU, while
* SCCL_ID is in MPIDR[aff2].
*/
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &hha_pmu->sccl_id)) {
+ if (hha_pmu->topo.sccl_id < 0) {
dev_err(&pdev->dev, "Can not read hha sccl-id!\n");
return -EINVAL;
}
@@ -309,8 +310,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
* Early versions of BIOS support _UID by mistake, so we support
* both "hisilicon, idx-id" as preference, if available.
*/
- if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
- &hha_pmu->index_id)) {
+ if (hha_pmu->topo.index_id < 0) {
status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
"_UID", NULL, &id);
if (ACPI_FAILURE(status)) {
@@ -318,10 +318,8 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
return -EINVAL;
}
- hha_pmu->index_id = id;
+ hha_pmu->topo.index_id = id;
}
- /* HHA PMUs only share the same SCCL */
- hha_pmu->ccl_id = -1;
hha_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(hha_pmu->base)) {
@@ -407,42 +405,19 @@ static const struct attribute_group hisi_hha_pmu_v2_events_group = {
.attrs = hisi_hha_pmu_v2_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_hha_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = {
- .attrs = hisi_hha_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_hha_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_hha_pmu_identifier_attrs[] = {
- &hisi_hha_pmu_identifier_attr.attr,
- NULL
-};
-
-static const struct attribute_group hisi_hha_pmu_identifier_group = {
- .attrs = hisi_hha_pmu_identifier_attrs,
-};
-
static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = {
&hisi_hha_pmu_v1_format_group,
&hisi_hha_pmu_v1_events_group,
- &hisi_hha_pmu_cpumask_attr_group,
- &hisi_hha_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL,
};
static const struct attribute_group *hisi_hha_pmu_v2_attr_groups[] = {
&hisi_hha_pmu_v2_format_group,
&hisi_hha_pmu_v2_events_group,
- &hisi_hha_pmu_cpumask_attr_group,
- &hisi_hha_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
@@ -510,8 +485,8 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev)
if (ret)
return ret;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u",
- hha_pmu->sccl_id, hha_pmu->index_id);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_hha%d",
+ hha_pmu->topo.sccl_id, hha_pmu->topo.index_id);
if (!name)
return -ENOMEM;
@@ -534,14 +509,13 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_hha_pmu_remove(struct platform_device *pdev)
+static void hisi_hha_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *hha_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&hha_pmu->pmu);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
&hha_pmu->node);
- return 0;
}
static struct platform_driver hisi_hha_pmu_driver = {
@@ -582,6 +556,7 @@ static void __exit hisi_hha_pmu_module_exit(void)
}
module_exit(hisi_hha_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 794dbcd19b7a..412fc3a97963 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -355,18 +355,18 @@ MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
{
+ hisi_uncore_pmu_init_topology(l3c_pmu, &pdev->dev);
+
/*
* Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
* SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
*/
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &l3c_pmu->sccl_id)) {
+ if (l3c_pmu->topo.sccl_id < 0) {
dev_err(&pdev->dev, "Can not read l3c sccl-id!\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
- &l3c_pmu->ccl_id)) {
+ if (l3c_pmu->topo.ccl_id < 0) {
dev_err(&pdev->dev, "Can not read l3c ccl-id!\n");
return -EINVAL;
}
@@ -441,42 +441,19 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = {
.attrs = hisi_l3c_pmu_v2_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = {
- .attrs = hisi_l3c_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_l3c_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_l3c_pmu_identifier_attrs[] = {
- &hisi_l3c_pmu_identifier_attr.attr,
- NULL
-};
-
-static const struct attribute_group hisi_l3c_pmu_identifier_group = {
- .attrs = hisi_l3c_pmu_identifier_attrs,
-};
-
static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = {
&hisi_l3c_pmu_v1_format_group,
&hisi_l3c_pmu_v1_events_group,
- &hisi_l3c_pmu_cpumask_attr_group,
- &hisi_l3c_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL,
};
static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = {
&hisi_l3c_pmu_v2_format_group,
&hisi_l3c_pmu_v2_events_group,
- &hisi_l3c_pmu_cpumask_attr_group,
- &hisi_l3c_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
@@ -544,8 +521,8 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
if (ret)
return ret;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
- l3c_pmu->sccl_id, l3c_pmu->ccl_id);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d",
+ l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id);
if (!name)
return -ENOMEM;
@@ -568,14 +545,13 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_l3c_pmu_remove(struct platform_device *pdev)
+static void hisi_l3c_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *l3c_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&l3c_pmu->pmu);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
&l3c_pmu->node);
- return 0;
}
static struct platform_driver hisi_l3c_pmu_driver = {
@@ -616,6 +592,7 @@ static void __exit hisi_l3c_pmu_module_exit(void)
}
module_exit(hisi_l3c_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index 797cf201996a..a0142684e379 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -269,25 +269,22 @@ static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx)
static int hisi_pa_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *pa_pmu)
{
+ hisi_uncore_pmu_init_topology(pa_pmu, &pdev->dev);
+
/*
* As PA PMU is in a SICL, use the SICL_ID and the index ID
* to identify the PA PMU.
*/
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &pa_pmu->sicl_id)) {
+ if (pa_pmu->topo.sicl_id < 0) {
dev_err(&pdev->dev, "Cannot read sicl-id!\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
- &pa_pmu->index_id)) {
+ if (pa_pmu->topo.index_id < 0) {
dev_err(&pdev->dev, "Cannot read idx-id!\n");
return -EINVAL;
}
- pa_pmu->ccl_id = -1;
- pa_pmu->sccl_id = -1;
-
pa_pmu->dev_info = device_get_match_data(&pdev->dev);
if (!pa_pmu->dev_info)
return -ENODEV;
@@ -356,29 +353,6 @@ static const struct attribute_group hisi_h60pa_pmu_events_group = {
.attrs = hisi_h60pa_pmu_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_pa_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL
-};
-
-static const struct attribute_group hisi_pa_pmu_cpumask_attr_group = {
- .attrs = hisi_pa_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_pa_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_pa_pmu_identifier_attrs[] = {
- &hisi_pa_pmu_identifier_attr.attr,
- NULL
-};
-
-static const struct attribute_group hisi_pa_pmu_identifier_group = {
- .attrs = hisi_pa_pmu_identifier_attrs,
-};
-
static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = {
.mask_offset = PA_INT_MASK,
.clear_offset = PA_INT_CLEAR,
@@ -388,8 +362,8 @@ static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = {
static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = {
&hisi_pa_pmu_v2_format_group,
&hisi_pa_pmu_v2_events_group,
- &hisi_pa_pmu_cpumask_attr_group,
- &hisi_pa_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
@@ -402,8 +376,8 @@ static const struct hisi_pmu_dev_info hisi_h32pa_v2 = {
static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = {
&hisi_pa_pmu_v2_format_group,
&hisi_pa_pmu_v3_events_group,
- &hisi_pa_pmu_cpumask_attr_group,
- &hisi_pa_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
@@ -422,8 +396,8 @@ static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = {
static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = {
&hisi_pa_pmu_v2_format_group,
&hisi_h60pa_pmu_events_group,
- &hisi_pa_pmu_cpumask_attr_group,
- &hisi_pa_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
@@ -488,9 +462,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
if (ret)
return ret;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u",
- pa_pmu->sicl_id, pa_pmu->dev_info->name,
- pa_pmu->index_id);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%d",
+ pa_pmu->topo.sicl_id, pa_pmu->dev_info->name,
+ pa_pmu->topo.index_id);
if (!name)
return -ENOMEM;
@@ -514,14 +488,13 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_pa_pmu_remove(struct platform_device *pdev)
+static void hisi_pa_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *pa_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&pa_pmu->pmu);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
&pa_pmu->node);
- return 0;
}
static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = {
@@ -570,6 +543,7 @@ static void __exit hisi_pa_pmu_module_exit(void)
}
module_exit(hisi_pa_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 04031450d5fe..ef058b1dd509 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
+#include <linux/property.h>
#include <asm/cputype.h>
#include <asm/local64.h>
@@ -23,20 +24,6 @@
#define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0))
/*
- * PMU format attributes
- */
-ssize_t hisi_format_sysfs_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *eattr;
-
- eattr = container_of(attr, struct dev_ext_attribute, attr);
-
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-EXPORT_SYMBOL_GPL(hisi_format_sysfs_show);
-
-/*
* PMU event attributes
*/
ssize_t hisi_event_sysfs_show(struct device *dev,
@@ -48,7 +35,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev,
return sysfs_emit(page, "config=0x%lx\n", (unsigned long)eattr->var);
}
-EXPORT_SYMBOL_GPL(hisi_event_sysfs_show);
+EXPORT_SYMBOL_NS_GPL(hisi_event_sysfs_show, "HISI_PMU");
/*
* sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show
@@ -60,7 +47,52 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev,
return sysfs_emit(buf, "%d\n", hisi_pmu->on_cpu);
}
-EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show);
+EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, "HISI_PMU");
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static ssize_t hisi_associated_cpus_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, &hisi_pmu->associated_cpus);
+}
+static DEVICE_ATTR(associated_cpus, 0444, hisi_associated_cpus_sysfs_show, NULL);
+
+static struct attribute *hisi_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ &dev_attr_associated_cpus.attr,
+ NULL
+};
+
+const struct attribute_group hisi_pmu_cpumask_attr_group = {
+ .attrs = hisi_pmu_cpumask_attrs,
+};
+EXPORT_SYMBOL_NS_GPL(hisi_pmu_cpumask_attr_group, "HISI_PMU");
+
+ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
+
+ return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier);
+}
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, "HISI_PMU");
+
+static struct device_attribute hisi_pmu_identifier_attr =
+ __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_pmu_identifier_attrs[] = {
+ &hisi_pmu_identifier_attr.attr,
+ NULL
+};
+
+const struct attribute_group hisi_pmu_identifier_group = {
+ .attrs = hisi_pmu_identifier_attrs,
+};
+EXPORT_SYMBOL_NS_GPL(hisi_pmu_identifier_group, "HISI_PMU");
static bool hisi_validate_event_group(struct perf_event *event)
{
@@ -110,17 +142,7 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
return idx;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx);
-
-ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
- struct device_attribute *attr,
- char *page)
-{
- struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
-
- return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier);
-}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, "HISI_PMU");
static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
{
@@ -179,7 +201,7 @@ int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
return 0;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_irq, "HISI_PMU");
int hisi_uncore_pmu_event_init(struct perf_event *event)
{
@@ -233,7 +255,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event)
return 0;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_init, "HISI_PMU");
/*
* Set the counter to count the event that we're interested in,
@@ -287,7 +309,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event)
/* Write start value to the hardware event counter */
hisi_pmu->ops->write_counter(hisi_pmu, hwc, val);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_set_event_period, "HISI_PMU");
void hisi_uncore_pmu_event_update(struct perf_event *event)
{
@@ -308,7 +330,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event)
HISI_MAX_PERIOD(hisi_pmu->counter_bits);
local64_add(delta, &event->count);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_update, "HISI_PMU");
void hisi_uncore_pmu_start(struct perf_event *event, int flags)
{
@@ -331,7 +353,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags)
hisi_uncore_pmu_enable_event(event);
perf_event_update_userpage(event);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_start, "HISI_PMU");
void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
{
@@ -348,7 +370,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
hisi_uncore_pmu_event_update(event);
hwc->state |= PERF_HES_UPTODATE;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_stop, "HISI_PMU");
int hisi_uncore_pmu_add(struct perf_event *event, int flags)
{
@@ -371,7 +393,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags)
return 0;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_add, "HISI_PMU");
void hisi_uncore_pmu_del(struct perf_event *event, int flags)
{
@@ -383,14 +405,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags)
perf_event_update_userpage(event);
hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_del, "HISI_PMU");
void hisi_uncore_pmu_read(struct perf_event *event)
{
/* Read hardware counter and update the perf counter statistics */
hisi_uncore_pmu_event_update(event);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_read, "HISI_PMU");
void hisi_uncore_pmu_enable(struct pmu *pmu)
{
@@ -403,7 +425,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu)
hisi_pmu->ops->start_counters(hisi_pmu);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_enable, "HISI_PMU");
void hisi_uncore_pmu_disable(struct pmu *pmu)
{
@@ -411,7 +433,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu)
hisi_pmu->ops->stop_counters(hisi_pmu);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_disable, "HISI_PMU");
/*
@@ -458,22 +480,19 @@ static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp)
*/
static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu)
{
+ struct hisi_pmu_topology *topo = &hisi_pmu->topo;
int sccl_id, ccl_id;
- /* If SCCL_ID is -1, the PMU is in a SICL and has no CPU affinity */
- if (hisi_pmu->sccl_id == -1)
- return true;
-
- if (hisi_pmu->ccl_id == -1) {
+ if (topo->ccl_id == -1) {
/* If CCL_ID is -1, the PMU only shares the same SCCL */
hisi_read_sccl_and_ccl_id(&sccl_id, NULL);
- return sccl_id == hisi_pmu->sccl_id;
+ return sccl_id == topo->sccl_id;
}
hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id);
- return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id;
+ return sccl_id == topo->sccl_id && ccl_id == topo->ccl_id;
}
int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
@@ -481,13 +500,25 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
node);
- if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu))
+ /*
+ * If the CPU is not associated to PMU, initialize the hisi_pmu->on_cpu
+ * based on the locality if it hasn't been initialized yet. For PMUs
+ * do have associated CPUs, it'll be updated later.
+ */
+ if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu)) {
+ if (hisi_pmu->on_cpu != -1)
+ return 0;
+
+ hisi_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(hisi_pmu->dev));
+ WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(hisi_pmu->on_cpu)));
return 0;
+ }
cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus);
- /* If another CPU is already managing this PMU, simply return. */
- if (hisi_pmu->on_cpu != -1)
+ /* If another associated CPU is already managing this PMU, simply return. */
+ if (hisi_pmu->on_cpu != -1 &&
+ cpumask_test_cpu(hisi_pmu->on_cpu, &hisi_pmu->associated_cpus))
return 0;
/* Use this CPU in cpumask for event counting */
@@ -498,18 +529,14 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
return 0;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_online_cpu, "HISI_PMU");
int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
node);
- cpumask_t pmu_online_cpus;
unsigned int target;
- if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus))
- return 0;
-
/* Nothing to do if this CPU doesn't own the PMU */
if (hisi_pmu->on_cpu != cpu)
return 0;
@@ -517,10 +544,16 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
/* Give up ownership of the PMU */
hisi_pmu->on_cpu = -1;
- /* Choose a new CPU to migrate ownership of the PMU to */
- cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus,
- cpu_online_mask);
- target = cpumask_any_but(&pmu_online_cpus, cpu);
+ /*
+ * Migrate ownership of the PMU to a new CPU chosen from PMU's online
+ * associated CPUs if possible, if no associated CPU online then
+ * migrate to one online CPU.
+ */
+ target = cpumask_any_and_but(&hisi_pmu->associated_cpus,
+ cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
if (target >= nr_cpu_ids)
return 0;
@@ -531,13 +564,43 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
return 0;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, "HISI_PMU");
+
+/*
+ * Retrieve the topology information from the firmware for the hisi_pmu device.
+ * The topology ID will be -1 if we cannot initialize it, it may either due to
+ * the PMU doesn't locate on this certain topology or the firmware needs to be
+ * fixed.
+ */
+void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev)
+{
+ struct hisi_pmu_topology *topo = &hisi_pmu->topo;
+
+ topo->sccl_id = -1;
+ topo->ccl_id = -1;
+ topo->index_id = -1;
+ topo->sub_id = -1;
+
+ if (device_property_read_u32(dev, "hisilicon,scl-id", &topo->sccl_id))
+ dev_dbg(dev, "no scl-id present\n");
+
+ if (device_property_read_u32(dev, "hisilicon,ccl-id", &topo->ccl_id))
+ dev_dbg(dev, "no ccl-id present\n");
+
+ if (device_property_read_u32(dev, "hisilicon,idx-id", &topo->index_id))
+ dev_dbg(dev, "no idx-id present\n");
+
+ if (device_property_read_u32(dev, "hisilicon,sub-id", &topo->sub_id))
+ dev_dbg(dev, "no sub-id present\n");
+}
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_topology, "HISI_PMU");
void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module)
{
struct pmu *pmu = &hisi_pmu->pmu;
pmu->module = module;
+ pmu->parent = hisi_pmu->dev;
pmu->task_ctx_nr = perf_invalid_context;
pmu->event_init = hisi_uncore_pmu_event_init;
pmu->pmu_enable = hisi_uncore_pmu_enable;
@@ -550,6 +613,7 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module)
pmu->attr_groups = hisi_pmu->pmu_events.attr_groups;
pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE;
}
-EXPORT_SYMBOL_GPL(hisi_pmu_init);
+EXPORT_SYMBOL_NS_GPL(hisi_pmu_init, "HISI_PMU");
+MODULE_DESCRIPTION("HiSilicon SoC uncore Performance Monitor driver framework");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 92402aa69d70..f4fed2544877 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -33,7 +33,7 @@
})[0].attr.attr)
#define HISI_PMU_FORMAT_ATTR(_name, _config) \
- HISI_PMU_ATTR(_name, hisi_format_sysfs_show, (void *)_config)
+ HISI_PMU_ATTR(_name, device_show_string, _config)
#define HISI_PMU_EVENT_ATTR(_name, _config) \
HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)
@@ -81,27 +81,55 @@ struct hisi_pmu_hwevents {
const struct attribute_group **attr_groups;
};
+/**
+ * struct hisi_pmu_topology - Describe the topology hierarchy on which the PMU
+ * is located.
+ * @sccl_id: ID of the SCCL on which the PMU locate is located.
+ * @sicl_id: ID of the SICL on which the PMU locate is located.
+ * @scl_id: ID used by the core which is unaware of the SCCL/SICL.
+ * @ccl_id: ID of the CCL (CPU cluster) on which the PMU is located.
+ * @index_id: the ID of the PMU module if there're several PMUs at a
+ * particularly location in the topology.
+ * @sub_id: submodule ID of the PMU. For example we use this for DDRC PMU v2
+ * since each DDRC has more than one DMC
+ *
+ * The ID will be -1 if the PMU isn't located on a certain topology.
+ */
+struct hisi_pmu_topology {
+ /*
+ * SCCL (Super CPU CLuster) and SICL (Super I/O Cluster) are parallel
+ * so a PMU cannot locate on a SCCL and a SICL. If the SCCL/SICL
+ * distinction is not relevant, use scl_id instead.
+ */
+ union {
+ int sccl_id;
+ int sicl_id;
+ int scl_id;
+ };
+ int ccl_id;
+ int index_id;
+ int sub_id;
+};
+
/* Generic pmu struct for different pmu types */
struct hisi_pmu {
struct pmu pmu;
const struct hisi_uncore_ops *ops;
const struct hisi_pmu_dev_info *dev_info;
struct hisi_pmu_hwevents pmu_events;
- /* associated_cpus: All CPUs associated with the PMU */
+ struct hisi_pmu_topology topo;
+ /*
+ * CPUs associated to the PMU and are preferred to use for counting.
+ * Could be empty if PMU has no association (e.g. PMU on SICL), in
+ * which case any online CPU will be used.
+ */
cpumask_t associated_cpus;
/* CPU used for counting */
int on_cpu;
int irq;
struct device *dev;
struct hlist_node node;
- int sccl_id;
- int sicl_id;
- int ccl_id;
void __iomem *base;
- /* the ID of the PMU modules */
- u32 index_id;
- /* For DDRC PMU v2: each DDRC has more than one DMC */
- u32 sub_id;
int num_counters;
int counter_bits;
/* check event code range */
@@ -109,6 +137,10 @@ struct hisi_pmu {
u32 identifier;
};
+/* Generic implementation of cpumask/identifier group */
+extern const struct attribute_group hisi_pmu_cpumask_attr_group;
+extern const struct attribute_group hisi_pmu_identifier_group;
+
int hisi_uncore_pmu_get_event_idx(struct perf_event *event);
void hisi_uncore_pmu_read(struct perf_event *event);
int hisi_uncore_pmu_add(struct perf_event *event, int flags);
@@ -122,8 +154,6 @@ void hisi_uncore_pmu_enable(struct pmu *pmu);
void hisi_uncore_pmu_disable(struct pmu *pmu);
ssize_t hisi_event_sysfs_show(struct device *dev,
struct device_attribute *attr, char *buf);
-ssize_t hisi_format_sysfs_show(struct device *dev,
- struct device_attribute *attr, char *buf);
ssize_t hisi_cpumask_sysfs_show(struct device *dev,
struct device_attribute *attr, char *buf);
int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node);
@@ -134,6 +164,7 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
char *page);
int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
struct platform_device *pdev);
+void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev);
void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module);
#endif /* __HISI_UNCORE_PMU_H__ */
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
index e706ca567676..dbd079016fc4 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -288,25 +288,22 @@ MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match);
static int hisi_sllc_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *sllc_pmu)
{
+ hisi_uncore_pmu_init_topology(sllc_pmu, &pdev->dev);
+
/*
* Use the SCCL_ID and the index ID to identify the SLLC PMU,
* while SCCL_ID is from MPIDR_EL1 by CPU.
*/
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &sllc_pmu->sccl_id)) {
+ if (sllc_pmu->topo.sccl_id < 0) {
dev_err(&pdev->dev, "Cannot read sccl-id!\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
- &sllc_pmu->index_id)) {
+ if (sllc_pmu->topo.index_id < 0) {
dev_err(&pdev->dev, "Cannot read idx-id!\n");
return -EINVAL;
}
- /* SLLC PMUs only share the same SCCL */
- sllc_pmu->ccl_id = -1;
-
sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(sllc_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n");
@@ -347,34 +344,11 @@ static const struct attribute_group hisi_sllc_pmu_v2_events_group = {
.attrs = hisi_sllc_pmu_v2_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_sllc_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL
-};
-
-static const struct attribute_group hisi_sllc_pmu_cpumask_attr_group = {
- .attrs = hisi_sllc_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_sllc_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_sllc_pmu_identifier_attrs[] = {
- &hisi_sllc_pmu_identifier_attr.attr,
- NULL
-};
-
-static const struct attribute_group hisi_sllc_pmu_identifier_group = {
- .attrs = hisi_sllc_pmu_identifier_attrs,
-};
-
static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = {
&hisi_sllc_pmu_v2_format_group,
&hisi_sllc_pmu_v2_events_group,
- &hisi_sllc_pmu_cpumask_attr_group,
- &hisi_sllc_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
@@ -433,8 +407,8 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
if (ret)
return ret;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_sllc%u",
- sllc_pmu->sccl_id, sllc_pmu->index_id);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_sllc%d",
+ sllc_pmu->topo.sccl_id, sllc_pmu->topo.index_id);
if (!name)
return -ENOMEM;
@@ -460,14 +434,13 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_sllc_pmu_remove(struct platform_device *pdev)
+static void hisi_sllc_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *sllc_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&sllc_pmu->pmu);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
&sllc_pmu->node);
- return 0;
}
static struct platform_driver hisi_sllc_pmu_driver = {
@@ -508,6 +481,7 @@ static void __exit hisi_sllc_pmu_module_exit(void)
}
module_exit(hisi_sllc_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
index 636fb79647c8..03cb9b564b99 100644
--- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
@@ -11,7 +11,6 @@
#include <linux/irq.h>
#include <linux/list.h>
#include <linux/mod_devicetable.h>
-#include <linux/property.h>
#include "hisi_uncore_pmu.h"
@@ -287,12 +286,52 @@ static u64 hisi_uc_pmu_read_counter(struct hisi_pmu *uc_pmu,
return readq(uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx));
}
-static void hisi_uc_pmu_write_counter(struct hisi_pmu *uc_pmu,
+static bool hisi_uc_pmu_get_glb_en_state(struct hisi_pmu *uc_pmu)
+{
+ u32 val;
+
+ val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+ return !!FIELD_GET(HISI_UC_EVENT_GLB_EN, val);
+}
+
+static void hisi_uc_pmu_write_counter_normal(struct hisi_pmu *uc_pmu,
struct hw_perf_event *hwc, u64 val)
{
writeq(val, uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx));
}
+static void hisi_uc_pmu_write_counter_quirk_v2(struct hisi_pmu *uc_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ hisi_uc_pmu_start_counters(uc_pmu);
+ hisi_uc_pmu_write_counter_normal(uc_pmu, hwc, val);
+ hisi_uc_pmu_stop_counters(uc_pmu);
+}
+
+static void hisi_uc_pmu_write_counter(struct hisi_pmu *uc_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ bool enable = hisi_uc_pmu_get_glb_en_state(uc_pmu);
+ bool erratum = uc_pmu->identifier == HISI_PMU_V2;
+
+ /*
+ * HiSilicon UC PMU v2 suffers the erratum 162700402 that the
+ * PMU counter cannot be set due to the lack of clock under power
+ * saving mode. This will lead to error or inaccurate counts.
+ * The clock can be enabled by the PMU global enabling control.
+ * The irq handler and pmu_start() will call the function to set
+ * period. If the function under irq context, the PMU has been
+ * enabled therefore we set counter directly. Other situations
+ * the PMU is disabled, we need to enable it to turn on the
+ * counter clock to set period, and then restore PMU enable
+ * status, the counter can hold its value without a clock.
+ */
+ if (enable || !erratum)
+ hisi_uc_pmu_write_counter_normal(uc_pmu, hwc, val);
+ else
+ hisi_uc_pmu_write_counter_quirk_v2(uc_pmu, hwc, val);
+}
+
static void hisi_uc_pmu_enable_counter_int(struct hisi_pmu *uc_pmu,
struct hw_perf_event *hwc)
{
@@ -326,25 +365,24 @@ static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx)
static int hisi_uc_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *uc_pmu)
{
+ hisi_uncore_pmu_init_topology(uc_pmu, &pdev->dev);
+
/*
* Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to
* identify the topology information of UC PMU devices in the chip.
* They have some CCLs per SCCL and then 4 UC PMU per CCL.
*/
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &uc_pmu->sccl_id)) {
+ if (uc_pmu->topo.sccl_id < 0) {
dev_err(&pdev->dev, "Can not read uc sccl-id!\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
- &uc_pmu->ccl_id)) {
+ if (uc_pmu->topo.ccl_id < 0) {
dev_err(&pdev->dev, "Can not read uc ccl-id!\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id",
- &uc_pmu->sub_id)) {
+ if (uc_pmu->topo.sub_id < 0) {
dev_err(&pdev->dev, "Can not read sub-id!\n");
return -EINVAL;
}
@@ -399,34 +437,11 @@ static const struct attribute_group hisi_uc_pmu_events_group = {
.attrs = hisi_uc_pmu_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_uc_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static const struct attribute_group hisi_uc_pmu_cpumask_attr_group = {
- .attrs = hisi_uc_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_uc_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_uc_pmu_identifier_attrs[] = {
- &hisi_uc_pmu_identifier_attr.attr,
- NULL
-};
-
-static const struct attribute_group hisi_uc_pmu_identifier_group = {
- .attrs = hisi_uc_pmu_identifier_attrs,
-};
-
static const struct attribute_group *hisi_uc_pmu_attr_groups[] = {
&hisi_uc_pmu_format_group,
&hisi_uc_pmu_events_group,
- &hisi_uc_pmu_cpumask_attr_group,
- &hisi_uc_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
@@ -498,8 +513,9 @@ static int hisi_uc_pmu_probe(struct platform_device *pdev)
if (ret)
return ret;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u",
- uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%d",
+ uc_pmu->topo.sccl_id, uc_pmu->topo.ccl_id,
+ uc_pmu->topo.sub_id);
if (!name)
return -ENOMEM;
@@ -573,6 +589,7 @@ static void __exit hisi_uc_pmu_module_exit(void)
}
module_exit(hisi_uc_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Junhao He <hejunhao3@huawei.com>");
diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c
index 16869bf5bf4c..c157f3572cae 100644
--- a/drivers/perf/hisilicon/hns3_pmu.c
+++ b/drivers/perf/hisilicon/hns3_pmu.c
@@ -363,16 +363,6 @@ HNS3_PMU_FILTER_ATTR(global, config1, 52, 52);
HNS3_PMU_EVT_PPS_##_name##_TIME, \
HNS3_PMU_FILTER_INTR_##_name})
-static ssize_t hns3_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *eattr;
-
- eattr = container_of(attr, struct dev_ext_attribute, attr);
-
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-
static ssize_t hns3_pmu_event_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -421,7 +411,7 @@ static ssize_t hns3_pmu_filter_mode_show(struct device *dev,
})[0].attr.attr)
#define HNS3_PMU_FORMAT_ATTR(_name, _format) \
- HNS3_PMU_ATTR(_name, hns3_pmu_format_show, (void *)_format)
+ HNS3_PMU_ATTR(_name, device_show_string, _format)
#define HNS3_PMU_EVENT_ATTR(_name, _event) \
HNS3_PMU_ATTR(_name, hns3_pmu_event_show, (void *)_event)
#define HNS3_PMU_FLT_MODE_ATTR(_name, _event) \
@@ -1085,15 +1075,27 @@ static bool hns3_pmu_validate_event_group(struct perf_event *event)
return false;
for (num = 0; num < counters; num++) {
+ /*
+ * If we find a related event, then it's a valid group
+ * since we don't need to allocate a new counter for it.
+ */
if (hns3_pmu_cmp_event(event_group[num], sibling))
break;
}
+ /*
+ * Otherwise it's a new event but if there's no available counter,
+ * fail the check since we cannot schedule all the events in
+ * the group simultaneously.
+ */
+ if (num == HNS3_PMU_MAX_HW_EVENTS)
+ return false;
+
if (num == counters)
event_group[counters++] = sibling;
}
- return counters <= HNS3_PMU_MAX_HW_EVENTS;
+ return true;
}
static u32 hns3_pmu_get_filter_condition(struct perf_event *event)
@@ -1419,6 +1421,7 @@ static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
hns3_pmu->pmu = (struct pmu) {
.name = name,
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.event_init = hns3_pmu_event_init,
.pmu_enable = hns3_pmu_enable,
.pmu_disable = hns3_pmu_disable,
@@ -1515,7 +1518,7 @@ static int hns3_pmu_irq_register(struct pci_dev *pdev,
return ret;
}
- ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev);
+ ret = devm_add_action_or_reset(&pdev->dev, hns3_pmu_free_irq, pdev);
if (ret) {
pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret);
return ret;
diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
index 524ba82bfce2..039feded9152 100644
--- a/drivers/perf/marvell_cn10k_ddr_pmu.c
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver
+/*
+ * Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver
*
- * Copyright (C) 2021 Marvell.
+ * Copyright (C) 2021-2024 Marvell.
*/
#include <linux/init.h>
@@ -14,24 +15,29 @@
#include <linux/platform_device.h>
/* Performance Counters Operating Mode Control Registers */
-#define DDRC_PERF_CNT_OP_MODE_CTRL 0x8020
-#define OP_MODE_CTRL_VAL_MANNUAL 0x1
+#define CN10K_DDRC_PERF_CNT_OP_MODE_CTRL 0x8020
+#define ODY_DDRC_PERF_CNT_OP_MODE_CTRL 0x20020
+#define OP_MODE_CTRL_VAL_MANUAL 0x1
/* Performance Counters Start Operation Control Registers */
-#define DDRC_PERF_CNT_START_OP_CTRL 0x8028
+#define CN10K_DDRC_PERF_CNT_START_OP_CTRL 0x8028
+#define ODY_DDRC_PERF_CNT_START_OP_CTRL 0x200A0
#define START_OP_CTRL_VAL_START 0x1ULL
#define START_OP_CTRL_VAL_ACTIVE 0x2
/* Performance Counters End Operation Control Registers */
-#define DDRC_PERF_CNT_END_OP_CTRL 0x8030
+#define CN10K_DDRC_PERF_CNT_END_OP_CTRL 0x8030
+#define ODY_DDRC_PERF_CNT_END_OP_CTRL 0x200E0
#define END_OP_CTRL_VAL_END 0x1ULL
/* Performance Counters End Status Registers */
-#define DDRC_PERF_CNT_END_STATUS 0x8038
+#define CN10K_DDRC_PERF_CNT_END_STATUS 0x8038
+#define ODY_DDRC_PERF_CNT_END_STATUS 0x20120
#define END_STATUS_VAL_END_TIMER_MODE_END 0x1
/* Performance Counters Configuration Registers */
-#define DDRC_PERF_CFG_BASE 0x8040
+#define CN10K_DDRC_PERF_CFG_BASE 0x8040
+#define ODY_DDRC_PERF_CFG_BASE 0x20160
/* 8 Generic event counter + 2 fixed event counters */
#define DDRC_PERF_NUM_GEN_COUNTERS 8
@@ -42,18 +48,28 @@
DDRC_PERF_NUM_FIX_COUNTERS)
/* Generic event counter registers */
-#define DDRC_PERF_CFG(n) (DDRC_PERF_CFG_BASE + 8 * (n))
+#define DDRC_PERF_CFG(base, n) ((base) + 8 * (n))
#define EVENT_ENABLE BIT_ULL(63)
/* Two dedicated event counters for DDR reads and writes */
#define EVENT_DDR_READS 101
#define EVENT_DDR_WRITES 100
+#define DDRC_PERF_REG(base, n) ((base) + 8 * (n))
/*
* programmable events IDs in programmable event counters.
* DO NOT change these event-id numbers, they are used to
* program event bitmap in h/w.
*/
+#define EVENT_DFI_CMD_IS_RETRY 61
+#define EVENT_RD_UC_ECC_ERROR 60
+#define EVENT_RD_CRC_ERROR 59
+#define EVENT_CAPAR_ERROR 58
+#define EVENT_WR_CRC_ERROR 57
+#define EVENT_DFI_PARITY_POISON 56
+#define EVENT_RETRY_FIFO_FULL 46
+#define EVENT_DFI_CYCLES 45
+
#define EVENT_OP_IS_ZQLATCH 55
#define EVENT_OP_IS_ZQSTART 54
#define EVENT_OP_IS_TCR_MRR 53
@@ -102,28 +118,37 @@
#define EVENT_HIF_RD_OR_WR 1
/* Event counter value registers */
-#define DDRC_PERF_CNT_VALUE_BASE 0x8080
-#define DDRC_PERF_CNT_VALUE(n) (DDRC_PERF_CNT_VALUE_BASE + 8 * (n))
+#define CN10K_DDRC_PERF_CNT_VALUE_BASE 0x8080
+#define ODY_DDRC_PERF_CNT_VALUE_BASE 0x201C0
/* Fixed event counter enable/disable register */
-#define DDRC_PERF_CNT_FREERUN_EN 0x80C0
+#define CN10K_DDRC_PERF_CNT_FREERUN_EN 0x80C0
#define DDRC_PERF_FREERUN_WRITE_EN 0x1
#define DDRC_PERF_FREERUN_READ_EN 0x2
/* Fixed event counter control register */
-#define DDRC_PERF_CNT_FREERUN_CTRL 0x80C8
+#define CN10K_DDRC_PERF_CNT_FREERUN_CTRL 0x80C8
+#define ODY_DDRC_PERF_CNT_FREERUN_CTRL 0x20240
#define DDRC_FREERUN_WRITE_CNT_CLR 0x1
#define DDRC_FREERUN_READ_CNT_CLR 0x2
-/* Fixed event counter value register */
-#define DDRC_PERF_CNT_VALUE_WR_OP 0x80D0
-#define DDRC_PERF_CNT_VALUE_RD_OP 0x80D8
+/* Fixed event counter clear register, defined only for Odyssey */
+#define ODY_DDRC_PERF_CNT_FREERUN_CLR 0x20248
+
#define DDRC_PERF_CNT_VALUE_OVERFLOW BIT_ULL(48)
#define DDRC_PERF_CNT_MAX_VALUE GENMASK_ULL(48, 0)
+/* Fixed event counter value register */
+#define CN10K_DDRC_PERF_CNT_VALUE_WR_OP 0x80D0
+#define CN10K_DDRC_PERF_CNT_VALUE_RD_OP 0x80D8
+#define ODY_DDRC_PERF_CNT_VALUE_WR_OP 0x20250
+#define ODY_DDRC_PERF_CNT_VALUE_RD_OP 0x20258
+
struct cn10k_ddr_pmu {
struct pmu pmu;
void __iomem *base;
+ const struct ddr_pmu_platform_data *p_data;
+ const struct ddr_pmu_ops *ops;
unsigned int cpu;
struct device *dev;
int active_events;
@@ -132,8 +157,36 @@ struct cn10k_ddr_pmu {
struct hlist_node node;
};
+struct ddr_pmu_ops {
+ void (*enable_read_freerun_counter)(struct cn10k_ddr_pmu *pmu,
+ bool enable);
+ void (*enable_write_freerun_counter)(struct cn10k_ddr_pmu *pmu,
+ bool enable);
+ void (*clear_read_freerun_counter)(struct cn10k_ddr_pmu *pmu);
+ void (*clear_write_freerun_counter)(struct cn10k_ddr_pmu *pmu);
+ void (*pmu_overflow_handler)(struct cn10k_ddr_pmu *pmu, int evt_idx);
+};
+
#define to_cn10k_ddr_pmu(p) container_of(p, struct cn10k_ddr_pmu, pmu)
+struct ddr_pmu_platform_data {
+ u64 counter_overflow_val;
+ u64 counter_max_val;
+ u64 cnt_base;
+ u64 cfg_base;
+ u64 cnt_op_mode_ctrl;
+ u64 cnt_start_op_ctrl;
+ u64 cnt_end_op_ctrl;
+ u64 cnt_end_status;
+ u64 cnt_freerun_en;
+ u64 cnt_freerun_ctrl;
+ u64 cnt_freerun_clr;
+ u64 cnt_value_wr_op;
+ u64 cnt_value_rd_op;
+ bool is_cn10k;
+ bool is_ody;
+};
+
static ssize_t cn10k_ddr_pmu_event_show(struct device *dev,
struct device_attribute *attr,
char *page)
@@ -209,6 +262,85 @@ static struct attribute *cn10k_ddr_perf_events_attrs[] = {
NULL
};
+static struct attribute *odyssey_ddr_perf_events_attrs[] = {
+ /* Programmable */
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_or_wr_access, EVENT_HIF_RD_OR_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_wr_access, EVENT_HIF_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_access, EVENT_HIF_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rmw_access, EVENT_HIF_RMW),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_pri_rdaccess, EVENT_HIF_HI_PRI_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_bypass_access, EVENT_READ_BYPASS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_act_bypass_access, EVENT_ACT_BYPASS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_wr_data_access,
+ EVENT_DFI_WR_DATA_CYCLES),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_rd_data_access,
+ EVENT_DFI_RD_DATA_CYCLES),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hpri_sched_rd_crit_access,
+ EVENT_HPR_XACT_WHEN_CRITICAL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_lpri_sched_rd_crit_access,
+ EVENT_LPR_XACT_WHEN_CRITICAL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_trxn_crit_access,
+ EVENT_WR_XACT_WHEN_CRITICAL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_active_access, EVENT_OP_IS_ACTIVATE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_or_wr_access,
+ EVENT_OP_IS_RD_OR_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_active_access,
+ EVENT_OP_IS_RD_ACTIVATE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_read, EVENT_OP_IS_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_write, EVENT_OP_IS_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_mwr, EVENT_OP_IS_MWR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge, EVENT_OP_IS_PRECHARGE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_rdwr,
+ EVENT_PRECHARGE_FOR_RDWR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_other,
+ EVENT_PRECHARGE_FOR_OTHER),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_rdwr_transitions, EVENT_RDWR_TRANSITIONS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_write_combine, EVENT_WRITE_COMBINE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_war_hazard, EVENT_WAR_HAZARD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_raw_hazard, EVENT_RAW_HAZARD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_waw_hazard, EVENT_WAW_HAZARD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_selfref, EVENT_OP_IS_ENTER_SELFREF),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_powerdown,
+ EVENT_OP_IS_ENTER_POWERDOWN),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_mpsm, EVENT_OP_IS_ENTER_MPSM),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_refresh, EVENT_OP_IS_REFRESH),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_crit_ref, EVENT_OP_IS_CRIT_REF),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_spec_ref, EVENT_OP_IS_SPEC_REF),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_load_mode, EVENT_OP_IS_LOAD_MODE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_cycles, EVENT_DFI_CYCLES),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_retry_fifo_full,
+ EVENT_RETRY_FIFO_FULL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd,
+ EVENT_VISIBLE_WIN_LIMIT_REACHED_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_wr,
+ EVENT_VISIBLE_WIN_LIMIT_REACHED_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mpc, EVENT_OP_IS_DQSOSC_MPC),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mrr, EVENT_OP_IS_DQSOSC_MRR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_tcr_mrr, EVENT_OP_IS_TCR_MRR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_zqstart, EVENT_OP_IS_ZQSTART),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_zqlatch, EVENT_OP_IS_ZQLATCH),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_parity_poison,
+ EVENT_DFI_PARITY_POISON),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_crc_error, EVENT_WR_CRC_ERROR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_capar_error, EVENT_CAPAR_ERROR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_crc_error, EVENT_RD_CRC_ERROR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_uc_ecc_error, EVENT_RD_UC_ECC_ERROR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_cmd_is_retry, EVENT_DFI_CMD_IS_RETRY),
+ /* Free run event counters */
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_reads, EVENT_DDR_READS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_writes, EVENT_DDR_WRITES),
+ NULL
+};
+
+static struct attribute_group odyssey_ddr_perf_events_attr_group = {
+ .name = "events",
+ .attrs = odyssey_ddr_perf_events_attrs,
+};
+
static struct attribute_group cn10k_ddr_perf_events_attr_group = {
.name = "events",
.attrs = cn10k_ddr_perf_events_attrs,
@@ -254,6 +386,13 @@ static const struct attribute_group *cn10k_attr_groups[] = {
NULL,
};
+static const struct attribute_group *odyssey_attr_groups[] = {
+ &odyssey_ddr_perf_events_attr_group,
+ &cn10k_ddr_perf_format_attr_group,
+ &cn10k_ddr_perf_cpumask_attr_group,
+ NULL
+};
+
/* Default poll timeout is 100 sec, which is very sufficient for
* 48 bit counter incremented max at 5.6 GT/s, which may take many
* hours to overflow.
@@ -266,9 +405,18 @@ static ktime_t cn10k_ddr_pmu_timer_period(void)
return ms_to_ktime((u64)cn10k_ddr_pmu_poll_period_sec * USEC_PER_SEC);
}
-static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
+static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap,
+ struct cn10k_ddr_pmu *ddr_pmu)
{
+ int err = 0;
+
switch (eventid) {
+ case EVENT_DFI_PARITY_POISON ...EVENT_DFI_CMD_IS_RETRY:
+ if (!ddr_pmu->p_data->is_ody) {
+ err = -EINVAL;
+ break;
+ }
+ fallthrough;
case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
*event_bitmap = (1ULL << (eventid - 1));
@@ -279,11 +427,12 @@ static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
*event_bitmap = (0xFULL << (eventid - 1));
break;
default:
- pr_err("%s Invalid eventid %d\n", __func__, eventid);
- return -EINVAL;
+ err = -EINVAL;
}
- return 0;
+ if (err)
+ pr_err("%s Invalid eventid %d\n", __func__, eventid);
+ return err;
}
static int cn10k_ddr_perf_alloc_counter(struct cn10k_ddr_pmu *pmu,
@@ -351,9 +500,33 @@ static int cn10k_ddr_perf_event_init(struct perf_event *event)
return 0;
}
+static void cn10k_ddr_perf_counter_start(struct cn10k_ddr_pmu *ddr_pmu,
+ int counter)
+{
+ const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
+ u64 ctrl_reg = p_data->cnt_start_op_ctrl;
+
+ writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base +
+ DDRC_PERF_REG(ctrl_reg, counter));
+}
+
+static void cn10k_ddr_perf_counter_stop(struct cn10k_ddr_pmu *ddr_pmu,
+ int counter)
+{
+ const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
+ u64 ctrl_reg = p_data->cnt_end_op_ctrl;
+
+ writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
+ DDRC_PERF_REG(ctrl_reg, counter));
+}
+
static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
int counter, bool enable)
{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 ctrl_reg = pmu->p_data->cnt_op_mode_ctrl;
+ const struct ddr_pmu_ops *ops = pmu->ops;
+ bool is_ody = pmu->p_data->is_ody;
u32 reg;
u64 val;
@@ -363,7 +536,7 @@ static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
}
if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
- reg = DDRC_PERF_CFG(counter);
+ reg = DDRC_PERF_CFG(p_data->cfg_base, counter);
val = readq_relaxed(pmu->base + reg);
if (enable)
@@ -372,40 +545,52 @@ static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
val &= ~EVENT_ENABLE;
writeq_relaxed(val, pmu->base + reg);
- } else {
- val = readq_relaxed(pmu->base + DDRC_PERF_CNT_FREERUN_EN);
- if (enable) {
- if (counter == DDRC_PERF_READ_COUNTER_IDX)
- val |= DDRC_PERF_FREERUN_READ_EN;
- else
- val |= DDRC_PERF_FREERUN_WRITE_EN;
- } else {
- if (counter == DDRC_PERF_READ_COUNTER_IDX)
- val &= ~DDRC_PERF_FREERUN_READ_EN;
- else
- val &= ~DDRC_PERF_FREERUN_WRITE_EN;
+
+ if (is_ody) {
+ if (enable) {
+ /*
+ * Setup the PMU counter to work in
+ * manual mode
+ */
+ reg = DDRC_PERF_REG(ctrl_reg, counter);
+ writeq_relaxed(OP_MODE_CTRL_VAL_MANUAL,
+ pmu->base + reg);
+
+ cn10k_ddr_perf_counter_start(pmu, counter);
+ } else {
+ cn10k_ddr_perf_counter_stop(pmu, counter);
+ }
}
- writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+ } else {
+ if (counter == DDRC_PERF_READ_COUNTER_IDX)
+ ops->enable_read_freerun_counter(pmu, enable);
+ else
+ ops->enable_write_freerun_counter(pmu, enable);
}
}
static u64 cn10k_ddr_perf_read_counter(struct cn10k_ddr_pmu *pmu, int counter)
{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
u64 val;
if (counter == DDRC_PERF_READ_COUNTER_IDX)
- return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_RD_OP);
+ return readq_relaxed(pmu->base +
+ p_data->cnt_value_rd_op);
if (counter == DDRC_PERF_WRITE_COUNTER_IDX)
- return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_WR_OP);
+ return readq_relaxed(pmu->base +
+ p_data->cnt_value_wr_op);
- val = readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE(counter));
+ val = readq_relaxed(pmu->base +
+ DDRC_PERF_REG(p_data->cnt_base, counter));
return val;
}
static void cn10k_ddr_perf_event_update(struct perf_event *event)
{
struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
struct hw_perf_event *hwc = &event->hw;
u64 prev_count, new_count, mask;
@@ -414,7 +599,7 @@ static void cn10k_ddr_perf_event_update(struct perf_event *event)
new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
} while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
- mask = DDRC_PERF_CNT_MAX_VALUE;
+ mask = p_data->counter_max_val;
local64_add((new_count - prev_count) & mask, &event->count);
}
@@ -435,6 +620,8 @@ static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags)
static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
{
struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ const struct ddr_pmu_ops *ops = pmu->ops;
struct hw_perf_event *hwc = &event->hw;
u8 config = event->attr.config;
int counter, ret;
@@ -454,8 +641,8 @@ static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
/* Generic counters, configure event id */
- reg_offset = DDRC_PERF_CFG(counter);
- ret = ddr_perf_get_event_bitmap(config, &val);
+ reg_offset = DDRC_PERF_CFG(p_data->cfg_base, counter);
+ ret = ddr_perf_get_event_bitmap(config, &val, pmu);
if (ret)
return ret;
@@ -463,11 +650,9 @@ static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
} else {
/* fixed event counter, clear counter value */
if (counter == DDRC_PERF_READ_COUNTER_IDX)
- val = DDRC_FREERUN_READ_CNT_CLR;
+ ops->clear_read_freerun_counter(pmu);
else
- val = DDRC_FREERUN_WRITE_CNT_CLR;
-
- writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_CTRL);
+ ops->clear_write_freerun_counter(pmu);
}
hwc->state |= PERF_HES_STOPPED;
@@ -512,17 +697,19 @@ static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags)
static void cn10k_ddr_perf_pmu_enable(struct pmu *pmu)
{
struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+ const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base +
- DDRC_PERF_CNT_START_OP_CTRL);
+ p_data->cnt_start_op_ctrl);
}
static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
{
struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+ const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
- DDRC_PERF_CNT_END_OP_CTRL);
+ p_data->cnt_end_op_ctrl);
}
static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
@@ -547,8 +734,123 @@ static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
}
}
+static void ddr_pmu_enable_read_freerun(struct cn10k_ddr_pmu *pmu, bool enable)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = readq_relaxed(pmu->base + p_data->cnt_freerun_en);
+ if (enable)
+ val |= DDRC_PERF_FREERUN_READ_EN;
+ else
+ val &= ~DDRC_PERF_FREERUN_READ_EN;
+
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_en);
+}
+
+static void ddr_pmu_enable_write_freerun(struct cn10k_ddr_pmu *pmu, bool enable)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = readq_relaxed(pmu->base + p_data->cnt_freerun_en);
+ if (enable)
+ val |= DDRC_PERF_FREERUN_WRITE_EN;
+ else
+ val &= ~DDRC_PERF_FREERUN_WRITE_EN;
+
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_en);
+}
+
+static void ddr_pmu_read_clear_freerun(struct cn10k_ddr_pmu *pmu)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = DDRC_FREERUN_READ_CNT_CLR;
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl);
+}
+
+static void ddr_pmu_write_clear_freerun(struct cn10k_ddr_pmu *pmu)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = DDRC_FREERUN_WRITE_CNT_CLR;
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl);
+}
+
+static void ddr_pmu_overflow_hander(struct cn10k_ddr_pmu *pmu, int evt_idx)
+{
+ cn10k_ddr_perf_event_update_all(pmu);
+ cn10k_ddr_perf_pmu_disable(&pmu->pmu);
+ cn10k_ddr_perf_pmu_enable(&pmu->pmu);
+}
+
+static void ddr_pmu_ody_enable_read_freerun(struct cn10k_ddr_pmu *pmu,
+ bool enable)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = readq_relaxed(pmu->base + p_data->cnt_freerun_ctrl);
+ if (enable)
+ val |= DDRC_PERF_FREERUN_READ_EN;
+ else
+ val &= ~DDRC_PERF_FREERUN_READ_EN;
+
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl);
+}
+
+static void ddr_pmu_ody_enable_write_freerun(struct cn10k_ddr_pmu *pmu,
+ bool enable)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = readq_relaxed(pmu->base + p_data->cnt_freerun_ctrl);
+ if (enable)
+ val |= DDRC_PERF_FREERUN_WRITE_EN;
+ else
+ val &= ~DDRC_PERF_FREERUN_WRITE_EN;
+
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl);
+}
+
+static void ddr_pmu_ody_read_clear_freerun(struct cn10k_ddr_pmu *pmu)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = DDRC_FREERUN_READ_CNT_CLR;
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_clr);
+}
+
+static void ddr_pmu_ody_write_clear_freerun(struct cn10k_ddr_pmu *pmu)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = DDRC_FREERUN_WRITE_CNT_CLR;
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_clr);
+}
+
+static void ddr_pmu_ody_overflow_hander(struct cn10k_ddr_pmu *pmu, int evt_idx)
+{
+ /*
+ * On reaching the maximum value of the counter, the counter freezes
+ * there. The particular event is updated and the respective counter
+ * is stopped and started again so that it starts counting from zero
+ */
+ cn10k_ddr_perf_event_update(pmu->events[evt_idx]);
+ cn10k_ddr_perf_counter_stop(pmu, evt_idx);
+ cn10k_ddr_perf_counter_start(pmu, evt_idx);
+}
+
static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ const struct ddr_pmu_ops *ops = pmu->ops;
struct perf_event *event;
struct hw_perf_event *hwc;
u64 prev_count, new_count;
@@ -586,11 +888,9 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
continue;
value = cn10k_ddr_perf_read_counter(pmu, i);
- if (value == DDRC_PERF_CNT_MAX_VALUE) {
+ if (value == p_data->counter_max_val) {
pr_info("Counter-(%d) reached max value\n", i);
- cn10k_ddr_perf_event_update_all(pmu);
- cn10k_ddr_perf_pmu_disable(&pmu->pmu);
- cn10k_ddr_perf_pmu_enable(&pmu->pmu);
+ ops->pmu_overflow_handler(pmu, i);
}
}
@@ -629,11 +929,68 @@ static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
return 0;
}
+static const struct ddr_pmu_ops ddr_pmu_ops = {
+ .enable_read_freerun_counter = ddr_pmu_enable_read_freerun,
+ .enable_write_freerun_counter = ddr_pmu_enable_write_freerun,
+ .clear_read_freerun_counter = ddr_pmu_read_clear_freerun,
+ .clear_write_freerun_counter = ddr_pmu_write_clear_freerun,
+ .pmu_overflow_handler = ddr_pmu_overflow_hander,
+};
+
+#if defined(CONFIG_ACPI) || defined(CONFIG_OF)
+static const struct ddr_pmu_platform_data cn10k_ddr_pmu_pdata = {
+ .counter_overflow_val = BIT_ULL(48),
+ .counter_max_val = GENMASK_ULL(48, 0),
+ .cnt_base = CN10K_DDRC_PERF_CNT_VALUE_BASE,
+ .cfg_base = CN10K_DDRC_PERF_CFG_BASE,
+ .cnt_op_mode_ctrl = CN10K_DDRC_PERF_CNT_OP_MODE_CTRL,
+ .cnt_start_op_ctrl = CN10K_DDRC_PERF_CNT_START_OP_CTRL,
+ .cnt_end_op_ctrl = CN10K_DDRC_PERF_CNT_END_OP_CTRL,
+ .cnt_end_status = CN10K_DDRC_PERF_CNT_END_STATUS,
+ .cnt_freerun_en = CN10K_DDRC_PERF_CNT_FREERUN_EN,
+ .cnt_freerun_ctrl = CN10K_DDRC_PERF_CNT_FREERUN_CTRL,
+ .cnt_freerun_clr = 0,
+ .cnt_value_wr_op = CN10K_DDRC_PERF_CNT_VALUE_WR_OP,
+ .cnt_value_rd_op = CN10K_DDRC_PERF_CNT_VALUE_RD_OP,
+ .is_cn10k = TRUE,
+};
+#endif
+
+static const struct ddr_pmu_ops ddr_pmu_ody_ops = {
+ .enable_read_freerun_counter = ddr_pmu_ody_enable_read_freerun,
+ .enable_write_freerun_counter = ddr_pmu_ody_enable_write_freerun,
+ .clear_read_freerun_counter = ddr_pmu_ody_read_clear_freerun,
+ .clear_write_freerun_counter = ddr_pmu_ody_write_clear_freerun,
+ .pmu_overflow_handler = ddr_pmu_ody_overflow_hander,
+};
+
+#ifdef CONFIG_ACPI
+static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = {
+ .counter_overflow_val = 0,
+ .counter_max_val = GENMASK_ULL(63, 0),
+ .cnt_base = ODY_DDRC_PERF_CNT_VALUE_BASE,
+ .cfg_base = ODY_DDRC_PERF_CFG_BASE,
+ .cnt_op_mode_ctrl = ODY_DDRC_PERF_CNT_OP_MODE_CTRL,
+ .cnt_start_op_ctrl = ODY_DDRC_PERF_CNT_START_OP_CTRL,
+ .cnt_end_op_ctrl = ODY_DDRC_PERF_CNT_END_OP_CTRL,
+ .cnt_end_status = ODY_DDRC_PERF_CNT_END_STATUS,
+ .cnt_freerun_en = 0,
+ .cnt_freerun_ctrl = ODY_DDRC_PERF_CNT_FREERUN_CTRL,
+ .cnt_freerun_clr = ODY_DDRC_PERF_CNT_FREERUN_CLR,
+ .cnt_value_wr_op = ODY_DDRC_PERF_CNT_VALUE_WR_OP,
+ .cnt_value_rd_op = ODY_DDRC_PERF_CNT_VALUE_RD_OP,
+ .is_ody = TRUE,
+};
+#endif
+
static int cn10k_ddr_perf_probe(struct platform_device *pdev)
{
+ const struct ddr_pmu_platform_data *dev_data;
struct cn10k_ddr_pmu *ddr_pmu;
struct resource *res;
void __iomem *base;
+ bool is_cn10k;
+ bool is_ody;
char *name;
int ret;
@@ -644,30 +1001,60 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
ddr_pmu->dev = &pdev->dev;
platform_set_drvdata(pdev, ddr_pmu);
+ dev_data = device_get_match_data(&pdev->dev);
+ if (!dev_data) {
+ dev_err(&pdev->dev, "Error: No device match data found\n");
+ return -ENODEV;
+ }
+
base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(base))
return PTR_ERR(base);
ddr_pmu->base = base;
- /* Setup the PMU counter to work in manual mode */
- writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base +
- DDRC_PERF_CNT_OP_MODE_CTRL);
-
- ddr_pmu->pmu = (struct pmu) {
- .module = THIS_MODULE,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
- .task_ctx_nr = perf_invalid_context,
- .attr_groups = cn10k_attr_groups,
- .event_init = cn10k_ddr_perf_event_init,
- .add = cn10k_ddr_perf_event_add,
- .del = cn10k_ddr_perf_event_del,
- .start = cn10k_ddr_perf_event_start,
- .stop = cn10k_ddr_perf_event_stop,
- .read = cn10k_ddr_perf_event_update,
- .pmu_enable = cn10k_ddr_perf_pmu_enable,
- .pmu_disable = cn10k_ddr_perf_pmu_disable,
- };
+ ddr_pmu->p_data = dev_data;
+ is_cn10k = ddr_pmu->p_data->is_cn10k;
+ is_ody = ddr_pmu->p_data->is_ody;
+
+ if (is_cn10k) {
+ ddr_pmu->ops = &ddr_pmu_ops;
+ /* Setup the PMU counter to work in manual mode */
+ writeq_relaxed(OP_MODE_CTRL_VAL_MANUAL, ddr_pmu->base +
+ ddr_pmu->p_data->cnt_op_mode_ctrl);
+
+ ddr_pmu->pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = cn10k_attr_groups,
+ .event_init = cn10k_ddr_perf_event_init,
+ .add = cn10k_ddr_perf_event_add,
+ .del = cn10k_ddr_perf_event_del,
+ .start = cn10k_ddr_perf_event_start,
+ .stop = cn10k_ddr_perf_event_stop,
+ .read = cn10k_ddr_perf_event_update,
+ .pmu_enable = cn10k_ddr_perf_pmu_enable,
+ .pmu_disable = cn10k_ddr_perf_pmu_disable,
+ };
+ }
+
+ if (is_ody) {
+ ddr_pmu->ops = &ddr_pmu_ody_ops;
+
+ ddr_pmu->pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = odyssey_attr_groups,
+ .event_init = cn10k_ddr_perf_event_init,
+ .add = cn10k_ddr_perf_event_add,
+ .del = cn10k_ddr_perf_event_del,
+ .start = cn10k_ddr_perf_event_start,
+ .stop = cn10k_ddr_perf_event_stop,
+ .read = cn10k_ddr_perf_event_update,
+ };
+ }
/* Choose this cpu to collect perf data */
ddr_pmu->cpu = raw_smp_processor_id();
@@ -688,7 +1075,7 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
if (ret)
goto error;
- pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start);
+ pr_info("DDR PMU Driver for ddrc@%llx\n", res->start);
return 0;
error:
cpuhp_state_remove_instance_nocalls(
@@ -697,7 +1084,7 @@ error:
return ret;
}
-static int cn10k_ddr_perf_remove(struct platform_device *pdev)
+static void cn10k_ddr_perf_remove(struct platform_device *pdev)
{
struct cn10k_ddr_pmu *ddr_pmu = platform_get_drvdata(pdev);
@@ -706,12 +1093,11 @@ static int cn10k_ddr_perf_remove(struct platform_device *pdev)
&ddr_pmu->node);
perf_pmu_unregister(&ddr_pmu->pmu);
- return 0;
}
#ifdef CONFIG_OF
static const struct of_device_id cn10k_ddr_pmu_of_match[] = {
- { .compatible = "marvell,cn10k-ddr-pmu", },
+ { .compatible = "marvell,cn10k-ddr-pmu", .data = &cn10k_ddr_pmu_pdata },
{ },
};
MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match);
@@ -719,7 +1105,8 @@ MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match);
#ifdef CONFIG_ACPI
static const struct acpi_device_id cn10k_ddr_pmu_acpi_match[] = {
- {"MRVL000A", 0},
+ {"MRVL000A", (kernel_ulong_t)&cn10k_ddr_pmu_pdata },
+ {"MRVL000C", (kernel_ulong_t)&odyssey_ddr_pmu_pdata},
{},
};
MODULE_DEVICE_TABLE(acpi, cn10k_ddr_pmu_acpi_match);
@@ -764,4 +1151,5 @@ module_init(cn10k_ddr_pmu_init);
module_exit(cn10k_ddr_pmu_exit);
MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>");
+MODULE_DESCRIPTION("Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index fec8e82edb95..51ccb0befa05 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -37,6 +37,15 @@ struct tad_pmu {
DECLARE_BITMAP(counters_map, TAD_MAX_COUNTERS);
};
+enum mrvl_tad_pmu_version {
+ TAD_PMU_V1 = 1,
+ TAD_PMU_V2,
+};
+
+struct tad_pmu_data {
+ int id;
+};
+
static int tad_pmu_cpuhp_state;
static void tad_pmu_event_counter_read(struct perf_event *event)
@@ -214,6 +223,24 @@ static const struct attribute_group tad_pmu_events_attr_group = {
.attrs = tad_pmu_event_attrs,
};
+static struct attribute *ody_tad_pmu_event_attrs[] = {
+ TAD_PMU_EVENT_ATTR(tad_req_msh_in_exlmn, 0x3),
+ TAD_PMU_EVENT_ATTR(tad_alloc_dtg, 0x1a),
+ TAD_PMU_EVENT_ATTR(tad_alloc_ltg, 0x1b),
+ TAD_PMU_EVENT_ATTR(tad_alloc_any, 0x1c),
+ TAD_PMU_EVENT_ATTR(tad_hit_dtg, 0x1d),
+ TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e),
+ TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f),
+ TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20),
+ TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xFF),
+ NULL
+};
+
+static const struct attribute_group ody_tad_pmu_events_attr_group = {
+ .name = "events",
+ .attrs = ody_tad_pmu_event_attrs,
+};
+
PMU_FORMAT_ATTR(event, "config:0-7");
static struct attribute *tad_pmu_format_attrs[] = {
@@ -252,8 +279,16 @@ static const struct attribute_group *tad_pmu_attr_groups[] = {
NULL
};
+static const struct attribute_group *ody_tad_pmu_attr_groups[] = {
+ &ody_tad_pmu_events_attr_group,
+ &tad_pmu_format_attr_group,
+ &tad_pmu_cpumask_attr_group,
+ NULL
+};
+
static int tad_pmu_probe(struct platform_device *pdev)
{
+ const struct tad_pmu_data *dev_data;
struct device *dev = &pdev->dev;
struct tad_region *regions;
struct tad_pmu *tad_pmu;
@@ -261,6 +296,7 @@ static int tad_pmu_probe(struct platform_device *pdev)
u32 tad_pmu_page_size;
u32 tad_page_size;
u32 tad_cnt;
+ int version;
int i, ret;
char *name;
@@ -270,6 +306,13 @@ static int tad_pmu_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, tad_pmu);
+ dev_data = device_get_match_data(&pdev->dev);
+ if (!dev_data) {
+ dev_err(&pdev->dev, "Error: No device match data found\n");
+ return -ENODEV;
+ }
+ version = dev_data->id;
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
dev_err(&pdev->dev, "Mem resource not found\n");
@@ -319,7 +362,6 @@ static int tad_pmu_probe(struct platform_device *pdev)
tad_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
- .attr_groups = tad_pmu_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE |
PERF_PMU_CAP_NO_INTERRUPT,
.task_ctx_nr = perf_invalid_context,
@@ -332,6 +374,11 @@ static int tad_pmu_probe(struct platform_device *pdev)
.read = tad_pmu_event_counter_read,
};
+ if (version == TAD_PMU_V1)
+ tad_pmu->pmu.attr_groups = tad_pmu_attr_groups;
+ else
+ tad_pmu->pmu.attr_groups = ody_tad_pmu_attr_groups;
+
tad_pmu->cpu = raw_smp_processor_id();
/* Register pmu instance for cpu hotplug */
@@ -351,27 +398,38 @@ static int tad_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int tad_pmu_remove(struct platform_device *pdev)
+static void tad_pmu_remove(struct platform_device *pdev)
{
struct tad_pmu *pmu = platform_get_drvdata(pdev);
cpuhp_state_remove_instance_nocalls(tad_pmu_cpuhp_state,
&pmu->node);
perf_pmu_unregister(&pmu->pmu);
-
- return 0;
}
+#if defined(CONFIG_OF) || defined(CONFIG_ACPI)
+static const struct tad_pmu_data tad_pmu_data = {
+ .id = TAD_PMU_V1,
+};
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct tad_pmu_data tad_pmu_v2_data = {
+ .id = TAD_PMU_V2,
+};
+#endif
+
#ifdef CONFIG_OF
static const struct of_device_id tad_pmu_of_match[] = {
- { .compatible = "marvell,cn10k-tad-pmu", },
+ { .compatible = "marvell,cn10k-tad-pmu", .data = &tad_pmu_data },
{},
};
#endif
#ifdef CONFIG_ACPI
static const struct acpi_device_id tad_pmu_acpi_match[] = {
- {"MRVL000B", 0},
+ {"MRVL000B", (kernel_ulong_t)&tad_pmu_data},
+ {"MRVL000D", (kernel_ulong_t)&tad_pmu_v2_data},
{},
};
MODULE_DEVICE_TABLE(acpi, tad_pmu_acpi_match);
diff --git a/drivers/perf/marvell_pem_pmu.c b/drivers/perf/marvell_pem_pmu.c
new file mode 100644
index 000000000000..29fbcd1848e4
--- /dev/null
+++ b/drivers/perf/marvell_pem_pmu.c
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Marvell PEM(PCIe RC) Performance Monitor Driver
+ *
+ * Copyright (C) 2024 Marvell.
+ */
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+/*
+ * Each of these events maps to a free running 64 bit counter
+ * with no event control, but can be reset.
+ */
+enum pem_events {
+ IB_TLP_NPR,
+ IB_TLP_PR,
+ IB_TLP_CPL,
+ IB_TLP_DWORDS_NPR,
+ IB_TLP_DWORDS_PR,
+ IB_TLP_DWORDS_CPL,
+ IB_INFLIGHT,
+ IB_READS,
+ IB_REQ_NO_RO_NCB,
+ IB_REQ_NO_RO_EBUS,
+ OB_TLP_NPR,
+ OB_TLP_PR,
+ OB_TLP_CPL,
+ OB_TLP_DWORDS_NPR,
+ OB_TLP_DWORDS_PR,
+ OB_TLP_DWORDS_CPL,
+ OB_INFLIGHT,
+ OB_READS,
+ OB_MERGES_NPR,
+ OB_MERGES_PR,
+ OB_MERGES_CPL,
+ ATS_TRANS,
+ ATS_TRANS_LATENCY,
+ ATS_PRI,
+ ATS_PRI_LATENCY,
+ ATS_INV,
+ ATS_INV_LATENCY,
+ PEM_EVENTIDS_MAX
+};
+
+static u64 eventid_to_offset_table[] = {
+ [IB_TLP_NPR] = 0x0,
+ [IB_TLP_PR] = 0x8,
+ [IB_TLP_CPL] = 0x10,
+ [IB_TLP_DWORDS_NPR] = 0x100,
+ [IB_TLP_DWORDS_PR] = 0x108,
+ [IB_TLP_DWORDS_CPL] = 0x110,
+ [IB_INFLIGHT] = 0x200,
+ [IB_READS] = 0x300,
+ [IB_REQ_NO_RO_NCB] = 0x400,
+ [IB_REQ_NO_RO_EBUS] = 0x408,
+ [OB_TLP_NPR] = 0x500,
+ [OB_TLP_PR] = 0x508,
+ [OB_TLP_CPL] = 0x510,
+ [OB_TLP_DWORDS_NPR] = 0x600,
+ [OB_TLP_DWORDS_PR] = 0x608,
+ [OB_TLP_DWORDS_CPL] = 0x610,
+ [OB_INFLIGHT] = 0x700,
+ [OB_READS] = 0x800,
+ [OB_MERGES_NPR] = 0x900,
+ [OB_MERGES_PR] = 0x908,
+ [OB_MERGES_CPL] = 0x910,
+ [ATS_TRANS] = 0x2D18,
+ [ATS_TRANS_LATENCY] = 0x2D20,
+ [ATS_PRI] = 0x2D28,
+ [ATS_PRI_LATENCY] = 0x2D30,
+ [ATS_INV] = 0x2D38,
+ [ATS_INV_LATENCY] = 0x2D40,
+};
+
+struct pem_pmu {
+ struct pmu pmu;
+ void __iomem *base;
+ unsigned int cpu;
+ struct device *dev;
+ struct hlist_node node;
+};
+
+#define to_pem_pmu(p) container_of(p, struct pem_pmu, pmu)
+
+static int eventid_to_offset(int eventid)
+{
+ return eventid_to_offset_table[eventid];
+}
+
+/* Events */
+static ssize_t pem_pmu_event_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define PEM_EVENT_ATTR(_name, _id) \
+ (&((struct perf_pmu_events_attr[]) { \
+ { .attr = __ATTR(_name, 0444, pem_pmu_event_show, NULL), \
+ .id = _id, } \
+ })[0].attr.attr)
+
+static struct attribute *pem_perf_events_attrs[] = {
+ PEM_EVENT_ATTR(ib_tlp_npr, IB_TLP_NPR),
+ PEM_EVENT_ATTR(ib_tlp_pr, IB_TLP_PR),
+ PEM_EVENT_ATTR(ib_tlp_cpl_partid, IB_TLP_CPL),
+ PEM_EVENT_ATTR(ib_tlp_dwords_npr, IB_TLP_DWORDS_NPR),
+ PEM_EVENT_ATTR(ib_tlp_dwords_pr, IB_TLP_DWORDS_PR),
+ PEM_EVENT_ATTR(ib_tlp_dwords_cpl_partid, IB_TLP_DWORDS_CPL),
+ PEM_EVENT_ATTR(ib_inflight, IB_INFLIGHT),
+ PEM_EVENT_ATTR(ib_reads, IB_READS),
+ PEM_EVENT_ATTR(ib_req_no_ro_ncb, IB_REQ_NO_RO_NCB),
+ PEM_EVENT_ATTR(ib_req_no_ro_ebus, IB_REQ_NO_RO_EBUS),
+ PEM_EVENT_ATTR(ob_tlp_npr_partid, OB_TLP_NPR),
+ PEM_EVENT_ATTR(ob_tlp_pr_partid, OB_TLP_PR),
+ PEM_EVENT_ATTR(ob_tlp_cpl_partid, OB_TLP_CPL),
+ PEM_EVENT_ATTR(ob_tlp_dwords_npr_partid, OB_TLP_DWORDS_NPR),
+ PEM_EVENT_ATTR(ob_tlp_dwords_pr_partid, OB_TLP_DWORDS_PR),
+ PEM_EVENT_ATTR(ob_tlp_dwords_cpl_partid, OB_TLP_DWORDS_CPL),
+ PEM_EVENT_ATTR(ob_inflight_partid, OB_INFLIGHT),
+ PEM_EVENT_ATTR(ob_reads_partid, OB_READS),
+ PEM_EVENT_ATTR(ob_merges_npr_partid, OB_MERGES_NPR),
+ PEM_EVENT_ATTR(ob_merges_pr_partid, OB_MERGES_PR),
+ PEM_EVENT_ATTR(ob_merges_cpl_partid, OB_MERGES_CPL),
+ PEM_EVENT_ATTR(ats_trans, ATS_TRANS),
+ PEM_EVENT_ATTR(ats_trans_latency, ATS_TRANS_LATENCY),
+ PEM_EVENT_ATTR(ats_pri, ATS_PRI),
+ PEM_EVENT_ATTR(ats_pri_latency, ATS_PRI_LATENCY),
+ PEM_EVENT_ATTR(ats_inv, ATS_INV),
+ PEM_EVENT_ATTR(ats_inv_latency, ATS_INV_LATENCY),
+ NULL
+};
+
+static struct attribute_group pem_perf_events_attr_group = {
+ .name = "events",
+ .attrs = pem_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-5");
+
+static struct attribute *pem_perf_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL
+};
+
+static struct attribute_group pem_perf_format_attr_group = {
+ .name = "format",
+ .attrs = pem_perf_format_attrs,
+};
+
+/* cpumask */
+static ssize_t pem_perf_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct pem_pmu *pmu = dev_get_drvdata(dev);
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute pem_perf_cpumask_attr =
+ __ATTR(cpumask, 0444, pem_perf_cpumask_show, NULL);
+
+static struct attribute *pem_perf_cpumask_attrs[] = {
+ &pem_perf_cpumask_attr.attr,
+ NULL
+};
+
+static struct attribute_group pem_perf_cpumask_attr_group = {
+ .attrs = pem_perf_cpumask_attrs,
+};
+
+static const struct attribute_group *pem_perf_attr_groups[] = {
+ &pem_perf_events_attr_group,
+ &pem_perf_cpumask_attr_group,
+ &pem_perf_format_attr_group,
+ NULL
+};
+
+static int pem_perf_event_init(struct perf_event *event)
+{
+ struct pem_pmu *pmu = to_pem_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_event *sibling;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (event->attr.config >= PEM_EVENTIDS_MAX)
+ return -EINVAL;
+
+ if (is_sampling_event(event) ||
+ event->attach_state & PERF_ATTACH_TASK) {
+ return -EOPNOTSUPP;
+ }
+
+ if (event->cpu < 0)
+ return -EOPNOTSUPP;
+
+ /* We must NOT create groups containing mixed PMUs */
+ if (event->group_leader->pmu != event->pmu &&
+ !is_software_event(event->group_leader))
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, event->group_leader) {
+ if (sibling->pmu != event->pmu &&
+ !is_software_event(sibling))
+ return -EINVAL;
+ }
+ /*
+ * Set ownership of event to one CPU, same event can not be observed
+ * on multiple cpus at same time.
+ */
+ event->cpu = pmu->cpu;
+ hwc->idx = -1;
+ return 0;
+}
+
+static u64 pem_perf_read_counter(struct pem_pmu *pmu,
+ struct perf_event *event, int eventid)
+{
+ return readq_relaxed(pmu->base + eventid_to_offset(eventid));
+}
+
+static void pem_perf_event_update(struct perf_event *event)
+{
+ struct pem_pmu *pmu = to_pem_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_count, new_count;
+
+ do {
+ prev_count = local64_read(&hwc->prev_count);
+ new_count = pem_perf_read_counter(pmu, event, hwc->idx);
+ } while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
+
+ local64_add((new_count - prev_count), &event->count);
+}
+
+static void pem_perf_event_start(struct perf_event *event, int flags)
+{
+ struct pem_pmu *pmu = to_pem_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int eventid = hwc->idx;
+
+ /*
+ * All counters are free-running and associated with
+ * a fixed event to track in Hardware
+ */
+ local64_set(&hwc->prev_count,
+ pem_perf_read_counter(pmu, event, eventid));
+
+ hwc->state = 0;
+}
+
+static int pem_perf_event_add(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->idx = event->attr.config;
+ if (WARN_ON_ONCE(hwc->idx >= PEM_EVENTIDS_MAX))
+ return -EINVAL;
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ pem_perf_event_start(event, flags);
+
+ return 0;
+}
+
+static void pem_perf_event_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (flags & PERF_EF_UPDATE)
+ pem_perf_event_update(event);
+
+ hwc->state |= PERF_HES_STOPPED;
+}
+
+static void pem_perf_event_del(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ pem_perf_event_stop(event, PERF_EF_UPDATE);
+ hwc->idx = -1;
+}
+
+static int pem_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct pem_pmu *pmu = hlist_entry_safe(node, struct pem_pmu, node);
+ unsigned int target;
+
+ if (cpu != pmu->cpu)
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+ pmu->cpu = target;
+ return 0;
+}
+
+static int pem_perf_probe(struct platform_device *pdev)
+{
+ struct pem_pmu *pem_pmu;
+ struct resource *res;
+ void __iomem *base;
+ char *name;
+ int ret;
+
+ pem_pmu = devm_kzalloc(&pdev->dev, sizeof(*pem_pmu), GFP_KERNEL);
+ if (!pem_pmu)
+ return -ENOMEM;
+
+ pem_pmu->dev = &pdev->dev;
+ platform_set_drvdata(pdev, pem_pmu);
+
+ base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ pem_pmu->base = base;
+
+ pem_pmu->pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = pem_perf_attr_groups,
+ .event_init = pem_perf_event_init,
+ .add = pem_perf_event_add,
+ .del = pem_perf_event_del,
+ .start = pem_perf_event_start,
+ .stop = pem_perf_event_stop,
+ .read = pem_perf_event_update,
+ };
+
+ /* Choose this cpu to collect perf data */
+ pem_pmu->cpu = raw_smp_processor_id();
+
+ name = devm_kasprintf(pem_pmu->dev, GFP_KERNEL, "mrvl_pcie_rc_pmu_%llx",
+ res->start);
+ if (!name)
+ return -ENOMEM;
+
+ cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
+ &pem_pmu->node);
+
+ ret = perf_pmu_register(&pem_pmu->pmu, name, -1);
+ if (ret)
+ goto error;
+
+ return 0;
+error:
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
+ &pem_pmu->node);
+ return ret;
+}
+
+static void pem_perf_remove(struct platform_device *pdev)
+{
+ struct pem_pmu *pem_pmu = platform_get_drvdata(pdev);
+
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
+ &pem_pmu->node);
+
+ perf_pmu_unregister(&pem_pmu->pmu);
+}
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id pem_pmu_acpi_match[] = {
+ {"MRVL000E", 0},
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, pem_pmu_acpi_match);
+#endif
+
+static struct platform_driver pem_pmu_driver = {
+ .driver = {
+ .name = "pem-pmu",
+ .acpi_match_table = ACPI_PTR(pem_pmu_acpi_match),
+ .suppress_bind_attrs = true,
+ },
+ .probe = pem_perf_probe,
+ .remove = pem_perf_remove,
+};
+
+static int __init pem_pmu_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
+ "perf/marvell/pem:online", NULL,
+ pem_pmu_offline_cpu);
+ if (ret)
+ return ret;
+
+ ret = platform_driver_register(&pem_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE);
+ return ret;
+}
+
+static void __exit pem_pmu_exit(void)
+{
+ platform_driver_unregister(&pem_pmu_driver);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE);
+}
+
+module_init(pem_pmu_init);
+module_exit(pem_pmu_exit);
+
+MODULE_DESCRIPTION("Marvell PEM Perf driver");
+MODULE_AUTHOR("Gowthami Thiagarajan <gthiagarajan@marvell.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index 3f9a98c17a89..ea8c85729937 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -801,9 +801,8 @@ static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
- struct cluster_pmu *cluster;
struct l2cache_pmu *l2cache_pmu;
- cpumask_t cluster_online_cpus;
+ struct cluster_pmu *cluster;
unsigned int target;
l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
@@ -820,9 +819,8 @@ static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
cluster->on_cpu = -1;
/* Any other CPU for this cluster which is still online */
- cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus,
- cpu_online_mask);
- target = cpumask_any_but(&cluster_online_cpus, cpu);
+ target = cpumask_any_and_but(&cluster->cluster_cpus,
+ cpu_online_mask, cpu);
if (target >= nr_cpu_ids) {
disable_irq(cluster->irq);
return 0;
@@ -904,6 +902,7 @@ static int l2_cache_pmu_probe(struct platform_device *pdev)
l2cache_pmu->pmu = (struct pmu) {
/* suffix is instance id for future use with multiple sockets */
.name = "l2cache_0",
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = l2_cache_pmu_enable,
.pmu_disable = l2_cache_pmu_disable,
@@ -965,7 +964,7 @@ out_unregister:
return err;
}
-static int l2_cache_pmu_remove(struct platform_device *pdev)
+static void l2_cache_pmu_remove(struct platform_device *pdev)
{
struct l2cache_pmu *l2cache_pmu =
to_l2cache_pmu(platform_get_drvdata(pdev));
@@ -973,7 +972,6 @@ static int l2_cache_pmu_remove(struct platform_device *pdev)
perf_pmu_unregister(&l2cache_pmu->pmu);
cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
&l2cache_pmu->node);
- return 0;
}
static struct platform_driver l2_cache_pmu_driver = {
diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c
index f16783d03db7..66e6cabd6fff 100644
--- a/drivers/perf/qcom_l3_pmu.c
+++ b/drivers/perf/qcom_l3_pmu.c
@@ -609,18 +609,9 @@ static void qcom_l3_cache__event_read(struct perf_event *event)
/* formats */
-static ssize_t l3cache_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *eattr;
-
- eattr = container_of(attr, struct dev_ext_attribute, attr);
- return sysfs_emit(buf, "%s\n", (char *) eattr->var);
-}
-
#define L3CACHE_PMU_FORMAT_ATTR(_name, _config) \
(&((struct dev_ext_attribute[]) { \
- { .attr = __ATTR(_name, 0444, l3cache_pmu_format_show, NULL), \
+ { .attr = __ATTR(_name, 0444, device_show_string, NULL), \
.var = (void *) _config, } \
})[0].attr.attr)
@@ -748,6 +739,7 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
return -ENOMEM;
l3pmu->pmu = (struct pmu) {
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = qcom_l3_cache__pmu_enable,
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index c78a6fd6c57f..7644147d50b4 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -39,7 +39,6 @@ void arch_perf_update_userpage(struct perf_event *event,
userpg->cap_user_time_short = 0;
userpg->cap_user_rdpmc = riscv_perf_user_access(event);
-#ifdef CONFIG_RISCV_PMU
/*
* The counters are 64-bit but the priv spec doesn't mandate all the
* bits to be implemented: that's why, counter width can vary based on
@@ -47,7 +46,6 @@ void arch_perf_update_userpage(struct perf_event *event,
*/
if (userpg->cap_user_rdpmc)
userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1;
-#endif
do {
rd = sched_clock_read_begin(&seq);
@@ -167,7 +165,7 @@ u64 riscv_pmu_event_update(struct perf_event *event)
unsigned long cmask;
u64 oldval, delta;
- if (!rvpmu->ctr_read)
+ if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE))
return 0;
cmask = riscv_pmu_ctr_get_width_mask(event);
@@ -191,8 +189,6 @@ void riscv_pmu_stop(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-
if (!(hwc->state & PERF_HES_STOPPED)) {
if (rvpmu->ctr_stop) {
rvpmu->ctr_stop(event, 0);
@@ -313,6 +309,10 @@ static int riscv_pmu_event_init(struct perf_event *event)
u64 event_config = 0;
uint64_t cmask;
+ /* driver does not support branch stack sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
hwc->flags = 0;
mapped_event = rvpmu->event_map(event, &event_config);
if (mapped_event < 0) {
@@ -404,6 +404,7 @@ struct riscv_pmu *riscv_pmu_alloc(void)
cpuc->n_events = 0;
for (i = 0; i < RISCV_MAX_COUNTERS; i++)
cpuc->events[i] = NULL;
+ cpuc->snapshot_addr = NULL;
}
pmu->pmu = (struct pmu) {
.event_init = riscv_pmu_event_init,
diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c
index fa0bccf4edf2..93c8e0fdb589 100644
--- a/drivers/perf/riscv_pmu_legacy.c
+++ b/drivers/perf/riscv_pmu_legacy.c
@@ -22,13 +22,13 @@ static int pmu_legacy_ctr_get_idx(struct perf_event *event)
struct perf_event_attr *attr = &event->attr;
if (event->attr.type != PERF_TYPE_HARDWARE)
- return -EOPNOTSUPP;
+ return -ENOENT;
if (attr->config == PERF_COUNT_HW_CPU_CYCLES)
return RISCV_PMU_LEGACY_CYCLE;
else if (attr->config == PERF_COUNT_HW_INSTRUCTIONS)
return RISCV_PMU_LEGACY_INSTRET;
else
- return -EOPNOTSUPP;
+ return -ENOENT;
}
/* For legacy config & counter index are same */
@@ -136,6 +136,7 @@ static int pmu_legacy_device_probe(struct platform_device *pdev)
pmu = riscv_pmu_alloc();
if (!pmu)
return -ENOMEM;
+ pmu->pmu.parent = &pdev->dev;
pmu_legacy_init(pmu);
return 0;
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 452aab49db1e..698de8ddf895 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -19,10 +19,37 @@
#include <linux/of.h>
#include <linux/cpu_pm.h>
#include <linux/sched/clock.h>
+#include <linux/soc/andes/irq.h>
+#include <linux/workqueue.h>
#include <asm/errata_list.h>
#include <asm/sbi.h>
#include <asm/cpufeature.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/andes.h>
+
+#define ALT_SBI_PMU_OVERFLOW(__ovl) \
+asm volatile(ALTERNATIVE_2( \
+ "csrr %0, " __stringify(CSR_SCOUNTOVF), \
+ "csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \
+ THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \
+ CONFIG_ERRATA_THEAD_PMU, \
+ "csrr %0, " __stringify(ANDES_CSR_SCOUNTEROF), \
+ ANDES_VENDOR_ID, \
+ RISCV_ISA_VENDOR_EXT_XANDESPMU + RISCV_VENDOR_EXT_ALTERNATIVES_BASE, \
+ CONFIG_ANDES_CUSTOM_PMU) \
+ : "=r" (__ovl) : \
+ : "memory")
+
+#define ALT_SBI_PMU_OVF_CLEAR_PENDING(__irq_mask) \
+asm volatile(ALTERNATIVE( \
+ "csrc " __stringify(CSR_IP) ", %0\n\t", \
+ "csrc " __stringify(ANDES_CSR_SLIP) ", %0\n\t", \
+ ANDES_VENDOR_ID, \
+ RISCV_ISA_VENDOR_EXT_XANDESPMU + RISCV_VENDOR_EXT_ALTERNATIVES_BASE, \
+ CONFIG_ANDES_CUSTOM_PMU) \
+ : : "r"(__irq_mask) \
+ : "memory")
#define SYSCTL_NO_USER_ACCESS 0
#define SYSCTL_USER_ACCESS 1
@@ -33,7 +60,12 @@
#define PERF_EVENT_FLAG_LEGACY BIT(SYSCTL_LEGACY)
PMU_FORMAT_ATTR(event, "config:0-47");
-PMU_FORMAT_ATTR(firmware, "config:63");
+PMU_FORMAT_ATTR(firmware, "config:62-63");
+
+static bool sbi_v2_available;
+static DEFINE_STATIC_KEY_FALSE(sbi_pmu_snapshot_available);
+#define sbi_pmu_snapshot_available() \
+ static_branch_unlikely(&sbi_pmu_snapshot_available)
static struct attribute *riscv_arch_formats_attr[] = {
&format_attr_event.attr,
@@ -61,6 +93,7 @@ static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS;
static union sbi_pmu_ctr_info *pmu_ctr_list;
static bool riscv_pmu_use_irq;
static unsigned int riscv_pmu_irq_num;
+static unsigned int riscv_pmu_irq_mask;
static unsigned int riscv_pmu_irq;
/* Cache the available counters in a bitmask */
@@ -86,7 +119,7 @@ struct sbi_pmu_event_data {
};
};
-static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
+static struct sbi_pmu_event_data pmu_hw_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = {
SBI_PMU_HW_CPU_CYCLES,
SBI_PMU_EVENT_TYPE_HW, 0}},
@@ -120,7 +153,7 @@ static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
};
#define C(x) PERF_COUNT_HW_CACHE_##x
-static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
@@ -265,6 +298,34 @@ static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_M
},
};
+static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ 0, cmask, 0, edata->event_idx, 0, 0);
+ if (!ret.error) {
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
+ ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
+ } else if (ret.error == SBI_ERR_NOT_SUPPORTED) {
+ /* This event cannot be monitored by any counter */
+ edata->event_idx = -ENOENT;
+ }
+}
+
+static void pmu_sbi_check_std_events(struct work_struct *work)
+{
+ for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++)
+ pmu_sbi_check_event(&pmu_hw_event_map[i]);
+
+ for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++)
+ for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++)
+ for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++)
+ pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]);
+}
+
+static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events);
+
static int pmu_sbi_ctr_get_width(int idx)
{
return pmu_ctr_list[idx].width;
@@ -355,13 +416,13 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event)
* but not in the user access mode as we want to use the other counters
* that support sampling/filtering.
*/
- if (hwc->flags & PERF_EVENT_FLAG_LEGACY) {
+ if ((hwc->flags & PERF_EVENT_FLAG_LEGACY) && (event->attr.type == PERF_TYPE_HARDWARE)) {
if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
cmask = 1;
} else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
- cmask = 1UL << (CSR_INSTRET - CSR_CYCLE);
+ cmask = BIT(CSR_INSTRET - CSR_CYCLE);
}
}
@@ -446,9 +507,13 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
{
u32 type = event->attr.type;
u64 config = event->attr.config;
- int bSoftware;
- u64 raw_config_val;
- int ret;
+ int ret = -ENOENT;
+
+ /*
+ * Ensure we are finished checking standard hardware events for
+ * validity before allowing userspace to configure any events.
+ */
+ flush_work(&check_std_events_work);
switch (type) {
case PERF_TYPE_HARDWARE:
@@ -461,46 +526,165 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
break;
case PERF_TYPE_RAW:
/*
- * As per SBI specification, the upper 16 bits must be unused for
- * a raw event. Use the MSB (63b) to distinguish between hardware
- * raw event and firmware events.
+ * As per SBI specification, the upper 16 bits must be unused
+ * for a hardware raw event.
+ * Bits 63:62 are used to distinguish between raw events
+ * 00 - Hardware raw event
+ * 10 - SBI firmware events
+ * 11 - Risc-V platform specific firmware event
*/
- bSoftware = config >> 63;
- raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK;
- if (bSoftware) {
- ret = (raw_config_val & 0xFFFF) |
- (SBI_PMU_EVENT_TYPE_FW << 16);
- } else {
- ret = RISCV_PMU_RAW_EVENT_IDX;
- *econfig = raw_config_val;
+
+ switch (config >> 62) {
+ case 0:
+ /* Return error any bits [48-63] is set as it is not allowed by the spec */
+ if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) {
+ *econfig = config & RISCV_PMU_RAW_EVENT_MASK;
+ ret = RISCV_PMU_RAW_EVENT_IDX;
+ }
+ break;
+ case 2:
+ ret = (config & 0xFFFF) | (SBI_PMU_EVENT_TYPE_FW << 16);
+ break;
+ case 3:
+ /*
+ * For Risc-V platform specific firmware events
+ * Event code - 0xFFFF
+ * Event data - raw event encoding
+ */
+ ret = SBI_PMU_EVENT_TYPE_FW << 16 | RISCV_PLAT_FW_EVENT;
+ *econfig = config & RISCV_PMU_PLAT_FW_EVENT_MASK;
+ break;
+ default:
+ break;
}
break;
default:
- ret = -EINVAL;
break;
}
return ret;
}
+static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
+
+ if (!cpu_hw_evt->snapshot_addr)
+ continue;
+
+ free_page((unsigned long)cpu_hw_evt->snapshot_addr);
+ cpu_hw_evt->snapshot_addr = NULL;
+ cpu_hw_evt->snapshot_addr_phys = 0;
+ }
+}
+
+static int pmu_sbi_snapshot_alloc(struct riscv_pmu *pmu)
+{
+ int cpu;
+ struct page *snapshot_page;
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
+
+ snapshot_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+ if (!snapshot_page) {
+ pmu_sbi_snapshot_free(pmu);
+ return -ENOMEM;
+ }
+ cpu_hw_evt->snapshot_addr = page_to_virt(snapshot_page);
+ cpu_hw_evt->snapshot_addr_phys = page_to_phys(snapshot_page);
+ }
+
+ return 0;
+}
+
+static int pmu_sbi_snapshot_disable(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, SBI_SHMEM_DISABLE,
+ SBI_SHMEM_DISABLE, 0, 0, 0, 0);
+ if (ret.error) {
+ pr_warn("failed to disable snapshot shared memory\n");
+ return sbi_err_map_linux_errno(ret.error);
+ }
+
+ return 0;
+}
+
+static int pmu_sbi_snapshot_setup(struct riscv_pmu *pmu, int cpu)
+{
+ struct cpu_hw_events *cpu_hw_evt;
+ struct sbiret ret = {0};
+
+ cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
+ if (!cpu_hw_evt->snapshot_addr_phys)
+ return -EINVAL;
+
+ if (cpu_hw_evt->snapshot_set_done)
+ return 0;
+
+ if (IS_ENABLED(CONFIG_32BIT))
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+ cpu_hw_evt->snapshot_addr_phys,
+ (u64)(cpu_hw_evt->snapshot_addr_phys) >> 32, 0, 0, 0, 0);
+ else
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+ cpu_hw_evt->snapshot_addr_phys, 0, 0, 0, 0, 0);
+
+ /* Free up the snapshot area memory and fall back to SBI PMU calls without snapshot */
+ if (ret.error) {
+ if (ret.error != SBI_ERR_NOT_SUPPORTED)
+ pr_warn("pmu snapshot setup failed with error %ld\n", ret.error);
+ return sbi_err_map_linux_errno(ret.error);
+ }
+
+ memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
+ cpu_hw_evt->snapshot_set_done = true;
+
+ return 0;
+}
+
static u64 pmu_sbi_ctr_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
struct sbiret ret;
- union sbi_pmu_ctr_info info;
u64 val = 0;
+ struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
+ union sbi_pmu_ctr_info info = pmu_ctr_list[idx];
+
+ /* Read the value from the shared memory directly only if counter is stopped */
+ if (sbi_pmu_snapshot_available() && (hwc->state & PERF_HES_STOPPED)) {
+ val = sdata->ctr_values[idx];
+ return val;
+ }
if (pmu_sbi_is_fw_event(event)) {
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
hwc->idx, 0, 0, 0, 0, 0);
- if (!ret.error)
- val = ret.value;
+ if (ret.error)
+ return 0;
+
+ val = ret.value;
+ if (IS_ENABLED(CONFIG_32BIT) && sbi_v2_available && info.width >= 32) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ_HI,
+ hwc->idx, 0, 0, 0, 0, 0);
+ if (!ret.error)
+ val |= ((u64)ret.value << 32);
+ else
+ WARN_ONCE(1, "Unable to read upper 32 bits of firmware counter error: %ld\n",
+ ret.error);
+ }
} else {
- info = pmu_ctr_list[idx];
val = riscv_pmu_ctr_read_csr(info.csr);
if (IS_ENABLED(CONFIG_32BIT))
- val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val;
+ val |= ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 32;
}
return val;
@@ -530,6 +714,7 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
struct hw_perf_event *hwc = &event->hw;
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
+ /* There is no benefit setting SNAPSHOT FLAG for a single counter */
#if defined(CONFIG_32BIT)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
1, flag, ival, ival >> 32, 0);
@@ -550,16 +735,36 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
{
struct sbiret ret;
struct hw_perf_event *hwc = &event->hw;
+ struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
(hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
pmu_sbi_reset_scounteren((void *)event);
+ if (sbi_pmu_snapshot_available())
+ flag |= SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
+
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
- if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
- flag != SBI_PMU_STOP_FLAG_RESET)
+ if (!ret.error && sbi_pmu_snapshot_available()) {
+ /*
+ * The counter snapshot is based on the index base specified by hwc->idx.
+ * The actual counter value is updated in shared memory at index 0 when counter
+ * mask is 0x01. To ensure accurate counter values, it's necessary to transfer
+ * the counter value to shared memory. However, if hwc->idx is zero, the counter
+ * value is already correctly updated in shared memory, requiring no further
+ * adjustment.
+ */
+ if (hwc->idx > 0) {
+ sdata->ctr_values[hwc->idx] = sdata->ctr_values[0];
+ sdata->ctr_values[0] = 0;
+ }
+ } else if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
+ flag != SBI_PMU_STOP_FLAG_RESET) {
pr_err("Stopping counter idx %d failed with error %d\n",
hwc->idx, sbi_err_map_linux_errno(ret.error));
+ }
}
static int pmu_sbi_find_num_ctrs(void)
@@ -611,16 +816,45 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
* which may include counters that are not enabled yet.
*/
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
- 0, pmu->cmask, 0, 0, 0, 0);
+ 0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
}
static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
{
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
+ unsigned long flag = 0;
+ int i, idx;
+ struct sbiret ret;
+ u64 temp_ctr_overflow_mask = 0;
+
+ if (sbi_pmu_snapshot_available())
+ flag = SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
+
+ /* Reset the shadow copy to avoid save/restore any value from previous overflow */
+ memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
+
+ for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
+ /* No need to check the error here as we can't do anything about the error */
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, i * BITS_PER_LONG,
+ cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
+ if (!ret.error && sbi_pmu_snapshot_available()) {
+ /* Save the counter values to avoid clobbering */
+ for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
+ cpu_hw_evt->snapshot_cval_shcopy[i * BITS_PER_LONG + idx] =
+ sdata->ctr_values[idx];
+ /* Save the overflow mask to avoid clobbering */
+ temp_ctr_overflow_mask |= sdata->ctr_overflow_mask << (i * BITS_PER_LONG);
+ }
+ }
- /* No need to check the error here as we can't do anything about the error */
- sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
- cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
+ /* Restore the counter values to the shared memory for used hw counters */
+ if (sbi_pmu_snapshot_available()) {
+ for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS)
+ sdata->ctr_values[idx] = cpu_hw_evt->snapshot_cval_shcopy[idx];
+ if (temp_ctr_overflow_mask)
+ sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
+ }
}
/*
@@ -629,11 +863,10 @@ static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
* while the overflowed counters need to be started with updated initialization
* value.
*/
-static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
- unsigned long ctr_ovf_mask)
+static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt,
+ u64 ctr_ovf_mask)
{
- int idx = 0;
- struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ int idx = 0, i;
struct perf_event *event;
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
unsigned long ctr_start_mask = 0;
@@ -641,11 +874,12 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
struct hw_perf_event *hwc;
u64 init_val = 0;
- ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;
-
- /* Start all the counters that did not overflow in a single shot */
- sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
- 0, 0, 0, 0);
+ for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
+ ctr_start_mask = cpu_hw_evt->used_hw_ctrs[i] & ~ctr_ovf_mask;
+ /* Start all the counters that did not overflow in a single shot */
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG, ctr_start_mask,
+ 0, 0, 0, 0);
+ }
/* Reinitialize and start all the counter that overflowed */
while (ctr_ovf_mask) {
@@ -668,6 +902,52 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
}
}
+static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_evt,
+ u64 ctr_ovf_mask)
+{
+ int i, idx = 0;
+ struct perf_event *event;
+ unsigned long flag = SBI_PMU_START_FLAG_INIT_SNAPSHOT;
+ u64 max_period, init_val = 0;
+ struct hw_perf_event *hwc;
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
+
+ for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
+ if (ctr_ovf_mask & BIT(idx)) {
+ event = cpu_hw_evt->events[idx];
+ hwc = &event->hw;
+ max_period = riscv_pmu_ctr_get_width_mask(event);
+ init_val = local64_read(&hwc->prev_count) & max_period;
+ cpu_hw_evt->snapshot_cval_shcopy[idx] = init_val;
+ }
+ /*
+ * We do not need to update the non-overflow counters the previous
+ * value should have been there already.
+ */
+ }
+
+ for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
+ /* Restore the counter values to relative indices for used hw counters */
+ for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
+ sdata->ctr_values[idx] =
+ cpu_hw_evt->snapshot_cval_shcopy[idx + i * BITS_PER_LONG];
+ /* Start all the counters in a single shot */
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx * BITS_PER_LONG,
+ cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
+ }
+}
+
+static void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
+ u64 ctr_ovf_mask)
+{
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+
+ if (sbi_pmu_snapshot_available())
+ pmu_sbi_start_ovf_ctrs_snapshot(cpu_hw_evt, ctr_ovf_mask);
+ else
+ pmu_sbi_start_ovf_ctrs_sbi(cpu_hw_evt, ctr_ovf_mask);
+}
+
static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
{
struct perf_sample_data data;
@@ -677,10 +957,11 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
int lidx, hidx, fidx;
struct riscv_pmu *pmu;
struct perf_event *event;
- unsigned long overflow;
- unsigned long overflowed_ctrs = 0;
+ u64 overflow;
+ u64 overflowed_ctrs = 0;
struct cpu_hw_events *cpu_hw_evt = dev;
u64 start_clock = sched_clock();
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
if (WARN_ON_ONCE(!cpu_hw_evt))
return IRQ_NONE;
@@ -694,7 +975,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
event = cpu_hw_evt->events[fidx];
if (!event) {
- csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
+ ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
return IRQ_NONE;
}
@@ -702,13 +983,16 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
pmu_sbi_stop_hw_ctrs(pmu);
/* Overflow status register should only be read after counter are stopped */
- ALT_SBI_PMU_OVERFLOW(overflow);
+ if (sbi_pmu_snapshot_available())
+ overflow = sdata->ctr_overflow_mask;
+ else
+ ALT_SBI_PMU_OVERFLOW(overflow);
/*
* Overflow interrupt pending bit should only be cleared after stopping
* all the counters to avoid any race condition.
*/
- csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
+ ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
/* No overflow bit is set */
if (!overflow)
@@ -728,9 +1012,14 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
continue;
- /* compute hardware counter index */
- hidx = info->csr - CSR_CYCLE;
- /* check if the corresponding bit is set in sscountovf */
+ if (sbi_pmu_snapshot_available())
+ /* SBI implementation already updated the logical indicies */
+ hidx = lidx;
+ else
+ /* compute hardware counter index */
+ hidx = info->csr - CSR_CYCLE;
+
+ /* check if the corresponding bit is set in sscountovf or overflow mask in shmem */
if (!(overflow & BIT(hidx)))
continue;
@@ -740,7 +1029,10 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
*/
overflowed_ctrs |= BIT(lidx);
hw_evt = &event->hw;
+ /* Update the event states here so that we know the state while reading */
+ hw_evt->state |= PERF_HES_STOPPED;
riscv_pmu_event_update(event);
+ hw_evt->state |= PERF_HES_UPTODATE;
perf_sample_data_init(&data, 0, hw_evt->last_period);
if (riscv_pmu_event_set_period(event)) {
/*
@@ -753,6 +1045,8 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
*/
perf_event_overflow(event, &data, regs);
}
+ /* Reset the state as we are going to start the counter after the loop */
+ hw_evt->state = 0;
}
pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
@@ -780,11 +1074,13 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
if (riscv_pmu_use_irq) {
cpu_hw_evt->irq = riscv_pmu_irq;
- csr_clear(CSR_IP, BIT(riscv_pmu_irq_num));
- csr_set(CSR_IE, BIT(riscv_pmu_irq_num));
+ ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
}
+ if (sbi_pmu_snapshot_available())
+ return pmu_sbi_snapshot_setup(pmu, cpu);
+
return 0;
}
@@ -792,12 +1088,14 @@ static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
{
if (riscv_pmu_use_irq) {
disable_percpu_irq(riscv_pmu_irq);
- csr_clear(CSR_IE, BIT(riscv_pmu_irq_num));
}
/* Disable all counters access for user mode now */
csr_write(CSR_SCOUNTEREN, 0x0);
+ if (sbi_pmu_snapshot_available())
+ return pmu_sbi_snapshot_disable();
+
return 0;
}
@@ -816,8 +1114,15 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
riscv_cached_mimpid(0) == 0) {
riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU;
riscv_pmu_use_irq = true;
+ } else if (riscv_has_vendor_extension_unlikely(ANDES_VENDOR_ID,
+ RISCV_ISA_VENDOR_EXT_XANDESPMU) &&
+ IS_ENABLED(CONFIG_ANDES_CUSTOM_PMU)) {
+ riscv_pmu_irq_num = ANDES_SLI_CAUSE_BASE + ANDES_RV_IRQ_PMOVI;
+ riscv_pmu_use_irq = true;
}
+ riscv_pmu_irq_mask = BIT(riscv_pmu_irq_num % BITS_PER_LONG);
+
if (!riscv_pmu_use_irq)
return -EOPNOTSUPP;
@@ -900,6 +1205,12 @@ static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { }
static void riscv_pmu_destroy(struct riscv_pmu *pmu)
{
+ if (sbi_v2_available) {
+ if (sbi_pmu_snapshot_available()) {
+ pmu_sbi_snapshot_disable();
+ pmu_sbi_snapshot_free(pmu);
+ }
+ }
riscv_pm_pmu_unregister(pmu);
cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}
@@ -986,7 +1297,7 @@ static void riscv_pmu_update_counter_access(void *info)
csr_write(CSR_SCOUNTEREN, 0x2);
}
-static int riscv_pmu_proc_user_access_handler(struct ctl_table *table,
+static int riscv_pmu_proc_user_access_handler(const struct ctl_table *table,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -1006,7 +1317,7 @@ static int riscv_pmu_proc_user_access_handler(struct ctl_table *table,
return 0;
}
-static struct ctl_table sbi_pmu_sysctl_table[] = {
+static const struct ctl_table sbi_pmu_sysctl_table[] = {
{
.procname = "perf_user_access",
.data = &sysctl_perf_user_access,
@@ -1016,7 +1327,6 @@ static struct ctl_table sbi_pmu_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
- { }
};
static int pmu_sbi_device_probe(struct platform_device *pdev)
@@ -1054,6 +1364,7 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
}
pmu->pmu.attr_groups = riscv_pmu_attr_groups;
+ pmu->pmu.parent = &pdev->dev;
pmu->cmask = cmask;
pmu->ctr_start = pmu_sbi_ctr_start;
pmu->ctr_stop = pmu_sbi_ctr_stop;
@@ -1067,10 +1378,6 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
pmu->event_unmapped = pmu_sbi_event_unmapped;
pmu->csr_index = pmu_sbi_csr_index;
- ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
- if (ret)
- return ret;
-
ret = riscv_pm_pmu_register(pmu);
if (ret)
goto out_unregister;
@@ -1079,8 +1386,42 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
if (ret)
goto out_unregister;
+ /* SBI PMU Snapsphot is only available in SBI v2.0 */
+ if (sbi_v2_available) {
+ int cpu;
+
+ ret = pmu_sbi_snapshot_alloc(pmu);
+ if (ret)
+ goto out_unregister;
+
+ cpu = get_cpu();
+ ret = pmu_sbi_snapshot_setup(pmu, cpu);
+ put_cpu();
+
+ if (ret) {
+ /* Snapshot is an optional feature. Continue if not available */
+ pmu_sbi_snapshot_free(pmu);
+ } else {
+ pr_info("SBI PMU snapshot detected\n");
+ /*
+ * We enable it once here for the boot cpu. If snapshot shmem setup
+ * fails during cpu hotplug process, it will fail to start the cpu
+ * as we can not handle hetergenous PMUs with different snapshot
+ * capability.
+ */
+ static_branch_enable(&sbi_pmu_snapshot_available);
+ }
+ }
+
register_sysctl("kernel", sbi_pmu_sysctl_table);
+ ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
+ if (ret)
+ goto out_unregister;
+
+ /* Asynchronously check which standard events are available */
+ schedule_work(&check_std_events_work);
+
return 0;
out_unregister:
@@ -1108,6 +1449,9 @@ static int __init pmu_sbi_devinit(void)
return 0;
}
+ if (sbi_spec_version >= sbi_mk_version(2, 0))
+ sbi_v2_available = true;
+
ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
"perf/riscv/pmu:starting",
pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
diff --git a/drivers/perf/starfive_starlink_pmu.c b/drivers/perf/starfive_starlink_pmu.c
new file mode 100644
index 000000000000..5e5a672b4229
--- /dev/null
+++ b/drivers/perf/starfive_starlink_pmu.c
@@ -0,0 +1,642 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * StarFive's StarLink PMU driver
+ *
+ * Copyright (C) 2023 StarFive Technology Co., Ltd.
+ *
+ * Author: Ji Sheng Teoh <jisheng.teoh@starfivetech.com>
+ *
+ */
+
+#define STARLINK_PMU_PDEV_NAME "starfive_starlink_pmu"
+#define pr_fmt(fmt) STARLINK_PMU_PDEV_NAME ": " fmt
+
+#include <linux/bitmap.h>
+#include <linux/cpu_pm.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/sysfs.h>
+
+#define STARLINK_PMU_MAX_COUNTERS 64
+#define STARLINK_PMU_NUM_COUNTERS 16
+#define STARLINK_PMU_IDX_CYCLE_COUNTER 63
+
+#define STARLINK_PMU_EVENT_SELECT 0x060
+#define STARLINK_PMU_EVENT_COUNTER 0x160
+#define STARLINK_PMU_COUNTER_MASK GENMASK_ULL(63, 0)
+#define STARLINK_PMU_CYCLE_COUNTER 0x058
+
+#define STARLINK_PMU_CONTROL 0x040
+#define STARLINK_PMU_GLOBAL_ENABLE BIT_ULL(0)
+
+#define STARLINK_PMU_INTERRUPT_ENABLE 0x050
+#define STARLINK_PMU_COUNTER_OVERFLOW_STATUS 0x048
+#define STARLINK_PMU_CYCLE_OVERFLOW_MASK BIT_ULL(63)
+
+#define STARLINK_CYCLES 0x058
+#define CACHE_READ_REQUEST 0x04000701
+#define CACHE_WRITE_REQUEST 0x03000001
+#define CACHE_RELEASE_REQUEST 0x0003e001
+#define CACHE_READ_HIT 0x00901202
+#define CACHE_READ_MISS 0x04008002
+#define CACHE_WRITE_HIT 0x006c0002
+#define CACHE_WRITE_MISS 0x03000002
+#define CACHE_WRITEBACK 0x00000403
+
+#define to_starlink_pmu(p) (container_of(p, struct starlink_pmu, pmu))
+
+#define STARLINK_FORMAT_ATTR(_name, _config) \
+ (&((struct dev_ext_attribute[]) { \
+ { .attr = __ATTR(_name, 0444, starlink_pmu_sysfs_format_show, NULL), \
+ .var = (void *)_config, } \
+ })[0].attr.attr)
+
+#define STARLINK_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, starlink_pmu_sysfs_event_show, _id)
+
+static int starlink_pmu_cpuhp_state;
+
+struct starlink_hw_events {
+ struct perf_event *events[STARLINK_PMU_MAX_COUNTERS];
+ DECLARE_BITMAP(used_mask, STARLINK_PMU_MAX_COUNTERS);
+};
+
+struct starlink_pmu {
+ struct pmu pmu;
+ struct starlink_hw_events __percpu *hw_events;
+ struct hlist_node node;
+ struct notifier_block starlink_pmu_pm_nb;
+ void __iomem *pmu_base;
+ cpumask_t cpumask;
+ int irq;
+};
+
+static ssize_t
+starlink_pmu_sysfs_format_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct dev_ext_attribute *eattr = container_of(attr,
+ struct dev_ext_attribute, attr);
+
+ return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+static struct attribute *starlink_pmu_format_attrs[] = {
+ STARLINK_FORMAT_ATTR(event, "config:0-31"),
+ NULL
+};
+
+static const struct attribute_group starlink_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = starlink_pmu_format_attrs,
+};
+
+static ssize_t
+starlink_pmu_sysfs_event_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct perf_pmu_events_attr *eattr = container_of(attr,
+ struct perf_pmu_events_attr, attr);
+
+ return sysfs_emit(buf, "event=0x%02llx\n", eattr->id);
+}
+
+static struct attribute *starlink_pmu_event_attrs[] = {
+ STARLINK_EVENT_ATTR(cycles, STARLINK_CYCLES),
+ STARLINK_EVENT_ATTR(read_request, CACHE_READ_REQUEST),
+ STARLINK_EVENT_ATTR(write_request, CACHE_WRITE_REQUEST),
+ STARLINK_EVENT_ATTR(release_request, CACHE_RELEASE_REQUEST),
+ STARLINK_EVENT_ATTR(read_hit, CACHE_READ_HIT),
+ STARLINK_EVENT_ATTR(read_miss, CACHE_READ_MISS),
+ STARLINK_EVENT_ATTR(write_hit, CACHE_WRITE_HIT),
+ STARLINK_EVENT_ATTR(write_miss, CACHE_WRITE_MISS),
+ STARLINK_EVENT_ATTR(writeback, CACHE_WRITEBACK),
+ NULL
+};
+
+static const struct attribute_group starlink_pmu_events_attr_group = {
+ .name = "events",
+ .attrs = starlink_pmu_event_attrs,
+};
+
+static ssize_t
+cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, &starlink_pmu->cpumask);
+}
+
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *starlink_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static const struct attribute_group starlink_pmu_cpumask_attr_group = {
+ .attrs = starlink_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *starlink_pmu_attr_groups[] = {
+ &starlink_pmu_format_attr_group,
+ &starlink_pmu_events_attr_group,
+ &starlink_pmu_cpumask_attr_group,
+ NULL
+};
+
+static void starlink_pmu_set_event_period(struct perf_event *event)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = event->hw.idx;
+
+ /*
+ * Program counter to half of it's max count to handle
+ * cases of extreme interrupt latency.
+ */
+ u64 val = STARLINK_PMU_COUNTER_MASK >> 1;
+
+ local64_set(&hwc->prev_count, val);
+ if (hwc->config == STARLINK_CYCLES)
+ writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_CYCLE_COUNTER);
+ else
+ writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_EVENT_COUNTER +
+ idx * sizeof(u64));
+}
+
+static void starlink_pmu_counter_start(struct perf_event *event,
+ struct starlink_pmu *starlink_pmu)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = event->hw.idx;
+ u64 val;
+
+ /*
+ * Enable counter overflow interrupt[63:0],
+ * which is mapped as follow:
+ *
+ * event counter 0 - Bit [0]
+ * event counter 1 - Bit [1]
+ * ...
+ * cycle counter - Bit [63]
+ */
+ val = readq(starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+
+ if (hwc->config == STARLINK_CYCLES) {
+ /*
+ * Cycle count has its dedicated register, and it starts
+ * counting as soon as STARLINK_PMU_GLOBAL_ENABLE is set.
+ */
+ val |= STARLINK_PMU_CYCLE_OVERFLOW_MASK;
+ } else {
+ writeq(event->hw.config, starlink_pmu->pmu_base +
+ STARLINK_PMU_EVENT_SELECT + idx * sizeof(u64));
+
+ val |= BIT_ULL(idx);
+ }
+
+ writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+
+ writeq(STARLINK_PMU_GLOBAL_ENABLE, starlink_pmu->pmu_base +
+ STARLINK_PMU_CONTROL);
+}
+
+static void starlink_pmu_counter_stop(struct perf_event *event,
+ struct starlink_pmu *starlink_pmu)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = event->hw.idx;
+ u64 val;
+
+ val = readq(starlink_pmu->pmu_base + STARLINK_PMU_CONTROL);
+ val &= ~STARLINK_PMU_GLOBAL_ENABLE;
+ writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_CONTROL);
+
+ val = readq(starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+ if (hwc->config == STARLINK_CYCLES)
+ val &= ~STARLINK_PMU_CYCLE_OVERFLOW_MASK;
+ else
+ val &= ~BIT_ULL(idx);
+
+ writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+}
+
+static void starlink_pmu_update(struct perf_event *event)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u64 prev_raw_count, new_raw_count;
+ u64 oldval;
+ u64 delta;
+
+ do {
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (hwc->config == STARLINK_CYCLES)
+ new_raw_count = readq(starlink_pmu->pmu_base +
+ STARLINK_PMU_CYCLE_COUNTER);
+ else
+ new_raw_count = readq(starlink_pmu->pmu_base +
+ STARLINK_PMU_EVENT_COUNTER +
+ idx * sizeof(u64));
+ oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count);
+ } while (oldval != prev_raw_count);
+
+ delta = (new_raw_count - prev_raw_count) & STARLINK_PMU_COUNTER_MASK;
+ local64_add(delta, &event->count);
+}
+
+static void starlink_pmu_start(struct perf_event *event, int flags)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+
+ starlink_pmu_set_event_period(event);
+ starlink_pmu_counter_start(event, starlink_pmu);
+
+ perf_event_update_userpage(event);
+}
+
+static void starlink_pmu_stop(struct perf_event *event, int flags)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->state & PERF_HES_STOPPED)
+ return;
+
+ starlink_pmu_counter_stop(event, starlink_pmu);
+ starlink_pmu_update(event);
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int starlink_pmu_add(struct perf_event *event, int flags)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct starlink_hw_events *hw_events =
+ this_cpu_ptr(starlink_pmu->hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long *used_mask = hw_events->used_mask;
+ u32 n_events = STARLINK_PMU_NUM_COUNTERS;
+ int idx;
+
+ /*
+ * Cycle counter has dedicated register to hold counter value.
+ * Event other than cycle count has to be enabled through
+ * event select register, and assigned with independent counter
+ * as they appear.
+ */
+
+ if (hwc->config == STARLINK_CYCLES) {
+ idx = STARLINK_PMU_IDX_CYCLE_COUNTER;
+ } else {
+ idx = find_first_zero_bit(used_mask, n_events);
+ /* All counter are in use */
+ if (idx < 0)
+ return idx;
+
+ set_bit(idx, used_mask);
+ }
+
+ hwc->idx = idx;
+ hw_events->events[idx] = event;
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ starlink_pmu_start(event, PERF_EF_RELOAD);
+
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static void starlink_pmu_del(struct perf_event *event, int flags)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct starlink_hw_events *hw_events =
+ this_cpu_ptr(starlink_pmu->hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ starlink_pmu_stop(event, PERF_EF_UPDATE);
+ hw_events->events[hwc->idx] = NULL;
+ clear_bit(hwc->idx, hw_events->used_mask);
+
+ perf_event_update_userpage(event);
+}
+
+static bool starlink_pmu_validate_event_group(struct perf_event *event)
+{
+ struct perf_event *leader = event->group_leader;
+ struct perf_event *sibling;
+ int counter = 1;
+
+ /*
+ * Ensure hardware events in the group are on the same PMU,
+ * software events are acceptable.
+ */
+ if (event->group_leader->pmu != event->pmu &&
+ !is_software_event(event->group_leader))
+ return false;
+
+ for_each_sibling_event(sibling, leader) {
+ if (sibling->pmu != event->pmu && !is_software_event(sibling))
+ return false;
+
+ counter++;
+ }
+
+ return counter <= STARLINK_PMU_NUM_COUNTERS;
+}
+
+static int starlink_pmu_event_init(struct perf_event *event)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * Sampling is not supported, as counters are shared
+ * by all CPU.
+ */
+ if (hwc->sample_period)
+ return -EOPNOTSUPP;
+
+ /*
+ * Per-task and attach to a task are not supported,
+ * as uncore events are not specific to any CPU.
+ */
+ if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK)
+ return -EOPNOTSUPP;
+
+ if (!starlink_pmu_validate_event_group(event))
+ return -EINVAL;
+
+ hwc->idx = -1;
+ hwc->config = event->attr.config;
+ event->cpu = cpumask_first(&starlink_pmu->cpumask);
+
+ return 0;
+}
+
+static irqreturn_t starlink_pmu_handle_irq(int irq_num, void *data)
+{
+ struct starlink_pmu *starlink_pmu = data;
+ struct starlink_hw_events *hw_events =
+ this_cpu_ptr(starlink_pmu->hw_events);
+ bool handled = false;
+ int idx;
+ u64 overflow_status;
+
+ for (idx = 0; idx < STARLINK_PMU_MAX_COUNTERS; idx++) {
+ struct perf_event *event = hw_events->events[idx];
+
+ if (!event)
+ continue;
+
+ overflow_status = readq(starlink_pmu->pmu_base +
+ STARLINK_PMU_COUNTER_OVERFLOW_STATUS);
+ if (!(overflow_status & BIT_ULL(idx)))
+ continue;
+
+ writeq(BIT_ULL(idx), starlink_pmu->pmu_base +
+ STARLINK_PMU_COUNTER_OVERFLOW_STATUS);
+
+ starlink_pmu_update(event);
+ starlink_pmu_set_event_period(event);
+ handled = true;
+ }
+ return IRQ_RETVAL(handled);
+}
+
+static int starlink_setup_irqs(struct starlink_pmu *starlink_pmu,
+ struct platform_device *pdev)
+{
+ int ret, irq;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return -EINVAL;
+
+ ret = devm_request_irq(&pdev->dev, irq, starlink_pmu_handle_irq,
+ 0, STARLINK_PMU_PDEV_NAME, starlink_pmu);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Failed to request IRQ\n");
+
+ starlink_pmu->irq = irq;
+
+ return 0;
+}
+
+static int starlink_pmu_pm_notify(struct notifier_block *b,
+ unsigned long cmd, void *v)
+{
+ struct starlink_pmu *starlink_pmu = container_of(b, struct starlink_pmu,
+ starlink_pmu_pm_nb);
+ struct starlink_hw_events *hw_events =
+ this_cpu_ptr(starlink_pmu->hw_events);
+ int enabled = bitmap_weight(hw_events->used_mask,
+ STARLINK_PMU_MAX_COUNTERS);
+ struct perf_event *event;
+ int idx;
+
+ if (!enabled)
+ return NOTIFY_OK;
+
+ for (idx = 0; idx < STARLINK_PMU_MAX_COUNTERS; idx++) {
+ event = hw_events->events[idx];
+ if (!event)
+ continue;
+
+ switch (cmd) {
+ case CPU_PM_ENTER:
+ /* Stop and update the counter */
+ starlink_pmu_stop(event, PERF_EF_UPDATE);
+ break;
+ case CPU_PM_EXIT:
+ case CPU_PM_ENTER_FAILED:
+ /* Restore and enable the counter */
+ starlink_pmu_start(event, PERF_EF_RELOAD);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return NOTIFY_OK;
+}
+
+static int starlink_pmu_pm_register(struct starlink_pmu *starlink_pmu)
+{
+ if (!IS_ENABLED(CONFIG_CPU_PM))
+ return 0;
+
+ starlink_pmu->starlink_pmu_pm_nb.notifier_call = starlink_pmu_pm_notify;
+ return cpu_pm_register_notifier(&starlink_pmu->starlink_pmu_pm_nb);
+}
+
+static void starlink_pmu_pm_unregister(struct starlink_pmu *starlink_pmu)
+{
+ if (!IS_ENABLED(CONFIG_CPU_PM))
+ return;
+
+ cpu_pm_unregister_notifier(&starlink_pmu->starlink_pmu_pm_nb);
+}
+
+static void starlink_pmu_destroy(struct starlink_pmu *starlink_pmu)
+{
+ starlink_pmu_pm_unregister(starlink_pmu);
+ cpuhp_state_remove_instance(starlink_pmu_cpuhp_state,
+ &starlink_pmu->node);
+}
+
+static int starlink_pmu_probe(struct platform_device *pdev)
+{
+ struct starlink_pmu *starlink_pmu;
+ struct starlink_hw_events *hw_events;
+ struct resource *res;
+ int cpuid, i, ret;
+
+ starlink_pmu = devm_kzalloc(&pdev->dev, sizeof(*starlink_pmu), GFP_KERNEL);
+ if (!starlink_pmu)
+ return -ENOMEM;
+
+ starlink_pmu->pmu_base =
+ devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(starlink_pmu->pmu_base))
+ return PTR_ERR(starlink_pmu->pmu_base);
+
+ starlink_pmu->hw_events = alloc_percpu_gfp(struct starlink_hw_events,
+ GFP_KERNEL);
+ if (!starlink_pmu->hw_events) {
+ dev_err(&pdev->dev, "Failed to allocate per-cpu PMU data\n");
+ return -ENOMEM;
+ }
+
+ for_each_possible_cpu(cpuid) {
+ hw_events = per_cpu_ptr(starlink_pmu->hw_events, cpuid);
+ for (i = 0; i < STARLINK_PMU_MAX_COUNTERS; i++)
+ hw_events->events[i] = NULL;
+ }
+
+ ret = starlink_setup_irqs(starlink_pmu, pdev);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(starlink_pmu_cpuhp_state,
+ &starlink_pmu->node);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register hotplug\n");
+ return ret;
+ }
+
+ ret = starlink_pmu_pm_register(starlink_pmu);
+ if (ret) {
+ cpuhp_state_remove_instance(starlink_pmu_cpuhp_state,
+ &starlink_pmu->node);
+ return ret;
+ }
+
+ starlink_pmu->pmu = (struct pmu) {
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = starlink_pmu_event_init,
+ .add = starlink_pmu_add,
+ .del = starlink_pmu_del,
+ .start = starlink_pmu_start,
+ .stop = starlink_pmu_stop,
+ .read = starlink_pmu_update,
+ .attr_groups = starlink_pmu_attr_groups,
+ };
+
+ ret = perf_pmu_register(&starlink_pmu->pmu, STARLINK_PMU_PDEV_NAME, -1);
+ if (ret)
+ starlink_pmu_destroy(starlink_pmu);
+
+ return ret;
+}
+
+static const struct of_device_id starlink_pmu_of_match[] = {
+ { .compatible = "starfive,jh8100-starlink-pmu" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, starlink_pmu_of_match);
+
+static struct platform_driver starlink_pmu_driver = {
+ .driver = {
+ .name = STARLINK_PMU_PDEV_NAME,
+ .of_match_table = starlink_pmu_of_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = starlink_pmu_probe,
+};
+
+static int
+starlink_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct starlink_pmu *starlink_pmu = hlist_entry_safe(node,
+ struct starlink_pmu,
+ node);
+
+ if (cpumask_empty(&starlink_pmu->cpumask))
+ cpumask_set_cpu(cpu, &starlink_pmu->cpumask);
+
+ WARN_ON(irq_set_affinity(starlink_pmu->irq, cpumask_of(cpu)));
+
+ return 0;
+}
+
+static int
+starlink_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct starlink_pmu *starlink_pmu = hlist_entry_safe(node,
+ struct starlink_pmu,
+ node);
+ unsigned int target;
+
+ if (!cpumask_test_and_clear_cpu(cpu, &starlink_pmu->cpumask))
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&starlink_pmu->pmu, cpu, target);
+
+ cpumask_set_cpu(target, &starlink_pmu->cpumask);
+ WARN_ON(irq_set_affinity(starlink_pmu->irq, cpumask_of(target)));
+
+ return 0;
+}
+
+static int __init starlink_pmu_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "soc/starfive/starlink_pmu:online",
+ starlink_pmu_online_cpu,
+ starlink_pmu_offline_cpu);
+ if (ret < 0)
+ return ret;
+
+ starlink_pmu_cpuhp_state = ret;
+
+ return platform_driver_register(&starlink_pmu_driver);
+}
+
+device_initcall(starlink_pmu_init);
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index 1edb9c03704f..cadd60221b8f 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -504,24 +504,19 @@ static void tx2_uncore_event_update(struct perf_event *event)
static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev)
{
- int i = 0;
- struct acpi_tx2_pmu_device {
- __u8 id[ACPI_ID_LEN];
- enum tx2_uncore_type type;
- } devices[] = {
+ struct acpi_device_id devices[] = {
{"CAV901D", PMU_TYPE_L3C},
{"CAV901F", PMU_TYPE_DMC},
{"CAV901E", PMU_TYPE_CCPI2},
- {"", PMU_TYPE_INVALID}
+ {}
};
+ const struct acpi_device_id *id;
- while (devices[i].type != PMU_TYPE_INVALID) {
- if (!strcmp(acpi_device_hid(adev), devices[i].id))
- break;
- i++;
- }
+ id = acpi_match_acpi_device(devices, adev);
+ if (!id)
+ return PMU_TYPE_INVALID;
- return devices[i].type;
+ return (enum tx2_uncore_type)id->driver_data;
}
static bool tx2_uncore_validate_event(struct pmu *pmu,
@@ -729,6 +724,7 @@ static int tx2_uncore_pmu_register(
/* Perf event registration */
tx2_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = tx2_pmu->dev,
.attr_groups = tx2_pmu->attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = tx2_uncore_event_init,
@@ -932,9 +928,8 @@ static int tx2_uncore_pmu_online_cpu(unsigned int cpu,
static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
struct hlist_node *hpnode)
{
- int new_cpu;
struct tx2_uncore_pmu *tx2_pmu;
- struct cpumask cpu_online_mask_temp;
+ unsigned int new_cpu;
tx2_pmu = hlist_entry_safe(hpnode,
struct tx2_uncore_pmu, hpnode);
@@ -945,11 +940,8 @@ static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
if (tx2_pmu->hrtimer_callback)
hrtimer_cancel(&tx2_pmu->hrtimer);
- cpumask_copy(&cpu_online_mask_temp, cpu_online_mask);
- cpumask_clear_cpu(cpu, &cpu_online_mask_temp);
- new_cpu = cpumask_any_and(
- cpumask_of_node(tx2_pmu->node),
- &cpu_online_mask_temp);
+ new_cpu = cpumask_any_and_but(cpumask_of_node(tx2_pmu->node),
+ cpu_online_mask, cpu);
tx2_pmu->cpu = new_cpu;
if (new_cpu >= nr_cpu_ids)
@@ -993,7 +985,7 @@ static int tx2_uncore_probe(struct platform_device *pdev)
return 0;
}
-static int tx2_uncore_remove(struct platform_device *pdev)
+static void tx2_uncore_remove(struct platform_device *pdev)
{
struct tx2_uncore_pmu *tx2_pmu, *temp;
struct device *dev = &pdev->dev;
@@ -1009,7 +1001,6 @@ static int tx2_uncore_remove(struct platform_device *pdev)
}
}
}
- return 0;
}
static struct platform_driver tx2_uncore_driver = {
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 7ce344248dda..33b5497bdc06 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -162,18 +162,9 @@ enum xgene_pmu_dev_type {
/*
* sysfs format attributes
*/
-static ssize_t xgene_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *eattr;
-
- eattr = container_of(attr, struct dev_ext_attribute, attr);
- return sysfs_emit(buf, "%s\n", (char *) eattr->var);
-}
-
#define XGENE_PMU_FORMAT_ATTR(_name, _config) \
(&((struct dev_ext_attribute[]) { \
- { .attr = __ATTR(_name, S_IRUGO, xgene_pmu_format_show, NULL), \
+ { .attr = __ATTR(_name, S_IRUGO, device_show_string, NULL), \
.var = (void *) _config, } \
})[0].attr.attr)
@@ -1102,6 +1093,7 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
/* Perf driver registration */
pmu_dev->pmu = (struct pmu) {
+ .parent = pmu_dev->parent->dev,
.attr_groups = pmu_dev->attr_groups,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = xgene_perf_pmu_enable,
@@ -1937,7 +1929,7 @@ xgene_pmu_dev_cleanup(struct xgene_pmu *xgene_pmu, struct list_head *pmus)
}
}
-static int xgene_pmu_remove(struct platform_device *pdev)
+static void xgene_pmu_remove(struct platform_device *pdev)
{
struct xgene_pmu *xgene_pmu = dev_get_drvdata(&pdev->dev);
@@ -1947,8 +1939,6 @@ static int xgene_pmu_remove(struct platform_device *pdev)
xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcpmus);
cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
&xgene_pmu->node);
-
- return 0;
}
static struct platform_driver xgene_pmu_driver = {