summaryrefslogtreecommitdiff
path: root/drivers/perf
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/perf')
-rw-r--r--drivers/perf/Kconfig112
-rw-r--r--drivers/perf/Makefile12
-rw-r--r--drivers/perf/alibaba_uncore_drw_pmu.c39
-rw-r--r--drivers/perf/amlogic/meson_ddr_pmu_core.c13
-rw-r--r--drivers/perf/amlogic/meson_g12_ddr_pmu.c39
-rw-r--r--drivers/perf/apple_m1_cpu_pmu.c337
-rw-r--r--drivers/perf/arm-cci.c28
-rw-r--r--drivers/perf/arm-ccn.c24
-rw-r--r--drivers/perf/arm-cmn.c1023
-rw-r--r--drivers/perf/arm-ni.c862
-rw-r--r--drivers/perf/arm_brbe.c805
-rw-r--r--drivers/perf/arm_brbe.h47
-rw-r--r--drivers/perf/arm_cspmu/Kconfig22
-rw-r--r--drivers/perf/arm_cspmu/Makefile8
-rw-r--r--drivers/perf/arm_cspmu/ampere_cspmu.c261
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.c575
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.h133
-rw-r--r--drivers/perf/arm_cspmu/nvidia_cspmu.c316
-rw-r--r--drivers/perf/arm_cspmu/nvidia_cspmu.h17
-rw-r--r--drivers/perf/arm_dmc620_pmu.c58
-rw-r--r--drivers/perf/arm_dsu_pmu.c43
-rw-r--r--drivers/perf/arm_pmu.c136
-rw-r--r--drivers/perf/arm_pmu_acpi.c139
-rw-r--r--drivers/perf/arm_pmu_platform.c24
-rw-r--r--drivers/perf/arm_pmuv3.c1666
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c75
-rw-r--r--drivers/perf/arm_spe_pmu.c388
-rw-r--r--drivers/perf/arm_v6_pmu.c431
-rw-r--r--drivers/perf/arm_v7_pmu.c1924
-rw-r--r--drivers/perf/arm_xscale_pmu.c747
-rw-r--r--drivers/perf/cxl_pmu.c983
-rw-r--r--drivers/perf/dwc_pcie_pmu.c889
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c194
-rw-r--r--drivers/perf/fsl_imx9_ddr_perf.c887
-rw-r--r--drivers/perf/fujitsu_uncore_pmu.c613
-rw-r--r--drivers/perf/hisilicon/Makefile3
-rw-r--r--drivers/perf/hisilicon/hisi_pcie_pmu.c189
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c61
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c429
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_hha_pmu.c62
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c599
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_mn_pmu.c411
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_noc_pmu.c443
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pa_pmu.c155
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c209
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.h76
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c260
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_uc_pmu.c595
-rw-r--r--drivers/perf/hisilicon/hns3_pmu.c37
-rw-r--r--drivers/perf/marvell_cn10k_ddr_pmu.c549
-rw-r--r--drivers/perf/marvell_cn10k_tad_pmu.c93
-rw-r--r--drivers/perf/marvell_pem_pmu.c425
-rw-r--r--drivers/perf/qcom_l2_pmu.c14
-rw-r--r--drivers/perf/qcom_l3_pmu.c19
-rw-r--r--drivers/perf/riscv_pmu.c142
-rw-r--r--drivers/perf/riscv_pmu_legacy.c43
-rw-r--r--drivers/perf/riscv_pmu_sbi.c888
-rw-r--r--drivers/perf/starfive_starlink_pmu.c642
-rw-r--r--drivers/perf/thunderx2_pmu.c38
-rw-r--r--drivers/perf/xgene_pmu.c57
60 files changed, 17981 insertions, 2328 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 66c259000a44..638321fc9800 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -48,6 +48,13 @@ config ARM_CMN
Support for PMU events monitoring on the Arm CMN-600 Coherent Mesh
Network interconnect.
+config ARM_NI
+ tristate "Arm NI-700 PMU support"
+ depends on ARM64 || COMPILE_TEST
+ help
+ Support for PMU events monitoring on the Arm NI-700 Network-on-Chip
+ interconnect and family.
+
config ARM_PMU
depends on ARM || ARM64
bool "ARM PMU framework"
@@ -56,6 +63,18 @@ config ARM_PMU
Say y if you want to use CPU performance monitors on ARM-based
systems.
+config ARM_V6_PMU
+ depends on ARM_PMU && (CPU_V6 || CPU_V6K)
+ def_bool y
+
+config ARM_V7_PMU
+ depends on ARM_PMU && CPU_V7
+ def_bool y
+
+config ARM_XSCALE_PMU
+ depends on ARM_PMU && CPU_XSCALE
+ def_bool y
+
config RISCV_PMU
depends on RISCV
bool "RISC-V PMU framework"
@@ -86,13 +105,37 @@ config RISCV_PMU_SBI
full perf feature support i.e. counter overflow, privilege mode
filtering, counter configuration.
+config STARFIVE_STARLINK_PMU
+ depends on ARCH_STARFIVE || COMPILE_TEST
+ depends on 64BIT
+ bool "StarFive StarLink PMU"
+ help
+ Provide support for StarLink Performance Monitor Unit.
+ StarLink Performance Monitor Unit integrates one or more cores with
+ an L3 memory system. The L3 cache events are added into perf event
+ subsystem, allowing monitoring of various L3 cache perf events.
+
+config ANDES_CUSTOM_PMU
+ bool "Andes custom PMU support"
+ depends on ARCH_RENESAS && RISCV_ALTERNATIVE && RISCV_PMU_SBI
+ default y
+ help
+ The Andes cores implement the PMU overflow extension very
+ similar to the standard Sscofpmf and Smcntrpmf extension.
+
+ This will patch the overflow and pending CSRs and handle the
+ non-standard behaviour via the regular SBI PMU driver and
+ interface.
+
+ If you don't know what to do here, say "Y".
+
config ARM_PMU_ACPI
depends on ARM_PMU && ACPI
def_bool y
config ARM_SMMU_V3_PMU
tristate "ARM SMMUv3 Performance Monitors Extension"
- depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT)
+ depends on ARM64 || (COMPILE_TEST && 64BIT)
depends on GENERIC_MSI_IRQ
help
Provides support for the ARM SMMUv3 Performance Monitor Counter
@@ -100,6 +143,16 @@ config ARM_SMMU_V3_PMU
through the SMMU and allow the resulting information to be filtered
based on the Stream ID of the corresponding master.
+config ARM_PMUV3
+ depends on HW_PERF_EVENTS && ((ARM && CPU_V7) || ARM64)
+ bool "ARM PMUv3 support" if !ARM64
+ default ARM64
+ help
+ Say y if you want to use the ARM performance monitor unit (PMU)
+ version 3. The PMUv3 is the CPU performance monitors on ARMv8
+ (aarch32 and aarch64) systems that implement the PMUv3
+ architecture.
+
config ARM_DSU_PMU
tristate "ARM DynamIQ Shared Unit (DSU) PMU"
depends on ARM64
@@ -117,6 +170,23 @@ config FSL_IMX8_DDR_PMU
can give information about memory throughput and other related
events.
+config FSL_IMX9_DDR_PMU
+ tristate "Freescale i.MX9 DDR perf monitor"
+ depends on ARCH_MXC
+ help
+ Provides support for the DDR performance monitor in i.MX9, which
+ can give information about memory throughput and other related
+ events.
+
+config FUJITSU_UNCORE_PMU
+ tristate "Fujitsu Uncore PMU"
+ depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT)
+ help
+ Provides support for the Uncore performance monitor unit (PMU)
+ in Fujitsu processors.
+ Adds the Uncore PMU into the perf events subsystem for
+ monitoring Uncore events.
+
config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
depends on ARCH_QCOM && ARM64 && ACPI
@@ -141,7 +211,7 @@ config THUNDERX2_PMU
tristate "Cavium ThunderX2 SoC PMU UNCORE"
depends on ARCH_THUNDER2 || COMPILE_TEST
depends on NUMA && ACPI
- default m
+ default m if ARCH_THUNDER2
help
Provides support for ThunderX2 UNCORE events.
The SoC has PMU support in its L3 cache controller (L3C) and
@@ -162,6 +232,17 @@ config ARM_SPE_PMU
Extension, which provides periodic sampling of operations in
the CPU pipeline and reports this via the perf AUX interface.
+config ARM64_BRBE
+ bool "Enable support for branch stack sampling using FEAT_BRBE"
+ depends on ARM_PMUV3 && ARM64
+ default y
+ help
+ Enable perf support for Branch Record Buffer Extension (BRBE) which
+ records all branches taken in an execution path. This supports some
+ branch types and privilege based filtering. It captures additional
+ relevant information such as cycle count, misprediction and branch
+ type, branch privilege level etc.
+
config ARM_DMC620_PMU
tristate "Enable PMU support for the ARM DMC-620 memory controller"
depends on (ARM64 && ACPI) || COMPILE_TEST
@@ -199,8 +280,35 @@ config MARVELL_CN10K_DDR_PMU
Enable perf support for Marvell DDR Performance monitoring
event on CN10K platform.
+config DWC_PCIE_PMU
+ tristate "Synopsys DesignWare PCIe PMU"
+ depends on PCI
+ help
+ Enable perf support for Synopsys DesignWare PCIe PMU Performance
+ monitoring event on platform including the Alibaba Yitian 710.
+
source "drivers/perf/arm_cspmu/Kconfig"
source "drivers/perf/amlogic/Kconfig"
+config CXL_PMU
+ tristate "CXL Performance Monitoring Unit"
+ depends on CXL_BUS
+ help
+ Support performance monitoring as defined in CXL rev 3.0
+ section 13.2: Performance Monitoring. CXL components may have
+ one or more CXL Performance Monitoring Units (CPMUs).
+
+ Say 'y/m' to enable a driver that will attach to performance
+ monitoring units and provide standard perf based interfaces.
+
+ If unsure say 'm'.
+
+config MARVELL_PEM_PMU
+ tristate "MARVELL PEM PMU Support"
+ depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
+ help
+ Enable support for PCIe Interface performance monitoring
+ on Marvell platform.
+
endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 13e45da61100..ea52711a87e3 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -3,23 +3,35 @@ obj-$(CONFIG_ARM_CCI_PMU) += arm-cci.o
obj-$(CONFIG_ARM_CCN) += arm-ccn.o
obj-$(CONFIG_ARM_CMN) += arm-cmn.o
obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
+obj-$(CONFIG_ARM_NI) += arm-ni.o
obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
+obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o
+obj-$(CONFIG_ARM_V6_PMU) += arm_v6_pmu.o
+obj-$(CONFIG_ARM_V7_PMU) += arm_v7_pmu.o
+obj-$(CONFIG_ARM_XSCALE_PMU) += arm_xscale_pmu.o
obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
+obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o
+obj-$(CONFIG_FUJITSU_UNCORE_PMU) += fujitsu_uncore_pmu.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o
obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o
obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o
+obj-$(CONFIG_STARFIVE_STARLINK_PMU) += starfive_starlink_pmu.o
obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
+obj-$(CONFIG_ARM64_BRBE) += arm_brbe.o
obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
+obj-$(CONFIG_MARVELL_PEM_PMU) += marvell_pem_pmu.o
obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
+obj-$(CONFIG_DWC_PCIE_PMU) += dwc_pcie_pmu.o
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
+obj-$(CONFIG_CXL_PMU) += cxl_pmu.o
diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c
index a7689fecb49d..99a0ef9817e0 100644
--- a/drivers/perf/alibaba_uncore_drw_pmu.c
+++ b/drivers/perf/alibaba_uncore_drw_pmu.c
@@ -236,10 +236,29 @@ static const struct attribute_group ali_drw_pmu_cpumask_attr_group = {
.attrs = ali_drw_pmu_cpumask_attrs,
};
+static umode_t ali_drw_pmu_identifier_attr_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ return attr->mode;
+}
+
+static DEVICE_STRING_ATTR_RO(ali_drw_pmu_identifier, 0444, "ali_drw_pmu");
+
+static struct attribute *ali_drw_pmu_identifier_attrs[] = {
+ &dev_attr_ali_drw_pmu_identifier.attr.attr,
+ NULL
+};
+
+static const struct attribute_group ali_drw_pmu_identifier_attr_group = {
+ .attrs = ali_drw_pmu_identifier_attrs,
+ .is_visible = ali_drw_pmu_identifier_attr_visible
+};
+
static const struct attribute_group *ali_drw_pmu_attr_groups[] = {
&ali_drw_pmu_events_attr_group,
&ali_drw_pmu_cpumask_attr_group,
&ali_drw_pmu_format_group,
+ &ali_drw_pmu_identifier_attr_group,
NULL,
};
@@ -381,7 +400,7 @@ static irqreturn_t ali_drw_pmu_isr(int irq_num, void *data)
}
/* clear common counter intr status */
- clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, 1);
+ clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, status);
writel(clr_status,
drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR);
}
@@ -656,8 +675,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev)
drw_pmu->dev = &pdev->dev;
platform_set_drvdata(pdev, drw_pmu);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- drw_pmu->cfg_base = devm_ioremap_resource(&pdev->dev, res);
+ drw_pmu->cfg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(drw_pmu->cfg_base))
return PTR_ERR(drw_pmu->cfg_base);
@@ -683,6 +701,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev)
drw_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.event_init = ali_drw_pmu_event_init,
.add = ali_drw_pmu_add,
@@ -703,7 +722,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int ali_drw_pmu_remove(struct platform_device *pdev)
+static void ali_drw_pmu_remove(struct platform_device *pdev)
{
struct ali_drw_pmu *drw_pmu = platform_get_drvdata(pdev);
@@ -713,8 +732,6 @@ static int ali_drw_pmu_remove(struct platform_device *pdev)
ali_drw_pmu_uninit_irq(drw_pmu);
perf_pmu_unregister(&drw_pmu->pmu);
-
- return 0;
}
static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
@@ -722,18 +739,14 @@ static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
struct ali_drw_pmu_irq *irq;
struct ali_drw_pmu *drw_pmu;
unsigned int target;
- int ret;
- cpumask_t node_online_cpus;
irq = hlist_entry_safe(node, struct ali_drw_pmu_irq, node);
if (cpu != irq->cpu)
return 0;
- ret = cpumask_and(&node_online_cpus,
- cpumask_of_node(cpu_to_node(cpu)), cpu_online_mask);
- if (ret)
- target = cpumask_any_but(&node_online_cpus, cpu);
- else
+ target = cpumask_any_and_but(cpumask_of_node(cpu_to_node(cpu)),
+ cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
index b84346dbac2c..c1e755c356a3 100644
--- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -9,8 +9,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
@@ -156,10 +154,14 @@ static int meson_ddr_perf_event_add(struct perf_event *event, int flags)
u64 config2 = event->attr.config2;
int i;
- for_each_set_bit(i, (const unsigned long *)&config1, sizeof(config1))
+ for_each_set_bit(i,
+ (const unsigned long *)&config1,
+ BITS_PER_TYPE(config1))
meson_ddr_set_axi_filter(event, i);
- for_each_set_bit(i, (const unsigned long *)&config2, sizeof(config2))
+ for_each_set_bit(i,
+ (const unsigned long *)&config2,
+ BITS_PER_TYPE(config2))
meson_ddr_set_axi_filter(event, i + 64);
if (flags & PERF_EF_START)
@@ -490,6 +492,7 @@ int meson_ddr_pmu_create(struct platform_device *pdev)
*pmu = (struct ddr_pmu) {
.pmu = {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.attr_groups = attr_groups,
@@ -508,7 +511,7 @@ int meson_ddr_pmu_create(struct platform_device *pdev)
fmt_attr_fill(pmu->info.hw_info->fmt_attr);
- pmu->cpu = smp_processor_id();
+ pmu->cpu = raw_smp_processor_id();
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME);
if (!name)
diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
index a78fdb15e26c..f33e9a456e85 100644
--- a/drivers/perf/amlogic/meson_g12_ddr_pmu.c
+++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
@@ -21,23 +21,23 @@
#define DMC_QOS_IRQ BIT(30)
/* DMC bandwidth monitor register address offset */
-#define DMC_MON_G12_CTRL0 (0x20 << 2)
-#define DMC_MON_G12_CTRL1 (0x21 << 2)
-#define DMC_MON_G12_CTRL2 (0x22 << 2)
-#define DMC_MON_G12_CTRL3 (0x23 << 2)
-#define DMC_MON_G12_CTRL4 (0x24 << 2)
-#define DMC_MON_G12_CTRL5 (0x25 << 2)
-#define DMC_MON_G12_CTRL6 (0x26 << 2)
-#define DMC_MON_G12_CTRL7 (0x27 << 2)
-#define DMC_MON_G12_CTRL8 (0x28 << 2)
-
-#define DMC_MON_G12_ALL_REQ_CNT (0x29 << 2)
-#define DMC_MON_G12_ALL_GRANT_CNT (0x2a << 2)
-#define DMC_MON_G12_ONE_GRANT_CNT (0x2b << 2)
-#define DMC_MON_G12_SEC_GRANT_CNT (0x2c << 2)
-#define DMC_MON_G12_THD_GRANT_CNT (0x2d << 2)
-#define DMC_MON_G12_FOR_GRANT_CNT (0x2e << 2)
-#define DMC_MON_G12_TIMER (0x2f << 2)
+#define DMC_MON_G12_CTRL0 (0x0 << 2)
+#define DMC_MON_G12_CTRL1 (0x1 << 2)
+#define DMC_MON_G12_CTRL2 (0x2 << 2)
+#define DMC_MON_G12_CTRL3 (0x3 << 2)
+#define DMC_MON_G12_CTRL4 (0x4 << 2)
+#define DMC_MON_G12_CTRL5 (0x5 << 2)
+#define DMC_MON_G12_CTRL6 (0x6 << 2)
+#define DMC_MON_G12_CTRL7 (0x7 << 2)
+#define DMC_MON_G12_CTRL8 (0x8 << 2)
+
+#define DMC_MON_G12_ALL_REQ_CNT (0x9 << 2)
+#define DMC_MON_G12_ALL_GRANT_CNT (0xa << 2)
+#define DMC_MON_G12_ONE_GRANT_CNT (0xb << 2)
+#define DMC_MON_G12_SEC_GRANT_CNT (0xc << 2)
+#define DMC_MON_G12_THD_GRANT_CNT (0xd << 2)
+#define DMC_MON_G12_FOR_GRANT_CNT (0xe << 2)
+#define DMC_MON_G12_TIMER (0xf << 2)
/* Each bit represent a axi line */
PMU_FORMAT_ATTR(event, "config:0-7");
@@ -355,11 +355,9 @@ static int g12_ddr_pmu_probe(struct platform_device *pdev)
return meson_ddr_pmu_create(pdev);
}
-static int g12_ddr_pmu_remove(struct platform_device *pdev)
+static void g12_ddr_pmu_remove(struct platform_device *pdev)
{
meson_ddr_pmu_remove(pdev);
-
- return 0;
}
static const struct of_device_id meson_ddr_pmu_dt_match[] = {
@@ -377,6 +375,7 @@ static const struct of_device_id meson_ddr_pmu_dt_match[] = {
},
{}
};
+MODULE_DEVICE_TABLE(of, meson_ddr_pmu_dt_match);
static struct platform_driver g12_ddr_pmu_driver = {
.probe = g12_ddr_pmu_probe,
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
index 979a7c2b4f56..81b6f1a62349 100644
--- a/drivers/perf/apple_m1_cpu_pmu.c
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -12,6 +12,7 @@
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
+#include <linux/perf/arm_pmuv3.h>
#include <linux/platform_device.h>
#include <asm/apple_m1_pmu.h>
@@ -47,46 +48,81 @@
* implementations, we'll have to introduce per cpu-type tables.
*/
enum m1_pmu_events {
- M1_PMU_PERFCTR_UNKNOWN_01 = 0x01,
- M1_PMU_PERFCTR_CPU_CYCLES = 0x02,
- M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c,
- M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d,
- M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e,
- M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f,
- M1_PMU_PERFCTR_UNKNOWN_90 = 0x90,
- M1_PMU_PERFCTR_UNKNOWN_93 = 0x93,
- M1_PMU_PERFCTR_UNKNOWN_94 = 0x94,
- M1_PMU_PERFCTR_UNKNOWN_95 = 0x95,
- M1_PMU_PERFCTR_UNKNOWN_96 = 0x96,
- M1_PMU_PERFCTR_UNKNOWN_97 = 0x97,
- M1_PMU_PERFCTR_UNKNOWN_98 = 0x98,
- M1_PMU_PERFCTR_UNKNOWN_99 = 0x99,
- M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a,
- M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b,
- M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c,
- M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f,
- M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf,
- M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0,
- M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1,
- M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4,
- M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5,
- M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6,
- M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8,
- M1_PMU_PERFCTR_UNKNOWN_ca = 0xca,
- M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb,
- M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5,
- M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6,
- M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7,
- M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8,
- M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd,
- M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT,
+ M1_PMU_PERFCTR_RETIRE_UOP = 0x1,
+ M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE = 0x2,
+ M1_PMU_PERFCTR_L1I_TLB_FILL = 0x4,
+ M1_PMU_PERFCTR_L1D_TLB_FILL = 0x5,
+ M1_PMU_PERFCTR_MMU_TABLE_WALK_INSTRUCTION = 0x7,
+ M1_PMU_PERFCTR_MMU_TABLE_WALK_DATA = 0x8,
+ M1_PMU_PERFCTR_L2_TLB_MISS_INSTRUCTION = 0xa,
+ M1_PMU_PERFCTR_L2_TLB_MISS_DATA = 0xb,
+ M1_PMU_PERFCTR_MMU_VIRTUAL_MEMORY_FAULT_NONSPEC = 0xd,
+ M1_PMU_PERFCTR_SCHEDULE_UOP = 0x52,
+ M1_PMU_PERFCTR_INTERRUPT_PENDING = 0x6c,
+ M1_PMU_PERFCTR_MAP_STALL_DISPATCH = 0x70,
+ M1_PMU_PERFCTR_MAP_REWIND = 0x75,
+ M1_PMU_PERFCTR_MAP_STALL = 0x76,
+ M1_PMU_PERFCTR_MAP_INT_UOP = 0x7c,
+ M1_PMU_PERFCTR_MAP_LDST_UOP = 0x7d,
+ M1_PMU_PERFCTR_MAP_SIMD_UOP = 0x7e,
+ M1_PMU_PERFCTR_FLUSH_RESTART_OTHER_NONSPEC = 0x84,
+ M1_PMU_PERFCTR_INST_ALL = 0x8c,
+ M1_PMU_PERFCTR_INST_BRANCH = 0x8d,
+ M1_PMU_PERFCTR_INST_BRANCH_CALL = 0x8e,
+ M1_PMU_PERFCTR_INST_BRANCH_RET = 0x8f,
+ M1_PMU_PERFCTR_INST_BRANCH_TAKEN = 0x90,
+ M1_PMU_PERFCTR_INST_BRANCH_INDIR = 0x93,
+ M1_PMU_PERFCTR_INST_BRANCH_COND = 0x94,
+ M1_PMU_PERFCTR_INST_INT_LD = 0x95,
+ M1_PMU_PERFCTR_INST_INT_ST = 0x96,
+ M1_PMU_PERFCTR_INST_INT_ALU = 0x97,
+ M1_PMU_PERFCTR_INST_SIMD_LD = 0x98,
+ M1_PMU_PERFCTR_INST_SIMD_ST = 0x99,
+ M1_PMU_PERFCTR_INST_SIMD_ALU = 0x9a,
+ M1_PMU_PERFCTR_INST_LDST = 0x9b,
+ M1_PMU_PERFCTR_INST_BARRIER = 0x9c,
+ M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f,
+ M1_PMU_PERFCTR_L1D_TLB_ACCESS = 0xa0,
+ M1_PMU_PERFCTR_L1D_TLB_MISS = 0xa1,
+ M1_PMU_PERFCTR_L1D_CACHE_MISS_ST = 0xa2,
+ M1_PMU_PERFCTR_L1D_CACHE_MISS_LD = 0xa3,
+ M1_PMU_PERFCTR_LD_UNIT_UOP = 0xa6,
+ M1_PMU_PERFCTR_ST_UNIT_UOP = 0xa7,
+ M1_PMU_PERFCTR_L1D_CACHE_WRITEBACK = 0xa8,
+ M1_PMU_PERFCTR_LDST_X64_UOP = 0xb1,
+ M1_PMU_PERFCTR_LDST_XPG_UOP = 0xb2,
+ M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_SUCC = 0xb3,
+ M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_FAIL = 0xb4,
+ M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC = 0xbf,
+ M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC = 0xc0,
+ M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC = 0xc1,
+ M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC = 0xc4,
+ M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC = 0xc5,
+ M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC = 0xc6,
+ M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC = 0xc8,
+ M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC = 0xca,
+ M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC = 0xcb,
+ M1_PMU_PERFCTR_L1I_TLB_MISS_DEMAND = 0xd4,
+ M1_PMU_PERFCTR_MAP_DISPATCH_BUBBLE = 0xd6,
+ M1_PMU_PERFCTR_L1I_CACHE_MISS_DEMAND = 0xdb,
+ M1_PMU_PERFCTR_FETCH_RESTART = 0xde,
+ M1_PMU_PERFCTR_ST_NT_UOP = 0xe5,
+ M1_PMU_PERFCTR_LD_NT_UOP = 0xe6,
+ M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5,
+ M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6,
+ M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7,
+ M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8,
+ M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd,
+ M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT,
/*
* From this point onwards, these are not actual HW events,
* but attributes that get stored in hw->config_base.
*/
- M1_PMU_CFG_COUNT_USER = BIT(8),
- M1_PMU_CFG_COUNT_KERNEL = BIT(9),
+ M1_PMU_CFG_COUNT_USER = BIT(8),
+ M1_PMU_CFG_COUNT_KERNEL = BIT(9),
+ M1_PMU_CFG_COUNT_HOST = BIT(10),
+ M1_PMU_CFG_COUNT_GUEST = BIT(11),
};
/*
@@ -96,46 +132,58 @@ enum m1_pmu_events {
* counters had strange affinities.
*/
static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = {
- [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1,
- [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7),
- [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0),
- [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1),
- [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7),
- [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7),
- [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7),
- [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7,
- [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6,
- [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6,
- [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6,
- [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7,
- [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6,
+ [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1,
+ [M1_PMU_PERFCTR_RETIRE_UOP] = BIT(7),
+ [M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE] = ANY_BUT_0_1 | BIT(0),
+ [M1_PMU_PERFCTR_INST_ALL] = BIT(7) | BIT(1),
+ [M1_PMU_PERFCTR_INST_BRANCH] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_BRANCH_CALL] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_BRANCH_RET] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_BRANCH_TAKEN] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_BRANCH_INDIR] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_BRANCH_COND] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_INT_LD] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_INT_ST] = BIT(7),
+ [M1_PMU_PERFCTR_INST_INT_ALU] = BIT(7),
+ [M1_PMU_PERFCTR_INST_SIMD_LD] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_SIMD_ST] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_INST_SIMD_ALU] = BIT(7),
+ [M1_PMU_PERFCTR_INST_LDST] = BIT(7),
+ [M1_PMU_PERFCTR_INST_BARRIER] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7),
+ [M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6,
+ [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6,
+ [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6,
+ [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7,
+ [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6,
};
static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS,
- /* No idea about the rest yet */
+ [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE,
+ [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_ALL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_BRANCH,
+ [PERF_COUNT_HW_BRANCH_MISSES] = M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC,
+};
+
+#define M1_PMUV3_EVENT_MAP(pmuv3_event, m1_event) \
+ [ARMV8_PMUV3_PERFCTR_##pmuv3_event] = M1_PMU_PERFCTR_##m1_event
+
+static const u16 m1_pmu_pmceid_map[ARMV8_PMUV3_MAX_COMMON_EVENTS] = {
+ [0 ... ARMV8_PMUV3_MAX_COMMON_EVENTS - 1] = HW_OP_UNSUPPORTED,
+ M1_PMUV3_EVENT_MAP(INST_RETIRED, INST_ALL),
+ M1_PMUV3_EVENT_MAP(CPU_CYCLES, CORE_ACTIVE_CYCLE),
+ M1_PMUV3_EVENT_MAP(BR_RETIRED, INST_BRANCH),
+ M1_PMUV3_EVENT_MAP(BR_MIS_PRED_RETIRED, BRANCH_MISPRED_NONSPEC),
};
/* sysfs definitions */
@@ -154,8 +202,8 @@ static ssize_t m1_pmu_events_sysfs_show(struct device *dev,
PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config)
static struct attribute *m1_pmu_event_attrs[] = {
- M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES),
- M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS),
+ M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE),
+ M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INST_ALL),
NULL,
};
@@ -293,11 +341,10 @@ static void m1_pmu_disable_counter_interrupt(unsigned int index)
__m1_pmu_enable_counter_interrupt(index, false);
}
-static void m1_pmu_configure_counter(unsigned int index, u8 event,
- bool user, bool kernel)
+static void __m1_pmu_configure_event_filter(unsigned int index, bool user,
+ bool kernel, bool host)
{
- u64 val, user_bit, kernel_bit;
- int shift;
+ u64 clear, set, user_bit, kernel_bit;
switch (index) {
case 0 ... 7:
@@ -312,19 +359,27 @@ static void m1_pmu_configure_counter(unsigned int index, u8 event,
BUG();
}
- val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1);
-
+ clear = set = 0;
if (user)
- val |= user_bit;
+ set |= user_bit;
else
- val &= ~user_bit;
+ clear |= user_bit;
if (kernel)
- val |= kernel_bit;
+ set |= kernel_bit;
else
- val &= ~kernel_bit;
+ clear |= kernel_bit;
+
+ if (host)
+ sysreg_clear_set_s(SYS_IMP_APL_PMCR1_EL1, clear, set);
+ else if (is_kernel_in_hyp_mode())
+ sysreg_clear_set_s(SYS_IMP_APL_PMCR1_EL12, clear, set);
+}
- write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1);
+static void __m1_pmu_configure_eventsel(unsigned int index, u8 event)
+{
+ u64 clear = 0, set = 0;
+ int shift;
/*
* Counters 0 and 1 have fixed events. For anything else,
@@ -337,21 +392,32 @@ static void m1_pmu_configure_counter(unsigned int index, u8 event,
break;
case 2 ... 5:
shift = (index - 2) * 8;
- val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
- val &= ~((u64)0xff << shift);
- val |= (u64)event << shift;
- write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1);
+ clear |= (u64)0xff << shift;
+ set |= (u64)event << shift;
+ sysreg_clear_set_s(SYS_IMP_APL_PMESR0_EL1, clear, set);
break;
case 6 ... 9:
shift = (index - 6) * 8;
- val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
- val &= ~((u64)0xff << shift);
- val |= (u64)event << shift;
- write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1);
+ clear |= (u64)0xff << shift;
+ set |= (u64)event << shift;
+ sysreg_clear_set_s(SYS_IMP_APL_PMESR1_EL1, clear, set);
break;
}
}
+static void m1_pmu_configure_counter(unsigned int index, unsigned long config_base)
+{
+ bool kernel = config_base & M1_PMU_CFG_COUNT_KERNEL;
+ bool guest = config_base & M1_PMU_CFG_COUNT_GUEST;
+ bool host = config_base & M1_PMU_CFG_COUNT_HOST;
+ bool user = config_base & M1_PMU_CFG_COUNT_USER;
+ u8 evt = config_base & M1_PMU_CFG_EVENT;
+
+ __m1_pmu_configure_event_filter(index, user && host, kernel && host, true);
+ __m1_pmu_configure_event_filter(index, user && guest, kernel && guest, false);
+ __m1_pmu_configure_eventsel(index, evt);
+}
+
/* arm_pmu backend */
static void m1_pmu_enable_event(struct perf_event *event)
{
@@ -362,11 +428,7 @@ static void m1_pmu_enable_event(struct perf_event *event)
user = event->hw.config_base & M1_PMU_CFG_COUNT_USER;
kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL;
- m1_pmu_disable_counter_interrupt(event->hw.idx);
- m1_pmu_disable_counter(event->hw.idx);
- isb();
-
- m1_pmu_configure_counter(event->hw.idx, evt, user, kernel);
+ m1_pmu_configure_counter(event->hw.idx, event->hw.config_base);
m1_pmu_enable_counter(event->hw.idx);
m1_pmu_enable_counter_interrupt(event->hw.idx);
isb();
@@ -400,7 +462,7 @@ static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu)
regs = get_irq_regs();
- for (idx = 0; idx < cpu_pmu->num_events; idx++) {
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, M1_PMU_NR_COUNTERS) {
struct perf_event *event = cpuc->events[idx];
struct perf_sample_data data;
@@ -412,8 +474,7 @@ static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu)
if (!armpmu_event_set_period(event))
continue;
- if (perf_event_overflow(event, &data, regs))
- m1_pmu_disable_event(event);
+ perf_event_overflow(event, &data, regs);
}
cpu_pmu->start(cpu_pmu);
@@ -493,6 +554,37 @@ static int m1_pmu_map_event(struct perf_event *event)
return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
}
+static int m2_pmu_map_event(struct perf_event *event)
+{
+ /*
+ * Same deal as the above, except that M2 has 64bit counters.
+ * Which, as far as we're concerned, actually means 63 bits.
+ * Yes, this is getting awkward.
+ */
+ event->hw.flags |= ARMPMU_EVT_63BIT;
+ return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
+}
+
+static int m1_pmu_map_pmuv3_event(unsigned int eventsel)
+{
+ u16 m1_event = HW_OP_UNSUPPORTED;
+
+ if (eventsel < ARMV8_PMUV3_MAX_COMMON_EVENTS)
+ m1_event = m1_pmu_pmceid_map[eventsel];
+
+ return m1_event == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : m1_event;
+}
+
+static void m1_pmu_init_pmceid(struct arm_pmu *pmu)
+{
+ unsigned int event;
+
+ for (event = 0; event < ARMV8_PMUV3_MAX_COMMON_EVENTS; event++) {
+ if (m1_pmu_map_pmuv3_event(event) >= 0)
+ set_bit(event, pmu->pmceid_bitmap);
+ }
+}
+
static void m1_pmu_reset(void *info)
{
int i;
@@ -513,19 +605,25 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event,
{
unsigned long config_base = 0;
- if (!attr->exclude_guest)
- return -EINVAL;
+ if (!attr->exclude_guest && !is_kernel_in_hyp_mode()) {
+ pr_debug("ARM performance counters do not support mode exclusion\n");
+ return -EOPNOTSUPP;
+ }
if (!attr->exclude_kernel)
config_base |= M1_PMU_CFG_COUNT_KERNEL;
if (!attr->exclude_user)
config_base |= M1_PMU_CFG_COUNT_USER;
+ if (!attr->exclude_host)
+ config_base |= M1_PMU_CFG_COUNT_HOST;
+ if (!attr->exclude_guest)
+ config_base |= M1_PMU_CFG_COUNT_GUEST;
event->config_base = config_base;
return 0;
}
-static int m1_pmu_init(struct arm_pmu *cpu_pmu)
+static int m1_pmu_init(struct arm_pmu *cpu_pmu, u32 flags)
{
cpu_pmu->handle_irq = m1_pmu_handle_irq;
cpu_pmu->enable = m1_pmu_enable_event;
@@ -536,11 +634,21 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->clear_event_idx = m1_pmu_clear_event_idx;
cpu_pmu->start = m1_pmu_start;
cpu_pmu->stop = m1_pmu_stop;
- cpu_pmu->map_event = m1_pmu_map_event;
+
+ if (flags & ARMPMU_EVT_47BIT)
+ cpu_pmu->map_event = m1_pmu_map_event;
+ else if (flags & ARMPMU_EVT_63BIT)
+ cpu_pmu->map_event = m2_pmu_map_event;
+ else
+ return WARN_ON(-EINVAL);
+
cpu_pmu->reset = m1_pmu_reset;
cpu_pmu->set_event_filter = m1_pmu_set_event_filter;
- cpu_pmu->num_events = M1_PMU_NR_COUNTERS;
+ cpu_pmu->map_pmuv3_event = m1_pmu_map_pmuv3_event;
+ m1_pmu_init_pmceid(cpu_pmu);
+
+ bitmap_set(cpu_pmu->cntr_mask, 0, M1_PMU_NR_COUNTERS);
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group;
return 0;
@@ -550,16 +658,30 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu)
static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "apple_icestorm_pmu";
- return m1_pmu_init(cpu_pmu);
+ return m1_pmu_init(cpu_pmu, ARMPMU_EVT_47BIT);
}
static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "apple_firestorm_pmu";
- return m1_pmu_init(cpu_pmu);
+ return m1_pmu_init(cpu_pmu, ARMPMU_EVT_47BIT);
+}
+
+static int m2_pmu_avalanche_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->name = "apple_avalanche_pmu";
+ return m1_pmu_init(cpu_pmu, ARMPMU_EVT_63BIT);
+}
+
+static int m2_pmu_blizzard_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->name = "apple_blizzard_pmu";
+ return m1_pmu_init(cpu_pmu, ARMPMU_EVT_63BIT);
}
static const struct of_device_id m1_pmu_of_device_ids[] = {
+ { .compatible = "apple,avalanche-pmu", .data = m2_pmu_avalanche_init, },
+ { .compatible = "apple,blizzard-pmu", .data = m2_pmu_blizzard_init, },
{ .compatible = "apple,icestorm-pmu", .data = m1_pmu_ice_init, },
{ .compatible = "apple,firestorm-pmu", .data = m1_pmu_fire_init, },
{ },
@@ -581,4 +703,3 @@ static struct platform_driver m1_pmu_driver = {
};
module_platform_driver(m1_pmu_driver);
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 03b1309875ae..1cc3214d6b6d 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -7,10 +7,7 @@
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
@@ -130,8 +127,6 @@ enum cci_models {
static void pmu_write_counters(struct cci_pmu *cci_pmu,
unsigned long *mask);
-static ssize_t __maybe_unused cci_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf);
static ssize_t __maybe_unused cci_pmu_event_show(struct device *dev,
struct device_attribute *attr, char *buf);
@@ -141,7 +136,7 @@ static ssize_t __maybe_unused cci_pmu_event_show(struct device *dev,
})[0].attr.attr
#define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \
- CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config)
+ CCI_EXT_ATTR_ENTRY(_name, device_show_string, _config)
#define CCI_EVENT_EXT_ATTR_ENTRY(_name, _config) \
CCI_EXT_ATTR_ENTRY(_name, cci_pmu_event_show, (unsigned long)_config)
@@ -645,7 +640,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu)
struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
DECLARE_BITMAP(mask, HW_CNTRS_MAX);
- bitmap_zero(mask, cci_pmu->num_cntrs);
+ bitmap_zero(mask, HW_CNTRS_MAX);
for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) {
struct perf_event *event = cci_hw->events[i];
@@ -656,7 +651,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu)
if (event->hw.state & PERF_HES_STOPPED)
continue;
if (event->hw.state & PERF_HES_ARCH) {
- set_bit(i, mask);
+ __set_bit(i, mask);
event->hw.state &= ~PERF_HES_ARCH;
}
}
@@ -691,14 +686,6 @@ static void __cci_pmu_disable(struct cci_pmu *cci_pmu)
writel(val, cci_pmu->ctrl_base + CCI_PMCR);
}
-static ssize_t cci_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *eattr = container_of(attr,
- struct dev_ext_attribute, attr);
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-
static ssize_t cci_pmu_event_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1412,6 +1399,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
cci_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.name = cci_pmu->model->name,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = cci_pmu_enable,
@@ -1700,16 +1688,14 @@ error_pmu_init:
return ret;
}
-static int cci_pmu_remove(struct platform_device *pdev)
+static void cci_pmu_remove(struct platform_device *pdev)
{
if (!g_cci_pmu)
- return 0;
+ return;
cpuhp_remove_state(CPUHP_AP_PERF_ARM_CCI_ONLINE);
perf_pmu_unregister(&g_cci_pmu->pmu);
g_cci_pmu = NULL;
-
- return 0;
}
static struct platform_driver cci_pmu_driver = {
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 728d13d8e98a..8af3563fdf60 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -215,18 +215,9 @@ static void arm_ccn_pmu_config_set(u64 *config, u32 node_xp, u32 type, u32 port)
*config |= (node_xp << 0) | (type << 8) | (port << 24);
}
-static ssize_t arm_ccn_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *ea = container_of(attr,
- struct dev_ext_attribute, attr);
-
- return sysfs_emit(buf, "%s\n", (char *)ea->var);
-}
-
#define CCN_FORMAT_ATTR(_name, _config) \
struct dev_ext_attribute arm_ccn_pmu_format_attr_##_name = \
- { __ATTR(_name, S_IRUGO, arm_ccn_pmu_format_show, \
+ { __ATTR(_name, S_IRUGO, device_show_string, \
NULL), _config }
static CCN_FORMAT_ATTR(node, "config:0-7");
@@ -574,7 +565,7 @@ module_param_named(pmu_poll_period_us, arm_ccn_pmu_poll_period_us, uint,
static ktime_t arm_ccn_pmu_timer_period(void)
{
- return ns_to_ktime((u64)arm_ccn_pmu_poll_period_us * 1000);
+ return us_to_ktime((u64)arm_ccn_pmu_poll_period_us);
}
@@ -1265,6 +1256,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
/* Perf driver registration */
ccn->dt.pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = ccn->dev,
.attr_groups = arm_ccn_pmu_attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = arm_ccn_pmu_event_init,
@@ -1281,9 +1273,8 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
/* No overflow interrupt? Have to use a timer instead. */
if (!ccn->irq) {
dev_info(ccn->dev, "No access to interrupts, using timer.\n");
- hrtimer_init(&ccn->dt.hrtimer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
- ccn->dt.hrtimer.function = arm_ccn_pmu_timer_handler;
+ hrtimer_setup(&ccn->dt.hrtimer, arm_ccn_pmu_timer_handler, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
}
/* Pick one CPU which we will use to collect data from CCN... */
@@ -1515,13 +1506,11 @@ static int arm_ccn_probe(struct platform_device *pdev)
return arm_ccn_pmu_init(ccn);
}
-static int arm_ccn_remove(struct platform_device *pdev)
+static void arm_ccn_remove(struct platform_device *pdev)
{
struct arm_ccn *ccn = platform_get_drvdata(pdev);
arm_ccn_pmu_cleanup(ccn);
-
- return 0;
}
static const struct of_device_id arm_ccn_match[] = {
@@ -1571,4 +1560,5 @@ module_init(arm_ccn_init);
module_exit(arm_ccn_exit);
MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>");
+MODULE_DESCRIPTION("ARM CCN (Cache Coherent Network) Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 1deb61b22bc7..23245352a3fc 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2016-2020 Arm Limited
-// CMN-600 Coherent Mesh Network PMU driver
+// ARM CMN/CI interconnect PMU driver
#include <linux/acpi.h>
#include <linux/bitfield.h>
@@ -24,14 +24,6 @@
#define CMN_NI_NODE_ID GENMASK_ULL(31, 16)
#define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32)
-#define CMN_NODEID_DEVID(reg) ((reg) & 3)
-#define CMN_NODEID_EXT_DEVID(reg) ((reg) & 1)
-#define CMN_NODEID_PID(reg) (((reg) >> 2) & 1)
-#define CMN_NODEID_EXT_PID(reg) (((reg) >> 1) & 3)
-#define CMN_NODEID_1x1_PID(reg) (((reg) >> 2) & 7)
-#define CMN_NODEID_X(reg, bits) ((reg) >> (3 + (bits)))
-#define CMN_NODEID_Y(reg, bits) (((reg) >> 3) & ((1U << (bits)) - 1))
-
#define CMN_CHILD_INFO 0x0080
#define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0)
#define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16)
@@ -43,39 +35,50 @@
#define CMN_MAX_XPS (CMN_MAX_DIMENSION * CMN_MAX_DIMENSION)
#define CMN_MAX_DTMS (CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4)
+/* Currently XPs are the node type we can have most of; others top out at 128 */
+#define CMN_MAX_NODES_PER_EVENT CMN_MAX_XPS
+
/* The CFG node has various info besides the discovery tree */
-#define CMN_CFGM_PERIPH_ID_2 0x0010
-#define CMN_CFGM_PID2_REVISION GENMASK(7, 4)
+#define CMN_CFGM_PERIPH_ID_01 0x0008
+#define CMN_CFGM_PID0_PART_0 GENMASK_ULL(7, 0)
+#define CMN_CFGM_PID1_PART_1 GENMASK_ULL(35, 32)
+#define CMN_CFGM_PERIPH_ID_23 0x0010
+#define CMN_CFGM_PID2_REVISION GENMASK_ULL(7, 4)
-#define CMN_CFGM_INFO_GLOBAL 0x900
+#define CMN_CFGM_INFO_GLOBAL 0x0900
#define CMN_INFO_MULTIPLE_DTM_EN BIT_ULL(63)
#define CMN_INFO_RSP_VC_NUM GENMASK_ULL(53, 52)
#define CMN_INFO_DAT_VC_NUM GENMASK_ULL(51, 50)
+#define CMN_INFO_DEVICE_ISO_ENABLE BIT_ULL(44)
-#define CMN_CFGM_INFO_GLOBAL_1 0x908
+#define CMN_CFGM_INFO_GLOBAL_1 0x0908
#define CMN_INFO_SNP_VC_NUM GENMASK_ULL(3, 2)
#define CMN_INFO_REQ_VC_NUM GENMASK_ULL(1, 0)
/* XPs also have some local topology info which has uses too */
-#define CMN_MXP__CONNECT_INFO_P0 0x0008
-#define CMN_MXP__CONNECT_INFO_P1 0x0010
-#define CMN_MXP__CONNECT_INFO_P2 0x0028
-#define CMN_MXP__CONNECT_INFO_P3 0x0030
-#define CMN_MXP__CONNECT_INFO_P4 0x0038
-#define CMN_MXP__CONNECT_INFO_P5 0x0040
-#define CMN__CONNECT_INFO_DEVICE_TYPE GENMASK_ULL(4, 0)
+#define CMN_MXP__CONNECT_INFO(p) (0x0008 + 8 * (p))
+#define CMN__CONNECT_INFO_DEVICE_TYPE GENMASK_ULL(5, 0)
+
+#define CMN_MAX_PORTS 6
+#define CI700_CONNECT_INFO_P2_5_OFFSET 0x10
/* PMU registers occupy the 3rd 4KB page of each node's region */
#define CMN_PMU_OFFSET 0x2000
+/* ...except when they don't :( */
+#define CMN_S3_R1_DTM_OFFSET 0xa000
+#define CMN_S3_PMU_OFFSET 0xd900
/* For most nodes, this is all there is */
#define CMN_PMU_EVENT_SEL 0x000
#define CMN__PMU_CBUSY_SNTHROTTLE_SEL GENMASK_ULL(44, 42)
+#define CMN__PMU_SN_HOME_SEL GENMASK_ULL(40, 39)
+#define CMN__PMU_HBT_LBT_SEL GENMASK_ULL(38, 37)
#define CMN__PMU_CLASS_OCCUP_ID GENMASK_ULL(36, 35)
/* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */
#define CMN__PMU_OCCUP1_ID GENMASK_ULL(34, 32)
-/* HN-Ps are weird... */
+/* Some types are designed to coexist with another device in the same node */
+#define CMN_CCLA_PMU_EVENT_SEL 0x008
#define CMN_HNP_PMU_EVENT_SEL 0x008
/* DTMs live in the PMU space of XP registers */
@@ -109,7 +112,9 @@
#define CMN_DTM_PMEVCNTSR 0x240
-#define CMN_DTM_UNIT_INFO 0x0910
+#define CMN650_DTM_UNIT_INFO 0x0910
+#define CMN_DTM_UNIT_INFO 0x0960
+#define CMN_DTM_UNIT_INFO_DTC_DOMAIN GENMASK_ULL(1, 0)
#define CMN_DTM_NUM_COUNTERS 4
/* Want more local counters? Why not replicate the whole DTM! Ugh... */
@@ -118,27 +123,28 @@
/* The DTC node is where the magic happens */
#define CMN_DT_DTC_CTL 0x0a00
#define CMN_DT_DTC_CTL_DT_EN BIT(0)
+#define CMN_DT_DTC_CTL_CG_DISABLE BIT(10)
/* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */
#define _CMN_DT_CNT_REG(n) ((((n) / 2) * 4 + (n) % 2) * 4)
-#define CMN_DT_PMEVCNT(n) (CMN_PMU_OFFSET + _CMN_DT_CNT_REG(n))
-#define CMN_DT_PMCCNTR (CMN_PMU_OFFSET + 0x40)
+#define CMN_DT_PMEVCNT(dtc, n) ((dtc)->pmu_base + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTR(dtc) ((dtc)->pmu_base + 0x40)
-#define CMN_DT_PMEVCNTSR(n) (CMN_PMU_OFFSET + 0x50 + _CMN_DT_CNT_REG(n))
-#define CMN_DT_PMCCNTRSR (CMN_PMU_OFFSET + 0x90)
+#define CMN_DT_PMEVCNTSR(dtc, n) ((dtc)->pmu_base + 0x50 + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTRSR(dtc) ((dtc)->pmu_base + 0x90)
-#define CMN_DT_PMCR (CMN_PMU_OFFSET + 0x100)
+#define CMN_DT_PMCR(dtc) ((dtc)->pmu_base + 0x100)
#define CMN_DT_PMCR_PMU_EN BIT(0)
#define CMN_DT_PMCR_CNTR_RST BIT(5)
#define CMN_DT_PMCR_OVFL_INTR_EN BIT(6)
-#define CMN_DT_PMOVSR (CMN_PMU_OFFSET + 0x118)
-#define CMN_DT_PMOVSR_CLR (CMN_PMU_OFFSET + 0x120)
+#define CMN_DT_PMOVSR(dtc) ((dtc)->pmu_base + 0x118)
+#define CMN_DT_PMOVSR_CLR(dtc) ((dtc)->pmu_base + 0x120)
-#define CMN_DT_PMSSR (CMN_PMU_OFFSET + 0x128)
+#define CMN_DT_PMSSR(dtc) ((dtc)->pmu_base + 0x128)
#define CMN_DT_PMSSR_SS_STATUS(n) BIT(n)
-#define CMN_DT_PMSRR (CMN_PMU_OFFSET + 0x130)
+#define CMN_DT_PMSRR(dtc) ((dtc)->pmu_base + 0x130)
#define CMN_DT_PMSRR_SS_REQ BIT(0)
#define CMN_DT_NUM_COUNTERS 8
@@ -166,12 +172,11 @@
#define CMN_EVENT_BYNODEID(event) FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config)
#define CMN_EVENT_NODEID(event) FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config)
-#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(27, 24)
+#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(30, 27)
#define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48)
#define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51)
-/* Note that we don't yet support the tertiary match group on newer IPs */
-#define CMN_CONFIG_WP_GRP BIT_ULL(56)
-#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57)
+#define CMN_CONFIG_WP_GRP GENMASK_ULL(57, 56)
+#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(58)
#define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0)
#define CMN_CONFIG2_WP_MASK GENMASK_ULL(63, 0)
@@ -188,37 +193,52 @@
#define CMN_WP_DOWN 2
+/* Internal values for encoding event support */
enum cmn_model {
CMN600 = 1,
CMN650 = 2,
CMN700 = 4,
CI700 = 8,
+ CMNS3 = 16,
/* ...and then we can use bitmap tricks for commonality */
CMN_ANY = -1,
NOT_CMN600 = -2,
- CMN_650ON = CMN650 | CMN700,
+ CMN_650ON = CMN650 | CMN700 | CMNS3,
+};
+
+/* Actual part numbers and revision IDs defined by the hardware */
+enum cmn_part {
+ PART_CMN600 = 0x434,
+ PART_CMN650 = 0x436,
+ PART_CMN700 = 0x43c,
+ PART_CI700 = 0x43a,
+ PART_CMN_S3 = 0x43e,
};
/* CMN-600 r0px shouldn't exist in silicon, thankfully */
enum cmn_revision {
- CMN600_R1P0,
- CMN600_R1P1,
- CMN600_R1P2,
- CMN600_R1P3,
- CMN600_R2P0,
- CMN600_R3P0,
- CMN600_R3P1,
- CMN650_R0P0 = 0,
- CMN650_R1P0,
- CMN650_R1P1,
- CMN650_R2P0,
- CMN650_R1P2,
- CMN700_R0P0 = 0,
- CMN700_R1P0,
- CMN700_R2P0,
- CI700_R0P0 = 0,
- CI700_R1P0,
- CI700_R2P0,
+ REV_CMN600_R1P0,
+ REV_CMN600_R1P1,
+ REV_CMN600_R1P2,
+ REV_CMN600_R1P3,
+ REV_CMN600_R2P0,
+ REV_CMN600_R3P0,
+ REV_CMN600_R3P1,
+ REV_CMN650_R0P0 = 0,
+ REV_CMN650_R1P0,
+ REV_CMN650_R1P1,
+ REV_CMN650_R2P0,
+ REV_CMN650_R1P2,
+ REV_CMN700_R0P0 = 0,
+ REV_CMN700_R1P0,
+ REV_CMN700_R2P0,
+ REV_CMN700_R3P0,
+ REV_CMNS3_R0P0 = 0,
+ REV_CMNS3_R0P1,
+ REV_CMNS3_R1P0,
+ REV_CI700_R0P0 = 0,
+ REV_CI700_R1P0,
+ REV_CI700_R2P0,
};
enum cmn_node_type {
@@ -244,6 +264,10 @@ enum cmn_node_type {
CMN_TYPE_CCHA,
CMN_TYPE_CCLA,
CMN_TYPE_CCLA_RNI,
+ CMN_TYPE_HNS = 0x200,
+ CMN_TYPE_HNS_MPAM_S,
+ CMN_TYPE_HNS_MPAM_NS,
+ CMN_TYPE_APB = 0x1000,
/* Not a real node type */
CMN_TYPE_WP = 0x7770
};
@@ -253,6 +277,8 @@ enum cmn_filter_select {
SEL_OCCUP1ID,
SEL_CLASS_OCCUP_ID,
SEL_CBUSY_SNTHROTTLE_SEL,
+ SEL_HBT_LBT_SEL,
+ SEL_SN_HOME_SEL,
SEL_MAX
};
@@ -261,16 +287,16 @@ struct arm_cmn_node {
u16 id, logid;
enum cmn_node_type type;
- int dtm;
- union {
- /* DN/HN-F/CXHA */
- struct {
- u8 val : 4;
- u8 count : 4;
- } occupid[SEL_MAX];
- /* XP */
- u8 dtc;
- };
+ /* XP properties really, but replicated to children for convenience */
+ u8 dtm;
+ s8 dtc;
+ u8 portid_bits:4;
+ u8 deviceid_bits:4;
+ /* DN/HN-F/CXHA */
+ struct {
+ u8 val : 4;
+ u8 count : 4;
+ } occupid[SEL_MAX];
union {
u8 event[4];
__le32 event_sel;
@@ -291,8 +317,9 @@ struct arm_cmn_dtm {
struct arm_cmn_dtc {
void __iomem *base;
+ void __iomem *pmu_base;
int irq;
- int irq_friend;
+ s8 irq_friend;
bool cc_active;
struct perf_event *counters[CMN_DT_NUM_COUNTERS];
@@ -308,7 +335,7 @@ struct arm_cmn {
unsigned int state;
enum cmn_revision rev;
- enum cmn_model model;
+ enum cmn_part part;
u8 mesh_x;
u8 mesh_y;
u16 num_xps;
@@ -341,49 +368,33 @@ struct arm_cmn {
static int arm_cmn_hp_state;
struct arm_cmn_nodeid {
- u8 x;
- u8 y;
u8 port;
u8 dev;
};
static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
{
- return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2);
+ return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1));
}
-static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
+static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn_node *dn)
{
struct arm_cmn_nodeid nid;
- if (cmn->num_xps == 1) {
- nid.x = 0;
- nid.y = 0;
- nid.port = CMN_NODEID_1x1_PID(id);
- nid.dev = CMN_NODEID_DEVID(id);
- } else {
- int bits = arm_cmn_xyidbits(cmn);
-
- nid.x = CMN_NODEID_X(id, bits);
- nid.y = CMN_NODEID_Y(id, bits);
- if (cmn->ports_used & 0xc) {
- nid.port = CMN_NODEID_EXT_PID(id);
- nid.dev = CMN_NODEID_EXT_DEVID(id);
- } else {
- nid.port = CMN_NODEID_PID(id);
- nid.dev = CMN_NODEID_DEVID(id);
- }
- }
+ nid.dev = dn->id & ((1U << dn->deviceid_bits) - 1);
+ nid.port = (dn->id >> dn->deviceid_bits) & ((1U << dn->portid_bits) - 1);
return nid;
}
static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
const struct arm_cmn_node *dn)
{
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
- int xp_idx = cmn->mesh_x * nid.y + nid.x;
+ int id = dn->id >> (dn->portid_bits + dn->deviceid_bits);
+ int bits = arm_cmn_xyidbits(cmn);
+ int x = id >> bits;
+ int y = id & ((1U << bits) - 1);
- return cmn->xps + xp_idx;
+ return cmn->xps + cmn->mesh_x * y + x;
}
static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
enum cmn_node_type type)
@@ -396,6 +407,53 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
return NULL;
}
+static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
+{
+ switch (cmn->part) {
+ case PART_CMN600:
+ return CMN600;
+ case PART_CMN650:
+ return CMN650;
+ case PART_CMN700:
+ return CMN700;
+ case PART_CI700:
+ return CI700;
+ case PART_CMN_S3:
+ return CMNS3;
+ default:
+ return 0;
+ };
+}
+
+static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn)
+{
+ if (cmn->part == PART_CMN_S3) {
+ if (cmn->rev >= REV_CMNS3_R1P0 && dn->type == CMN_TYPE_XP)
+ return CMN_S3_R1_DTM_OFFSET;
+ return CMN_S3_PMU_OFFSET;
+ }
+ return CMN_PMU_OFFSET;
+}
+
+static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn,
+ const struct arm_cmn_node *xp, int port)
+{
+ int offset = CMN_MXP__CONNECT_INFO(port) - arm_cmn_pmu_offset(cmn, xp);
+
+ if (port >= 2) {
+ if (cmn->part == PART_CMN600 || cmn->part == PART_CMN650)
+ return 0;
+ /*
+ * CI-700 may have extra ports, but still has the
+ * mesh_port_connect_info registers in the way.
+ */
+ if (cmn->part == PART_CI700)
+ offset += CI700_CONNECT_INFO_P2_5_OFFSET;
+ }
+
+ return readl_relaxed(xp->pmu_base + offset);
+}
+
static struct dentry *arm_cmn_debugfs;
#ifdef CONFIG_DEBUG_FS
@@ -427,20 +485,26 @@ static const char *arm_cmn_device_type(u8 type)
case 0x17: return "RN-F_C_E|";
case 0x18: return " RN-F_E |";
case 0x19: return "RN-F_E_E|";
+ case 0x1a: return " HN-S |";
+ case 0x1b: return " LCN |";
case 0x1c: return " MTSX |";
case 0x1d: return " HN-V |";
case 0x1e: return " CCG |";
+ case 0x20: return " RN-F_F |";
+ case 0x21: return "RN-F_F_E|";
+ case 0x22: return " SN-F_F |";
default: return " ???? |";
}
}
-static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
+static void arm_cmn_show_logid(struct seq_file *s, const struct arm_cmn_node *xp, int p, int d)
{
struct arm_cmn *cmn = s->private;
struct arm_cmn_node *dn;
+ u16 id = xp->id | d | (p << xp->deviceid_bits);
for (dn = cmn->dns; dn->type; dn++) {
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+ int pad = dn->logid < 10;
if (dn->type == CMN_TYPE_XP)
continue;
@@ -448,10 +512,10 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
if (dn->type < CMN_TYPE_HNI)
continue;
- if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d)
+ if (dn->id != id)
continue;
- seq_printf(s, " #%-2d |", dn->logid);
+ seq_printf(s, " %*c#%-*d |", pad + 1, ' ', 3 - pad, dn->logid);
return;
}
seq_puts(s, " |");
@@ -464,38 +528,32 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
seq_puts(s, " X");
for (x = 0; x < cmn->mesh_x; x++)
- seq_printf(s, " %d ", x);
+ seq_printf(s, " %-2d ", x);
seq_puts(s, "\nY P D+");
y = cmn->mesh_y;
while (y--) {
int xp_base = cmn->mesh_x * y;
- u8 port[6][CMN_MAX_DIMENSION];
+ struct arm_cmn_node *xp = cmn->xps + xp_base;
+ u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION];
for (x = 0; x < cmn->mesh_x; x++)
seq_puts(s, "--------+");
- seq_printf(s, "\n%d |", y);
+ seq_printf(s, "\n%-2d |", y);
for (x = 0; x < cmn->mesh_x; x++) {
- struct arm_cmn_node *xp = cmn->xps + xp_base + x;
- void __iomem *base = xp->pmu_base - CMN_PMU_OFFSET;
-
- port[0][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P0);
- port[1][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P1);
- port[2][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P2);
- port[3][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P3);
- port[4][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P4);
- port[5][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P5);
- seq_printf(s, " XP #%-2d |", xp_base + x);
+ for (p = 0; p < CMN_MAX_PORTS; p++)
+ port[p][x] = arm_cmn_device_connect_info(cmn, xp + x, p);
+ seq_printf(s, " XP #%-3d|", xp_base + x);
}
seq_puts(s, "\n |");
for (x = 0; x < cmn->mesh_x; x++) {
- u8 dtc = cmn->xps[xp_base + x].dtc;
+ s8 dtc = xp[x].dtc;
- if (dtc & (dtc - 1))
+ if (dtc < 0)
seq_puts(s, " DTC ?? |");
else
- seq_printf(s, " DTC %ld |", __ffs(dtc));
+ seq_printf(s, " DTC %d |", dtc);
}
seq_puts(s, "\n |");
for (x = 0; x < cmn->mesh_x; x++)
@@ -507,10 +565,10 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
seq_puts(s, arm_cmn_device_type(port[p][x]));
seq_puts(s, "\n 0|");
for (x = 0; x < cmn->mesh_x; x++)
- arm_cmn_show_logid(s, x, y, p, 0);
+ arm_cmn_show_logid(s, xp + x, p, 0);
seq_puts(s, "\n 1|");
for (x = 0; x < cmn->mesh_x; x++)
- arm_cmn_show_logid(s, x, y, p, 1);
+ arm_cmn_show_logid(s, xp + x, p, 1);
}
seq_puts(s, "\n-----+");
}
@@ -538,21 +596,31 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
struct arm_cmn_hw_event {
struct arm_cmn_node *dn;
- u64 dtm_idx[4];
- unsigned int dtc_idx;
- u8 dtcs_used;
+ u64 dtm_idx[DIV_ROUND_UP(CMN_MAX_NODES_PER_EVENT * 2, 64)];
+ s8 dtc_idx[CMN_MAX_DTCS];
u8 num_dns;
u8 dtm_offset;
+
+ /*
+ * WP config registers are divided to UP and DOWN events. We need to
+ * keep to track only one of them.
+ */
+ DECLARE_BITMAP(wp_idx, CMN_MAX_XPS);
+
bool wide_sel;
enum cmn_filter_select filter_sel;
};
+static_assert(sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event, target));
#define for_each_hw_dn(hw, dn, i) \
for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
+/* @i is the DTC number, @idx is the counter index on that DTC */
+#define for_each_hw_dtc_idx(hw, i, idx) \
+ for (int i = 0, idx; i < CMN_MAX_DTCS; i++) if ((idx = hw->dtc_idx[i]) >= 0)
+
static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
{
- BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
return (struct arm_cmn_hw_event *)&event->hw;
}
@@ -566,6 +634,17 @@ static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos)
return (x[pos / 32] >> ((pos % 32) * 2)) & 3;
}
+static void arm_cmn_set_wp_idx(unsigned long *wp_idx, unsigned int pos, bool val)
+{
+ if (val)
+ set_bit(pos, wp_idx);
+}
+
+static unsigned int arm_cmn_get_wp_idx(unsigned long *wp_idx, unsigned int pos)
+{
+ return test_bit(pos, wp_idx);
+}
+
struct arm_cmn_event_attr {
struct device_attribute attr;
enum cmn_model model;
@@ -628,7 +707,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
eattr = container_of(attr, typeof(*eattr), attr.attr);
- if (!(eattr->model & cmn->model))
+ if (!(eattr->model & arm_cmn_model(cmn)))
return 0;
type = eattr->type;
@@ -646,30 +725,30 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3)))
return 0;
- if (chan == 4 && cmn->model == CMN600)
+ if (chan == 4 && cmn->part == PART_CMN600)
return 0;
if ((chan == 5 && cmn->rsp_vc_num < 2) ||
(chan == 6 && cmn->dat_vc_num < 2) ||
- (chan == 7 && cmn->snp_vc_num < 2) ||
- (chan == 8 && cmn->req_vc_num < 2))
+ (chan == 7 && cmn->req_vc_num < 2) ||
+ (chan == 8 && cmn->snp_vc_num < 2))
return 0;
}
/* Revision-specific differences */
- if (cmn->model == CMN600) {
- if (cmn->rev < CMN600_R1P3) {
+ if (cmn->part == PART_CMN600) {
+ if (cmn->rev < REV_CMN600_R1P3) {
if (type == CMN_TYPE_CXRA && eventid > 0x10)
return 0;
}
- if (cmn->rev < CMN600_R1P2) {
+ if (cmn->rev < REV_CMN600_R1P2) {
if (type == CMN_TYPE_HNF && eventid == 0x1b)
return 0;
if (type == CMN_TYPE_CXRA || type == CMN_TYPE_CXHA)
return 0;
}
- } else if (cmn->model == CMN650) {
- if (cmn->rev < CMN650_R2P0 || cmn->rev == CMN650_R1P2) {
+ } else if (cmn->part == PART_CMN650) {
+ if (cmn->rev < REV_CMN650_R2P0 || cmn->rev == REV_CMN650_R1P2) {
if (type == CMN_TYPE_HNF && eventid > 0x22)
return 0;
if (type == CMN_TYPE_SBSX && eventid == 0x17)
@@ -677,8 +756,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
if (type == CMN_TYPE_RNI && eventid > 0x10)
return 0;
}
- } else if (cmn->model == CMN700) {
- if (cmn->rev < CMN700_R2P0) {
+ } else if (cmn->part == PART_CMN700) {
+ if (cmn->rev < REV_CMN700_R2P0) {
if (type == CMN_TYPE_HNF && eventid > 0x2c)
return 0;
if (type == CMN_TYPE_CCHA && eventid > 0x74)
@@ -686,7 +765,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
if (type == CMN_TYPE_CCLA && eventid > 0x27)
return 0;
}
- if (cmn->rev < CMN700_R1P0) {
+ if (cmn->rev < REV_CMN700_R1P0) {
if (type == CMN_TYPE_HNF && eventid > 0x2b)
return 0;
}
@@ -702,8 +781,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
_CMN_EVENT_ATTR(_model, dn_##_name, CMN_TYPE_DVM, _event, _occup, _fsel)
#define CMN_EVENT_DTC(_name) \
CMN_EVENT_ATTR(CMN_ANY, dtc_##_name, CMN_TYPE_DTC, 0)
-#define _CMN_EVENT_HNF(_model, _name, _event, _occup, _fsel) \
- _CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event, _occup, _fsel)
+#define CMN_EVENT_HNF(_model, _name, _event) \
+ CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event)
#define CMN_EVENT_HNI(_name, _event) \
CMN_EVENT_ATTR(CMN_ANY, hni_##_name, CMN_TYPE_HNI, _event)
#define CMN_EVENT_HNP(_name, _event) \
@@ -722,12 +801,12 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
CMN_EVENT_ATTR(CMN_ANY, cxha_##_name, CMN_TYPE_CXHA, _event)
#define CMN_EVENT_CCRA(_name, _event) \
CMN_EVENT_ATTR(CMN_ANY, ccra_##_name, CMN_TYPE_CCRA, _event)
-#define CMN_EVENT_CCHA(_name, _event) \
- CMN_EVENT_ATTR(CMN_ANY, ccha_##_name, CMN_TYPE_CCHA, _event)
+#define CMN_EVENT_CCHA(_model, _name, _event) \
+ CMN_EVENT_ATTR(_model, ccha_##_name, CMN_TYPE_CCHA, _event)
#define CMN_EVENT_CCLA(_name, _event) \
CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
-#define CMN_EVENT_CCLA_RNI(_name, _event) \
- CMN_EVENT_ATTR(CMN_ANY, ccla_rni_##_name, CMN_TYPE_CCLA_RNI, _event)
+#define CMN_EVENT_HNS(_name, _event) \
+ CMN_EVENT_ATTR(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
#define CMN_EVENT_DVM(_model, _name, _event) \
_CMN_EVENT_DVM(_model, _name, _event, 0, SEL_NONE)
@@ -735,32 +814,68 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
_CMN_EVENT_DVM(_model, _name##_all, _event, 0, SEL_OCCUP1ID), \
_CMN_EVENT_DVM(_model, _name##_dvmop, _event, 1, SEL_OCCUP1ID), \
_CMN_EVENT_DVM(_model, _name##_dvmsync, _event, 2, SEL_OCCUP1ID)
-#define CMN_EVENT_HNF(_model, _name, _event) \
- _CMN_EVENT_HNF(_model, _name, _event, 0, SEL_NONE)
+
+#define CMN_EVENT_HN_OCC(_model, _name, _type, _event) \
+ _CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 1, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 2, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(_model, _name##_atomic, _type, _event, 3, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(_model, _name##_stash, _type, _event, 4, SEL_OCCUP1ID)
+#define CMN_EVENT_HN_CLS(_model, _name, _type, _event) \
+ _CMN_EVENT_ATTR(_model, _name##_class0, _type, _event, 0, SEL_CLASS_OCCUP_ID), \
+ _CMN_EVENT_ATTR(_model, _name##_class1, _type, _event, 1, SEL_CLASS_OCCUP_ID), \
+ _CMN_EVENT_ATTR(_model, _name##_class2, _type, _event, 2, SEL_CLASS_OCCUP_ID), \
+ _CMN_EVENT_ATTR(_model, _name##_class3, _type, _event, 3, SEL_CLASS_OCCUP_ID)
+#define CMN_EVENT_HN_SNT(_model, _name, _type, _event) \
+ _CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_group0_read, _type, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_group0_write, _type, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_group1_read, _type, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_group1_write, _type, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL)
+
+#define CMN_EVENT_HNF_OCC(_model, _name, _event) \
+ CMN_EVENT_HN_OCC(_model, hnf_##_name, CMN_TYPE_HNF, _event)
#define CMN_EVENT_HNF_CLS(_model, _name, _event) \
- _CMN_EVENT_HNF(_model, _name##_class0, _event, 0, SEL_CLASS_OCCUP_ID), \
- _CMN_EVENT_HNF(_model, _name##_class1, _event, 1, SEL_CLASS_OCCUP_ID), \
- _CMN_EVENT_HNF(_model, _name##_class2, _event, 2, SEL_CLASS_OCCUP_ID), \
- _CMN_EVENT_HNF(_model, _name##_class3, _event, 3, SEL_CLASS_OCCUP_ID)
+ CMN_EVENT_HN_CLS(_model, hnf_##_name, CMN_TYPE_HNF, _event)
#define CMN_EVENT_HNF_SNT(_model, _name, _event) \
- _CMN_EVENT_HNF(_model, _name##_all, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_group0_read, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_group0_write, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_group1_read, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_group1_write, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_read, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_write, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL)
-
-#define _CMN_EVENT_XP(_name, _event) \
+ CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event)
+
+#define CMN_EVENT_HNS_OCC(_name, _event) \
+ CMN_EVENT_HN_OCC(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_rxsnp, CMN_TYPE_HNS, _event, 5, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 6, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 7, SEL_OCCUP1ID)
+#define CMN_EVENT_HNS_CLS( _name, _event) \
+ CMN_EVENT_HN_CLS(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
+#define CMN_EVENT_HNS_SNT(_name, _event) \
+ CMN_EVENT_HN_SNT(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
+#define CMN_EVENT_HNS_HBT(_name, _event) \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_HBT_LBT_SEL), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 1, SEL_HBT_LBT_SEL), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 2, SEL_HBT_LBT_SEL)
+#define CMN_EVENT_HNS_SNH(_name, _event) \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_SN_HOME_SEL), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_sn, CMN_TYPE_HNS, _event, 1, SEL_SN_HOME_SEL), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_home, CMN_TYPE_HNS, _event, 2, SEL_SN_HOME_SEL)
+
+#define _CMN_EVENT_XP_MESH(_name, _event) \
__CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \
__CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)), \
__CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)), \
- __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)), \
+ __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2))
+
+#define _CMN_EVENT_XP_PORT(_name, _event) \
__CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)), \
__CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)), \
__CMN_EVENT_XP(p2_##_name, (_event) | (6 << 2)), \
__CMN_EVENT_XP(p3_##_name, (_event) | (7 << 2))
+#define _CMN_EVENT_XP(_name, _event) \
+ _CMN_EVENT_XP_MESH(_name, _event), \
+ _CMN_EVENT_XP_PORT(_name, _event)
+
/* Good thing there are only 3 fundamental XP events... */
#define CMN_EVENT_XP(_name, _event) \
_CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)), \
@@ -770,8 +885,12 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
_CMN_EVENT_XP(pub_##_name, (_event) | (4 << 5)), \
_CMN_EVENT_XP(rsp2_##_name, (_event) | (5 << 5)), \
_CMN_EVENT_XP(dat2_##_name, (_event) | (6 << 5)), \
- _CMN_EVENT_XP(snp2_##_name, (_event) | (7 << 5)), \
- _CMN_EVENT_XP(req2_##_name, (_event) | (8 << 5))
+ _CMN_EVENT_XP(req2_##_name, (_event) | (7 << 5)), \
+ _CMN_EVENT_XP(snp2_##_name, (_event) | (8 << 5))
+
+#define CMN_EVENT_XP_DAT(_name, _event) \
+ _CMN_EVENT_XP_PORT(dat_##_name, (_event) | (3 << 5)), \
+ _CMN_EVENT_XP_PORT(dat2_##_name, (_event) | (6 << 5))
static struct attribute *arm_cmn_event_attrs[] = {
@@ -822,11 +941,7 @@ static struct attribute *arm_cmn_event_attrs[] = {
CMN_EVENT_HNF(CMN_ANY, mc_retries, 0x0c),
CMN_EVENT_HNF(CMN_ANY, mc_reqs, 0x0d),
CMN_EVENT_HNF(CMN_ANY, qos_hh_retry, 0x0e),
- _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_all, 0x0f, 0, SEL_OCCUP1ID),
- _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_read, 0x0f, 1, SEL_OCCUP1ID),
- _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_write, 0x0f, 2, SEL_OCCUP1ID),
- _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_atomic, 0x0f, 3, SEL_OCCUP1ID),
- _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_stash, 0x0f, 4, SEL_OCCUP1ID),
+ CMN_EVENT_HNF_OCC(CMN_ANY, qos_pocq_occupancy, 0x0f),
CMN_EVENT_HNF(CMN_ANY, pocq_addrhaz, 0x10),
CMN_EVENT_HNF(CMN_ANY, pocq_atomic_addrhaz, 0x11),
CMN_EVENT_HNF(CMN_ANY, ld_st_swp_adq_full, 0x12),
@@ -903,7 +1018,7 @@ static struct attribute *arm_cmn_event_attrs[] = {
CMN_EVENT_XP(txflit_valid, 0x01),
CMN_EVENT_XP(txflit_stall, 0x02),
- CMN_EVENT_XP(partial_dat_flit, 0x03),
+ CMN_EVENT_XP_DAT(partial_dat_flit, 0x03),
/* We treat watchpoints as a special made-up class of XP events */
CMN_EVENT_ATTR(CMN_ANY, watchpoint_up, CMN_TYPE_WP, CMN_WP_UP),
CMN_EVENT_ATTR(CMN_ANY, watchpoint_down, CMN_TYPE_WP, CMN_WP_DOWN),
@@ -1043,42 +1158,43 @@ static struct attribute *arm_cmn_event_attrs[] = {
CMN_EVENT_CCRA(wdb_alloc, 0x59),
CMN_EVENT_CCRA(ssb_alloc, 0x5a),
- CMN_EVENT_CCHA(rddatbyp, 0x61),
- CMN_EVENT_CCHA(chirsp_up_stall, 0x62),
- CMN_EVENT_CCHA(chidat_up_stall, 0x63),
- CMN_EVENT_CCHA(snppcrd_link0_stall, 0x64),
- CMN_EVENT_CCHA(snppcrd_link1_stall, 0x65),
- CMN_EVENT_CCHA(snppcrd_link2_stall, 0x66),
- CMN_EVENT_CCHA(reqtrk_occ, 0x67),
- CMN_EVENT_CCHA(rdb_occ, 0x68),
- CMN_EVENT_CCHA(rdbyp_occ, 0x69),
- CMN_EVENT_CCHA(wdb_occ, 0x6a),
- CMN_EVENT_CCHA(snptrk_occ, 0x6b),
- CMN_EVENT_CCHA(sdb_occ, 0x6c),
- CMN_EVENT_CCHA(snphaz_occ, 0x6d),
- CMN_EVENT_CCHA(reqtrk_alloc, 0x6e),
- CMN_EVENT_CCHA(rdb_alloc, 0x6f),
- CMN_EVENT_CCHA(rdbyp_alloc, 0x70),
- CMN_EVENT_CCHA(wdb_alloc, 0x71),
- CMN_EVENT_CCHA(snptrk_alloc, 0x72),
- CMN_EVENT_CCHA(sdb_alloc, 0x73),
- CMN_EVENT_CCHA(snphaz_alloc, 0x74),
- CMN_EVENT_CCHA(pb_rhu_req_occ, 0x75),
- CMN_EVENT_CCHA(pb_rhu_req_alloc, 0x76),
- CMN_EVENT_CCHA(pb_rhu_pcie_req_occ, 0x77),
- CMN_EVENT_CCHA(pb_rhu_pcie_req_alloc, 0x78),
- CMN_EVENT_CCHA(pb_pcie_wr_req_occ, 0x79),
- CMN_EVENT_CCHA(pb_pcie_wr_req_alloc, 0x7a),
- CMN_EVENT_CCHA(pb_pcie_reg_req_occ, 0x7b),
- CMN_EVENT_CCHA(pb_pcie_reg_req_alloc, 0x7c),
- CMN_EVENT_CCHA(pb_pcie_rsvd_req_occ, 0x7d),
- CMN_EVENT_CCHA(pb_pcie_rsvd_req_alloc, 0x7e),
- CMN_EVENT_CCHA(pb_rhu_dat_occ, 0x7f),
- CMN_EVENT_CCHA(pb_rhu_dat_alloc, 0x80),
- CMN_EVENT_CCHA(pb_rhu_pcie_dat_occ, 0x81),
- CMN_EVENT_CCHA(pb_rhu_pcie_dat_alloc, 0x82),
- CMN_EVENT_CCHA(pb_pcie_wr_dat_occ, 0x83),
- CMN_EVENT_CCHA(pb_pcie_wr_dat_alloc, 0x84),
+ CMN_EVENT_CCHA(CMN_ANY, rddatbyp, 0x61),
+ CMN_EVENT_CCHA(CMN_ANY, chirsp_up_stall, 0x62),
+ CMN_EVENT_CCHA(CMN_ANY, chidat_up_stall, 0x63),
+ CMN_EVENT_CCHA(CMN_ANY, snppcrd_link0_stall, 0x64),
+ CMN_EVENT_CCHA(CMN_ANY, snppcrd_link1_stall, 0x65),
+ CMN_EVENT_CCHA(CMN_ANY, snppcrd_link2_stall, 0x66),
+ CMN_EVENT_CCHA(CMN_ANY, reqtrk_occ, 0x67),
+ CMN_EVENT_CCHA(CMN_ANY, rdb_occ, 0x68),
+ CMN_EVENT_CCHA(CMN_ANY, rdbyp_occ, 0x69),
+ CMN_EVENT_CCHA(CMN_ANY, wdb_occ, 0x6a),
+ CMN_EVENT_CCHA(CMN_ANY, snptrk_occ, 0x6b),
+ CMN_EVENT_CCHA(CMN_ANY, sdb_occ, 0x6c),
+ CMN_EVENT_CCHA(CMN_ANY, snphaz_occ, 0x6d),
+ CMN_EVENT_CCHA(CMN_ANY, reqtrk_alloc, 0x6e),
+ CMN_EVENT_CCHA(CMN_ANY, rdb_alloc, 0x6f),
+ CMN_EVENT_CCHA(CMN_ANY, rdbyp_alloc, 0x70),
+ CMN_EVENT_CCHA(CMN_ANY, wdb_alloc, 0x71),
+ CMN_EVENT_CCHA(CMN_ANY, snptrk_alloc, 0x72),
+ CMN_EVENT_CCHA(CMN_ANY, db_alloc, 0x73),
+ CMN_EVENT_CCHA(CMN_ANY, snphaz_alloc, 0x74),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_occ, 0x75),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_alloc, 0x76),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_occ, 0x77),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_alloc, 0x78),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_occ, 0x79),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_alloc, 0x7a),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_occ, 0x7b),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_alloc, 0x7c),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_occ, 0x7d),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_alloc, 0x7e),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_occ, 0x7f),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_alloc, 0x80),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_occ, 0x81),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_alloc, 0x82),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_occ, 0x83),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_alloc, 0x84),
+ CMN_EVENT_CCHA(CMNS3, chirsp1_up_stall, 0x85),
CMN_EVENT_CCLA(rx_cxs, 0x21),
CMN_EVENT_CCLA(tx_cxs, 0x22),
@@ -1092,6 +1208,66 @@ static struct attribute *arm_cmn_event_attrs[] = {
CMN_EVENT_CCLA(pfwd_sndr_stalls_static_crd, 0x2a),
CMN_EVENT_CCLA(pfwd_sndr_stalls_dynmaic_crd, 0x2b),
+ CMN_EVENT_HNS_HBT(cache_miss, 0x01),
+ CMN_EVENT_HNS_HBT(slc_sf_cache_access, 0x02),
+ CMN_EVENT_HNS_HBT(cache_fill, 0x03),
+ CMN_EVENT_HNS_HBT(pocq_retry, 0x04),
+ CMN_EVENT_HNS_HBT(pocq_reqs_recvd, 0x05),
+ CMN_EVENT_HNS_HBT(sf_hit, 0x06),
+ CMN_EVENT_HNS_HBT(sf_evictions, 0x07),
+ CMN_EVENT_HNS(dir_snoops_sent, 0x08),
+ CMN_EVENT_HNS(brd_snoops_sent, 0x09),
+ CMN_EVENT_HNS_HBT(slc_eviction, 0x0a),
+ CMN_EVENT_HNS_HBT(slc_fill_invalid_way, 0x0b),
+ CMN_EVENT_HNS(mc_retries_local, 0x0c),
+ CMN_EVENT_HNS_SNH(mc_reqs_local, 0x0d),
+ CMN_EVENT_HNS(qos_hh_retry, 0x0e),
+ CMN_EVENT_HNS_OCC(qos_pocq_occupancy, 0x0f),
+ CMN_EVENT_HNS(pocq_addrhaz, 0x10),
+ CMN_EVENT_HNS(pocq_atomic_addrhaz, 0x11),
+ CMN_EVENT_HNS(ld_st_swp_adq_full, 0x12),
+ CMN_EVENT_HNS(cmp_adq_full, 0x13),
+ CMN_EVENT_HNS(txdat_stall, 0x14),
+ CMN_EVENT_HNS(txrsp_stall, 0x15),
+ CMN_EVENT_HNS(seq_full, 0x16),
+ CMN_EVENT_HNS(seq_hit, 0x17),
+ CMN_EVENT_HNS(snp_sent, 0x18),
+ CMN_EVENT_HNS(sfbi_dir_snp_sent, 0x19),
+ CMN_EVENT_HNS(sfbi_brd_snp_sent, 0x1a),
+ CMN_EVENT_HNS(intv_dirty, 0x1c),
+ CMN_EVENT_HNS(stash_snp_sent, 0x1d),
+ CMN_EVENT_HNS(stash_data_pull, 0x1e),
+ CMN_EVENT_HNS(snp_fwded, 0x1f),
+ CMN_EVENT_HNS(atomic_fwd, 0x20),
+ CMN_EVENT_HNS(mpam_hardlim, 0x21),
+ CMN_EVENT_HNS(mpam_softlim, 0x22),
+ CMN_EVENT_HNS(snp_sent_cluster, 0x23),
+ CMN_EVENT_HNS(sf_imprecise_evict, 0x24),
+ CMN_EVENT_HNS(sf_evict_shared_line, 0x25),
+ CMN_EVENT_HNS_CLS(pocq_class_occup, 0x26),
+ CMN_EVENT_HNS_CLS(pocq_class_retry, 0x27),
+ CMN_EVENT_HNS_CLS(class_mc_reqs_local, 0x28),
+ CMN_EVENT_HNS_CLS(class_cgnt_cmin, 0x29),
+ CMN_EVENT_HNS_SNT(sn_throttle, 0x2a),
+ CMN_EVENT_HNS_SNT(sn_throttle_min, 0x2b),
+ CMN_EVENT_HNS(sf_precise_to_imprecise, 0x2c),
+ CMN_EVENT_HNS(snp_intv_cln, 0x2d),
+ CMN_EVENT_HNS(nc_excl, 0x2e),
+ CMN_EVENT_HNS(excl_mon_ovfl, 0x2f),
+ CMN_EVENT_HNS(snp_req_recvd, 0x30),
+ CMN_EVENT_HNS(snp_req_byp_pocq, 0x31),
+ CMN_EVENT_HNS(dir_ccgha_snp_sent, 0x32),
+ CMN_EVENT_HNS(brd_ccgha_snp_sent, 0x33),
+ CMN_EVENT_HNS(ccgha_snp_stall, 0x34),
+ CMN_EVENT_HNS(lbt_req_hardlim, 0x35),
+ CMN_EVENT_HNS(hbt_req_hardlim, 0x36),
+ CMN_EVENT_HNS(sf_reupdate, 0x37),
+ CMN_EVENT_HNS(excl_sf_imprecise, 0x38),
+ CMN_EVENT_HNS(snp_pocq_addrhaz, 0x39),
+ CMN_EVENT_HNS(mc_retries_remote, 0x3a),
+ CMN_EVENT_HNS_SNH(mc_reqs_remote, 0x3b),
+ CMN_EVENT_HNS_CLS(class_mc_reqs_remote, 0x3c),
+
NULL
};
@@ -1105,15 +1281,11 @@ static ssize_t arm_cmn_format_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct arm_cmn_format_attr *fmt = container_of(attr, typeof(*fmt), attr);
- int lo = __ffs(fmt->field), hi = __fls(fmt->field);
-
- if (lo == hi)
- return sysfs_emit(buf, "config:%d\n", lo);
if (!fmt->config)
- return sysfs_emit(buf, "config:%d-%d\n", lo, hi);
+ return sysfs_emit(buf, "config:%*pbl\n", 64, &fmt->field);
- return sysfs_emit(buf, "config%d:%d-%d\n", fmt->config, lo, hi);
+ return sysfs_emit(buf, "config%d:%*pbl\n", fmt->config, 64, &fmt->field);
}
#define _CMN_FORMAT_ATTR(_name, _cfg, _fld) \
@@ -1159,28 +1331,65 @@ static ssize_t arm_cmn_cpumask_show(struct device *dev,
static struct device_attribute arm_cmn_cpumask_attr =
__ATTR(cpumask, 0444, arm_cmn_cpumask_show, NULL);
-static struct attribute *arm_cmn_cpumask_attrs[] = {
+static ssize_t arm_cmn_identifier_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%03x%02x\n", cmn->part, cmn->rev);
+}
+
+static struct device_attribute arm_cmn_identifier_attr =
+ __ATTR(identifier, 0444, arm_cmn_identifier_show, NULL);
+
+static struct attribute *arm_cmn_other_attrs[] = {
&arm_cmn_cpumask_attr.attr,
+ &arm_cmn_identifier_attr.attr,
NULL,
};
-static const struct attribute_group arm_cmn_cpumask_attr_group = {
- .attrs = arm_cmn_cpumask_attrs,
+static const struct attribute_group arm_cmn_other_attrs_group = {
+ .attrs = arm_cmn_other_attrs,
};
static const struct attribute_group *arm_cmn_attr_groups[] = {
&arm_cmn_event_attrs_group,
&arm_cmn_format_attrs_group,
- &arm_cmn_cpumask_attr_group,
+ &arm_cmn_other_attrs_group,
NULL
};
-static int arm_cmn_wp_idx(struct perf_event *event)
+static int arm_cmn_find_free_wp_idx(struct arm_cmn_dtm *dtm,
+ struct perf_event *event)
+{
+ int wp_idx = CMN_EVENT_EVENTID(event);
+
+ if (dtm->wp_event[wp_idx] >= 0)
+ if (dtm->wp_event[++wp_idx] >= 0)
+ return -ENOSPC;
+
+ return wp_idx;
+}
+
+static int arm_cmn_get_assigned_wp_idx(struct perf_event *event,
+ struct arm_cmn_hw_event *hw,
+ unsigned int pos)
{
- return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event);
+ return CMN_EVENT_EVENTID(event) + arm_cmn_get_wp_idx(hw->wp_idx, pos);
}
-static u32 arm_cmn_wp_config(struct perf_event *event)
+static void arm_cmn_claim_wp_idx(struct arm_cmn_dtm *dtm,
+ struct perf_event *event,
+ unsigned int dtc, int wp_idx,
+ unsigned int pos)
+{
+ struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+
+ dtm->wp_event[wp_idx] = hw->dtc_idx[dtc];
+ arm_cmn_set_wp_idx(hw->wp_idx, pos, wp_idx - CMN_EVENT_EVENTID(event));
+}
+
+static u32 arm_cmn_wp_config(struct perf_event *event, int wp_idx)
{
u32 config;
u32 dev = CMN_EVENT_WP_DEV_SEL(event);
@@ -1188,7 +1397,11 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
u32 grp = CMN_EVENT_WP_GRP(event);
u32 exc = CMN_EVENT_WP_EXCLUSIVE(event);
u32 combine = CMN_EVENT_WP_COMBINE(event);
- bool is_cmn600 = to_cmn(event->pmu)->model == CMN600;
+ bool is_cmn600 = to_cmn(event->pmu)->part == PART_CMN600;
+
+ /* CMN-600 supports only primary and secondary matching groups */
+ if (is_cmn600)
+ grp &= 1;
config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
@@ -1197,7 +1410,9 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
if (exc)
config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE :
CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE;
- if (combine && !grp)
+
+ /* wp_combine is available only on WP0 and WP2 */
+ if (combine && !(wp_idx & 0x1))
config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_COMBINE :
CMN_DTM_WPn_CONFIG_WP_COMBINE;
return config;
@@ -1206,7 +1421,7 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
static void arm_cmn_set_state(struct arm_cmn *cmn, u32 state)
{
if (!cmn->state)
- writel_relaxed(0, cmn->dtc[0].base + CMN_DT_PMCR);
+ writel_relaxed(0, CMN_DT_PMCR(&cmn->dtc[0]));
cmn->state |= state;
}
@@ -1215,7 +1430,7 @@ static void arm_cmn_clear_state(struct arm_cmn *cmn, u32 state)
cmn->state &= ~state;
if (!cmn->state)
writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN,
- cmn->dtc[0].base + CMN_DT_PMCR);
+ CMN_DT_PMCR(&cmn->dtc[0]));
}
static void arm_cmn_pmu_enable(struct pmu *pmu)
@@ -1250,18 +1465,19 @@ static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw,
static u64 arm_cmn_read_cc(struct arm_cmn_dtc *dtc)
{
- u64 val = readq_relaxed(dtc->base + CMN_DT_PMCCNTR);
+ void __iomem *pmccntr = CMN_DT_PMCCNTR(dtc);
+ u64 val = readq_relaxed(pmccntr);
- writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
+ writeq_relaxed(CMN_CC_INIT, pmccntr);
return (val - CMN_CC_INIT) & ((CMN_CC_INIT << 1) - 1);
}
static u32 arm_cmn_read_counter(struct arm_cmn_dtc *dtc, int idx)
{
- u32 val, pmevcnt = CMN_DT_PMEVCNT(idx);
+ void __iomem *pmevcnt = CMN_DT_PMEVCNT(dtc, idx);
+ u32 val = readl_relaxed(pmevcnt);
- val = readl_relaxed(dtc->base + pmevcnt);
- writel_relaxed(CMN_COUNTER_INIT, dtc->base + pmevcnt);
+ writel_relaxed(CMN_COUNTER_INIT, pmevcnt);
return val - CMN_COUNTER_INIT;
}
@@ -1269,12 +1485,11 @@ static void arm_cmn_init_counter(struct perf_event *event)
{
struct arm_cmn *cmn = to_cmn(event->pmu);
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
- unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx);
u64 count;
- for (i = 0; hw->dtcs_used & (1U << i); i++) {
- writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt);
- cmn->dtc[i].counters[hw->dtc_idx] = event;
+ for_each_hw_dtc_idx(hw, i, idx) {
+ writel_relaxed(CMN_COUNTER_INIT, CMN_DT_PMEVCNT(&cmn->dtc[i], idx));
+ cmn->dtc[i].counters[idx] = event;
}
count = arm_cmn_read_dtm(cmn, hw, false);
@@ -1287,11 +1502,9 @@ static void arm_cmn_event_read(struct perf_event *event)
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
u64 delta, new, prev;
unsigned long flags;
- unsigned int i;
- if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) {
- i = __ffs(hw->dtcs_used);
- delta = arm_cmn_read_cc(cmn->dtc + i);
+ if (CMN_EVENT_TYPE(event) == CMN_TYPE_DTC) {
+ delta = arm_cmn_read_cc(cmn->dtc + hw->dtc_idx[0]);
local64_add(delta, &event->count);
return;
}
@@ -1301,8 +1514,8 @@ static void arm_cmn_event_read(struct perf_event *event)
delta = new - prev;
local_irq_save(flags);
- for (i = 0; hw->dtcs_used & (1U << i); i++) {
- new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx);
+ for_each_hw_dtc_idx(hw, i, idx) {
+ new = arm_cmn_read_counter(cmn->dtc + i, idx);
delta += new << 16;
}
local_irq_restore(flags);
@@ -1321,6 +1534,10 @@ static int arm_cmn_set_event_sel_hi(struct arm_cmn_node *dn,
dn->occupid[fsel].val = occupid;
reg = FIELD_PREP(CMN__PMU_CBUSY_SNTHROTTLE_SEL,
dn->occupid[SEL_CBUSY_SNTHROTTLE_SEL].val) |
+ FIELD_PREP(CMN__PMU_SN_HOME_SEL,
+ dn->occupid[SEL_SN_HOME_SEL].val) |
+ FIELD_PREP(CMN__PMU_HBT_LBT_SEL,
+ dn->occupid[SEL_HBT_LBT_SEL].val) |
FIELD_PREP(CMN__PMU_CLASS_OCCUP_ID,
dn->occupid[SEL_CLASS_OCCUP_ID].val) |
FIELD_PREP(CMN__PMU_OCCUP1_ID,
@@ -1354,16 +1571,19 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
int i;
if (type == CMN_TYPE_DTC) {
- i = __ffs(hw->dtcs_used);
- writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
- cmn->dtc[i].cc_active = true;
+ struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
+
+ writel_relaxed(CMN_DT_DTC_CTL_DT_EN | CMN_DT_DTC_CTL_CG_DISABLE,
+ dtc->base + CMN_DT_DTC_CTL);
+ writeq_relaxed(CMN_CC_INIT, CMN_DT_PMCCNTR(dtc));
+ dtc->cc_active = true;
} else if (type == CMN_TYPE_WP) {
- int wp_idx = arm_cmn_wp_idx(event);
u64 val = CMN_EVENT_WP_VAL(event);
u64 mask = CMN_EVENT_WP_MASK(event);
for_each_hw_dn(hw, dn, i) {
void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
+ int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i);
writeq_relaxed(val, base + CMN_DTM_WPn_VAL(wp_idx));
writeq_relaxed(mask, base + CMN_DTM_WPn_MASK(wp_idx));
@@ -1385,13 +1605,14 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
int i;
if (type == CMN_TYPE_DTC) {
- i = __ffs(hw->dtcs_used);
- cmn->dtc[i].cc_active = false;
- } else if (type == CMN_TYPE_WP) {
- int wp_idx = arm_cmn_wp_idx(event);
+ struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
+ dtc->cc_active = false;
+ writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
+ } else if (type == CMN_TYPE_WP) {
for_each_hw_dn(hw, dn, i) {
void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
+ int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i);
writeq_relaxed(0, base + CMN_DTM_WPn_MASK(wp_idx));
writeq_relaxed(~0ULL, base + CMN_DTM_WPn_VAL(wp_idx));
@@ -1409,10 +1630,23 @@ struct arm_cmn_val {
u8 dtm_count[CMN_MAX_DTMS];
u8 occupid[CMN_MAX_DTMS][SEL_MAX];
u8 wp[CMN_MAX_DTMS][4];
- int dtc_count;
+ u8 wp_combine[CMN_MAX_DTMS][2];
+ int dtc_count[CMN_MAX_DTCS];
bool cycles;
};
+static int arm_cmn_val_find_free_wp_config(struct perf_event *event,
+ struct arm_cmn_val *val, int dtm)
+{
+ int wp_idx = CMN_EVENT_EVENTID(event);
+
+ if (val->wp[dtm][wp_idx])
+ if (val->wp[dtm][++wp_idx])
+ return -ENOSPC;
+
+ return wp_idx;
+}
+
static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
struct perf_event *event)
{
@@ -1430,7 +1664,8 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
return;
}
- val->dtc_count++;
+ for_each_hw_dtc_idx(hw, dtc, idx)
+ val->dtc_count[dtc]++;
for_each_hw_dn(hw, dn, i) {
int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
@@ -1443,8 +1678,9 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
if (type != CMN_TYPE_WP)
continue;
- wp_idx = arm_cmn_wp_idx(event);
- val->wp[dtm][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1;
+ wp_idx = arm_cmn_val_find_free_wp_config(event, val, dtm);
+ val->wp[dtm][wp_idx] = 1;
+ val->wp_combine[dtm][wp_idx >> 1] += !!CMN_EVENT_WP_COMBINE(event);
}
}
@@ -1468,6 +1704,7 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
return -ENOMEM;
arm_cmn_val_add_event(cmn, val, leader);
+
for_each_sibling_event(sibling, leader)
arm_cmn_val_add_event(cmn, val, sibling);
@@ -1477,11 +1714,12 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
goto done;
}
- if (val->dtc_count == CMN_DT_NUM_COUNTERS)
- goto done;
+ for_each_hw_dtc_idx(hw, dtc, idx)
+ if (val->dtc_count[dtc] == CMN_DT_NUM_COUNTERS)
+ goto done;
for_each_hw_dn(hw, dn, i) {
- int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel;
+ int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
if (val->dtm_count[dtm] == CMN_DTM_NUM_COUNTERS)
goto done;
@@ -1493,12 +1731,12 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
if (type != CMN_TYPE_WP)
continue;
- wp_idx = arm_cmn_wp_idx(event);
- if (val->wp[dtm][wp_idx])
+ wp_idx = arm_cmn_val_find_free_wp_config(event, val, dtm);
+ if (wp_idx < 0)
goto done;
- wp_cmb = val->wp[dtm][wp_idx ^ 1];
- if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1)
+ if (wp_idx & 1 &&
+ val->wp_combine[dtm][wp_idx >> 1] != !!CMN_EVENT_WP_COMBINE(event))
goto done;
}
@@ -1508,14 +1746,14 @@ done:
return ret;
}
-static enum cmn_filter_select arm_cmn_filter_sel(enum cmn_model model,
+static enum cmn_filter_select arm_cmn_filter_sel(const struct arm_cmn *cmn,
enum cmn_node_type type,
unsigned int eventid)
{
struct arm_cmn_event_attr *e;
- int i;
+ enum cmn_model model = arm_cmn_model(cmn);
- for (i = 0; i < ARRAY_SIZE(arm_cmn_event_attrs) - 1; i++) {
+ for (int i = 0; i < ARRAY_SIZE(arm_cmn_event_attrs) - 1; i++) {
e = container_of(arm_cmn_event_attrs[i], typeof(*e), attr.attr);
if (e->model & model && e->type == type && e->eventid == eventid)
return e->fsel;
@@ -1546,7 +1784,7 @@ static int arm_cmn_event_init(struct perf_event *event)
type = CMN_EVENT_TYPE(event);
/* DTC events (i.e. cycles) already have everything they need */
if (type == CMN_TYPE_DTC)
- return 0;
+ return arm_cmn_validate_group(cmn, event);
eventid = CMN_EVENT_EVENTID(event);
/* For watchpoints we need the actual XP node here */
@@ -1558,12 +1796,16 @@ static int arm_cmn_event_init(struct perf_event *event)
/* ...but the DTM may depend on which port we're watching */
if (cmn->multi_dtm)
hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
- } else if (type == CMN_TYPE_XP && cmn->model == CMN700) {
+ } else if (type == CMN_TYPE_XP &&
+ (cmn->part == PART_CMN700 || cmn->part == PART_CMN_S3)) {
hw->wide_sel = true;
+ } else if (type == CMN_TYPE_RND) {
+ /* Secretly permit this as an alias for "rnid" events */
+ type = CMN_TYPE_RNI;
}
/* This is sufficiently annoying to recalculate, so cache it */
- hw->filter_sel = arm_cmn_filter_sel(cmn->model, type, eventid);
+ hw->filter_sel = arm_cmn_filter_sel(cmn, type, eventid);
bynodeid = CMN_EVENT_BYNODEID(event);
nodeid = CMN_EVENT_NODEID(event);
@@ -1571,29 +1813,27 @@ static int arm_cmn_event_init(struct perf_event *event)
hw->dn = arm_cmn_node(cmn, type);
if (!hw->dn)
return -EINVAL;
+
+ memset(hw->dtc_idx, -1, sizeof(hw->dtc_idx));
for (dn = hw->dn; dn->type == type; dn++) {
if (bynodeid && dn->id != nodeid) {
hw->dn++;
continue;
}
hw->num_dns++;
+ if (dn->dtc < 0)
+ memset(hw->dtc_idx, 0, cmn->num_dtcs);
+ else
+ hw->dtc_idx[dn->dtc] = 0;
+
if (bynodeid)
break;
}
if (!hw->num_dns) {
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid);
-
- dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n",
- nodeid, nid.x, nid.y, nid.port, nid.dev, type);
+ dev_dbg(cmn->dev, "invalid node 0x%x type 0x%x\n", nodeid, type);
return -EINVAL;
}
- /*
- * Keep assuming non-cycles events count in all DTC domains; turns out
- * it's hard to make a worthwhile optimisation around this, short of
- * going all-in with domain-local counter allocation as well.
- */
- hw->dtcs_used = (1U << cmn->num_dtcs) - 1;
return arm_cmn_validate_group(cmn, event);
}
@@ -1608,8 +1848,11 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
struct arm_cmn_dtm *dtm = &cmn->dtms[hw->dn[i].dtm] + hw->dtm_offset;
unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
- if (type == CMN_TYPE_WP)
- dtm->wp_event[arm_cmn_wp_idx(event)] = -1;
+ if (type == CMN_TYPE_WP) {
+ int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i);
+
+ dtm->wp_event[wp_idx] = -1;
+ }
if (hw->filter_sel > SEL_NONE)
hw->dn[i].occupid[hw->filter_sel].count--;
@@ -1618,47 +1861,50 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
writel_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG);
}
memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
+ memset(hw->wp_idx, 0, sizeof(hw->wp_idx));
- for (i = 0; hw->dtcs_used & (1U << i); i++)
- cmn->dtc[i].counters[hw->dtc_idx] = NULL;
+ for_each_hw_dtc_idx(hw, j, idx)
+ cmn->dtc[j].counters[idx] = NULL;
}
static int arm_cmn_event_add(struct perf_event *event, int flags)
{
struct arm_cmn *cmn = to_cmn(event->pmu);
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
- struct arm_cmn_dtc *dtc = &cmn->dtc[0];
struct arm_cmn_node *dn;
enum cmn_node_type type = CMN_EVENT_TYPE(event);
- unsigned int i, dtc_idx, input_sel;
+ unsigned int input_sel, i = 0;
if (type == CMN_TYPE_DTC) {
- i = 0;
while (cmn->dtc[i].cycles)
if (++i == cmn->num_dtcs)
return -ENOSPC;
cmn->dtc[i].cycles = event;
- hw->dtc_idx = CMN_DT_NUM_COUNTERS;
- hw->dtcs_used = 1U << i;
+ hw->dtc_idx[0] = i;
if (flags & PERF_EF_START)
arm_cmn_event_start(event, 0);
return 0;
}
- /* Grab a free global counter first... */
- dtc_idx = 0;
- while (dtc->counters[dtc_idx])
- if (++dtc_idx == CMN_DT_NUM_COUNTERS)
- return -ENOSPC;
-
- hw->dtc_idx = dtc_idx;
+ /* Grab the global counters first... */
+ for_each_hw_dtc_idx(hw, j, idx) {
+ if (cmn->part == PART_CMN600 && j > 0) {
+ idx = hw->dtc_idx[0];
+ } else {
+ idx = 0;
+ while (cmn->dtc[j].counters[idx])
+ if (++idx == CMN_DT_NUM_COUNTERS)
+ return -ENOSPC;
+ }
+ hw->dtc_idx[j] = idx;
+ }
- /* ...then the local counters to feed it. */
+ /* ...then the local counters to feed them */
for_each_hw_dn(hw, dn, i) {
struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
- unsigned int dtm_idx, shift;
+ unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0);
u64 reg;
dtm_idx = 0;
@@ -1669,22 +1915,26 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
if (type == CMN_TYPE_XP) {
input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx;
} else if (type == CMN_TYPE_WP) {
- int tmp, wp_idx = arm_cmn_wp_idx(event);
- u32 cfg = arm_cmn_wp_config(event);
+ int tmp, wp_idx;
+ u32 cfg;
- if (dtm->wp_event[wp_idx] >= 0)
+ wp_idx = arm_cmn_find_free_wp_idx(dtm, event);
+ if (wp_idx < 0)
goto free_dtms;
+ cfg = arm_cmn_wp_config(event, wp_idx);
+
tmp = dtm->wp_event[wp_idx ^ 1];
if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
- CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
+ CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
goto free_dtms;
input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
- dtm->wp_event[wp_idx] = dtc_idx;
+
+ arm_cmn_claim_wp_idx(dtm, event, d, wp_idx, i);
writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
} else {
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+ struct arm_cmn_nodeid nid = arm_cmn_nid(dn);
if (cmn->multi_dtm)
nid.port %= 2;
@@ -1701,7 +1951,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
dtm->input_sel[dtm_idx] = input_sel;
shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx);
dtm->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift);
- dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift;
+ dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, hw->dtc_idx[d]) << shift;
dtm->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx);
reg = (u64)le32_to_cpu(dtm->pmu_config_high) << 32 | dtm->pmu_config_low;
writeq_relaxed(reg, dtm->base + CMN_DTM_PMU_CONFIG);
@@ -1729,7 +1979,7 @@ static void arm_cmn_event_del(struct perf_event *event, int flags)
arm_cmn_event_stop(event, PERF_EF_UPDATE);
if (type == CMN_TYPE_DTC)
- cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL;
+ cmn->dtc[hw->dtc_idx[0]].cycles = NULL;
else
arm_cmn_event_clear(cmn, event, hw->num_dns);
}
@@ -1773,7 +2023,7 @@ static int arm_cmn_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_nod
cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node);
node = dev_to_node(cmn->dev);
- if (node != NUMA_NO_NODE && cpu_to_node(cmn->cpu) != node && cpu_to_node(cpu) == node)
+ if (cpu_to_node(cmn->cpu) != node && cpu_to_node(cpu) == node)
arm_cmn_migrate(cmn, cpu);
return 0;
}
@@ -1783,20 +2033,20 @@ static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_no
struct arm_cmn *cmn;
unsigned int target;
int node;
- cpumask_t mask;
cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node);
if (cpu != cmn->cpu)
return 0;
node = dev_to_node(cmn->dev);
- if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
- cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
- target = cpumask_any(&mask);
- else
+
+ target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
+
if (target < nr_cpu_ids)
arm_cmn_migrate(cmn, target);
+
return 0;
}
@@ -1806,11 +2056,11 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id)
irqreturn_t ret = IRQ_NONE;
for (;;) {
- u32 status = readl_relaxed(dtc->base + CMN_DT_PMOVSR);
+ u32 status = readl_relaxed(CMN_DT_PMOVSR(dtc));
u64 delta;
int i;
- for (i = 0; i < CMN_DTM_NUM_COUNTERS; i++) {
+ for (i = 0; i < CMN_DT_NUM_COUNTERS; i++) {
if (status & (1U << i)) {
ret = IRQ_HANDLED;
if (WARN_ON(!dtc->counters[i]))
@@ -1828,7 +2078,7 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id)
}
}
- writel_relaxed(status, dtc->base + CMN_DT_PMOVSR_CLR);
+ writel_relaxed(status, CMN_DT_PMOVSR_CLR(dtc));
if (!dtc->irq_friend)
return ret;
@@ -1870,6 +2120,7 @@ static void arm_cmn_init_dtm(struct arm_cmn_dtm *dtm, struct arm_cmn_node *xp, i
dtm->base = xp->pmu_base + CMN_DTM_OFFSET(idx);
dtm->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN;
+ writeq_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG);
for (i = 0; i < 4; i++) {
dtm->wp_event[i] = -1;
writeq_relaxed(0, dtm->base + CMN_DTM_WPn_MASK(i));
@@ -1881,14 +2132,16 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id
{
struct arm_cmn_dtc *dtc = cmn->dtc + idx;
- dtc->base = dn->pmu_base - CMN_PMU_OFFSET;
+ dtc->pmu_base = dn->pmu_base;
+ dtc->base = dtc->pmu_base - arm_cmn_pmu_offset(cmn, dn);
dtc->irq = platform_get_irq(to_platform_device(cmn->dev), idx);
if (dtc->irq < 0)
return dtc->irq;
- writel_relaxed(0, dtc->base + CMN_DT_PMCR);
- writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR);
- writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR);
+ writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
+ writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, CMN_DT_PMCR(dtc));
+ writeq_relaxed(0, CMN_DT_PMCCNTR(dtc));
+ writel_relaxed(0x1ff, CMN_DT_PMOVSR_CLR(dtc));
return 0;
}
@@ -1908,7 +2161,6 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
{
struct arm_cmn_node *dn, *xp;
int dtc_idx = 0;
- u8 dtcs_present = (1 << cmn->num_dtcs) - 1;
cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL);
if (!cmn->dtc)
@@ -1919,22 +2171,18 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP);
for (dn = cmn->dns; dn->type; dn++) {
- if (dn->type == CMN_TYPE_XP) {
- dn->dtc &= dtcs_present;
+ if (dn->type == CMN_TYPE_XP)
continue;
- }
xp = arm_cmn_node_to_xp(cmn, dn);
+ dn->dtc = xp->dtc;
dn->dtm = xp->dtm;
if (cmn->multi_dtm)
- dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
+ dn->dtm += arm_cmn_nid(dn).port / 2;
if (dn->type == CMN_TYPE_DTC) {
- int err;
- /* We do at least know that a DTC's XP must be in that DTC's domain */
- if (xp->dtc == 0xf)
- xp->dtc = 1 << dtc_idx;
- err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+ int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+
if (err)
return err;
}
@@ -1948,11 +2196,21 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
dn->type = CMN_TYPE_CCLA;
}
- writel_relaxed(CMN_DT_DTC_CTL_DT_EN, cmn->dtc[0].base + CMN_DT_DTC_CTL);
+ arm_cmn_set_state(cmn, CMN_STATE_DISABLED);
return 0;
}
+static unsigned int arm_cmn_dtc_domain(struct arm_cmn *cmn, void __iomem *xp_region)
+{
+ int offset = CMN_DTM_UNIT_INFO;
+
+ if (cmn->part == PART_CMN650 || cmn->part == PART_CI700)
+ offset = CMN650_DTM_UNIT_INFO;
+
+ return FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN, readl_relaxed(xp_region + offset));
+}
+
static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node)
{
int level;
@@ -1962,7 +2220,7 @@ static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_c
node->id = FIELD_GET(CMN_NI_NODE_ID, reg);
node->logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg);
- node->pmu_base = cmn->base + offset + CMN_PMU_OFFSET;
+ node->pmu_base = cmn->base + offset + arm_cmn_pmu_offset(cmn, node);
if (node->type == CMN_TYPE_CFG)
level = 0;
@@ -1990,11 +2248,11 @@ static enum cmn_node_type arm_cmn_subtype(enum cmn_node_type type)
static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
{
- void __iomem *cfg_region;
+ void __iomem *cfg_region, __iomem *xp_region;
struct arm_cmn_node cfg, *dn;
struct arm_cmn_dtm *dtm;
+ enum cmn_part part;
u16 child_count, child_poff;
- u32 xp_offset[CMN_MAX_XPS];
u64 reg;
int i, j;
size_t sz;
@@ -2004,10 +2262,32 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
return -ENODEV;
cfg_region = cmn->base + rgn_offset;
- reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2);
+
+ reg = readq_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_01);
+ part = FIELD_GET(CMN_CFGM_PID0_PART_0, reg);
+ part |= FIELD_GET(CMN_CFGM_PID1_PART_1, reg) << 8;
+ if (cmn->part && cmn->part != part)
+ dev_warn(cmn->dev,
+ "Firmware binding mismatch: expected part number 0x%x, found 0x%x\n",
+ cmn->part, part);
+ cmn->part = part;
+ if (!arm_cmn_model(cmn))
+ dev_warn(cmn->dev, "Unknown part number: 0x%x\n", part);
+
+ reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_23);
cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg);
+ /*
+ * With the device isolation feature, if firmware has neglected to enable
+ * an XP port then we risk locking up if we try to access anything behind
+ * it; however we also have no way to tell from Non-Secure whether any
+ * given port is disabled or not, so the only way to win is not to play...
+ */
reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL);
+ if (reg & CMN_INFO_DEVICE_ISO_ENABLE) {
+ dev_err(cmn->dev, "Device isolation enabled, not continuing due to risk of lockup\n");
+ return -ENODEV;
+ }
cmn->multi_dtm = reg & CMN_INFO_MULTIPLE_DTM_EN;
cmn->rsp_vc_num = FIELD_GET(CMN_INFO_RSP_VC_NUM, reg);
cmn->dat_vc_num = FIELD_GET(CMN_INFO_DAT_VC_NUM, reg);
@@ -2024,11 +2304,12 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
cmn->num_dns = cmn->num_xps;
/* Pass 1: visit the XPs, enumerate their children */
+ cfg_region += child_poff;
for (i = 0; i < cmn->num_xps; i++) {
- reg = readq_relaxed(cfg_region + child_poff + i * 8);
- xp_offset[i] = reg & CMN_CHILD_NODE_ADDR;
+ reg = readq_relaxed(cfg_region + i * 8);
+ xp_region = cmn->base + (reg & CMN_CHILD_NODE_ADDR);
- reg = readq_relaxed(cmn->base + xp_offset[i] + CMN_CHILD_INFO);
+ reg = readq_relaxed(xp_region + CMN_CHILD_INFO);
cmn->num_dns += FIELD_GET(CMN_CI_CHILD_COUNT, reg);
}
@@ -2054,11 +2335,12 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
cmn->dns = dn;
cmn->dtms = dtm;
for (i = 0; i < cmn->num_xps; i++) {
- void __iomem *xp_region = cmn->base + xp_offset[i];
struct arm_cmn_node *xp = dn++;
unsigned int xp_ports = 0;
- arm_cmn_init_node_info(cmn, xp_offset[i], xp);
+ reg = readq_relaxed(cfg_region + i * 8);
+ xp_region = cmn->base + (reg & CMN_CHILD_NODE_ADDR);
+ arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, xp);
/*
* Thanks to the order in which XP logical IDs seem to be
* assigned, we can handily infer the mesh X dimension by
@@ -2068,36 +2350,36 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
if (xp->id == (1 << 3))
cmn->mesh_x = xp->logid;
- if (cmn->model == CMN600)
- xp->dtc = 0xf;
+ if (cmn->part == PART_CMN600)
+ xp->dtc = -1;
else
- xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO);
+ xp->dtc = arm_cmn_dtc_domain(cmn, xp_region);
xp->dtm = dtm - cmn->dtms;
arm_cmn_init_dtm(dtm++, xp, 0);
/*
* Keeping track of connected ports will let us filter out
- * unnecessary XP events easily. We can also reliably infer the
- * "extra device ports" configuration for the node ID format
- * from this, since in that case we will see at least one XP
- * with port 2 connected, for the HN-D.
+ * unnecessary XP events easily, and also infer the per-XP
+ * part of the node ID format.
*/
- if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P0))
- xp_ports |= BIT(0);
- if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P1))
- xp_ports |= BIT(1);
- if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P2))
- xp_ports |= BIT(2);
- if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P3))
- xp_ports |= BIT(3);
- if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P4))
- xp_ports |= BIT(4);
- if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P5))
- xp_ports |= BIT(5);
-
- if (cmn->multi_dtm && (xp_ports & 0xc))
+ for (int p = 0; p < CMN_MAX_PORTS; p++)
+ if (arm_cmn_device_connect_info(cmn, xp, p))
+ xp_ports |= BIT(p);
+
+ if (cmn->num_xps == 1) {
+ xp->portid_bits = 3;
+ xp->deviceid_bits = 2;
+ } else if (xp_ports > 0x3) {
+ xp->portid_bits = 2;
+ xp->deviceid_bits = 1;
+ } else {
+ xp->portid_bits = 1;
+ xp->deviceid_bits = 2;
+ }
+
+ if (cmn->multi_dtm && (xp_ports > 0x3))
arm_cmn_init_dtm(dtm++, xp, 1);
- if (cmn->multi_dtm && (xp_ports & 0x30))
+ if (cmn->multi_dtm && (xp_ports > 0xf))
arm_cmn_init_dtm(dtm++, xp, 2);
cmn->ports_used |= xp_ports;
@@ -2121,8 +2403,21 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
dev_dbg(cmn->dev, "ignoring external node %llx\n", reg);
continue;
}
+ /*
+ * AmpereOneX erratum AC04_MESH_1 makes some XPs report a bogus
+ * child count larger than the number of valid child pointers.
+ * A child offset of 0 can only occur on CMN-600; otherwise it
+ * would imply the root node being its own grandchild, which
+ * we can safely dismiss in general.
+ */
+ if (reg == 0 && cmn->part != PART_CMN600) {
+ dev_dbg(cmn->dev, "bogus child pointer?\n");
+ continue;
+ }
arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn);
+ dn->portid_bits = xp->portid_bits;
+ dn->deviceid_bits = xp->deviceid_bits;
switch (dn->type) {
case CMN_TYPE_DTC:
@@ -2141,7 +2436,11 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_CXHA:
case CMN_TYPE_CCRA:
case CMN_TYPE_CCHA:
+ case CMN_TYPE_HNS:
+ dn++;
+ break;
case CMN_TYPE_CCLA:
+ dn->pmu_base += CMN_CCLA_PMU_EVENT_SEL;
dn++;
break;
/* Nothing to see here */
@@ -2149,6 +2448,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_MPAM_NS:
case CMN_TYPE_RNSAM:
case CMN_TYPE_CXLA:
+ case CMN_TYPE_HNS_MPAM_S:
+ case CMN_TYPE_HNS_MPAM_NS:
+ case CMN_TYPE_APB:
break;
/*
* Split "optimised" combination nodes into separate
@@ -2159,7 +2461,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_HNP:
case CMN_TYPE_CCLA_RNI:
dn[1] = dn[0];
- dn[0].pmu_base += CMN_HNP_PMU_EVENT_SEL;
+ dn[0].pmu_base += CMN_CCLA_PMU_EVENT_SEL;
dn[1].type = arm_cmn_subtype(dn->type);
dn += 2;
break;
@@ -2197,7 +2499,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
if (cmn->num_xps == 1)
dev_warn(cmn->dev, "1x1 config not fully supported, translate XP events manually\n");
- dev_dbg(cmn->dev, "model %d, periph_id_2 revision %d\n", cmn->model, cmn->rev);
+ dev_dbg(cmn->dev, "periph_id part 0x%03x revision %d\n", cmn->part, cmn->rev);
reg = cmn->ports_used;
dev_dbg(cmn->dev, "mesh %dx%d, ID width %d, ports %6pbl%s\n",
cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn), &reg,
@@ -2252,17 +2554,18 @@ static int arm_cmn_probe(struct platform_device *pdev)
return -ENOMEM;
cmn->dev = &pdev->dev;
- cmn->model = (unsigned long)device_get_match_data(cmn->dev);
+ cmn->part = (unsigned long)device_get_match_data(cmn->dev);
+ cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev));
platform_set_drvdata(pdev, cmn);
- if (cmn->model == CMN600 && has_acpi_companion(cmn->dev)) {
+ if (cmn->part == PART_CMN600 && has_acpi_companion(cmn->dev)) {
rootnode = arm_cmn600_acpi_probe(pdev, cmn);
} else {
rootnode = 0;
cmn->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(cmn->base))
return PTR_ERR(cmn->base);
- if (cmn->model == CMN600)
+ if (cmn->part == PART_CMN600)
rootnode = arm_cmn600_of_probe(pdev->dev.of_node);
}
if (rootnode < 0)
@@ -2280,9 +2583,9 @@ static int arm_cmn_probe(struct platform_device *pdev)
if (err)
return err;
- cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev));
cmn->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = cmn->dev,
.attr_groups = arm_cmn_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
@@ -2317,7 +2620,7 @@ static int arm_cmn_probe(struct platform_device *pdev)
return err;
}
-static int arm_cmn_remove(struct platform_device *pdev)
+static void arm_cmn_remove(struct platform_device *pdev)
{
struct arm_cmn *cmn = platform_get_drvdata(pdev);
@@ -2326,15 +2629,15 @@ static int arm_cmn_remove(struct platform_device *pdev)
perf_pmu_unregister(&cmn->pmu);
cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node);
debugfs_remove(cmn->debug);
- return 0;
}
#ifdef CONFIG_OF
static const struct of_device_id arm_cmn_of_match[] = {
- { .compatible = "arm,cmn-600", .data = (void *)CMN600 },
- { .compatible = "arm,cmn-650", .data = (void *)CMN650 },
- { .compatible = "arm,cmn-700", .data = (void *)CMN700 },
- { .compatible = "arm,ci-700", .data = (void *)CI700 },
+ { .compatible = "arm,cmn-600", .data = (void *)PART_CMN600 },
+ { .compatible = "arm,cmn-650" },
+ { .compatible = "arm,cmn-700" },
+ { .compatible = "arm,cmn-s3" },
+ { .compatible = "arm,ci-700" },
{}
};
MODULE_DEVICE_TABLE(of, arm_cmn_of_match);
@@ -2342,9 +2645,10 @@ MODULE_DEVICE_TABLE(of, arm_cmn_of_match);
#ifdef CONFIG_ACPI
static const struct acpi_device_id arm_cmn_acpi_match[] = {
- { "ARMHC600", CMN600 },
- { "ARMHC650", CMN650 },
- { "ARMHC700", CMN700 },
+ { "ARMHC600", PART_CMN600 },
+ { "ARMHC650" },
+ { "ARMHC700" },
+ { "ARMHC003" },
{}
};
MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match);
@@ -2355,6 +2659,7 @@ static struct platform_driver arm_cmn_driver = {
.name = "arm-cmn",
.of_match_table = of_match_ptr(arm_cmn_of_match),
.acpi_match_table = ACPI_PTR(arm_cmn_acpi_match),
+ .suppress_bind_attrs = true,
},
.probe = arm_cmn_probe,
.remove = arm_cmn_remove,
@@ -2393,5 +2698,5 @@ module_init(arm_cmn_init);
module_exit(arm_cmn_exit);
MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>");
-MODULE_DESCRIPTION("Arm CMN-600 PMU driver");
+MODULE_DESCRIPTION("Arm CMN/CI interconnect PMU driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c
new file mode 100644
index 000000000000..66858c65215d
--- /dev/null
+++ b/drivers/perf/arm-ni.c
@@ -0,0 +1,862 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2022-2024 Arm Limited
+// NI-700 Network-on-Chip PMU driver
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/* Common registers */
+#define NI_NODE_TYPE 0x000
+#define NI_NODE_TYPE_NODE_ID GENMASK(31, 16)
+#define NI_NODE_TYPE_NODE_TYPE GENMASK(15, 0)
+
+#define NI_CHILD_NODE_INFO 0x004
+#define NI_CHILD_PTR(n) (0x008 + (n) * 4)
+#define NI_NUM_SUB_FEATURES 0x100
+#define NI_SUB_FEATURE_TYPE(n) (0x108 + (n) * 8)
+#define NI_SUB_FEATURE_PTR(n) (0x10c + (n) * 8)
+
+#define NI_SUB_FEATURE_TYPE_FCU 0x2
+
+#define NI700_PMUSELA 0x00c
+
+/* Config node */
+#define NI_PERIPHERAL_ID0 0xfe0
+#define NI_PIDR0_PART_7_0 GENMASK(7, 0)
+#define NI_PERIPHERAL_ID1 0xfe4
+#define NI_PIDR1_PART_11_8 GENMASK(3, 0)
+#define NI_PERIPHERAL_ID2 0xfe8
+#define NI_PIDR2_VERSION GENMASK(7, 4)
+
+/* PMU node */
+#define NI700_PMEVCNTR(n) (0x008 + (n) * 8)
+#define NI700_PMCCNTR_L 0x0f8
+#define NI_PMEVCNTR(n) (0x200 + (n) * 8)
+#define NI_PMCCNTR_L 0x2f8
+#define NI_PMEVTYPER(n) (0x400 + (n) * 4)
+#define NI_PMEVTYPER_NODE_TYPE GENMASK(12, 9)
+#define NI_PMEVTYPER_NODE_ID GENMASK(8, 0)
+#define NI_PMCNTENSET 0xc00
+#define NI_PMCNTENCLR 0xc20
+#define NI_PMINTENSET 0xc40
+#define NI_PMINTENCLR 0xc60
+#define NI_PMOVSCLR 0xc80
+#define NI_PMOVSSET 0xcc0
+#define NI_PMCFGR 0xe00
+#define NI_PMCR 0xe04
+#define NI_PMCR_RESET_CCNT BIT(2)
+#define NI_PMCR_RESET_EVCNT BIT(1)
+#define NI_PMCR_ENABLE BIT(0)
+
+#define NI_NUM_COUNTERS 8
+#define NI_CCNT_IDX 31
+
+/* Event attributes */
+#define NI_CONFIG_TYPE GENMASK_ULL(15, 0)
+#define NI_CONFIG_NODEID GENMASK_ULL(31, 16)
+#define NI_CONFIG_EVENTID GENMASK_ULL(47, 32)
+
+#define NI_EVENT_TYPE(event) FIELD_GET(NI_CONFIG_TYPE, (event)->attr.config)
+#define NI_EVENT_NODEID(event) FIELD_GET(NI_CONFIG_NODEID, (event)->attr.config)
+#define NI_EVENT_EVENTID(event) FIELD_GET(NI_CONFIG_EVENTID, (event)->attr.config)
+
+enum ni_part {
+ PART_NI_700 = 0x43b,
+ PART_NI_710AE = 0x43d,
+ PART_NOC_S3 = 0x43f,
+ PART_SI_L1 = 0x455,
+};
+
+enum ni_node_type {
+ NI_GLOBAL,
+ NI_VOLTAGE,
+ NI_POWER,
+ NI_CLOCK,
+ NI_ASNI,
+ NI_AMNI,
+ NI_PMU,
+ NI_HSNI,
+ NI_HMNI,
+ NI_PMNI,
+ NI_TSNI,
+ NI_TMNI,
+ NI_CMNI = 0x0e,
+ NI_MCN = 0x63,
+};
+
+struct arm_ni_node {
+ void __iomem *base;
+ enum ni_node_type type;
+ u16 id;
+ u32 num_components;
+};
+
+struct arm_ni_unit {
+ void __iomem *pmusela;
+ enum ni_node_type type;
+ u16 id;
+ bool ns;
+ union {
+ __le64 pmusel;
+ u8 event[8];
+ };
+};
+
+struct arm_ni_cd {
+ void __iomem *pmu_base;
+ u16 id;
+ s8 irq_friend;
+ int num_units;
+ int irq;
+ struct pmu pmu;
+ struct arm_ni_unit *units;
+ struct perf_event *evcnt[NI_NUM_COUNTERS];
+ struct perf_event *ccnt;
+};
+
+struct arm_ni {
+ struct device *dev;
+ void __iomem *base;
+ enum ni_part part;
+ int id;
+ int cpu;
+ int num_cds;
+ struct hlist_node cpuhp_node;
+ struct arm_ni_cd cds[] __counted_by(num_cds);
+};
+
+#define cd_to_ni(cd) container_of((cd), struct arm_ni, cds[(cd)->id])
+#define pmu_to_cd(p) container_of((p), struct arm_ni_cd, pmu)
+
+#define ni_for_each_cd(n, c) \
+ for (struct arm_ni_cd *c = n->cds; c < n->cds + n->num_cds; c++) if (c->pmu_base)
+
+#define cd_for_each_unit(cd, u) \
+ for (struct arm_ni_unit *u = cd->units; u < cd->units + cd->num_units; u++)
+
+static int arm_ni_hp_state;
+
+struct arm_ni_event_attr {
+ struct device_attribute attr;
+ enum ni_node_type type;
+};
+
+#define NI_EVENT_ATTR(_name, _type) \
+ (&((struct arm_ni_event_attr[]) {{ \
+ .attr = __ATTR(_name, 0444, arm_ni_event_show, NULL), \
+ .type = _type, \
+ }})[0].attr.attr)
+
+static ssize_t arm_ni_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct arm_ni_event_attr *eattr = container_of(attr, typeof(*eattr), attr);
+
+ if (eattr->type == NI_PMU)
+ return sysfs_emit(buf, "type=0x%x\n", eattr->type);
+
+ return sysfs_emit(buf, "type=0x%x,eventid=?,nodeid=?\n", eattr->type);
+}
+
+static umode_t arm_ni_event_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int unused)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct arm_ni_cd *cd = pmu_to_cd(dev_get_drvdata(dev));
+ struct arm_ni_event_attr *eattr;
+
+ eattr = container_of(attr, typeof(*eattr), attr.attr);
+
+ cd_for_each_unit(cd, unit) {
+ if (unit->type == eattr->type && unit->ns)
+ return attr->mode;
+ }
+
+ return 0;
+}
+
+static struct attribute *arm_ni_event_attrs[] = {
+ NI_EVENT_ATTR(asni, NI_ASNI),
+ NI_EVENT_ATTR(amni, NI_AMNI),
+ NI_EVENT_ATTR(cycles, NI_PMU),
+ NI_EVENT_ATTR(hsni, NI_HSNI),
+ NI_EVENT_ATTR(hmni, NI_HMNI),
+ NI_EVENT_ATTR(pmni, NI_PMNI),
+ NI_EVENT_ATTR(tsni, NI_TSNI),
+ NI_EVENT_ATTR(tmni, NI_TMNI),
+ NI_EVENT_ATTR(cmni, NI_CMNI),
+ NULL
+};
+
+static const struct attribute_group arm_ni_event_attrs_group = {
+ .name = "events",
+ .attrs = arm_ni_event_attrs,
+ .is_visible = arm_ni_event_attr_is_visible,
+};
+
+struct arm_ni_format_attr {
+ struct device_attribute attr;
+ u64 field;
+};
+
+#define NI_FORMAT_ATTR(_name, _fld) \
+ (&((struct arm_ni_format_attr[]) {{ \
+ .attr = __ATTR(_name, 0444, arm_ni_format_show, NULL), \
+ .field = _fld, \
+ }})[0].attr.attr)
+
+static ssize_t arm_ni_format_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct arm_ni_format_attr *fmt = container_of(attr, typeof(*fmt), attr);
+
+ return sysfs_emit(buf, "config:%*pbl\n", 64, &fmt->field);
+}
+
+static struct attribute *arm_ni_format_attrs[] = {
+ NI_FORMAT_ATTR(type, NI_CONFIG_TYPE),
+ NI_FORMAT_ATTR(nodeid, NI_CONFIG_NODEID),
+ NI_FORMAT_ATTR(eventid, NI_CONFIG_EVENTID),
+ NULL
+};
+
+static const struct attribute_group arm_ni_format_attrs_group = {
+ .name = "format",
+ .attrs = arm_ni_format_attrs,
+};
+
+static ssize_t arm_ni_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct arm_ni *ni = cd_to_ni(pmu_to_cd(dev_get_drvdata(dev)));
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(ni->cpu));
+}
+
+static struct device_attribute arm_ni_cpumask_attr =
+ __ATTR(cpumask, 0444, arm_ni_cpumask_show, NULL);
+
+static ssize_t arm_ni_identifier_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct arm_ni *ni = cd_to_ni(pmu_to_cd(dev_get_drvdata(dev)));
+ u32 reg = readl_relaxed(ni->base + NI_PERIPHERAL_ID2);
+ int version = FIELD_GET(NI_PIDR2_VERSION, reg);
+
+ return sysfs_emit(buf, "%03x%02x\n", ni->part, version);
+}
+
+static struct device_attribute arm_ni_identifier_attr =
+ __ATTR(identifier, 0444, arm_ni_identifier_show, NULL);
+
+static struct attribute *arm_ni_other_attrs[] = {
+ &arm_ni_cpumask_attr.attr,
+ &arm_ni_identifier_attr.attr,
+ NULL
+};
+
+static const struct attribute_group arm_ni_other_attr_group = {
+ .attrs = arm_ni_other_attrs,
+};
+
+static const struct attribute_group *arm_ni_attr_groups[] = {
+ &arm_ni_event_attrs_group,
+ &arm_ni_format_attrs_group,
+ &arm_ni_other_attr_group,
+ NULL
+};
+
+static void arm_ni_pmu_enable(struct pmu *pmu)
+{
+ writel_relaxed(NI_PMCR_ENABLE, pmu_to_cd(pmu)->pmu_base + NI_PMCR);
+}
+
+static void arm_ni_pmu_disable(struct pmu *pmu)
+{
+ writel_relaxed(0, pmu_to_cd(pmu)->pmu_base + NI_PMCR);
+}
+
+struct arm_ni_val {
+ unsigned int evcnt;
+ unsigned int ccnt;
+};
+
+static bool arm_ni_val_count_event(struct perf_event *evt, struct arm_ni_val *val)
+{
+ if (is_software_event(evt))
+ return true;
+
+ if (NI_EVENT_TYPE(evt) == NI_PMU) {
+ val->ccnt++;
+ return val->ccnt <= 1;
+ }
+
+ val->evcnt++;
+ return val->evcnt <= NI_NUM_COUNTERS;
+}
+
+static int arm_ni_validate_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct arm_ni_val val = { 0 };
+
+ if (leader == event)
+ return 0;
+
+ arm_ni_val_count_event(event, &val);
+ if (!arm_ni_val_count_event(leader, &val))
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, leader) {
+ if (!arm_ni_val_count_event(sibling, &val))
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static bool arm_ni_is_7xx(const struct arm_ni *ni)
+{
+ return ni->part == PART_NI_700 || ni->part == PART_NI_710AE;
+}
+
+static int arm_ni_event_init(struct perf_event *event)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
+ struct arm_ni *ni;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ ni = cd_to_ni(cd);
+ event->cpu = ni->cpu;
+ event->hw.flags = arm_ni_is_7xx(ni);
+
+ if (NI_EVENT_TYPE(event) == NI_PMU)
+ return arm_ni_validate_group(event);
+
+ cd_for_each_unit(cd, unit) {
+ if (unit->type == NI_EVENT_TYPE(event) &&
+ unit->id == NI_EVENT_NODEID(event) && unit->ns) {
+ event->hw.config_base = (unsigned long)unit;
+ return arm_ni_validate_group(event);
+ }
+ }
+ return -EINVAL;
+}
+
+static u64 arm_ni_read_ccnt(void __iomem *pmccntr)
+{
+ u64 l, u_old, u_new;
+ int retries = 3; /* 1st time unlucky, 2nd improbable, 3rd just broken */
+
+ u_new = readl_relaxed(pmccntr + 4);
+ do {
+ u_old = u_new;
+ l = readl_relaxed(pmccntr);
+ u_new = readl_relaxed(pmccntr + 4);
+ } while (u_new != u_old && --retries);
+ WARN_ON(!retries);
+
+ return (u_new << 32) | l;
+}
+
+static void arm_ni_event_read(struct perf_event *event)
+{
+ struct hw_perf_event *hw = &event->hw;
+ u64 count, prev;
+ bool ccnt = hw->idx == NI_CCNT_IDX;
+
+ do {
+ prev = local64_read(&hw->prev_count);
+ if (ccnt)
+ count = arm_ni_read_ccnt((void __iomem *)event->hw.event_base);
+ else
+ count = readl_relaxed((void __iomem *)event->hw.event_base);
+ } while (local64_cmpxchg(&hw->prev_count, prev, count) != prev);
+
+ count -= prev;
+ if (!ccnt)
+ count = (u32)count;
+ local64_add(count, &event->count);
+}
+
+static void arm_ni_event_start(struct perf_event *event, int flags)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
+
+ writel_relaxed(1U << event->hw.idx, cd->pmu_base + NI_PMCNTENSET);
+}
+
+static void arm_ni_event_stop(struct perf_event *event, int flags)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
+
+ writel_relaxed(1U << event->hw.idx, cd->pmu_base + NI_PMCNTENCLR);
+ if (flags & PERF_EF_UPDATE)
+ arm_ni_event_read(event);
+}
+
+static void arm_ni_init_ccnt(struct hw_perf_event *hw)
+{
+ local64_set(&hw->prev_count, S64_MIN);
+ lo_hi_writeq_relaxed(S64_MIN, (void __iomem *)hw->event_base);
+}
+
+static void arm_ni_init_evcnt(struct hw_perf_event *hw)
+{
+ local64_set(&hw->prev_count, S32_MIN);
+ writel_relaxed(S32_MIN, (void __iomem *)hw->event_base);
+}
+
+static int arm_ni_event_add(struct perf_event *event, int flags)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
+ struct hw_perf_event *hw = &event->hw;
+ struct arm_ni_unit *unit;
+ enum ni_node_type type = NI_EVENT_TYPE(event);
+ u32 reg;
+
+ if (type == NI_PMU) {
+ if (cd->ccnt)
+ return -ENOSPC;
+ hw->idx = NI_CCNT_IDX;
+ hw->event_base = (unsigned long)cd->pmu_base +
+ (hw->flags ? NI700_PMCCNTR_L : NI_PMCCNTR_L);
+ cd->ccnt = event;
+ arm_ni_init_ccnt(hw);
+ } else {
+ hw->idx = 0;
+ while (cd->evcnt[hw->idx]) {
+ if (++hw->idx == NI_NUM_COUNTERS)
+ return -ENOSPC;
+ }
+ cd->evcnt[hw->idx] = event;
+ unit = (void *)hw->config_base;
+ unit->event[hw->idx] = NI_EVENT_EVENTID(event);
+ hw->event_base = (unsigned long)cd->pmu_base +
+ (hw->flags ? NI700_PMEVCNTR(hw->idx) : NI_PMEVCNTR(hw->idx));
+ arm_ni_init_evcnt(hw);
+ lo_hi_writeq_relaxed(le64_to_cpu(unit->pmusel), unit->pmusela);
+
+ reg = FIELD_PREP(NI_PMEVTYPER_NODE_TYPE, type) |
+ FIELD_PREP(NI_PMEVTYPER_NODE_ID, NI_EVENT_NODEID(event));
+ writel_relaxed(reg, cd->pmu_base + NI_PMEVTYPER(hw->idx));
+ }
+ if (flags & PERF_EF_START)
+ arm_ni_event_start(event, 0);
+ return 0;
+}
+
+static void arm_ni_event_del(struct perf_event *event, int flags)
+{
+ struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
+ struct hw_perf_event *hw = &event->hw;
+
+ arm_ni_event_stop(event, PERF_EF_UPDATE);
+
+ if (hw->idx == NI_CCNT_IDX)
+ cd->ccnt = NULL;
+ else
+ cd->evcnt[hw->idx] = NULL;
+}
+
+static irqreturn_t arm_ni_handle_irq(int irq, void *dev_id)
+{
+ struct arm_ni_cd *cd = dev_id;
+ irqreturn_t ret = IRQ_NONE;
+
+ for (;;) {
+ u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR);
+
+ if (reg & (1U << NI_CCNT_IDX)) {
+ ret = IRQ_HANDLED;
+ if (!(WARN_ON(!cd->ccnt))) {
+ arm_ni_event_read(cd->ccnt);
+ arm_ni_init_ccnt(&cd->ccnt->hw);
+ }
+ }
+ for (int i = 0; i < NI_NUM_COUNTERS; i++) {
+ if (!(reg & (1U << i)))
+ continue;
+ ret = IRQ_HANDLED;
+ if (!(WARN_ON(!cd->evcnt[i]))) {
+ arm_ni_event_read(cd->evcnt[i]);
+ arm_ni_init_evcnt(&cd->evcnt[i]->hw);
+ }
+ }
+ writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR);
+ if (!cd->irq_friend)
+ return ret;
+ cd += cd->irq_friend;
+ }
+}
+
+static void __iomem *arm_ni_get_pmusel(struct arm_ni *ni, void __iomem *unit_base)
+{
+ u32 type, ptr, num;
+
+ if (arm_ni_is_7xx(ni))
+ return unit_base + NI700_PMUSELA;
+
+ num = readl_relaxed(unit_base + NI_NUM_SUB_FEATURES);
+ for (int i = 0; i < num; i++) {
+ type = readl_relaxed(unit_base + NI_SUB_FEATURE_TYPE(i));
+ if (type != NI_SUB_FEATURE_TYPE_FCU)
+ continue;
+ ptr = readl_relaxed(unit_base + NI_SUB_FEATURE_PTR(i));
+ return ni->base + ptr;
+ }
+ /* Should be impossible */
+ return NULL;
+}
+
+static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_start)
+{
+ struct arm_ni_cd *cd = ni->cds + node->id;
+ const char *name;
+
+ cd->id = node->id;
+ cd->num_units = node->num_components;
+ cd->units = devm_kcalloc(ni->dev, cd->num_units, sizeof(*(cd->units)), GFP_KERNEL);
+ if (!cd->units)
+ return -ENOMEM;
+
+ for (int i = 0; i < cd->num_units; i++) {
+ u32 reg = readl_relaxed(node->base + NI_CHILD_PTR(i));
+ void __iomem *unit_base = ni->base + reg;
+ struct arm_ni_unit *unit = cd->units + i;
+
+ reg = readl_relaxed(unit_base + NI_NODE_TYPE);
+ unit->type = FIELD_GET(NI_NODE_TYPE_NODE_TYPE, reg);
+ unit->id = FIELD_GET(NI_NODE_TYPE_NODE_ID, reg);
+
+ switch (unit->type) {
+ case NI_PMU:
+ reg = readl_relaxed(unit_base + NI_PMCFGR);
+ if (!reg) {
+ dev_info(ni->dev, "No access to PMU %d\n", cd->id);
+ devm_kfree(ni->dev, cd->units);
+ return 0;
+ }
+ unit->ns = true;
+ cd->pmu_base = unit_base;
+ break;
+ case NI_ASNI:
+ case NI_AMNI:
+ case NI_HSNI:
+ case NI_HMNI:
+ case NI_PMNI:
+ case NI_TSNI:
+ case NI_TMNI:
+ case NI_CMNI:
+ unit->pmusela = arm_ni_get_pmusel(ni, unit_base);
+ writel_relaxed(1, unit->pmusela);
+ if (readl_relaxed(unit->pmusela) != 1)
+ dev_info(ni->dev, "No access to node 0x%04x%04x\n", unit->id, unit->type);
+ else
+ unit->ns = true;
+ break;
+ case NI_MCN:
+ break;
+ default:
+ /*
+ * e.g. FMU - thankfully bits 3:2 of FMU_ERR_FR0 are RES0 so
+ * can't alias any of the leaf node types we're looking for.
+ */
+ dev_dbg(ni->dev, "Mystery node 0x%04x%04x\n", unit->id, unit->type);
+ break;
+ }
+ }
+
+ res_start += cd->pmu_base - ni->base;
+ if (!devm_request_mem_region(ni->dev, res_start, SZ_4K, dev_name(ni->dev))) {
+ dev_err(ni->dev, "Failed to request PMU region 0x%llx\n", res_start);
+ return -EBUSY;
+ }
+
+ writel_relaxed(NI_PMCR_RESET_CCNT | NI_PMCR_RESET_EVCNT,
+ cd->pmu_base + NI_PMCR);
+ writel_relaxed(U32_MAX, cd->pmu_base + NI_PMCNTENCLR);
+ writel_relaxed(U32_MAX, cd->pmu_base + NI_PMOVSCLR);
+
+ cd->irq = platform_get_irq(to_platform_device(ni->dev), cd->id);
+ if (cd->irq < 0)
+ return cd->irq;
+
+ cd->pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .parent = ni->dev,
+ .attr_groups = arm_ni_attr_groups,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .pmu_enable = arm_ni_pmu_enable,
+ .pmu_disable = arm_ni_pmu_disable,
+ .event_init = arm_ni_event_init,
+ .add = arm_ni_event_add,
+ .del = arm_ni_event_del,
+ .start = arm_ni_event_start,
+ .stop = arm_ni_event_stop,
+ .read = arm_ni_event_read,
+ };
+
+ name = devm_kasprintf(ni->dev, GFP_KERNEL, "arm_ni_%d_cd_%d", ni->id, cd->id);
+ if (!name)
+ return -ENOMEM;
+
+ return perf_pmu_register(&cd->pmu, name, -1);
+}
+
+static void arm_ni_remove(struct platform_device *pdev)
+{
+ struct arm_ni *ni = platform_get_drvdata(pdev);
+
+ ni_for_each_cd(ni, cd) {
+ writel_relaxed(0, cd->pmu_base + NI_PMCR);
+ writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR);
+ perf_pmu_unregister(&cd->pmu);
+ }
+ cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &ni->cpuhp_node);
+}
+
+static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node)
+{
+ u32 reg = readl_relaxed(base + NI_NODE_TYPE);
+
+ node->base = base;
+ node->type = FIELD_GET(NI_NODE_TYPE_NODE_TYPE, reg);
+ node->id = FIELD_GET(NI_NODE_TYPE_NODE_ID, reg);
+ node->num_components = readl_relaxed(base + NI_CHILD_NODE_INFO);
+}
+
+static int arm_ni_init_irqs(struct arm_ni *ni)
+{
+ int err;
+
+ ni_for_each_cd(ni, cd) {
+ for (struct arm_ni_cd *prev = cd; prev-- > ni->cds; ) {
+ if (prev->irq == cd->irq) {
+ prev->irq_friend = cd - prev;
+ goto set_inten;
+ }
+ }
+ err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq,
+ IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_AUTOEN,
+ dev_name(ni->dev), cd);
+ if (err)
+ return err;
+
+ irq_set_affinity(cd->irq, cpumask_of(ni->cpu));
+set_inten:
+ writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENSET);
+ }
+
+ ni_for_each_cd(ni, cd)
+ if (!cd->irq_friend)
+ enable_irq(cd->irq);
+ return 0;
+}
+
+static int arm_ni_probe(struct platform_device *pdev)
+{
+ struct arm_ni_node cfg, vd, pd, cd;
+ struct arm_ni *ni;
+ struct resource *res;
+ void __iomem *base;
+ static atomic_t id;
+ int ret, num_cds;
+ u32 reg, part;
+
+ /*
+ * We want to map the whole configuration space for ease of discovery,
+ * but the PMU pages are the only ones for which we can honestly claim
+ * exclusive ownership, so we'll request them explicitly once found.
+ */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+ if (!base)
+ return -ENOMEM;
+
+ arm_ni_probe_domain(base, &cfg);
+ if (cfg.type != NI_GLOBAL)
+ return -ENODEV;
+
+ reg = readl_relaxed(cfg.base + NI_PERIPHERAL_ID0);
+ part = FIELD_GET(NI_PIDR0_PART_7_0, reg);
+ reg = readl_relaxed(cfg.base + NI_PERIPHERAL_ID1);
+ part |= FIELD_GET(NI_PIDR1_PART_11_8, reg) << 8;
+
+ switch (part) {
+ case PART_NI_700:
+ case PART_NI_710AE:
+ case PART_NOC_S3:
+ case PART_SI_L1:
+ break;
+ default:
+ dev_WARN(&pdev->dev, "Unknown part number: 0x%03x, this may go badly\n", part);
+ break;
+ }
+
+ num_cds = 0;
+ for (int v = 0; v < cfg.num_components; v++) {
+ reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v));
+ arm_ni_probe_domain(base + reg, &vd);
+ for (int p = 0; p < vd.num_components; p++) {
+ reg = readl_relaxed(vd.base + NI_CHILD_PTR(p));
+ arm_ni_probe_domain(base + reg, &pd);
+ num_cds += pd.num_components;
+ }
+ }
+
+ ni = devm_kzalloc(&pdev->dev, struct_size(ni, cds, num_cds), GFP_KERNEL);
+ if (!ni)
+ return -ENOMEM;
+
+ ni->dev = &pdev->dev;
+ ni->base = base;
+ ni->num_cds = num_cds;
+ ni->part = part;
+ ni->id = atomic_fetch_inc(&id);
+ ni->cpu = cpumask_local_spread(0, dev_to_node(ni->dev));
+ platform_set_drvdata(pdev, ni);
+
+ ret = cpuhp_state_add_instance_nocalls(arm_ni_hp_state, &ni->cpuhp_node);
+ if (ret)
+ return ret;
+
+ for (int v = 0; v < cfg.num_components; v++) {
+ reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v));
+ arm_ni_probe_domain(base + reg, &vd);
+ for (int p = 0; p < vd.num_components; p++) {
+ reg = readl_relaxed(vd.base + NI_CHILD_PTR(p));
+ arm_ni_probe_domain(base + reg, &pd);
+ for (int c = 0; c < pd.num_components; c++) {
+ reg = readl_relaxed(pd.base + NI_CHILD_PTR(c));
+ arm_ni_probe_domain(base + reg, &cd);
+ ret = arm_ni_init_cd(ni, &cd, res->start);
+ if (ret) {
+ ni->cds[cd.id].pmu_base = NULL;
+ arm_ni_remove(pdev);
+ return ret;
+ }
+ }
+ }
+ }
+
+ ret = arm_ni_init_irqs(ni);
+ if (ret)
+ arm_ni_remove(pdev);
+
+ return ret;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id arm_ni_of_match[] = {
+ { .compatible = "arm,ni-700" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, arm_ni_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id arm_ni_acpi_match[] = {
+ { "ARMHCB70" },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, arm_ni_acpi_match);
+#endif
+
+static struct platform_driver arm_ni_driver = {
+ .driver = {
+ .name = "arm-ni",
+ .of_match_table = of_match_ptr(arm_ni_of_match),
+ .acpi_match_table = ACPI_PTR(arm_ni_acpi_match),
+ .suppress_bind_attrs = true,
+ },
+ .probe = arm_ni_probe,
+ .remove = arm_ni_remove,
+};
+
+static void arm_ni_pmu_migrate(struct arm_ni *ni, unsigned int cpu)
+{
+ ni_for_each_cd(ni, cd) {
+ perf_pmu_migrate_context(&cd->pmu, ni->cpu, cpu);
+ irq_set_affinity(cd->irq, cpumask_of(cpu));
+ }
+ ni->cpu = cpu;
+}
+
+static int arm_ni_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct arm_ni *ni;
+ int node;
+
+ ni = hlist_entry_safe(cpuhp_node, struct arm_ni, cpuhp_node);
+ node = dev_to_node(ni->dev);
+ if (cpu_to_node(ni->cpu) != node && cpu_to_node(cpu) == node)
+ arm_ni_pmu_migrate(ni, cpu);
+ return 0;
+}
+
+static int arm_ni_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct arm_ni *ni;
+ unsigned int target;
+ int node;
+
+ ni = hlist_entry_safe(cpuhp_node, struct arm_ni, cpuhp_node);
+ if (cpu != ni->cpu)
+ return 0;
+
+ node = dev_to_node(ni->dev);
+ target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ if (target < nr_cpu_ids)
+ arm_ni_pmu_migrate(ni, target);
+ return 0;
+}
+
+static int __init arm_ni_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "perf/arm/ni:online",
+ arm_ni_pmu_online_cpu,
+ arm_ni_pmu_offline_cpu);
+ if (ret < 0)
+ return ret;
+
+ arm_ni_hp_state = ret;
+
+ ret = platform_driver_register(&arm_ni_driver);
+ if (ret)
+ cpuhp_remove_multi_state(arm_ni_hp_state);
+ return ret;
+}
+
+static void __exit arm_ni_exit(void)
+{
+ platform_driver_unregister(&arm_ni_driver);
+ cpuhp_remove_multi_state(arm_ni_hp_state);
+}
+
+module_init(arm_ni_init);
+module_exit(arm_ni_exit);
+
+MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>");
+MODULE_DESCRIPTION("Arm NI-700 PMU driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c
new file mode 100644
index 000000000000..ba554e0c846c
--- /dev/null
+++ b/drivers/perf/arm_brbe.c
@@ -0,0 +1,805 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Branch Record Buffer Extension Driver.
+ *
+ * Copyright (C) 2022-2025 ARM Limited
+ *
+ * Author: Anshuman Khandual <anshuman.khandual@arm.com>
+ */
+#include <linux/types.h>
+#include <linux/bitmap.h>
+#include <linux/perf/arm_pmu.h>
+#include "arm_brbe.h"
+
+#define BRBFCR_EL1_BRANCH_FILTERS (BRBFCR_EL1_DIRECT | \
+ BRBFCR_EL1_INDIRECT | \
+ BRBFCR_EL1_RTN | \
+ BRBFCR_EL1_INDCALL | \
+ BRBFCR_EL1_DIRCALL | \
+ BRBFCR_EL1_CONDDIR)
+
+/*
+ * BRBTS_EL1 is currently not used for branch stack implementation
+ * purpose but BRBCR_ELx.TS needs to have a valid value from all
+ * available options. BRBCR_ELx_TS_VIRTUAL is selected for this.
+ */
+#define BRBCR_ELx_DEFAULT_TS FIELD_PREP(BRBCR_ELx_TS_MASK, BRBCR_ELx_TS_VIRTUAL)
+
+/*
+ * BRBE Buffer Organization
+ *
+ * BRBE buffer is arranged as multiple banks of 32 branch record
+ * entries each. An individual branch record in a given bank could
+ * be accessed, after selecting the bank in BRBFCR_EL1.BANK and
+ * accessing the registers i.e [BRBSRC, BRBTGT, BRBINF] set with
+ * indices [0..31].
+ *
+ * Bank 0
+ *
+ * --------------------------------- ------
+ * | 00 | BRBSRC | BRBTGT | BRBINF | | 00 |
+ * --------------------------------- ------
+ * | 01 | BRBSRC | BRBTGT | BRBINF | | 01 |
+ * --------------------------------- ------
+ * | .. | BRBSRC | BRBTGT | BRBINF | | .. |
+ * --------------------------------- ------
+ * | 31 | BRBSRC | BRBTGT | BRBINF | | 31 |
+ * --------------------------------- ------
+ *
+ * Bank 1
+ *
+ * --------------------------------- ------
+ * | 32 | BRBSRC | BRBTGT | BRBINF | | 00 |
+ * --------------------------------- ------
+ * | 33 | BRBSRC | BRBTGT | BRBINF | | 01 |
+ * --------------------------------- ------
+ * | .. | BRBSRC | BRBTGT | BRBINF | | .. |
+ * --------------------------------- ------
+ * | 63 | BRBSRC | BRBTGT | BRBINF | | 31 |
+ * --------------------------------- ------
+ */
+#define BRBE_BANK_MAX_ENTRIES 32
+
+struct brbe_regset {
+ u64 brbsrc;
+ u64 brbtgt;
+ u64 brbinf;
+};
+
+#define PERF_BR_ARM64_MAX (PERF_BR_MAX + PERF_BR_NEW_MAX)
+
+struct brbe_hw_attr {
+ int brbe_version;
+ int brbe_cc;
+ int brbe_nr;
+ int brbe_format;
+};
+
+#define BRBE_REGN_CASE(n, case_macro) \
+ case n: case_macro(n); break
+
+#define BRBE_REGN_SWITCH(x, case_macro) \
+ do { \
+ switch (x) { \
+ BRBE_REGN_CASE(0, case_macro); \
+ BRBE_REGN_CASE(1, case_macro); \
+ BRBE_REGN_CASE(2, case_macro); \
+ BRBE_REGN_CASE(3, case_macro); \
+ BRBE_REGN_CASE(4, case_macro); \
+ BRBE_REGN_CASE(5, case_macro); \
+ BRBE_REGN_CASE(6, case_macro); \
+ BRBE_REGN_CASE(7, case_macro); \
+ BRBE_REGN_CASE(8, case_macro); \
+ BRBE_REGN_CASE(9, case_macro); \
+ BRBE_REGN_CASE(10, case_macro); \
+ BRBE_REGN_CASE(11, case_macro); \
+ BRBE_REGN_CASE(12, case_macro); \
+ BRBE_REGN_CASE(13, case_macro); \
+ BRBE_REGN_CASE(14, case_macro); \
+ BRBE_REGN_CASE(15, case_macro); \
+ BRBE_REGN_CASE(16, case_macro); \
+ BRBE_REGN_CASE(17, case_macro); \
+ BRBE_REGN_CASE(18, case_macro); \
+ BRBE_REGN_CASE(19, case_macro); \
+ BRBE_REGN_CASE(20, case_macro); \
+ BRBE_REGN_CASE(21, case_macro); \
+ BRBE_REGN_CASE(22, case_macro); \
+ BRBE_REGN_CASE(23, case_macro); \
+ BRBE_REGN_CASE(24, case_macro); \
+ BRBE_REGN_CASE(25, case_macro); \
+ BRBE_REGN_CASE(26, case_macro); \
+ BRBE_REGN_CASE(27, case_macro); \
+ BRBE_REGN_CASE(28, case_macro); \
+ BRBE_REGN_CASE(29, case_macro); \
+ BRBE_REGN_CASE(30, case_macro); \
+ BRBE_REGN_CASE(31, case_macro); \
+ default: WARN(1, "Invalid BRB* index %d\n", x); \
+ } \
+ } while (0)
+
+#define RETURN_READ_BRBSRCN(n) \
+ return read_sysreg_s(SYS_BRBSRC_EL1(n))
+static inline u64 get_brbsrc_reg(int idx)
+{
+ BRBE_REGN_SWITCH(idx, RETURN_READ_BRBSRCN);
+ return 0;
+}
+
+#define RETURN_READ_BRBTGTN(n) \
+ return read_sysreg_s(SYS_BRBTGT_EL1(n))
+static u64 get_brbtgt_reg(int idx)
+{
+ BRBE_REGN_SWITCH(idx, RETURN_READ_BRBTGTN);
+ return 0;
+}
+
+#define RETURN_READ_BRBINFN(n) \
+ return read_sysreg_s(SYS_BRBINF_EL1(n))
+static u64 get_brbinf_reg(int idx)
+{
+ BRBE_REGN_SWITCH(idx, RETURN_READ_BRBINFN);
+ return 0;
+}
+
+static u64 brbe_record_valid(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_VALID_MASK, brbinf);
+}
+
+static bool brbe_invalid(u64 brbinf)
+{
+ return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_NONE;
+}
+
+static bool brbe_record_is_complete(u64 brbinf)
+{
+ return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_FULL;
+}
+
+static bool brbe_record_is_source_only(u64 brbinf)
+{
+ return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_SOURCE;
+}
+
+static bool brbe_record_is_target_only(u64 brbinf)
+{
+ return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_TARGET;
+}
+
+static int brbinf_get_in_tx(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_T_MASK, brbinf);
+}
+
+static int brbinf_get_mispredict(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_MPRED_MASK, brbinf);
+}
+
+static int brbinf_get_lastfailed(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_LASTFAILED_MASK, brbinf);
+}
+
+static u16 brbinf_get_cycles(u64 brbinf)
+{
+ u32 exp, mant, cycles;
+ /*
+ * Captured cycle count is unknown and hence
+ * should not be passed on to userspace.
+ */
+ if (brbinf & BRBINFx_EL1_CCU)
+ return 0;
+
+ exp = FIELD_GET(BRBINFx_EL1_CC_EXP_MASK, brbinf);
+ mant = FIELD_GET(BRBINFx_EL1_CC_MANT_MASK, brbinf);
+
+ if (!exp)
+ return mant;
+
+ cycles = (mant | 0x100) << (exp - 1);
+
+ return min(cycles, U16_MAX);
+}
+
+static int brbinf_get_type(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_TYPE_MASK, brbinf);
+}
+
+static int brbinf_get_el(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_EL_MASK, brbinf);
+}
+
+void brbe_invalidate(void)
+{
+ /* Ensure all branches before this point are recorded */
+ isb();
+ asm volatile(BRB_IALL_INSN);
+ /* Ensure all branch records are invalidated after this point */
+ isb();
+}
+
+static bool valid_brbe_nr(int brbe_nr)
+{
+ return brbe_nr == BRBIDR0_EL1_NUMREC_8 ||
+ brbe_nr == BRBIDR0_EL1_NUMREC_16 ||
+ brbe_nr == BRBIDR0_EL1_NUMREC_32 ||
+ brbe_nr == BRBIDR0_EL1_NUMREC_64;
+}
+
+static bool valid_brbe_cc(int brbe_cc)
+{
+ return brbe_cc == BRBIDR0_EL1_CC_20_BIT;
+}
+
+static bool valid_brbe_format(int brbe_format)
+{
+ return brbe_format == BRBIDR0_EL1_FORMAT_FORMAT_0;
+}
+
+static bool valid_brbidr(u64 brbidr)
+{
+ int brbe_format, brbe_cc, brbe_nr;
+
+ brbe_format = FIELD_GET(BRBIDR0_EL1_FORMAT_MASK, brbidr);
+ brbe_cc = FIELD_GET(BRBIDR0_EL1_CC_MASK, brbidr);
+ brbe_nr = FIELD_GET(BRBIDR0_EL1_NUMREC_MASK, brbidr);
+
+ return valid_brbe_format(brbe_format) && valid_brbe_cc(brbe_cc) && valid_brbe_nr(brbe_nr);
+}
+
+static bool valid_brbe_version(int brbe_version)
+{
+ return brbe_version == ID_AA64DFR0_EL1_BRBE_IMP ||
+ brbe_version == ID_AA64DFR0_EL1_BRBE_BRBE_V1P1;
+}
+
+static void select_brbe_bank(int bank)
+{
+ u64 brbfcr;
+
+ brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+ brbfcr &= ~BRBFCR_EL1_BANK_MASK;
+ brbfcr |= SYS_FIELD_PREP(BRBFCR_EL1, BANK, bank);
+ write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
+ /*
+ * Arm ARM (DDI 0487K.a) D.18.4 rule PPBZP requires explicit sync
+ * between setting BANK and accessing branch records.
+ */
+ isb();
+}
+
+static bool __read_brbe_regset(struct brbe_regset *entry, int idx)
+{
+ entry->brbinf = get_brbinf_reg(idx);
+
+ if (brbe_invalid(entry->brbinf))
+ return false;
+
+ entry->brbsrc = get_brbsrc_reg(idx);
+ entry->brbtgt = get_brbtgt_reg(idx);
+ return true;
+}
+
+/*
+ * Generic perf branch filters supported on BRBE
+ *
+ * New branch filters need to be evaluated whether they could be supported on
+ * BRBE. This ensures that such branch filters would not just be accepted, to
+ * fail silently. PERF_SAMPLE_BRANCH_HV is a special case that is selectively
+ * supported only on platforms where kernel is in hyp mode.
+ */
+#define BRBE_EXCLUDE_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_ABORT_TX | \
+ PERF_SAMPLE_BRANCH_IN_TX | \
+ PERF_SAMPLE_BRANCH_NO_TX | \
+ PERF_SAMPLE_BRANCH_CALL_STACK | \
+ PERF_SAMPLE_BRANCH_COUNTERS)
+
+#define BRBE_ALLOWED_BRANCH_TYPES (PERF_SAMPLE_BRANCH_ANY | \
+ PERF_SAMPLE_BRANCH_ANY_CALL | \
+ PERF_SAMPLE_BRANCH_ANY_RETURN | \
+ PERF_SAMPLE_BRANCH_IND_CALL | \
+ PERF_SAMPLE_BRANCH_COND | \
+ PERF_SAMPLE_BRANCH_IND_JUMP | \
+ PERF_SAMPLE_BRANCH_CALL)
+
+
+#define BRBE_ALLOWED_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_USER | \
+ PERF_SAMPLE_BRANCH_KERNEL | \
+ PERF_SAMPLE_BRANCH_HV | \
+ BRBE_ALLOWED_BRANCH_TYPES | \
+ PERF_SAMPLE_BRANCH_NO_FLAGS | \
+ PERF_SAMPLE_BRANCH_NO_CYCLES | \
+ PERF_SAMPLE_BRANCH_TYPE_SAVE | \
+ PERF_SAMPLE_BRANCH_HW_INDEX | \
+ PERF_SAMPLE_BRANCH_PRIV_SAVE)
+
+#define BRBE_PERF_BRANCH_FILTERS (BRBE_ALLOWED_BRANCH_FILTERS | \
+ BRBE_EXCLUDE_BRANCH_FILTERS)
+
+/*
+ * BRBE supports the following functional branch type filters while
+ * generating branch records. These branch filters can be enabled,
+ * either individually or as a group i.e ORing multiple filters
+ * with each other.
+ *
+ * BRBFCR_EL1_CONDDIR - Conditional direct branch
+ * BRBFCR_EL1_DIRCALL - Direct call
+ * BRBFCR_EL1_INDCALL - Indirect call
+ * BRBFCR_EL1_INDIRECT - Indirect branch
+ * BRBFCR_EL1_DIRECT - Direct branch
+ * BRBFCR_EL1_RTN - Subroutine return
+ */
+static u64 branch_type_to_brbfcr(int branch_type)
+{
+ u64 brbfcr = 0;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
+ brbfcr |= BRBFCR_EL1_BRANCH_FILTERS;
+ return brbfcr;
+ }
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+ brbfcr |= BRBFCR_EL1_INDCALL;
+ brbfcr |= BRBFCR_EL1_DIRCALL;
+ }
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ brbfcr |= BRBFCR_EL1_RTN;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ brbfcr |= BRBFCR_EL1_INDCALL;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_COND)
+ brbfcr |= BRBFCR_EL1_CONDDIR;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+ brbfcr |= BRBFCR_EL1_INDIRECT;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_CALL)
+ brbfcr |= BRBFCR_EL1_DIRCALL;
+
+ return brbfcr;
+}
+
+/*
+ * BRBE supports the following privilege mode filters while generating
+ * branch records.
+ *
+ * BRBCR_ELx_E0BRE - EL0 branch records
+ * BRBCR_ELx_ExBRE - EL1/EL2 branch records
+ *
+ * BRBE also supports the following additional functional branch type
+ * filters while generating branch records.
+ *
+ * BRBCR_ELx_EXCEPTION - Exception
+ * BRBCR_ELx_ERTN - Exception return
+ */
+static u64 branch_type_to_brbcr(int branch_type)
+{
+ u64 brbcr = BRBCR_ELx_FZP | BRBCR_ELx_DEFAULT_TS;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_USER)
+ brbcr |= BRBCR_ELx_E0BRE;
+
+ /*
+ * When running in the hyp mode, writing into BRBCR_EL1
+ * actually writes into BRBCR_EL2 instead. Field E2BRE
+ * is also at the same position as E1BRE.
+ */
+ if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
+ brbcr |= BRBCR_ELx_ExBRE;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_HV) {
+ if (is_kernel_in_hyp_mode())
+ brbcr |= BRBCR_ELx_ExBRE;
+ }
+
+ if (!(branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES))
+ brbcr |= BRBCR_ELx_CC;
+
+ if (!(branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS))
+ brbcr |= BRBCR_ELx_MPRED;
+
+ /*
+ * The exception and exception return branches could be
+ * captured, irrespective of the perf event's privilege.
+ * If the perf event does not have enough privilege for
+ * a given exception level, then addresses which falls
+ * under that exception level will be reported as zero
+ * for the captured branch record, creating source only
+ * or target only records.
+ */
+ if (branch_type & PERF_SAMPLE_BRANCH_KERNEL) {
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
+ brbcr |= BRBCR_ELx_EXCEPTION;
+ brbcr |= BRBCR_ELx_ERTN;
+ }
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+ brbcr |= BRBCR_ELx_EXCEPTION;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ brbcr |= BRBCR_ELx_ERTN;
+ }
+ return brbcr;
+}
+
+bool brbe_branch_attr_valid(struct perf_event *event)
+{
+ u64 branch_type = event->attr.branch_sample_type;
+
+ /*
+ * Ensure both perf branch filter allowed and exclude
+ * masks are always in sync with the generic perf ABI.
+ */
+ BUILD_BUG_ON(BRBE_PERF_BRANCH_FILTERS != (PERF_SAMPLE_BRANCH_MAX - 1));
+
+ if (branch_type & BRBE_EXCLUDE_BRANCH_FILTERS) {
+ pr_debug("requested branch filter not supported 0x%llx\n", branch_type);
+ return false;
+ }
+
+ /* Ensure at least 1 branch type is enabled */
+ if (!(branch_type & BRBE_ALLOWED_BRANCH_TYPES)) {
+ pr_debug("no branch type enabled 0x%llx\n", branch_type);
+ return false;
+ }
+
+ /*
+ * No branches are recorded in guests nor nVHE hypervisors, so
+ * excluding the host or both kernel and user is invalid.
+ *
+ * Ideally we'd just require exclude_guest and exclude_hv, but setting
+ * event filters with perf for kernel or user don't set exclude_guest.
+ * So effectively, exclude_guest and exclude_hv are ignored.
+ */
+ if (event->attr.exclude_host || (event->attr.exclude_user && event->attr.exclude_kernel)) {
+ pr_debug("branch filter in hypervisor or guest only not supported 0x%llx\n", branch_type);
+ return false;
+ }
+
+ event->hw.branch_reg.config = branch_type_to_brbfcr(event->attr.branch_sample_type);
+ event->hw.extra_reg.config = branch_type_to_brbcr(event->attr.branch_sample_type);
+
+ return true;
+}
+
+unsigned int brbe_num_branch_records(const struct arm_pmu *armpmu)
+{
+ return FIELD_GET(BRBIDR0_EL1_NUMREC_MASK, armpmu->reg_brbidr);
+}
+
+void brbe_probe(struct arm_pmu *armpmu)
+{
+ u64 brbidr, aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
+ u32 brbe;
+
+ brbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT);
+ if (!valid_brbe_version(brbe))
+ return;
+
+ brbidr = read_sysreg_s(SYS_BRBIDR0_EL1);
+ if (!valid_brbidr(brbidr))
+ return;
+
+ armpmu->reg_brbidr = brbidr;
+}
+
+/*
+ * BRBE is assumed to be disabled/paused on entry
+ */
+void brbe_enable(const struct arm_pmu *arm_pmu)
+{
+ struct pmu_hw_events *cpuc = this_cpu_ptr(arm_pmu->hw_events);
+ u64 brbfcr = 0, brbcr = 0;
+
+ /*
+ * Discard existing records to avoid a discontinuity, e.g. records
+ * missed during handling an overflow.
+ */
+ brbe_invalidate();
+
+ /*
+ * Merge the permitted branch filters of all events.
+ */
+ for (int i = 0; i < ARMPMU_MAX_HWEVENTS; i++) {
+ struct perf_event *event = cpuc->events[i];
+
+ if (event && has_branch_stack(event)) {
+ brbfcr |= event->hw.branch_reg.config;
+ brbcr |= event->hw.extra_reg.config;
+ }
+ }
+
+ /*
+ * In VHE mode with MDCR_EL2.HPMN equal to PMCR_EL0.N, BRBCR_EL1.FZP
+ * controls freezing the branch records on counter overflow rather than
+ * BRBCR_EL2.FZP (which writes to BRBCR_EL1 are redirected to).
+ * The exception levels are enabled/disabled in BRBCR_EL2, so keep EL1
+ * and EL0 recording disabled for guests.
+ *
+ * As BRBCR_EL1 CC and MPRED bits also need to match, use the same
+ * value for both registers just masking the exception levels.
+ */
+ if (is_kernel_in_hyp_mode())
+ write_sysreg_s(brbcr & ~(BRBCR_ELx_ExBRE | BRBCR_ELx_E0BRE), SYS_BRBCR_EL12);
+ write_sysreg_s(brbcr, SYS_BRBCR_EL1);
+ /* Ensure BRBCR_ELx settings take effect before unpausing */
+ isb();
+
+ /* Finally write SYS_BRBFCR_EL to unpause BRBE */
+ write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
+ /* Synchronization in PMCR write ensures ordering WRT PMU enabling */
+}
+
+void brbe_disable(void)
+{
+ /*
+ * No need for synchronization here as synchronization in PMCR write
+ * ensures ordering and in the interrupt handler this is a NOP as
+ * we're already paused.
+ */
+ write_sysreg_s(BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
+ write_sysreg_s(0, SYS_BRBCR_EL1);
+}
+
+static const int brbe_type_to_perf_type_map[BRBINFx_EL1_TYPE_DEBUG_EXIT + 1][2] = {
+ [BRBINFx_EL1_TYPE_DIRECT_UNCOND] = { PERF_BR_UNCOND, 0 },
+ [BRBINFx_EL1_TYPE_INDIRECT] = { PERF_BR_IND, 0 },
+ [BRBINFx_EL1_TYPE_DIRECT_LINK] = { PERF_BR_CALL, 0 },
+ [BRBINFx_EL1_TYPE_INDIRECT_LINK] = { PERF_BR_IND_CALL, 0 },
+ [BRBINFx_EL1_TYPE_RET] = { PERF_BR_RET, 0 },
+ [BRBINFx_EL1_TYPE_DIRECT_COND] = { PERF_BR_COND, 0 },
+ [BRBINFx_EL1_TYPE_CALL] = { PERF_BR_SYSCALL, 0 },
+ [BRBINFx_EL1_TYPE_ERET] = { PERF_BR_ERET, 0 },
+ [BRBINFx_EL1_TYPE_IRQ] = { PERF_BR_IRQ, 0 },
+ [BRBINFx_EL1_TYPE_TRAP] = { PERF_BR_IRQ, 0 },
+ [BRBINFx_EL1_TYPE_SERROR] = { PERF_BR_SERROR, 0 },
+ [BRBINFx_EL1_TYPE_ALIGN_FAULT] = { PERF_BR_EXTEND_ABI, PERF_BR_NEW_FAULT_ALGN },
+ [BRBINFx_EL1_TYPE_INSN_FAULT] = { PERF_BR_EXTEND_ABI, PERF_BR_NEW_FAULT_INST },
+ [BRBINFx_EL1_TYPE_DATA_FAULT] = { PERF_BR_EXTEND_ABI, PERF_BR_NEW_FAULT_DATA },
+};
+
+static void brbe_set_perf_entry_type(struct perf_branch_entry *entry, u64 brbinf)
+{
+ int brbe_type = brbinf_get_type(brbinf);
+
+ if (brbe_type <= BRBINFx_EL1_TYPE_DEBUG_EXIT) {
+ const int *br_type = brbe_type_to_perf_type_map[brbe_type];
+
+ entry->type = br_type[0];
+ entry->new_type = br_type[1];
+ }
+}
+
+static int brbinf_get_perf_priv(u64 brbinf)
+{
+ int brbe_el = brbinf_get_el(brbinf);
+
+ switch (brbe_el) {
+ case BRBINFx_EL1_EL_EL0:
+ return PERF_BR_PRIV_USER;
+ case BRBINFx_EL1_EL_EL1:
+ return PERF_BR_PRIV_KERNEL;
+ case BRBINFx_EL1_EL_EL2:
+ if (is_kernel_in_hyp_mode())
+ return PERF_BR_PRIV_KERNEL;
+ return PERF_BR_PRIV_HV;
+ default:
+ pr_warn_once("%d - unknown branch privilege captured\n", brbe_el);
+ return PERF_BR_PRIV_UNKNOWN;
+ }
+}
+
+static bool perf_entry_from_brbe_regset(int index, struct perf_branch_entry *entry,
+ const struct perf_event *event)
+{
+ struct brbe_regset bregs;
+ u64 brbinf;
+
+ if (!__read_brbe_regset(&bregs, index))
+ return false;
+
+ brbinf = bregs.brbinf;
+ perf_clear_branch_entry_bitfields(entry);
+ if (brbe_record_is_complete(brbinf)) {
+ entry->from = bregs.brbsrc;
+ entry->to = bregs.brbtgt;
+ } else if (brbe_record_is_source_only(brbinf)) {
+ entry->from = bregs.brbsrc;
+ entry->to = 0;
+ } else if (brbe_record_is_target_only(brbinf)) {
+ entry->from = 0;
+ entry->to = bregs.brbtgt;
+ }
+
+ brbe_set_perf_entry_type(entry, brbinf);
+
+ if (!branch_sample_no_cycles(event))
+ entry->cycles = brbinf_get_cycles(brbinf);
+
+ if (!branch_sample_no_flags(event)) {
+ /* Mispredict info is available for source only and complete branch records. */
+ if (!brbe_record_is_target_only(brbinf)) {
+ entry->mispred = brbinf_get_mispredict(brbinf);
+ entry->predicted = !entry->mispred;
+ }
+
+ /*
+ * Currently TME feature is neither implemented in any hardware
+ * nor it is being supported in the kernel. Just warn here once
+ * if TME related information shows up rather unexpectedly.
+ */
+ if (brbinf_get_lastfailed(brbinf) || brbinf_get_in_tx(brbinf))
+ pr_warn_once("Unknown transaction states\n");
+ }
+
+ /*
+ * Branch privilege level is available for target only and complete
+ * branch records.
+ */
+ if (!brbe_record_is_source_only(brbinf))
+ entry->priv = brbinf_get_perf_priv(brbinf);
+
+ return true;
+}
+
+#define PERF_BR_ARM64_ALL ( \
+ BIT(PERF_BR_COND) | \
+ BIT(PERF_BR_UNCOND) | \
+ BIT(PERF_BR_IND) | \
+ BIT(PERF_BR_CALL) | \
+ BIT(PERF_BR_IND_CALL) | \
+ BIT(PERF_BR_RET))
+
+#define PERF_BR_ARM64_ALL_KERNEL ( \
+ BIT(PERF_BR_SYSCALL) | \
+ BIT(PERF_BR_IRQ) | \
+ BIT(PERF_BR_SERROR) | \
+ BIT(PERF_BR_MAX + PERF_BR_NEW_FAULT_ALGN) | \
+ BIT(PERF_BR_MAX + PERF_BR_NEW_FAULT_DATA) | \
+ BIT(PERF_BR_MAX + PERF_BR_NEW_FAULT_INST))
+
+static void prepare_event_branch_type_mask(u64 branch_sample,
+ unsigned long *event_type_mask)
+{
+ if (branch_sample & PERF_SAMPLE_BRANCH_ANY) {
+ if (branch_sample & PERF_SAMPLE_BRANCH_KERNEL)
+ bitmap_from_u64(event_type_mask,
+ BIT(PERF_BR_ERET) | PERF_BR_ARM64_ALL |
+ PERF_BR_ARM64_ALL_KERNEL);
+ else
+ bitmap_from_u64(event_type_mask, PERF_BR_ARM64_ALL);
+ return;
+ }
+
+ bitmap_zero(event_type_mask, PERF_BR_ARM64_MAX);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_ANY_CALL) {
+ if (branch_sample & PERF_SAMPLE_BRANCH_KERNEL)
+ bitmap_from_u64(event_type_mask, PERF_BR_ARM64_ALL_KERNEL);
+
+ set_bit(PERF_BR_CALL, event_type_mask);
+ set_bit(PERF_BR_IND_CALL, event_type_mask);
+ }
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_IND_JUMP)
+ set_bit(PERF_BR_IND, event_type_mask);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_COND)
+ set_bit(PERF_BR_COND, event_type_mask);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_CALL)
+ set_bit(PERF_BR_CALL, event_type_mask);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_IND_CALL)
+ set_bit(PERF_BR_IND_CALL, event_type_mask);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_ANY_RETURN) {
+ set_bit(PERF_BR_RET, event_type_mask);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_KERNEL)
+ set_bit(PERF_BR_ERET, event_type_mask);
+ }
+}
+
+/*
+ * BRBE is configured with an OR of permissions from all events, so there may
+ * be events which have to be dropped or events where just the source or target
+ * address has to be zeroed.
+ */
+static bool filter_branch_privilege(struct perf_branch_entry *entry, u64 branch_sample_type)
+{
+ bool from_user = access_ok((void __user *)(unsigned long)entry->from, 4);
+ bool to_user = access_ok((void __user *)(unsigned long)entry->to, 4);
+ bool exclude_kernel = !((branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL) ||
+ (is_kernel_in_hyp_mode() && (branch_sample_type & PERF_SAMPLE_BRANCH_HV)));
+
+ /* We can only have a half record if permissions have not been expanded */
+ if (!entry->from || !entry->to)
+ return true;
+
+ /*
+ * If record is within a single exception level, just need to either
+ * drop or keep the entire record.
+ */
+ if (from_user == to_user)
+ return ((entry->priv == PERF_BR_PRIV_KERNEL) && !exclude_kernel) ||
+ ((entry->priv == PERF_BR_PRIV_USER) &&
+ (branch_sample_type & PERF_SAMPLE_BRANCH_USER));
+
+ /*
+ * Record is across exception levels, mask addresses for the exception
+ * level we're not capturing.
+ */
+ if (!(branch_sample_type & PERF_SAMPLE_BRANCH_USER)) {
+ if (from_user)
+ entry->from = 0;
+ if (to_user)
+ entry->to = 0;
+ }
+
+ if (exclude_kernel) {
+ if (!from_user)
+ entry->from = 0;
+ if (!to_user)
+ entry->to = 0;
+ }
+
+ return true;
+}
+
+static bool filter_branch_type(struct perf_branch_entry *entry,
+ const unsigned long *event_type_mask)
+{
+ if (entry->type == PERF_BR_EXTEND_ABI)
+ return test_bit(PERF_BR_MAX + entry->new_type, event_type_mask);
+ else
+ return test_bit(entry->type, event_type_mask);
+}
+
+static bool filter_branch_record(struct perf_branch_entry *entry,
+ u64 branch_sample,
+ const unsigned long *event_type_mask)
+{
+ return filter_branch_type(entry, event_type_mask) &&
+ filter_branch_privilege(entry, branch_sample);
+}
+
+void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack,
+ const struct perf_event *event)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ int nr_hw = brbe_num_branch_records(cpu_pmu);
+ int nr_banks = DIV_ROUND_UP(nr_hw, BRBE_BANK_MAX_ENTRIES);
+ int nr_filtered = 0;
+ u64 branch_sample_type = event->attr.branch_sample_type;
+ DECLARE_BITMAP(event_type_mask, PERF_BR_ARM64_MAX);
+
+ prepare_event_branch_type_mask(branch_sample_type, event_type_mask);
+
+ for (int bank = 0; bank < nr_banks; bank++) {
+ int nr_remaining = nr_hw - (bank * BRBE_BANK_MAX_ENTRIES);
+ int nr_this_bank = min(nr_remaining, BRBE_BANK_MAX_ENTRIES);
+
+ select_brbe_bank(bank);
+
+ for (int i = 0; i < nr_this_bank; i++) {
+ struct perf_branch_entry *pbe = &branch_stack->entries[nr_filtered];
+
+ if (!perf_entry_from_brbe_regset(i, pbe, event))
+ goto done;
+
+ if (!filter_branch_record(pbe, branch_sample_type, event_type_mask))
+ continue;
+
+ nr_filtered++;
+ }
+ }
+
+done:
+ branch_stack->nr = nr_filtered;
+}
diff --git a/drivers/perf/arm_brbe.h b/drivers/perf/arm_brbe.h
new file mode 100644
index 000000000000..b7c7d8796c86
--- /dev/null
+++ b/drivers/perf/arm_brbe.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Branch Record Buffer Extension Helpers.
+ *
+ * Copyright (C) 2022-2025 ARM Limited
+ *
+ * Author: Anshuman Khandual <anshuman.khandual@arm.com>
+ */
+
+struct arm_pmu;
+struct perf_branch_stack;
+struct perf_event;
+
+#ifdef CONFIG_ARM64_BRBE
+void brbe_probe(struct arm_pmu *arm_pmu);
+unsigned int brbe_num_branch_records(const struct arm_pmu *armpmu);
+void brbe_invalidate(void);
+
+void brbe_enable(const struct arm_pmu *arm_pmu);
+void brbe_disable(void);
+
+bool brbe_branch_attr_valid(struct perf_event *event);
+void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack,
+ const struct perf_event *event);
+#else
+static inline void brbe_probe(struct arm_pmu *arm_pmu) { }
+static inline unsigned int brbe_num_branch_records(const struct arm_pmu *armpmu)
+{
+ return 0;
+}
+
+static inline void brbe_invalidate(void) { }
+
+static inline void brbe_enable(const struct arm_pmu *arm_pmu) { };
+static inline void brbe_disable(void) { };
+
+static inline bool brbe_branch_attr_valid(struct perf_event *event)
+{
+ WARN_ON_ONCE(!has_branch_stack(event));
+ return false;
+}
+
+static void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack,
+ const struct perf_event *event)
+{
+}
+#endif
diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig
index 0b316fe69a45..6f4e28fc84a2 100644
--- a/drivers/perf/arm_cspmu/Kconfig
+++ b/drivers/perf/arm_cspmu/Kconfig
@@ -1,13 +1,29 @@
# SPDX-License-Identifier: GPL-2.0
#
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
tristate "ARM Coresight Architecture PMU"
- depends on ARM64 && ACPI
- depends on ACPI_APMT || COMPILE_TEST
+ depends on ARM64 || COMPILE_TEST
help
Provides support for performance monitoring unit (PMU) devices
based on ARM CoreSight PMU architecture. Note that this PMU
architecture does not have relationship with the ARM CoreSight
Self-Hosted Tracing.
+
+config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+ tristate "NVIDIA Coresight Architecture PMU"
+ depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+ help
+ Provides NVIDIA specific attributes for performance monitoring unit
+ (PMU) devices based on ARM CoreSight PMU architecture.
+
+config AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+ tristate "Ampere Coresight Architecture PMU"
+ depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+ help
+ Provides Ampere specific attributes for performance monitoring unit
+ (PMU) devices based on ARM CoreSight PMU architecture.
+
+ In the first phase, the driver enables support on MCU PMU used in
+ AmpereOne SoC family.
diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
index fedb17df982d..220a734efd54 100644
--- a/drivers/perf/arm_cspmu/Makefile
+++ b/drivers/perf/arm_cspmu/Makefile
@@ -1,6 +1,10 @@
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
-arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
+
+arm_cspmu_module-y := arm_cspmu.o
+
+obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o
+obj-$(CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += ampere_cspmu.o
diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c
new file mode 100644
index 000000000000..b8ca69fd9d1d
--- /dev/null
+++ b/drivers/perf/arm_cspmu/ampere_cspmu.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ampere SoC PMU (Performance Monitor Unit)
+ *
+ * Copyright (c) 2023, Ampere Computing LLC
+ */
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/topology.h>
+
+#include "arm_cspmu.h"
+
+#define PMAUXR0 PMIMPDEF
+#define PMAUXR1 (PMIMPDEF + 0x4)
+#define PMAUXR2 (PMIMPDEF + 0x8)
+#define PMAUXR3 (PMIMPDEF + 0xC)
+
+#define to_ampere_cspmu_ctx(cspmu) ((struct ampere_cspmu_ctx *)(cspmu->impl.ctx))
+
+struct ampere_cspmu_ctx {
+ const char *name;
+ struct attribute **event_attr;
+ struct attribute **format_attr;
+};
+
+static DEFINE_IDA(mcu_pmu_ida);
+
+#define SOC_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end) \
+ static inline u32 get_##_name(const struct perf_event *event) \
+ { \
+ return FIELD_GET(GENMASK_ULL(_end, _start), \
+ event->attr._config); \
+ } \
+
+SOC_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 8);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(threshold, config1, 0, 7);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(rank, config1, 8, 23);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(bank, config1, 24, 55);
+
+static struct attribute *ampereone_mcu_pmu_event_attrs[] = {
+ ARM_CSPMU_EVENT_ATTR(cycle_count, 0x00),
+ ARM_CSPMU_EVENT_ATTR(act_sent, 0x01),
+ ARM_CSPMU_EVENT_ATTR(pre_sent, 0x02),
+ ARM_CSPMU_EVENT_ATTR(rd_sent, 0x03),
+ ARM_CSPMU_EVENT_ATTR(rda_sent, 0x04),
+ ARM_CSPMU_EVENT_ATTR(wr_sent, 0x05),
+ ARM_CSPMU_EVENT_ATTR(wra_sent, 0x06),
+ ARM_CSPMU_EVENT_ATTR(pd_entry_vld, 0x07),
+ ARM_CSPMU_EVENT_ATTR(sref_entry_vld, 0x08),
+ ARM_CSPMU_EVENT_ATTR(prea_sent, 0x09),
+ ARM_CSPMU_EVENT_ATTR(pre_sb_sent, 0x0a),
+ ARM_CSPMU_EVENT_ATTR(ref_sent, 0x0b),
+ ARM_CSPMU_EVENT_ATTR(rfm_sent, 0x0c),
+ ARM_CSPMU_EVENT_ATTR(ref_sb_sent, 0x0d),
+ ARM_CSPMU_EVENT_ATTR(rfm_sb_sent, 0x0e),
+ ARM_CSPMU_EVENT_ATTR(rd_rda_sent, 0x0f),
+ ARM_CSPMU_EVENT_ATTR(wr_wra_sent, 0x10),
+ ARM_CSPMU_EVENT_ATTR(raw_hazard, 0x11),
+ ARM_CSPMU_EVENT_ATTR(war_hazard, 0x12),
+ ARM_CSPMU_EVENT_ATTR(waw_hazard, 0x13),
+ ARM_CSPMU_EVENT_ATTR(rar_hazard, 0x14),
+ ARM_CSPMU_EVENT_ATTR(raw_war_waw_hazard, 0x15),
+ ARM_CSPMU_EVENT_ATTR(hprd_lprd_wr_req_vld, 0x16),
+ ARM_CSPMU_EVENT_ATTR(lprd_req_vld, 0x17),
+ ARM_CSPMU_EVENT_ATTR(hprd_req_vld, 0x18),
+ ARM_CSPMU_EVENT_ATTR(hprd_lprd_req_vld, 0x19),
+ ARM_CSPMU_EVENT_ATTR(prefetch_tgt, 0x1a),
+ ARM_CSPMU_EVENT_ATTR(wr_req_vld, 0x1b),
+ ARM_CSPMU_EVENT_ATTR(partial_wr_req_vld, 0x1c),
+ ARM_CSPMU_EVENT_ATTR(rd_retry, 0x1d),
+ ARM_CSPMU_EVENT_ATTR(wr_retry, 0x1e),
+ ARM_CSPMU_EVENT_ATTR(retry_gnt, 0x1f),
+ ARM_CSPMU_EVENT_ATTR(rank_change, 0x20),
+ ARM_CSPMU_EVENT_ATTR(dir_change, 0x21),
+ ARM_CSPMU_EVENT_ATTR(rank_dir_change, 0x22),
+ ARM_CSPMU_EVENT_ATTR(rank_active, 0x23),
+ ARM_CSPMU_EVENT_ATTR(rank_idle, 0x24),
+ ARM_CSPMU_EVENT_ATTR(rank_pd, 0x25),
+ ARM_CSPMU_EVENT_ATTR(rank_sref, 0x26),
+ ARM_CSPMU_EVENT_ATTR(queue_fill_gt_thresh, 0x27),
+ ARM_CSPMU_EVENT_ATTR(queue_rds_gt_thresh, 0x28),
+ ARM_CSPMU_EVENT_ATTR(queue_wrs_gt_thresh, 0x29),
+ ARM_CSPMU_EVENT_ATTR(phy_updt_complt, 0x2a),
+ ARM_CSPMU_EVENT_ATTR(tz_fail, 0x2b),
+ ARM_CSPMU_EVENT_ATTR(dram_errc, 0x2c),
+ ARM_CSPMU_EVENT_ATTR(dram_errd, 0x2d),
+ ARM_CSPMU_EVENT_ATTR(read_data_return, 0x32),
+ ARM_CSPMU_EVENT_ATTR(chi_wr_data_delta, 0x33),
+ ARM_CSPMU_EVENT_ATTR(zq_start, 0x34),
+ ARM_CSPMU_EVENT_ATTR(zq_latch, 0x35),
+ ARM_CSPMU_EVENT_ATTR(wr_fifo_full, 0x36),
+ ARM_CSPMU_EVENT_ATTR(info_fifo_full, 0x37),
+ ARM_CSPMU_EVENT_ATTR(cmd_fifo_full, 0x38),
+ ARM_CSPMU_EVENT_ATTR(dfi_nop, 0x39),
+ ARM_CSPMU_EVENT_ATTR(dfi_cmd, 0x3a),
+ ARM_CSPMU_EVENT_ATTR(rd_run_len, 0x3b),
+ ARM_CSPMU_EVENT_ATTR(wr_run_len, 0x3c),
+
+ ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+ NULL,
+};
+
+static struct attribute *ampereone_mcu_format_attrs[] = {
+ ARM_CSPMU_FORMAT_EVENT_ATTR,
+ ARM_CSPMU_FORMAT_ATTR(threshold, "config1:0-7"),
+ ARM_CSPMU_FORMAT_ATTR(rank, "config1:8-23"),
+ ARM_CSPMU_FORMAT_ATTR(bank, "config1:24-55"),
+ NULL,
+};
+
+static struct attribute **
+ampere_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
+{
+ const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+ return ctx->event_attr;
+}
+
+static struct attribute **
+ampere_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
+{
+ const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+ return ctx->format_attr;
+}
+
+static const char *
+ampere_cspmu_get_name(const struct arm_cspmu *cspmu)
+{
+ const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+ return ctx->name;
+}
+
+static void ampere_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event)
+{
+ /*
+ * PMCCFILTR is RES0, so this is just a dummy callback to override
+ * the default implementation and avoid writing to it.
+ */
+}
+
+static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event)
+{
+ u32 threshold, rank, bank;
+
+ threshold = get_threshold(event);
+ rank = get_rank(event);
+ bank = get_bank(event);
+
+ writel(threshold, cspmu->base0 + PMAUXR0);
+ writel(rank, cspmu->base0 + PMAUXR1);
+ writel(bank, cspmu->base0 + PMAUXR2);
+}
+
+static int ampere_cspmu_validate_configs(struct perf_event *event,
+ struct perf_event *event2)
+{
+ if (get_threshold(event) != get_threshold(event2) ||
+ get_rank(event) != get_rank(event2) ||
+ get_bank(event) != get_bank(event2))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
+ struct perf_event *new)
+{
+ struct perf_event *curr, *leader = new->group_leader;
+ unsigned int idx;
+ int ret;
+
+ ret = ampere_cspmu_validate_configs(new, leader);
+ if (ret)
+ return ret;
+
+ /* We compare the global filter settings to the existing events */
+ idx = find_first_bit(cspmu->hw_events.used_ctrs,
+ cspmu->cycle_counter_logical_idx);
+
+ /* This is the first event, thus any configuration is fine */
+ if (idx == cspmu->cycle_counter_logical_idx)
+ return 0;
+
+ curr = cspmu->hw_events.events[idx];
+
+ return ampere_cspmu_validate_configs(curr, new);
+}
+
+static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
+ const char *name_pattern)
+{
+ struct device *dev = cspmu->dev;
+ int id;
+
+ id = ida_alloc(&mcu_pmu_ida, GFP_KERNEL);
+ if (id < 0)
+ return ERR_PTR(id);
+
+ return devm_kasprintf(dev, GFP_KERNEL, name_pattern, id);
+}
+
+static int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
+{
+ struct device *dev = cspmu->dev;
+ struct ampere_cspmu_ctx *ctx;
+ struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+
+ ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->event_attr = ampereone_mcu_pmu_event_attrs;
+ ctx->format_attr = ampereone_mcu_format_attrs;
+ ctx->name = ampere_cspmu_format_name(cspmu, "ampere_mcu_pmu_%d");
+ if (IS_ERR_OR_NULL(ctx->name))
+ return ctx->name ? PTR_ERR(ctx->name) : -ENOMEM;
+
+ cspmu->impl.ctx = ctx;
+
+ impl_ops->set_cc_filter = ampere_cspmu_set_cc_filter;
+ impl_ops->set_ev_filter = ampere_cspmu_set_ev_filter;
+ impl_ops->validate_event = ampere_cspmu_validate_event;
+ impl_ops->get_name = ampere_cspmu_get_name;
+ impl_ops->get_event_attrs = ampere_cspmu_get_event_attrs;
+ impl_ops->get_format_attrs = ampere_cspmu_get_format_attrs;
+
+ return 0;
+}
+
+/* Match all Ampere Coresight PMU devices */
+static const struct arm_cspmu_impl_match ampere_cspmu_param = {
+ .pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE,
+ .module = THIS_MODULE,
+ .impl_init_ops = ampere_cspmu_init_ops
+};
+
+static int __init ampere_cspmu_init(void)
+{
+ int ret;
+
+ ret = arm_cspmu_impl_register(&ampere_cspmu_param);
+ if (ret)
+ pr_err("ampere_cspmu backend registration error: %d\n", ret);
+
+ return ret;
+}
+
+static void __exit ampere_cspmu_exit(void)
+{
+ arm_cspmu_impl_unregister(&ampere_cspmu_param);
+}
+
+module_init(ampere_cspmu_init);
+module_exit(ampere_cspmu_exit);
+
+MODULE_DESCRIPTION("Ampere SoC Performance Monitor Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index e31302ab7e37..34430b68f602 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -16,7 +16,7 @@
* The user should refer to the vendor technical documentation to get details
* about the supported events.
*
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
@@ -26,12 +26,12 @@
#include <linux/interrupt.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
-#include <acpi/processor.h>
#include "arm_cspmu.h"
-#include "nvidia_cspmu.h"
#define PMUNAME "arm_cspmu"
#define DRVNAME "arm-cs-arch-pmu"
@@ -40,51 +40,6 @@
ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_cpumask_show, \
(unsigned long)_config)
-/*
- * CoreSight PMU Arch register offsets.
- */
-#define PMEVCNTR_LO 0x0
-#define PMEVCNTR_HI 0x4
-#define PMEVTYPER 0x400
-#define PMCCFILTR 0x47C
-#define PMEVFILTR 0xA00
-#define PMCNTENSET 0xC00
-#define PMCNTENCLR 0xC20
-#define PMINTENSET 0xC40
-#define PMINTENCLR 0xC60
-#define PMOVSCLR 0xC80
-#define PMOVSSET 0xCC0
-#define PMCFGR 0xE00
-#define PMCR 0xE04
-#define PMIIDR 0xE08
-
-/* PMCFGR register field */
-#define PMCFGR_NCG GENMASK(31, 28)
-#define PMCFGR_HDBG BIT(24)
-#define PMCFGR_TRO BIT(23)
-#define PMCFGR_SS BIT(22)
-#define PMCFGR_FZO BIT(21)
-#define PMCFGR_MSI BIT(20)
-#define PMCFGR_UEN BIT(19)
-#define PMCFGR_NA BIT(17)
-#define PMCFGR_EX BIT(16)
-#define PMCFGR_CCD BIT(15)
-#define PMCFGR_CC BIT(14)
-#define PMCFGR_SIZE GENMASK(13, 8)
-#define PMCFGR_N GENMASK(7, 0)
-
-/* PMCR register field */
-#define PMCR_TRO BIT(11)
-#define PMCR_HDBG BIT(10)
-#define PMCR_FZO BIT(9)
-#define PMCR_NA BIT(8)
-#define PMCR_DP BIT(5)
-#define PMCR_X BIT(4)
-#define PMCR_D BIT(3)
-#define PMCR_C BIT(2)
-#define PMCR_P BIT(1)
-#define PMCR_E BIT(0)
-
/* Each SET/CLR register supports up to 32 counters. */
#define ARM_CSPMU_SET_CLR_COUNTER_SHIFT 5
#define ARM_CSPMU_SET_CLR_COUNTER_NUM \
@@ -101,27 +56,27 @@
#define ARM_CSPMU_ACTIVE_CPU_MASK 0x0
#define ARM_CSPMU_ASSOCIATED_CPU_MASK 0x1
-/* Check if field f in flags is set with value v */
-#define CHECK_APMT_FLAG(flags, f, v) \
- ((flags & (ACPI_APMT_FLAGS_ ## f)) == (ACPI_APMT_FLAGS_ ## f ## _ ## v))
-
-/* Check and use default if implementer doesn't provide attribute callback */
-#define CHECK_DEFAULT_IMPL_OPS(ops, callback) \
- do { \
- if (!ops->callback) \
- ops->callback = arm_cspmu_ ## callback; \
- } while (0)
-
/*
* Maximum poll count for reading counter value using high-low-high sequence.
*/
#define HILOHI_MAX_POLL 1000
-/* JEDEC-assigned JEP106 identification code */
-#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B
-
static unsigned long arm_cspmu_cpuhp_state;
+static DEFINE_MUTEX(arm_cspmu_lock);
+
+static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event);
+static void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event);
+
+static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev)
+{
+ struct acpi_apmt_node **ptr = dev_get_platdata(dev);
+
+ return ptr ? *ptr : NULL;
+}
+
/*
* In CoreSight PMU architecture, all of the MMIO registers are 32-bit except
* counter register. The counter register can be implemented as 32-bit or 64-bit
@@ -156,12 +111,6 @@ static u64 read_reg64_hilohi(const void __iomem *addr, u32 max_poll_count)
return val;
}
-/* Check if PMU supports 64-bit single copy atomic. */
-static inline bool supports_64bit_atomics(const struct arm_cspmu *cspmu)
-{
- return CHECK_APMT_FLAG(cspmu->apmt_node->flags, ATOMIC, SUPP);
-}
-
/* Check if cycle counter is supported. */
static inline bool supports_cycle_counter(const struct arm_cspmu *cspmu)
{
@@ -189,10 +138,10 @@ static inline bool use_64b_counter_reg(const struct arm_cspmu *cspmu)
ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct dev_ext_attribute *eattr =
- container_of(attr, struct dev_ext_attribute, attr);
- return sysfs_emit(buf, "event=0x%llx\n",
- (unsigned long long)eattr->var);
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, typeof(*pmu_attr), attr);
+ return sysfs_emit(buf, "event=0x%llx\n", pmu_attr->id);
}
EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_event_show);
@@ -231,19 +180,10 @@ arm_cspmu_event_attr_is_visible(struct kobject *kobj,
return attr->mode;
}
-ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct dev_ext_attribute *eattr =
- container_of(attr, struct dev_ext_attribute, attr);
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_format_show);
-
static struct attribute *arm_cspmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_FILTER_ATTR,
+ ARM_CSPMU_FORMAT_FILTER2_ATTR,
NULL,
};
@@ -268,11 +208,6 @@ static bool arm_cspmu_is_cycle_counter_event(const struct perf_event *event)
return (event->attr.config == ARM_CSPMU_EVT_CYCLES_DEFAULT);
}
-static u32 arm_cspmu_event_filter(const struct perf_event *event)
-{
- return event->attr.config1 & ARM_CSPMU_FILTER_MASK;
-}
-
static ssize_t arm_cspmu_identifier_show(struct device *dev,
struct device_attribute *attr,
char *page)
@@ -320,7 +255,11 @@ static const char *arm_cspmu_get_name(const struct arm_cspmu *cspmu)
static atomic_t pmu_idx[ACPI_APMT_NODE_TYPE_COUNT] = { 0 };
dev = cspmu->dev;
- apmt_node = cspmu->apmt_node;
+ apmt_node = arm_cspmu_apmt_node(dev);
+ if (!apmt_node)
+ return devm_kasprintf(dev, GFP_KERNEL, PMUNAME "_%u",
+ atomic_fetch_inc(&pmu_idx[0]));
+
pmu_type = apmt_node->type;
if (pmu_type >= ACPI_APMT_NODE_TYPE_COUNT) {
@@ -379,61 +318,133 @@ static struct attribute_group arm_cspmu_cpumask_attr_group = {
.attrs = arm_cspmu_cpumask_attrs,
};
-struct impl_match {
- u32 pmiidr;
- u32 mask;
- int (*impl_init_ops)(struct arm_cspmu *cspmu);
-};
-
-static const struct impl_match impl_match[] = {
+static struct arm_cspmu_impl_match impl_match[] = {
{
- .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
- .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
- .impl_init_ops = nv_cspmu_init_ops
+ .module_name = "nvidia_cspmu",
+ .pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA,
+ .pmiidr_mask = PMIIDR_IMPLEMENTER,
+ .module = NULL,
+ .impl_init_ops = NULL,
},
- {}
+ {
+ .module_name = "ampere_cspmu",
+ .pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE,
+ .pmiidr_mask = PMIIDR_IMPLEMENTER,
+ .module = NULL,
+ .impl_init_ops = NULL,
+ },
+
+ {0}
};
-static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
+static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr)
{
- int ret;
- struct acpi_apmt_node *apmt_node = cspmu->apmt_node;
- struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
- const struct impl_match *match = impl_match;
+ struct arm_cspmu_impl_match *match = impl_match;
- /*
- * Get PMU implementer and product id from APMT node.
- * If APMT node doesn't have implementer/product id, try get it
- * from PMIIDR.
- */
- cspmu->impl.pmiidr =
- (apmt_node->impl_id) ? apmt_node->impl_id :
- readl(cspmu->base0 + PMIIDR);
+ for (; match->pmiidr_val; match++) {
+ u32 mask = match->pmiidr_mask;
- /* Find implementer specific attribute ops. */
- for (; match->pmiidr; match++) {
- const u32 mask = match->mask;
+ if ((match->pmiidr_val & mask) == (pmiidr & mask))
+ return match;
+ }
- if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
- ret = match->impl_init_ops(cspmu);
- if (ret)
- return ret;
+ return NULL;
+}
+
+static u32 arm_cspmu_get_pmiidr(struct arm_cspmu *cspmu)
+{
+ u32 pmiidr, pmpidr;
- break;
+ pmiidr = readl(cspmu->base0 + PMIIDR);
+
+ if (pmiidr != 0)
+ return pmiidr;
+
+ /* Construct PMIIDR value from PMPIDRs. */
+
+ pmpidr = readl(cspmu->base0 + PMPIDR0);
+ pmiidr |= FIELD_PREP(PMIIDR_PRODUCTID_PART_0,
+ FIELD_GET(PMPIDR0_PART_0, pmpidr));
+
+ pmpidr = readl(cspmu->base0 + PMPIDR1);
+ pmiidr |= FIELD_PREP(PMIIDR_PRODUCTID_PART_1,
+ FIELD_GET(PMPIDR1_PART_1, pmpidr));
+ pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_0,
+ FIELD_GET(PMPIDR1_DES_0, pmpidr));
+
+ pmpidr = readl(cspmu->base0 + PMPIDR2);
+ pmiidr |= FIELD_PREP(PMIIDR_VARIANT,
+ FIELD_GET(PMPIDR2_REVISION, pmpidr));
+ pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_1,
+ FIELD_GET(PMPIDR2_DES_1, pmpidr));
+
+ pmpidr = readl(cspmu->base0 + PMPIDR3);
+ pmiidr |= FIELD_PREP(PMIIDR_REVISION,
+ FIELD_GET(PMPIDR3_REVAND, pmpidr));
+
+ pmpidr = readl(cspmu->base0 + PMPIDR4);
+ pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_2,
+ FIELD_GET(PMPIDR4_DES_2, pmpidr));
+
+ return pmiidr;
+}
+
+#define DEFAULT_IMPL_OP(name) .name = arm_cspmu_##name
+
+static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
+{
+ int ret = 0;
+ struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev);
+ struct arm_cspmu_impl_match *match;
+
+ /* Start with a default PMU implementation */
+ cspmu->impl.module = THIS_MODULE;
+ cspmu->impl.pmiidr = arm_cspmu_get_pmiidr(cspmu);
+ cspmu->impl.ops = (struct arm_cspmu_impl_ops) {
+ DEFAULT_IMPL_OP(get_event_attrs),
+ DEFAULT_IMPL_OP(get_format_attrs),
+ DEFAULT_IMPL_OP(get_identifier),
+ DEFAULT_IMPL_OP(get_name),
+ DEFAULT_IMPL_OP(is_cycle_counter_event),
+ DEFAULT_IMPL_OP(event_type),
+ DEFAULT_IMPL_OP(set_cc_filter),
+ DEFAULT_IMPL_OP(set_ev_filter),
+ DEFAULT_IMPL_OP(event_attr_is_visible),
+ };
+
+ /* Firmware may override implementer/product ID from PMIIDR */
+ if (apmt_node && apmt_node->impl_id)
+ cspmu->impl.pmiidr = apmt_node->impl_id;
+
+ /* Find implementer specific attribute ops. */
+ match = arm_cspmu_impl_match_get(cspmu->impl.pmiidr);
+
+ /* Load implementer module and initialize the callbacks. */
+ if (match) {
+ mutex_lock(&arm_cspmu_lock);
+
+ if (match->impl_init_ops) {
+ /* Prevent unload until PMU registration is done. */
+ if (try_module_get(match->module)) {
+ cspmu->impl.module = match->module;
+ cspmu->impl.match = match;
+ ret = match->impl_init_ops(cspmu);
+ if (ret)
+ module_put(match->module);
+ } else {
+ WARN(1, "arm_cspmu failed to get module: %s\n",
+ match->module_name);
+ ret = -EINVAL;
+ }
+ } else {
+ request_module_nowait(match->module_name);
+ ret = -EPROBE_DEFER;
}
- }
- /* Use default callbacks if implementer doesn't provide one. */
- CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, get_identifier);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, get_name);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, is_cycle_counter_event);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter);
- CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible);
+ mutex_unlock(&arm_cspmu_lock);
+ }
- return 0;
+ return ret;
}
static struct attribute_group *
@@ -478,28 +489,16 @@ arm_cspmu_alloc_format_attr_group(struct arm_cspmu *cspmu)
return format_group;
}
-static struct attribute_group **
-arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu)
+static int arm_cspmu_alloc_attr_groups(struct arm_cspmu *cspmu)
{
- struct attribute_group **attr_groups = NULL;
- struct device *dev = cspmu->dev;
+ const struct attribute_group **attr_groups = cspmu->attr_groups;
const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
- int ret;
-
- ret = arm_cspmu_init_impl_ops(cspmu);
- if (ret)
- return NULL;
cspmu->identifier = impl_ops->get_identifier(cspmu);
cspmu->name = impl_ops->get_name(cspmu);
if (!cspmu->identifier || !cspmu->name)
- return NULL;
-
- attr_groups = devm_kcalloc(dev, 5, sizeof(struct attribute_group *),
- GFP_KERNEL);
- if (!attr_groups)
- return NULL;
+ return -ENOMEM;
attr_groups[0] = arm_cspmu_alloc_event_attr_group(cspmu);
attr_groups[1] = arm_cspmu_alloc_format_attr_group(cspmu);
@@ -507,18 +506,14 @@ arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu)
attr_groups[3] = &arm_cspmu_cpumask_attr_group;
if (!attr_groups[0] || !attr_groups[1])
- return NULL;
+ return -ENOMEM;
- return attr_groups;
+ return 0;
}
static inline void arm_cspmu_reset_counters(struct arm_cspmu *cspmu)
{
- u32 pmcr = 0;
-
- pmcr |= PMCR_P;
- pmcr |= PMCR_C;
- writel(pmcr, cspmu->base0 + PMCR);
+ writel(PMCR_C | PMCR_P, cspmu->base0 + PMCR);
}
static inline void arm_cspmu_start_counters(struct arm_cspmu *cspmu)
@@ -555,7 +550,7 @@ static void arm_cspmu_disable(struct pmu *pmu)
static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
struct perf_event *event)
{
- int idx;
+ int idx, ret;
struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
if (supports_cycle_counter(cspmu)) {
@@ -589,6 +584,12 @@ static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
if (idx >= cspmu->num_logical_ctrs)
return -EAGAIN;
+ if (cspmu->impl.ops.validate_event) {
+ ret = cspmu->impl.ops.validate_event(cspmu, event);
+ if (ret)
+ return ret;
+ }
+
set_bit(idx, hw_events->used_ctrs);
return idx;
@@ -641,6 +642,9 @@ static int arm_cspmu_event_init(struct perf_event *event)
cspmu = to_arm_cspmu(event->pmu);
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
/*
* Following other "uncore" PMUs, we do not support sampling mode or
* attach to a task (per-process mode).
@@ -702,7 +706,10 @@ static void arm_cspmu_write_counter(struct perf_event *event, u64 val)
if (use_64b_counter_reg(cspmu)) {
offset = counter_offset(sizeof(u64), event->hw.idx);
- writeq(val, cspmu->base1 + offset);
+ if (cspmu->has_atomic_dword)
+ writeq(val, cspmu->base1 + offset);
+ else
+ lo_hi_writeq(val, cspmu->base1 + offset);
} else {
offset = counter_offset(sizeof(u32), event->hw.idx);
@@ -720,7 +727,7 @@ static u64 arm_cspmu_read_counter(struct perf_event *event)
offset = counter_offset(sizeof(u64), event->hw.idx);
counter_addr = cspmu->base1 + offset;
- return supports_64bit_atomics(cspmu) ?
+ return cspmu->has_atomic_dword ?
readq(counter_addr) :
read_reg64_hilohi(counter_addr, HILOHI_MAX_POLL);
}
@@ -795,27 +802,29 @@ static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu,
writel(hwc->config, cspmu->base0 + offset);
}
-static inline void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
- struct hw_perf_event *hwc,
- u32 filter)
+static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event)
{
- u32 offset = PMEVFILTR + (4 * hwc->idx);
+ u32 filter = event->attr.config1 & ARM_CSPMU_FILTER_MASK;
+ u32 filter2 = event->attr.config2 & ARM_CSPMU_FILTER_MASK;
+ u32 offset = 4 * event->hw.idx;
- writel(filter, cspmu->base0 + offset);
+ writel(filter, cspmu->base0 + PMEVFILTR + offset);
+ writel(filter2, cspmu->base0 + PMEVFILT2R + offset);
}
-static inline void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu, u32 filter)
+static void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event)
{
- u32 offset = PMCCFILTR;
+ u32 filter = event->attr.config1 & ARM_CSPMU_FILTER_MASK;
- writel(filter, cspmu->base0 + offset);
+ writel(filter, cspmu->base0 + PMCCFILTR);
}
static void arm_cspmu_start(struct perf_event *event, int pmu_flags)
{
struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- u32 filter;
/* We always reprogram the counter */
if (pmu_flags & PERF_EF_RELOAD)
@@ -823,13 +832,11 @@ static void arm_cspmu_start(struct perf_event *event, int pmu_flags)
arm_cspmu_set_event_period(event);
- filter = cspmu->impl.ops.event_filter(event);
-
if (event->hw.extra_reg.idx == cspmu->cycle_counter_logical_idx) {
- arm_cspmu_set_cc_filter(cspmu, filter);
+ cspmu->impl.ops.set_cc_filter(cspmu, event);
} else {
arm_cspmu_set_event(cspmu, hwc);
- arm_cspmu_set_ev_filter(cspmu, hwc, filter);
+ cspmu->impl.ops.set_ev_filter(cspmu, event);
}
hwc->state = 0;
@@ -846,6 +853,10 @@ static void arm_cspmu_stop(struct perf_event *event, int pmu_flags)
return;
arm_cspmu_disable_counter(cspmu, hwc->idx);
+
+ if (cspmu->impl.ops.reset_ev_filter)
+ cspmu->impl.ops.reset_ev_filter(cspmu, event);
+
arm_cspmu_event_update(event);
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
@@ -911,24 +922,25 @@ static struct arm_cspmu *arm_cspmu_alloc(struct platform_device *pdev)
{
struct acpi_apmt_node *apmt_node;
struct arm_cspmu *cspmu;
- struct device *dev;
-
- dev = &pdev->dev;
- apmt_node = *(struct acpi_apmt_node **)dev_get_platdata(dev);
- if (!apmt_node) {
- dev_err(dev, "failed to get APMT node\n");
- return NULL;
- }
+ struct device *dev = &pdev->dev;
cspmu = devm_kzalloc(dev, sizeof(*cspmu), GFP_KERNEL);
if (!cspmu)
return NULL;
cspmu->dev = dev;
- cspmu->apmt_node = apmt_node;
-
platform_set_drvdata(pdev, cspmu);
+ apmt_node = arm_cspmu_apmt_node(dev);
+ if (apmt_node) {
+ cspmu->has_atomic_dword = apmt_node->flags & ACPI_APMT_FLAGS_ATOMIC;
+ } else {
+ u32 width = 0;
+
+ device_property_read_u32(dev, "reg-io-width", &width);
+ cspmu->has_atomic_dword = (width == 8);
+ }
+
return cspmu;
}
@@ -936,11 +948,9 @@ static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu)
{
struct device *dev;
struct platform_device *pdev;
- struct acpi_apmt_node *apmt_node;
dev = cspmu->dev;
pdev = to_platform_device(dev);
- apmt_node = cspmu->apmt_node;
/* Base address for page 0. */
cspmu->base0 = devm_platform_ioremap_resource(pdev, 0);
@@ -951,7 +961,7 @@ static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu)
/* Base address for page 1 if supported. Otherwise point to page 0. */
cspmu->base1 = cspmu->base0;
- if (CHECK_APMT_FLAG(apmt_node->flags, DUAL_PAGE, SUPP)) {
+ if (platform_get_resource(pdev, IORESOURCE_MEM, 1)) {
cspmu->base1 = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(cspmu->base1)) {
dev_err(dev, "ioremap failed for page-1 resource\n");
@@ -1048,19 +1058,14 @@ static int arm_cspmu_request_irq(struct arm_cspmu *cspmu)
int irq, ret;
struct device *dev;
struct platform_device *pdev;
- struct acpi_apmt_node *apmt_node;
dev = cspmu->dev;
pdev = to_platform_device(dev);
- apmt_node = cspmu->apmt_node;
/* Skip IRQ request if the PMU does not support overflow interrupt. */
- if (apmt_node->ovflw_irq == 0)
- return 0;
-
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
if (irq < 0)
- return irq;
+ return irq == -ENXIO ? 0 : irq;
ret = devm_request_irq(dev, irq, arm_cspmu_handle_irq,
IRQF_NOBALANCING | IRQF_NO_THREAD, dev_name(dev),
@@ -1075,20 +1080,21 @@ static int arm_cspmu_request_irq(struct arm_cspmu *cspmu)
return 0;
}
+#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
+#include <acpi/processor.h>
+
static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid)
{
- u32 acpi_uid;
- struct device *cpu_dev = get_cpu_device(cpu);
- struct acpi_device *acpi_dev = ACPI_COMPANION(cpu_dev);
+ struct device *cpu_dev;
+ struct acpi_device *acpi_dev;
+ cpu_dev = get_cpu_device(cpu);
if (!cpu_dev)
return -ENODEV;
+ acpi_dev = ACPI_COMPANION(cpu_dev);
while (acpi_dev) {
- if (!strcmp(acpi_device_hid(acpi_dev),
- ACPI_PROCESSOR_CONTAINER_HID) &&
- !kstrtouint(acpi_device_uid(acpi_dev), 0, &acpi_uid) &&
- acpi_uid == container_uid)
+ if (acpi_dev_hid_uid_match(acpi_dev, ACPI_PROCESSOR_CONTAINER_HID, container_uid))
return 0;
acpi_dev = acpi_dev_parent(acpi_dev);
@@ -1097,15 +1103,13 @@ static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid)
return -ENODEV;
}
-static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu)
+static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
{
- struct device *dev;
struct acpi_apmt_node *apmt_node;
int affinity_flag;
int cpu;
- dev = cspmu->pmu.dev;
- apmt_node = cspmu->apmt_node;
+ apmt_node = arm_cspmu_apmt_node(cspmu->dev);
affinity_flag = apmt_node->flags & ACPI_APMT_FLAGS_AFFINITY;
if (affinity_flag == ACPI_APMT_FLAGS_AFFINITY_PROC) {
@@ -1126,22 +1130,54 @@ static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu)
}
}
- if (cpumask_empty(&cspmu->associated_cpus)) {
- dev_dbg(dev, "No cpu associated with the PMU\n");
- return -ENODEV;
+ return 0;
+}
+#else
+static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
+{
+ return -ENODEV;
+}
+#endif
+
+static int arm_cspmu_of_get_cpus(struct arm_cspmu *cspmu)
+{
+ struct of_phandle_iterator it;
+ int ret, cpu;
+
+ of_for_each_phandle(&it, ret, dev_of_node(cspmu->dev), "cpus", NULL, 0) {
+ cpu = of_cpu_node_to_id(it.node);
+ if (cpu < 0)
+ continue;
+ cpumask_set_cpu(cpu, &cspmu->associated_cpus);
}
+ return ret == -ENOENT ? 0 : ret;
+}
- return 0;
+static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu)
+{
+ int ret = 0;
+
+ if (arm_cspmu_apmt_node(cspmu->dev))
+ ret = arm_cspmu_acpi_get_cpus(cspmu);
+ else if (device_property_present(cspmu->dev, "cpus"))
+ ret = arm_cspmu_of_get_cpus(cspmu);
+ else
+ cpumask_copy(&cspmu->associated_cpus, cpu_possible_mask);
+
+ if (!ret && cpumask_empty(&cspmu->associated_cpus)) {
+ dev_dbg(cspmu->dev, "No cpu associated with the PMU\n");
+ ret = -ENODEV;
+ }
+ return ret;
}
static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
{
int ret, capabilities;
- struct attribute_group **attr_groups;
- attr_groups = arm_cspmu_alloc_attr_group(cspmu);
- if (!attr_groups)
- return -ENOMEM;
+ ret = arm_cspmu_alloc_attr_groups(cspmu);
+ if (ret)
+ return ret;
ret = cpuhp_state_add_instance(arm_cspmu_cpuhp_state,
&cspmu->cpuhp_node);
@@ -1154,7 +1190,8 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
cspmu->pmu = (struct pmu){
.task_ctx_nr = perf_invalid_context,
- .module = THIS_MODULE,
+ .module = cspmu->impl.module,
+ .parent = cspmu->dev,
.pmu_enable = arm_cspmu_enable,
.pmu_disable = arm_cspmu_disable,
.event_init = arm_cspmu_event_init,
@@ -1163,12 +1200,11 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
.start = arm_cspmu_start,
.stop = arm_cspmu_stop,
.read = arm_cspmu_read,
- .attr_groups = (const struct attribute_group **)attr_groups,
+ .attr_groups = cspmu->attr_groups,
.capabilities = capabilities,
};
/* Hardware counter init */
- arm_cspmu_stop_counters(cspmu);
arm_cspmu_reset_counters(cspmu);
ret = perf_pmu_register(&cspmu->pmu, cspmu->name, -1);
@@ -1201,36 +1237,55 @@ static int arm_cspmu_device_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = arm_cspmu_register_pmu(cspmu);
+ ret = arm_cspmu_init_impl_ops(cspmu);
if (ret)
return ret;
- return 0;
+ ret = arm_cspmu_register_pmu(cspmu);
+
+ /* Matches arm_cspmu_init_impl_ops() above. */
+ if (cspmu->impl.module != THIS_MODULE)
+ module_put(cspmu->impl.module);
+
+ return ret;
}
-static int arm_cspmu_device_remove(struct platform_device *pdev)
+static void arm_cspmu_device_remove(struct platform_device *pdev)
{
struct arm_cspmu *cspmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&cspmu->pmu);
cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node);
-
- return 0;
}
+static const struct platform_device_id arm_cspmu_id[] = {
+ {DRVNAME, 0},
+ { },
+};
+MODULE_DEVICE_TABLE(platform, arm_cspmu_id);
+
+static const struct of_device_id arm_cspmu_of_match[] = {
+ { .compatible = "arm,coresight-pmu" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, arm_cspmu_of_match);
+
static struct platform_driver arm_cspmu_driver = {
.driver = {
- .name = DRVNAME,
- .suppress_bind_attrs = true,
- },
+ .name = DRVNAME,
+ .of_match_table = arm_cspmu_of_match,
+ .suppress_bind_attrs = true,
+ },
.probe = arm_cspmu_device_probe,
.remove = arm_cspmu_device_remove,
+ .id_table = arm_cspmu_id,
};
static void arm_cspmu_set_active_cpu(int cpu, struct arm_cspmu *cspmu)
{
cpumask_set_cpu(cpu, &cspmu->active_cpu);
- WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu));
+ if (cspmu->irq)
+ WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu));
}
static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node)
@@ -1253,8 +1308,7 @@ static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
- int dst;
- struct cpumask online_supported;
+ unsigned int dst;
struct arm_cspmu *cspmu =
hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);
@@ -1264,9 +1318,8 @@ static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
return 0;
/* Choose a new CPU to migrate ownership of the PMU to */
- cpumask_and(&online_supported, &cspmu->associated_cpus,
- cpu_online_mask);
- dst = cpumask_any_but(&online_supported, cpu);
+ dst = cpumask_any_and_but(&cspmu->associated_cpus,
+ cpu_online_mask, cpu);
if (dst >= nr_cpu_ids)
return 0;
@@ -1297,7 +1350,79 @@ static void __exit arm_cspmu_exit(void)
cpuhp_remove_multi_state(arm_cspmu_cpuhp_state);
}
+int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match)
+{
+ struct arm_cspmu_impl_match *match;
+ int ret = 0;
+
+ match = arm_cspmu_impl_match_get(impl_match->pmiidr_val);
+
+ if (match) {
+ mutex_lock(&arm_cspmu_lock);
+
+ if (!match->impl_init_ops) {
+ match->module = impl_match->module;
+ match->impl_init_ops = impl_match->impl_init_ops;
+ } else {
+ /* Broken match table may contain non-unique entries */
+ WARN(1, "arm_cspmu backend already registered for module: %s, pmiidr: 0x%x, mask: 0x%x\n",
+ match->module_name,
+ match->pmiidr_val,
+ match->pmiidr_mask);
+
+ ret = -EINVAL;
+ }
+
+ mutex_unlock(&arm_cspmu_lock);
+
+ if (!ret)
+ ret = driver_attach(&arm_cspmu_driver.driver);
+ } else {
+ pr_err("arm_cspmu reg failed, unable to find a match for pmiidr: 0x%x\n",
+ impl_match->pmiidr_val);
+
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_impl_register);
+
+static int arm_cspmu_match_device(struct device *dev, const void *match)
+{
+ struct arm_cspmu *cspmu = platform_get_drvdata(to_platform_device(dev));
+
+ return (cspmu && cspmu->impl.match == match) ? 1 : 0;
+}
+
+void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match)
+{
+ struct device *dev;
+ struct arm_cspmu_impl_match *match;
+
+ match = arm_cspmu_impl_match_get(impl_match->pmiidr_val);
+
+ if (WARN_ON(!match))
+ return;
+
+ /* Unbind the driver from all matching backend devices. */
+ while ((dev = driver_find_device(&arm_cspmu_driver.driver, NULL,
+ match, arm_cspmu_match_device))) {
+ device_release_driver(dev);
+ put_device(dev);
+ }
+
+ mutex_lock(&arm_cspmu_lock);
+
+ match->module = NULL;
+ match->impl_init_ops = NULL;
+
+ mutex_unlock(&arm_cspmu_lock);
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
+
module_init(arm_cspmu_init);
module_exit(arm_cspmu_exit);
+MODULE_DESCRIPTION("ARM CoreSight Architecture Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
index 51323b175a4a..cd65a58dbd88 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.h
+++ b/drivers/perf/arm_cspmu/arm_cspmu.h
@@ -1,14 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0
*
* ARM CoreSight Architecture PMU driver.
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
#ifndef __ARM_CSPMU_H__
#define __ARM_CSPMU_H__
-#include <linux/acpi.h>
#include <linux/bitfield.h>
#include <linux/cpumask.h>
#include <linux/device.h>
@@ -29,7 +28,7 @@
})[0].attr.attr)
#define ARM_CSPMU_FORMAT_ATTR(_name, _config) \
- ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_sysfs_format_show, (char *)_config)
+ ARM_CSPMU_EXT_ATTR(_name, device_show_string, _config)
#define ARM_CSPMU_EVENT_ATTR(_name, _config) \
PMU_EVENT_ATTR_ID(_name, arm_cspmu_sysfs_event_show, _config)
@@ -48,6 +47,8 @@
/* Default filter format */
#define ARM_CSPMU_FORMAT_FILTER_ATTR \
ARM_CSPMU_FORMAT_ATTR(filter, "config1:0-31")
+#define ARM_CSPMU_FORMAT_FILTER2_ATTR \
+ ARM_CSPMU_FORMAT_ATTR(filter2, "config2:0-31")
/*
* This is the default event number for cycle count, if supported, since the
@@ -66,9 +67,91 @@
/* The cycle counter, if implemented, is located at counter[31]. */
#define ARM_CSPMU_CYCLE_CNTR_IDX 31
+/*
+ * CoreSight PMU Arch register offsets.
+ */
+#define PMEVCNTR_LO 0x0
+#define PMEVCNTR_HI 0x4
+#define PMEVTYPER 0x400
+#define PMCCFILTR 0x47C
+#define PMEVFILT2R 0x800
+#define PMEVFILTR 0xA00
+#define PMCNTENSET 0xC00
+#define PMCNTENCLR 0xC20
+#define PMINTENSET 0xC40
+#define PMINTENCLR 0xC60
+#define PMOVSCLR 0xC80
+#define PMOVSSET 0xCC0
+#define PMIMPDEF 0xD80
+#define PMCFGR 0xE00
+#define PMCR 0xE04
+#define PMIIDR 0xE08
+#define PMPIDR0 0xFE0
+#define PMPIDR1 0xFE4
+#define PMPIDR2 0xFE8
+#define PMPIDR3 0xFEC
+#define PMPIDR4 0xFD0
+
+/* PMCFGR register field */
+#define PMCFGR_NCG GENMASK(31, 28)
+#define PMCFGR_HDBG BIT(24)
+#define PMCFGR_TRO BIT(23)
+#define PMCFGR_SS BIT(22)
+#define PMCFGR_FZO BIT(21)
+#define PMCFGR_MSI BIT(20)
+#define PMCFGR_UEN BIT(19)
+#define PMCFGR_NA BIT(17)
+#define PMCFGR_EX BIT(16)
+#define PMCFGR_CCD BIT(15)
+#define PMCFGR_CC BIT(14)
+#define PMCFGR_SIZE GENMASK(13, 8)
+#define PMCFGR_N GENMASK(7, 0)
+
+/* PMCR register field */
+#define PMCR_TRO BIT(11)
+#define PMCR_HDBG BIT(10)
+#define PMCR_FZO BIT(9)
+#define PMCR_NA BIT(8)
+#define PMCR_DP BIT(5)
+#define PMCR_X BIT(4)
+#define PMCR_D BIT(3)
+#define PMCR_C BIT(2)
+#define PMCR_P BIT(1)
+#define PMCR_E BIT(0)
+
/* PMIIDR register field */
-#define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0)
-#define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20)
+#define PMIIDR_IMPLEMENTER GENMASK(11, 0)
+#define PMIIDR_IMPLEMENTER_DES_0 GENMASK(3, 0)
+#define PMIIDR_IMPLEMENTER_DES_1 GENMASK(6, 4)
+#define PMIIDR_IMPLEMENTER_DES_2 GENMASK(11, 8)
+#define PMIIDR_REVISION GENMASK(15, 12)
+#define PMIIDR_VARIANT GENMASK(19, 16)
+#define PMIIDR_PRODUCTID GENMASK(31, 20)
+#define PMIIDR_PRODUCTID_PART_0 GENMASK(27, 20)
+#define PMIIDR_PRODUCTID_PART_1 GENMASK(31, 28)
+
+/* PMPIDR0 register field */
+#define PMPIDR0_PART_0 GENMASK(7, 0)
+
+/* PMPIDR1 register field */
+#define PMPIDR1_DES_0 GENMASK(7, 4)
+#define PMPIDR1_PART_1 GENMASK(3, 0)
+
+/* PMPIDR2 register field */
+#define PMPIDR2_REVISION GENMASK(7, 4)
+#define PMPIDR2_DES_1 GENMASK(2, 0)
+
+/* PMPIDR3 register field */
+#define PMPIDR3_REVAND GENMASK(7, 4)
+#define PMPIDR3_CMOD GENMASK(3, 0)
+
+/* PMPIDR4 register field */
+#define PMPIDR4_SIZE GENMASK(7, 4)
+#define PMPIDR4_DES_2 GENMASK(3, 0)
+
+/* JEDEC-assigned JEP106 identification code */
+#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B
+#define ARM_CSPMU_IMPL_ID_AMPERE 0xA16
struct arm_cspmu;
@@ -100,16 +183,38 @@ struct arm_cspmu_impl_ops {
bool (*is_cycle_counter_event)(const struct perf_event *event);
/* Decode event type/id from configs */
u32 (*event_type)(const struct perf_event *event);
- /* Decode filter value from configs */
- u32 (*event_filter)(const struct perf_event *event);
+ /* Set/reset event filters */
+ void (*set_cc_filter)(struct arm_cspmu *cspmu,
+ const struct perf_event *event);
+ void (*set_ev_filter)(struct arm_cspmu *cspmu,
+ const struct perf_event *event);
+ void (*reset_ev_filter)(struct arm_cspmu *cspmu,
+ const struct perf_event *event);
+ /* Implementation specific event validation */
+ int (*validate_event)(struct arm_cspmu *cspmu,
+ struct perf_event *event);
/* Hide/show unsupported events */
umode_t (*event_attr_is_visible)(struct kobject *kobj,
struct attribute *attr, int unused);
};
+/* Vendor/implementer registration parameter. */
+struct arm_cspmu_impl_match {
+ /* Backend module. */
+ struct module *module;
+ const char *module_name;
+ /* PMIIDR value/mask. */
+ u32 pmiidr_val;
+ u32 pmiidr_mask;
+ /* Callback to vendor backend to init arm_cspmu_impl::ops. */
+ int (*impl_init_ops)(struct arm_cspmu *cspmu);
+};
+
/* Vendor/implementer descriptor. */
struct arm_cspmu_impl {
u32 pmiidr;
+ struct module *module;
+ struct arm_cspmu_impl_match *match;
struct arm_cspmu_impl_ops ops;
void *ctx;
};
@@ -118,22 +223,23 @@ struct arm_cspmu_impl {
struct arm_cspmu {
struct pmu pmu;
struct device *dev;
- struct acpi_apmt_node *apmt_node;
const char *name;
const char *identifier;
void __iomem *base0;
void __iomem *base1;
- int irq;
cpumask_t associated_cpus;
cpumask_t active_cpu;
struct hlist_node cpuhp_node;
+ int irq;
+ bool has_atomic_dword;
u32 pmcfgr;
u32 num_logical_ctrs;
u32 num_set_clr_reg;
int cycle_counter_logical_idx;
struct arm_cspmu_hw_events hw_events;
+ const struct attribute_group *attr_groups[5];
struct arm_cspmu_impl impl;
};
@@ -143,9 +249,10 @@ ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
struct device_attribute *attr,
char *buf);
-/* Default function to show format attribute in sysfs. */
-ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
- struct device_attribute *attr,
- char *buf);
+/* Register vendor backend. */
+int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match);
+
+/* Unregister vendor backend. */
+void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match);
#endif /* __ARM_CSPMU_H__ */
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
index 72ef80caa3c8..e06a06d3407b 100644
--- a/drivers/perf/arm_cspmu/nvidia_cspmu.c
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
@@ -1,14 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
/* Support for NVIDIA specific attributes. */
+#include <linux/io.h>
+#include <linux/module.h>
#include <linux/topology.h>
-#include "nvidia_cspmu.h"
+#include "arm_cspmu.h"
#define NV_PCIE_PORT_COUNT 10ULL
#define NV_PCIE_FILTER_ID_MASK GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
@@ -21,7 +23,7 @@
#define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0)
-#define NV_PRODID_MASK GENMASK(31, 0)
+#define NV_PRODID_MASK (PMIIDR_PRODUCTID | PMIIDR_VARIANT | PMIIDR_REVISION)
#define NV_FORMAT_NAME_GENERIC 0
@@ -38,10 +40,21 @@
struct nv_cspmu_ctx {
const char *name;
- u32 filter_mask;
- u32 filter_default_val;
+
struct attribute **event_attr;
struct attribute **format_attr;
+
+ u32 filter_mask;
+ u32 filter_default_val;
+ u32 filter2_mask;
+ u32 filter2_default_val;
+
+ u32 (*get_filter)(const struct perf_event *event);
+ u32 (*get_filter2)(const struct perf_event *event);
+
+ void *data;
+
+ int (*init_data)(struct arm_cspmu *cspmu);
};
static struct attribute *scf_pmu_event_attrs[] = {
@@ -53,65 +66,24 @@ static struct attribute *scf_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(scf_cache_wb, 0xF3),
NV_CSPMU_EVENT_ATTR_4(socket, rd_data, 0x101),
- NV_CSPMU_EVENT_ATTR_4(socket, dl_rsp, 0x105),
NV_CSPMU_EVENT_ATTR_4(socket, wb_data, 0x109),
- NV_CSPMU_EVENT_ATTR_4(socket, ev_rsp, 0x10d),
- NV_CSPMU_EVENT_ATTR_4(socket, prb_data, 0x111),
NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding, 0x115),
- NV_CSPMU_EVENT_ATTR_4(socket, dl_outstanding, 0x119),
- NV_CSPMU_EVENT_ATTR_4(socket, wb_outstanding, 0x11d),
- NV_CSPMU_EVENT_ATTR_4(socket, wr_outstanding, 0x121),
- NV_CSPMU_EVENT_ATTR_4(socket, ev_outstanding, 0x125),
- NV_CSPMU_EVENT_ATTR_4(socket, prb_outstanding, 0x129),
NV_CSPMU_EVENT_ATTR_4(socket, rd_access, 0x12d),
- NV_CSPMU_EVENT_ATTR_4(socket, dl_access, 0x131),
NV_CSPMU_EVENT_ATTR_4(socket, wb_access, 0x135),
NV_CSPMU_EVENT_ATTR_4(socket, wr_access, 0x139),
- NV_CSPMU_EVENT_ATTR_4(socket, ev_access, 0x13d),
- NV_CSPMU_EVENT_ATTR_4(socket, prb_access, 0x141),
-
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_data, 0x145),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_access, 0x149),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_access, 0x14d),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_outstanding, 0x151),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_outstanding, 0x155),
-
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_data, 0x159),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_access, 0x15d),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_access, 0x161),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_outstanding, 0x165),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_outstanding, 0x169),
ARM_CSPMU_EVENT_ATTR(gmem_rd_data, 0x16d),
ARM_CSPMU_EVENT_ATTR(gmem_rd_access, 0x16e),
ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding, 0x16f),
- ARM_CSPMU_EVENT_ATTR(gmem_dl_rsp, 0x170),
- ARM_CSPMU_EVENT_ATTR(gmem_dl_access, 0x171),
- ARM_CSPMU_EVENT_ATTR(gmem_dl_outstanding, 0x172),
ARM_CSPMU_EVENT_ATTR(gmem_wb_data, 0x173),
ARM_CSPMU_EVENT_ATTR(gmem_wb_access, 0x174),
- ARM_CSPMU_EVENT_ATTR(gmem_wb_outstanding, 0x175),
- ARM_CSPMU_EVENT_ATTR(gmem_ev_rsp, 0x176),
- ARM_CSPMU_EVENT_ATTR(gmem_ev_access, 0x177),
- ARM_CSPMU_EVENT_ATTR(gmem_ev_outstanding, 0x178),
ARM_CSPMU_EVENT_ATTR(gmem_wr_data, 0x179),
- ARM_CSPMU_EVENT_ATTR(gmem_wr_outstanding, 0x17a),
ARM_CSPMU_EVENT_ATTR(gmem_wr_access, 0x17b),
NV_CSPMU_EVENT_ATTR_4(socket, wr_data, 0x17c),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_data, 0x180),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_data, 0x184),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_access, 0x188),
- NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_outstanding, 0x18c),
-
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_data, 0x190),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_data, 0x194),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_access, 0x198),
- NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_outstanding, 0x19c),
-
ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes, 0x1a0),
ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes, 0x1a1),
ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data, 0x1a2),
@@ -121,35 +93,12 @@ static struct attribute *scf_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(cmem_rd_data, 0x1a5),
ARM_CSPMU_EVENT_ATTR(cmem_rd_access, 0x1a6),
ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding, 0x1a7),
- ARM_CSPMU_EVENT_ATTR(cmem_dl_rsp, 0x1a8),
- ARM_CSPMU_EVENT_ATTR(cmem_dl_access, 0x1a9),
- ARM_CSPMU_EVENT_ATTR(cmem_dl_outstanding, 0x1aa),
ARM_CSPMU_EVENT_ATTR(cmem_wb_data, 0x1ab),
ARM_CSPMU_EVENT_ATTR(cmem_wb_access, 0x1ac),
- ARM_CSPMU_EVENT_ATTR(cmem_wb_outstanding, 0x1ad),
- ARM_CSPMU_EVENT_ATTR(cmem_ev_rsp, 0x1ae),
- ARM_CSPMU_EVENT_ATTR(cmem_ev_access, 0x1af),
- ARM_CSPMU_EVENT_ATTR(cmem_ev_outstanding, 0x1b0),
ARM_CSPMU_EVENT_ATTR(cmem_wr_data, 0x1b1),
- ARM_CSPMU_EVENT_ATTR(cmem_wr_outstanding, 0x1b2),
-
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_data, 0x1b3),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_access, 0x1b7),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_access, 0x1bb),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_outstanding, 0x1bf),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_outstanding, 0x1c3),
-
- ARM_CSPMU_EVENT_ATTR(ocu_prb_access, 0x1c7),
- ARM_CSPMU_EVENT_ATTR(ocu_prb_data, 0x1c8),
- ARM_CSPMU_EVENT_ATTR(ocu_prb_outstanding, 0x1c9),
ARM_CSPMU_EVENT_ATTR(cmem_wr_access, 0x1ca),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_access, 0x1cb),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_data, 0x1cf),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_data, 0x1d3),
- NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_outstanding, 0x1d7),
-
ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes, 0x1db),
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
@@ -193,6 +142,7 @@ static struct attribute *pcie_pmu_format_attrs[] = {
static struct attribute *nvlink_c2c_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
+ ARM_CSPMU_FORMAT_ATTR(port, "config1:0-1"),
NULL,
};
@@ -205,6 +155,7 @@ static struct attribute *cnvlink_pmu_format_attrs[] = {
static struct attribute *generic_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_FILTER_ATTR,
+ ARM_CSPMU_FORMAT_FILTER2_ATTR,
NULL,
};
@@ -237,12 +188,55 @@ static u32 nv_cspmu_event_filter(const struct perf_event *event)
const struct nv_cspmu_ctx *ctx =
to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
- if (ctx->filter_mask == 0)
+ const u32 filter_val = event->attr.config1 & ctx->filter_mask;
+
+ if (filter_val == 0)
return ctx->filter_default_val;
- return event->attr.config1 & ctx->filter_mask;
+ return filter_val;
+}
+
+static u32 nv_cspmu_event_filter2(const struct perf_event *event)
+{
+ const struct nv_cspmu_ctx *ctx =
+ to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
+
+ const u32 filter_val = event->attr.config2 & ctx->filter2_mask;
+
+ if (filter_val == 0)
+ return ctx->filter2_default_val;
+
+ return filter_val;
+}
+
+static void nv_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event)
+{
+ u32 filter, offset;
+ const struct nv_cspmu_ctx *ctx =
+ to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
+ offset = 4 * event->hw.idx;
+
+ if (ctx->get_filter) {
+ filter = ctx->get_filter(event);
+ writel(filter, cspmu->base0 + PMEVFILTR + offset);
+ }
+
+ if (ctx->get_filter2) {
+ filter = ctx->get_filter2(event);
+ writel(filter, cspmu->base0 + PMEVFILT2R + offset);
+ }
+}
+
+static void nv_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event)
+{
+ u32 filter = nv_cspmu_event_filter(event);
+
+ writel(filter, cspmu->base0 + PMCCFILTR);
}
+
enum nv_cspmu_name_fmt {
NAME_FMT_GENERIC,
NAME_FMT_SOCKET
@@ -251,74 +245,120 @@ enum nv_cspmu_name_fmt {
struct nv_cspmu_match {
u32 prodid;
u32 prodid_mask;
- u64 filter_mask;
- u32 filter_default_val;
const char *name_pattern;
enum nv_cspmu_name_fmt name_fmt;
- struct attribute **event_attr;
- struct attribute **format_attr;
+ struct nv_cspmu_ctx template_ctx;
+ struct arm_cspmu_impl_ops ops;
};
static const struct nv_cspmu_match nv_cspmu_match[] = {
{
- .prodid = 0x103,
+ .prodid = 0x10300000,
.prodid_mask = NV_PRODID_MASK,
- .filter_mask = NV_PCIE_FILTER_ID_MASK,
- .filter_default_val = NV_PCIE_FILTER_ID_MASK,
.name_pattern = "nvidia_pcie_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
- .event_attr = mcf_pmu_event_attrs,
- .format_attr = pcie_pmu_format_attrs
+ .template_ctx = {
+ .event_attr = mcf_pmu_event_attrs,
+ .format_attr = pcie_pmu_format_attrs,
+ .filter_mask = NV_PCIE_FILTER_ID_MASK,
+ .filter_default_val = NV_PCIE_FILTER_ID_MASK,
+ .filter2_mask = 0x0,
+ .filter2_default_val = 0x0,
+ .get_filter = nv_cspmu_event_filter,
+ .get_filter2 = NULL,
+ .data = NULL,
+ .init_data = NULL
+ },
},
{
- .prodid = 0x104,
+ .prodid = 0x10400000,
.prodid_mask = NV_PRODID_MASK,
- .filter_mask = 0x0,
- .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
.name_pattern = "nvidia_nvlink_c2c1_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
- .event_attr = mcf_pmu_event_attrs,
- .format_attr = nvlink_c2c_pmu_format_attrs
+ .template_ctx = {
+ .event_attr = mcf_pmu_event_attrs,
+ .format_attr = nvlink_c2c_pmu_format_attrs,
+ .filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
+ .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
+ .filter2_mask = 0x0,
+ .filter2_default_val = 0x0,
+ .get_filter = nv_cspmu_event_filter,
+ .get_filter2 = NULL,
+ .data = NULL,
+ .init_data = NULL
+ },
},
{
- .prodid = 0x105,
+ .prodid = 0x10500000,
.prodid_mask = NV_PRODID_MASK,
- .filter_mask = 0x0,
- .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
.name_pattern = "nvidia_nvlink_c2c0_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
- .event_attr = mcf_pmu_event_attrs,
- .format_attr = nvlink_c2c_pmu_format_attrs
+ .template_ctx = {
+ .event_attr = mcf_pmu_event_attrs,
+ .format_attr = nvlink_c2c_pmu_format_attrs,
+ .filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
+ .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
+ .filter2_mask = 0x0,
+ .filter2_default_val = 0x0,
+ .get_filter = nv_cspmu_event_filter,
+ .get_filter2 = NULL,
+ .data = NULL,
+ .init_data = NULL
+ },
},
{
- .prodid = 0x106,
+ .prodid = 0x10600000,
.prodid_mask = NV_PRODID_MASK,
- .filter_mask = NV_CNVL_FILTER_ID_MASK,
- .filter_default_val = NV_CNVL_FILTER_ID_MASK,
.name_pattern = "nvidia_cnvlink_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
- .event_attr = mcf_pmu_event_attrs,
- .format_attr = cnvlink_pmu_format_attrs
+ .template_ctx = {
+ .event_attr = mcf_pmu_event_attrs,
+ .format_attr = cnvlink_pmu_format_attrs,
+ .filter_mask = NV_CNVL_FILTER_ID_MASK,
+ .filter_default_val = NV_CNVL_FILTER_ID_MASK,
+ .filter2_mask = 0x0,
+ .filter2_default_val = 0x0,
+ .get_filter = nv_cspmu_event_filter,
+ .get_filter2 = NULL,
+ .data = NULL,
+ .init_data = NULL
+ },
},
{
- .prodid = 0x2CF,
+ .prodid = 0x2CF00000,
.prodid_mask = NV_PRODID_MASK,
- .filter_mask = 0x0,
- .filter_default_val = 0x0,
.name_pattern = "nvidia_scf_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
- .event_attr = scf_pmu_event_attrs,
- .format_attr = scf_pmu_format_attrs
+ .template_ctx = {
+ .event_attr = scf_pmu_event_attrs,
+ .format_attr = scf_pmu_format_attrs,
+ .filter_mask = 0x0,
+ .filter_default_val = 0x0,
+ .filter2_mask = 0x0,
+ .filter2_default_val = 0x0,
+ .get_filter = nv_cspmu_event_filter,
+ .get_filter2 = NULL,
+ .data = NULL,
+ .init_data = NULL
+ },
},
{
.prodid = 0,
.prodid_mask = 0,
- .filter_mask = NV_GENERIC_FILTER_ID_MASK,
- .filter_default_val = NV_GENERIC_FILTER_ID_MASK,
.name_pattern = "nvidia_uncore_pmu_%u",
.name_fmt = NAME_FMT_GENERIC,
- .event_attr = generic_pmu_event_attrs,
- .format_attr = generic_pmu_format_attrs
+ .template_ctx = {
+ .event_attr = generic_pmu_event_attrs,
+ .format_attr = generic_pmu_format_attrs,
+ .filter_mask = NV_GENERIC_FILTER_ID_MASK,
+ .filter_default_val = NV_GENERIC_FILTER_ID_MASK,
+ .filter2_mask = NV_GENERIC_FILTER_ID_MASK,
+ .filter2_default_val = NV_GENERIC_FILTER_ID_MASK,
+ .get_filter = nv_cspmu_event_filter,
+ .get_filter2 = nv_cspmu_event_filter2,
+ .data = NULL,
+ .init_data = NULL
+ },
},
};
@@ -351,9 +391,16 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
return name;
}
-int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
+#define SET_OP(name, impl, match, default_op) \
+ do { \
+ if (match->ops.name) \
+ impl->name = match->ops.name; \
+ else if (default_op != NULL) \
+ impl->name = default_op; \
+ } while (false)
+
+static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
{
- u32 prodid;
struct nv_cspmu_ctx *ctx;
struct device *dev = cspmu->dev;
struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
@@ -363,38 +410,59 @@ int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
if (!ctx)
return -ENOMEM;
- prodid = FIELD_GET(ARM_CSPMU_PMIIDR_PRODUCTID, cspmu->impl.pmiidr);
-
/* Find matching PMU. */
for (; match->prodid; match++) {
const u32 prodid_mask = match->prodid_mask;
- if ((match->prodid & prodid_mask) == (prodid & prodid_mask))
+ if ((match->prodid & prodid_mask) ==
+ (cspmu->impl.pmiidr & prodid_mask))
break;
}
- ctx->name = nv_cspmu_format_name(cspmu, match);
- ctx->filter_mask = match->filter_mask;
- ctx->filter_default_val = match->filter_default_val;
- ctx->event_attr = match->event_attr;
- ctx->format_attr = match->format_attr;
+ /* Initialize the context with the matched template. */
+ memcpy(ctx, &match->template_ctx, sizeof(struct nv_cspmu_ctx));
+ ctx->name = nv_cspmu_format_name(cspmu, match);
cspmu->impl.ctx = ctx;
/* NVIDIA specific callbacks. */
- impl_ops->event_filter = nv_cspmu_event_filter;
- impl_ops->get_event_attrs = nv_cspmu_get_event_attrs;
- impl_ops->get_format_attrs = nv_cspmu_get_format_attrs;
- impl_ops->get_name = nv_cspmu_get_name;
+ SET_OP(set_cc_filter, impl_ops, match, nv_cspmu_set_cc_filter);
+ SET_OP(set_ev_filter, impl_ops, match, nv_cspmu_set_ev_filter);
+ SET_OP(get_event_attrs, impl_ops, match, nv_cspmu_get_event_attrs);
+ SET_OP(get_format_attrs, impl_ops, match, nv_cspmu_get_format_attrs);
+ SET_OP(get_name, impl_ops, match, nv_cspmu_get_name);
- /* Set others to NULL to use default callback. */
- impl_ops->event_type = NULL;
- impl_ops->event_attr_is_visible = NULL;
- impl_ops->get_identifier = NULL;
- impl_ops->is_cycle_counter_event = NULL;
+ if (ctx->init_data)
+ return ctx->init_data(cspmu);
return 0;
}
-EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
+/* Match all NVIDIA Coresight PMU devices */
+static const struct arm_cspmu_impl_match nv_cspmu_param = {
+ .pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA,
+ .module = THIS_MODULE,
+ .impl_init_ops = nv_cspmu_init_ops
+};
+
+static int __init nvidia_cspmu_init(void)
+{
+ int ret;
+
+ ret = arm_cspmu_impl_register(&nv_cspmu_param);
+ if (ret)
+ pr_err("nvidia_cspmu backend registration error: %d\n", ret);
+
+ return ret;
+}
+
+static void __exit nvidia_cspmu_exit(void)
+{
+ arm_cspmu_impl_unregister(&nv_cspmu_param);
+}
+
+module_init(nvidia_cspmu_init);
+module_exit(nvidia_cspmu_exit);
+
+MODULE_DESCRIPTION("NVIDIA Coresight Architecture Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h b/drivers/perf/arm_cspmu/nvidia_cspmu.h
deleted file mode 100644
index 71e18f0dc50b..000000000000
--- a/drivers/perf/arm_cspmu/nvidia_cspmu.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- *
- */
-
-/* Support for NVIDIA specific attributes. */
-
-#ifndef __NVIDIA_CSPMU_H__
-#define __NVIDIA_CSPMU_H__
-
-#include "arm_cspmu.h"
-
-/* Allocate NVIDIA descriptor. */
-int nv_cspmu_init_ops(struct arm_cspmu *cspmu);
-
-#endif /* __NVIDIA_CSPMU_H__ */
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 54aa4658fb36..619cf937602f 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -66,8 +66,13 @@
#define DMC620_PMU_COUNTERn_OFFSET(n) \
(DMC620_PMU_COUNTERS_BASE + 0x28 * (n))
-static LIST_HEAD(dmc620_pmu_irqs);
+/*
+ * dmc620_pmu_irqs_lock: protects dmc620_pmu_irqs list
+ * dmc620_pmu_node_lock: protects pmus_node lists in all dmc620_pmu instances
+ */
static DEFINE_MUTEX(dmc620_pmu_irqs_lock);
+static DEFINE_MUTEX(dmc620_pmu_node_lock);
+static LIST_HEAD(dmc620_pmu_irqs);
struct dmc620_pmu_irq {
struct hlist_node node;
@@ -227,9 +232,31 @@ static const struct attribute_group dmc620_pmu_format_attr_group = {
.attrs = dmc620_pmu_formats_attrs,
};
+static ssize_t dmc620_pmu_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf,
+ cpumask_of(dmc620_pmu->irq->cpu));
+}
+
+static struct device_attribute dmc620_pmu_cpumask_attr =
+ __ATTR(cpumask, 0444, dmc620_pmu_cpumask_show, NULL);
+
+static struct attribute *dmc620_pmu_cpumask_attrs[] = {
+ &dmc620_pmu_cpumask_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group dmc620_pmu_cpumask_attr_group = {
+ .attrs = dmc620_pmu_cpumask_attrs,
+};
+
static const struct attribute_group *dmc620_pmu_attr_groups[] = {
&dmc620_pmu_events_attr_group,
&dmc620_pmu_format_attr_group,
+ &dmc620_pmu_cpumask_attr_group,
NULL,
};
@@ -453,9 +480,9 @@ static int dmc620_pmu_get_irq(struct dmc620_pmu *dmc620_pmu, int irq_num)
return PTR_ERR(irq);
dmc620_pmu->irq = irq;
- mutex_lock(&dmc620_pmu_irqs_lock);
+ mutex_lock(&dmc620_pmu_node_lock);
list_add_rcu(&dmc620_pmu->pmus_node, &irq->pmus_node);
- mutex_unlock(&dmc620_pmu_irqs_lock);
+ mutex_unlock(&dmc620_pmu_node_lock);
return 0;
}
@@ -464,9 +491,11 @@ static void dmc620_pmu_put_irq(struct dmc620_pmu *dmc620_pmu)
{
struct dmc620_pmu_irq *irq = dmc620_pmu->irq;
- mutex_lock(&dmc620_pmu_irqs_lock);
+ mutex_lock(&dmc620_pmu_node_lock);
list_del_rcu(&dmc620_pmu->pmus_node);
+ mutex_unlock(&dmc620_pmu_node_lock);
+ mutex_lock(&dmc620_pmu_irqs_lock);
if (!refcount_dec_and_test(&irq->refcount)) {
mutex_unlock(&dmc620_pmu_irqs_lock);
return;
@@ -513,12 +542,16 @@ static int dmc620_pmu_event_init(struct perf_event *event)
if (event->cpu < 0)
return -EINVAL;
+ hwc->idx = -1;
+
+ if (event->group_leader == event)
+ return 0;
+
/*
* We can't atomically disable all HW counters so only one event allowed,
* although software events are acceptable.
*/
- if (event->group_leader != event &&
- !is_software_event(event->group_leader))
+ if (!is_software_event(event->group_leader))
return -EINVAL;
for_each_sibling_event(sibling, event->group_leader) {
@@ -527,7 +560,6 @@ static int dmc620_pmu_event_init(struct perf_event *event)
return -EINVAL;
}
- hwc->idx = -1;
return 0;
}
@@ -616,10 +648,10 @@ static int dmc620_pmu_cpu_teardown(unsigned int cpu,
return 0;
/* We're only reading, but this isn't the place to be involving RCU */
- mutex_lock(&dmc620_pmu_irqs_lock);
+ mutex_lock(&dmc620_pmu_node_lock);
list_for_each_entry(dmc620_pmu, &irq->pmus_node, pmus_node)
perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
- mutex_unlock(&dmc620_pmu_irqs_lock);
+ mutex_unlock(&dmc620_pmu_node_lock);
WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
irq->cpu = target;
@@ -644,6 +676,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev)
dmc620_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.event_init = dmc620_pmu_event_init,
@@ -655,8 +688,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev)
.attr_groups = dmc620_pmu_attr_groups,
};
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- dmc620_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ dmc620_pmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(dmc620_pmu->base))
return PTR_ERR(dmc620_pmu->base);
@@ -696,7 +728,7 @@ out_teardown_dev:
return ret;
}
-static int dmc620_pmu_device_remove(struct platform_device *pdev)
+static void dmc620_pmu_device_remove(struct platform_device *pdev)
{
struct dmc620_pmu *dmc620_pmu = platform_get_drvdata(pdev);
@@ -704,8 +736,6 @@ static int dmc620_pmu_device_remove(struct platform_device *pdev)
/* perf will synchronise RCU before devres can free dmc620_pmu */
perf_pmu_unregister(&dmc620_pmu->pmu);
-
- return 0;
}
static const struct acpi_device_id dmc620_acpi_match[] = {
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index fe2abb412c00..cb4fb59fe04b 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -20,7 +20,7 @@
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
@@ -85,7 +85,7 @@
DSU_EXT_ATTR(_name, dsu_pmu_sysfs_event_show, (unsigned long)_config)
#define DSU_FORMAT_ATTR(_name, _config) \
- DSU_EXT_ATTR(_name, dsu_pmu_sysfs_format_show, (char *)_config)
+ DSU_EXT_ATTR(_name, device_show_string, _config)
#define DSU_CPUMASK_ATTR(_name, _config) \
DSU_EXT_ATTR(_name, dsu_pmu_cpumask_show, (unsigned long)_config)
@@ -139,15 +139,6 @@ static ssize_t dsu_pmu_sysfs_event_show(struct device *dev,
return sysfs_emit(buf, "event=0x%lx\n", (unsigned long)eattr->var);
}
-static ssize_t dsu_pmu_sysfs_format_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct dev_ext_attribute *eattr = container_of(attr,
- struct dev_ext_attribute, attr);
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-
static ssize_t dsu_pmu_cpumask_show(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -230,15 +221,6 @@ static const struct attribute_group *dsu_pmu_attr_groups[] = {
NULL,
};
-static int dsu_pmu_get_online_cpu_any_but(struct dsu_pmu *dsu_pmu, int cpu)
-{
- struct cpumask online_supported;
-
- cpumask_and(&online_supported,
- &dsu_pmu->associated_cpus, cpu_online_mask);
- return cpumask_any_but(&online_supported, cpu);
-}
-
static inline bool dsu_pmu_counter_valid(struct dsu_pmu *dsu_pmu, u32 idx)
{
return (idx < dsu_pmu->num_counters) ||
@@ -371,7 +353,7 @@ static inline u32 dsu_pmu_get_reset_overflow(void)
return __dsu_pmu_get_reset_overflow();
}
-/**
+/*
* dsu_pmu_set_event_period: Set the period for the counter.
*
* All DSU PMU event counters, except the cycle counter are 32bit
@@ -602,7 +584,7 @@ static struct dsu_pmu *dsu_pmu_alloc(struct platform_device *pdev)
return dsu_pmu;
}
-/**
+/*
* dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster
* from device tree.
*/
@@ -632,7 +614,7 @@ static int dsu_pmu_dt_get_cpus(struct device *dev, cpumask_t *mask)
return 0;
}
-/**
+/*
* dsu_pmu_acpi_get_cpus: Get the list of CPUs in the cluster
* from ACPI.
*/
@@ -751,6 +733,7 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
dsu_pmu->pmu = (struct pmu) {
.task_ctx_nr = perf_invalid_context,
+ .parent = &pdev->dev,
.module = THIS_MODULE,
.pmu_enable = dsu_pmu_enable,
.pmu_disable = dsu_pmu_disable,
@@ -774,14 +757,12 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
return rc;
}
-static int dsu_pmu_device_remove(struct platform_device *pdev)
+static void dsu_pmu_device_remove(struct platform_device *pdev)
{
struct dsu_pmu *dsu_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&dsu_pmu->pmu);
cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, &dsu_pmu->cpuhp_node);
-
- return 0;
}
static const struct of_device_id dsu_pmu_of_match[] = {
@@ -829,14 +810,16 @@ static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
- int dst;
- struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
- cpuhp_node);
+ struct dsu_pmu *dsu_pmu;
+ unsigned int dst;
+
+ dsu_pmu = hlist_entry_safe(node, struct dsu_pmu, cpuhp_node);
if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu))
return 0;
- dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu);
+ dst = cpumask_any_and_but(&dsu_pmu->associated_cpus,
+ cpu_online_mask, cpu);
/* If there are no active CPUs in the DSU, leave IRQ disabled */
if (dst >= nr_cpu_ids)
return 0;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 9b593f985805..973a027d9063 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -26,7 +26,8 @@
#include <asm/irq_regs.h>
-static int armpmu_count_irq_users(const int irq);
+static int armpmu_count_irq_users(const struct cpumask *affinity,
+ const int irq);
struct pmu_irq_ops {
void (*enable_pmuirq)(unsigned int irq);
@@ -64,7 +65,9 @@ static void armpmu_enable_percpu_pmuirq(unsigned int irq)
static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu,
void __percpu *devid)
{
- if (armpmu_count_irq_users(irq) == 1)
+ struct arm_pmu *armpmu = *per_cpu_ptr((void * __percpu *)devid, cpu);
+
+ if (armpmu_count_irq_users(&armpmu->supported_cpus, irq) == 1)
free_percpu_irq(irq, devid);
}
@@ -89,7 +92,9 @@ static void armpmu_disable_percpu_pmunmi(unsigned int irq)
static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu,
void __percpu *devid)
{
- if (armpmu_count_irq_users(irq) == 1)
+ struct arm_pmu *armpmu = *per_cpu_ptr((void * __percpu *)devid, cpu);
+
+ if (armpmu_count_irq_users(&armpmu->supported_cpus, irq) == 1)
free_percpu_nmi(irq, devid);
}
@@ -99,7 +104,6 @@ static const struct pmu_irq_ops percpu_pmunmi_ops = {
.free_pmuirq = armpmu_free_percpu_pmunmi
};
-static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
static DEFINE_PER_CPU(int, cpu_irq);
static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops);
@@ -109,6 +113,8 @@ static inline u64 arm_pmu_event_max_period(struct perf_event *event)
{
if (event->hw.flags & ARMPMU_EVT_64BIT)
return GENMASK_ULL(63, 0);
+ else if (event->hw.flags & ARMPMU_EVT_63BIT)
+ return GENMASK_ULL(62, 0);
else if (event->hw.flags & ARMPMU_EVT_47BIT)
return GENMASK_ULL(46, 0);
else
@@ -316,6 +322,12 @@ armpmu_del(struct perf_event *event, int flags)
int idx = hwc->idx;
armpmu_stop(event, PERF_EF_UPDATE);
+
+ if (has_branch_stack(event)) {
+ hw_events->branch_users--;
+ perf_sched_cb_dec(event->pmu);
+ }
+
hw_events->events[idx] = NULL;
armpmu->clear_event_idx(hw_events, event);
perf_event_update_userpage(event);
@@ -340,12 +352,15 @@ armpmu_add(struct perf_event *event, int flags)
if (idx < 0)
return idx;
- /*
- * If there is an event in the counter we are going to use then make
- * sure it is disabled.
- */
+ /* The newly-allocated counter should be empty */
+ WARN_ON_ONCE(hw_events->events[idx]);
+
+ if (has_branch_stack(event)) {
+ hw_events->branch_users++;
+ perf_sched_cb_inc(event->pmu);
+ }
+
event->hw.idx = idx;
- armpmu->disable(event);
hw_events->events[idx] = event;
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
@@ -443,7 +458,7 @@ __hw_perf_event_init(struct perf_event *event)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- int mapping;
+ int mapping, ret;
hwc->flags = 0;
mapping = armpmu->map_event(event);
@@ -468,11 +483,10 @@ __hw_perf_event_init(struct perf_event *event)
/*
* Check whether we need to exclude the counter from certain modes.
*/
- if (armpmu->set_event_filter &&
- armpmu->set_event_filter(hwc, &event->attr)) {
- pr_debug("ARM performance counters do not support "
- "mode exclusion\n");
- return -EOPNOTSUPP;
+ if (armpmu->set_event_filter) {
+ ret = armpmu->set_event_filter(hwc, &event->attr);
+ if (ret)
+ return ret;
}
/*
@@ -510,8 +524,7 @@ static int armpmu_event_init(struct perf_event *event)
!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
return -ENOENT;
- /* does not support taken branch sampling */
- if (has_branch_stack(event))
+ if (has_branch_stack(event) && !armpmu->reg_brbidr)
return -EOPNOTSUPP;
return __hw_perf_event_init(event);
@@ -521,7 +534,7 @@ static void armpmu_enable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
- bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
+ bool enabled = !bitmap_empty(hw_events->used_mask, ARMPMU_MAX_HWEVENTS);
/* For task-bound events we may be called on other CPUs */
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
@@ -550,13 +563,7 @@ static void armpmu_disable(struct pmu *pmu)
static bool armpmu_filter(struct pmu *pmu, int cpu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
- bool ret;
-
- ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus);
- if (ret && armpmu->filter)
- return armpmu->filter(pmu, cpu);
-
- return ret;
+ return !cpumask_test_cpu(cpu, &armpmu->supported_cpus);
}
static ssize_t cpus_show(struct device *dev,
@@ -577,11 +584,11 @@ static const struct attribute_group armpmu_common_attr_group = {
.attrs = armpmu_common_attrs,
};
-static int armpmu_count_irq_users(const int irq)
+static int armpmu_count_irq_users(const struct cpumask *affinity, const int irq)
{
int cpu, count = 0;
- for_each_possible_cpu(cpu) {
+ for_each_cpu(cpu, affinity) {
if (per_cpu(cpu_irq, cpu) == irq)
count++;
}
@@ -589,12 +596,13 @@ static int armpmu_count_irq_users(const int irq)
return count;
}
-static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq)
+static const struct pmu_irq_ops *
+armpmu_find_irq_ops(const struct cpumask *affinity, int irq)
{
const struct pmu_irq_ops *ops = NULL;
int cpu;
- for_each_possible_cpu(cpu) {
+ for_each_cpu(cpu, affinity) {
if (per_cpu(cpu_irq, cpu) != irq)
continue;
@@ -606,22 +614,25 @@ static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq)
return ops;
}
-void armpmu_free_irq(int irq, int cpu)
+void armpmu_free_irq(struct arm_pmu * __percpu *armpmu, int irq, int cpu)
{
if (per_cpu(cpu_irq, cpu) == 0)
return;
if (WARN_ON(irq != per_cpu(cpu_irq, cpu)))
return;
- per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, &cpu_armpmu);
+ per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, armpmu);
per_cpu(cpu_irq, cpu) = 0;
per_cpu(cpu_irq_ops, cpu) = NULL;
}
-int armpmu_request_irq(int irq, int cpu)
+int armpmu_request_irq(struct arm_pmu * __percpu *pcpu_armpmu, int irq, int cpu)
{
int err = 0;
+ struct arm_pmu **armpmu = per_cpu_ptr(pcpu_armpmu, cpu);
+ const struct cpumask *affinity = *armpmu ? &(*armpmu)->supported_cpus :
+ cpu_possible_mask; /* ACPI */
const irq_handler_t handler = armpmu_dispatch_irq;
const struct pmu_irq_ops *irq_ops;
@@ -643,25 +654,24 @@ int armpmu_request_irq(int irq, int cpu)
IRQF_NOBALANCING | IRQF_NO_AUTOEN |
IRQF_NO_THREAD;
- err = request_nmi(irq, handler, irq_flags, "arm-pmu",
- per_cpu_ptr(&cpu_armpmu, cpu));
+ err = request_nmi(irq, handler, irq_flags, "arm-pmu", armpmu);
/* If cannot get an NMI, get a normal interrupt */
if (err) {
err = request_irq(irq, handler, irq_flags, "arm-pmu",
- per_cpu_ptr(&cpu_armpmu, cpu));
+ armpmu);
irq_ops = &pmuirq_ops;
} else {
has_nmi = true;
irq_ops = &pmunmi_ops;
}
- } else if (armpmu_count_irq_users(irq) == 0) {
- err = request_percpu_nmi(irq, handler, "arm-pmu", &cpu_armpmu);
+ } else if (armpmu_count_irq_users(affinity, irq) == 0) {
+ err = request_percpu_nmi(irq, handler, "arm-pmu", affinity, pcpu_armpmu);
/* If cannot get an NMI, get a normal interrupt */
if (err) {
- err = request_percpu_irq(irq, handler, "arm-pmu",
- &cpu_armpmu);
+ err = request_percpu_irq_affinity(irq, handler, "arm-pmu",
+ affinity, pcpu_armpmu);
irq_ops = &percpu_pmuirq_ops;
} else {
has_nmi = true;
@@ -669,7 +679,7 @@ int armpmu_request_irq(int irq, int cpu)
}
} else {
/* Per cpudevid irq was already requested by another CPU */
- irq_ops = armpmu_find_irq_ops(irq);
+ irq_ops = armpmu_find_irq_ops(affinity, irq);
if (WARN_ON(!irq_ops))
err = -EINVAL;
@@ -693,6 +703,11 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
return per_cpu(hw_events->irq, cpu);
}
+bool arm_pmu_irq_is_nmi(void)
+{
+ return has_nmi;
+}
+
/*
* PMU hardware loses all context when a CPU goes offline.
* When a CPU is hotplugged back in, since some hardware registers are
@@ -709,8 +724,6 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
if (pmu->reset)
pmu->reset(pmu);
- per_cpu(cpu_armpmu, cpu) = pmu;
-
irq = armpmu_get_cpu_irq(pmu, cpu);
if (irq)
per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq);
@@ -730,8 +743,6 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
if (irq)
per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq);
- per_cpu(cpu_armpmu, cpu) = NULL;
-
return 0;
}
@@ -742,7 +753,7 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
struct perf_event *event;
int idx;
- for (idx = 0; idx < armpmu->num_events; idx++) {
+ for_each_set_bit(idx, armpmu->cntr_mask, ARMPMU_MAX_HWEVENTS) {
event = hw_events->events[idx];
if (!event)
continue;
@@ -758,17 +769,8 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
case CPU_PM_ENTER_FAILED:
/*
* Restore and enable the counter.
- * armpmu_start() indirectly calls
- *
- * perf_event_update_userpage()
- *
- * that requires RCU read locking to be functional,
- * wrap the call within RCU_NONIDLE to make the
- * RCU subsystem aware this cpu is not idle from
- * an RCU perspective for the armpmu_start() call
- * duration.
*/
- RCU_NONIDLE(armpmu_start(event, PERF_EF_RELOAD));
+ armpmu_start(event, PERF_EF_RELOAD);
break;
default:
break;
@@ -781,7 +783,7 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
{
struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
- bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
+ bool enabled = !bitmap_empty(hw_events->used_mask, ARMPMU_MAX_HWEVENTS);
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
return NOTIFY_DONE;
@@ -885,11 +887,13 @@ struct arm_pmu *armpmu_alloc(void)
.attr_groups = pmu->attr_groups,
/*
* This is a CPU PMU potentially in a heterogeneous
- * configuration (e.g. big.LITTLE). This is not an uncore PMU,
- * and we have taken ctx sharing into account (e.g. with our
- * pmu::filter callback and pmu::event_init group validation).
+ * configuration (e.g. big.LITTLE) so
+ * PERF_PMU_CAP_EXTENDED_HW_TYPE is required to open
+ * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE events on a
+ * specific PMU.
*/
- .capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS,
+ .capabilities = PERF_PMU_CAP_EXTENDED_REGS |
+ PERF_PMU_CAP_EXTENDED_HW_TYPE,
};
pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
@@ -899,7 +903,6 @@ struct arm_pmu *armpmu_alloc(void)
struct pmu_hw_events *events;
events = per_cpu_ptr(pmu->hw_events, cpu);
- raw_spin_lock_init(&events->pmu_lock);
events->percpu_pmu = pmu;
}
@@ -925,6 +928,12 @@ int armpmu_register(struct arm_pmu *pmu)
if (ret)
return ret;
+ /*
+ * By this stage we know our supported CPUs on either DT/ACPI platforms,
+ * detect the SMT implementation.
+ */
+ pmu->has_smt = topology_core_has_smt(cpumask_first(&pmu->supported_cpus));
+
if (!pmu->set_event_filter)
pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
@@ -932,8 +941,9 @@ int armpmu_register(struct arm_pmu *pmu)
if (ret)
goto out_destroy;
- pr_info("enabled with %s PMU driver, %d counters available%s\n",
- pmu->name, pmu->num_events,
+ pr_info("enabled with %s PMU driver, %d (%*pb) counters available%s\n",
+ pmu->name, bitmap_weight(pmu->cntr_mask, ARMPMU_MAX_HWEVENTS),
+ ARMPMU_MAX_HWEVENTS, &pmu->cntr_mask,
has_nmi ? ", using NMIs" : "");
kvm_host_pmu_init(pmu);
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 90815ad762eb..e80f76d95e68 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -69,6 +69,62 @@ static void arm_pmu_acpi_unregister_irq(int cpu)
acpi_unregister_gsi(gsi);
}
+static int __maybe_unused
+arm_acpi_register_pmu_device(struct platform_device *pdev, u8 len,
+ u16 (*parse_gsi)(struct acpi_madt_generic_interrupt *))
+{
+ int cpu, this_hetid, hetid, irq, ret;
+ u16 this_gsi = 0, gsi = 0;
+
+ /*
+ * Ensure that platform device must have IORESOURCE_IRQ
+ * resource to hold gsi interrupt.
+ */
+ if (pdev->num_resources != 1)
+ return -ENXIO;
+
+ if (pdev->resource[0].flags != IORESOURCE_IRQ)
+ return -ENXIO;
+
+ /*
+ * Sanity check all the GICC tables for the same interrupt
+ * number. For now, only support homogeneous ACPI machines.
+ */
+ for_each_possible_cpu(cpu) {
+ struct acpi_madt_generic_interrupt *gicc;
+
+ gicc = acpi_cpu_get_madt_gicc(cpu);
+ if (gicc->header.length < len)
+ return gsi ? -ENXIO : 0;
+
+ this_gsi = parse_gsi(gicc);
+ this_hetid = find_acpi_cpu_topology_hetero_id(cpu);
+ if (!gsi) {
+ hetid = this_hetid;
+ gsi = this_gsi;
+ } else if (hetid != this_hetid || gsi != this_gsi) {
+ pr_warn("ACPI: %s: must be homogeneous\n", pdev->name);
+ return -ENXIO;
+ }
+ }
+
+ if (!this_gsi)
+ return 0;
+
+ irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH);
+ if (irq < 0) {
+ pr_warn("ACPI: %s Unable to register interrupt: %d\n", pdev->name, gsi);
+ return -ENXIO;
+ }
+
+ pdev->resource[0].start = irq;
+ ret = platform_device_register(pdev);
+ if (ret)
+ acpi_unregister_gsi(gsi);
+
+ return ret;
+}
+
#if IS_ENABLED(CONFIG_ARM_SPE_PMU)
static struct resource spe_resources[] = {
{
@@ -84,6 +140,11 @@ static struct platform_device spe_dev = {
.num_resources = ARRAY_SIZE(spe_resources)
};
+static u16 arm_spe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+ return gicc->spe_interrupt;
+}
+
/*
* For lack of a better place, hook the normal PMU MADT walk
* and create a SPE device if we detect a recent MADT with
@@ -91,53 +152,50 @@ static struct platform_device spe_dev = {
*/
static void arm_spe_acpi_register_device(void)
{
- int cpu, hetid, irq, ret;
- bool first = true;
- u16 gsi = 0;
-
- /*
- * Sanity check all the GICC tables for the same interrupt number.
- * For now, we only support homogeneous ACPI/SPE machines.
- */
- for_each_possible_cpu(cpu) {
- struct acpi_madt_generic_interrupt *gicc;
+ int ret = arm_acpi_register_pmu_device(&spe_dev, ACPI_MADT_GICC_SPE,
+ arm_spe_parse_gsi);
+ if (ret)
+ pr_warn("ACPI: SPE: Unable to register device\n");
+}
+#else
+static inline void arm_spe_acpi_register_device(void)
+{
+}
+#endif /* CONFIG_ARM_SPE_PMU */
- gicc = acpi_cpu_get_madt_gicc(cpu);
- if (gicc->header.length < ACPI_MADT_GICC_SPE)
- return;
-
- if (first) {
- gsi = gicc->spe_interrupt;
- if (!gsi)
- return;
- hetid = find_acpi_cpu_topology_hetero_id(cpu);
- first = false;
- } else if ((gsi != gicc->spe_interrupt) ||
- (hetid != find_acpi_cpu_topology_hetero_id(cpu))) {
- pr_warn("ACPI: SPE must be homogeneous\n");
- return;
- }
+#if IS_ENABLED(CONFIG_CORESIGHT_TRBE)
+static struct resource trbe_resources[] = {
+ {
+ /* irq */
+ .flags = IORESOURCE_IRQ,
}
+};
- irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
- ACPI_ACTIVE_HIGH);
- if (irq < 0) {
- pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi);
- return;
- }
+static struct platform_device trbe_dev = {
+ .name = ARMV8_TRBE_PDEV_NAME,
+ .id = -1,
+ .resource = trbe_resources,
+ .num_resources = ARRAY_SIZE(trbe_resources)
+};
- spe_resources[0].start = irq;
- ret = platform_device_register(&spe_dev);
- if (ret < 0) {
- pr_warn("ACPI: SPE: Unable to register device\n");
- acpi_unregister_gsi(gsi);
- }
+static u16 arm_trbe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+ return gicc->trbe_interrupt;
+}
+
+static void arm_trbe_acpi_register_device(void)
+{
+ int ret = arm_acpi_register_pmu_device(&trbe_dev, ACPI_MADT_GICC_TRBE,
+ arm_trbe_parse_gsi);
+ if (ret)
+ pr_warn("ACPI: TRBE: Unable to register device\n");
}
#else
-static inline void arm_spe_acpi_register_device(void)
+static inline void arm_trbe_acpi_register_device(void)
{
+
}
-#endif /* CONFIG_ARM_SPE_PMU */
+#endif /* CONFIG_CORESIGHT_TRBE */
static int arm_pmu_acpi_parse_irqs(void)
{
@@ -160,7 +218,7 @@ static int arm_pmu_acpi_parse_irqs(void)
* them with their PMUs.
*/
per_cpu(pmu_irqs, cpu) = irq;
- err = armpmu_request_irq(irq, cpu);
+ err = armpmu_request_irq(&probed_pmus, irq, cpu);
if (err)
goto out_err;
}
@@ -374,6 +432,7 @@ static int arm_pmu_acpi_init(void)
return 0;
arm_spe_acpi_register_device();
+ arm_trbe_acpi_register_device();
return 0;
}
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 933b96e243b8..1c9e50a13201 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -16,7 +16,6 @@
#include <linux/irqdesc.h>
#include <linux/kconfig.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/percpu.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
@@ -43,14 +42,13 @@ static int probe_current_pmu(struct arm_pmu *pmu,
return ret;
}
-static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq)
+static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq,
+ const struct cpumask *affinity)
{
- int cpu, ret;
struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+ int cpu;
- ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
- if (ret)
- return ret;
+ cpumask_copy(&pmu->supported_cpus, affinity);
for_each_cpu(cpu, &pmu->supported_cpus)
per_cpu(hw_events->irq, cpu) = irq;
@@ -60,7 +58,7 @@ static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq)
static bool pmu_has_irq_affinity(struct device_node *node)
{
- return !!of_find_property(node, "interrupt-affinity", NULL);
+ return of_property_present(node, "interrupt-affinity");
}
static int pmu_parse_irq_affinity(struct device *dev, int i)
@@ -116,9 +114,12 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
}
if (num_irqs == 1) {
- int irq = platform_get_irq(pdev, 0);
+ const struct cpumask *affinity;
+ int irq;
+
+ irq = platform_get_irq_affinity(pdev, 0, &affinity);
if ((irq > 0) && irq_is_percpu_devid(irq))
- return pmu_parse_percpu_irq(pmu, irq);
+ return pmu_parse_percpu_irq(pmu, irq, affinity);
}
if (nr_cpu_ids != 1 && !pmu_has_irq_affinity(dev->of_node))
@@ -164,7 +165,7 @@ static int armpmu_request_irqs(struct arm_pmu *armpmu)
if (!irq)
continue;
- err = armpmu_request_irq(irq, cpu);
+ err = armpmu_request_irq(&hw_events->percpu_pmu, irq, cpu);
if (err)
break;
}
@@ -180,7 +181,7 @@ static void armpmu_free_irqs(struct arm_pmu *armpmu)
for_each_cpu(cpu, &armpmu->supported_cpus) {
int irq = per_cpu(hw_events->irq, cpu);
- armpmu_free_irq(irq, cpu);
+ armpmu_free_irq(&hw_events->percpu_pmu, irq, cpu);
}
}
@@ -197,6 +198,7 @@ int arm_pmu_device_probe(struct platform_device *pdev,
if (!pmu)
return -ENOMEM;
+ pmu->pmu.parent = &pdev->dev;
pmu->plat_device = pdev;
ret = pmu_parse_irqs(pmu);
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
new file mode 100644
index 000000000000..8014ff766cff
--- /dev/null
+++ b/drivers/perf/arm_pmuv3.c
@@ -0,0 +1,1666 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ARMv8 PMUv3 Performance Events handling code.
+ *
+ * Copyright (C) 2012 ARM Limited
+ * Author: Will Deacon <will.deacon@arm.com>
+ *
+ * This code is based heavily on the ARMv7 perf event code.
+ */
+
+#include <asm/irq_regs.h>
+#include <asm/perf_event.h>
+#include <asm/virt.h>
+
+#include <clocksource/arm_arch_timer.h>
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/clocksource.h>
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/perf/arm_pmuv3.h>
+#include <linux/platform_device.h>
+#include <linux/sched_clock.h>
+#include <linux/smp.h>
+#include <linux/nmi.h>
+
+#include "arm_brbe.h"
+
+/* ARMv8 Cortex-A53 specific event types. */
+#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
+
+/* ARMv8 Cavium ThunderX specific event types. */
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST 0xE9
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS 0xEA
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS 0xEB
+#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS 0xEC
+#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED
+
+/*
+ * ARMv8 Architectural defined events, not all of these may
+ * be supported on any given implementation. Unsupported events will
+ * be disabled at run-time based on the PMCEID registers.
+ */
+static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
+};
+
+static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL,
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
+ [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB,
+
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD,
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_RD,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+};
+
+static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL,
+
+ [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+ [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+};
+
+static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
+
+ [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+ [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+};
+
+static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+};
+
+static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS,
+
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS,
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
+};
+
+static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
+
+ [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+ [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+};
+
+static ssize_t
+armv8pmu_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+ return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+}
+
+#define ARMV8_EVENT_ATTR(name, config) \
+ PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config)
+
+static struct attribute *armv8_pmuv3_event_attrs[] = {
+ /*
+ * Don't expose the sw_incr event in /sys. It's not usable as writes to
+ * PMSWINC_EL0 will trap as PMUSERENR.{SW,EN}=={0,0} and event rotation
+ * means we don't have a fixed event<->counter relationship regardless.
+ */
+ ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE),
+ ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL),
+ ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED),
+ ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED),
+ ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED),
+ ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN),
+ ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN),
+ ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED),
+ ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED),
+ ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED),
+ ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED),
+ ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES),
+ ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED),
+ ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS),
+ ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE),
+ ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB),
+ ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE),
+ ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB),
+ ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS),
+ ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR),
+ ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC),
+ ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES),
+ /* Don't expose the chain event in /sys, since it's useless in isolation */
+ ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED),
+ ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED),
+ ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND),
+ ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND),
+ ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB),
+ ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB),
+ ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE),
+ ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE),
+ ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB),
+ ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB),
+ ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB),
+ ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS),
+ ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE),
+ ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS),
+ ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK),
+ ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK),
+ ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD),
+ ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD),
+ ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD),
+ ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD),
+ ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED),
+ ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC),
+ ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL),
+ ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND),
+ ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND),
+ ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT),
+ ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP),
+ ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED),
+ ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE),
+ ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION),
+ ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES),
+ ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM),
+ ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS),
+ ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD),
+ ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS),
+ ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD),
+ ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP),
+ ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG),
+ ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0),
+ ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1),
+ ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2),
+ ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3),
+ ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4),
+ ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5),
+ ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6),
+ ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7),
+ ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT),
+ ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT),
+ ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT),
+ ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED),
+ ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD),
+ ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR),
+ NULL,
+};
+
+static umode_t
+armv8pmu_event_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int unused)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
+
+ if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
+ test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap))
+ return attr->mode;
+
+ if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) {
+ u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE;
+
+ if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
+ test_bit(id, cpu_pmu->pmceid_ext_bitmap))
+ return attr->mode;
+ }
+
+ return 0;
+}
+
+static const struct attribute_group armv8_pmuv3_events_attr_group = {
+ .name = "events",
+ .attrs = armv8_pmuv3_event_attrs,
+ .is_visible = armv8pmu_event_attr_is_visible,
+};
+
+/* User ABI */
+#define ATTR_CFG_FLD_event_CFG config
+#define ATTR_CFG_FLD_event_LO 0
+#define ATTR_CFG_FLD_event_HI 15
+#define ATTR_CFG_FLD_long_CFG config1
+#define ATTR_CFG_FLD_long_LO 0
+#define ATTR_CFG_FLD_long_HI 0
+#define ATTR_CFG_FLD_rdpmc_CFG config1
+#define ATTR_CFG_FLD_rdpmc_LO 1
+#define ATTR_CFG_FLD_rdpmc_HI 1
+#define ATTR_CFG_FLD_threshold_count_CFG config1 /* PMEVTYPER.TC[0] */
+#define ATTR_CFG_FLD_threshold_count_LO 2
+#define ATTR_CFG_FLD_threshold_count_HI 2
+#define ATTR_CFG_FLD_threshold_compare_CFG config1 /* PMEVTYPER.TC[2:1] */
+#define ATTR_CFG_FLD_threshold_compare_LO 3
+#define ATTR_CFG_FLD_threshold_compare_HI 4
+#define ATTR_CFG_FLD_threshold_CFG config1 /* PMEVTYPER.TH */
+#define ATTR_CFG_FLD_threshold_LO 5
+#define ATTR_CFG_FLD_threshold_HI 16
+
+GEN_PMU_FORMAT_ATTR(event);
+GEN_PMU_FORMAT_ATTR(long);
+GEN_PMU_FORMAT_ATTR(rdpmc);
+GEN_PMU_FORMAT_ATTR(threshold_count);
+GEN_PMU_FORMAT_ATTR(threshold_compare);
+GEN_PMU_FORMAT_ATTR(threshold);
+
+static int sysctl_perf_user_access __read_mostly;
+
+static bool armv8pmu_event_is_64bit(struct perf_event *event)
+{
+ return ATTR_CFG_GET_FLD(&event->attr, long);
+}
+
+static bool armv8pmu_event_want_user_access(struct perf_event *event)
+{
+ return ATTR_CFG_GET_FLD(&event->attr, rdpmc);
+}
+
+static u32 armv8pmu_event_get_threshold(struct perf_event_attr *attr)
+{
+ return ATTR_CFG_GET_FLD(attr, threshold);
+}
+
+static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
+{
+ u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare);
+ u8 th_count = ATTR_CFG_GET_FLD(attr, threshold_count);
+
+ /*
+ * The count bit is always the bottom bit of the full control field, and
+ * the comparison is the upper two bits, but it's not explicitly
+ * labelled in the Arm ARM. For the Perf interface we split it into two
+ * fields, so reconstruct it here.
+ */
+ return (th_compare << 1) | th_count;
+}
+
+static struct attribute *armv8_pmuv3_format_attrs[] = {
+ &format_attr_event.attr,
+ &format_attr_long.attr,
+ &format_attr_rdpmc.attr,
+ &format_attr_threshold.attr,
+ &format_attr_threshold_compare.attr,
+ &format_attr_threshold_count.attr,
+ NULL,
+};
+
+static const struct attribute_group armv8_pmuv3_format_attr_group = {
+ .name = "format",
+ .attrs = armv8_pmuv3_format_attrs,
+};
+
+static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+ u32 slots = FIELD_GET(ARMV8_PMU_SLOTS, cpu_pmu->reg_pmmir);
+
+ return sysfs_emit(page, "0x%08x\n", slots);
+}
+
+static DEVICE_ATTR_RO(slots);
+
+static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+ u32 bus_slots = FIELD_GET(ARMV8_PMU_BUS_SLOTS, cpu_pmu->reg_pmmir);
+
+ return sysfs_emit(page, "0x%08x\n", bus_slots);
+}
+
+static DEVICE_ATTR_RO(bus_slots);
+
+static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+ u32 bus_width = FIELD_GET(ARMV8_PMU_BUS_WIDTH, cpu_pmu->reg_pmmir);
+ u32 val = 0;
+
+ /* Encoded as Log2(number of bytes), plus one */
+ if (bus_width > 2 && bus_width < 13)
+ val = 1 << (bus_width - 1);
+
+ return sysfs_emit(page, "0x%08x\n", val);
+}
+
+static DEVICE_ATTR_RO(bus_width);
+
+static u32 threshold_max(struct arm_pmu *cpu_pmu)
+{
+ /*
+ * PMMIR.THWIDTH is readable and non-zero on aarch32, but it would be
+ * impossible to write the threshold in the upper 32 bits of PMEVTYPER.
+ */
+ if (IS_ENABLED(CONFIG_ARM))
+ return 0;
+
+ /*
+ * The largest value that can be written to PMEVTYPER<n>_EL0.TH is
+ * (2 ^ PMMIR.THWIDTH) - 1.
+ */
+ return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
+}
+
+static ssize_t threshold_max_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+
+ return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
+}
+
+static DEVICE_ATTR_RO(threshold_max);
+
+static ssize_t branches_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+
+ return sysfs_emit(page, "%d\n", brbe_num_branch_records(cpu_pmu));
+}
+
+static DEVICE_ATTR_RO(branches);
+
+static struct attribute *armv8_pmuv3_caps_attrs[] = {
+ &dev_attr_branches.attr,
+ &dev_attr_slots.attr,
+ &dev_attr_bus_slots.attr,
+ &dev_attr_bus_width.attr,
+ &dev_attr_threshold_max.attr,
+ NULL,
+};
+
+static umode_t caps_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+
+ if (i == 0)
+ return brbe_num_branch_records(cpu_pmu) ? attr->mode : 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group armv8_pmuv3_caps_attr_group = {
+ .name = "caps",
+ .attrs = armv8_pmuv3_caps_attrs,
+ .is_visible = caps_is_visible,
+};
+
+/*
+ * We unconditionally enable ARMv8.5-PMU long event counter support
+ * (64-bit events) where supported. Indicate if this arm_pmu has long
+ * event counter support.
+ *
+ * On AArch32, long counters make no sense (you can't access the top
+ * bits), so we only enable this on AArch64.
+ */
+static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
+{
+ return (IS_ENABLED(CONFIG_ARM64) && is_pmuv3p5(cpu_pmu->pmuver));
+}
+
+static bool armv8pmu_event_has_user_read(struct perf_event *event)
+{
+ return event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
+/*
+ * We must chain two programmable counters for 64 bit events,
+ * except when we have allocated the 64bit cycle counter (for CPU
+ * cycles event) or when user space counter access is enabled.
+ */
+static bool armv8pmu_event_is_chained(struct perf_event *event)
+{
+ int idx = event->hw.idx;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+ return !armv8pmu_event_has_user_read(event) &&
+ armv8pmu_event_is_64bit(event) &&
+ !armv8pmu_has_long_event(cpu_pmu) &&
+ (idx < ARMV8_PMU_MAX_GENERAL_COUNTERS);
+}
+
+/*
+ * ARMv8 low level PMU access
+ */
+static u64 armv8pmu_pmcr_read(void)
+{
+ return read_pmcr();
+}
+
+static void armv8pmu_pmcr_write(u64 val)
+{
+ val &= ARMV8_PMU_PMCR_MASK;
+ isb();
+ write_pmcr(val);
+}
+
+static int armv8pmu_has_overflowed(u64 pmovsr)
+{
+ return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK);
+}
+
+static int armv8pmu_counter_has_overflowed(u64 pmnc, int idx)
+{
+ return !!(pmnc & BIT(idx));
+}
+
+static u64 armv8pmu_read_evcntr(int idx)
+{
+ return read_pmevcntrn(idx);
+}
+
+static u64 armv8pmu_read_hw_counter(struct perf_event *event)
+{
+ int idx = event->hw.idx;
+ u64 val = armv8pmu_read_evcntr(idx);
+
+ if (armv8pmu_event_is_chained(event))
+ val = (val << 32) | armv8pmu_read_evcntr(idx - 1);
+ return val;
+}
+
+/*
+ * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP
+ * is set the event counters also become 64-bit counters. Unless the
+ * user has requested a long counter (attr.config1) then we want to
+ * interrupt upon 32-bit overflow - we achieve this by applying a bias.
+ */
+static bool armv8pmu_event_needs_bias(struct perf_event *event)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (armv8pmu_event_is_64bit(event))
+ return false;
+
+ if (armv8pmu_has_long_event(cpu_pmu) ||
+ idx >= ARMV8_PMU_MAX_GENERAL_COUNTERS)
+ return true;
+
+ return false;
+}
+
+static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value)
+{
+ if (armv8pmu_event_needs_bias(event))
+ value |= GENMASK_ULL(63, 32);
+
+ return value;
+}
+
+static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value)
+{
+ if (armv8pmu_event_needs_bias(event))
+ value &= ~GENMASK_ULL(63, 32);
+
+ return value;
+}
+
+static u64 armv8pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u64 value;
+
+ if (idx == ARMV8_PMU_CYCLE_IDX)
+ value = read_pmccntr();
+ else if (idx == ARMV8_PMU_INSTR_IDX)
+ value = read_pmicntr();
+ else
+ value = armv8pmu_read_hw_counter(event);
+
+ return armv8pmu_unbias_long_counter(event, value);
+}
+
+static void armv8pmu_write_evcntr(int idx, u64 value)
+{
+ write_pmevcntrn(idx, value);
+}
+
+static void armv8pmu_write_hw_counter(struct perf_event *event,
+ u64 value)
+{
+ int idx = event->hw.idx;
+
+ if (armv8pmu_event_is_chained(event)) {
+ armv8pmu_write_evcntr(idx, upper_32_bits(value));
+ armv8pmu_write_evcntr(idx - 1, lower_32_bits(value));
+ } else {
+ armv8pmu_write_evcntr(idx, value);
+ }
+}
+
+static void armv8pmu_write_counter(struct perf_event *event, u64 value)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ value = armv8pmu_bias_long_counter(event, value);
+
+ if (idx == ARMV8_PMU_CYCLE_IDX)
+ write_pmccntr(value);
+ else if (idx == ARMV8_PMU_INSTR_IDX)
+ write_pmicntr(value);
+ else
+ armv8pmu_write_hw_counter(event, value);
+}
+
+static void armv8pmu_write_evtype(int idx, unsigned long val)
+{
+ unsigned long mask = ARMV8_PMU_EVTYPE_EVENT |
+ ARMV8_PMU_INCLUDE_EL2 |
+ ARMV8_PMU_EXCLUDE_EL0 |
+ ARMV8_PMU_EXCLUDE_EL1;
+
+ if (IS_ENABLED(CONFIG_ARM64))
+ mask |= ARMV8_PMU_EVTYPE_TC | ARMV8_PMU_EVTYPE_TH;
+
+ val &= mask;
+ write_pmevtypern(idx, val);
+}
+
+static void armv8pmu_write_event_type(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /*
+ * For chained events, the low counter is programmed to count
+ * the event of interest and the high counter is programmed
+ * with CHAIN event code with filters set to count at all ELs.
+ */
+ if (armv8pmu_event_is_chained(event)) {
+ u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN |
+ ARMV8_PMU_INCLUDE_EL2;
+
+ armv8pmu_write_evtype(idx - 1, hwc->config_base);
+ armv8pmu_write_evtype(idx, chain_evt);
+ } else {
+ if (idx == ARMV8_PMU_CYCLE_IDX)
+ write_pmccfiltr(hwc->config_base);
+ else if (idx == ARMV8_PMU_INSTR_IDX)
+ write_pmicfiltr(hwc->config_base);
+ else
+ armv8pmu_write_evtype(idx, hwc->config_base);
+ }
+}
+
+static u64 armv8pmu_event_cnten_mask(struct perf_event *event)
+{
+ int counter = event->hw.idx;
+ u64 mask = BIT(counter);
+
+ if (armv8pmu_event_is_chained(event))
+ mask |= BIT(counter - 1);
+ return mask;
+}
+
+static void armv8pmu_enable_counter(u64 mask)
+{
+ /*
+ * Make sure event configuration register writes are visible before we
+ * enable the counter.
+ * */
+ isb();
+ write_pmcntenset(mask);
+}
+
+static void armv8pmu_enable_event_counter(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ u64 mask = armv8pmu_event_cnten_mask(event);
+
+ kvm_set_pmu_events(mask, attr);
+
+ /* We rely on the hypervisor switch code to enable guest counters */
+ if (!kvm_pmu_counter_deferred(attr))
+ armv8pmu_enable_counter(mask);
+}
+
+static void armv8pmu_disable_counter(u64 mask)
+{
+ write_pmcntenclr(mask);
+ /*
+ * Make sure the effects of disabling the counter are visible before we
+ * start configuring the event.
+ */
+ isb();
+}
+
+static void armv8pmu_disable_event_counter(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ u64 mask = armv8pmu_event_cnten_mask(event);
+
+ kvm_clr_pmu_events(mask);
+
+ /* We rely on the hypervisor switch code to disable guest counters */
+ if (!kvm_pmu_counter_deferred(attr))
+ armv8pmu_disable_counter(mask);
+}
+
+static void armv8pmu_enable_intens(u64 mask)
+{
+ write_pmintenset(mask);
+}
+
+static void armv8pmu_enable_event_irq(struct perf_event *event)
+{
+ armv8pmu_enable_intens(BIT(event->hw.idx));
+}
+
+static void armv8pmu_disable_intens(u64 mask)
+{
+ write_pmintenclr(mask);
+ isb();
+ /* Clear the overflow flag in case an interrupt is pending. */
+ write_pmovsclr(mask);
+ isb();
+}
+
+static void armv8pmu_disable_event_irq(struct perf_event *event)
+{
+ armv8pmu_disable_intens(BIT(event->hw.idx));
+}
+
+static u64 armv8pmu_getreset_flags(void)
+{
+ u64 value;
+
+ /* Read */
+ value = read_pmovsclr();
+
+ /* Write to clear flags */
+ value &= ARMV8_PMU_OVERFLOWED_MASK;
+ write_pmovsclr(value);
+
+ return value;
+}
+
+static void update_pmuserenr(u64 val)
+{
+ lockdep_assert_irqs_disabled();
+
+ /*
+ * The current PMUSERENR_EL0 value might be the value for the guest.
+ * If that's the case, have KVM keep tracking of the register value
+ * for the host EL0 so that KVM can restore it before returning to
+ * the host EL0. Otherwise, update the register now.
+ */
+ if (kvm_set_pmuserenr(val))
+ return;
+
+ write_pmuserenr(val);
+}
+
+static void armv8pmu_disable_user_access(void)
+{
+ update_pmuserenr(0);
+}
+
+static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
+{
+ int i;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+
+ if (is_pmuv3p9(cpu_pmu->pmuver)) {
+ u64 mask = 0;
+ for_each_set_bit(i, cpuc->used_mask, ARMPMU_MAX_HWEVENTS) {
+ if (armv8pmu_event_has_user_read(cpuc->events[i]))
+ mask |= BIT(i);
+ }
+ write_pmuacr(mask);
+ } else {
+ /* Clear any unused counters to avoid leaking their contents */
+ for_each_andnot_bit(i, cpu_pmu->cntr_mask, cpuc->used_mask,
+ ARMPMU_MAX_HWEVENTS) {
+ if (i == ARMV8_PMU_CYCLE_IDX)
+ write_pmccntr(0);
+ else if (i == ARMV8_PMU_INSTR_IDX)
+ write_pmicntr(0);
+ else
+ armv8pmu_write_evcntr(i, 0);
+ }
+ }
+
+ update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_UEN);
+}
+
+static void armv8pmu_enable_event(struct perf_event *event)
+{
+ armv8pmu_write_event_type(event);
+ armv8pmu_enable_event_irq(event);
+ armv8pmu_enable_event_counter(event);
+}
+
+static void armv8pmu_disable_event(struct perf_event *event)
+{
+ armv8pmu_disable_event_counter(event);
+ armv8pmu_disable_event_irq(event);
+}
+
+static void armv8pmu_start(struct arm_pmu *cpu_pmu)
+{
+ struct perf_event_context *ctx;
+ struct pmu_hw_events *hw_events = this_cpu_ptr(cpu_pmu->hw_events);
+ int nr_user = 0;
+
+ ctx = perf_cpu_task_ctx();
+ if (ctx)
+ nr_user = ctx->nr_user;
+
+ if (sysctl_perf_user_access && nr_user)
+ armv8pmu_enable_user_access(cpu_pmu);
+ else
+ armv8pmu_disable_user_access();
+
+ kvm_vcpu_pmu_resync_el0();
+
+ if (hw_events->branch_users)
+ brbe_enable(cpu_pmu);
+
+ /* Enable all counters */
+ armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
+}
+
+static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ struct pmu_hw_events *hw_events = this_cpu_ptr(cpu_pmu->hw_events);
+
+ if (hw_events->branch_users)
+ brbe_disable();
+
+ /* Disable all counters */
+ armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
+}
+
+static void read_branch_records(struct pmu_hw_events *cpuc,
+ struct perf_event *event,
+ struct perf_sample_data *data)
+{
+ struct perf_branch_stack *branch_stack = cpuc->branch_stack;
+
+ brbe_read_filtered_entries(branch_stack, event);
+ perf_sample_save_brstack(data, event, branch_stack, NULL);
+}
+
+static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ u64 pmovsr;
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ /*
+ * Get and reset the IRQ flags
+ */
+ pmovsr = armv8pmu_getreset_flags();
+
+ /*
+ * Did an overflow occur?
+ */
+ if (!armv8pmu_has_overflowed(pmovsr))
+ return IRQ_NONE;
+
+ /*
+ * Handle the counter(s) overflow(s)
+ */
+ regs = get_irq_regs();
+
+ /*
+ * Stop the PMU while processing the counter overflows
+ * to prevent skews in group events.
+ */
+ armv8pmu_stop(cpu_pmu);
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ /* Ignore if we don't have an event. */
+ if (!event)
+ continue;
+
+ /*
+ * We have a single interrupt for all counters. Check that
+ * each counter has overflowed before we process it.
+ */
+ if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (has_branch_stack(event))
+ read_branch_records(cpuc, event, &data);
+
+ /*
+ * Perf event overflow will queue the processing of the event as
+ * an irq_work which will be taken care of in the handling of
+ * IPI_IRQ_WORK.
+ */
+ perf_event_overflow(event, &data, regs);
+ }
+ armv8pmu_start(cpu_pmu);
+
+ return IRQ_HANDLED;
+}
+
+static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
+ struct arm_pmu *cpu_pmu)
+{
+ int idx;
+
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
+ }
+ return -EAGAIN;
+}
+
+static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
+ struct arm_pmu *cpu_pmu)
+{
+ int idx;
+
+ /*
+ * Chaining requires two consecutive event counters, where
+ * the lower idx must be even.
+ */
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) {
+ if (!(idx & 0x1))
+ continue;
+ if (!test_and_set_bit(idx, cpuc->used_mask)) {
+ /* Check if the preceding even counter is available */
+ if (!test_and_set_bit(idx - 1, cpuc->used_mask))
+ return idx;
+ /* Release the Odd counter */
+ clear_bit(idx, cpuc->used_mask);
+ }
+ }
+ return -EAGAIN;
+}
+
+static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
+
+ if (evtype != ARMV8_PMUV3_PERFCTR_CPU_CYCLES)
+ return false;
+
+ /*
+ * A CPU_CYCLES event with threshold counting cannot use PMCCNTR_EL0
+ * since it lacks threshold support.
+ */
+ if (armv8pmu_event_get_threshold(&event->attr))
+ return false;
+
+ /*
+ * PMCCNTR_EL0 is not affected by BRBE controls like BRBCR_ELx.FZP.
+ * So don't use it for branch events.
+ */
+ if (has_branch_stack(event))
+ return false;
+
+ /*
+ * The PMCCNTR_EL0 increments from the processor clock rather than
+ * the PE clock (ARM DDI0487 L.b D13.1.3) which means it'll continue
+ * counting on a WFI PE if one of its SMT sibling is not idle on a
+ * multi-threaded implementation. So don't use it on SMT cores.
+ */
+ if (cpu_pmu->has_smt)
+ return false;
+
+ return true;
+}
+
+static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
+
+ /* Always prefer to place a cycle counter into the cycle counter. */
+ if (armv8pmu_can_use_pmccntr(cpuc, event)) {
+ if (!test_and_set_bit(ARMV8_PMU_CYCLE_IDX, cpuc->used_mask))
+ return ARMV8_PMU_CYCLE_IDX;
+ else if (armv8pmu_event_is_64bit(event) &&
+ armv8pmu_event_want_user_access(event) &&
+ !armv8pmu_has_long_event(cpu_pmu))
+ return -EAGAIN;
+ }
+
+ /*
+ * Always prefer to place a instruction counter into the instruction counter,
+ * but don't expose the instruction counter to userspace access as userspace
+ * may not know how to handle it.
+ */
+ if ((evtype == ARMV8_PMUV3_PERFCTR_INST_RETIRED) &&
+ !armv8pmu_event_get_threshold(&event->attr) &&
+ test_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask) &&
+ !armv8pmu_event_want_user_access(event)) {
+ if (!test_and_set_bit(ARMV8_PMU_INSTR_IDX, cpuc->used_mask))
+ return ARMV8_PMU_INSTR_IDX;
+ }
+
+ /*
+ * Otherwise use events counters
+ */
+ if (armv8pmu_event_is_chained(event))
+ return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
+ else
+ return armv8pmu_get_single_idx(cpuc, cpu_pmu);
+}
+
+static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx = event->hw.idx;
+
+ clear_bit(idx, cpuc->used_mask);
+ if (armv8pmu_event_is_chained(event))
+ clear_bit(idx - 1, cpuc->used_mask);
+}
+
+static int armv8pmu_user_event_idx(struct perf_event *event)
+{
+ if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event))
+ return 0;
+
+ return event->hw.idx + 1;
+}
+
+static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
+{
+ struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
+ struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+
+ if (!hw_events->branch_users)
+ return;
+
+ if (sched_in)
+ brbe_invalidate();
+}
+
+/*
+ * Add an event filter to a given event.
+ */
+static int armv8pmu_set_event_filter(struct hw_perf_event *event,
+ struct perf_event_attr *attr)
+{
+ unsigned long config_base = 0;
+ struct perf_event *perf_event = container_of(attr, struct perf_event,
+ attr);
+ struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
+ u32 th;
+
+ if (attr->exclude_idle) {
+ pr_debug("ARM performance counters do not support mode exclusion\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (has_branch_stack(perf_event)) {
+ if (!brbe_num_branch_records(cpu_pmu) || !brbe_branch_attr_valid(perf_event))
+ return -EOPNOTSUPP;
+
+ perf_event->attach_state |= PERF_ATTACH_SCHED_CB;
+ }
+
+ /*
+ * If we're running in hyp mode, then we *are* the hypervisor.
+ * Therefore we ignore exclude_hv in this configuration, since
+ * there's no hypervisor to sample anyway. This is consistent
+ * with other architectures (x86 and Power).
+ */
+ if (is_kernel_in_hyp_mode()) {
+ if (!attr->exclude_kernel && !attr->exclude_host)
+ config_base |= ARMV8_PMU_INCLUDE_EL2;
+ if (attr->exclude_guest)
+ config_base |= ARMV8_PMU_EXCLUDE_EL1;
+ if (attr->exclude_host)
+ config_base |= ARMV8_PMU_EXCLUDE_EL0;
+ } else {
+ if (!attr->exclude_hv && !attr->exclude_host)
+ config_base |= ARMV8_PMU_INCLUDE_EL2;
+ }
+
+ /*
+ * Filter out !VHE kernels and guest kernels
+ */
+ if (attr->exclude_kernel)
+ config_base |= ARMV8_PMU_EXCLUDE_EL1;
+
+ if (attr->exclude_user)
+ config_base |= ARMV8_PMU_EXCLUDE_EL0;
+
+ /*
+ * If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will
+ * be 0 and will also trigger this check, preventing it from being used.
+ */
+ th = armv8pmu_event_get_threshold(attr);
+ if (th > threshold_max(cpu_pmu)) {
+ pr_debug("PMU event threshold exceeds max value\n");
+ return -EINVAL;
+ }
+
+ if (th) {
+ config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
+ config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
+ armv8pmu_event_threshold_control(attr));
+ }
+
+ /*
+ * Install the filter into config_base as this is used to
+ * construct the event type.
+ */
+ event->config_base = config_base;
+
+ return 0;
+}
+
+static void armv8pmu_reset(void *info)
+{
+ struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
+ u64 pmcr, mask;
+
+ bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
+
+ /* The counter and interrupt enable registers are unknown at reset. */
+ armv8pmu_disable_counter(mask);
+ armv8pmu_disable_intens(mask);
+
+ /* Clear the counters we flip at guest entry/exit */
+ kvm_clr_pmu_events(mask);
+
+ if (brbe_num_branch_records(cpu_pmu)) {
+ brbe_disable();
+ brbe_invalidate();
+ }
+
+ /*
+ * Initialize & Reset PMNC. Request overflow interrupt for
+ * 64 bit cycle counter but cheat in armv8pmu_write_counter().
+ */
+ pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC;
+
+ /* Enable long event counter support where available */
+ if (armv8pmu_has_long_event(cpu_pmu))
+ pmcr |= ARMV8_PMU_PMCR_LP;
+
+ armv8pmu_pmcr_write(pmcr);
+}
+
+static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu,
+ struct perf_event *event)
+{
+ if (event->attr.type == PERF_TYPE_HARDWARE &&
+ event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) {
+
+ if (test_bit(ARMV8_PMUV3_PERFCTR_BR_RETIRED,
+ armpmu->pmceid_bitmap))
+ return ARMV8_PMUV3_PERFCTR_BR_RETIRED;
+
+ if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
+ armpmu->pmceid_bitmap))
+ return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED;
+
+ return HW_OP_UNSUPPORTED;
+ }
+
+ return armpmu_map_event(event, &armv8_pmuv3_perf_map,
+ &armv8_pmuv3_perf_cache_map,
+ ARMV8_PMU_EVTYPE_EVENT);
+}
+
+static int __armv8_pmuv3_map_event(struct perf_event *event,
+ const unsigned (*extra_event_map)
+ [PERF_COUNT_HW_MAX],
+ const unsigned (*extra_cache_map)
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX])
+{
+ int hw_event_id;
+ struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+
+ hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event);
+
+ /*
+ * CHAIN events only work when paired with an adjacent counter, and it
+ * never makes sense for a user to open one in isolation, as they'll be
+ * rotated arbitrarily.
+ */
+ if (hw_event_id == ARMV8_PMUV3_PERFCTR_CHAIN)
+ return -EINVAL;
+
+ if (armv8pmu_event_is_64bit(event))
+ event->hw.flags |= ARMPMU_EVT_64BIT;
+
+ /*
+ * User events must be allocated into a single counter, and so
+ * must not be chained.
+ *
+ * Most 64-bit events require long counter support, but 64-bit
+ * CPU_CYCLES events can be placed into the dedicated cycle
+ * counter when this is free.
+ */
+ if (armv8pmu_event_want_user_access(event)) {
+ if (!(event->attach_state & PERF_ATTACH_TASK))
+ return -EINVAL;
+ if (armv8pmu_event_is_64bit(event) &&
+ (hw_event_id != ARMV8_PMUV3_PERFCTR_CPU_CYCLES) &&
+ !armv8pmu_has_long_event(armpmu))
+ return -EOPNOTSUPP;
+
+ event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+ }
+
+ /* Only expose micro/arch events supported by this PMU */
+ if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS)
+ && test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
+ return hw_event_id;
+ }
+
+ return armpmu_map_event(event, extra_event_map, extra_cache_map,
+ ARMV8_PMU_EVTYPE_EVENT);
+}
+
+static int armv8_pmuv3_map_event(struct perf_event *event)
+{
+ return __armv8_pmuv3_map_event(event, NULL, NULL);
+}
+
+static int armv8_a53_map_event(struct perf_event *event)
+{
+ return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map);
+}
+
+static int armv8_a57_map_event(struct perf_event *event)
+{
+ return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map);
+}
+
+static int armv8_a73_map_event(struct perf_event *event)
+{
+ return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map);
+}
+
+static int armv8_thunder_map_event(struct perf_event *event)
+{
+ return __armv8_pmuv3_map_event(event, NULL,
+ &armv8_thunder_perf_cache_map);
+}
+
+static int armv8_vulcan_map_event(struct perf_event *event)
+{
+ return __armv8_pmuv3_map_event(event, NULL,
+ &armv8_vulcan_perf_cache_map);
+}
+
+struct armv8pmu_probe_info {
+ struct arm_pmu *pmu;
+ bool present;
+};
+
+static void __armv8pmu_probe_pmu(void *info)
+{
+ struct armv8pmu_probe_info *probe = info;
+ struct arm_pmu *cpu_pmu = probe->pmu;
+ u64 pmceid_raw[2];
+ u32 pmceid[2];
+ int pmuver;
+
+ pmuver = read_pmuver();
+ if (!pmuv3_implemented(pmuver))
+ return;
+
+ cpu_pmu->pmuver = pmuver;
+ probe->present = true;
+
+ /* Read the nb of CNTx counters supported from PMNC */
+ bitmap_set(cpu_pmu->cntr_mask,
+ 0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
+
+ /* Add the CPU cycles counter */
+ set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask);
+
+ /* Add the CPU instructions counter */
+ if (pmuv3_has_icntr())
+ set_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask);
+
+ pmceid[0] = pmceid_raw[0] = read_pmceid0();
+ pmceid[1] = pmceid_raw[1] = read_pmceid1();
+
+ bitmap_from_arr32(cpu_pmu->pmceid_bitmap,
+ pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+
+ pmceid[0] = pmceid_raw[0] >> 32;
+ pmceid[1] = pmceid_raw[1] >> 32;
+
+ bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap,
+ pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+
+ /* store PMMIR register for sysfs */
+ if (is_pmuv3p4(pmuver))
+ cpu_pmu->reg_pmmir = read_pmmir();
+ else
+ cpu_pmu->reg_pmmir = 0;
+
+ brbe_probe(cpu_pmu);
+}
+
+static int branch_records_alloc(struct arm_pmu *armpmu)
+{
+ size_t size = struct_size_t(struct perf_branch_stack, entries,
+ brbe_num_branch_records(armpmu));
+ int cpu;
+
+ for_each_cpu(cpu, &armpmu->supported_cpus) {
+ struct pmu_hw_events *events_cpu;
+
+ events_cpu = per_cpu_ptr(armpmu->hw_events, cpu);
+ events_cpu->branch_stack = kmalloc(size, GFP_KERNEL);
+ if (!events_cpu->branch_stack)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
+{
+ struct armv8pmu_probe_info probe = {
+ .pmu = cpu_pmu,
+ .present = false,
+ };
+ int ret;
+
+ ret = smp_call_function_any(&cpu_pmu->supported_cpus,
+ __armv8pmu_probe_pmu,
+ &probe, 1);
+ if (ret)
+ return ret;
+
+ if (!probe.present)
+ return -ENODEV;
+
+ if (brbe_num_branch_records(cpu_pmu)) {
+ ret = branch_records_alloc(cpu_pmu);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static void armv8pmu_disable_user_access_ipi(void *unused)
+{
+ armv8pmu_disable_user_access();
+}
+
+static int armv8pmu_proc_user_access_handler(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (ret || !write || sysctl_perf_user_access)
+ return ret;
+
+ on_each_cpu(armv8pmu_disable_user_access_ipi, NULL, 1);
+ return 0;
+}
+
+static const struct ctl_table armv8_pmu_sysctl_table[] = {
+ {
+ .procname = "perf_user_access",
+ .data = &sysctl_perf_user_access,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = armv8pmu_proc_user_access_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+};
+
+static void armv8_pmu_register_sysctl_table(void)
+{
+ static u32 tbl_registered = 0;
+
+ if (!cmpxchg_relaxed(&tbl_registered, 0, 1))
+ register_sysctl("kernel", armv8_pmu_sysctl_table);
+}
+
+static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
+ int (*map_event)(struct perf_event *event))
+{
+ int ret = armv8pmu_probe_pmu(cpu_pmu);
+ if (ret)
+ return ret;
+
+ cpu_pmu->handle_irq = armv8pmu_handle_irq;
+ cpu_pmu->enable = armv8pmu_enable_event;
+ cpu_pmu->disable = armv8pmu_disable_event;
+ cpu_pmu->read_counter = armv8pmu_read_counter;
+ cpu_pmu->write_counter = armv8pmu_write_counter;
+ cpu_pmu->get_event_idx = armv8pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx;
+ cpu_pmu->start = armv8pmu_start;
+ cpu_pmu->stop = armv8pmu_stop;
+ cpu_pmu->reset = armv8pmu_reset;
+ cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
+
+ cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx;
+ if (brbe_num_branch_records(cpu_pmu))
+ cpu_pmu->pmu.sched_task = armv8pmu_sched_task;
+
+ cpu_pmu->name = name;
+ cpu_pmu->map_event = map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = &armv8_pmuv3_caps_attr_group;
+ armv8_pmu_register_sysctl_table();
+ return 0;
+}
+
+#define PMUV3_INIT_SIMPLE(name) \
+static int name##_pmu_init(struct arm_pmu *cpu_pmu) \
+{ \
+ return armv8_pmu_init(cpu_pmu, #name, armv8_pmuv3_map_event); \
+}
+
+#define PMUV3_INIT_MAP_EVENT(name, map_event) \
+static int name##_pmu_init(struct arm_pmu *cpu_pmu) \
+{ \
+ return armv8_pmu_init(cpu_pmu, #name, map_event); \
+}
+
+PMUV3_INIT_SIMPLE(armv8_pmuv3)
+
+PMUV3_INIT_SIMPLE(armv8_c1_nano)
+PMUV3_INIT_SIMPLE(armv8_c1_premium)
+PMUV3_INIT_SIMPLE(armv8_c1_pro)
+PMUV3_INIT_SIMPLE(armv8_c1_ultra)
+PMUV3_INIT_SIMPLE(armv8_cortex_a34)
+PMUV3_INIT_SIMPLE(armv8_cortex_a55)
+PMUV3_INIT_SIMPLE(armv8_cortex_a65)
+PMUV3_INIT_SIMPLE(armv8_cortex_a75)
+PMUV3_INIT_SIMPLE(armv8_cortex_a76)
+PMUV3_INIT_SIMPLE(armv8_cortex_a77)
+PMUV3_INIT_SIMPLE(armv8_cortex_a78)
+PMUV3_INIT_SIMPLE(armv9_cortex_a320)
+PMUV3_INIT_SIMPLE(armv9_cortex_a510)
+PMUV3_INIT_SIMPLE(armv9_cortex_a520)
+PMUV3_INIT_SIMPLE(armv9_cortex_a520ae)
+PMUV3_INIT_SIMPLE(armv9_cortex_a710)
+PMUV3_INIT_SIMPLE(armv9_cortex_a715)
+PMUV3_INIT_SIMPLE(armv9_cortex_a720)
+PMUV3_INIT_SIMPLE(armv9_cortex_a720ae)
+PMUV3_INIT_SIMPLE(armv9_cortex_a725)
+PMUV3_INIT_SIMPLE(armv8_cortex_x1)
+PMUV3_INIT_SIMPLE(armv9_cortex_x2)
+PMUV3_INIT_SIMPLE(armv9_cortex_x3)
+PMUV3_INIT_SIMPLE(armv9_cortex_x4)
+PMUV3_INIT_SIMPLE(armv9_cortex_x925)
+PMUV3_INIT_SIMPLE(armv8_neoverse_e1)
+PMUV3_INIT_SIMPLE(armv8_neoverse_n1)
+PMUV3_INIT_SIMPLE(armv9_neoverse_n2)
+PMUV3_INIT_SIMPLE(armv9_neoverse_n3)
+PMUV3_INIT_SIMPLE(armv8_neoverse_v1)
+PMUV3_INIT_SIMPLE(armv8_neoverse_v2)
+PMUV3_INIT_SIMPLE(armv8_neoverse_v3)
+PMUV3_INIT_SIMPLE(armv8_neoverse_v3ae)
+PMUV3_INIT_SIMPLE(armv8_rainier)
+
+PMUV3_INIT_SIMPLE(armv8_nvidia_carmel)
+PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
+
+PMUV3_INIT_SIMPLE(armv8_samsung_mongoose)
+
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a35, armv8_a53_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a53, armv8_a53_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a57, armv8_a57_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a72, armv8_a57_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a73, armv8_a73_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cavium_thunder, armv8_thunder_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_brcm_vulcan, armv8_vulcan_map_event)
+
+static const struct of_device_id armv8_pmu_of_device_ids[] = {
+ {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_pmu_init},
+ {.compatible = "arm,c1-nano-pmu", .data = armv8_c1_nano_pmu_init},
+ {.compatible = "arm,c1-premium-pmu", .data = armv8_c1_premium_pmu_init},
+ {.compatible = "arm,c1-pro-pmu", .data = armv8_c1_pro_pmu_init},
+ {.compatible = "arm,c1-ultra-pmu", .data = armv8_c1_ultra_pmu_init},
+ {.compatible = "arm,cortex-a34-pmu", .data = armv8_cortex_a34_pmu_init},
+ {.compatible = "arm,cortex-a35-pmu", .data = armv8_cortex_a35_pmu_init},
+ {.compatible = "arm,cortex-a53-pmu", .data = armv8_cortex_a53_pmu_init},
+ {.compatible = "arm,cortex-a55-pmu", .data = armv8_cortex_a55_pmu_init},
+ {.compatible = "arm,cortex-a57-pmu", .data = armv8_cortex_a57_pmu_init},
+ {.compatible = "arm,cortex-a65-pmu", .data = armv8_cortex_a65_pmu_init},
+ {.compatible = "arm,cortex-a72-pmu", .data = armv8_cortex_a72_pmu_init},
+ {.compatible = "arm,cortex-a73-pmu", .data = armv8_cortex_a73_pmu_init},
+ {.compatible = "arm,cortex-a75-pmu", .data = armv8_cortex_a75_pmu_init},
+ {.compatible = "arm,cortex-a76-pmu", .data = armv8_cortex_a76_pmu_init},
+ {.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init},
+ {.compatible = "arm,cortex-a78-pmu", .data = armv8_cortex_a78_pmu_init},
+ {.compatible = "arm,cortex-a320-pmu", .data = armv9_cortex_a320_pmu_init},
+ {.compatible = "arm,cortex-a510-pmu", .data = armv9_cortex_a510_pmu_init},
+ {.compatible = "arm,cortex-a520-pmu", .data = armv9_cortex_a520_pmu_init},
+ {.compatible = "arm,cortex-a520ae-pmu", .data = armv9_cortex_a520ae_pmu_init},
+ {.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init},
+ {.compatible = "arm,cortex-a715-pmu", .data = armv9_cortex_a715_pmu_init},
+ {.compatible = "arm,cortex-a720-pmu", .data = armv9_cortex_a720_pmu_init},
+ {.compatible = "arm,cortex-a720ae-pmu", .data = armv9_cortex_a720ae_pmu_init},
+ {.compatible = "arm,cortex-a725-pmu", .data = armv9_cortex_a725_pmu_init},
+ {.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init},
+ {.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init},
+ {.compatible = "arm,cortex-x3-pmu", .data = armv9_cortex_x3_pmu_init},
+ {.compatible = "arm,cortex-x4-pmu", .data = armv9_cortex_x4_pmu_init},
+ {.compatible = "arm,cortex-x925-pmu", .data = armv9_cortex_x925_pmu_init},
+ {.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init},
+ {.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init},
+ {.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init},
+ {.compatible = "arm,neoverse-n3-pmu", .data = armv9_neoverse_n3_pmu_init},
+ {.compatible = "arm,neoverse-v1-pmu", .data = armv8_neoverse_v1_pmu_init},
+ {.compatible = "arm,neoverse-v2-pmu", .data = armv8_neoverse_v2_pmu_init},
+ {.compatible = "arm,neoverse-v3-pmu", .data = armv8_neoverse_v3_pmu_init},
+ {.compatible = "arm,neoverse-v3ae-pmu", .data = armv8_neoverse_v3ae_pmu_init},
+ {.compatible = "arm,rainier-pmu", .data = armv8_rainier_pmu_init},
+ {.compatible = "cavium,thunder-pmu", .data = armv8_cavium_thunder_pmu_init},
+ {.compatible = "brcm,vulcan-pmu", .data = armv8_brcm_vulcan_pmu_init},
+ {.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init},
+ {.compatible = "nvidia,denver-pmu", .data = armv8_nvidia_denver_pmu_init},
+ {.compatible = "samsung,mongoose-pmu", .data = armv8_samsung_mongoose_pmu_init},
+ {},
+};
+
+static int armv8_pmu_device_probe(struct platform_device *pdev)
+{
+ return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver armv8_pmu_driver = {
+ .driver = {
+ .name = ARMV8_PMU_PDEV_NAME,
+ .of_match_table = armv8_pmu_of_device_ids,
+ .suppress_bind_attrs = true,
+ },
+ .probe = armv8_pmu_device_probe,
+};
+
+static int __init armv8_pmu_driver_init(void)
+{
+ int ret;
+
+ if (acpi_disabled)
+ ret = platform_driver_register(&armv8_pmu_driver);
+ else
+ ret = arm_pmu_acpi_probe(armv8_pmuv3_pmu_init);
+
+ if (!ret)
+ lockup_detector_retry_init();
+
+ return ret;
+}
+device_initcall(armv8_pmu_driver_init)
+
+void arch_perf_update_userpage(struct perf_event *event,
+ struct perf_event_mmap_page *userpg, u64 now)
+{
+ struct clock_read_data *rd;
+ unsigned int seq;
+ u64 ns;
+
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_time_short = 0;
+ userpg->cap_user_rdpmc = armv8pmu_event_has_user_read(event);
+
+ if (userpg->cap_user_rdpmc) {
+ if (event->hw.flags & ARMPMU_EVT_64BIT)
+ userpg->pmc_width = 64;
+ else
+ userpg->pmc_width = 32;
+ }
+
+ do {
+ rd = sched_clock_read_begin(&seq);
+
+ if (rd->read_sched_clock != arch_timer_read_counter)
+ return;
+
+ userpg->time_mult = rd->mult;
+ userpg->time_shift = rd->shift;
+ userpg->time_zero = rd->epoch_ns;
+ userpg->time_cycles = rd->epoch_cyc;
+ userpg->time_mask = rd->sched_clock_mask;
+
+ /*
+ * Subtract the cycle base, such that software that
+ * doesn't know about cap_user_time_short still 'works'
+ * assuming no wraps.
+ */
+ ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+ userpg->time_zero -= ns;
+
+ } while (sched_clock_read_retry(seq));
+
+ userpg->time_offset = userpg->time_zero - now;
+
+ /*
+ * time_shift is not expected to be greater than 31 due to
+ * the original published conversion algorithm shifting a
+ * 32-bit value (now specifies a 64-bit value) - refer
+ * perf_event_mmap_page documentation in perf_event.h.
+ */
+ if (userpg->time_shift == 32) {
+ userpg->time_shift = 31;
+ userpg->time_mult >>= 1;
+ }
+
+ /*
+ * Internal timekeeping for enabled/running/stopped times
+ * is always computed with the sched_clock.
+ */
+ userpg->cap_user_time = 1;
+ userpg->cap_user_time_zero = 1;
+ userpg->cap_user_time_short = 1;
+}
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 25a269d431e4..621f02a7f43b 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -115,6 +115,7 @@
#define SMMU_PMCG_PA_SHIFT 12
#define SMMU_PMCG_EVCNTR_RDONLY BIT(0)
+#define SMMU_PMCG_HARDEN_DISABLE BIT(1)
static int cpuhp_state_num;
@@ -159,6 +160,20 @@ static inline void smmu_pmu_enable(struct pmu *pmu)
writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR);
}
+static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
+ struct perf_event *event, int idx);
+
+static inline void smmu_pmu_enable_quirk_hip08_09(struct pmu *pmu)
+{
+ struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+ unsigned int idx;
+
+ for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
+ smmu_pmu_apply_event_filter(smmu_pmu, smmu_pmu->events[idx], idx);
+
+ smmu_pmu_enable(pmu);
+}
+
static inline void smmu_pmu_disable(struct pmu *pmu)
{
struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
@@ -167,6 +182,22 @@ static inline void smmu_pmu_disable(struct pmu *pmu)
writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
}
+static inline void smmu_pmu_disable_quirk_hip08_09(struct pmu *pmu)
+{
+ struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+ unsigned int idx;
+
+ /*
+ * The global disable of PMU sometimes fail to stop the counting.
+ * Harden this by writing an invalid event type to each used counter
+ * to forcibly stop counting.
+ */
+ for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
+ writel(0xffff, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx));
+
+ smmu_pmu_disable(pmu);
+}
+
static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu,
u32 idx, u64 value)
{
@@ -400,6 +431,17 @@ static int smmu_pmu_event_init(struct perf_event *event)
return -EINVAL;
}
+ /*
+ * Ensure all events are on the same cpu so all events are in the
+ * same cpu context, to avoid races on pmu_enable etc.
+ */
+ event->cpu = smmu_pmu->on_cpu;
+
+ hwc->idx = -1;
+
+ if (event->group_leader == event)
+ return 0;
+
for_each_sibling_event(sibling, event->group_leader) {
if (is_software_event(sibling))
continue;
@@ -411,14 +453,6 @@ static int smmu_pmu_event_init(struct perf_event *event)
return -EINVAL;
}
- hwc->idx = -1;
-
- /*
- * Ensure all events are on the same cpu so all events are in the
- * same cpu context, to avoid races on pmu_enable etc.
- */
- event->cpu = smmu_pmu->on_cpu;
-
return 0;
}
@@ -685,7 +719,7 @@ static void smmu_pmu_free_msis(void *data)
{
struct device *dev = data;
- platform_msi_domain_free_irqs(dev);
+ platform_device_msi_free_irqs_all(dev);
}
static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
@@ -715,7 +749,7 @@ static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI))
return;
- ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
+ ret = platform_device_msi_init_and_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
if (ret) {
dev_warn(dev, "failed to allocate MSIs\n");
return;
@@ -765,7 +799,10 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu)
switch (model) {
case IORT_SMMU_V3_PMCG_HISI_HIP08:
/* HiSilicon Erratum 162001800 */
- smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY;
+ smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY | SMMU_PMCG_HARDEN_DISABLE;
+ break;
+ case IORT_SMMU_V3_PMCG_HISI_HIP09:
+ smmu_pmu->options |= SMMU_PMCG_HARDEN_DISABLE;
break;
}
@@ -826,6 +863,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
smmu_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = smmu_pmu_enable,
.pmu_disable = smmu_pmu_disable,
@@ -890,6 +928,16 @@ static int smmu_pmu_probe(struct platform_device *pdev)
if (!dev->of_node)
smmu_pmu_get_acpi_options(smmu_pmu);
+ /*
+ * For platforms suffer this quirk, the PMU disable sometimes fails to
+ * stop the counters. This will leads to inaccurate or error counting.
+ * Forcibly disable the counters with these quirk handler.
+ */
+ if (smmu_pmu->options & SMMU_PMCG_HARDEN_DISABLE) {
+ smmu_pmu->pmu.pmu_enable = smmu_pmu_enable_quirk_hip08_09;
+ smmu_pmu->pmu.pmu_disable = smmu_pmu_disable_quirk_hip08_09;
+ }
+
/* Pick one CPU to be the preferred one to use */
smmu_pmu->on_cpu = raw_smp_processor_id();
WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
@@ -921,14 +969,12 @@ out_unregister:
return err;
}
-static int smmu_pmu_remove(struct platform_device *pdev)
+static void smmu_pmu_remove(struct platform_device *pdev)
{
struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&smmu_pmu->pmu);
cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
-
- return 0;
}
static void smmu_pmu_shutdown(struct platform_device *pdev)
@@ -984,6 +1030,7 @@ static void __exit arm_smmu_pmu_exit(void)
module_exit(arm_smmu_pmu_exit);
+MODULE_ALIAS("platform:arm-smmu-v3-pmcg");
MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension");
MODULE_AUTHOR("Neil Leeder <nleeder@codeaurora.org>");
MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>");
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 00e3a637f7b6..4801115f2b54 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -12,6 +12,7 @@
#define DRVNAME PMUNAME "_pmu"
#define pr_fmt(fmt) DRVNAME ": " fmt
+#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/capability.h>
@@ -24,8 +25,7 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
@@ -41,7 +41,7 @@
/*
* Cache if the event is allowed to trace Context information.
- * This allows us to perform the check, i.e, perfmon_capable(),
+ * This allows us to perform the check, i.e, perf_allow_kernel(),
* in the context of the event owner, once, during the event_init().
*/
#define SPE_PMU_HW_FLAGS_CX 0x00001
@@ -50,7 +50,7 @@ static_assert((PERF_EVENT_FLAG_ARCH & SPE_PMU_HW_FLAGS_CX) == SPE_PMU_HW_FLAGS_C
static void set_spe_event_has_cx(struct perf_event *event)
{
- if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable())
+ if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && !perf_allow_kernel())
event->hw.flags |= SPE_PMU_HW_FLAGS_CX;
}
@@ -84,9 +84,14 @@ struct arm_spe_pmu {
#define SPE_PMU_FEAT_ARCH_INST (1UL << 3)
#define SPE_PMU_FEAT_LDS (1UL << 4)
#define SPE_PMU_FEAT_ERND (1UL << 5)
+#define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6)
+#define SPE_PMU_FEAT_DISCARD (1UL << 7)
+#define SPE_PMU_FEAT_EFT (1UL << 8)
+#define SPE_PMU_FEAT_FDS (1UL << 9)
#define SPE_PMU_FEAT_DEV_PROBED (1UL << 63)
u64 features;
+ u64 pmsevfr_res0;
u16 max_record_sz;
u16 align;
struct perf_output_handle __percpu *handle;
@@ -95,7 +100,8 @@ struct arm_spe_pmu {
#define to_spe_pmu(p) (container_of(p, struct arm_spe_pmu, pmu))
/* Convert a free-running index from perf into an SPE buffer offset */
-#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT))
+#define PERF_IDX2OFF(idx, buf) \
+ ((idx) % ((unsigned long)(buf)->nr_pages << PAGE_SHIFT))
/* Keep track of our dynamic hotplug state */
static enum cpuhp_state arm_spe_pmu_online;
@@ -113,6 +119,7 @@ enum arm_spe_pmu_capabilities {
SPE_PMU_CAP_FEAT_MAX,
SPE_PMU_CAP_CNT_SZ = SPE_PMU_CAP_FEAT_MAX,
SPE_PMU_CAP_MIN_IVAL,
+ SPE_PMU_CAP_EVENT_FILTER,
};
static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
@@ -120,7 +127,7 @@ static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
[SPE_PMU_CAP_ERND] = SPE_PMU_FEAT_ERND,
};
-static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
+static u64 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
{
if (cap < SPE_PMU_CAP_FEAT_MAX)
return !!(spe_pmu->features & arm_spe_pmu_feat_caps[cap]);
@@ -130,6 +137,8 @@ static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
return spe_pmu->counter_sz;
case SPE_PMU_CAP_MIN_IVAL:
return spe_pmu->min_period;
+ case SPE_PMU_CAP_EVENT_FILTER:
+ return ~spe_pmu->pmsevfr_res0;
default:
WARN(1, "unknown cap %d\n", cap);
}
@@ -146,7 +155,19 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev,
container_of(attr, struct dev_ext_attribute, attr);
int cap = (long)ea->var;
- return sysfs_emit(buf, "%u\n", arm_spe_pmu_cap_get(spe_pmu, cap));
+ return sysfs_emit(buf, "%llu\n", arm_spe_pmu_cap_get(spe_pmu, cap));
+}
+
+static ssize_t arm_spe_pmu_cap_show_hex(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
+ struct dev_ext_attribute *ea =
+ container_of(attr, struct dev_ext_attribute, attr);
+ int cap = (long)ea->var;
+
+ return sysfs_emit(buf, "0x%llx\n", arm_spe_pmu_cap_get(spe_pmu, cap));
}
#define SPE_EXT_ATTR_ENTRY(_name, _func, _var) \
@@ -156,12 +177,15 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev,
#define SPE_CAP_EXT_ATTR_ENTRY(_name, _var) \
SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show, _var)
+#define SPE_CAP_EXT_ATTR_ENTRY_HEX(_name, _var) \
+ SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show_hex, _var)
static struct attribute *arm_spe_pmu_cap_attr[] = {
SPE_CAP_EXT_ATTR_ENTRY(arch_inst, SPE_PMU_CAP_ARCH_INST),
SPE_CAP_EXT_ATTR_ENTRY(ernd, SPE_PMU_CAP_ERND),
SPE_CAP_EXT_ATTR_ENTRY(count_size, SPE_PMU_CAP_CNT_SZ),
SPE_CAP_EXT_ATTR_ENTRY(min_interval, SPE_PMU_CAP_MIN_IVAL),
+ SPE_CAP_EXT_ATTR_ENTRY_HEX(event_filter, SPE_PMU_CAP_EVENT_FILTER),
NULL,
};
@@ -192,6 +216,30 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
#define ATTR_CFG_FLD_store_filter_CFG config /* PMSFCR_EL1.ST */
#define ATTR_CFG_FLD_store_filter_LO 34
#define ATTR_CFG_FLD_store_filter_HI 34
+#define ATTR_CFG_FLD_discard_CFG config /* PMBLIMITR_EL1.FM = DISCARD */
+#define ATTR_CFG_FLD_discard_LO 35
+#define ATTR_CFG_FLD_discard_HI 35
+#define ATTR_CFG_FLD_branch_filter_mask_CFG config /* PMSFCR_EL1.Bm */
+#define ATTR_CFG_FLD_branch_filter_mask_LO 36
+#define ATTR_CFG_FLD_branch_filter_mask_HI 36
+#define ATTR_CFG_FLD_load_filter_mask_CFG config /* PMSFCR_EL1.LDm */
+#define ATTR_CFG_FLD_load_filter_mask_LO 37
+#define ATTR_CFG_FLD_load_filter_mask_HI 37
+#define ATTR_CFG_FLD_store_filter_mask_CFG config /* PMSFCR_EL1.STm */
+#define ATTR_CFG_FLD_store_filter_mask_LO 38
+#define ATTR_CFG_FLD_store_filter_mask_HI 38
+#define ATTR_CFG_FLD_simd_filter_CFG config /* PMSFCR_EL1.SIMD */
+#define ATTR_CFG_FLD_simd_filter_LO 39
+#define ATTR_CFG_FLD_simd_filter_HI 39
+#define ATTR_CFG_FLD_simd_filter_mask_CFG config /* PMSFCR_EL1.SIMDm */
+#define ATTR_CFG_FLD_simd_filter_mask_LO 40
+#define ATTR_CFG_FLD_simd_filter_mask_HI 40
+#define ATTR_CFG_FLD_float_filter_CFG config /* PMSFCR_EL1.FP */
+#define ATTR_CFG_FLD_float_filter_LO 41
+#define ATTR_CFG_FLD_float_filter_HI 41
+#define ATTR_CFG_FLD_float_filter_mask_CFG config /* PMSFCR_EL1.FPm */
+#define ATTR_CFG_FLD_float_filter_mask_LO 42
+#define ATTR_CFG_FLD_float_filter_mask_HI 42
#define ATTR_CFG_FLD_event_filter_CFG config1 /* PMSEVFR_EL1 */
#define ATTR_CFG_FLD_event_filter_LO 0
@@ -201,37 +249,33 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
#define ATTR_CFG_FLD_min_latency_LO 0
#define ATTR_CFG_FLD_min_latency_HI 11
-/* Why does everything I do descend into this? */
-#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
- (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi
-
-#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
- __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)
+#define ATTR_CFG_FLD_inv_event_filter_CFG config3 /* PMSNEVFR_EL1 */
+#define ATTR_CFG_FLD_inv_event_filter_LO 0
+#define ATTR_CFG_FLD_inv_event_filter_HI 63
-#define GEN_PMU_FORMAT_ATTR(name) \
- PMU_FORMAT_ATTR(name, \
- _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \
- ATTR_CFG_FLD_##name##_LO, \
- ATTR_CFG_FLD_##name##_HI))
-
-#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \
- ((((attr)->cfg) >> lo) & GENMASK(hi - lo, 0))
-
-#define ATTR_CFG_GET_FLD(attr, name) \
- _ATTR_CFG_GET_FLD(attr, \
- ATTR_CFG_FLD_##name##_CFG, \
- ATTR_CFG_FLD_##name##_LO, \
- ATTR_CFG_FLD_##name##_HI)
+#define ATTR_CFG_FLD_inv_data_src_filter_CFG config4 /* inverse of PMSDSFR_EL1 */
+#define ATTR_CFG_FLD_inv_data_src_filter_LO 0
+#define ATTR_CFG_FLD_inv_data_src_filter_HI 63
GEN_PMU_FORMAT_ATTR(ts_enable);
GEN_PMU_FORMAT_ATTR(pa_enable);
GEN_PMU_FORMAT_ATTR(pct_enable);
GEN_PMU_FORMAT_ATTR(jitter);
GEN_PMU_FORMAT_ATTR(branch_filter);
+GEN_PMU_FORMAT_ATTR(branch_filter_mask);
GEN_PMU_FORMAT_ATTR(load_filter);
+GEN_PMU_FORMAT_ATTR(load_filter_mask);
GEN_PMU_FORMAT_ATTR(store_filter);
+GEN_PMU_FORMAT_ATTR(store_filter_mask);
+GEN_PMU_FORMAT_ATTR(simd_filter);
+GEN_PMU_FORMAT_ATTR(simd_filter_mask);
+GEN_PMU_FORMAT_ATTR(float_filter);
+GEN_PMU_FORMAT_ATTR(float_filter_mask);
GEN_PMU_FORMAT_ATTR(event_filter);
+GEN_PMU_FORMAT_ATTR(inv_event_filter);
+GEN_PMU_FORMAT_ATTR(inv_data_src_filter);
GEN_PMU_FORMAT_ATTR(min_latency);
+GEN_PMU_FORMAT_ATTR(discard);
static struct attribute *arm_spe_pmu_formats_attr[] = {
&format_attr_ts_enable.attr,
@@ -239,15 +283,56 @@ static struct attribute *arm_spe_pmu_formats_attr[] = {
&format_attr_pct_enable.attr,
&format_attr_jitter.attr,
&format_attr_branch_filter.attr,
+ &format_attr_branch_filter_mask.attr,
&format_attr_load_filter.attr,
+ &format_attr_load_filter_mask.attr,
&format_attr_store_filter.attr,
+ &format_attr_store_filter_mask.attr,
+ &format_attr_simd_filter.attr,
+ &format_attr_simd_filter_mask.attr,
+ &format_attr_float_filter.attr,
+ &format_attr_float_filter_mask.attr,
&format_attr_event_filter.attr,
+ &format_attr_inv_event_filter.attr,
+ &format_attr_inv_data_src_filter.attr,
&format_attr_min_latency.attr,
+ &format_attr_discard.attr,
NULL,
};
+static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr,
+ int unused)
+ {
+ struct device *dev = kobj_to_dev(kobj);
+ struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
+
+ if (attr == &format_attr_discard.attr && !(spe_pmu->features & SPE_PMU_FEAT_DISCARD))
+ return 0;
+
+ if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
+ return 0;
+
+ if (attr == &format_attr_inv_data_src_filter.attr &&
+ !(spe_pmu->features & SPE_PMU_FEAT_FDS))
+ return 0;
+
+ if ((attr == &format_attr_branch_filter_mask.attr ||
+ attr == &format_attr_load_filter_mask.attr ||
+ attr == &format_attr_store_filter_mask.attr ||
+ attr == &format_attr_simd_filter.attr ||
+ attr == &format_attr_simd_filter_mask.attr ||
+ attr == &format_attr_float_filter.attr ||
+ attr == &format_attr_float_filter_mask.attr) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_EFT))
+ return 0;
+
+ return attr->mode;
+}
+
static const struct attribute_group arm_spe_pmu_format_group = {
.name = "format",
+ .is_visible = arm_spe_pmu_format_attr_is_visible,
.attrs = arm_spe_pmu_formats_attr,
};
@@ -282,36 +367,39 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event)
struct perf_event_attr *attr = &event->attr;
u64 reg = 0;
- reg |= ATTR_CFG_GET_FLD(attr, ts_enable) << SYS_PMSCR_EL1_TS_SHIFT;
- reg |= ATTR_CFG_GET_FLD(attr, pa_enable) << SYS_PMSCR_EL1_PA_SHIFT;
- reg |= ATTR_CFG_GET_FLD(attr, pct_enable) << SYS_PMSCR_EL1_PCT_SHIFT;
+ reg |= FIELD_PREP(PMSCR_EL1_TS, ATTR_CFG_GET_FLD(attr, ts_enable));
+ reg |= FIELD_PREP(PMSCR_EL1_PA, ATTR_CFG_GET_FLD(attr, pa_enable));
+ reg |= FIELD_PREP(PMSCR_EL1_PCT, ATTR_CFG_GET_FLD(attr, pct_enable));
if (!attr->exclude_user)
- reg |= BIT(SYS_PMSCR_EL1_E0SPE_SHIFT);
+ reg |= PMSCR_EL1_E0SPE;
if (!attr->exclude_kernel)
- reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT);
+ reg |= PMSCR_EL1_E1SPE;
if (get_spe_event_has_cx(event))
- reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT);
+ reg |= PMSCR_EL1_CX;
return reg;
}
static void arm_spe_event_sanitise_period(struct perf_event *event)
{
- struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
u64 period = event->hw.sample_period;
- u64 max_period = SYS_PMSIRR_EL1_INTERVAL_MASK
- << SYS_PMSIRR_EL1_INTERVAL_SHIFT;
+ u64 max_period = PMSIRR_EL1_INTERVAL_MASK;
- if (period < spe_pmu->min_period)
- period = spe_pmu->min_period;
- else if (period > max_period)
- period = max_period;
- else
- period &= max_period;
+ /*
+ * The PMSIDR_EL1.Interval field (stored in spe_pmu->min_period) is a
+ * recommendation for the minimum interval, not a hardware limitation.
+ *
+ * According to the Arm ARM (DDI 0487 L.a), section D24.7.12 PMSIRR_EL1,
+ * Sampling Interval Reload Register, the INTERVAL field (bits [31:8])
+ * states: "Software must set this to a nonzero value". Use 1 as the
+ * minimum value.
+ */
+ u64 min_period = FIELD_PREP(PMSIRR_EL1_INTERVAL_MASK, 1);
+ period = clamp_t(u64, period, min_period, max_period) & max_period;
event->hw.sample_period = period;
}
@@ -322,7 +410,7 @@ static u64 arm_spe_event_to_pmsirr(struct perf_event *event)
arm_spe_event_sanitise_period(event);
- reg |= ATTR_CFG_GET_FLD(attr, jitter) << SYS_PMSIRR_EL1_RND_SHIFT;
+ reg |= FIELD_PREP(PMSIRR_EL1_RND, ATTR_CFG_GET_FLD(attr, jitter));
reg |= event->hw.sample_period;
return reg;
@@ -333,18 +421,31 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
struct perf_event_attr *attr = &event->attr;
u64 reg = 0;
- reg |= ATTR_CFG_GET_FLD(attr, load_filter) << SYS_PMSFCR_EL1_LD_SHIFT;
- reg |= ATTR_CFG_GET_FLD(attr, store_filter) << SYS_PMSFCR_EL1_ST_SHIFT;
- reg |= ATTR_CFG_GET_FLD(attr, branch_filter) << SYS_PMSFCR_EL1_B_SHIFT;
+ reg |= FIELD_PREP(PMSFCR_EL1_LD, ATTR_CFG_GET_FLD(attr, load_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_LDm, ATTR_CFG_GET_FLD(attr, load_filter_mask));
+ reg |= FIELD_PREP(PMSFCR_EL1_ST, ATTR_CFG_GET_FLD(attr, store_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_STm, ATTR_CFG_GET_FLD(attr, store_filter_mask));
+ reg |= FIELD_PREP(PMSFCR_EL1_B, ATTR_CFG_GET_FLD(attr, branch_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_Bm, ATTR_CFG_GET_FLD(attr, branch_filter_mask));
+ reg |= FIELD_PREP(PMSFCR_EL1_SIMD, ATTR_CFG_GET_FLD(attr, simd_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_SIMDm, ATTR_CFG_GET_FLD(attr, simd_filter_mask));
+ reg |= FIELD_PREP(PMSFCR_EL1_FP, ATTR_CFG_GET_FLD(attr, float_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_FPm, ATTR_CFG_GET_FLD(attr, float_filter_mask));
if (reg)
- reg |= BIT(SYS_PMSFCR_EL1_FT_SHIFT);
+ reg |= PMSFCR_EL1_FT;
if (ATTR_CFG_GET_FLD(attr, event_filter))
- reg |= BIT(SYS_PMSFCR_EL1_FE_SHIFT);
+ reg |= PMSFCR_EL1_FE;
+
+ if (ATTR_CFG_GET_FLD(attr, inv_event_filter))
+ reg |= PMSFCR_EL1_FnE;
+
+ if (ATTR_CFG_GET_FLD(attr, inv_data_src_filter))
+ reg |= PMSFCR_EL1_FDS;
if (ATTR_CFG_GET_FLD(attr, min_latency))
- reg |= BIT(SYS_PMSFCR_EL1_FL_SHIFT);
+ reg |= PMSFCR_EL1_FL;
return reg;
}
@@ -355,11 +456,27 @@ static u64 arm_spe_event_to_pmsevfr(struct perf_event *event)
return ATTR_CFG_GET_FLD(attr, event_filter);
}
+static u64 arm_spe_event_to_pmsnevfr(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ return ATTR_CFG_GET_FLD(attr, inv_event_filter);
+}
+
static u64 arm_spe_event_to_pmslatfr(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
- return ATTR_CFG_GET_FLD(attr, min_latency)
- << SYS_PMSLATFR_EL1_MINLAT_SHIFT;
+ return FIELD_PREP(PMSLATFR_EL1_MINLAT, ATTR_CFG_GET_FLD(attr, min_latency));
+}
+
+static u64 arm_spe_event_to_pmsdsfr(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+
+ /*
+ * Data src filter is inverted so that the default value of 0 is
+ * equivalent to no filtering.
+ */
+ return ~ATTR_CFG_GET_FLD(attr, inv_data_src_filter);
}
static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len)
@@ -496,6 +613,12 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
u64 base, limit;
struct arm_spe_pmu_buf *buf;
+ if (ATTR_CFG_GET_FLD(&event->attr, discard)) {
+ limit = FIELD_PREP(PMBLIMITR_EL1_FM, PMBLIMITR_EL1_FM_DISCARD);
+ limit |= PMBLIMITR_EL1_E;
+ goto out_write_limit;
+ }
+
/* Start a new aux session */
buf = perf_aux_output_begin(handle, event);
if (!buf) {
@@ -511,7 +634,7 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
limit = buf->snapshot ? arm_spe_pmu_next_snapshot_off(handle)
: arm_spe_pmu_next_off(handle);
if (limit)
- limit |= BIT(SYS_PMBLIMITR_EL1_E_SHIFT);
+ limit |= PMBLIMITR_EL1_E;
limit += (u64)buf->base;
base = (u64)buf->base + PERF_IDX2OFF(handle->head, buf);
@@ -570,28 +693,28 @@ arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle)
/* Service required? */
pmbsr = read_sysreg_s(SYS_PMBSR_EL1);
- if (!(pmbsr & BIT(SYS_PMBSR_EL1_S_SHIFT)))
+ if (!FIELD_GET(PMBSR_EL1_S, pmbsr))
return SPE_PMU_BUF_FAULT_ACT_SPURIOUS;
/*
* If we've lost data, disable profiling and also set the PARTIAL
* flag to indicate that the last record is corrupted.
*/
- if (pmbsr & BIT(SYS_PMBSR_EL1_DL_SHIFT))
+ if (FIELD_GET(PMBSR_EL1_DL, pmbsr))
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED |
PERF_AUX_FLAG_PARTIAL);
/* Report collisions to userspace so that it can up the period */
- if (pmbsr & BIT(SYS_PMBSR_EL1_COLL_SHIFT))
+ if (FIELD_GET(PMBSR_EL1_COLL, pmbsr))
perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION);
/* We only expect buffer management events */
- switch (pmbsr & (SYS_PMBSR_EL1_EC_MASK << SYS_PMBSR_EL1_EC_SHIFT)) {
- case SYS_PMBSR_EL1_EC_BUF:
+ switch (FIELD_GET(PMBSR_EL1_EC, pmbsr)) {
+ case PMBSR_EL1_EC_BUF:
/* Handled below */
break;
- case SYS_PMBSR_EL1_EC_FAULT_S1:
- case SYS_PMBSR_EL1_EC_FAULT_S2:
+ case PMBSR_EL1_EC_FAULT_S1:
+ case PMBSR_EL1_EC_FAULT_S2:
err_str = "Unexpected buffer fault";
goto out_err;
default:
@@ -600,9 +723,8 @@ arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle)
}
/* Buffer management event */
- switch (pmbsr &
- (SYS_PMBSR_EL1_BUF_BSC_MASK << SYS_PMBSR_EL1_BUF_BSC_SHIFT)) {
- case SYS_PMBSR_EL1_BUF_BSC_FULL:
+ switch (FIELD_GET(PMBSR_EL1_BUF_BSC_MASK, pmbsr)) {
+ case PMBSR_EL1_BUF_BSC_FULL:
ret = SPE_PMU_BUF_FAULT_ACT_OK;
goto out_stop;
default:
@@ -673,18 +795,6 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
return IRQ_HANDLED;
}
-static u64 arm_spe_pmsevfr_res0(u16 pmsver)
-{
- switch (pmsver) {
- case ID_AA64DFR0_EL1_PMSVer_IMP:
- return SYS_PMSEVFR_EL1_RES0_8_2;
- case ID_AA64DFR0_EL1_PMSVer_V1P1:
- /* Return the highest version we support in default */
- default:
- return SYS_PMSEVFR_EL1_RES0_8_3;
- }
-}
-
/* Perf callbacks */
static int arm_spe_pmu_event_init(struct perf_event *event)
{
@@ -700,7 +810,14 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
!cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
return -ENOENT;
- if (arm_spe_event_to_pmsevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
+ if (arm_spe_event_to_pmsevfr(event) & spe_pmu->pmsevfr_res0)
+ return -EOPNOTSUPP;
+
+ if (arm_spe_event_to_pmsnevfr(event) & spe_pmu->pmsevfr_res0)
+ return -EOPNOTSUPP;
+
+ if (arm_spe_event_to_pmsdsfr(event) != U64_MAX &&
+ !(spe_pmu->features & SPE_PMU_FEAT_FDS))
return -EOPNOTSUPP;
if (attr->exclude_idle)
@@ -717,24 +834,40 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
return -EINVAL;
reg = arm_spe_event_to_pmsfcr(event);
- if ((reg & BIT(SYS_PMSFCR_EL1_FE_SHIFT)) &&
+ if ((FIELD_GET(PMSFCR_EL1_FE, reg)) &&
!(spe_pmu->features & SPE_PMU_FEAT_FILT_EVT))
return -EOPNOTSUPP;
- if ((reg & BIT(SYS_PMSFCR_EL1_FT_SHIFT)) &&
+ if ((FIELD_GET(PMSFCR_EL1_FnE, reg)) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
+ return -EOPNOTSUPP;
+
+ if ((FIELD_GET(PMSFCR_EL1_FT, reg)) &&
!(spe_pmu->features & SPE_PMU_FEAT_FILT_TYP))
return -EOPNOTSUPP;
- if ((reg & BIT(SYS_PMSFCR_EL1_FL_SHIFT)) &&
+ if ((FIELD_GET(PMSFCR_EL1_FL, reg)) &&
!(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
return -EOPNOTSUPP;
+ if ((FIELD_GET(PMSFCR_EL1_LDm, reg) ||
+ FIELD_GET(PMSFCR_EL1_STm, reg) ||
+ FIELD_GET(PMSFCR_EL1_Bm, reg) ||
+ FIELD_GET(PMSFCR_EL1_SIMD, reg) ||
+ FIELD_GET(PMSFCR_EL1_SIMDm, reg) ||
+ FIELD_GET(PMSFCR_EL1_FP, reg) ||
+ FIELD_GET(PMSFCR_EL1_FPm, reg)) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_EFT))
+ return -EOPNOTSUPP;
+
+ if (ATTR_CFG_GET_FLD(&event->attr, discard) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_DISCARD))
+ return -EOPNOTSUPP;
+
set_spe_event_has_cx(event);
reg = arm_spe_event_to_pmscr(event);
- if (!perfmon_capable() &&
- (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) |
- BIT(SYS_PMSCR_EL1_PCT_SHIFT))))
- return -EACCES;
+ if (reg & (PMSCR_EL1_PA | PMSCR_EL1_PCT))
+ return perf_allow_kernel();
return 0;
}
@@ -757,6 +890,16 @@ static void arm_spe_pmu_start(struct perf_event *event, int flags)
reg = arm_spe_event_to_pmsevfr(event);
write_sysreg_s(reg, SYS_PMSEVFR_EL1);
+ if (spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT) {
+ reg = arm_spe_event_to_pmsnevfr(event);
+ write_sysreg_s(reg, SYS_PMSNEVFR_EL1);
+ }
+
+ if (spe_pmu->features & SPE_PMU_FEAT_FDS) {
+ reg = arm_spe_event_to_pmsdsfr(event);
+ write_sysreg_s(reg, SYS_PMSDSFR_EL1);
+ }
+
reg = arm_spe_event_to_pmslatfr(event);
write_sysreg_s(reg, SYS_PMSLATFR_EL1);
@@ -914,6 +1057,7 @@ static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)
spe_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &spe_pmu->pdev->dev,
.capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
.attr_groups = arm_spe_pmu_attr_groups,
/*
@@ -971,14 +1115,14 @@ static void __arm_spe_pmu_dev_probe(void *info)
/* Read PMBIDR first to determine whether or not we have access */
reg = read_sysreg_s(SYS_PMBIDR_EL1);
- if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) {
+ if (FIELD_GET(PMBIDR_EL1_P, reg)) {
dev_err(dev,
"profiling buffer owned by higher exception level\n");
return;
}
/* Minimum alignment. If it's out-of-range, then fail the probe */
- fld = reg >> SYS_PMBIDR_EL1_ALIGN_SHIFT & SYS_PMBIDR_EL1_ALIGN_MASK;
+ fld = FIELD_GET(PMBIDR_EL1_ALIGN, reg);
spe_pmu->align = 1 << fld;
if (spe_pmu->align > SZ_2K) {
dev_err(dev, "unsupported PMBIDR.Align [%d] on CPU %d\n",
@@ -988,58 +1132,70 @@ static void __arm_spe_pmu_dev_probe(void *info)
/* It's now safe to read PMSIDR and figure out what we've got */
reg = read_sysreg_s(SYS_PMSIDR_EL1);
- if (reg & BIT(SYS_PMSIDR_EL1_FE_SHIFT))
+ if (FIELD_GET(PMSIDR_EL1_FE, reg))
spe_pmu->features |= SPE_PMU_FEAT_FILT_EVT;
- if (reg & BIT(SYS_PMSIDR_EL1_FT_SHIFT))
+ if (FIELD_GET(PMSIDR_EL1_FnE, reg))
+ spe_pmu->features |= SPE_PMU_FEAT_INV_FILT_EVT;
+
+ if (FIELD_GET(PMSIDR_EL1_FT, reg))
spe_pmu->features |= SPE_PMU_FEAT_FILT_TYP;
- if (reg & BIT(SYS_PMSIDR_EL1_FL_SHIFT))
+ if (FIELD_GET(PMSIDR_EL1_FL, reg))
spe_pmu->features |= SPE_PMU_FEAT_FILT_LAT;
- if (reg & BIT(SYS_PMSIDR_EL1_ARCHINST_SHIFT))
+ if (FIELD_GET(PMSIDR_EL1_ARCHINST, reg))
spe_pmu->features |= SPE_PMU_FEAT_ARCH_INST;
- if (reg & BIT(SYS_PMSIDR_EL1_LDS_SHIFT))
+ if (FIELD_GET(PMSIDR_EL1_LDS, reg))
spe_pmu->features |= SPE_PMU_FEAT_LDS;
- if (reg & BIT(SYS_PMSIDR_EL1_ERND_SHIFT))
+ if (FIELD_GET(PMSIDR_EL1_ERND, reg))
spe_pmu->features |= SPE_PMU_FEAT_ERND;
+ if (spe_pmu->pmsver >= ID_AA64DFR0_EL1_PMSVer_V1P2)
+ spe_pmu->features |= SPE_PMU_FEAT_DISCARD;
+
+ if (FIELD_GET(PMSIDR_EL1_EFT, reg))
+ spe_pmu->features |= SPE_PMU_FEAT_EFT;
+
+ if (FIELD_GET(PMSIDR_EL1_FDS, reg))
+ spe_pmu->features |= SPE_PMU_FEAT_FDS;
+
/* This field has a spaced out encoding, so just use a look-up */
- fld = reg >> SYS_PMSIDR_EL1_INTERVAL_SHIFT & SYS_PMSIDR_EL1_INTERVAL_MASK;
+ fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg);
switch (fld) {
- case 0:
+ case PMSIDR_EL1_INTERVAL_256:
spe_pmu->min_period = 256;
break;
- case 2:
+ case PMSIDR_EL1_INTERVAL_512:
spe_pmu->min_period = 512;
break;
- case 3:
+ case PMSIDR_EL1_INTERVAL_768:
spe_pmu->min_period = 768;
break;
- case 4:
+ case PMSIDR_EL1_INTERVAL_1024:
spe_pmu->min_period = 1024;
break;
- case 5:
+ case PMSIDR_EL1_INTERVAL_1536:
spe_pmu->min_period = 1536;
break;
- case 6:
+ case PMSIDR_EL1_INTERVAL_2048:
spe_pmu->min_period = 2048;
break;
- case 7:
+ case PMSIDR_EL1_INTERVAL_3072:
spe_pmu->min_period = 3072;
break;
default:
dev_warn(dev, "unknown PMSIDR_EL1.Interval [%d]; assuming 8\n",
fld);
fallthrough;
- case 8:
+ case PMSIDR_EL1_INTERVAL_4096:
spe_pmu->min_period = 4096;
}
/* Maximum record size. If it's out-of-range, then fail the probe */
- fld = reg >> SYS_PMSIDR_EL1_MAXSIZE_SHIFT & SYS_PMSIDR_EL1_MAXSIZE_MASK;
+ fld = FIELD_GET(PMSIDR_EL1_MAXSIZE, reg);
spe_pmu->max_record_sz = 1 << fld;
if (spe_pmu->max_record_sz > SZ_2K || spe_pmu->max_record_sz < 16) {
dev_err(dev, "unsupported PMSIDR_EL1.MaxSize [%d] on CPU %d\n",
@@ -1047,22 +1203,26 @@ static void __arm_spe_pmu_dev_probe(void *info)
return;
}
- fld = reg >> SYS_PMSIDR_EL1_COUNTSIZE_SHIFT & SYS_PMSIDR_EL1_COUNTSIZE_MASK;
+ fld = FIELD_GET(PMSIDR_EL1_COUNTSIZE, reg);
switch (fld) {
default:
dev_warn(dev, "unknown PMSIDR_EL1.CountSize [%d]; assuming 2\n",
fld);
fallthrough;
- case 2:
+ case PMSIDR_EL1_COUNTSIZE_12_BIT_SAT:
spe_pmu->counter_sz = 12;
break;
- case 3:
+ case PMSIDR_EL1_COUNTSIZE_16_BIT_SAT:
spe_pmu->counter_sz = 16;
}
+ /* Write all 1s and then read back. Unsupported filter bits are RAZ/WI. */
+ write_sysreg_s(U64_MAX, SYS_PMSEVFR_EL1);
+ spe_pmu->pmsevfr_res0 = ~read_sysreg_s(SYS_PMSEVFR_EL1);
+
dev_info(dev,
- "probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
- cpumask_pr_args(&spe_pmu->supported_cpus),
+ "probed SPEv1.%d for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
+ spe_pmu->pmsver - 1, cpumask_pr_args(&spe_pmu->supported_cpus),
spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features);
spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED;
@@ -1136,8 +1296,8 @@ static int arm_spe_pmu_dev_init(struct arm_spe_pmu *spe_pmu)
return -ENXIO;
/* Request our PPIs (note that the IRQ is still disabled) */
- ret = request_percpu_irq(spe_pmu->irq, arm_spe_pmu_irq_handler, DRVNAME,
- spe_pmu->handle);
+ ret = request_percpu_irq_affinity(spe_pmu->irq, arm_spe_pmu_irq_handler,
+ DRVNAME, mask, spe_pmu->handle);
if (ret)
return ret;
@@ -1164,8 +1324,10 @@ static void arm_spe_pmu_dev_teardown(struct arm_spe_pmu *spe_pmu)
static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu)
{
struct platform_device *pdev = spe_pmu->pdev;
- int irq = platform_get_irq(pdev, 0);
+ const struct cpumask *affinity;
+ int irq;
+ irq = platform_get_irq_affinity(pdev, 0, &affinity);
if (irq < 0)
return -ENXIO;
@@ -1174,10 +1336,7 @@ static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu)
return -EINVAL;
}
- if (irq_get_percpu_devid_partition(irq, &spe_pmu->supported_cpus)) {
- dev_err(&pdev->dev, "failed to get PPI partition (%d)\n", irq);
- return -EINVAL;
- }
+ cpumask_copy(&spe_pmu->supported_cpus, affinity);
spe_pmu->irq = irq;
return 0;
@@ -1242,14 +1401,13 @@ out_free_handle:
return ret;
}
-static int arm_spe_pmu_device_remove(struct platform_device *pdev)
+static void arm_spe_pmu_device_remove(struct platform_device *pdev)
{
struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
arm_spe_pmu_perf_destroy(spe_pmu);
arm_spe_pmu_dev_teardown(spe_pmu);
free_percpu(spe_pmu->handle);
- return 0;
}
static struct platform_driver arm_spe_pmu_driver = {
@@ -1260,7 +1418,7 @@ static struct platform_driver arm_spe_pmu_driver = {
.suppress_bind_attrs = true,
},
.probe = arm_spe_pmu_device_probe,
- .remove = arm_spe_pmu_device_remove,
+ .remove = arm_spe_pmu_device_remove,
};
static int __init arm_spe_pmu_init(void)
diff --git a/drivers/perf/arm_v6_pmu.c b/drivers/perf/arm_v6_pmu.c
new file mode 100644
index 000000000000..7cb12c8e06c7
--- /dev/null
+++ b/drivers/perf/arm_v6_pmu.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARMv6 Performance counter handling code.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ *
+ * ARMv6 has 2 configurable performance counters and a single cycle counter.
+ * They all share a single reset bit but can be written to zero so we can use
+ * that for a reset.
+ *
+ * The counters can't be individually enabled or disabled so when we remove
+ * one event and replace it with another we could get spurious counts from the
+ * wrong event. However, we can take advantage of the fact that the
+ * performance counters can export events to the event bus, and the event bus
+ * itself can be monitored. This requires that we *don't* export the events to
+ * the event bus. The procedure for disabling a configurable counter is:
+ * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
+ * effectively stops the counter from counting.
+ * - disable the counter's interrupt generation (each counter has it's
+ * own interrupt enable bit).
+ * Once stopped, the counter value can be written as 0 to reset.
+ *
+ * To enable a counter:
+ * - enable the counter's interrupt generation.
+ * - set the new event type.
+ *
+ * Note: the dedicated cycle counter only counts cycles and can't be
+ * enabled/disabled independently of the others. When we want to disable the
+ * cycle counter, we have to just disable the interrupt reporting and start
+ * ignoring that counter. When re-enabling, we have to reset the value and
+ * enable the interrupt.
+ */
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+enum armv6_perf_types {
+ ARMV6_PERFCTR_ICACHE_MISS = 0x0,
+ ARMV6_PERFCTR_IBUF_STALL = 0x1,
+ ARMV6_PERFCTR_DDEP_STALL = 0x2,
+ ARMV6_PERFCTR_ITLB_MISS = 0x3,
+ ARMV6_PERFCTR_DTLB_MISS = 0x4,
+ ARMV6_PERFCTR_BR_EXEC = 0x5,
+ ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
+ ARMV6_PERFCTR_INSTR_EXEC = 0x7,
+ ARMV6_PERFCTR_DCACHE_HIT = 0x9,
+ ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
+ ARMV6_PERFCTR_DCACHE_MISS = 0xB,
+ ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
+ ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
+ ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
+ ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
+ ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
+ ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
+ ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
+ ARMV6_PERFCTR_NOP = 0x20,
+};
+
+enum armv6_counters {
+ ARMV6_CYCLE_COUNTER = 0,
+ ARMV6_COUNTER0,
+ ARMV6_COUNTER1,
+ ARMV6_NUM_COUNTERS
+};
+
+/*
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL,
+};
+
+static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
+
+ /*
+ * The ARM performance counters can count micro DTLB misses, micro ITLB
+ * misses and main TLB misses. There isn't an event for TLB misses, so
+ * use the micro misses here and if users want the main TLB misses they
+ * can use a raw counter.
+ */
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
+};
+
+static inline unsigned long
+armv6_pmcr_read(void)
+{
+ u32 val;
+ asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
+ return val;
+}
+
+static inline void
+armv6_pmcr_write(unsigned long val)
+{
+ asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
+}
+
+#define ARMV6_PMCR_ENABLE (1 << 0)
+#define ARMV6_PMCR_CTR01_RESET (1 << 1)
+#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
+#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
+#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
+#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
+#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
+#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
+#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
+#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
+#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
+#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
+#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
+#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
+
+#define ARMV6_PMCR_OVERFLOWED_MASK \
+ (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
+ ARMV6_PMCR_CCOUNT_OVERFLOW)
+
+static inline int
+armv6_pmcr_has_overflowed(unsigned long pmcr)
+{
+ return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
+}
+
+static inline int
+armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
+ enum armv6_counters counter)
+{
+ int ret = 0;
+
+ if (ARMV6_CYCLE_COUNTER == counter)
+ ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
+ else if (ARMV6_COUNTER0 == counter)
+ ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
+ else if (ARMV6_COUNTER1 == counter)
+ ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
+ else
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+ return ret;
+}
+
+static inline u64 armv6pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+ unsigned long value = 0;
+
+ if (ARMV6_CYCLE_COUNTER == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
+ else if (ARMV6_COUNTER0 == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
+ else if (ARMV6_COUNTER1 == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
+ else
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+ return value;
+}
+
+static inline void armv6pmu_write_counter(struct perf_event *event, u64 value)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ if (ARMV6_CYCLE_COUNTER == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
+ else if (ARMV6_COUNTER0 == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
+ else if (ARMV6_COUNTER1 == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
+ else
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+}
+
+static void armv6pmu_enable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (ARMV6_CYCLE_COUNTER == idx) {
+ mask = 0;
+ evt = ARMV6_PMCR_CCOUNT_IEN;
+ } else if (ARMV6_COUNTER0 == idx) {
+ mask = ARMV6_PMCR_EVT_COUNT0_MASK;
+ evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
+ ARMV6_PMCR_COUNT0_IEN;
+ } else if (ARMV6_COUNTER1 == idx) {
+ mask = ARMV6_PMCR_EVT_COUNT1_MASK;
+ evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
+ ARMV6_PMCR_COUNT1_IEN;
+ } else {
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ /*
+ * Mask out the current event and set the counter to count the event
+ * that we're interested in.
+ */
+ val = armv6_pmcr_read();
+ val &= ~mask;
+ val |= evt;
+ armv6_pmcr_write(val);
+}
+
+static irqreturn_t
+armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ unsigned long pmcr = armv6_pmcr_read();
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ if (!armv6_pmcr_has_overflowed(pmcr))
+ return IRQ_NONE;
+
+ regs = get_irq_regs();
+
+ /*
+ * The interrupts are cleared by writing the overflow flags back to
+ * the control register. All of the other bits don't have any effect
+ * if they are rewritten, so write the whole value back.
+ */
+ armv6_pmcr_write(pmcr);
+
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV6_NUM_COUNTERS) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ /* Ignore if we don't have an event. */
+ if (!event)
+ continue;
+
+ /*
+ * We have a single interrupt for all counters. Check that
+ * each counter has overflowed before we process it.
+ */
+ if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ perf_event_overflow(event, &data, regs);
+ }
+
+ /*
+ * Handle the pending perf events.
+ *
+ * Note: this call *must* be run with interrupts disabled. For
+ * platforms that can have the PMU interrupts raised as an NMI, this
+ * will not work.
+ */
+ irq_work_run();
+
+ return IRQ_HANDLED;
+}
+
+static void armv6pmu_start(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = armv6_pmcr_read();
+ val |= ARMV6_PMCR_ENABLE;
+ armv6_pmcr_write(val);
+}
+
+static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = armv6_pmcr_read();
+ val &= ~ARMV6_PMCR_ENABLE;
+ armv6_pmcr_write(val);
+}
+
+static int
+armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ /* Always place a cycle counter into the cycle counter. */
+ if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) {
+ if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
+ return -EAGAIN;
+
+ return ARMV6_CYCLE_COUNTER;
+ } else {
+ /*
+ * For anything other than a cycle counter, try and use
+ * counter0 and counter1.
+ */
+ if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
+ return ARMV6_COUNTER1;
+
+ if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
+ return ARMV6_COUNTER0;
+
+ /* The counters are all in use. */
+ return -EAGAIN;
+ }
+}
+
+static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+static void armv6pmu_disable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (ARMV6_CYCLE_COUNTER == idx) {
+ mask = ARMV6_PMCR_CCOUNT_IEN;
+ evt = 0;
+ } else if (ARMV6_COUNTER0 == idx) {
+ mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
+ evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
+ } else if (ARMV6_COUNTER1 == idx) {
+ mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
+ evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
+ } else {
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ /*
+ * Mask out the current event and set the counter to count the number
+ * of ETM bus signal assertion cycles. The external reporting should
+ * be disabled and so this should never increment.
+ */
+ val = armv6_pmcr_read();
+ val &= ~mask;
+ val |= evt;
+ armv6_pmcr_write(val);
+}
+
+static int armv6_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv6_perf_map,
+ &armv6_perf_cache_map, 0xFF);
+}
+
+static void armv6pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->handle_irq = armv6pmu_handle_irq;
+ cpu_pmu->enable = armv6pmu_enable_event;
+ cpu_pmu->disable = armv6pmu_disable_event;
+ cpu_pmu->read_counter = armv6pmu_read_counter;
+ cpu_pmu->write_counter = armv6pmu_write_counter;
+ cpu_pmu->get_event_idx = armv6pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx;
+ cpu_pmu->start = armv6pmu_start;
+ cpu_pmu->stop = armv6pmu_stop;
+ cpu_pmu->map_event = armv6_map_event;
+
+ bitmap_set(cpu_pmu->cntr_mask, 0, ARMV6_NUM_COUNTERS);
+}
+
+static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1136";
+ return 0;
+}
+
+static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1176";
+ return 0;
+}
+
+static const struct of_device_id armv6_pmu_of_device_ids[] = {
+ {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
+ {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
+ { /* sentinel value */ }
+};
+
+static int armv6_pmu_device_probe(struct platform_device *pdev)
+{
+ return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver armv6_pmu_driver = {
+ .driver = {
+ .name = "armv6-pmu",
+ .of_match_table = armv6_pmu_of_device_ids,
+ },
+ .probe = armv6_pmu_device_probe,
+};
+
+builtin_platform_driver(armv6_pmu_driver);
diff --git a/drivers/perf/arm_v7_pmu.c b/drivers/perf/arm_v7_pmu.c
new file mode 100644
index 000000000000..a1e438101114
--- /dev/null
+++ b/drivers/perf/arm_v7_pmu.c
@@ -0,0 +1,1924 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
+ *
+ * ARMv7 support: Jean Pihet <jpihet@mvista.com>
+ * 2010 (c) MontaVista Software, LLC.
+ *
+ * Copied from ARMv6 code, with the low level code inspired
+ * by the ARMv7 Oprofile code.
+ *
+ * Cortex-A8 has up to 4 configurable performance counters and
+ * a single cycle counter.
+ * Cortex-A9 has up to 31 configurable performance counters and
+ * a single cycle counter.
+ *
+ * All counters can be enabled/disabled and IRQ masked separately. The cycle
+ * counter and all 4 performance counters together can be reset separately.
+ */
+
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+#include <asm/vfp.h>
+#include "../vfp/vfpinstr.h"
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+/*
+ * Common ARMv7 event types
+ *
+ * Note: An implementation may not be able to count all of these events
+ * but the encodings are considered to be `reserved' in the case that
+ * they are not available.
+ */
+#define ARMV7_PERFCTR_PMNC_SW_INCR 0x00
+#define ARMV7_PERFCTR_L1_ICACHE_REFILL 0x01
+#define ARMV7_PERFCTR_ITLB_REFILL 0x02
+#define ARMV7_PERFCTR_L1_DCACHE_REFILL 0x03
+#define ARMV7_PERFCTR_L1_DCACHE_ACCESS 0x04
+#define ARMV7_PERFCTR_DTLB_REFILL 0x05
+#define ARMV7_PERFCTR_MEM_READ 0x06
+#define ARMV7_PERFCTR_MEM_WRITE 0x07
+#define ARMV7_PERFCTR_INSTR_EXECUTED 0x08
+#define ARMV7_PERFCTR_EXC_TAKEN 0x09
+#define ARMV7_PERFCTR_EXC_EXECUTED 0x0A
+#define ARMV7_PERFCTR_CID_WRITE 0x0B
+
+/*
+ * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
+ * It counts:
+ * - all (taken) branch instructions,
+ * - instructions that explicitly write the PC,
+ * - exception generating instructions.
+ */
+#define ARMV7_PERFCTR_PC_WRITE 0x0C
+#define ARMV7_PERFCTR_PC_IMM_BRANCH 0x0D
+#define ARMV7_PERFCTR_PC_PROC_RETURN 0x0E
+#define ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F
+#define ARMV7_PERFCTR_PC_BRANCH_MIS_PRED 0x10
+#define ARMV7_PERFCTR_CLOCK_CYCLES 0x11
+#define ARMV7_PERFCTR_PC_BRANCH_PRED 0x12
+
+/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */
+#define ARMV7_PERFCTR_MEM_ACCESS 0x13
+#define ARMV7_PERFCTR_L1_ICACHE_ACCESS 0x14
+#define ARMV7_PERFCTR_L1_DCACHE_WB 0x15
+#define ARMV7_PERFCTR_L2_CACHE_ACCESS 0x16
+#define ARMV7_PERFCTR_L2_CACHE_REFILL 0x17
+#define ARMV7_PERFCTR_L2_CACHE_WB 0x18
+#define ARMV7_PERFCTR_BUS_ACCESS 0x19
+#define ARMV7_PERFCTR_MEM_ERROR 0x1A
+#define ARMV7_PERFCTR_INSTR_SPEC 0x1B
+#define ARMV7_PERFCTR_TTBR_WRITE 0x1C
+#define ARMV7_PERFCTR_BUS_CYCLES 0x1D
+
+#define ARMV7_PERFCTR_CPU_CYCLES 0xFF
+
+/* ARMv7 Cortex-A8 specific event types */
+#define ARMV7_A8_PERFCTR_L2_CACHE_ACCESS 0x43
+#define ARMV7_A8_PERFCTR_L2_CACHE_REFILL 0x44
+#define ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS 0x50
+#define ARMV7_A8_PERFCTR_STALL_ISIDE 0x56
+
+/* ARMv7 Cortex-A9 specific event types */
+#define ARMV7_A9_PERFCTR_INSTR_CORE_RENAME 0x68
+#define ARMV7_A9_PERFCTR_STALL_ICACHE 0x60
+#define ARMV7_A9_PERFCTR_STALL_DISPATCH 0x66
+
+/* ARMv7 Cortex-A5 specific event types */
+#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL 0xc2
+#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP 0xc3
+
+/* ARMv7 Cortex-A15 specific event types */
+#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ 0x40
+#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41
+#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ 0x42
+#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE 0x43
+
+#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ 0x4C
+#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE 0x4D
+
+#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ 0x50
+#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51
+#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ 0x52
+#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE 0x53
+
+#define ARMV7_A15_PERFCTR_PC_WRITE_SPEC 0x76
+
+/* ARMv7 Cortex-A12 specific event types */
+#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ 0x40
+#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41
+
+#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ 0x50
+#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51
+
+#define ARMV7_A12_PERFCTR_PC_WRITE_SPEC 0x76
+
+#define ARMV7_A12_PERFCTR_PF_TLB_REFILL 0xe7
+
+/* ARMv7 Krait specific event types */
+#define KRAIT_PMRESR0_GROUP0 0xcc
+#define KRAIT_PMRESR1_GROUP0 0xd0
+#define KRAIT_PMRESR2_GROUP0 0xd4
+#define KRAIT_VPMRESR0_GROUP0 0xd8
+
+#define KRAIT_PERFCTR_L1_ICACHE_ACCESS 0x10011
+#define KRAIT_PERFCTR_L1_ICACHE_MISS 0x10010
+
+#define KRAIT_PERFCTR_L1_ITLB_ACCESS 0x12222
+#define KRAIT_PERFCTR_L1_DTLB_ACCESS 0x12210
+
+/* ARMv7 Scorpion specific event types */
+#define SCORPION_LPM0_GROUP0 0x4c
+#define SCORPION_LPM1_GROUP0 0x50
+#define SCORPION_LPM2_GROUP0 0x54
+#define SCORPION_L2LPM_GROUP0 0x58
+#define SCORPION_VLPM_GROUP0 0x5c
+
+#define SCORPION_ICACHE_ACCESS 0x10053
+#define SCORPION_ICACHE_MISS 0x10052
+
+#define SCORPION_DTLB_ACCESS 0x12013
+#define SCORPION_DTLB_MISS 0x12012
+
+#define SCORPION_ITLB_MISS 0x12021
+
+/*
+ * Cortex-A8 HW events mapping
+ *
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE,
+};
+
+static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A9 HW events mapping
+ */
+static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH,
+};
+
+static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A5 HW events mapping
+ */
+static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+ /*
+ * The prefetch counters don't differentiate between the I side and the
+ * D side.
+ */
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A15 HW events mapping
+ */
+static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE,
+
+ /*
+ * Not all performance counters differentiate between read and write
+ * accesses/misses so we're not always strictly correct, but it's the
+ * best we can do. Writes and reads get combined in these cases.
+ */
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A7 HW events mapping
+ */
+static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A12 HW events mapping
+ */
+static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ /*
+ * Not all performance counters differentiate between read and write
+ * accesses/misses so we're not always strictly correct, but it's the
+ * best we can do. Writes and reads get combined in these cases.
+ */
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Krait HW events mapping
+ */
+static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Scorpion HW events mapping
+ */
+static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS,
+ /*
+ * Only ITLB misses and DTLB refills are supported. If users want the
+ * DTLB refills misses a raw counter must be used.
+ */
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *armv7_pmu_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group armv7_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = armv7_pmu_format_attrs,
+};
+
+#define ARMV7_EVENT_ATTR_RESOLVE(m) #m
+#define ARMV7_EVENT_ATTR(name, config) \
+ PMU_EVENT_ATTR_STRING(name, armv7_event_attr_##name, \
+ "event=" ARMV7_EVENT_ATTR_RESOLVE(config))
+
+ARMV7_EVENT_ATTR(sw_incr, ARMV7_PERFCTR_PMNC_SW_INCR);
+ARMV7_EVENT_ATTR(l1i_cache_refill, ARMV7_PERFCTR_L1_ICACHE_REFILL);
+ARMV7_EVENT_ATTR(l1i_tlb_refill, ARMV7_PERFCTR_ITLB_REFILL);
+ARMV7_EVENT_ATTR(l1d_cache_refill, ARMV7_PERFCTR_L1_DCACHE_REFILL);
+ARMV7_EVENT_ATTR(l1d_cache, ARMV7_PERFCTR_L1_DCACHE_ACCESS);
+ARMV7_EVENT_ATTR(l1d_tlb_refill, ARMV7_PERFCTR_DTLB_REFILL);
+ARMV7_EVENT_ATTR(ld_retired, ARMV7_PERFCTR_MEM_READ);
+ARMV7_EVENT_ATTR(st_retired, ARMV7_PERFCTR_MEM_WRITE);
+ARMV7_EVENT_ATTR(inst_retired, ARMV7_PERFCTR_INSTR_EXECUTED);
+ARMV7_EVENT_ATTR(exc_taken, ARMV7_PERFCTR_EXC_TAKEN);
+ARMV7_EVENT_ATTR(exc_return, ARMV7_PERFCTR_EXC_EXECUTED);
+ARMV7_EVENT_ATTR(cid_write_retired, ARMV7_PERFCTR_CID_WRITE);
+ARMV7_EVENT_ATTR(pc_write_retired, ARMV7_PERFCTR_PC_WRITE);
+ARMV7_EVENT_ATTR(br_immed_retired, ARMV7_PERFCTR_PC_IMM_BRANCH);
+ARMV7_EVENT_ATTR(br_return_retired, ARMV7_PERFCTR_PC_PROC_RETURN);
+ARMV7_EVENT_ATTR(unaligned_ldst_retired, ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS);
+ARMV7_EVENT_ATTR(br_mis_pred, ARMV7_PERFCTR_PC_BRANCH_MIS_PRED);
+ARMV7_EVENT_ATTR(cpu_cycles, ARMV7_PERFCTR_CLOCK_CYCLES);
+ARMV7_EVENT_ATTR(br_pred, ARMV7_PERFCTR_PC_BRANCH_PRED);
+
+static struct attribute *armv7_pmuv1_event_attrs[] = {
+ &armv7_event_attr_sw_incr.attr.attr,
+ &armv7_event_attr_l1i_cache_refill.attr.attr,
+ &armv7_event_attr_l1i_tlb_refill.attr.attr,
+ &armv7_event_attr_l1d_cache_refill.attr.attr,
+ &armv7_event_attr_l1d_cache.attr.attr,
+ &armv7_event_attr_l1d_tlb_refill.attr.attr,
+ &armv7_event_attr_ld_retired.attr.attr,
+ &armv7_event_attr_st_retired.attr.attr,
+ &armv7_event_attr_inst_retired.attr.attr,
+ &armv7_event_attr_exc_taken.attr.attr,
+ &armv7_event_attr_exc_return.attr.attr,
+ &armv7_event_attr_cid_write_retired.attr.attr,
+ &armv7_event_attr_pc_write_retired.attr.attr,
+ &armv7_event_attr_br_immed_retired.attr.attr,
+ &armv7_event_attr_br_return_retired.attr.attr,
+ &armv7_event_attr_unaligned_ldst_retired.attr.attr,
+ &armv7_event_attr_br_mis_pred.attr.attr,
+ &armv7_event_attr_cpu_cycles.attr.attr,
+ &armv7_event_attr_br_pred.attr.attr,
+ NULL,
+};
+
+static struct attribute_group armv7_pmuv1_events_attr_group = {
+ .name = "events",
+ .attrs = armv7_pmuv1_event_attrs,
+};
+
+ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS);
+ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS);
+ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB);
+ARMV7_EVENT_ATTR(l2d_cache, ARMV7_PERFCTR_L2_CACHE_ACCESS);
+ARMV7_EVENT_ATTR(l2d_cache_refill, ARMV7_PERFCTR_L2_CACHE_REFILL);
+ARMV7_EVENT_ATTR(l2d_cache_wb, ARMV7_PERFCTR_L2_CACHE_WB);
+ARMV7_EVENT_ATTR(bus_access, ARMV7_PERFCTR_BUS_ACCESS);
+ARMV7_EVENT_ATTR(memory_error, ARMV7_PERFCTR_MEM_ERROR);
+ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC);
+ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE);
+ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES);
+
+static struct attribute *armv7_pmuv2_event_attrs[] = {
+ &armv7_event_attr_sw_incr.attr.attr,
+ &armv7_event_attr_l1i_cache_refill.attr.attr,
+ &armv7_event_attr_l1i_tlb_refill.attr.attr,
+ &armv7_event_attr_l1d_cache_refill.attr.attr,
+ &armv7_event_attr_l1d_cache.attr.attr,
+ &armv7_event_attr_l1d_tlb_refill.attr.attr,
+ &armv7_event_attr_ld_retired.attr.attr,
+ &armv7_event_attr_st_retired.attr.attr,
+ &armv7_event_attr_inst_retired.attr.attr,
+ &armv7_event_attr_exc_taken.attr.attr,
+ &armv7_event_attr_exc_return.attr.attr,
+ &armv7_event_attr_cid_write_retired.attr.attr,
+ &armv7_event_attr_pc_write_retired.attr.attr,
+ &armv7_event_attr_br_immed_retired.attr.attr,
+ &armv7_event_attr_br_return_retired.attr.attr,
+ &armv7_event_attr_unaligned_ldst_retired.attr.attr,
+ &armv7_event_attr_br_mis_pred.attr.attr,
+ &armv7_event_attr_cpu_cycles.attr.attr,
+ &armv7_event_attr_br_pred.attr.attr,
+ &armv7_event_attr_mem_access.attr.attr,
+ &armv7_event_attr_l1i_cache.attr.attr,
+ &armv7_event_attr_l1d_cache_wb.attr.attr,
+ &armv7_event_attr_l2d_cache.attr.attr,
+ &armv7_event_attr_l2d_cache_refill.attr.attr,
+ &armv7_event_attr_l2d_cache_wb.attr.attr,
+ &armv7_event_attr_bus_access.attr.attr,
+ &armv7_event_attr_memory_error.attr.attr,
+ &armv7_event_attr_inst_spec.attr.attr,
+ &armv7_event_attr_ttbr_write_retired.attr.attr,
+ &armv7_event_attr_bus_cycles.attr.attr,
+ NULL,
+};
+
+static struct attribute_group armv7_pmuv2_events_attr_group = {
+ .name = "events",
+ .attrs = armv7_pmuv2_event_attrs,
+};
+
+/*
+ * Perf Events' indices
+ */
+#define ARMV7_IDX_CYCLE_COUNTER 31
+#define ARMV7_IDX_COUNTER_MAX 31
+/*
+ * ARMv7 low level PMNC access
+ */
+
+/*
+ * Per-CPU PMNC: config reg
+ */
+#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
+#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
+#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
+#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
+#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
+#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
+#define ARMV7_PMNC_N_MASK 0x1f
+#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
+
+/*
+ * FLAG: counters overflow flag status reg
+ */
+#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
+#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
+
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */
+#define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */
+
+/*
+ * Event filters for PMUv2
+ */
+#define ARMV7_EXCLUDE_PL1 BIT(31)
+#define ARMV7_EXCLUDE_USER BIT(30)
+#define ARMV7_INCLUDE_HYP BIT(27)
+
+/*
+ * Secure debug enable reg
+ */
+#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */
+
+static inline u32 armv7_pmnc_read(void)
+{
+ u32 val;
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
+ return val;
+}
+
+static inline void armv7_pmnc_write(u32 val)
+{
+ val &= ARMV7_PMNC_MASK;
+ isb();
+ asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
+}
+
+static inline int armv7_pmnc_has_overflowed(u32 pmnc)
+{
+ return pmnc & ARMV7_OVERFLOWED_MASK;
+}
+
+static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx)
+{
+ return test_bit(idx, cpu_pmu->cntr_mask);
+}
+
+static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
+{
+ return pmnc & BIT(idx);
+}
+
+static inline void armv7_pmnc_select_counter(int idx)
+{
+ asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (idx));
+ isb();
+}
+
+static inline u64 armv7pmu_read_counter(struct perf_event *event)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u32 value = 0;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u reading wrong counter %d\n",
+ smp_processor_id(), idx);
+ } else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
+ } else {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
+ }
+
+ return value;
+}
+
+static inline void armv7pmu_write_counter(struct perf_event *event, u64 value)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u writing wrong counter %d\n",
+ smp_processor_id(), idx);
+ } else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
+ asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value));
+ } else {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value));
+ }
+}
+
+static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
+{
+ armv7_pmnc_select_counter(idx);
+ val &= ARMV7_EVTYPE_MASK;
+ asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+}
+
+static inline void armv7_pmnc_enable_counter(int idx)
+{
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(idx)));
+}
+
+static inline void armv7_pmnc_disable_counter(int idx)
+{
+ asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(idx)));
+}
+
+static inline void armv7_pmnc_enable_intens(int idx)
+{
+ asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(idx)));
+}
+
+static inline void armv7_pmnc_disable_intens(int idx)
+{
+ asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(idx)));
+ isb();
+ /* Clear the overflow flag in case an interrupt is pending. */
+ asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(idx)));
+ isb();
+}
+
+static inline u32 armv7_pmnc_getreset_flags(void)
+{
+ u32 val;
+
+ /* Read */
+ asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+
+ /* Write to clear flags */
+ val &= ARMV7_FLAG_MASK;
+ asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
+
+ return val;
+}
+
+#ifdef DEBUG
+static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
+{
+ u32 val;
+ unsigned int cnt;
+
+ pr_info("PMNC registers dump:\n");
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+ pr_info("PMNC =0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
+ pr_info("CNTENS=0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
+ pr_info("INTENS=0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+ pr_info("FLAGS =0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
+ pr_info("SELECT=0x%08x\n", val);
+
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
+ pr_info("CCNT =0x%08x\n", val);
+
+ for_each_set_bit(cnt, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) {
+ armv7_pmnc_select_counter(cnt);
+ asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
+ pr_info("CNT[%d] count =0x%08x\n", cnt, val);
+ asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
+ pr_info("CNT[%d] evtsel=0x%08x\n", cnt, val);
+ }
+}
+#endif
+
+static void armv7pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ int idx = hwc->idx;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
+ smp_processor_id(), idx);
+ return;
+ }
+
+ /*
+ * Set event (if destined for PMNx counters)
+ * We only need to set the event for the cycle counter if we
+ * have the ability to perform event filtering.
+ */
+ if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
+ armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+ armv7_pmnc_enable_intens(idx);
+ armv7_pmnc_enable_counter(idx);
+}
+
+static void armv7pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ int idx = hwc->idx;
+
+ if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
+ smp_processor_id(), idx);
+ return;
+ }
+
+ armv7_pmnc_disable_counter(idx);
+ armv7_pmnc_disable_intens(idx);
+}
+
+static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ u32 pmnc;
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ /*
+ * Get and reset the IRQ flags
+ */
+ pmnc = armv7_pmnc_getreset_flags();
+
+ /*
+ * Did an overflow occur?
+ */
+ if (!armv7_pmnc_has_overflowed(pmnc))
+ return IRQ_NONE;
+
+ /*
+ * Handle the counter(s) overflow(s)
+ */
+ regs = get_irq_regs();
+
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ /* Ignore if we don't have an event. */
+ if (!event)
+ continue;
+
+ /*
+ * We have a single interrupt for all counters. Check that
+ * each counter has overflowed before we process it.
+ */
+ if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ perf_event_overflow(event, &data, regs);
+ }
+
+ /*
+ * Handle the pending perf events.
+ *
+ * Note: this call *must* be run with interrupts disabled. For
+ * platforms that can have the PMU interrupts raised as an NMI, this
+ * will not work.
+ */
+ irq_work_run();
+
+ return IRQ_HANDLED;
+}
+
+static void armv7pmu_start(struct arm_pmu *cpu_pmu)
+{
+ /* Enable all counters */
+ armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
+}
+
+static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ /* Disable all counters */
+ armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
+}
+
+static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT;
+
+ /* Always place a cycle counter into the cycle counter. */
+ if (evtype == ARMV7_PERFCTR_CPU_CYCLES) {
+ if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask))
+ return -EAGAIN;
+
+ return ARMV7_IDX_CYCLE_COUNTER;
+ }
+
+ /*
+ * For anything other than a cycle counter, try and use
+ * the events counters
+ */
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
+ }
+
+ /* The counters are all in use. */
+ return -EAGAIN;
+}
+
+static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+/*
+ * Add an event filter to a given event. This will only work for PMUv2 PMUs.
+ */
+static int armv7pmu_set_event_filter(struct hw_perf_event *event,
+ struct perf_event_attr *attr)
+{
+ unsigned long config_base = 0;
+
+ if (attr->exclude_idle) {
+ pr_debug("ARM performance counters do not support mode exclusion\n");
+ return -EOPNOTSUPP;
+ }
+ if (attr->exclude_user)
+ config_base |= ARMV7_EXCLUDE_USER;
+ if (attr->exclude_kernel)
+ config_base |= ARMV7_EXCLUDE_PL1;
+ if (!attr->exclude_hv)
+ config_base |= ARMV7_INCLUDE_HYP;
+
+ /*
+ * Install the filter into config_base as this is used to
+ * construct the event type.
+ */
+ event->config_base = config_base;
+
+ return 0;
+}
+
+static void armv7pmu_reset(void *info)
+{
+ struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
+ u32 idx, val;
+
+ if (cpu_pmu->secure_access) {
+ asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val));
+ val |= ARMV7_SDER_SUNIDEN;
+ asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val));
+ }
+
+ /* The counter and interrupt enable registers are unknown at reset. */
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS) {
+ armv7_pmnc_disable_counter(idx);
+ armv7_pmnc_disable_intens(idx);
+ }
+
+ /* Initialize & Reset PMNC: C and P bits */
+ armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
+}
+
+static int armv7_a8_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a8_perf_map,
+ &armv7_a8_perf_cache_map, 0xFF);
+}
+
+static int armv7_a9_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a9_perf_map,
+ &armv7_a9_perf_cache_map, 0xFF);
+}
+
+static int armv7_a5_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a5_perf_map,
+ &armv7_a5_perf_cache_map, 0xFF);
+}
+
+static int armv7_a15_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a15_perf_map,
+ &armv7_a15_perf_cache_map, 0xFF);
+}
+
+static int armv7_a7_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a7_perf_map,
+ &armv7_a7_perf_cache_map, 0xFF);
+}
+
+static int armv7_a12_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv7_a12_perf_map,
+ &armv7_a12_perf_cache_map, 0xFF);
+}
+
+static int krait_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &krait_perf_map,
+ &krait_perf_cache_map, 0xFFFFF);
+}
+
+static int krait_map_event_no_branch(struct perf_event *event)
+{
+ return armpmu_map_event(event, &krait_perf_map_no_branch,
+ &krait_perf_cache_map, 0xFFFFF);
+}
+
+static int scorpion_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &scorpion_perf_map,
+ &scorpion_perf_cache_map, 0xFFFFF);
+}
+
+static void armv7pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->handle_irq = armv7pmu_handle_irq;
+ cpu_pmu->enable = armv7pmu_enable_event;
+ cpu_pmu->disable = armv7pmu_disable_event;
+ cpu_pmu->read_counter = armv7pmu_read_counter;
+ cpu_pmu->write_counter = armv7pmu_write_counter;
+ cpu_pmu->get_event_idx = armv7pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx;
+ cpu_pmu->start = armv7pmu_start;
+ cpu_pmu->stop = armv7pmu_stop;
+ cpu_pmu->reset = armv7pmu_reset;
+};
+
+static void armv7_read_num_pmnc_events(void *info)
+{
+ int nb_cnt;
+ struct arm_pmu *cpu_pmu = info;
+
+ /* Read the nb of CNTx counters supported from PMNC */
+ nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
+ bitmap_set(cpu_pmu->cntr_mask, 0, nb_cnt);
+
+ /* Add the CPU cycles counter */
+ set_bit(ARMV7_IDX_CYCLE_COUNTER, cpu_pmu->cntr_mask);
+}
+
+static int armv7_probe_num_events(struct arm_pmu *arm_pmu)
+{
+ return smp_call_function_any(&arm_pmu->supported_cpus,
+ armv7_read_num_pmnc_events,
+ arm_pmu, 1);
+}
+
+static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a8";
+ cpu_pmu->map_event = armv7_a8_map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a9";
+ cpu_pmu->map_event = armv7_a9_map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a5";
+ cpu_pmu->map_event = armv7_a5_map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a15";
+ cpu_pmu->map_event = armv7_a15_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a7";
+ cpu_pmu->map_event = armv7_a7_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a12";
+ cpu_pmu->map_event = armv7_a12_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ int ret = armv7_a12_pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_cortex_a17";
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
+ return ret;
+}
+
+/*
+ * Krait Performance Monitor Region Event Selection Register (PMRESRn)
+ *
+ * 31 30 24 16 8 0
+ * +--------------------------------+
+ * PMRESR0 | EN | CC | CC | CC | CC | N = 1, R = 0
+ * +--------------------------------+
+ * PMRESR1 | EN | CC | CC | CC | CC | N = 1, R = 1
+ * +--------------------------------+
+ * PMRESR2 | EN | CC | CC | CC | CC | N = 1, R = 2
+ * +--------------------------------+
+ * VPMRESR0 | EN | CC | CC | CC | CC | N = 2, R = ?
+ * +--------------------------------+
+ * EN | G=3 | G=2 | G=1 | G=0
+ *
+ * Event Encoding:
+ *
+ * hwc->config_base = 0xNRCCG
+ *
+ * N = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR)
+ * R = region register
+ * CC = class of events the group G is choosing from
+ * G = group or particular event
+ *
+ * Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2
+ *
+ * A region (R) corresponds to a piece of the CPU (execution unit, instruction
+ * unit, etc.) while the event code (CC) corresponds to a particular class of
+ * events (interrupts for example). An event code is broken down into
+ * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
+ * example).
+ */
+
+#define KRAIT_EVENT (1 << 16)
+#define VENUM_EVENT (2 << 16)
+#define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT)
+#define PMRESRn_EN BIT(31)
+
+#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */
+#define EVENT_GROUP(event) ((event) & 0xf) /* G */
+#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */
+#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */
+#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */
+
+static u32 krait_read_pmresrn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val));
+ break;
+ case 1:
+ asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val));
+ break;
+ case 2:
+ asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in krait_pmu_get_event_idx() */
+ }
+
+ return val;
+}
+
+static void krait_write_pmresrn(int n, u32 val)
+{
+ switch (n) {
+ case 0:
+ asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val));
+ break;
+ case 1:
+ asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val));
+ break;
+ case 2:
+ asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in krait_pmu_get_event_idx() */
+ }
+}
+
+static u32 venum_read_pmresr(void)
+{
+ u32 val;
+ asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
+ return val;
+}
+
+static void venum_write_pmresr(u32 val)
+{
+ asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val));
+}
+
+static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val)
+{
+ u32 venum_new_val;
+ u32 fp_new_val;
+
+ BUG_ON(preemptible());
+ /* CPACR Enable CP10 and CP11 access */
+ *venum_orig_val = get_copro_access();
+ venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11);
+ set_copro_access(venum_new_val);
+
+ /* Enable FPEXC */
+ *fp_orig_val = fmrx(FPEXC);
+ fp_new_val = *fp_orig_val | FPEXC_EN;
+ fmxr(FPEXC, fp_new_val);
+}
+
+static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val)
+{
+ BUG_ON(preemptible());
+ /* Restore FPEXC */
+ fmxr(FPEXC, fp_orig_val);
+ isb();
+ /* Restore CPACR */
+ set_copro_access(venum_orig_val);
+}
+
+static u32 krait_get_pmresrn_event(unsigned int region)
+{
+ static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0,
+ KRAIT_PMRESR1_GROUP0,
+ KRAIT_PMRESR2_GROUP0 };
+ return pmresrn_table[region];
+}
+
+static void krait_evt_setup(int idx, u32 config_base)
+{
+ u32 val;
+ u32 mask;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ unsigned int code = EVENT_CODE(config_base);
+ unsigned int group_shift;
+ bool venum_event = EVENT_VENUM(config_base);
+
+ group_shift = group * 8;
+ mask = 0xff << group_shift;
+
+ /* Configure evtsel for the region and group */
+ if (venum_event)
+ val = KRAIT_VPMRESR0_GROUP0;
+ else
+ val = krait_get_pmresrn_event(region);
+ val += group;
+ /* Mix in mode-exclusion bits */
+ val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+ armv7_pmnc_write_evtsel(idx, val);
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = krait_read_pmresrn(region);
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ krait_write_pmresrn(region, val);
+ }
+}
+
+static u32 clear_pmresrn_group(u32 val, int group)
+{
+ u32 mask;
+ int group_shift;
+
+ group_shift = group * 8;
+ mask = 0xff << group_shift;
+ val &= ~mask;
+
+ /* Don't clear enable bit if entire region isn't disabled */
+ if (val & ~PMRESRn_EN)
+ return val |= PMRESRn_EN;
+
+ return 0;
+}
+
+static void krait_clearpmu(u32 config_base)
+{
+ u32 val;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ bool venum_event = EVENT_VENUM(config_base);
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val = clear_pmresrn_group(val, group);
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = krait_read_pmresrn(region);
+ val = clear_pmresrn_group(val, group);
+ krait_write_pmresrn(region, val);
+ }
+}
+
+static void krait_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /* Disable counter and interrupt */
+
+ /* Disable counter */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Clear pmresr code (if destined for PMNx counters)
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ krait_clearpmu(hwc->config_base);
+
+ /* Disable interrupt for this counter */
+ armv7_pmnc_disable_intens(idx);
+}
+
+static void krait_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /*
+ * Set event (if destined for PMNx counters)
+ * We set the event for the cycle counter because we
+ * have the ability to perform event filtering.
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ krait_evt_setup(idx, hwc->config_base);
+ else
+ armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+ armv7_pmnc_enable_intens(idx);
+ armv7_pmnc_enable_counter(idx);
+}
+
+static void krait_pmu_reset(void *info)
+{
+ u32 vval, fval;
+ struct arm_pmu *cpu_pmu = info;
+ u32 idx;
+
+ armv7pmu_reset(info);
+
+ /* Clear all pmresrs */
+ krait_write_pmresrn(0, 0);
+ krait_write_pmresrn(1, 0);
+ krait_write_pmresrn(2, 0);
+
+ venum_pre_pmresr(&vval, &fval);
+ venum_write_pmresr(0);
+ venum_post_pmresr(vval, fval);
+
+ /* Reset PMxEVNCTCR to sane default */
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+ }
+
+}
+
+static int krait_event_to_bit(struct perf_event *event, unsigned int region,
+ unsigned int group)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+ if (hwc->config_base & VENUM_EVENT)
+ bit = KRAIT_VPMRESR0_GROUP0;
+ else
+ bit = krait_get_pmresrn_event(region);
+ bit -= krait_get_pmresrn_event(0);
+ bit += group;
+ /*
+ * Lower bits are reserved for use by the counters (see
+ * armv7pmu_get_event_idx() for more info)
+ */
+ bit += bitmap_weight(cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX);
+
+ return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
+static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx;
+ int bit = -1;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int code = EVENT_CODE(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool krait_event = EVENT_CPU(hwc->config_base);
+
+ if (venum_event || krait_event) {
+ /* Ignore invalid events */
+ if (group > 3 || region > 2)
+ return -EINVAL;
+ if (venum_event && (code & 0xe0))
+ return -EINVAL;
+
+ bit = krait_event_to_bit(event, region, group);
+ if (test_and_set_bit(bit, cpuc->used_mask))
+ return -EAGAIN;
+ }
+
+ idx = armv7pmu_get_event_idx(cpuc, event);
+ if (idx < 0 && bit >= 0)
+ clear_bit(bit, cpuc->used_mask);
+
+ return idx;
+}
+
+static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool krait_event = EVENT_CPU(hwc->config_base);
+
+ armv7pmu_clear_event_idx(cpuc, event);
+ if (venum_event || krait_event) {
+ bit = krait_event_to_bit(event, region, group);
+ clear_bit(bit, cpuc->used_mask);
+ }
+}
+
+static int krait_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_krait";
+ /* Some early versions of Krait don't support PC write events */
+ if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node,
+ "qcom,no-pc-write"))
+ cpu_pmu->map_event = krait_map_event_no_branch;
+ else
+ cpu_pmu->map_event = krait_map_event;
+ cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+ cpu_pmu->reset = krait_pmu_reset;
+ cpu_pmu->enable = krait_pmu_enable_event;
+ cpu_pmu->disable = krait_pmu_disable_event;
+ cpu_pmu->get_event_idx = krait_pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+/*
+ * Scorpion Local Performance Monitor Register (LPMn)
+ *
+ * 31 30 24 16 8 0
+ * +--------------------------------+
+ * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0
+ * +--------------------------------+
+ * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1
+ * +--------------------------------+
+ * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2
+ * +--------------------------------+
+ * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3
+ * +--------------------------------+
+ * VLPM | EN | CC | CC | CC | CC | N = 2, R = ?
+ * +--------------------------------+
+ * EN | G=3 | G=2 | G=1 | G=0
+ *
+ *
+ * Event Encoding:
+ *
+ * hwc->config_base = 0xNRCCG
+ *
+ * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM)
+ * R = region register
+ * CC = class of events the group G is choosing from
+ * G = group or particular event
+ *
+ * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2
+ *
+ * A region (R) corresponds to a piece of the CPU (execution unit, instruction
+ * unit, etc.) while the event code (CC) corresponds to a particular class of
+ * events (interrupts for example). An event code is broken down into
+ * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
+ * example).
+ */
+
+static u32 scorpion_read_pmresrn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
+ break;
+ case 1:
+ asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
+ break;
+ case 2:
+ asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
+ break;
+ case 3:
+ asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+ }
+
+ return val;
+}
+
+static void scorpion_write_pmresrn(int n, u32 val)
+{
+ switch (n) {
+ case 0:
+ asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val));
+ break;
+ case 1:
+ asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val));
+ break;
+ case 2:
+ asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val));
+ break;
+ case 3:
+ asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val));
+ break;
+ default:
+ BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+ }
+}
+
+static u32 scorpion_get_pmresrn_event(unsigned int region)
+{
+ static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0,
+ SCORPION_LPM1_GROUP0,
+ SCORPION_LPM2_GROUP0,
+ SCORPION_L2LPM_GROUP0 };
+ return pmresrn_table[region];
+}
+
+static void scorpion_evt_setup(int idx, u32 config_base)
+{
+ u32 val;
+ u32 mask;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ unsigned int code = EVENT_CODE(config_base);
+ unsigned int group_shift;
+ bool venum_event = EVENT_VENUM(config_base);
+
+ group_shift = group * 8;
+ mask = 0xff << group_shift;
+
+ /* Configure evtsel for the region and group */
+ if (venum_event)
+ val = SCORPION_VLPM_GROUP0;
+ else
+ val = scorpion_get_pmresrn_event(region);
+ val += group;
+ /* Mix in mode-exclusion bits */
+ val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+ armv7_pmnc_write_evtsel(idx, val);
+
+ asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = scorpion_read_pmresrn(region);
+ val &= ~mask;
+ val |= code << group_shift;
+ val |= PMRESRn_EN;
+ scorpion_write_pmresrn(region, val);
+ }
+}
+
+static void scorpion_clearpmu(u32 config_base)
+{
+ u32 val;
+ u32 vval, fval;
+ unsigned int region = EVENT_REGION(config_base);
+ unsigned int group = EVENT_GROUP(config_base);
+ bool venum_event = EVENT_VENUM(config_base);
+
+ if (venum_event) {
+ venum_pre_pmresr(&vval, &fval);
+ val = venum_read_pmresr();
+ val = clear_pmresrn_group(val, group);
+ venum_write_pmresr(val);
+ venum_post_pmresr(vval, fval);
+ } else {
+ val = scorpion_read_pmresrn(region);
+ val = clear_pmresrn_group(val, group);
+ scorpion_write_pmresrn(region, val);
+ }
+}
+
+static void scorpion_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /* Disable counter and interrupt */
+
+ /* Disable counter */
+ armv7_pmnc_disable_counter(idx);
+
+ /*
+ * Clear pmresr code (if destined for PMNx counters)
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ scorpion_clearpmu(hwc->config_base);
+
+ /* Disable interrupt for this counter */
+ armv7_pmnc_disable_intens(idx);
+}
+
+static void scorpion_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ /*
+ * Set event (if destined for PMNx counters)
+ * We don't set the event for the cycle counter because we
+ * don't have the ability to perform event filtering.
+ */
+ if (hwc->config_base & KRAIT_EVENT_MASK)
+ scorpion_evt_setup(idx, hwc->config_base);
+ else if (idx != ARMV7_IDX_CYCLE_COUNTER)
+ armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+ armv7_pmnc_enable_intens(idx);
+ armv7_pmnc_enable_counter(idx);
+}
+
+static void scorpion_pmu_reset(void *info)
+{
+ u32 vval, fval;
+ struct arm_pmu *cpu_pmu = info;
+ u32 idx;
+
+ armv7pmu_reset(info);
+
+ /* Clear all pmresrs */
+ scorpion_write_pmresrn(0, 0);
+ scorpion_write_pmresrn(1, 0);
+ scorpion_write_pmresrn(2, 0);
+ scorpion_write_pmresrn(3, 0);
+
+ venum_pre_pmresr(&vval, &fval);
+ venum_write_pmresr(0);
+ venum_post_pmresr(vval, fval);
+
+ /* Reset PMxEVNCTCR to sane default */
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX) {
+ armv7_pmnc_select_counter(idx);
+ asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+ }
+}
+
+static int scorpion_event_to_bit(struct perf_event *event, unsigned int region,
+ unsigned int group)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+ if (hwc->config_base & VENUM_EVENT)
+ bit = SCORPION_VLPM_GROUP0;
+ else
+ bit = scorpion_get_pmresrn_event(region);
+ bit -= scorpion_get_pmresrn_event(0);
+ bit += group;
+ /*
+ * Lower bits are reserved for use by the counters (see
+ * armv7pmu_get_event_idx() for more info)
+ */
+ bit += bitmap_weight(cpu_pmu->cntr_mask, ARMV7_IDX_COUNTER_MAX);
+
+ return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
+static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx;
+ int bit = -1;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+ if (venum_event || scorpion_event) {
+ /* Ignore invalid events */
+ if (group > 3 || region > 3)
+ return -EINVAL;
+
+ bit = scorpion_event_to_bit(event, region, group);
+ if (test_and_set_bit(bit, cpuc->used_mask))
+ return -EAGAIN;
+ }
+
+ idx = armv7pmu_get_event_idx(cpuc, event);
+ if (idx < 0 && bit >= 0)
+ clear_bit(bit, cpuc->used_mask);
+
+ return idx;
+}
+
+static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int bit;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int region = EVENT_REGION(hwc->config_base);
+ unsigned int group = EVENT_GROUP(hwc->config_base);
+ bool venum_event = EVENT_VENUM(hwc->config_base);
+ bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+ armv7pmu_clear_event_idx(cpuc, event);
+ if (venum_event || scorpion_event) {
+ bit = scorpion_event_to_bit(event, region, group);
+ clear_bit(bit, cpuc->used_mask);
+ }
+}
+
+static int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_scorpion";
+ cpu_pmu->map_event = scorpion_map_event;
+ cpu_pmu->reset = scorpion_pmu_reset;
+ cpu_pmu->enable = scorpion_pmu_enable_event;
+ cpu_pmu->disable = scorpion_pmu_disable_event;
+ cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv7pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv7_scorpion_mp";
+ cpu_pmu->map_event = scorpion_map_event;
+ cpu_pmu->reset = scorpion_pmu_reset;
+ cpu_pmu->enable = scorpion_pmu_enable_event;
+ cpu_pmu->disable = scorpion_pmu_disable_event;
+ cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static const struct of_device_id armv7_pmu_of_device_ids[] = {
+ {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init},
+ {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init},
+ {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init},
+ {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init},
+ {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init},
+ {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init},
+ {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init},
+ {.compatible = "qcom,krait-pmu", .data = krait_pmu_init},
+ {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init},
+ {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init},
+ {},
+};
+
+static int armv7_pmu_device_probe(struct platform_device *pdev)
+{
+ return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver armv7_pmu_driver = {
+ .driver = {
+ .name = "armv7-pmu",
+ .of_match_table = armv7_pmu_of_device_ids,
+ .suppress_bind_attrs = true,
+ },
+ .probe = armv7_pmu_device_probe,
+};
+
+builtin_platform_driver(armv7_pmu_driver);
diff --git a/drivers/perf/arm_xscale_pmu.c b/drivers/perf/arm_xscale_pmu.c
new file mode 100644
index 000000000000..c2ac41dd9e19
--- /dev/null
+++ b/drivers/perf/arm_xscale_pmu.c
@@ -0,0 +1,747 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARMv5 [xscale] Performance counter handling code.
+ *
+ * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * Based on the previous xscale OProfile code.
+ *
+ * There are two variants of the xscale PMU that we support:
+ * - xscale1pmu: 2 event counters and a cycle counter
+ * - xscale2pmu: 4 event counters and a cycle counter
+ * The two variants share event definitions, but have different
+ * PMU structures.
+ */
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+enum xscale_perf_types {
+ XSCALE_PERFCTR_ICACHE_MISS = 0x00,
+ XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
+ XSCALE_PERFCTR_DATA_STALL = 0x02,
+ XSCALE_PERFCTR_ITLB_MISS = 0x03,
+ XSCALE_PERFCTR_DTLB_MISS = 0x04,
+ XSCALE_PERFCTR_BRANCH = 0x05,
+ XSCALE_PERFCTR_BRANCH_MISS = 0x06,
+ XSCALE_PERFCTR_INSTRUCTION = 0x07,
+ XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
+ XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
+ XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
+ XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
+ XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
+ XSCALE_PERFCTR_PC_CHANGED = 0x0D,
+ XSCALE_PERFCTR_BCU_REQUEST = 0x10,
+ XSCALE_PERFCTR_BCU_FULL = 0x11,
+ XSCALE_PERFCTR_BCU_DRAIN = 0x12,
+ XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
+ XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
+ XSCALE_PERFCTR_RMW = 0x16,
+ /* XSCALE_PERFCTR_CCNT is not hardware defined */
+ XSCALE_PERFCTR_CCNT = 0xFE,
+ XSCALE_PERFCTR_UNUSED = 0xFF,
+};
+
+enum xscale_counters {
+ XSCALE_CYCLE_COUNTER = 0,
+ XSCALE_COUNTER0,
+ XSCALE_COUNTER1,
+ XSCALE_COUNTER2,
+ XSCALE_COUNTER3,
+};
+#define XSCALE1_NUM_COUNTERS 3
+#define XSCALE2_NUM_COUNTERS 5
+
+static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
+ [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
+ [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER,
+};
+
+static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
+};
+
+#define XSCALE_PMU_ENABLE 0x001
+#define XSCALE_PMN_RESET 0x002
+#define XSCALE_CCNT_RESET 0x004
+#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET)
+#define XSCALE_PMU_CNT64 0x008
+
+#define XSCALE1_OVERFLOWED_MASK 0x700
+#define XSCALE1_CCOUNT_OVERFLOW 0x400
+#define XSCALE1_COUNT0_OVERFLOW 0x100
+#define XSCALE1_COUNT1_OVERFLOW 0x200
+#define XSCALE1_CCOUNT_INT_EN 0x040
+#define XSCALE1_COUNT0_INT_EN 0x010
+#define XSCALE1_COUNT1_INT_EN 0x020
+#define XSCALE1_COUNT0_EVT_SHFT 12
+#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
+#define XSCALE1_COUNT1_EVT_SHFT 20
+#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
+
+static inline u32
+xscale1pmu_read_pmnc(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
+ return val;
+}
+
+static inline void
+xscale1pmu_write_pmnc(u32 val)
+{
+ /* upper 4bits and 7, 11 are write-as-0 */
+ val &= 0xffff77f;
+ asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
+}
+
+static inline int
+xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
+ enum xscale_counters counter)
+{
+ int ret = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
+ break;
+ case XSCALE_COUNTER0:
+ ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
+ break;
+ case XSCALE_COUNTER1:
+ ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+ }
+
+ return ret;
+}
+
+static irqreturn_t
+xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ unsigned long pmnc;
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ /*
+ * NOTE: there's an A stepping erratum that states if an overflow
+ * bit already exists and another occurs, the previous
+ * Overflow bit gets cleared. There's no workaround.
+ * Fixed in B stepping or later.
+ */
+ pmnc = xscale1pmu_read_pmnc();
+
+ /*
+ * Write the value back to clear the overflow flags. Overflow
+ * flags remain in pmnc for use below. We also disable the PMU
+ * while we process the interrupt.
+ */
+ xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+ if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
+ return IRQ_NONE;
+
+ regs = get_irq_regs();
+
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, XSCALE1_NUM_COUNTERS) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ if (!event)
+ continue;
+
+ if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ perf_event_overflow(event, &data, regs);
+ }
+
+ irq_work_run();
+
+ /*
+ * Re-enable the PMU.
+ */
+ pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+ xscale1pmu_write_pmnc(pmnc);
+
+ return IRQ_HANDLED;
+}
+
+static void xscale1pmu_enable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ mask = 0;
+ evt = XSCALE1_CCOUNT_INT_EN;
+ break;
+ case XSCALE_COUNTER0:
+ mask = XSCALE1_COUNT0_EVT_MASK;
+ evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
+ XSCALE1_COUNT0_INT_EN;
+ break;
+ case XSCALE_COUNTER1:
+ mask = XSCALE1_COUNT1_EVT_MASK;
+ evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
+ XSCALE1_COUNT1_INT_EN;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ val = xscale1pmu_read_pmnc();
+ val &= ~mask;
+ val |= evt;
+ xscale1pmu_write_pmnc(val);
+}
+
+static void xscale1pmu_disable_event(struct perf_event *event)
+{
+ unsigned long val, mask, evt;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ mask = XSCALE1_CCOUNT_INT_EN;
+ evt = 0;
+ break;
+ case XSCALE_COUNTER0:
+ mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
+ evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER1:
+ mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
+ evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ val = xscale1pmu_read_pmnc();
+ val &= ~mask;
+ val |= evt;
+ xscale1pmu_write_pmnc(val);
+}
+
+static int
+xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ if (XSCALE_PERFCTR_CCNT == hwc->config_base) {
+ if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
+ return -EAGAIN;
+
+ return XSCALE_CYCLE_COUNTER;
+ } else {
+ if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
+ return XSCALE_COUNTER1;
+
+ if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
+ return XSCALE_COUNTER0;
+
+ return -EAGAIN;
+ }
+}
+
+static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale1pmu_read_pmnc();
+ val |= XSCALE_PMU_ENABLE;
+ xscale1pmu_write_pmnc(val);
+}
+
+static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale1pmu_read_pmnc();
+ val &= ~XSCALE_PMU_ENABLE;
+ xscale1pmu_write_pmnc(val);
+}
+
+static inline u64 xscale1pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+ u32 val = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
+ break;
+ }
+
+ return val;
+}
+
+static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
+ break;
+ }
+}
+
+static int xscale_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &xscale_perf_map,
+ &xscale_perf_cache_map, 0xFF);
+}
+
+static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->name = "armv5_xscale1";
+ cpu_pmu->handle_irq = xscale1pmu_handle_irq;
+ cpu_pmu->enable = xscale1pmu_enable_event;
+ cpu_pmu->disable = xscale1pmu_disable_event;
+ cpu_pmu->read_counter = xscale1pmu_read_counter;
+ cpu_pmu->write_counter = xscale1pmu_write_counter;
+ cpu_pmu->get_event_idx = xscale1pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx;
+ cpu_pmu->start = xscale1pmu_start;
+ cpu_pmu->stop = xscale1pmu_stop;
+ cpu_pmu->map_event = xscale_map_event;
+
+ bitmap_set(cpu_pmu->cntr_mask, 0, XSCALE1_NUM_COUNTERS);
+
+ return 0;
+}
+
+#define XSCALE2_OVERFLOWED_MASK 0x01f
+#define XSCALE2_CCOUNT_OVERFLOW 0x001
+#define XSCALE2_COUNT0_OVERFLOW 0x002
+#define XSCALE2_COUNT1_OVERFLOW 0x004
+#define XSCALE2_COUNT2_OVERFLOW 0x008
+#define XSCALE2_COUNT3_OVERFLOW 0x010
+#define XSCALE2_CCOUNT_INT_EN 0x001
+#define XSCALE2_COUNT0_INT_EN 0x002
+#define XSCALE2_COUNT1_INT_EN 0x004
+#define XSCALE2_COUNT2_INT_EN 0x008
+#define XSCALE2_COUNT3_INT_EN 0x010
+#define XSCALE2_COUNT0_EVT_SHFT 0
+#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
+#define XSCALE2_COUNT1_EVT_SHFT 8
+#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
+#define XSCALE2_COUNT2_EVT_SHFT 16
+#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
+#define XSCALE2_COUNT3_EVT_SHFT 24
+#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
+
+static inline u32
+xscale2pmu_read_pmnc(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
+ /* bits 1-2 and 4-23 are read-unpredictable */
+ return val & 0xff000009;
+}
+
+static inline void
+xscale2pmu_write_pmnc(u32 val)
+{
+ /* bits 4-23 are write-as-0, 24-31 are write ignored */
+ val &= 0xf;
+ asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_overflow_flags(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
+ return val;
+}
+
+static inline void
+xscale2pmu_write_overflow_flags(u32 val)
+{
+ asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_event_select(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
+ return val;
+}
+
+static inline void
+xscale2pmu_write_event_select(u32 val)
+{
+ asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
+}
+
+static inline u32
+xscale2pmu_read_int_enable(void)
+{
+ u32 val;
+ asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
+ return val;
+}
+
+static void
+xscale2pmu_write_int_enable(u32 val)
+{
+ asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
+}
+
+static inline int
+xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
+ enum xscale_counters counter)
+{
+ int ret = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
+ break;
+ case XSCALE_COUNTER0:
+ ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
+ break;
+ case XSCALE_COUNTER1:
+ ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
+ break;
+ case XSCALE_COUNTER2:
+ ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
+ break;
+ case XSCALE_COUNTER3:
+ ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+ }
+
+ return ret;
+}
+
+static irqreturn_t
+xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ unsigned long pmnc, of_flags;
+ struct perf_sample_data data;
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ int idx;
+
+ /* Disable the PMU. */
+ pmnc = xscale2pmu_read_pmnc();
+ xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+ /* Check the overflow flag register. */
+ of_flags = xscale2pmu_read_overflow_flags();
+ if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
+ return IRQ_NONE;
+
+ /* Clear the overflow bits. */
+ xscale2pmu_write_overflow_flags(of_flags);
+
+ regs = get_irq_regs();
+
+ for_each_set_bit(idx, cpu_pmu->cntr_mask, XSCALE2_NUM_COUNTERS) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ if (!event)
+ continue;
+
+ if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ perf_event_overflow(event, &data, regs);
+ }
+
+ irq_work_run();
+
+ /*
+ * Re-enable the PMU.
+ */
+ pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+ xscale2pmu_write_pmnc(pmnc);
+
+ return IRQ_HANDLED;
+}
+
+static void xscale2pmu_enable_event(struct perf_event *event)
+{
+ unsigned long ien, evtsel;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ ien = xscale2pmu_read_int_enable();
+ evtsel = xscale2pmu_read_event_select();
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ ien |= XSCALE2_CCOUNT_INT_EN;
+ break;
+ case XSCALE_COUNTER0:
+ ien |= XSCALE2_COUNT0_INT_EN;
+ evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER1:
+ ien |= XSCALE2_COUNT1_INT_EN;
+ evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER2:
+ ien |= XSCALE2_COUNT2_INT_EN;
+ evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
+ break;
+ case XSCALE_COUNTER3:
+ ien |= XSCALE2_COUNT3_INT_EN;
+ evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+ evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ xscale2pmu_write_event_select(evtsel);
+ xscale2pmu_write_int_enable(ien);
+}
+
+static void xscale2pmu_disable_event(struct perf_event *event)
+{
+ unsigned long ien, evtsel, of_flags;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ ien = xscale2pmu_read_int_enable();
+ evtsel = xscale2pmu_read_event_select();
+
+ switch (idx) {
+ case XSCALE_CYCLE_COUNTER:
+ ien &= ~XSCALE2_CCOUNT_INT_EN;
+ of_flags = XSCALE2_CCOUNT_OVERFLOW;
+ break;
+ case XSCALE_COUNTER0:
+ ien &= ~XSCALE2_COUNT0_INT_EN;
+ evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
+ of_flags = XSCALE2_COUNT0_OVERFLOW;
+ break;
+ case XSCALE_COUNTER1:
+ ien &= ~XSCALE2_COUNT1_INT_EN;
+ evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
+ of_flags = XSCALE2_COUNT1_OVERFLOW;
+ break;
+ case XSCALE_COUNTER2:
+ ien &= ~XSCALE2_COUNT2_INT_EN;
+ evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
+ of_flags = XSCALE2_COUNT2_OVERFLOW;
+ break;
+ case XSCALE_COUNTER3:
+ ien &= ~XSCALE2_COUNT3_INT_EN;
+ evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+ evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
+ of_flags = XSCALE2_COUNT3_OVERFLOW;
+ break;
+ default:
+ WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+ return;
+ }
+
+ xscale2pmu_write_event_select(evtsel);
+ xscale2pmu_write_int_enable(ien);
+ xscale2pmu_write_overflow_flags(of_flags);
+}
+
+static int
+xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx = xscale1pmu_get_event_idx(cpuc, event);
+ if (idx >= 0)
+ goto out;
+
+ if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
+ idx = XSCALE_COUNTER3;
+ else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
+ idx = XSCALE_COUNTER2;
+out:
+ return idx;
+}
+
+static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
+ val |= XSCALE_PMU_ENABLE;
+ xscale2pmu_write_pmnc(val);
+}
+
+static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ unsigned long val;
+
+ val = xscale2pmu_read_pmnc();
+ val &= ~XSCALE_PMU_ENABLE;
+ xscale2pmu_write_pmnc(val);
+}
+
+static inline u64 xscale2pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+ u32 val = 0;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER2:
+ asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
+ break;
+ case XSCALE_COUNTER3:
+ asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
+ break;
+ }
+
+ return val;
+}
+
+static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ switch (counter) {
+ case XSCALE_CYCLE_COUNTER:
+ asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER0:
+ asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER1:
+ asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER2:
+ asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
+ break;
+ case XSCALE_COUNTER3:
+ asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
+ break;
+ }
+}
+
+static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->name = "armv5_xscale2";
+ cpu_pmu->handle_irq = xscale2pmu_handle_irq;
+ cpu_pmu->enable = xscale2pmu_enable_event;
+ cpu_pmu->disable = xscale2pmu_disable_event;
+ cpu_pmu->read_counter = xscale2pmu_read_counter;
+ cpu_pmu->write_counter = xscale2pmu_write_counter;
+ cpu_pmu->get_event_idx = xscale2pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx;
+ cpu_pmu->start = xscale2pmu_start;
+ cpu_pmu->stop = xscale2pmu_stop;
+ cpu_pmu->map_event = xscale_map_event;
+
+ bitmap_set(cpu_pmu->cntr_mask, 0, XSCALE2_NUM_COUNTERS);
+
+ return 0;
+}
+
+static const struct pmu_probe_info xscale_pmu_probe_table[] = {
+ XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
+ XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
+ { /* sentinel value */ }
+};
+
+static int xscale_pmu_device_probe(struct platform_device *pdev)
+{
+ return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table);
+}
+
+static struct platform_driver xscale_pmu_driver = {
+ .driver = {
+ .name = "xscale-pmu",
+ },
+ .probe = xscale_pmu_device_probe,
+};
+
+builtin_platform_driver(xscale_pmu_driver);
diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c
new file mode 100644
index 000000000000..d094030220bf
--- /dev/null
+++ b/drivers/perf/cxl_pmu.c
@@ -0,0 +1,983 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright(c) 2023 Huawei
+ *
+ * The CXL 3.0 specification includes a standard Performance Monitoring Unit,
+ * called the CXL PMU, or CPMU. In order to allow a high degree of
+ * implementation flexibility the specification provides a wide range of
+ * options all of which are self describing.
+ *
+ * Details in CXL rev 3.0 section 8.2.7 CPMU Register Interface
+ */
+
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/perf_event.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/bits.h>
+#include <linux/list.h>
+#include <linux/bug.h>
+#include <linux/pci.h>
+
+#include "../cxl/cxlpci.h"
+#include "../cxl/cxl.h"
+#include "../cxl/pmu.h"
+
+#define CXL_PMU_CAP_REG 0x0
+#define CXL_PMU_CAP_NUM_COUNTERS_MSK GENMASK_ULL(5, 0)
+#define CXL_PMU_CAP_COUNTER_WIDTH_MSK GENMASK_ULL(15, 8)
+#define CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK GENMASK_ULL(24, 20)
+#define CXL_PMU_CAP_FILTERS_SUP_MSK GENMASK_ULL(39, 32)
+#define CXL_PMU_FILTER_HDM BIT(0)
+#define CXL_PMU_FILTER_CHAN_RANK_BANK BIT(1)
+#define CXL_PMU_CAP_MSI_N_MSK GENMASK_ULL(47, 44)
+#define CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN BIT_ULL(48)
+#define CXL_PMU_CAP_FREEZE BIT_ULL(49)
+#define CXL_PMU_CAP_INT BIT_ULL(50)
+#define CXL_PMU_CAP_VERSION_MSK GENMASK_ULL(63, 60)
+
+#define CXL_PMU_OVERFLOW_REG 0x10
+#define CXL_PMU_FREEZE_REG 0x18
+#define CXL_PMU_EVENT_CAP_REG(n) (0x100 + 8 * (n))
+#define CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK GENMASK_ULL(31, 0)
+#define CXL_PMU_EVENT_CAP_GROUP_ID_MSK GENMASK_ULL(47, 32)
+#define CXL_PMU_EVENT_CAP_VENDOR_ID_MSK GENMASK_ULL(63, 48)
+
+#define CXL_PMU_COUNTER_CFG_REG(n) (0x200 + 8 * (n))
+#define CXL_PMU_COUNTER_CFG_TYPE_MSK GENMASK_ULL(1, 0)
+#define CXL_PMU_COUNTER_CFG_TYPE_FREE_RUN 0
+#define CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN 1
+#define CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE 2
+#define CXL_PMU_COUNTER_CFG_ENABLE BIT_ULL(8)
+#define CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW BIT_ULL(9)
+#define CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW BIT_ULL(10)
+#define CXL_PMU_COUNTER_CFG_EDGE BIT_ULL(11)
+#define CXL_PMU_COUNTER_CFG_INVERT BIT_ULL(12)
+#define CXL_PMU_COUNTER_CFG_THRESHOLD_MSK GENMASK_ULL(23, 16)
+#define CXL_PMU_COUNTER_CFG_EVENTS_MSK GENMASK_ULL(55, 24)
+#define CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK GENMASK_ULL(63, 59)
+
+#define CXL_PMU_FILTER_CFG_REG(n, f) (0x400 + 4 * ((f) + (n) * 8))
+#define CXL_PMU_FILTER_CFG_VALUE_MSK GENMASK(31, 0)
+
+#define CXL_PMU_COUNTER_REG(n) (0xc00 + 8 * (n))
+
+/* CXL rev 3.0 Table 13-5 Events under CXL Vendor ID */
+#define CXL_PMU_GID_CLOCK_TICKS 0x00
+#define CXL_PMU_GID_D2H_REQ 0x0010
+#define CXL_PMU_GID_D2H_RSP 0x0011
+#define CXL_PMU_GID_H2D_REQ 0x0012
+#define CXL_PMU_GID_H2D_RSP 0x0013
+#define CXL_PMU_GID_CACHE_DATA 0x0014
+#define CXL_PMU_GID_M2S_REQ 0x0020
+#define CXL_PMU_GID_M2S_RWD 0x0021
+#define CXL_PMU_GID_M2S_BIRSP 0x0022
+#define CXL_PMU_GID_S2M_BISNP 0x0023
+#define CXL_PMU_GID_S2M_NDR 0x0024
+#define CXL_PMU_GID_S2M_DRS 0x0025
+#define CXL_PMU_GID_DDR 0x8000
+
+static int cxl_pmu_cpuhp_state_num;
+
+struct cxl_pmu_ev_cap {
+ u16 vid;
+ u16 gid;
+ u32 msk;
+ union {
+ int counter_idx; /* fixed counters */
+ int event_idx; /* configurable counters */
+ };
+ struct list_head node;
+};
+
+#define CXL_PMU_MAX_COUNTERS 64
+struct cxl_pmu_info {
+ struct pmu pmu;
+ void __iomem *base;
+ struct perf_event **hw_events;
+ struct list_head event_caps_configurable;
+ struct list_head event_caps_fixed;
+ DECLARE_BITMAP(used_counter_bm, CXL_PMU_MAX_COUNTERS);
+ DECLARE_BITMAP(conf_counter_bm, CXL_PMU_MAX_COUNTERS);
+ u16 counter_width;
+ u8 num_counters;
+ u8 num_event_capabilities;
+ int on_cpu;
+ struct hlist_node node;
+ bool filter_hdm;
+ int irq;
+};
+
+#define pmu_to_cxl_pmu_info(_pmu) container_of(_pmu, struct cxl_pmu_info, pmu)
+
+/*
+ * All CPMU counters are discoverable via the Event Capabilities Registers.
+ * Each Event Capability register contains a VID / GroupID.
+ * A counter may then count any combination (by summing) of events in
+ * that group which are in the Supported Events Bitmask.
+ * However, there are some complexities to the scheme.
+ * - Fixed function counters refer to an Event Capabilities register.
+ * That event capability register is not then used for Configurable
+ * counters.
+ */
+static int cxl_pmu_parse_caps(struct device *dev, struct cxl_pmu_info *info)
+{
+ unsigned long fixed_counter_event_cap_bm = 0;
+ void __iomem *base = info->base;
+ bool freeze_for_enable;
+ u64 val, eval;
+ int i;
+
+ val = readq(base + CXL_PMU_CAP_REG);
+ freeze_for_enable = FIELD_GET(CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN, val) &&
+ FIELD_GET(CXL_PMU_CAP_FREEZE, val);
+ if (!freeze_for_enable) {
+ dev_err(dev, "Counters not writable while frozen\n");
+ return -ENODEV;
+ }
+
+ info->num_counters = FIELD_GET(CXL_PMU_CAP_NUM_COUNTERS_MSK, val) + 1;
+ info->counter_width = FIELD_GET(CXL_PMU_CAP_COUNTER_WIDTH_MSK, val);
+ info->num_event_capabilities = FIELD_GET(CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK, val) + 1;
+
+ info->filter_hdm = FIELD_GET(CXL_PMU_CAP_FILTERS_SUP_MSK, val) & CXL_PMU_FILTER_HDM;
+ if (FIELD_GET(CXL_PMU_CAP_INT, val))
+ info->irq = FIELD_GET(CXL_PMU_CAP_MSI_N_MSK, val);
+ else
+ info->irq = -1;
+
+ /* First handle fixed function counters; note if configurable counters found */
+ for (i = 0; i < info->num_counters; i++) {
+ struct cxl_pmu_ev_cap *pmu_ev;
+ u32 events_msk;
+ u8 group_idx;
+
+ val = readq(base + CXL_PMU_COUNTER_CFG_REG(i));
+
+ if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) ==
+ CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE) {
+ set_bit(i, info->conf_counter_bm);
+ }
+
+ if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) !=
+ CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN)
+ continue;
+
+ /* In this case we know which fields are const */
+ group_idx = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK, val);
+ events_msk = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENTS_MSK, val);
+ eval = readq(base + CXL_PMU_EVENT_CAP_REG(group_idx));
+ pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
+ if (!pmu_ev)
+ return -ENOMEM;
+
+ pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
+ pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
+ /* For a fixed purpose counter use the events mask from the counter CFG */
+ pmu_ev->msk = events_msk;
+ pmu_ev->counter_idx = i;
+ /* This list add is never unwound as all entries deleted on remove */
+ list_add(&pmu_ev->node, &info->event_caps_fixed);
+ /*
+ * Configurable counters must not use an Event Capability registers that
+ * is in use for a Fixed counter
+ */
+ set_bit(group_idx, &fixed_counter_event_cap_bm);
+ }
+
+ if (!bitmap_empty(info->conf_counter_bm, CXL_PMU_MAX_COUNTERS)) {
+ struct cxl_pmu_ev_cap *pmu_ev;
+ int j;
+ /* Walk event capabilities unused by fixed counters */
+ for_each_clear_bit(j, &fixed_counter_event_cap_bm,
+ info->num_event_capabilities) {
+ pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
+ if (!pmu_ev)
+ return -ENOMEM;
+
+ eval = readq(base + CXL_PMU_EVENT_CAP_REG(j));
+ pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
+ pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
+ pmu_ev->msk = FIELD_GET(CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK, eval);
+ pmu_ev->event_idx = j;
+ list_add(&pmu_ev->node, &info->event_caps_configurable);
+ }
+ }
+
+ return 0;
+}
+
+#define CXL_PMU_FORMAT_ATTR(_name, _format)\
+ (&((struct dev_ext_attribute[]) { \
+ { \
+ .attr = __ATTR(_name, 0444, device_show_string, NULL), \
+ .var = (void *)_format \
+ } \
+ })[0].attr.attr)
+
+enum {
+ cxl_pmu_mask_attr,
+ cxl_pmu_gid_attr,
+ cxl_pmu_vid_attr,
+ cxl_pmu_threshold_attr,
+ cxl_pmu_invert_attr,
+ cxl_pmu_edge_attr,
+ cxl_pmu_hdm_filter_en_attr,
+ cxl_pmu_hdm_attr,
+};
+
+static struct attribute *cxl_pmu_format_attr[] = {
+ [cxl_pmu_mask_attr] = CXL_PMU_FORMAT_ATTR(mask, "config:0-31"),
+ [cxl_pmu_gid_attr] = CXL_PMU_FORMAT_ATTR(gid, "config:32-47"),
+ [cxl_pmu_vid_attr] = CXL_PMU_FORMAT_ATTR(vid, "config:48-63"),
+ [cxl_pmu_threshold_attr] = CXL_PMU_FORMAT_ATTR(threshold, "config1:0-15"),
+ [cxl_pmu_invert_attr] = CXL_PMU_FORMAT_ATTR(invert, "config1:16"),
+ [cxl_pmu_edge_attr] = CXL_PMU_FORMAT_ATTR(edge, "config1:17"),
+ [cxl_pmu_hdm_filter_en_attr] = CXL_PMU_FORMAT_ATTR(hdm_filter_en, "config1:18"),
+ [cxl_pmu_hdm_attr] = CXL_PMU_FORMAT_ATTR(hdm, "config2:0-15"),
+ NULL
+};
+
+#define CXL_PMU_ATTR_CONFIG_MASK_MSK GENMASK_ULL(31, 0)
+#define CXL_PMU_ATTR_CONFIG_GID_MSK GENMASK_ULL(47, 32)
+#define CXL_PMU_ATTR_CONFIG_VID_MSK GENMASK_ULL(63, 48)
+#define CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK GENMASK_ULL(15, 0)
+#define CXL_PMU_ATTR_CONFIG1_INVERT_MSK BIT(16)
+#define CXL_PMU_ATTR_CONFIG1_EDGE_MSK BIT(17)
+#define CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK BIT(18)
+#define CXL_PMU_ATTR_CONFIG2_HDM_MSK GENMASK(15, 0)
+
+static umode_t cxl_pmu_format_is_visible(struct kobject *kobj,
+ struct attribute *attr, int a)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct cxl_pmu_info *info = dev_get_drvdata(dev);
+
+ /*
+ * Filter capability at the CPMU level, so hide the attributes if the particular
+ * filter is not supported.
+ */
+ if (!info->filter_hdm &&
+ (attr == cxl_pmu_format_attr[cxl_pmu_hdm_filter_en_attr] ||
+ attr == cxl_pmu_format_attr[cxl_pmu_hdm_attr]))
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group cxl_pmu_format_group = {
+ .name = "format",
+ .attrs = cxl_pmu_format_attr,
+ .is_visible = cxl_pmu_format_is_visible,
+};
+
+static u32 cxl_pmu_config_get_mask(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, event->attr.config);
+}
+
+static u16 cxl_pmu_config_get_gid(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, event->attr.config);
+}
+
+static u16 cxl_pmu_config_get_vid(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, event->attr.config);
+}
+
+static u8 cxl_pmu_config1_get_threshold(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK, event->attr.config1);
+}
+
+static bool cxl_pmu_config1_get_invert(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG1_INVERT_MSK, event->attr.config1);
+}
+
+static bool cxl_pmu_config1_get_edge(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG1_EDGE_MSK, event->attr.config1);
+}
+
+/*
+ * CPMU specification allows for 8 filters, each with a 32 bit value...
+ * So we need to find 8x32bits to store it in.
+ * As the value used for disable is 0xffff_ffff, a separate enable switch
+ * is needed.
+ */
+
+static bool cxl_pmu_config1_hdm_filter_en(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK, event->attr.config1);
+}
+
+static u16 cxl_pmu_config2_get_hdm_decoder(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG2_HDM_MSK, event->attr.config2);
+}
+
+static ssize_t cxl_pmu_event_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct perf_pmu_events_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_attr, attr);
+
+ return sysfs_emit(buf, "config=%#llx\n", pmu_attr->id);
+}
+
+#define CXL_PMU_EVENT_ATTR(_name, _vid, _gid, _msk) \
+ PMU_EVENT_ATTR_ID(_name, cxl_pmu_event_sysfs_show, \
+ ((u64)(_vid) << 48) | ((u64)(_gid) << 32) | (u64)(_msk))
+
+/* For CXL spec defined events */
+#define CXL_PMU_EVENT_CXL_ATTR(_name, _gid, _msk) \
+ CXL_PMU_EVENT_ATTR(_name, PCI_VENDOR_ID_CXL, _gid, _msk)
+
+static struct attribute *cxl_pmu_event_attrs[] = {
+ CXL_PMU_EVENT_CXL_ATTR(clock_ticks, CXL_PMU_GID_CLOCK_TICKS, BIT(0)),
+ /* CXL rev 3.0 Table 3-17 - Device to Host Requests */
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdcurr, CXL_PMU_GID_D2H_REQ, BIT(1)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdown, CXL_PMU_GID_D2H_REQ, BIT(2)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdshared, CXL_PMU_GID_D2H_REQ, BIT(3)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdany, CXL_PMU_GID_D2H_REQ, BIT(4)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdownnodata, CXL_PMU_GID_D2H_REQ, BIT(5)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_itomwr, CXL_PMU_GID_D2H_REQ, BIT(6)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrcurr, CXL_PMU_GID_D2H_REQ, BIT(7)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_clflush, CXL_PMU_GID_D2H_REQ, BIT(8)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevict, CXL_PMU_GID_D2H_REQ, BIT(9)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_dirtyevict, CXL_PMU_GID_D2H_REQ, BIT(10)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevictnodata, CXL_PMU_GID_D2H_REQ, BIT(11)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinv, CXL_PMU_GID_D2H_REQ, BIT(12)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf, CXL_PMU_GID_D2H_REQ, BIT(13)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv, CXL_PMU_GID_D2H_REQ, BIT(14)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_cacheflushed, CXL_PMU_GID_D2H_REQ, BIT(16)),
+ /* CXL rev 3.0 Table 3-20 - D2H Response Encodings */
+ CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihiti, CXL_PMU_GID_D2H_RSP, BIT(4)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvhitv, CXL_PMU_GID_D2H_RSP, BIT(6)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihitse, CXL_PMU_GID_D2H_RSP, BIT(5)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspshitse, CXL_PMU_GID_D2H_RSP, BIT(1)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspsfwdm, CXL_PMU_GID_D2H_RSP, BIT(7)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspifwdm, CXL_PMU_GID_D2H_RSP, BIT(15)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvfwdv, CXL_PMU_GID_D2H_RSP, BIT(22)),
+ /* CXL rev 3.0 Table 3-21 - CXL.cache - Mapping of H2D Requests to D2H Responses */
+ CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpdata, CXL_PMU_GID_H2D_REQ, BIT(1)),
+ CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpinv, CXL_PMU_GID_H2D_REQ, BIT(2)),
+ CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpcur, CXL_PMU_GID_H2D_REQ, BIT(3)),
+ /* CXL rev 3.0 Table 3-22 - H2D Response Opcode Encodings */
+ CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_writepull, CXL_PMU_GID_H2D_RSP, BIT(1)),
+ CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_go, CXL_PMU_GID_H2D_RSP, BIT(4)),
+ CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepull, CXL_PMU_GID_H2D_RSP, BIT(5)),
+ CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_extcmp, CXL_PMU_GID_H2D_RSP, BIT(6)),
+ CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepulldrop, CXL_PMU_GID_H2D_RSP, BIT(8)),
+ CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_fastgowritepull, CXL_PMU_GID_H2D_RSP, BIT(13)),
+ CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_goerrwritepull, CXL_PMU_GID_H2D_RSP, BIT(15)),
+ /* CXL rev 3.0 Table 13-5 directly lists these */
+ CXL_PMU_EVENT_CXL_ATTR(cachedata_d2h_data, CXL_PMU_GID_CACHE_DATA, BIT(0)),
+ CXL_PMU_EVENT_CXL_ATTR(cachedata_h2d_data, CXL_PMU_GID_CACHE_DATA, BIT(1)),
+ /* CXL rev 3.1 Table 3-35 M2S Req Memory Opcodes */
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminv, CXL_PMU_GID_M2S_REQ, BIT(0)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrd, CXL_PMU_GID_M2S_REQ, BIT(1)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddata, CXL_PMU_GID_M2S_REQ, BIT(2)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdfwd, CXL_PMU_GID_M2S_REQ, BIT(3)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_memwrfwd, CXL_PMU_GID_M2S_REQ, BIT(4)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdtee, CXL_PMU_GID_M2S_REQ, BIT(5)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddatatee, CXL_PMU_GID_M2S_REQ, BIT(6)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_memspecrd, CXL_PMU_GID_M2S_REQ, BIT(8)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminvnt, CXL_PMU_GID_M2S_REQ, BIT(9)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_req_memcleanevict, CXL_PMU_GID_M2S_REQ, BIT(10)),
+ /* CXL rev 3.0 Table 3-35 M2S RwD Memory Opcodes */
+ CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwr, CXL_PMU_GID_M2S_RWD, BIT(1)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwrptl, CXL_PMU_GID_M2S_RWD, BIT(2)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_biconflict, CXL_PMU_GID_M2S_RWD, BIT(4)),
+ /* CXL rev 3.0 Table 3-38 M2S BIRsp Memory Opcodes */
+ CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_i, CXL_PMU_GID_M2S_BIRSP, BIT(0)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_s, CXL_PMU_GID_M2S_BIRSP, BIT(1)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_e, CXL_PMU_GID_M2S_BIRSP, BIT(2)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_iblk, CXL_PMU_GID_M2S_BIRSP, BIT(4)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_sblk, CXL_PMU_GID_M2S_BIRSP, BIT(5)),
+ CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_eblk, CXL_PMU_GID_M2S_BIRSP, BIT(6)),
+ /* CXL rev 3.0 Table 3-40 S2M BISnp Opcodes */
+ CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_cur, CXL_PMU_GID_S2M_BISNP, BIT(0)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_data, CXL_PMU_GID_S2M_BISNP, BIT(1)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_inv, CXL_PMU_GID_S2M_BISNP, BIT(2)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk, CXL_PMU_GID_S2M_BISNP, BIT(4)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk, CXL_PMU_GID_S2M_BISNP, BIT(5)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk, CXL_PMU_GID_S2M_BISNP, BIT(6)),
+ /* CXL rev 3.1 Table 3-50 S2M NDR Opcodes */
+ CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp, CXL_PMU_GID_S2M_NDR, BIT(0)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps, CXL_PMU_GID_S2M_NDR, BIT(1)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe, CXL_PMU_GID_S2M_NDR, BIT(2)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpm, CXL_PMU_GID_S2M_NDR, BIT(3)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(4)),
+ /* CXL rev 3.0 Table 3-46 S2M DRS opcodes */
+ CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata, CXL_PMU_GID_S2M_DRS, BIT(0)),
+ CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm, CXL_PMU_GID_S2M_DRS, BIT(1)),
+ /* CXL rev 3.0 Table 13-5 directly lists these */
+ CXL_PMU_EVENT_CXL_ATTR(ddr_act, CXL_PMU_GID_DDR, BIT(0)),
+ CXL_PMU_EVENT_CXL_ATTR(ddr_pre, CXL_PMU_GID_DDR, BIT(1)),
+ CXL_PMU_EVENT_CXL_ATTR(ddr_casrd, CXL_PMU_GID_DDR, BIT(2)),
+ CXL_PMU_EVENT_CXL_ATTR(ddr_caswr, CXL_PMU_GID_DDR, BIT(3)),
+ CXL_PMU_EVENT_CXL_ATTR(ddr_refresh, CXL_PMU_GID_DDR, BIT(4)),
+ CXL_PMU_EVENT_CXL_ATTR(ddr_selfrefreshent, CXL_PMU_GID_DDR, BIT(5)),
+ CXL_PMU_EVENT_CXL_ATTR(ddr_rfm, CXL_PMU_GID_DDR, BIT(6)),
+ NULL
+};
+
+static struct cxl_pmu_ev_cap *cxl_pmu_find_fixed_counter_ev_cap(struct cxl_pmu_info *info,
+ int vid, int gid, int msk)
+{
+ struct cxl_pmu_ev_cap *pmu_ev;
+
+ list_for_each_entry(pmu_ev, &info->event_caps_fixed, node) {
+ if (vid != pmu_ev->vid || gid != pmu_ev->gid)
+ continue;
+
+ /* Precise match for fixed counter */
+ if (msk == pmu_ev->msk)
+ return pmu_ev;
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
+static struct cxl_pmu_ev_cap *cxl_pmu_find_config_counter_ev_cap(struct cxl_pmu_info *info,
+ int vid, int gid, int msk)
+{
+ struct cxl_pmu_ev_cap *pmu_ev;
+
+ list_for_each_entry(pmu_ev, &info->event_caps_configurable, node) {
+ if (vid != pmu_ev->vid || gid != pmu_ev->gid)
+ continue;
+
+ /* Request mask must be subset of supported */
+ if (msk & ~pmu_ev->msk)
+ continue;
+
+ return pmu_ev;
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
+static umode_t cxl_pmu_event_is_visible(struct kobject *kobj, struct attribute *attr, int a)
+{
+ struct device_attribute *dev_attr = container_of(attr, struct device_attribute, attr);
+ struct perf_pmu_events_attr *pmu_attr =
+ container_of(dev_attr, struct perf_pmu_events_attr, attr);
+ struct device *dev = kobj_to_dev(kobj);
+ struct cxl_pmu_info *info = dev_get_drvdata(dev);
+ int vid = FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, pmu_attr->id);
+ int gid = FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, pmu_attr->id);
+ int msk = FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, pmu_attr->id);
+
+ if (!IS_ERR(cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, msk)))
+ return attr->mode;
+
+ if (!IS_ERR(cxl_pmu_find_config_counter_ev_cap(info, vid, gid, msk)))
+ return attr->mode;
+
+ return 0;
+}
+
+static const struct attribute_group cxl_pmu_events = {
+ .name = "events",
+ .attrs = cxl_pmu_event_attrs,
+ .is_visible = cxl_pmu_event_is_visible,
+};
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_pmu_info *info = dev_get_drvdata(dev);
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(info->on_cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *cxl_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static const struct attribute_group cxl_pmu_cpumask_group = {
+ .attrs = cxl_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *cxl_pmu_attr_groups[] = {
+ &cxl_pmu_events,
+ &cxl_pmu_format_group,
+ &cxl_pmu_cpumask_group,
+ NULL
+};
+
+/* If counter_idx == NULL, don't try to allocate a counter. */
+static int cxl_pmu_get_event_idx(struct perf_event *event, int *counter_idx,
+ int *event_idx)
+{
+ struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+ DECLARE_BITMAP(configurable_and_free, CXL_PMU_MAX_COUNTERS);
+ struct cxl_pmu_ev_cap *pmu_ev;
+ u32 mask;
+ u16 gid, vid;
+ int i;
+
+ vid = cxl_pmu_config_get_vid(event);
+ gid = cxl_pmu_config_get_gid(event);
+ mask = cxl_pmu_config_get_mask(event);
+
+ pmu_ev = cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, mask);
+ if (!IS_ERR(pmu_ev)) {
+ if (!counter_idx)
+ return 0;
+ if (!test_bit(pmu_ev->counter_idx, info->used_counter_bm)) {
+ *counter_idx = pmu_ev->counter_idx;
+ return 0;
+ }
+ /* Fixed counter is in use, but maybe a configurable one? */
+ }
+
+ pmu_ev = cxl_pmu_find_config_counter_ev_cap(info, vid, gid, mask);
+ if (!IS_ERR(pmu_ev)) {
+ if (!counter_idx)
+ return 0;
+
+ bitmap_andnot(configurable_and_free, info->conf_counter_bm,
+ info->used_counter_bm, CXL_PMU_MAX_COUNTERS);
+
+ i = find_first_bit(configurable_and_free, CXL_PMU_MAX_COUNTERS);
+ if (i == CXL_PMU_MAX_COUNTERS)
+ return -EINVAL;
+
+ *counter_idx = i;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int cxl_pmu_event_init(struct perf_event *event)
+{
+ struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+ int rc;
+
+ /* Top level type sanity check - is this a Hardware Event being requested */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EOPNOTSUPP;
+ /* TODO: Validation of any filter */
+
+ /*
+ * Verify that it is possible to count what was requested. Either must
+ * be a fixed counter that is a precise match or a configurable counter
+ * where this is a subset.
+ */
+ rc = cxl_pmu_get_event_idx(event, NULL, NULL);
+ if (rc < 0)
+ return rc;
+
+ event->cpu = info->on_cpu;
+
+ return 0;
+}
+
+static void cxl_pmu_enable(struct pmu *pmu)
+{
+ struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
+ void __iomem *base = info->base;
+
+ /* Can assume frozen at this stage */
+ writeq(0, base + CXL_PMU_FREEZE_REG);
+}
+
+static void cxl_pmu_disable(struct pmu *pmu)
+{
+ struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
+ void __iomem *base = info->base;
+
+ /*
+ * Whilst bits above number of counters are RsvdZ
+ * they are unlikely to be repurposed given
+ * number of counters is allowed to be 64 leaving
+ * no reserved bits. Hence this is only slightly
+ * naughty.
+ */
+ writeq(GENMASK_ULL(63, 0), base + CXL_PMU_FREEZE_REG);
+}
+
+static void cxl_pmu_event_start(struct perf_event *event, int flags)
+{
+ struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ void __iomem *base = info->base;
+ u64 cfg;
+
+ /*
+ * All paths to here should either set these flags directly or
+ * call cxl_pmu_event_stop() which will ensure the correct state.
+ */
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ /*
+ * Currently only hdm filter control is implemented, this code will
+ * want generalizing when more filters are added.
+ */
+ if (info->filter_hdm) {
+ if (cxl_pmu_config1_hdm_filter_en(event))
+ cfg = cxl_pmu_config2_get_hdm_decoder(event);
+ else
+ cfg = GENMASK(31, 0); /* No filtering if 0xFFFF_FFFF */
+ writeq(cfg, base + CXL_PMU_FILTER_CFG_REG(hwc->idx, 0));
+ }
+
+ cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+ cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1);
+ cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW, 1);
+ cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1);
+ cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EDGE,
+ cxl_pmu_config1_get_edge(event) ? 1 : 0);
+ cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INVERT,
+ cxl_pmu_config1_get_invert(event) ? 1 : 0);
+
+ /* Fixed purpose counters have next two fields RO */
+ if (test_bit(hwc->idx, info->conf_counter_bm)) {
+ cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK,
+ hwc->event_base);
+ cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENTS_MSK,
+ cxl_pmu_config_get_mask(event));
+ }
+ cfg &= ~CXL_PMU_COUNTER_CFG_THRESHOLD_MSK;
+ /*
+ * For events that generate only 1 count per clock the CXL 3.0 spec
+ * states the threshold shall be set to 1 but if set to 0 it will
+ * count the raw value anwyay?
+ * There is no definition of what events will count multiple per cycle
+ * and hence to which non 1 values of threshold can apply.
+ * (CXL 3.0 8.2.7.2.1 Counter Configuration - threshold field definition)
+ */
+ cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_THRESHOLD_MSK,
+ cxl_pmu_config1_get_threshold(event));
+ writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+
+ local64_set(&hwc->prev_count, 0);
+ writeq(0, base + CXL_PMU_COUNTER_REG(hwc->idx));
+
+ perf_event_update_userpage(event);
+}
+
+static u64 cxl_pmu_read_counter(struct perf_event *event)
+{
+ struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+ void __iomem *base = info->base;
+
+ return readq(base + CXL_PMU_COUNTER_REG(event->hw.idx));
+}
+
+static void __cxl_pmu_read(struct perf_event *event, bool overflow)
+{
+ struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 new_cnt, prev_cnt, delta;
+
+ do {
+ prev_cnt = local64_read(&hwc->prev_count);
+ new_cnt = cxl_pmu_read_counter(event);
+ } while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) != prev_cnt);
+
+ /*
+ * If we know an overflow occur then take that into account.
+ * Note counter is not reset as that would lose events
+ */
+ delta = (new_cnt - prev_cnt) & GENMASK_ULL(info->counter_width - 1, 0);
+ if (overflow && delta < GENMASK_ULL(info->counter_width - 1, 0))
+ delta += (1UL << info->counter_width);
+
+ local64_add(delta, &event->count);
+}
+
+static void cxl_pmu_read(struct perf_event *event)
+{
+ __cxl_pmu_read(event, false);
+}
+
+static void cxl_pmu_event_stop(struct perf_event *event, int flags)
+{
+ struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+ void __iomem *base = info->base;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 cfg;
+
+ cxl_pmu_read(event);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+ cfg &= ~(FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1) |
+ FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1));
+ writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int cxl_pmu_event_add(struct perf_event *event, int flags)
+{
+ struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx, rc;
+ int event_idx = 0;
+
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+ rc = cxl_pmu_get_event_idx(event, &idx, &event_idx);
+ if (rc < 0)
+ return rc;
+
+ hwc->idx = idx;
+
+ /* Only set for configurable counters */
+ hwc->event_base = event_idx;
+ info->hw_events[idx] = event;
+ set_bit(idx, info->used_counter_bm);
+
+ if (flags & PERF_EF_START)
+ cxl_pmu_event_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void cxl_pmu_event_del(struct perf_event *event, int flags)
+{
+ struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ cxl_pmu_event_stop(event, PERF_EF_UPDATE);
+ clear_bit(hwc->idx, info->used_counter_bm);
+ info->hw_events[hwc->idx] = NULL;
+ perf_event_update_userpage(event);
+}
+
+static irqreturn_t cxl_pmu_irq(int irq, void *data)
+{
+ struct cxl_pmu_info *info = data;
+ void __iomem *base = info->base;
+ u64 overflowed;
+ DECLARE_BITMAP(overflowedbm, 64);
+ int i;
+
+ overflowed = readq(base + CXL_PMU_OVERFLOW_REG);
+
+ /* Interrupt may be shared, so maybe it isn't ours */
+ if (!overflowed)
+ return IRQ_NONE;
+
+ bitmap_from_arr64(overflowedbm, &overflowed, 64);
+ for_each_set_bit(i, overflowedbm, info->num_counters) {
+ struct perf_event *event = info->hw_events[i];
+
+ if (!event) {
+ dev_dbg(info->pmu.dev,
+ "overflow but on non enabled counter %d\n", i);
+ continue;
+ }
+
+ __cxl_pmu_read(event, true);
+ }
+
+ writeq(overflowed, base + CXL_PMU_OVERFLOW_REG);
+
+ return IRQ_HANDLED;
+}
+
+static void cxl_pmu_perf_unregister(void *_info)
+{
+ struct cxl_pmu_info *info = _info;
+
+ perf_pmu_unregister(&info->pmu);
+}
+
+static void cxl_pmu_cpuhp_remove(void *_info)
+{
+ struct cxl_pmu_info *info = _info;
+
+ cpuhp_state_remove_instance_nocalls(cxl_pmu_cpuhp_state_num, &info->node);
+}
+
+static int cxl_pmu_probe(struct device *dev)
+{
+ struct cxl_pmu *pmu = to_cxl_pmu(dev);
+ struct pci_dev *pdev = to_pci_dev(dev->parent);
+ struct cxl_pmu_info *info;
+ char *irq_name;
+ char *dev_name;
+ int rc, irq;
+
+ info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ dev_set_drvdata(dev, info);
+ INIT_LIST_HEAD(&info->event_caps_fixed);
+ INIT_LIST_HEAD(&info->event_caps_configurable);
+
+ info->base = pmu->base;
+
+ info->on_cpu = -1;
+ rc = cxl_pmu_parse_caps(dev, info);
+ if (rc)
+ return rc;
+
+ info->hw_events = devm_kcalloc(dev, info->num_counters,
+ sizeof(*info->hw_events), GFP_KERNEL);
+ if (!info->hw_events)
+ return -ENOMEM;
+
+ switch (pmu->type) {
+ case CXL_PMU_MEMDEV:
+ dev_name = devm_kasprintf(dev, GFP_KERNEL, "cxl_pmu_mem%d.%d",
+ pmu->assoc_id, pmu->index);
+ break;
+ }
+ if (!dev_name)
+ return -ENOMEM;
+
+ info->pmu = (struct pmu) {
+ .name = dev_name,
+ .parent = dev,
+ .module = THIS_MODULE,
+ .event_init = cxl_pmu_event_init,
+ .pmu_enable = cxl_pmu_enable,
+ .pmu_disable = cxl_pmu_disable,
+ .add = cxl_pmu_event_add,
+ .del = cxl_pmu_event_del,
+ .start = cxl_pmu_event_start,
+ .stop = cxl_pmu_event_stop,
+ .read = cxl_pmu_read,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = cxl_pmu_attr_groups,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ };
+
+ if (info->irq <= 0)
+ return -EINVAL;
+
+ rc = pci_irq_vector(pdev, info->irq);
+ if (rc < 0)
+ return rc;
+ irq = rc;
+
+ irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_overflow", dev_name);
+ if (!irq_name)
+ return -ENOMEM;
+
+ rc = devm_request_irq(dev, irq, cxl_pmu_irq, IRQF_SHARED | IRQF_ONESHOT,
+ irq_name, info);
+ if (rc)
+ return rc;
+ info->irq = irq;
+
+ rc = cpuhp_state_add_instance(cxl_pmu_cpuhp_state_num, &info->node);
+ if (rc)
+ return rc;
+
+ rc = devm_add_action_or_reset(dev, cxl_pmu_cpuhp_remove, info);
+ if (rc)
+ return rc;
+
+ rc = perf_pmu_register(&info->pmu, info->pmu.name, -1);
+ if (rc)
+ return rc;
+
+ rc = devm_add_action_or_reset(dev, cxl_pmu_perf_unregister, info);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+static struct cxl_driver cxl_pmu_driver = {
+ .name = "cxl_pmu",
+ .probe = cxl_pmu_probe,
+ .id = CXL_DEVICE_PMU,
+};
+
+static int cxl_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
+
+ if (info->on_cpu != -1)
+ return 0;
+
+ info->on_cpu = cpu;
+ /*
+ * CPU HP lock is held so we should be guaranteed that the CPU hasn't yet
+ * gone away again.
+ */
+ WARN_ON(irq_set_affinity(info->irq, cpumask_of(cpu)));
+
+ return 0;
+}
+
+static int cxl_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
+ unsigned int target;
+
+ if (info->on_cpu != cpu)
+ return 0;
+
+ info->on_cpu = -1;
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids) {
+ dev_err(info->pmu.dev, "Unable to find a suitable CPU\n");
+ return 0;
+ }
+
+ perf_pmu_migrate_context(&info->pmu, cpu, target);
+ info->on_cpu = target;
+ /*
+ * CPU HP lock is held so we should be guaranteed that this CPU hasn't yet
+ * gone away.
+ */
+ WARN_ON(irq_set_affinity(info->irq, cpumask_of(target)));
+
+ return 0;
+}
+
+static __init int cxl_pmu_init(void)
+{
+ int rc;
+
+ rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "AP_PERF_CXL_PMU_ONLINE",
+ cxl_pmu_online_cpu, cxl_pmu_offline_cpu);
+ if (rc < 0)
+ return rc;
+ cxl_pmu_cpuhp_state_num = rc;
+
+ rc = cxl_driver_register(&cxl_pmu_driver);
+ if (rc)
+ cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
+
+ return rc;
+}
+
+static __exit void cxl_pmu_exit(void)
+{
+ cxl_driver_unregister(&cxl_pmu_driver);
+ cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
+}
+
+MODULE_DESCRIPTION("CXL Performance Monitor Driver");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS("CXL");
+module_init(cxl_pmu_init);
+module_exit(cxl_pmu_exit);
+MODULE_ALIAS_CXL(CXL_DEVICE_PMU);
diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c
new file mode 100644
index 000000000000..22f73ac894e9
--- /dev/null
+++ b/drivers/perf/dwc_pcie_pmu.c
@@ -0,0 +1,889 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Synopsys DesignWare PCIe PMU driver
+ *
+ * Copyright (C) 2021-2023 Alibaba Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/pcie-dwc.h>
+#include <linux/perf_event.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#define DWC_PCIE_EVENT_CNT_CTL 0x8
+
+/*
+ * Event Counter Data Select includes two parts:
+ * - 27-24: Group number(4-bit: 0..0x7)
+ * - 23-16: Event number(8-bit: 0..0x13) within the Group
+ *
+ * Put them together as in TRM.
+ */
+#define DWC_PCIE_CNT_EVENT_SEL GENMASK(27, 16)
+#define DWC_PCIE_CNT_LANE_SEL GENMASK(11, 8)
+#define DWC_PCIE_CNT_STATUS BIT(7)
+#define DWC_PCIE_CNT_ENABLE GENMASK(4, 2)
+#define DWC_PCIE_PER_EVENT_OFF 0x1
+#define DWC_PCIE_PER_EVENT_ON 0x3
+#define DWC_PCIE_EVENT_CLEAR GENMASK(1, 0)
+#define DWC_PCIE_EVENT_PER_CLEAR 0x1
+
+/* Event Selection Field has two subfields */
+#define DWC_PCIE_CNT_EVENT_SEL_GROUP GENMASK(11, 8)
+#define DWC_PCIE_CNT_EVENT_SEL_EVID GENMASK(7, 0)
+
+#define DWC_PCIE_EVENT_CNT_DATA 0xC
+
+#define DWC_PCIE_TIME_BASED_ANAL_CTL 0x10
+#define DWC_PCIE_TIME_BASED_REPORT_SEL GENMASK(31, 24)
+#define DWC_PCIE_TIME_BASED_DURATION_SEL GENMASK(15, 8)
+#define DWC_PCIE_DURATION_MANUAL_CTL 0x0
+#define DWC_PCIE_DURATION_1MS 0x1
+#define DWC_PCIE_DURATION_10MS 0x2
+#define DWC_PCIE_DURATION_100MS 0x3
+#define DWC_PCIE_DURATION_1S 0x4
+#define DWC_PCIE_DURATION_2S 0x5
+#define DWC_PCIE_DURATION_4S 0x6
+#define DWC_PCIE_DURATION_4US 0xFF
+#define DWC_PCIE_TIME_BASED_TIMER_START BIT(0)
+#define DWC_PCIE_TIME_BASED_CNT_ENABLE 0x1
+
+#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW 0x14
+#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH 0x18
+
+/* Event attributes */
+#define DWC_PCIE_CONFIG_EVENTID GENMASK(15, 0)
+#define DWC_PCIE_CONFIG_TYPE GENMASK(19, 16)
+#define DWC_PCIE_CONFIG_LANE GENMASK(27, 20)
+
+#define DWC_PCIE_EVENT_ID(event) FIELD_GET(DWC_PCIE_CONFIG_EVENTID, (event)->attr.config)
+#define DWC_PCIE_EVENT_TYPE(event) FIELD_GET(DWC_PCIE_CONFIG_TYPE, (event)->attr.config)
+#define DWC_PCIE_EVENT_LANE(event) FIELD_GET(DWC_PCIE_CONFIG_LANE, (event)->attr.config)
+
+enum dwc_pcie_event_type {
+ DWC_PCIE_TIME_BASE_EVENT,
+ DWC_PCIE_LANE_EVENT,
+ DWC_PCIE_EVENT_TYPE_MAX,
+};
+
+#define DWC_PCIE_LANE_GROUP_6 6
+#define DWC_PCIE_LANE_GROUP_7 7
+#define DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP 256
+
+#define DWC_PCIE_LANE_EVENT_MAX_PERIOD GENMASK_ULL(31, 0)
+#define DWC_PCIE_MAX_PERIOD GENMASK_ULL(63, 0)
+
+struct dwc_pcie_pmu {
+ struct pmu pmu;
+ struct pci_dev *pdev; /* Root Port device */
+ u16 ras_des_offset;
+ u32 nr_lanes;
+
+ /* Groups #6 and #7 */
+ DECLARE_BITMAP(lane_events, 2 * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP);
+ struct perf_event *time_based_event;
+
+ struct hlist_node cpuhp_node;
+ int on_cpu;
+};
+
+#define to_dwc_pcie_pmu(p) (container_of(p, struct dwc_pcie_pmu, pmu))
+
+static int dwc_pcie_pmu_hp_state;
+static struct list_head dwc_pcie_dev_info_head =
+ LIST_HEAD_INIT(dwc_pcie_dev_info_head);
+static bool notify;
+
+struct dwc_pcie_dev_info {
+ struct platform_device *plat_dev;
+ struct pci_dev *pdev;
+ struct list_head dev_node;
+};
+
+static ssize_t cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(pcie_pmu->on_cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *dwc_pcie_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static struct attribute_group dwc_pcie_cpumask_attr_group = {
+ .attrs = dwc_pcie_pmu_cpumask_attrs,
+};
+
+struct dwc_pcie_format_attr {
+ struct device_attribute attr;
+ u64 field;
+ int config;
+};
+
+PMU_FORMAT_ATTR(eventid, "config:0-15");
+PMU_FORMAT_ATTR(type, "config:16-19");
+PMU_FORMAT_ATTR(lane, "config:20-27");
+
+static struct attribute *dwc_pcie_format_attrs[] = {
+ &format_attr_type.attr,
+ &format_attr_eventid.attr,
+ &format_attr_lane.attr,
+ NULL,
+};
+
+static struct attribute_group dwc_pcie_format_attrs_group = {
+ .name = "format",
+ .attrs = dwc_pcie_format_attrs,
+};
+
+struct dwc_pcie_event_attr {
+ struct device_attribute attr;
+ enum dwc_pcie_event_type type;
+ u16 eventid;
+ u8 lane;
+};
+
+static ssize_t dwc_pcie_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dwc_pcie_event_attr *eattr;
+
+ eattr = container_of(attr, typeof(*eattr), attr);
+
+ if (eattr->type == DWC_PCIE_LANE_EVENT)
+ return sysfs_emit(buf, "eventid=0x%x,type=0x%x,lane=?\n",
+ eattr->eventid, eattr->type);
+ else if (eattr->type == DWC_PCIE_TIME_BASE_EVENT)
+ return sysfs_emit(buf, "eventid=0x%x,type=0x%x\n",
+ eattr->eventid, eattr->type);
+
+ return 0;
+}
+
+#define DWC_PCIE_EVENT_ATTR(_name, _type, _eventid, _lane) \
+ (&((struct dwc_pcie_event_attr[]) {{ \
+ .attr = __ATTR(_name, 0444, dwc_pcie_event_show, NULL), \
+ .type = _type, \
+ .eventid = _eventid, \
+ .lane = _lane, \
+ }})[0].attr.attr)
+
+#define DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(_name, _eventid) \
+ DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_TIME_BASE_EVENT, _eventid, 0)
+#define DWC_PCIE_PMU_LANE_EVENT_ATTR(_name, _eventid) \
+ DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_LANE_EVENT, _eventid, 0)
+
+static struct attribute *dwc_pcie_pmu_time_event_attrs[] = {
+ /* Group #0 */
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(one_cycle, 0x00),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_L0S, 0x01),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(RX_L0S, 0x02),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L0, 0x03),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1, 0x04),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_1, 0x05),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_2, 0x06),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(CFG_RCVRY, 0x07),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x08),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x09),
+
+ /* Group #1 */
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(tx_pcie_tlp_data_payload, 0x20),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(rx_pcie_tlp_data_payload, 0x21),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(tx_ccix_tlp_data_payload, 0x22),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(rx_ccix_tlp_data_payload, 0x23),
+
+ /*
+ * Leave it to the user to specify the lane ID to avoid generating
+ * a list of hundreds of events.
+ */
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ack_dllp, 0x600),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_update_fc_dllp, 0x601),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ack_dllp, 0x602),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_update_fc_dllp, 0x603),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nullified_tlp, 0x604),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nullified_tlp, 0x605),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tlp, 0x606),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_write, 0x700),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_read, 0x701),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_write, 0x702),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_read, 0x703),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_write, 0x704),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_read, 0x705),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_without_data, 0x706),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_with_data, 0x707),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_message_tlp, 0x708),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_atomic, 0x709),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_tlp_with_prefix, 0x70A),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_write, 0x70B),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_read, 0x70C),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_write, 0x70F),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_read, 0x710),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_without_data, 0x711),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_with_data, 0x712),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_message_tlp, 0x713),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_atomic, 0x714),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_tlp_with_prefix, 0x715),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ccix_tlp, 0x716),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ccix_tlp, 0x717),
+ NULL
+};
+
+static const struct attribute_group dwc_pcie_event_attrs_group = {
+ .name = "events",
+ .attrs = dwc_pcie_pmu_time_event_attrs,
+};
+
+static const struct attribute_group *dwc_pcie_attr_groups[] = {
+ &dwc_pcie_event_attrs_group,
+ &dwc_pcie_format_attrs_group,
+ &dwc_pcie_cpumask_attr_group,
+ NULL
+};
+
+static void dwc_pcie_pmu_lane_event_enable(struct dwc_pcie_pmu *pcie_pmu,
+ struct perf_event *event,
+ bool enable)
+{
+ struct pci_dev *pdev = pcie_pmu->pdev;
+ u16 ras_des_offset = pcie_pmu->ras_des_offset;
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ int lane = DWC_PCIE_EVENT_LANE(event);
+ u32 ctrl;
+
+ ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
+ FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
+ FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
+
+ if (enable)
+ ctrl |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
+ else
+ ctrl |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF);
+
+ pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ ctrl);
+}
+
+static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu,
+ bool enable)
+{
+ struct pci_dev *pdev = pcie_pmu->pdev;
+ u16 ras_des_offset = pcie_pmu->ras_des_offset;
+
+ pci_clear_and_set_config_dword(pdev,
+ ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL,
+ DWC_PCIE_TIME_BASED_TIMER_START, enable);
+}
+
+static u64 dwc_pcie_pmu_read_lane_event_counter(struct perf_event *event)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ struct pci_dev *pdev = pcie_pmu->pdev;
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ int lane = DWC_PCIE_EVENT_LANE(event);
+ u16 ras_des_offset = pcie_pmu->ras_des_offset;
+ u32 val, ctrl;
+
+ ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
+ FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
+ FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
+ pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ ctrl);
+ pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_DATA, &val);
+
+ ctrl |= FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
+ pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ ctrl);
+
+ return val;
+}
+
+static u64 dwc_pcie_pmu_read_time_based_counter(struct perf_event *event)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ struct pci_dev *pdev = pcie_pmu->pdev;
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ u16 ras_des_offset = pcie_pmu->ras_des_offset;
+ u32 lo, hi, ss;
+ u64 val;
+
+ /*
+ * The 64-bit value of the data counter is spread across two
+ * registers that are not synchronized. In order to read them
+ * atomically, ensure that the high 32 bits match before and after
+ * reading the low 32 bits.
+ */
+ pci_read_config_dword(pdev,
+ ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH, &hi);
+ do {
+ /* snapshot the high 32 bits */
+ ss = hi;
+
+ pci_read_config_dword(
+ pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW,
+ &lo);
+ pci_read_config_dword(
+ pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH,
+ &hi);
+ } while (hi != ss);
+
+ val = ((u64)hi << 32) | lo;
+ /*
+ * The Group#1 event measures the amount of data processed in 16-byte
+ * units. Simplify the end-user interface by multiplying the counter
+ * at the point of read.
+ */
+ if (event_id >= 0x20 && event_id <= 0x23)
+ val *= 16;
+
+ return val;
+}
+
+static void dwc_pcie_pmu_event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+ u64 delta, prev, now;
+
+ if (type == DWC_PCIE_LANE_EVENT) {
+ now = dwc_pcie_pmu_read_lane_event_counter(event) &
+ DWC_PCIE_LANE_EVENT_MAX_PERIOD;
+ local64_add(now, &event->count);
+ return;
+ }
+
+ do {
+ prev = local64_read(&hwc->prev_count);
+ now = dwc_pcie_pmu_read_time_based_counter(event);
+
+ } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+ delta = (now - prev) & DWC_PCIE_MAX_PERIOD;
+ local64_add(delta, &event->count);
+}
+
+static int dwc_pcie_pmu_validate_add_lane_event(struct perf_event *event,
+ unsigned long val_lane_events[])
+{
+ int event_id, event_nr, group;
+
+ event_id = DWC_PCIE_EVENT_ID(event);
+ event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id);
+ group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id);
+
+ if (group != DWC_PCIE_LANE_GROUP_6 && group != DWC_PCIE_LANE_GROUP_7)
+ return -EINVAL;
+
+ group -= DWC_PCIE_LANE_GROUP_6;
+
+ if (test_and_set_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr,
+ val_lane_events))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int dwc_pcie_pmu_validate_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ DECLARE_BITMAP(val_lane_events, 2 * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP);
+ bool time_event = false;
+ int type;
+
+ type = DWC_PCIE_EVENT_TYPE(leader);
+ if (type == DWC_PCIE_TIME_BASE_EVENT)
+ time_event = true;
+ else
+ if (dwc_pcie_pmu_validate_add_lane_event(leader, val_lane_events))
+ return -ENOSPC;
+
+ for_each_sibling_event(sibling, leader) {
+ type = DWC_PCIE_EVENT_TYPE(sibling);
+ if (type == DWC_PCIE_TIME_BASE_EVENT) {
+ if (time_event)
+ return -ENOSPC;
+
+ time_event = true;
+ continue;
+ }
+
+ if (dwc_pcie_pmu_validate_add_lane_event(sibling, val_lane_events))
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+static int dwc_pcie_pmu_event_init(struct perf_event *event)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+ struct perf_event *sibling;
+ u32 lane;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* We don't support sampling */
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ /* We cannot support task bound events */
+ if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, event->group_leader) {
+ if (sibling->pmu != event->pmu && !is_software_event(sibling))
+ return -EINVAL;
+ }
+
+ if (type < 0 || type >= DWC_PCIE_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ if (type == DWC_PCIE_LANE_EVENT) {
+ lane = DWC_PCIE_EVENT_LANE(event);
+ if (lane < 0 || lane >= pcie_pmu->nr_lanes)
+ return -EINVAL;
+ }
+
+ if (dwc_pcie_pmu_validate_group(event))
+ return -ENOSPC;
+
+ event->cpu = pcie_pmu->on_cpu;
+
+ return 0;
+}
+
+static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+
+ hwc->state = 0;
+ local64_set(&hwc->prev_count, 0);
+
+ if (type == DWC_PCIE_LANE_EVENT)
+ dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, true);
+ else if (type == DWC_PCIE_TIME_BASE_EVENT)
+ dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true);
+}
+
+static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ dwc_pcie_pmu_event_update(event);
+
+ if (type == DWC_PCIE_LANE_EVENT)
+ dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, false);
+ else if (type == DWC_PCIE_TIME_BASE_EVENT)
+ dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false);
+
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ struct pci_dev *pdev = pcie_pmu->pdev;
+ struct hw_perf_event *hwc = &event->hw;
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ int lane = DWC_PCIE_EVENT_LANE(event);
+ u16 ras_des_offset = pcie_pmu->ras_des_offset;
+ u32 ctrl;
+
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+ if (type == DWC_PCIE_LANE_EVENT) {
+ int event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id);
+ int group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id) -
+ DWC_PCIE_LANE_GROUP_6;
+
+ if (test_and_set_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr,
+ pcie_pmu->lane_events))
+ return -ENOSPC;
+
+ /* EVENT_COUNTER_DATA_REG needs clear manually */
+ ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
+ FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
+ FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF) |
+ FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
+ pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ ctrl);
+ } else if (type == DWC_PCIE_TIME_BASE_EVENT) {
+ if (pcie_pmu->time_based_event)
+ return -ENOSPC;
+
+ pcie_pmu->time_based_event = event;
+
+ /*
+ * TIME_BASED_ANAL_DATA_REG is a 64 bit register, we can safely
+ * use it with any manually controlled duration. And it is
+ * cleared when next measurement starts.
+ */
+ ctrl = FIELD_PREP(DWC_PCIE_TIME_BASED_REPORT_SEL, event_id) |
+ FIELD_PREP(DWC_PCIE_TIME_BASED_DURATION_SEL,
+ DWC_PCIE_DURATION_MANUAL_CTL) |
+ DWC_PCIE_TIME_BASED_CNT_ENABLE;
+ pci_write_config_dword(
+ pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL, ctrl);
+ }
+
+ if (flags & PERF_EF_START)
+ dwc_pcie_pmu_event_start(event, PERF_EF_RELOAD);
+
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static void dwc_pcie_pmu_event_del(struct perf_event *event, int flags)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+
+ dwc_pcie_pmu_event_stop(event, flags | PERF_EF_UPDATE);
+ perf_event_update_userpage(event);
+
+ if (type == DWC_PCIE_TIME_BASE_EVENT) {
+ pcie_pmu->time_based_event = NULL;
+ } else {
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ int event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id);
+ int group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id) -
+ DWC_PCIE_LANE_GROUP_6;
+
+ clear_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr,
+ pcie_pmu->lane_events);
+ }
+}
+
+static void dwc_pcie_pmu_remove_cpuhp_instance(void *hotplug_node)
+{
+ cpuhp_state_remove_instance_nocalls(dwc_pcie_pmu_hp_state, hotplug_node);
+}
+
+/*
+ * Find the binded DES capability device info of a PCI device.
+ * @pdev: The PCI device.
+ */
+static struct dwc_pcie_dev_info *dwc_pcie_find_dev_info(struct pci_dev *pdev)
+{
+ struct dwc_pcie_dev_info *dev_info;
+
+ list_for_each_entry(dev_info, &dwc_pcie_dev_info_head, dev_node)
+ if (dev_info->pdev == pdev)
+ return dev_info;
+
+ return NULL;
+}
+
+static void dwc_pcie_unregister_pmu(void *data)
+{
+ struct dwc_pcie_pmu *pcie_pmu = data;
+
+ perf_pmu_unregister(&pcie_pmu->pmu);
+}
+
+static u16 dwc_pcie_des_cap(struct pci_dev *pdev)
+{
+ const struct dwc_pcie_vsec_id *vid;
+ u16 vsec;
+ u32 val;
+
+ if (!pci_is_pcie(pdev) || !(pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT))
+ return 0;
+
+ for (vid = dwc_pcie_rasdes_vsec_ids; vid->vendor_id; vid++) {
+ vsec = pci_find_vsec_capability(pdev, vid->vendor_id,
+ vid->vsec_id);
+ if (vsec) {
+ pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER,
+ &val);
+ if (PCI_VNDR_HEADER_REV(val) == vid->vsec_rev) {
+ pci_dbg(pdev, "Detected PCIe Vendor-Specific Extended Capability RAS DES\n");
+ return vsec;
+ }
+ }
+ }
+ return 0;
+}
+
+static void dwc_pcie_unregister_dev(struct dwc_pcie_dev_info *dev_info)
+{
+ platform_device_unregister(dev_info->plat_dev);
+ list_del(&dev_info->dev_node);
+ kfree(dev_info);
+}
+
+static int dwc_pcie_register_dev(struct pci_dev *pdev)
+{
+ struct platform_device *plat_dev;
+ struct dwc_pcie_dev_info *dev_info;
+ u32 sbdf;
+
+ sbdf = (pci_domain_nr(pdev->bus) << 16) | PCI_DEVID(pdev->bus->number, pdev->devfn);
+ plat_dev = platform_device_register_simple("dwc_pcie_pmu", sbdf, NULL, 0);
+ if (IS_ERR(plat_dev))
+ return PTR_ERR(plat_dev);
+
+ dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
+ if (!dev_info) {
+ platform_device_unregister(plat_dev);
+ return -ENOMEM;
+ }
+
+ /* Cache platform device to handle pci device hotplug */
+ dev_info->plat_dev = plat_dev;
+ dev_info->pdev = pdev;
+ list_add(&dev_info->dev_node, &dwc_pcie_dev_info_head);
+
+ return 0;
+}
+
+static int dwc_pcie_pmu_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct dwc_pcie_dev_info *dev_info;
+
+ switch (action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ if (!dwc_pcie_des_cap(pdev))
+ return NOTIFY_DONE;
+ if (dwc_pcie_register_dev(pdev))
+ return NOTIFY_BAD;
+ break;
+ case BUS_NOTIFY_DEL_DEVICE:
+ dev_info = dwc_pcie_find_dev_info(pdev);
+ if (!dev_info)
+ return NOTIFY_DONE;
+ dwc_pcie_unregister_dev(dev_info);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block dwc_pcie_pmu_nb = {
+ .notifier_call = dwc_pcie_pmu_notifier,
+};
+
+static int dwc_pcie_pmu_probe(struct platform_device *plat_dev)
+{
+ struct pci_dev *pdev;
+ struct dwc_pcie_pmu *pcie_pmu;
+ char *name;
+ u32 sbdf;
+ u16 vsec;
+ int ret;
+
+ sbdf = plat_dev->id;
+ pdev = pci_get_domain_bus_and_slot(sbdf >> 16, PCI_BUS_NUM(sbdf & 0xffff),
+ sbdf & 0xff);
+ if (!pdev) {
+ pr_err("No pdev found for the sbdf 0x%x\n", sbdf);
+ return -ENODEV;
+ }
+
+ vsec = dwc_pcie_des_cap(pdev);
+ if (!vsec)
+ return -ENODEV;
+
+ pci_dev_put(pdev);
+ name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", sbdf);
+ if (!name)
+ return -ENOMEM;
+
+ pcie_pmu = devm_kzalloc(&plat_dev->dev, sizeof(*pcie_pmu), GFP_KERNEL);
+ if (!pcie_pmu)
+ return -ENOMEM;
+
+ pcie_pmu->pdev = pdev;
+ pcie_pmu->ras_des_offset = vsec;
+ pcie_pmu->nr_lanes = pcie_get_width_cap(pdev);
+ pcie_pmu->on_cpu = -1;
+ pcie_pmu->pmu = (struct pmu){
+ .name = name,
+ .parent = &plat_dev->dev,
+ .module = THIS_MODULE,
+ .attr_groups = dwc_pcie_attr_groups,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = dwc_pcie_pmu_event_init,
+ .add = dwc_pcie_pmu_event_add,
+ .del = dwc_pcie_pmu_event_del,
+ .start = dwc_pcie_pmu_event_start,
+ .stop = dwc_pcie_pmu_event_stop,
+ .read = dwc_pcie_pmu_event_update,
+ };
+
+ /* Add this instance to the list used by the offline callback */
+ ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state,
+ &pcie_pmu->cpuhp_node);
+ if (ret) {
+ pci_err(pdev, "Error %d registering hotplug @%x\n", ret, sbdf);
+ return ret;
+ }
+
+ /* Unwind when platform driver removes */
+ ret = devm_add_action_or_reset(&plat_dev->dev,
+ dwc_pcie_pmu_remove_cpuhp_instance,
+ &pcie_pmu->cpuhp_node);
+ if (ret)
+ return ret;
+
+ ret = perf_pmu_register(&pcie_pmu->pmu, name, -1);
+ if (ret) {
+ pci_err(pdev, "Error %d registering PMU @%x\n", ret, sbdf);
+ return ret;
+ }
+ ret = devm_add_action_or_reset(&plat_dev->dev, dwc_pcie_unregister_pmu,
+ pcie_pmu);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int dwc_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct dwc_pcie_pmu *pcie_pmu;
+
+ pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
+ if (pcie_pmu->on_cpu == -1)
+ pcie_pmu->on_cpu = cpumask_local_spread(
+ 0, dev_to_node(&pcie_pmu->pdev->dev));
+
+ return 0;
+}
+
+static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct dwc_pcie_pmu *pcie_pmu;
+ struct pci_dev *pdev;
+ unsigned int target;
+ int node;
+
+ pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
+ /* Nothing to do if this CPU doesn't own the PMU */
+ if (cpu != pcie_pmu->on_cpu)
+ return 0;
+
+ pcie_pmu->on_cpu = -1;
+ pdev = pcie_pmu->pdev;
+ node = dev_to_node(&pdev->dev);
+
+ target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ if (target >= nr_cpu_ids) {
+ pci_err(pdev, "There is no CPU to set\n");
+ return 0;
+ }
+
+ /* This PMU does NOT support interrupt, just migrate context. */
+ perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target);
+ pcie_pmu->on_cpu = target;
+
+ return 0;
+}
+
+static struct platform_driver dwc_pcie_pmu_driver = {
+ .probe = dwc_pcie_pmu_probe,
+ .driver = {.name = "dwc_pcie_pmu",},
+};
+
+static void dwc_pcie_cleanup_devices(void)
+{
+ struct dwc_pcie_dev_info *dev_info, *tmp;
+
+ list_for_each_entry_safe(dev_info, tmp, &dwc_pcie_dev_info_head, dev_node) {
+ dwc_pcie_unregister_dev(dev_info);
+ }
+}
+
+static int __init dwc_pcie_pmu_init(void)
+{
+ struct pci_dev *pdev = NULL;
+ int ret;
+
+ for_each_pci_dev(pdev) {
+ if (!dwc_pcie_des_cap(pdev))
+ continue;
+
+ ret = dwc_pcie_register_dev(pdev);
+ if (ret) {
+ pci_dev_put(pdev);
+ goto err_cleanup;
+ }
+ }
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "perf/dwc_pcie_pmu:online",
+ dwc_pcie_pmu_online_cpu,
+ dwc_pcie_pmu_offline_cpu);
+ if (ret < 0)
+ goto err_cleanup;
+
+ dwc_pcie_pmu_hp_state = ret;
+
+ ret = platform_driver_register(&dwc_pcie_pmu_driver);
+ if (ret)
+ goto err_remove_cpuhp;
+
+ ret = bus_register_notifier(&pci_bus_type, &dwc_pcie_pmu_nb);
+ if (ret)
+ goto err_unregister_driver;
+ notify = true;
+
+ return 0;
+
+err_unregister_driver:
+ platform_driver_unregister(&dwc_pcie_pmu_driver);
+err_remove_cpuhp:
+ cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state);
+err_cleanup:
+ dwc_pcie_cleanup_devices();
+ return ret;
+}
+
+static void __exit dwc_pcie_pmu_exit(void)
+{
+ if (notify)
+ bus_unregister_notifier(&pci_bus_type, &dwc_pcie_pmu_nb);
+ dwc_pcie_cleanup_devices();
+ platform_driver_unregister(&dwc_pcie_pmu_driver);
+ cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state);
+}
+
+module_init(dwc_pcie_pmu_init);
+module_exit(dwc_pcie_pmu_exit);
+
+MODULE_DESCRIPTION("PMU driver for DesignWare Cores PCI Express Controller");
+MODULE_AUTHOR("Shuai Xue <xueshuai@linux.alibaba.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 8e058e08fe81..bcdf5575d71c 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -5,21 +5,23 @@
*/
#include <linux/bitfield.h>
+#include <linux/clk.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/perf_event.h>
+#include <linux/platform_device.h>
#include <linux/slab.h>
#define COUNTER_CNTL 0x0
#define COUNTER_READ 0x20
#define COUNTER_DPCR1 0x30
+#define COUNTER_MUX_CNTL 0x50
+#define COUNTER_MASK_COMP 0x54
#define CNTL_OVER 0x1
#define CNTL_CLEAR 0x2
@@ -28,29 +30,50 @@
#define CNTL_CLEAR_MASK 0xFFFFFFFD
#define CNTL_OVER_MASK 0xFFFFFFFE
+#define CNTL_CP_SHIFT 16
+#define CNTL_CP_MASK (0xFF << CNTL_CP_SHIFT)
#define CNTL_CSV_SHIFT 24
#define CNTL_CSV_MASK (0xFFU << CNTL_CSV_SHIFT)
+#define READ_PORT_SHIFT 0
+#define READ_PORT_MASK (0x7 << READ_PORT_SHIFT)
+#define READ_CHANNEL_REVERT 0x00000008 /* bit 3 for read channel select */
+#define WRITE_PORT_SHIFT 8
+#define WRITE_PORT_MASK (0x7 << WRITE_PORT_SHIFT)
+#define WRITE_CHANNEL_REVERT 0x00000800 /* bit 11 for write channel select */
+
#define EVENT_CYCLES_ID 0
#define EVENT_CYCLES_COUNTER 0
#define NUM_COUNTERS 4
+/* For removing bias if cycle counter CNTL.CP is set to 0xf0 */
+#define CYCLES_COUNTER_MASK 0x0FFFFFFF
#define AXI_MASKING_REVERT 0xffff0000 /* AXI_MASKING(MSB 16bits) + AXI_ID(LSB 16bits) */
#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu)
#define DDR_PERF_DEV_NAME "imx8_ddr"
+#define DB_PERF_DEV_NAME "imx8_db"
#define DDR_CPUHP_CB_NAME DDR_PERF_DEV_NAME "_perf_pmu"
static DEFINE_IDA(ddr_ida);
+static DEFINE_IDA(db_ida);
/* DDR Perf hardware feature */
#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */
#define DDR_CAP_AXI_ID_FILTER_ENHANCED 0x3 /* support enhanced AXI ID filter */
+#define DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER 0x4 /* support AXI ID PORT CHANNEL filter */
+
+/* Perf type */
+enum fsl_ddr_type {
+ DDR_PERF_TYPE = 0, /* ddr Perf (default) */
+ DB_PERF_TYPE, /* db Perf */
+};
struct fsl_ddr_devtype_data {
unsigned int quirks; /* quirks needed for different DDR Perf core */
const char *identifier; /* system PMU identifier for userspace */
+ enum fsl_ddr_type type; /* types of Perf, ddr or db */
};
static const struct fsl_ddr_devtype_data imx8_devtype_data;
@@ -79,6 +102,17 @@ static const struct fsl_ddr_devtype_data imx8mp_devtype_data = {
.identifier = "i.MX8MP",
};
+static const struct fsl_ddr_devtype_data imx8dxl_devtype_data = {
+ .quirks = DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER,
+ .identifier = "i.MX8DXL",
+};
+
+static const struct fsl_ddr_devtype_data imx8dxl_db_devtype_data = {
+ .quirks = DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER,
+ .identifier = "i.MX8DXL",
+ .type = DB_PERF_TYPE,
+};
+
static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},
@@ -86,6 +120,8 @@ static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
{ .compatible = "fsl,imx8mm-ddr-pmu", .data = &imx8mm_devtype_data},
{ .compatible = "fsl,imx8mn-ddr-pmu", .data = &imx8mn_devtype_data},
{ .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data},
+ { .compatible = "fsl,imx8dxl-ddr-pmu", .data = &imx8dxl_devtype_data},
+ { .compatible = "fsl,imx8dxl-db-pmu", .data = &imx8dxl_db_devtype_data},
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
@@ -97,11 +133,11 @@ struct ddr_pmu {
struct hlist_node node;
struct device *dev;
struct perf_event *events[NUM_COUNTERS];
- int active_events;
enum cpuhp_state cpuhp_state;
const struct fsl_ddr_devtype_data *devtype_data;
int irq;
int id;
+ int active_counter;
};
static ssize_t ddr_perf_identifier_show(struct device *dev,
@@ -141,6 +177,7 @@ static const struct attribute_group ddr_perf_identifier_attr_group = {
enum ddr_perf_filter_capabilities {
PERF_CAP_AXI_ID_FILTER = 0,
PERF_CAP_AXI_ID_FILTER_ENHANCED,
+ PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER,
PERF_CAP_AXI_ID_FEAT_MAX,
};
@@ -154,6 +191,8 @@ static u32 ddr_perf_filter_cap_get(struct ddr_pmu *pmu, int cap)
case PERF_CAP_AXI_ID_FILTER_ENHANCED:
quirks &= DDR_CAP_AXI_ID_FILTER_ENHANCED;
return quirks == DDR_CAP_AXI_ID_FILTER_ENHANCED;
+ case PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER:
+ return !!(quirks & DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER);
default:
WARN(1, "unknown filter cap %d\n", cap);
}
@@ -184,6 +223,7 @@ static ssize_t ddr_perf_filter_cap_show(struct device *dev,
static struct attribute *ddr_perf_filter_cap_attr[] = {
PERF_FILTER_EXT_ATTR_ENTRY(filter, PERF_CAP_AXI_ID_FILTER),
PERF_FILTER_EXT_ATTR_ENTRY(enhanced_filter, PERF_CAP_AXI_ID_FILTER_ENHANCED),
+ PERF_FILTER_EXT_ATTR_ENTRY(super_filter, PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER),
NULL,
};
@@ -261,19 +301,51 @@ static struct attribute *ddr_perf_events_attrs[] = {
NULL,
};
+static const int ddr_perf_db_visible_event_list[] = {
+ EVENT_CYCLES_ID,
+ 0x41,
+ 0x42,
+};
+
+static umode_t ddr_perf_events_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct ddr_pmu *pmu = dev_get_drvdata(dev);
+ struct perf_pmu_events_attr *pmu_attr;
+ unsigned int i;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
+
+ if (pmu->devtype_data->type == DDR_PERF_TYPE)
+ return attr->mode;
+
+ /* DB Type */
+ for (i = 0; i < ARRAY_SIZE(ddr_perf_db_visible_event_list); i++)
+ if (pmu_attr->id == ddr_perf_db_visible_event_list[i])
+ return attr->mode;
+
+ return 0;
+}
+
static const struct attribute_group ddr_perf_events_attr_group = {
.name = "events",
.attrs = ddr_perf_events_attrs,
+ .is_visible = ddr_perf_events_attrs_is_visible,
};
PMU_FORMAT_ATTR(event, "config:0-7");
PMU_FORMAT_ATTR(axi_id, "config1:0-15");
PMU_FORMAT_ATTR(axi_mask, "config1:16-31");
+PMU_FORMAT_ATTR(axi_port, "config2:0-2");
+PMU_FORMAT_ATTR(axi_channel, "config2:3-3");
static struct attribute *ddr_perf_format_attrs[] = {
&format_attr_event.attr,
&format_attr_axi_id.attr,
&format_attr_axi_mask.attr,
+ &format_attr_axi_port.attr,
+ &format_attr_axi_channel.attr,
NULL,
};
@@ -428,6 +500,17 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
writel(0, pmu->base + reg);
val = CNTL_EN | CNTL_CLEAR;
val |= FIELD_PREP(CNTL_CSV_MASK, config);
+
+ /*
+ * On i.MX8MP we need to bias the cycle counter to overflow more often.
+ * We do this by initializing bits [23:16] of the counter value via the
+ * COUNTER_CTRL Counter Parameter (CP) field.
+ */
+ if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+ if (counter == EVENT_CYCLES_COUNTER)
+ val |= FIELD_PREP(CNTL_CP_MASK, 0xf0);
+ }
+
writel(val, pmu->base + reg);
} else {
/* Disable counter */
@@ -467,6 +550,12 @@ static void ddr_perf_event_update(struct perf_event *event)
int ret;
new_raw_count = ddr_perf_read_counter(pmu, counter);
+ /* Remove the bias applied in ddr_perf_counter_enable(). */
+ if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+ if (counter == EVENT_CYCLES_COUNTER)
+ new_raw_count &= CYCLES_COUNTER_MASK;
+ }
+
local64_add(new_raw_count, &event->count);
/*
@@ -496,6 +585,10 @@ static void ddr_perf_event_start(struct perf_event *event, int flags)
ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
+ if (!pmu->active_counter++)
+ ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER, true);
+
hwc->state = 0;
}
@@ -506,6 +599,7 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
int counter;
int cfg = event->attr.config;
int cfg1 = event->attr.config1;
+ int cfg2 = event->attr.config2;
if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
int i;
@@ -529,8 +623,27 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
return -EOPNOTSUPP;
}
+ if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER) {
+ if (ddr_perf_is_filtered(event)) {
+ /* revert axi id masking(axi_mask) value */
+ cfg1 ^= AXI_MASKING_REVERT;
+ writel(cfg1, pmu->base + COUNTER_MASK_COMP + ((counter - 1) << 4));
+
+ if (cfg == 0x41) {
+ /* revert axi read channel(axi_channel) value */
+ cfg2 ^= READ_CHANNEL_REVERT;
+ cfg2 |= FIELD_PREP(READ_PORT_MASK, cfg2);
+ } else {
+ /* revert axi write channel(axi_channel) value */
+ cfg2 ^= WRITE_CHANNEL_REVERT;
+ cfg2 |= FIELD_PREP(WRITE_PORT_MASK, cfg2);
+ }
+
+ writel(cfg2, pmu->base + COUNTER_MUX_CNTL + ((counter - 1) << 4));
+ }
+ }
+
pmu->events[counter] = event;
- pmu->active_events++;
hwc->idx = counter;
hwc->state |= PERF_HES_STOPPED;
@@ -550,6 +663,10 @@ static void ddr_perf_event_stop(struct perf_event *event, int flags)
ddr_perf_counter_enable(pmu, event->attr.config, counter, false);
ddr_perf_event_update(event);
+ if (!--pmu->active_counter)
+ ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER, false);
+
hwc->state |= PERF_HES_STOPPED;
}
@@ -562,39 +679,24 @@ static void ddr_perf_event_del(struct perf_event *event, int flags)
ddr_perf_event_stop(event, PERF_EF_UPDATE);
ddr_perf_free_counter(pmu, counter);
- pmu->active_events--;
hwc->idx = -1;
}
static void ddr_perf_pmu_enable(struct pmu *pmu)
{
- struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
-
- /* enable cycle counter if cycle is not active event list */
- if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
- ddr_perf_counter_enable(ddr_pmu,
- EVENT_CYCLES_ID,
- EVENT_CYCLES_COUNTER,
- true);
}
static void ddr_perf_pmu_disable(struct pmu *pmu)
{
- struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
-
- if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
- ddr_perf_counter_enable(ddr_pmu,
- EVENT_CYCLES_ID,
- EVENT_CYCLES_COUNTER,
- false);
}
-static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
- struct device *dev)
+static void ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
+ struct device *dev)
{
*pmu = (struct ddr_pmu) {
.pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.attr_groups = attr_groups,
@@ -610,9 +712,6 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
.base = base,
.dev = dev,
};
-
- pmu->id = ida_alloc(&ddr_ida, GFP_KERNEL);
- return pmu->id;
}
static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
@@ -678,10 +777,13 @@ static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
static int ddr_perf_probe(struct platform_device *pdev)
{
+ struct clk_bulk_data *clks;
struct ddr_pmu *pmu;
struct device_node *np;
void __iomem *base;
+ struct ida *ida;
char *name;
+ int nclks;
int num;
int ret;
int irq;
@@ -696,19 +798,33 @@ static int ddr_perf_probe(struct platform_device *pdev)
if (!pmu)
return -ENOMEM;
- num = ddr_perf_init(pmu, base, &pdev->dev);
+ ddr_perf_init(pmu, base, &pdev->dev);
platform_set_drvdata(pdev, pmu);
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d",
- num);
+ nclks = devm_clk_bulk_get_all_enabled(&pdev->dev, &clks);
+ if (nclks < 0)
+ return dev_err_probe(&pdev->dev, nclks, "Failure get clks\n");
+
+ pmu->devtype_data = of_device_get_match_data(&pdev->dev);
+
+ ida = pmu->devtype_data->type == DDR_PERF_TYPE ? &ddr_ida : &db_ida;
+ num = ida_alloc(ida, GFP_KERNEL);
+ if (num < 0)
+ return num;
+
+ pmu->id = num;
+
+ if (pmu->devtype_data->type == DDR_PERF_TYPE)
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", num);
+ else
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DB_PERF_DEV_NAME "%d", num);
+
if (!name) {
ret = -ENOMEM;
- goto cpuhp_state_err;
+ goto idr_free;
}
- pmu->devtype_data = of_device_get_match_data(&pdev->dev);
-
pmu->cpu = raw_smp_processor_id();
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
DDR_CPUHP_CB_NAME,
@@ -717,7 +833,7 @@ static int ddr_perf_probe(struct platform_device *pdev)
if (ret < 0) {
dev_err(&pdev->dev, "cpuhp_setup_state_multi failed\n");
- goto cpuhp_state_err;
+ goto idr_free;
}
pmu->cpuhp_state = ret;
@@ -764,13 +880,13 @@ ddr_perf_err:
cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
cpuhp_instance_err:
cpuhp_remove_multi_state(pmu->cpuhp_state);
-cpuhp_state_err:
- ida_free(&ddr_ida, pmu->id);
+idr_free:
+ ida_free(ida, pmu->id);
dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret);
return ret;
}
-static int ddr_perf_remove(struct platform_device *pdev)
+static void ddr_perf_remove(struct platform_device *pdev)
{
struct ddr_pmu *pmu = platform_get_drvdata(pdev);
@@ -779,8 +895,11 @@ static int ddr_perf_remove(struct platform_device *pdev)
perf_pmu_unregister(&pmu->pmu);
- ida_free(&ddr_ida, pmu->id);
- return 0;
+ if (pmu->devtype_data->type == DDR_PERF_TYPE)
+ ida_free(&ddr_ida, pmu->id);
+ else
+ ida_free(&db_ida, pmu->id);
+
}
static struct platform_driver imx_ddr_pmu_driver = {
@@ -794,4 +913,5 @@ static struct platform_driver imx_ddr_pmu_driver = {
};
module_platform_driver(imx_ddr_pmu_driver);
+MODULE_DESCRIPTION("Freescale i.MX8 DDR Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
new file mode 100644
index 000000000000..7050b48c0467
--- /dev/null
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -0,0 +1,887 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2023 NXP
+
+#include <linux/bitfield.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/perf_event.h>
+
+/* Performance monitor configuration */
+#define PMCFG1 0x00
+#define MX93_PMCFG1_RD_TRANS_FILT_EN BIT(31)
+#define MX93_PMCFG1_WR_TRANS_FILT_EN BIT(30)
+#define MX93_PMCFG1_RD_BT_FILT_EN BIT(29)
+#define MX93_PMCFG1_ID_MASK GENMASK(17, 0)
+
+#define MX95_PMCFG1_WR_BEAT_FILT_EN BIT(31)
+#define MX95_PMCFG1_RD_BEAT_FILT_EN BIT(30)
+
+#define PMCFG2 0x04
+#define MX93_PMCFG2_ID GENMASK(17, 0)
+
+#define PMCFG3 0x08
+#define PMCFG4 0x0C
+#define PMCFG5 0x10
+#define PMCFG6 0x14
+#define MX95_PMCFG_ID_MASK GENMASK(9, 0)
+#define MX95_PMCFG_ID GENMASK(25, 16)
+
+/* Global control register affects all counters and takes priority over local control registers */
+#define PMGC0 0x40
+/* Global control register bits */
+#define PMGC0_FAC BIT(31)
+#define PMGC0_PMIE BIT(30)
+#define PMGC0_FCECE BIT(29)
+
+/*
+ * 64bit counter0 exclusively dedicated to counting cycles
+ * 32bit counters monitor counter-specific events in addition to counting reference events
+ */
+#define PMLCA(n) (0x40 + 0x10 + (0x10 * n))
+#define PMLCB(n) (0x40 + 0x14 + (0x10 * n))
+#define PMC(n) (0x40 + 0x18 + (0x10 * n))
+/* Local control register bits */
+#define PMLCA_FC BIT(31)
+#define PMLCA_CE BIT(26)
+#define PMLCA_EVENT GENMASK(22, 16)
+
+#define NUM_COUNTERS 11
+#define CYCLES_COUNTER 0
+#define CYCLES_EVENT_ID 0
+
+#define CONFIG_EVENT_MASK GENMASK(7, 0)
+#define CONFIG_COUNTER_MASK GENMASK(23, 16)
+
+#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu)
+
+#define DDR_PERF_DEV_NAME "imx9_ddr"
+#define DDR_CPUHP_CB_NAME DDR_PERF_DEV_NAME "_perf_pmu"
+
+static DEFINE_IDA(ddr_ida);
+
+/*
+ * V1 support 1 read transaction, 1 write transaction and 1 read beats
+ * event which corresponding respecitively to counter 2, 3 and 4.
+ */
+#define DDR_PERF_AXI_FILTER_V1 0x1
+
+/*
+ * V2 support 1 read beats and 3 write beats events which corresponding
+ * respecitively to counter 2-5.
+ */
+#define DDR_PERF_AXI_FILTER_V2 0x2
+
+struct imx_ddr_devtype_data {
+ const char *identifier; /* system PMU identifier for userspace */
+ unsigned int filter_ver; /* AXI filter version */
+};
+
+struct ddr_pmu {
+ struct pmu pmu;
+ void __iomem *base;
+ unsigned int cpu;
+ struct hlist_node node;
+ struct device *dev;
+ struct perf_event *events[NUM_COUNTERS];
+ int active_events;
+ enum cpuhp_state cpuhp_state;
+ const struct imx_ddr_devtype_data *devtype_data;
+ int irq;
+ int id;
+};
+
+static const struct imx_ddr_devtype_data imx91_devtype_data = {
+ .identifier = "imx91",
+ .filter_ver = DDR_PERF_AXI_FILTER_V1
+};
+
+static const struct imx_ddr_devtype_data imx93_devtype_data = {
+ .identifier = "imx93",
+ .filter_ver = DDR_PERF_AXI_FILTER_V1
+};
+
+static const struct imx_ddr_devtype_data imx94_devtype_data = {
+ .identifier = "imx94",
+ .filter_ver = DDR_PERF_AXI_FILTER_V2
+};
+
+static const struct imx_ddr_devtype_data imx95_devtype_data = {
+ .identifier = "imx95",
+ .filter_ver = DDR_PERF_AXI_FILTER_V2
+};
+
+static inline bool axi_filter_v1(struct ddr_pmu *pmu)
+{
+ return pmu->devtype_data->filter_ver == DDR_PERF_AXI_FILTER_V1;
+}
+
+static inline bool axi_filter_v2(struct ddr_pmu *pmu)
+{
+ return pmu->devtype_data->filter_ver == DDR_PERF_AXI_FILTER_V2;
+}
+
+static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
+ { .compatible = "fsl,imx91-ddr-pmu", .data = &imx91_devtype_data },
+ { .compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data },
+ { .compatible = "fsl,imx94-ddr-pmu", .data = &imx94_devtype_data },
+ { .compatible = "fsl,imx95-ddr-pmu", .data = &imx95_devtype_data },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
+
+static ssize_t ddr_perf_identifier_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+ return sysfs_emit(page, "%s\n", pmu->devtype_data->identifier);
+}
+
+static struct device_attribute ddr_perf_identifier_attr =
+ __ATTR(identifier, 0444, ddr_perf_identifier_show, NULL);
+
+static struct attribute *ddr_perf_identifier_attrs[] = {
+ &ddr_perf_identifier_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ddr_perf_identifier_attr_group = {
+ .attrs = ddr_perf_identifier_attrs,
+};
+
+static ssize_t ddr_perf_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute ddr_perf_cpumask_attr =
+ __ATTR(cpumask, 0444, ddr_perf_cpumask_show, NULL);
+
+static struct attribute *ddr_perf_cpumask_attrs[] = {
+ &ddr_perf_cpumask_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group ddr_perf_cpumask_attr_group = {
+ .attrs = ddr_perf_cpumask_attrs,
+};
+
+struct imx9_pmu_events_attr {
+ struct device_attribute attr;
+ u64 id;
+ const struct imx_ddr_devtype_data *devtype_data;
+};
+
+static ssize_t ddr_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct imx9_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct imx9_pmu_events_attr, attr);
+ return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define COUNTER_OFFSET_IN_EVENT 8
+#define ID(counter, id) ((counter << COUNTER_OFFSET_IN_EVENT) | id)
+
+#define DDR_PMU_EVENT_ATTR_COMM(_name, _id, _data) \
+ (&((struct imx9_pmu_events_attr[]) { \
+ { .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\
+ .id = _id, \
+ .devtype_data = _data, } \
+ })[0].attr.attr)
+
+#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \
+ DDR_PMU_EVENT_ATTR_COMM(_name, _id, NULL)
+
+#define IMX93_DDR_PMU_EVENT_ATTR(_name, _id) \
+ DDR_PMU_EVENT_ATTR_COMM(_name, _id, &imx93_devtype_data)
+
+#define IMX95_DDR_PMU_EVENT_ATTR(_name, _id) \
+ DDR_PMU_EVENT_ATTR_COMM(_name, _id, &imx95_devtype_data)
+
+static struct attribute *ddr_perf_events_attrs[] = {
+ /* counter0 cycles event */
+ IMX9_DDR_PMU_EVENT_ATTR(cycles, 0),
+
+ /* reference events for all normal counters, need assert DEBUG19[21] bit */
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ddrc1_rmw_for_ecc, 12),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_rreorder, 13),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_wreorder, 14),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_0, 15),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_1, 16),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_2, 17),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_3, 18),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_4, 19),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_5, 22),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_6, 23),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_7, 24),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_8, 25),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_9, 26),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_10, 27),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_11, 28),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_12, 31),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_13, 59),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_15, 61),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_29, 63),
+
+ /* counter1 specific events */
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, ID(1, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, ID(1, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, ID(1, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, ID(1, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, ID(1, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, ID(1, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, ID(1, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, ID(1, 71)),
+
+ /* counter2 specific events */
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, ID(2, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, ID(2, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, ID(2, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, ID(2, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, ID(2, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, ID(2, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, ID(2, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, ID(2, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, ID(2, 72)),
+ IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, ID(2, 73)), /* imx93 specific*/
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_beat_filt, ID(2, 73)), /* imx95 specific*/
+
+ /* counter3 specific events */
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, ID(3, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, ID(3, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, ID(3, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, ID(3, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, ID(3, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, ID(3, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, ID(3, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, ID(3, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, ID(3, 72)),
+ IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, ID(3, 73)), /* imx93 specific*/
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt2, ID(3, 73)), /* imx95 specific*/
+
+ /* counter4 specific events */
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, ID(4, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, ID(4, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, ID(4, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, ID(4, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, ID(4, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, ID(4, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, ID(4, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, ID(4, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, ID(4, 72)),
+ IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, ID(4, 73)), /* imx93 specific*/
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt1, ID(4, 73)), /* imx95 specific*/
+
+ /* counter5 specific events */
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, ID(5, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, ID(5, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, ID(5, 66)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, ID(5, 67)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, ID(5, 68)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, ID(5, 69)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, ID(5, 70)),
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, ID(5, 71)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, ID(5, 72)),
+ IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt0, ID(5, 73)), /* imx95 specific*/
+
+ /* counter6 specific events */
+ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, ID(6, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, ID(6, 72)),
+
+ /* counter7 specific events */
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, ID(7, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, ID(7, 65)),
+
+ /* counter8 specific events */
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, ID(8, 64)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, ID(8, 65)),
+
+ /* counter9 specific events */
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, ID(9, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, ID(9, 66)),
+
+ /* counter10 specific events */
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, ID(10, 65)),
+ IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, ID(10, 66)),
+ NULL,
+};
+
+static umode_t
+ddr_perf_events_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int unused)
+{
+ struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj));
+ struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+ struct imx9_pmu_events_attr *eattr;
+
+ eattr = container_of(attr, typeof(*eattr), attr.attr);
+
+ if (!eattr->devtype_data)
+ return attr->mode;
+
+ if (eattr->devtype_data != ddr_pmu->devtype_data &&
+ eattr->devtype_data->filter_ver != ddr_pmu->devtype_data->filter_ver)
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group ddr_perf_events_attr_group = {
+ .name = "events",
+ .attrs = ddr_perf_events_attrs,
+ .is_visible = ddr_perf_events_attrs_is_visible,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7,16-23");
+PMU_FORMAT_ATTR(counter, "config:8-15");
+PMU_FORMAT_ATTR(axi_id, "config1:0-17");
+PMU_FORMAT_ATTR(axi_mask, "config2:0-17");
+
+static struct attribute *ddr_perf_format_attrs[] = {
+ &format_attr_event.attr,
+ &format_attr_counter.attr,
+ &format_attr_axi_id.attr,
+ &format_attr_axi_mask.attr,
+ NULL,
+};
+
+static const struct attribute_group ddr_perf_format_attr_group = {
+ .name = "format",
+ .attrs = ddr_perf_format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+ &ddr_perf_identifier_attr_group,
+ &ddr_perf_cpumask_attr_group,
+ &ddr_perf_events_attr_group,
+ &ddr_perf_format_attr_group,
+ NULL,
+};
+
+static void ddr_perf_clear_counter(struct ddr_pmu *pmu, int counter)
+{
+ if (counter == CYCLES_COUNTER) {
+ writel(0, pmu->base + PMC(counter) + 0x4);
+ writel(0, pmu->base + PMC(counter));
+ } else {
+ writel(0, pmu->base + PMC(counter));
+ }
+}
+
+static u64 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter)
+{
+ u32 val_lower, val_upper;
+ u64 val;
+
+ if (counter != CYCLES_COUNTER) {
+ val = readl_relaxed(pmu->base + PMC(counter));
+ goto out;
+ }
+
+ /* special handling for reading 64bit cycle counter */
+ do {
+ val_upper = readl_relaxed(pmu->base + PMC(counter) + 0x4);
+ val_lower = readl_relaxed(pmu->base + PMC(counter));
+ } while (val_upper != readl_relaxed(pmu->base + PMC(counter) + 0x4));
+
+ val = val_upper;
+ val = (val << 32);
+ val |= val_lower;
+out:
+ return val;
+}
+
+static void ddr_perf_counter_global_config(struct ddr_pmu *pmu, bool enable)
+{
+ u32 ctrl;
+
+ ctrl = readl_relaxed(pmu->base + PMGC0);
+
+ if (enable) {
+ /*
+ * The performance monitor must be reset before event counting
+ * sequences. The performance monitor can be reset by first freezing
+ * one or more counters and then clearing the freeze condition to
+ * allow the counters to count according to the settings in the
+ * performance monitor registers. Counters can be frozen individually
+ * by setting PMLCAn[FC] bits, or simultaneously by setting PMGC0[FAC].
+ * Simply clearing these freeze bits will then allow the performance
+ * monitor to begin counting based on the register settings.
+ */
+ ctrl |= PMGC0_FAC;
+ writel(ctrl, pmu->base + PMGC0);
+
+ /*
+ * Freeze all counters disabled, interrupt enabled, and freeze
+ * counters on condition enabled.
+ */
+ ctrl &= ~PMGC0_FAC;
+ ctrl |= PMGC0_PMIE | PMGC0_FCECE;
+ writel(ctrl, pmu->base + PMGC0);
+ } else {
+ ctrl |= PMGC0_FAC;
+ ctrl &= ~(PMGC0_PMIE | PMGC0_FCECE);
+ writel(ctrl, pmu->base + PMGC0);
+ }
+}
+
+static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config,
+ int counter, bool enable)
+{
+ u32 ctrl_a;
+ int event;
+
+ ctrl_a = readl_relaxed(pmu->base + PMLCA(counter));
+ event = FIELD_GET(CONFIG_EVENT_MASK, config);
+
+ if (enable) {
+ ctrl_a |= PMLCA_FC;
+ writel(ctrl_a, pmu->base + PMLCA(counter));
+
+ ddr_perf_clear_counter(pmu, counter);
+
+ /* Freeze counter disabled, condition enabled, and program event.*/
+ ctrl_a &= ~PMLCA_FC;
+ ctrl_a |= PMLCA_CE;
+ ctrl_a &= ~FIELD_PREP(PMLCA_EVENT, 0x7F);
+ ctrl_a |= FIELD_PREP(PMLCA_EVENT, event);
+ writel(ctrl_a, pmu->base + PMLCA(counter));
+ } else {
+ /* Freeze counter. */
+ ctrl_a |= PMLCA_FC;
+ writel(ctrl_a, pmu->base + PMLCA(counter));
+ }
+}
+
+static void imx93_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event,
+ int counter, int axi_id, int axi_mask)
+{
+ u32 pmcfg1, pmcfg2;
+ static const u32 mask[] = {
+ MX93_PMCFG1_RD_TRANS_FILT_EN,
+ MX93_PMCFG1_WR_TRANS_FILT_EN,
+ MX93_PMCFG1_RD_BT_FILT_EN
+ };
+
+ pmcfg1 = readl_relaxed(pmu->base + PMCFG1);
+
+ if (counter >= 2 && counter <= 4)
+ pmcfg1 = event == 73 ? pmcfg1 | mask[counter - 2] :
+ pmcfg1 & ~mask[counter - 2];
+
+ pmcfg1 &= ~FIELD_PREP(MX93_PMCFG1_ID_MASK, 0x3FFFF);
+ pmcfg1 |= FIELD_PREP(MX93_PMCFG1_ID_MASK, axi_mask);
+ writel_relaxed(pmcfg1, pmu->base + PMCFG1);
+
+ pmcfg2 = readl_relaxed(pmu->base + PMCFG2);
+ pmcfg2 &= ~FIELD_PREP(MX93_PMCFG2_ID, 0x3FFFF);
+ pmcfg2 |= FIELD_PREP(MX93_PMCFG2_ID, axi_id);
+ writel_relaxed(pmcfg2, pmu->base + PMCFG2);
+}
+
+static void imx95_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event,
+ int counter, int axi_id, int axi_mask)
+{
+ u32 pmcfg1, pmcfg, offset = 0;
+
+ pmcfg1 = readl_relaxed(pmu->base + PMCFG1);
+
+ if (event == 73) {
+ switch (counter) {
+ case 2:
+ pmcfg1 |= MX95_PMCFG1_WR_BEAT_FILT_EN;
+ offset = PMCFG3;
+ break;
+ case 3:
+ pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN;
+ offset = PMCFG4;
+ break;
+ case 4:
+ pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN;
+ offset = PMCFG5;
+ break;
+ case 5:
+ pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN;
+ offset = PMCFG6;
+ break;
+ }
+ } else {
+ switch (counter) {
+ case 2:
+ pmcfg1 &= ~MX95_PMCFG1_WR_BEAT_FILT_EN;
+ break;
+ case 3:
+ case 4:
+ case 5:
+ pmcfg1 &= ~MX95_PMCFG1_RD_BEAT_FILT_EN;
+ break;
+ }
+ }
+
+ writel_relaxed(pmcfg1, pmu->base + PMCFG1);
+
+ if (offset) {
+ pmcfg = readl_relaxed(pmu->base + offset);
+ pmcfg &= ~(FIELD_PREP(MX95_PMCFG_ID_MASK, 0x3FF) |
+ FIELD_PREP(MX95_PMCFG_ID, 0x3FF));
+ pmcfg |= (FIELD_PREP(MX95_PMCFG_ID_MASK, axi_mask) |
+ FIELD_PREP(MX95_PMCFG_ID, axi_id));
+ writel_relaxed(pmcfg, pmu->base + offset);
+ }
+}
+
+static void ddr_perf_event_update(struct perf_event *event)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+ u64 new_raw_count;
+
+ new_raw_count = ddr_perf_read_counter(pmu, counter);
+ local64_add(new_raw_count, &event->count);
+
+ /* clear counter's value every time */
+ ddr_perf_clear_counter(pmu, counter);
+}
+
+static int ddr_perf_event_init(struct perf_event *event)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_event *sibling;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EOPNOTSUPP;
+
+ if (event->cpu < 0) {
+ dev_warn(pmu->dev, "Can't provide per-task data!\n");
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * We must NOT create groups containing mixed PMUs, although software
+ * events are acceptable (for example to create a CCN group
+ * periodically read when a hrtimer aka cpu-clock leader triggers).
+ */
+ if (event->group_leader->pmu != event->pmu &&
+ !is_software_event(event->group_leader))
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, event->group_leader) {
+ if (sibling->pmu != event->pmu &&
+ !is_software_event(sibling))
+ return -EINVAL;
+ }
+
+ event->cpu = pmu->cpu;
+ hwc->idx = -1;
+
+ return 0;
+}
+
+static void ddr_perf_event_start(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ local64_set(&hwc->prev_count, 0);
+
+ ddr_perf_counter_local_config(pmu, event->attr.config, counter, true);
+ hwc->state = 0;
+}
+
+static int ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event, int counter)
+{
+ int i;
+
+ if (event == CYCLES_EVENT_ID) {
+ // Cycles counter is dedicated for cycle event.
+ if (pmu->events[CYCLES_COUNTER] == NULL)
+ return CYCLES_COUNTER;
+ } else if (counter != 0) {
+ // Counter specific event use specific counter.
+ if (pmu->events[counter] == NULL)
+ return counter;
+ } else {
+ // Auto allocate counter for referene event.
+ for (i = 1; i < NUM_COUNTERS; i++)
+ if (pmu->events[i] == NULL)
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+static int ddr_perf_event_add(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int cfg = event->attr.config;
+ int cfg1 = event->attr.config1;
+ int cfg2 = event->attr.config2;
+ int event_id, counter;
+
+ event_id = FIELD_GET(CONFIG_EVENT_MASK, cfg);
+ counter = FIELD_GET(CONFIG_COUNTER_MASK, cfg);
+
+ counter = ddr_perf_alloc_counter(pmu, event_id, counter);
+ if (counter < 0) {
+ dev_dbg(pmu->dev, "There are not enough counters\n");
+ return -EOPNOTSUPP;
+ }
+
+ pmu->events[counter] = event;
+ pmu->active_events++;
+ hwc->idx = counter;
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (axi_filter_v1(pmu))
+ /* read trans, write trans, read beat */
+ imx93_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2);
+
+ if (axi_filter_v2(pmu))
+ /* write beat, read beat2, read beat1, read beat */
+ imx95_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2);
+
+ if (flags & PERF_EF_START)
+ ddr_perf_event_start(event, flags);
+
+ return 0;
+}
+
+static void ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ ddr_perf_counter_local_config(pmu, event->attr.config, counter, false);
+ ddr_perf_event_update(event);
+
+ hwc->state |= PERF_HES_STOPPED;
+}
+
+static void ddr_perf_event_del(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ ddr_perf_event_stop(event, PERF_EF_UPDATE);
+
+ pmu->events[counter] = NULL;
+ pmu->active_events--;
+ hwc->idx = -1;
+}
+
+static void ddr_perf_pmu_enable(struct pmu *pmu)
+{
+ struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+ ddr_perf_counter_global_config(ddr_pmu, true);
+}
+
+static void ddr_perf_pmu_disable(struct pmu *pmu)
+{
+ struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+ ddr_perf_counter_global_config(ddr_pmu, false);
+}
+
+static void ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
+ struct device *dev)
+{
+ *pmu = (struct ddr_pmu) {
+ .pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = attr_groups,
+ .event_init = ddr_perf_event_init,
+ .add = ddr_perf_event_add,
+ .del = ddr_perf_event_del,
+ .start = ddr_perf_event_start,
+ .stop = ddr_perf_event_stop,
+ .read = ddr_perf_event_update,
+ .pmu_enable = ddr_perf_pmu_enable,
+ .pmu_disable = ddr_perf_pmu_disable,
+ },
+ .base = base,
+ .dev = dev,
+ };
+}
+
+static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
+{
+ struct ddr_pmu *pmu = (struct ddr_pmu *)p;
+ struct perf_event *event;
+ int i;
+
+ /*
+ * Counters can generate an interrupt on an overflow when msb of a
+ * counter changes from 0 to 1. For the interrupt to be signalled,
+ * below condition mush be satisfied:
+ * PMGC0[PMIE] = 1, PMGC0[FCECE] = 1, PMLCAn[CE] = 1
+ * When an interrupt is signalled, PMGC0[FAC] is set by hardware and
+ * all of the registers are frozen.
+ * Software can clear the interrupt condition by resetting the performance
+ * monitor and clearing the most significant bit of the counter that
+ * generate the overflow.
+ */
+ for (i = 0; i < NUM_COUNTERS; i++) {
+ if (!pmu->events[i])
+ continue;
+
+ event = pmu->events[i];
+
+ ddr_perf_event_update(event);
+ }
+
+ ddr_perf_counter_global_config(pmu, true);
+
+ return IRQ_HANDLED;
+}
+
+static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node);
+ int target;
+
+ if (cpu != pmu->cpu)
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+ pmu->cpu = target;
+
+ WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)));
+
+ return 0;
+}
+
+static int ddr_perf_probe(struct platform_device *pdev)
+{
+ struct ddr_pmu *pmu;
+ void __iomem *base;
+ int ret, irq;
+ char *name;
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
+ if (!pmu)
+ return -ENOMEM;
+
+ ddr_perf_init(pmu, base, &pdev->dev);
+
+ pmu->devtype_data = of_device_get_match_data(&pdev->dev);
+
+ platform_set_drvdata(pdev, pmu);
+
+ pmu->id = ida_alloc(&ddr_ida, GFP_KERNEL);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", pmu->id);
+ if (!name) {
+ ret = -ENOMEM;
+ goto format_string_err;
+ }
+
+ pmu->cpu = raw_smp_processor_id();
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DDR_CPUHP_CB_NAME,
+ NULL, ddr_perf_offline_cpu);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to add callbacks for multi state\n");
+ goto cpuhp_state_err;
+ }
+ pmu->cpuhp_state = ret;
+
+ /* Register the pmu instance for cpu hotplug */
+ ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+ if (ret) {
+ dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+ goto cpuhp_instance_err;
+ }
+
+ /* Request irq */
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto ddr_perf_err;
+ }
+
+ ret = devm_request_irq(&pdev->dev, irq, ddr_perf_irq_handler,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ DDR_CPUHP_CB_NAME, pmu);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Request irq failed: %d", ret);
+ goto ddr_perf_err;
+ }
+
+ pmu->irq = irq;
+ ret = irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu));
+ if (ret) {
+ dev_err(pmu->dev, "Failed to set interrupt affinity\n");
+ goto ddr_perf_err;
+ }
+
+ ret = perf_pmu_register(&pmu->pmu, name, -1);
+ if (ret)
+ goto ddr_perf_err;
+
+ return 0;
+
+ddr_perf_err:
+ cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+cpuhp_instance_err:
+ cpuhp_remove_multi_state(pmu->cpuhp_state);
+cpuhp_state_err:
+format_string_err:
+ ida_free(&ddr_ida, pmu->id);
+ dev_warn(&pdev->dev, "i.MX9 DDR Perf PMU failed (%d), disabled\n", ret);
+ return ret;
+}
+
+static void ddr_perf_remove(struct platform_device *pdev)
+{
+ struct ddr_pmu *pmu = platform_get_drvdata(pdev);
+
+ cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+ cpuhp_remove_multi_state(pmu->cpuhp_state);
+
+ perf_pmu_unregister(&pmu->pmu);
+
+ ida_free(&ddr_ida, pmu->id);
+}
+
+static struct platform_driver imx_ddr_pmu_driver = {
+ .driver = {
+ .name = "imx9-ddr-pmu",
+ .of_match_table = imx_ddr_pmu_dt_ids,
+ .suppress_bind_attrs = true,
+ },
+ .probe = ddr_perf_probe,
+ .remove = ddr_perf_remove,
+};
+module_platform_driver(imx_ddr_pmu_driver);
+
+MODULE_AUTHOR("Xu Yang <xu.yang_2@nxp.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DDRC PerfMon for i.MX9 SoCs");
diff --git a/drivers/perf/fujitsu_uncore_pmu.c b/drivers/perf/fujitsu_uncore_pmu.c
new file mode 100644
index 000000000000..c3c6f56474ad
--- /dev/null
+++ b/drivers/perf/fujitsu_uncore_pmu.c
@@ -0,0 +1,613 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for the Uncore PMUs in Fujitsu chips.
+ *
+ * See Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst for more details.
+ *
+ * Copyright (c) 2025 Fujitsu. All rights reserved.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+/* Number of counters on each PMU */
+#define MAC_NUM_COUNTERS 8
+#define PCI_NUM_COUNTERS 8
+/* Mask for the event type field within perf_event_attr.config and EVTYPE reg */
+#define UNCORE_EVTYPE_MASK 0xFF
+
+/* Perfmon registers */
+#define PM_EVCNTR(__cntr) (0x000 + (__cntr) * 8)
+#define PM_CNTCTL(__cntr) (0x100 + (__cntr) * 8)
+#define PM_CNTCTL_RESET 0
+#define PM_EVTYPE(__cntr) (0x200 + (__cntr) * 8)
+#define PM_EVTYPE_EVSEL(__val) FIELD_GET(UNCORE_EVTYPE_MASK, __val)
+#define PM_CR 0x400
+#define PM_CR_RESET BIT(1)
+#define PM_CR_ENABLE BIT(0)
+#define PM_CNTENSET 0x410
+#define PM_CNTENSET_IDX(__cntr) BIT(__cntr)
+#define PM_CNTENCLR 0x418
+#define PM_CNTENCLR_IDX(__cntr) BIT(__cntr)
+#define PM_CNTENCLR_RESET 0xFF
+#define PM_INTENSET 0x420
+#define PM_INTENSET_IDX(__cntr) BIT(__cntr)
+#define PM_INTENCLR 0x428
+#define PM_INTENCLR_IDX(__cntr) BIT(__cntr)
+#define PM_INTENCLR_RESET 0xFF
+#define PM_OVSR 0x440
+#define PM_OVSR_OVSRCLR_RESET 0xFF
+
+enum fujitsu_uncore_pmu {
+ FUJITSU_UNCORE_PMU_MAC = 1,
+ FUJITSU_UNCORE_PMU_PCI = 2,
+};
+
+struct uncore_pmu {
+ int num_counters;
+ struct pmu pmu;
+ struct hlist_node node;
+ void __iomem *regs;
+ struct perf_event **events;
+ unsigned long *used_mask;
+ int cpu;
+ int irq;
+ struct device *dev;
+};
+
+#define to_uncore_pmu(p) (container_of(p, struct uncore_pmu, pmu))
+
+static int uncore_pmu_cpuhp_state;
+
+static void fujitsu_uncore_counter_start(struct perf_event *event)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ int idx = event->hw.idx;
+
+ /* Initialize the hardware counter and reset prev_count*/
+ local64_set(&event->hw.prev_count, 0);
+ writeq_relaxed(0, uncorepmu->regs + PM_EVCNTR(idx));
+
+ /* Set the event type */
+ writeq_relaxed(PM_EVTYPE_EVSEL(event->attr.config), uncorepmu->regs + PM_EVTYPE(idx));
+
+ /* Enable interrupt generation by this counter */
+ writeq_relaxed(PM_INTENSET_IDX(idx), uncorepmu->regs + PM_INTENSET);
+
+ /* Finally, enable the counter */
+ writeq_relaxed(PM_CNTCTL_RESET, uncorepmu->regs + PM_CNTCTL(idx));
+ writeq_relaxed(PM_CNTENSET_IDX(idx), uncorepmu->regs + PM_CNTENSET);
+}
+
+static void fujitsu_uncore_counter_stop(struct perf_event *event)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ int idx = event->hw.idx;
+
+ /* Disable the counter */
+ writeq_relaxed(PM_CNTENCLR_IDX(idx), uncorepmu->regs + PM_CNTENCLR);
+
+ /* Disable interrupt generation by this counter */
+ writeq_relaxed(PM_INTENCLR_IDX(idx), uncorepmu->regs + PM_INTENCLR);
+}
+
+static void fujitsu_uncore_counter_update(struct perf_event *event)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ int idx = event->hw.idx;
+ u64 prev, new;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ new = readq_relaxed(uncorepmu->regs + PM_EVCNTR(idx));
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+ local64_add(new - prev, &event->count);
+}
+
+static inline void fujitsu_uncore_init(struct uncore_pmu *uncorepmu)
+{
+ int i;
+
+ writeq_relaxed(PM_CR_RESET, uncorepmu->regs + PM_CR);
+
+ writeq_relaxed(PM_CNTENCLR_RESET, uncorepmu->regs + PM_CNTENCLR);
+ writeq_relaxed(PM_INTENCLR_RESET, uncorepmu->regs + PM_INTENCLR);
+ writeq_relaxed(PM_OVSR_OVSRCLR_RESET, uncorepmu->regs + PM_OVSR);
+
+ for (i = 0; i < uncorepmu->num_counters; ++i) {
+ writeq_relaxed(PM_CNTCTL_RESET, uncorepmu->regs + PM_CNTCTL(i));
+ writeq_relaxed(PM_EVTYPE_EVSEL(0), uncorepmu->regs + PM_EVTYPE(i));
+ }
+ writeq_relaxed(PM_CR_ENABLE, uncorepmu->regs + PM_CR);
+}
+
+static irqreturn_t fujitsu_uncore_handle_irq(int irq_num, void *data)
+{
+ struct uncore_pmu *uncorepmu = data;
+ /* Read the overflow status register */
+ long status = readq_relaxed(uncorepmu->regs + PM_OVSR);
+ int idx;
+
+ if (status == 0)
+ return IRQ_NONE;
+
+ /* Clear the bits we read on the overflow status register */
+ writeq_relaxed(status, uncorepmu->regs + PM_OVSR);
+
+ for_each_set_bit(idx, &status, uncorepmu->num_counters) {
+ struct perf_event *event;
+
+ event = uncorepmu->events[idx];
+ if (!event)
+ continue;
+
+ fujitsu_uncore_counter_update(event);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void fujitsu_uncore_pmu_enable(struct pmu *pmu)
+{
+ writeq_relaxed(PM_CR_ENABLE, to_uncore_pmu(pmu)->regs + PM_CR);
+}
+
+static void fujitsu_uncore_pmu_disable(struct pmu *pmu)
+{
+ writeq_relaxed(0, to_uncore_pmu(pmu)->regs + PM_CR);
+}
+
+static bool fujitsu_uncore_validate_event_group(struct perf_event *event)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ struct perf_event *leader = event->group_leader;
+ struct perf_event *sibling;
+ int counters = 1;
+
+ if (leader == event)
+ return true;
+
+ if (leader->pmu == event->pmu)
+ counters++;
+
+ for_each_sibling_event(sibling, leader) {
+ if (sibling->pmu == event->pmu)
+ counters++;
+ }
+
+ /*
+ * If the group requires more counters than the HW has, it
+ * cannot ever be scheduled.
+ */
+ return counters <= uncorepmu->num_counters;
+}
+
+static int fujitsu_uncore_event_init(struct perf_event *event)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* Is the event for this PMU? */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * Sampling not supported since these events are not
+ * core-attributable.
+ */
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ /*
+ * Task mode not available, we run the counters as socket counters,
+ * not attributable to any CPU and therefore cannot attribute per-task.
+ */
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ /* Validate the group */
+ if (!fujitsu_uncore_validate_event_group(event))
+ return -EINVAL;
+
+ hwc->idx = -1;
+
+ event->cpu = uncorepmu->cpu;
+
+ return 0;
+}
+
+static void fujitsu_uncore_event_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->state = 0;
+ fujitsu_uncore_counter_start(event);
+}
+
+static void fujitsu_uncore_event_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->state & PERF_HES_STOPPED)
+ return;
+
+ fujitsu_uncore_counter_stop(event);
+ if (flags & PERF_EF_UPDATE)
+ fujitsu_uncore_counter_update(event);
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int fujitsu_uncore_event_add(struct perf_event *event, int flags)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+
+ /* Try to allocate a counter. */
+ idx = bitmap_find_free_region(uncorepmu->used_mask, uncorepmu->num_counters, 0);
+ if (idx < 0)
+ /* The counters are all in use. */
+ return -EAGAIN;
+
+ hwc->idx = idx;
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ uncorepmu->events[idx] = event;
+
+ if (flags & PERF_EF_START)
+ fujitsu_uncore_event_start(event, 0);
+
+ /* Propagate changes to the userspace mapping. */
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static void fujitsu_uncore_event_del(struct perf_event *event, int flags)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* Stop and clean up */
+ fujitsu_uncore_event_stop(event, flags | PERF_EF_UPDATE);
+ uncorepmu->events[hwc->idx] = NULL;
+ bitmap_release_region(uncorepmu->used_mask, hwc->idx, 0);
+
+ /* Propagate changes to the userspace mapping. */
+ perf_event_update_userpage(event);
+}
+
+static void fujitsu_uncore_event_read(struct perf_event *event)
+{
+ fujitsu_uncore_counter_update(event);
+}
+
+#define UNCORE_PMU_FORMAT_ATTR(_name, _config) \
+ (&((struct dev_ext_attribute[]) { \
+ { .attr = __ATTR(_name, 0444, device_show_string, NULL), \
+ .var = (void *)_config, } \
+ })[0].attr.attr)
+
+static struct attribute *fujitsu_uncore_pmu_formats[] = {
+ UNCORE_PMU_FORMAT_ATTR(event, "config:0-7"),
+ NULL
+};
+
+static const struct attribute_group fujitsu_uncore_pmu_format_group = {
+ .name = "format",
+ .attrs = fujitsu_uncore_pmu_formats,
+};
+
+static ssize_t fujitsu_uncore_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define MAC_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, fujitsu_uncore_pmu_event_show, _id)
+
+static struct attribute *fujitsu_uncore_mac_pmu_events[] = {
+ MAC_EVENT_ATTR(cycles, 0x00),
+ MAC_EVENT_ATTR(read-count, 0x10),
+ MAC_EVENT_ATTR(read-count-request, 0x11),
+ MAC_EVENT_ATTR(read-count-return, 0x12),
+ MAC_EVENT_ATTR(read-count-request-pftgt, 0x13),
+ MAC_EVENT_ATTR(read-count-request-normal, 0x14),
+ MAC_EVENT_ATTR(read-count-return-pftgt-hit, 0x15),
+ MAC_EVENT_ATTR(read-count-return-pftgt-miss, 0x16),
+ MAC_EVENT_ATTR(read-wait, 0x17),
+ MAC_EVENT_ATTR(write-count, 0x20),
+ MAC_EVENT_ATTR(write-count-write, 0x21),
+ MAC_EVENT_ATTR(write-count-pwrite, 0x22),
+ MAC_EVENT_ATTR(memory-read-count, 0x40),
+ MAC_EVENT_ATTR(memory-write-count, 0x50),
+ MAC_EVENT_ATTR(memory-pwrite-count, 0x60),
+ MAC_EVENT_ATTR(ea-mac, 0x80),
+ MAC_EVENT_ATTR(ea-memory, 0x90),
+ MAC_EVENT_ATTR(ea-memory-mac-write, 0x92),
+ MAC_EVENT_ATTR(ea-ha, 0xa0),
+ NULL
+};
+
+#define PCI_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, fujitsu_uncore_pmu_event_show, _id)
+
+static struct attribute *fujitsu_uncore_pci_pmu_events[] = {
+ PCI_EVENT_ATTR(pci-port0-cycles, 0x00),
+ PCI_EVENT_ATTR(pci-port0-read-count, 0x10),
+ PCI_EVENT_ATTR(pci-port0-read-count-bus, 0x14),
+ PCI_EVENT_ATTR(pci-port0-write-count, 0x20),
+ PCI_EVENT_ATTR(pci-port0-write-count-bus, 0x24),
+ PCI_EVENT_ATTR(pci-port1-cycles, 0x40),
+ PCI_EVENT_ATTR(pci-port1-read-count, 0x50),
+ PCI_EVENT_ATTR(pci-port1-read-count-bus, 0x54),
+ PCI_EVENT_ATTR(pci-port1-write-count, 0x60),
+ PCI_EVENT_ATTR(pci-port1-write-count-bus, 0x64),
+ PCI_EVENT_ATTR(ea-pci, 0x80),
+ NULL
+};
+
+static const struct attribute_group fujitsu_uncore_mac_pmu_events_group = {
+ .name = "events",
+ .attrs = fujitsu_uncore_mac_pmu_events,
+};
+
+static const struct attribute_group fujitsu_uncore_pci_pmu_events_group = {
+ .name = "events",
+ .attrs = fujitsu_uncore_pci_pmu_events,
+};
+
+static ssize_t cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(uncorepmu->cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *fujitsu_uncore_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static const struct attribute_group fujitsu_uncore_pmu_cpumask_attr_group = {
+ .attrs = fujitsu_uncore_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *fujitsu_uncore_mac_pmu_attr_grps[] = {
+ &fujitsu_uncore_pmu_format_group,
+ &fujitsu_uncore_mac_pmu_events_group,
+ &fujitsu_uncore_pmu_cpumask_attr_group,
+ NULL
+};
+
+static const struct attribute_group *fujitsu_uncore_pci_pmu_attr_grps[] = {
+ &fujitsu_uncore_pmu_format_group,
+ &fujitsu_uncore_pci_pmu_events_group,
+ &fujitsu_uncore_pmu_cpumask_attr_group,
+ NULL
+};
+
+static void fujitsu_uncore_pmu_migrate(struct uncore_pmu *uncorepmu, unsigned int cpu)
+{
+ perf_pmu_migrate_context(&uncorepmu->pmu, uncorepmu->cpu, cpu);
+ irq_set_affinity(uncorepmu->irq, cpumask_of(cpu));
+ uncorepmu->cpu = cpu;
+}
+
+static int fujitsu_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct uncore_pmu *uncorepmu;
+ int node;
+
+ uncorepmu = hlist_entry_safe(cpuhp_node, struct uncore_pmu, node);
+ node = dev_to_node(uncorepmu->dev);
+ if (cpu_to_node(uncorepmu->cpu) != node && cpu_to_node(cpu) == node)
+ fujitsu_uncore_pmu_migrate(uncorepmu, cpu);
+
+ return 0;
+}
+
+static int fujitsu_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct uncore_pmu *uncorepmu;
+ unsigned int target;
+ int node;
+
+ uncorepmu = hlist_entry_safe(cpuhp_node, struct uncore_pmu, node);
+ if (cpu != uncorepmu->cpu)
+ return 0;
+
+ node = dev_to_node(uncorepmu->dev);
+ target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ if (target < nr_cpu_ids)
+ fujitsu_uncore_pmu_migrate(uncorepmu, target);
+
+ return 0;
+}
+
+static int fujitsu_uncore_pmu_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ unsigned long device_type = (unsigned long)device_get_match_data(dev);
+ const struct attribute_group **attr_groups;
+ struct uncore_pmu *uncorepmu;
+ struct resource *memrc;
+ size_t alloc_size;
+ char *name;
+ int ret;
+ int irq;
+ u64 uid;
+
+ ret = acpi_dev_uid_to_integer(ACPI_COMPANION(dev), &uid);
+ if (ret)
+ return dev_err_probe(dev, ret, "unable to read ACPI uid\n");
+
+ uncorepmu = devm_kzalloc(dev, sizeof(*uncorepmu), GFP_KERNEL);
+ if (!uncorepmu)
+ return -ENOMEM;
+ uncorepmu->dev = dev;
+ uncorepmu->cpu = cpumask_local_spread(0, dev_to_node(dev));
+ platform_set_drvdata(pdev, uncorepmu);
+
+ switch (device_type) {
+ case FUJITSU_UNCORE_PMU_MAC:
+ uncorepmu->num_counters = MAC_NUM_COUNTERS;
+ attr_groups = fujitsu_uncore_mac_pmu_attr_grps;
+ name = devm_kasprintf(dev, GFP_KERNEL, "mac_iod%llu_mac%llu_ch%llu",
+ (uid >> 8) & 0xF, (uid >> 4) & 0xF, uid & 0xF);
+ break;
+ case FUJITSU_UNCORE_PMU_PCI:
+ uncorepmu->num_counters = PCI_NUM_COUNTERS;
+ attr_groups = fujitsu_uncore_pci_pmu_attr_grps;
+ name = devm_kasprintf(dev, GFP_KERNEL, "pci_iod%llu_pci%llu",
+ (uid >> 4) & 0xF, uid & 0xF);
+ break;
+ default:
+ return dev_err_probe(dev, -EINVAL, "illegal device type: %lu\n", device_type);
+ }
+ if (!name)
+ return -ENOMEM;
+
+ uncorepmu->pmu = (struct pmu) {
+ .parent = dev,
+ .task_ctx_nr = perf_invalid_context,
+
+ .attr_groups = attr_groups,
+
+ .pmu_enable = fujitsu_uncore_pmu_enable,
+ .pmu_disable = fujitsu_uncore_pmu_disable,
+ .event_init = fujitsu_uncore_event_init,
+ .add = fujitsu_uncore_event_add,
+ .del = fujitsu_uncore_event_del,
+ .start = fujitsu_uncore_event_start,
+ .stop = fujitsu_uncore_event_stop,
+ .read = fujitsu_uncore_event_read,
+
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+ };
+
+ alloc_size = sizeof(uncorepmu->events[0]) * uncorepmu->num_counters;
+ uncorepmu->events = devm_kzalloc(dev, alloc_size, GFP_KERNEL);
+ if (!uncorepmu->events)
+ return -ENOMEM;
+
+ alloc_size = sizeof(uncorepmu->used_mask[0]) * BITS_TO_LONGS(uncorepmu->num_counters);
+ uncorepmu->used_mask = devm_kzalloc(dev, alloc_size, GFP_KERNEL);
+ if (!uncorepmu->used_mask)
+ return -ENOMEM;
+
+ uncorepmu->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &memrc);
+ if (IS_ERR(uncorepmu->regs))
+ return PTR_ERR(uncorepmu->regs);
+
+ fujitsu_uncore_init(uncorepmu);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ ret = devm_request_irq(dev, irq, fujitsu_uncore_handle_irq,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ name, uncorepmu);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to request IRQ:%d\n", irq);
+
+ ret = irq_set_affinity(irq, cpumask_of(uncorepmu->cpu));
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to set irq affinity:%d\n", irq);
+
+ uncorepmu->irq = irq;
+
+ /* Add this instance to the list used by the offline callback */
+ ret = cpuhp_state_add_instance(uncore_pmu_cpuhp_state, &uncorepmu->node);
+ if (ret)
+ return dev_err_probe(dev, ret, "Error registering hotplug");
+
+ ret = perf_pmu_register(&uncorepmu->pmu, name, -1);
+ if (ret < 0) {
+ cpuhp_state_remove_instance_nocalls(uncore_pmu_cpuhp_state, &uncorepmu->node);
+ return dev_err_probe(dev, ret, "Failed to register %s PMU\n", name);
+ }
+
+ dev_dbg(dev, "Registered %s, type: %d\n", name, uncorepmu->pmu.type);
+
+ return 0;
+}
+
+static void fujitsu_uncore_pmu_remove(struct platform_device *pdev)
+{
+ struct uncore_pmu *uncorepmu = platform_get_drvdata(pdev);
+
+ writeq_relaxed(0, uncorepmu->regs + PM_CR);
+
+ perf_pmu_unregister(&uncorepmu->pmu);
+ cpuhp_state_remove_instance_nocalls(uncore_pmu_cpuhp_state, &uncorepmu->node);
+}
+
+static const struct acpi_device_id fujitsu_uncore_pmu_acpi_match[] = {
+ { "FUJI200C", FUJITSU_UNCORE_PMU_MAC },
+ { "FUJI200D", FUJITSU_UNCORE_PMU_PCI },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, fujitsu_uncore_pmu_acpi_match);
+
+static struct platform_driver fujitsu_uncore_pmu_driver = {
+ .driver = {
+ .name = "fujitsu-uncore-pmu",
+ .acpi_match_table = fujitsu_uncore_pmu_acpi_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = fujitsu_uncore_pmu_probe,
+ .remove = fujitsu_uncore_pmu_remove,
+};
+
+static int __init fujitsu_uncore_pmu_init(void)
+{
+ int ret;
+
+ /* Install a hook to update the reader CPU in case it goes offline */
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "perf/fujitsu/uncore:online",
+ fujitsu_uncore_pmu_online_cpu,
+ fujitsu_uncore_pmu_offline_cpu);
+ if (ret < 0)
+ return ret;
+
+ uncore_pmu_cpuhp_state = ret;
+
+ ret = platform_driver_register(&fujitsu_uncore_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(uncore_pmu_cpuhp_state);
+
+ return ret;
+}
+
+static void __exit fujitsu_uncore_pmu_exit(void)
+{
+ platform_driver_unregister(&fujitsu_uncore_pmu_driver);
+ cpuhp_remove_multi_state(uncore_pmu_cpuhp_state);
+}
+
+module_init(fujitsu_uncore_pmu_init);
+module_exit(fujitsu_uncore_pmu_exit);
+
+MODULE_AUTHOR("Koichi Okuno <fj2767dz@fujitsu.com>");
+MODULE_DESCRIPTION("Fujitsu Uncore PMU driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index 4d2c9abe3372..186be3d02238 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
- hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o
+ hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o \
+ hisi_uncore_noc_pmu.o hisi_uncore_mn_pmu.o
obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index 6fee0b6e163b..c5394d007b61 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -99,16 +99,6 @@ HISI_PCIE_PMU_FILTER_ATTR(len_mode, config1, 11, 10);
HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0);
HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16);
-static ssize_t hisi_pcie_format_sysfs_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct dev_ext_attribute *eattr;
-
- eattr = container_of(attr, struct dev_ext_attribute, attr);
-
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-
static ssize_t hisi_pcie_event_sysfs_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -120,8 +110,7 @@ static ssize_t hisi_pcie_event_sysfs_show(struct device *dev, struct device_attr
#define HISI_PCIE_PMU_FORMAT_ATTR(_name, _format) \
(&((struct dev_ext_attribute[]){ \
- { .attr = __ATTR(_name, 0444, hisi_pcie_format_sysfs_show, \
- NULL), \
+ { .attr = __ATTR(_name, 0444, device_show_string, NULL), \
.var = (void *)_format } \
})[0].attr.attr)
@@ -152,6 +141,22 @@ static ssize_t bus_show(struct device *dev, struct device_attribute *attr, char
}
static DEVICE_ATTR_RO(bus);
+static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_min);
+}
+static DEVICE_ATTR_RO(bdf_min);
+
+static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_max);
+}
+static DEVICE_ATTR_RO(bdf_max);
+
static struct hisi_pcie_reg_pair
hisi_pcie_parse_reg_value(struct hisi_pcie_pmu *pcie_pmu, u32 reg_off)
{
@@ -216,12 +221,10 @@ static void hisi_pcie_pmu_writeq(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset,
writeq_relaxed(val, pcie_pmu->base + offset);
}
-static void hisi_pcie_pmu_config_filter(struct perf_event *event)
+static u64 hisi_pcie_pmu_get_event_ctrl_val(struct perf_event *event)
{
- struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
u64 port, trig_len, thr_len, len_mode;
- u64 reg = HISI_PCIE_INIT_SET;
+ u64 reg = 0;
/* Config HISI_PCIE_EVENT_CTRL according to event. */
reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event));
@@ -256,10 +259,19 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
else
reg |= FIELD_PREP(HISI_PCIE_LEN_M, HISI_PCIE_LEN_M_DEFAULT);
+ return reg;
+}
+
+static void hisi_pcie_pmu_config_event_ctrl(struct perf_event *event)
+{
+ struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 reg = hisi_pcie_pmu_get_event_ctrl_val(event);
+
hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg);
}
-static void hisi_pcie_pmu_clear_filter(struct perf_event *event)
+static void hisi_pcie_pmu_clear_event_ctrl(struct perf_event *event)
{
struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
@@ -299,18 +311,24 @@ static bool hisi_pcie_pmu_valid_filter(struct perf_event *event,
if (hisi_pcie_get_trig_len(event) > HISI_PCIE_TRIG_MAX_VAL)
return false;
- if (requester_id) {
- if (!hisi_pcie_pmu_valid_requester_id(pcie_pmu, requester_id))
- return false;
- }
+ /* Need to explicitly set filter of "port" or "bdf" */
+ if (!hisi_pcie_get_port(event) &&
+ !hisi_pcie_pmu_valid_requester_id(pcie_pmu, requester_id))
+ return false;
return true;
}
+/*
+ * Check Whether two events share the same config. The same config means not
+ * only the event code, but also the filter settings of the two events are
+ * the same.
+ */
static bool hisi_pcie_pmu_cmp_event(struct perf_event *target,
struct perf_event *event)
{
- return hisi_pcie_get_real_event(target) == hisi_pcie_get_real_event(event);
+ return hisi_pcie_pmu_get_event_ctrl_val(target) ==
+ hisi_pcie_pmu_get_event_ctrl_val(event);
}
static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event)
@@ -337,15 +355,27 @@ static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event)
return false;
for (num = 0; num < counters; num++) {
+ /*
+ * If we find a related event, then it's a valid group
+ * since we don't need to allocate a new counter for it.
+ */
if (hisi_pcie_pmu_cmp_event(event_group[num], sibling))
break;
}
+ /*
+ * Otherwise it's a new event but if there's no available counter,
+ * fail the check since we cannot schedule all the events in
+ * the group simultaneously.
+ */
+ if (num == HISI_PCIE_MAX_COUNTERS)
+ return false;
+
if (num == counters)
event_group[counters++] = sibling;
}
- return counters <= HISI_PCIE_MAX_COUNTERS;
+ return true;
}
static int hisi_pcie_pmu_event_init(struct perf_event *event)
@@ -353,16 +383,15 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event)
struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- event->cpu = pcie_pmu->on_cpu;
+ /* Check the type first before going on, otherwise it's not our event */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
if (EXT_COUNTER_IS_USED(hisi_pcie_get_event(event)))
hwc->event_base = HISI_PCIE_EXT_CNT;
else
hwc->event_base = HISI_PCIE_CNT;
- if (event->attr.type != event->pmu->type)
- return -ENOENT;
-
/* Sampling is not supported. */
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EOPNOTSUPP;
@@ -373,6 +402,8 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event)
if (!hisi_pcie_pmu_validate_event_group(event))
return -EINVAL;
+ event->cpu = pcie_pmu->on_cpu;
+
return 0;
}
@@ -384,40 +415,32 @@ static u64 hisi_pcie_pmu_read_counter(struct perf_event *event)
return hisi_pcie_pmu_readq(pcie_pmu, event->hw.event_base, idx);
}
-static int hisi_pcie_pmu_find_related_event(struct hisi_pcie_pmu *pcie_pmu,
- struct perf_event *event)
+/*
+ * Check all work events, if a relevant event is found then we return it
+ * first, otherwise return the first idle counter (need to reset).
+ */
+static int hisi_pcie_pmu_get_event_idx(struct hisi_pcie_pmu *pcie_pmu,
+ struct perf_event *event)
{
+ int first_idle = -EAGAIN;
struct perf_event *sibling;
int idx;
for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) {
sibling = pcie_pmu->hw_events[idx];
- if (!sibling)
- continue;
-
- if (!hisi_pcie_pmu_cmp_event(sibling, event))
+ if (!sibling) {
+ if (first_idle == -EAGAIN)
+ first_idle = idx;
continue;
+ }
/* Related events must be used in group */
- if (sibling->group_leader == event->group_leader)
+ if (hisi_pcie_pmu_cmp_event(sibling, event) &&
+ sibling->group_leader == event->group_leader)
return idx;
- else
- return -EINVAL;
}
- return idx;
-}
-
-static int hisi_pcie_pmu_get_event_idx(struct hisi_pcie_pmu *pcie_pmu)
-{
- int idx;
-
- for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) {
- if (!pcie_pmu->hw_events[idx])
- return idx;
- }
-
- return -EINVAL;
+ return first_idle;
}
static void hisi_pcie_pmu_event_update(struct perf_event *event)
@@ -445,10 +468,24 @@ static void hisi_pcie_pmu_set_period(struct perf_event *event)
struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ u64 orig_cnt, cnt;
+
+ orig_cnt = hisi_pcie_pmu_read_counter(event);
local64_set(&hwc->prev_count, HISI_PCIE_INIT_VAL);
hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_CNT, idx, HISI_PCIE_INIT_VAL);
hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EXT_CNT, idx, HISI_PCIE_INIT_VAL);
+
+ /*
+ * The counter maybe unwritable if the target event is unsupported.
+ * Check this by comparing the counts after setting the period. If
+ * the counts stay unchanged after setting the period then update
+ * the hwc->prev_count correctly. Otherwise the final counts user
+ * get maybe totally wrong.
+ */
+ cnt = hisi_pcie_pmu_read_counter(event);
+ if (orig_cnt == cnt)
+ local64_set(&hwc->prev_count, cnt);
}
static void hisi_pcie_pmu_enable_counter(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc)
@@ -504,7 +541,7 @@ static void hisi_pcie_pmu_start(struct perf_event *event, int flags)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
- hisi_pcie_pmu_config_filter(event);
+ hisi_pcie_pmu_config_event_ctrl(event);
hisi_pcie_pmu_enable_counter(pcie_pmu, hwc);
hisi_pcie_pmu_enable_int(pcie_pmu, hwc);
hisi_pcie_pmu_set_period(event);
@@ -525,7 +562,7 @@ static void hisi_pcie_pmu_stop(struct perf_event *event, int flags)
hisi_pcie_pmu_event_update(event);
hisi_pcie_pmu_disable_int(pcie_pmu, hwc);
hisi_pcie_pmu_disable_counter(pcie_pmu, hwc);
- hisi_pcie_pmu_clear_filter(event);
+ hisi_pcie_pmu_clear_event_ctrl(event);
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
@@ -543,27 +580,18 @@ static int hisi_pcie_pmu_add(struct perf_event *event, int flags)
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
- /* Check all working events to find a related event. */
- idx = hisi_pcie_pmu_find_related_event(pcie_pmu, event);
- if (idx < 0)
- return idx;
-
- /* Current event shares an enabled counter with the related event */
- if (idx < HISI_PCIE_MAX_COUNTERS) {
- hwc->idx = idx;
- goto start_count;
- }
-
- idx = hisi_pcie_pmu_get_event_idx(pcie_pmu);
+ idx = hisi_pcie_pmu_get_event_idx(pcie_pmu, event);
if (idx < 0)
return idx;
hwc->idx = idx;
- pcie_pmu->hw_events[idx] = event;
- /* Reset Counter to avoid previous statistic interference. */
- hisi_pcie_pmu_reset_counter(pcie_pmu, idx);
-start_count:
+ /* No enabled counter found with related event, reset it */
+ if (!pcie_pmu->hw_events[idx]) {
+ hisi_pcie_pmu_reset_counter(pcie_pmu, idx);
+ pcie_pmu->hw_events[idx] = event;
+ }
+
if (flags & PERF_EF_START)
hisi_pcie_pmu_start(event, PERF_EF_RELOAD);
@@ -665,8 +693,8 @@ static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
if (pcie_pmu->on_cpu == -1) {
- pcie_pmu->on_cpu = cpu;
- WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)));
+ pcie_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(&pcie_pmu->pdev->dev));
+ WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(pcie_pmu->on_cpu)));
}
return 0;
@@ -676,14 +704,22 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
unsigned int target;
+ int numa_node;
/* Nothing to do if this CPU doesn't own the PMU */
if (pcie_pmu->on_cpu != cpu)
return 0;
pcie_pmu->on_cpu = -1;
- /* Choose a new CPU from all online cpus. */
- target = cpumask_first(cpu_online_mask);
+
+ /* Choose a local CPU from all online cpus. */
+ numa_node = dev_to_node(&pcie_pmu->pdev->dev);
+
+ target = cpumask_any_and_but(cpumask_of_node(numa_node),
+ cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
if (target >= nr_cpu_ids) {
pci_err(pcie_pmu->pdev, "There is no CPU to set\n");
return 0;
@@ -704,10 +740,18 @@ static struct attribute *hisi_pcie_pmu_events_attr[] = {
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210),
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011),
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_flux, 0x0104),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_time, 0x10104),
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x0804),
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x10804),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_cpl_flux, 0x2004),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_cpl_time, 0x12004),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_mwr_flux, 0x0105),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_mwr_time, 0x10105),
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x0405),
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x10405),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_cpl_flux, 0x1005),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_cpl_time, 0x11005),
NULL
};
@@ -735,6 +779,8 @@ static const struct attribute_group hisi_pcie_pmu_format_group = {
static struct attribute *hisi_pcie_pmu_bus_attrs[] = {
&dev_attr_bus.attr,
+ &dev_attr_bdf_max.attr,
+ &dev_attr_bdf_min.attr,
NULL
};
@@ -793,6 +839,7 @@ static int hisi_pcie_alloc_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_
pcie_pmu->pmu = (struct pmu) {
.name = name,
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.event_init = hisi_pcie_pmu_event_init,
.pmu_enable = hisi_pcie_pmu_enable,
.pmu_disable = hisi_pcie_pmu_disable,
diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
index a9bb73f76be4..b879b81adfdd 100644
--- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
@@ -180,20 +180,18 @@ MODULE_DEVICE_TABLE(acpi, hisi_cpa_pmu_acpi_match);
static int hisi_cpa_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *cpa_pmu)
{
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &cpa_pmu->sicl_id)) {
+ hisi_uncore_pmu_init_topology(cpa_pmu, &pdev->dev);
+
+ if (cpa_pmu->topo.sicl_id < 0) {
dev_err(&pdev->dev, "Can not read sicl-id\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
- &cpa_pmu->index_id)) {
+ if (cpa_pmu->topo.index_id < 0) {
dev_err(&pdev->dev, "Cannot read idx-id\n");
return -EINVAL;
}
- cpa_pmu->ccl_id = -1;
- cpa_pmu->sccl_id = -1;
cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(cpa_pmu->base))
return PTR_ERR(cpa_pmu->base);
@@ -227,34 +225,11 @@ static const struct attribute_group hisi_cpa_pmu_events_group = {
.attrs = hisi_cpa_pmu_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_cpa_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL
-};
-
-static const struct attribute_group hisi_cpa_pmu_cpumask_attr_group = {
- .attrs = hisi_cpa_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_cpa_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_cpa_pmu_identifier_attrs[] = {
- &hisi_cpa_pmu_identifier_attr.attr,
- NULL
-};
-
-static const struct attribute_group hisi_cpa_pmu_identifier_group = {
- .attrs = hisi_cpa_pmu_identifier_attrs,
-};
-
static const struct attribute_group *hisi_cpa_pmu_attr_groups[] = {
&hisi_cpa_pmu_format_group,
&hisi_cpa_pmu_events_group,
- &hisi_cpa_pmu_cpumask_attr_group,
- &hisi_cpa_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
@@ -311,26 +286,12 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev)
if (ret)
return ret;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%u",
- cpa_pmu->sicl_id, cpa_pmu->index_id);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%d",
+ cpa_pmu->topo.sicl_id, cpa_pmu->topo.index_id);
if (!name)
return -ENOMEM;
- cpa_pmu->pmu = (struct pmu) {
- .name = name,
- .module = THIS_MODULE,
- .task_ctx_nr = perf_invalid_context,
- .event_init = hisi_uncore_pmu_event_init,
- .pmu_enable = hisi_uncore_pmu_enable,
- .pmu_disable = hisi_uncore_pmu_disable,
- .add = hisi_uncore_pmu_add,
- .del = hisi_uncore_pmu_del,
- .start = hisi_uncore_pmu_start,
- .stop = hisi_uncore_pmu_stop,
- .read = hisi_uncore_pmu_read,
- .attr_groups = cpa_pmu->pmu_events.attr_groups,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
- };
+ hisi_pmu_init(cpa_pmu, THIS_MODULE);
/* Power Management should be disabled before using CPA PMU. */
hisi_cpa_pmu_disable_pm(cpa_pmu);
@@ -355,7 +316,7 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_cpa_pmu_remove(struct platform_device *pdev)
+static void hisi_cpa_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *cpa_pmu = platform_get_drvdata(pdev);
@@ -363,7 +324,6 @@ static int hisi_cpa_pmu_remove(struct platform_device *pdev)
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
&cpa_pmu->node);
hisi_cpa_pmu_enable_pm(cpa_pmu);
- return 0;
}
static struct platform_driver hisi_cpa_pmu_driver = {
@@ -404,6 +364,7 @@ static void __exit hisi_cpa_pmu_module_exit(void)
}
module_exit(hisi_cpa_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index 50d0c0a2f1fe..21c494881ca0 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -43,12 +43,21 @@
#define DDRC_V2_EVENT_TYPE 0xe74
#define DDRC_V2_PERF_CTRL 0xeA0
+/* DDRC interrupt registers definition in v3 */
+#define DDRC_V3_INT_MASK 0x534
+#define DDRC_V3_INT_STATUS 0x538
+#define DDRC_V3_INT_CLEAR 0x53C
+
/* DDRC has 8-counters */
#define DDRC_NR_COUNTERS 0x8
#define DDRC_V1_PERF_CTRL_EN 0x2
#define DDRC_V2_PERF_CTRL_EN 0x1
#define DDRC_V1_NR_EVENTS 0x7
-#define DDRC_V2_NR_EVENTS 0x90
+#define DDRC_V2_NR_EVENTS 0xFF
+
+#define DDRC_EVENT_CNTn(base, n) ((base) + (n) * 8)
+#define DDRC_EVENT_TYPEn(base, n) ((base) + (n) * 4)
+#define DDRC_UNIMPLEMENTED_REG GENMASK(31, 0)
/*
* For PMU v1, there are eight-events and every event has been mapped
@@ -63,47 +72,37 @@ static const u32 ddrc_reg_off[] = {
DDRC_PRE_CMD, DDRC_ACT_CMD, DDRC_RNK_CHG, DDRC_RW_CHG
};
-/*
- * Select the counter register offset using the counter index.
- * In PMU v1, there are no programmable counter, the count
- * is read form the statistics counter register itself.
- */
-static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx)
-{
- return ddrc_reg_off[cntr_idx];
-}
-
-static u32 hisi_ddrc_pmu_v2_get_counter_offset(int cntr_idx)
-{
- return DDRC_V2_EVENT_CNT + cntr_idx * 8;
-}
+struct hisi_ddrc_pmu_regs {
+ u32 event_cnt;
+ u32 event_ctrl;
+ u32 event_type;
+ u32 perf_ctrl;
+ u32 perf_ctrl_en;
+ u32 int_mask;
+ u32 int_clear;
+ u32 int_status;
+};
-static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu,
+static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu,
struct hw_perf_event *hwc)
{
- return readl(ddrc_pmu->base +
- hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx));
-}
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
-static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc, u64 val)
-{
- writel((u32)val,
- ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx));
-}
+ if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG)
+ return readl(ddrc_pmu->base + ddrc_reg_off[hwc->idx]);
-static u64 hisi_ddrc_pmu_v2_read_counter(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
-{
- return readq(ddrc_pmu->base +
- hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx));
+ return readq(ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx));
}
-static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc, u64 val)
+static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc, u64 val)
{
- writeq(val,
- ddrc_pmu->base + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx));
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
+
+ if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG)
+ writel((u32)val, ddrc_pmu->base + ddrc_reg_off[hwc->idx]);
+ else
+ writeq(val, ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx));
}
/*
@@ -111,57 +110,15 @@ static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu,
* so there is no need to write event type, while it is programmable counter in
* PMU v2.
*/
-static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
+static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx,
u32 type)
{
- u32 offset;
-
- if (hha_pmu->identifier >= HISI_PMU_V2) {
- offset = DDRC_V2_EVENT_TYPE + 4 * idx;
- writel(type, hha_pmu->base + offset);
- }
-}
-
-static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu)
-{
- u32 val;
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
- /* Set perf_enable in DDRC_PERF_CTRL to start event counting */
- val = readl(ddrc_pmu->base + DDRC_PERF_CTRL);
- val |= DDRC_V1_PERF_CTRL_EN;
- writel(val, ddrc_pmu->base + DDRC_PERF_CTRL);
-}
+ if (regs->event_type == DDRC_UNIMPLEMENTED_REG)
+ return;
-static void hisi_ddrc_pmu_v1_stop_counters(struct hisi_pmu *ddrc_pmu)
-{
- u32 val;
-
- /* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */
- val = readl(ddrc_pmu->base + DDRC_PERF_CTRL);
- val &= ~DDRC_V1_PERF_CTRL_EN;
- writel(val, ddrc_pmu->base + DDRC_PERF_CTRL);
-}
-
-static void hisi_ddrc_pmu_v1_enable_counter(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
-{
- u32 val;
-
- /* Set counter index(event code) in DDRC_EVENT_CTRL register */
- val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL);
- val |= (1 << GET_DDRC_EVENTID(hwc));
- writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL);
-}
-
-static void hisi_ddrc_pmu_v1_disable_counter(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
-{
- u32 val;
-
- /* Clear counter index(event code) in DDRC_EVENT_CTRL register */
- val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL);
- val &= ~(1 << GET_DDRC_EVENTID(hwc));
- writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL);
+ writel(type, ddrc_pmu->base + DDRC_EVENT_TYPEn(regs->event_type, idx));
}
static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event)
@@ -180,140 +137,119 @@ static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event)
return idx;
}
-static int hisi_ddrc_pmu_v2_get_event_idx(struct perf_event *event)
+static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event)
{
+ struct hisi_pmu *ddrc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
+
+ if (regs->event_type == DDRC_UNIMPLEMENTED_REG)
+ return hisi_ddrc_pmu_v1_get_event_idx(event);
+
return hisi_uncore_pmu_get_event_idx(event);
}
-static void hisi_ddrc_pmu_v2_start_counters(struct hisi_pmu *ddrc_pmu)
+static void hisi_ddrc_pmu_start_counters(struct hisi_pmu *ddrc_pmu)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL);
- val |= DDRC_V2_PERF_CTRL_EN;
- writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+ val = readl(ddrc_pmu->base + regs->perf_ctrl);
+ val |= regs->perf_ctrl_en;
+ writel(val, ddrc_pmu->base + regs->perf_ctrl);
}
-static void hisi_ddrc_pmu_v2_stop_counters(struct hisi_pmu *ddrc_pmu)
+static void hisi_ddrc_pmu_stop_counters(struct hisi_pmu *ddrc_pmu)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL);
- val &= ~DDRC_V2_PERF_CTRL_EN;
- writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+ val = readl(ddrc_pmu->base + regs->perf_ctrl);
+ val &= ~regs->perf_ctrl_en;
+ writel(val, ddrc_pmu->base + regs->perf_ctrl);
}
-static void hisi_ddrc_pmu_v2_enable_counter(struct hisi_pmu *ddrc_pmu,
+static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
- val |= 1 << hwc->idx;
- writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+ val = readl(ddrc_pmu->base + regs->event_ctrl);
+ val |= BIT_ULL(hwc->idx);
+ writel(val, ddrc_pmu->base + regs->event_ctrl);
}
-static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu,
+static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
- val &= ~(1 << hwc->idx);
- writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+ val = readl(ddrc_pmu->base + regs->event_ctrl);
+ val &= ~BIT_ULL(hwc->idx);
+ writel(val, ddrc_pmu->base + regs->event_ctrl);
}
-static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
-{
- u32 val;
-
- /* Write 0 to enable interrupt */
- val = readl(ddrc_pmu->base + DDRC_INT_MASK);
- val &= ~(1 << hwc->idx);
- writel(val, ddrc_pmu->base + DDRC_INT_MASK);
-}
-
-static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
-{
- u32 val;
-
- /* Write 1 to mask interrupt */
- val = readl(ddrc_pmu->base + DDRC_INT_MASK);
- val |= 1 << hwc->idx;
- writel(val, ddrc_pmu->base + DDRC_INT_MASK);
-}
-
-static void hisi_ddrc_pmu_v2_enable_counter_int(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
+static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK);
- val &= ~(1 << hwc->idx);
- writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK);
+ val = readl(ddrc_pmu->base + regs->int_mask);
+ val &= ~BIT_ULL(hwc->idx);
+ writel(val, ddrc_pmu->base + regs->int_mask);
}
-static void hisi_ddrc_pmu_v2_disable_counter_int(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
+static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK);
- val |= 1 << hwc->idx;
- writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK);
+ val = readl(ddrc_pmu->base + regs->int_mask);
+ val |= BIT_ULL(hwc->idx);
+ writel(val, ddrc_pmu->base + regs->int_mask);
}
-static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu)
+static u32 hisi_ddrc_pmu_get_int_status(struct hisi_pmu *ddrc_pmu)
{
- return readl(ddrc_pmu->base + DDRC_INT_STATUS);
-}
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
-static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu,
- int idx)
-{
- writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR);
+ return readl(ddrc_pmu->base + regs->int_status);
}
-static u32 hisi_ddrc_pmu_v2_get_int_status(struct hisi_pmu *ddrc_pmu)
+static void hisi_ddrc_pmu_clear_int_status(struct hisi_pmu *ddrc_pmu,
+ int idx)
{
- return readl(ddrc_pmu->base + DDRC_V2_INT_STATUS);
-}
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
-static void hisi_ddrc_pmu_v2_clear_int_status(struct hisi_pmu *ddrc_pmu,
- int idx)
-{
- writel(1 << idx, ddrc_pmu->base + DDRC_V2_INT_CLEAR);
+ writel(1 << idx, ddrc_pmu->base + regs->int_clear);
}
-static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = {
- { "HISI0233", },
- { "HISI0234", },
- {}
-};
-MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match);
-
static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *ddrc_pmu)
{
+ hisi_uncore_pmu_init_topology(ddrc_pmu, &pdev->dev);
+
/*
* Use the SCCL_ID and DDRC channel ID to identify the
* DDRC PMU, while SCCL_ID is in MPIDR[aff2].
*/
if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id",
- &ddrc_pmu->index_id)) {
+ &ddrc_pmu->topo.index_id)) {
dev_err(&pdev->dev, "Can not read ddrc channel-id!\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &ddrc_pmu->sccl_id)) {
+ if (ddrc_pmu->topo.sccl_id < 0) {
dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n");
return -EINVAL;
}
- /* DDRC PMUs only share the same SCCL */
- ddrc_pmu->ccl_id = -1;
+
+ ddrc_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!ddrc_pmu->dev_info)
+ return -ENODEV;
ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(ddrc_pmu->base)) {
@@ -323,8 +259,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION);
if (ddrc_pmu->identifier >= HISI_PMU_V2) {
- if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id",
- &ddrc_pmu->sub_id)) {
+ if (ddrc_pmu->topo.sub_id < 0) {
dev_err(&pdev->dev, "Can not read sub-id!\n");
return -EINVAL;
}
@@ -382,73 +317,35 @@ static const struct attribute_group hisi_ddrc_pmu_v2_events_group = {
.attrs = hisi_ddrc_pmu_v2_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = {
- .attrs = hisi_ddrc_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_ddrc_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = {
- &hisi_ddrc_pmu_identifier_attr.attr,
- NULL
-};
-
-static const struct attribute_group hisi_ddrc_pmu_identifier_group = {
- .attrs = hisi_ddrc_pmu_identifier_attrs,
-};
-
static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = {
&hisi_ddrc_pmu_v1_format_group,
&hisi_ddrc_pmu_v1_events_group,
- &hisi_ddrc_pmu_cpumask_attr_group,
- &hisi_ddrc_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL,
};
static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = {
&hisi_ddrc_pmu_v2_format_group,
&hisi_ddrc_pmu_v2_events_group,
- &hisi_ddrc_pmu_cpumask_attr_group,
- &hisi_ddrc_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
-static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = {
- .write_evtype = hisi_ddrc_pmu_write_evtype,
- .get_event_idx = hisi_ddrc_pmu_v1_get_event_idx,
- .start_counters = hisi_ddrc_pmu_v1_start_counters,
- .stop_counters = hisi_ddrc_pmu_v1_stop_counters,
- .enable_counter = hisi_ddrc_pmu_v1_enable_counter,
- .disable_counter = hisi_ddrc_pmu_v1_disable_counter,
- .enable_counter_int = hisi_ddrc_pmu_v1_enable_counter_int,
- .disable_counter_int = hisi_ddrc_pmu_v1_disable_counter_int,
- .write_counter = hisi_ddrc_pmu_v1_write_counter,
- .read_counter = hisi_ddrc_pmu_v1_read_counter,
- .get_int_status = hisi_ddrc_pmu_v1_get_int_status,
- .clear_int_status = hisi_ddrc_pmu_v1_clear_int_status,
-};
-
-static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = {
+static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = {
.write_evtype = hisi_ddrc_pmu_write_evtype,
- .get_event_idx = hisi_ddrc_pmu_v2_get_event_idx,
- .start_counters = hisi_ddrc_pmu_v2_start_counters,
- .stop_counters = hisi_ddrc_pmu_v2_stop_counters,
- .enable_counter = hisi_ddrc_pmu_v2_enable_counter,
- .disable_counter = hisi_ddrc_pmu_v2_disable_counter,
- .enable_counter_int = hisi_ddrc_pmu_v2_enable_counter_int,
- .disable_counter_int = hisi_ddrc_pmu_v2_disable_counter_int,
- .write_counter = hisi_ddrc_pmu_v2_write_counter,
- .read_counter = hisi_ddrc_pmu_v2_read_counter,
- .get_int_status = hisi_ddrc_pmu_v2_get_int_status,
- .clear_int_status = hisi_ddrc_pmu_v2_clear_int_status,
+ .get_event_idx = hisi_ddrc_pmu_get_event_idx,
+ .start_counters = hisi_ddrc_pmu_start_counters,
+ .stop_counters = hisi_ddrc_pmu_stop_counters,
+ .enable_counter = hisi_ddrc_pmu_enable_counter,
+ .disable_counter = hisi_ddrc_pmu_disable_counter,
+ .enable_counter_int = hisi_ddrc_pmu_enable_counter_int,
+ .disable_counter_int = hisi_ddrc_pmu_disable_counter_int,
+ .write_counter = hisi_ddrc_pmu_write_counter,
+ .read_counter = hisi_ddrc_pmu_read_counter,
+ .get_int_status = hisi_ddrc_pmu_get_int_status,
+ .clear_int_status = hisi_ddrc_pmu_clear_int_status,
};
static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev,
@@ -464,18 +361,10 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev,
if (ret)
return ret;
- if (ddrc_pmu->identifier >= HISI_PMU_V2) {
- ddrc_pmu->counter_bits = 48;
- ddrc_pmu->check_event = DDRC_V2_NR_EVENTS;
- ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v2_attr_groups;
- ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops;
- } else {
- ddrc_pmu->counter_bits = 32;
- ddrc_pmu->check_event = DDRC_V1_NR_EVENTS;
- ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v1_attr_groups;
- ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops;
- }
-
+ ddrc_pmu->pmu_events.attr_groups = ddrc_pmu->dev_info->attr_groups;
+ ddrc_pmu->counter_bits = ddrc_pmu->dev_info->counter_bits;
+ ddrc_pmu->check_event = ddrc_pmu->dev_info->check_event;
+ ddrc_pmu->ops = &hisi_uncore_ddrc_ops;
ddrc_pmu->num_counters = DDRC_NR_COUNTERS;
ddrc_pmu->dev = &pdev->dev;
ddrc_pmu->on_cpu = -1;
@@ -499,6 +388,19 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
if (ret)
return ret;
+ if (ddrc_pmu->identifier >= HISI_PMU_V2)
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+ "hisi_sccl%d_ddrc%d_%d",
+ ddrc_pmu->topo.sccl_id, ddrc_pmu->topo.index_id,
+ ddrc_pmu->topo.sub_id);
+ else
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+ "hisi_sccl%d_ddrc%d", ddrc_pmu->topo.sccl_id,
+ ddrc_pmu->topo.index_id);
+
+ if (!name)
+ return -ENOMEM;
+
ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
&ddrc_pmu->node);
if (ret) {
@@ -506,17 +408,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
return ret;
}
- if (ddrc_pmu->identifier >= HISI_PMU_V2)
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
- "hisi_sccl%u_ddrc%u_%u",
- ddrc_pmu->sccl_id, ddrc_pmu->index_id,
- ddrc_pmu->sub_id);
- else
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
- "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id,
- ddrc_pmu->index_id);
-
- hisi_pmu_init(&ddrc_pmu->pmu, name, ddrc_pmu->pmu_events.attr_groups, THIS_MODULE);
+ hisi_pmu_init(ddrc_pmu, THIS_MODULE);
ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1);
if (ret) {
@@ -528,16 +420,77 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_ddrc_pmu_remove(struct platform_device *pdev)
+static void hisi_ddrc_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *ddrc_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&ddrc_pmu->pmu);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
&ddrc_pmu->node);
- return 0;
}
+static struct hisi_ddrc_pmu_regs hisi_ddrc_v1_pmu_regs = {
+ .event_cnt = DDRC_UNIMPLEMENTED_REG,
+ .event_ctrl = DDRC_EVENT_CTRL,
+ .event_type = DDRC_UNIMPLEMENTED_REG,
+ .perf_ctrl = DDRC_PERF_CTRL,
+ .perf_ctrl_en = DDRC_V1_PERF_CTRL_EN,
+ .int_mask = DDRC_INT_MASK,
+ .int_clear = DDRC_INT_CLEAR,
+ .int_status = DDRC_INT_STATUS,
+};
+
+static const struct hisi_pmu_dev_info hisi_ddrc_v1 = {
+ .counter_bits = 32,
+ .check_event = DDRC_V1_NR_EVENTS,
+ .attr_groups = hisi_ddrc_pmu_v1_attr_groups,
+ .private = &hisi_ddrc_v1_pmu_regs,
+};
+
+static struct hisi_ddrc_pmu_regs hisi_ddrc_v2_pmu_regs = {
+ .event_cnt = DDRC_V2_EVENT_CNT,
+ .event_ctrl = DDRC_V2_EVENT_CTRL,
+ .event_type = DDRC_V2_EVENT_TYPE,
+ .perf_ctrl = DDRC_V2_PERF_CTRL,
+ .perf_ctrl_en = DDRC_V2_PERF_CTRL_EN,
+ .int_mask = DDRC_V2_INT_MASK,
+ .int_clear = DDRC_V2_INT_CLEAR,
+ .int_status = DDRC_V2_INT_STATUS,
+};
+
+static const struct hisi_pmu_dev_info hisi_ddrc_v2 = {
+ .counter_bits = 48,
+ .check_event = DDRC_V2_NR_EVENTS,
+ .attr_groups = hisi_ddrc_pmu_v2_attr_groups,
+ .private = &hisi_ddrc_v2_pmu_regs,
+};
+
+static struct hisi_ddrc_pmu_regs hisi_ddrc_v3_pmu_regs = {
+ .event_cnt = DDRC_V2_EVENT_CNT,
+ .event_ctrl = DDRC_V2_EVENT_CTRL,
+ .event_type = DDRC_V2_EVENT_TYPE,
+ .perf_ctrl = DDRC_V2_PERF_CTRL,
+ .perf_ctrl_en = DDRC_V2_PERF_CTRL_EN,
+ .int_mask = DDRC_V3_INT_MASK,
+ .int_clear = DDRC_V3_INT_CLEAR,
+ .int_status = DDRC_V3_INT_STATUS,
+};
+
+static const struct hisi_pmu_dev_info hisi_ddrc_v3 = {
+ .counter_bits = 48,
+ .check_event = DDRC_V2_NR_EVENTS,
+ .attr_groups = hisi_ddrc_pmu_v2_attr_groups,
+ .private = &hisi_ddrc_v3_pmu_regs,
+};
+
+static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = {
+ { "HISI0233", (kernel_ulong_t)&hisi_ddrc_v1 },
+ { "HISI0234", (kernel_ulong_t)&hisi_ddrc_v2 },
+ { "HISI0235", (kernel_ulong_t)&hisi_ddrc_v3 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match);
+
static struct platform_driver hisi_ddrc_pmu_driver = {
.driver = {
.name = "hisi_ddrc_pmu",
@@ -573,10 +526,10 @@ static void __exit hisi_ddrc_pmu_module_exit(void)
{
platform_driver_unregister(&hisi_ddrc_pmu_driver);
cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE);
-
}
module_exit(hisi_ddrc_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index 13017b3412a5..97cfaa586a87 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -47,9 +47,9 @@
#define HHA_SRCID_CMD GENMASK(16, 6)
#define HHA_SRCID_MSK GENMASK(30, 20)
#define HHA_DATSRC_SKT_EN BIT(23)
-#define HHA_EVTYPE_NONE 0xff
+#define HHA_EVTYPE_MASK GENMASK(7, 0)
#define HHA_V1_NR_EVENT 0x65
-#define HHA_V2_NR_EVENT 0xCE
+#define HHA_V2_NR_EVENT 0xFF
HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 10, 0);
HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 21, 11);
@@ -197,7 +197,7 @@ static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
/* Write event code to HHA_EVENT_TYPEx register */
val = readl(hha_pmu->base + reg);
- val &= ~(HHA_EVTYPE_NONE << shift);
+ val &= ~(HHA_EVTYPE_MASK << shift);
val |= (type << shift);
writel(val, hha_pmu->base + reg);
}
@@ -295,12 +295,13 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
unsigned long long id;
acpi_status status;
+ hisi_uncore_pmu_init_topology(hha_pmu, &pdev->dev);
+
/*
* Use SCCL_ID and UID to identify the HHA PMU, while
* SCCL_ID is in MPIDR[aff2].
*/
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &hha_pmu->sccl_id)) {
+ if (hha_pmu->topo.sccl_id < 0) {
dev_err(&pdev->dev, "Can not read hha sccl-id!\n");
return -EINVAL;
}
@@ -309,8 +310,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
* Early versions of BIOS support _UID by mistake, so we support
* both "hisilicon, idx-id" as preference, if available.
*/
- if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
- &hha_pmu->index_id)) {
+ if (hha_pmu->topo.index_id < 0) {
status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
"_UID", NULL, &id);
if (ACPI_FAILURE(status)) {
@@ -318,10 +318,8 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
return -EINVAL;
}
- hha_pmu->index_id = id;
+ hha_pmu->topo.index_id = id;
}
- /* HHA PMUs only share the same SCCL */
- hha_pmu->ccl_id = -1;
hha_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(hha_pmu->base)) {
@@ -407,42 +405,19 @@ static const struct attribute_group hisi_hha_pmu_v2_events_group = {
.attrs = hisi_hha_pmu_v2_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_hha_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = {
- .attrs = hisi_hha_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_hha_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_hha_pmu_identifier_attrs[] = {
- &hisi_hha_pmu_identifier_attr.attr,
- NULL
-};
-
-static const struct attribute_group hisi_hha_pmu_identifier_group = {
- .attrs = hisi_hha_pmu_identifier_attrs,
-};
-
static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = {
&hisi_hha_pmu_v1_format_group,
&hisi_hha_pmu_v1_events_group,
- &hisi_hha_pmu_cpumask_attr_group,
- &hisi_hha_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL,
};
static const struct attribute_group *hisi_hha_pmu_v2_attr_groups[] = {
&hisi_hha_pmu_v2_format_group,
&hisi_hha_pmu_v2_events_group,
- &hisi_hha_pmu_cpumask_attr_group,
- &hisi_hha_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
@@ -510,6 +485,11 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev)
if (ret)
return ret;
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_hha%d",
+ hha_pmu->topo.sccl_id, hha_pmu->topo.index_id);
+ if (!name)
+ return -ENOMEM;
+
ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
&hha_pmu->node);
if (ret) {
@@ -517,9 +497,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev)
return ret;
}
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u",
- hha_pmu->sccl_id, hha_pmu->index_id);
- hisi_pmu_init(&hha_pmu->pmu, name, hha_pmu->pmu_events.attr_groups, THIS_MODULE);
+ hisi_pmu_init(hha_pmu, THIS_MODULE);
ret = perf_pmu_register(&hha_pmu->pmu, name, -1);
if (ret) {
@@ -531,14 +509,13 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_hha_pmu_remove(struct platform_device *pdev)
+static void hisi_hha_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *hha_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&hha_pmu->pmu);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
&hha_pmu->node);
- return 0;
}
static struct platform_driver hisi_hha_pmu_driver = {
@@ -579,6 +556,7 @@ static void __exit hisi_hha_pmu_module_exit(void)
}
module_exit(hisi_hha_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 2995f3630d49..f963e4f9e552 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -39,6 +39,7 @@
/* L3C has 8-counters */
#define L3C_NR_COUNTERS 0x8
+#define L3C_MAX_EXT 2
#define L3C_PERF_CTRL_EN 0x10000
#define L3C_TRACETAG_EN BIT(31)
@@ -55,59 +56,172 @@
#define L3C_V1_NR_EVENTS 0x59
#define L3C_V2_NR_EVENTS 0xFF
-HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(ext, config, 17, 16);
+/*
+ * Remain the config1:0-7 for backward compatibility if some existing users
+ * hardcode the config1:0-7 directly without parsing the sysfs attribute.
+ */
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core_deprecated, config1, 7, 0);
HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0);
-static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
+struct hisi_l3c_pmu {
+ struct hisi_pmu l3c_pmu;
+
+ /* MMIO and IRQ resources for extension events */
+ void __iomem *ext_base[L3C_MAX_EXT];
+ int ext_irq[L3C_MAX_EXT];
+ int ext_num;
+};
+
+#define to_hisi_l3c_pmu(_l3c_pmu) \
+ container_of(_l3c_pmu, struct hisi_l3c_pmu, l3c_pmu)
+
+/*
+ * The hardware counter idx used in counter enable/disable,
+ * interrupt enable/disable and status check, etc.
+ */
+#define L3C_HW_IDX(_cntr_idx) ((_cntr_idx) % L3C_NR_COUNTERS)
+
+/* Range of ext counters in used mask. */
+#define L3C_CNTR_EXT_L(_ext) (((_ext) + 1) * L3C_NR_COUNTERS)
+#define L3C_CNTR_EXT_H(_ext) (((_ext) + 2) * L3C_NR_COUNTERS)
+
+struct hisi_l3c_pmu_ext {
+ bool support_ext;
+};
+
+static bool support_ext(struct hisi_l3c_pmu *pmu)
+{
+ struct hisi_l3c_pmu_ext *l3c_pmu_ext = pmu->l3c_pmu.dev_info->private;
+
+ return l3c_pmu_ext->support_ext;
+}
+
+/*
+ * tt_core was extended to cover all the CPUs sharing the L3 and was moved from
+ * config1:0-7 to config2:0-*. Try it first and fallback to tt_core_deprecated
+ * if user's still using the deprecated one.
+ */
+static u32 hisi_l3c_pmu_get_tt_core(struct perf_event *event)
+{
+ u32 core = hisi_get_tt_core(event);
+
+ if (core)
+ return core;
+
+ return hisi_get_tt_core_deprecated(event);
+}
+
+static int hisi_l3c_pmu_get_event_idx(struct perf_event *event)
{
struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ unsigned long *used_mask = l3c_pmu->pmu_events.used_mask;
+ int ext = hisi_get_ext(event);
+ int idx;
+
+ /*
+ * For an L3C PMU that supports extension events, we can monitor
+ * maximum 2 * num_counters to 3 * num_counters events, depending on
+ * the number of ext regions supported by hardware. Thus use bit
+ * [0, num_counters - 1] for normal events and bit
+ * [ext * num_counters, (ext + 1) * num_counters - 1] for extension
+ * events. The idx allocation will keep unchanged for normal events and
+ * we can also use the idx to distinguish whether it's an extension
+ * event or not.
+ *
+ * Since normal events and extension events locates on the different
+ * address space, save the base address to the event->hw.event_base.
+ */
+ if (ext && !support_ext(hisi_l3c_pmu))
+ return -EOPNOTSUPP;
+
+ if (ext)
+ event->hw.event_base = (unsigned long)hisi_l3c_pmu->ext_base[ext - 1];
+ else
+ event->hw.event_base = (unsigned long)l3c_pmu->base;
+
+ ext -= 1;
+ idx = find_next_zero_bit(used_mask, L3C_CNTR_EXT_H(ext), L3C_CNTR_EXT_L(ext));
+
+ if (idx >= L3C_CNTR_EXT_H(ext))
+ return -EAGAIN;
+
+ set_bit(idx, used_mask);
+
+ return idx;
+}
+
+static u32 hisi_l3c_pmu_event_readl(struct hw_perf_event *hwc, u32 reg)
+{
+ return readl((void __iomem *)hwc->event_base + reg);
+}
+
+static void hisi_l3c_pmu_event_writel(struct hw_perf_event *hwc, u32 reg, u32 val)
+{
+ writel(val, (void __iomem *)hwc->event_base + reg);
+}
+
+static u64 hisi_l3c_pmu_event_readq(struct hw_perf_event *hwc, u32 reg)
+{
+ return readq((void __iomem *)hwc->event_base + reg);
+}
+
+static void hisi_l3c_pmu_event_writeq(struct hw_perf_event *hwc, u32 reg, u64 val)
+{
+ writeq(val, (void __iomem *)hwc->event_base + reg);
+}
+
+static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
u32 tt_req = hisi_get_tt_req(event);
if (tt_req) {
u32 val;
/* Set request-type for tracetag */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val |= tt_req << L3C_TRACETAG_REQ_SHIFT;
val |= L3C_TRACETAG_REQ_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
/* Enable request-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val |= L3C_TRACETAG_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
}
}
static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 tt_req = hisi_get_tt_req(event);
if (tt_req) {
u32 val;
/* Clear request-type */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT);
val &= ~L3C_TRACETAG_REQ_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
/* Disable request-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val &= ~L3C_TRACETAG_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
}
}
static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
u32 reg, reg_idx, shift, val;
- int idx = hwc->idx;
+ int idx = L3C_HW_IDX(hwc->idx);
/*
* Select the appropriate datasource register(L3C_DATSRC_TYPE0/1).
@@ -120,15 +234,15 @@ static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
reg_idx = idx % 4;
shift = 8 * reg_idx;
- val = readl(l3c_pmu->base + reg);
+ val = hisi_l3c_pmu_event_readl(hwc, reg);
val &= ~(L3C_DATSRC_MASK << shift);
val |= ds_cfg << shift;
- writel(val, l3c_pmu->base + reg);
+ hisi_l3c_pmu_event_writel(hwc, reg, val);
}
static void hisi_l3c_pmu_config_ds(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 ds_cfg = hisi_get_datasrc_cfg(event);
u32 ds_skt = hisi_get_datasrc_skt(event);
@@ -138,15 +252,15 @@ static void hisi_l3c_pmu_config_ds(struct perf_event *event)
if (ds_skt) {
u32 val;
- val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL);
val |= L3C_DATSRC_SKT_EN;
- writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val);
}
}
static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 ds_cfg = hisi_get_datasrc_cfg(event);
u32 ds_skt = hisi_get_datasrc_skt(event);
@@ -156,57 +270,63 @@ static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
if (ds_skt) {
u32 val;
- val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL);
val &= ~L3C_DATSRC_SKT_EN;
- writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val);
}
}
static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
- u32 core = hisi_get_tt_core(event);
+ struct hw_perf_event *hwc = &event->hw;
+ u32 core = hisi_l3c_pmu_get_tt_core(event);
if (core) {
u32 val;
/* Config and enable core information */
- writel(core, l3c_pmu->base + L3C_CORE_CTRL);
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, core);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val |= L3C_CORE_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
/* Enable core-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val |= L3C_TRACETAG_CORE_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
}
}
static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
- u32 core = hisi_get_tt_core(event);
+ struct hw_perf_event *hwc = &event->hw;
+ u32 core = hisi_l3c_pmu_get_tt_core(event);
if (core) {
u32 val;
/* Clear core information */
- writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL);
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, L3C_COER_NONE);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val &= ~L3C_CORE_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
/* Disable core-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val &= ~L3C_TRACETAG_CORE_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
}
}
+static bool hisi_l3c_pmu_have_filter(struct perf_event *event)
+{
+ return hisi_get_tt_req(event) || hisi_l3c_pmu_get_tt_core(event) ||
+ hisi_get_datasrc_cfg(event) || hisi_get_datasrc_skt(event);
+}
+
static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
{
- if (event->attr.config1 != 0x0) {
+ if (hisi_l3c_pmu_have_filter(event)) {
hisi_l3c_pmu_config_req_tracetag(event);
hisi_l3c_pmu_config_core_tracetag(event);
hisi_l3c_pmu_config_ds(event);
@@ -215,38 +335,56 @@ static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
static void hisi_l3c_pmu_disable_filter(struct perf_event *event)
{
- if (event->attr.config1 != 0x0) {
+ if (hisi_l3c_pmu_have_filter(event)) {
hisi_l3c_pmu_clear_ds(event);
hisi_l3c_pmu_clear_core_tracetag(event);
hisi_l3c_pmu_clear_req_tracetag(event);
}
}
+static int hisi_l3c_pmu_check_filter(struct perf_event *event)
+{
+ struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ext = hisi_get_ext(event);
+
+ if (ext < 0 || ext > hisi_l3c_pmu->ext_num)
+ return -EINVAL;
+
+ if (hisi_get_tt_core(event) && hisi_get_tt_core_deprecated(event))
+ return -EINVAL;
+
+ return 0;
+}
+
/*
* Select the counter register offset using the counter index
*/
static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
{
- return (L3C_CNTR0_LOWER + (cntr_idx * 8));
+ return L3C_CNTR0_LOWER + L3C_HW_IDX(cntr_idx) * 8;
}
static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
struct hw_perf_event *hwc)
{
- return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
+ return hisi_l3c_pmu_event_readq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx));
}
static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
struct hw_perf_event *hwc, u64 val)
{
- writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
+ hisi_l3c_pmu_event_writeq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx), val);
}
static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
u32 type)
{
+ struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw;
u32 reg, reg_idx, shift, val;
+ idx = L3C_HW_IDX(idx);
+
/*
* Select the appropriate event select register(L3C_EVENT_TYPE0/1).
* There are 2 event select registers for the 8 hardware counters.
@@ -259,36 +397,72 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
shift = 8 * reg_idx;
/* Write event code to L3C_EVENT_TYPEx Register */
- val = readl(l3c_pmu->base + reg);
+ val = hisi_l3c_pmu_event_readl(hwc, reg);
val &= ~(L3C_EVTYPE_NONE << shift);
- val |= (type << shift);
- writel(val, l3c_pmu->base + reg);
+ val |= type << shift;
+ hisi_l3c_pmu_event_writel(hwc, reg, val);
}
static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ unsigned long *used_mask = l3c_pmu->pmu_events.used_mask;
+ unsigned long used_cntr = find_first_bit(used_mask, l3c_pmu->num_counters);
u32 val;
+ int i;
/*
- * Set perf_enable bit in L3C_PERF_CTRL register to start counting
- * for all enabled counters.
+ * Check if any counter belongs to the normal range (instead of ext
+ * range). If so, enable it.
*/
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
- val |= L3C_PERF_CTRL_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ if (used_cntr < L3C_NR_COUNTERS) {
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val |= L3C_PERF_CTRL_EN;
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ }
+
+ /* If not, do enable it on ext ranges. */
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ /* Find used counter in this ext range, skip the range if not. */
+ used_cntr = find_next_bit(used_mask, L3C_CNTR_EXT_H(i), L3C_CNTR_EXT_L(i));
+ if (used_cntr >= L3C_CNTR_EXT_H(i))
+ continue;
+
+ val = readl(hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ val |= L3C_PERF_CTRL_EN;
+ writel(val, hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ }
}
static void hisi_l3c_pmu_stop_counters(struct hisi_pmu *l3c_pmu)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ unsigned long *used_mask = l3c_pmu->pmu_events.used_mask;
+ unsigned long used_cntr = find_first_bit(used_mask, l3c_pmu->num_counters);
u32 val;
+ int i;
/*
- * Clear perf_enable bit in L3C_PERF_CTRL register to stop counting
- * for all enabled counters.
+ * Check if any counter belongs to the normal range (instead of ext
+ * range). If so, stop it.
*/
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
- val &= ~(L3C_PERF_CTRL_EN);
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ if (used_cntr < L3C_NR_COUNTERS) {
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val &= ~L3C_PERF_CTRL_EN;
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ }
+
+ /* If not, do stop it on ext ranges. */
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ /* Find used counter in this ext range, skip the range if not. */
+ used_cntr = find_next_bit(used_mask, L3C_CNTR_EXT_H(i), L3C_CNTR_EXT_L(i));
+ if (used_cntr >= L3C_CNTR_EXT_H(i))
+ continue;
+
+ val = readl(hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ val &= ~L3C_PERF_CTRL_EN;
+ writel(val, hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ }
}
static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu,
@@ -297,9 +471,9 @@ static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu,
u32 val;
/* Enable counter index in L3C_EVENT_CTRL register */
- val = readl(l3c_pmu->base + L3C_EVENT_CTRL);
- val |= (1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_EVENT_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL);
+ val |= 1 << L3C_HW_IDX(hwc->idx);
+ hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val);
}
static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu,
@@ -308,9 +482,9 @@ static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu,
u32 val;
/* Clear counter index in L3C_EVENT_CTRL register */
- val = readl(l3c_pmu->base + L3C_EVENT_CTRL);
- val &= ~(1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_EVENT_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL);
+ val &= ~(1 << L3C_HW_IDX(hwc->idx));
+ hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val);
}
static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu,
@@ -318,10 +492,10 @@ static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu,
{
u32 val;
- val = readl(l3c_pmu->base + L3C_INT_MASK);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK);
/* Write 0 to enable interrupt */
- val &= ~(1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_INT_MASK);
+ val &= ~(1 << L3C_HW_IDX(hwc->idx));
+ hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val);
}
static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu,
@@ -329,48 +503,61 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu,
{
u32 val;
- val = readl(l3c_pmu->base + L3C_INT_MASK);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK);
/* Write 1 to mask interrupt */
- val |= (1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_INT_MASK);
+ val |= 1 << L3C_HW_IDX(hwc->idx);
+ hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val);
}
static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu)
{
- return readl(l3c_pmu->base + L3C_INT_STATUS);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ u32 ext_int, status, status_ext = 0;
+ int i;
+
+ status = readl(l3c_pmu->base + L3C_INT_STATUS);
+
+ if (!support_ext(hisi_l3c_pmu))
+ return status;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ ext_int = readl(hisi_l3c_pmu->ext_base[i] + L3C_INT_STATUS);
+ status_ext |= ext_int << (L3C_NR_COUNTERS * i);
+ }
+
+ return status | (status_ext << L3C_NR_COUNTERS);
}
static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx)
{
- writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR);
-}
+ struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw;
-static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
- { "HISI0213", },
- { "HISI0214", },
- {}
-};
-MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
+ hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << L3C_HW_IDX(idx));
+}
static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
{
+ hisi_uncore_pmu_init_topology(l3c_pmu, &pdev->dev);
+
/*
* Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
* SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
*/
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &l3c_pmu->sccl_id)) {
+ if (l3c_pmu->topo.sccl_id < 0) {
dev_err(&pdev->dev, "Can not read l3c sccl-id!\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
- &l3c_pmu->ccl_id)) {
+ if (l3c_pmu->topo.ccl_id < 0) {
dev_err(&pdev->dev, "Can not read l3c ccl-id!\n");
return -EINVAL;
}
+ l3c_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!l3c_pmu->dev_info)
+ return -ENODEV;
+
l3c_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(l3c_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n");
@@ -382,6 +569,50 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
return 0;
}
+static int hisi_l3c_pmu_init_ext(struct hisi_pmu *l3c_pmu, struct platform_device *pdev)
+{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ret, irq, ext_num, i;
+ char *irqname;
+
+ /* HiSilicon L3C PMU supporting ext should have more than 1 irq resources. */
+ ext_num = platform_irq_count(pdev);
+ if (ext_num < L3C_MAX_EXT)
+ return -ENODEV;
+
+ /*
+ * The number of ext supported equals the number of irq - 1, since one
+ * of the irqs belongs to the normal part of PMU.
+ */
+ hisi_l3c_pmu->ext_num = ext_num - 1;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ hisi_l3c_pmu->ext_base[i] = devm_platform_ioremap_resource(pdev, i + 1);
+ if (IS_ERR(hisi_l3c_pmu->ext_base[i]))
+ return PTR_ERR(hisi_l3c_pmu->ext_base[i]);
+
+ irq = platform_get_irq(pdev, i + 1);
+ if (irq < 0)
+ return irq;
+
+ irqname = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s ext%d",
+ dev_name(&pdev->dev), i + 1);
+ if (!irqname)
+ return -ENOMEM;
+
+ ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ irqname, l3c_pmu);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret,
+ "Fail to request EXT IRQ: %d.\n", irq);
+
+ hisi_l3c_pmu->ext_irq[i] = irq;
+ }
+
+ return 0;
+}
+
static struct attribute *hisi_l3c_pmu_v1_format_attr[] = {
HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
NULL,
@@ -394,10 +625,11 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = {
static struct attribute *hisi_l3c_pmu_v2_format_attr[] = {
HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
- HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"),
+ HISI_PMU_FORMAT_ATTR(tt_core_deprecated, "config1:0-7"),
HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"),
HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"),
+ HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"),
NULL
};
@@ -406,6 +638,20 @@ static const struct attribute_group hisi_l3c_pmu_v2_format_group = {
.attrs = hisi_l3c_pmu_v2_format_attr,
};
+static struct attribute *hisi_l3c_pmu_v3_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ HISI_PMU_FORMAT_ATTR(ext, "config:16-17"),
+ HISI_PMU_FORMAT_ATTR(tt_core_deprecated, "config1:0-7"),
+ HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
+ HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"),
+ NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v3_format_group = {
+ .name = "format",
+ .attrs = hisi_l3c_pmu_v3_format_attr,
+};
+
static struct attribute *hisi_l3c_pmu_v1_events_attr[] = {
HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00),
HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01),
@@ -441,48 +687,82 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = {
.attrs = hisi_l3c_pmu_v2_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = {
- .attrs = hisi_l3c_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_l3c_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_l3c_pmu_identifier_attrs[] = {
- &hisi_l3c_pmu_identifier_attr.attr,
+static struct attribute *hisi_l3c_pmu_v3_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(rd_spipe, 0x18),
+ HISI_PMU_EVENT_ATTR(rd_hit_spipe, 0x19),
+ HISI_PMU_EVENT_ATTR(wr_spipe, 0x1a),
+ HISI_PMU_EVENT_ATTR(wr_hit_spipe, 0x1b),
+ HISI_PMU_EVENT_ATTR(io_rd_spipe, 0x1c),
+ HISI_PMU_EVENT_ATTR(io_rd_hit_spipe, 0x1d),
+ HISI_PMU_EVENT_ATTR(io_wr_spipe, 0x1e),
+ HISI_PMU_EVENT_ATTR(io_wr_hit_spipe, 0x1f),
+ HISI_PMU_EVENT_ATTR(cycles, 0x7f),
+ HISI_PMU_EVENT_ATTR(l3c_ref, 0xbc),
+ HISI_PMU_EVENT_ATTR(l3c2ring, 0xbd),
NULL
};
-static const struct attribute_group hisi_l3c_pmu_identifier_group = {
- .attrs = hisi_l3c_pmu_identifier_attrs,
+static const struct attribute_group hisi_l3c_pmu_v3_events_group = {
+ .name = "events",
+ .attrs = hisi_l3c_pmu_v3_events_attr,
};
static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = {
&hisi_l3c_pmu_v1_format_group,
&hisi_l3c_pmu_v1_events_group,
- &hisi_l3c_pmu_cpumask_attr_group,
- &hisi_l3c_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL,
};
static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = {
&hisi_l3c_pmu_v2_format_group,
&hisi_l3c_pmu_v2_events_group,
- &hisi_l3c_pmu_cpumask_attr_group,
- &hisi_l3c_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static const struct attribute_group *hisi_l3c_pmu_v3_attr_groups[] = {
+ &hisi_l3c_pmu_v3_format_group,
+ &hisi_l3c_pmu_v3_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
+static struct hisi_l3c_pmu_ext hisi_l3c_pmu_support_ext = {
+ .support_ext = true,
+};
+
+static struct hisi_l3c_pmu_ext hisi_l3c_pmu_not_support_ext = {
+ .support_ext = false,
+};
+
+static const struct hisi_pmu_dev_info hisi_l3c_pmu_v1 = {
+ .attr_groups = hisi_l3c_pmu_v1_attr_groups,
+ .counter_bits = 48,
+ .check_event = L3C_V1_NR_EVENTS,
+ .private = &hisi_l3c_pmu_not_support_ext,
+};
+
+static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = {
+ .attr_groups = hisi_l3c_pmu_v2_attr_groups,
+ .counter_bits = 64,
+ .check_event = L3C_V2_NR_EVENTS,
+ .private = &hisi_l3c_pmu_not_support_ext,
+};
+
+static const struct hisi_pmu_dev_info hisi_l3c_pmu_v3 = {
+ .attr_groups = hisi_l3c_pmu_v3_attr_groups,
+ .counter_bits = 64,
+ .check_event = L3C_V2_NR_EVENTS,
+ .private = &hisi_l3c_pmu_support_ext,
+};
+
static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
.write_evtype = hisi_l3c_pmu_write_evtype,
- .get_event_idx = hisi_uncore_pmu_get_event_idx,
+ .get_event_idx = hisi_l3c_pmu_get_event_idx,
.start_counters = hisi_l3c_pmu_start_counters,
.stop_counters = hisi_l3c_pmu_stop_counters,
.enable_counter = hisi_l3c_pmu_enable_counter,
@@ -495,11 +775,14 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
.clear_int_status = hisi_l3c_pmu_clear_int_status,
.enable_filter = hisi_l3c_pmu_enable_filter,
.disable_filter = hisi_l3c_pmu_disable_filter,
+ .check_filter = hisi_l3c_pmu_check_filter,
};
static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ struct hisi_l3c_pmu_ext *l3c_pmu_dev_ext;
int ret;
ret = hisi_l3c_pmu_init_data(pdev, l3c_pmu);
@@ -510,40 +793,58 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
if (ret)
return ret;
- if (l3c_pmu->identifier >= HISI_PMU_V2) {
- l3c_pmu->counter_bits = 64;
- l3c_pmu->check_event = L3C_V2_NR_EVENTS;
- l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups;
- } else {
- l3c_pmu->counter_bits = 48;
- l3c_pmu->check_event = L3C_V1_NR_EVENTS;
- l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups;
- }
-
+ l3c_pmu->pmu_events.attr_groups = l3c_pmu->dev_info->attr_groups;
+ l3c_pmu->counter_bits = l3c_pmu->dev_info->counter_bits;
+ l3c_pmu->check_event = l3c_pmu->dev_info->check_event;
l3c_pmu->num_counters = L3C_NR_COUNTERS;
l3c_pmu->ops = &hisi_uncore_l3c_ops;
l3c_pmu->dev = &pdev->dev;
l3c_pmu->on_cpu = -1;
+ l3c_pmu_dev_ext = l3c_pmu->dev_info->private;
+ if (l3c_pmu_dev_ext->support_ext) {
+ ret = hisi_l3c_pmu_init_ext(l3c_pmu, pdev);
+ if (ret)
+ return ret;
+ /*
+ * The extension events have their own counters with the
+ * same number of the normal events counters. So we can
+ * have at maximum num_counters * ext events monitored.
+ */
+ l3c_pmu->num_counters += hisi_l3c_pmu->ext_num * L3C_NR_COUNTERS;
+ }
+
return 0;
}
static int hisi_l3c_pmu_probe(struct platform_device *pdev)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu;
struct hisi_pmu *l3c_pmu;
char *name;
int ret;
- l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu), GFP_KERNEL);
- if (!l3c_pmu)
+ hisi_l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*hisi_l3c_pmu), GFP_KERNEL);
+ if (!hisi_l3c_pmu)
return -ENOMEM;
+ l3c_pmu = &hisi_l3c_pmu->l3c_pmu;
platform_set_drvdata(pdev, l3c_pmu);
ret = hisi_l3c_pmu_dev_probe(pdev, l3c_pmu);
if (ret)
return ret;
+ if (l3c_pmu->topo.sub_id >= 0)
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d_%d",
+ l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id,
+ l3c_pmu->topo.sub_id);
+ else
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d",
+ l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id);
+ if (!name)
+ return -ENOMEM;
+
ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
&l3c_pmu->node);
if (ret) {
@@ -551,13 +852,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
return ret;
}
- /*
- * CCL_ID is used to identify the L3C in the same SCCL which was
- * used _UID by mistake.
- */
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
- l3c_pmu->sccl_id, l3c_pmu->ccl_id);
- hisi_pmu_init(&l3c_pmu->pmu, name, l3c_pmu->pmu_events.attr_groups, THIS_MODULE);
+ hisi_pmu_init(l3c_pmu, THIS_MODULE);
ret = perf_pmu_register(&l3c_pmu->pmu, name, -1);
if (ret) {
@@ -569,16 +864,23 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_l3c_pmu_remove(struct platform_device *pdev)
+static void hisi_l3c_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *l3c_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&l3c_pmu->pmu);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
&l3c_pmu->node);
- return 0;
}
+static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
+ { "HISI0213", (kernel_ulong_t)&hisi_l3c_pmu_v1 },
+ { "HISI0214", (kernel_ulong_t)&hisi_l3c_pmu_v2 },
+ { "HISI0215", (kernel_ulong_t)&hisi_l3c_pmu_v3 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
+
static struct platform_driver hisi_l3c_pmu_driver = {
.driver = {
.name = "hisi_l3c_pmu",
@@ -589,14 +891,60 @@ static struct platform_driver hisi_l3c_pmu_driver = {
.remove = hisi_l3c_pmu_remove,
};
+static int hisi_l3c_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ret, i;
+
+ ret = hisi_uncore_pmu_online_cpu(cpu, node);
+ if (ret)
+ return ret;
+
+ /* Avoid L3C pmu not supporting ext from ext irq migrating. */
+ if (!support_ext(hisi_l3c_pmu))
+ return 0;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++)
+ WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq[i],
+ cpumask_of(l3c_pmu->on_cpu)));
+
+ return 0;
+}
+
+static int hisi_l3c_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ret, i;
+
+ ret = hisi_uncore_pmu_offline_cpu(cpu, node);
+ if (ret)
+ return ret;
+
+ /* If failed to find any available CPU, skip irq migration. */
+ if (l3c_pmu->on_cpu < 0)
+ return 0;
+
+ /* Avoid L3C pmu not supporting ext from ext irq migrating. */
+ if (!support_ext(hisi_l3c_pmu))
+ return 0;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++)
+ WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq[i],
+ cpumask_of(l3c_pmu->on_cpu)));
+
+ return 0;
+}
+
static int __init hisi_l3c_pmu_module_init(void)
{
int ret;
ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
"AP_PERF_ARM_HISI_L3_ONLINE",
- hisi_uncore_pmu_online_cpu,
- hisi_uncore_pmu_offline_cpu);
+ hisi_l3c_pmu_online_cpu,
+ hisi_l3c_pmu_offline_cpu);
if (ret) {
pr_err("L3C PMU: Error setup hotplug, ret = %d\n", ret);
return ret;
@@ -617,6 +965,7 @@ static void __exit hisi_l3c_pmu_module_exit(void)
}
module_exit(hisi_l3c_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c
new file mode 100644
index 000000000000..4df4eebe243e
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC MN uncore Hardware event counters support
+ *
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd.
+ */
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* Dynamic CPU hotplug state used by MN PMU */
+static enum cpuhp_state hisi_mn_pmu_online;
+
+/* MN register definition */
+#define HISI_MN_DYNAMIC_CTRL_REG 0x400
+#define HISI_MN_DYNAMIC_CTRL_EN BIT(0)
+#define HISI_MN_PERF_CTRL_REG 0x408
+#define HISI_MN_PERF_CTRL_EN BIT(6)
+#define HISI_MN_INT_MASK_REG 0x800
+#define HISI_MN_INT_STATUS_REG 0x808
+#define HISI_MN_INT_CLEAR_REG 0x80C
+#define HISI_MN_EVENT_CTRL_REG 0x1C00
+#define HISI_MN_VERSION_REG 0x1C04
+#define HISI_MN_EVTYPE0_REG 0x1d00
+#define HISI_MN_EVTYPE_MASK GENMASK(7, 0)
+#define HISI_MN_CNTR0_REG 0x1e00
+#define HISI_MN_EVTYPE_REGn(evtype0, n) ((evtype0) + (n) * 4)
+#define HISI_MN_CNTR_REGn(cntr0, n) ((cntr0) + (n) * 8)
+
+#define HISI_MN_NR_COUNTERS 4
+#define HISI_MN_TIMEOUT_US 500U
+
+struct hisi_mn_pmu_regs {
+ u32 version;
+ u32 dyn_ctrl;
+ u32 perf_ctrl;
+ u32 int_mask;
+ u32 int_clear;
+ u32 int_status;
+ u32 event_ctrl;
+ u32 event_type0;
+ u32 event_cntr0;
+};
+
+/*
+ * Each event request takes a certain amount of time to complete. If
+ * we counting the latency related event, we need to wait for the all
+ * requests complete. Otherwise, the value of counter is slightly larger.
+ */
+static void hisi_mn_pmu_counter_flush(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ int ret;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->dyn_ctrl);
+ val |= HISI_MN_DYNAMIC_CTRL_EN;
+ writel(val, mn_pmu->base + reg_info->dyn_ctrl);
+
+ ret = readl_poll_timeout_atomic(mn_pmu->base + reg_info->dyn_ctrl,
+ val, !(val & HISI_MN_DYNAMIC_CTRL_EN),
+ 1, HISI_MN_TIMEOUT_US);
+ if (ret)
+ dev_warn(mn_pmu->dev, "Counter flush timeout\n");
+}
+
+static u64 hisi_mn_pmu_read_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ return readq(mn_pmu->base + HISI_MN_CNTR_REGn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_mn_pmu_write_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ writeq(val, mn_pmu->base + HISI_MN_CNTR_REGn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_mn_pmu_write_evtype(struct hisi_pmu *mn_pmu, int idx, u32 type)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ /*
+ * Select the appropriate event select register.
+ * There are 2 32-bit event select registers for the
+ * 8 hardware counters, each event code is 8-bit wide.
+ */
+ val = readl(mn_pmu->base + HISI_MN_EVTYPE_REGn(reg_info->event_type0, idx / 4));
+ val &= ~(HISI_MN_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx));
+ val |= (type << HISI_PMU_EVTYPE_SHIFT(idx));
+ writel(val, mn_pmu->base + HISI_MN_EVTYPE_REGn(reg_info->event_type0, idx / 4));
+}
+
+static void hisi_mn_pmu_start_counters(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->perf_ctrl);
+ val |= HISI_MN_PERF_CTRL_EN;
+ writel(val, mn_pmu->base + reg_info->perf_ctrl);
+}
+
+static void hisi_mn_pmu_stop_counters(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->perf_ctrl);
+ val &= ~HISI_MN_PERF_CTRL_EN;
+ writel(val, mn_pmu->base + reg_info->perf_ctrl);
+
+ hisi_mn_pmu_counter_flush(mn_pmu);
+}
+
+static void hisi_mn_pmu_enable_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->event_ctrl);
+ val |= BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->event_ctrl);
+}
+
+static void hisi_mn_pmu_disable_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->event_ctrl);
+ val &= ~BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->event_ctrl);
+}
+
+static void hisi_mn_pmu_enable_counter_int(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->int_mask);
+ val &= ~BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->int_mask);
+}
+
+static void hisi_mn_pmu_disable_counter_int(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->int_mask);
+ val |= BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->int_mask);
+}
+
+static u32 hisi_mn_pmu_get_int_status(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ return readl(mn_pmu->base + reg_info->int_status);
+}
+
+static void hisi_mn_pmu_clear_int_status(struct hisi_pmu *mn_pmu, int idx)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ writel(BIT(idx), mn_pmu->base + reg_info->int_clear);
+}
+
+static struct attribute *hisi_mn_pmu_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ NULL
+};
+
+static const struct attribute_group hisi_mn_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_mn_pmu_format_attr,
+};
+
+static struct attribute *hisi_mn_pmu_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(req_eobarrier_num, 0x00),
+ HISI_PMU_EVENT_ATTR(req_ecbarrier_num, 0x01),
+ HISI_PMU_EVENT_ATTR(req_dvmop_num, 0x02),
+ HISI_PMU_EVENT_ATTR(req_dvmsync_num, 0x03),
+ HISI_PMU_EVENT_ATTR(req_retry_num, 0x04),
+ HISI_PMU_EVENT_ATTR(req_writenosnp_num, 0x05),
+ HISI_PMU_EVENT_ATTR(req_readnosnp_num, 0x06),
+ HISI_PMU_EVENT_ATTR(snp_dvm_num, 0x07),
+ HISI_PMU_EVENT_ATTR(snp_dvmsync_num, 0x08),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvm_num, 0x09),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_num, 0x0A),
+ HISI_PMU_EVENT_ATTR(mn_req_dvm_num, 0x0B),
+ HISI_PMU_EVENT_ATTR(mn_req_dvmsync_num, 0x0C),
+ HISI_PMU_EVENT_ATTR(pa_req_dvm_num, 0x0D),
+ HISI_PMU_EVENT_ATTR(pa_req_dvmsync_num, 0x0E),
+ HISI_PMU_EVENT_ATTR(snp_dvm_latency, 0x80),
+ HISI_PMU_EVENT_ATTR(snp_dvmsync_latency, 0x81),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvm_latency, 0x82),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_latency, 0x83),
+ HISI_PMU_EVENT_ATTR(mn_req_dvm_latency, 0x84),
+ HISI_PMU_EVENT_ATTR(mn_req_dvmsync_latency, 0x85),
+ HISI_PMU_EVENT_ATTR(pa_req_dvm_latency, 0x86),
+ HISI_PMU_EVENT_ATTR(pa_req_dvmsync_latency, 0x87),
+ NULL
+};
+
+static const struct attribute_group hisi_mn_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_mn_pmu_events_attr,
+};
+
+static const struct attribute_group *hisi_mn_pmu_attr_groups[] = {
+ &hisi_mn_pmu_format_group,
+ &hisi_mn_pmu_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_mn_ops = {
+ .write_evtype = hisi_mn_pmu_write_evtype,
+ .get_event_idx = hisi_uncore_pmu_get_event_idx,
+ .start_counters = hisi_mn_pmu_start_counters,
+ .stop_counters = hisi_mn_pmu_stop_counters,
+ .enable_counter = hisi_mn_pmu_enable_counter,
+ .disable_counter = hisi_mn_pmu_disable_counter,
+ .enable_counter_int = hisi_mn_pmu_enable_counter_int,
+ .disable_counter_int = hisi_mn_pmu_disable_counter_int,
+ .write_counter = hisi_mn_pmu_write_counter,
+ .read_counter = hisi_mn_pmu_read_counter,
+ .get_int_status = hisi_mn_pmu_get_int_status,
+ .clear_int_status = hisi_mn_pmu_clear_int_status,
+};
+
+static int hisi_mn_pmu_dev_init(struct platform_device *pdev,
+ struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info;
+ int ret;
+
+ hisi_uncore_pmu_init_topology(mn_pmu, &pdev->dev);
+
+ if (mn_pmu->topo.scl_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL,
+ "Failed to read MN scl id\n");
+
+ if (mn_pmu->topo.index_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL,
+ "Failed to read MN index id\n");
+
+ mn_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(mn_pmu->base))
+ return dev_err_probe(&pdev->dev, PTR_ERR(mn_pmu->base),
+ "Failed to ioremap resource\n");
+
+ ret = hisi_uncore_pmu_init_irq(mn_pmu, pdev);
+ if (ret)
+ return ret;
+
+ mn_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!mn_pmu->dev_info)
+ return -ENODEV;
+
+ mn_pmu->pmu_events.attr_groups = mn_pmu->dev_info->attr_groups;
+ mn_pmu->counter_bits = mn_pmu->dev_info->counter_bits;
+ mn_pmu->check_event = mn_pmu->dev_info->check_event;
+ mn_pmu->num_counters = HISI_MN_NR_COUNTERS;
+ mn_pmu->ops = &hisi_uncore_mn_ops;
+ mn_pmu->dev = &pdev->dev;
+ mn_pmu->on_cpu = -1;
+
+ reg_info = mn_pmu->dev_info->private;
+ mn_pmu->identifier = readl(mn_pmu->base + reg_info->version);
+
+ return 0;
+}
+
+static void hisi_mn_pmu_remove_cpuhp(void *hotplug_node)
+{
+ cpuhp_state_remove_instance_nocalls(hisi_mn_pmu_online, hotplug_node);
+}
+
+static void hisi_mn_pmu_unregister(void *pmu)
+{
+ perf_pmu_unregister(pmu);
+}
+
+static int hisi_mn_pmu_probe(struct platform_device *pdev)
+{
+ struct hisi_pmu *mn_pmu;
+ char *name;
+ int ret;
+
+ mn_pmu = devm_kzalloc(&pdev->dev, sizeof(*mn_pmu), GFP_KERNEL);
+ if (!mn_pmu)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, mn_pmu);
+
+ ret = hisi_mn_pmu_dev_init(pdev, mn_pmu);
+ if (ret)
+ return ret;
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_scl%d_mn%d",
+ mn_pmu->topo.scl_id, mn_pmu->topo.index_id);
+ if (!name)
+ return -ENOMEM;
+
+ ret = cpuhp_state_add_instance(hisi_mn_pmu_online, &mn_pmu->node);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Failed to register cpu hotplug\n");
+
+ ret = devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_remove_cpuhp, &mn_pmu->node);
+ if (ret)
+ return ret;
+
+ hisi_pmu_init(mn_pmu, THIS_MODULE);
+
+ ret = perf_pmu_register(&mn_pmu->pmu, name, -1);
+ if (ret)
+ return dev_err_probe(mn_pmu->dev, ret, "Failed to register MN PMU\n");
+
+ return devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_unregister, &mn_pmu->pmu);
+}
+
+static struct hisi_mn_pmu_regs hisi_mn_v1_pmu_regs = {
+ .version = HISI_MN_VERSION_REG,
+ .dyn_ctrl = HISI_MN_DYNAMIC_CTRL_REG,
+ .perf_ctrl = HISI_MN_PERF_CTRL_REG,
+ .int_mask = HISI_MN_INT_MASK_REG,
+ .int_clear = HISI_MN_INT_CLEAR_REG,
+ .int_status = HISI_MN_INT_STATUS_REG,
+ .event_ctrl = HISI_MN_EVENT_CTRL_REG,
+ .event_type0 = HISI_MN_EVTYPE0_REG,
+ .event_cntr0 = HISI_MN_CNTR0_REG,
+};
+
+static const struct hisi_pmu_dev_info hisi_mn_v1 = {
+ .attr_groups = hisi_mn_pmu_attr_groups,
+ .counter_bits = 48,
+ .check_event = HISI_MN_EVTYPE_MASK,
+ .private = &hisi_mn_v1_pmu_regs,
+};
+
+static const struct acpi_device_id hisi_mn_pmu_acpi_match[] = {
+ { "HISI0222", (kernel_ulong_t) &hisi_mn_v1 },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, hisi_mn_pmu_acpi_match);
+
+static struct platform_driver hisi_mn_pmu_driver = {
+ .driver = {
+ .name = "hisi_mn_pmu",
+ .acpi_match_table = hisi_mn_pmu_acpi_match,
+ /*
+ * We have not worked out a safe bind/unbind process,
+ * Forcefully unbinding during sampling will lead to a
+ * kernel panic, so this is not supported yet.
+ */
+ .suppress_bind_attrs = true,
+ },
+ .probe = hisi_mn_pmu_probe,
+};
+
+static int __init hisi_mn_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/mn:online",
+ hisi_uncore_pmu_online_cpu,
+ hisi_uncore_pmu_offline_cpu);
+ if (ret < 0) {
+ pr_err("hisi_mn_pmu: Failed to setup MN PMU hotplug: %d\n", ret);
+ return ret;
+ }
+ hisi_mn_pmu_online = ret;
+
+ ret = platform_driver_register(&hisi_mn_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(hisi_mn_pmu_online);
+
+ return ret;
+}
+module_init(hisi_mn_pmu_module_init);
+
+static void __exit hisi_mn_pmu_module_exit(void)
+{
+ platform_driver_unregister(&hisi_mn_pmu_driver);
+ cpuhp_remove_multi_state(hisi_mn_pmu_online);
+}
+module_exit(hisi_mn_pmu_module_exit);
+
+MODULE_IMPORT_NS("HISI_PMU");
+MODULE_DESCRIPTION("HiSilicon SoC MN uncore PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Junhao He <hejunhao3@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c
new file mode 100644
index 000000000000..de3b9cc7aada
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c
@@ -0,0 +1,443 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for HiSilicon Uncore NoC (Network on Chip) PMU device
+ *
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd.
+ * Author: Yicong Yang <yangyicong@hisilicon.com>
+ */
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/sysfs.h>
+
+#include "hisi_uncore_pmu.h"
+
+#define NOC_PMU_VERSION 0x1e00
+#define NOC_PMU_GLOBAL_CTRL 0x1e04
+#define NOC_PMU_GLOBAL_CTRL_PMU_EN BIT(0)
+#define NOC_PMU_GLOBAL_CTRL_TT_EN BIT(1)
+#define NOC_PMU_CNT_INFO 0x1e08
+#define NOC_PMU_CNT_INFO_OVERFLOW(n) BIT(n)
+#define NOC_PMU_EVENT_CTRL0 0x1e20
+#define NOC_PMU_EVENT_CTRL_TYPE GENMASK(4, 0)
+/*
+ * Note channel of 0x0 will reset the counter value, so don't do it before
+ * we read out the counter.
+ */
+#define NOC_PMU_EVENT_CTRL_CHANNEL GENMASK(10, 8)
+#define NOC_PMU_EVENT_CTRL_EN BIT(11)
+#define NOC_PMU_EVENT_COUNTER0 0x1e80
+
+#define NOC_PMU_NR_COUNTERS 4
+#define NOC_PMU_CH_DEFAULT 0x7
+
+#define NOC_PMU_EVENT_CTRLn(ctrl0, n) ((ctrl0) + 4 * (n))
+#define NOC_PMU_EVENT_CNTRn(cntr0, n) ((cntr0) + 8 * (n))
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(ch, config1, 2, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_en, config1, 3, 3);
+
+/* Dynamic CPU hotplug state used by this PMU driver */
+static enum cpuhp_state hisi_noc_pmu_cpuhp_state;
+
+struct hisi_noc_pmu_regs {
+ u32 version;
+ u32 pmu_ctrl;
+ u32 event_ctrl0;
+ u32 event_cntr0;
+ u32 overflow_status;
+};
+
+/*
+ * Tracetag filtering is not per event and all the events should keep
+ * the consistence. Return true if the new comer doesn't match the
+ * tracetag filtering configuration of the current scheduled events.
+ */
+static bool hisi_noc_pmu_check_global_filter(struct perf_event *curr,
+ struct perf_event *new)
+{
+ return hisi_get_tt_en(curr) == hisi_get_tt_en(new);
+}
+
+static void hisi_noc_pmu_write_evtype(struct hisi_pmu *noc_pmu, int idx, u32 type)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, idx));
+ reg &= ~NOC_PMU_EVENT_CTRL_TYPE;
+ reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_TYPE, type);
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, idx));
+}
+
+static int hisi_noc_pmu_get_event_idx(struct perf_event *event)
+{
+ struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_pmu_hwevents *pmu_events = &noc_pmu->pmu_events;
+ int cur_idx;
+
+ cur_idx = find_first_bit(pmu_events->used_mask, noc_pmu->num_counters);
+ if (cur_idx != noc_pmu->num_counters &&
+ !hisi_noc_pmu_check_global_filter(pmu_events->hw_events[cur_idx], event))
+ return -EAGAIN;
+
+ return hisi_uncore_pmu_get_event_idx(event);
+}
+
+static u64 hisi_noc_pmu_read_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+
+ return readq(noc_pmu->base + NOC_PMU_EVENT_CNTRn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_noc_pmu_write_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+
+ writeq(val, noc_pmu->base + NOC_PMU_EVENT_CNTRn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_noc_pmu_enable_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+ reg |= NOC_PMU_EVENT_CTRL_EN;
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+}
+
+static void hisi_noc_pmu_disable_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+ reg &= ~NOC_PMU_EVENT_CTRL_EN;
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+}
+
+static void hisi_noc_pmu_enable_counter_int(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ /* We don't support interrupt, so a stub here. */
+}
+
+static void hisi_noc_pmu_disable_counter_int(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+}
+
+static void hisi_noc_pmu_start_counters(struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg |= NOC_PMU_GLOBAL_CTRL_PMU_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+}
+
+static void hisi_noc_pmu_stop_counters(struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg &= ~NOC_PMU_GLOBAL_CTRL_PMU_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+}
+
+static u32 hisi_noc_pmu_get_int_status(struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+
+ return readl(noc_pmu->base + reg_info->overflow_status);
+}
+
+static void hisi_noc_pmu_clear_int_status(struct hisi_pmu *noc_pmu, int idx)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + reg_info->overflow_status);
+ reg &= ~NOC_PMU_CNT_INFO_OVERFLOW(idx);
+ writel(reg, noc_pmu->base + reg_info->overflow_status);
+}
+
+static void hisi_noc_pmu_enable_filter(struct perf_event *event)
+{
+ struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ struct hw_perf_event *hwc = &event->hw;
+ u32 tt_en = hisi_get_tt_en(event);
+ u32 ch = hisi_get_ch(event);
+ u32 reg;
+
+ if (!ch)
+ ch = NOC_PMU_CH_DEFAULT;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+ reg &= ~NOC_PMU_EVENT_CTRL_CHANNEL;
+ reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_CHANNEL, ch);
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+
+ /*
+ * Since tracetag filter applies to all the counters, don't touch it
+ * if user doesn't specify it explicitly.
+ */
+ if (tt_en) {
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg |= NOC_PMU_GLOBAL_CTRL_TT_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+ }
+}
+
+static void hisi_noc_pmu_disable_filter(struct perf_event *event)
+{
+ struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 tt_en = hisi_get_tt_en(event);
+ u32 reg;
+
+ /*
+ * If we're not the last counter, don't touch the global tracetag
+ * configuration.
+ */
+ if (bitmap_weight(noc_pmu->pmu_events.used_mask, noc_pmu->num_counters) > 1)
+ return;
+
+ if (tt_en) {
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg &= ~NOC_PMU_GLOBAL_CTRL_TT_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+ }
+}
+
+static const struct hisi_uncore_ops hisi_uncore_noc_ops = {
+ .write_evtype = hisi_noc_pmu_write_evtype,
+ .get_event_idx = hisi_noc_pmu_get_event_idx,
+ .read_counter = hisi_noc_pmu_read_counter,
+ .write_counter = hisi_noc_pmu_write_counter,
+ .enable_counter = hisi_noc_pmu_enable_counter,
+ .disable_counter = hisi_noc_pmu_disable_counter,
+ .enable_counter_int = hisi_noc_pmu_enable_counter_int,
+ .disable_counter_int = hisi_noc_pmu_disable_counter_int,
+ .start_counters = hisi_noc_pmu_start_counters,
+ .stop_counters = hisi_noc_pmu_stop_counters,
+ .get_int_status = hisi_noc_pmu_get_int_status,
+ .clear_int_status = hisi_noc_pmu_clear_int_status,
+ .enable_filter = hisi_noc_pmu_enable_filter,
+ .disable_filter = hisi_noc_pmu_disable_filter,
+};
+
+static struct attribute *hisi_noc_pmu_format_attrs[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ HISI_PMU_FORMAT_ATTR(ch, "config1:0-2"),
+ HISI_PMU_FORMAT_ATTR(tt_en, "config1:3"),
+ NULL
+};
+
+static const struct attribute_group hisi_noc_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_noc_pmu_format_attrs,
+};
+
+static struct attribute *hisi_noc_pmu_events_attrs[] = {
+ HISI_PMU_EVENT_ATTR(cycles, 0x0e),
+ /* Flux on/off the ring */
+ HISI_PMU_EVENT_ATTR(ingress_flow_sum, 0x1a),
+ HISI_PMU_EVENT_ATTR(egress_flow_sum, 0x17),
+ /* Buffer full duration on/off the ring */
+ HISI_PMU_EVENT_ATTR(ingress_buf_full, 0x19),
+ HISI_PMU_EVENT_ATTR(egress_buf_full, 0x12),
+ /* Failure packets count on/off the ring */
+ HISI_PMU_EVENT_ATTR(cw_ingress_fail, 0x01),
+ HISI_PMU_EVENT_ATTR(cc_ingress_fail, 0x09),
+ HISI_PMU_EVENT_ATTR(cw_egress_fail, 0x03),
+ HISI_PMU_EVENT_ATTR(cc_egress_fail, 0x0b),
+ /* Flux of the ring */
+ HISI_PMU_EVENT_ATTR(cw_main_flow_sum, 0x05),
+ HISI_PMU_EVENT_ATTR(cc_main_flow_sum, 0x0d),
+ NULL
+};
+
+static const struct attribute_group hisi_noc_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_noc_pmu_events_attrs,
+};
+
+static const struct attribute_group *hisi_noc_pmu_attr_groups[] = {
+ &hisi_noc_pmu_format_group,
+ &hisi_noc_pmu_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static int hisi_noc_pmu_dev_init(struct platform_device *pdev, struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info;
+
+ hisi_uncore_pmu_init_topology(noc_pmu, &pdev->dev);
+
+ if (noc_pmu->topo.scl_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL, "failed to get scl-id\n");
+
+ if (noc_pmu->topo.index_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL, "failed to get idx-id\n");
+
+ if (noc_pmu->topo.sub_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL, "failed to get sub-id\n");
+
+ noc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(noc_pmu->base))
+ return dev_err_probe(&pdev->dev, PTR_ERR(noc_pmu->base),
+ "fail to remap io memory\n");
+
+ noc_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!noc_pmu->dev_info)
+ return -ENODEV;
+
+ noc_pmu->pmu_events.attr_groups = noc_pmu->dev_info->attr_groups;
+ noc_pmu->counter_bits = noc_pmu->dev_info->counter_bits;
+ noc_pmu->check_event = noc_pmu->dev_info->check_event;
+ noc_pmu->num_counters = NOC_PMU_NR_COUNTERS;
+ noc_pmu->ops = &hisi_uncore_noc_ops;
+ noc_pmu->dev = &pdev->dev;
+ noc_pmu->on_cpu = -1;
+
+ reg_info = noc_pmu->dev_info->private;
+ noc_pmu->identifier = readl(noc_pmu->base + reg_info->version);
+
+ return 0;
+}
+
+static void hisi_noc_pmu_remove_cpuhp_instance(void *hotplug_node)
+{
+ cpuhp_state_remove_instance_nocalls(hisi_noc_pmu_cpuhp_state, hotplug_node);
+}
+
+static void hisi_noc_pmu_unregister_pmu(void *pmu)
+{
+ perf_pmu_unregister(pmu);
+}
+
+static int hisi_noc_pmu_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct hisi_pmu *noc_pmu;
+ char *name;
+ int ret;
+
+ noc_pmu = devm_kzalloc(dev, sizeof(*noc_pmu), GFP_KERNEL);
+ if (!noc_pmu)
+ return -ENOMEM;
+
+ /*
+ * HiSilicon Uncore PMU framework needs to get common hisi_pmu device
+ * from device's drvdata.
+ */
+ platform_set_drvdata(pdev, noc_pmu);
+
+ ret = hisi_noc_pmu_dev_init(pdev, noc_pmu);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(hisi_noc_pmu_cpuhp_state, &noc_pmu->node);
+ if (ret)
+ return dev_err_probe(dev, ret, "Fail to register cpuhp instance\n");
+
+ ret = devm_add_action_or_reset(dev, hisi_noc_pmu_remove_cpuhp_instance,
+ &noc_pmu->node);
+ if (ret)
+ return ret;
+
+ hisi_pmu_init(noc_pmu, THIS_MODULE);
+
+ name = devm_kasprintf(dev, GFP_KERNEL, "hisi_scl%d_noc%d_%d",
+ noc_pmu->topo.scl_id, noc_pmu->topo.index_id,
+ noc_pmu->topo.sub_id);
+ if (!name)
+ return -ENOMEM;
+
+ ret = perf_pmu_register(&noc_pmu->pmu, name, -1);
+ if (ret)
+ return dev_err_probe(dev, ret, "Fail to register PMU\n");
+
+ return devm_add_action_or_reset(dev, hisi_noc_pmu_unregister_pmu,
+ &noc_pmu->pmu);
+}
+
+static struct hisi_noc_pmu_regs hisi_noc_v1_pmu_regs = {
+ .version = NOC_PMU_VERSION,
+ .pmu_ctrl = NOC_PMU_GLOBAL_CTRL,
+ .event_ctrl0 = NOC_PMU_EVENT_CTRL0,
+ .event_cntr0 = NOC_PMU_EVENT_COUNTER0,
+ .overflow_status = NOC_PMU_CNT_INFO,
+};
+
+static const struct hisi_pmu_dev_info hisi_noc_v1 = {
+ .attr_groups = hisi_noc_pmu_attr_groups,
+ .counter_bits = 64,
+ .check_event = NOC_PMU_EVENT_CTRL_TYPE,
+ .private = &hisi_noc_v1_pmu_regs,
+};
+
+static const struct acpi_device_id hisi_noc_pmu_ids[] = {
+ { "HISI04E0", (kernel_ulong_t) &hisi_noc_v1 },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, hisi_noc_pmu_ids);
+
+static struct platform_driver hisi_noc_pmu_driver = {
+ .driver = {
+ .name = "hisi_noc_pmu",
+ .acpi_match_table = hisi_noc_pmu_ids,
+ .suppress_bind_attrs = true,
+ },
+ .probe = hisi_noc_pmu_probe,
+};
+
+static int __init hisi_noc_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/noc:online",
+ hisi_uncore_pmu_online_cpu,
+ hisi_uncore_pmu_offline_cpu);
+ if (ret < 0) {
+ pr_err("hisi_noc_pmu: Fail to setup cpuhp callbacks, ret = %d\n", ret);
+ return ret;
+ }
+ hisi_noc_pmu_cpuhp_state = ret;
+
+ ret = platform_driver_register(&hisi_noc_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state);
+
+ return ret;
+}
+module_init(hisi_noc_pmu_module_init);
+
+static void __exit hisi_noc_pmu_module_exit(void)
+{
+ platform_driver_unregister(&hisi_noc_pmu_driver);
+ cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state);
+}
+module_exit(hisi_noc_pmu_module_exit);
+
+MODULE_IMPORT_NS("HISI_PMU");
+MODULE_DESCRIPTION("HiSilicon SoC Uncore NoC PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index 47d3cc9b6eec..80108c63cb60 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -22,9 +22,15 @@
#define PA_TT_CTRL 0x1c08
#define PA_TGTID_CTRL 0x1c14
#define PA_SRCID_CTRL 0x1c18
+
+/* H32 PA interrupt registers */
#define PA_INT_MASK 0x1c70
#define PA_INT_STATUS 0x1c78
#define PA_INT_CLEAR 0x1c7c
+
+#define H60PA_INT_STATUS 0x1c70
+#define H60PA_INT_MASK 0x1c74
+
#define PA_EVENT_TYPE0 0x1c80
#define PA_PMU_VERSION 0x1cf0
#define PA_EVENT_CNT0_L 0x1d00
@@ -46,6 +52,12 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22);
HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33);
HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44);
+struct hisi_pa_pmu_int_regs {
+ u32 mask_offset;
+ u32 clear_offset;
+ u32 status_offset;
+};
+
static void hisi_pa_pmu_enable_tracetag(struct perf_event *event)
{
struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
@@ -219,62 +231,63 @@ static void hisi_pa_pmu_disable_counter(struct hisi_pmu *pa_pmu,
static void hisi_pa_pmu_enable_counter_int(struct hisi_pmu *pa_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
u32 val;
/* Write 0 to enable interrupt */
- val = readl(pa_pmu->base + PA_INT_MASK);
+ val = readl(pa_pmu->base + regs->mask_offset);
val &= ~(1 << hwc->idx);
- writel(val, pa_pmu->base + PA_INT_MASK);
+ writel(val, pa_pmu->base + regs->mask_offset);
}
static void hisi_pa_pmu_disable_counter_int(struct hisi_pmu *pa_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
u32 val;
/* Write 1 to mask interrupt */
- val = readl(pa_pmu->base + PA_INT_MASK);
+ val = readl(pa_pmu->base + regs->mask_offset);
val |= 1 << hwc->idx;
- writel(val, pa_pmu->base + PA_INT_MASK);
+ writel(val, pa_pmu->base + regs->mask_offset);
}
static u32 hisi_pa_pmu_get_int_status(struct hisi_pmu *pa_pmu)
{
- return readl(pa_pmu->base + PA_INT_STATUS);
+ struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
+
+ return readl(pa_pmu->base + regs->status_offset);
}
static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx)
{
- writel(1 << idx, pa_pmu->base + PA_INT_CLEAR);
-}
+ struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
-static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = {
- { "HISI0273", },
- {}
-};
-MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match);
+ writel(1 << idx, pa_pmu->base + regs->clear_offset);
+}
static int hisi_pa_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *pa_pmu)
{
+ hisi_uncore_pmu_init_topology(pa_pmu, &pdev->dev);
+
/*
* As PA PMU is in a SICL, use the SICL_ID and the index ID
* to identify the PA PMU.
*/
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &pa_pmu->sicl_id)) {
+ if (pa_pmu->topo.sicl_id < 0) {
dev_err(&pdev->dev, "Cannot read sicl-id!\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
- &pa_pmu->index_id)) {
+ if (pa_pmu->topo.index_id < 0) {
dev_err(&pdev->dev, "Cannot read idx-id!\n");
return -EINVAL;
}
- pa_pmu->ccl_id = -1;
- pa_pmu->sccl_id = -1;
+ pa_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!pa_pmu->dev_info)
+ return -ENODEV;
pa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(pa_pmu->base)) {
@@ -314,37 +327,86 @@ static const struct attribute_group hisi_pa_pmu_v2_events_group = {
.attrs = hisi_pa_pmu_v2_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_pa_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
+static struct attribute *hisi_pa_pmu_v3_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(tx_req, 0x0),
+ HISI_PMU_EVENT_ATTR(tx_dat, 0x1),
+ HISI_PMU_EVENT_ATTR(tx_snp, 0x2),
+ HISI_PMU_EVENT_ATTR(rx_req, 0x7),
+ HISI_PMU_EVENT_ATTR(rx_dat, 0x8),
+ HISI_PMU_EVENT_ATTR(rx_snp, 0x9),
NULL
};
-static const struct attribute_group hisi_pa_pmu_cpumask_attr_group = {
- .attrs = hisi_pa_pmu_cpumask_attrs,
+static const struct attribute_group hisi_pa_pmu_v3_events_group = {
+ .name = "events",
+ .attrs = hisi_pa_pmu_v3_events_attr,
};
-static struct device_attribute hisi_pa_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_pa_pmu_identifier_attrs[] = {
- &hisi_pa_pmu_identifier_attr.attr,
+static struct attribute *hisi_h60pa_pmu_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(rx_flit, 0x50),
+ HISI_PMU_EVENT_ATTR(tx_flit, 0x65),
NULL
};
-static const struct attribute_group hisi_pa_pmu_identifier_group = {
- .attrs = hisi_pa_pmu_identifier_attrs,
+static const struct attribute_group hisi_h60pa_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_h60pa_pmu_events_attr,
+};
+
+static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = {
+ .mask_offset = PA_INT_MASK,
+ .clear_offset = PA_INT_CLEAR,
+ .status_offset = PA_INT_STATUS,
};
static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = {
&hisi_pa_pmu_v2_format_group,
&hisi_pa_pmu_v2_events_group,
- &hisi_pa_pmu_cpumask_attr_group,
- &hisi_pa_pmu_identifier_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static const struct hisi_pmu_dev_info hisi_h32pa_v2 = {
+ .name = "pa",
+ .attr_groups = hisi_pa_pmu_v2_attr_groups,
+ .private = &hisi_pa_pmu_regs,
+};
+
+static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = {
+ &hisi_pa_pmu_v2_format_group,
+ &hisi_pa_pmu_v3_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
+static const struct hisi_pmu_dev_info hisi_h32pa_v3 = {
+ .name = "pa",
+ .attr_groups = hisi_pa_pmu_v3_attr_groups,
+ .private = &hisi_pa_pmu_regs,
+};
+
+static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = {
+ .mask_offset = H60PA_INT_MASK,
+ .clear_offset = H60PA_INT_STATUS, /* Clear on write */
+ .status_offset = H60PA_INT_STATUS,
+};
+
+static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = {
+ &hisi_pa_pmu_v2_format_group,
+ &hisi_h60pa_pmu_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static const struct hisi_pmu_dev_info hisi_h60pa = {
+ .name = "h60pa",
+ .attr_groups = hisi_h60pa_pmu_attr_groups,
+ .private = &hisi_h60pa_pmu_regs,
+};
+
static const struct hisi_uncore_ops hisi_uncore_pa_ops = {
.write_evtype = hisi_pa_pmu_write_evtype,
.get_event_idx = hisi_uncore_pmu_get_event_idx,
@@ -375,10 +437,10 @@ static int hisi_pa_pmu_dev_probe(struct platform_device *pdev,
if (ret)
return ret;
- pa_pmu->pmu_events.attr_groups = hisi_pa_pmu_v2_attr_groups;
+ pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups;
pa_pmu->num_counters = PA_NR_COUNTERS;
pa_pmu->ops = &hisi_uncore_pa_ops;
- pa_pmu->check_event = 0xB0;
+ pa_pmu->check_event = PA_EVTYPE_MASK;
pa_pmu->counter_bits = 64;
pa_pmu->dev = &pdev->dev;
pa_pmu->on_cpu = -1;
@@ -400,8 +462,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
if (ret)
return ret;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%u_pa%u",
- pa_pmu->sicl_id, pa_pmu->index_id);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%d",
+ pa_pmu->topo.sicl_id, pa_pmu->dev_info->name,
+ pa_pmu->topo.index_id);
if (!name)
return -ENOMEM;
@@ -412,12 +475,12 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
return ret;
}
- hisi_pmu_init(&pa_pmu->pmu, name, pa_pmu->pmu_events.attr_groups, THIS_MODULE);
+ hisi_pmu_init(pa_pmu, THIS_MODULE);
ret = perf_pmu_register(&pa_pmu->pmu, name, -1);
if (ret) {
dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret);
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
- &pa_pmu->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+ &pa_pmu->node);
return ret;
}
@@ -425,16 +488,23 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_pa_pmu_remove(struct platform_device *pdev)
+static void hisi_pa_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *pa_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&pa_pmu->pmu);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
&pa_pmu->node);
- return 0;
}
+static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = {
+ { "HISI0273", (kernel_ulong_t)&hisi_h32pa_v2 },
+ { "HISI0275", (kernel_ulong_t)&hisi_h32pa_v3 },
+ { "HISI0274", (kernel_ulong_t)&hisi_h60pa },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match);
+
static struct platform_driver hisi_pa_pmu_driver = {
.driver = {
.name = "hisi_pa_pmu",
@@ -473,6 +543,7 @@ static void __exit hisi_pa_pmu_module_exit(void)
}
module_exit(hisi_pa_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index fbc8a93d5eac..de71dcf11653 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -14,30 +14,16 @@
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
+#include <linux/property.h>
#include <asm/cputype.h>
#include <asm/local64.h>
#include "hisi_uncore_pmu.h"
-#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
#define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0))
/*
- * PMU format attributes
- */
-ssize_t hisi_format_sysfs_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *eattr;
-
- eattr = container_of(attr, struct dev_ext_attribute, attr);
-
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-EXPORT_SYMBOL_GPL(hisi_format_sysfs_show);
-
-/*
* PMU event attributes
*/
ssize_t hisi_event_sysfs_show(struct device *dev,
@@ -49,7 +35,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev,
return sysfs_emit(page, "config=0x%lx\n", (unsigned long)eattr->var);
}
-EXPORT_SYMBOL_GPL(hisi_event_sysfs_show);
+EXPORT_SYMBOL_NS_GPL(hisi_event_sysfs_show, "HISI_PMU");
/*
* sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show
@@ -61,7 +47,52 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev,
return sysfs_emit(buf, "%d\n", hisi_pmu->on_cpu);
}
-EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show);
+EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, "HISI_PMU");
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static ssize_t hisi_associated_cpus_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, &hisi_pmu->associated_cpus);
+}
+static DEVICE_ATTR(associated_cpus, 0444, hisi_associated_cpus_sysfs_show, NULL);
+
+static struct attribute *hisi_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ &dev_attr_associated_cpus.attr,
+ NULL
+};
+
+const struct attribute_group hisi_pmu_cpumask_attr_group = {
+ .attrs = hisi_pmu_cpumask_attrs,
+};
+EXPORT_SYMBOL_NS_GPL(hisi_pmu_cpumask_attr_group, "HISI_PMU");
+
+ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
+
+ return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier);
+}
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, "HISI_PMU");
+
+static struct device_attribute hisi_pmu_identifier_attr =
+ __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_pmu_identifier_attrs[] = {
+ &hisi_pmu_identifier_attr.attr,
+ NULL
+};
+
+const struct attribute_group hisi_pmu_identifier_group = {
+ .attrs = hisi_pmu_identifier_attrs,
+};
+EXPORT_SYMBOL_NS_GPL(hisi_pmu_identifier_group, "HISI_PMU");
static bool hisi_validate_event_group(struct perf_event *event)
{
@@ -111,24 +142,14 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
return idx;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx);
-
-ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
- struct device_attribute *attr,
- char *page)
-{
- struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
-
- return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier);
-}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, "HISI_PMU");
static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
{
clear_bit(idx, hisi_pmu->pmu_events.used_mask);
}
-static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
+irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
{
struct hisi_pmu *hisi_pmu = data;
struct perf_event *event;
@@ -157,6 +178,7 @@ static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
return IRQ_HANDLED;
}
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_isr, "HISI_PMU");
int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
struct platform_device *pdev)
@@ -180,7 +202,7 @@ int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
return 0;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_irq, "HISI_PMU");
int hisi_uncore_pmu_event_init(struct perf_event *event)
{
@@ -213,7 +235,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event)
return -EINVAL;
hisi_pmu = to_hisi_pmu(event->pmu);
- if (event->attr.config > hisi_pmu->check_event)
+ if ((event->attr.config & HISI_EVENTID_MASK) > hisi_pmu->check_event)
return -EINVAL;
if (hisi_pmu->on_cpu == -1)
@@ -226,12 +248,15 @@ int hisi_uncore_pmu_event_init(struct perf_event *event)
hwc->idx = -1;
hwc->config_base = event->attr.config;
+ if (hisi_pmu->ops->check_filter && hisi_pmu->ops->check_filter(event))
+ return -EINVAL;
+
/* Enforce to use the same CPU for all events in this PMU */
event->cpu = hisi_pmu->on_cpu;
return 0;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_init, "HISI_PMU");
/*
* Set the counter to count the event that we're interested in,
@@ -285,7 +310,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event)
/* Write start value to the hardware event counter */
hisi_pmu->ops->write_counter(hisi_pmu, hwc, val);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_set_event_period, "HISI_PMU");
void hisi_uncore_pmu_event_update(struct perf_event *event)
{
@@ -306,7 +331,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event)
HISI_MAX_PERIOD(hisi_pmu->counter_bits);
local64_add(delta, &event->count);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_update, "HISI_PMU");
void hisi_uncore_pmu_start(struct perf_event *event, int flags)
{
@@ -329,7 +354,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags)
hisi_uncore_pmu_enable_event(event);
perf_event_update_userpage(event);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_start, "HISI_PMU");
void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
{
@@ -346,7 +371,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
hisi_uncore_pmu_event_update(event);
hwc->state |= PERF_HES_UPTODATE;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_stop, "HISI_PMU");
int hisi_uncore_pmu_add(struct perf_event *event, int flags)
{
@@ -369,7 +394,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags)
return 0;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_add, "HISI_PMU");
void hisi_uncore_pmu_del(struct perf_event *event, int flags)
{
@@ -381,14 +406,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags)
perf_event_update_userpage(event);
hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_del, "HISI_PMU");
void hisi_uncore_pmu_read(struct perf_event *event)
{
/* Read hardware counter and update the perf counter statistics */
hisi_uncore_pmu_event_update(event);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_read, "HISI_PMU");
void hisi_uncore_pmu_enable(struct pmu *pmu)
{
@@ -401,7 +426,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu)
hisi_pmu->ops->start_counters(hisi_pmu);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_enable, "HISI_PMU");
void hisi_uncore_pmu_disable(struct pmu *pmu)
{
@@ -409,7 +434,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu)
hisi_pmu->ops->stop_counters(hisi_pmu);
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_disable, "HISI_PMU");
/*
@@ -456,22 +481,19 @@ static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp)
*/
static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu)
{
+ struct hisi_pmu_topology *topo = &hisi_pmu->topo;
int sccl_id, ccl_id;
- /* If SCCL_ID is -1, the PMU is in a SICL and has no CPU affinity */
- if (hisi_pmu->sccl_id == -1)
- return true;
-
- if (hisi_pmu->ccl_id == -1) {
+ if (topo->ccl_id == -1) {
/* If CCL_ID is -1, the PMU only shares the same SCCL */
hisi_read_sccl_and_ccl_id(&sccl_id, NULL);
- return sccl_id == hisi_pmu->sccl_id;
+ return sccl_id == topo->sccl_id;
}
hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id);
- return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id;
+ return sccl_id == topo->sccl_id && ccl_id == topo->ccl_id;
}
int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
@@ -479,35 +501,46 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
node);
- if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu))
+ /*
+ * If the CPU is not associated to PMU, initialize the hisi_pmu->on_cpu
+ * based on the locality if it hasn't been initialized yet. For PMUs
+ * do have associated CPUs, it'll be updated later.
+ */
+ if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu)) {
+ if (hisi_pmu->on_cpu != -1)
+ return 0;
+
+ hisi_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(hisi_pmu->dev));
+ if (hisi_pmu->irq > 0)
+ WARN_ON(irq_set_affinity(hisi_pmu->irq,
+ cpumask_of(hisi_pmu->on_cpu)));
return 0;
+ }
cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus);
- /* If another CPU is already managing this PMU, simply return. */
- if (hisi_pmu->on_cpu != -1)
+ /* If another associated CPU is already managing this PMU, simply return. */
+ if (hisi_pmu->on_cpu != -1 &&
+ cpumask_test_cpu(hisi_pmu->on_cpu, &hisi_pmu->associated_cpus))
return 0;
/* Use this CPU in cpumask for event counting */
hisi_pmu->on_cpu = cpu;
/* Overflow interrupt also should use the same CPU */
- WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
+ if (hisi_pmu->irq > 0)
+ WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
return 0;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_online_cpu, "HISI_PMU");
int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
node);
- cpumask_t pmu_online_cpus;
unsigned int target;
- if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus))
- return 0;
-
/* Nothing to do if this CPU doesn't own the PMU */
if (hisi_pmu->on_cpu != cpu)
return 0;
@@ -515,27 +548,65 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
/* Give up ownership of the PMU */
hisi_pmu->on_cpu = -1;
- /* Choose a new CPU to migrate ownership of the PMU to */
- cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus,
- cpu_online_mask);
- target = cpumask_any_but(&pmu_online_cpus, cpu);
+ /*
+ * Migrate ownership of the PMU to a new CPU chosen from PMU's online
+ * associated CPUs if possible, if no associated CPU online then
+ * migrate to one online CPU.
+ */
+ target = cpumask_any_and_but(&hisi_pmu->associated_cpus,
+ cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
if (target >= nr_cpu_ids)
return 0;
perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
/* Use this CPU for event counting */
hisi_pmu->on_cpu = target;
- WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
+
+ if (hisi_pmu->irq > 0)
+ WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
return 0;
}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu);
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, "HISI_PMU");
-void hisi_pmu_init(struct pmu *pmu, const char *name,
- const struct attribute_group **attr_groups, struct module *module)
+/*
+ * Retrieve the topology information from the firmware for the hisi_pmu device.
+ * The topology ID will be -1 if we cannot initialize it, it may either due to
+ * the PMU doesn't locate on this certain topology or the firmware needs to be
+ * fixed.
+ */
+void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev)
{
- pmu->name = name;
+ struct hisi_pmu_topology *topo = &hisi_pmu->topo;
+
+ topo->sccl_id = -1;
+ topo->ccl_id = -1;
+ topo->index_id = -1;
+ topo->sub_id = -1;
+
+ if (device_property_read_u32(dev, "hisilicon,scl-id", &topo->sccl_id))
+ dev_dbg(dev, "no scl-id present\n");
+
+ if (device_property_read_u32(dev, "hisilicon,ccl-id", &topo->ccl_id))
+ dev_dbg(dev, "no ccl-id present\n");
+
+ if (device_property_read_u32(dev, "hisilicon,idx-id", &topo->index_id))
+ dev_dbg(dev, "no idx-id present\n");
+
+ if (device_property_read_u32(dev, "hisilicon,sub-id", &topo->sub_id))
+ dev_dbg(dev, "no sub-id present\n");
+}
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_topology, "HISI_PMU");
+
+void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module)
+{
+ struct pmu *pmu = &hisi_pmu->pmu;
+
pmu->module = module;
+ pmu->parent = hisi_pmu->dev;
pmu->task_ctx_nr = perf_invalid_context;
pmu->event_init = hisi_uncore_pmu_event_init;
pmu->pmu_enable = hisi_uncore_pmu_enable;
@@ -545,8 +616,10 @@ void hisi_pmu_init(struct pmu *pmu, const char *name,
pmu->start = hisi_uncore_pmu_start;
pmu->stop = hisi_uncore_pmu_stop;
pmu->read = hisi_uncore_pmu_read;
- pmu->attr_groups = attr_groups;
+ pmu->attr_groups = hisi_pmu->pmu_events.attr_groups;
+ pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE;
}
-EXPORT_SYMBOL_GPL(hisi_pmu_init);
+EXPORT_SYMBOL_NS_GPL(hisi_pmu_init, "HISI_PMU");
+MODULE_DESCRIPTION("HiSilicon SoC uncore Performance Monitor driver framework");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index b59de33cd059..3ffe6acda653 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -24,7 +24,7 @@
#define pr_fmt(fmt) "hisi_pmu: " fmt
#define HISI_PMU_V2 0x30
-#define HISI_MAX_COUNTERS 0x10
+#define HISI_MAX_COUNTERS 0x18
#define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu))
#define HISI_PMU_ATTR(_name, _func, _config) \
@@ -33,7 +33,7 @@
})[0].attr.attr)
#define HISI_PMU_FORMAT_ATTR(_name, _config) \
- HISI_PMU_ATTR(_name, hisi_format_sysfs_show, (void *)_config)
+ HISI_PMU_ATTR(_name, device_show_string, _config)
#define HISI_PMU_EVENT_ATTR(_name, _config) \
HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)
@@ -43,9 +43,16 @@
return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \
}
+#define HISI_EVENTID_MASK GENMASK(7, 0)
+#define HISI_GET_EVENTID(ev) ((ev)->hw.config_base & HISI_EVENTID_MASK)
+
+#define HISI_PMU_EVTYPE_BITS 8
+#define HISI_PMU_EVTYPE_SHIFT(idx) ((idx) % 4 * HISI_PMU_EVTYPE_BITS)
+
struct hisi_pmu;
struct hisi_uncore_ops {
+ int (*check_filter)(struct perf_event *event);
void (*write_evtype)(struct hisi_pmu *, int, u32);
int (*get_event_idx)(struct perf_event *);
u64 (*read_counter)(struct hisi_pmu *, struct hw_perf_event *);
@@ -62,32 +69,70 @@ struct hisi_uncore_ops {
void (*disable_filter)(struct perf_event *event);
};
+/* Describes the HISI PMU chip features information */
+struct hisi_pmu_dev_info {
+ const char *name;
+ const struct attribute_group **attr_groups;
+ u32 counter_bits;
+ u32 check_event;
+ void *private;
+};
+
struct hisi_pmu_hwevents {
struct perf_event *hw_events[HISI_MAX_COUNTERS];
DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
const struct attribute_group **attr_groups;
};
+/**
+ * struct hisi_pmu_topology - Describe the topology hierarchy on which the PMU
+ * is located.
+ * @sccl_id: ID of the SCCL on which the PMU locate is located.
+ * @sicl_id: ID of the SICL on which the PMU locate is located.
+ * @scl_id: ID used by the core which is unaware of the SCCL/SICL.
+ * @ccl_id: ID of the CCL (CPU cluster) on which the PMU is located.
+ * @index_id: the ID of the PMU module if there're several PMUs at a
+ * particularly location in the topology.
+ * @sub_id: submodule ID of the PMU. For example we use this for DDRC PMU v2
+ * since each DDRC has more than one DMC
+ *
+ * The ID will be -1 if the PMU isn't located on a certain topology.
+ */
+struct hisi_pmu_topology {
+ /*
+ * SCCL (Super CPU CLuster) and SICL (Super I/O Cluster) are parallel
+ * so a PMU cannot locate on a SCCL and a SICL. If the SCCL/SICL
+ * distinction is not relevant, use scl_id instead.
+ */
+ union {
+ int sccl_id;
+ int sicl_id;
+ int scl_id;
+ };
+ int ccl_id;
+ int index_id;
+ int sub_id;
+};
+
/* Generic pmu struct for different pmu types */
struct hisi_pmu {
struct pmu pmu;
const struct hisi_uncore_ops *ops;
+ const struct hisi_pmu_dev_info *dev_info;
struct hisi_pmu_hwevents pmu_events;
- /* associated_cpus: All CPUs associated with the PMU */
+ struct hisi_pmu_topology topo;
+ /*
+ * CPUs associated to the PMU and are preferred to use for counting.
+ * Could be empty if PMU has no association (e.g. PMU on SICL), in
+ * which case any online CPU will be used.
+ */
cpumask_t associated_cpus;
/* CPU used for counting */
int on_cpu;
int irq;
struct device *dev;
struct hlist_node node;
- int sccl_id;
- int sicl_id;
- int ccl_id;
void __iomem *base;
- /* the ID of the PMU modules */
- u32 index_id;
- /* For DDRC PMU v2: each DDRC has more than one DMC */
- u32 sub_id;
int num_counters;
int counter_bits;
/* check event code range */
@@ -95,6 +140,10 @@ struct hisi_pmu {
u32 identifier;
};
+/* Generic implementation of cpumask/identifier group */
+extern const struct attribute_group hisi_pmu_cpumask_attr_group;
+extern const struct attribute_group hisi_pmu_identifier_group;
+
int hisi_uncore_pmu_get_event_idx(struct perf_event *event);
void hisi_uncore_pmu_read(struct perf_event *event);
int hisi_uncore_pmu_add(struct perf_event *event, int flags);
@@ -108,8 +157,6 @@ void hisi_uncore_pmu_enable(struct pmu *pmu);
void hisi_uncore_pmu_disable(struct pmu *pmu);
ssize_t hisi_event_sysfs_show(struct device *dev,
struct device_attribute *attr, char *buf);
-ssize_t hisi_format_sysfs_show(struct device *dev,
- struct device_attribute *attr, char *buf);
ssize_t hisi_cpumask_sysfs_show(struct device *dev,
struct device_attribute *attr, char *buf);
int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node);
@@ -118,9 +165,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node);
ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
struct device_attribute *attr,
char *page);
+irqreturn_t hisi_uncore_pmu_isr(int irq, void *data);
int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
struct platform_device *pdev);
+void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev);
-void hisi_pmu_init(struct pmu *pmu, const char *name,
- const struct attribute_group **attr_groups, struct module *module);
+void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module);
#endif /* __HISI_UNCORE_PMU_H__ */
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
index b9c79f17230c..cd32d606df05 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -28,6 +28,18 @@
#define SLLC_VERSION 0x1cf0
#define SLLC_EVENT_CNT0_L 0x1d00
+/* SLLC registers definition in v3 */
+#define SLLC_V3_INT_MASK 0x6834
+#define SLLC_V3_INT_STATUS 0x6838
+#define SLLC_V3_INT_CLEAR 0x683c
+#define SLLC_V3_VERSION 0x6c00
+#define SLLC_V3_PERF_CTRL 0x6d00
+#define SLLC_V3_SRCID_CTRL 0x6d04
+#define SLLC_V3_TGTID_CTRL 0x6d08
+#define SLLC_V3_EVENT_CTRL 0x6d14
+#define SLLC_V3_EVENT_TYPE0 0x6d18
+#define SLLC_V3_EVENT_CNT0_L 0x6e00
+
#define SLLC_EVTYPE_MASK 0xff
#define SLLC_PERF_CTRL_EN BIT(0)
#define SLLC_FILT_EN BIT(1)
@@ -40,7 +52,14 @@
#define SLLC_TGTID_MAX_SHIFT 12
#define SLLC_SRCID_CMD_SHIFT 1
#define SLLC_SRCID_MSK_SHIFT 12
-#define SLLC_NR_EVENTS 0x80
+
+#define SLLC_V3_TGTID_MIN_SHIFT 1
+#define SLLC_V3_TGTID_MAX_SHIFT 10
+#define SLLC_V3_SRCID_CMD_SHIFT 1
+#define SLLC_V3_SRCID_MSK_SHIFT 10
+
+#define SLLC_NR_EVENTS 0xff
+#define SLLC_EVENT_CNTn(cnt0, n) ((cnt0) + (n) * 8)
HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0);
HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_max, config1, 21, 11);
@@ -48,6 +67,23 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22);
HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33);
HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44);
+struct hisi_sllc_pmu_regs {
+ u32 int_mask;
+ u32 int_clear;
+ u32 int_status;
+ u32 perf_ctrl;
+ u32 srcid_ctrl;
+ u32 srcid_cmd_shift;
+ u32 srcid_mask_shift;
+ u32 tgtid_ctrl;
+ u32 tgtid_min_shift;
+ u32 tgtid_max_shift;
+ u32 event_ctrl;
+ u32 event_type0;
+ u32 version;
+ u32 event_cnt0;
+};
+
static bool tgtid_is_valid(u32 max, u32 min)
{
return max > 0 && max >= min;
@@ -56,96 +92,104 @@ static bool tgtid_is_valid(u32 max, u32 min)
static void hisi_sllc_pmu_enable_tracetag(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 tt_en = hisi_get_tracetag_en(event);
if (tt_en) {
u32 val;
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val |= SLLC_TRACETAG_EN | SLLC_FILT_EN;
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl);
}
}
static void hisi_sllc_pmu_disable_tracetag(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 tt_en = hisi_get_tracetag_en(event);
if (tt_en) {
u32 val;
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val &= ~(SLLC_TRACETAG_EN | SLLC_FILT_EN);
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl);
}
}
static void hisi_sllc_pmu_config_tgtid(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 min = hisi_get_tgtid_min(event);
u32 max = hisi_get_tgtid_max(event);
if (tgtid_is_valid(max, min)) {
- u32 val = (max << SLLC_TGTID_MAX_SHIFT) | (min << SLLC_TGTID_MIN_SHIFT);
+ u32 val = (max << regs->tgtid_max_shift) |
+ (min << regs->tgtid_min_shift);
- writel(val, sllc_pmu->base + SLLC_TGTID_CTRL);
+ writel(val, sllc_pmu->base + regs->tgtid_ctrl);
/* Enable the tgtid */
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val |= SLLC_TGTID_EN | SLLC_FILT_EN;
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl);
}
}
static void hisi_sllc_pmu_clear_tgtid(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 min = hisi_get_tgtid_min(event);
u32 max = hisi_get_tgtid_max(event);
if (tgtid_is_valid(max, min)) {
u32 val;
- writel(SLLC_TGTID_NONE, sllc_pmu->base + SLLC_TGTID_CTRL);
+ writel(SLLC_TGTID_NONE, sllc_pmu->base + regs->tgtid_ctrl);
/* Disable the tgtid */
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val &= ~(SLLC_TGTID_EN | SLLC_FILT_EN);
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl);
}
}
static void hisi_sllc_pmu_config_srcid(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 cmd = hisi_get_srcid_cmd(event);
if (cmd) {
u32 val, msk;
msk = hisi_get_srcid_msk(event);
- val = (cmd << SLLC_SRCID_CMD_SHIFT) | (msk << SLLC_SRCID_MSK_SHIFT);
- writel(val, sllc_pmu->base + SLLC_SRCID_CTRL);
+ val = (cmd << regs->srcid_cmd_shift) |
+ (msk << regs->srcid_mask_shift);
+ writel(val, sllc_pmu->base + regs->srcid_ctrl);
/* Enable the srcid */
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val |= SLLC_SRCID_EN | SLLC_FILT_EN;
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl);
}
}
static void hisi_sllc_pmu_clear_srcid(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 cmd = hisi_get_srcid_cmd(event);
if (cmd) {
u32 val;
- writel(SLLC_SRCID_NONE, sllc_pmu->base + SLLC_SRCID_CTRL);
+ writel(SLLC_SRCID_NONE, sllc_pmu->base + regs->srcid_ctrl);
/* Disable the srcid */
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val &= ~(SLLC_SRCID_EN | SLLC_FILT_EN);
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl);
}
}
@@ -167,29 +211,27 @@ static void hisi_sllc_pmu_clear_filter(struct perf_event *event)
}
}
-static u32 hisi_sllc_pmu_get_counter_offset(int idx)
-{
- return (SLLC_EVENT_CNT0_L + idx * 8);
-}
-
static u64 hisi_sllc_pmu_read_counter(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc)
{
- return readq(sllc_pmu->base +
- hisi_sllc_pmu_get_counter_offset(hwc->idx));
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
+
+ return readq(sllc_pmu->base + SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx));
}
static void hisi_sllc_pmu_write_counter(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc, u64 val)
{
- writeq(val, sllc_pmu->base +
- hisi_sllc_pmu_get_counter_offset(hwc->idx));
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
+
+ writeq(val, sllc_pmu->base + SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx));
}
static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx,
u32 type)
{
- u32 reg, reg_idx, shift, val;
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
+ u32 reg, val;
/*
* Select the appropriate event select register(SLLC_EVENT_TYPE0/1).
@@ -198,114 +240,117 @@ static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx,
* SLLC_EVENT_TYPE0 is chosen. For the latter 4 hardware counters,
* SLLC_EVENT_TYPE1 is chosen.
*/
- reg = SLLC_EVENT_TYPE0 + (idx / 4) * 4;
- reg_idx = idx % 4;
- shift = 8 * reg_idx;
+ reg = regs->event_type0 + (idx / 4) * 4;
/* Write event code to SLLC_EVENT_TYPEx Register */
val = readl(sllc_pmu->base + reg);
- val &= ~(SLLC_EVTYPE_MASK << shift);
- val |= (type << shift);
+ val &= ~(SLLC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx));
+ val |= (type << HISI_PMU_EVTYPE_SHIFT(idx));
writel(val, sllc_pmu->base + reg);
}
static void hisi_sllc_pmu_start_counters(struct hisi_pmu *sllc_pmu)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val |= SLLC_PERF_CTRL_EN;
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl);
}
static void hisi_sllc_pmu_stop_counters(struct hisi_pmu *sllc_pmu)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val &= ~(SLLC_PERF_CTRL_EN);
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl);
}
static void hisi_sllc_pmu_enable_counter(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_EVENT_CTRL);
- val |= 1 << hwc->idx;
- writel(val, sllc_pmu->base + SLLC_EVENT_CTRL);
+ val = readl(sllc_pmu->base + regs->event_ctrl);
+ val |= BIT_ULL(hwc->idx);
+ writel(val, sllc_pmu->base + regs->event_ctrl);
}
static void hisi_sllc_pmu_disable_counter(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_EVENT_CTRL);
- val &= ~(1 << hwc->idx);
- writel(val, sllc_pmu->base + SLLC_EVENT_CTRL);
+ val = readl(sllc_pmu->base + regs->event_ctrl);
+ val &= ~BIT_ULL(hwc->idx);
+ writel(val, sllc_pmu->base + regs->event_ctrl);
}
static void hisi_sllc_pmu_enable_counter_int(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_INT_MASK);
- /* Write 0 to enable interrupt */
- val &= ~(1 << hwc->idx);
- writel(val, sllc_pmu->base + SLLC_INT_MASK);
+ val = readl(sllc_pmu->base + regs->int_mask);
+ val &= ~BIT_ULL(hwc->idx);
+ writel(val, sllc_pmu->base + regs->int_mask);
}
static void hisi_sllc_pmu_disable_counter_int(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_INT_MASK);
- /* Write 1 to mask interrupt */
- val |= 1 << hwc->idx;
- writel(val, sllc_pmu->base + SLLC_INT_MASK);
+ val = readl(sllc_pmu->base + regs->int_mask);
+ val |= BIT_ULL(hwc->idx);
+ writel(val, sllc_pmu->base + regs->int_mask);
}
static u32 hisi_sllc_pmu_get_int_status(struct hisi_pmu *sllc_pmu)
{
- return readl(sllc_pmu->base + SLLC_INT_STATUS);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
+
+ return readl(sllc_pmu->base + regs->int_status);
}
static void hisi_sllc_pmu_clear_int_status(struct hisi_pmu *sllc_pmu, int idx)
{
- writel(1 << idx, sllc_pmu->base + SLLC_INT_CLEAR);
-}
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
-static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = {
- { "HISI0263", },
- {}
-};
-MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match);
+ writel(BIT_ULL(idx), sllc_pmu->base + regs->int_clear);
+}
static int hisi_sllc_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *sllc_pmu)
{
+ struct hisi_sllc_pmu_regs *regs;
+
+ hisi_uncore_pmu_init_topology(sllc_pmu, &pdev->dev);
+
/*
* Use the SCCL_ID and the index ID to identify the SLLC PMU,
* while SCCL_ID is from MPIDR_EL1 by CPU.
*/
- if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
- &sllc_pmu->sccl_id)) {
+ if (sllc_pmu->topo.sccl_id < 0) {
dev_err(&pdev->dev, "Cannot read sccl-id!\n");
return -EINVAL;
}
- if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
- &sllc_pmu->index_id)) {
+ if (sllc_pmu->topo.index_id < 0) {
dev_err(&pdev->dev, "Cannot read idx-id!\n");
return -EINVAL;
}
- /* SLLC PMUs only share the same SCCL */
- sllc_pmu->ccl_id = -1;
+ sllc_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!sllc_pmu->dev_info)
+ return -ENODEV;
sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(sllc_pmu->base)) {
@@ -313,7 +358,8 @@ static int hisi_sllc_pmu_init_data(struct platform_device *pdev,
return PTR_ERR(sllc_pmu->base);
}
- sllc_pmu->identifier = readl(sllc_pmu->base + SLLC_VERSION);
+ regs = sllc_pmu->dev_info->private;
+ sllc_pmu->identifier = readl(sllc_pmu->base + regs->version);
return 0;
}
@@ -347,35 +393,54 @@ static const struct attribute_group hisi_sllc_pmu_v2_events_group = {
.attrs = hisi_sllc_pmu_v2_events_attr,
};
-static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
-
-static struct attribute *hisi_sllc_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
+static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = {
+ &hisi_sllc_pmu_v2_format_group,
+ &hisi_sllc_pmu_v2_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
NULL
};
-static const struct attribute_group hisi_sllc_pmu_cpumask_attr_group = {
- .attrs = hisi_sllc_pmu_cpumask_attrs,
+static struct hisi_sllc_pmu_regs hisi_sllc_v2_pmu_regs = {
+ .int_mask = SLLC_INT_MASK,
+ .int_clear = SLLC_INT_CLEAR,
+ .int_status = SLLC_INT_STATUS,
+ .perf_ctrl = SLLC_PERF_CTRL,
+ .srcid_ctrl = SLLC_SRCID_CTRL,
+ .srcid_cmd_shift = SLLC_SRCID_CMD_SHIFT,
+ .srcid_mask_shift = SLLC_SRCID_MSK_SHIFT,
+ .tgtid_ctrl = SLLC_TGTID_CTRL,
+ .tgtid_min_shift = SLLC_TGTID_MIN_SHIFT,
+ .tgtid_max_shift = SLLC_TGTID_MAX_SHIFT,
+ .event_ctrl = SLLC_EVENT_CTRL,
+ .event_type0 = SLLC_EVENT_TYPE0,
+ .version = SLLC_VERSION,
+ .event_cnt0 = SLLC_EVENT_CNT0_L,
};
-static struct device_attribute hisi_sllc_pmu_identifier_attr =
- __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_sllc_pmu_identifier_attrs[] = {
- &hisi_sllc_pmu_identifier_attr.attr,
- NULL
+static const struct hisi_pmu_dev_info hisi_sllc_v2 = {
+ .private = &hisi_sllc_v2_pmu_regs,
};
-static const struct attribute_group hisi_sllc_pmu_identifier_group = {
- .attrs = hisi_sllc_pmu_identifier_attrs,
+static struct hisi_sllc_pmu_regs hisi_sllc_v3_pmu_regs = {
+ .int_mask = SLLC_V3_INT_MASK,
+ .int_clear = SLLC_V3_INT_CLEAR,
+ .int_status = SLLC_V3_INT_STATUS,
+ .perf_ctrl = SLLC_V3_PERF_CTRL,
+ .srcid_ctrl = SLLC_V3_SRCID_CTRL,
+ .srcid_cmd_shift = SLLC_V3_SRCID_CMD_SHIFT,
+ .srcid_mask_shift = SLLC_V3_SRCID_MSK_SHIFT,
+ .tgtid_ctrl = SLLC_V3_TGTID_CTRL,
+ .tgtid_min_shift = SLLC_V3_TGTID_MIN_SHIFT,
+ .tgtid_max_shift = SLLC_V3_TGTID_MAX_SHIFT,
+ .event_ctrl = SLLC_V3_EVENT_CTRL,
+ .event_type0 = SLLC_V3_EVENT_TYPE0,
+ .version = SLLC_V3_VERSION,
+ .event_cnt0 = SLLC_V3_EVENT_CNT0_L,
};
-static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = {
- &hisi_sllc_pmu_v2_format_group,
- &hisi_sllc_pmu_v2_events_group,
- &hisi_sllc_pmu_cpumask_attr_group,
- &hisi_sllc_pmu_identifier_group,
- NULL
+static const struct hisi_pmu_dev_info hisi_sllc_v3 = {
+ .private = &hisi_sllc_v3_pmu_regs,
};
static const struct hisi_uncore_ops hisi_uncore_sllc_ops = {
@@ -433,8 +498,8 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
if (ret)
return ret;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_sllc%u",
- sllc_pmu->sccl_id, sllc_pmu->index_id);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_sllc%d",
+ sllc_pmu->topo.sccl_id, sllc_pmu->topo.index_id);
if (!name)
return -ENOMEM;
@@ -445,13 +510,13 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
return ret;
}
- hisi_pmu_init(&sllc_pmu->pmu, name, sllc_pmu->pmu_events.attr_groups, THIS_MODULE);
+ hisi_pmu_init(sllc_pmu, THIS_MODULE);
ret = perf_pmu_register(&sllc_pmu->pmu, name, -1);
if (ret) {
dev_err(sllc_pmu->dev, "PMU register failed, ret = %d\n", ret);
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
- &sllc_pmu->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+ &sllc_pmu->node);
return ret;
}
@@ -460,16 +525,22 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int hisi_sllc_pmu_remove(struct platform_device *pdev)
+static void hisi_sllc_pmu_remove(struct platform_device *pdev)
{
struct hisi_pmu *sllc_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&sllc_pmu->pmu);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
&sllc_pmu->node);
- return 0;
}
+static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = {
+ { "HISI0263", (kernel_ulong_t)&hisi_sllc_v2 },
+ { "HISI0264", (kernel_ulong_t)&hisi_sllc_v3 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match);
+
static struct platform_driver hisi_sllc_pmu_driver = {
.driver = {
.name = "hisi_sllc_pmu",
@@ -508,6 +579,7 @@ static void __exit hisi_sllc_pmu_module_exit(void)
}
module_exit(hisi_sllc_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
new file mode 100644
index 000000000000..03cb9b564b99
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC UC (unified cache) uncore Hardware event counters support
+ *
+ * Copyright (C) 2023 HiSilicon Limited
+ *
+ * This code is based on the uncore PMUs like hisi_uncore_l3c_pmu.
+ */
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/mod_devicetable.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* Dynamic CPU hotplug state used by UC PMU */
+static enum cpuhp_state hisi_uc_pmu_online;
+
+/* UC register definition */
+#define HISI_UC_INT_MASK_REG 0x0800
+#define HISI_UC_INT_STS_REG 0x0808
+#define HISI_UC_INT_CLEAR_REG 0x080c
+#define HISI_UC_TRACETAG_CTRL_REG 0x1b2c
+#define HISI_UC_TRACETAG_REQ_MSK GENMASK(9, 7)
+#define HISI_UC_TRACETAG_MARK_EN BIT(0)
+#define HISI_UC_TRACETAG_REQ_EN (HISI_UC_TRACETAG_MARK_EN | BIT(2))
+#define HISI_UC_TRACETAG_SRCID_EN BIT(3)
+#define HISI_UC_SRCID_CTRL_REG 0x1b40
+#define HISI_UC_SRCID_MSK GENMASK(14, 1)
+#define HISI_UC_EVENT_CTRL_REG 0x1c00
+#define HISI_UC_EVENT_TRACETAG_EN BIT(29)
+#define HISI_UC_EVENT_URING_MSK GENMASK(28, 27)
+#define HISI_UC_EVENT_GLB_EN BIT(26)
+#define HISI_UC_VERSION_REG 0x1cf0
+#define HISI_UC_EVTYPE_REGn(n) (0x1d00 + (n) * 4)
+#define HISI_UC_EVTYPE_MASK GENMASK(7, 0)
+#define HISI_UC_CNTR_REGn(n) (0x1e00 + (n) * 8)
+
+#define HISI_UC_NR_COUNTERS 0x8
+#define HISI_UC_V2_NR_EVENTS 0xFF
+#define HISI_UC_CNTR_REG_BITS 64
+
+#define HISI_UC_RD_REQ_TRACETAG 0x4
+#define HISI_UC_URING_EVENT_MIN 0x47
+#define HISI_UC_URING_EVENT_MAX 0x59
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(rd_req_en, config1, 0, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(uring_channel, config1, 5, 4);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid, config1, 19, 6);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_en, config1, 20, 20);
+
+static int hisi_uc_pmu_check_filter(struct perf_event *event)
+{
+ struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+
+ if (hisi_get_srcid_en(event) && !hisi_get_rd_req_en(event)) {
+ dev_err(uc_pmu->dev,
+ "rcid_en depends on rd_req_en being enabled!\n");
+ return -EINVAL;
+ }
+
+ if (!hisi_get_uring_channel(event))
+ return 0;
+
+ if ((HISI_GET_EVENTID(event) < HISI_UC_URING_EVENT_MIN) ||
+ (HISI_GET_EVENTID(event) > HISI_UC_URING_EVENT_MAX))
+ dev_warn(uc_pmu->dev,
+ "Only events: [%#x ~ %#x] support channel filtering!",
+ HISI_UC_URING_EVENT_MIN, HISI_UC_URING_EVENT_MAX);
+
+ return 0;
+}
+
+static void hisi_uc_pmu_config_req_tracetag(struct perf_event *event)
+{
+ struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+ u32 val;
+
+ if (!hisi_get_rd_req_en(event))
+ return;
+
+ val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+ /* The request-type has been configured */
+ if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == HISI_UC_RD_REQ_TRACETAG)
+ return;
+
+ /* Set request-type for tracetag, only read request is supported! */
+ val &= ~HISI_UC_TRACETAG_REQ_MSK;
+ val |= FIELD_PREP(HISI_UC_TRACETAG_REQ_MSK, HISI_UC_RD_REQ_TRACETAG);
+ val |= HISI_UC_TRACETAG_REQ_EN;
+ writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+}
+
+static void hisi_uc_pmu_clear_req_tracetag(struct perf_event *event)
+{
+ struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+ u32 val;
+
+ if (!hisi_get_rd_req_en(event))
+ return;
+
+ val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+ /* Do nothing, the request-type tracetag has been cleaned up */
+ if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == 0)
+ return;
+
+ /* Clear request-type */
+ val &= ~HISI_UC_TRACETAG_REQ_MSK;
+ val &= ~HISI_UC_TRACETAG_REQ_EN;
+ writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+}
+
+static void hisi_uc_pmu_config_srcid_tracetag(struct perf_event *event)
+{
+ struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+ u32 val;
+
+ if (!hisi_get_srcid_en(event))
+ return;
+
+ val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+ /* Do nothing, the source id has been configured */
+ if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val))
+ return;
+
+ /* Enable source id tracetag */
+ val |= HISI_UC_TRACETAG_SRCID_EN;
+ writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+ val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+ val &= ~HISI_UC_SRCID_MSK;
+ val |= FIELD_PREP(HISI_UC_SRCID_MSK, hisi_get_srcid(event));
+ writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+
+ /* Depend on request-type tracetag enabled */
+ hisi_uc_pmu_config_req_tracetag(event);
+}
+
+static void hisi_uc_pmu_clear_srcid_tracetag(struct perf_event *event)
+{
+ struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+ u32 val;
+
+ if (!hisi_get_srcid_en(event))
+ return;
+
+ val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+ /* Do nothing, the source id has been cleaned up */
+ if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val) == 0)
+ return;
+
+ hisi_uc_pmu_clear_req_tracetag(event);
+
+ /* Disable source id tracetag */
+ val &= ~HISI_UC_TRACETAG_SRCID_EN;
+ writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+ val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+ val &= ~HISI_UC_SRCID_MSK;
+ writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+}
+
+static void hisi_uc_pmu_config_uring_channel(struct perf_event *event)
+{
+ struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+ u32 uring_channel = hisi_get_uring_channel(event);
+ u32 val;
+
+ /* Do nothing if not being set or is set explicitly to zero (default) */
+ if (uring_channel == 0)
+ return;
+
+ val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+
+ /* Do nothing, the uring_channel has been configured */
+ if (uring_channel == FIELD_GET(HISI_UC_EVENT_URING_MSK, val))
+ return;
+
+ val &= ~HISI_UC_EVENT_URING_MSK;
+ val |= FIELD_PREP(HISI_UC_EVENT_URING_MSK, uring_channel);
+ writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_clear_uring_channel(struct perf_event *event)
+{
+ struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+ u32 val;
+
+ /* Do nothing if not being set or is set explicitly to zero (default) */
+ if (hisi_get_uring_channel(event) == 0)
+ return;
+
+ val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+
+ /* Do nothing, the uring_channel has been cleaned up */
+ if (FIELD_GET(HISI_UC_EVENT_URING_MSK, val) == 0)
+ return;
+
+ val &= ~HISI_UC_EVENT_URING_MSK;
+ writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_enable_filter(struct perf_event *event)
+{
+ if (event->attr.config1 == 0)
+ return;
+
+ hisi_uc_pmu_config_uring_channel(event);
+ hisi_uc_pmu_config_req_tracetag(event);
+ hisi_uc_pmu_config_srcid_tracetag(event);
+}
+
+static void hisi_uc_pmu_disable_filter(struct perf_event *event)
+{
+ if (event->attr.config1 == 0)
+ return;
+
+ hisi_uc_pmu_clear_srcid_tracetag(event);
+ hisi_uc_pmu_clear_req_tracetag(event);
+ hisi_uc_pmu_clear_uring_channel(event);
+}
+
+static void hisi_uc_pmu_write_evtype(struct hisi_pmu *uc_pmu, int idx, u32 type)
+{
+ u32 val;
+
+ /*
+ * Select the appropriate event select register.
+ * There are 2 32-bit event select registers for the
+ * 8 hardware counters, each event code is 8-bit wide.
+ */
+ val = readl(uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4));
+ val &= ~(HISI_UC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx));
+ val |= (type << HISI_PMU_EVTYPE_SHIFT(idx));
+ writel(val, uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4));
+}
+
+static void hisi_uc_pmu_start_counters(struct hisi_pmu *uc_pmu)
+{
+ u32 val;
+
+ val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+ val |= HISI_UC_EVENT_GLB_EN;
+ writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_stop_counters(struct hisi_pmu *uc_pmu)
+{
+ u32 val;
+
+ val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+ val &= ~HISI_UC_EVENT_GLB_EN;
+ writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_enable_counter(struct hisi_pmu *uc_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Enable counter index */
+ val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+ val |= (1 << hwc->idx);
+ writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_disable_counter(struct hisi_pmu *uc_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Clear counter index */
+ val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+ val &= ~(1 << hwc->idx);
+ writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static u64 hisi_uc_pmu_read_counter(struct hisi_pmu *uc_pmu,
+ struct hw_perf_event *hwc)
+{
+ return readq(uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx));
+}
+
+static bool hisi_uc_pmu_get_glb_en_state(struct hisi_pmu *uc_pmu)
+{
+ u32 val;
+
+ val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+ return !!FIELD_GET(HISI_UC_EVENT_GLB_EN, val);
+}
+
+static void hisi_uc_pmu_write_counter_normal(struct hisi_pmu *uc_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ writeq(val, uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx));
+}
+
+static void hisi_uc_pmu_write_counter_quirk_v2(struct hisi_pmu *uc_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ hisi_uc_pmu_start_counters(uc_pmu);
+ hisi_uc_pmu_write_counter_normal(uc_pmu, hwc, val);
+ hisi_uc_pmu_stop_counters(uc_pmu);
+}
+
+static void hisi_uc_pmu_write_counter(struct hisi_pmu *uc_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ bool enable = hisi_uc_pmu_get_glb_en_state(uc_pmu);
+ bool erratum = uc_pmu->identifier == HISI_PMU_V2;
+
+ /*
+ * HiSilicon UC PMU v2 suffers the erratum 162700402 that the
+ * PMU counter cannot be set due to the lack of clock under power
+ * saving mode. This will lead to error or inaccurate counts.
+ * The clock can be enabled by the PMU global enabling control.
+ * The irq handler and pmu_start() will call the function to set
+ * period. If the function under irq context, the PMU has been
+ * enabled therefore we set counter directly. Other situations
+ * the PMU is disabled, we need to enable it to turn on the
+ * counter clock to set period, and then restore PMU enable
+ * status, the counter can hold its value without a clock.
+ */
+ if (enable || !erratum)
+ hisi_uc_pmu_write_counter_normal(uc_pmu, hwc, val);
+ else
+ hisi_uc_pmu_write_counter_quirk_v2(uc_pmu, hwc, val);
+}
+
+static void hisi_uc_pmu_enable_counter_int(struct hisi_pmu *uc_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG);
+ val &= ~(1 << hwc->idx);
+ writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG);
+}
+
+static void hisi_uc_pmu_disable_counter_int(struct hisi_pmu *uc_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG);
+ val |= (1 << hwc->idx);
+ writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG);
+}
+
+static u32 hisi_uc_pmu_get_int_status(struct hisi_pmu *uc_pmu)
+{
+ return readl(uc_pmu->base + HISI_UC_INT_STS_REG);
+}
+
+static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx)
+{
+ writel(1 << idx, uc_pmu->base + HISI_UC_INT_CLEAR_REG);
+}
+
+static int hisi_uc_pmu_init_data(struct platform_device *pdev,
+ struct hisi_pmu *uc_pmu)
+{
+ hisi_uncore_pmu_init_topology(uc_pmu, &pdev->dev);
+
+ /*
+ * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to
+ * identify the topology information of UC PMU devices in the chip.
+ * They have some CCLs per SCCL and then 4 UC PMU per CCL.
+ */
+ if (uc_pmu->topo.sccl_id < 0) {
+ dev_err(&pdev->dev, "Can not read uc sccl-id!\n");
+ return -EINVAL;
+ }
+
+ if (uc_pmu->topo.ccl_id < 0) {
+ dev_err(&pdev->dev, "Can not read uc ccl-id!\n");
+ return -EINVAL;
+ }
+
+ if (uc_pmu->topo.sub_id < 0) {
+ dev_err(&pdev->dev, "Can not read sub-id!\n");
+ return -EINVAL;
+ }
+
+ uc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(uc_pmu->base)) {
+ dev_err(&pdev->dev, "ioremap failed for uc_pmu resource\n");
+ return PTR_ERR(uc_pmu->base);
+ }
+
+ uc_pmu->identifier = readl(uc_pmu->base + HISI_UC_VERSION_REG);
+
+ return 0;
+}
+
+static struct attribute *hisi_uc_pmu_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ HISI_PMU_FORMAT_ATTR(rd_req_en, "config1:0-0"),
+ HISI_PMU_FORMAT_ATTR(uring_channel, "config1:4-5"),
+ HISI_PMU_FORMAT_ATTR(srcid, "config1:6-19"),
+ HISI_PMU_FORMAT_ATTR(srcid_en, "config1:20-20"),
+ NULL
+};
+
+static const struct attribute_group hisi_uc_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_uc_pmu_format_attr,
+};
+
+static struct attribute *hisi_uc_pmu_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(sq_time, 0x00),
+ HISI_PMU_EVENT_ATTR(pq_time, 0x01),
+ HISI_PMU_EVENT_ATTR(hbm_time, 0x02),
+ HISI_PMU_EVENT_ATTR(iq_comp_time_cring, 0x03),
+ HISI_PMU_EVENT_ATTR(iq_comp_time_uring, 0x05),
+ HISI_PMU_EVENT_ATTR(cpu_rd, 0x10),
+ HISI_PMU_EVENT_ATTR(cpu_rd64, 0x17),
+ HISI_PMU_EVENT_ATTR(cpu_rs64, 0x19),
+ HISI_PMU_EVENT_ATTR(cpu_mru, 0x1c),
+ HISI_PMU_EVENT_ATTR(cycles, 0x95),
+ HISI_PMU_EVENT_ATTR(spipe_hit, 0xb3),
+ HISI_PMU_EVENT_ATTR(hpipe_hit, 0xdb),
+ HISI_PMU_EVENT_ATTR(cring_rxdat_cnt, 0xfa),
+ HISI_PMU_EVENT_ATTR(cring_txdat_cnt, 0xfb),
+ HISI_PMU_EVENT_ATTR(uring_rxdat_cnt, 0xfc),
+ HISI_PMU_EVENT_ATTR(uring_txdat_cnt, 0xfd),
+ NULL
+};
+
+static const struct attribute_group hisi_uc_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_uc_pmu_events_attr,
+};
+
+static const struct attribute_group *hisi_uc_pmu_attr_groups[] = {
+ &hisi_uc_pmu_format_group,
+ &hisi_uc_pmu_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_uc_pmu_ops = {
+ .check_filter = hisi_uc_pmu_check_filter,
+ .write_evtype = hisi_uc_pmu_write_evtype,
+ .get_event_idx = hisi_uncore_pmu_get_event_idx,
+ .start_counters = hisi_uc_pmu_start_counters,
+ .stop_counters = hisi_uc_pmu_stop_counters,
+ .enable_counter = hisi_uc_pmu_enable_counter,
+ .disable_counter = hisi_uc_pmu_disable_counter,
+ .enable_counter_int = hisi_uc_pmu_enable_counter_int,
+ .disable_counter_int = hisi_uc_pmu_disable_counter_int,
+ .write_counter = hisi_uc_pmu_write_counter,
+ .read_counter = hisi_uc_pmu_read_counter,
+ .get_int_status = hisi_uc_pmu_get_int_status,
+ .clear_int_status = hisi_uc_pmu_clear_int_status,
+ .enable_filter = hisi_uc_pmu_enable_filter,
+ .disable_filter = hisi_uc_pmu_disable_filter,
+};
+
+static int hisi_uc_pmu_dev_probe(struct platform_device *pdev,
+ struct hisi_pmu *uc_pmu)
+{
+ int ret;
+
+ ret = hisi_uc_pmu_init_data(pdev, uc_pmu);
+ if (ret)
+ return ret;
+
+ ret = hisi_uncore_pmu_init_irq(uc_pmu, pdev);
+ if (ret)
+ return ret;
+
+ uc_pmu->pmu_events.attr_groups = hisi_uc_pmu_attr_groups;
+ uc_pmu->check_event = HISI_UC_EVTYPE_MASK;
+ uc_pmu->ops = &hisi_uncore_uc_pmu_ops;
+ uc_pmu->counter_bits = HISI_UC_CNTR_REG_BITS;
+ uc_pmu->num_counters = HISI_UC_NR_COUNTERS;
+ uc_pmu->dev = &pdev->dev;
+ uc_pmu->on_cpu = -1;
+
+ return 0;
+}
+
+static void hisi_uc_pmu_remove_cpuhp_instance(void *hotplug_node)
+{
+ cpuhp_state_remove_instance_nocalls(hisi_uc_pmu_online, hotplug_node);
+}
+
+static void hisi_uc_pmu_unregister_pmu(void *pmu)
+{
+ perf_pmu_unregister(pmu);
+}
+
+static int hisi_uc_pmu_probe(struct platform_device *pdev)
+{
+ struct hisi_pmu *uc_pmu;
+ char *name;
+ int ret;
+
+ uc_pmu = devm_kzalloc(&pdev->dev, sizeof(*uc_pmu), GFP_KERNEL);
+ if (!uc_pmu)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, uc_pmu);
+
+ ret = hisi_uc_pmu_dev_probe(pdev, uc_pmu);
+ if (ret)
+ return ret;
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%d",
+ uc_pmu->topo.sccl_id, uc_pmu->topo.ccl_id,
+ uc_pmu->topo.sub_id);
+ if (!name)
+ return -ENOMEM;
+
+ ret = cpuhp_state_add_instance(hisi_uc_pmu_online, &uc_pmu->node);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Error registering hotplug\n");
+
+ ret = devm_add_action_or_reset(&pdev->dev,
+ hisi_uc_pmu_remove_cpuhp_instance,
+ &uc_pmu->node);
+ if (ret)
+ return ret;
+
+ hisi_pmu_init(uc_pmu, THIS_MODULE);
+
+ ret = perf_pmu_register(&uc_pmu->pmu, name, -1);
+ if (ret)
+ return ret;
+
+ return devm_add_action_or_reset(&pdev->dev,
+ hisi_uc_pmu_unregister_pmu,
+ &uc_pmu->pmu);
+}
+
+static const struct acpi_device_id hisi_uc_pmu_acpi_match[] = {
+ { "HISI0291", },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_uc_pmu_acpi_match);
+
+static struct platform_driver hisi_uc_pmu_driver = {
+ .driver = {
+ .name = "hisi_uc_pmu",
+ .acpi_match_table = hisi_uc_pmu_acpi_match,
+ /*
+ * We have not worked out a safe bind/unbind process,
+ * Forcefully unbinding during sampling will lead to a
+ * kernel panic, so this is not supported yet.
+ */
+ .suppress_bind_attrs = true,
+ },
+ .probe = hisi_uc_pmu_probe,
+};
+
+static int __init hisi_uc_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "perf/hisi/uc:online",
+ hisi_uncore_pmu_online_cpu,
+ hisi_uncore_pmu_offline_cpu);
+ if (ret < 0) {
+ pr_err("UC PMU: Error setup hotplug, ret = %d\n", ret);
+ return ret;
+ }
+ hisi_uc_pmu_online = ret;
+
+ ret = platform_driver_register(&hisi_uc_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(hisi_uc_pmu_online);
+
+ return ret;
+}
+module_init(hisi_uc_pmu_module_init);
+
+static void __exit hisi_uc_pmu_module_exit(void)
+{
+ platform_driver_unregister(&hisi_uc_pmu_driver);
+ cpuhp_remove_multi_state(hisi_uc_pmu_online);
+}
+module_exit(hisi_uc_pmu_module_exit);
+
+MODULE_IMPORT_NS("HISI_PMU");
+MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Junhao He <hejunhao3@huawei.com>");
diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c
index e0457d84af6b..c157f3572cae 100644
--- a/drivers/perf/hisilicon/hns3_pmu.c
+++ b/drivers/perf/hisilicon/hns3_pmu.c
@@ -363,16 +363,6 @@ HNS3_PMU_FILTER_ATTR(global, config1, 52, 52);
HNS3_PMU_EVT_PPS_##_name##_TIME, \
HNS3_PMU_FILTER_INTR_##_name})
-static ssize_t hns3_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *eattr;
-
- eattr = container_of(attr, struct dev_ext_attribute, attr);
-
- return sysfs_emit(buf, "%s\n", (char *)eattr->var);
-}
-
static ssize_t hns3_pmu_event_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -421,7 +411,7 @@ static ssize_t hns3_pmu_filter_mode_show(struct device *dev,
})[0].attr.attr)
#define HNS3_PMU_FORMAT_ATTR(_name, _format) \
- HNS3_PMU_ATTR(_name, hns3_pmu_format_show, (void *)_format)
+ HNS3_PMU_ATTR(_name, device_show_string, _format)
#define HNS3_PMU_EVENT_ATTR(_name, _event) \
HNS3_PMU_ATTR(_name, hns3_pmu_event_show, (void *)_event)
#define HNS3_PMU_FLT_MODE_ATTR(_name, _event) \
@@ -1085,15 +1075,27 @@ static bool hns3_pmu_validate_event_group(struct perf_event *event)
return false;
for (num = 0; num < counters; num++) {
+ /*
+ * If we find a related event, then it's a valid group
+ * since we don't need to allocate a new counter for it.
+ */
if (hns3_pmu_cmp_event(event_group[num], sibling))
break;
}
+ /*
+ * Otherwise it's a new event but if there's no available counter,
+ * fail the check since we cannot schedule all the events in
+ * the group simultaneously.
+ */
+ if (num == HNS3_PMU_MAX_HW_EVENTS)
+ return false;
+
if (num == counters)
event_group[counters++] = sibling;
}
- return counters <= HNS3_PMU_MAX_HW_EVENTS;
+ return true;
}
static u32 hns3_pmu_get_filter_condition(struct perf_event *event)
@@ -1419,6 +1421,7 @@ static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
hns3_pmu->pmu = (struct pmu) {
.name = name,
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.event_init = hns3_pmu_event_init,
.pmu_enable = hns3_pmu_enable,
.pmu_disable = hns3_pmu_disable,
@@ -1515,7 +1518,7 @@ static int hns3_pmu_irq_register(struct pci_dev *pdev,
return ret;
}
- ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev);
+ ret = devm_add_action_or_reset(&pdev->dev, hns3_pmu_free_irq, pdev);
if (ret) {
pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret);
return ret;
@@ -1556,8 +1559,8 @@ static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1);
if (ret) {
pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret);
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
- &hns3_pmu->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+ &hns3_pmu->node);
}
return ret;
@@ -1568,8 +1571,8 @@ static void hns3_pmu_uninit_pmu(struct pci_dev *pdev)
struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev);
perf_pmu_unregister(&hns3_pmu->pmu);
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
- &hns3_pmu->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+ &hns3_pmu->node);
}
static int hns3_pmu_init_dev(struct pci_dev *pdev)
diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
index 665b382a0ee3..72ac17efd846 100644
--- a/drivers/perf/marvell_cn10k_ddr_pmu.c
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -1,37 +1,43 @@
// SPDX-License-Identifier: GPL-2.0
-/* Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver
+/*
+ * Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver
*
- * Copyright (C) 2021 Marvell.
+ * Copyright (C) 2021-2024 Marvell.
*/
#include <linux/init.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/perf_event.h>
#include <linux/hrtimer.h>
+#include <linux/acpi.h>
+#include <linux/platform_device.h>
/* Performance Counters Operating Mode Control Registers */
-#define DDRC_PERF_CNT_OP_MODE_CTRL 0x8020
-#define OP_MODE_CTRL_VAL_MANNUAL 0x1
+#define CN10K_DDRC_PERF_CNT_OP_MODE_CTRL 0x8020
+#define ODY_DDRC_PERF_CNT_OP_MODE_CTRL 0x20020
+#define OP_MODE_CTRL_VAL_MANUAL 0x1
/* Performance Counters Start Operation Control Registers */
-#define DDRC_PERF_CNT_START_OP_CTRL 0x8028
+#define CN10K_DDRC_PERF_CNT_START_OP_CTRL 0x8028
+#define ODY_DDRC_PERF_CNT_START_OP_CTRL 0x200A0
#define START_OP_CTRL_VAL_START 0x1ULL
#define START_OP_CTRL_VAL_ACTIVE 0x2
/* Performance Counters End Operation Control Registers */
-#define DDRC_PERF_CNT_END_OP_CTRL 0x8030
+#define CN10K_DDRC_PERF_CNT_END_OP_CTRL 0x8030
+#define ODY_DDRC_PERF_CNT_END_OP_CTRL 0x200E0
#define END_OP_CTRL_VAL_END 0x1ULL
/* Performance Counters End Status Registers */
-#define DDRC_PERF_CNT_END_STATUS 0x8038
+#define CN10K_DDRC_PERF_CNT_END_STATUS 0x8038
+#define ODY_DDRC_PERF_CNT_END_STATUS 0x20120
#define END_STATUS_VAL_END_TIMER_MODE_END 0x1
/* Performance Counters Configuration Registers */
-#define DDRC_PERF_CFG_BASE 0x8040
+#define CN10K_DDRC_PERF_CFG_BASE 0x8040
+#define ODY_DDRC_PERF_CFG_BASE 0x20160
/* 8 Generic event counter + 2 fixed event counters */
#define DDRC_PERF_NUM_GEN_COUNTERS 8
@@ -42,18 +48,28 @@
DDRC_PERF_NUM_FIX_COUNTERS)
/* Generic event counter registers */
-#define DDRC_PERF_CFG(n) (DDRC_PERF_CFG_BASE + 8 * (n))
+#define DDRC_PERF_CFG(base, n) ((base) + 8 * (n))
#define EVENT_ENABLE BIT_ULL(63)
/* Two dedicated event counters for DDR reads and writes */
#define EVENT_DDR_READS 101
#define EVENT_DDR_WRITES 100
+#define DDRC_PERF_REG(base, n) ((base) + 8 * (n))
/*
* programmable events IDs in programmable event counters.
* DO NOT change these event-id numbers, they are used to
* program event bitmap in h/w.
*/
+#define EVENT_DFI_CMD_IS_RETRY 61
+#define EVENT_RD_UC_ECC_ERROR 60
+#define EVENT_RD_CRC_ERROR 59
+#define EVENT_CAPAR_ERROR 58
+#define EVENT_WR_CRC_ERROR 57
+#define EVENT_DFI_PARITY_POISON 56
+#define EVENT_RETRY_FIFO_FULL 46
+#define EVENT_DFI_CYCLES 45
+
#define EVENT_OP_IS_ZQLATCH 55
#define EVENT_OP_IS_ZQSTART 54
#define EVENT_OP_IS_TCR_MRR 53
@@ -102,28 +118,37 @@
#define EVENT_HIF_RD_OR_WR 1
/* Event counter value registers */
-#define DDRC_PERF_CNT_VALUE_BASE 0x8080
-#define DDRC_PERF_CNT_VALUE(n) (DDRC_PERF_CNT_VALUE_BASE + 8 * (n))
+#define CN10K_DDRC_PERF_CNT_VALUE_BASE 0x8080
+#define ODY_DDRC_PERF_CNT_VALUE_BASE 0x201C0
/* Fixed event counter enable/disable register */
-#define DDRC_PERF_CNT_FREERUN_EN 0x80C0
+#define CN10K_DDRC_PERF_CNT_FREERUN_EN 0x80C0
#define DDRC_PERF_FREERUN_WRITE_EN 0x1
#define DDRC_PERF_FREERUN_READ_EN 0x2
/* Fixed event counter control register */
-#define DDRC_PERF_CNT_FREERUN_CTRL 0x80C8
+#define CN10K_DDRC_PERF_CNT_FREERUN_CTRL 0x80C8
+#define ODY_DDRC_PERF_CNT_FREERUN_CTRL 0x20240
#define DDRC_FREERUN_WRITE_CNT_CLR 0x1
#define DDRC_FREERUN_READ_CNT_CLR 0x2
-/* Fixed event counter value register */
-#define DDRC_PERF_CNT_VALUE_WR_OP 0x80D0
-#define DDRC_PERF_CNT_VALUE_RD_OP 0x80D8
+/* Fixed event counter clear register, defined only for Odyssey */
+#define ODY_DDRC_PERF_CNT_FREERUN_CLR 0x20248
+
#define DDRC_PERF_CNT_VALUE_OVERFLOW BIT_ULL(48)
#define DDRC_PERF_CNT_MAX_VALUE GENMASK_ULL(48, 0)
+/* Fixed event counter value register */
+#define CN10K_DDRC_PERF_CNT_VALUE_WR_OP 0x80D0
+#define CN10K_DDRC_PERF_CNT_VALUE_RD_OP 0x80D8
+#define ODY_DDRC_PERF_CNT_VALUE_WR_OP 0x20250
+#define ODY_DDRC_PERF_CNT_VALUE_RD_OP 0x20258
+
struct cn10k_ddr_pmu {
struct pmu pmu;
void __iomem *base;
+ const struct ddr_pmu_platform_data *p_data;
+ const struct ddr_pmu_ops *ops;
unsigned int cpu;
struct device *dev;
int active_events;
@@ -132,8 +157,36 @@ struct cn10k_ddr_pmu {
struct hlist_node node;
};
+struct ddr_pmu_ops {
+ void (*enable_read_freerun_counter)(struct cn10k_ddr_pmu *pmu,
+ bool enable);
+ void (*enable_write_freerun_counter)(struct cn10k_ddr_pmu *pmu,
+ bool enable);
+ void (*clear_read_freerun_counter)(struct cn10k_ddr_pmu *pmu);
+ void (*clear_write_freerun_counter)(struct cn10k_ddr_pmu *pmu);
+ void (*pmu_overflow_handler)(struct cn10k_ddr_pmu *pmu, int evt_idx);
+};
+
#define to_cn10k_ddr_pmu(p) container_of(p, struct cn10k_ddr_pmu, pmu)
+struct ddr_pmu_platform_data {
+ u64 counter_overflow_val;
+ u64 counter_max_val;
+ u64 cnt_base;
+ u64 cfg_base;
+ u64 cnt_op_mode_ctrl;
+ u64 cnt_start_op_ctrl;
+ u64 cnt_end_op_ctrl;
+ u64 cnt_end_status;
+ u64 cnt_freerun_en;
+ u64 cnt_freerun_ctrl;
+ u64 cnt_freerun_clr;
+ u64 cnt_value_wr_op;
+ u64 cnt_value_rd_op;
+ bool is_cn10k;
+ bool is_ody;
+};
+
static ssize_t cn10k_ddr_pmu_event_show(struct device *dev,
struct device_attribute *attr,
char *page)
@@ -209,6 +262,85 @@ static struct attribute *cn10k_ddr_perf_events_attrs[] = {
NULL
};
+static struct attribute *odyssey_ddr_perf_events_attrs[] = {
+ /* Programmable */
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_or_wr_access, EVENT_HIF_RD_OR_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_wr_access, EVENT_HIF_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_access, EVENT_HIF_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rmw_access, EVENT_HIF_RMW),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_pri_rdaccess, EVENT_HIF_HI_PRI_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_bypass_access, EVENT_READ_BYPASS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_act_bypass_access, EVENT_ACT_BYPASS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_wr_data_access,
+ EVENT_DFI_WR_DATA_CYCLES),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_rd_data_access,
+ EVENT_DFI_RD_DATA_CYCLES),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hpri_sched_rd_crit_access,
+ EVENT_HPR_XACT_WHEN_CRITICAL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_lpri_sched_rd_crit_access,
+ EVENT_LPR_XACT_WHEN_CRITICAL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_trxn_crit_access,
+ EVENT_WR_XACT_WHEN_CRITICAL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_active_access, EVENT_OP_IS_ACTIVATE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_or_wr_access,
+ EVENT_OP_IS_RD_OR_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_active_access,
+ EVENT_OP_IS_RD_ACTIVATE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_read, EVENT_OP_IS_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_write, EVENT_OP_IS_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_mwr, EVENT_OP_IS_MWR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge, EVENT_OP_IS_PRECHARGE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_rdwr,
+ EVENT_PRECHARGE_FOR_RDWR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_other,
+ EVENT_PRECHARGE_FOR_OTHER),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_rdwr_transitions, EVENT_RDWR_TRANSITIONS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_write_combine, EVENT_WRITE_COMBINE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_war_hazard, EVENT_WAR_HAZARD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_raw_hazard, EVENT_RAW_HAZARD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_waw_hazard, EVENT_WAW_HAZARD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_selfref, EVENT_OP_IS_ENTER_SELFREF),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_powerdown,
+ EVENT_OP_IS_ENTER_POWERDOWN),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_mpsm, EVENT_OP_IS_ENTER_MPSM),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_refresh, EVENT_OP_IS_REFRESH),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_crit_ref, EVENT_OP_IS_CRIT_REF),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_spec_ref, EVENT_OP_IS_SPEC_REF),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_load_mode, EVENT_OP_IS_LOAD_MODE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_cycles, EVENT_DFI_CYCLES),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_retry_fifo_full,
+ EVENT_RETRY_FIFO_FULL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd,
+ EVENT_VISIBLE_WIN_LIMIT_REACHED_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_wr,
+ EVENT_VISIBLE_WIN_LIMIT_REACHED_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mpc, EVENT_OP_IS_DQSOSC_MPC),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mrr, EVENT_OP_IS_DQSOSC_MRR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_tcr_mrr, EVENT_OP_IS_TCR_MRR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_zqstart, EVENT_OP_IS_ZQSTART),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_zqlatch, EVENT_OP_IS_ZQLATCH),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_parity_poison,
+ EVENT_DFI_PARITY_POISON),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_crc_error, EVENT_WR_CRC_ERROR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_capar_error, EVENT_CAPAR_ERROR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_crc_error, EVENT_RD_CRC_ERROR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_uc_ecc_error, EVENT_RD_UC_ECC_ERROR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_cmd_is_retry, EVENT_DFI_CMD_IS_RETRY),
+ /* Free run event counters */
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_reads, EVENT_DDR_READS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_writes, EVENT_DDR_WRITES),
+ NULL
+};
+
+static struct attribute_group odyssey_ddr_perf_events_attr_group = {
+ .name = "events",
+ .attrs = odyssey_ddr_perf_events_attrs,
+};
+
static struct attribute_group cn10k_ddr_perf_events_attr_group = {
.name = "events",
.attrs = cn10k_ddr_perf_events_attrs,
@@ -254,6 +386,13 @@ static const struct attribute_group *cn10k_attr_groups[] = {
NULL,
};
+static const struct attribute_group *odyssey_attr_groups[] = {
+ &odyssey_ddr_perf_events_attr_group,
+ &cn10k_ddr_perf_format_attr_group,
+ &cn10k_ddr_perf_cpumask_attr_group,
+ NULL
+};
+
/* Default poll timeout is 100 sec, which is very sufficient for
* 48 bit counter incremented max at 5.6 GT/s, which may take many
* hours to overflow.
@@ -266,9 +405,18 @@ static ktime_t cn10k_ddr_pmu_timer_period(void)
return ms_to_ktime((u64)cn10k_ddr_pmu_poll_period_sec * USEC_PER_SEC);
}
-static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
+static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap,
+ struct cn10k_ddr_pmu *ddr_pmu)
{
+ int err = 0;
+
switch (eventid) {
+ case EVENT_DFI_PARITY_POISON ...EVENT_DFI_CMD_IS_RETRY:
+ if (!ddr_pmu->p_data->is_ody) {
+ err = -EINVAL;
+ break;
+ }
+ fallthrough;
case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
*event_bitmap = (1ULL << (eventid - 1));
@@ -279,11 +427,12 @@ static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
*event_bitmap = (0xFULL << (eventid - 1));
break;
default:
- pr_err("%s Invalid eventid %d\n", __func__, eventid);
- return -EINVAL;
+ err = -EINVAL;
}
- return 0;
+ if (err)
+ pr_err("%s Invalid eventid %d\n", __func__, eventid);
+ return err;
}
static int cn10k_ddr_perf_alloc_counter(struct cn10k_ddr_pmu *pmu,
@@ -351,9 +500,33 @@ static int cn10k_ddr_perf_event_init(struct perf_event *event)
return 0;
}
+static void cn10k_ddr_perf_counter_start(struct cn10k_ddr_pmu *ddr_pmu,
+ int counter)
+{
+ const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
+ u64 ctrl_reg = p_data->cnt_start_op_ctrl;
+
+ writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base +
+ DDRC_PERF_REG(ctrl_reg, counter));
+}
+
+static void cn10k_ddr_perf_counter_stop(struct cn10k_ddr_pmu *ddr_pmu,
+ int counter)
+{
+ const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
+ u64 ctrl_reg = p_data->cnt_end_op_ctrl;
+
+ writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
+ DDRC_PERF_REG(ctrl_reg, counter));
+}
+
static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
int counter, bool enable)
{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 ctrl_reg = pmu->p_data->cnt_op_mode_ctrl;
+ const struct ddr_pmu_ops *ops = pmu->ops;
+ bool is_ody = pmu->p_data->is_ody;
u32 reg;
u64 val;
@@ -363,7 +536,7 @@ static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
}
if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
- reg = DDRC_PERF_CFG(counter);
+ reg = DDRC_PERF_CFG(p_data->cfg_base, counter);
val = readq_relaxed(pmu->base + reg);
if (enable)
@@ -372,40 +545,52 @@ static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
val &= ~EVENT_ENABLE;
writeq_relaxed(val, pmu->base + reg);
- } else {
- val = readq_relaxed(pmu->base + DDRC_PERF_CNT_FREERUN_EN);
- if (enable) {
- if (counter == DDRC_PERF_READ_COUNTER_IDX)
- val |= DDRC_PERF_FREERUN_READ_EN;
- else
- val |= DDRC_PERF_FREERUN_WRITE_EN;
- } else {
- if (counter == DDRC_PERF_READ_COUNTER_IDX)
- val &= ~DDRC_PERF_FREERUN_READ_EN;
- else
- val &= ~DDRC_PERF_FREERUN_WRITE_EN;
+
+ if (is_ody) {
+ if (enable) {
+ /*
+ * Setup the PMU counter to work in
+ * manual mode
+ */
+ reg = DDRC_PERF_REG(ctrl_reg, counter);
+ writeq_relaxed(OP_MODE_CTRL_VAL_MANUAL,
+ pmu->base + reg);
+
+ cn10k_ddr_perf_counter_start(pmu, counter);
+ } else {
+ cn10k_ddr_perf_counter_stop(pmu, counter);
+ }
}
- writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+ } else {
+ if (counter == DDRC_PERF_READ_COUNTER_IDX)
+ ops->enable_read_freerun_counter(pmu, enable);
+ else
+ ops->enable_write_freerun_counter(pmu, enable);
}
}
static u64 cn10k_ddr_perf_read_counter(struct cn10k_ddr_pmu *pmu, int counter)
{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
u64 val;
if (counter == DDRC_PERF_READ_COUNTER_IDX)
- return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_RD_OP);
+ return readq_relaxed(pmu->base +
+ p_data->cnt_value_rd_op);
if (counter == DDRC_PERF_WRITE_COUNTER_IDX)
- return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_WR_OP);
+ return readq_relaxed(pmu->base +
+ p_data->cnt_value_wr_op);
- val = readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE(counter));
+ val = readq_relaxed(pmu->base +
+ DDRC_PERF_REG(p_data->cnt_base, counter));
return val;
}
static void cn10k_ddr_perf_event_update(struct perf_event *event)
{
struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
struct hw_perf_event *hwc = &event->hw;
u64 prev_count, new_count, mask;
@@ -414,7 +599,7 @@ static void cn10k_ddr_perf_event_update(struct perf_event *event)
new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
} while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
- mask = DDRC_PERF_CNT_MAX_VALUE;
+ mask = p_data->counter_max_val;
local64_add((new_count - prev_count) & mask, &event->count);
}
@@ -435,6 +620,8 @@ static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags)
static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
{
struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ const struct ddr_pmu_ops *ops = pmu->ops;
struct hw_perf_event *hwc = &event->hw;
u8 config = event->attr.config;
int counter, ret;
@@ -454,8 +641,8 @@ static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
/* Generic counters, configure event id */
- reg_offset = DDRC_PERF_CFG(counter);
- ret = ddr_perf_get_event_bitmap(config, &val);
+ reg_offset = DDRC_PERF_CFG(p_data->cfg_base, counter);
+ ret = ddr_perf_get_event_bitmap(config, &val, pmu);
if (ret)
return ret;
@@ -463,11 +650,9 @@ static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
} else {
/* fixed event counter, clear counter value */
if (counter == DDRC_PERF_READ_COUNTER_IDX)
- val = DDRC_FREERUN_READ_CNT_CLR;
+ ops->clear_read_freerun_counter(pmu);
else
- val = DDRC_FREERUN_WRITE_CNT_CLR;
-
- writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_CTRL);
+ ops->clear_write_freerun_counter(pmu);
}
hwc->state |= PERF_HES_STOPPED;
@@ -512,17 +697,19 @@ static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags)
static void cn10k_ddr_perf_pmu_enable(struct pmu *pmu)
{
struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+ const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base +
- DDRC_PERF_CNT_START_OP_CTRL);
+ p_data->cnt_start_op_ctrl);
}
static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
{
struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+ const struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
- DDRC_PERF_CNT_END_OP_CTRL);
+ p_data->cnt_end_op_ctrl);
}
static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
@@ -547,8 +734,123 @@ static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
}
}
+static void ddr_pmu_enable_read_freerun(struct cn10k_ddr_pmu *pmu, bool enable)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = readq_relaxed(pmu->base + p_data->cnt_freerun_en);
+ if (enable)
+ val |= DDRC_PERF_FREERUN_READ_EN;
+ else
+ val &= ~DDRC_PERF_FREERUN_READ_EN;
+
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_en);
+}
+
+static void ddr_pmu_enable_write_freerun(struct cn10k_ddr_pmu *pmu, bool enable)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = readq_relaxed(pmu->base + p_data->cnt_freerun_en);
+ if (enable)
+ val |= DDRC_PERF_FREERUN_WRITE_EN;
+ else
+ val &= ~DDRC_PERF_FREERUN_WRITE_EN;
+
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_en);
+}
+
+static void ddr_pmu_read_clear_freerun(struct cn10k_ddr_pmu *pmu)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = DDRC_FREERUN_READ_CNT_CLR;
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl);
+}
+
+static void ddr_pmu_write_clear_freerun(struct cn10k_ddr_pmu *pmu)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = DDRC_FREERUN_WRITE_CNT_CLR;
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl);
+}
+
+static void ddr_pmu_overflow_hander(struct cn10k_ddr_pmu *pmu, int evt_idx)
+{
+ cn10k_ddr_perf_event_update_all(pmu);
+ cn10k_ddr_perf_pmu_disable(&pmu->pmu);
+ cn10k_ddr_perf_pmu_enable(&pmu->pmu);
+}
+
+static void ddr_pmu_ody_enable_read_freerun(struct cn10k_ddr_pmu *pmu,
+ bool enable)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = readq_relaxed(pmu->base + p_data->cnt_freerun_ctrl);
+ if (enable)
+ val |= DDRC_PERF_FREERUN_READ_EN;
+ else
+ val &= ~DDRC_PERF_FREERUN_READ_EN;
+
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl);
+}
+
+static void ddr_pmu_ody_enable_write_freerun(struct cn10k_ddr_pmu *pmu,
+ bool enable)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = readq_relaxed(pmu->base + p_data->cnt_freerun_ctrl);
+ if (enable)
+ val |= DDRC_PERF_FREERUN_WRITE_EN;
+ else
+ val &= ~DDRC_PERF_FREERUN_WRITE_EN;
+
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_ctrl);
+}
+
+static void ddr_pmu_ody_read_clear_freerun(struct cn10k_ddr_pmu *pmu)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = DDRC_FREERUN_READ_CNT_CLR;
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_clr);
+}
+
+static void ddr_pmu_ody_write_clear_freerun(struct cn10k_ddr_pmu *pmu)
+{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ u64 val;
+
+ val = DDRC_FREERUN_WRITE_CNT_CLR;
+ writeq_relaxed(val, pmu->base + p_data->cnt_freerun_clr);
+}
+
+static void ddr_pmu_ody_overflow_hander(struct cn10k_ddr_pmu *pmu, int evt_idx)
+{
+ /*
+ * On reaching the maximum value of the counter, the counter freezes
+ * there. The particular event is updated and the respective counter
+ * is stopped and started again so that it starts counting from zero
+ */
+ cn10k_ddr_perf_event_update(pmu->events[evt_idx]);
+ cn10k_ddr_perf_counter_stop(pmu, evt_idx);
+ cn10k_ddr_perf_counter_start(pmu, evt_idx);
+}
+
static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
{
+ const struct ddr_pmu_platform_data *p_data = pmu->p_data;
+ const struct ddr_pmu_ops *ops = pmu->ops;
struct perf_event *event;
struct hw_perf_event *hwc;
u64 prev_count, new_count;
@@ -586,11 +888,9 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
continue;
value = cn10k_ddr_perf_read_counter(pmu, i);
- if (value == DDRC_PERF_CNT_MAX_VALUE) {
+ if (value == p_data->counter_max_val) {
pr_info("Counter-(%d) reached max value\n", i);
- cn10k_ddr_perf_event_update_all(pmu);
- cn10k_ddr_perf_pmu_disable(&pmu->pmu);
- cn10k_ddr_perf_pmu_enable(&pmu->pmu);
+ ops->pmu_overflow_handler(pmu, i);
}
}
@@ -629,11 +929,68 @@ static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
return 0;
}
+static const struct ddr_pmu_ops ddr_pmu_ops = {
+ .enable_read_freerun_counter = ddr_pmu_enable_read_freerun,
+ .enable_write_freerun_counter = ddr_pmu_enable_write_freerun,
+ .clear_read_freerun_counter = ddr_pmu_read_clear_freerun,
+ .clear_write_freerun_counter = ddr_pmu_write_clear_freerun,
+ .pmu_overflow_handler = ddr_pmu_overflow_hander,
+};
+
+#if defined(CONFIG_ACPI) || defined(CONFIG_OF)
+static const struct ddr_pmu_platform_data cn10k_ddr_pmu_pdata = {
+ .counter_overflow_val = BIT_ULL(48),
+ .counter_max_val = GENMASK_ULL(48, 0),
+ .cnt_base = CN10K_DDRC_PERF_CNT_VALUE_BASE,
+ .cfg_base = CN10K_DDRC_PERF_CFG_BASE,
+ .cnt_op_mode_ctrl = CN10K_DDRC_PERF_CNT_OP_MODE_CTRL,
+ .cnt_start_op_ctrl = CN10K_DDRC_PERF_CNT_START_OP_CTRL,
+ .cnt_end_op_ctrl = CN10K_DDRC_PERF_CNT_END_OP_CTRL,
+ .cnt_end_status = CN10K_DDRC_PERF_CNT_END_STATUS,
+ .cnt_freerun_en = CN10K_DDRC_PERF_CNT_FREERUN_EN,
+ .cnt_freerun_ctrl = CN10K_DDRC_PERF_CNT_FREERUN_CTRL,
+ .cnt_freerun_clr = 0,
+ .cnt_value_wr_op = CN10K_DDRC_PERF_CNT_VALUE_WR_OP,
+ .cnt_value_rd_op = CN10K_DDRC_PERF_CNT_VALUE_RD_OP,
+ .is_cn10k = TRUE,
+};
+#endif
+
+static const struct ddr_pmu_ops ddr_pmu_ody_ops = {
+ .enable_read_freerun_counter = ddr_pmu_ody_enable_read_freerun,
+ .enable_write_freerun_counter = ddr_pmu_ody_enable_write_freerun,
+ .clear_read_freerun_counter = ddr_pmu_ody_read_clear_freerun,
+ .clear_write_freerun_counter = ddr_pmu_ody_write_clear_freerun,
+ .pmu_overflow_handler = ddr_pmu_ody_overflow_hander,
+};
+
+#ifdef CONFIG_ACPI
+static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = {
+ .counter_overflow_val = 0,
+ .counter_max_val = GENMASK_ULL(63, 0),
+ .cnt_base = ODY_DDRC_PERF_CNT_VALUE_BASE,
+ .cfg_base = ODY_DDRC_PERF_CFG_BASE,
+ .cnt_op_mode_ctrl = ODY_DDRC_PERF_CNT_OP_MODE_CTRL,
+ .cnt_start_op_ctrl = ODY_DDRC_PERF_CNT_START_OP_CTRL,
+ .cnt_end_op_ctrl = ODY_DDRC_PERF_CNT_END_OP_CTRL,
+ .cnt_end_status = ODY_DDRC_PERF_CNT_END_STATUS,
+ .cnt_freerun_en = 0,
+ .cnt_freerun_ctrl = ODY_DDRC_PERF_CNT_FREERUN_CTRL,
+ .cnt_freerun_clr = ODY_DDRC_PERF_CNT_FREERUN_CLR,
+ .cnt_value_wr_op = ODY_DDRC_PERF_CNT_VALUE_WR_OP,
+ .cnt_value_rd_op = ODY_DDRC_PERF_CNT_VALUE_RD_OP,
+ .is_ody = TRUE,
+};
+#endif
+
static int cn10k_ddr_perf_probe(struct platform_device *pdev)
{
+ const struct ddr_pmu_platform_data *dev_data;
struct cn10k_ddr_pmu *ddr_pmu;
struct resource *res;
void __iomem *base;
+ bool is_cn10k;
+ bool is_ody;
char *name;
int ret;
@@ -644,30 +1001,60 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
ddr_pmu->dev = &pdev->dev;
platform_set_drvdata(pdev, ddr_pmu);
+ dev_data = device_get_match_data(&pdev->dev);
+ if (!dev_data) {
+ dev_err(&pdev->dev, "Error: No device match data found\n");
+ return -ENODEV;
+ }
+
base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(base))
return PTR_ERR(base);
ddr_pmu->base = base;
- /* Setup the PMU counter to work in manual mode */
- writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base +
- DDRC_PERF_CNT_OP_MODE_CTRL);
-
- ddr_pmu->pmu = (struct pmu) {
- .module = THIS_MODULE,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
- .task_ctx_nr = perf_invalid_context,
- .attr_groups = cn10k_attr_groups,
- .event_init = cn10k_ddr_perf_event_init,
- .add = cn10k_ddr_perf_event_add,
- .del = cn10k_ddr_perf_event_del,
- .start = cn10k_ddr_perf_event_start,
- .stop = cn10k_ddr_perf_event_stop,
- .read = cn10k_ddr_perf_event_update,
- .pmu_enable = cn10k_ddr_perf_pmu_enable,
- .pmu_disable = cn10k_ddr_perf_pmu_disable,
- };
+ ddr_pmu->p_data = dev_data;
+ is_cn10k = ddr_pmu->p_data->is_cn10k;
+ is_ody = ddr_pmu->p_data->is_ody;
+
+ if (is_cn10k) {
+ ddr_pmu->ops = &ddr_pmu_ops;
+ /* Setup the PMU counter to work in manual mode */
+ writeq_relaxed(OP_MODE_CTRL_VAL_MANUAL, ddr_pmu->base +
+ ddr_pmu->p_data->cnt_op_mode_ctrl);
+
+ ddr_pmu->pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = cn10k_attr_groups,
+ .event_init = cn10k_ddr_perf_event_init,
+ .add = cn10k_ddr_perf_event_add,
+ .del = cn10k_ddr_perf_event_del,
+ .start = cn10k_ddr_perf_event_start,
+ .stop = cn10k_ddr_perf_event_stop,
+ .read = cn10k_ddr_perf_event_update,
+ .pmu_enable = cn10k_ddr_perf_pmu_enable,
+ .pmu_disable = cn10k_ddr_perf_pmu_disable,
+ };
+ }
+
+ if (is_ody) {
+ ddr_pmu->ops = &ddr_pmu_ody_ops;
+
+ ddr_pmu->pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = odyssey_attr_groups,
+ .event_init = cn10k_ddr_perf_event_init,
+ .add = cn10k_ddr_perf_event_add,
+ .del = cn10k_ddr_perf_event_del,
+ .start = cn10k_ddr_perf_event_start,
+ .stop = cn10k_ddr_perf_event_stop,
+ .read = cn10k_ddr_perf_event_update,
+ };
+ }
/* Choose this cpu to collect perf data */
ddr_pmu->cpu = raw_smp_processor_id();
@@ -677,8 +1064,8 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
if (!name)
return -ENOMEM;
- hrtimer_init(&ddr_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- ddr_pmu->hrtimer.function = cn10k_ddr_pmu_timer_handler;
+ hrtimer_setup(&ddr_pmu->hrtimer, cn10k_ddr_pmu_timer_handler, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
cpuhp_state_add_instance_nocalls(
CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
@@ -688,7 +1075,7 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
if (ret)
goto error;
- pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start);
+ pr_info("DDR PMU Driver for ddrc@%llx\n", res->start);
return 0;
error:
cpuhp_state_remove_instance_nocalls(
@@ -697,7 +1084,7 @@ error:
return ret;
}
-static int cn10k_ddr_perf_remove(struct platform_device *pdev)
+static void cn10k_ddr_perf_remove(struct platform_device *pdev)
{
struct cn10k_ddr_pmu *ddr_pmu = platform_get_drvdata(pdev);
@@ -706,21 +1093,30 @@ static int cn10k_ddr_perf_remove(struct platform_device *pdev)
&ddr_pmu->node);
perf_pmu_unregister(&ddr_pmu->pmu);
- return 0;
}
#ifdef CONFIG_OF
static const struct of_device_id cn10k_ddr_pmu_of_match[] = {
- { .compatible = "marvell,cn10k-ddr-pmu", },
+ { .compatible = "marvell,cn10k-ddr-pmu", .data = &cn10k_ddr_pmu_pdata },
{ },
};
MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match);
#endif
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id cn10k_ddr_pmu_acpi_match[] = {
+ {"MRVL000A", (kernel_ulong_t)&cn10k_ddr_pmu_pdata },
+ {"MRVL000C", (kernel_ulong_t)&odyssey_ddr_pmu_pdata},
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, cn10k_ddr_pmu_acpi_match);
+#endif
+
static struct platform_driver cn10k_ddr_pmu_driver = {
.driver = {
.name = "cn10k-ddr-pmu",
.of_match_table = of_match_ptr(cn10k_ddr_pmu_of_match),
+ .acpi_match_table = ACPI_PTR(cn10k_ddr_pmu_acpi_match),
.suppress_bind_attrs = true,
},
.probe = cn10k_ddr_perf_probe,
@@ -755,4 +1151,5 @@ module_init(cn10k_ddr_pmu_init);
module_exit(cn10k_ddr_pmu_exit);
MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>");
+MODULE_DESCRIPTION("Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index a1166afb3702..51ccb0befa05 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -6,13 +6,13 @@
#define pr_fmt(fmt) "tad_pmu: " fmt
+#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/cpuhotplug.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
+#include <linux/acpi.h>
#define TAD_PFC_OFFSET 0x800
#define TAD_PFC(counter) (TAD_PFC_OFFSET | (counter << 3))
@@ -37,6 +37,15 @@ struct tad_pmu {
DECLARE_BITMAP(counters_map, TAD_MAX_COUNTERS);
};
+enum mrvl_tad_pmu_version {
+ TAD_PMU_V1 = 1,
+ TAD_PMU_V2,
+};
+
+struct tad_pmu_data {
+ int id;
+};
+
static int tad_pmu_cpuhp_state;
static void tad_pmu_event_counter_read(struct perf_event *event)
@@ -214,6 +223,24 @@ static const struct attribute_group tad_pmu_events_attr_group = {
.attrs = tad_pmu_event_attrs,
};
+static struct attribute *ody_tad_pmu_event_attrs[] = {
+ TAD_PMU_EVENT_ATTR(tad_req_msh_in_exlmn, 0x3),
+ TAD_PMU_EVENT_ATTR(tad_alloc_dtg, 0x1a),
+ TAD_PMU_EVENT_ATTR(tad_alloc_ltg, 0x1b),
+ TAD_PMU_EVENT_ATTR(tad_alloc_any, 0x1c),
+ TAD_PMU_EVENT_ATTR(tad_hit_dtg, 0x1d),
+ TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e),
+ TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f),
+ TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20),
+ TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xFF),
+ NULL
+};
+
+static const struct attribute_group ody_tad_pmu_events_attr_group = {
+ .name = "events",
+ .attrs = ody_tad_pmu_event_attrs,
+};
+
PMU_FORMAT_ATTR(event, "config:0-7");
static struct attribute *tad_pmu_format_attrs[] = {
@@ -252,15 +279,24 @@ static const struct attribute_group *tad_pmu_attr_groups[] = {
NULL
};
+static const struct attribute_group *ody_tad_pmu_attr_groups[] = {
+ &ody_tad_pmu_events_attr_group,
+ &tad_pmu_format_attr_group,
+ &tad_pmu_cpumask_attr_group,
+ NULL
+};
+
static int tad_pmu_probe(struct platform_device *pdev)
{
- struct device_node *node = pdev->dev.of_node;
+ const struct tad_pmu_data *dev_data;
+ struct device *dev = &pdev->dev;
struct tad_region *regions;
struct tad_pmu *tad_pmu;
struct resource *res;
u32 tad_pmu_page_size;
u32 tad_page_size;
u32 tad_cnt;
+ int version;
int i, ret;
char *name;
@@ -270,27 +306,34 @@ static int tad_pmu_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, tad_pmu);
+ dev_data = device_get_match_data(&pdev->dev);
+ if (!dev_data) {
+ dev_err(&pdev->dev, "Error: No device match data found\n");
+ return -ENODEV;
+ }
+ version = dev_data->id;
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
dev_err(&pdev->dev, "Mem resource not found\n");
return -ENODEV;
}
- ret = of_property_read_u32(node, "marvell,tad-page-size",
- &tad_page_size);
+ ret = device_property_read_u32(dev, "marvell,tad-page-size",
+ &tad_page_size);
if (ret) {
dev_err(&pdev->dev, "Can't find tad-page-size property\n");
return ret;
}
- ret = of_property_read_u32(node, "marvell,tad-pmu-page-size",
- &tad_pmu_page_size);
+ ret = device_property_read_u32(dev, "marvell,tad-pmu-page-size",
+ &tad_pmu_page_size);
if (ret) {
dev_err(&pdev->dev, "Can't find tad-pmu-page-size property\n");
return ret;
}
- ret = of_property_read_u32(node, "marvell,tad-cnt", &tad_cnt);
+ ret = device_property_read_u32(dev, "marvell,tad-cnt", &tad_cnt);
if (ret) {
dev_err(&pdev->dev, "Can't find tad-cnt property\n");
return ret;
@@ -319,7 +362,6 @@ static int tad_pmu_probe(struct platform_device *pdev)
tad_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
- .attr_groups = tad_pmu_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE |
PERF_PMU_CAP_NO_INTERRUPT,
.task_ctx_nr = perf_invalid_context,
@@ -332,6 +374,11 @@ static int tad_pmu_probe(struct platform_device *pdev)
.read = tad_pmu_event_counter_read,
};
+ if (version == TAD_PMU_V1)
+ tad_pmu->pmu.attr_groups = tad_pmu_attr_groups;
+ else
+ tad_pmu->pmu.attr_groups = ody_tad_pmu_attr_groups;
+
tad_pmu->cpu = raw_smp_processor_id();
/* Register pmu instance for cpu hotplug */
@@ -351,28 +398,48 @@ static int tad_pmu_probe(struct platform_device *pdev)
return ret;
}
-static int tad_pmu_remove(struct platform_device *pdev)
+static void tad_pmu_remove(struct platform_device *pdev)
{
struct tad_pmu *pmu = platform_get_drvdata(pdev);
cpuhp_state_remove_instance_nocalls(tad_pmu_cpuhp_state,
&pmu->node);
perf_pmu_unregister(&pmu->pmu);
-
- return 0;
}
+#if defined(CONFIG_OF) || defined(CONFIG_ACPI)
+static const struct tad_pmu_data tad_pmu_data = {
+ .id = TAD_PMU_V1,
+};
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct tad_pmu_data tad_pmu_v2_data = {
+ .id = TAD_PMU_V2,
+};
+#endif
+
#ifdef CONFIG_OF
static const struct of_device_id tad_pmu_of_match[] = {
- { .compatible = "marvell,cn10k-tad-pmu", },
+ { .compatible = "marvell,cn10k-tad-pmu", .data = &tad_pmu_data },
+ {},
+};
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id tad_pmu_acpi_match[] = {
+ {"MRVL000B", (kernel_ulong_t)&tad_pmu_data},
+ {"MRVL000D", (kernel_ulong_t)&tad_pmu_v2_data},
{},
};
+MODULE_DEVICE_TABLE(acpi, tad_pmu_acpi_match);
#endif
static struct platform_driver tad_pmu_driver = {
.driver = {
.name = "cn10k_tad_pmu",
.of_match_table = of_match_ptr(tad_pmu_of_match),
+ .acpi_match_table = ACPI_PTR(tad_pmu_acpi_match),
.suppress_bind_attrs = true,
},
.probe = tad_pmu_probe,
diff --git a/drivers/perf/marvell_pem_pmu.c b/drivers/perf/marvell_pem_pmu.c
new file mode 100644
index 000000000000..29fbcd1848e4
--- /dev/null
+++ b/drivers/perf/marvell_pem_pmu.c
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Marvell PEM(PCIe RC) Performance Monitor Driver
+ *
+ * Copyright (C) 2024 Marvell.
+ */
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+/*
+ * Each of these events maps to a free running 64 bit counter
+ * with no event control, but can be reset.
+ */
+enum pem_events {
+ IB_TLP_NPR,
+ IB_TLP_PR,
+ IB_TLP_CPL,
+ IB_TLP_DWORDS_NPR,
+ IB_TLP_DWORDS_PR,
+ IB_TLP_DWORDS_CPL,
+ IB_INFLIGHT,
+ IB_READS,
+ IB_REQ_NO_RO_NCB,
+ IB_REQ_NO_RO_EBUS,
+ OB_TLP_NPR,
+ OB_TLP_PR,
+ OB_TLP_CPL,
+ OB_TLP_DWORDS_NPR,
+ OB_TLP_DWORDS_PR,
+ OB_TLP_DWORDS_CPL,
+ OB_INFLIGHT,
+ OB_READS,
+ OB_MERGES_NPR,
+ OB_MERGES_PR,
+ OB_MERGES_CPL,
+ ATS_TRANS,
+ ATS_TRANS_LATENCY,
+ ATS_PRI,
+ ATS_PRI_LATENCY,
+ ATS_INV,
+ ATS_INV_LATENCY,
+ PEM_EVENTIDS_MAX
+};
+
+static u64 eventid_to_offset_table[] = {
+ [IB_TLP_NPR] = 0x0,
+ [IB_TLP_PR] = 0x8,
+ [IB_TLP_CPL] = 0x10,
+ [IB_TLP_DWORDS_NPR] = 0x100,
+ [IB_TLP_DWORDS_PR] = 0x108,
+ [IB_TLP_DWORDS_CPL] = 0x110,
+ [IB_INFLIGHT] = 0x200,
+ [IB_READS] = 0x300,
+ [IB_REQ_NO_RO_NCB] = 0x400,
+ [IB_REQ_NO_RO_EBUS] = 0x408,
+ [OB_TLP_NPR] = 0x500,
+ [OB_TLP_PR] = 0x508,
+ [OB_TLP_CPL] = 0x510,
+ [OB_TLP_DWORDS_NPR] = 0x600,
+ [OB_TLP_DWORDS_PR] = 0x608,
+ [OB_TLP_DWORDS_CPL] = 0x610,
+ [OB_INFLIGHT] = 0x700,
+ [OB_READS] = 0x800,
+ [OB_MERGES_NPR] = 0x900,
+ [OB_MERGES_PR] = 0x908,
+ [OB_MERGES_CPL] = 0x910,
+ [ATS_TRANS] = 0x2D18,
+ [ATS_TRANS_LATENCY] = 0x2D20,
+ [ATS_PRI] = 0x2D28,
+ [ATS_PRI_LATENCY] = 0x2D30,
+ [ATS_INV] = 0x2D38,
+ [ATS_INV_LATENCY] = 0x2D40,
+};
+
+struct pem_pmu {
+ struct pmu pmu;
+ void __iomem *base;
+ unsigned int cpu;
+ struct device *dev;
+ struct hlist_node node;
+};
+
+#define to_pem_pmu(p) container_of(p, struct pem_pmu, pmu)
+
+static int eventid_to_offset(int eventid)
+{
+ return eventid_to_offset_table[eventid];
+}
+
+/* Events */
+static ssize_t pem_pmu_event_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define PEM_EVENT_ATTR(_name, _id) \
+ (&((struct perf_pmu_events_attr[]) { \
+ { .attr = __ATTR(_name, 0444, pem_pmu_event_show, NULL), \
+ .id = _id, } \
+ })[0].attr.attr)
+
+static struct attribute *pem_perf_events_attrs[] = {
+ PEM_EVENT_ATTR(ib_tlp_npr, IB_TLP_NPR),
+ PEM_EVENT_ATTR(ib_tlp_pr, IB_TLP_PR),
+ PEM_EVENT_ATTR(ib_tlp_cpl_partid, IB_TLP_CPL),
+ PEM_EVENT_ATTR(ib_tlp_dwords_npr, IB_TLP_DWORDS_NPR),
+ PEM_EVENT_ATTR(ib_tlp_dwords_pr, IB_TLP_DWORDS_PR),
+ PEM_EVENT_ATTR(ib_tlp_dwords_cpl_partid, IB_TLP_DWORDS_CPL),
+ PEM_EVENT_ATTR(ib_inflight, IB_INFLIGHT),
+ PEM_EVENT_ATTR(ib_reads, IB_READS),
+ PEM_EVENT_ATTR(ib_req_no_ro_ncb, IB_REQ_NO_RO_NCB),
+ PEM_EVENT_ATTR(ib_req_no_ro_ebus, IB_REQ_NO_RO_EBUS),
+ PEM_EVENT_ATTR(ob_tlp_npr_partid, OB_TLP_NPR),
+ PEM_EVENT_ATTR(ob_tlp_pr_partid, OB_TLP_PR),
+ PEM_EVENT_ATTR(ob_tlp_cpl_partid, OB_TLP_CPL),
+ PEM_EVENT_ATTR(ob_tlp_dwords_npr_partid, OB_TLP_DWORDS_NPR),
+ PEM_EVENT_ATTR(ob_tlp_dwords_pr_partid, OB_TLP_DWORDS_PR),
+ PEM_EVENT_ATTR(ob_tlp_dwords_cpl_partid, OB_TLP_DWORDS_CPL),
+ PEM_EVENT_ATTR(ob_inflight_partid, OB_INFLIGHT),
+ PEM_EVENT_ATTR(ob_reads_partid, OB_READS),
+ PEM_EVENT_ATTR(ob_merges_npr_partid, OB_MERGES_NPR),
+ PEM_EVENT_ATTR(ob_merges_pr_partid, OB_MERGES_PR),
+ PEM_EVENT_ATTR(ob_merges_cpl_partid, OB_MERGES_CPL),
+ PEM_EVENT_ATTR(ats_trans, ATS_TRANS),
+ PEM_EVENT_ATTR(ats_trans_latency, ATS_TRANS_LATENCY),
+ PEM_EVENT_ATTR(ats_pri, ATS_PRI),
+ PEM_EVENT_ATTR(ats_pri_latency, ATS_PRI_LATENCY),
+ PEM_EVENT_ATTR(ats_inv, ATS_INV),
+ PEM_EVENT_ATTR(ats_inv_latency, ATS_INV_LATENCY),
+ NULL
+};
+
+static struct attribute_group pem_perf_events_attr_group = {
+ .name = "events",
+ .attrs = pem_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-5");
+
+static struct attribute *pem_perf_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL
+};
+
+static struct attribute_group pem_perf_format_attr_group = {
+ .name = "format",
+ .attrs = pem_perf_format_attrs,
+};
+
+/* cpumask */
+static ssize_t pem_perf_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct pem_pmu *pmu = dev_get_drvdata(dev);
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute pem_perf_cpumask_attr =
+ __ATTR(cpumask, 0444, pem_perf_cpumask_show, NULL);
+
+static struct attribute *pem_perf_cpumask_attrs[] = {
+ &pem_perf_cpumask_attr.attr,
+ NULL
+};
+
+static struct attribute_group pem_perf_cpumask_attr_group = {
+ .attrs = pem_perf_cpumask_attrs,
+};
+
+static const struct attribute_group *pem_perf_attr_groups[] = {
+ &pem_perf_events_attr_group,
+ &pem_perf_cpumask_attr_group,
+ &pem_perf_format_attr_group,
+ NULL
+};
+
+static int pem_perf_event_init(struct perf_event *event)
+{
+ struct pem_pmu *pmu = to_pem_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_event *sibling;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (event->attr.config >= PEM_EVENTIDS_MAX)
+ return -EINVAL;
+
+ if (is_sampling_event(event) ||
+ event->attach_state & PERF_ATTACH_TASK) {
+ return -EOPNOTSUPP;
+ }
+
+ if (event->cpu < 0)
+ return -EOPNOTSUPP;
+
+ /* We must NOT create groups containing mixed PMUs */
+ if (event->group_leader->pmu != event->pmu &&
+ !is_software_event(event->group_leader))
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, event->group_leader) {
+ if (sibling->pmu != event->pmu &&
+ !is_software_event(sibling))
+ return -EINVAL;
+ }
+ /*
+ * Set ownership of event to one CPU, same event can not be observed
+ * on multiple cpus at same time.
+ */
+ event->cpu = pmu->cpu;
+ hwc->idx = -1;
+ return 0;
+}
+
+static u64 pem_perf_read_counter(struct pem_pmu *pmu,
+ struct perf_event *event, int eventid)
+{
+ return readq_relaxed(pmu->base + eventid_to_offset(eventid));
+}
+
+static void pem_perf_event_update(struct perf_event *event)
+{
+ struct pem_pmu *pmu = to_pem_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_count, new_count;
+
+ do {
+ prev_count = local64_read(&hwc->prev_count);
+ new_count = pem_perf_read_counter(pmu, event, hwc->idx);
+ } while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
+
+ local64_add((new_count - prev_count), &event->count);
+}
+
+static void pem_perf_event_start(struct perf_event *event, int flags)
+{
+ struct pem_pmu *pmu = to_pem_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int eventid = hwc->idx;
+
+ /*
+ * All counters are free-running and associated with
+ * a fixed event to track in Hardware
+ */
+ local64_set(&hwc->prev_count,
+ pem_perf_read_counter(pmu, event, eventid));
+
+ hwc->state = 0;
+}
+
+static int pem_perf_event_add(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->idx = event->attr.config;
+ if (WARN_ON_ONCE(hwc->idx >= PEM_EVENTIDS_MAX))
+ return -EINVAL;
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ pem_perf_event_start(event, flags);
+
+ return 0;
+}
+
+static void pem_perf_event_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (flags & PERF_EF_UPDATE)
+ pem_perf_event_update(event);
+
+ hwc->state |= PERF_HES_STOPPED;
+}
+
+static void pem_perf_event_del(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ pem_perf_event_stop(event, PERF_EF_UPDATE);
+ hwc->idx = -1;
+}
+
+static int pem_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct pem_pmu *pmu = hlist_entry_safe(node, struct pem_pmu, node);
+ unsigned int target;
+
+ if (cpu != pmu->cpu)
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+ pmu->cpu = target;
+ return 0;
+}
+
+static int pem_perf_probe(struct platform_device *pdev)
+{
+ struct pem_pmu *pem_pmu;
+ struct resource *res;
+ void __iomem *base;
+ char *name;
+ int ret;
+
+ pem_pmu = devm_kzalloc(&pdev->dev, sizeof(*pem_pmu), GFP_KERNEL);
+ if (!pem_pmu)
+ return -ENOMEM;
+
+ pem_pmu->dev = &pdev->dev;
+ platform_set_drvdata(pdev, pem_pmu);
+
+ base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ pem_pmu->base = base;
+
+ pem_pmu->pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = pem_perf_attr_groups,
+ .event_init = pem_perf_event_init,
+ .add = pem_perf_event_add,
+ .del = pem_perf_event_del,
+ .start = pem_perf_event_start,
+ .stop = pem_perf_event_stop,
+ .read = pem_perf_event_update,
+ };
+
+ /* Choose this cpu to collect perf data */
+ pem_pmu->cpu = raw_smp_processor_id();
+
+ name = devm_kasprintf(pem_pmu->dev, GFP_KERNEL, "mrvl_pcie_rc_pmu_%llx",
+ res->start);
+ if (!name)
+ return -ENOMEM;
+
+ cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
+ &pem_pmu->node);
+
+ ret = perf_pmu_register(&pem_pmu->pmu, name, -1);
+ if (ret)
+ goto error;
+
+ return 0;
+error:
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
+ &pem_pmu->node);
+ return ret;
+}
+
+static void pem_perf_remove(struct platform_device *pdev)
+{
+ struct pem_pmu *pem_pmu = platform_get_drvdata(pdev);
+
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
+ &pem_pmu->node);
+
+ perf_pmu_unregister(&pem_pmu->pmu);
+}
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id pem_pmu_acpi_match[] = {
+ {"MRVL000E", 0},
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, pem_pmu_acpi_match);
+#endif
+
+static struct platform_driver pem_pmu_driver = {
+ .driver = {
+ .name = "pem-pmu",
+ .acpi_match_table = ACPI_PTR(pem_pmu_acpi_match),
+ .suppress_bind_attrs = true,
+ },
+ .probe = pem_perf_probe,
+ .remove = pem_perf_remove,
+};
+
+static int __init pem_pmu_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
+ "perf/marvell/pem:online", NULL,
+ pem_pmu_offline_cpu);
+ if (ret)
+ return ret;
+
+ ret = platform_driver_register(&pem_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE);
+ return ret;
+}
+
+static void __exit pem_pmu_exit(void)
+{
+ platform_driver_unregister(&pem_pmu_driver);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE);
+}
+
+module_init(pem_pmu_init);
+module_exit(pem_pmu_exit);
+
+MODULE_DESCRIPTION("Marvell PEM Perf driver");
+MODULE_AUTHOR("Gowthami Thiagarajan <gthiagarajan@marvell.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index aaca6db7d8f6..ea8c85729937 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -801,9 +801,8 @@ static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
- struct cluster_pmu *cluster;
struct l2cache_pmu *l2cache_pmu;
- cpumask_t cluster_online_cpus;
+ struct cluster_pmu *cluster;
unsigned int target;
l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
@@ -820,9 +819,8 @@ static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
cluster->on_cpu = -1;
/* Any other CPU for this cluster which is still online */
- cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus,
- cpu_online_mask);
- target = cpumask_any_but(&cluster_online_cpus, cpu);
+ target = cpumask_any_and_but(&cluster->cluster_cpus,
+ cpu_online_mask, cpu);
if (target >= nr_cpu_ids) {
disable_irq(cluster->irq);
return 0;
@@ -857,7 +855,6 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
return -ENOMEM;
INIT_LIST_HEAD(&cluster->next);
- list_add(&cluster->next, &l2cache_pmu->clusters);
cluster->cluster_id = fw_cluster_id;
irq = platform_get_irq(sdev, 0);
@@ -883,6 +880,7 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
spin_lock_init(&cluster->pmu_lock);
+ list_add(&cluster->next, &l2cache_pmu->clusters);
l2cache_pmu->num_pmus++;
return 0;
@@ -904,6 +902,7 @@ static int l2_cache_pmu_probe(struct platform_device *pdev)
l2cache_pmu->pmu = (struct pmu) {
/* suffix is instance id for future use with multiple sockets */
.name = "l2cache_0",
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = l2_cache_pmu_enable,
.pmu_disable = l2_cache_pmu_disable,
@@ -965,7 +964,7 @@ out_unregister:
return err;
}
-static int l2_cache_pmu_remove(struct platform_device *pdev)
+static void l2_cache_pmu_remove(struct platform_device *pdev)
{
struct l2cache_pmu *l2cache_pmu =
to_l2cache_pmu(platform_get_drvdata(pdev));
@@ -973,7 +972,6 @@ static int l2_cache_pmu_remove(struct platform_device *pdev)
perf_pmu_unregister(&l2cache_pmu->pmu);
cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
&l2cache_pmu->node);
- return 0;
}
static struct platform_driver l2_cache_pmu_driver = {
diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c
index 346311a05460..66e6cabd6fff 100644
--- a/drivers/perf/qcom_l3_pmu.c
+++ b/drivers/perf/qcom_l3_pmu.c
@@ -609,18 +609,9 @@ static void qcom_l3_cache__event_read(struct perf_event *event)
/* formats */
-static ssize_t l3cache_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *eattr;
-
- eattr = container_of(attr, struct dev_ext_attribute, attr);
- return sysfs_emit(buf, "%s\n", (char *) eattr->var);
-}
-
#define L3CACHE_PMU_FORMAT_ATTR(_name, _config) \
(&((struct dev_ext_attribute[]) { \
- { .attr = __ATTR(_name, 0444, l3cache_pmu_format_show, NULL), \
+ { .attr = __ATTR(_name, 0444, device_show_string, NULL), \
.var = (void *) _config, } \
})[0].attr.attr)
@@ -742,12 +733,13 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
l3pmu = devm_kzalloc(&pdev->dev, sizeof(*l3pmu), GFP_KERNEL);
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "l3cache_%s_%s",
- acpi_dev_parent(acpi_dev)->pnp.unique_id,
- acpi_dev->pnp.unique_id);
+ acpi_device_uid(acpi_dev_parent(acpi_dev)),
+ acpi_device_uid(acpi_dev));
if (!l3pmu || !name)
return -ENOMEM;
l3pmu->pmu = (struct pmu) {
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = qcom_l3_cache__pmu_enable,
@@ -763,8 +755,7 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
- memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- l3pmu->regs = devm_ioremap_resource(&pdev->dev, memrc);
+ l3pmu->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &memrc);
if (IS_ERR(l3pmu->regs))
return PTR_ERR(l3pmu->regs);
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index ebca5eab9c9b..7644147d50b4 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -14,9 +14,80 @@
#include <linux/perf/riscv_pmu.h>
#include <linux/printk.h>
#include <linux/smp.h>
+#include <linux/sched_clock.h>
#include <asm/sbi.h>
+static bool riscv_perf_user_access(struct perf_event *event)
+{
+ return ((event->attr.type == PERF_TYPE_HARDWARE) ||
+ (event->attr.type == PERF_TYPE_HW_CACHE) ||
+ (event->attr.type == PERF_TYPE_RAW)) &&
+ !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) &&
+ (event->hw.idx != -1);
+}
+
+void arch_perf_update_userpage(struct perf_event *event,
+ struct perf_event_mmap_page *userpg, u64 now)
+{
+ struct clock_read_data *rd;
+ unsigned int seq;
+ u64 ns;
+
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_time_short = 0;
+ userpg->cap_user_rdpmc = riscv_perf_user_access(event);
+
+ /*
+ * The counters are 64-bit but the priv spec doesn't mandate all the
+ * bits to be implemented: that's why, counter width can vary based on
+ * the cpu vendor.
+ */
+ if (userpg->cap_user_rdpmc)
+ userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1;
+
+ do {
+ rd = sched_clock_read_begin(&seq);
+
+ userpg->time_mult = rd->mult;
+ userpg->time_shift = rd->shift;
+ userpg->time_zero = rd->epoch_ns;
+ userpg->time_cycles = rd->epoch_cyc;
+ userpg->time_mask = rd->sched_clock_mask;
+
+ /*
+ * Subtract the cycle base, such that software that
+ * doesn't know about cap_user_time_short still 'works'
+ * assuming no wraps.
+ */
+ ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+ userpg->time_zero -= ns;
+
+ } while (sched_clock_read_retry(seq));
+
+ userpg->time_offset = userpg->time_zero - now;
+
+ /*
+ * time_shift is not expected to be greater than 31 due to
+ * the original published conversion algorithm shifting a
+ * 32-bit value (now specifies a 64-bit value) - refer
+ * perf_event_mmap_page documentation in perf_event.h.
+ */
+ if (userpg->time_shift == 32) {
+ userpg->time_shift = 31;
+ userpg->time_mult >>= 1;
+ }
+
+ /*
+ * Internal timekeeping for enabled/running/stopped times
+ * is always computed with the sched_clock.
+ */
+ userpg->cap_user_time = 1;
+ userpg->cap_user_time_zero = 1;
+ userpg->cap_user_time_short = 1;
+}
+
static unsigned long csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val) {\
@@ -77,19 +148,11 @@ u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- if (!rvpmu->ctr_get_width)
- /**
- * If the pmu driver doesn't support counter width, set it to default
- * maximum allowed by the specification.
- */
- cwidth = 63;
- else {
- if (hwc->idx == -1)
- /* Handle init case where idx is not initialized yet */
- cwidth = rvpmu->ctr_get_width(0);
- else
- cwidth = rvpmu->ctr_get_width(hwc->idx);
- }
+ if (hwc->idx == -1)
+ /* Handle init case where idx is not initialized yet */
+ cwidth = rvpmu->ctr_get_width(0);
+ else
+ cwidth = rvpmu->ctr_get_width(hwc->idx);
return GENMASK_ULL(cwidth, 0);
}
@@ -102,7 +165,7 @@ u64 riscv_pmu_event_update(struct perf_event *event)
unsigned long cmask;
u64 oldval, delta;
- if (!rvpmu->ctr_read)
+ if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE))
return 0;
cmask = riscv_pmu_ctr_get_width_mask(event);
@@ -126,8 +189,6 @@ void riscv_pmu_stop(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-
if (!(hwc->state & PERF_HES_STOPPED)) {
if (rvpmu->ctr_stop) {
rvpmu->ctr_stop(event, 0);
@@ -171,6 +232,8 @@ int riscv_pmu_event_set_period(struct perf_event *event)
local64_set(&hwc->prev_count, (u64)-left);
+ perf_event_update_userpage(event);
+
return overflow;
}
@@ -181,9 +244,6 @@ void riscv_pmu_start(struct perf_event *event, int flags)
uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
u64 init_val;
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
if (flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
@@ -249,6 +309,10 @@ static int riscv_pmu_event_init(struct perf_event *event)
u64 event_config = 0;
uint64_t cmask;
+ /* driver does not support branch stack sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
hwc->flags = 0;
mapped_event = rvpmu->event_map(event, &event_config);
if (mapped_event < 0) {
@@ -267,6 +331,9 @@ static int riscv_pmu_event_init(struct perf_event *event)
hwc->idx = -1;
hwc->event_base = mapped_event;
+ if (rvpmu->event_init)
+ rvpmu->event_init(event);
+
if (!is_sampling_event(event)) {
/*
* For non-sampling runs, limit the sample_period to half
@@ -283,6 +350,39 @@ static int riscv_pmu_event_init(struct perf_event *event)
return 0;
}
+static int riscv_pmu_event_idx(struct perf_event *event)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
+ return 0;
+
+ if (rvpmu->csr_index)
+ return rvpmu->csr_index(event) + 1;
+
+ return 0;
+}
+
+static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (rvpmu->event_mapped) {
+ rvpmu->event_mapped(event, mm);
+ perf_event_update_userpage(event);
+ }
+}
+
+static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (rvpmu->event_unmapped) {
+ rvpmu->event_unmapped(event, mm);
+ perf_event_update_userpage(event);
+ }
+}
+
struct riscv_pmu *riscv_pmu_alloc(void)
{
struct riscv_pmu *pmu;
@@ -304,9 +404,13 @@ struct riscv_pmu *riscv_pmu_alloc(void)
cpuc->n_events = 0;
for (i = 0; i < RISCV_MAX_COUNTERS; i++)
cpuc->events[i] = NULL;
+ cpuc->snapshot_addr = NULL;
}
pmu->pmu = (struct pmu) {
.event_init = riscv_pmu_event_init,
+ .event_mapped = riscv_pmu_event_mapped,
+ .event_unmapped = riscv_pmu_event_unmapped,
+ .event_idx = riscv_pmu_event_idx,
.add = riscv_pmu_add,
.del = riscv_pmu_del,
.start = riscv_pmu_start,
diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c
index ca9e20bfc7ac..93c8e0fdb589 100644
--- a/drivers/perf/riscv_pmu_legacy.c
+++ b/drivers/perf/riscv_pmu_legacy.c
@@ -13,7 +13,7 @@
#include <linux/platform_device.h>
#define RISCV_PMU_LEGACY_CYCLE 0
-#define RISCV_PMU_LEGACY_INSTRET 1
+#define RISCV_PMU_LEGACY_INSTRET 2
static bool pmu_init_done;
@@ -22,13 +22,13 @@ static int pmu_legacy_ctr_get_idx(struct perf_event *event)
struct perf_event_attr *attr = &event->attr;
if (event->attr.type != PERF_TYPE_HARDWARE)
- return -EOPNOTSUPP;
+ return -ENOENT;
if (attr->config == PERF_COUNT_HW_CPU_CYCLES)
return RISCV_PMU_LEGACY_CYCLE;
else if (attr->config == PERF_COUNT_HW_INSTRUCTIONS)
return RISCV_PMU_LEGACY_INSTRET;
else
- return -EOPNOTSUPP;
+ return -ENOENT;
}
/* For legacy config & counter index are same */
@@ -37,6 +37,12 @@ static int pmu_legacy_event_map(struct perf_event *event, u64 *config)
return pmu_legacy_ctr_get_idx(event);
}
+/* cycle & instret are always 64 bit, one bit less according to SBI spec */
+static int pmu_legacy_ctr_get_width(int idx)
+{
+ return 63;
+}
+
static u64 pmu_legacy_read_ctr(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -71,6 +77,29 @@ static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival)
local64_set(&hwc->prev_count, initial_val);
}
+static uint8_t pmu_legacy_csr_index(struct perf_event *event)
+{
+ return event->hw.idx;
+}
+
+static void pmu_legacy_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS)
+ return;
+
+ event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
+static void pmu_legacy_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS)
+ return;
+
+ event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
/*
* This is just a simple implementation to allow legacy implementations
* compatible with new RISC-V PMU driver framework.
@@ -88,9 +117,14 @@ static void pmu_legacy_init(struct riscv_pmu *pmu)
pmu->ctr_stop = NULL;
pmu->event_map = pmu_legacy_event_map;
pmu->ctr_get_idx = pmu_legacy_ctr_get_idx;
- pmu->ctr_get_width = NULL;
+ pmu->ctr_get_width = pmu_legacy_ctr_get_width;
pmu->ctr_clear_idx = NULL;
pmu->ctr_read = pmu_legacy_read_ctr;
+ pmu->event_mapped = pmu_legacy_event_mapped;
+ pmu->event_unmapped = pmu_legacy_event_unmapped;
+ pmu->csr_index = pmu_legacy_csr_index;
+ pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+ pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
}
@@ -102,6 +136,7 @@ static int pmu_legacy_device_probe(struct platform_device *pdev)
pmu = riscv_pmu_alloc();
if (!pmu)
return -ENOMEM;
+ pmu->pmu.parent = &pdev->dev;
pmu_legacy_init(pmu);
return 0;
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index f6507efe2a58..7dd282da67ce 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -19,13 +19,54 @@
#include <linux/of.h>
#include <linux/cpu_pm.h>
#include <linux/sched/clock.h>
+#include <linux/soc/andes/irq.h>
+#include <linux/workqueue.h>
#include <asm/errata_list.h>
#include <asm/sbi.h>
-#include <asm/hwcap.h>
-
-PMU_FORMAT_ATTR(event, "config:0-47");
-PMU_FORMAT_ATTR(firmware, "config:63");
+#include <asm/cpufeature.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/andes.h>
+
+#define ALT_SBI_PMU_OVERFLOW(__ovl) \
+asm volatile(ALTERNATIVE_2( \
+ "csrr %0, " __stringify(CSR_SCOUNTOVF), \
+ "csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \
+ THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \
+ CONFIG_ERRATA_THEAD_PMU, \
+ "csrr %0, " __stringify(ANDES_CSR_SCOUNTEROF), \
+ ANDES_VENDOR_ID, \
+ RISCV_ISA_VENDOR_EXT_XANDESPMU + RISCV_VENDOR_EXT_ALTERNATIVES_BASE, \
+ CONFIG_ANDES_CUSTOM_PMU) \
+ : "=r" (__ovl) : \
+ : "memory")
+
+#define ALT_SBI_PMU_OVF_CLEAR_PENDING(__irq_mask) \
+asm volatile(ALTERNATIVE( \
+ "csrc " __stringify(CSR_IP) ", %0\n\t", \
+ "csrc " __stringify(ANDES_CSR_SLIP) ", %0\n\t", \
+ ANDES_VENDOR_ID, \
+ RISCV_ISA_VENDOR_EXT_XANDESPMU + RISCV_VENDOR_EXT_ALTERNATIVES_BASE, \
+ CONFIG_ANDES_CUSTOM_PMU) \
+ : : "r"(__irq_mask) \
+ : "memory")
+
+#define SYSCTL_NO_USER_ACCESS 0
+#define SYSCTL_USER_ACCESS 1
+#define SYSCTL_LEGACY 2
+
+#define PERF_EVENT_FLAG_NO_USER_ACCESS BIT(SYSCTL_NO_USER_ACCESS)
+#define PERF_EVENT_FLAG_USER_ACCESS BIT(SYSCTL_USER_ACCESS)
+#define PERF_EVENT_FLAG_LEGACY BIT(SYSCTL_LEGACY)
+
+PMU_FORMAT_ATTR(event, "config:0-55");
+PMU_FORMAT_ATTR(firmware, "config:62-63");
+
+static bool sbi_v2_available;
+static bool sbi_v3_available;
+static DEFINE_STATIC_KEY_FALSE(sbi_pmu_snapshot_available);
+#define sbi_pmu_snapshot_available() \
+ static_branch_unlikely(&sbi_pmu_snapshot_available)
static struct attribute *riscv_arch_formats_attr[] = {
&format_attr_event.attr,
@@ -43,15 +84,23 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = {
NULL,
};
+/* Allow user mode access by default */
+static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS;
+
/*
- * RISC-V doesn't have hetergenous harts yet. This need to be part of
+ * RISC-V doesn't have heterogeneous harts yet. This need to be part of
* per_cpu in case of harts with different pmu counters
*/
static union sbi_pmu_ctr_info *pmu_ctr_list;
static bool riscv_pmu_use_irq;
static unsigned int riscv_pmu_irq_num;
+static unsigned int riscv_pmu_irq_mask;
static unsigned int riscv_pmu_irq;
+/* Cache the available counters in a bitmask */
+static unsigned long cmask;
+
+static int pmu_event_find_cache(u64 config);
struct sbi_pmu_event_data {
union {
union {
@@ -72,7 +121,7 @@ struct sbi_pmu_event_data {
};
};
-static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
+static struct sbi_pmu_event_data pmu_hw_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = {
SBI_PMU_HW_CPU_CYCLES,
SBI_PMU_EVENT_TYPE_HW, 0}},
@@ -106,7 +155,7 @@ static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
};
#define C(x) PERF_COUNT_HW_CACHE_##x
-static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
@@ -251,6 +300,103 @@ static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_M
},
};
+static int pmu_sbi_check_event_info(void)
+{
+ int num_events = ARRAY_SIZE(pmu_hw_event_map) + PERF_COUNT_HW_CACHE_MAX *
+ PERF_COUNT_HW_CACHE_OP_MAX * PERF_COUNT_HW_CACHE_RESULT_MAX;
+ struct riscv_pmu_event_info *event_info_shmem;
+ phys_addr_t base_addr;
+ int i, j, k, result = 0, count = 0;
+ struct sbiret ret;
+
+ event_info_shmem = kcalloc(num_events, sizeof(*event_info_shmem), GFP_KERNEL);
+ if (!event_info_shmem)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++)
+ event_info_shmem[count++].event_idx = pmu_hw_event_map[i].event_idx;
+
+ for (i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) {
+ for (j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) {
+ for (k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++)
+ event_info_shmem[count++].event_idx =
+ pmu_cache_event_map[i][j][k].event_idx;
+ }
+ }
+
+ base_addr = __pa(event_info_shmem);
+ if (IS_ENABLED(CONFIG_32BIT))
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_EVENT_GET_INFO, lower_32_bits(base_addr),
+ upper_32_bits(base_addr), count, 0, 0, 0);
+ else
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_EVENT_GET_INFO, base_addr, 0,
+ count, 0, 0, 0);
+ if (ret.error) {
+ result = -EOPNOTSUPP;
+ goto free_mem;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) {
+ if (!(event_info_shmem[i].output & RISCV_PMU_EVENT_INFO_OUTPUT_MASK))
+ pmu_hw_event_map[i].event_idx = -ENOENT;
+ }
+
+ count = ARRAY_SIZE(pmu_hw_event_map);
+
+ for (i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) {
+ for (j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) {
+ for (k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) {
+ if (!(event_info_shmem[count].output &
+ RISCV_PMU_EVENT_INFO_OUTPUT_MASK))
+ pmu_cache_event_map[i][j][k].event_idx = -ENOENT;
+ count++;
+ }
+ }
+ }
+
+free_mem:
+ kfree(event_info_shmem);
+
+ return result;
+}
+
+static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ 0, cmask, 0, edata->event_idx, 0, 0);
+ if (!ret.error) {
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
+ ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
+ } else if (ret.error == SBI_ERR_NOT_SUPPORTED) {
+ /* This event cannot be monitored by any counter */
+ edata->event_idx = -ENOENT;
+ }
+}
+
+static void pmu_sbi_check_std_events(struct work_struct *work)
+{
+ int ret;
+
+ if (sbi_v3_available) {
+ ret = pmu_sbi_check_event_info();
+ if (ret)
+ pr_err("pmu_sbi_check_event_info failed with error %d\n", ret);
+ return;
+ }
+
+ for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++)
+ pmu_sbi_check_event(&pmu_hw_event_map[i]);
+
+ for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++)
+ for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++)
+ for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++)
+ pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]);
+}
+
+static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events);
+
static int pmu_sbi_ctr_get_width(int idx)
{
return pmu_ctr_list[idx].width;
@@ -264,7 +410,129 @@ static bool pmu_sbi_ctr_is_fw(int cidx)
if (!info)
return false;
- return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false;
+ return info->type == SBI_PMU_CTR_TYPE_FW;
+}
+
+int riscv_pmu_get_event_info(u32 type, u64 config, u64 *econfig)
+{
+ int ret = -ENOENT;
+
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ if (config >= PERF_COUNT_HW_MAX)
+ return -EINVAL;
+ ret = pmu_hw_event_map[config].event_idx;
+ break;
+ case PERF_TYPE_HW_CACHE:
+ ret = pmu_event_find_cache(config);
+ break;
+ case PERF_TYPE_RAW:
+ /*
+ * As per SBI v0.3 specification,
+ * -- the upper 16 bits must be unused for a hardware raw event.
+ * As per SBI v2.0 specification,
+ * -- the upper 8 bits must be unused for a hardware raw event.
+ * Bits 63:62 are used to distinguish between raw events
+ * 00 - Hardware raw event
+ * 10 - SBI firmware events
+ * 11 - Risc-V platform specific firmware event
+ */
+ switch (config >> 62) {
+ case 0:
+ if (sbi_v3_available) {
+ /* Return error any bits [56-63] is set as it is not allowed by the spec */
+ if (!(config & ~RISCV_PMU_RAW_EVENT_V2_MASK)) {
+ if (econfig)
+ *econfig = config & RISCV_PMU_RAW_EVENT_V2_MASK;
+ ret = RISCV_PMU_RAW_EVENT_V2_IDX;
+ }
+ /* Return error any bits [48-63] is set as it is not allowed by the spec */
+ } else if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) {
+ if (econfig)
+ *econfig = config & RISCV_PMU_RAW_EVENT_MASK;
+ ret = RISCV_PMU_RAW_EVENT_IDX;
+ }
+ break;
+ case 2:
+ ret = (config & 0xFFFF) | (SBI_PMU_EVENT_TYPE_FW << 16);
+ break;
+ case 3:
+ /*
+ * For Risc-V platform specific firmware events
+ * Event code - 0xFFFF
+ * Event data - raw event encoding
+ */
+ ret = SBI_PMU_EVENT_TYPE_FW << 16 | RISCV_PLAT_FW_EVENT;
+ if (econfig)
+ *econfig = config & RISCV_PMU_PLAT_FW_EVENT_MASK;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(riscv_pmu_get_event_info);
+
+/*
+ * Returns the counter width of a programmable counter and number of hardware
+ * counters. As we don't support heterogeneous CPUs yet, it is okay to just
+ * return the counter width of the first programmable counter.
+ */
+int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr)
+{
+ int i;
+ union sbi_pmu_ctr_info *info;
+ u32 hpm_width = 0, hpm_count = 0;
+
+ if (!cmask)
+ return -EINVAL;
+
+ for_each_set_bit(i, &cmask, RISCV_MAX_COUNTERS) {
+ info = &pmu_ctr_list[i];
+ if (!info)
+ continue;
+ if (!hpm_width && info->csr != CSR_CYCLE && info->csr != CSR_INSTRET)
+ hpm_width = info->width;
+ if (info->type == SBI_PMU_CTR_TYPE_HW)
+ hpm_count++;
+ }
+
+ *hw_ctr_width = hpm_width;
+ *num_hw_ctr = hpm_count;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info);
+
+static uint8_t pmu_sbi_csr_index(struct perf_event *event)
+{
+ return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE;
+}
+
+static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event)
+{
+ unsigned long cflags = 0;
+ bool guest_events = false;
+
+ if (event->attr.config1 & RISCV_PMU_CONFIG1_GUEST_EVENTS)
+ guest_events = true;
+ if (event->attr.exclude_kernel)
+ cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VSINH : SBI_PMU_CFG_FLAG_SET_SINH;
+ if (event->attr.exclude_user)
+ cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VUINH : SBI_PMU_CFG_FLAG_SET_UINH;
+ if (guest_events && event->attr.exclude_hv)
+ cflags |= SBI_PMU_CFG_FLAG_SET_SINH;
+ if (event->attr.exclude_host)
+ cflags |= SBI_PMU_CFG_FLAG_SET_UINH | SBI_PMU_CFG_FLAG_SET_SINH;
+ if (event->attr.exclude_guest)
+ cflags |= SBI_PMU_CFG_FLAG_SET_VSINH | SBI_PMU_CFG_FLAG_SET_VUINH;
+
+ return cflags;
}
static int pmu_sbi_ctr_get_idx(struct perf_event *event)
@@ -274,22 +542,34 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
struct sbiret ret;
int idx;
- uint64_t cbase = 0;
+ uint64_t cbase = 0, cmask = rvpmu->cmask;
unsigned long cflags = 0;
- if (event->attr.exclude_kernel)
- cflags |= SBI_PMU_CFG_FLAG_SET_SINH;
- if (event->attr.exclude_user)
- cflags |= SBI_PMU_CFG_FLAG_SET_UINH;
+ cflags = pmu_sbi_get_filter_flags(event);
+
+ /*
+ * In legacy mode, we have to force the fixed counters for those events
+ * but not in the user access mode as we want to use the other counters
+ * that support sampling/filtering.
+ */
+ if ((hwc->flags & PERF_EVENT_FLAG_LEGACY) && (event->attr.type == PERF_TYPE_HARDWARE)) {
+ if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+ cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+ cmask = 1;
+ } else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
+ cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+ cmask = BIT(CSR_INSTRET - CSR_CYCLE);
+ }
+ }
/* retrieve the available counter index */
#if defined(CONFIG_32BIT)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
- rvpmu->cmask, cflags, hwc->event_base, hwc->config,
+ cmask, cflags, hwc->event_base, hwc->config,
hwc->config >> 32);
#else
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
- rvpmu->cmask, cflags, hwc->event_base, hwc->config, 0);
+ cmask, cflags, hwc->event_base, hwc->config, 0);
#endif
if (ret.error) {
pr_debug("Not able to find a counter for event %lx config %llx\n",
@@ -363,44 +643,97 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
{
u32 type = event->attr.type;
u64 config = event->attr.config;
- int bSoftware;
- u64 raw_config_val;
- int ret;
- switch (type) {
- case PERF_TYPE_HARDWARE:
- if (config >= PERF_COUNT_HW_MAX)
- return -EINVAL;
- ret = pmu_hw_event_map[event->attr.config].event_idx;
- break;
- case PERF_TYPE_HW_CACHE:
- ret = pmu_event_find_cache(config);
- break;
- case PERF_TYPE_RAW:
- /*
- * As per SBI specification, the upper 16 bits must be unused for
- * a raw event. Use the MSB (63b) to distinguish between hardware
- * raw event and firmware events.
- */
- bSoftware = config >> 63;
- raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK;
- if (bSoftware) {
- if (raw_config_val < SBI_PMU_FW_MAX)
- ret = (raw_config_val & 0xFFFF) |
- (SBI_PMU_EVENT_TYPE_FW << 16);
- else
- return -EINVAL;
- } else {
- ret = RISCV_PMU_RAW_EVENT_IDX;
- *econfig = raw_config_val;
+ /*
+ * Ensure we are finished checking standard hardware events for
+ * validity before allowing userspace to configure any events.
+ */
+ flush_work(&check_std_events_work);
+
+ return riscv_pmu_get_event_info(type, config, econfig);
+}
+
+static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
+
+ if (!cpu_hw_evt->snapshot_addr)
+ continue;
+
+ free_page((unsigned long)cpu_hw_evt->snapshot_addr);
+ cpu_hw_evt->snapshot_addr = NULL;
+ cpu_hw_evt->snapshot_addr_phys = 0;
+ }
+}
+
+static int pmu_sbi_snapshot_alloc(struct riscv_pmu *pmu)
+{
+ int cpu;
+ struct page *snapshot_page;
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
+
+ snapshot_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+ if (!snapshot_page) {
+ pmu_sbi_snapshot_free(pmu);
+ return -ENOMEM;
}
- break;
- default:
- ret = -EINVAL;
- break;
+ cpu_hw_evt->snapshot_addr = page_to_virt(snapshot_page);
+ cpu_hw_evt->snapshot_addr_phys = page_to_phys(snapshot_page);
}
- return ret;
+ return 0;
+}
+
+static int pmu_sbi_snapshot_disable(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, SBI_SHMEM_DISABLE,
+ SBI_SHMEM_DISABLE, 0, 0, 0, 0);
+ if (ret.error) {
+ pr_warn("failed to disable snapshot shared memory\n");
+ return sbi_err_map_linux_errno(ret.error);
+ }
+
+ return 0;
+}
+
+static int pmu_sbi_snapshot_setup(struct riscv_pmu *pmu, int cpu)
+{
+ struct cpu_hw_events *cpu_hw_evt;
+ struct sbiret ret = {0};
+
+ cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
+ if (!cpu_hw_evt->snapshot_addr_phys)
+ return -EINVAL;
+
+ if (cpu_hw_evt->snapshot_set_done)
+ return 0;
+
+ if (IS_ENABLED(CONFIG_32BIT))
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+ cpu_hw_evt->snapshot_addr_phys,
+ (u64)(cpu_hw_evt->snapshot_addr_phys) >> 32, 0, 0, 0, 0);
+ else
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+ cpu_hw_evt->snapshot_addr_phys, 0, 0, 0, 0, 0);
+
+ /* Free up the snapshot area memory and fall back to SBI PMU calls without snapshot */
+ if (ret.error) {
+ if (ret.error != SBI_ERR_NOT_SUPPORTED)
+ pr_warn("pmu snapshot setup failed with error %ld\n", ret.error);
+ return sbi_err_map_linux_errno(ret.error);
+ }
+
+ memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
+ cpu_hw_evt->snapshot_set_done = true;
+
+ return 0;
}
static u64 pmu_sbi_ctr_read(struct perf_event *event)
@@ -408,30 +741,68 @@ static u64 pmu_sbi_ctr_read(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
struct sbiret ret;
- union sbi_pmu_ctr_info info;
u64 val = 0;
+ struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
+ union sbi_pmu_ctr_info info = pmu_ctr_list[idx];
+
+ /* Read the value from the shared memory directly only if counter is stopped */
+ if (sbi_pmu_snapshot_available() && (hwc->state & PERF_HES_STOPPED)) {
+ val = sdata->ctr_values[idx];
+ return val;
+ }
if (pmu_sbi_is_fw_event(event)) {
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
hwc->idx, 0, 0, 0, 0, 0);
- if (!ret.error)
- val = ret.value;
+ if (ret.error)
+ return 0;
+
+ val = ret.value;
+ if (IS_ENABLED(CONFIG_32BIT) && sbi_v2_available && info.width >= 32) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ_HI,
+ hwc->idx, 0, 0, 0, 0, 0);
+ if (!ret.error)
+ val |= ((u64)ret.value << 32);
+ else
+ WARN_ONCE(1, "Unable to read upper 32 bits of firmware counter error: %ld\n",
+ ret.error);
+ }
} else {
- info = pmu_ctr_list[idx];
val = riscv_pmu_ctr_read_csr(info.csr);
if (IS_ENABLED(CONFIG_32BIT))
- val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val;
+ val |= ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 32;
}
return val;
}
+static void pmu_sbi_set_scounteren(void *arg)
+{
+ struct perf_event *event = (struct perf_event *)arg;
+
+ if (event->hw.idx != -1)
+ csr_write(CSR_SCOUNTEREN,
+ csr_read(CSR_SCOUNTEREN) | BIT(pmu_sbi_csr_index(event)));
+}
+
+static void pmu_sbi_reset_scounteren(void *arg)
+{
+ struct perf_event *event = (struct perf_event *)arg;
+
+ if (event->hw.idx != -1)
+ csr_write(CSR_SCOUNTEREN,
+ csr_read(CSR_SCOUNTEREN) & ~BIT(pmu_sbi_csr_index(event)));
+}
+
static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
{
struct sbiret ret;
struct hw_perf_event *hwc = &event->hw;
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
+ /* There is no benefit setting SNAPSHOT FLAG for a single counter */
#if defined(CONFIG_32BIT)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
1, flag, ival, ival >> 32, 0);
@@ -442,18 +813,46 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
pr_err("Starting counter idx %d failed with error %d\n",
hwc->idx, sbi_err_map_linux_errno(ret.error));
+
+ if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+ (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+ pmu_sbi_set_scounteren((void *)event);
}
static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
{
struct sbiret ret;
struct hw_perf_event *hwc = &event->hw;
+ struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
+
+ if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+ (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+ pmu_sbi_reset_scounteren((void *)event);
+
+ if (sbi_pmu_snapshot_available())
+ flag |= SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
- if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
- flag != SBI_PMU_STOP_FLAG_RESET)
+ if (!ret.error && sbi_pmu_snapshot_available()) {
+ /*
+ * The counter snapshot is based on the index base specified by hwc->idx.
+ * The actual counter value is updated in shared memory at index 0 when counter
+ * mask is 0x01. To ensure accurate counter values, it's necessary to transfer
+ * the counter value to shared memory. However, if hwc->idx is zero, the counter
+ * value is already correctly updated in shared memory, requiring no further
+ * adjustment.
+ */
+ if (hwc->idx > 0) {
+ sdata->ctr_values[hwc->idx] = sdata->ctr_values[0];
+ sdata->ctr_values[0] = 0;
+ }
+ } else if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
+ flag != SBI_PMU_STOP_FLAG_RESET) {
pr_err("Stopping counter idx %d failed with error %d\n",
hwc->idx, sbi_err_map_linux_errno(ret.error));
+ }
}
static int pmu_sbi_find_num_ctrs(void)
@@ -505,16 +904,45 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
* which may include counters that are not enabled yet.
*/
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
- 0, pmu->cmask, 0, 0, 0, 0);
+ 0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
}
static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
{
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
+ unsigned long flag = 0;
+ int i, idx;
+ struct sbiret ret;
+ u64 temp_ctr_overflow_mask = 0;
+
+ if (sbi_pmu_snapshot_available())
+ flag = SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
+
+ /* Reset the shadow copy to avoid save/restore any value from previous overflow */
+ memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
+
+ for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
+ /* No need to check the error here as we can't do anything about the error */
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, i * BITS_PER_LONG,
+ cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
+ if (!ret.error && sbi_pmu_snapshot_available()) {
+ /* Save the counter values to avoid clobbering */
+ for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
+ cpu_hw_evt->snapshot_cval_shcopy[i * BITS_PER_LONG + idx] =
+ sdata->ctr_values[idx];
+ /* Save the overflow mask to avoid clobbering */
+ temp_ctr_overflow_mask |= sdata->ctr_overflow_mask << (i * BITS_PER_LONG);
+ }
+ }
- /* No need to check the error here as we can't do anything about the error */
- sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
- cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
+ /* Restore the counter values to the shared memory for used hw counters */
+ if (sbi_pmu_snapshot_available()) {
+ for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS)
+ sdata->ctr_values[idx] = cpu_hw_evt->snapshot_cval_shcopy[idx];
+ if (temp_ctr_overflow_mask)
+ sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
+ }
}
/*
@@ -523,11 +951,10 @@ static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
* while the overflowed counters need to be started with updated initialization
* value.
*/
-static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
- unsigned long ctr_ovf_mask)
+static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt,
+ u64 ctr_ovf_mask)
{
- int idx = 0;
- struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ int idx = 0, i;
struct perf_event *event;
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
unsigned long ctr_start_mask = 0;
@@ -535,11 +962,14 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
struct hw_perf_event *hwc;
u64 init_val = 0;
- ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;
-
- /* Start all the counters that did not overflow in a single shot */
- sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
- 0, 0, 0, 0);
+ for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
+ ctr_start_mask = cpu_hw_evt->used_hw_ctrs[i] & ~ctr_ovf_mask;
+ /* Start all the counters that did not overflow in a single shot */
+ if (ctr_start_mask) {
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG,
+ ctr_start_mask, 0, 0, 0, 0);
+ }
+ }
/* Reinitialize and start all the counter that overflowed */
while (ctr_ovf_mask) {
@@ -562,6 +992,52 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
}
}
+static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_evt,
+ u64 ctr_ovf_mask)
+{
+ int i, idx = 0;
+ struct perf_event *event;
+ unsigned long flag = SBI_PMU_START_FLAG_INIT_SNAPSHOT;
+ u64 max_period, init_val = 0;
+ struct hw_perf_event *hwc;
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
+
+ for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
+ if (ctr_ovf_mask & BIT(idx)) {
+ event = cpu_hw_evt->events[idx];
+ hwc = &event->hw;
+ max_period = riscv_pmu_ctr_get_width_mask(event);
+ init_val = local64_read(&hwc->prev_count) & max_period;
+ cpu_hw_evt->snapshot_cval_shcopy[idx] = init_val;
+ }
+ /*
+ * We do not need to update the non-overflow counters the previous
+ * value should have been there already.
+ */
+ }
+
+ for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
+ /* Restore the counter values to relative indices for used hw counters */
+ for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
+ sdata->ctr_values[idx] =
+ cpu_hw_evt->snapshot_cval_shcopy[idx + i * BITS_PER_LONG];
+ /* Start all the counters in a single shot */
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx * BITS_PER_LONG,
+ cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
+ }
+}
+
+static void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
+ u64 ctr_ovf_mask)
+{
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+
+ if (sbi_pmu_snapshot_available())
+ pmu_sbi_start_ovf_ctrs_snapshot(cpu_hw_evt, ctr_ovf_mask);
+ else
+ pmu_sbi_start_ovf_ctrs_sbi(cpu_hw_evt, ctr_ovf_mask);
+}
+
static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
{
struct perf_sample_data data;
@@ -571,19 +1047,25 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
int lidx, hidx, fidx;
struct riscv_pmu *pmu;
struct perf_event *event;
- unsigned long overflow;
- unsigned long overflowed_ctrs = 0;
+ u64 overflow;
+ u64 overflowed_ctrs = 0;
struct cpu_hw_events *cpu_hw_evt = dev;
u64 start_clock = sched_clock();
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
if (WARN_ON_ONCE(!cpu_hw_evt))
return IRQ_NONE;
/* Firmware counter don't support overflow yet */
fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
+ if (fidx == RISCV_MAX_COUNTERS) {
+ csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
+ return IRQ_NONE;
+ }
+
event = cpu_hw_evt->events[fidx];
if (!event) {
- csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
+ ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
return IRQ_NONE;
}
@@ -591,13 +1073,16 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
pmu_sbi_stop_hw_ctrs(pmu);
/* Overflow status register should only be read after counter are stopped */
- ALT_SBI_PMU_OVERFLOW(overflow);
+ if (sbi_pmu_snapshot_available())
+ overflow = sdata->ctr_overflow_mask;
+ else
+ ALT_SBI_PMU_OVERFLOW(overflow);
/*
* Overflow interrupt pending bit should only be cleared after stopping
* all the counters to avoid any race condition.
*/
- csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
+ ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
/* No overflow bit is set */
if (!overflow)
@@ -617,19 +1102,27 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
continue;
- /* compute hardware counter index */
- hidx = info->csr - CSR_CYCLE;
- /* check if the corresponding bit is set in sscountovf */
- if (!(overflow & (1 << hidx)))
+ if (sbi_pmu_snapshot_available())
+ /* SBI implementation already updated the logical indicies */
+ hidx = lidx;
+ else
+ /* compute hardware counter index */
+ hidx = info->csr - CSR_CYCLE;
+
+ /* check if the corresponding bit is set in scountovf or overflow mask in shmem */
+ if (!(overflow & BIT(hidx)))
continue;
/*
* Keep a track of overflowed counters so that they can be started
* with updated initial value.
*/
- overflowed_ctrs |= 1 << lidx;
+ overflowed_ctrs |= BIT(lidx);
hw_evt = &event->hw;
+ /* Update the event states here so that we know the state while reading */
+ hw_evt->state |= PERF_HES_STOPPED;
riscv_pmu_event_update(event);
+ hw_evt->state |= PERF_HES_UPTODATE;
perf_sample_data_init(&data, 0, hw_evt->last_period);
if (riscv_pmu_event_set_period(event)) {
/*
@@ -642,6 +1135,8 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
*/
perf_event_overflow(event, &data, regs);
}
+ /* Reset the state as we are going to start the counter after the loop */
+ hw_evt->state = 0;
}
pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
@@ -656,21 +1151,26 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
/*
- * Enable the access for CYCLE, TIME, and INSTRET CSRs from userspace,
- * as is necessary to maintain uABI compatibility.
+ * We keep enabling userspace access to CYCLE, TIME and INSTRET via the
+ * legacy option but that will be removed in the future.
*/
- csr_write(CSR_SCOUNTEREN, 0x7);
+ if (sysctl_perf_user_access == SYSCTL_LEGACY)
+ csr_write(CSR_SCOUNTEREN, 0x7);
+ else
+ csr_write(CSR_SCOUNTEREN, 0x2);
/* Stop all the counters so that they can be enabled from perf */
pmu_sbi_stop_all(pmu);
if (riscv_pmu_use_irq) {
cpu_hw_evt->irq = riscv_pmu_irq;
- csr_clear(CSR_IP, BIT(riscv_pmu_irq_num));
- csr_set(CSR_IE, BIT(riscv_pmu_irq_num));
+ ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
}
+ if (sbi_pmu_snapshot_available())
+ return pmu_sbi_snapshot_setup(pmu, cpu);
+
return 0;
}
@@ -678,12 +1178,14 @@ static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
{
if (riscv_pmu_use_irq) {
disable_percpu_irq(riscv_pmu_irq);
- csr_clear(CSR_IE, BIT(riscv_pmu_irq_num));
}
/* Disable all counters access for user mode now */
csr_write(CSR_SCOUNTEREN, 0x0);
+ if (sbi_pmu_snapshot_available())
+ return pmu_sbi_snapshot_disable();
+
return 0;
}
@@ -691,7 +1193,6 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
{
int ret;
struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
- struct device_node *cpu, *child;
struct irq_domain *domain = NULL;
if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
@@ -703,25 +1204,20 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
riscv_cached_mimpid(0) == 0) {
riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU;
riscv_pmu_use_irq = true;
+ } else if (riscv_has_vendor_extension_unlikely(ANDES_VENDOR_ID,
+ RISCV_ISA_VENDOR_EXT_XANDESPMU) &&
+ IS_ENABLED(CONFIG_ANDES_CUSTOM_PMU)) {
+ riscv_pmu_irq_num = ANDES_SLI_CAUSE_BASE + ANDES_RV_IRQ_PMOVI;
+ riscv_pmu_use_irq = true;
}
+ riscv_pmu_irq_mask = BIT(riscv_pmu_irq_num % BITS_PER_LONG);
+
if (!riscv_pmu_use_irq)
return -EOPNOTSUPP;
- for_each_of_cpu_node(cpu) {
- child = of_get_compatible_child(cpu, "riscv,cpu-intc");
- if (!child) {
- pr_err("Failed to find INTC node\n");
- of_node_put(cpu);
- return -ENODEV;
- }
- domain = irq_find_host(child);
- of_node_put(child);
- if (domain) {
- of_node_put(cpu);
- break;
- }
- }
+ domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
+ DOMAIN_BUS_ANY);
if (!domain) {
pr_err("Failed to find INTC IRQ root domain\n");
return -ENODEV;
@@ -771,14 +1267,8 @@ static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
case CPU_PM_ENTER_FAILED:
/*
* Restore and enable the counter.
- *
- * Requires RCU read locking to be functional,
- * wrap the call within RCU_NONIDLE to make the
- * RCU subsystem aware this cpu is not idle from
- * an RCU perspective for the riscv_pmu_start() call
- * duration.
*/
- RCU_NONIDLE(riscv_pmu_start(event, PERF_EF_RELOAD));
+ riscv_pmu_start(event, PERF_EF_RELOAD);
break;
default:
break;
@@ -805,14 +1295,133 @@ static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { }
static void riscv_pmu_destroy(struct riscv_pmu *pmu)
{
+ if (sbi_v2_available) {
+ if (sbi_pmu_snapshot_available()) {
+ pmu_sbi_snapshot_disable();
+ pmu_sbi_snapshot_free(pmu);
+ }
+ }
riscv_pm_pmu_unregister(pmu);
cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}
+static void pmu_sbi_event_init(struct perf_event *event)
+{
+ /*
+ * The permissions are set at event_init so that we do not depend
+ * on the sysctl value that can change.
+ */
+ if (sysctl_perf_user_access == SYSCTL_NO_USER_ACCESS)
+ event->hw.flags |= PERF_EVENT_FLAG_NO_USER_ACCESS;
+ else if (sysctl_perf_user_access == SYSCTL_USER_ACCESS)
+ event->hw.flags |= PERF_EVENT_FLAG_USER_ACCESS;
+ else
+ event->hw.flags |= PERF_EVENT_FLAG_LEGACY;
+}
+
+static void pmu_sbi_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+ return;
+
+ if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+ return;
+ }
+ }
+
+ /*
+ * The user mmapped the event to directly access it: this is where
+ * we determine based on sysctl_perf_user_access if we grant userspace
+ * the direct access to this event. That means that within the same
+ * task, some events may be directly accessible and some other may not,
+ * if the user changes the value of sysctl_perf_user_accesss in the
+ * meantime.
+ */
+
+ event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+
+ /*
+ * We must enable userspace access *before* advertising in the user page
+ * that it is possible to do so to avoid any race.
+ * And we must notify all cpus here because threads that currently run
+ * on other cpus will try to directly access the counter too without
+ * calling pmu_sbi_ctr_start.
+ */
+ if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+ on_each_cpu_mask(mm_cpumask(mm),
+ pmu_sbi_set_scounteren, (void *)event, 1);
+}
+
+static void pmu_sbi_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+ return;
+
+ if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+ return;
+ }
+ }
+
+ /*
+ * Here we can directly remove user access since the user does not have
+ * access to the user page anymore so we avoid the racy window where the
+ * user could have read cap_user_rdpmc to true right before we disable
+ * it.
+ */
+ event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+
+ if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+ on_each_cpu_mask(mm_cpumask(mm),
+ pmu_sbi_reset_scounteren, (void *)event, 1);
+}
+
+static void riscv_pmu_update_counter_access(void *info)
+{
+ if (sysctl_perf_user_access == SYSCTL_LEGACY)
+ csr_write(CSR_SCOUNTEREN, 0x7);
+ else
+ csr_write(CSR_SCOUNTEREN, 0x2);
+}
+
+static int riscv_pmu_proc_user_access_handler(const struct ctl_table *table,
+ int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int prev = sysctl_perf_user_access;
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ /*
+ * Test against the previous value since we clear SCOUNTEREN when
+ * sysctl_perf_user_access is set to SYSCTL_USER_ACCESS, but we should
+ * not do that if that was already the case.
+ */
+ if (ret || !write || prev == sysctl_perf_user_access)
+ return ret;
+
+ on_each_cpu(riscv_pmu_update_counter_access, NULL, 1);
+
+ return 0;
+}
+
+static const struct ctl_table sbi_pmu_sysctl_table[] = {
+ {
+ .procname = "perf_user_access",
+ .data = &sysctl_perf_user_access,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = riscv_pmu_proc_user_access_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+};
+
static int pmu_sbi_device_probe(struct platform_device *pdev)
{
struct riscv_pmu *pmu = NULL;
- unsigned long cmask = 0;
int ret = -ENODEV;
int num_counters;
@@ -827,6 +1436,12 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
goto out_free;
}
+ /* It is possible to get from SBI more than max number of counters */
+ if (num_counters > RISCV_MAX_COUNTERS) {
+ num_counters = RISCV_MAX_COUNTERS;
+ pr_info("SBI returned more than maximum number of counters. Limiting the number of counters to %d\n", num_counters);
+ }
+
/* cache all the information about counters now */
if (pmu_sbi_get_ctrinfo(num_counters, &cmask))
goto out_free;
@@ -839,6 +1454,7 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
}
pmu->pmu.attr_groups = riscv_pmu_attr_groups;
+ pmu->pmu.parent = &pdev->dev;
pmu->cmask = cmask;
pmu->ctr_start = pmu_sbi_ctr_start;
pmu->ctr_stop = pmu_sbi_ctr_stop;
@@ -847,10 +1463,10 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
pmu->ctr_get_width = pmu_sbi_ctr_get_width;
pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
pmu->ctr_read = pmu_sbi_ctr_read;
-
- ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
- if (ret)
- return ret;
+ pmu->event_init = pmu_sbi_event_init;
+ pmu->event_mapped = pmu_sbi_event_mapped;
+ pmu->event_unmapped = pmu_sbi_event_unmapped;
+ pmu->csr_index = pmu_sbi_csr_index;
ret = riscv_pm_pmu_register(pmu);
if (ret)
@@ -860,6 +1476,42 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
if (ret)
goto out_unregister;
+ /* SBI PMU Snapsphot is only available in SBI v2.0 */
+ if (sbi_v2_available) {
+ int cpu;
+
+ ret = pmu_sbi_snapshot_alloc(pmu);
+ if (ret)
+ goto out_unregister;
+
+ cpu = get_cpu();
+ ret = pmu_sbi_snapshot_setup(pmu, cpu);
+ put_cpu();
+
+ if (ret) {
+ /* Snapshot is an optional feature. Continue if not available */
+ pmu_sbi_snapshot_free(pmu);
+ } else {
+ pr_info("SBI PMU snapshot detected\n");
+ /*
+ * We enable it once here for the boot cpu. If snapshot shmem setup
+ * fails during cpu hotplug process, it will fail to start the cpu
+ * as we can not handle hetergenous PMUs with different snapshot
+ * capability.
+ */
+ static_branch_enable(&sbi_pmu_snapshot_available);
+ }
+ }
+
+ register_sysctl("kernel", sbi_pmu_sysctl_table);
+
+ ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
+ if (ret)
+ goto out_unregister;
+
+ /* Asynchronously check which standard events are available */
+ schedule_work(&check_std_events_work);
+
return 0;
out_unregister:
@@ -873,7 +1525,7 @@ out_free:
static struct platform_driver pmu_sbi_driver = {
.probe = pmu_sbi_device_probe,
.driver = {
- .name = RISCV_PMU_PDEV_NAME,
+ .name = RISCV_PMU_SBI_PDEV_NAME,
},
};
@@ -883,10 +1535,16 @@ static int __init pmu_sbi_devinit(void)
struct platform_device *pdev;
if (sbi_spec_version < sbi_mk_version(0, 3) ||
- sbi_probe_extension(SBI_EXT_PMU) <= 0) {
+ !sbi_probe_extension(SBI_EXT_PMU)) {
return 0;
}
+ if (sbi_spec_version >= sbi_mk_version(2, 0))
+ sbi_v2_available = true;
+
+ if (sbi_spec_version >= sbi_mk_version(3, 0))
+ sbi_v3_available = true;
+
ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
"perf/riscv/pmu:starting",
pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
@@ -900,7 +1558,7 @@ static int __init pmu_sbi_devinit(void)
if (ret)
return ret;
- pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0);
+ pdev = platform_device_register_simple(RISCV_PMU_SBI_PDEV_NAME, -1, NULL, 0);
if (IS_ERR(pdev)) {
platform_driver_unregister(&pmu_sbi_driver);
return PTR_ERR(pdev);
diff --git a/drivers/perf/starfive_starlink_pmu.c b/drivers/perf/starfive_starlink_pmu.c
new file mode 100644
index 000000000000..5e5a672b4229
--- /dev/null
+++ b/drivers/perf/starfive_starlink_pmu.c
@@ -0,0 +1,642 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * StarFive's StarLink PMU driver
+ *
+ * Copyright (C) 2023 StarFive Technology Co., Ltd.
+ *
+ * Author: Ji Sheng Teoh <jisheng.teoh@starfivetech.com>
+ *
+ */
+
+#define STARLINK_PMU_PDEV_NAME "starfive_starlink_pmu"
+#define pr_fmt(fmt) STARLINK_PMU_PDEV_NAME ": " fmt
+
+#include <linux/bitmap.h>
+#include <linux/cpu_pm.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/sysfs.h>
+
+#define STARLINK_PMU_MAX_COUNTERS 64
+#define STARLINK_PMU_NUM_COUNTERS 16
+#define STARLINK_PMU_IDX_CYCLE_COUNTER 63
+
+#define STARLINK_PMU_EVENT_SELECT 0x060
+#define STARLINK_PMU_EVENT_COUNTER 0x160
+#define STARLINK_PMU_COUNTER_MASK GENMASK_ULL(63, 0)
+#define STARLINK_PMU_CYCLE_COUNTER 0x058
+
+#define STARLINK_PMU_CONTROL 0x040
+#define STARLINK_PMU_GLOBAL_ENABLE BIT_ULL(0)
+
+#define STARLINK_PMU_INTERRUPT_ENABLE 0x050
+#define STARLINK_PMU_COUNTER_OVERFLOW_STATUS 0x048
+#define STARLINK_PMU_CYCLE_OVERFLOW_MASK BIT_ULL(63)
+
+#define STARLINK_CYCLES 0x058
+#define CACHE_READ_REQUEST 0x04000701
+#define CACHE_WRITE_REQUEST 0x03000001
+#define CACHE_RELEASE_REQUEST 0x0003e001
+#define CACHE_READ_HIT 0x00901202
+#define CACHE_READ_MISS 0x04008002
+#define CACHE_WRITE_HIT 0x006c0002
+#define CACHE_WRITE_MISS 0x03000002
+#define CACHE_WRITEBACK 0x00000403
+
+#define to_starlink_pmu(p) (container_of(p, struct starlink_pmu, pmu))
+
+#define STARLINK_FORMAT_ATTR(_name, _config) \
+ (&((struct dev_ext_attribute[]) { \
+ { .attr = __ATTR(_name, 0444, starlink_pmu_sysfs_format_show, NULL), \
+ .var = (void *)_config, } \
+ })[0].attr.attr)
+
+#define STARLINK_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, starlink_pmu_sysfs_event_show, _id)
+
+static int starlink_pmu_cpuhp_state;
+
+struct starlink_hw_events {
+ struct perf_event *events[STARLINK_PMU_MAX_COUNTERS];
+ DECLARE_BITMAP(used_mask, STARLINK_PMU_MAX_COUNTERS);
+};
+
+struct starlink_pmu {
+ struct pmu pmu;
+ struct starlink_hw_events __percpu *hw_events;
+ struct hlist_node node;
+ struct notifier_block starlink_pmu_pm_nb;
+ void __iomem *pmu_base;
+ cpumask_t cpumask;
+ int irq;
+};
+
+static ssize_t
+starlink_pmu_sysfs_format_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct dev_ext_attribute *eattr = container_of(attr,
+ struct dev_ext_attribute, attr);
+
+ return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+static struct attribute *starlink_pmu_format_attrs[] = {
+ STARLINK_FORMAT_ATTR(event, "config:0-31"),
+ NULL
+};
+
+static const struct attribute_group starlink_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = starlink_pmu_format_attrs,
+};
+
+static ssize_t
+starlink_pmu_sysfs_event_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct perf_pmu_events_attr *eattr = container_of(attr,
+ struct perf_pmu_events_attr, attr);
+
+ return sysfs_emit(buf, "event=0x%02llx\n", eattr->id);
+}
+
+static struct attribute *starlink_pmu_event_attrs[] = {
+ STARLINK_EVENT_ATTR(cycles, STARLINK_CYCLES),
+ STARLINK_EVENT_ATTR(read_request, CACHE_READ_REQUEST),
+ STARLINK_EVENT_ATTR(write_request, CACHE_WRITE_REQUEST),
+ STARLINK_EVENT_ATTR(release_request, CACHE_RELEASE_REQUEST),
+ STARLINK_EVENT_ATTR(read_hit, CACHE_READ_HIT),
+ STARLINK_EVENT_ATTR(read_miss, CACHE_READ_MISS),
+ STARLINK_EVENT_ATTR(write_hit, CACHE_WRITE_HIT),
+ STARLINK_EVENT_ATTR(write_miss, CACHE_WRITE_MISS),
+ STARLINK_EVENT_ATTR(writeback, CACHE_WRITEBACK),
+ NULL
+};
+
+static const struct attribute_group starlink_pmu_events_attr_group = {
+ .name = "events",
+ .attrs = starlink_pmu_event_attrs,
+};
+
+static ssize_t
+cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, &starlink_pmu->cpumask);
+}
+
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *starlink_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static const struct attribute_group starlink_pmu_cpumask_attr_group = {
+ .attrs = starlink_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *starlink_pmu_attr_groups[] = {
+ &starlink_pmu_format_attr_group,
+ &starlink_pmu_events_attr_group,
+ &starlink_pmu_cpumask_attr_group,
+ NULL
+};
+
+static void starlink_pmu_set_event_period(struct perf_event *event)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = event->hw.idx;
+
+ /*
+ * Program counter to half of it's max count to handle
+ * cases of extreme interrupt latency.
+ */
+ u64 val = STARLINK_PMU_COUNTER_MASK >> 1;
+
+ local64_set(&hwc->prev_count, val);
+ if (hwc->config == STARLINK_CYCLES)
+ writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_CYCLE_COUNTER);
+ else
+ writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_EVENT_COUNTER +
+ idx * sizeof(u64));
+}
+
+static void starlink_pmu_counter_start(struct perf_event *event,
+ struct starlink_pmu *starlink_pmu)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = event->hw.idx;
+ u64 val;
+
+ /*
+ * Enable counter overflow interrupt[63:0],
+ * which is mapped as follow:
+ *
+ * event counter 0 - Bit [0]
+ * event counter 1 - Bit [1]
+ * ...
+ * cycle counter - Bit [63]
+ */
+ val = readq(starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+
+ if (hwc->config == STARLINK_CYCLES) {
+ /*
+ * Cycle count has its dedicated register, and it starts
+ * counting as soon as STARLINK_PMU_GLOBAL_ENABLE is set.
+ */
+ val |= STARLINK_PMU_CYCLE_OVERFLOW_MASK;
+ } else {
+ writeq(event->hw.config, starlink_pmu->pmu_base +
+ STARLINK_PMU_EVENT_SELECT + idx * sizeof(u64));
+
+ val |= BIT_ULL(idx);
+ }
+
+ writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+
+ writeq(STARLINK_PMU_GLOBAL_ENABLE, starlink_pmu->pmu_base +
+ STARLINK_PMU_CONTROL);
+}
+
+static void starlink_pmu_counter_stop(struct perf_event *event,
+ struct starlink_pmu *starlink_pmu)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = event->hw.idx;
+ u64 val;
+
+ val = readq(starlink_pmu->pmu_base + STARLINK_PMU_CONTROL);
+ val &= ~STARLINK_PMU_GLOBAL_ENABLE;
+ writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_CONTROL);
+
+ val = readq(starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+ if (hwc->config == STARLINK_CYCLES)
+ val &= ~STARLINK_PMU_CYCLE_OVERFLOW_MASK;
+ else
+ val &= ~BIT_ULL(idx);
+
+ writeq(val, starlink_pmu->pmu_base + STARLINK_PMU_INTERRUPT_ENABLE);
+}
+
+static void starlink_pmu_update(struct perf_event *event)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u64 prev_raw_count, new_raw_count;
+ u64 oldval;
+ u64 delta;
+
+ do {
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (hwc->config == STARLINK_CYCLES)
+ new_raw_count = readq(starlink_pmu->pmu_base +
+ STARLINK_PMU_CYCLE_COUNTER);
+ else
+ new_raw_count = readq(starlink_pmu->pmu_base +
+ STARLINK_PMU_EVENT_COUNTER +
+ idx * sizeof(u64));
+ oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count);
+ } while (oldval != prev_raw_count);
+
+ delta = (new_raw_count - prev_raw_count) & STARLINK_PMU_COUNTER_MASK;
+ local64_add(delta, &event->count);
+}
+
+static void starlink_pmu_start(struct perf_event *event, int flags)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+
+ starlink_pmu_set_event_period(event);
+ starlink_pmu_counter_start(event, starlink_pmu);
+
+ perf_event_update_userpage(event);
+}
+
+static void starlink_pmu_stop(struct perf_event *event, int flags)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->state & PERF_HES_STOPPED)
+ return;
+
+ starlink_pmu_counter_stop(event, starlink_pmu);
+ starlink_pmu_update(event);
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int starlink_pmu_add(struct perf_event *event, int flags)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct starlink_hw_events *hw_events =
+ this_cpu_ptr(starlink_pmu->hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long *used_mask = hw_events->used_mask;
+ u32 n_events = STARLINK_PMU_NUM_COUNTERS;
+ int idx;
+
+ /*
+ * Cycle counter has dedicated register to hold counter value.
+ * Event other than cycle count has to be enabled through
+ * event select register, and assigned with independent counter
+ * as they appear.
+ */
+
+ if (hwc->config == STARLINK_CYCLES) {
+ idx = STARLINK_PMU_IDX_CYCLE_COUNTER;
+ } else {
+ idx = find_first_zero_bit(used_mask, n_events);
+ /* All counter are in use */
+ if (idx < 0)
+ return idx;
+
+ set_bit(idx, used_mask);
+ }
+
+ hwc->idx = idx;
+ hw_events->events[idx] = event;
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ starlink_pmu_start(event, PERF_EF_RELOAD);
+
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static void starlink_pmu_del(struct perf_event *event, int flags)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct starlink_hw_events *hw_events =
+ this_cpu_ptr(starlink_pmu->hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ starlink_pmu_stop(event, PERF_EF_UPDATE);
+ hw_events->events[hwc->idx] = NULL;
+ clear_bit(hwc->idx, hw_events->used_mask);
+
+ perf_event_update_userpage(event);
+}
+
+static bool starlink_pmu_validate_event_group(struct perf_event *event)
+{
+ struct perf_event *leader = event->group_leader;
+ struct perf_event *sibling;
+ int counter = 1;
+
+ /*
+ * Ensure hardware events in the group are on the same PMU,
+ * software events are acceptable.
+ */
+ if (event->group_leader->pmu != event->pmu &&
+ !is_software_event(event->group_leader))
+ return false;
+
+ for_each_sibling_event(sibling, leader) {
+ if (sibling->pmu != event->pmu && !is_software_event(sibling))
+ return false;
+
+ counter++;
+ }
+
+ return counter <= STARLINK_PMU_NUM_COUNTERS;
+}
+
+static int starlink_pmu_event_init(struct perf_event *event)
+{
+ struct starlink_pmu *starlink_pmu = to_starlink_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * Sampling is not supported, as counters are shared
+ * by all CPU.
+ */
+ if (hwc->sample_period)
+ return -EOPNOTSUPP;
+
+ /*
+ * Per-task and attach to a task are not supported,
+ * as uncore events are not specific to any CPU.
+ */
+ if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK)
+ return -EOPNOTSUPP;
+
+ if (!starlink_pmu_validate_event_group(event))
+ return -EINVAL;
+
+ hwc->idx = -1;
+ hwc->config = event->attr.config;
+ event->cpu = cpumask_first(&starlink_pmu->cpumask);
+
+ return 0;
+}
+
+static irqreturn_t starlink_pmu_handle_irq(int irq_num, void *data)
+{
+ struct starlink_pmu *starlink_pmu = data;
+ struct starlink_hw_events *hw_events =
+ this_cpu_ptr(starlink_pmu->hw_events);
+ bool handled = false;
+ int idx;
+ u64 overflow_status;
+
+ for (idx = 0; idx < STARLINK_PMU_MAX_COUNTERS; idx++) {
+ struct perf_event *event = hw_events->events[idx];
+
+ if (!event)
+ continue;
+
+ overflow_status = readq(starlink_pmu->pmu_base +
+ STARLINK_PMU_COUNTER_OVERFLOW_STATUS);
+ if (!(overflow_status & BIT_ULL(idx)))
+ continue;
+
+ writeq(BIT_ULL(idx), starlink_pmu->pmu_base +
+ STARLINK_PMU_COUNTER_OVERFLOW_STATUS);
+
+ starlink_pmu_update(event);
+ starlink_pmu_set_event_period(event);
+ handled = true;
+ }
+ return IRQ_RETVAL(handled);
+}
+
+static int starlink_setup_irqs(struct starlink_pmu *starlink_pmu,
+ struct platform_device *pdev)
+{
+ int ret, irq;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return -EINVAL;
+
+ ret = devm_request_irq(&pdev->dev, irq, starlink_pmu_handle_irq,
+ 0, STARLINK_PMU_PDEV_NAME, starlink_pmu);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Failed to request IRQ\n");
+
+ starlink_pmu->irq = irq;
+
+ return 0;
+}
+
+static int starlink_pmu_pm_notify(struct notifier_block *b,
+ unsigned long cmd, void *v)
+{
+ struct starlink_pmu *starlink_pmu = container_of(b, struct starlink_pmu,
+ starlink_pmu_pm_nb);
+ struct starlink_hw_events *hw_events =
+ this_cpu_ptr(starlink_pmu->hw_events);
+ int enabled = bitmap_weight(hw_events->used_mask,
+ STARLINK_PMU_MAX_COUNTERS);
+ struct perf_event *event;
+ int idx;
+
+ if (!enabled)
+ return NOTIFY_OK;
+
+ for (idx = 0; idx < STARLINK_PMU_MAX_COUNTERS; idx++) {
+ event = hw_events->events[idx];
+ if (!event)
+ continue;
+
+ switch (cmd) {
+ case CPU_PM_ENTER:
+ /* Stop and update the counter */
+ starlink_pmu_stop(event, PERF_EF_UPDATE);
+ break;
+ case CPU_PM_EXIT:
+ case CPU_PM_ENTER_FAILED:
+ /* Restore and enable the counter */
+ starlink_pmu_start(event, PERF_EF_RELOAD);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return NOTIFY_OK;
+}
+
+static int starlink_pmu_pm_register(struct starlink_pmu *starlink_pmu)
+{
+ if (!IS_ENABLED(CONFIG_CPU_PM))
+ return 0;
+
+ starlink_pmu->starlink_pmu_pm_nb.notifier_call = starlink_pmu_pm_notify;
+ return cpu_pm_register_notifier(&starlink_pmu->starlink_pmu_pm_nb);
+}
+
+static void starlink_pmu_pm_unregister(struct starlink_pmu *starlink_pmu)
+{
+ if (!IS_ENABLED(CONFIG_CPU_PM))
+ return;
+
+ cpu_pm_unregister_notifier(&starlink_pmu->starlink_pmu_pm_nb);
+}
+
+static void starlink_pmu_destroy(struct starlink_pmu *starlink_pmu)
+{
+ starlink_pmu_pm_unregister(starlink_pmu);
+ cpuhp_state_remove_instance(starlink_pmu_cpuhp_state,
+ &starlink_pmu->node);
+}
+
+static int starlink_pmu_probe(struct platform_device *pdev)
+{
+ struct starlink_pmu *starlink_pmu;
+ struct starlink_hw_events *hw_events;
+ struct resource *res;
+ int cpuid, i, ret;
+
+ starlink_pmu = devm_kzalloc(&pdev->dev, sizeof(*starlink_pmu), GFP_KERNEL);
+ if (!starlink_pmu)
+ return -ENOMEM;
+
+ starlink_pmu->pmu_base =
+ devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(starlink_pmu->pmu_base))
+ return PTR_ERR(starlink_pmu->pmu_base);
+
+ starlink_pmu->hw_events = alloc_percpu_gfp(struct starlink_hw_events,
+ GFP_KERNEL);
+ if (!starlink_pmu->hw_events) {
+ dev_err(&pdev->dev, "Failed to allocate per-cpu PMU data\n");
+ return -ENOMEM;
+ }
+
+ for_each_possible_cpu(cpuid) {
+ hw_events = per_cpu_ptr(starlink_pmu->hw_events, cpuid);
+ for (i = 0; i < STARLINK_PMU_MAX_COUNTERS; i++)
+ hw_events->events[i] = NULL;
+ }
+
+ ret = starlink_setup_irqs(starlink_pmu, pdev);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(starlink_pmu_cpuhp_state,
+ &starlink_pmu->node);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register hotplug\n");
+ return ret;
+ }
+
+ ret = starlink_pmu_pm_register(starlink_pmu);
+ if (ret) {
+ cpuhp_state_remove_instance(starlink_pmu_cpuhp_state,
+ &starlink_pmu->node);
+ return ret;
+ }
+
+ starlink_pmu->pmu = (struct pmu) {
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = starlink_pmu_event_init,
+ .add = starlink_pmu_add,
+ .del = starlink_pmu_del,
+ .start = starlink_pmu_start,
+ .stop = starlink_pmu_stop,
+ .read = starlink_pmu_update,
+ .attr_groups = starlink_pmu_attr_groups,
+ };
+
+ ret = perf_pmu_register(&starlink_pmu->pmu, STARLINK_PMU_PDEV_NAME, -1);
+ if (ret)
+ starlink_pmu_destroy(starlink_pmu);
+
+ return ret;
+}
+
+static const struct of_device_id starlink_pmu_of_match[] = {
+ { .compatible = "starfive,jh8100-starlink-pmu" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, starlink_pmu_of_match);
+
+static struct platform_driver starlink_pmu_driver = {
+ .driver = {
+ .name = STARLINK_PMU_PDEV_NAME,
+ .of_match_table = starlink_pmu_of_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = starlink_pmu_probe,
+};
+
+static int
+starlink_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct starlink_pmu *starlink_pmu = hlist_entry_safe(node,
+ struct starlink_pmu,
+ node);
+
+ if (cpumask_empty(&starlink_pmu->cpumask))
+ cpumask_set_cpu(cpu, &starlink_pmu->cpumask);
+
+ WARN_ON(irq_set_affinity(starlink_pmu->irq, cpumask_of(cpu)));
+
+ return 0;
+}
+
+static int
+starlink_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct starlink_pmu *starlink_pmu = hlist_entry_safe(node,
+ struct starlink_pmu,
+ node);
+ unsigned int target;
+
+ if (!cpumask_test_and_clear_cpu(cpu, &starlink_pmu->cpumask))
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&starlink_pmu->pmu, cpu, target);
+
+ cpumask_set_cpu(target, &starlink_pmu->cpumask);
+ WARN_ON(irq_set_affinity(starlink_pmu->irq, cpumask_of(target)));
+
+ return 0;
+}
+
+static int __init starlink_pmu_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "soc/starfive/starlink_pmu:online",
+ starlink_pmu_online_cpu,
+ starlink_pmu_offline_cpu);
+ if (ret < 0)
+ return ret;
+
+ starlink_pmu_cpuhp_state = ret;
+
+ return platform_driver_register(&starlink_pmu_driver);
+}
+
+device_initcall(starlink_pmu_init);
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index 1edb9c03704f..6ed4707bd6bb 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -504,24 +504,19 @@ static void tx2_uncore_event_update(struct perf_event *event)
static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev)
{
- int i = 0;
- struct acpi_tx2_pmu_device {
- __u8 id[ACPI_ID_LEN];
- enum tx2_uncore_type type;
- } devices[] = {
+ struct acpi_device_id devices[] = {
{"CAV901D", PMU_TYPE_L3C},
{"CAV901F", PMU_TYPE_DMC},
{"CAV901E", PMU_TYPE_CCPI2},
- {"", PMU_TYPE_INVALID}
+ {}
};
+ const struct acpi_device_id *id;
- while (devices[i].type != PMU_TYPE_INVALID) {
- if (!strcmp(acpi_device_hid(adev), devices[i].id))
- break;
- i++;
- }
+ id = acpi_match_acpi_device(devices, adev);
+ if (!id)
+ return PMU_TYPE_INVALID;
- return devices[i].type;
+ return (enum tx2_uncore_type)id->driver_data;
}
static bool tx2_uncore_validate_event(struct pmu *pmu,
@@ -729,6 +724,7 @@ static int tx2_uncore_pmu_register(
/* Perf event registration */
tx2_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = tx2_pmu->dev,
.attr_groups = tx2_pmu->attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = tx2_uncore_event_init,
@@ -756,9 +752,8 @@ static int tx2_uncore_pmu_add_dev(struct tx2_uncore_pmu *tx2_pmu)
tx2_pmu->cpu = cpu;
if (tx2_pmu->hrtimer_callback) {
- hrtimer_init(&tx2_pmu->hrtimer,
- CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- tx2_pmu->hrtimer.function = tx2_pmu->hrtimer_callback;
+ hrtimer_setup(&tx2_pmu->hrtimer, tx2_pmu->hrtimer_callback, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
}
ret = tx2_uncore_pmu_register(tx2_pmu);
@@ -932,9 +927,8 @@ static int tx2_uncore_pmu_online_cpu(unsigned int cpu,
static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
struct hlist_node *hpnode)
{
- int new_cpu;
struct tx2_uncore_pmu *tx2_pmu;
- struct cpumask cpu_online_mask_temp;
+ unsigned int new_cpu;
tx2_pmu = hlist_entry_safe(hpnode,
struct tx2_uncore_pmu, hpnode);
@@ -945,11 +939,8 @@ static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
if (tx2_pmu->hrtimer_callback)
hrtimer_cancel(&tx2_pmu->hrtimer);
- cpumask_copy(&cpu_online_mask_temp, cpu_online_mask);
- cpumask_clear_cpu(cpu, &cpu_online_mask_temp);
- new_cpu = cpumask_any_and(
- cpumask_of_node(tx2_pmu->node),
- &cpu_online_mask_temp);
+ new_cpu = cpumask_any_and_but(cpumask_of_node(tx2_pmu->node),
+ cpu_online_mask, cpu);
tx2_pmu->cpu = new_cpu;
if (new_cpu >= nr_cpu_ids)
@@ -993,7 +984,7 @@ static int tx2_uncore_probe(struct platform_device *pdev)
return 0;
}
-static int tx2_uncore_remove(struct platform_device *pdev)
+static void tx2_uncore_remove(struct platform_device *pdev)
{
struct tx2_uncore_pmu *tx2_pmu, *temp;
struct device *dev = &pdev->dev;
@@ -1009,7 +1000,6 @@ static int tx2_uncore_remove(struct platform_device *pdev)
}
}
}
- return 0;
}
static struct platform_driver tx2_uncore_driver = {
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 0c32dffc7ede..33b5497bdc06 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -16,11 +16,9 @@
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of_address.h>
-#include <linux/of_fdt.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/regmap.h>
#include <linux/slab.h>
@@ -164,18 +162,9 @@ enum xgene_pmu_dev_type {
/*
* sysfs format attributes
*/
-static ssize_t xgene_pmu_format_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dev_ext_attribute *eattr;
-
- eattr = container_of(attr, struct dev_ext_attribute, attr);
- return sysfs_emit(buf, "%s\n", (char *) eattr->var);
-}
-
#define XGENE_PMU_FORMAT_ATTR(_name, _config) \
(&((struct dev_ext_attribute[]) { \
- { .attr = __ATTR(_name, S_IRUGO, xgene_pmu_format_show, NULL), \
+ { .attr = __ATTR(_name, S_IRUGO, device_show_string, NULL), \
.var = (void *) _config, } \
})[0].attr.attr)
@@ -1104,6 +1093,7 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
/* Perf driver registration */
pmu_dev->pmu = (struct pmu) {
+ .parent = pmu_dev->parent->dev,
.attr_groups = pmu_dev->attr_groups,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = xgene_perf_pmu_enable,
@@ -1731,6 +1721,12 @@ static const struct xgene_pmu_data xgene_pmu_v2_data = {
.id = PCP_PMU_V2,
};
+#ifdef CONFIG_ACPI
+static const struct xgene_pmu_data xgene_pmu_v3_data = {
+ .id = PCP_PMU_V3,
+};
+#endif
+
static const struct xgene_pmu_ops xgene_pmu_ops = {
.mask_int = xgene_pmu_mask_int,
.unmask_int = xgene_pmu_unmask_int,
@@ -1773,9 +1769,9 @@ static const struct of_device_id xgene_pmu_of_match[] = {
MODULE_DEVICE_TABLE(of, xgene_pmu_of_match);
#ifdef CONFIG_ACPI
static const struct acpi_device_id xgene_pmu_acpi_match[] = {
- {"APMC0D5B", PCP_PMU_V1},
- {"APMC0D5C", PCP_PMU_V2},
- {"APMC0D83", PCP_PMU_V3},
+ {"APMC0D5B", (kernel_ulong_t)&xgene_pmu_data},
+ {"APMC0D5C", (kernel_ulong_t)&xgene_pmu_v2_data},
+ {"APMC0D83", (kernel_ulong_t)&xgene_pmu_v3_data},
{},
};
MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match);
@@ -1831,9 +1827,7 @@ static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
static int xgene_pmu_probe(struct platform_device *pdev)
{
const struct xgene_pmu_data *dev_data;
- const struct of_device_id *of_id;
struct xgene_pmu *xgene_pmu;
- struct resource *res;
int irq, rc;
int version;
@@ -1851,24 +1845,10 @@ static int xgene_pmu_probe(struct platform_device *pdev)
xgene_pmu->dev = &pdev->dev;
platform_set_drvdata(pdev, xgene_pmu);
- version = -EINVAL;
- of_id = of_match_device(xgene_pmu_of_match, &pdev->dev);
- if (of_id) {
- dev_data = (const struct xgene_pmu_data *) of_id->data;
- version = dev_data->id;
- }
-
-#ifdef CONFIG_ACPI
- if (ACPI_COMPANION(&pdev->dev)) {
- const struct acpi_device_id *acpi_id;
-
- acpi_id = acpi_match_device(xgene_pmu_acpi_match, &pdev->dev);
- if (acpi_id)
- version = (int) acpi_id->driver_data;
- }
-#endif
- if (version < 0)
+ dev_data = device_get_match_data(&pdev->dev);
+ if (!dev_data)
return -ENODEV;
+ version = dev_data->id;
if (version == PCP_PMU_V3)
xgene_pmu->ops = &xgene_pmu_v3_ops;
@@ -1883,8 +1863,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
xgene_pmu->version = version;
dev_info(&pdev->dev, "X-Gene PMU version %d\n", xgene_pmu->version);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- xgene_pmu->pcppmu_csr = devm_ioremap_resource(&pdev->dev, res);
+ xgene_pmu->pcppmu_csr = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(xgene_pmu->pcppmu_csr)) {
dev_err(&pdev->dev, "ioremap failed for PCP PMU resource\n");
return PTR_ERR(xgene_pmu->pcppmu_csr);
@@ -1950,7 +1929,7 @@ xgene_pmu_dev_cleanup(struct xgene_pmu *xgene_pmu, struct list_head *pmus)
}
}
-static int xgene_pmu_remove(struct platform_device *pdev)
+static void xgene_pmu_remove(struct platform_device *pdev)
{
struct xgene_pmu *xgene_pmu = dev_get_drvdata(&pdev->dev);
@@ -1960,8 +1939,6 @@ static int xgene_pmu_remove(struct platform_device *pdev)
xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcpmus);
cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
&xgene_pmu->node);
-
- return 0;
}
static struct platform_driver xgene_pmu_driver = {