-rw-r--r--Documentation/admin-guide/perf/dwc_pcie_pmu.rst4
-rw-r--r--Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst110
-rw-r--r--Documentation/admin-guide/perf/hisi-pmu.rst49
-rw-r--r--Documentation/admin-guide/perf/index.rst1
-rw-r--r--Documentation/arch/arm64/booting.rst11
-rw-r--r--Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml1
-rw-r--r--MAINTAINERS4
-rw-r--r--arch/arm64/include/asm/el2_setup.h28
-rw-r--r--arch/arm64/include/asm/sysreg.h9
-rw-r--r--arch/arm64/tools/sysreg13
-rw-r--r--drivers/hwtracing/coresight/coresight-trbe.c3
-rw-r--r--drivers/perf/Kconfig9
-rw-r--r--drivers/perf/Makefile1
-rw-r--r--drivers/perf/arm-ccn.c2
-rw-r--r--drivers/perf/arm-cmn.c9
-rw-r--r--drivers/perf/arm_pmuv3.c29
-rw-r--r--drivers/perf/arm_spe_pmu.c114
-rw-r--r--drivers/perf/dwc_pcie_pmu.c161
-rw-r--r--drivers/perf/fsl_imx9_ddr_perf.c6
-rw-r--r--drivers/perf/fujitsu_uncore_pmu.c613
-rw-r--r--drivers/perf/hisilicon/Makefile3
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c528
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_mn_pmu.c411
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_noc_pmu.c443
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c5
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.h6
26 files changed, 2399 insertions, 174 deletions
diff --git a/Documentation/admin-guide/perf/dwc_pcie_pmu.rst b/Documentation/admin-guide/perf/dwc_pcie_pmu.rst
index cb376f335f40..167f9281fbf5 100644
--- a/Documentation/admin-guide/perf/dwc_pcie_pmu.rst
+++ b/Documentation/admin-guide/perf/dwc_pcie_pmu.rst
@@ -16,8 +16,8 @@ provides the following two features:
- one 64-bit counter for Time Based Analysis (RX/TX data throughput and
time spent in each low-power LTSSM state) and
-- one 32-bit counter for Event Counting (error and non-error events for
- a specified lane)
+- one 32-bit counter per event for Event Counting (error and non-error
+ events for a specified lane)
Note: There is no interrupt for counter overflow.
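With a counter per event, several lane events can now be programmed concurrently
within one event group. A hedged sketch of such an invocation (the PMU instance
name, event names and lane value are illustrative placeholders, not taken from
this change)::

  perf stat -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload,lane=0/,dwc_rootport_3018/Tx_PCIe_TLP_Data_Payload,lane=0/ sleep 1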
diff --git a/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst b/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst
new file mode 100644
index 000000000000..46595b788d3a
--- /dev/null
+++ b/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+================================================
+Fujitsu Uncore Performance Monitoring Unit (PMU)
+================================================
+
+This driver supports the Uncore MAC PMUs and the Uncore PCI PMUs found
+in Fujitsu chips.
+Each MAC PMU on these chips is exposed as an uncore perf PMU with device name
+mac_iod<iod>_mac<mac>_ch<ch>.
+Each PCI PMU on these chips is exposed as an uncore perf PMU with device name
+pci_iod<iod>_pci<pci>.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/bus/event_sources/devices/mac_iod<iod>_mac<mac>_ch<ch>/
+and /sys/bus/event_sources/devices/pci_iod<iod>_pci<pci>/.
+This driver exports:
+- formats, used by perf user space and other tools to configure events
+- events, used by perf user space and other tools to create events
+ symbolically, e.g.:
+ perf stat -a -e mac_iod0_mac0_ch0/event=0x21/ ls
+ perf stat -a -e pci_iod0_pci0/event=0x24/ ls
+- cpumask, used by perf user space and other tools to know on which CPUs
+ to open the events
+
+This driver supports the following events for MAC:
+- cycles
+ This event counts MAC cycles at MAC frequency.
+- read-count
+ This event counts the number of read requests to MAC.
+- read-count-request
+ This event counts the number of read requests including retry to MAC.
+- read-count-return
+ This event counts the number of responses to read requests to MAC.
+- read-count-request-pftgt
+ This event counts the number of read requests including retry with PFTGT
+ flag.
+- read-count-request-normal
+ This event counts the number of read requests including retry without PFTGT
+ flag.
+- read-count-return-pftgt-hit
+ This event counts the number of responses to read requests which hit the
+ PFTGT buffer.
+- read-count-return-pftgt-miss
+ This event counts the number of responses to read requests which miss the
+ PFTGT buffer.
+- read-wait
+ This event counts outstanding read requests issued by DDR memory controller
+ per cycle.
+- write-count
+ This event counts the number of write requests to MAC (including zero write,
+ full write, partial write, write cancel).
+- write-count-write
+ This event counts the number of full write requests to MAC (not including
+ zero write).
+- write-count-pwrite
+ This event counts the number of partial write requests to MAC.
+- memory-read-count
+ This event counts the number of read requests from MAC to memory.
+- memory-write-count
+ This event counts the number of full write requests from MAC to memory.
+- memory-pwrite-count
+ This event counts the number of partial write requests from MAC to memory.
+- ea-mac
+ This event counts energy consumption of MAC.
+- ea-memory
+ This event counts energy consumption of memory.
+- ea-memory-mac-write
+ This event counts the number of write requests from MAC to memory.
+- ea-ha
+ This event counts energy consumption of HA.
+
+ 'ea' is the abbreviation for 'Energy Analyzer'.
+
+Examples for use with perf::
+
+ perf stat -e mac_iod0_mac0_ch0/ea-mac/ ls
+
+In addition, this driver supports the following events for PCI:
+- pci-port0-cycles
+ This event counts PCI cycles at PCI frequency in port0.
+- pci-port0-read-count
+ This event counts read transactions for data transfer in port0.
+- pci-port0-read-count-bus
+ This event counts read transactions for bus usage in port0.
+- pci-port0-write-count
+ This event counts write transactions for data transfer in port0.
+- pci-port0-write-count-bus
+ This event counts write transactions for bus usage in port0.
+- pci-port1-cycles
+ This event counts PCI cycles at PCI frequency in port1.
+- pci-port1-read-count
+ This event counts read transactions for data transfer in port1.
+- pci-port1-read-count-bus
+ This event counts read transactions for bus usage in port1.
+- pci-port1-write-count
+ This event counts write transactions for data transfer in port1.
+- pci-port1-write-count-bus
+ This event counts write transactions for bus usage in port1.
+- ea-pci
+ This event counts energy consumption of PCI.
+
+ 'ea' is the abbreviation for 'Energy Analyzer'.
+
+Examples for use with perf::
+
+ perf stat -e pci_iod0_pci0/ea-pci/ ls
+
+Given that these are uncore PMUs, the driver does not support sampling; therefore
+"perf record" will not work. Per-task perf sessions are not supported.
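Since each MAC PMU exposes eight counters (MAC_NUM_COUNTERS in the driver below),
several of the events listed above can be counted in one invocation. A hedged
sketch, reusing the device name and event names from this documentation (the
socket/channel indices are placeholders)::

  perf stat -a -e mac_iod0_mac0_ch0/read-count/,mac_iod0_mac0_ch0/write-count/ sleep 1

The CPU to which counts are attributed can be read from the cpumask file
described above.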
diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst
index 48992a0b8e94..c4c2cbbf88cb 100644
--- a/Documentation/admin-guide/perf/hisi-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pmu.rst
@@ -18,9 +18,10 @@ HiSilicon SoC uncore PMU driver
Each device PMU has separate registers for event counting, control and
interrupt, and the PMU driver shall register perf PMU drivers like L3C,
HHA and DDRC etc. The available events and configuration options shall
-be described in the sysfs, see:
+be described in the sysfs, see::
+
+/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>
-/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
The "perf list" command shall list the available events from sysfs.
Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU
@@ -112,6 +113,50 @@ uring channel. It is 2 bits. Some important codes are as follows:
- 2'b00: default value, count the events which are sent to both the uring and
uring_ext channels;
+6. ch: The NoC PMU supports filtering the event counts of a certain transaction
+channel with this option; an illustrative example follows this list. The
+currently supported channels are as follows:
+
+- 3'b010: Request channel
+- 3'b100: Snoop channel
+- 3'b110: Response channel
+- 3'b111: Data channel
+
+7. tt_en: If this option is set, the NoC PMU counts only transactions that have
+the tracetag set. See the 2nd list for more information about the tracetag.
+
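As an illustration of the ch option above, counting only the request channel on
a NoC PMU might look like the following (hedged: the event code 0x0 and the PMU
instance name are placeholders, not taken from this patch)::

  $# perf stat -a -e hisi_sccl3_noc0_0/event=0x0,ch=0x2/ sleep 5
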
+For HiSilicon uncore PMU v3, whose identifier is 0x40, some uncore PMUs are
+further divided into parts for finer-grained tracing. Each part has its own
+dedicated PMU, and all such PMUs together cover the monitoring of events on a
+particular uncore device. Such PMUs are described in sysfs with a slightly
+changed name format::
+
+/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}_{Z}/ddrc{Y}_{Z}/noc{Y}_{Z}>
+
+Z is the sub-id, indicating the different PMUs that each cover part of the
+hardware device.
+
+Usage of most PMUs with different sub-ids is identical. In particular, the L3C
+PMU provides an ``ext`` option to allow exploration of even finer-grained L3C
+statistics. The L3C PMU driver uses this option as a hint of where to deliver
+the perf command in the hardware:
+
+- ext=0: Default, could be used with event names.
+- ext=1 and ext=2: Must be used with event codes, event names are not supported.
+
+An example of perf command could be::
+
+ $# perf stat -a -e hisi_sccl0_l3c1_0/rd_spipe/ sleep 5
+
+or::
+
+ $# perf stat -a -e hisi_sccl0_l3c1_0/event=0x1,ext=1/ sleep 5
+
+As above, ``hisi_sccl0_l3c1_0`` refers to the PMU of Super CPU Cluster 0, L3
+cache 1, pipe 0.
+
+The first command targets the first part of the L3C since ``ext=0`` is implied
+by default. The second command counts on another part of the L3C with event
+code ``0x1``.
+
Users could configure IDs to count data coming from a specific CCL/ICL, by setting
srcid_cmd & srcid_msk, and data destined for a specific CCL/ICL by setting
tgtid_cmd & tgtid_msk. A set bit in srcid_msk/tgtid_msk means the PMU will not
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 072b510385c4..47d9a3df6329 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -29,3 +29,4 @@ Performance monitor support
cxl
ampere_cspmu
mrvl-pem-pmu
+ fujitsu_uncore_pmu
diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
index 2f666a7c303c..e4f953839f71 100644
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -466,6 +466,17 @@ Before jumping into the kernel, the following conditions must be met:
- HDFGWTR2_EL2.nPMICFILTR_EL0 (bit 3) must be initialised to 0b1.
- HDFGWTR2_EL2.nPMUACR_EL1 (bit 4) must be initialised to 0b1.
+ For CPUs with SPE data source filtering (FEAT_SPE_FDS):
+
+ - If EL3 is present:
+
+ - MDCR_EL3.EnPMS3 (bit 42) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HDFGRTR2_EL2.nPMSDSFR_EL1 (bit 19) must be initialised to 0b1.
+ - HDFGWTR2_EL2.nPMSDSFR_EL1 (bit 19) must be initialised to 0b1.
+
For CPUs with Memory Copy and Memory Set instructions (FEAT_MOPS):
- If the kernel is entered at EL1 and EL2 is present:
diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
index 8597ea625edb..d2e578d6b83b 100644
--- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
+++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
@@ -33,6 +33,7 @@ properties:
- items:
- enum:
- fsl,imx91-ddr-pmu
+ - fsl,imx94-ddr-pmu
- fsl,imx95-ddr-pmu
- const: fsl,imx93-ddr-pmu
diff --git a/MAINTAINERS b/MAINTAINERS
index fe168477caa4..c482d641fbe1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9744,11 +9744,14 @@ F: drivers/video/fbdev/imxfb.c
FREESCALE IMX DDR PMU DRIVER
M: Frank Li <Frank.li@nxp.com>
+M: Xu Yang <xu.yang_2@nxp.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: Documentation/admin-guide/perf/imx-ddr.rst
F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
F: drivers/perf/fsl_imx8_ddr_perf.c
+F: drivers/perf/fsl_imx9_ddr_perf.c
+F: tools/perf/pmu-events/arch/arm64/freescale/
FREESCALE IMX I2C DRIVER
M: Oleksij Rempel <o.rempel@pengutronix.de>
@@ -11059,7 +11062,6 @@ F: Documentation/devicetree/bindings/net/hisilicon*.txt
F: drivers/net/ethernet/hisilicon/
HISILICON PMU DRIVER
-M: Yicong Yang <yangyicong@hisilicon.com>
M: Jonathan Cameron <jonathan.cameron@huawei.com>
S: Supported
W: http://www.hisilicon.com
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 46033027510c..b37da3ee8529 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -91,6 +91,14 @@
msr cntvoff_el2, xzr // Clear virtual offset
.endm
+/* Branch to skip_label if the SPE version is less than the given version */
+.macro __spe_vers_imp skip_label, version, tmp
+ mrs \tmp, id_aa64dfr0_el1
+ ubfx \tmp, \tmp, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
+ cmp \tmp, \version
+ b.lt \skip_label
+.endm
+
.macro __init_el2_debug
mrs x1, id_aa64dfr0_el1
ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
@@ -103,8 +111,7 @@
csel x2, xzr, x0, eq // all PMU counters from EL1
/* Statistical profiling */
- ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
- cbz x0, .Lskip_spe_\@ // Skip if SPE not present
+ __spe_vers_imp .Lskip_spe_\@, ID_AA64DFR0_EL1_PMSVer_IMP, x0 // Skip if SPE not present
mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2,
and x0, x0, #(1 << PMBIDR_EL1_P_SHIFT)
@@ -263,10 +270,8 @@
mov x0, xzr
mov x2, xzr
- mrs x1, id_aa64dfr0_el1
- ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
- cmp x1, #3
- b.lt .Lskip_spe_fgt_\@
+ /* If SPEv1p2 is implemented, */
+ __spe_vers_imp .Lskip_spe_fgt_\@, #ID_AA64DFR0_EL1_PMSVer_V1P2, x1
/* Disable PMSNEVFR_EL1 read and write traps */
orr x0, x0, #HDFGRTR_EL2_nPMSNEVFR_EL1_MASK
orr x2, x2, #HDFGWTR_EL2_nPMSNEVFR_EL1_MASK
@@ -387,6 +392,17 @@
orr x0, x0, #HDFGRTR2_EL2_nPMICFILTR_EL0
orr x0, x0, #HDFGRTR2_EL2_nPMUACR_EL1
.Lskip_pmuv3p9_\@:
+ /* If SPE is implemented, */
+ __spe_vers_imp .Lskip_spefds_\@, ID_AA64DFR0_EL1_PMSVer_IMP, x1
+ /* we can read PMSIDR and */
+ mrs_s x1, SYS_PMSIDR_EL1
+ and x1, x1, #PMSIDR_EL1_FDS
+ /* if FEAT_SPE_FDS is implemented, */
+ cbz x1, .Lskip_spefds_\@
+ /* disable traps of PMSDSFR to EL2. */
+ orr x0, x0, #HDFGRTR2_EL2_nPMSDSFR_EL1
+
+.Lskip_spefds_\@:
msr_s SYS_HDFGRTR2_EL2, x0
msr_s SYS_HDFGWTR2_EL2, x0
msr_s SYS_HFGRTR2_EL2, xzr
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d5b5f2ae1afa..20cbd9860c8f 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -344,15 +344,6 @@
#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56)
#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52))
-/*** Statistical Profiling Extension ***/
-#define PMSEVFR_EL1_RES0_IMP \
- (GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) |\
- BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0))
-#define PMSEVFR_EL1_RES0_V1P1 \
- (PMSEVFR_EL1_RES0_IMP & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11)))
-#define PMSEVFR_EL1_RES0_V1P2 \
- (PMSEVFR_EL1_RES0_V1P1 & ~BIT_ULL(6))
-
/* Buffer error reporting */
#define PMBSR_EL1_FAULT_FSC_SHIFT PMBSR_EL1_MSS_SHIFT
#define PMBSR_EL1_FAULT_FSC_MASK PMBSR_EL1_MSS_MASK
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 696ab1f32a67..b743fc8ffe5d 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -2994,11 +2994,20 @@ Field 0 RND
EndSysreg
Sysreg PMSFCR_EL1 3 0 9 9 4
-Res0 63:19
+Res0 63:53
+Field 52 SIMDm
+Field 51 FPm
+Field 50 STm
+Field 49 LDm
+Field 48 Bm
+Res0 47:21
+Field 20 SIMD
+Field 19 FP
Field 18 ST
Field 17 LD
Field 16 B
-Res0 15:4
+Res0 15:5
+Field 4 FDS
Field 3 FnE
Field 2 FL
Field 1 FT
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
index 8267dd1a2130..8f426f94e32a 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.c
+++ b/drivers/hwtracing/coresight/coresight-trbe.c
@@ -23,7 +23,8 @@
#include "coresight-self-hosted-trace.h"
#include "coresight-trbe.h"
-#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT))
+#define PERF_IDX2OFF(idx, buf) \
+ ((idx) % ((unsigned long)(buf)->nr_pages << PAGE_SHIFT))
/*
* A padding packet that will help the user space tools
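The PERF_IDX2OFF() change above (mirrored in arm_spe_pmu.c later in this diff)
widens the shift so the buffer size is computed in 64-bit arithmetic. A minimal
user-space sketch of the arithmetic, assuming a 4 KiB page size (plain C, not
kernel code):

    #include <stdio.h>

    int main(void)
    {
        unsigned int nr_pages = 1u << 20;              /* 1 Mi pages of 4 KiB = a 4 GiB buffer */
        unsigned long long idx = (1ull << 32) + 100;   /* free-running index past 4 GiB */

        /* Shift done in 32-bit unsigned arithmetic: 0x100000 << 12 wraps to 0 */
        unsigned long long narrow = nr_pages << 12;
        /* Shift done in 64-bit arithmetic, as the fixed macro does */
        unsigned long long wide = (unsigned long long)nr_pages << 12;

        printf("narrow=%llu wide=%llu offset=%llu\n", narrow, wide, idx % wide);
        return 0;
    }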
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index a9188dec36fe..638321fc9800 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -178,6 +178,15 @@ config FSL_IMX9_DDR_PMU
can give information about memory throughput and other related
events.
+config FUJITSU_UNCORE_PMU
+ tristate "Fujitsu Uncore PMU"
+ depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT)
+ help
+ Provides support for the Uncore performance monitor unit (PMU)
+ in Fujitsu processors.
+ Adds the Uncore PMU into the perf events subsystem for
+ monitoring Uncore events.
+
config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
depends on ARCH_QCOM && ARM64 && ACPI
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 192fc8b16204..ea52711a87e3 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARM_XSCALE_PMU) += arm_xscale_pmu.o
obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o
+obj-$(CONFIG_FUJITSU_UNCORE_PMU) += fujitsu_uncore_pmu.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 1a0d0e1a2263..8af3563fdf60 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -565,7 +565,7 @@ module_param_named(pmu_poll_period_us, arm_ccn_pmu_poll_period_us, uint,
static ktime_t arm_ccn_pmu_timer_period(void)
{
- return ns_to_ktime((u64)arm_ccn_pmu_poll_period_us * 1000);
+ return us_to_ktime((u64)arm_ccn_pmu_poll_period_us);
}
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 11fb2234b10f..23245352a3fc 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -65,7 +65,7 @@
/* PMU registers occupy the 3rd 4KB page of each node's region */
#define CMN_PMU_OFFSET 0x2000
/* ...except when they don't :( */
-#define CMN_S3_DTM_OFFSET 0xa000
+#define CMN_S3_R1_DTM_OFFSET 0xa000
#define CMN_S3_PMU_OFFSET 0xd900
/* For most nodes, this is all there is */
@@ -233,6 +233,9 @@ enum cmn_revision {
REV_CMN700_R1P0,
REV_CMN700_R2P0,
REV_CMN700_R3P0,
+ REV_CMNS3_R0P0 = 0,
+ REV_CMNS3_R0P1,
+ REV_CMNS3_R1P0,
REV_CI700_R0P0 = 0,
REV_CI700_R1P0,
REV_CI700_R2P0,
@@ -425,8 +428,8 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn)
{
if (cmn->part == PART_CMN_S3) {
- if (dn->type == CMN_TYPE_XP)
- return CMN_S3_DTM_OFFSET;
+ if (cmn->rev >= REV_CMNS3_R1P0 && dn->type == CMN_TYPE_XP)
+ return CMN_S3_R1_DTM_OFFSET;
return CMN_S3_PMU_OFFSET;
}
return CMN_PMU_OFFSET;
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index f6d7bab5d555..69c5cc8f5606 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -978,6 +978,32 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
return -EAGAIN;
}
+static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
+
+ if (evtype != ARMV8_PMUV3_PERFCTR_CPU_CYCLES)
+ return false;
+
+ /*
+ * A CPU_CYCLES event with threshold counting cannot use PMCCNTR_EL0
+ * since it lacks threshold support.
+ */
+ if (armv8pmu_event_get_threshold(&event->attr))
+ return false;
+
+ /*
+ * PMCCNTR_EL0 is not affected by BRBE controls like BRBCR_ELx.FZP.
+ * So don't use it for branch events.
+ */
+ if (has_branch_stack(event))
+ return false;
+
+ return true;
+}
+
static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
struct perf_event *event)
{
@@ -986,8 +1012,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
/* Always prefer to place a cycle counter into the cycle counter. */
- if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) &&
- !armv8pmu_event_get_threshold(&event->attr) && !has_branch_stack(event)) {
+ if (armv8pmu_can_use_pmccntr(cpuc, event)) {
if (!test_and_set_bit(ARMV8_PMU_CYCLE_IDX, cpuc->used_mask))
return ARMV8_PMU_CYCLE_IDX;
else if (armv8pmu_event_is_64bit(event) &&
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 369e77ad5f13..fa50645fedda 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -86,9 +86,11 @@ struct arm_spe_pmu {
#define SPE_PMU_FEAT_ERND (1UL << 5)
#define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6)
#define SPE_PMU_FEAT_DISCARD (1UL << 7)
+#define SPE_PMU_FEAT_EFT (1UL << 8)
#define SPE_PMU_FEAT_DEV_PROBED (1UL << 63)
u64 features;
+ u64 pmsevfr_res0;
u16 max_record_sz;
u16 align;
struct perf_output_handle __percpu *handle;
@@ -97,7 +99,8 @@ struct arm_spe_pmu {
#define to_spe_pmu(p) (container_of(p, struct arm_spe_pmu, pmu))
/* Convert a free-running index from perf into an SPE buffer offset */
-#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT))
+#define PERF_IDX2OFF(idx, buf) \
+ ((idx) % ((unsigned long)(buf)->nr_pages << PAGE_SHIFT))
/* Keep track of our dynamic hotplug state */
static enum cpuhp_state arm_spe_pmu_online;
@@ -115,6 +118,7 @@ enum arm_spe_pmu_capabilities {
SPE_PMU_CAP_FEAT_MAX,
SPE_PMU_CAP_CNT_SZ = SPE_PMU_CAP_FEAT_MAX,
SPE_PMU_CAP_MIN_IVAL,
+ SPE_PMU_CAP_EVENT_FILTER,
};
static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
@@ -122,7 +126,7 @@ static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
[SPE_PMU_CAP_ERND] = SPE_PMU_FEAT_ERND,
};
-static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
+static u64 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
{
if (cap < SPE_PMU_CAP_FEAT_MAX)
return !!(spe_pmu->features & arm_spe_pmu_feat_caps[cap]);
@@ -132,6 +136,8 @@ static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
return spe_pmu->counter_sz;
case SPE_PMU_CAP_MIN_IVAL:
return spe_pmu->min_period;
+ case SPE_PMU_CAP_EVENT_FILTER:
+ return ~spe_pmu->pmsevfr_res0;
default:
WARN(1, "unknown cap %d\n", cap);
}
@@ -148,7 +154,19 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev,
container_of(attr, struct dev_ext_attribute, attr);
int cap = (long)ea->var;
- return sysfs_emit(buf, "%u\n", arm_spe_pmu_cap_get(spe_pmu, cap));
+ return sysfs_emit(buf, "%llu\n", arm_spe_pmu_cap_get(spe_pmu, cap));
+}
+
+static ssize_t arm_spe_pmu_cap_show_hex(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
+ struct dev_ext_attribute *ea =
+ container_of(attr, struct dev_ext_attribute, attr);
+ int cap = (long)ea->var;
+
+ return sysfs_emit(buf, "0x%llx\n", arm_spe_pmu_cap_get(spe_pmu, cap));
}
#define SPE_EXT_ATTR_ENTRY(_name, _func, _var) \
@@ -158,12 +176,15 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev,
#define SPE_CAP_EXT_ATTR_ENTRY(_name, _var) \
SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show, _var)
+#define SPE_CAP_EXT_ATTR_ENTRY_HEX(_name, _var) \
+ SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show_hex, _var)
static struct attribute *arm_spe_pmu_cap_attr[] = {
SPE_CAP_EXT_ATTR_ENTRY(arch_inst, SPE_PMU_CAP_ARCH_INST),
SPE_CAP_EXT_ATTR_ENTRY(ernd, SPE_PMU_CAP_ERND),
SPE_CAP_EXT_ATTR_ENTRY(count_size, SPE_PMU_CAP_CNT_SZ),
SPE_CAP_EXT_ATTR_ENTRY(min_interval, SPE_PMU_CAP_MIN_IVAL),
+ SPE_CAP_EXT_ATTR_ENTRY_HEX(event_filter, SPE_PMU_CAP_EVENT_FILTER),
NULL,
};
@@ -197,6 +218,27 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
#define ATTR_CFG_FLD_discard_CFG config /* PMBLIMITR_EL1.FM = DISCARD */
#define ATTR_CFG_FLD_discard_LO 35
#define ATTR_CFG_FLD_discard_HI 35
+#define ATTR_CFG_FLD_branch_filter_mask_CFG config /* PMSFCR_EL1.Bm */
+#define ATTR_CFG_FLD_branch_filter_mask_LO 36
+#define ATTR_CFG_FLD_branch_filter_mask_HI 36
+#define ATTR_CFG_FLD_load_filter_mask_CFG config /* PMSFCR_EL1.LDm */
+#define ATTR_CFG_FLD_load_filter_mask_LO 37
+#define ATTR_CFG_FLD_load_filter_mask_HI 37
+#define ATTR_CFG_FLD_store_filter_mask_CFG config /* PMSFCR_EL1.STm */
+#define ATTR_CFG_FLD_store_filter_mask_LO 38
+#define ATTR_CFG_FLD_store_filter_mask_HI 38
+#define ATTR_CFG_FLD_simd_filter_CFG config /* PMSFCR_EL1.SIMD */
+#define ATTR_CFG_FLD_simd_filter_LO 39
+#define ATTR_CFG_FLD_simd_filter_HI 39
+#define ATTR_CFG_FLD_simd_filter_mask_CFG config /* PMSFCR_EL1.SIMDm */
+#define ATTR_CFG_FLD_simd_filter_mask_LO 40
+#define ATTR_CFG_FLD_simd_filter_mask_HI 40
+#define ATTR_CFG_FLD_float_filter_CFG config /* PMSFCR_EL1.FP */
+#define ATTR_CFG_FLD_float_filter_LO 41
+#define ATTR_CFG_FLD_float_filter_HI 41
+#define ATTR_CFG_FLD_float_filter_mask_CFG config /* PMSFCR_EL1.FPm */
+#define ATTR_CFG_FLD_float_filter_mask_LO 42
+#define ATTR_CFG_FLD_float_filter_mask_HI 42
#define ATTR_CFG_FLD_event_filter_CFG config1 /* PMSEVFR_EL1 */
#define ATTR_CFG_FLD_event_filter_LO 0
@@ -215,8 +257,15 @@ GEN_PMU_FORMAT_ATTR(pa_enable);
GEN_PMU_FORMAT_ATTR(pct_enable);
GEN_PMU_FORMAT_ATTR(jitter);
GEN_PMU_FORMAT_ATTR(branch_filter);
+GEN_PMU_FORMAT_ATTR(branch_filter_mask);
GEN_PMU_FORMAT_ATTR(load_filter);
+GEN_PMU_FORMAT_ATTR(load_filter_mask);
GEN_PMU_FORMAT_ATTR(store_filter);
+GEN_PMU_FORMAT_ATTR(store_filter_mask);
+GEN_PMU_FORMAT_ATTR(simd_filter);
+GEN_PMU_FORMAT_ATTR(simd_filter_mask);
+GEN_PMU_FORMAT_ATTR(float_filter);
+GEN_PMU_FORMAT_ATTR(float_filter_mask);
GEN_PMU_FORMAT_ATTR(event_filter);
GEN_PMU_FORMAT_ATTR(inv_event_filter);
GEN_PMU_FORMAT_ATTR(min_latency);
@@ -228,8 +277,15 @@ static struct attribute *arm_spe_pmu_formats_attr[] = {
&format_attr_pct_enable.attr,
&format_attr_jitter.attr,
&format_attr_branch_filter.attr,
+ &format_attr_branch_filter_mask.attr,
&format_attr_load_filter.attr,
+ &format_attr_load_filter_mask.attr,
&format_attr_store_filter.attr,
+ &format_attr_store_filter_mask.attr,
+ &format_attr_simd_filter.attr,
+ &format_attr_simd_filter_mask.attr,
+ &format_attr_float_filter.attr,
+ &format_attr_float_filter_mask.attr,
&format_attr_event_filter.attr,
&format_attr_inv_event_filter.attr,
&format_attr_min_latency.attr,
@@ -250,6 +306,16 @@ static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj,
if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
return 0;
+ if ((attr == &format_attr_branch_filter_mask.attr ||
+ attr == &format_attr_load_filter_mask.attr ||
+ attr == &format_attr_store_filter_mask.attr ||
+ attr == &format_attr_simd_filter.attr ||
+ attr == &format_attr_simd_filter_mask.attr ||
+ attr == &format_attr_float_filter.attr ||
+ attr == &format_attr_float_filter_mask.attr) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_EFT))
+ return 0;
+
return attr->mode;
}
@@ -345,8 +411,15 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
u64 reg = 0;
reg |= FIELD_PREP(PMSFCR_EL1_LD, ATTR_CFG_GET_FLD(attr, load_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_LDm, ATTR_CFG_GET_FLD(attr, load_filter_mask));
reg |= FIELD_PREP(PMSFCR_EL1_ST, ATTR_CFG_GET_FLD(attr, store_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_STm, ATTR_CFG_GET_FLD(attr, store_filter_mask));
reg |= FIELD_PREP(PMSFCR_EL1_B, ATTR_CFG_GET_FLD(attr, branch_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_Bm, ATTR_CFG_GET_FLD(attr, branch_filter_mask));
+ reg |= FIELD_PREP(PMSFCR_EL1_SIMD, ATTR_CFG_GET_FLD(attr, simd_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_SIMDm, ATTR_CFG_GET_FLD(attr, simd_filter_mask));
+ reg |= FIELD_PREP(PMSFCR_EL1_FP, ATTR_CFG_GET_FLD(attr, float_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_FPm, ATTR_CFG_GET_FLD(attr, float_filter_mask));
if (reg)
reg |= PMSFCR_EL1_FT;
@@ -697,20 +770,6 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
return IRQ_HANDLED;
}
-static u64 arm_spe_pmsevfr_res0(u16 pmsver)
-{
- switch (pmsver) {
- case ID_AA64DFR0_EL1_PMSVer_IMP:
- return PMSEVFR_EL1_RES0_IMP;
- case ID_AA64DFR0_EL1_PMSVer_V1P1:
- return PMSEVFR_EL1_RES0_V1P1;
- case ID_AA64DFR0_EL1_PMSVer_V1P2:
- /* Return the highest version we support in default */
- default:
- return PMSEVFR_EL1_RES0_V1P2;
- }
-}
-
/* Perf callbacks */
static int arm_spe_pmu_event_init(struct perf_event *event)
{
@@ -726,10 +785,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
!cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
return -ENOENT;
- if (arm_spe_event_to_pmsevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
+ if (arm_spe_event_to_pmsevfr(event) & spe_pmu->pmsevfr_res0)
return -EOPNOTSUPP;
- if (arm_spe_event_to_pmsnevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
+ if (arm_spe_event_to_pmsnevfr(event) & spe_pmu->pmsevfr_res0)
return -EOPNOTSUPP;
if (attr->exclude_idle)
@@ -762,6 +821,16 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
!(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
return -EOPNOTSUPP;
+ if ((FIELD_GET(PMSFCR_EL1_LDm, reg) ||
+ FIELD_GET(PMSFCR_EL1_STm, reg) ||
+ FIELD_GET(PMSFCR_EL1_Bm, reg) ||
+ FIELD_GET(PMSFCR_EL1_SIMD, reg) ||
+ FIELD_GET(PMSFCR_EL1_SIMDm, reg) ||
+ FIELD_GET(PMSFCR_EL1_FP, reg) ||
+ FIELD_GET(PMSFCR_EL1_FPm, reg)) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_EFT))
+ return -EOPNOTSUPP;
+
if (ATTR_CFG_GET_FLD(&event->attr, discard) &&
!(spe_pmu->features & SPE_PMU_FEAT_DISCARD))
return -EOPNOTSUPP;
@@ -1053,6 +1122,9 @@ static void __arm_spe_pmu_dev_probe(void *info)
if (spe_pmu->pmsver >= ID_AA64DFR0_EL1_PMSVer_V1P2)
spe_pmu->features |= SPE_PMU_FEAT_DISCARD;
+ if (FIELD_GET(PMSIDR_EL1_EFT, reg))
+ spe_pmu->features |= SPE_PMU_FEAT_EFT;
+
/* This field has a spaced out encoding, so just use a look-up */
fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg);
switch (fld) {
@@ -1107,6 +1179,10 @@ static void __arm_spe_pmu_dev_probe(void *info)
spe_pmu->counter_sz = 16;
}
+ /* Write all 1s and then read back. Unsupported filter bits are RAZ/WI. */
+ write_sysreg_s(U64_MAX, SYS_PMSEVFR_EL1);
+ spe_pmu->pmsevfr_res0 = ~read_sysreg_s(SYS_PMSEVFR_EL1);
+
dev_info(dev,
"probed SPEv1.%d for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
spe_pmu->pmsver - 1, cpumask_pr_args(&spe_pmu->supported_cpus),
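As an illustration of the new filter format attributes (hedged: the PMU instance
name and workload are placeholders, and the options are only exposed when the
driver detects SPE_PMU_FEAT_EFT), profiling with the SIMD operation filter and
its mask bit set might look like::

  perf record -e arm_spe_0/simd_filter=1,simd_filter_mask=1/ -- ./my_workload

The exact filtering semantics of the PMSFCR_EL1 bits are defined by the
architecture, not by this patch.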
diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c
index 146ff57813fb..22f73ac894e9 100644
--- a/drivers/perf/dwc_pcie_pmu.c
+++ b/drivers/perf/dwc_pcie_pmu.c
@@ -39,6 +39,10 @@
#define DWC_PCIE_EVENT_CLEAR GENMASK(1, 0)
#define DWC_PCIE_EVENT_PER_CLEAR 0x1
+/* Event Selection Field has two subfields */
+#define DWC_PCIE_CNT_EVENT_SEL_GROUP GENMASK(11, 8)
+#define DWC_PCIE_CNT_EVENT_SEL_EVID GENMASK(7, 0)
+
#define DWC_PCIE_EVENT_CNT_DATA 0xC
#define DWC_PCIE_TIME_BASED_ANAL_CTL 0x10
@@ -73,6 +77,10 @@ enum dwc_pcie_event_type {
DWC_PCIE_EVENT_TYPE_MAX,
};
+#define DWC_PCIE_LANE_GROUP_6 6
+#define DWC_PCIE_LANE_GROUP_7 7
+#define DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP 256
+
#define DWC_PCIE_LANE_EVENT_MAX_PERIOD GENMASK_ULL(31, 0)
#define DWC_PCIE_MAX_PERIOD GENMASK_ULL(63, 0)
@@ -82,8 +90,11 @@ struct dwc_pcie_pmu {
u16 ras_des_offset;
u32 nr_lanes;
+ /* Groups #6 and #7 */
+ DECLARE_BITMAP(lane_events, 2 * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP);
+ struct perf_event *time_based_event;
+
struct hlist_node cpuhp_node;
- struct perf_event *event[DWC_PCIE_EVENT_TYPE_MAX];
int on_cpu;
};
@@ -246,19 +257,26 @@ static const struct attribute_group *dwc_pcie_attr_groups[] = {
};
static void dwc_pcie_pmu_lane_event_enable(struct dwc_pcie_pmu *pcie_pmu,
+ struct perf_event *event,
bool enable)
{
struct pci_dev *pdev = pcie_pmu->pdev;
u16 ras_des_offset = pcie_pmu->ras_des_offset;
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ int lane = DWC_PCIE_EVENT_LANE(event);
+ u32 ctrl;
+
+ ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
+ FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
+ FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
if (enable)
- pci_clear_and_set_config_dword(pdev,
- ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
- DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
+ ctrl |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
else
- pci_clear_and_set_config_dword(pdev,
- ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
- DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF);
+ ctrl |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF);
+
+ pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ ctrl);
}
static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu,
@@ -276,11 +294,22 @@ static u64 dwc_pcie_pmu_read_lane_event_counter(struct perf_event *event)
{
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
struct pci_dev *pdev = pcie_pmu->pdev;
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ int lane = DWC_PCIE_EVENT_LANE(event);
u16 ras_des_offset = pcie_pmu->ras_des_offset;
- u32 val;
+ u32 val, ctrl;
+ ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
+ FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
+ FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
+ pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ ctrl);
pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_DATA, &val);
+ ctrl |= FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
+ pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ ctrl);
+
return val;
}
@@ -329,26 +358,77 @@ static void dwc_pcie_pmu_event_update(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
- u64 delta, prev, now = 0;
+ u64 delta, prev, now;
+
+ if (type == DWC_PCIE_LANE_EVENT) {
+ now = dwc_pcie_pmu_read_lane_event_counter(event) &
+ DWC_PCIE_LANE_EVENT_MAX_PERIOD;
+ local64_add(now, &event->count);
+ return;
+ }
do {
prev = local64_read(&hwc->prev_count);
-
- if (type == DWC_PCIE_LANE_EVENT)
- now = dwc_pcie_pmu_read_lane_event_counter(event);
- else if (type == DWC_PCIE_TIME_BASE_EVENT)
- now = dwc_pcie_pmu_read_time_based_counter(event);
+ now = dwc_pcie_pmu_read_time_based_counter(event);
} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
delta = (now - prev) & DWC_PCIE_MAX_PERIOD;
- /* 32-bit counter for Lane Event Counting */
- if (type == DWC_PCIE_LANE_EVENT)
- delta &= DWC_PCIE_LANE_EVENT_MAX_PERIOD;
-
local64_add(delta, &event->count);
}
+static int dwc_pcie_pmu_validate_add_lane_event(struct perf_event *event,
+ unsigned long val_lane_events[])
+{
+ int event_id, event_nr, group;
+
+ event_id = DWC_PCIE_EVENT_ID(event);
+ event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id);
+ group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id);
+
+ if (group != DWC_PCIE_LANE_GROUP_6 && group != DWC_PCIE_LANE_GROUP_7)
+ return -EINVAL;
+
+ group -= DWC_PCIE_LANE_GROUP_6;
+
+ if (test_and_set_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr,
+ val_lane_events))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int dwc_pcie_pmu_validate_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ DECLARE_BITMAP(val_lane_events, 2 * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP);
+ bool time_event = false;
+ int type;
+
+ type = DWC_PCIE_EVENT_TYPE(leader);
+ if (type == DWC_PCIE_TIME_BASE_EVENT)
+ time_event = true;
+ else
+ if (dwc_pcie_pmu_validate_add_lane_event(leader, val_lane_events))
+ return -ENOSPC;
+
+ for_each_sibling_event(sibling, leader) {
+ type = DWC_PCIE_EVENT_TYPE(sibling);
+ if (type == DWC_PCIE_TIME_BASE_EVENT) {
+ if (time_event)
+ return -ENOSPC;
+
+ time_event = true;
+ continue;
+ }
+
+ if (dwc_pcie_pmu_validate_add_lane_event(sibling, val_lane_events))
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
static int dwc_pcie_pmu_event_init(struct perf_event *event)
{
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
@@ -367,10 +447,6 @@ static int dwc_pcie_pmu_event_init(struct perf_event *event)
if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK)
return -EINVAL;
- if (event->group_leader != event &&
- !is_software_event(event->group_leader))
- return -EINVAL;
-
for_each_sibling_event(sibling, event->group_leader) {
if (sibling->pmu != event->pmu && !is_software_event(sibling))
return -EINVAL;
@@ -385,6 +461,9 @@ static int dwc_pcie_pmu_event_init(struct perf_event *event)
return -EINVAL;
}
+ if (dwc_pcie_pmu_validate_group(event))
+ return -ENOSPC;
+
event->cpu = pcie_pmu->on_cpu;
return 0;
@@ -400,7 +479,7 @@ static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags)
local64_set(&hwc->prev_count, 0);
if (type == DWC_PCIE_LANE_EVENT)
- dwc_pcie_pmu_lane_event_enable(pcie_pmu, true);
+ dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, true);
else if (type == DWC_PCIE_TIME_BASE_EVENT)
dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true);
}
@@ -414,12 +493,13 @@ static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags)
if (event->hw.state & PERF_HES_STOPPED)
return;
+ dwc_pcie_pmu_event_update(event);
+
if (type == DWC_PCIE_LANE_EVENT)
- dwc_pcie_pmu_lane_event_enable(pcie_pmu, false);
+ dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, false);
else if (type == DWC_PCIE_TIME_BASE_EVENT)
dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false);
- dwc_pcie_pmu_event_update(event);
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}
@@ -434,14 +514,17 @@ static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags)
u16 ras_des_offset = pcie_pmu->ras_des_offset;
u32 ctrl;
- /* one counter for each type and it is in use */
- if (pcie_pmu->event[type])
- return -ENOSPC;
-
- pcie_pmu->event[type] = event;
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
if (type == DWC_PCIE_LANE_EVENT) {
+ int event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id);
+ int group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id) -
+ DWC_PCIE_LANE_GROUP_6;
+
+ if (test_and_set_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr,
+ pcie_pmu->lane_events))
+ return -ENOSPC;
+
/* EVENT_COUNTER_DATA_REG needs clear manually */
ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
@@ -450,6 +533,11 @@ static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags)
pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
ctrl);
} else if (type == DWC_PCIE_TIME_BASE_EVENT) {
+ if (pcie_pmu->time_based_event)
+ return -ENOSPC;
+
+ pcie_pmu->time_based_event = event;
+
/*
* TIME_BASED_ANAL_DATA_REG is a 64 bit register, we can safely
* use it with any manually controlled duration. And it is
@@ -478,7 +566,18 @@ static void dwc_pcie_pmu_event_del(struct perf_event *event, int flags)
dwc_pcie_pmu_event_stop(event, flags | PERF_EF_UPDATE);
perf_event_update_userpage(event);
- pcie_pmu->event[type] = NULL;
+
+ if (type == DWC_PCIE_TIME_BASE_EVENT) {
+ pcie_pmu->time_based_event = NULL;
+ } else {
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ int event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id);
+ int group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id) -
+ DWC_PCIE_LANE_GROUP_6;
+
+ clear_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr,
+ pcie_pmu->lane_events);
+ }
}
static void dwc_pcie_pmu_remove_cpuhp_instance(void *hotplug_node)
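For reference, a hedged user-space sketch of how the lane-event bookkeeping above
decomposes the Event Selection Field into a bitmap slot (the example event ids
are illustrative):

    /* Event Selection Field layout used above: group in bits 11:8, event id in bits 7:0 */
    #include <stdio.h>

    static unsigned int lane_event_bit(unsigned int event_sel)
    {
        unsigned int evid  = event_sel & 0xff;         /* DWC_PCIE_CNT_EVENT_SEL_EVID  */
        unsigned int group = (event_sel >> 8) & 0xf;   /* DWC_PCIE_CNT_EVENT_SEL_GROUP */

        /* Only groups 6 and 7 hold lane events; map them onto bits 0..511 */
        return (group - 6) * 256 + evid;
    }

    int main(void)
    {
        printf("0x601 -> bit %u\n", lane_event_bit(0x601));  /* group 6, event 0x01 -> bit 1   */
        printf("0x702 -> bit %u\n", lane_event_bit(0x702));  /* group 7, event 0x02 -> bit 258 */
        return 0;
    }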
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
index 267754fdf581..7050b48c0467 100644
--- a/drivers/perf/fsl_imx9_ddr_perf.c
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -104,6 +104,11 @@ static const struct imx_ddr_devtype_data imx93_devtype_data = {
.filter_ver = DDR_PERF_AXI_FILTER_V1
};
+static const struct imx_ddr_devtype_data imx94_devtype_data = {
+ .identifier = "imx94",
+ .filter_ver = DDR_PERF_AXI_FILTER_V2
+};
+
static const struct imx_ddr_devtype_data imx95_devtype_data = {
.identifier = "imx95",
.filter_ver = DDR_PERF_AXI_FILTER_V2
@@ -122,6 +127,7 @@ static inline bool axi_filter_v2(struct ddr_pmu *pmu)
static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
{ .compatible = "fsl,imx91-ddr-pmu", .data = &imx91_devtype_data },
{ .compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data },
+ { .compatible = "fsl,imx94-ddr-pmu", .data = &imx94_devtype_data },
{ .compatible = "fsl,imx95-ddr-pmu", .data = &imx95_devtype_data },
{ /* sentinel */ }
};
diff --git a/drivers/perf/fujitsu_uncore_pmu.c b/drivers/perf/fujitsu_uncore_pmu.c
new file mode 100644
index 000000000000..c3c6f56474ad
--- /dev/null
+++ b/drivers/perf/fujitsu_uncore_pmu.c
@@ -0,0 +1,613 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for the Uncore PMUs in Fujitsu chips.
+ *
+ * See Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst for more details.
+ *
+ * Copyright (c) 2025 Fujitsu. All rights reserved.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+/* Number of counters on each PMU */
+#define MAC_NUM_COUNTERS 8
+#define PCI_NUM_COUNTERS 8
+/* Mask for the event type field within perf_event_attr.config and EVTYPE reg */
+#define UNCORE_EVTYPE_MASK 0xFF
+
+/* Perfmon registers */
+#define PM_EVCNTR(__cntr) (0x000 + (__cntr) * 8)
+#define PM_CNTCTL(__cntr) (0x100 + (__cntr) * 8)
+#define PM_CNTCTL_RESET 0
+#define PM_EVTYPE(__cntr) (0x200 + (__cntr) * 8)
+#define PM_EVTYPE_EVSEL(__val) FIELD_GET(UNCORE_EVTYPE_MASK, __val)
+#define PM_CR 0x400
+#define PM_CR_RESET BIT(1)
+#define PM_CR_ENABLE BIT(0)
+#define PM_CNTENSET 0x410
+#define PM_CNTENSET_IDX(__cntr) BIT(__cntr)
+#define PM_CNTENCLR 0x418
+#define PM_CNTENCLR_IDX(__cntr) BIT(__cntr)
+#define PM_CNTENCLR_RESET 0xFF
+#define PM_INTENSET 0x420
+#define PM_INTENSET_IDX(__cntr) BIT(__cntr)
+#define PM_INTENCLR 0x428
+#define PM_INTENCLR_IDX(__cntr) BIT(__cntr)
+#define PM_INTENCLR_RESET 0xFF
+#define PM_OVSR 0x440
+#define PM_OVSR_OVSRCLR_RESET 0xFF
+
+enum fujitsu_uncore_pmu {
+ FUJITSU_UNCORE_PMU_MAC = 1,
+ FUJITSU_UNCORE_PMU_PCI = 2,
+};
+
+struct uncore_pmu {
+ int num_counters;
+ struct pmu pmu;
+ struct hlist_node node;
+ void __iomem *regs;
+ struct perf_event **events;
+ unsigned long *used_mask;
+ int cpu;
+ int irq;
+ struct device *dev;
+};
+
+#define to_uncore_pmu(p) (container_of(p, struct uncore_pmu, pmu))
+
+static int uncore_pmu_cpuhp_state;
+
+static void fujitsu_uncore_counter_start(struct perf_event *event)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ int idx = event->hw.idx;
+
+	/* Initialize the hardware counter and reset prev_count */
+ local64_set(&event->hw.prev_count, 0);
+ writeq_relaxed(0, uncorepmu->regs + PM_EVCNTR(idx));
+
+ /* Set the event type */
+ writeq_relaxed(PM_EVTYPE_EVSEL(event->attr.config), uncorepmu->regs + PM_EVTYPE(idx));
+
+ /* Enable interrupt generation by this counter */
+ writeq_relaxed(PM_INTENSET_IDX(idx), uncorepmu->regs + PM_INTENSET);
+
+ /* Finally, enable the counter */
+ writeq_relaxed(PM_CNTCTL_RESET, uncorepmu->regs + PM_CNTCTL(idx));
+ writeq_relaxed(PM_CNTENSET_IDX(idx), uncorepmu->regs + PM_CNTENSET);
+}
+
+static void fujitsu_uncore_counter_stop(struct perf_event *event)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ int idx = event->hw.idx;
+
+ /* Disable the counter */
+ writeq_relaxed(PM_CNTENCLR_IDX(idx), uncorepmu->regs + PM_CNTENCLR);
+
+ /* Disable interrupt generation by this counter */
+ writeq_relaxed(PM_INTENCLR_IDX(idx), uncorepmu->regs + PM_INTENCLR);
+}
+
+static void fujitsu_uncore_counter_update(struct perf_event *event)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ int idx = event->hw.idx;
+ u64 prev, new;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ new = readq_relaxed(uncorepmu->regs + PM_EVCNTR(idx));
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+ local64_add(new - prev, &event->count);
+}
+
+static inline void fujitsu_uncore_init(struct uncore_pmu *uncorepmu)
+{
+ int i;
+
+ writeq_relaxed(PM_CR_RESET, uncorepmu->regs + PM_CR);
+
+ writeq_relaxed(PM_CNTENCLR_RESET, uncorepmu->regs + PM_CNTENCLR);
+ writeq_relaxed(PM_INTENCLR_RESET, uncorepmu->regs + PM_INTENCLR);
+ writeq_relaxed(PM_OVSR_OVSRCLR_RESET, uncorepmu->regs + PM_OVSR);
+
+ for (i = 0; i < uncorepmu->num_counters; ++i) {
+ writeq_relaxed(PM_CNTCTL_RESET, uncorepmu->regs + PM_CNTCTL(i));
+ writeq_relaxed(PM_EVTYPE_EVSEL(0), uncorepmu->regs + PM_EVTYPE(i));
+ }
+ writeq_relaxed(PM_CR_ENABLE, uncorepmu->regs + PM_CR);
+}
+
+static irqreturn_t fujitsu_uncore_handle_irq(int irq_num, void *data)
+{
+ struct uncore_pmu *uncorepmu = data;
+ /* Read the overflow status register */
+ long status = readq_relaxed(uncorepmu->regs + PM_OVSR);
+ int idx;
+
+ if (status == 0)
+ return IRQ_NONE;
+
+ /* Clear the bits we read on the overflow status register */
+ writeq_relaxed(status, uncorepmu->regs + PM_OVSR);
+
+ for_each_set_bit(idx, &status, uncorepmu->num_counters) {
+ struct perf_event *event;
+
+ event = uncorepmu->events[idx];
+ if (!event)
+ continue;
+
+ fujitsu_uncore_counter_update(event);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void fujitsu_uncore_pmu_enable(struct pmu *pmu)
+{
+ writeq_relaxed(PM_CR_ENABLE, to_uncore_pmu(pmu)->regs + PM_CR);
+}
+
+static void fujitsu_uncore_pmu_disable(struct pmu *pmu)
+{
+ writeq_relaxed(0, to_uncore_pmu(pmu)->regs + PM_CR);
+}
+
+static bool fujitsu_uncore_validate_event_group(struct perf_event *event)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ struct perf_event *leader = event->group_leader;
+ struct perf_event *sibling;
+ int counters = 1;
+
+ if (leader == event)
+ return true;
+
+ if (leader->pmu == event->pmu)
+ counters++;
+
+ for_each_sibling_event(sibling, leader) {
+ if (sibling->pmu == event->pmu)
+ counters++;
+ }
+
+ /*
+ * If the group requires more counters than the HW has, it
+ * cannot ever be scheduled.
+ */
+ return counters <= uncorepmu->num_counters;
+}
+
+static int fujitsu_uncore_event_init(struct perf_event *event)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* Is the event for this PMU? */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * Sampling not supported since these events are not
+ * core-attributable.
+ */
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ /*
+ * Task mode not available, we run the counters as socket counters,
+ * not attributable to any CPU and therefore cannot attribute per-task.
+ */
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ /* Validate the group */
+ if (!fujitsu_uncore_validate_event_group(event))
+ return -EINVAL;
+
+ hwc->idx = -1;
+
+ event->cpu = uncorepmu->cpu;
+
+ return 0;
+}
+
+static void fujitsu_uncore_event_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->state = 0;
+ fujitsu_uncore_counter_start(event);
+}
+
+static void fujitsu_uncore_event_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->state & PERF_HES_STOPPED)
+ return;
+
+ fujitsu_uncore_counter_stop(event);
+ if (flags & PERF_EF_UPDATE)
+ fujitsu_uncore_counter_update(event);
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int fujitsu_uncore_event_add(struct perf_event *event, int flags)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+
+ /* Try to allocate a counter. */
+ idx = bitmap_find_free_region(uncorepmu->used_mask, uncorepmu->num_counters, 0);
+ if (idx < 0)
+ /* The counters are all in use. */
+ return -EAGAIN;
+
+ hwc->idx = idx;
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ uncorepmu->events[idx] = event;
+
+ if (flags & PERF_EF_START)
+ fujitsu_uncore_event_start(event, 0);
+
+ /* Propagate changes to the userspace mapping. */
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static void fujitsu_uncore_event_del(struct perf_event *event, int flags)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* Stop and clean up */
+ fujitsu_uncore_event_stop(event, flags | PERF_EF_UPDATE);
+ uncorepmu->events[hwc->idx] = NULL;
+ bitmap_release_region(uncorepmu->used_mask, hwc->idx, 0);
+
+ /* Propagate changes to the userspace mapping. */
+ perf_event_update_userpage(event);
+}
+
+static void fujitsu_uncore_event_read(struct perf_event *event)
+{
+ fujitsu_uncore_counter_update(event);
+}
+
+#define UNCORE_PMU_FORMAT_ATTR(_name, _config) \
+ (&((struct dev_ext_attribute[]) { \
+ { .attr = __ATTR(_name, 0444, device_show_string, NULL), \
+ .var = (void *)_config, } \
+ })[0].attr.attr)
+
+static struct attribute *fujitsu_uncore_pmu_formats[] = {
+ UNCORE_PMU_FORMAT_ATTR(event, "config:0-7"),
+ NULL
+};
+
+static const struct attribute_group fujitsu_uncore_pmu_format_group = {
+ .name = "format",
+ .attrs = fujitsu_uncore_pmu_formats,
+};
+
+static ssize_t fujitsu_uncore_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define MAC_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, fujitsu_uncore_pmu_event_show, _id)
+
+static struct attribute *fujitsu_uncore_mac_pmu_events[] = {
+ MAC_EVENT_ATTR(cycles, 0x00),
+ MAC_EVENT_ATTR(read-count, 0x10),
+ MAC_EVENT_ATTR(read-count-request, 0x11),
+ MAC_EVENT_ATTR(read-count-return, 0x12),
+ MAC_EVENT_ATTR(read-count-request-pftgt, 0x13),
+ MAC_EVENT_ATTR(read-count-request-normal, 0x14),
+ MAC_EVENT_ATTR(read-count-return-pftgt-hit, 0x15),
+ MAC_EVENT_ATTR(read-count-return-pftgt-miss, 0x16),
+ MAC_EVENT_ATTR(read-wait, 0x17),
+ MAC_EVENT_ATTR(write-count, 0x20),
+ MAC_EVENT_ATTR(write-count-write, 0x21),
+ MAC_EVENT_ATTR(write-count-pwrite, 0x22),
+ MAC_EVENT_ATTR(memory-read-count, 0x40),
+ MAC_EVENT_ATTR(memory-write-count, 0x50),
+ MAC_EVENT_ATTR(memory-pwrite-count, 0x60),
+ MAC_EVENT_ATTR(ea-mac, 0x80),
+ MAC_EVENT_ATTR(ea-memory, 0x90),
+ MAC_EVENT_ATTR(ea-memory-mac-write, 0x92),
+ MAC_EVENT_ATTR(ea-ha, 0xa0),
+ NULL
+};
+
+#define PCI_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, fujitsu_uncore_pmu_event_show, _id)
+
+static struct attribute *fujitsu_uncore_pci_pmu_events[] = {
+ PCI_EVENT_ATTR(pci-port0-cycles, 0x00),
+ PCI_EVENT_ATTR(pci-port0-read-count, 0x10),
+ PCI_EVENT_ATTR(pci-port0-read-count-bus, 0x14),
+ PCI_EVENT_ATTR(pci-port0-write-count, 0x20),
+ PCI_EVENT_ATTR(pci-port0-write-count-bus, 0x24),
+ PCI_EVENT_ATTR(pci-port1-cycles, 0x40),
+ PCI_EVENT_ATTR(pci-port1-read-count, 0x50),
+ PCI_EVENT_ATTR(pci-port1-read-count-bus, 0x54),
+ PCI_EVENT_ATTR(pci-port1-write-count, 0x60),
+ PCI_EVENT_ATTR(pci-port1-write-count-bus, 0x64),
+ PCI_EVENT_ATTR(ea-pci, 0x80),
+ NULL
+};
+
+static const struct attribute_group fujitsu_uncore_mac_pmu_events_group = {
+ .name = "events",
+ .attrs = fujitsu_uncore_mac_pmu_events,
+};
+
+static const struct attribute_group fujitsu_uncore_pci_pmu_events_group = {
+ .name = "events",
+ .attrs = fujitsu_uncore_pci_pmu_events,
+};
+
+static ssize_t cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uncore_pmu *uncorepmu = to_uncore_pmu(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(uncorepmu->cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *fujitsu_uncore_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static const struct attribute_group fujitsu_uncore_pmu_cpumask_attr_group = {
+ .attrs = fujitsu_uncore_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *fujitsu_uncore_mac_pmu_attr_grps[] = {
+ &fujitsu_uncore_pmu_format_group,
+ &fujitsu_uncore_mac_pmu_events_group,
+ &fujitsu_uncore_pmu_cpumask_attr_group,
+ NULL
+};
+
+static const struct attribute_group *fujitsu_uncore_pci_pmu_attr_grps[] = {
+ &fujitsu_uncore_pmu_format_group,
+ &fujitsu_uncore_pci_pmu_events_group,
+ &fujitsu_uncore_pmu_cpumask_attr_group,
+ NULL
+};
+
+static void fujitsu_uncore_pmu_migrate(struct uncore_pmu *uncorepmu, unsigned int cpu)
+{
+ perf_pmu_migrate_context(&uncorepmu->pmu, uncorepmu->cpu, cpu);
+ irq_set_affinity(uncorepmu->irq, cpumask_of(cpu));
+ uncorepmu->cpu = cpu;
+}
+
+static int fujitsu_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct uncore_pmu *uncorepmu;
+ int node;
+
+ uncorepmu = hlist_entry_safe(cpuhp_node, struct uncore_pmu, node);
+ node = dev_to_node(uncorepmu->dev);
+ if (cpu_to_node(uncorepmu->cpu) != node && cpu_to_node(cpu) == node)
+ fujitsu_uncore_pmu_migrate(uncorepmu, cpu);
+
+ return 0;
+}
+
+static int fujitsu_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct uncore_pmu *uncorepmu;
+ unsigned int target;
+ int node;
+
+ uncorepmu = hlist_entry_safe(cpuhp_node, struct uncore_pmu, node);
+ if (cpu != uncorepmu->cpu)
+ return 0;
+
+ node = dev_to_node(uncorepmu->dev);
+ target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ if (target < nr_cpu_ids)
+ fujitsu_uncore_pmu_migrate(uncorepmu, target);
+
+ return 0;
+}
+
+static int fujitsu_uncore_pmu_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ unsigned long device_type = (unsigned long)device_get_match_data(dev);
+ const struct attribute_group **attr_groups;
+ struct uncore_pmu *uncorepmu;
+ struct resource *memrc;
+ size_t alloc_size;
+ char *name;
+ int ret;
+ int irq;
+ u64 uid;
+
+ ret = acpi_dev_uid_to_integer(ACPI_COMPANION(dev), &uid);
+ if (ret)
+ return dev_err_probe(dev, ret, "unable to read ACPI uid\n");
+
+ uncorepmu = devm_kzalloc(dev, sizeof(*uncorepmu), GFP_KERNEL);
+ if (!uncorepmu)
+ return -ENOMEM;
+ uncorepmu->dev = dev;
+ uncorepmu->cpu = cpumask_local_spread(0, dev_to_node(dev));
+ platform_set_drvdata(pdev, uncorepmu);
+
+ switch (device_type) {
+ case FUJITSU_UNCORE_PMU_MAC:
+ uncorepmu->num_counters = MAC_NUM_COUNTERS;
+ attr_groups = fujitsu_uncore_mac_pmu_attr_grps;
+ name = devm_kasprintf(dev, GFP_KERNEL, "mac_iod%llu_mac%llu_ch%llu",
+ (uid >> 8) & 0xF, (uid >> 4) & 0xF, uid & 0xF);
+ break;
+ case FUJITSU_UNCORE_PMU_PCI:
+ uncorepmu->num_counters = PCI_NUM_COUNTERS;
+ attr_groups = fujitsu_uncore_pci_pmu_attr_grps;
+ name = devm_kasprintf(dev, GFP_KERNEL, "pci_iod%llu_pci%llu",
+ (uid >> 4) & 0xF, uid & 0xF);
+ break;
+ default:
+ return dev_err_probe(dev, -EINVAL, "illegal device type: %lu\n", device_type);
+ }
+ if (!name)
+ return -ENOMEM;
+
+ uncorepmu->pmu = (struct pmu) {
+ .parent = dev,
+ .task_ctx_nr = perf_invalid_context,
+
+ .attr_groups = attr_groups,
+
+ .pmu_enable = fujitsu_uncore_pmu_enable,
+ .pmu_disable = fujitsu_uncore_pmu_disable,
+ .event_init = fujitsu_uncore_event_init,
+ .add = fujitsu_uncore_event_add,
+ .del = fujitsu_uncore_event_del,
+ .start = fujitsu_uncore_event_start,
+ .stop = fujitsu_uncore_event_stop,
+ .read = fujitsu_uncore_event_read,
+
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+ };
+
+ alloc_size = sizeof(uncorepmu->events[0]) * uncorepmu->num_counters;
+ uncorepmu->events = devm_kzalloc(dev, alloc_size, GFP_KERNEL);
+ if (!uncorepmu->events)
+ return -ENOMEM;
+
+ alloc_size = sizeof(uncorepmu->used_mask[0]) * BITS_TO_LONGS(uncorepmu->num_counters);
+ uncorepmu->used_mask = devm_kzalloc(dev, alloc_size, GFP_KERNEL);
+ if (!uncorepmu->used_mask)
+ return -ENOMEM;
+
+ uncorepmu->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &memrc);
+ if (IS_ERR(uncorepmu->regs))
+ return PTR_ERR(uncorepmu->regs);
+
+ fujitsu_uncore_init(uncorepmu);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ ret = devm_request_irq(dev, irq, fujitsu_uncore_handle_irq,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ name, uncorepmu);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to request IRQ:%d\n", irq);
+
+ ret = irq_set_affinity(irq, cpumask_of(uncorepmu->cpu));
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to set irq affinity:%d\n", irq);
+
+ uncorepmu->irq = irq;
+
+ /* Add this instance to the list used by the offline callback */
+ ret = cpuhp_state_add_instance(uncore_pmu_cpuhp_state, &uncorepmu->node);
+ if (ret)
+ return dev_err_probe(dev, ret, "Error registering hotplug");
+
+ ret = perf_pmu_register(&uncorepmu->pmu, name, -1);
+ if (ret < 0) {
+ cpuhp_state_remove_instance_nocalls(uncore_pmu_cpuhp_state, &uncorepmu->node);
+ return dev_err_probe(dev, ret, "Failed to register %s PMU\n", name);
+ }
+
+ dev_dbg(dev, "Registered %s, type: %d\n", name, uncorepmu->pmu.type);
+
+ return 0;
+}
+
+static void fujitsu_uncore_pmu_remove(struct platform_device *pdev)
+{
+ struct uncore_pmu *uncorepmu = platform_get_drvdata(pdev);
+
+ writeq_relaxed(0, uncorepmu->regs + PM_CR);
+
+ perf_pmu_unregister(&uncorepmu->pmu);
+ cpuhp_state_remove_instance_nocalls(uncore_pmu_cpuhp_state, &uncorepmu->node);
+}
+
+static const struct acpi_device_id fujitsu_uncore_pmu_acpi_match[] = {
+ { "FUJI200C", FUJITSU_UNCORE_PMU_MAC },
+ { "FUJI200D", FUJITSU_UNCORE_PMU_PCI },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, fujitsu_uncore_pmu_acpi_match);
+
+static struct platform_driver fujitsu_uncore_pmu_driver = {
+ .driver = {
+ .name = "fujitsu-uncore-pmu",
+ .acpi_match_table = fujitsu_uncore_pmu_acpi_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = fujitsu_uncore_pmu_probe,
+ .remove = fujitsu_uncore_pmu_remove,
+};
+
+static int __init fujitsu_uncore_pmu_init(void)
+{
+ int ret;
+
+ /* Install a hook to update the reader CPU in case it goes offline */
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "perf/fujitsu/uncore:online",
+ fujitsu_uncore_pmu_online_cpu,
+ fujitsu_uncore_pmu_offline_cpu);
+ if (ret < 0)
+ return ret;
+
+ uncore_pmu_cpuhp_state = ret;
+
+ ret = platform_driver_register(&fujitsu_uncore_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(uncore_pmu_cpuhp_state);
+
+ return ret;
+}
+
+static void __exit fujitsu_uncore_pmu_exit(void)
+{
+ platform_driver_unregister(&fujitsu_uncore_pmu_driver);
+ cpuhp_remove_multi_state(uncore_pmu_cpuhp_state);
+}
+
+module_init(fujitsu_uncore_pmu_init);
+module_exit(fujitsu_uncore_pmu_exit);
+
+MODULE_AUTHOR("Koichi Okuno <fj2767dz@fujitsu.com>");
+MODULE_DESCRIPTION("Fujitsu Uncore PMU driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index 48dcc8381ea7..186be3d02238 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
- hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o
+ hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o \
+ hisi_uncore_noc_pmu.o hisi_uncore_mn_pmu.o
obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 412fc3a97963..bbd81a43047d 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -39,6 +39,7 @@
/* L3C has 8-counters */
#define L3C_NR_COUNTERS 0x8
+#define L3C_MAX_EXT 2
#define L3C_PERF_CTRL_EN 0x10000
#define L3C_TRACETAG_EN BIT(31)
@@ -55,59 +56,152 @@
#define L3C_V1_NR_EVENTS 0x59
#define L3C_V2_NR_EVENTS 0xFF
-HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(ext, config, 17, 16);
HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0);
-static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
+struct hisi_l3c_pmu {
+ struct hisi_pmu l3c_pmu;
+
+ /* MMIO and IRQ resources for extension events */
+ void __iomem *ext_base[L3C_MAX_EXT];
+ int ext_irq[L3C_MAX_EXT];
+ int ext_num;
+};
+
+#define to_hisi_l3c_pmu(_l3c_pmu) \
+ container_of(_l3c_pmu, struct hisi_l3c_pmu, l3c_pmu)
+
+/*
+ * The hardware counter idx used in counter enable/disable,
+ * interrupt enable/disable and status check, etc.
+ */
+#define L3C_HW_IDX(_cntr_idx) ((_cntr_idx) % L3C_NR_COUNTERS)
+
+/* Range of ext counters in used mask. */
+#define L3C_CNTR_EXT_L(_ext) (((_ext) + 1) * L3C_NR_COUNTERS)
+#define L3C_CNTR_EXT_H(_ext) (((_ext) + 2) * L3C_NR_COUNTERS)
+
+struct hisi_l3c_pmu_ext {
+ bool support_ext;
+};
+
+static bool support_ext(struct hisi_l3c_pmu *pmu)
+{
+ struct hisi_l3c_pmu_ext *l3c_pmu_ext = pmu->l3c_pmu.dev_info->private;
+
+ return l3c_pmu_ext->support_ext;
+}
+
+static int hisi_l3c_pmu_get_event_idx(struct perf_event *event)
{
struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ unsigned long *used_mask = l3c_pmu->pmu_events.used_mask;
+ int ext = hisi_get_ext(event);
+ int idx;
+
+ /*
+ * For an L3C PMU that supports extension events, we can monitor a
+ * maximum of 2 * num_counters to 3 * num_counters events, depending
+ * on the number of ext regions supported by the hardware. Thus use
+ * bits [0, num_counters - 1] for normal events and bits
+ * [ext * num_counters, (ext + 1) * num_counters - 1] for extension
+ * events. The idx allocation stays unchanged for normal events, and
+ * the idx also tells us whether an event is an extension event or
+ * not.
+ *
+ * Since normal events and extension events live in different address
+ * spaces, save the base address to event->hw.event_base.
+ */
+ if (ext && !support_ext(hisi_l3c_pmu))
+ return -EOPNOTSUPP;
+
+ if (ext)
+ event->hw.event_base = (unsigned long)hisi_l3c_pmu->ext_base[ext - 1];
+ else
+ event->hw.event_base = (unsigned long)l3c_pmu->base;
+
+ ext -= 1;
+ idx = find_next_zero_bit(used_mask, L3C_CNTR_EXT_H(ext), L3C_CNTR_EXT_L(ext));
+
+ if (idx >= L3C_CNTR_EXT_H(ext))
+ return -EAGAIN;
+
+ set_bit(idx, used_mask);
+
+ return idx;
+}
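A short sketch of the resulting counter-index layout, assuming L3C_NR_COUNTERS is 8 and two ext regions are present (values follow the macros above):

    /*
     * idx  0..7  -> normal region, hw idx = L3C_HW_IDX(idx) = idx,      base = l3c_pmu->base
     * idx  8..15 -> ext region 0,  hw idx = L3C_HW_IDX(idx) = idx - 8,  base = ext_base[0]
     * idx 16..23 -> ext region 1,  hw idx = L3C_HW_IDX(idx) = idx - 16, base = ext_base[1]
     */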
+
+static u32 hisi_l3c_pmu_event_readl(struct hw_perf_event *hwc, u32 reg)
+{
+ return readl((void __iomem *)hwc->event_base + reg);
+}
+
+static void hisi_l3c_pmu_event_writel(struct hw_perf_event *hwc, u32 reg, u32 val)
+{
+ writel(val, (void __iomem *)hwc->event_base + reg);
+}
+
+static u64 hisi_l3c_pmu_event_readq(struct hw_perf_event *hwc, u32 reg)
+{
+ return readq((void __iomem *)hwc->event_base + reg);
+}
+
+static void hisi_l3c_pmu_event_writeq(struct hw_perf_event *hwc, u32 reg, u64 val)
+{
+ writeq(val, (void __iomem *)hwc->event_base + reg);
+}
+
+static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
u32 tt_req = hisi_get_tt_req(event);
if (tt_req) {
u32 val;
/* Set request-type for tracetag */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val |= tt_req << L3C_TRACETAG_REQ_SHIFT;
val |= L3C_TRACETAG_REQ_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
/* Enable request-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val |= L3C_TRACETAG_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
}
}
static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 tt_req = hisi_get_tt_req(event);
if (tt_req) {
u32 val;
/* Clear request-type */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT);
val &= ~L3C_TRACETAG_REQ_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
/* Disable request-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val &= ~L3C_TRACETAG_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
}
}
static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
u32 reg, reg_idx, shift, val;
- int idx = hwc->idx;
+ int idx = L3C_HW_IDX(hwc->idx);
/*
* Select the appropriate datasource register(L3C_DATSRC_TYPE0/1).
@@ -120,15 +214,15 @@ static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
reg_idx = idx % 4;
shift = 8 * reg_idx;
- val = readl(l3c_pmu->base + reg);
+ val = hisi_l3c_pmu_event_readl(hwc, reg);
val &= ~(L3C_DATSRC_MASK << shift);
val |= ds_cfg << shift;
- writel(val, l3c_pmu->base + reg);
+ hisi_l3c_pmu_event_writel(hwc, reg, val);
}
static void hisi_l3c_pmu_config_ds(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 ds_cfg = hisi_get_datasrc_cfg(event);
u32 ds_skt = hisi_get_datasrc_skt(event);
@@ -138,15 +232,15 @@ static void hisi_l3c_pmu_config_ds(struct perf_event *event)
if (ds_skt) {
u32 val;
- val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL);
val |= L3C_DATSRC_SKT_EN;
- writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val);
}
}
static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 ds_cfg = hisi_get_datasrc_cfg(event);
u32 ds_skt = hisi_get_datasrc_skt(event);
@@ -156,57 +250,63 @@ static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
if (ds_skt) {
u32 val;
- val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL);
val &= ~L3C_DATSRC_SKT_EN;
- writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val);
}
}
static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 core = hisi_get_tt_core(event);
if (core) {
u32 val;
/* Config and enable core information */
- writel(core, l3c_pmu->base + L3C_CORE_CTRL);
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, core);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val |= L3C_CORE_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
/* Enable core-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val |= L3C_TRACETAG_CORE_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
}
}
static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 core = hisi_get_tt_core(event);
if (core) {
u32 val;
/* Clear core information */
- writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL);
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, L3C_COER_NONE);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val &= ~L3C_CORE_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
/* Disable core-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val &= ~L3C_TRACETAG_CORE_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
}
}
+static bool hisi_l3c_pmu_have_filter(struct perf_event *event)
+{
+ return hisi_get_tt_req(event) || hisi_get_tt_core(event) ||
+ hisi_get_datasrc_cfg(event) || hisi_get_datasrc_skt(event);
+}
+
static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
{
- if (event->attr.config1 != 0x0) {
+ if (hisi_l3c_pmu_have_filter(event)) {
hisi_l3c_pmu_config_req_tracetag(event);
hisi_l3c_pmu_config_core_tracetag(event);
hisi_l3c_pmu_config_ds(event);
@@ -215,38 +315,53 @@ static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
static void hisi_l3c_pmu_disable_filter(struct perf_event *event)
{
- if (event->attr.config1 != 0x0) {
+ if (hisi_l3c_pmu_have_filter(event)) {
hisi_l3c_pmu_clear_ds(event);
hisi_l3c_pmu_clear_core_tracetag(event);
hisi_l3c_pmu_clear_req_tracetag(event);
}
}
+static int hisi_l3c_pmu_check_filter(struct perf_event *event)
+{
+ struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ext = hisi_get_ext(event);
+
+ if (ext < 0 || ext > hisi_l3c_pmu->ext_num)
+ return -EINVAL;
+
+ return 0;
+}
+
/*
* Select the counter register offset using the counter index
*/
static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
{
- return (L3C_CNTR0_LOWER + (cntr_idx * 8));
+ return L3C_CNTR0_LOWER + L3C_HW_IDX(cntr_idx) * 8;
}
static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
struct hw_perf_event *hwc)
{
- return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
+ return hisi_l3c_pmu_event_readq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx));
}
static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
struct hw_perf_event *hwc, u64 val)
{
- writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
+ hisi_l3c_pmu_event_writeq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx), val);
}
static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
u32 type)
{
+ struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw;
u32 reg, reg_idx, shift, val;
+ idx = L3C_HW_IDX(idx);
+
/*
* Select the appropriate event select register(L3C_EVENT_TYPE0/1).
* There are 2 event select registers for the 8 hardware counters.
@@ -259,36 +374,72 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
shift = 8 * reg_idx;
/* Write event code to L3C_EVENT_TYPEx Register */
- val = readl(l3c_pmu->base + reg);
+ val = hisi_l3c_pmu_event_readl(hwc, reg);
val &= ~(L3C_EVTYPE_NONE << shift);
- val |= (type << shift);
- writel(val, l3c_pmu->base + reg);
+ val |= type << shift;
+ hisi_l3c_pmu_event_writel(hwc, reg, val);
}
static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ unsigned long *used_mask = l3c_pmu->pmu_events.used_mask;
+ unsigned long used_cntr = find_first_bit(used_mask, l3c_pmu->num_counters);
u32 val;
+ int i;
/*
- * Set perf_enable bit in L3C_PERF_CTRL register to start counting
- * for all enabled counters.
+	 * If any in-use counter falls in the normal range (rather than an
+	 * ext range), enable counting on the normal region.
*/
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
- val |= L3C_PERF_CTRL_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ if (used_cntr < L3C_NR_COUNTERS) {
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val |= L3C_PERF_CTRL_EN;
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ }
+
+ /* Then enable each ext region that has an in-use counter. */
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ /* Skip this ext region if none of its counters are in use. */
+ used_cntr = find_next_bit(used_mask, L3C_CNTR_EXT_H(i), L3C_CNTR_EXT_L(i));
+ if (used_cntr >= L3C_CNTR_EXT_H(i))
+ continue;
+
+ val = readl(hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ val |= L3C_PERF_CTRL_EN;
+ writel(val, hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ }
}
static void hisi_l3c_pmu_stop_counters(struct hisi_pmu *l3c_pmu)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ unsigned long *used_mask = l3c_pmu->pmu_events.used_mask;
+ unsigned long used_cntr = find_first_bit(used_mask, l3c_pmu->num_counters);
u32 val;
+ int i;
/*
- * Clear perf_enable bit in L3C_PERF_CTRL register to stop counting
- * for all enabled counters.
+	 * If any in-use counter falls in the normal range (rather than an
+	 * ext range), stop counting on the normal region.
*/
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
- val &= ~(L3C_PERF_CTRL_EN);
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ if (used_cntr < L3C_NR_COUNTERS) {
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val &= ~L3C_PERF_CTRL_EN;
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ }
+
+ /* Then stop each ext region that has an in-use counter. */
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ /* Skip this ext region if none of its counters are in use. */
+ used_cntr = find_next_bit(used_mask, L3C_CNTR_EXT_H(i), L3C_CNTR_EXT_L(i));
+ if (used_cntr >= L3C_CNTR_EXT_H(i))
+ continue;
+
+ val = readl(hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ val &= ~L3C_PERF_CTRL_EN;
+ writel(val, hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ }
}
static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu,
@@ -297,9 +448,9 @@ static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu,
u32 val;
/* Enable counter index in L3C_EVENT_CTRL register */
- val = readl(l3c_pmu->base + L3C_EVENT_CTRL);
- val |= (1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_EVENT_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL);
+ val |= 1 << L3C_HW_IDX(hwc->idx);
+ hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val);
}
static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu,
@@ -308,9 +459,9 @@ static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu,
u32 val;
/* Clear counter index in L3C_EVENT_CTRL register */
- val = readl(l3c_pmu->base + L3C_EVENT_CTRL);
- val &= ~(1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_EVENT_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL);
+ val &= ~(1 << L3C_HW_IDX(hwc->idx));
+ hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val);
}
static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu,
@@ -318,10 +469,10 @@ static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu,
{
u32 val;
- val = readl(l3c_pmu->base + L3C_INT_MASK);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK);
/* Write 0 to enable interrupt */
- val &= ~(1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_INT_MASK);
+ val &= ~(1 << L3C_HW_IDX(hwc->idx));
+ hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val);
}
static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu,
@@ -329,28 +480,37 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu,
{
u32 val;
- val = readl(l3c_pmu->base + L3C_INT_MASK);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK);
/* Write 1 to mask interrupt */
- val |= (1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_INT_MASK);
+ val |= 1 << L3C_HW_IDX(hwc->idx);
+ hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val);
}
static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu)
{
- return readl(l3c_pmu->base + L3C_INT_STATUS);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ u32 ext_int, status, status_ext = 0;
+ int i;
+
+ status = readl(l3c_pmu->base + L3C_INT_STATUS);
+
+ if (!support_ext(hisi_l3c_pmu))
+ return status;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ ext_int = readl(hisi_l3c_pmu->ext_base[i] + L3C_INT_STATUS);
+ status_ext |= ext_int << (L3C_NR_COUNTERS * i);
+ }
+
+ return status | (status_ext << L3C_NR_COUNTERS);
}
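A worked example of the combined overflow-status layout returned above, assuming L3C_NR_COUNTERS is 8 and ext_num is 2:

    /*
     * bits [ 7: 0] -> normal counters  0..7  (L3C_INT_STATUS)
     * bits [15: 8] -> ext region 0, counters  8..15
     * bits [23:16] -> ext region 1, counters 16..23
     *
     * e.g. a raw ext0 status of 0x1 becomes BIT(8) here, and the common
     * handler's idx 8 maps back to hardware counter L3C_HW_IDX(8) == 0.
     */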
static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx)
{
- writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR);
-}
+ struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw;
-static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
- { "HISI0213", },
- { "HISI0214", },
- {}
-};
-MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
+ hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << L3C_HW_IDX(idx));
+}
static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
@@ -371,6 +531,10 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
return -EINVAL;
}
+ l3c_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!l3c_pmu->dev_info)
+ return -ENODEV;
+
l3c_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(l3c_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n");
@@ -382,6 +546,50 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
return 0;
}
+static int hisi_l3c_pmu_init_ext(struct hisi_pmu *l3c_pmu, struct platform_device *pdev)
+{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ret, irq, ext_num, i;
+ char *irqname;
+
+ /* A HiSilicon L3C PMU that supports ext must have more than one IRQ resource. */
+ ext_num = platform_irq_count(pdev);
+ if (ext_num < L3C_MAX_EXT)
+ return -ENODEV;
+
+ /*
+	 * The number of supported ext regions equals the number of IRQs minus
+	 * one, since one of the IRQs belongs to the normal part of the PMU.
+ */
+ hisi_l3c_pmu->ext_num = ext_num - 1;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ hisi_l3c_pmu->ext_base[i] = devm_platform_ioremap_resource(pdev, i + 1);
+ if (IS_ERR(hisi_l3c_pmu->ext_base[i]))
+ return PTR_ERR(hisi_l3c_pmu->ext_base[i]);
+
+ irq = platform_get_irq(pdev, i + 1);
+ if (irq < 0)
+ return irq;
+
+ irqname = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s ext%d",
+ dev_name(&pdev->dev), i + 1);
+ if (!irqname)
+ return -ENOMEM;
+
+ ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ irqname, l3c_pmu);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret,
+ "Fail to request EXT IRQ: %d.\n", irq);
+
+ hisi_l3c_pmu->ext_irq[i] = irq;
+ }
+
+ return 0;
+}
+
static struct attribute *hisi_l3c_pmu_v1_format_attr[] = {
HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
NULL,
@@ -394,7 +602,7 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = {
static struct attribute *hisi_l3c_pmu_v2_format_attr[] = {
HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
- HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"),
+ HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"),
HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"),
HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"),
@@ -406,6 +614,19 @@ static const struct attribute_group hisi_l3c_pmu_v2_format_group = {
.attrs = hisi_l3c_pmu_v2_format_attr,
};
+static struct attribute *hisi_l3c_pmu_v3_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ HISI_PMU_FORMAT_ATTR(ext, "config:16-17"),
+ HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
+ HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"),
+ NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v3_format_group = {
+ .name = "format",
+ .attrs = hisi_l3c_pmu_v3_format_attr,
+};
+
static struct attribute *hisi_l3c_pmu_v1_events_attr[] = {
HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00),
HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01),
@@ -441,6 +662,26 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = {
.attrs = hisi_l3c_pmu_v2_events_attr,
};
+static struct attribute *hisi_l3c_pmu_v3_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(rd_spipe, 0x18),
+ HISI_PMU_EVENT_ATTR(rd_hit_spipe, 0x19),
+ HISI_PMU_EVENT_ATTR(wr_spipe, 0x1a),
+ HISI_PMU_EVENT_ATTR(wr_hit_spipe, 0x1b),
+ HISI_PMU_EVENT_ATTR(io_rd_spipe, 0x1c),
+ HISI_PMU_EVENT_ATTR(io_rd_hit_spipe, 0x1d),
+ HISI_PMU_EVENT_ATTR(io_wr_spipe, 0x1e),
+ HISI_PMU_EVENT_ATTR(io_wr_hit_spipe, 0x1f),
+ HISI_PMU_EVENT_ATTR(cycles, 0x7f),
+ HISI_PMU_EVENT_ATTR(l3c_ref, 0xbc),
+ HISI_PMU_EVENT_ATTR(l3c2ring, 0xbd),
+ NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v3_events_group = {
+ .name = "events",
+ .attrs = hisi_l3c_pmu_v3_events_attr,
+};
+
static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = {
&hisi_l3c_pmu_v1_format_group,
&hisi_l3c_pmu_v1_events_group,
@@ -457,9 +698,46 @@ static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = {
NULL
};
+static const struct attribute_group *hisi_l3c_pmu_v3_attr_groups[] = {
+ &hisi_l3c_pmu_v3_format_group,
+ &hisi_l3c_pmu_v3_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static struct hisi_l3c_pmu_ext hisi_l3c_pmu_support_ext = {
+ .support_ext = true,
+};
+
+static struct hisi_l3c_pmu_ext hisi_l3c_pmu_not_support_ext = {
+ .support_ext = false,
+};
+
+static const struct hisi_pmu_dev_info hisi_l3c_pmu_v1 = {
+ .attr_groups = hisi_l3c_pmu_v1_attr_groups,
+ .counter_bits = 48,
+ .check_event = L3C_V1_NR_EVENTS,
+ .private = &hisi_l3c_pmu_not_support_ext,
+};
+
+static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = {
+ .attr_groups = hisi_l3c_pmu_v2_attr_groups,
+ .counter_bits = 64,
+ .check_event = L3C_V2_NR_EVENTS,
+ .private = &hisi_l3c_pmu_not_support_ext,
+};
+
+static const struct hisi_pmu_dev_info hisi_l3c_pmu_v3 = {
+ .attr_groups = hisi_l3c_pmu_v3_attr_groups,
+ .counter_bits = 64,
+ .check_event = L3C_V2_NR_EVENTS,
+ .private = &hisi_l3c_pmu_support_ext,
+};
+
static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
.write_evtype = hisi_l3c_pmu_write_evtype,
- .get_event_idx = hisi_uncore_pmu_get_event_idx,
+ .get_event_idx = hisi_l3c_pmu_get_event_idx,
.start_counters = hisi_l3c_pmu_start_counters,
.stop_counters = hisi_l3c_pmu_stop_counters,
.enable_counter = hisi_l3c_pmu_enable_counter,
@@ -472,11 +750,14 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
.clear_int_status = hisi_l3c_pmu_clear_int_status,
.enable_filter = hisi_l3c_pmu_enable_filter,
.disable_filter = hisi_l3c_pmu_disable_filter,
+ .check_filter = hisi_l3c_pmu_check_filter,
};
static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ struct hisi_l3c_pmu_ext *l3c_pmu_dev_ext;
int ret;
ret = hisi_l3c_pmu_init_data(pdev, l3c_pmu);
@@ -487,42 +768,55 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
if (ret)
return ret;
- if (l3c_pmu->identifier >= HISI_PMU_V2) {
- l3c_pmu->counter_bits = 64;
- l3c_pmu->check_event = L3C_V2_NR_EVENTS;
- l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups;
- } else {
- l3c_pmu->counter_bits = 48;
- l3c_pmu->check_event = L3C_V1_NR_EVENTS;
- l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups;
- }
-
+ l3c_pmu->pmu_events.attr_groups = l3c_pmu->dev_info->attr_groups;
+ l3c_pmu->counter_bits = l3c_pmu->dev_info->counter_bits;
+ l3c_pmu->check_event = l3c_pmu->dev_info->check_event;
l3c_pmu->num_counters = L3C_NR_COUNTERS;
l3c_pmu->ops = &hisi_uncore_l3c_ops;
l3c_pmu->dev = &pdev->dev;
l3c_pmu->on_cpu = -1;
+ l3c_pmu_dev_ext = l3c_pmu->dev_info->private;
+ if (l3c_pmu_dev_ext->support_ext) {
+ ret = hisi_l3c_pmu_init_ext(l3c_pmu, pdev);
+ if (ret)
+ return ret;
+ /*
+		 * The extension events have their own counters, and each ext
+		 * region provides as many counters as the normal region, so at
+		 * most (ext_num + 1) * L3C_NR_COUNTERS events can be monitored.
+ */
+ l3c_pmu->num_counters += hisi_l3c_pmu->ext_num * L3C_NR_COUNTERS;
+ }
+
return 0;
}
static int hisi_l3c_pmu_probe(struct platform_device *pdev)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu;
struct hisi_pmu *l3c_pmu;
char *name;
int ret;
- l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu), GFP_KERNEL);
- if (!l3c_pmu)
+ hisi_l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*hisi_l3c_pmu), GFP_KERNEL);
+ if (!hisi_l3c_pmu)
return -ENOMEM;
+ l3c_pmu = &hisi_l3c_pmu->l3c_pmu;
platform_set_drvdata(pdev, l3c_pmu);
ret = hisi_l3c_pmu_dev_probe(pdev, l3c_pmu);
if (ret)
return ret;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d",
- l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id);
+ if (l3c_pmu->topo.sub_id >= 0)
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d_%d",
+ l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id,
+ l3c_pmu->topo.sub_id);
+ else
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d",
+ l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id);
if (!name)
return -ENOMEM;
@@ -554,6 +848,14 @@ static void hisi_l3c_pmu_remove(struct platform_device *pdev)
&l3c_pmu->node);
}
+static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
+ { "HISI0213", (kernel_ulong_t)&hisi_l3c_pmu_v1 },
+ { "HISI0214", (kernel_ulong_t)&hisi_l3c_pmu_v2 },
+ { "HISI0215", (kernel_ulong_t)&hisi_l3c_pmu_v3 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
+
static struct platform_driver hisi_l3c_pmu_driver = {
.driver = {
.name = "hisi_l3c_pmu",
@@ -564,14 +866,60 @@ static struct platform_driver hisi_l3c_pmu_driver = {
.remove = hisi_l3c_pmu_remove,
};
+static int hisi_l3c_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ret, i;
+
+ ret = hisi_uncore_pmu_online_cpu(cpu, node);
+ if (ret)
+ return ret;
+
+ /* Skip ext IRQ migration for an L3C PMU that doesn't support ext. */
+ if (!support_ext(hisi_l3c_pmu))
+ return 0;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++)
+ WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq[i],
+ cpumask_of(l3c_pmu->on_cpu)));
+
+ return 0;
+}
+
+static int hisi_l3c_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ret, i;
+
+ ret = hisi_uncore_pmu_offline_cpu(cpu, node);
+ if (ret)
+ return ret;
+
+ /* If no available CPU was found, skip IRQ migration. */
+ if (l3c_pmu->on_cpu < 0)
+ return 0;
+
+ /* Skip ext IRQ migration for an L3C PMU that doesn't support ext. */
+ if (!support_ext(hisi_l3c_pmu))
+ return 0;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++)
+ WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq[i],
+ cpumask_of(l3c_pmu->on_cpu)));
+
+ return 0;
+}
+
static int __init hisi_l3c_pmu_module_init(void)
{
int ret;
ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
"AP_PERF_ARM_HISI_L3_ONLINE",
- hisi_uncore_pmu_online_cpu,
- hisi_uncore_pmu_offline_cpu);
+ hisi_l3c_pmu_online_cpu,
+ hisi_l3c_pmu_offline_cpu);
if (ret) {
pr_err("L3C PMU: Error setup hotplug, ret = %d\n", ret);
return ret;
diff --git a/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c
new file mode 100644
index 000000000000..4df4eebe243e
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC MN uncore Hardware event counters support
+ *
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd.
+ */
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* Dynamic CPU hotplug state used by MN PMU */
+static enum cpuhp_state hisi_mn_pmu_online;
+
+/* MN register definition */
+#define HISI_MN_DYNAMIC_CTRL_REG 0x400
+#define HISI_MN_DYNAMIC_CTRL_EN BIT(0)
+#define HISI_MN_PERF_CTRL_REG 0x408
+#define HISI_MN_PERF_CTRL_EN BIT(6)
+#define HISI_MN_INT_MASK_REG 0x800
+#define HISI_MN_INT_STATUS_REG 0x808
+#define HISI_MN_INT_CLEAR_REG 0x80C
+#define HISI_MN_EVENT_CTRL_REG 0x1C00
+#define HISI_MN_VERSION_REG 0x1C04
+#define HISI_MN_EVTYPE0_REG 0x1d00
+#define HISI_MN_EVTYPE_MASK GENMASK(7, 0)
+#define HISI_MN_CNTR0_REG 0x1e00
+#define HISI_MN_EVTYPE_REGn(evtype0, n) ((evtype0) + (n) * 4)
+#define HISI_MN_CNTR_REGn(cntr0, n) ((cntr0) + (n) * 8)
+
+#define HISI_MN_NR_COUNTERS 4
+#define HISI_MN_TIMEOUT_US 500U
+
+struct hisi_mn_pmu_regs {
+ u32 version;
+ u32 dyn_ctrl;
+ u32 perf_ctrl;
+ u32 int_mask;
+ u32 int_clear;
+ u32 int_status;
+ u32 event_ctrl;
+ u32 event_type0;
+ u32 event_cntr0;
+};
+
+/*
+ * Each event request takes a certain amount of time to complete. If
+ * we are counting a latency-related event, we need to wait for all
+ * requests to complete; otherwise the counter value is slightly too large.
+ */
+static void hisi_mn_pmu_counter_flush(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ int ret;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->dyn_ctrl);
+ val |= HISI_MN_DYNAMIC_CTRL_EN;
+ writel(val, mn_pmu->base + reg_info->dyn_ctrl);
+
+ ret = readl_poll_timeout_atomic(mn_pmu->base + reg_info->dyn_ctrl,
+ val, !(val & HISI_MN_DYNAMIC_CTRL_EN),
+ 1, HISI_MN_TIMEOUT_US);
+ if (ret)
+ dev_warn(mn_pmu->dev, "Counter flush timeout\n");
+}
+
+static u64 hisi_mn_pmu_read_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ return readq(mn_pmu->base + HISI_MN_CNTR_REGn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_mn_pmu_write_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ writeq(val, mn_pmu->base + HISI_MN_CNTR_REGn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_mn_pmu_write_evtype(struct hisi_pmu *mn_pmu, int idx, u32 type)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ /*
+ * Select the appropriate event select register.
+ * There are 2 32-bit event select registers for the
+	 * 8 hardware counters; each event code is 8 bits wide.
+ */
+ val = readl(mn_pmu->base + HISI_MN_EVTYPE_REGn(reg_info->event_type0, idx / 4));
+ val &= ~(HISI_MN_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx));
+ val |= (type << HISI_PMU_EVTYPE_SHIFT(idx));
+ writel(val, mn_pmu->base + HISI_MN_EVTYPE_REGn(reg_info->event_type0, idx / 4));
+}
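A worked example of the event-type register selection above (HISI_PMU_EVTYPE_SHIFT(idx) is (idx % 4) * 8, so each 32-bit register holds four 8-bit event codes; with HISI_MN_NR_COUNTERS == 4 only the first register is used):

    /*
     * idx 0 -> HISI_MN_EVTYPE0_REG + 0, bits [ 7: 0]
     * idx 1 -> HISI_MN_EVTYPE0_REG + 0, bits [15: 8]
     * idx 3 -> HISI_MN_EVTYPE0_REG + 0, bits [31:24]
     */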
+
+static void hisi_mn_pmu_start_counters(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->perf_ctrl);
+ val |= HISI_MN_PERF_CTRL_EN;
+ writel(val, mn_pmu->base + reg_info->perf_ctrl);
+}
+
+static void hisi_mn_pmu_stop_counters(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->perf_ctrl);
+ val &= ~HISI_MN_PERF_CTRL_EN;
+ writel(val, mn_pmu->base + reg_info->perf_ctrl);
+
+ hisi_mn_pmu_counter_flush(mn_pmu);
+}
+
+static void hisi_mn_pmu_enable_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->event_ctrl);
+ val |= BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->event_ctrl);
+}
+
+static void hisi_mn_pmu_disable_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->event_ctrl);
+ val &= ~BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->event_ctrl);
+}
+
+static void hisi_mn_pmu_enable_counter_int(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->int_mask);
+ val &= ~BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->int_mask);
+}
+
+static void hisi_mn_pmu_disable_counter_int(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->int_mask);
+ val |= BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->int_mask);
+}
+
+static u32 hisi_mn_pmu_get_int_status(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ return readl(mn_pmu->base + reg_info->int_status);
+}
+
+static void hisi_mn_pmu_clear_int_status(struct hisi_pmu *mn_pmu, int idx)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ writel(BIT(idx), mn_pmu->base + reg_info->int_clear);
+}
+
+static struct attribute *hisi_mn_pmu_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ NULL
+};
+
+static const struct attribute_group hisi_mn_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_mn_pmu_format_attr,
+};
+
+static struct attribute *hisi_mn_pmu_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(req_eobarrier_num, 0x00),
+ HISI_PMU_EVENT_ATTR(req_ecbarrier_num, 0x01),
+ HISI_PMU_EVENT_ATTR(req_dvmop_num, 0x02),
+ HISI_PMU_EVENT_ATTR(req_dvmsync_num, 0x03),
+ HISI_PMU_EVENT_ATTR(req_retry_num, 0x04),
+ HISI_PMU_EVENT_ATTR(req_writenosnp_num, 0x05),
+ HISI_PMU_EVENT_ATTR(req_readnosnp_num, 0x06),
+ HISI_PMU_EVENT_ATTR(snp_dvm_num, 0x07),
+ HISI_PMU_EVENT_ATTR(snp_dvmsync_num, 0x08),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvm_num, 0x09),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_num, 0x0A),
+ HISI_PMU_EVENT_ATTR(mn_req_dvm_num, 0x0B),
+ HISI_PMU_EVENT_ATTR(mn_req_dvmsync_num, 0x0C),
+ HISI_PMU_EVENT_ATTR(pa_req_dvm_num, 0x0D),
+ HISI_PMU_EVENT_ATTR(pa_req_dvmsync_num, 0x0E),
+ HISI_PMU_EVENT_ATTR(snp_dvm_latency, 0x80),
+ HISI_PMU_EVENT_ATTR(snp_dvmsync_latency, 0x81),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvm_latency, 0x82),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_latency, 0x83),
+ HISI_PMU_EVENT_ATTR(mn_req_dvm_latency, 0x84),
+ HISI_PMU_EVENT_ATTR(mn_req_dvmsync_latency, 0x85),
+ HISI_PMU_EVENT_ATTR(pa_req_dvm_latency, 0x86),
+ HISI_PMU_EVENT_ATTR(pa_req_dvmsync_latency, 0x87),
+ NULL
+};
+
+static const struct attribute_group hisi_mn_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_mn_pmu_events_attr,
+};
+
+static const struct attribute_group *hisi_mn_pmu_attr_groups[] = {
+ &hisi_mn_pmu_format_group,
+ &hisi_mn_pmu_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_mn_ops = {
+ .write_evtype = hisi_mn_pmu_write_evtype,
+ .get_event_idx = hisi_uncore_pmu_get_event_idx,
+ .start_counters = hisi_mn_pmu_start_counters,
+ .stop_counters = hisi_mn_pmu_stop_counters,
+ .enable_counter = hisi_mn_pmu_enable_counter,
+ .disable_counter = hisi_mn_pmu_disable_counter,
+ .enable_counter_int = hisi_mn_pmu_enable_counter_int,
+ .disable_counter_int = hisi_mn_pmu_disable_counter_int,
+ .write_counter = hisi_mn_pmu_write_counter,
+ .read_counter = hisi_mn_pmu_read_counter,
+ .get_int_status = hisi_mn_pmu_get_int_status,
+ .clear_int_status = hisi_mn_pmu_clear_int_status,
+};
+
+static int hisi_mn_pmu_dev_init(struct platform_device *pdev,
+ struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info;
+ int ret;
+
+ hisi_uncore_pmu_init_topology(mn_pmu, &pdev->dev);
+
+ if (mn_pmu->topo.scl_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL,
+ "Failed to read MN scl id\n");
+
+ if (mn_pmu->topo.index_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL,
+ "Failed to read MN index id\n");
+
+ mn_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(mn_pmu->base))
+ return dev_err_probe(&pdev->dev, PTR_ERR(mn_pmu->base),
+ "Failed to ioremap resource\n");
+
+ ret = hisi_uncore_pmu_init_irq(mn_pmu, pdev);
+ if (ret)
+ return ret;
+
+ mn_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!mn_pmu->dev_info)
+ return -ENODEV;
+
+ mn_pmu->pmu_events.attr_groups = mn_pmu->dev_info->attr_groups;
+ mn_pmu->counter_bits = mn_pmu->dev_info->counter_bits;
+ mn_pmu->check_event = mn_pmu->dev_info->check_event;
+ mn_pmu->num_counters = HISI_MN_NR_COUNTERS;
+ mn_pmu->ops = &hisi_uncore_mn_ops;
+ mn_pmu->dev = &pdev->dev;
+ mn_pmu->on_cpu = -1;
+
+ reg_info = mn_pmu->dev_info->private;
+ mn_pmu->identifier = readl(mn_pmu->base + reg_info->version);
+
+ return 0;
+}
+
+static void hisi_mn_pmu_remove_cpuhp(void *hotplug_node)
+{
+ cpuhp_state_remove_instance_nocalls(hisi_mn_pmu_online, hotplug_node);
+}
+
+static void hisi_mn_pmu_unregister(void *pmu)
+{
+ perf_pmu_unregister(pmu);
+}
+
+static int hisi_mn_pmu_probe(struct platform_device *pdev)
+{
+ struct hisi_pmu *mn_pmu;
+ char *name;
+ int ret;
+
+ mn_pmu = devm_kzalloc(&pdev->dev, sizeof(*mn_pmu), GFP_KERNEL);
+ if (!mn_pmu)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, mn_pmu);
+
+ ret = hisi_mn_pmu_dev_init(pdev, mn_pmu);
+ if (ret)
+ return ret;
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_scl%d_mn%d",
+ mn_pmu->topo.scl_id, mn_pmu->topo.index_id);
+ if (!name)
+ return -ENOMEM;
+
+ ret = cpuhp_state_add_instance(hisi_mn_pmu_online, &mn_pmu->node);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Failed to register cpu hotplug\n");
+
+ ret = devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_remove_cpuhp, &mn_pmu->node);
+ if (ret)
+ return ret;
+
+ hisi_pmu_init(mn_pmu, THIS_MODULE);
+
+ ret = perf_pmu_register(&mn_pmu->pmu, name, -1);
+ if (ret)
+ return dev_err_probe(mn_pmu->dev, ret, "Failed to register MN PMU\n");
+
+ return devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_unregister, &mn_pmu->pmu);
+}
+
+static struct hisi_mn_pmu_regs hisi_mn_v1_pmu_regs = {
+ .version = HISI_MN_VERSION_REG,
+ .dyn_ctrl = HISI_MN_DYNAMIC_CTRL_REG,
+ .perf_ctrl = HISI_MN_PERF_CTRL_REG,
+ .int_mask = HISI_MN_INT_MASK_REG,
+ .int_clear = HISI_MN_INT_CLEAR_REG,
+ .int_status = HISI_MN_INT_STATUS_REG,
+ .event_ctrl = HISI_MN_EVENT_CTRL_REG,
+ .event_type0 = HISI_MN_EVTYPE0_REG,
+ .event_cntr0 = HISI_MN_CNTR0_REG,
+};
+
+static const struct hisi_pmu_dev_info hisi_mn_v1 = {
+ .attr_groups = hisi_mn_pmu_attr_groups,
+ .counter_bits = 48,
+ .check_event = HISI_MN_EVTYPE_MASK,
+ .private = &hisi_mn_v1_pmu_regs,
+};
+
+static const struct acpi_device_id hisi_mn_pmu_acpi_match[] = {
+ { "HISI0222", (kernel_ulong_t) &hisi_mn_v1 },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, hisi_mn_pmu_acpi_match);
+
+static struct platform_driver hisi_mn_pmu_driver = {
+ .driver = {
+ .name = "hisi_mn_pmu",
+ .acpi_match_table = hisi_mn_pmu_acpi_match,
+ /*
+		 * We have not worked out a safe bind/unbind process.
+ * Forcefully unbinding during sampling will lead to a
+ * kernel panic, so this is not supported yet.
+ */
+ .suppress_bind_attrs = true,
+ },
+ .probe = hisi_mn_pmu_probe,
+};
+
+static int __init hisi_mn_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/mn:online",
+ hisi_uncore_pmu_online_cpu,
+ hisi_uncore_pmu_offline_cpu);
+ if (ret < 0) {
+ pr_err("hisi_mn_pmu: Failed to setup MN PMU hotplug: %d\n", ret);
+ return ret;
+ }
+ hisi_mn_pmu_online = ret;
+
+ ret = platform_driver_register(&hisi_mn_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(hisi_mn_pmu_online);
+
+ return ret;
+}
+module_init(hisi_mn_pmu_module_init);
+
+static void __exit hisi_mn_pmu_module_exit(void)
+{
+ platform_driver_unregister(&hisi_mn_pmu_driver);
+ cpuhp_remove_multi_state(hisi_mn_pmu_online);
+}
+module_exit(hisi_mn_pmu_module_exit);
+
+MODULE_IMPORT_NS("HISI_PMU");
+MODULE_DESCRIPTION("HiSilicon SoC MN uncore PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Junhao He <hejunhao3@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c
new file mode 100644
index 000000000000..de3b9cc7aada
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c
@@ -0,0 +1,443 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for HiSilicon Uncore NoC (Network on Chip) PMU device
+ *
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd.
+ * Author: Yicong Yang <yangyicong@hisilicon.com>
+ */
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/sysfs.h>
+
+#include "hisi_uncore_pmu.h"
+
+#define NOC_PMU_VERSION 0x1e00
+#define NOC_PMU_GLOBAL_CTRL 0x1e04
+#define NOC_PMU_GLOBAL_CTRL_PMU_EN BIT(0)
+#define NOC_PMU_GLOBAL_CTRL_TT_EN BIT(1)
+#define NOC_PMU_CNT_INFO 0x1e08
+#define NOC_PMU_CNT_INFO_OVERFLOW(n) BIT(n)
+#define NOC_PMU_EVENT_CTRL0 0x1e20
+#define NOC_PMU_EVENT_CTRL_TYPE GENMASK(4, 0)
+/*
+ * Note: a channel value of 0x0 resets the counter, so don't write it
+ * before the counter has been read out.
+ */
+#define NOC_PMU_EVENT_CTRL_CHANNEL GENMASK(10, 8)
+#define NOC_PMU_EVENT_CTRL_EN BIT(11)
+#define NOC_PMU_EVENT_COUNTER0 0x1e80
+
+#define NOC_PMU_NR_COUNTERS 4
+#define NOC_PMU_CH_DEFAULT 0x7
+
+#define NOC_PMU_EVENT_CTRLn(ctrl0, n) ((ctrl0) + 4 * (n))
+#define NOC_PMU_EVENT_CNTRn(cntr0, n) ((cntr0) + 8 * (n))
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(ch, config1, 2, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_en, config1, 3, 3);
+
+/* Dynamic CPU hotplug state used by this PMU driver */
+static enum cpuhp_state hisi_noc_pmu_cpuhp_state;
+
+struct hisi_noc_pmu_regs {
+ u32 version;
+ u32 pmu_ctrl;
+ u32 event_ctrl0;
+ u32 event_cntr0;
+ u32 overflow_status;
+};
+
+/*
+ * Tracetag filtering is not per-event, so all scheduled events must use
+ * a consistent configuration. Return true only if the newcomer matches
+ * the tracetag filtering configuration of the currently scheduled events.
+ */
+static bool hisi_noc_pmu_check_global_filter(struct perf_event *curr,
+ struct perf_event *new)
+{
+ return hisi_get_tt_en(curr) == hisi_get_tt_en(new);
+}
+
+static void hisi_noc_pmu_write_evtype(struct hisi_pmu *noc_pmu, int idx, u32 type)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, idx));
+ reg &= ~NOC_PMU_EVENT_CTRL_TYPE;
+ reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_TYPE, type);
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, idx));
+}
+
+static int hisi_noc_pmu_get_event_idx(struct perf_event *event)
+{
+ struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_pmu_hwevents *pmu_events = &noc_pmu->pmu_events;
+ int cur_idx;
+
+ cur_idx = find_first_bit(pmu_events->used_mask, noc_pmu->num_counters);
+ if (cur_idx != noc_pmu->num_counters &&
+ !hisi_noc_pmu_check_global_filter(pmu_events->hw_events[cur_idx], event))
+ return -EAGAIN;
+
+ return hisi_uncore_pmu_get_event_idx(event);
+}
+
+static u64 hisi_noc_pmu_read_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+
+ return readq(noc_pmu->base + NOC_PMU_EVENT_CNTRn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_noc_pmu_write_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+
+ writeq(val, noc_pmu->base + NOC_PMU_EVENT_CNTRn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_noc_pmu_enable_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+ reg |= NOC_PMU_EVENT_CTRL_EN;
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+}
+
+static void hisi_noc_pmu_disable_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+ reg &= ~NOC_PMU_EVENT_CTRL_EN;
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+}
+
+static void hisi_noc_pmu_enable_counter_int(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ /* Interrupts are not supported, so this is a stub. */
+}
+
+static void hisi_noc_pmu_disable_counter_int(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+}
+
+static void hisi_noc_pmu_start_counters(struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg |= NOC_PMU_GLOBAL_CTRL_PMU_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+}
+
+static void hisi_noc_pmu_stop_counters(struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg &= ~NOC_PMU_GLOBAL_CTRL_PMU_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+}
+
+static u32 hisi_noc_pmu_get_int_status(struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+
+ return readl(noc_pmu->base + reg_info->overflow_status);
+}
+
+static void hisi_noc_pmu_clear_int_status(struct hisi_pmu *noc_pmu, int idx)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + reg_info->overflow_status);
+ reg &= ~NOC_PMU_CNT_INFO_OVERFLOW(idx);
+ writel(reg, noc_pmu->base + reg_info->overflow_status);
+}
+
+static void hisi_noc_pmu_enable_filter(struct perf_event *event)
+{
+ struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ struct hw_perf_event *hwc = &event->hw;
+ u32 tt_en = hisi_get_tt_en(event);
+ u32 ch = hisi_get_ch(event);
+ u32 reg;
+
+ if (!ch)
+ ch = NOC_PMU_CH_DEFAULT;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+ reg &= ~NOC_PMU_EVENT_CTRL_CHANNEL;
+ reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_CHANNEL, ch);
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+
+ /*
+	 * Since the tracetag filter applies to all counters, don't touch it
+	 * unless the user specifies it explicitly.
+ */
+ if (tt_en) {
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg |= NOC_PMU_GLOBAL_CTRL_TT_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+ }
+}
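A worked example of the resulting per-counter control word once the event type, channel filter and enable bit above have all been programmed, assuming event 0x0e (cycles) and the default channel mask:

    /*
     * FIELD_PREP(NOC_PMU_EVENT_CTRL_TYPE,    0x0e)               = 0x0000000e
     * FIELD_PREP(NOC_PMU_EVENT_CTRL_CHANNEL, NOC_PMU_CH_DEFAULT) = 0x00000700
     * NOC_PMU_EVENT_CTRL_EN                                      = 0x00000800
     *                                                     -> reg = 0x00000f0e
     */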
+
+static void hisi_noc_pmu_disable_filter(struct perf_event *event)
+{
+ struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 tt_en = hisi_get_tt_en(event);
+ u32 reg;
+
+ /*
+	 * If this is not the last active counter, don't touch the global
+	 * tracetag configuration.
+ */
+ if (bitmap_weight(noc_pmu->pmu_events.used_mask, noc_pmu->num_counters) > 1)
+ return;
+
+ if (tt_en) {
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg &= ~NOC_PMU_GLOBAL_CTRL_TT_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+ }
+}
+
+static const struct hisi_uncore_ops hisi_uncore_noc_ops = {
+ .write_evtype = hisi_noc_pmu_write_evtype,
+ .get_event_idx = hisi_noc_pmu_get_event_idx,
+ .read_counter = hisi_noc_pmu_read_counter,
+ .write_counter = hisi_noc_pmu_write_counter,
+ .enable_counter = hisi_noc_pmu_enable_counter,
+ .disable_counter = hisi_noc_pmu_disable_counter,
+ .enable_counter_int = hisi_noc_pmu_enable_counter_int,
+ .disable_counter_int = hisi_noc_pmu_disable_counter_int,
+ .start_counters = hisi_noc_pmu_start_counters,
+ .stop_counters = hisi_noc_pmu_stop_counters,
+ .get_int_status = hisi_noc_pmu_get_int_status,
+ .clear_int_status = hisi_noc_pmu_clear_int_status,
+ .enable_filter = hisi_noc_pmu_enable_filter,
+ .disable_filter = hisi_noc_pmu_disable_filter,
+};
+
+static struct attribute *hisi_noc_pmu_format_attrs[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ HISI_PMU_FORMAT_ATTR(ch, "config1:0-2"),
+ HISI_PMU_FORMAT_ATTR(tt_en, "config1:3"),
+ NULL
+};
+
+static const struct attribute_group hisi_noc_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_noc_pmu_format_attrs,
+};
+
+static struct attribute *hisi_noc_pmu_events_attrs[] = {
+ HISI_PMU_EVENT_ATTR(cycles, 0x0e),
+ /* Flux on/off the ring */
+ HISI_PMU_EVENT_ATTR(ingress_flow_sum, 0x1a),
+ HISI_PMU_EVENT_ATTR(egress_flow_sum, 0x17),
+ /* Buffer full duration on/off the ring */
+ HISI_PMU_EVENT_ATTR(ingress_buf_full, 0x19),
+ HISI_PMU_EVENT_ATTR(egress_buf_full, 0x12),
+ /* Failure packets count on/off the ring */
+ HISI_PMU_EVENT_ATTR(cw_ingress_fail, 0x01),
+ HISI_PMU_EVENT_ATTR(cc_ingress_fail, 0x09),
+ HISI_PMU_EVENT_ATTR(cw_egress_fail, 0x03),
+ HISI_PMU_EVENT_ATTR(cc_egress_fail, 0x0b),
+ /* Flux of the ring */
+ HISI_PMU_EVENT_ATTR(cw_main_flow_sum, 0x05),
+ HISI_PMU_EVENT_ATTR(cc_main_flow_sum, 0x0d),
+ NULL
+};
+
+static const struct attribute_group hisi_noc_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_noc_pmu_events_attrs,
+};
+
+static const struct attribute_group *hisi_noc_pmu_attr_groups[] = {
+ &hisi_noc_pmu_format_group,
+ &hisi_noc_pmu_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static int hisi_noc_pmu_dev_init(struct platform_device *pdev, struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info;
+
+ hisi_uncore_pmu_init_topology(noc_pmu, &pdev->dev);
+
+ if (noc_pmu->topo.scl_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL, "failed to get scl-id\n");
+
+ if (noc_pmu->topo.index_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL, "failed to get idx-id\n");
+
+ if (noc_pmu->topo.sub_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL, "failed to get sub-id\n");
+
+ noc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(noc_pmu->base))
+ return dev_err_probe(&pdev->dev, PTR_ERR(noc_pmu->base),
+ "fail to remap io memory\n");
+
+ noc_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!noc_pmu->dev_info)
+ return -ENODEV;
+
+ noc_pmu->pmu_events.attr_groups = noc_pmu->dev_info->attr_groups;
+ noc_pmu->counter_bits = noc_pmu->dev_info->counter_bits;
+ noc_pmu->check_event = noc_pmu->dev_info->check_event;
+ noc_pmu->num_counters = NOC_PMU_NR_COUNTERS;
+ noc_pmu->ops = &hisi_uncore_noc_ops;
+ noc_pmu->dev = &pdev->dev;
+ noc_pmu->on_cpu = -1;
+
+ reg_info = noc_pmu->dev_info->private;
+ noc_pmu->identifier = readl(noc_pmu->base + reg_info->version);
+
+ return 0;
+}
+
+static void hisi_noc_pmu_remove_cpuhp_instance(void *hotplug_node)
+{
+ cpuhp_state_remove_instance_nocalls(hisi_noc_pmu_cpuhp_state, hotplug_node);
+}
+
+static void hisi_noc_pmu_unregister_pmu(void *pmu)
+{
+ perf_pmu_unregister(pmu);
+}
+
+static int hisi_noc_pmu_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct hisi_pmu *noc_pmu;
+ char *name;
+ int ret;
+
+ noc_pmu = devm_kzalloc(dev, sizeof(*noc_pmu), GFP_KERNEL);
+ if (!noc_pmu)
+ return -ENOMEM;
+
+ /*
+	 * The HiSilicon uncore PMU framework needs to get the common hisi_pmu
+	 * device from the device's drvdata.
+ */
+ platform_set_drvdata(pdev, noc_pmu);
+
+ ret = hisi_noc_pmu_dev_init(pdev, noc_pmu);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(hisi_noc_pmu_cpuhp_state, &noc_pmu->node);
+ if (ret)
+ return dev_err_probe(dev, ret, "Fail to register cpuhp instance\n");
+
+ ret = devm_add_action_or_reset(dev, hisi_noc_pmu_remove_cpuhp_instance,
+ &noc_pmu->node);
+ if (ret)
+ return ret;
+
+ hisi_pmu_init(noc_pmu, THIS_MODULE);
+
+ name = devm_kasprintf(dev, GFP_KERNEL, "hisi_scl%d_noc%d_%d",
+ noc_pmu->topo.scl_id, noc_pmu->topo.index_id,
+ noc_pmu->topo.sub_id);
+ if (!name)
+ return -ENOMEM;
+
+ ret = perf_pmu_register(&noc_pmu->pmu, name, -1);
+ if (ret)
+ return dev_err_probe(dev, ret, "Fail to register PMU\n");
+
+ return devm_add_action_or_reset(dev, hisi_noc_pmu_unregister_pmu,
+ &noc_pmu->pmu);
+}
+
+static struct hisi_noc_pmu_regs hisi_noc_v1_pmu_regs = {
+ .version = NOC_PMU_VERSION,
+ .pmu_ctrl = NOC_PMU_GLOBAL_CTRL,
+ .event_ctrl0 = NOC_PMU_EVENT_CTRL0,
+ .event_cntr0 = NOC_PMU_EVENT_COUNTER0,
+ .overflow_status = NOC_PMU_CNT_INFO,
+};
+
+static const struct hisi_pmu_dev_info hisi_noc_v1 = {
+ .attr_groups = hisi_noc_pmu_attr_groups,
+ .counter_bits = 64,
+ .check_event = NOC_PMU_EVENT_CTRL_TYPE,
+ .private = &hisi_noc_v1_pmu_regs,
+};
+
+static const struct acpi_device_id hisi_noc_pmu_ids[] = {
+ { "HISI04E0", (kernel_ulong_t) &hisi_noc_v1 },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, hisi_noc_pmu_ids);
+
+static struct platform_driver hisi_noc_pmu_driver = {
+ .driver = {
+ .name = "hisi_noc_pmu",
+ .acpi_match_table = hisi_noc_pmu_ids,
+ .suppress_bind_attrs = true,
+ },
+ .probe = hisi_noc_pmu_probe,
+};
+
+static int __init hisi_noc_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/noc:online",
+ hisi_uncore_pmu_online_cpu,
+ hisi_uncore_pmu_offline_cpu);
+ if (ret < 0) {
+ pr_err("hisi_noc_pmu: Fail to setup cpuhp callbacks, ret = %d\n", ret);
+ return ret;
+ }
+ hisi_noc_pmu_cpuhp_state = ret;
+
+ ret = platform_driver_register(&hisi_noc_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state);
+
+ return ret;
+}
+module_init(hisi_noc_pmu_module_init);
+
+static void __exit hisi_noc_pmu_module_exit(void)
+{
+ platform_driver_unregister(&hisi_noc_pmu_driver);
+ cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state);
+}
+module_exit(hisi_noc_pmu_module_exit);
+
+MODULE_IMPORT_NS("HISI_PMU");
+MODULE_DESCRIPTION("HiSilicon SoC Uncore NoC PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index a449651f79c9..de71dcf11653 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -149,7 +149,7 @@ static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
clear_bit(idx, hisi_pmu->pmu_events.used_mask);
}
-static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
+irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
{
struct hisi_pmu *hisi_pmu = data;
struct perf_event *event;
@@ -178,6 +178,7 @@ static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
return IRQ_HANDLED;
}
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_isr, "HISI_PMU");
int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
struct platform_device *pdev)
@@ -234,7 +235,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event)
return -EINVAL;
hisi_pmu = to_hisi_pmu(event->pmu);
- if (event->attr.config > hisi_pmu->check_event)
+ if ((event->attr.config & HISI_EVENTID_MASK) > hisi_pmu->check_event)
return -EINVAL;
if (hisi_pmu->on_cpu == -1)
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 777675838b80..3ffe6acda653 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -24,7 +24,7 @@
#define pr_fmt(fmt) "hisi_pmu: " fmt
#define HISI_PMU_V2 0x30
-#define HISI_MAX_COUNTERS 0x10
+#define HISI_MAX_COUNTERS 0x18
#define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu))
#define HISI_PMU_ATTR(_name, _func, _config) \
@@ -43,7 +43,8 @@
return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \
}
-#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
+#define HISI_EVENTID_MASK GENMASK(7, 0)
+#define HISI_GET_EVENTID(ev) ((ev)->hw.config_base & HISI_EVENTID_MASK)
#define HISI_PMU_EVTYPE_BITS 8
#define HISI_PMU_EVTYPE_SHIFT(idx) ((idx) % 4 * HISI_PMU_EVTYPE_BITS)
@@ -164,6 +165,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node);
ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
struct device_attribute *attr,
char *page);
+irqreturn_t hisi_uncore_pmu_isr(int irq, void *data);
int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
struct platform_device *pdev);
void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev);