summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/perf/imx-ddr.rst15
-rw-r--r--Documentation/admin-guide/perf/thunderx2-pmu.rst20
-rw-r--r--Documentation/devicetree/bindings/perf/arm-ccn.txt1
-rw-r--r--Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt1
-rw-r--r--arch/arm64/kernel/perf_event.c191
-rw-r--r--drivers/perf/arm-cci.c4
-rw-r--r--drivers/perf/arm-ccn.c4
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c5
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c124
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c5
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_hha_pmu.c4
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c4
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c26
-rw-r--r--drivers/perf/thunderx2_pmu.c267
-rw-r--r--drivers/perf/xgene_pmu.c14
15 files changed, 458 insertions, 227 deletions
diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst
index 517a205abad6..90056e4e8859 100644
--- a/Documentation/admin-guide/perf/imx-ddr.rst
+++ b/Documentation/admin-guide/perf/imx-ddr.rst
@@ -17,7 +17,8 @@ The "format" directory describes format of the config (event ID) and config1
(AXI filtering) fields of the perf_event_attr structure, see /sys/bus/event_source/
devices/imx8_ddr0/format/. The "events" directory describes the events types
hardware supported that can be used with perf tool, see /sys/bus/event_source/
-devices/imx8_ddr0/events/.
+devices/imx8_ddr0/events/. The "caps" directory describes filter features implemented
+in DDR PMU, see /sys/bus/events_source/devices/imx8_ddr0/caps/.
e.g.::
perf stat -a -e imx8_ddr0/cycles/ cmd
perf stat -a -e imx8_ddr0/read/,imx8_ddr0/write/ cmd
@@ -25,9 +26,12 @@ devices/imx8_ddr0/events/.
AXI filtering is only used by CSV modes 0x41 (axid-read) and 0x42 (axid-write)
to count reading or writing matches filter setting. Filter setting is various
from different DRAM controller implementations, which is distinguished by quirks
-in the driver.
+in the driver. You also can dump info from userspace, filter in "caps" directory
+indicates whether PMU supports AXI ID filter or not; enhanced_filter indicates
+whether PMU supports enhanced AXI ID filter or not. Value 0 for un-supported, and
+value 1 for supported.
-* With DDR_CAP_AXI_ID_FILTER quirk.
+* With DDR_CAP_AXI_ID_FILTER quirk(filter: 1, enhanced_filter: 0).
Filter is defined with two configuration parts:
--AXI_ID defines AxID matching value.
--AXI_MASKING defines which bits of AxID are meaningful for the matching.
@@ -50,3 +54,8 @@ in the driver.
axi_id to monitor a specific id, rather than having to specify axi_mask.
e.g.::
perf stat -a -e imx8_ddr0/axid-read,axi_id=0x12/ cmd, which will monitor ARID=0x12
+
+* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk(filter: 1, enhanced_filter: 1).
+ This is an extension to the DDR_CAP_AXI_ID_FILTER quirk which permits
+ counting the number of bytes (as opposed to the number of bursts) from DDR
+ read and write transactions concurrently with another set of data counters.
diff --git a/Documentation/admin-guide/perf/thunderx2-pmu.rst b/Documentation/admin-guide/perf/thunderx2-pmu.rst
index 08e33675853a..01f158238ae1 100644
--- a/Documentation/admin-guide/perf/thunderx2-pmu.rst
+++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst
@@ -3,24 +3,26 @@ Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE)
=============================================================
The ThunderX2 SoC PMU consists of independent, system-wide, per-socket
-PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC).
+PMUs such as the Level 3 Cache (L3C), DDR4 Memory Controller (DMC) and
+Cavium Coherent Processor Interconnect (CCPI2).
The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles.
Events are counted for the default channel (i.e. channel 0) and prorated
to the total number of channels/tiles.
-The DMC and L3C support up to 4 counters. Counters are independently
-programmable and can be started and stopped individually. Each counter
-can be set to a different event. Counters are 32-bit and do not support
-an overflow interrupt; they are read every 2 seconds.
+The DMC and L3C support up to 4 counters, while the CCPI2 supports up to 8
+counters. Counters are independently programmable to different events and
+can be started and stopped individually. None of the counters support an
+overflow interrupt. DMC and L3C counters are 32-bit and read every 2 seconds.
+The CCPI2 counters are 64-bit and assumed not to overflow in normal operation.
PMU UNCORE (perf) driver:
The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
-L3C devices. Each PMU can be used to count up to 4 events
-simultaneously. The PMUs provide a description of their available events
-and configuration options under sysfs, see
-/sys/devices/uncore_<l3c_S/dmc_S/>; S is the socket id.
+L3C devices. Each PMU can be used to count up to 4 (DMC/L3C) or up to 8
+(CCPI2) events simultaneously. The PMUs provide a description of their
+available events and configuration options under sysfs, see
+/sys/devices/uncore_<l3c_S/dmc_S/ccpi2_S/>; S is the socket id.
The driver does not support sampling, therefore "perf record" will not
work. Per-task perf sessions are also not supported.
diff --git a/Documentation/devicetree/bindings/perf/arm-ccn.txt b/Documentation/devicetree/bindings/perf/arm-ccn.txt
index 43b5a71a5a9d..1c53b5aa3317 100644
--- a/Documentation/devicetree/bindings/perf/arm-ccn.txt
+++ b/Documentation/devicetree/bindings/perf/arm-ccn.txt
@@ -6,6 +6,7 @@ Required properties:
"arm,ccn-502"
"arm,ccn-504"
"arm,ccn-508"
+ "arm,ccn-512"
- reg: (standard registers property) physical address and size
(16MB) of the configuration registers block
diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
index d77e3f26f9e6..7822a806ea0a 100644
--- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
+++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
@@ -5,6 +5,7 @@ Required properties:
- compatible: should be one of:
"fsl,imx8-ddr-pmu"
"fsl,imx8m-ddr-pmu"
+ "fsl,imx8mp-ddr-pmu"
- reg: physical address and size
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a0b4f1bca491..e40b65645c86 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -158,133 +158,74 @@ armv8pmu_events_sysfs_show(struct device *dev,
return sprintf(page, "event=0x%03llx\n", pmu_attr->id);
}
-#define ARMV8_EVENT_ATTR(name, config) \
- PMU_EVENT_ATTR(name, armv8_event_attr_##name, \
- config, armv8pmu_events_sysfs_show)
-
-ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR);
-ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE);
-ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL);
-ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED);
-ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED);
-ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED);
-ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN);
-ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN);
-ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED);
-ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED);
-ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED);
-ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED);
-ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES);
-ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED);
-ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS);
-ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE);
-ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB);
-ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE);
-ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB);
-ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS);
-ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR);
-ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC);
-ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES);
-/* Don't expose the chain event in /sys, since it's useless in isolation */
-ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED);
-ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED);
-ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND);
-ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND);
-ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB);
-ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB);
-ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE);
-ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE);
-ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB);
-ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL);
-ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL);
-ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB);
-ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB);
-ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS);
-ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE);
-ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS);
-ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK);
-ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK);
-ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD);
-ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD);
-ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD);
-ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP);
-ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED);
-ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE);
-ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION);
+#define ARMV8_EVENT_ATTR(name, config) \
+ (&((struct perf_pmu_events_attr) { \
+ .attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL), \
+ .id = config, \
+ }).attr.attr)
static struct attribute *armv8_pmuv3_event_attrs[] = {
- &armv8_event_attr_sw_incr.attr.attr,
- &armv8_event_attr_l1i_cache_refill.attr.attr,
- &armv8_event_attr_l1i_tlb_refill.attr.attr,
- &armv8_event_attr_l1d_cache_refill.attr.attr,
- &armv8_event_attr_l1d_cache.attr.attr,
- &armv8_event_attr_l1d_tlb_refill.attr.attr,
- &armv8_event_attr_ld_retired.attr.attr,
- &armv8_event_attr_st_retired.attr.attr,
- &armv8_event_attr_inst_retired.attr.attr,
- &armv8_event_attr_exc_taken.attr.attr,
- &armv8_event_attr_exc_return.attr.attr,
- &armv8_event_attr_cid_write_retired.attr.attr,
- &armv8_event_attr_pc_write_retired.attr.attr,
- &armv8_event_attr_br_immed_retired.attr.attr,
- &armv8_event_attr_br_return_retired.attr.attr,
- &armv8_event_attr_unaligned_ldst_retired.attr.attr,
- &armv8_event_attr_br_mis_pred.attr.attr,
- &armv8_event_attr_cpu_cycles.attr.attr,
- &armv8_event_attr_br_pred.attr.attr,
- &armv8_event_attr_mem_access.attr.attr,
- &armv8_event_attr_l1i_cache.attr.attr,
- &armv8_event_attr_l1d_cache_wb.attr.attr,
- &armv8_event_attr_l2d_cache.attr.attr,
- &armv8_event_attr_l2d_cache_refill.attr.attr,
- &armv8_event_attr_l2d_cache_wb.attr.attr,
- &armv8_event_attr_bus_access.attr.attr,
- &armv8_event_attr_memory_error.attr.attr,
- &armv8_event_attr_inst_spec.attr.attr,
- &armv8_event_attr_ttbr_write_retired.attr.attr,
- &armv8_event_attr_bus_cycles.attr.attr,
- &armv8_event_attr_l1d_cache_allocate.attr.attr,
- &armv8_event_attr_l2d_cache_allocate.attr.attr,
- &armv8_event_attr_br_retired.attr.attr,
- &armv8_event_attr_br_mis_pred_retired.attr.attr,
- &armv8_event_attr_stall_frontend.attr.attr,
- &armv8_event_attr_stall_backend.attr.attr,
- &armv8_event_attr_l1d_tlb.attr.attr,
- &armv8_event_attr_l1i_tlb.attr.attr,
- &armv8_event_attr_l2i_cache.attr.attr,
- &armv8_event_attr_l2i_cache_refill.attr.attr,
- &armv8_event_attr_l3d_cache_allocate.attr.attr,
- &armv8_event_attr_l3d_cache_refill.attr.attr,
- &armv8_event_attr_l3d_cache.attr.attr,
- &armv8_event_attr_l3d_cache_wb.attr.attr,
- &armv8_event_attr_l2d_tlb_refill.attr.attr,
- &armv8_event_attr_l2i_tlb_refill.attr.attr,
- &armv8_event_attr_l2d_tlb.attr.attr,
- &armv8_event_attr_l2i_tlb.attr.attr,
- &armv8_event_attr_remote_access.attr.attr,
- &armv8_event_attr_ll_cache.attr.attr,
- &armv8_event_attr_ll_cache_miss.attr.attr,
- &armv8_event_attr_dtlb_walk.attr.attr,
- &armv8_event_attr_itlb_walk.attr.attr,
- &armv8_event_attr_ll_cache_rd.attr.attr,
- &armv8_event_attr_ll_cache_miss_rd.attr.attr,
- &armv8_event_attr_remote_access_rd.attr.attr,
- &armv8_event_attr_sample_pop.attr.attr,
- &armv8_event_attr_sample_feed.attr.attr,
- &armv8_event_attr_sample_filtrate.attr.attr,
- &armv8_event_attr_sample_collision.attr.attr,
+ ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
+ ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE),
+ ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL),
+ ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED),
+ ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED),
+ ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED),
+ ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN),
+ ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN),
+ ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED),
+ ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED),
+ ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED),
+ ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED),
+ ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES),
+ ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED),
+ ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS),
+ ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE),
+ ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB),
+ ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE),
+ ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB),
+ ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS),
+ ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR),
+ ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC),
+ ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES),
+ /* Don't expose the chain event in /sys, since it's useless in isolation */
+ ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED),
+ ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED),
+ ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND),
+ ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND),
+ ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB),
+ ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB),
+ ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE),
+ ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE),
+ ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB),
+ ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB),
+ ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB),
+ ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS),
+ ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE),
+ ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS),
+ ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK),
+ ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK),
+ ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD),
+ ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD),
+ ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD),
+ ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP),
+ ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED),
+ ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE),
+ ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION),
NULL,
};
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 8f8606b9bc9e..1b8e337a29ca 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -1642,7 +1642,6 @@ static struct cci_pmu *cci_pmu_alloc(struct device *dev)
static int cci_pmu_probe(struct platform_device *pdev)
{
- struct resource *res;
struct cci_pmu *cci_pmu;
int i, ret, irq;
@@ -1650,8 +1649,7 @@ static int cci_pmu_probe(struct platform_device *pdev)
if (IS_ERR(cci_pmu))
return PTR_ERR(cci_pmu);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- cci_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ cci_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(cci_pmu->base))
return -ENOMEM;
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 6fc0273b6129..fea354d6fb29 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -1477,8 +1477,7 @@ static int arm_ccn_probe(struct platform_device *pdev)
ccn->dev = &pdev->dev;
platform_set_drvdata(pdev, ccn);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- ccn->base = devm_ioremap_resource(ccn->dev, res);
+ ccn->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(ccn->base))
return PTR_ERR(ccn->base);
@@ -1537,6 +1536,7 @@ static int arm_ccn_remove(struct platform_device *pdev)
static const struct of_device_id arm_ccn_match[] = {
{ .compatible = "arm,ccn-502", },
{ .compatible = "arm,ccn-504", },
+ { .compatible = "arm,ccn-512", },
{},
};
MODULE_DEVICE_TABLE(of, arm_ccn_match);
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index abcf54f7d19c..773128f411f1 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -727,7 +727,7 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu)
static int smmu_pmu_probe(struct platform_device *pdev)
{
struct smmu_pmu *smmu_pmu;
- struct resource *res_0, *res_1;
+ struct resource *res_0;
u32 cfgr, reg_size;
u64 ceid_64[2];
int irq, err;
@@ -764,8 +764,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
/* Determine if page 1 is present */
if (cfgr & SMMU_PMCG_CFGR_RELOC_CTRS) {
- res_1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- smmu_pmu->reloc_base = devm_ioremap_resource(dev, res_1);
+ smmu_pmu->reloc_base = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(smmu_pmu->reloc_base))
return PTR_ERR(smmu_pmu->reloc_base);
} else {
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index ce7345745b42..55083c67b2bb 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -45,7 +45,8 @@
static DEFINE_IDA(ddr_ida);
/* DDR Perf hardware feature */
-#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */
+#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */
+#define DDR_CAP_AXI_ID_FILTER_ENHANCED 0x3 /* support enhanced AXI ID filter */
struct fsl_ddr_devtype_data {
unsigned int quirks; /* quirks needed for different DDR Perf core */
@@ -57,9 +58,14 @@ static const struct fsl_ddr_devtype_data imx8m_devtype_data = {
.quirks = DDR_CAP_AXI_ID_FILTER,
};
+static const struct fsl_ddr_devtype_data imx8mp_devtype_data = {
+ .quirks = DDR_CAP_AXI_ID_FILTER_ENHANCED,
+};
+
static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},
+ { .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data},
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
@@ -78,6 +84,61 @@ struct ddr_pmu {
int id;
};
+enum ddr_perf_filter_capabilities {
+ PERF_CAP_AXI_ID_FILTER = 0,
+ PERF_CAP_AXI_ID_FILTER_ENHANCED,
+ PERF_CAP_AXI_ID_FEAT_MAX,
+};
+
+static u32 ddr_perf_filter_cap_get(struct ddr_pmu *pmu, int cap)
+{
+ u32 quirks = pmu->devtype_data->quirks;
+
+ switch (cap) {
+ case PERF_CAP_AXI_ID_FILTER:
+ return !!(quirks & DDR_CAP_AXI_ID_FILTER);
+ case PERF_CAP_AXI_ID_FILTER_ENHANCED:
+ quirks &= DDR_CAP_AXI_ID_FILTER_ENHANCED;
+ return quirks == DDR_CAP_AXI_ID_FILTER_ENHANCED;
+ default:
+ WARN(1, "unknown filter cap %d\n", cap);
+ }
+
+ return 0;
+}
+
+static ssize_t ddr_perf_filter_cap_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ddr_pmu *pmu = dev_get_drvdata(dev);
+ struct dev_ext_attribute *ea =
+ container_of(attr, struct dev_ext_attribute, attr);
+ int cap = (long)ea->var;
+
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ ddr_perf_filter_cap_get(pmu, cap));
+}
+
+#define PERF_EXT_ATTR_ENTRY(_name, _func, _var) \
+ (&((struct dev_ext_attribute) { \
+ __ATTR(_name, 0444, _func, NULL), (void *)_var \
+ }).attr.attr)
+
+#define PERF_FILTER_EXT_ATTR_ENTRY(_name, _var) \
+ PERF_EXT_ATTR_ENTRY(_name, ddr_perf_filter_cap_show, _var)
+
+static struct attribute *ddr_perf_filter_cap_attr[] = {
+ PERF_FILTER_EXT_ATTR_ENTRY(filter, PERF_CAP_AXI_ID_FILTER),
+ PERF_FILTER_EXT_ATTR_ENTRY(enhanced_filter, PERF_CAP_AXI_ID_FILTER_ENHANCED),
+ NULL,
+};
+
+static struct attribute_group ddr_perf_filter_cap_attr_group = {
+ .name = "caps",
+ .attrs = ddr_perf_filter_cap_attr,
+};
+
static ssize_t ddr_perf_cpumask_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -175,9 +236,40 @@ static const struct attribute_group *attr_groups[] = {
&ddr_perf_events_attr_group,
&ddr_perf_format_attr_group,
&ddr_perf_cpumask_attr_group,
+ &ddr_perf_filter_cap_attr_group,
NULL,
};
+static bool ddr_perf_is_filtered(struct perf_event *event)
+{
+ return event->attr.config == 0x41 || event->attr.config == 0x42;
+}
+
+static u32 ddr_perf_filter_val(struct perf_event *event)
+{
+ return event->attr.config1;
+}
+
+static bool ddr_perf_filters_compatible(struct perf_event *a,
+ struct perf_event *b)
+{
+ if (!ddr_perf_is_filtered(a))
+ return true;
+ if (!ddr_perf_is_filtered(b))
+ return true;
+ return ddr_perf_filter_val(a) == ddr_perf_filter_val(b);
+}
+
+static bool ddr_perf_is_enhanced_filtered(struct perf_event *event)
+{
+ unsigned int filt;
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+
+ filt = pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED;
+ return (filt == DDR_CAP_AXI_ID_FILTER_ENHANCED) &&
+ ddr_perf_is_filtered(event);
+}
+
static u32 ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event)
{
int i;
@@ -209,27 +301,17 @@ static void ddr_perf_free_counter(struct ddr_pmu *pmu, int counter)
static u32 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter)
{
- return readl_relaxed(pmu->base + COUNTER_READ + counter * 4);
-}
-
-static bool ddr_perf_is_filtered(struct perf_event *event)
-{
- return event->attr.config == 0x41 || event->attr.config == 0x42;
-}
+ struct perf_event *event = pmu->events[counter];
+ void __iomem *base = pmu->base;
-static u32 ddr_perf_filter_val(struct perf_event *event)
-{
- return event->attr.config1;
-}
-
-static bool ddr_perf_filters_compatible(struct perf_event *a,
- struct perf_event *b)
-{
- if (!ddr_perf_is_filtered(a))
- return true;
- if (!ddr_perf_is_filtered(b))
- return true;
- return ddr_perf_filter_val(a) == ddr_perf_filter_val(b);
+ /*
+ * return bytes instead of bursts from ddr transaction for
+ * axid-read and axid-write event if PMU core supports enhanced
+ * filter.
+ */
+ base += ddr_perf_is_enhanced_filtered(event) ? COUNTER_DPCR1 :
+ COUNTER_READ;
+ return readl_relaxed(base + counter * 4);
}
static int ddr_perf_event_init(struct perf_event *event)
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index e42d4464c2cf..453f1c6a16ca 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -243,8 +243,6 @@ MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match);
static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *ddrc_pmu)
{
- struct resource *res;
-
/*
* Use the SCCL_ID and DDRC channel ID to identify the
* DDRC PMU, while SCCL_ID is in MPIDR[aff2].
@@ -263,8 +261,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
/* DDRC PMUs only share the same SCCL */
ddrc_pmu->ccl_id = -1;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- ddrc_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(ddrc_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for ddrc_pmu resource\n");
return PTR_ERR(ddrc_pmu->base);
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index f28063873e11..6a1dd72d8abb 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -234,7 +234,6 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *hha_pmu)
{
unsigned long long id;
- struct resource *res;
acpi_status status;
status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
@@ -256,8 +255,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
/* HHA PMUs only share the same SCCL */
hha_pmu->ccl_id = -1;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- hha_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ hha_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(hha_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for hha_pmu resource\n");
return PTR_ERR(hha_pmu->base);
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 078b8dc57250..1151e99b241c 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -233,7 +233,6 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
{
unsigned long long id;
- struct resource *res;
acpi_status status;
status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
@@ -259,8 +258,7 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
return -EINVAL;
}
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- l3c_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ l3c_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(l3c_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n");
return PTR_ERR(l3c_pmu->base);
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 79f76f8dda8e..96183e31b96a 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -15,6 +15,7 @@
#include <linux/errno.h>
#include <linux/interrupt.h>
+#include <asm/cputype.h>
#include <asm/local64.h>
#include "hisi_uncore_pmu.h"
@@ -338,8 +339,10 @@ void hisi_uncore_pmu_disable(struct pmu *pmu)
/*
* Read Super CPU cluster and CPU cluster ID from MPIDR_EL1.
- * If multi-threading is supported, CCL_ID is the low 3-bits in MPIDR[Aff2]
- * and SCCL_ID is the upper 5-bits of Aff2 field; if not, SCCL_ID
+ * If multi-threading is supported, On Huawei Kunpeng 920 SoC whose cpu
+ * core is tsv110, CCL_ID is the low 3-bits in MPIDR[Aff2] and SCCL_ID
+ * is the upper 5-bits of Aff2 field; while for other cpu types, SCCL_ID
+ * is in MPIDR[Aff3] and CCL_ID is in MPIDR[Aff2], if not, SCCL_ID
* is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1].
*/
static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id)
@@ -347,12 +350,19 @@ static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id)
u64 mpidr = read_cpuid_mpidr();
if (mpidr & MPIDR_MT_BITMASK) {
- int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2);
-
- if (sccl_id)
- *sccl_id = aff2 >> 3;
- if (ccl_id)
- *ccl_id = aff2 & 0x7;
+ if (read_cpuid_part_number() == HISI_CPU_PART_TSV110) {
+ int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+
+ if (sccl_id)
+ *sccl_id = aff2 >> 3;
+ if (ccl_id)
+ *ccl_id = aff2 & 0x7;
+ } else {
+ if (sccl_id)
+ *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3);
+ if (ccl_id)
+ *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+ }
} else {
if (sccl_id)
*sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index 43d76c85da56..51b31d6ff2c4 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -16,23 +16,36 @@
* they need to be sampled before overflow(i.e, at every 2 seconds).
*/
-#define TX2_PMU_MAX_COUNTERS 4
+#define TX2_PMU_DMC_L3C_MAX_COUNTERS 4
+#define TX2_PMU_CCPI2_MAX_COUNTERS 8
+#define TX2_PMU_MAX_COUNTERS TX2_PMU_CCPI2_MAX_COUNTERS
+
+
#define TX2_PMU_DMC_CHANNELS 8
#define TX2_PMU_L3_TILES 16
#define TX2_PMU_HRTIMER_INTERVAL (2 * NSEC_PER_SEC)
-#define GET_EVENTID(ev) ((ev->hw.config) & 0x1f)
-#define GET_COUNTERID(ev) ((ev->hw.idx) & 0x3)
+#define GET_EVENTID(ev, mask) ((ev->hw.config) & mask)
+#define GET_COUNTERID(ev, mask) ((ev->hw.idx) & mask)
/* 1 byte per counter(4 counters).
* Event id is encoded in bits [5:1] of a byte,
*/
#define DMC_EVENT_CFG(idx, val) ((val) << (((idx) * 8) + 1))
+/* bits[3:0] to select counters, are indexed from 8 to 15. */
+#define CCPI2_COUNTER_OFFSET 8
+
#define L3C_COUNTER_CTL 0xA8
#define L3C_COUNTER_DATA 0xAC
#define DMC_COUNTER_CTL 0x234
#define DMC_COUNTER_DATA 0x240
+#define CCPI2_PERF_CTL 0x108
+#define CCPI2_COUNTER_CTL 0x10C
+#define CCPI2_COUNTER_SEL 0x12c
+#define CCPI2_COUNTER_DATA_L 0x130
+#define CCPI2_COUNTER_DATA_H 0x134
+
/* L3C event IDs */
#define L3_EVENT_READ_REQ 0xD
#define L3_EVENT_WRITEBACK_REQ 0xE
@@ -51,15 +64,28 @@
#define DMC_EVENT_READ_TXNS 0xF
#define DMC_EVENT_MAX 0x10
+#define CCPI2_EVENT_REQ_PKT_SENT 0x3D
+#define CCPI2_EVENT_SNOOP_PKT_SENT 0x65
+#define CCPI2_EVENT_DATA_PKT_SENT 0x105
+#define CCPI2_EVENT_GIC_PKT_SENT 0x12D
+#define CCPI2_EVENT_MAX 0x200
+
+#define CCPI2_PERF_CTL_ENABLE BIT(0)
+#define CCPI2_PERF_CTL_START BIT(1)
+#define CCPI2_PERF_CTL_RESET BIT(4)
+#define CCPI2_EVENT_LEVEL_RISING_EDGE BIT(10)
+#define CCPI2_EVENT_TYPE_EDGE_SENSITIVE BIT(11)
+
enum tx2_uncore_type {
PMU_TYPE_L3C,
PMU_TYPE_DMC,
+ PMU_TYPE_CCPI2,
PMU_TYPE_INVALID,
};
/*
- * pmu on each socket has 2 uncore devices(dmc and l3c),
- * each device has 4 counters.
+ * Each socket has 3 uncore devices associated with a PMU. The DMC and
+ * L3C have 4 32-bit counters and the CCPI2 has 8 64-bit counters.
*/
struct tx2_uncore_pmu {
struct hlist_node hpnode;
@@ -69,8 +95,10 @@ struct tx2_uncore_pmu {
int node;
int cpu;
u32 max_counters;
+ u32 counters_mask;
u32 prorate_factor;
u32 max_events;
+ u32 events_mask;
u64 hrtimer_interval;
void __iomem *base;
DECLARE_BITMAP(active_counters, TX2_PMU_MAX_COUNTERS);
@@ -79,6 +107,7 @@ struct tx2_uncore_pmu {
struct hrtimer hrtimer;
const struct attribute_group **attr_groups;
enum tx2_uncore_type type;
+ enum hrtimer_restart (*hrtimer_callback)(struct hrtimer *cb);
void (*init_cntr_base)(struct perf_event *event,
struct tx2_uncore_pmu *tx2_pmu);
void (*stop_event)(struct perf_event *event);
@@ -92,7 +121,21 @@ static inline struct tx2_uncore_pmu *pmu_to_tx2_pmu(struct pmu *pmu)
return container_of(pmu, struct tx2_uncore_pmu, pmu);
}
-PMU_FORMAT_ATTR(event, "config:0-4");
+#define TX2_PMU_FORMAT_ATTR(_var, _name, _format) \
+static ssize_t \
+__tx2_pmu_##_var##_show(struct device *dev, \
+ struct device_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+ \
+static struct device_attribute format_attr_##_var = \
+ __ATTR(_name, 0444, __tx2_pmu_##_var##_show, NULL)
+
+TX2_PMU_FORMAT_ATTR(event, event, "config:0-4");
+TX2_PMU_FORMAT_ATTR(event_ccpi2, event, "config:0-9");
static struct attribute *l3c_pmu_format_attrs[] = {
&format_attr_event.attr,
@@ -104,6 +147,11 @@ static struct attribute *dmc_pmu_format_attrs[] = {
NULL,
};
+static struct attribute *ccpi2_pmu_format_attrs[] = {
+ &format_attr_event_ccpi2.attr,
+ NULL,
+};
+
static const struct attribute_group l3c_pmu_format_attr_group = {
.name = "format",
.attrs = l3c_pmu_format_attrs,
@@ -114,6 +162,11 @@ static const struct attribute_group dmc_pmu_format_attr_group = {
.attrs = dmc_pmu_format_attrs,
};
+static const struct attribute_group ccpi2_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = ccpi2_pmu_format_attrs,
+};
+
/*
* sysfs event attributes
*/
@@ -164,6 +217,19 @@ static struct attribute *dmc_pmu_events_attrs[] = {
NULL,
};
+TX2_EVENT_ATTR(req_pktsent, CCPI2_EVENT_REQ_PKT_SENT);
+TX2_EVENT_ATTR(snoop_pktsent, CCPI2_EVENT_SNOOP_PKT_SENT);
+TX2_EVENT_ATTR(data_pktsent, CCPI2_EVENT_DATA_PKT_SENT);
+TX2_EVENT_ATTR(gic_pktsent, CCPI2_EVENT_GIC_PKT_SENT);
+
+static struct attribute *ccpi2_pmu_events_attrs[] = {
+ &tx2_pmu_event_attr_req_pktsent.attr.attr,
+ &tx2_pmu_event_attr_snoop_pktsent.attr.attr,
+ &tx2_pmu_event_attr_data_pktsent.attr.attr,
+ &tx2_pmu_event_attr_gic_pktsent.attr.attr,
+ NULL,
+};
+
static const struct attribute_group l3c_pmu_events_attr_group = {
.name = "events",
.attrs = l3c_pmu_events_attrs,
@@ -174,6 +240,11 @@ static const struct attribute_group dmc_pmu_events_attr_group = {
.attrs = dmc_pmu_events_attrs,
};
+static const struct attribute_group ccpi2_pmu_events_attr_group = {
+ .name = "events",
+ .attrs = ccpi2_pmu_events_attrs,
+};
+
/*
* sysfs cpumask attributes
*/
@@ -213,6 +284,13 @@ static const struct attribute_group *dmc_pmu_attr_groups[] = {
NULL
};
+static const struct attribute_group *ccpi2_pmu_attr_groups[] = {
+ &ccpi2_pmu_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &ccpi2_pmu_events_attr_group,
+ NULL
+};
+
static inline u32 reg_readl(unsigned long addr)
{
return readl((void __iomem *)addr);
@@ -245,33 +323,58 @@ static void init_cntr_base_l3c(struct perf_event *event,
struct tx2_uncore_pmu *tx2_pmu)
{
struct hw_perf_event *hwc = &event->hw;
+ u32 cmask;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ cmask = tx2_pmu->counters_mask;
/* counter ctrl/data reg offset at 8 */
hwc->config_base = (unsigned long)tx2_pmu->base
- + L3C_COUNTER_CTL + (8 * GET_COUNTERID(event));
+ + L3C_COUNTER_CTL + (8 * GET_COUNTERID(event, cmask));
hwc->event_base = (unsigned long)tx2_pmu->base
- + L3C_COUNTER_DATA + (8 * GET_COUNTERID(event));
+ + L3C_COUNTER_DATA + (8 * GET_COUNTERID(event, cmask));
}
static void init_cntr_base_dmc(struct perf_event *event,
struct tx2_uncore_pmu *tx2_pmu)
{
struct hw_perf_event *hwc = &event->hw;
+ u32 cmask;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ cmask = tx2_pmu->counters_mask;
hwc->config_base = (unsigned long)tx2_pmu->base
+ DMC_COUNTER_CTL;
/* counter data reg offset at 0xc */
hwc->event_base = (unsigned long)tx2_pmu->base
- + DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event));
+ + DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event, cmask));
+}
+
+static void init_cntr_base_ccpi2(struct perf_event *event,
+ struct tx2_uncore_pmu *tx2_pmu)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u32 cmask;
+
+ cmask = tx2_pmu->counters_mask;
+
+ hwc->config_base = (unsigned long)tx2_pmu->base
+ + CCPI2_COUNTER_CTL + (4 * GET_COUNTERID(event, cmask));
+ hwc->event_base = (unsigned long)tx2_pmu->base;
}
static void uncore_start_event_l3c(struct perf_event *event, int flags)
{
- u32 val;
+ u32 val, emask;
struct hw_perf_event *hwc = &event->hw;
+ struct tx2_uncore_pmu *tx2_pmu;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ emask = tx2_pmu->events_mask;
/* event id encoded in bits [07:03] */
- val = GET_EVENTID(event) << 3;
+ val = GET_EVENTID(event, emask) << 3;
reg_writel(val, hwc->config_base);
local64_set(&hwc->prev_count, 0);
reg_writel(0, hwc->event_base);
@@ -284,10 +387,17 @@ static inline void uncore_stop_event_l3c(struct perf_event *event)
static void uncore_start_event_dmc(struct perf_event *event, int flags)
{
- u32 val;
+ u32 val, cmask, emask;
struct hw_perf_event *hwc = &event->hw;
- int idx = GET_COUNTERID(event);
- int event_id = GET_EVENTID(event);
+ struct tx2_uncore_pmu *tx2_pmu;
+ int idx, event_id;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ cmask = tx2_pmu->counters_mask;
+ emask = tx2_pmu->events_mask;
+
+ idx = GET_COUNTERID(event, cmask);
+ event_id = GET_EVENTID(event, emask);
/* enable and start counters.
* 8 bits for each counter, bits[05:01] of a counter to set event type.
@@ -302,9 +412,14 @@ static void uncore_start_event_dmc(struct perf_event *event, int flags)
static void uncore_stop_event_dmc(struct perf_event *event)
{
- u32 val;
+ u32 val, cmask;
struct hw_perf_event *hwc = &event->hw;
- int idx = GET_COUNTERID(event);
+ struct tx2_uncore_pmu *tx2_pmu;
+ int idx;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ cmask = tx2_pmu->counters_mask;
+ idx = GET_COUNTERID(event, cmask);
/* clear event type(bits[05:01]) to stop counter */
val = reg_readl(hwc->config_base);
@@ -312,27 +427,72 @@ static void uncore_stop_event_dmc(struct perf_event *event)
reg_writel(val, hwc->config_base);
}
+static void uncore_start_event_ccpi2(struct perf_event *event, int flags)
+{
+ u32 emask;
+ struct hw_perf_event *hwc = &event->hw;
+ struct tx2_uncore_pmu *tx2_pmu;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ emask = tx2_pmu->events_mask;
+
+ /* Bit [09:00] to set event id.
+ * Bits [10], set level to rising edge.
+ * Bits [11], set type to edge sensitive.
+ */
+ reg_writel((CCPI2_EVENT_TYPE_EDGE_SENSITIVE |
+ CCPI2_EVENT_LEVEL_RISING_EDGE |
+ GET_EVENTID(event, emask)), hwc->config_base);
+
+ /* reset[4], enable[0] and start[1] counters */
+ reg_writel(CCPI2_PERF_CTL_RESET |
+ CCPI2_PERF_CTL_START |
+ CCPI2_PERF_CTL_ENABLE,
+ hwc->event_base + CCPI2_PERF_CTL);
+ local64_set(&event->hw.prev_count, 0ULL);
+}
+
+static void uncore_stop_event_ccpi2(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* disable and stop counter */
+ reg_writel(0, hwc->event_base + CCPI2_PERF_CTL);
+}
+
static void tx2_uncore_event_update(struct perf_event *event)
{
- s64 prev, delta, new = 0;
+ u64 prev, delta, new = 0;
struct hw_perf_event *hwc = &event->hw;
struct tx2_uncore_pmu *tx2_pmu;
enum tx2_uncore_type type;
u32 prorate_factor;
+ u32 cmask, emask;
tx2_pmu = pmu_to_tx2_pmu(event->pmu);
type = tx2_pmu->type;
+ cmask = tx2_pmu->counters_mask;
+ emask = tx2_pmu->events_mask;
prorate_factor = tx2_pmu->prorate_factor;
-
- new = reg_readl(hwc->event_base);
- prev = local64_xchg(&hwc->prev_count, new);
-
- /* handles rollover of 32 bit counter */
- delta = (u32)(((1UL << 32) - prev) + new);
+ if (type == PMU_TYPE_CCPI2) {
+ reg_writel(CCPI2_COUNTER_OFFSET +
+ GET_COUNTERID(event, cmask),
+ hwc->event_base + CCPI2_COUNTER_SEL);
+ new = reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_H);
+ new = (new << 32) +
+ reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_L);
+ prev = local64_xchg(&hwc->prev_count, new);
+ delta = new - prev;
+ } else {
+ new = reg_readl(hwc->event_base);
+ prev = local64_xchg(&hwc->prev_count, new);
+ /* handles rollover of 32 bit counter */
+ delta = (u32)(((1UL << 32) - prev) + new);
+ }
/* DMC event data_transfers granularity is 16 Bytes, convert it to 64 */
if (type == PMU_TYPE_DMC &&
- GET_EVENTID(event) == DMC_EVENT_DATA_TRANSFERS)
+ GET_EVENTID(event, emask) == DMC_EVENT_DATA_TRANSFERS)
delta = delta/4;
/* L3C and DMC has 16 and 8 interleave channels respectively.
@@ -351,6 +511,7 @@ static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev)
} devices[] = {
{"CAV901D", PMU_TYPE_L3C},
{"CAV901F", PMU_TYPE_DMC},
+ {"CAV901E", PMU_TYPE_CCPI2},
{"", PMU_TYPE_INVALID}
};
@@ -380,7 +541,8 @@ static bool tx2_uncore_validate_event(struct pmu *pmu,
* Make sure the group of events can be scheduled at once
* on the PMU.
*/
-static bool tx2_uncore_validate_event_group(struct perf_event *event)
+static bool tx2_uncore_validate_event_group(struct perf_event *event,
+ int max_counters)
{
struct perf_event *sibling, *leader = event->group_leader;
int counters = 0;
@@ -403,7 +565,7 @@ static bool tx2_uncore_validate_event_group(struct perf_event *event)
* If the group requires more counters than the HW has,
* it cannot ever be scheduled.
*/
- return counters <= TX2_PMU_MAX_COUNTERS;
+ return counters <= max_counters;
}
@@ -439,7 +601,7 @@ static int tx2_uncore_event_init(struct perf_event *event)
hwc->config = event->attr.config;
/* Validate the group */
- if (!tx2_uncore_validate_event_group(event))
+ if (!tx2_uncore_validate_event_group(event, tx2_pmu->max_counters))
return -EINVAL;
return 0;
@@ -456,6 +618,10 @@ static void tx2_uncore_event_start(struct perf_event *event, int flags)
tx2_pmu->start_event(event, flags);
perf_event_update_userpage(event);
+ /* No hrtimer needed for CCPI2, 64-bit counters */
+ if (!tx2_pmu->hrtimer_callback)
+ return;
+
/* Start timer for first event */
if (bitmap_weight(tx2_pmu->active_counters,
tx2_pmu->max_counters) == 1) {
@@ -510,15 +676,23 @@ static void tx2_uncore_event_del(struct perf_event *event, int flags)
{
struct tx2_uncore_pmu *tx2_pmu = pmu_to_tx2_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ u32 cmask;
+ cmask = tx2_pmu->counters_mask;
tx2_uncore_event_stop(event, PERF_EF_UPDATE);
/* clear the assigned counter */
- free_counter(tx2_pmu, GET_COUNTERID(event));
+ free_counter(tx2_pmu, GET_COUNTERID(event, cmask));
perf_event_update_userpage(event);
tx2_pmu->events[hwc->idx] = NULL;
hwc->idx = -1;
+
+ if (!tx2_pmu->hrtimer_callback)
+ return;
+
+ if (bitmap_empty(tx2_pmu->active_counters, tx2_pmu->max_counters))
+ hrtimer_cancel(&tx2_pmu->hrtimer);
}
static void tx2_uncore_event_read(struct perf_event *event)
@@ -580,8 +754,12 @@ static int tx2_uncore_pmu_add_dev(struct tx2_uncore_pmu *tx2_pmu)
cpu_online_mask);
tx2_pmu->cpu = cpu;
- hrtimer_init(&tx2_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- tx2_pmu->hrtimer.function = tx2_hrtimer_callback;
+
+ if (tx2_pmu->hrtimer_callback) {
+ hrtimer_init(&tx2_pmu->hrtimer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ tx2_pmu->hrtimer.function = tx2_pmu->hrtimer_callback;
+ }
ret = tx2_uncore_pmu_register(tx2_pmu);
if (ret) {
@@ -653,10 +831,13 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
switch (tx2_pmu->type) {
case PMU_TYPE_L3C:
- tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS;
+ tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS;
+ tx2_pmu->counters_mask = 0x3;
tx2_pmu->prorate_factor = TX2_PMU_L3_TILES;
tx2_pmu->max_events = L3_EVENT_MAX;
+ tx2_pmu->events_mask = 0x1f;
tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL;
+ tx2_pmu->hrtimer_callback = tx2_hrtimer_callback;
tx2_pmu->attr_groups = l3c_pmu_attr_groups;
tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL,
"uncore_l3c_%d", tx2_pmu->node);
@@ -665,10 +846,13 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
tx2_pmu->stop_event = uncore_stop_event_l3c;
break;
case PMU_TYPE_DMC:
- tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS;
+ tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS;
+ tx2_pmu->counters_mask = 0x3;
tx2_pmu->prorate_factor = TX2_PMU_DMC_CHANNELS;
tx2_pmu->max_events = DMC_EVENT_MAX;
+ tx2_pmu->events_mask = 0x1f;
tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL;
+ tx2_pmu->hrtimer_callback = tx2_hrtimer_callback;
tx2_pmu->attr_groups = dmc_pmu_attr_groups;
tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL,
"uncore_dmc_%d", tx2_pmu->node);
@@ -676,6 +860,21 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
tx2_pmu->start_event = uncore_start_event_dmc;
tx2_pmu->stop_event = uncore_stop_event_dmc;
break;
+ case PMU_TYPE_CCPI2:
+ /* CCPI2 has 8 counters */
+ tx2_pmu->max_counters = TX2_PMU_CCPI2_MAX_COUNTERS;
+ tx2_pmu->counters_mask = 0x7;
+ tx2_pmu->prorate_factor = 1;
+ tx2_pmu->max_events = CCPI2_EVENT_MAX;
+ tx2_pmu->events_mask = 0x1ff;
+ tx2_pmu->attr_groups = ccpi2_pmu_attr_groups;
+ tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL,
+ "uncore_ccpi2_%d", tx2_pmu->node);
+ tx2_pmu->init_cntr_base = init_cntr_base_ccpi2;
+ tx2_pmu->start_event = uncore_start_event_ccpi2;
+ tx2_pmu->stop_event = uncore_stop_event_ccpi2;
+ tx2_pmu->hrtimer_callback = NULL;
+ break;
case PMU_TYPE_INVALID:
devm_kfree(dev, tx2_pmu);
return NULL;
@@ -744,7 +943,9 @@ static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
if (cpu != tx2_pmu->cpu)
return 0;
- hrtimer_cancel(&tx2_pmu->hrtimer);
+ if (tx2_pmu->hrtimer_callback)
+ hrtimer_cancel(&tx2_pmu->hrtimer);
+
cpumask_copy(&cpu_online_mask_temp, cpu_online_mask);
cpumask_clear_cpu(cpu, &cpu_online_mask_temp);
new_cpu = cpumask_any_and(
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 7e328d6385c3..46ee6807d533 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1282,25 +1282,21 @@ static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
{
void __iomem *csw_csr, *mcba_csr, *mcbb_csr;
- struct resource *res;
unsigned int reg;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- csw_csr = devm_ioremap_resource(&pdev->dev, res);
+ csw_csr = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(csw_csr)) {
dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n");
return PTR_ERR(csw_csr);
}
- res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
- mcba_csr = devm_ioremap_resource(&pdev->dev, res);
+ mcba_csr = devm_platform_ioremap_resource(pdev, 2);
if (IS_ERR(mcba_csr)) {
dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n");
return PTR_ERR(mcba_csr);
}
- res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
- mcbb_csr = devm_ioremap_resource(&pdev->dev, res);
+ mcbb_csr = devm_platform_ioremap_resource(pdev, 3);
if (IS_ERR(mcbb_csr)) {
dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n");
return PTR_ERR(mcbb_csr);
@@ -1332,13 +1328,11 @@ static int acpi_pmu_v3_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
{
void __iomem *csw_csr;
- struct resource *res;
unsigned int reg;
u32 mcb0routing;
u32 mcb1routing;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- csw_csr = devm_ioremap_resource(&pdev->dev, res);
+ csw_csr = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(csw_csr)) {
dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n");
return PTR_ERR(csw_csr);