From 59809fe88224db24432ad50e62fd8d5f0df738a1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 18 Jun 2019 16:06:08 -0300 Subject: docs: perf: move to the admin-guide The perf infrastructure is used for userspace to track issues. At least a good part of what's described here is related to it. So, add it to the admin-guide. Signed-off-by: Mauro Carvalho Chehab --- Documentation/admin-guide/index.rst | 1 + Documentation/admin-guide/perf/arm-ccn.rst | 61 ++++++++++++++++++++++++ Documentation/admin-guide/perf/arm_dsu_pmu.rst | 29 +++++++++++ Documentation/admin-guide/perf/hisi-pmu.rst | 60 +++++++++++++++++++++++ Documentation/admin-guide/perf/index.rst | 14 ++++++ Documentation/admin-guide/perf/qcom_l2_pmu.rst | 39 +++++++++++++++ Documentation/admin-guide/perf/qcom_l3_pmu.rst | 26 ++++++++++ Documentation/admin-guide/perf/thunderx2-pmu.rst | 42 ++++++++++++++++ Documentation/admin-guide/perf/xgene-pmu.rst | 49 +++++++++++++++++++ Documentation/perf/arm-ccn.rst | 61 ------------------------ Documentation/perf/arm_dsu_pmu.rst | 29 ----------- Documentation/perf/hisi-pmu.rst | 60 ----------------------- Documentation/perf/index.rst | 16 ------- Documentation/perf/qcom_l2_pmu.rst | 39 --------------- Documentation/perf/qcom_l3_pmu.rst | 26 ---------- Documentation/perf/thunderx2-pmu.rst | 42 ---------------- Documentation/perf/xgene-pmu.rst | 49 ------------------- MAINTAINERS | 4 +- drivers/perf/qcom_l3_pmu.c | 2 +- 19 files changed, 324 insertions(+), 325 deletions(-) create mode 100644 Documentation/admin-guide/perf/arm-ccn.rst create mode 100644 Documentation/admin-guide/perf/arm_dsu_pmu.rst create mode 100644 Documentation/admin-guide/perf/hisi-pmu.rst create mode 100644 Documentation/admin-guide/perf/index.rst create mode 100644 Documentation/admin-guide/perf/qcom_l2_pmu.rst create mode 100644 Documentation/admin-guide/perf/qcom_l3_pmu.rst create mode 100644 Documentation/admin-guide/perf/thunderx2-pmu.rst create mode 100644 
Documentation/admin-guide/perf/xgene-pmu.rst delete mode 100644 Documentation/perf/arm-ccn.rst delete mode 100644 Documentation/perf/arm_dsu_pmu.rst delete mode 100644 Documentation/perf/hisi-pmu.rst delete mode 100644 Documentation/perf/index.rst delete mode 100644 Documentation/perf/qcom_l2_pmu.rst delete mode 100644 Documentation/perf/qcom_l3_pmu.rst delete mode 100644 Documentation/perf/thunderx2-pmu.rst delete mode 100644 Documentation/perf/xgene-pmu.rst diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 8853c95ef0d4..f40c4b5a181b 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -38,6 +38,7 @@ problems and bugs in particular. ramoops dynamic-debug-howto init + perf/index This is the beginning of a section with information of interest to application developers. Documents covering various aspects of the kernel diff --git a/Documentation/admin-guide/perf/arm-ccn.rst b/Documentation/admin-guide/perf/arm-ccn.rst new file mode 100644 index 000000000000..832b0c64023a --- /dev/null +++ b/Documentation/admin-guide/perf/arm-ccn.rst @@ -0,0 +1,61 @@ +========================== +ARM Cache Coherent Network +========================== + +CCN-504 is a ring-bus interconnect consisting of 11 crosspoints +(XPs), with each crosspoint supporting up to two device ports, +so nodes (devices) 0 and 1 are connected to crosspoint 0, +nodes 2 and 3 to crosspoint 1 etc. + +PMU (perf) driver +----------------- + +The CCN driver registers a perf PMU driver, which provides +description of available events and configuration options +in sysfs, see /sys/bus/event_source/devices/ccn*. + +The "format" directory describes format of the config, config1 +and config2 fields of the perf_event_attr structure. The "events" +directory provides configuration templates for all documented +events, that can be used with perf tool. For example "xp_valid_flit" +is an equivalent of "type=0x8,event=0x4". 
Other parameters must be +explicitly specified. + +For events originating from device, "node" defines its index. + +Crosspoint PMU events require "xp" (index), "bus" (bus number) +and "vc" (virtual channel ID). + +Crosspoint watchpoint-based events (special "event" value 0xfe) +require "xp" and "vc" as as above plus "port" (device port index), +"dir" (transmit/receive direction), comparator values ("cmp_l" +and "cmp_h") and "mask", being index of the comparator mask. + +Masks are defined separately from the event description +(due to limited number of the config values) in the "cmp_mask" +directory, with first 8 configurable by user and additional +4 hardcoded for the most frequent use cases. + +Cycle counter is described by a "type" value 0xff and does +not require any other settings. + +The driver also provides a "cpumask" sysfs attribute, which contains +a single CPU ID, of the processor which will be used to handle all +the CCN PMU events. It is recommended that the user space tools +request the events on this processor (if not, the perf_event->cpu value +will be overwritten anyway). In case of this processor being offlined, +the events are migrated to another one and the attribute is updated. + +Example of perf tool use:: + + / # perf list | grep ccn + ccn/cycles/ [Kernel PMU event] + <...> + ccn/xp_valid_flit,xp=?,port=?,vc=?,dir=?/ [Kernel PMU event] + <...> + + / # perf stat -a -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \ + sleep 1 + +The driver does not support sampling, therefore "perf record" will +not work. Per-task (without "-a") perf sessions are not supported. 
diff --git a/Documentation/admin-guide/perf/arm_dsu_pmu.rst b/Documentation/admin-guide/perf/arm_dsu_pmu.rst new file mode 100644 index 000000000000..7fd34db75d13 --- /dev/null +++ b/Documentation/admin-guide/perf/arm_dsu_pmu.rst @@ -0,0 +1,29 @@ +================================== +ARM DynamIQ Shared Unit (DSU) PMU +================================== + +ARM DynamIQ Shared Unit integrates one or more cores with an L3 memory system, +control logic and external interfaces to form a multicore cluster. The PMU +allows counting the various events related to the L3 cache, Snoop Control Unit +etc, using 32bit independent counters. It also provides a 64bit cycle counter. + +The PMU can only be accessed via CPU system registers and are common to the +cores connected to the same DSU. Like most of the other uncore PMUs, DSU +PMU doesn't support process specific events and cannot be used in sampling mode. + +The DSU provides a bitmap for a subset of implemented events via hardware +registers. There is no way for the driver to determine if the other events +are available or not. Hence the driver exposes only those events advertised +by the DSU, in "events" directory under:: + + /sys/bus/event_sources/devices/arm_dsu_<N>/ + +The user should refer to the TRM of the product to figure out the supported events +and use the raw event code for the unlisted events. + +The driver also exposes the CPUs connected to the DSU instance in "associated_cpus".
+ + +e.g usage:: + + perf stat -a -e arm_dsu_0/cycles/ diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst new file mode 100644 index 000000000000..404a5c3d9d00 --- /dev/null +++ b/Documentation/admin-guide/perf/hisi-pmu.rst @@ -0,0 +1,60 @@ +====================================================== +HiSilicon SoC uncore Performance Monitoring Unit (PMU) +====================================================== + +The HiSilicon SoC chip includes various independent system device PMUs +such as L3 cache (L3C), Hydra Home Agent (HHA) and DDRC. These PMUs are +independent and have hardware logic to gather statistics and performance +information. + +The HiSilicon SoC encapsulates multiple CPU and IO dies. Each CPU cluster +(CCL) is made up of 4 cpu cores sharing one L3 cache; each CPU die is +called Super CPU cluster (SCCL) and is made up of 6 CCLs. Each SCCL has +two HHAs (0 - 1) and four DDRCs (0 - 3), respectively. + +HiSilicon SoC uncore PMU driver +------------------------------- + +Each device PMU has separate registers for event counting, control and +interrupt, and the PMU driver shall register perf PMU drivers like L3C, +HHA and DDRC etc. The available events and configuration options shall +be described in the sysfs, see: + +/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or +/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>. +The "perf list" command shall list the available events from sysfs. + +Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU +name will appear in event listing as hisi_sccl<sccl-id>_module<index-id>. +where "sccl-id" is the identifier of the SCCL and "index-id" is the index of +module. + +e.g. hisi_sccl3_l3c0/rd_hit_cpipe is READ_HIT_CPIPE event of L3C index #0 in +SCCL ID #3. + +e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in +SCCL ID #1. + +The driver also provides a "cpumask" sysfs attribute, which shows the CPU core +ID used to count the uncore PMU event.
+ +Example usage of perf:: + + $# perf list + hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event] + ------------------------------------------ + hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event] + ------------------------------------------ + hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event] + ------------------------------------------ + hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event] + ------------------------------------------ + + $# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5 + $# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5 + +The current driver does not support sampling. So "perf record" is unsupported. +Also attach to a task is unsupported as the events are all uncore. + +Note: Please contact the maintainer for a complete list of events supported for +the PMU devices in the SoC and its information if needed. diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst new file mode 100644 index 000000000000..9d445451ea18 --- /dev/null +++ b/Documentation/admin-guide/perf/index.rst @@ -0,0 +1,14 @@ +=========================== +Performance monitor support +=========================== + +.. toctree:: + :maxdepth: 1 + + hisi-pmu + qcom_l2_pmu + qcom_l3_pmu + arm-ccn + xgene-pmu + arm_dsu_pmu + thunderx2-pmu diff --git a/Documentation/admin-guide/perf/qcom_l2_pmu.rst b/Documentation/admin-guide/perf/qcom_l2_pmu.rst new file mode 100644 index 000000000000..c130178a4a55 --- /dev/null +++ b/Documentation/admin-guide/perf/qcom_l2_pmu.rst @@ -0,0 +1,39 @@ +===================================================================== +Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU) +===================================================================== + +This driver supports the L2 cache clusters found in Qualcomm Technologies +Centriq SoCs. There are multiple physical L2 cache clusters, each with their +own PMU. Each cluster has one or more CPUs associated with it. 
+ +There is one logical L2 PMU exposed, which aggregates the results from +the physical PMUs. + +The driver provides a description of its available events and configuration +options in sysfs, see /sys/devices/l2cache_0. + +The "format" directory describes the format of the events. + +Events can be envisioned as a 2-dimensional array. Each column represents +a group of events. There are 8 groups. Only one entry from each +group can be in use at a time. If multiple events from the same group +are specified, the conflicting events cannot be counted at the same time. + +Events are specified as 0xCCG, where CC is 2 hex digits specifying +the code (array row) and G specifies the group (column) 0-7. + +In addition there is a cycle counter event specified by the value 0xFE +which is outside the above scheme. + +The driver provides a "cpumask" sysfs attribute which contains a mask +consisting of one CPU per cluster which will be used to handle all the PMU +events on that cluster. + +Examples for use with perf:: + + perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1 + + perf stat -e l2cache_0/config=0xfe/ -C 2 sleep 1 + +The driver does not support sampling, therefore "perf record" will +not work. Per-task perf sessions are not supported. diff --git a/Documentation/admin-guide/perf/qcom_l3_pmu.rst b/Documentation/admin-guide/perf/qcom_l3_pmu.rst new file mode 100644 index 000000000000..a3d014a46bfd --- /dev/null +++ b/Documentation/admin-guide/perf/qcom_l3_pmu.rst @@ -0,0 +1,26 @@ +=========================================================================== +Qualcomm Datacenter Technologies L3 Cache Performance Monitoring Unit (PMU) +=========================================================================== + +This driver supports the L3 cache PMUs found in Qualcomm Datacenter Technologies +Centriq SoCs. The L3 cache on these SOCs is composed of multiple slices, shared +by all cores within a socket. 
Each slice is exposed as a separate uncore perf +PMU with device name l3cache_<socket>_<instance>. User space is responsible +for aggregating across slices. + +The driver provides a description of its available events and configuration +options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs +the driver also exposes a "cpumask" sysfs attribute which contains a mask +consisting of one CPU per socket which will be used to handle all the PMU +events on that socket. + +The hardware implements 32bit event counters and has a flat 8bit event space +exposed via the "event" format attribute. In addition to the 32bit physical +counters the driver supports virtual 64bit hardware counters by using hardware +counter chaining. This feature is exposed via the "lc" (long counter) format +flag. E.g.:: + + perf stat -e l3cache_0_0/read-miss,lc/ + +Given that these are uncore PMUs the driver does not support sampling, therefore +"perf record" will not work. Per-task perf sessions are not supported. diff --git a/Documentation/admin-guide/perf/thunderx2-pmu.rst b/Documentation/admin-guide/perf/thunderx2-pmu.rst new file mode 100644 index 000000000000..08e33675853a --- /dev/null +++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst @@ -0,0 +1,42 @@ +============================================================= +Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE) +============================================================= + +The ThunderX2 SoC PMU consists of independent, system-wide, per-socket +PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC). + +The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles. +Events are counted for the default channel (i.e. channel 0) and prorated +to the total number of channels/tiles. + +The DMC and L3C support up to 4 counters. Counters are independently +programmable and can be started and stopped individually. Each counter +can be set to a different event.
Counters are 32-bit and do not support +an overflow interrupt; they are read every 2 seconds. + +PMU UNCORE (perf) driver: + +The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and +L3C devices. Each PMU can be used to count up to 4 events +simultaneously. The PMUs provide a description of their available events +and configuration options under sysfs, see +/sys/devices/uncore_<l3c_S/dmc_S/>; S is the socket id. + +The driver does not support sampling, therefore "perf record" will not +work. Per-task perf sessions are also not supported. + +Examples:: + + # perf stat -a -e uncore_dmc_0/cnt_cycles/ sleep 1 + + # perf stat -a -e \ + uncore_dmc_0/cnt_cycles/,\ + uncore_dmc_0/data_transfers/,\ + uncore_dmc_0/read_txns/,\ + uncore_dmc_0/write_txns/ sleep 1 + + # perf stat -a -e \ + uncore_l3c_0/read_request/,\ + uncore_l3c_0/read_hit/,\ + uncore_l3c_0/inv_request/,\ + uncore_l3c_0/inv_hit/ sleep 1 diff --git a/Documentation/admin-guide/perf/xgene-pmu.rst b/Documentation/admin-guide/perf/xgene-pmu.rst new file mode 100644 index 000000000000..644f8ed89152 --- /dev/null +++ b/Documentation/admin-guide/perf/xgene-pmu.rst @@ -0,0 +1,49 @@ +================================================ +APM X-Gene SoC Performance Monitoring Unit (PMU) +================================================ + +X-Gene SoC PMU consists of various independent system device PMUs such as +L3 cache(s), I/O bridge(s), memory controller bridge(s) and memory +controller(s). These PMU devices are loosely architected to follow the +same model as the PMU for ARM cores. The PMUs share the same top level +interrupt and status CSR region. + +PMU (perf) driver +----------------- + +The xgene-pmu driver registers several perf PMU drivers. Each of the perf +driver provides description of its available events and configuration options +in sysfs, see /sys/devices/<type><instance>/. + +The "format" directory describes format of the config (event ID), +config1 (agent ID) fields of the perf_event_attr structure.
The "events" +directory provides configuration templates for all supported event types that +can be used with perf tool. For example, "l3c0/bank-fifo-full/" is an +equivalent of "l3c0/config=0x0b/". + +Most of the SoC PMU has a specific list of agent ID used for monitoring +performance of a specific datapath. For example, agents of a L3 cache can be +a specific CPU or an I/O bridge. Each PMU has a set of 2 registers capable of +masking the agents from which the request come from. If the bit with +the bit number corresponding to the agent is set, the event is counted only if +it is caused by a request from that agent. Each agent ID bit is inversely mapped +to a corresponding bit in "config1" field. By default, the event will be +counted for all agent requests (config1 = 0x0). For all the supported agents of +each PMU, please refer to APM X-Gene User Manual. + +Each perf driver also provides a "cpumask" sysfs attribute, which contains a +single CPU ID of the processor which will be used to handle all the PMU events. + +Example for perf tool use:: + + / # perf list | grep -e l3c -e iob -e mcb -e mc + l3c0/ackq-full/ [Kernel PMU event] + <...> + mcb1/mcb-csw-stall/ [Kernel PMU event] + + / # perf stat -a -e l3c0/read-miss/,mcb1/csw-write-request/ sleep 1 + + / # perf stat -a -e l3c0/read-miss,config1=0xfffffffffffffffe/ sleep 1 + +The driver does not support sampling, therefore "perf record" will +not work. Per-task (without "-a") perf sessions are not supported. diff --git a/Documentation/perf/arm-ccn.rst b/Documentation/perf/arm-ccn.rst deleted file mode 100644 index 832b0c64023a..000000000000 --- a/Documentation/perf/arm-ccn.rst +++ /dev/null @@ -1,61 +0,0 @@ -========================== -ARM Cache Coherent Network -========================== - -CCN-504 is a ring-bus interconnect consisting of 11 crosspoints -(XPs), with each crosspoint supporting up to two device ports, -so nodes (devices) 0 and 1 are connected to crosspoint 0, -nodes 2 and 3 to crosspoint 1 etc. 
- -PMU (perf) driver ------------------ - -The CCN driver registers a perf PMU driver, which provides -description of available events and configuration options -in sysfs, see /sys/bus/event_source/devices/ccn*. - -The "format" directory describes format of the config, config1 -and config2 fields of the perf_event_attr structure. The "events" -directory provides configuration templates for all documented -events, that can be used with perf tool. For example "xp_valid_flit" -is an equivalent of "type=0x8,event=0x4". Other parameters must be -explicitly specified. - -For events originating from device, "node" defines its index. - -Crosspoint PMU events require "xp" (index), "bus" (bus number) -and "vc" (virtual channel ID). - -Crosspoint watchpoint-based events (special "event" value 0xfe) -require "xp" and "vc" as as above plus "port" (device port index), -"dir" (transmit/receive direction), comparator values ("cmp_l" -and "cmp_h") and "mask", being index of the comparator mask. - -Masks are defined separately from the event description -(due to limited number of the config values) in the "cmp_mask" -directory, with first 8 configurable by user and additional -4 hardcoded for the most frequent use cases. - -Cycle counter is described by a "type" value 0xff and does -not require any other settings. - -The driver also provides a "cpumask" sysfs attribute, which contains -a single CPU ID, of the processor which will be used to handle all -the CCN PMU events. It is recommended that the user space tools -request the events on this processor (if not, the perf_event->cpu value -will be overwritten anyway). In case of this processor being offlined, -the events are migrated to another one and the attribute is updated. 
- -Example of perf tool use:: - - / # perf list | grep ccn - ccn/cycles/ [Kernel PMU event] - <...> - ccn/xp_valid_flit,xp=?,port=?,vc=?,dir=?/ [Kernel PMU event] - <...> - - / # perf stat -a -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \ - sleep 1 - -The driver does not support sampling, therefore "perf record" will -not work. Per-task (without "-a") perf sessions are not supported. diff --git a/Documentation/perf/arm_dsu_pmu.rst b/Documentation/perf/arm_dsu_pmu.rst deleted file mode 100644 index 7fd34db75d13..000000000000 --- a/Documentation/perf/arm_dsu_pmu.rst +++ /dev/null @@ -1,29 +0,0 @@ -================================== -ARM DynamIQ Shared Unit (DSU) PMU -================================== - -ARM DynamIQ Shared Unit integrates one or more cores with an L3 memory system, -control logic and external interfaces to form a multicore cluster. The PMU -allows counting the various events related to the L3 cache, Snoop Control Unit -etc, using 32bit independent counters. It also provides a 64bit cycle counter. - -The PMU can only be accessed via CPU system registers and are common to the -cores connected to the same DSU. Like most of the other uncore PMUs, DSU -PMU doesn't support process specific events and cannot be used in sampling mode. - -The DSU provides a bitmap for a subset of implemented events via hardware -registers. There is no way for the driver to determine if the other events -are available or not. Hence the driver exposes only those events advertised -by the DSU, in "events" directory under:: - - /sys/bus/event_sources/devices/arm_dsu_<N>/ - -The user should refer to the TRM of the product to figure out the supported events -and use the raw event code for the unlisted events. - -The driver also exposes the CPUs connected to the DSU instance in "associated_cpus".
- - -e.g usage:: - - perf stat -a -e arm_dsu_0/cycles/ diff --git a/Documentation/perf/hisi-pmu.rst b/Documentation/perf/hisi-pmu.rst deleted file mode 100644 index 404a5c3d9d00..000000000000 --- a/Documentation/perf/hisi-pmu.rst +++ /dev/null @@ -1,60 +0,0 @@ -====================================================== -HiSilicon SoC uncore Performance Monitoring Unit (PMU) -====================================================== - -The HiSilicon SoC chip includes various independent system device PMUs -such as L3 cache (L3C), Hydra Home Agent (HHA) and DDRC. These PMUs are -independent and have hardware logic to gather statistics and performance -information. - -The HiSilicon SoC encapsulates multiple CPU and IO dies. Each CPU cluster -(CCL) is made up of 4 cpu cores sharing one L3 cache; each CPU die is -called Super CPU cluster (SCCL) and is made up of 6 CCLs. Each SCCL has -two HHAs (0 - 1) and four DDRCs (0 - 3), respectively. - -HiSilicon SoC uncore PMU driver -------------------------------- - -Each device PMU has separate registers for event counting, control and -interrupt, and the PMU driver shall register perf PMU drivers like L3C, -HHA and DDRC etc. The available events and configuration options shall -be described in the sysfs, see: - -/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or -/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>. -The "perf list" command shall list the available events from sysfs. - -Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU -name will appear in event listing as hisi_sccl<sccl-id>_module<index-id>. -where "sccl-id" is the identifier of the SCCL and "index-id" is the index of -module. - -e.g. hisi_sccl3_l3c0/rd_hit_cpipe is READ_HIT_CPIPE event of L3C index #0 in -SCCL ID #3. - -e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in -SCCL ID #1. - -The driver also provides a "cpumask" sysfs attribute, which shows the CPU core -ID used to count the uncore PMU event.
- -Example usage of perf:: - - $# perf list - hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event] - ------------------------------------------ - hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event] - ------------------------------------------ - hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event] - ------------------------------------------ - hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event] - ------------------------------------------ - - $# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5 - $# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5 - -The current driver does not support sampling. So "perf record" is unsupported. -Also attach to a task is unsupported as the events are all uncore. - -Note: Please contact the maintainer for a complete list of events supported for -the PMU devices in the SoC and its information if needed. diff --git a/Documentation/perf/index.rst b/Documentation/perf/index.rst deleted file mode 100644 index 4bf848e27f26..000000000000 --- a/Documentation/perf/index.rst +++ /dev/null @@ -1,16 +0,0 @@ -:orphan: - -=========================== -Performance monitor support -=========================== - -.. toctree:: - :maxdepth: 1 - - hisi-pmu - qcom_l2_pmu - qcom_l3_pmu - arm-ccn - xgene-pmu - arm_dsu_pmu - thunderx2-pmu diff --git a/Documentation/perf/qcom_l2_pmu.rst b/Documentation/perf/qcom_l2_pmu.rst deleted file mode 100644 index c130178a4a55..000000000000 --- a/Documentation/perf/qcom_l2_pmu.rst +++ /dev/null @@ -1,39 +0,0 @@ -===================================================================== -Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU) -===================================================================== - -This driver supports the L2 cache clusters found in Qualcomm Technologies -Centriq SoCs. There are multiple physical L2 cache clusters, each with their -own PMU. Each cluster has one or more CPUs associated with it. - -There is one logical L2 PMU exposed, which aggregates the results from -the physical PMUs. 
- -The driver provides a description of its available events and configuration -options in sysfs, see /sys/devices/l2cache_0. - -The "format" directory describes the format of the events. - -Events can be envisioned as a 2-dimensional array. Each column represents -a group of events. There are 8 groups. Only one entry from each -group can be in use at a time. If multiple events from the same group -are specified, the conflicting events cannot be counted at the same time. - -Events are specified as 0xCCG, where CC is 2 hex digits specifying -the code (array row) and G specifies the group (column) 0-7. - -In addition there is a cycle counter event specified by the value 0xFE -which is outside the above scheme. - -The driver provides a "cpumask" sysfs attribute which contains a mask -consisting of one CPU per cluster which will be used to handle all the PMU -events on that cluster. - -Examples for use with perf:: - - perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1 - - perf stat -e l2cache_0/config=0xfe/ -C 2 sleep 1 - -The driver does not support sampling, therefore "perf record" will -not work. Per-task perf sessions are not supported. diff --git a/Documentation/perf/qcom_l3_pmu.rst b/Documentation/perf/qcom_l3_pmu.rst deleted file mode 100644 index a3d014a46bfd..000000000000 --- a/Documentation/perf/qcom_l3_pmu.rst +++ /dev/null @@ -1,26 +0,0 @@ -=========================================================================== -Qualcomm Datacenter Technologies L3 Cache Performance Monitoring Unit (PMU) -=========================================================================== - -This driver supports the L3 cache PMUs found in Qualcomm Datacenter Technologies -Centriq SoCs. The L3 cache on these SOCs is composed of multiple slices, shared -by all cores within a socket. Each slice is exposed as a separate uncore perf -PMU with device name l3cache_<socket>_<instance>. User space is responsible -for aggregating across slices.
- -The driver provides a description of its available events and configuration -options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs -the driver also exposes a "cpumask" sysfs attribute which contains a mask -consisting of one CPU per socket which will be used to handle all the PMU -events on that socket. - -The hardware implements 32bit event counters and has a flat 8bit event space -exposed via the "event" format attribute. In addition to the 32bit physical -counters the driver supports virtual 64bit hardware counters by using hardware -counter chaining. This feature is exposed via the "lc" (long counter) format -flag. E.g.:: - - perf stat -e l3cache_0_0/read-miss,lc/ - -Given that these are uncore PMUs the driver does not support sampling, therefore -"perf record" will not work. Per-task perf sessions are not supported. diff --git a/Documentation/perf/thunderx2-pmu.rst b/Documentation/perf/thunderx2-pmu.rst deleted file mode 100644 index 08e33675853a..000000000000 --- a/Documentation/perf/thunderx2-pmu.rst +++ /dev/null @@ -1,42 +0,0 @@ -============================================================= -Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE) -============================================================= - -The ThunderX2 SoC PMU consists of independent, system-wide, per-socket -PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC). - -The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles. -Events are counted for the default channel (i.e. channel 0) and prorated -to the total number of channels/tiles. - -The DMC and L3C support up to 4 counters. Counters are independently -programmable and can be started and stopped individually. Each counter -can be set to a different event. Counters are 32-bit and do not support -an overflow interrupt; they are read every 2 seconds. - -PMU UNCORE (perf) driver: - -The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and -L3C devices. 
Each PMU can be used to count up to 4 events -simultaneously. The PMUs provide a description of their available events -and configuration options under sysfs, see -/sys/devices/uncore_<l3c_S/dmc_S/>; S is the socket id. - -The driver does not support sampling, therefore "perf record" will not -work. Per-task perf sessions are also not supported. - -Examples:: - - # perf stat -a -e uncore_dmc_0/cnt_cycles/ sleep 1 - - # perf stat -a -e \ - uncore_dmc_0/cnt_cycles/,\ - uncore_dmc_0/data_transfers/,\ - uncore_dmc_0/read_txns/,\ - uncore_dmc_0/write_txns/ sleep 1 - - # perf stat -a -e \ - uncore_l3c_0/read_request/,\ - uncore_l3c_0/read_hit/,\ - uncore_l3c_0/inv_request/,\ - uncore_l3c_0/inv_hit/ sleep 1 diff --git a/Documentation/perf/xgene-pmu.rst b/Documentation/perf/xgene-pmu.rst deleted file mode 100644 index 644f8ed89152..000000000000 --- a/Documentation/perf/xgene-pmu.rst +++ /dev/null @@ -1,49 +0,0 @@ -================================================ -APM X-Gene SoC Performance Monitoring Unit (PMU) -================================================ - -X-Gene SoC PMU consists of various independent system device PMUs such as -L3 cache(s), I/O bridge(s), memory controller bridge(s) and memory -controller(s). These PMU devices are loosely architected to follow the -same model as the PMU for ARM cores. The PMUs share the same top level -interrupt and status CSR region. - -PMU (perf) driver ------------------ - -The xgene-pmu driver registers several perf PMU drivers. Each of the perf -driver provides description of its available events and configuration options -in sysfs, see /sys/devices/<type><instance>/. - -The "format" directory describes format of the config (event ID), -config1 (agent ID) fields of the perf_event_attr structure. The "events" -directory provides configuration templates for all supported event types that -can be used with perf tool. For example, "l3c0/bank-fifo-full/" is an -equivalent of "l3c0/config=0x0b/".
- -Most of the SoC PMU has a specific list of agent ID used for monitoring -performance of a specific datapath. For example, agents of a L3 cache can be -a specific CPU or an I/O bridge. Each PMU has a set of 2 registers capable of -masking the agents from which the request come from. If the bit with -the bit number corresponding to the agent is set, the event is counted only if -it is caused by a request from that agent. Each agent ID bit is inversely mapped -to a corresponding bit in "config1" field. By default, the event will be -counted for all agent requests (config1 = 0x0). For all the supported agents of -each PMU, please refer to APM X-Gene User Manual. - -Each perf driver also provides a "cpumask" sysfs attribute, which contains a -single CPU ID of the processor which will be used to handle all the PMU events. - -Example for perf tool use:: - - / # perf list | grep -e l3c -e iob -e mcb -e mc - l3c0/ackq-full/ [Kernel PMU event] - <...> - mcb1/mcb-csw-stall/ [Kernel PMU event] - - / # perf stat -a -e l3c0/read-miss/,mcb1/csw-write-request/ sleep 1 - - / # perf stat -a -e l3c0/read-miss,config1=0xfffffffffffffffe/ sleep 1 - -The driver does not support sampling, therefore "perf record" will -not work. Per-task (without "-a") perf sessions are not supported. 
diff --git a/MAINTAINERS b/MAINTAINERS index db96cd4a229b..b8ce346d5254 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1155,7 +1155,7 @@ APPLIED MICRO (APM) X-GENE SOC PMU M: Khuong Dinh S: Supported F: drivers/perf/xgene_pmu.c -F: Documentation/perf/xgene-pmu.rst +F: Documentation/admin-guide/perf/xgene-pmu.rst F: Documentation/devicetree/bindings/perf/apm-xgene-pmu.txt APTINA CAMERA SENSOR PLL @@ -7262,7 +7262,7 @@ M: Shaokun Zhang W: http://www.hisilicon.com S: Supported F: drivers/perf/hisilicon -F: Documentation/perf/hisi-pmu.rst +F: Documentation/admin-guide/perf/hisi-pmu.rst HISILICON ROCE DRIVER M: Lijun Ou diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index 90f88ce5192b..656e830798d9 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -8,7 +8,7 @@ * the slices. User space needs to aggregate to individual counts to provide * a global picture. * - * See Documentation/perf/qcom_l3_pmu.rst for more details. + * See Documentation/admin-guide/perf/qcom_l3_pmu.rst for more details. * * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved. */ -- cgit