summaryrefslogtreecommitdiff
path: root/tools/perf/tests
diff options
context:
space:
mode:
authorYicong Yang <yangyicong@hisilicon.com>2024-02-08 10:40:26 +0800
committerNamhyung Kim <namhyung@kernel.org>2024-02-09 14:59:53 -0800
commitcbc917a1b03bce85f385c1e640c9dcb02ffb9ab0 (patch)
tree85bf911a310bfd4c9e3804d1959afc8441ac3a8f /tools/perf/tests
parent9a440bb2e2e9a4af3a7857af42a825f61b27a18c (diff)
perf stat: Support per-cluster aggregation
Some platforms have 'cluster' topology and CPUs in the cluster will share resources like L3 Cache Tag (for HiSilicon Kunpeng SoC) or L2 cache (for Intel Jacobsville). Currently parsing and building cluster topology have been supported since [1]. perf stat has already supported aggregation for other topologies like die or socket, etc. It'll be useful to aggregate per-cluster to find problems like L3T bandwidth contention. This patch add support for "--per-cluster" option for per-cluster aggregation. Also update the docs and related test. The output will be like: [root@localhost tmp]# perf stat -a -e LLC-load --per-cluster -- sleep 5 Performance counter stats for 'system wide': S56-D0-CLS158 4 1,321,521,570 LLC-load S56-D0-CLS594 4 794,211,453 LLC-load S56-D0-CLS1030 4 41,623 LLC-load S56-D0-CLS1466 4 41,646 LLC-load S56-D0-CLS1902 4 16,863 LLC-load S56-D0-CLS2338 4 15,721 LLC-load S56-D0-CLS2774 4 22,671 LLC-load [...] On a legacy system without cluster or cluster support, the output will be look like: [root@localhost perf]# perf stat -a -e cycles --per-cluster -- sleep 1 Performance counter stats for 'system wide': S56-D0-CLS0 64 18,011,485 cycles S7182-D0-CLS0 64 16,548,835 cycles Note that this patch doesn't mix the cluster information in the outputs of --per-core to avoid breaking any tools/scripts using it. Note that perf recently supports "--per-cache" aggregation, but it's not the same with the cluster although cluster CPUs may share some cache resources. For example on my machine all clusters within a die share the same L3 cache: $ cat /sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_list 0-31 $ cat /sys/devices/system/cpu/cpu0/topology/cluster_cpus_list 0-3 [1] commit c5e22feffdd7 ("topology: Represent clusters of CPUs within a die") Tested-by: Jie Zhan <zhanjie9@hisilicon.com> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> Reviewed-by: Ian Rogers <irogers@google.com> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com> Cc: james.clark@arm.com Cc: 21cnbao@gmail.com Cc: prime.zeng@hisilicon.com Cc: Jonathan.Cameron@huawei.com Cc: fanghao11@huawei.com Cc: linuxarm@huawei.com Cc: tim.c.chen@intel.com Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Namhyung Kim <namhyung@kernel.org> Link: https://lore.kernel.org/r/20240208024026.2691-1-yangyicong@huawei.com
Diffstat (limited to 'tools/perf/tests')
-rw-r--r--tools/perf/tests/shell/lib/perf_json_output_lint.py4
-rw-r--r--tools/perf/tests/shell/lib/stat_output.sh12
-rwxr-xr-xtools/perf/tests/shell/stat+csv_output.sh2
-rwxr-xr-xtools/perf/tests/shell/stat+json_output.sh13
-rwxr-xr-xtools/perf/tests/shell/stat+std_output.sh2
5 files changed, 32 insertions, 1 deletions
diff --git a/tools/perf/tests/shell/lib/perf_json_output_lint.py b/tools/perf/tests/shell/lib/perf_json_output_lint.py
index ea55d5ea1ced..abc1fd737782 100644
--- a/tools/perf/tests/shell/lib/perf_json_output_lint.py
+++ b/tools/perf/tests/shell/lib/perf_json_output_lint.py
@@ -15,6 +15,7 @@ ap.add_argument('--event', action='store_true')
ap.add_argument('--per-core', action='store_true')
ap.add_argument('--per-thread', action='store_true')
ap.add_argument('--per-cache', action='store_true')
+ap.add_argument('--per-cluster', action='store_true')
ap.add_argument('--per-die', action='store_true')
ap.add_argument('--per-node', action='store_true')
ap.add_argument('--per-socket', action='store_true')
@@ -49,6 +50,7 @@ def check_json_output(expected_items):
'cgroup': lambda x: True,
'cpu': lambda x: isint(x),
'cache': lambda x: True,
+ 'cluster': lambda x: True,
'die': lambda x: True,
'event': lambda x: True,
'event-runtime': lambda x: isfloat(x),
@@ -88,7 +90,7 @@ try:
expected_items = 7
elif args.interval or args.per_thread or args.system_wide_no_aggr:
expected_items = 8
- elif args.per_core or args.per_socket or args.per_node or args.per_die or args.per_cache:
+ elif args.per_core or args.per_socket or args.per_node or args.per_die or args.per_cluster or args.per_cache:
expected_items = 9
else:
# If no option is specified, don't check the number of items.
diff --git a/tools/perf/tests/shell/lib/stat_output.sh b/tools/perf/tests/shell/lib/stat_output.sh
index 3cc158a64326..c81d6a9f7983 100644
--- a/tools/perf/tests/shell/lib/stat_output.sh
+++ b/tools/perf/tests/shell/lib/stat_output.sh
@@ -97,6 +97,18 @@ check_per_cache_instance()
echo "[Success]"
}
+check_per_cluster()
+{
+ echo -n "Checking $1 output: per cluster "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat --per-cluster -a $2 true
+ echo "[Success]"
+}
+
check_per_die()
{
echo -n "Checking $1 output: per die "
diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh
index f1818fa6d9ce..fc2d8cc6e5e0 100755
--- a/tools/perf/tests/shell/stat+csv_output.sh
+++ b/tools/perf/tests/shell/stat+csv_output.sh
@@ -42,6 +42,7 @@ function commachecker()
;; "--per-socket") exp=8
;; "--per-node") exp=8
;; "--per-die") exp=8
+ ;; "--per-cluster") exp=8
;; "--per-cache") exp=8
esac
@@ -79,6 +80,7 @@ then
check_system_wide_no_aggr "CSV" "$perf_cmd"
check_per_core "CSV" "$perf_cmd"
check_per_cache_instance "CSV" "$perf_cmd"
+ check_per_cluster "CSV" "$perf_cmd"
check_per_die "CSV" "$perf_cmd"
check_per_socket "CSV" "$perf_cmd"
else
diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh
index 3bc900533a5d..2b9c6212dffc 100755
--- a/tools/perf/tests/shell/stat+json_output.sh
+++ b/tools/perf/tests/shell/stat+json_output.sh
@@ -122,6 +122,18 @@ check_per_cache_instance()
echo "[Success]"
}
+check_per_cluster()
+{
+ echo -n "Checking json output: per cluster "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoia and not root"
+ return
+ fi
+ perf stat -j --per-cluster -a true 2>&1 | $PYTHON $pythonchecker --per-cluster
+ echo "[Success]"
+}
+
check_per_die()
{
echo -n "Checking json output: per die "
@@ -200,6 +212,7 @@ then
check_system_wide_no_aggr
check_per_core
check_per_cache_instance
+ check_per_cluster
check_per_die
check_per_socket
else
diff --git a/tools/perf/tests/shell/stat+std_output.sh b/tools/perf/tests/shell/stat+std_output.sh
index f46a0c9908c0..cbf2894b2c84 100755
--- a/tools/perf/tests/shell/stat+std_output.sh
+++ b/tools/perf/tests/shell/stat+std_output.sh
@@ -40,6 +40,7 @@ function commachecker()
;; "--per-node") prefix=3
;; "--per-die") prefix=3
;; "--per-cache") prefix=3
+ ;; "--per-cluster") prefix=3
esac
while read line
@@ -99,6 +100,7 @@ then
check_system_wide_no_aggr "STD" "$perf_cmd"
check_per_core "STD" "$perf_cmd"
check_per_cache_instance "STD" "$perf_cmd"
+ check_per_cluster "STD" "$perf_cmd"
check_per_die "STD" "$perf_cmd"
check_per_socket "STD" "$perf_cmd"
else