summaryrefslogtreecommitdiff
path: root/tools/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-amd-ibs.txt59
-rw-r--r--tools/perf/Documentation/perf-mem.txt50
-rw-r--r--tools/perf/bench/futex-hash.c2
-rw-r--r--tools/perf/bench/futex-lock-pi.c1
-rw-r--r--tools/perf/bench/futex-requeue.c1
-rw-r--r--tools/perf/bench/futex-wake-parallel.c1
-rw-r--r--tools/perf/bench/futex-wake.c1
-rw-r--r--tools/perf/bench/futex.c26
-rw-r--r--tools/perf/bench/futex.h1
-rwxr-xr-xtools/perf/check-headers.sh2
-rwxr-xr-xtools/perf/tests/shell/stat+event_uniquifying.sh12
-rw-r--r--tools/perf/tests/tests-scripts.c1
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h2
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fs.h1
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/prctl.h5
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/stat.h8
-rw-r--r--tools/perf/util/include/linux/linkage.h4
-rw-r--r--tools/perf/util/print-events.c1
18 files changed, 133 insertions, 45 deletions
diff --git a/tools/perf/Documentation/perf-amd-ibs.txt b/tools/perf/Documentation/perf-amd-ibs.txt
index 55f80beae037..548549935760 100644
--- a/tools/perf/Documentation/perf-amd-ibs.txt
+++ b/tools/perf/Documentation/perf-amd-ibs.txt
@@ -171,23 +171,48 @@ Below is a simple example of the perf mem tool.
# perf mem report
A normal perf mem report output will provide detailed memory access profile.
-However, it can also be aggregated based on output fields. For example:
-
- # perf mem report -F mem,sample,snoop
- Samples: 3M of event 'ibs_op//', Event count (approx.): 23524876
- Memory access Samples Snoop
- N/A 1903343 N/A
- L1 hit 1056754 N/A
- L2 hit 75231 N/A
- L3 hit 9496 HitM
- L3 hit 2270 N/A
- RAM hit 8710 N/A
- Remote node, same socket RAM hit 3241 N/A
- Remote core, same node Any cache hit 1572 HitM
- Remote core, same node Any cache hit 514 N/A
- Remote node, same socket Any cache hit 1216 HitM
- Remote node, same socket Any cache hit 350 N/A
- Uncached hit 18 N/A
+New output fields will show related access info together. For example:
+
+ # perf mem report -F overhead,cache,snoop,comm
+ ...
+ # Samples: 92K of event 'ibs_op//'
+ # Total weight : 531104
+ #
+ # ---------- Cache ----------- --- Snoop ----
+ # Overhead L1 L2 L1-buf Other HitM Other Command
+ # ........ ............................ .............. ..........
+ #
+ 76.07% 5.8% 35.7% 0.0% 34.6% 23.3% 52.8% cc1
+ 5.79% 0.2% 0.0% 0.0% 5.6% 0.1% 5.7% make
+ 5.78% 0.1% 4.4% 0.0% 1.2% 0.5% 5.3% gcc
+ 5.33% 0.3% 3.9% 0.0% 1.1% 0.2% 5.2% as
+ 5.00% 0.1% 3.8% 0.0% 1.0% 0.3% 4.7% sh
+ 1.56% 0.1% 0.1% 0.0% 1.4% 0.6% 0.9% ld
+ 0.28% 0.1% 0.0% 0.0% 0.2% 0.1% 0.2% pkg-config
+ 0.09% 0.0% 0.0% 0.0% 0.1% 0.0% 0.1% git
+ 0.03% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% rm
+ ...
+
+Also, it can be aggregated based on various memory access info using the
+sort keys. For example:
+
+ # perf mem report -s mem,snoop
+ ...
+ # Samples: 92K of event 'ibs_op//'
+ # Total weight : 531104
+ # Sort order : mem,snoop
+ #
+ # Overhead Samples Memory access Snoop
+ # ........ ............ ....................................... ............
+ #
+ 47.99% 1509 L2 hit N/A
+ 25.08% 338 core, same node Any cache hit HitM
+ 10.24% 54374 N/A N/A
+ 6.77% 35938 L1 hit N/A
+ 6.39% 101 core, same node Any cache hit N/A
+ 3.50% 69 RAM hit N/A
+ 0.03% 158 LFB/MAB hit N/A
+ 0.00% 2 Uncached hit N/A
Please refer to their man page for more detail.
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 965e73d37772..4d164836d094 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -119,6 +119,22 @@ REPORT OPTIONS
And the default sort keys are changed to local_weight, mem, sym, dso,
symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat.
+-F::
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+ Please see linkperf:perf-report[1] for details.
+
+ In addition to the default fields, 'perf mem report' will provide the
+ following fields to break down sample periods.
+
+ - op: operation in the sample instruction (load, store, prefetch, ...)
+ - cache: location in CPU cache (L1, L2, ...) where the sample hit
+ - mem: location in memory or other places the sample hit
+ - dtlb: location in Data TLB (L1, L2) where the sample hit
+ - snoop: snoop result for the sampled data access
+
+ Please take a look at the OUTPUT FIELD SELECTION section for caveats.
+
-T::
--type-profile::
Show data-type profile result instead of code symbols. This requires
@@ -156,6 +172,40 @@ but one sample with weight 180 and the other with weight 20:
90% [k] memcpy
10% [.] strcmp
+OUTPUT FIELD SELECTION
+----------------------
+"perf mem report" adds a number of new output fields specific to data source
+information in the sample. Some of them have the same name with the existing
+sort keys ("mem" and "snoop"). So unlike other fields and sort keys, they'll
+behave differently when it's used by -F/--fields or -s/--sort.
+
+Using those two as output fields will aggregate samples altogether and show
+breakdown.
+
+ $ perf mem report -F mem,snoop
+ ...
+ # ------ Memory ------- --- Snoop ----
+ # RAM Uncach Other HitM Other
+ # ..................... ..............
+ #
+ 3.5% 0.0% 96.5% 25.1% 74.9%
+
+But using the same name for sort keys will aggregate samples for each type
+separately.
+
+ $ perf mem report -s mem,snoop
+ # Overhead Samples Memory access Snoop
+ # ........ ............ ....................................... ............
+ #
+ 47.99% 1509 L2 hit N/A
+ 25.08% 338 core, same node Any cache hit HitM
+ 10.24% 54374 N/A N/A
+ 6.77% 35938 L1 hit N/A
+ 6.39% 101 core, same node Any cache hit N/A
+ 3.50% 69 RAM hit N/A
+ 0.03% 158 LFB/MAB hit N/A
+ 0.00% 2 Uncached hit N/A
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-arm-spe[1]
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index fdf133c9520f..7e29f04da744 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -18,7 +18,6 @@
#include <stdlib.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
-#include <linux/prctl.h>
#include <linux/zalloc.h>
#include <sys/time.h>
#include <sys/mman.h>
@@ -57,7 +56,6 @@ static struct bench_futex_parameters params = {
static const struct option options[] = {
OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
- OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
OPT_UINTEGER('f', "futexes", &params.nfutexes, "Specify amount of futexes per threads"),
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 5144a158512c..40640b674427 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -47,7 +47,6 @@ static struct bench_futex_parameters params = {
static const struct option options[] = {
OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
- OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
OPT_BOOLEAN( 'M', "multi", &params.multi, "Use multiple futexes"),
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index a2f91ee1950b..0748b0fd689e 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -52,7 +52,6 @@ static struct bench_futex_parameters params = {
static const struct option options[] = {
OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
- OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('q', "nrequeue", &params.nrequeue, "Specify amount of threads to requeue at once"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index ee66482c29fd..6aede7c46b33 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -63,7 +63,6 @@ static struct bench_futex_parameters params = {
static const struct option options[] = {
OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
- OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakers", &params.nwakes, "Specify amount of waking threads"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 8d6107f7cd94..a31fc1563862 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -52,7 +52,6 @@ static struct bench_futex_parameters params = {
static const struct option options[] = {
OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
- OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakes", &params.nwakes, "Specify amount of threads to wake at once"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c
index 26382e4d8d4c..1481196a22f0 100644
--- a/tools/perf/bench/futex.c
+++ b/tools/perf/bench/futex.c
@@ -2,21 +2,24 @@
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
-#include <linux/prctl.h>
#include <sys/prctl.h>
#include "futex.h"
+#ifndef PR_FUTEX_HASH
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define PR_FUTEX_HASH_GET_SLOTS 2
+#endif // PR_FUTEX_HASH
+
void futex_set_nbuckets_param(struct bench_futex_parameters *params)
{
- unsigned long flags;
int ret;
if (params->nbuckets < 0)
return;
- flags = params->buckets_immutable ? FH_FLAG_IMMUTABLE : 0;
- ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, flags);
+ ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, 0);
if (ret) {
printf("Requesting %d hash buckets failed: %d/%m\n",
params->nbuckets, ret);
@@ -40,18 +43,11 @@ void futex_print_nbuckets(struct bench_futex_parameters *params)
printf("Requested: %d in usage: %d\n", params->nbuckets, ret);
err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
}
- if (params->nbuckets == 0) {
+ if (params->nbuckets == 0)
ret = asprintf(&futex_hash_mode, "Futex hashing: global hash");
- } else {
- ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE);
- if (ret < 0) {
- printf("Can't check if the hash is immutable: %m\n");
- err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
- }
- ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets %s",
- params->nbuckets,
- ret == 1 ? "(immutable)" : "");
- }
+ else
+ ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets",
+ params->nbuckets);
} else {
if (ret <= 0) {
ret = asprintf(&futex_hash_mode, "Futex hashing: global hash");
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 9c9a73f9d865..dd295d27044a 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -26,7 +26,6 @@ struct bench_futex_parameters {
unsigned int nwakes;
unsigned int nrequeue;
int nbuckets;
- bool buckets_immutable;
};
/**
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index e9fab20e9330..8085e4d1d8af 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -186,7 +186,7 @@ done
# diff with extra ignore lines
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
-check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
+check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"'
check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"'
check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
diff --git a/tools/perf/tests/shell/stat+event_uniquifying.sh b/tools/perf/tests/shell/stat+event_uniquifying.sh
index 5ec35c52b7d9..bf54bd6c3e2e 100755
--- a/tools/perf/tests/shell/stat+event_uniquifying.sh
+++ b/tools/perf/tests/shell/stat+event_uniquifying.sh
@@ -9,7 +9,8 @@ perf_tool=perf
err=0
test_event_uniquifying() {
- # We use `clockticks` to verify the uniquify behavior.
+ # We use `clockticks` in `uncore_imc` to verify the uniquify behavior.
+ pmu="uncore_imc"
event="clockticks"
# If the `-A` option is added, the event should be uniquified.
@@ -43,11 +44,18 @@ test_event_uniquifying() {
echo "stat event uniquifying test"
uniquified_event_array=()
+ # Skip if the machine does not have `uncore_imc` device.
+ if ! ${perf_tool} list pmu | grep -q ${pmu}; then
+ echo "Target does not support PMU ${pmu} [Skipped]"
+ err=2
+ return
+ fi
+
# Check how many uniquified events.
while IFS= read -r line; do
uniquified_event=$(echo "$line" | awk '{print $1}')
uniquified_event_array+=("${uniquified_event}")
- done < <(${perf_tool} list -v ${event} | grep "\[Kernel PMU event\]")
+ done < <(${perf_tool} list -v ${event} | grep ${pmu})
perf_command="${perf_tool} stat -e $event -A -o ${stat_output} -- true"
$perf_command
diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c
index 1d5759d08141..3a2a8438f9af 100644
--- a/tools/perf/tests/tests-scripts.c
+++ b/tools/perf/tests/tests-scripts.c
@@ -260,6 +260,7 @@ static void append_scripts_in_dir(int dir_fd,
continue; /* Skip scripts that have a separate driver. */
fd = openat(dir_fd, ent->d_name, O_PATH);
append_scripts_in_dir(fd, result, result_sz);
+ close(fd);
}
for (i = 0; i < n_dirs; i++) /* Clean up */
zfree(&entlist[i]);
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index c3322eb3d686..3b262487ec06 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr
return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
}
-static inline size_t msg_data_left(struct msghdr *msg)
+static inline size_t msg_data_left(const struct msghdr *msg)
{
return iov_iter_count(&msg->msg_iter);
}
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index e762e1af650c..0098b0ce8ccb 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -361,6 +361,7 @@ typedef int __bitwise __kernel_rwf_t;
#define PAGE_IS_PFNZERO (1 << 5)
#define PAGE_IS_HUGE (1 << 6)
#define PAGE_IS_SOFT_DIRTY (1 << 7)
+#define PAGE_IS_GUARD (1 << 8)
/*
* struct page_region - Page region with flags
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 15c18ef4eb11..3b93fb906e3c 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -364,4 +364,9 @@ struct prctl_mm_map {
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+/* FUTEX hash management */
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define PR_FUTEX_HASH_GET_SLOTS 2
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h
index f78ee3670dd5..1686861aae20 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/stat.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -182,8 +182,12 @@ struct statx {
/* File offset alignment for direct I/O reads */
__u32 stx_dio_read_offset_align;
- /* 0xb8 */
- __u64 __spare3[9]; /* Spare space for future expansion */
+ /* Optimised max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max_opt;
+ __u32 __spare2[1];
+
+ /* 0xc0 */
+ __u64 __spare3[8]; /* Spare space for future expansion */
/* 0x100 */
};
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index 178b00205fe6..89979ca23c3f 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -132,4 +132,8 @@
SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
+#ifndef SYM_PIC_ALIAS
+#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index a786cbfb0ff5..83aaf7cda635 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -268,6 +268,7 @@ bool is_event_supported(u8 type, u64 config)
ret = evsel__open(evsel, NULL, tmap) >= 0;
}
+ evsel__close(evsel);
evsel__delete(evsel);
}