summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNamhyung Kim <namhyung@kernel.org>2025-01-07 14:43:50 -0800
committerArnaldo Carvalho de Melo <acme@redhat.com>2025-01-08 17:20:42 -0300
commit86a12b92a97326e17a621a8f759089a3beab8f3c (patch)
treeb861c581710dbad4fe744fcac54b9ea3ae6ca0ec
parent05efa0ab0115d32882fc516aa5c99199f35412f1 (diff)
perf ftrace: Display latency statistics at the end
Sometimes users also want to see average latency as well as histogram. Display latency statistics like avg, max, min at the end. $ sudo ./perf ftrace latency -ab -T synchronize_rcu -- ... # DURATION | COUNT | GRAPH | 0 - 1 us | 0 | | 1 - 2 us | 0 | | 2 - 4 us | 0 | | 4 - 8 us | 0 | | 8 - 16 us | 0 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 1 | ##### | 16 - 32 ms | 7 | ######################################## | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | # statistics (in usec) total time: 171832 avg time: 21479 max time: 30906 min time: 15869 count: 8 Committer testing: root@number:~# perf ftrace latency -nab --bucket-range 100 --max-latency 512 -T switch_mm_irqs_off sleep 1 # DURATION | COUNT | GRAPH | 0 - 100 ns | 314 | ## | 100 - 200 ns | 1843 | ############# | 200 - 300 ns | 1390 | ########## | 300 - 400 ns | 844 | ###### | 400 - 500 ns | 480 | ### | 500 - 512 ns | 315 | ## | 512 - ... ns | 16 | | # statistics (in nsec) total time: 2448936 avg time: 387 max time: 3285 min time: 82 count: 6328 root@number:~# Reviewed-by: James Clark <james.clark@linaro.org> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: https://lore.kernel.org/r/20250107224352.1128669-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/builtin-ftrace.c14
-rw-r--r--tools/perf/util/bpf_ftrace.c12
-rw-r--r--tools/perf/util/bpf_skel/func_latency.bpf.c19
-rw-r--r--tools/perf/util/ftrace.h6
4 files changed, 47 insertions, 4 deletions
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 834074162a63..2a786047ba32 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -43,6 +43,8 @@
static volatile sig_atomic_t workload_exec_errno;
static volatile sig_atomic_t done;
+static struct stats latency_stats; /* for tracepoints */
+
static void sig_handler(int sig __maybe_unused)
{
done = true;
@@ -798,8 +800,10 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
if (i >= NUM_BUCKET || num >= max_latency - min_latency)
i = NUM_BUCKET - 1;
+ num += min_latency;
do_inc:
buckets[i]++;
+ update_stats(&latency_stats, num);
next:
/* empty the line buffer for the next output */
@@ -894,6 +898,12 @@ print_bucket_info:
printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
bar_len, bar, bar_total - bar_len, "");
+ printf("\n# statistics (in %s)\n", ftrace->use_nsec ? "nsec" : "usec");
+ printf(" total time: %20.0f\n", latency_stats.mean * latency_stats.n);
+ printf(" avg time: %20.0f\n", latency_stats.mean);
+ printf(" max time: %20"PRIu64"\n", latency_stats.max);
+ printf(" min time: %20"PRIu64"\n", latency_stats.min);
+ printf(" count: %20.0f\n", latency_stats.n);
}
static int prepare_func_latency(struct perf_ftrace *ftrace)
@@ -932,6 +942,8 @@ static int prepare_func_latency(struct perf_ftrace *ftrace)
if (fd < 0)
pr_err("failed to open trace_pipe\n");
+ init_stats(&latency_stats);
+
put_tracing_file(trace_file);
return fd;
}
@@ -961,7 +973,7 @@ static int stop_func_latency(struct perf_ftrace *ftrace)
static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
{
if (ftrace->target.use_bpf)
- return perf_ftrace__latency_read_bpf(ftrace, buckets);
+ return perf_ftrace__latency_read_bpf(ftrace, buckets, &latency_stats);
return 0;
}
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index bc484e65fb8f..25fc280e414a 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -11,6 +11,7 @@
#include "util/debug.h"
#include "util/evlist.h"
#include "util/bpf_counter.h"
+#include "util/stat.h"
#include "util/bpf_skel/func_latency.skel.h"
@@ -86,6 +87,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
}
}
+ skel->bss->min = INT64_MAX;
+
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
false, func->name);
if (IS_ERR(skel->links.func_begin)) {
@@ -122,7 +125,7 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
}
int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
- int buckets[])
+ int buckets[], struct stats *stats)
{
int i, fd, err;
u32 idx;
@@ -146,6 +149,13 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
buckets[idx] += hist[i];
}
+ if (skel->bss->count) {
+ stats->mean = skel->bss->total / skel->bss->count;
+ stats->n = skel->bss->count;
+ stats->max = skel->bss->max;
+ stats->min = skel->bss->min;
+ }
+
free(hist);
return 0;
}
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index 4df54e1b1411..fb144811b34f 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -38,6 +38,12 @@ struct {
int enabled = 0;
+// stats
+__s64 total;
+__s64 count;
+__s64 max;
+__s64 min;
+
const volatile int has_cpu = 0;
const volatile int has_task = 0;
const volatile int use_nsec = 0;
@@ -122,6 +128,8 @@ int BPF_PROG(func_end)
delta >= max_latency - min_latency)
key = NUM_BUCKET - 1;
}
+
+ delta += min_latency;
goto do_lookup;
}
// calculate index using delta
@@ -136,6 +144,17 @@ do_lookup:
return 0;
*hist += 1;
+
+ if (bucket_range == 0)
+ delta /= cmp_base;
+
+ __sync_fetch_and_add(&total, delta);
+ __sync_fetch_and_add(&count, 1);
+
+ if (delta > max)
+ max = delta;
+ if (delta < min)
+ min = delta;
}
return 0;
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index f218703063f7..5dee2caba0fe 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -7,6 +7,7 @@
struct evlist;
struct hashamp;
+struct stats;
struct perf_ftrace {
struct evlist *evlist;
@@ -46,7 +47,7 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
- int buckets[]);
+ int buckets[], struct stats *stats);
int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
#else /* !HAVE_BPF_SKEL */
@@ -71,7 +72,8 @@ perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
static inline int
perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
- int buckets[] __maybe_unused)
+ int buckets[] __maybe_unused,
+ struct stats *stats __maybe_unused)
{
return -1;
}