summaryrefslogtreecommitdiff
path: root/tools/bpf
diff options
context:
space:
mode:
authorYonghong Song <yhs@fb.com>2020-11-18 23:30:39 -0800
committerDaniel Borkmann <daniel@iogearbox.net>2020-11-20 15:50:38 +0100
commit450d060e8f752a6ce052a2bffd3f01633472e330 (patch)
treeb23f99d32b48d538b2d835bf7185b21067a34ec1 /tools/bpf
parent4e99d115d865d45e17e83478d757b58d8fa66d3c (diff)
bpftool: Add {i,d}tlb_misses support for bpftool profile
Commit 47c09d6a9f67("bpftool: Introduce "prog profile" command") introduced "bpftool prog profile" command which can be used to profile bpf program with metrics like # of instructions, This patch added support for itlb_misses and dtlb_misses. During an internal bpf program performance evaluation, I found these two metrics are also very useful. The following is an example output: $ bpftool prog profile id 324 duration 3 cycles itlb_misses 1885029 run_cnt 5134686073 cycles 306893 itlb_misses $ bpftool prog profile id 324 duration 3 cycles dtlb_misses 1827382 run_cnt 4943593648 cycles 5975636 dtlb_misses $ bpftool prog profile id 324 duration 3 cycles llc_misses 1836527 run_cnt 5019612972 cycles 4161041 llc_misses From the above, we can see quite some dtlb misses, 3 dtlb misses perf prog run. This might be something worth further investigation. Signed-off-by: Yonghong Song <yhs@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Song Liu <songliubraving@fb.com> Link: https://lore.kernel.org/bpf/20201119073039.4060095-1-yhs@fb.com
Diffstat (limited to 'tools/bpf')
-rw-r--r--tools/bpf/bpftool/prog.c30
1 files changed, 29 insertions, 1 deletions
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index acdb2c245f0a..1fe3ba255bad 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1717,6 +1717,34 @@ struct profile_metric {
.ratio_desc = "LLC misses per million insns",
.ratio_mul = 1e6,
},
+ {
+ .name = "itlb_misses",
+ .attr = {
+ .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_ITLB |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+ .exclude_user = 1
+ },
+ .ratio_metric = 2,
+ .ratio_desc = "itlb misses per million insns",
+ .ratio_mul = 1e6,
+ },
+ {
+ .name = "dtlb_misses",
+ .attr = {
+ .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_DTLB |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+ .exclude_user = 1
+ },
+ .ratio_metric = 2,
+ .ratio_desc = "dtlb misses per million insns",
+ .ratio_mul = 1e6,
+ },
};
static __u64 profile_total_count;
@@ -2109,7 +2137,7 @@ static int do_help(int argc, char **argv)
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
- " METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
+ " METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, argv[-2]);