From 71062e282d6a662b75df9aff65702455563ff7c9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 7 Nov 2025 09:07:11 -0800 Subject: perf tool: Add the perf_tool argument to all callbacks Getting context for what a tool is doing, such as the perf_inject instance, using container_of the tool is a common pattern in the code. This isn't possible event_op2, event_op3 and event_op4 callbacks as the tool isn't passed. Add the argument and then fix function signatures to match. As tools maybe reading a tool from somewhere else, change that code to use the passed in tool. Signed-off-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/builtin-script.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8124fcb51da9..d813adbf9889 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2729,7 +2729,8 @@ static int process_switch_event(const struct perf_tool *tool, sample->tid); } -static int process_auxtrace_error(struct perf_session *session, +static int process_auxtrace_error(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event) { if (scripting_ops && scripting_ops->process_auxtrace_error) { @@ -2737,7 +2738,7 @@ static int process_auxtrace_error(struct perf_session *session, return 0; } - return perf_event__process_auxtrace_error(session, event); + return perf_event__process_auxtrace_error(tool, session, event); } static int @@ -2785,7 +2786,8 @@ process_bpf_events(const struct perf_tool *tool __maybe_unused, } static int -process_bpf_metadata_event(struct perf_session *session __maybe_unused, +process_bpf_metadata_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, union perf_event *event) { perf_event__fprintf(event, NULL, stdout); @@ -3544,7 +3546,8 @@ static void script__setup_sample_type(struct perf_script *script) } } -static int process_stat_round_event(struct perf_session *session, +static int process_stat_round_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { struct perf_record_stat_round *round = &event->stat_round; @@ -3559,7 +3562,8 @@ static int process_stat_round_event(struct perf_session *session, return 0; } -static int process_stat_config_event(struct perf_session *session __maybe_unused, +static int process_stat_config_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, union perf_event *event) { perf_event__read_stat_config(&stat_config, &event->stat_config); @@ -3593,10 +3597,10 @@ static int set_maps(struct perf_script *script) } static -int process_thread_map_event(struct perf_session *session, +int process_thread_map_event(const struct perf_tool *tool, + struct perf_session *session __maybe_unused, union perf_event *event) { - const struct perf_tool *tool = session->tool; struct perf_script *script = container_of(tool, struct perf_script, tool); if (dump_trace) @@ -3615,10 +3619,10 @@ int process_thread_map_event(struct perf_session *session, } static -int process_cpu_map_event(struct perf_session *session, +int process_cpu_map_event(const struct perf_tool *tool, + struct perf_session *session __maybe_unused, union perf_event *event) { - const struct perf_tool *tool = session->tool; struct perf_script *script = container_of(tool, struct perf_script, tool); if (dump_trace) @@ -3636,7 +3640,8 @@ int process_cpu_map_event(struct perf_session *session, return set_maps(script); } -static int process_feature_event(struct perf_session *session, +static int process_feature_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { if (event->feat.feat_id < HEADER_LAST_FEATURE) @@ -3645,13 +3650,13 @@ static int process_feature_event(struct perf_session *session, } #ifdef HAVE_AUXTRACE_SUPPORT -static int perf_script__process_auxtrace_info(struct perf_session *session, +static int perf_script__process_auxtrace_info(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event) { - int ret = perf_event__process_auxtrace_info(session, event); + int ret = perf_event__process_auxtrace_info(tool, session, event); if (ret == 0) { - const struct perf_tool *tool = session->tool; struct perf_script *script = container_of(tool, struct perf_script, tool); ret = perf_script__setup_per_event_dump(script); -- cgit From 3622990efaab066897a2c570b6e90f4b9f30b200 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 11 Nov 2025 13:21:54 -0800 Subject: perf script: Change metric format to use json metrics The metric format option isn't properly supported. This change improves that by making the sample events update the counts of an evsel, where the shadow metric code expects to read the values. To support printing metrics, metrics need to be found. This is done on the first attempt to print a metric. Every metric is parsed and then the evsels in the metric's evlist compared to those in perf script using the perf_event_attr type and config. If the metric matches then it is added for printing. As an event in the perf script's evlist may have >1 metric id, or different leader for aggregation, the first metric matched will be displayed in those cases. An example use is: ``` $ perf record -a -e '{instructions,cpu-cycles}:S' -a -- sleep 1 $ perf script -F period,metric ... 867817 metric: 0.30 insn per cycle 125394 metric: 0.04 insn per cycle 313516 metric: 0.11 insn per cycle metric: 1.00 insn per cycle ``` Signed-off-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/builtin-script.c | 252 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 230 insertions(+), 22 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index d813adbf9889..8bab5b264f61 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -33,6 +33,7 @@ #include "util/path.h" #include "util/event.h" #include "util/mem-info.h" +#include "util/metricgroup.h" #include "ui/ui.h" #include "print_binary.h" #include "print_insn.h" @@ -341,9 +342,6 @@ struct evsel_script { char *filename; FILE *fp; u64 samples; - /* For metric output */ - u64 val; - int gnum; }; static inline struct evsel_script *evsel_script(struct evsel *evsel) @@ -2132,13 +2130,161 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused, fputs("\tmetric: ", mctx->fp); } -static void perf_sample__fprint_metric(struct perf_script *script, - struct thread *thread, +struct script_find_metrics_args { + struct evlist *evlist; + bool system_wide; +}; + +static struct evsel *map_metric_evsel_to_script_evsel(struct evlist *script_evlist, + struct evsel *metric_evsel) +{ + struct evsel *script_evsel; + + evlist__for_each_entry(script_evlist, script_evsel) { + /* Skip if perf_event_attr differ. */ + if (metric_evsel->core.attr.type != script_evsel->core.attr.type) + continue; + if (metric_evsel->core.attr.config != script_evsel->core.attr.config) + continue; + /* Skip if the script event has a metric_id that doesn't match. */ + if (script_evsel->metric_id && + strcmp(evsel__metric_id(metric_evsel), evsel__metric_id(script_evsel))) { + pr_debug("Skipping matching evsel due to differing metric ids '%s' vs '%s'\n", + evsel__metric_id(metric_evsel), evsel__metric_id(script_evsel)); + continue; + } + return script_evsel; + } + return NULL; +} + +static int script_find_metrics(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *data) +{ + struct script_find_metrics_args *args = data; + struct evlist *script_evlist = args->evlist; + struct evlist *metric_evlist = evlist__new(); + struct evsel *metric_evsel; + int ret = metricgroup__parse_groups(metric_evlist, + /*pmu=*/"all", + pm->metric_name, + /*metric_no_group=*/false, + /*metric_no_merge=*/false, + /*metric_no_threshold=*/true, + /*user_requested_cpu_list=*/NULL, + args->system_wide, + /*hardware_aware_grouping=*/false); + + if (ret) { + /* Metric parsing failed but continue the search. */ + goto out; + } + + /* + * Check the script_evlist has an entry for each metric_evlist entry. If + * the script evsel was already set up avoid changing data that may + * break it. + */ + evlist__for_each_entry(metric_evlist, metric_evsel) { + struct evsel *script_evsel = + map_metric_evsel_to_script_evsel(script_evlist, metric_evsel); + struct evsel *new_metric_leader; + + if (!script_evsel) { + pr_debug("Skipping metric '%s' as evsel '%s' / '%s' is missing\n", + pm->metric_name, evsel__name(metric_evsel), + evsel__metric_id(metric_evsel)); + goto out; + } + + if (script_evsel->metric_leader == NULL) + continue; + + if (metric_evsel->metric_leader == metric_evsel) { + new_metric_leader = script_evsel; + } else { + new_metric_leader = + map_metric_evsel_to_script_evsel(script_evlist, + metric_evsel->metric_leader); + } + /* Mismatching evsel leaders. */ + if (script_evsel->metric_leader != new_metric_leader) { + pr_debug("Skipping metric '%s' due to mismatching evsel metric leaders '%s' vs '%s'\n", + pm->metric_name, evsel__metric_id(metric_evsel), + evsel__metric_id(script_evsel)); + goto out; + } + } + /* + * Metric events match those in the script evlist, copy metric evsel + * data into the script evlist. + */ + evlist__for_each_entry(metric_evlist, metric_evsel) { + struct evsel *script_evsel = + map_metric_evsel_to_script_evsel(script_evlist, metric_evsel); + struct metric_event *metric_me = metricgroup__lookup(&metric_evlist->metric_events, + metric_evsel, + /*create=*/false); + + if (script_evsel->metric_id == NULL) { + script_evsel->metric_id = metric_evsel->metric_id; + metric_evsel->metric_id = NULL; + } + + if (script_evsel->metric_leader == NULL) { + if (metric_evsel->metric_leader == metric_evsel) { + script_evsel->metric_leader = script_evsel; + } else { + script_evsel->metric_leader = + map_metric_evsel_to_script_evsel(script_evlist, + metric_evsel->metric_leader); + } + } + + if (metric_me) { + struct metric_expr *expr; + struct metric_event *script_me = + metricgroup__lookup(&script_evlist->metric_events, + script_evsel, + /*create=*/true); + + if (!script_me) { + /* + * As the metric_expr is created, the only + * failure is a lack of memory. + */ + goto out; + } + list_splice_init(&metric_me->head, &script_me->head); + list_for_each_entry(expr, &script_me->head, nd) { + for (int i = 0; expr->metric_events[i]; i++) { + expr->metric_events[i] = + map_metric_evsel_to_script_evsel(script_evlist, + expr->metric_events[i]); + } + } + } + } + pr_debug("Found metric '%s' whose evsels match those of in the perf data\n", + pm->metric_name); + evlist__delete(metric_evlist); +out: + return 0; +} + +static struct aggr_cpu_id script_aggr_cpu_id_get(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return aggr_cpu_id__global(cpu, /*data=*/NULL); +} + +static void perf_sample__fprint_metric(struct thread *thread, struct evsel *evsel, struct perf_sample *sample, FILE *fp) { - struct evsel *leader = evsel__leader(evsel); + static bool init_metrics; struct perf_stat_output_ctx ctx = { .print_metric = script_print_metric, .new_line = script_new_line, @@ -2150,23 +2296,85 @@ static void perf_sample__fprint_metric(struct perf_script *script, }, .force_header = false, }; - struct evsel *ev2; - u64 val; + struct perf_counts_values *count, *old_count; + int cpu_map_idx, thread_map_idx, aggr_idx; + struct evsel *pos; + + if (!init_metrics) { + /* One time initialization of stat_config and metric data. */ + struct script_find_metrics_args args = { + .evlist = evsel->evlist, + .system_wide = perf_thread_map__pid(evsel->core.threads, /*idx=*/0) == -1, + + }; + if (!stat_config.output) + stat_config.output = stdout; + + if (!stat_config.aggr_map) { + /* TODO: currently only global aggregation is supported. */ + assert(stat_config.aggr_mode == AGGR_GLOBAL); + stat_config.aggr_get_id = script_aggr_cpu_id_get; + stat_config.aggr_map = + cpu_aggr_map__new(evsel->evlist->core.user_requested_cpus, + aggr_cpu_id__global, /*data=*/NULL, + /*needs_sort=*/false); + } + + metricgroup__for_each_metric(pmu_metrics_table__find(), script_find_metrics, &args); + init_metrics = true; + } + + if (!evsel->stats) { + if (evlist__alloc_stats(&stat_config, evsel->evlist, /*alloc_raw=*/true) < 0) + return; + } + if (!evsel->stats->aggr) { + if (evlist__alloc_aggr_stats(evsel->evlist, stat_config.aggr_map->nr) < 0) + return; + } - if (!evsel->stats) - evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false); - if (evsel_script(leader)->gnum++ == 0) - perf_stat__reset_shadow_stats(); - val = sample->period * evsel->scale; - evsel_script(evsel)->val = val; - if (evsel_script(leader)->gnum == leader->core.nr_members) { - for_each_group_member (ev2, leader) { - perf_stat__print_shadow_stats(&stat_config, ev2, - evsel_script(ev2)->val, - sample->cpu, - &ctx); + /* Update the evsel's count using the sample's data. */ + cpu_map_idx = perf_cpu_map__idx(evsel->core.cpus, (struct perf_cpu){sample->cpu}); + if (cpu_map_idx < 0) { + /* Missing CPU, check for any CPU. */ + if (perf_cpu_map__cpu(evsel->core.cpus, /*idx=*/0).cpu == -1 || + sample->cpu == (u32)-1) { + /* Place the counts in the which ever CPU is first in the map. */ + cpu_map_idx = 0; + } else { + pr_info("Missing CPU map entry for CPU %d\n", sample->cpu); + return; + } + } + thread_map_idx = perf_thread_map__idx(evsel->core.threads, sample->tid); + if (thread_map_idx < 0) { + /* Missing thread, check for any thread. */ + if (perf_thread_map__pid(evsel->core.threads, /*idx=*/0) == -1 || + sample->tid == (u32)-1) { + /* Place the counts in the which ever thread is first in the map. */ + thread_map_idx = 0; + } else { + pr_info("Missing thread map entry for thread %d\n", sample->tid); + return; + } + } + count = perf_counts(evsel->counts, cpu_map_idx, thread_map_idx); + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread_map_idx); + count->val = old_count->val + sample->period; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + + /* Update the aggregated stats. */ + perf_stat_process_counter(&stat_config, evsel); + + /* Display all metrics. */ + evlist__for_each_entry(evsel->evlist, pos) { + cpu_aggr_map__for_each_idx(aggr_idx, stat_config.aggr_map) { + perf_stat__print_shadow_stats(&stat_config, pos, + count->val, + aggr_idx, + &ctx); } - evsel_script(leader)->gnum = 0; } } @@ -2348,7 +2556,7 @@ static void process_event(struct perf_script *script, } if (PRINT_FIELD(METRIC)) - perf_sample__fprint_metric(script, thread, evsel, sample, fp); + perf_sample__fprint_metric(thread, evsel, sample, fp); if (verbose > 0) fflush(fp); -- cgit From b71f46a6a708617502faa1be915feb7dd6ecf603 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 11 Nov 2025 13:21:55 -0800 Subject: perf stat: Remove hard coded shadow metrics Now that the metrics are encoded in common json the hard coded printing means the metrics are shown twice. Remove the hard coded version. This means that when specifying events, and those events correspond to a hard coded metric, the metric will no longer be displayed. The metric will be displayed if the metric is requested. Due to the adhoc printing in the previous approach it was often found frustrating, the new approach avoids this. The default perf stat output on an alderlake now looks like: ``` $ perf stat -a -- sleep 1 Performance counter stats for 'system wide': 19,697 context-switches # nan cs/sec cs_per_second TopdownL1 (cpu_core) # 10.7 % tma_bad_speculation # 24.9 % tma_frontend_bound TopdownL1 (cpu_core) # 34.3 % tma_backend_bound # 30.1 % tma_retiring 6,593 page-faults # nan faults/sec page_faults_per_second 729,065,658 cpu_atom/cpu-cycles/ # nan GHz cycles_frequency (49.79%) 1,605,131,101 cpu_core/cpu-cycles/ # nan GHz cycles_frequency # 19.7 % tma_bad_speculation # 14.2 % tma_retiring (50.14%) # 37.3 % tma_frontend_bound (50.31%) 87,302,268 cpu_atom/branches/ # nan M/sec branch_frequency (60.27%) 512,046,956 cpu_core/branches/ # nan M/sec branch_frequency 1,111 cpu-migrations # nan migrations/sec migrations_per_second # 28.8 % tma_backend_bound (60.26%) 0.00 msec cpu-clock # 0.0 CPUs CPUs_utilized 392,509,323 cpu_atom/instructions/ # 0.6 instructions insn_per_cycle (60.19%) 2,990,369,310 cpu_core/instructions/ # 1.9 instructions insn_per_cycle 3,493,478 cpu_atom/branch-misses/ # 5.9 % branch_miss_rate (49.69%) 7,297,531 cpu_core/branch-misses/ # 1.4 % branch_miss_rate 1.006621701 seconds time elapsed ``` Signed-off-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/builtin-script.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8bab5b264f61..cf0040bbaba9 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2371,7 +2371,6 @@ static void perf_sample__fprint_metric(struct thread *thread, evlist__for_each_entry(evsel->evlist, pos) { cpu_aggr_map__for_each_idx(aggr_idx, stat_config.aggr_map) { perf_stat__print_shadow_stats(&stat_config, pos, - count->val, aggr_idx, &ctx); } -- cgit From 754187ad73b73bcb44f106a8e5fc88789beff1bd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sun, 9 Nov 2025 17:31:51 -0800 Subject: perf build: Remove NO_AUXTRACE build option The NO_AUXTRACE build option was used when the __get_cpuid feature test failed or if it was provided on the command line. The option no longer avoids a dependency on a library and so having the option is just adding complexity to the code base. Remove the option CONFIG_AUXTRACE from Build files and HAVE_AUXTRACE_SUPPORT by assuming it is always defined. Signed-off-by: Ian Rogers Reviewed-by: James Clark Signed-off-by: Namhyung Kim --- tools/perf/builtin-script.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index cf0040bbaba9..3ac6b80c460e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2000,7 +2000,6 @@ static int perf_sample__fprintf_synth_iflag_chg(struct perf_sample *sample, FILE return len + perf_sample__fprintf_pt_spacing(len, fp); } -#ifdef HAVE_AUXTRACE_SUPPORT static int perf_sample__fprintf_synth_vpadtl(struct perf_sample *data, FILE *fp) { struct powerpc_vpadtl_entry *dtl = (struct powerpc_vpadtl_entry *)data->raw_data; @@ -2019,13 +2018,6 @@ static int perf_sample__fprintf_synth_vpadtl(struct perf_sample *data, FILE *fp) return len; } -#else -static int perf_sample__fprintf_synth_vpadtl(struct perf_sample *data __maybe_unused, - FILE *fp __maybe_unused) -{ - return 0; -} -#endif static int perf_sample__fprintf_synth(struct perf_sample *sample, struct evsel *evsel, FILE *fp) @@ -3856,7 +3848,6 @@ static int process_feature_event(const struct perf_tool *tool __maybe_unused, return 0; } -#ifdef HAVE_AUXTRACE_SUPPORT static int perf_script__process_auxtrace_info(const struct perf_tool *tool, struct perf_session *session, union perf_event *event) @@ -3871,9 +3862,6 @@ static int perf_script__process_auxtrace_info(const struct perf_tool *tool, return ret; } -#else -#define perf_script__process_auxtrace_info 0 -#endif static int parse_insn_trace(const struct option *opt __maybe_unused, const char *str, int unset __maybe_unused) -- cgit From 86ce2a29dd9a3564fd7be584c8bb7ced28f7ca02 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 14 Nov 2025 14:06:18 +0000 Subject: perf script: Fix build by removing unused evsel_script() The evsel_script() function is unused since the linked commit. Fix the build by removing it. Fixes the following compilation error: static inline struct evsel_script *evsel_script(struct evsel *evsel) ^ builtin-script.c:347:36: error: unused function 'evsel_script' [-Werror,-Wunused-function] Fixes: 3622990efaab ("perf script: Change metric format to use json metrics") Signed-off-by: James Clark Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/builtin-script.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3ac6b80c460e..011962e1ee0f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -344,11 +344,6 @@ struct evsel_script { u64 samples; }; -static inline struct evsel_script *evsel_script(struct evsel *evsel) -{ - return (struct evsel_script *)evsel->priv; -} - static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data) { struct evsel_script *es = zalloc(sizeof(*es)); -- cgit From 25a9dd56cf84300f0e08609aeef1418bcf0f097d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Nov 2025 15:48:02 -0800 Subject: perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED Handle the deferred callchains in the script output. $ perf script ... pwd 2312 121.163435: 249113 cpu/cycles/P: ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms]) ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms]) ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms]) ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms]) ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms]) ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms]) ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms]) b00000006 (cookie) ([unknown]) pwd 2312 121.163447: DEFERRED CALLCHAIN [cookie: b00000006] 7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) 7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) 7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/builtin-script.c | 89 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 011962e1ee0f..85b42205a71b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2706,6 +2706,94 @@ out_put: return ret; } +static int process_deferred_sample_event(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine) +{ + struct perf_script *scr = container_of(tool, struct perf_script, tool); + struct perf_event_attr *attr = &evsel->core.attr; + struct evsel_script *es = evsel->priv; + unsigned int type = output_type(attr->type); + struct addr_location al; + FILE *fp = es->fp; + int ret = 0; + + if (output[type].fields == 0) + return 0; + + /* Set thread to NULL to indicate addr_al and al are not initialized */ + addr_location__init(&al); + + if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num, + sample->time)) { + goto out_put; + } + + if (debug_mode) { + if (sample->time < last_timestamp) { + pr_err("Samples misordered, previous: %" PRIu64 + " this: %" PRIu64 "\n", last_timestamp, + sample->time); + nr_unordered++; + } + last_timestamp = sample->time; + goto out_put; + } + + if (filter_cpu(sample)) + goto out_put; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_err("problem processing %d event, skipping it.\n", + event->header.type); + ret = -1; + goto out_put; + } + + if (al.filtered) + goto out_put; + + if (!show_event(sample, evsel, al.thread, &al, NULL)) + goto out_put; + + if (evswitch__discard(&scr->evswitch, evsel)) + goto out_put; + + perf_sample__fprintf_start(scr, sample, al.thread, evsel, + PERF_RECORD_CALLCHAIN_DEFERRED, fp); + fprintf(fp, "DEFERRED CALLCHAIN [cookie: %llx]", + (unsigned long long)event->callchain_deferred.cookie); + + if (PRINT_FIELD(IP)) { + struct callchain_cursor *cursor = NULL; + + if (symbol_conf.use_callchain && sample->callchain) { + cursor = get_tls_callchain_cursor(); + if (thread__resolve_callchain(al.thread, cursor, evsel, + sample, NULL, NULL, + scripting_max_stack)) { + pr_info("cannot resolve deferred callchains\n"); + cursor = NULL; + } + } + + fputc(cursor ? '\n' : ' ', fp); + sample__fprintf_sym(sample, &al, 0, output[type].print_ip_opts, + cursor, symbol_conf.bt_stop_list, fp); + } + + fprintf(fp, "\n"); + + if (verbose > 0) + fflush(fp); + +out_put: + addr_location__exit(&al); + return ret; +} + // Used when scr->per_event_dump is not set static struct evsel_script es_stdout; @@ -4303,6 +4391,7 @@ script_found: perf_tool__init(&script.tool, !unsorted_dump); script.tool.sample = process_sample_event; + script.tool.callchain_deferred = process_deferred_sample_event; script.tool.mmap = perf_event__process_mmap; script.tool.mmap2 = perf_event__process_mmap2; script.tool.comm = perf_event__process_comm; -- cgit From 9b4525fd089decf3557eff4a3ef348cdc2b68353 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Nov 2025 15:48:03 -0800 Subject: perf tools: Merge deferred user callchains Save samples with deferred callchains in a separate list and deliver them after merging the user callchains. If users don't want to merge they can set tool->merge_deferred_callchains to false to prevent the behavior. With previous result, now perf script will show the merged callchains. $ perf script ... pwd 2312 121.163435: 249113 cpu/cycles/P: ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms]) ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms]) ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms]) ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms]) ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms]) ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms]) ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms]) 7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) 7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) 7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) ... The old output can be get using --no-merge-callchain option. Also perf report can get the user callchain entry at the end. $ perf report --no-children --stdio -q -S __build_id_parse.isra.0 # symbol: __build_id_parse.isra.0 8.40% pwd [kernel.kallsyms] | ---__build_id_parse.isra.0 perf_event_mmap mprotect_fixup do_mprotect_pkey __x64_sys_mprotect do_syscall_64 entry_SYSCALL_64_after_hwframe mprotect _dl_sysdep_start _dl_start_user Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/builtin-script.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 85b42205a71b..62e43d3c5ad7 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -4009,6 +4009,7 @@ int cmd_script(int argc, const char **argv) bool header_only = false; bool script_started = false; bool unsorted_dump = false; + bool merge_deferred_callchains = true; char *rec_script_path = NULL; char *rep_script_path = NULL; struct perf_session *session; @@ -4162,6 +4163,8 @@ int cmd_script(int argc, const char **argv) "Guest code can be found in hypervisor process"), OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr, "Enable LBR callgraph stitching approach"), + OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains, + "Enable merge deferred user callchains"), OPTS_EVSWITCH(&script.evswitch), OPT_END() }; @@ -4418,6 +4421,7 @@ script_found: script.tool.throttle = process_throttle_event; script.tool.unthrottle = process_throttle_event; script.tool.ordering_requires_timestamps = true; + script.tool.merge_deferred_callchains = merge_deferred_callchains; session = perf_session__new(&data, &script.tool); if (IS_ERR(session)) return PTR_ERR(session); -- cgit