diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-14 16:31:23 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-14 16:31:23 -0700 |
commit | 1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa (patch) | |
tree | a391eed8ae206613b48e02e56e6ad5c4432d8767 /tools/perf/util/stat-shadow.c | |
parent | 63bd30f249dcf0a7ce16967935cecee8feec24bb (diff) | |
parent | 0f66dfe7b91d2743cc71dfff37af503215b204ef (diff) |
Merge tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Namhyung Kim:
"perf stat:
- Support new 'cluster' aggregation mode for shared resources
depending on the hardware configuration:
$ sudo perf stat -a --per-cluster -e cycles,instructions sleep 1
Performance counter stats for 'system wide':
S0-D0-CLS0 2 85,051,822 cycles
S0-D0-CLS0 2 73,909,908 instructions # 0.87 insn per cycle
S0-D0-CLS2 2 93,365,918 cycles
S0-D0-CLS2 2 83,006,158 instructions # 0.89 insn per cycle
S0-D0-CLS4 2 104,157,523 cycles
S0-D0-CLS4 2 53,234,396 instructions # 0.51 insn per cycle
S0-D0-CLS6 2 65,891,079 cycles
S0-D0-CLS6 2 41,478,273 instructions # 0.63 insn per cycle
1.002407989 seconds time elapsed
- Various fixes and cleanups for event metrics including NaN handling
perf script:
- Use libcapstone if available to disassemble the instructions. This
enables 'perf script -F disasm' and 'perf script --insn-trace=disasm'
(for Intel-PT):
$ perf script -F event,ip,disasm
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffa9839d25 movq %rax, %r14
cycles:P: ffffffffa9cdcaf0 endbr64
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffaa401f86 iretq
cycles:P: ffffffffa99c4de5 movq 0x30(%rcx), %r8
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffaa401f86 iretq
cycles:P: ffffffffa9907983 movl 0x68(%rbx), %eax
cycles:P: ffffffffa988d428 wrmsr
- Expose sample ID / stream ID to python scripts
perf test:
- Add more perf test cases from Redhat internal test suites. This
time it adds the base infra and a few perf probe tests. More to
come. :)
- Add 'perf test -p' for parallel execution and fix some issues found
by the parallel test
- Support symbol test to print symbols in given (active) module:
$ perf test -F -v Symbols --dso /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko
--- start ---
Testing /lib/modules/6.5.13-1rodete2-amd64/kernel/fs/ext4/ext4.ko
Overlapping symbols:
7a990-7a9a0 l __pfx_ext4_exit_fs
7a990-7a9a0 g __pfx_cleanup_module
Overlapping symbols:
7a9a0-7aa1c l ext4_exit_fs
7a9a0-7aa1c g cleanup_module
...
JSON metric updates:
- A new round of Intel metric updates
- Support Power11 PVR (compatible to Power10)
- Fix cache latency events on Zen 4 to set SliceId properly
Internal:
- Fix reference counting for 'map' data structure, tireless work from
Ian!
- More memory optimization for struct thread and annotate histogram.
Now, 'perf report' (TUI) and 'perf annotate' should be much
lighter-weight in terms of memory footprint
- Support cross-arch perf register access. Clean up the build
configuration so that it can detect arch-register support at
runtime. This can allow to parse register data in sample which was
recorded in a different arch
Others:
- Sync task state in 'perf sched' to kernel using trace event fields.
The task states have been changed so tools cannot assume a fixed
encoding
- Clean up 'perf mem' to generalize the arch-specific events
- Add support for local and global variables to data type profiling.
This would increase the success rate of type resolution with DWARF
- Add short option -H for --hierarchy in 'perf report' and 'perf top'"
* tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (154 commits)
perf annotate: Add comments in the data structures
perf annotate: Remove sym_hist.addr[] array
perf annotate: Calculate instruction overhead using hashmap
perf annotate: Add a hashmap for symbol histogram
perf threads: Reduce table size from 256 to 8
perf threads: Switch from rbtree to hashmap
perf threads: Move threads to its own files
perf machine: Move machine's threads into its own abstraction
perf machine: Move fprintf to for_each loop and a callback
perf trace: Ignore thread hashing in summary
perf report: Sort child tasks by tid
perf vendor events amd: Fix Zen 4 cache latency events
perf version: Display availability of OpenCSD support
perf vendor events intel: Add umasks/occ_sel to PCU events.
perf map: Fix map reference count issues
libperf evlist: Avoid out-of-bounds access
perf lock contention: Account contending locks too
perf metrics: Fix segv for metrics with no events
perf metrics: Fix metric matching
perf pmu: Fix a potential memory leak in perf_pmu__lookup()
...
Diffstat (limited to 'tools/perf/util/stat-shadow.c')
-rw-r--r-- | tools/perf/util/stat-shadow.c | 72 |
1 files changed, 44 insertions, 28 deletions
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index e31426167852..3466aa952442 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -355,11 +355,13 @@ static void print_nsecs(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } -static int prepare_metric(struct evsel **metric_events, - struct metric_ref *metric_refs, +static int prepare_metric(const struct metric_expr *mexp, + const struct evsel *evsel, struct expr_parse_ctx *pctx, int aggr_idx) { + struct evsel * const *metric_events = mexp->metric_events; + struct metric_ref *metric_refs = mexp->metric_refs; int i; for (i = 0; metric_events[i]; i++) { @@ -398,12 +400,33 @@ static int prepare_metric(struct evsel **metric_events, source_count = 1; } else { struct perf_stat_evsel *ps = metric_events[i]->stats; - struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx]; + struct perf_stat_aggr *aggr; + /* + * If there are multiple uncore PMUs and we're not + * reading the leader's stats, determine the stats for + * the appropriate uncore PMU. + */ + if (evsel && evsel->metric_leader && + evsel->pmu != evsel->metric_leader->pmu && + mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) { + struct evsel *pos; + + evlist__for_each_entry(evsel->evlist, pos) { + if (pos->pmu != evsel->pmu) + continue; + if (pos->metric_leader != mexp->metric_events[i]) + continue; + ps = pos->stats; + source_count = 1; + break; + } + } + aggr = &ps->aggr[aggr_idx]; if (!aggr) break; - if (!metric_events[i]->supported) { + if (!metric_events[i]->supported) { /* * Not supported events will have a count of 0, * which can be confusing in a @@ -414,13 +437,9 @@ static int prepare_metric(struct evsel **metric_events, val = NAN; source_count = 0; } else { - /* - * If an event was scaled during stat gathering, - * reverse the scale before computing the - * metric. - */ - val = aggr->counts.val * (1.0 / metric_events[i]->scale); - source_count = evsel__source_count(metric_events[i]); + val = aggr->counts.val; + if (!source_count) + source_count = evsel__source_count(metric_events[i]); } } n = strdup(evsel__metric_id(metric_events[i])); @@ -441,18 +460,18 @@ static int prepare_metric(struct evsel **metric_events, } static void generic_metric(struct perf_stat_config *config, - const char *metric_expr, - const char *metric_threshold, - struct evsel **metric_events, - struct metric_ref *metric_refs, - char *name, - const char *metric_name, - const char *metric_unit, - int runtime, + struct metric_expr *mexp, + struct evsel *evsel, int aggr_idx, struct perf_stat_output_ctx *out) { print_metric_t print_metric = out->print_metric; + const char *metric_name = mexp->metric_name; + const char *metric_expr = mexp->metric_expr; + const char *metric_threshold = mexp->metric_threshold; + const char *metric_unit = mexp->metric_unit; + struct evsel * const *metric_events = mexp->metric_events; + int runtime = mexp->runtime; struct expr_parse_ctx *pctx; double ratio, scale, threshold; int i; @@ -467,7 +486,7 @@ static void generic_metric(struct perf_stat_config *config, pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list); pctx->sctx.runtime = runtime; pctx->sctx.system_wide = config->system_wide; - i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx); + i = prepare_metric(mexp, evsel, pctx, aggr_idx); if (i < 0) { expr__ctx_free(pctx); return; @@ -502,18 +521,18 @@ static void generic_metric(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.2f", metric_name ? metric_name : - out->force_header ? name : "", + out->force_header ? evsel->name : "", ratio); } } else { print_metric(config, ctxp, color, /*unit=*/NULL, out->force_header ? - (metric_name ? metric_name : name) : "", 0); + (metric_name ?: evsel->name) : "", 0); } } else { print_metric(config, ctxp, color, /*unit=*/NULL, out->force_header ? - (metric_name ? metric_name : name) : "", 0); + (metric_name ?: evsel->name) : "", 0); } expr__ctx_free(pctx); @@ -528,7 +547,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx) if (!pctx) return NAN; - if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0) + if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0) goto out; if (expr__parse(&ratio, pctx, mexp->metric_expr)) @@ -630,10 +649,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, if ((*num)++ > 0) out->new_line(config, ctxp); - generic_metric(config, mexp->metric_expr, mexp->metric_threshold, - mexp->metric_events, mexp->metric_refs, evsel->name, - mexp->metric_name, mexp->metric_unit, mexp->runtime, - aggr_idx, out); + generic_metric(config, mexp, evsel, aggr_idx, out); } return NULL; |