path: root/tools/perf/util/stat-shadow.c
author    Linus Torvalds <torvalds@linux-foundation.org>    2024-03-14 16:31:23 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>    2024-03-14 16:31:23 -0700
commit    1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa (patch)
tree      a391eed8ae206613b48e02e56e6ad5c4432d8767 /tools/perf/util/stat-shadow.c
parent    63bd30f249dcf0a7ce16967935cecee8feec24bb (diff)
parent    0f66dfe7b91d2743cc71dfff37af503215b204ef (diff)
Merge tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Namhyung Kim:
 "perf stat:

   - Support new 'cluster' aggregation mode for shared resources
     depending on the hardware configuration:

        $ sudo perf stat -a --per-cluster -e cycles,instructions sleep 1

         Performance counter stats for 'system wide':

        S0-D0-CLS0    2         85,051,822      cycles
        S0-D0-CLS0    2         73,909,908      instructions      #    0.87  insn per cycle
        S0-D0-CLS2    2         93,365,918      cycles
        S0-D0-CLS2    2         83,006,158      instructions      #    0.89  insn per cycle
        S0-D0-CLS4    2        104,157,523      cycles
        S0-D0-CLS4    2         53,234,396      instructions      #    0.51  insn per cycle
        S0-D0-CLS6    2         65,891,079      cycles
        S0-D0-CLS6    2         41,478,273      instructions      #    0.63  insn per cycle

               1.002407989 seconds time elapsed

   - Various fixes and cleanups for event metrics including NaN handling

  perf script:

   - Use libcapstone if available to disassemble the instructions. This
     enables 'perf script -F disasm' and 'perf script --insn-trace=disasm'
     (for Intel-PT):

        $ perf script -F event,ip,disasm
        cycles:P:  ffffffffa988d428   wrmsr
        cycles:P:  ffffffffa9839d25   movq %rax, %r14
        cycles:P:  ffffffffa9cdcaf0   endbr64
        cycles:P:  ffffffffa988d428   wrmsr
        cycles:P:  ffffffffa988d428   wrmsr
        cycles:P:  ffffffffaa401f86   iretq
        cycles:P:  ffffffffa99c4de5   movq 0x30(%rcx), %r8
        cycles:P:  ffffffffa988d428   wrmsr
        cycles:P:  ffffffffaa401f86   iretq
        cycles:P:  ffffffffa9907983   movl 0x68(%rbx), %eax
        cycles:P:  ffffffffa988d428   wrmsr

   - Expose sample ID / stream ID to python scripts

  perf test:

   - Add more perf test cases from Redhat internal test suites. This
     time it adds the base infra and a few perf probe tests. More to
     come. :)

   - Add 'perf test -p' for parallel execution and fix some issues found
     by the parallel test

   - Support symbol test to print symbols in given (active) module:

        $ perf test -F -v Symbols --dso /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko
        --- start ---
        Testing /lib/modules/6.5.13-1rodete2-amd64/kernel/fs/ext4/ext4.ko
        Overlapping symbols:
         7a990-7a9a0 l __pfx_ext4_exit_fs
         7a990-7a9a0 g __pfx_cleanup_module
        Overlapping symbols:
         7a9a0-7aa1c l ext4_exit_fs
         7a9a0-7aa1c g cleanup_module
        ...

  JSON metric updates:

   - A new round of Intel metric updates

   - Support Power11 PVR (compatible to Power10)

   - Fix cache latency events on Zen 4 to set SliceId properly

  Internal:

   - Fix reference counting for 'map' data structure, tireless work from
     Ian!

   - More memory optimization for struct thread and annotate histogram.
     Now, 'perf report' (TUI) and 'perf annotate' should be much
     lighter-weight in terms of memory footprint

   - Support cross-arch perf register access. Clean up the build
     configuration so that it can detect arch-register support at
     runtime. This can allow to parse register data in sample which was
     recorded in a different arch

  Others:

   - Sync task state in 'perf sched' to kernel using trace event fields.
     The task states have been changed so tools cannot assume a fixed
     encoding

   - Clean up 'perf mem' to generalize the arch-specific events

   - Add support for local and global variables to data type profiling.
     This would increase the success rate of type resolution with DWARF

   - Add short option -H for --hierarchy in 'perf report' and 'perf top'"

* tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (154 commits)
  perf annotate: Add comments in the data structures
  perf annotate: Remove sym_hist.addr[] array
  perf annotate: Calculate instruction overhead using hashmap
  perf annotate: Add a hashmap for symbol histogram
  perf threads: Reduce table size from 256 to 8
  perf threads: Switch from rbtree to hashmap
  perf threads: Move threads to its own files
  perf machine: Move machine's threads into its own abstraction
  perf machine: Move fprintf to for_each loop and a callback
  perf trace: Ignore thread hashing in summary
  perf report: Sort child tasks by tid
  perf vendor events amd: Fix Zen 4 cache latency events
  perf version: Display availability of OpenCSD support
  perf vendor events intel: Add umasks/occ_sel to PCU events.
  perf map: Fix map reference count issues
  libperf evlist: Avoid out-of-bounds access
  perf lock contention: Account contending locks too
  perf metrics: Fix segv for metrics with no events
  perf metrics: Fix metric matching
  perf pmu: Fix a potential memory leak in perf_pmu__lookup()
  ...
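The "NaN handling" item above is what the stat-shadow.c hunks below implement: an event that could not be programmed gets a NAN count instead of 0, so any metric computed from it also reads as not-a-number. A minimal standalone sketch (an editor's illustration, not perf code) of why that propagation is preferable to a silent zero:

	#include <math.h>
	#include <stdio.h>

	int main(void)
	{
		/* Count for an event that could not be programmed on this PMU. */
		double cycles = NAN;
		double instructions = 1e6;

		/* Arithmetic with NAN stays NAN, so the ratio cannot be mistaken
		 * for a genuine 0.00 insn-per-cycle reading. */
		double ipc = instructions / cycles;

		printf("insn per cycle: %f (isnan=%d)\n", ipc, isnan(ipc));
		return 0;
	}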
Diffstat (limited to 'tools/perf/util/stat-shadow.c')
-rw-r--r--    tools/perf/util/stat-shadow.c    72
1 file changed, 44 insertions, 28 deletions
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index e31426167852..3466aa952442 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -355,11 +355,13 @@ static void print_nsecs(struct perf_stat_config *config,
 	print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
 }
 
-static int prepare_metric(struct evsel **metric_events,
-			  struct metric_ref *metric_refs,
+static int prepare_metric(const struct metric_expr *mexp,
+			  const struct evsel *evsel,
 			  struct expr_parse_ctx *pctx,
 			  int aggr_idx)
 {
+	struct evsel * const *metric_events = mexp->metric_events;
+	struct metric_ref *metric_refs = mexp->metric_refs;
 	int i;
 
 	for (i = 0; metric_events[i]; i++) {
@@ -398,12 +400,33 @@ static int prepare_metric(struct evsel **metric_events,
 			source_count = 1;
 		} else {
 			struct perf_stat_evsel *ps = metric_events[i]->stats;
-			struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
+			struct perf_stat_aggr *aggr;
 
+			/*
+			 * If there are multiple uncore PMUs and we're not
+			 * reading the leader's stats, determine the stats for
+			 * the appropriate uncore PMU.
+			 */
+			if (evsel && evsel->metric_leader &&
+			    evsel->pmu != evsel->metric_leader->pmu &&
+			    mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
+				struct evsel *pos;
+
+				evlist__for_each_entry(evsel->evlist, pos) {
+					if (pos->pmu != evsel->pmu)
+						continue;
+					if (pos->metric_leader != mexp->metric_events[i])
+						continue;
+					ps = pos->stats;
+					source_count = 1;
+					break;
+				}
+			}
+			aggr = &ps->aggr[aggr_idx];
 			if (!aggr)
 				break;
 
-			if (!metric_events[i]->supported) {
+			if (!metric_events[i]->supported) {
 				/*
 				 * Not supported events will have a count of 0,
 				 * which can be confusing in a
@@ -414,13 +437,9 @@ static int prepare_metric(struct evsel **metric_events,
 				val = NAN;
 				source_count = 0;
 			} else {
-				/*
-				 * If an event was scaled during stat gathering,
-				 * reverse the scale before computing the
-				 * metric.
-				 */
-				val = aggr->counts.val * (1.0 / metric_events[i]->scale);
-				source_count = evsel__source_count(metric_events[i]);
+				val = aggr->counts.val;
+				if (!source_count)
+					source_count = evsel__source_count(metric_events[i]);
 			}
 		}
 		n = strdup(evsel__metric_id(metric_events[i]));
@@ -441,18 +460,18 @@ static int prepare_metric(struct evsel **metric_events,
 }
 
 static void generic_metric(struct perf_stat_config *config,
-			   const char *metric_expr,
-			   const char *metric_threshold,
-			   struct evsel **metric_events,
-			   struct metric_ref *metric_refs,
-			   char *name,
-			   const char *metric_name,
-			   const char *metric_unit,
-			   int runtime,
+			   struct metric_expr *mexp,
+			   struct evsel *evsel,
 			   int aggr_idx,
 			   struct perf_stat_output_ctx *out)
 {
 	print_metric_t print_metric = out->print_metric;
+	const char *metric_name = mexp->metric_name;
+	const char *metric_expr = mexp->metric_expr;
+	const char *metric_threshold = mexp->metric_threshold;
+	const char *metric_unit = mexp->metric_unit;
+	struct evsel * const *metric_events = mexp->metric_events;
+	int runtime = mexp->runtime;
 	struct expr_parse_ctx *pctx;
 	double ratio, scale, threshold;
 	int i;
@@ -467,7 +486,7 @@ static void generic_metric(struct perf_stat_config *config,
 	pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
 	pctx->sctx.runtime = runtime;
 	pctx->sctx.system_wide = config->system_wide;
-	i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
+	i = prepare_metric(mexp, evsel, pctx, aggr_idx);
 	if (i < 0) {
 		expr__ctx_free(pctx);
 		return;
@@ -502,18 +521,18 @@ static void generic_metric(struct perf_stat_config *config,
 				print_metric(config, ctxp, color, "%8.2f",
 					     metric_name ?
 					     metric_name :
-					     out->force_header ? name : "",
+					     out->force_header ? evsel->name : "",
 					     ratio);
 			}
 		} else {
 			print_metric(config, ctxp, color, /*unit=*/NULL,
 				     out->force_header ?
-				     (metric_name ? metric_name : name) : "", 0);
+				     (metric_name ?: evsel->name) : "", 0);
 		}
 	} else {
 		print_metric(config, ctxp, color, /*unit=*/NULL,
 			     out->force_header ?
-			     (metric_name ? metric_name : name) : "", 0);
+			     (metric_name ?: evsel->name) : "", 0);
 	}
 
 	expr__ctx_free(pctx);
@@ -528,7 +547,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
 	if (!pctx)
 		return NAN;
 
-	if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
+	if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
 		goto out;
 
 	if (expr__parse(&ratio, pctx, mexp->metric_expr))
@@ -630,10 +649,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
 		if ((*num)++ > 0)
 			out->new_line(config, ctxp);
 
-		generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
-			       mexp->metric_events, mexp->metric_refs, evsel->name,
-			       mexp->metric_name, mexp->metric_unit, mexp->runtime,
-			       aggr_idx, out);
+		generic_metric(config, mexp, evsel, aggr_idx, out);
 	}
 
 	return NULL;
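For readers skimming the hunks above: the change is essentially a parameter-object refactor. generic_metric() and prepare_metric() now receive the metric_expr (plus the evsel being printed) and unpack the fields themselves, instead of every caller passing a long positional argument list. A compilable toy sketch of the same pattern, using stand-in types rather than perf's real struct metric_expr and struct evsel definitions:

	#include <stdio.h>

	/*
	 * Stand-in types; the real struct evsel and struct metric_expr live
	 * under tools/perf/util/ and carry many more fields.
	 */
	struct evsel {
		const char *name;
	};

	struct metric_expr {
		const char *metric_expr;
		const char *metric_name;
		const char *metric_unit;
		int runtime;
	};

	/*
	 * After the refactor, the helper takes the metric_expr (and the evsel
	 * being printed) and unpacks the fields it needs itself.
	 */
	static void generic_metric(const struct metric_expr *mexp,
				   const struct evsel *evsel)
	{
		const char *name = mexp->metric_name ? mexp->metric_name : evsel->name;

		printf("%s: expr='%s' unit='%s' runtime=%d\n",
		       name, mexp->metric_expr, mexp->metric_unit, mexp->runtime);
	}

	int main(void)
	{
		struct evsel ev = { .name = "cycles" };
		struct metric_expr mexp = {
			.metric_expr = "instructions / cycles",
			.metric_name = "IPC",
			.metric_unit = "insn per cycle",
			.runtime = 0,
		};

		/* One pointer instead of eight positional arguments. */
		generic_metric(&mexp, &ev);
		return 0;
	}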