diff options
author | Ian Rogers <irogers@google.com> | 2024-02-20 23:07:53 -0800 |
---|---|---|
committer | Namhyung Kim <namhyung@kernel.org> | 2024-02-22 08:57:09 -0800 |
commit | a59fb796a36bb6c2b7e6e256a9e5f9ba18109937 (patch) | |
tree | c8cd6369418cfa273ca4d5f993e54d2348bb0bdd /tools/perf/util/stat-shadow.c | |
parent | eee41e6b287e2adfefbe3b6fc80c66097c076f89 (diff) |
perf metrics: Compute unmerged uncore metrics individually
When merging counts from multiple uncore PMUs the metric is only
computed for the metric leader. When merging/aggregation is disabled,
prior to this patch just the leader's metric would be computed. Fix
this by computing the metric for each PMU.
On a SkylakeX:
Before:
```
$ perf stat -A -M memory_bandwidth_total -a sleep 1
Performance counter stats for 'system wide':
CPU0 82,217 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.2 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total
CPU0 61,395 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1]
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1]
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU0 81,570 UNC_M_CAS_COUNT.RD [uncore_imc_2]
CPU18 113,886 UNC_M_CAS_COUNT.RD [uncore_imc_2]
CPU0 62,330 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU18 66,942 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU0 75,489 UNC_M_CAS_COUNT.RD [uncore_imc_3]
CPU18 27,958 UNC_M_CAS_COUNT.RD [uncore_imc_3]
CPU0 55,864 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU18 38,727 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4]
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4]
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU0 75,423 UNC_M_CAS_COUNT.RD [uncore_imc_5]
CPU18 104,527 UNC_M_CAS_COUNT.RD [uncore_imc_5]
CPU0 57,596 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU18 56,777 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU0 1,003,440,851 ns duration_time
1.003440851 seconds time elapsed
```
After:
```
$ perf stat -A -M memory_bandwidth_total -a sleep 1
Performance counter stats for 'system wide':
CPU0 88,968 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.5 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total
CPU0 59,498 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU0 88,635 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 9.5 MB/s memory_bandwidth_total
CPU18 117,975 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 11.5 MB/s memory_bandwidth_total
CPU0 60,829 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU18 62,105 UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU0 82,238 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 8.7 MB/s memory_bandwidth_total
CPU18 22,906 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 3.6 MB/s memory_bandwidth_total
CPU0 53,959 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU18 32,990 UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total
CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total
CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU0 83,595 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 8.9 MB/s memory_bandwidth_total
CPU18 110,151 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 10.5 MB/s memory_bandwidth_total
CPU0 56,540 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU18 53,816 UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU0 1,003,353,416 ns duration_time
```
Signed-off-by: Ian Rogers <irogers@google.com> |
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Kaige Ye <ye@kaige.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: John Garry <john.g.garry@oracle.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240221070754.4163916-2-irogers@google.com
Diffstat (limited to 'tools/perf/util/stat-shadow.c')
-rw-r--r-- | tools/perf/util/stat-shadow.c | 31 |
1 files changed, 27 insertions, 4 deletions
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 10b452792037..3466aa952442 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config, } static int prepare_metric(const struct metric_expr *mexp, + const struct evsel *evsel, struct expr_parse_ctx *pctx, int aggr_idx) { @@ -399,8 +400,29 @@ static int prepare_metric(const struct metric_expr *mexp, source_count = 1; } else { struct perf_stat_evsel *ps = metric_events[i]->stats; - struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx]; + struct perf_stat_aggr *aggr; + /* + * If there are multiple uncore PMUs and we're not + * reading the leader's stats, determine the stats for + * the appropriate uncore PMU. + */ + if (evsel && evsel->metric_leader && + evsel->pmu != evsel->metric_leader->pmu && + mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) { + struct evsel *pos; + + evlist__for_each_entry(evsel->evlist, pos) { + if (pos->pmu != evsel->pmu) + continue; + if (pos->metric_leader != mexp->metric_events[i]) + continue; + ps = pos->stats; + source_count = 1; + break; + } + } + aggr = &ps->aggr[aggr_idx]; if (!aggr) break; @@ -416,7 +438,8 @@ static int prepare_metric(const struct metric_expr *mexp, source_count = 0; } else { val = aggr->counts.val; - source_count = evsel__source_count(metric_events[i]); + if (!source_count) + source_count = evsel__source_count(metric_events[i]); } } n = strdup(evsel__metric_id(metric_events[i])); @@ -463,7 +486,7 @@ static void generic_metric(struct perf_stat_config *config, pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list); pctx->sctx.runtime = runtime; pctx->sctx.system_wide = config->system_wide; - i = prepare_metric(mexp, pctx, aggr_idx); + i = prepare_metric(mexp, evsel, pctx, aggr_idx); if (i < 0) { expr__ctx_free(pctx); return; @@ -524,7 +547,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx) if (!pctx) return NAN; - if (prepare_metric(mexp, pctx, aggr_idx) < 0) + if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0) goto out; if (expr__parse(&ratio, pctx, mexp->metric_expr)) |