Diffstat (limited to 'tools')
68 files changed, 2974 insertions, 1722 deletions
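Among the changes that follow, the new x86 hybrid event-parsing test (tools/perf/arch/x86/tests/hybrid.c) folds per-event outcomes into a single suite result: a FAIL anywhere wins over SKIP, and SKIP wins over OK. Below is a minimal standalone sketch of that precedence rule, mirroring combine_test_results() from the test further down; the result values match perf's tests.h, and the demo main() is illustrative only:

#include <stdio.h>

/* Result codes as defined by perf's test harness (tools/perf/tests/tests.h). */
enum { TEST_OK = 0, TEST_FAIL = -1, TEST_SKIP = -2 };

/* Mirrors combine_test_results() from the hybrid test below:
 * FAIL is sticky; SKIP survives unless a later result is FAIL. */
static int combine_test_results(int existing, int latest)
{
	if (existing == TEST_FAIL)
		return TEST_FAIL;
	if (existing == TEST_SKIP)
		return latest == TEST_OK ? TEST_SKIP : latest;
	return latest;
}

int main(void)
{
	int results[] = { TEST_OK, TEST_SKIP, TEST_OK, TEST_FAIL, TEST_OK };
	int ret = TEST_OK;

	for (unsigned int i = 0; i < sizeof(results) / sizeof(results[0]); i++)
		ret = combine_test_results(ret, results[i]);

	printf("suite result: %d\n", ret);	/* -1, i.e. TEST_FAIL */
	return 0;
}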
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index ba5d942e4c6a..4407b106d977 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -250,11 +250,20 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ mv $@+ $@ +# Generate date from either KBUILD_BUILD_TIMESTAMP or git log of +# the doc input file +PERF_DATE = $(strip \ + $(if $(KBUILD_BUILD_TIMESTAMP), \ + $(shell date -u -d '$(KBUILD_BUILD_TIMESTAMP)' +%Y-%m-%d), \ + $(shell git log -1 --pretty="format:%cd" \ + --date=short --no-show-signature $<))) + ifdef USE_ASCIIDOCTOR $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b manpage -d manpage \ - $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ + $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \ + -adocdate=$(PERF_DATE) -o $@+ $< && \ mv $@+ $@ endif @@ -266,9 +275,7 @@ $(OUTPUT)%.xml : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b docbook -d manpage \ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \ - -aperf_date=$(shell git log -1 --pretty="format:%cd" \ - --date=short --no-show-signature $<) \ - -o $@+ $< && \ + -aperf_date=$(PERF_DATE) -o $@+ $< && \ mv $@+ $@ XSLT = docbook.xsl diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 777a0d8ba7d1..ff9a52e44688 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -130,7 +130,7 @@ OPTIONS -F:: --fields:: Comma separated list of fields to print. Options are: - comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, + comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackinsnlen, brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat, diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index b951374bc49d..4908d54dd33b 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -43,6 +43,20 @@ const char *const powerpc_triplets[] = { NULL }; +const char *const riscv32_triplets[] = { + "riscv32-unknown-linux-gnu-", + "riscv32-linux-android-", + "riscv32-linux-gnu-", + NULL +}; + +const char *const riscv64_triplets[] = { + "riscv64-unknown-linux-gnu-", + "riscv64-linux-android-", + "riscv64-linux-gnu-", + NULL +}; + const char *const s390_triplets[] = { "s390-ibm-linux-", "s390x-linux-gnu-", @@ -164,6 +178,10 @@ static int perf_env__lookup_binutils_path(struct perf_env *env, path_list = arm64_triplets; else if (!strcmp(arch, "powerpc")) path_list = powerpc_triplets; + else if (!strcmp(arch, "riscv32")) + path_list = riscv32_triplets; + else if (!strcmp(arch, "riscv64")) + path_list = riscv64_triplets; else if (!strcmp(arch, "sh")) path_list = sh_triplets; else if (!strcmp(arch, "s390")) diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 305872692bfd..5c7bec25fee4 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -35,12 +35,14 @@ static struct ins x86__instructions[] = { { .name = "cs", .ops = &mov_ops, }, { .name = "dec", .ops = &dec_ops, }, { .name = "decl", .ops = &dec_ops, }, + { .name = "decq", .ops = &dec_ops, }, { .name = "divsd", .ops = &mov_ops, }, { .name = "divss", .ops = &mov_ops, }, { .name = "gs", .ops = 
&mov_ops, }, { .name = "imul", .ops = &mov_ops, }, { .name = "inc", .ops = &dec_ops, }, { .name = "incl", .ops = &dec_ops, }, + { .name = "incq", .ops = &dec_ops, }, { .name = "ja", .ops = &jump_ops, }, { .name = "jae", .ops = &jump_ops, }, { .name = "jb", .ops = &jump_ops, }, @@ -123,6 +125,8 @@ static struct ins x86__instructions[] = { { .name = "test", .ops = &mov_ops, }, { .name = "testb", .ops = &mov_ops, }, { .name = "testl", .ops = &mov_ops, }, + { .name = "testq", .ops = &mov_ops, }, + { .name = "tzcnt", .ops = &mov_ops, }, { .name = "ucomisd", .ops = &mov_ops, }, { .name = "ucomiss", .ops = &mov_ops, }, { .name = "vaddsd", .ops = &mov_ops, }, diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 902e9ea9b99e..33d39c1d3e64 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -11,6 +11,7 @@ int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest); int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest); int test__bp_modify(struct test_suite *test, int subtest); int test__x86_sample_parsing(struct test_suite *test, int subtest); +int test__hybrid(struct test_suite *test, int subtest); extern struct test_suite *arch_tests[]; diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 6f4e8636c3bf..08cc8b9c931e 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -3,5 +3,6 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += arch-tests.o perf-y += sample-parsing.o +perf-y += hybrid.o perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-test.o perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index aae6ea0fe52b..147ad0638bbb 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -22,6 +22,15 @@ struct test_suite suite__intel_pt = { DEFINE_SUITE("x86 bp modify", bp_modify); #endif DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing); +static struct test_case hybrid_tests[] = { + TEST_CASE_REASON("x86 hybrid event parsing", hybrid, "not hybrid"), + { .name = NULL, } +}; + +struct test_suite suite__hybrid = { + .desc = "x86 hybrid", + .test_cases = hybrid_tests, +}; struct test_suite *arch_tests[] = { #ifdef HAVE_DWARF_UNWIND_SUPPORT @@ -35,5 +44,6 @@ struct test_suite *arch_tests[] = { &suite__bp_modify, #endif &suite__x86_sample_parsing, + &suite__hybrid, NULL, }; diff --git a/tools/perf/arch/x86/tests/hybrid.c b/tools/perf/arch/x86/tests/hybrid.c new file mode 100644 index 000000000000..941a9edfed4e --- /dev/null +++ b/tools/perf/arch/x86/tests/hybrid.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "arch-tests.h" +#include "debug.h" +#include "evlist.h" +#include "evsel.h" +#include "pmu-hybrid.h" +#include "tests/tests.h" + +static bool test_config(const struct evsel *evsel, __u64 expected_config) +{ + return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == expected_config; +} + +static bool test_perf_config(const struct perf_evsel *evsel, __u64 expected_config) +{ + return (evsel->attr.config & PERF_HW_EVENT_MASK) == expected_config; +} + +static bool test_hybrid_type(const struct evsel *evsel, __u64 expected_config) +{ + return (evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT) == expected_config; +} + +static int test__hybrid_hw_event_with_pmu(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of 
entries", 1 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + return TEST_OK; +} + +static int test__hybrid_hw_group_event(struct evlist *evlist) +{ + struct evsel *evsel, *leader; + + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + return TEST_OK; +} + +static int test__hybrid_sw_hw_group_event(struct evlist *evlist) +{ + struct evsel *evsel, *leader; + + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + return TEST_OK; +} + +static int test__hybrid_hw_sw_group_event(struct evlist *evlist) +{ + struct evsel *evsel, *leader; + + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + return TEST_OK; +} + +static int test__hybrid_group_modifier1(struct evlist *evlist) +{ + struct evsel *evsel, *leader; + + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); + + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); + TEST_ASSERT_VAL("wrong 
config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); + return TEST_OK; +} + +static int test__hybrid_raw1(struct evlist *evlist) +{ + struct perf_evsel *evsel; + + perf_evlist__for_each_evsel(&evlist->core, evsel) { + struct perf_pmu *pmu = perf_pmu__find_by_type(evsel->attr.type); + + TEST_ASSERT_VAL("missing pmu", pmu); + TEST_ASSERT_VAL("unexpected pmu", !strncmp(pmu->name, "cpu_", 4)); + TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, 0x1a)); + } + return TEST_OK; +} + +static int test__hybrid_raw2(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 0x1a)); + return TEST_OK; +} + +static int test__hybrid_cache_event(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x2 == (evsel->core.attr.config & 0xffffffff)); + return TEST_OK; +} + +static int test__checkevent_pmu(struct evlist *evlist) +{ + + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 10 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config1", 1 == evsel->core.attr.config1); + TEST_ASSERT_VAL("wrong config2", 3 == evsel->core.attr.config2); + TEST_ASSERT_VAL("wrong config3", 0 == evsel->core.attr.config3); + /* + * The period value gets configured within evlist__config, + * while this test executes only parse events method. + */ + TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period); + + return TEST_OK; +} + +struct evlist_test { + const char *name; + bool (*valid)(void); + int (*check)(struct evlist *evlist); +}; + +static const struct evlist_test test__hybrid_events[] = { + { + .name = "cpu_core/cpu-cycles/", + .check = test__hybrid_hw_event_with_pmu, + /* 0 */ + }, + { + .name = "{cpu_core/cpu-cycles/,cpu_core/instructions/}", + .check = test__hybrid_hw_group_event, + /* 1 */ + }, + { + .name = "{cpu-clock,cpu_core/cpu-cycles/}", + .check = test__hybrid_sw_hw_group_event, + /* 2 */ + }, + { + .name = "{cpu_core/cpu-cycles/,cpu-clock}", + .check = test__hybrid_hw_sw_group_event, + /* 3 */ + }, + { + .name = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}", + .check = test__hybrid_group_modifier1, + /* 4 */ + }, + { + .name = "r1a", + .check = test__hybrid_raw1, + /* 5 */ + }, + { + .name = "cpu_core/r1a/", + .check = test__hybrid_raw2, + /* 6 */ + }, + { + .name = "cpu_core/config=10,config1,config2=3,period=1000/u", + .check = test__checkevent_pmu, + /* 7 */ + }, + { + .name = "cpu_core/LLC-loads/", + .check = test__hybrid_cache_event, + /* 8 */ + }, +}; + +static int test_event(const struct evlist_test *e) +{ + struct parse_events_error err; + struct evlist *evlist; + int ret; + + if (e->valid && !e->valid()) { + pr_debug("... 
SKIP\n"); + return TEST_OK; + } + + evlist = evlist__new(); + if (evlist == NULL) { + pr_err("Failed allocation"); + return TEST_FAIL; + } + parse_events_error__init(&err); + ret = parse_events(evlist, e->name, &err); + if (ret) { + pr_debug("failed to parse event '%s', err %d, str '%s'\n", + e->name, ret, err.str); + parse_events_error__print(&err, e->name); + ret = TEST_FAIL; + if (strstr(err.str, "can't access trace events")) + ret = TEST_SKIP; + } else { + ret = e->check(evlist); + } + parse_events_error__exit(&err); + evlist__delete(evlist); + + return ret; +} + +static int combine_test_results(int existing, int latest) +{ + if (existing == TEST_FAIL) + return TEST_FAIL; + if (existing == TEST_SKIP) + return latest == TEST_OK ? TEST_SKIP : latest; + return latest; +} + +static int test_events(const struct evlist_test *events, int cnt) +{ + int ret = TEST_OK; + + for (int i = 0; i < cnt; i++) { + const struct evlist_test *e = &events[i]; + int test_ret; + + pr_debug("running test %d '%s'\n", i, e->name); + test_ret = test_event(e); + if (test_ret != TEST_OK) { + pr_debug("Event test failure: test %d '%s'", i, e->name); + ret = combine_test_results(ret, test_ret); + } + } + + return ret; +} + +int test__hybrid(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +{ + if (!perf_pmu__has_hybrid()) + return TEST_SKIP; + + return test_events(test__hybrid_events, ARRAY_SIZE(test__hybrid_events)); +} diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index d4193479a364..1b6065841fb0 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -6,6 +6,7 @@ #include "util/event.h" #include "util/pmu-hybrid.h" #include "topdown.h" +#include "evsel.h" static int ___evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, @@ -67,8 +68,7 @@ int arch_evlist__add_default_attrs(struct evlist *evlist, int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs) { - if (topdown_sys_has_perf_metrics() && - (!lhs->pmu_name || !strncmp(lhs->pmu_name, "cpu", 3))) { + if (topdown_sys_has_perf_metrics() && evsel__sys_has_perf_metrics(lhs)) { /* Ensure the topdown slots comes first. 
*/ if (strcasestr(lhs->name, "slots")) return -1; diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 63cdf6ea6f6d..425a7e2fd6fb 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -342,7 +342,7 @@ static void hists__find_annotations(struct hists *hists, notes = symbol__annotation(he->ms.sym); if (notes->src == NULL) { find_next: - if (key == K_LEFT) + if (key == K_LEFT || key == '<') nd = rb_prev(nd); else nd = rb_next(nd); @@ -378,9 +378,11 @@ find_next: return; /* fall through */ case K_RIGHT: + case '>': next = rb_next(nd); break; case K_LEFT: + case '<': next = rb_prev(nd); break; default: diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 810e3376c7d6..ad2a9ae041f6 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -650,6 +650,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace) break; if (fwrite(buf, n, 1, stdout) != 1) break; + /* flush output since stdout is in full buffering mode due to pager */ + fflush(stdout); } } diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 1f5dbd5f0ba4..c6bd0aa4a56e 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -11,8 +11,8 @@ #include "builtin.h" #include "util/print-events.h" +#include "util/pmus.h" #include "util/pmu.h" -#include "util/pmu-hybrid.h" #include "util/debug.h" #include "util/metricgroup.h" #include "util/string2.h" @@ -429,7 +429,7 @@ int cmd_list(int argc, const char **argv) .print_event = default_print_event, .print_metric = default_print_metric, }; - const char *hybrid_name = NULL; + const char *cputype = NULL; const char *unit_name = NULL; bool json = false; struct option list_options[] = { @@ -443,8 +443,8 @@ int cmd_list(int argc, const char **argv) "Print information on the perf event names and expressions used internally by events."), OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated, "Print deprecated events."), - OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type", - "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."), + OPT_STRING(0, "cputype", &cputype, "cpu type", + "Limit PMU or metric printing to the given PMU (e.g. 
cpu, core or atom)."), OPT_STRING(0, "unit", &unit_name, "PMU name", "Limit PMU or metric printing to the specified PMU."), OPT_INCR(0, "debug", &verbose, @@ -484,10 +484,15 @@ int cmd_list(int argc, const char **argv) assert(default_ps.visited_metrics); if (unit_name) default_ps.pmu_glob = strdup(unit_name); - else if (hybrid_name) { - default_ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name); - if (!default_ps.pmu_glob) - pr_warning("WARNING: hybrid cputype is not supported!\n"); + else if (cputype) { + const struct perf_pmu *pmu = perf_pmus__pmu_for_pmu_filter(cputype); + + if (!pmu) { + pr_err("ERROR: cputype is not supported!\n"); + ret = -1; + goto out; + } + default_ps.pmu_glob = pmu->name; } } print_cb.print_start(ps); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index efa03e4ac2c9..ec0f2d5f189f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3335,6 +3335,14 @@ const char record_callchain_help[] = CALLCHAIN_RECORD_HELP static bool dry_run; +static struct parse_events_option_args parse_events_option_args = { + .evlistp = &record.evlist, +}; + +static struct parse_events_option_args switch_output_parse_events_option_args = { + .evlistp = &record.sb_evlist, +}; + /* * XXX Will stay a global variable till we fix builtin-script.c to stop messing * with it and switch to use the library functions in perf_evlist that came @@ -3343,7 +3351,7 @@ static bool dry_run; * using pipes, etc. */ static struct option __record_options[] = { - OPT_CALLBACK('e', "event", &record.evlist, "event", + OPT_CALLBACK('e', "event", &parse_events_option_args, "event", "event selector. use 'perf list' to list available events", parse_events_option), OPT_CALLBACK(0, "filter", &record.evlist, "filter", @@ -3496,7 +3504,8 @@ static struct option __record_options[] = { &record.switch_output.set, "signal or size[BKMG] or time[smhd]", "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", "signal"), - OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", + OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, + &record.switch_output_event_set, "switch output event", "switch output event selector. 
use 'perf list' to list available events", parse_events_option_new_evlist), OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c57be48d65bb..029d5a597233 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -133,6 +133,7 @@ enum perf_output_field { PERF_OUTPUT_VCPU = 1ULL << 38, PERF_OUTPUT_CGROUP = 1ULL << 39, PERF_OUTPUT_RETIRE_LAT = 1ULL << 40, + PERF_OUTPUT_DSOFF = 1ULL << 41, }; struct perf_script { @@ -174,6 +175,7 @@ struct output_option { {.str = "ip", .field = PERF_OUTPUT_IP}, {.str = "sym", .field = PERF_OUTPUT_SYM}, {.str = "dso", .field = PERF_OUTPUT_DSO}, + {.str = "dsoff", .field = PERF_OUTPUT_DSOFF}, {.str = "addr", .field = PERF_OUTPUT_ADDR}, {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, @@ -574,6 +576,9 @@ static void set_print_ip_opts(struct perf_event_attr *attr) if (PRINT_FIELD(DSO)) output[type].print_ip_opts |= EVSEL__PRINT_DSO; + if (PRINT_FIELD(DSOFF)) + output[type].print_ip_opts |= EVSEL__PRINT_DSOFF; + if (PRINT_FIELD(SYMOFFSET)) output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET; @@ -627,6 +632,10 @@ static int perf_session__check_output_opt(struct perf_session *session) if (evsel == NULL) continue; + /* 'dsoff' implies 'dso' field */ + if (output[j].fields & PERF_OUTPUT_DSOFF) + output[j].fields |= PERF_OUTPUT_DSO; + set_print_ip_opts(&evsel->core.attr); tod |= output[j].fields & PERF_OUTPUT_TOD; } @@ -929,18 +938,12 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, } printed += fprintf(fp, " 0x%"PRIx64, from); - if (PRINT_FIELD(DSO)) { - printed += fprintf(fp, "("); - printed += map__fprintf_dsoname(alf.map, fp); - printed += fprintf(fp, ")"); - } + if (PRINT_FIELD(DSO)) + printed += map__fprintf_dsoname_dsoff(alf.map, PRINT_FIELD(DSOFF), alf.addr, fp); printed += fprintf(fp, "/0x%"PRIx64, to); - if (PRINT_FIELD(DSO)) { - printed += fprintf(fp, "("); - printed += map__fprintf_dsoname(alt.map, fp); - printed += fprintf(fp, ")"); - } + if (PRINT_FIELD(DSO)) + printed += map__fprintf_dsoname_dsoff(alt.map, PRINT_FIELD(DSOFF), alt.addr, fp); printed += print_bstack_flags(fp, entries + i); } @@ -972,18 +975,12 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, thread__find_symbol_fb(thread, sample->cpumode, to, &alt); printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp); - if (PRINT_FIELD(DSO)) { - printed += fprintf(fp, "("); - printed += map__fprintf_dsoname(alf.map, fp); - printed += fprintf(fp, ")"); - } + if (PRINT_FIELD(DSO)) + printed += map__fprintf_dsoname_dsoff(alf.map, PRINT_FIELD(DSOFF), alf.addr, fp); printed += fprintf(fp, "%c", '/'); printed += symbol__fprintf_symname_offs(alt.sym, &alt, fp); - if (PRINT_FIELD(DSO)) { - printed += fprintf(fp, "("); - printed += map__fprintf_dsoname(alt.map, fp); - printed += fprintf(fp, ")"); - } + if (PRINT_FIELD(DSO)) + printed += map__fprintf_dsoname_dsoff(alt.map, PRINT_FIELD(DSOFF), alt.addr, fp); printed += print_bstack_flags(fp, entries + i); } @@ -1019,17 +1016,11 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, to = map__dso_map_ip(alt.map, to); printed += fprintf(fp, " 0x%"PRIx64, from); - if (PRINT_FIELD(DSO)) { - printed += fprintf(fp, "("); - printed += map__fprintf_dsoname(alf.map, fp); - printed += fprintf(fp, ")"); - } + if (PRINT_FIELD(DSO)) + printed += map__fprintf_dsoname_dsoff(alf.map, PRINT_FIELD(DSOFF), alf.addr, fp); printed += fprintf(fp, 
"/0x%"PRIx64, to); - if (PRINT_FIELD(DSO)) { - printed += fprintf(fp, "("); - printed += map__fprintf_dsoname(alt.map, fp); - printed += fprintf(fp, ")"); - } + if (PRINT_FIELD(DSO)) + printed += map__fprintf_dsoname_dsoff(alt.map, PRINT_FIELD(DSOFF), alt.addr, fp); printed += print_bstack_flags(fp, entries + i); } @@ -1393,11 +1384,8 @@ static int perf_sample__fprintf_addr(struct perf_sample *sample, printed += symbol__fprintf_symname(al.sym, fp); } - if (PRINT_FIELD(DSO)) { - printed += fprintf(fp, " ("); - printed += map__fprintf_dsoname(al.map, fp); - printed += fprintf(fp, ")"); - } + if (PRINT_FIELD(DSO)) + printed += map__fprintf_dsoname_dsoff(al.map, PRINT_FIELD(DSOFF), al.addr, fp); out: return printed; } @@ -3883,7 +3871,7 @@ int cmd_script(int argc, const char **argv) "comma separated output fields prepend with 'type:'. " "+field to add and -field to remove." "Valid types: hw,sw,trace,raw,synth. " - "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," + "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff" "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,data_src,weight,bpf-output,brstackinsn," "brstackinsnlen,brstackoff,callindent,insn,insnlen,synth," diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b9ad32f21e57..bc45cee3f77c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -44,6 +44,7 @@ #include "util/cgroup.h" #include <subcmd/parse-options.h> #include "util/parse-events.h" +#include "util/pmus.h" #include "util/pmu.h" #include "util/event.h" #include "util/evlist.h" @@ -69,7 +70,6 @@ #include "util/pfm.h" #include "util/bpf_counter.h" #include "util/iostat.h" -#include "util/pmu-hybrid.h" #include "util/util.h" #include "asm/bug.h" @@ -101,6 +101,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv); static struct evlist *evsel_list; +static struct parse_events_option_args parse_events_option_args = { + .evlistp = &evsel_list, +}; + static bool all_counters_use_bpf = true; static struct target target = { @@ -1085,10 +1089,11 @@ static int parse_stat_cgroups(const struct option *opt, return parse_cgroups(opt, str, unset); } -static int parse_hybrid_type(const struct option *opt, +static int parse_cputype(const struct option *opt, const char *str, int unset __maybe_unused) { + const struct perf_pmu *pmu; struct evlist *evlist = *(struct evlist **)opt->value; if (!list_empty(&evlist->core.entries)) { @@ -1096,11 +1101,12 @@ static int parse_hybrid_type(const struct option *opt, return -1; } - evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str); - if (!evlist->hybrid_pmu_name) { + pmu = perf_pmus__pmu_for_pmu_filter(str); + if (!pmu) { fprintf(stderr, "--cputype %s is not supported!\n", str); return -1; } + parse_events_option_args.pmu_filter = pmu->name; return 0; } @@ -1108,7 +1114,7 @@ static int parse_hybrid_type(const struct option *opt, static struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), - OPT_CALLBACK('e', "event", &evsel_list, "event", + OPT_CALLBACK('e', "event", &parse_events_option_args, "event", "event selector. use 'perf list' to list available events", parse_events_option), OPT_CALLBACK(0, "filter", &evsel_list, "filter", @@ -1226,7 +1232,7 @@ static struct option stat_options[] = { OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type", "Only enable events on applying cpu with this type " "for hybrid platform (e.g. 
core or atom)", - parse_hybrid_type), + parse_cputype), #ifdef HAVE_LIBPFM OPT_CALLBACK(0, "pfm-events", &evsel_list, "event", "libpfm4 event selector. use 'perf list' to list available events", @@ -1777,6 +1783,7 @@ static int add_default_attributes(void) }; struct perf_event_attr default_null_attrs[] = {}; + const char *pmu = parse_events_option_args.pmu_filter ?: "all"; /* Set attrs if no event is selected and !null_run: */ if (stat_config.null_run) @@ -1788,11 +1795,11 @@ static int add_default_attributes(void) * will use this approach. To determine transaction support * on an architecture test for such a metric name. */ - if (!metricgroup__has_metric("transaction")) { + if (!metricgroup__has_metric(pmu, "transaction")) { pr_err("Missing transaction metrics"); return -1; } - return metricgroup__parse_groups(evsel_list, "transaction", + return metricgroup__parse_groups(evsel_list, pmu, "transaction", stat_config.metric_no_group, stat_config.metric_no_merge, stat_config.metric_no_threshold, @@ -1817,7 +1824,7 @@ static int add_default_attributes(void) smi_reset = true; } - if (!metricgroup__has_metric("smi")) { + if (!metricgroup__has_metric(pmu, "smi")) { pr_err("Missing smi metrics"); return -1; } @@ -1825,7 +1832,7 @@ static int add_default_attributes(void) if (!force_metric_only) stat_config.metric_only = true; - return metricgroup__parse_groups(evsel_list, "smi", + return metricgroup__parse_groups(evsel_list, pmu, "smi", stat_config.metric_no_group, stat_config.metric_no_merge, stat_config.metric_no_threshold, @@ -1858,7 +1865,8 @@ static int add_default_attributes(void) "Please print the result regularly, e.g. -I1000\n"); } str[8] = stat_config.topdown_level + '0'; - if (metricgroup__parse_groups(evsel_list, str, + if (metricgroup__parse_groups(evsel_list, + pmu, str, /*metric_no_group=*/false, /*metric_no_merge=*/false, /*metric_no_threshold=*/true, @@ -1892,19 +1900,14 @@ static int add_default_attributes(void) * Add TopdownL1 metrics if they exist. To minimize * multiplexing, don't request threshold computation. */ - /* - * TODO: TopdownL1 is disabled on hybrid CPUs to avoid a crashes - * caused by exposing latent bugs. This is fixed properly in: - * https://lore.kernel.org/lkml/bff481ba-e60a-763f-0aa0-3ee53302c480@linux.intel.com/ - */ - if (metricgroup__has_metric("TopdownL1") && !perf_pmu__has_hybrid()) { + if (metricgroup__has_metric(pmu, "TopdownL1")) { struct evlist *metric_evlist = evlist__new(); struct evsel *metric_evsel; if (!metric_evlist) return -1; - if (metricgroup__parse_groups(metric_evlist, "TopdownL1", + if (metricgroup__parse_groups(metric_evlist, pmu, "TopdownL1", /*metric_no_group=*/false, /*metric_no_merge=*/false, /*metric_no_threshold=*/true, @@ -2428,7 +2431,9 @@ int cmd_stat(int argc, const char **argv) * knowing the target is system-wide. 
*/ if (metrics) { - metricgroup__parse_groups(evsel_list, metrics, + const char *pmu = parse_events_option_args.pmu_filter ?: "all"; + + metricgroup__parse_groups(evsel_list, pmu, metrics, stat_config.metric_no_group, stat_config.metric_no_merge, stat_config.metric_no_threshold, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index eb5740154bc0..48ee49e95c5e 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1440,12 +1440,15 @@ int cmd_top(int argc, const char **argv) .max_stack = sysctl__max_stack(), .nr_threads_synthesize = UINT_MAX, }; + struct parse_events_option_args parse_events_option_args = { + .evlistp = &top.evlist, + }; bool branch_call_mode = false; struct record_opts *opts = &top.record_opts; struct target *target = &opts->target; const char *disassembler_style = NULL, *objdump_path = NULL, *addr2line_path = NULL; const struct option options[] = { - OPT_CALLBACK('e', "event", &top.evlist, "event", + OPT_CALLBACK('e', "event", &parse_events_option_args, "event", "event selector. use 'perf list' to list available events", parse_events_option), OPT_U64('c', "count", &opts->user_interval, "event period to sample"), diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8ee3a45c3c54..b49d3abb1203 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4591,8 +4591,11 @@ do_concat: err = 0; if (lists[0]) { + struct parse_events_option_args parse_events_option_args = { + .evlistp = &trace->evlist, + }; struct option o = { - .value = &trace->evlist, + .value = &parse_events_option_args, }; err = parse_events_option(&o, lists[0], 0); } diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/branch.json new file mode 100644 index 000000000000..c751d57f2e19 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/branch.json @@ -0,0 +1,17 @@ +[ + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/bus.json new file mode 100644 index 000000000000..8623be121818 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/bus.json @@ -0,0 +1,32 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + }, + { + "ArchStdEvent": "BUS_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + }, + { + "ArchStdEvent": "BUS_ACCESS_SHARED" + }, + { + "ArchStdEvent": "BUS_ACCESS_NOT_SHARED" + }, + { + "ArchStdEvent": "BUS_ACCESS_NORMAL" + }, + { + "ArchStdEvent": "BUS_ACCESS_PERIPH" + }, + { + "ArchStdEvent": "BUS_ACCESS" + }, + { + "ArchStdEvent": "CNT_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json new file mode 100644 index 000000000000..fc0633054211 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json @@ -0,0 +1,104 @@ +[ + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": 
"L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L1D_TLB" + }, + { + "ArchStdEvent": "L1I_TLB" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2I_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "L2I_TLB" + }, + { + "ArchStdEvent": "DTLB_WALK" + }, + { + "ArchStdEvent": "ITLB_WALK" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS" + }, + { + "ArchStdEvent": "L1I_CACHE_LMISS" + }, + { + "ArchStdEvent": "L2D_CACHE_LMISS_RD" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json new file mode 100644 index 000000000000..95c30243f2b2 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json @@ -0,0 +1,698 @@ +[ + { + "PublicDescription": "Level 2 prefetch requests, refilled to L2 cache", + "EventCode": "0x10A", + "EventName": "L2_PREFETCH_REFILL", + "BriefDescription": "Level 2 prefetch requests, refilled to L2 cache" + }, + { + "PublicDescription": "Level 2 prefetch requests, late", + "EventCode": "0x10B", + "EventName": "L2_PREFETCH_UPGRADE", + "BriefDescription": "Level 2 prefetch requests, late" + }, + { + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB", + "EventCode": "0x110", + "EventName": "BPU_HIT_BTB", + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB" + }, + { + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB", + "EventCode": "0x111", + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB", + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor", + "EventCode": "0x112", + "EventName": "BPU_HIT_INDIRECT_PREDICTOR", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor", + "EventCode": "0x113", + "EventName": "BPU_HIT_RSB", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor" + }, + { + "PublicDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB", + "EventCode": "0x114", + "EventName": "BPU_UNCONDITIONAL_BRANCH_MISS_BTB", + "BriefDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB" + }, + { + "PublicDescription": "Predictable branch speculatively executed, unpredicted", + "EventCode": "0x115", + "EventName": "BPU_BRANCH_NO_HIT", + "BriefDescription": "Predictable branch speculatively 
executed, unpredicted" + }, + { + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict", + "EventCode": "0x116", + "EventName": "BPU_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict" + }, + { + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict", + "EventCode": "0x117", + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict", + "EventCode": "0x118", + "EventName": "BPU_INDIRECT_BRANCH_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict", + "EventCode": "0x119", + "EventName": "BPU_HIT_RSB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict", + "EventCode": "0x11a", + "EventName": "BPU_MISS_RSB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict" + }, + { + "PublicDescription": "Predictable branch speculatively executed, unpredicted, that mispredict", + "EventCode": "0x11b", + "EventName": "BPU_NO_PREDICTION_MISPREDICT", + "BriefDescription": "Predictable branch speculatively executed, unpredicted, that mispredict" + }, + { + "PublicDescription": "Predictable branch speculatively executed, unpredicted, that mispredict", + "EventCode": "0x11c", + "EventName": "BPU_BTB_UPDATE", + "BriefDescription": "Predictable branch speculatively executed, unpredicted, that mispredict" + }, + { + "PublicDescription": "Count predict pipe stalls due to speculative return address predictor full", + "EventCode": "0x11d", + "EventName": "BPU_RSB_FULL_STALL", + "BriefDescription": "Count predict pipe stalls due to speculative return address predictor full" + }, + { + "PublicDescription": "Macro-ops speculatively decoded", + "EventCode": "0x11f", + "EventName": "ICF_INST_SPEC_DECODE", + "BriefDescription": "Macro-ops speculatively decoded" + }, + { + "PublicDescription": "Flushes", + "EventCode": "0x120", + "EventName": "GPC_FLUSH", + "BriefDescription": "Flushes" + }, + { + "PublicDescription": "Flushes due to memory hazards", + "EventCode": "0x121", + "EventName": "BPU_FLUSH_MEM_FAULT", + "BriefDescription": "Flushes due to memory hazards" + }, + { + "PublicDescription": "ETM extout bit 0", + "EventCode": "0x141", + "EventName": "MSC_ETM_EXTOUT0", + "BriefDescription": "ETM extout bit 0" + }, + { + "PublicDescription": "ETM extout bit 1", + "EventCode": "0x142", + "EventName": "MSC_ETM_EXTOUT1", + "BriefDescription": "ETM extout bit 1" + }, + { + "PublicDescription": "ETM extout bit 2", + "EventCode": "0x143", + "EventName": 
"MSC_ETM_EXTOUT2", + "BriefDescription": "ETM extout bit 2" + }, + { + "PublicDescription": "ETM extout bit 3", + "EventCode": "0x144", + "EventName": "MSC_ETM_EXTOUT3", + "BriefDescription": "ETM extout bit 3" + }, + { + "PublicDescription": "Bus request sn", + "EventCode": "0x156", + "EventName": "L2C_SNOOP", + "BriefDescription": "Bus request sn" + }, + { + "PublicDescription": "L2 TXDAT LCRD blocked", + "EventCode": "0x169", + "EventName": "L2C_DAT_CRD_STALL", + "BriefDescription": "L2 TXDAT LCRD blocked" + }, + { + "PublicDescription": "L2 TXRSP LCRD blocked", + "EventCode": "0x16a", + "EventName": "L2C_RSP_CRD_STALL", + "BriefDescription": "L2 TXRSP LCRD blocked" + }, + { + "PublicDescription": "L2 TXREQ LCRD blocked", + "EventCode": "0x16b", + "EventName": "L2C_REQ_CRD_STALL", + "BriefDescription": "L2 TXREQ LCRD blocked" + }, + { + "PublicDescription": "Early mispredict", + "EventCode": "0xD100", + "EventName": "ICF_EARLY_MIS_PRED", + "BriefDescription": "Early mispredict" + }, + { + "PublicDescription": "FEQ full cycles", + "EventCode": "0xD101", + "EventName": "ICF_FEQ_FULL", + "BriefDescription": "FEQ full cycles" + }, + { + "PublicDescription": "Instruction FIFO Full", + "EventCode": "0xD102", + "EventName": "ICF_INST_FIFO_FULL", + "BriefDescription": "Instruction FIFO Full" + }, + { + "PublicDescription": "L1I TLB miss", + "EventCode": "0xD103", + "EventName": "L1I_TLB_MISS", + "BriefDescription": "L1I TLB miss" + }, + { + "PublicDescription": "ICF sent 0 instructions to IDR this cycle", + "EventCode": "0xD104", + "EventName": "ICF_STALL", + "BriefDescription": "ICF sent 0 instructions to IDR this cycle" + }, + { + "PublicDescription": "PC FIFO Full", + "EventCode": "0xD105", + "EventName": "ICF_PC_FIFO_FULL", + "BriefDescription": "PC FIFO Full" + }, + { + "PublicDescription": "Stall due to BOB ID", + "EventCode": "0xD200", + "EventName": "IDR_STALL_BOB_ID", + "BriefDescription": "Stall due to BOB ID" + }, + { + "PublicDescription": "Dispatch stall due to LOB entries", + "EventCode": "0xD201", + "EventName": "IDR_STALL_LOB_ID", + "BriefDescription": "Dispatch stall due to LOB entries" + }, + { + "PublicDescription": "Dispatch stall due to SOB entries", + "EventCode": "0xD202", + "EventName": "IDR_STALL_SOB_ID", + "BriefDescription": "Dispatch stall due to SOB entries" + }, + { + "PublicDescription": "Dispatch stall due to IXU scheduler entries", + "EventCode": "0xD203", + "EventName": "IDR_STALL_IXU_SCHED", + "BriefDescription": "Dispatch stall due to IXU scheduler entries" + }, + { + "PublicDescription": "Dispatch stall due to FSU scheduler entries", + "EventCode": "0xD204", + "EventName": "IDR_STALL_FSU_SCHED", + "BriefDescription": "Dispatch stall due to FSU scheduler entries" + }, + { + "PublicDescription": "Dispatch stall due to ROB entries", + "EventCode": "0xD205", + "EventName": "IDR_STALL_ROB_ID", + "BriefDescription": "Dispatch stall due to ROB entries" + }, + { + "PublicDescription": "Dispatch stall due to flush (6 cycles)", + "EventCode": "0xD206", + "EventName": "IDR_STALL_FLUSH", + "BriefDescription": "Dispatch stall due to flush (6 cycles)" + }, + { + "PublicDescription": "Dispatch stall due to WFI", + "EventCode": "0xD207", + "EventName": "IDR_STALL_WFI", + "BriefDescription": "Dispatch stall due to WFI" + }, + { + "PublicDescription": "Number of SWOB drains triggered by timeout", + "EventCode": "0xD208", + "EventName": "IDR_STALL_SWOB_TIMEOUT", + "BriefDescription": "Number of SWOB drains triggered by timeout" + }, + { + "PublicDescription": "Number of SWOB 
drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain", + "EventCode": "0xD209", + "EventName": "IDR_STALL_SWOB_RAW", + "BriefDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain" + }, + { + "PublicDescription": "Number of SWOB drains triggered by system register write when SWOB full", + "EventCode": "0xD20A", + "EventName": "IDR_STALL_SWOB_FULL", + "BriefDescription": "Number of SWOB drains triggered by system register write when SWOB full" + }, + { + "PublicDescription": "Dispatch stall due to L1 instruction cache miss", + "EventCode": "0xD20B", + "EventName": "STALL_FRONTEND_CACHE", + "BriefDescription": "Dispatch stall due to L1 instruction cache miss" + }, + { + "PublicDescription": "Dispatch stall due to L1 instruction TLB miss", + "EventCode": "0xD20C", + "EventName": "STALL_FRONTEND_TLB", + "BriefDescription": "Dispatch stall due to L1 instruction TLB miss" + }, + { + "PublicDescription": "Dispatch stall due to L1 data cache miss", + "EventCode": "0xD20D", + "EventName": "STALL_BACKEND_CACHE", + "BriefDescription": "Dispatch stall due to L1 data cache miss" + }, + { + "PublicDescription": "Dispatch stall due to L1 data TLB miss", + "EventCode": "0xD20E", + "EventName": "STALL_BACKEND_TLB", + "BriefDescription": "Dispatch stall due to L1 data TLB miss" + }, + { + "PublicDescription": "Dispatch stall due to lack of any core resource", + "EventCode": "0xD20F", + "EventName": "STALL_BACKEND_RESOURCE", + "BriefDescription": "Dispatch stall due to lack of any core resource" + }, + { + "PublicDescription": "Instructions issued by the scheduler", + "EventCode": "0xD300", + "EventName": "IXU_NUM_UOPS_ISSUED", + "BriefDescription": "Instructions issued by the scheduler" + }, + { + "PublicDescription": "Any uop issued was canceled for any reason", + "EventCode": "0xD301", + "EventName": "IXU_ISSUE_CANCEL", + "BriefDescription": "Any uop issued was canceled for any reason" + }, + { + "PublicDescription": "A load wakeup to the scheduler has been cancelled", + "EventCode": "0xD302", + "EventName": "IXU_LOAD_CANCEL", + "BriefDescription": "A load wakeup to the scheduler has been cancelled" + }, + { + "PublicDescription": "The scheduler had to cancel one slow Uop due to resource conflict", + "EventCode": "0xD303", + "EventName": "IXU_SLOW_CANCEL", + "BriefDescription": "The scheduler had to cancel one slow Uop due to resource conflict" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA", + "EventCode": "0xD304", + "EventName": "IXU_IXA_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA Par 0", + "EventCode": "0xD305", + "EventName": "IXU_IXA_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA Par 1", + "EventCode": "0xD306", + "EventName": "IXU_IXA_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB", + "EventCode": "0xD307", + "EventName": "IXU_IXB_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB Par 0", + "EventCode": "0xD308", + "EventName": "IXU_IXB_PAR0_ISSUED", + "BriefDescription": "Uops issued by 
the scheduler on IXB Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB Par 1", + "EventCode": "0xD309", + "EventName": "IXU_IXB_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC", + "EventCode": "0xD30A", + "EventName": "IXU_IXC_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC Par 0", + "EventCode": "0xD30B", + "EventName": "IXU_IXC_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC Par 1", + "EventCode": "0xD30C", + "EventName": "IXU_IXC_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD", + "EventCode": "0xD30D", + "EventName": "IXU_IXD_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD Par 0", + "EventCode": "0xD30E", + "EventName": "IXU_IXD_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD Par 1", + "EventCode": "0xD30F", + "EventName": "IXU_IXD_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD Par 1" + }, + { + "PublicDescription": "Uops issued by the FSU scheduler", + "EventCode": "0xD400", + "EventName": "FSU_ISSUED", + "BriefDescription": "Uops issued by the FSU scheduler" + }, + { + "PublicDescription": "Uops issued by the scheduler on pipe X", + "EventCode": "0xD401", + "EventName": "FSU_FSX_ISSUED", + "BriefDescription": "Uops issued by the scheduler on pipe X" + }, + { + "PublicDescription": "Uops issued by the scheduler on pipe Y", + "EventCode": "0xD402", + "EventName": "FSU_FSY_ISSUED", + "BriefDescription": "Uops issued by the scheduler on pipe Y" + }, + { + "PublicDescription": "Uops issued by the scheduler on pipe Z", + "EventCode": "0xD403", + "EventName": "FSU_FSZ_ISSUED", + "BriefDescription": "Uops issued by the scheduler on pipe Z" + }, + { + "PublicDescription": "Uops canceled (load cancels)", + "EventCode": "0xD404", + "EventName": "FSU_CANCEL", + "BriefDescription": "Uops canceled (load cancels)" + }, + { + "PublicDescription": "Count scheduler stalls due to divide/sqrt", + "EventCode": "0xD405", + "EventName": "FSU_DIV_SQRT_STALL", + "BriefDescription": "Count scheduler stalls due to divide/sqrt" + }, + { + "PublicDescription": "Number of SWOB drains", + "EventCode": "0xD500", + "EventName": "GPC_SWOB_DRAIN", + "BriefDescription": "Number of SWOB drains" + }, + { + "PublicDescription": "GPC detected a Breakpoint instruction match", + "EventCode": "0xD501", + "EventName": "BREAKPOINT_MATCH", + "BriefDescription": "GPC detected a Breakpoint instruction match" + }, + { + "PublicDescription": "L1D TLB miss", + "EventCode": "0xD600", + "EventName": "L1D_TLB_MISS", + "BriefDescription": "L1D TLB miss" + }, + { + "PublicDescription": "OFB full cycles", + "EventCode": "0xD601", + "EventName": "OFB_FULL", + "BriefDescription": "OFB full cycles" + }, + { + "PublicDescription": "Load satisfied from store forwarded data", + "EventCode": "0xD605", + "EventName": "LD_FROM_ST_FWD", + "BriefDescription": "Load satisfied from store forwarded data" + }, + { + "PublicDescription": "L1 prefetcher, load prefetch requests generated", + "EventCode": "0xD606", + "EventName": 
"L1_PFETCH_LD_GEN", + "BriefDescription": "L1 prefetcher, load prefetch requests generated" + }, + { + "PublicDescription": "L1 prefetcher, load prefetch fills into the L1 cache", + "EventCode": "0xD607", + "EventName": "L1_PFETCH_LD_FILL", + "BriefDescription": "L1 prefetcher, load prefetch fills into the L1 cache" + }, + { + "PublicDescription": "L1 prefetcher, load prefetch to L2 generated", + "EventCode": "0xD608", + "EventName": "L1_PFETCH_L2_REQ", + "BriefDescription": "L1 prefetcher, load prefetch to L2 generated" + }, + { + "PublicDescription": "L1 prefetcher, distance was reset", + "EventCode": "0xD609", + "EventName": "L1_PFETCH_DIST_RST", + "BriefDescription": "L1 prefetcher, distance was reset" + }, + { + "PublicDescription": "L1 prefetcher, distance was increased", + "EventCode": "0xD60A", + "EventName": "L1_PFETCH_DIST_INC", + "BriefDescription": "L1 prefetcher, distance was increased" + }, + { + "PublicDescription": "L1 prefetcher, table entry is trained", + "EventCode": "0xD60B", + "EventName": "L1_PFETCH_ENTRY_TRAINED", + "BriefDescription": "L1 prefetcher, table entry is trained" + }, + { + "PublicDescription": "Store retirement pipe stall", + "EventCode": "0xD60C", + "EventName": "LSU_ST_RETIRE_STALL", + "BriefDescription": "Store retirement pipe stall" + }, + { + "PublicDescription": "LSU detected a Watchpoint data match", + "EventCode": "0xD60D", + "EventName": "WATCHPOINT_MATCH", + "BriefDescription": "LSU detected a Watchpoint data match" + }, + { + "PublicDescription": "L2 pipeline replay", + "EventCode": "0xD700", + "EventName": "L2C_PIPE_REPLAY", + "BriefDescription": "L2 pipeline replay" + }, + { + "PublicDescription": "L2 refill from I-side miss", + "EventCode": "0xD701", + "EventName": "L2C_INST_REFILL", + "BriefDescription": "L2 refill from I-side miss" + }, + { + "PublicDescription": "L2 refill from D-side miss", + "EventCode": "0xD702", + "EventName": "L2C_DATA_REFILL", + "BriefDescription": "L2 refill from D-side miss" + }, + { + "PublicDescription": "L2 prefetcher, load prefetch requests generated", + "EventCode": "0xD703", + "EventName": "L2_PREFETCH_REQ", + "BriefDescription": "L2 prefetcher, load prefetch requests generated" + }, + { + "PublicDescription": "L2D OTB allocate", + "EventCode": "0xD800", + "EventName": "MMU_D_OTB_ALLOC", + "BriefDescription": "L2D OTB allocate" + }, + { + "PublicDescription": "DTLB Translation cache hit on S1L2 walk cache entry", + "EventCode": "0xD801", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L2_WALK", + "BriefDescription": "DTLB Translation cache hit on S1L2 walk cache entry" + }, + { + "PublicDescription": "DTLB Translation cache hit on S1L1 walk cache entry", + "EventCode": "0xD802", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L1_WALK", + "BriefDescription": "DTLB Translation cache hit on S1L1 walk cache entry" + }, + { + "PublicDescription": "DTLB Translation cache hit on S1L0 walk cache entry", + "EventCode": "0xD803", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L0_WALK", + "BriefDescription": "DTLB Translation cache hit on S1L0 walk cache entry" + }, + { + "PublicDescription": "DTLB Translation cache hit on S2L2 walk cache entry", + "EventCode": "0xD804", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L2_WALK", + "BriefDescription": "DTLB Translation cache hit on S2L2 walk cache entry" + }, + { + "PublicDescription": "DTLB Translation cache hit on S2L1 walk cache entry", + "EventCode": "0xD805", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L1_WALK", + "BriefDescription": "DTLB Translation cache hit on S2L1 walk cache entry" + }, + { + 
"PublicDescription": "DTLB Translation cache hit on S2L0 walk cache entry", + "EventCode": "0xD806", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L0_WALK", + "BriefDescription": "DTLB Translation cache hit on S2L0 walk cache entry" + }, + { + "PublicDescription": "D-side S1 Page walk cache lookup", + "EventCode": "0xD807", + "EventName": "MMU_D_S1_WALK_CACHE_LOOKUP", + "BriefDescription": "D-side S1 Page walk cache lookup" + }, + { + "PublicDescription": "D-side S1 Page walk cache refill", + "EventCode": "0xD808", + "EventName": "MMU_D_S1_WALK_CACHE_REFILL", + "BriefDescription": "D-side S1 Page walk cache refill" + }, + { + "PublicDescription": "D-side S2 Page walk cache lookup", + "EventCode": "0xD809", + "EventName": "MMU_D_S2_WALK_CACHE_LOOKUP", + "BriefDescription": "D-side S2 Page walk cache lookup" + }, + { + "PublicDescription": "D-side S2 Page walk cache refill", + "EventCode": "0xD80A", + "EventName": "MMU_D_S2_WALK_CACHE_REFILL", + "BriefDescription": "D-side S2 Page walk cache refill" + }, + { + "PublicDescription": "D-side Stage1 tablewalk fault", + "EventCode": "0xD80B", + "EventName": "MMU_D_S1_WALK_FAULT", + "BriefDescription": "D-side Stage1 tablewalk fault" + }, + { + "PublicDescription": "D-side Stage2 tablewalk fault", + "EventCode": "0xD80C", + "EventName": "MMU_D_S2_WALK_FAULT", + "BriefDescription": "D-side Stage2 tablewalk fault" + }, + { + "PublicDescription": "D-side Tablewalk steps or descriptor fetches", + "EventCode": "0xD80D", + "EventName": "MMU_D_WALK_STEPS", + "BriefDescription": "D-side Tablewalk steps or descriptor fetches" + }, + { + "PublicDescription": "L2I OTB allocate", + "EventCode": "0xD900", + "EventName": "MMU_I_OTB_ALLOC", + "BriefDescription": "L2I OTB allocate" + }, + { + "PublicDescription": "ITLB Translation cache hit on S1L2 walk cache entry", + "EventCode": "0xD901", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L2_WALK", + "BriefDescription": "ITLB Translation cache hit on S1L2 walk cache entry" + }, + { + "PublicDescription": "ITLB Translation cache hit on S1L1 walk cache entry", + "EventCode": "0xD902", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L1_WALK", + "BriefDescription": "ITLB Translation cache hit on S1L1 walk cache entry" + }, + { + "PublicDescription": "ITLB Translation cache hit on S1L0 walk cache entry", + "EventCode": "0xD903", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L0_WALK", + "BriefDescription": "ITLB Translation cache hit on S1L0 walk cache entry" + }, + { + "PublicDescription": "ITLB Translation cache hit on S2L2 walk cache entry", + "EventCode": "0xD904", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L2_WALK", + "BriefDescription": "ITLB Translation cache hit on S2L2 walk cache entry" + }, + { + "PublicDescription": "ITLB Translation cache hit on S2L1 walk cache entry", + "EventCode": "0xD905", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L1_WALK", + "BriefDescription": "ITLB Translation cache hit on S2L1 walk cache entry" + }, + { + "PublicDescription": "ITLB Translation cache hit on S2L0 walk cache entry", + "EventCode": "0xD906", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L0_WALK", + "BriefDescription": "ITLB Translation cache hit on S2L0 walk cache entry" + }, + { + "PublicDescription": "I-side S1 Page walk cache lookup", + "EventCode": "0xD907", + "EventName": "MMU_I_S1_WALK_CACHE_LOOKUP", + "BriefDescription": "I-side S1 Page walk cache lookup" + }, + { + "PublicDescription": "I-side S1 Page walk cache refill", + "EventCode": "0xD908", + "EventName": "MMU_I_S1_WALK_CACHE_REFILL", + "BriefDescription": "I-side S1 Page walk cache refill" + }, 
+    {
+        "PublicDescription": "I-side S2 Page walk cache lookup",
+        "EventCode": "0xD909",
+        "EventName": "MMU_I_S2_WALK_CACHE_LOOKUP",
+        "BriefDescription": "I-side S2 Page walk cache lookup"
+    },
+    {
+        "PublicDescription": "I-side S2 Page walk cache refill",
+        "EventCode": "0xD90A",
+        "EventName": "MMU_I_S2_WALK_CACHE_REFILL",
+        "BriefDescription": "I-side S2 Page walk cache refill"
+    },
+    {
+        "PublicDescription": "I-side Stage1 tablewalk fault",
+        "EventCode": "0xD90B",
+        "EventName": "MMU_I_S1_WALK_FAULT",
+        "BriefDescription": "I-side Stage1 tablewalk fault"
+    },
+    {
+        "PublicDescription": "I-side Stage2 tablewalk fault",
+        "EventCode": "0xD90C",
+        "EventName": "MMU_I_S2_WALK_FAULT",
+        "BriefDescription": "I-side Stage2 tablewalk fault"
+    },
+    {
+        "PublicDescription": "I-side Tablewalk steps or descriptor fetches",
+        "EventCode": "0xD90D",
+        "EventName": "MMU_I_WALK_STEPS",
+        "BriefDescription": "I-side Tablewalk steps or descriptor fetches"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/exception.json
new file mode 100644
index 000000000000..ada052e19632
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/exception.json
@@ -0,0 +1,44 @@
+[
+    {
+        "ArchStdEvent": "EXC_UNDEF"
+    },
+    {
+        "ArchStdEvent": "EXC_SVC"
+    },
+    {
+        "ArchStdEvent": "EXC_PABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_DABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_IRQ"
+    },
+    {
+        "ArchStdEvent": "EXC_FIQ"
+    },
+    {
+        "ArchStdEvent": "EXC_HVC"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_PABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_DABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_OTHER"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_IRQ"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_FIQ"
+    },
+    {
+        "ArchStdEvent": "EXC_TAKEN"
+    },
+    {
+        "ArchStdEvent": "EXC_RETURN"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json
new file mode 100644
index 000000000000..18d1f2f76a23
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json
@@ -0,0 +1,89 @@
+[
+    {
+        "ArchStdEvent": "SW_INCR"
+    },
+    {
+        "ArchStdEvent": "ST_RETIRED"
+    },
+    {
+        "ArchStdEvent": "OP_SPEC"
+    },
+    {
+        "ArchStdEvent": "LD_SPEC"
+    },
+    {
+        "ArchStdEvent": "ST_SPEC"
+    },
+    {
+        "ArchStdEvent": "LDST_SPEC"
+    },
+    {
+        "ArchStdEvent": "DP_SPEC"
+    },
+    {
+        "ArchStdEvent": "ASE_SPEC"
+    },
+    {
+        "ArchStdEvent": "VFP_SPEC"
+    },
+    {
+        "ArchStdEvent": "PC_WRITE_SPEC"
+    },
+    {
+        "ArchStdEvent": "BR_IMMED_RETIRED"
+    },
+    {
+        "ArchStdEvent": "BR_RETURN_RETIRED"
+    },
+    {
+        "ArchStdEvent": "CRYPTO_SPEC"
+    },
+    {
+        "ArchStdEvent": "ISB_SPEC"
+    },
+    {
+        "ArchStdEvent": "DSB_SPEC"
+    },
+    {
+        "ArchStdEvent": "DMB_SPEC"
+    },
+    {
+        "ArchStdEvent": "RC_LD_SPEC"
+    },
+    {
+        "ArchStdEvent": "RC_ST_SPEC"
+    },
+    {
+        "ArchStdEvent": "INST_RETIRED"
+    },
+    {
+        "ArchStdEvent": "CID_WRITE_RETIRED"
+    },
+    {
+        "ArchStdEvent": "PC_WRITE_RETIRED"
+    },
+    {
+        "ArchStdEvent": "INST_SPEC"
+    },
+    {
+        "ArchStdEvent": "TTBR_WRITE_RETIRED"
+    },
+    {
+        "ArchStdEvent": "BR_RETIRED"
+    },
+    {
+        "ArchStdEvent": "BR_MIS_PRED_RETIRED"
+    },
+    {
+        "ArchStdEvent": "OP_RETIRED"
+    },
+    {
+        "ArchStdEvent": "OP_SPEC"
+    },
+    {
+        "PublicDescription": "Operation speculatively executed, NOP",
+        "EventCode": "0x100",
+        "EventName": "NOP_SPEC",
+        "BriefDescription": "Speculatively executed, NOP"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/intrinsic.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/intrinsic.json
new file mode 100644
index 000000000000..7ecffb989ae0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/intrinsic.json
@@ -0,0 +1,14 @@
+[
+    {
+        "ArchStdEvent": "LDREX_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_PASS_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_FAIL_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_SPEC"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/memory.json
new file mode 100644
index 000000000000..0711782bfa6b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/memory.json
@@ -0,0 +1,44 @@
+[
+    {
+        "ArchStdEvent": "LD_RETIRED"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_RD"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_WR"
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_LD_SPEC"
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_ST_SPEC"
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_LDST_SPEC"
+    },
+    {
+        "ArchStdEvent": "LD_ALIGN_LAT"
+    },
+    {
+        "ArchStdEvent": "ST_ALIGN_LAT"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS"
+    },
+    {
+        "ArchStdEvent": "MEMORY_ERROR"
+    },
+    {
+        "ArchStdEvent": "LDST_ALIGN_LAT"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_CHECKED"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_CHECKED_RD"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_CHECKED_WR"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json
new file mode 100644
index 000000000000..f9fae15f7555
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json
@@ -0,0 +1,23 @@
+[
+    {
+        "ArchStdEvent": "STALL_FRONTEND"
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND"
+    },
+    {
+        "ArchStdEvent": "STALL"
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT_BACKEND"
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT_FRONTEND"
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT"
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_MEM"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/spe.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/spe.json
new file mode 100644
index 000000000000..20f2165c85fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/spe.json
@@ -0,0 +1,14 @@
+[
+    {
+        "ArchStdEvent": "SAMPLE_POP"
+    },
+    {
+        "ArchStdEvent": "SAMPLE_FEED"
+    },
+    {
+        "ArchStdEvent": "SAMPLE_FILTRATE"
+    },
+    {
+        "ArchStdEvent": "SAMPLE_COLLISION"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 9d400785d195..32674ddd2b63 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -41,3 +41,4 @@
 0x00000000460f0010,v1,fujitsu/a64fx,core
 0x00000000480fd010,v1,hisilicon/hip08,core
 0x00000000500f0000,v1,ampere/emag,core
+0x00000000c00fac30,v1,ampere/ampereone,core
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index 1f9047553942..840f6f6fc8c5 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -151,7 +151,7 @@
     },
     {
         "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear",
-        "MetricExpr": "(tma_info_slots - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / tma_info_slots",
+        "MetricExpr": "(tma_info_slots - (cpu_atom@TOPDOWN_FE_BOUND.ALL@ + cpu_atom@TOPDOWN_BE_BOUND.ALL@ + cpu_atom@TOPDOWN_RETIRING.ALL@)) / tma_info_slots",
         "MetricGroup": "TopdownL1;tma_L1_group",
         "MetricName": "tma_bad_speculation",
         "MetricThreshold": "tma_bad_speculation > 0.15",
@@ -162,7 +162,7 @@
     },
     {
         "BriefDescription": "Counts the number of uops that are not from the microsequencer.",
-        "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / tma_info_slots",
+        "MetricExpr": "(cpu_atom@TOPDOWN_RETIRING.ALL@ - cpu_atom@UOPS_RETIRED.MS@) / tma_info_slots",
         "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
         "MetricName": "tma_base",
         "MetricThreshold": "tma_base > 0.6",
@@ -229,7 +229,7 @@
     },
     {
         "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory disambiguation.",
-        "MetricExpr": "tma_nuke * (MACHINE_CLEARS.DISAMBIGUATION / MACHINE_CLEARS.SLOW)",
+        "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.DISAMBIGUATION@ / cpu_atom@MACHINE_CLEARS.SLOW@)",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group",
         "MetricName": "tma_disambiguation",
         "MetricThreshold": "tma_disambiguation > 0.02",
@@ -239,7 +239,7 @@
     {
         "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD",
+        "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / tma_info_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
         "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_dram_bound",
         "MetricThreshold": "tma_dram_bound > 0.1",
@@ -277,7 +277,7 @@
     },
     {
         "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to FP assists.",
-        "MetricExpr": "tma_nuke * (MACHINE_CLEARS.FP_ASSIST / MACHINE_CLEARS.SLOW)",
+        "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.FP_ASSIST@ / cpu_atom@MACHINE_CLEARS.SLOW@)",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group",
         "MetricName": "tma_fp_assist",
         "MetricThreshold": "tma_fp_assist > 0.02",
@@ -314,7 +314,7 @@
     },
     {
         "BriefDescription": "Percentage of total non-speculative loads with a address aliasing block",
-        "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS",
+        "MetricExpr": "100 * cpu_atom@LD_BLOCKS.4K_ALIAS@ / MEM_UOPS_RETIRED.ALL_LOADS",
         "MetricName": "tma_info_address_alias_blocks",
         "Unit": "cpu_atom"
     },
@@ -334,14 +334,14 @@
     },
     {
         "BriefDescription": "",
-        "MetricExpr": "CPU_CLK_UNHALTED.CORE",
+        "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE@",
         "MetricGroup": " ",
         "MetricName": "tma_info_clks",
         "Unit": "cpu_atom"
     },
     {
         "BriefDescription": "",
-        "MetricExpr": "CPU_CLK_UNHALTED.CORE_P",
+        "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE_P@",
         "MetricGroup": " ",
         "MetricName": "tma_info_clks_p",
         "Unit": "cpu_atom"
@@ -383,35 +383,35 @@
     },
     {
         "BriefDescription": "Percentage of all uops which are FPDiv uops",
-        "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL",
+        "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.FPDIV@ / UOPS_RETIRED.ALL",
         "MetricGroup": " ",
         "MetricName": "tma_info_fpdiv_uop_ratio",
         "Unit": "cpu_atom"
     },
     {
         "BriefDescription": "Percentage of all uops which are IDiv uops",
-        "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL",
+        "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.IDIV@ / UOPS_RETIRED.ALL",
         "MetricGroup": " ",
"MetricName": "tma_info_idiv_uop_ratio", "Unit": "cpu_atom" }, { "BriefDescription": "Percent of instruction miss cost that hit in DRAM", - "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / MEM_BOUND_STALLS.IFETCH", + "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.IFETCH@", "MetricGroup": " ", "MetricName": "tma_info_inst_miss_cost_dramhit_percent", "Unit": "cpu_atom" }, { "BriefDescription": "Percent of instruction miss cost that hit in the L2", - "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / MEM_BOUND_STALLS.IFETCH", + "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.IFETCH@", "MetricGroup": " ", "MetricName": "tma_info_inst_miss_cost_l2hit_percent", "Unit": "cpu_atom" }, { "BriefDescription": "Percent of instruction miss cost that hit in the L3", - "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / MEM_BOUND_STALLS.IFETCH", + "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.IFETCH@", "MetricGroup": " ", "MetricName": "tma_info_inst_miss_cost_l3hit_percent", "Unit": "cpu_atom" @@ -439,7 +439,7 @@ }, { "BriefDescription": "Instructions per Far Branch", - "MetricExpr": "INST_RETIRED.ANY / (BR_INST_RETIRED.FAR_BRANCH / 2)", + "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)", "MetricGroup": " ", "MetricName": "tma_info_ipfarbranch", "Unit": "cpu_atom" @@ -453,7 +453,7 @@ }, { "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken", - "MetricExpr": "INST_RETIRED.ANY / (BR_MISP_RETIRED.COND - BR_MISP_RETIRED.COND_TAKEN)", + "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)", "MetricName": "tma_info_ipmisp_cond_ntaken", "Unit": "cpu_atom" }, @@ -498,20 +498,20 @@ }, { "BriefDescription": "Percentage of total non-speculative loads that are splits", - "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricExpr": "100 * cpu_atom@MEM_UOPS_RETIRED.SPLIT_LOADS@ / MEM_UOPS_RETIRED.ALL_LOADS", "MetricName": "tma_info_load_splits", "Unit": "cpu_atom" }, { "BriefDescription": "load ops retired per 1000 instruction", - "MetricExpr": "1e3 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_atom@MEM_UOPS_RETIRED.ALL_LOADS@ / INST_RETIRED.ANY", "MetricGroup": " ", "MetricName": "tma_info_memloadpki", "Unit": "cpu_atom" }, { "BriefDescription": "Percentage of all uops which are ucode ops", - "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL", + "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.MS@ / UOPS_RETIRED.ALL", "MetricGroup": " ", "MetricName": "tma_info_microcode_uop_ratio", "Unit": "cpu_atom" @@ -525,7 +525,7 @@ }, { "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", - "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricExpr": "100 * cpu_atom@LD_BLOCKS.DATA_UNKNOWN@ / MEM_UOPS_RETIRED.ALL_LOADS", "MetricName": "tma_info_store_fwd_blocks", "Unit": "cpu_atom" }, @@ -545,7 +545,7 @@ }, { "BriefDescription": "Percentage of all uops which are x87 uops", - "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL", + "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.X87@ / UOPS_RETIRED.ALL", "MetricGroup": " ", "MetricName": "tma_info_x87_uop_ratio", "Unit": "cpu_atom" @@ -571,7 +571,7 @@ { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load 
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD",
+        "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / tma_info_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
         "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.1",
@@ -580,7 +580,7 @@
     },
     {
         "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
-        "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
+        "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
         "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.1",
@@ -589,7 +589,7 @@
     },
     {
         "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to load buffer full",
-        "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.LD_BUF / MEM_SCHEDULER_BLOCK.ALL",
+        "MetricExpr": "tma_mem_scheduler * cpu_atom@MEM_SCHEDULER_BLOCK.LD_BUF@ / MEM_SCHEDULER_BLOCK.ALL",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_mem_scheduler_group",
         "MetricName": "tma_ld_buffer",
         "MetricThreshold": "tma_ld_buffer > 0.05",
@@ -617,7 +617,7 @@
     },
     {
         "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.",
-        "MetricExpr": "min(tma_backend_bound, LD_HEAD.ANY_AT_RET / tma_info_clks + tma_store_bound)",
+        "MetricExpr": "min(cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_slots, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_clks + tma_store_bound)",
         "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
         "MetricName": "tma_memory_bound",
         "MetricThreshold": "tma_memory_bound > 0.2",
@@ -627,7 +627,7 @@
     },
     {
         "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory ordering.",
-        "MetricExpr": "tma_nuke * (MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.SLOW)",
+        "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.MEMORY_ORDERING@ / cpu_atom@MACHINE_CLEARS.SLOW@)",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group",
         "MetricName": "tma_memory_ordering",
         "MetricThreshold": "tma_memory_ordering > 0.02",
@@ -636,7 +636,7 @@
     },
     {
         "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)",
-        "MetricExpr": "tma_microcode_sequencer",
+        "MetricExpr": "UOPS_RETIRED.MS / tma_info_slots",
         "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
         "MetricName": "tma_ms_uops",
         "MetricThreshold": "tma_ms_uops > 0.05",
@@ -692,7 +692,7 @@
     },
     {
         "BriefDescription": "Counts the number of uops retired excluding ms and fp div uops.",
-        "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / tma_info_slots",
+        "MetricExpr": "(cpu_atom@TOPDOWN_RETIRING.ALL@ - cpu_atom@UOPS_RETIRED.MS@ - cpu_atom@UOPS_RETIRED.FPDIV@) / tma_info_slots",
         "MetricGroup": "TopdownL3;tma_L3_group;tma_base_group",
"TopdownL3;tma_L3_group;tma_base_group", "MetricName": "tma_other_ret", "MetricThreshold": "tma_other_ret > 0.3", @@ -701,7 +701,7 @@ }, { "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to page faults.", - "MetricExpr": "tma_nuke * (MACHINE_CLEARS.PAGE_FAULT / MACHINE_CLEARS.SLOW)", + "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.PAGE_FAULT@ / cpu_atom@MACHINE_CLEARS.SLOW@)", "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", "MetricName": "tma_page_fault", "MetricThreshold": "tma_page_fault > 0.02", @@ -758,7 +758,7 @@ }, { "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to RSV full relative", - "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.RSV / MEM_SCHEDULER_BLOCK.ALL", + "MetricExpr": "tma_mem_scheduler * cpu_atom@MEM_SCHEDULER_BLOCK.RSV@ / MEM_SCHEDULER_BLOCK.ALL", "MetricGroup": "TopdownL4;tma_L4_group;tma_mem_scheduler_group", "MetricName": "tma_rsv", "MetricThreshold": "tma_rsv > 0.05", @@ -776,7 +776,7 @@ }, { "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to SMC.", - "MetricExpr": "tma_nuke * (MACHINE_CLEARS.SMC / MACHINE_CLEARS.SLOW)", + "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.SMC@ / cpu_atom@MACHINE_CLEARS.SLOW@)", "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", "MetricName": "tma_smc", "MetricThreshold": "tma_smc > 0.02", @@ -812,7 +812,7 @@ }, { "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full.", - "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)", + "MetricExpr": "tma_mem_scheduler * (cpu_atom@MEM_SCHEDULER_BLOCK.ST_BUF@ / cpu_atom@MEM_SCHEDULER_BLOCK.ALL@)", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_store_bound", "MetricThreshold": "tma_store_bound > 0.1", @@ -830,7 +830,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.", - "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5_11 + UOPS_DISPATCHED.PORT_6) / (5 * tma_info_core_clks)", + "MetricExpr": "(cpu_core@UOPS_DISPATCHED.PORT_0@ + cpu_core@UOPS_DISPATCHED.PORT_1@ + cpu_core@UOPS_DISPATCHED.PORT_5_11@ + cpu_core@UOPS_DISPATCHED.PORT_6@) / (5 * tma_info_core_clks)", "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group", "MetricName": "tma_alu_op_utilization", "MetricThreshold": "tma_alu_op_utilization > 0.6", @@ -849,7 +849,7 @@ }, { "BriefDescription": "This metric estimates fraction of slots the CPU retired uops as a result of handing SSE to AVX* or AVX* to SSE transition Assists.", - "MetricExpr": "63 * ASSISTS.SSE_AVX_MIX / tma_info_slots", + "MetricExpr": "63 * cpu_core@ASSISTS.SSE_AVX_MIX@ / tma_info_slots", "MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group", "MetricName": "tma_avx_assists", "MetricThreshold": "tma_avx_assists > 0.1", @@ -858,7 +858,7 @@ }, { "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", - "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots", + "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + 
         "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
         "MetricName": "tma_backend_bound",
         "MetricThreshold": "tma_backend_bound > 0.2",
@@ -880,7 +880,7 @@
     },
     {
         "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
-        "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
+        "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_slots",
         "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
         "MetricName": "tma_branch_mispredicts",
         "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -911,7 +911,7 @@
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears",
-        "MetricExpr": "(1 - tma_branch_mispredicts / tma_bad_speculation) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+        "MetricExpr": "(1 - tma_branch_mispredicts / tma_bad_speculation) * cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_clks",
         "MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC",
         "MetricName": "tma_clears_resteers",
         "MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -922,7 +922,7 @@
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(25 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 24 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+        "MetricExpr": "(25 * tma_info_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_average_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_contested_accesses",
         "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -944,7 +944,7 @@
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "24 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+        "MetricExpr": "24 * tma_info_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_clks",
         "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_data_sharing",
         "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -975,7 +975,7 @@
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks",
+        "MetricExpr": "cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@ / tma_info_clks",
         "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_dram_bound",
         "MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -985,7 +985,7 @@
     },
     {
         "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
-        "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_clks / 2",
+        "MetricExpr": "(cpu_core@IDQ.DSB_CYCLES_ANY@ - cpu_core@IDQ.DSB_CYCLES_OK@) / tma_info_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
         "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35)",
@@ -1005,7 +1005,7 @@
     },
     {
         "BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
-        "MetricExpr": "min(7 * cpu_core@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_clks",
+        "MetricExpr": "min(7 * cpu_core@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + cpu_core@DTLB_LOAD_MISSES.WALK_ACTIVE@, max(cpu_core@CYCLE_ACTIVITY.CYCLES_MEM_ANY@ - cpu_core@MEMORY_ACTIVITY.CYCLES_L1D_MISS@, 0)) / tma_info_clks",
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
         "MetricName": "tma_dtlb_load",
         "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1015,7 +1015,7 @@
     },
     {
         "BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
-        "MetricExpr": "(7 * cpu_core@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_clks",
+        "MetricExpr": "(7 * cpu_core@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + cpu_core@DTLB_STORE_MISSES.WALK_ACTIVE@) / tma_info_core_clks",
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
         "MetricName": "tma_dtlb_store",
         "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1025,7 +1025,7 @@
     },
     {
         "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
-        "MetricExpr": "28 * tma_info_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_clks",
+        "MetricExpr": "28 * tma_info_average_frequency * cpu_core@OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM@ / tma_info_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group", "MetricName": "tma_false_sharing", "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", @@ -1056,7 +1056,7 @@ }, { "BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues", - "MetricExpr": "topdown\\-fetch\\-lat / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_slots", + "MetricExpr": "cpu_core@topdown\\-fetch\\-lat@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) - cpu_core@INT_MISC.UOP_DROPPING@ / tma_info_slots", "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", @@ -1077,6 +1077,7 @@ }, { "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector", "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_fp_arith", @@ -1087,7 +1088,7 @@ }, { "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists", - "MetricExpr": "30 * ASSISTS.FP / tma_info_slots", + "MetricExpr": "30 * cpu_core@ASSISTS.FP@ / tma_info_slots", "MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group", "MetricName": "tma_fp_assists", "MetricThreshold": "tma_fp_assists > 0.1", @@ -1117,7 +1118,7 @@ }, { "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors", - "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)", + "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@) / (tma_retiring * tma_info_slots)", "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P", "MetricName": "tma_fp_vector_128b", "MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))", @@ -1127,7 +1128,7 @@ }, { "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors", - "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)", + "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / (tma_retiring * tma_info_slots)", "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P", "MetricName": "tma_fp_vector_256b", "MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))", @@ -1137,7 +1138,7 @@ }, { "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", - "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_slots", + "MetricExpr": "cpu_core@topdown\\-fe\\-bound@ / 
         "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
         "MetricName": "tma_frontend_bound",
         "MetricThreshold": "tma_frontend_bound > 0.15",
@@ -1148,7 +1149,7 @@
     },
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions",
-        "MetricExpr": "tma_light_operations * INST_RETIRED.MACRO_FUSED / (tma_retiring * tma_info_slots)",
+        "MetricExpr": "tma_light_operations * cpu_core@INST_RETIRED.MACRO_FUSED@ / (tma_retiring * tma_info_slots)",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fused_instructions",
         "MetricThreshold": "tma_fused_instructions > 0.1 & tma_light_operations > 0.6",
@@ -1158,7 +1159,7 @@
     },
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
-        "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
+        "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_slots",
         "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1203,6 +1204,7 @@
     },
     {
         "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
         "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_branch_misprediction_cost",
@@ -1211,7 +1213,7 @@
     },
     {
         "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
-        "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_slots)",
+        "MetricExpr": "100 * ((cpu_core@BR_INST_RETIRED.COND@ + 3 * cpu_core@BR_INST_RETIRED.NEAR_CALL@ + (cpu_core@BR_INST_RETIRED.NEAR_TAKEN@ - cpu_core@BR_INST_RETIRED.COND_TAKEN@ - 2 * cpu_core@BR_INST_RETIRED.NEAR_CALL@)) / tma_info_slots)",
         "MetricGroup": "Ret;tma_issueBC",
         "MetricName": "tma_info_branching_overhead",
         "MetricThreshold": "tma_info_branching_overhead > 10",
@@ -1220,21 +1222,21 @@
     },
     {
         "BriefDescription": "Fraction of branches that are CALL or RET",
-        "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "(cpu_core@BR_INST_RETIRED.NEAR_CALL@ + cpu_core@BR_INST_RETIRED.NEAR_RETURN@) / BR_INST_RETIRED.ALL_BRANCHES",
         "MetricGroup": "Bad;Branches",
         "MetricName": "tma_info_callret",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
-        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD@",
"MetricGroup": "Pipeline", "MetricName": "tma_info_clks", "Unit": "cpu_core" }, { "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)", - "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@ITLB_MISSES.WALK_COMPLETED@ / INST_RETIRED.ANY", "MetricGroup": "Fed;MemoryTLB", "MetricName": "tma_info_code_stlb_mpki", "Unit": "cpu_core" @@ -1255,6 +1257,7 @@ }, { "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)", "MetricGroup": "Cor;SMT", "MetricName": "tma_info_core_bound_likely", @@ -1263,7 +1266,7 @@ }, { "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core", - "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED", + "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.DISTRIBUTED@", "MetricGroup": "SMT", "MetricName": "tma_info_core_clks", "Unit": "cpu_core" @@ -1306,7 +1309,7 @@ }, { "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY", + "MetricExpr": "IDQ.DSB_UOPS / cpu_core@UOPS_ISSUED.ANY@", "MetricGroup": "DSB;Fed;FetchBW;tma_issueFB", "MetricName": "tma_info_dsb_coverage", "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 6 > 0.35", @@ -1315,6 +1318,7 @@ }, { "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))", "MetricGroup": "DSBmiss;Fed;tma_issueFB", "MetricName": "tma_info_dsb_misses", @@ -1346,7 +1350,7 @@ }, { "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)", - "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / INST_RETIRED.ANY", "MetricGroup": "CacheMisses;Mem", "MetricName": "tma_info_fb_hpki", "Unit": "cpu_core" @@ -1361,7 +1365,7 @@ { "BriefDescription": "Floating Point Operations Per Cycle", "MetricConstraint": "NO_GROUP_EVENTS", - "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_clks", + "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_core_clks", "MetricGroup": "Flops;Ret", "MetricName": "tma_info_flopc", "Unit": "cpu_core" @@ -1369,7 +1373,7 @@ { "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)", "MetricConstraint": "NO_GROUP_EVENTS", - 
"MetricExpr": "(FP_ARITH_DISPATCHED.PORT_0 + FP_ARITH_DISPATCHED.PORT_1 + FP_ARITH_DISPATCHED.PORT_5) / (2 * tma_info_core_clks)", + "MetricExpr": "(cpu_core@FP_ARITH_DISPATCHED.PORT_0@ + cpu_core@FP_ARITH_DISPATCHED.PORT_1@ + cpu_core@FP_ARITH_DISPATCHED.PORT_5@) / (2 * tma_info_core_clks)", "MetricGroup": "Cor;Flops;HPC", "MetricName": "tma_info_fp_arith_utilization", "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common).", @@ -1377,7 +1381,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time", + "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / 1e9 / duration_time", "MetricGroup": "Cor;Flops;HPC", "MetricName": "tma_info_gflops", "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine.", @@ -1401,13 +1405,14 @@ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core", - "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", + "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)", "MetricGroup": "Backend;Cor;Pipeline;PortsUtil", "MetricName": "tma_info_ilp", "Unit": "cpu_core" }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code", "MetricGroup": "Fed;FetchBW;Frontend", "MetricName": "tma_info_instruction_fetch_bw", @@ -1416,7 +1421,7 @@ }, { "BriefDescription": "Total number of retired Instructions", - "MetricExpr": "INST_RETIRED.ANY", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@", "MetricGroup": "Summary;TmaL1;tma_L1_group", "MetricName": "tma_info_instructions", "PublicDescription": "Total number of retired Instructions. 
@@ -1433,7 +1438,7 @@
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_iparith_avx128",
         "MetricThreshold": "tma_info_iparith_avx128 < 10",
@@ -1442,7 +1447,7 @@
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
        "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_iparith_avx256",
         "MetricThreshold": "tma_info_iparith_avx256 < 10",
@@ -1509,7 +1514,7 @@
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
-        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+        "MetricExpr": "INST_RETIRED.ANY / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u",
         "MetricGroup": "Branches;OS",
         "MetricName": "tma_info_ipfarbranch",
         "MetricThreshold": "tma_info_ipfarbranch < 1e6",
@@ -1517,7 +1522,7 @@
     },
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_ipflop",
         "MetricThreshold": "tma_info_ipflop < 10",
@@ -1605,14 +1610,14 @@
     },
     {
         "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
-        "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "(cpu_core@BR_INST_RETIRED.NEAR_TAKEN@ - cpu_core@BR_INST_RETIRED.COND_TAKEN@ - 2 * cpu_core@BR_INST_RETIRED.NEAR_CALL@) / BR_INST_RETIRED.ALL_BRANCHES",
         "MetricGroup": "Bad;Branches",
         "MetricName": "tma_info_jump",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / cpu_core@INST_RETIRED.ANY_P@k",
         "MetricGroup": "OS",
         "MetricName": "tma_info_kernel_cpi",
         "Unit": "cpu_core"
@@ -1627,7 +1632,7 @@
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
duration_time", + "MetricExpr": "64 * cpu_core@L1D.REPLACEMENT@ / 1e9 / duration_time", "MetricGroup": "Mem;MemoryBW", "MetricName": "tma_info_l1d_cache_fill_bw", "Unit": "cpu_core" @@ -1641,21 +1646,21 @@ }, { "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", - "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / INST_RETIRED.ANY", "MetricGroup": "CacheMisses;Mem", "MetricName": "tma_info_l1mpki", "Unit": "cpu_core" }, { "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)", - "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@L2_RQSTS.ALL_DEMAND_DATA_RD@ / INST_RETIRED.ANY", "MetricGroup": "CacheMisses;Mem", "MetricName": "tma_info_l1mpki_load", "Unit": "cpu_core" }, { "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]", - "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time", + "MetricExpr": "64 * cpu_core@L2_LINES_IN.ALL@ / 1e9 / duration_time", "MetricGroup": "Mem;MemoryBW", "MetricName": "tma_info_l2_cache_fill_bw", "Unit": "cpu_core" @@ -1669,56 +1674,56 @@ }, { "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", - "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY", + "MetricExpr": "1e3 * (cpu_core@L2_RQSTS.REFERENCES@ - cpu_core@L2_RQSTS.MISS@) / INST_RETIRED.ANY", "MetricGroup": "CacheMisses;Mem", "MetricName": "tma_info_l2hpki_all", "Unit": "cpu_core" }, { "BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)", - "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@L2_RQSTS.DEMAND_DATA_RD_HIT@ / INST_RETIRED.ANY", "MetricGroup": "CacheMisses;Mem", "MetricName": "tma_info_l2hpki_load", "Unit": "cpu_core" }, { "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", - "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L2_MISS@ / INST_RETIRED.ANY", "MetricGroup": "Backend;CacheMisses;Mem", "MetricName": "tma_info_l2mpki", "Unit": "cpu_core" }, { "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)", - "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@L2_RQSTS.MISS@ / INST_RETIRED.ANY", "MetricGroup": "CacheMisses;Mem;Offcore", "MetricName": "tma_info_l2mpki_all", "Unit": "cpu_core" }, { "BriefDescription": "L2 cache true code cacheline misses per kilo instruction", - "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@FRONTEND_RETIRED.L2_MISS@ / INST_RETIRED.ANY", "MetricGroup": "IcMiss", "MetricName": "tma_info_l2mpki_code", "Unit": "cpu_core" }, { "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction", - "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@L2_RQSTS.CODE_RD_MISS@ / INST_RETIRED.ANY", "MetricGroup": "IcMiss", "MetricName": "tma_info_l2mpki_code_all", "Unit": "cpu_core" }, { "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)", - "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@L2_RQSTS.DEMAND_DATA_RD_MISS@ 
/ INST_RETIRED.ANY", "MetricGroup": "CacheMisses;Mem", "MetricName": "tma_info_l2mpki_load", "Unit": "cpu_core" }, { "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]", - "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time", + "MetricExpr": "64 * cpu_core@OFFCORE_REQUESTS.ALL_REQUESTS@ / 1e9 / duration_time", "MetricGroup": "Mem;MemoryBW;Offcore", "MetricName": "tma_info_l3_cache_access_bw", "Unit": "cpu_core" @@ -1732,7 +1737,7 @@ }, { "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", - "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time", + "MetricExpr": "64 * cpu_core@LONGEST_LAT_CACHE.MISS@ / 1e9 / duration_time", "MetricGroup": "Mem;MemoryBW", "MetricName": "tma_info_l3_cache_fill_bw", "Unit": "cpu_core" @@ -1746,7 +1751,7 @@ }, { "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", - "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L3_MISS@ / INST_RETIRED.ANY", "MetricGroup": "CacheMisses;Mem", "MetricName": "tma_info_l3mpki", "Unit": "cpu_core" @@ -1781,14 +1786,14 @@ }, { "BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)", - "MetricExpr": "1e3 * DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@DTLB_LOAD_MISSES.WALK_COMPLETED@ / INST_RETIRED.ANY", "MetricGroup": "Mem;MemoryTLB", "MetricName": "tma_info_load_stlb_mpki", "Unit": "cpu_core" }, { "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)", - "MetricExpr": "LSD.UOPS / UOPS_ISSUED.ANY", + "MetricExpr": "LSD.UOPS / cpu_core@UOPS_ISSUED.ANY@", "MetricGroup": "Fed;LSD", "MetricName": "tma_info_lsd_coverage", "Unit": "cpu_core" @@ -1827,6 +1832,7 @@ }, { "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", "MetricName": "tma_info_memory_data_tlbs", @@ -1836,6 +1842,7 @@ }, { "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))", "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", "MetricName": "tma_info_memory_latency", @@ -1845,6 +1852,7 @@ }, { "BriefDescription": "Total pipeline cost of Branch Misprediction 
related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", "MetricName": "tma_info_mispredictions", @@ -1869,7 +1877,7 @@ }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (4 * tma_info_core_clks)", + "MetricExpr": "(cpu_core@ITLB_MISSES.WALK_PENDING@ + cpu_core@DTLB_LOAD_MISSES.WALK_PENDING@ + cpu_core@DTLB_STORE_MISSES.WALK_PENDING@) / (4 * tma_info_core_clks)", "MetricGroup": "Mem;MemoryTLB", "MetricName": "tma_info_page_walks_utilization", "MetricThreshold": "tma_info_page_walks_utilization > 0.5", @@ -1877,6 +1885,7 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_retiring * tma_info_slots / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_retire", @@ -1884,21 +1893,21 @@ }, { "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", - "MetricExpr": "TOPDOWN.SLOTS", + "MetricExpr": "cpu_core@TOPDOWN.SLOTS@", "MetricGroup": "TmaL1;tma_L1_group", "MetricName": "tma_info_slots", "Unit": "cpu_core" }, { "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor", - "MetricExpr": "(tma_info_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)", + "MetricExpr": "(tma_info_slots / (cpu_core@TOPDOWN.SLOTS@ / 2) if #SMT_on else 1)", "MetricGroup": "SMT;TmaL1;tma_L1_group", "MetricName": "tma_info_slots_utilization", "Unit": "cpu_core" }, { "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", - "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED if #SMT_on else 0)", + "MetricExpr": "(1 - cpu_core@CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE@ / cpu_core@CPU_CLK_UNHALTED.REF_DISTRIBUTED@ if #SMT_on else 0)", "MetricGroup": "SMT", "MetricName": "tma_info_smt_2t_utilization", "Unit": "cpu_core" @@ -1912,7 +1921,7 @@ }, { "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)", - "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + "MetricExpr": "1e3 * cpu_core@DTLB_STORE_MISSES.WALK_COMPLETED@ / INST_RETIRED.ANY", "MetricGroup": "Mem;MemoryTLB", "MetricName": "tma_info_store_stlb_mpki", "Unit": "cpu_core" @@ -1960,7 +1969,7 @@ }, { "BriefDescription": "This metric represents 128-bit vector Integer ADD/SUB/SAD or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired", - "MetricExpr": "(INT_VEC_RETIRED.ADD_128 + INT_VEC_RETIRED.VNNI_128) / (tma_retiring * tma_info_slots)", + "MetricExpr": "(cpu_core@INT_VEC_RETIRED.ADD_128@ + cpu_core@INT_VEC_RETIRED.VNNI_128@) / (tma_retiring * tma_info_slots)", "MetricGroup": "Compute;IntVector;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group;tma_issue2P", "MetricName": "tma_int_vector_128b", "MetricThreshold": "tma_int_vector_128b > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)", @@ -1970,7 +1979,7 @@ }, { "BriefDescription": "This metric 
-        "MetricExpr": "(INT_VEC_RETIRED.ADD_256 + INT_VEC_RETIRED.MUL_256 + INT_VEC_RETIRED.VNNI_256) / (tma_retiring * tma_info_slots)",
+        "MetricExpr": "(cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_slots)",
         "MetricGroup": "Compute;IntVector;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group;tma_issue2P",
         "MetricName": "tma_int_vector_256b",
         "MetricThreshold": "tma_int_vector_256b > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
@@ -1990,7 +1999,7 @@
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
-        "MetricExpr": "max((EXE_ACTIVITY.BOUND_ON_LOADS - MEMORY_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+        "MetricExpr": "max((cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@) / tma_info_clks, 0)",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -2001,7 +2010,7 @@
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_clks",
+        "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@) / tma_info_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -2011,7 +2020,7 @@
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
-        "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_clks",
+        "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -2021,7 +2030,7 @@
     },
     {
         "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
-        "MetricExpr": "9 * tma_info_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+        "MetricExpr": "9 * tma_info_average_frequency * cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -2081,7 +2090,7 @@
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_clks",
L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_clks", + "MetricExpr": "(16 * max(0, cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ - cpu_core@L2_RQSTS.ALL_RFO@) + cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@ * (10 * cpu_core@L2_RQSTS.RFO_HIT@ + min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@))) / tma_info_clks", "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group", "MetricName": "tma_lock_latency", "MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", @@ -2091,7 +2100,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit", - "MetricExpr": "(LSD.CYCLES_ACTIVE - LSD.CYCLES_OK) / tma_info_core_clks / 2", + "MetricExpr": "(cpu_core@LSD.CYCLES_ACTIVE@ - cpu_core@LSD.CYCLES_OK@) / tma_info_core_clks / 2", "MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", "MetricName": "tma_lsd", "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35)", @@ -2112,7 +2121,7 @@ }, { "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)", - "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks", + "MetricExpr": "min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks", "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW", "MetricName": "tma_mem_bandwidth", "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", @@ -2122,7 +2131,7 @@ }, { "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)", - "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth", + "MetricExpr": "min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD@) / tma_info_clks - tma_mem_bandwidth", "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat", "MetricName": "tma_mem_latency", "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", @@ -2132,7 +2141,7 @@ }, { "BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck", - "MetricExpr": "min(tma_backend_bound, LD_HEAD.ANY_AT_RET / tma_info_clks + tma_store_bound)", + "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_slots", "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", @@ -2143,7 +2152,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.", - 
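
The tma_memory_bound hunk just above is the one change here that alters the formula rather than merely qualifying events: the memory-bound share is now read directly from the hardware topdown level-2 counter and normalized by the four level-1 buckets (the trailing "+ 0 * tma_info_slots" apparently only serves to pull the slots event into the event list). A minimal Python sketch of that ratio, with made-up counter values:

def topdown_memory_bound(counts):
    # Normalize the level-2 memory-bound count by the sum of the four
    # level-1 buckets, mirroring the rewritten MetricExpr above.
    total = (counts["topdown-fe-bound"] + counts["topdown-bad-spec"]
             + counts["topdown-retiring"] + counts["topdown-be-bound"])
    return counts["topdown-mem-bound"] / total

counts = {"topdown-fe-bound": 120, "topdown-bad-spec": 30,
          "topdown-retiring": 500, "topdown-be-bound": 350,
          "topdown-mem-bound": 210}
print(round(topdown_memory_bound(counts), 2))  # 0.21
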
"MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_clks", + "MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", "MetricName": "tma_memory_fence", "MetricThreshold": "tma_memory_fence > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))", @@ -2152,7 +2161,8 @@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", - "MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_slots)", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "tma_light_operations * cpu_core@MEM_UOP_RETIRED.ANY@ / (tma_retiring * tma_info_slots)", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_memory_operations", "MetricThreshold": "tma_memory_operations > 0.1 & tma_light_operations > 0.6", @@ -2171,7 +2181,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage", - "MetricExpr": "tma_branch_mispredicts / tma_bad_speculation * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks", + "MetricExpr": "tma_branch_mispredicts / tma_bad_speculation * cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_clks", "MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM", "MetricName": "tma_mispredicts_resteers", "MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))", @@ -2181,7 +2191,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)", - "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_clks / 2", + "MetricExpr": "(cpu_core@IDQ.MITE_CYCLES_ANY@ - cpu_core@IDQ.MITE_CYCLES_OK@) / tma_info_core_clks / 2", "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", "MetricName": "tma_mite", "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35)", @@ -2191,7 +2201,7 @@ }, { "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued", - "MetricExpr": "160 * ASSISTS.SSE_AVX_MIX / tma_info_clks", + "MetricExpr": "160 * cpu_core@ASSISTS.SSE_AVX_MIX@ / tma_info_clks", "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group", "MetricName": "tma_mixing_vectors", "MetricThreshold": "tma_mixing_vectors > 0.05", @@ -2201,7 +2211,7 @@ }, { "BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)", - "MetricExpr": "3 * cpu_core@UOPS_RETIRED.MS\\,cmask\\=1\\,edge@ / (tma_retiring * tma_info_slots / UOPS_ISSUED.ANY) / tma_info_clks", + "MetricExpr": "3 * cpu_core@UOPS_RETIRED.MS\\,cmask\\=1\\,edge@ / (tma_retiring * tma_info_slots / cpu_core@UOPS_ISSUED.ANY@) / tma_info_clks", "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO", "MetricName": "tma_ms_switches", "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", @@ -2211,7 +2221,7 
@@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused", - "MetricExpr": "tma_light_operations * (BR_INST_RETIRED.ALL_BRANCHES - INST_RETIRED.MACRO_FUSED) / (tma_retiring * tma_info_slots)", + "MetricExpr": "tma_light_operations * (cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ - cpu_core@INST_RETIRED.MACRO_FUSED@) / (tma_retiring * tma_info_slots)", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_non_fused_branches", "MetricThreshold": "tma_non_fused_branches > 0.1 & tma_light_operations > 0.6", @@ -2221,7 +2231,7 @@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions", - "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_slots)", + "MetricExpr": "tma_light_operations * cpu_core@INST_RETIRED.NOP@ / (tma_retiring * tma_info_slots)", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_nop_instructions", "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6", @@ -2231,6 +2241,7 @@ }, { "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_other_light_ops", @@ -2241,7 +2252,7 @@ }, { "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Page Faults", - "MetricExpr": "99 * ASSISTS.PAGE_FAULT / tma_info_slots", + "MetricExpr": "99 * cpu_core@ASSISTS.PAGE_FAULT@ / tma_info_slots", "MetricGroup": "TopdownL5;tma_L5_group;tma_assists_group", "MetricName": "tma_page_faults", "MetricThreshold": "tma_page_faults > 0.05", @@ -2281,7 +2292,7 @@ }, { "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)", - "MetricExpr": "((cpu_core@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / tma_info_clks if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / tma_info_clks)", + "MetricExpr": "((cpu_core@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@) + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / tma_info_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / tma_info_clks)", "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group", "MetricName": "tma_ports_utilization", "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)", @@ -2291,7 +2302,7 @@ }, { 
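
A note on the syntax threaded through all of these hunks: inside a MetricExpr, an event is written as pmu@event@ rather than the usual pmu/event/ form, since '/' already means division in metric expressions, and event parameters are escaped as \, and \= (doubled to \\, and \\= by JSON string escaping). A small sketch that builds such a reference; the helper name is made up:

def metric_event(pmu, event, **params):
    # Render pmu@event\,key\=value@ as it appears (once JSON-unescaped)
    # inside a MetricExpr string.
    suffix = "".join(rf"\,{key}\={value}" for key, value in params.items())
    return f"{pmu}@{event}{suffix}@"

print(metric_event("cpu_core", "UOPS_RETIRED.SLOTS", cmask=1))
# cpu_core@UOPS_RETIRED.SLOTS\,cmask\=1@
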
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)", - "MetricExpr": "cpu_core@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / tma_info_clks", + "MetricExpr": "cpu_core@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_clks + tma_serializing_operation * (cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@) / tma_info_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_0", "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", @@ -2331,7 +2342,7 @@ }, { "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", - "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots", + "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_slots", "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", @@ -2371,7 +2382,7 @@ }, { "BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary", - "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks", + "MetricExpr": "tma_info_load_miss_real_latency * cpu_core@LD_BLOCKS.NO_SR@ / tma_info_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_split_loads", "MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", @@ -2391,7 +2402,7 @@ }, { "BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)", - "MetricExpr": "(XQ.FULL_CYCLES + L1D_PEND_MISS.L2_STALLS) / tma_info_clks", + "MetricExpr": "(cpu_core@XQ.FULL_CYCLES@ + cpu_core@L1D_PEND_MISS.L2_STALLS@) / tma_info_clks", "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group", "MetricName": "tma_sq_full", "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", @@ -2411,7 +2422,7 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", - "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks", + "MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", "MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", @@ -2421,7 +2432,7 @@ }, { "BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses", - "MetricExpr": "(MEM_STORE_RETIRED.L2_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / 
MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks", + "MetricExpr": "(cpu_core@MEM_STORE_RETIRED.L2_HIT@ * 10 * (1 - cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@) + (1 - cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@) * min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@)) / tma_info_clks", "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group", "MetricName": "tma_store_latency", "MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", @@ -2431,7 +2442,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations", - "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_clks)", + "MetricExpr": "(cpu_core@UOPS_DISPATCHED.PORT_4_9@ + cpu_core@UOPS_DISPATCHED.PORT_7_8@) / (4 * tma_info_core_clks)", "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group", "MetricName": "tma_store_op_utilization", "MetricThreshold": "tma_store_op_utilization > 0.6", @@ -2459,7 +2470,7 @@ }, { "BriefDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming store optimize out a read request required by RFO stores", - "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_clks", + "MetricExpr": "9 * cpu_core@OCR.STREAMING_WR.ANY_RESPONSE@ / tma_info_clks", "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueSmSt;tma_store_bound_group", "MetricName": "tma_streaming_stores", "MetricThreshold": "tma_streaming_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", @@ -2479,7 +2490,7 @@ }, { "BriefDescription": "This metric serves as an approximation of legacy x87 usage", - "MetricExpr": "tma_retiring * UOPS_EXECUTED.X87 / UOPS_EXECUTED.THREAD", + "MetricExpr": "tma_retiring * cpu_core@UOPS_EXECUTED.X87@ / UOPS_EXECUTED.THREAD", "MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group", "MetricName": "tma_x87_use", "MetricThreshold": "tma_x87_use > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)", diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json index 0402adbf7d92..f4b3c3883643 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json @@ -193,7 +193,7 @@ { "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", "MetricConstraint": "NO_GROUP_EVENTS", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_dram_bound", "MetricThreshold": "tma_dram_bound > 0.1", @@ -480,7 +480,7 @@ { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", "MetricConstraint": "NO_GROUP_EVENTS", - 
"MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l2_bound", "MetricThreshold": "tma_l2_bound > 0.1", @@ -488,7 +488,7 @@ }, { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", "MetricThreshold": "tma_l3_bound > 0.1", diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json index 1a2154f28b7b..ae8a96ec7fa5 100644 --- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json @@ -317,6 +317,7 @@ }, { "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector", "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_fp_arith", @@ -421,6 +422,7 @@ }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;BrMispredicts;tma_issueBM", "MetricName": "tma_info_branch_misprediction_cost", @@ -466,6 +468,7 @@ }, { "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)", "MetricGroup": "Cor;SMT", "MetricName": "tma_info_core_bound_likely", @@ -518,6 +521,7 @@ }, { "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))", "MetricGroup": "DSBmiss;Fed;tma_issueFB", "MetricName": "tma_info_dsb_misses", @@ -599,6 +603,7 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + 
tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code", "MetricGroup": "Fed;FetchBW;Frontend", "MetricName": "tma_info_instruction_fetch_bw", @@ -937,6 +942,7 @@ }, { "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", "MetricName": "tma_info_memory_data_tlbs", @@ -945,6 +951,7 @@ }, { "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))", "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", "MetricName": "tma_info_memory_latency", @@ -953,6 +960,7 @@ }, { "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", "MetricName": "tma_info_mispredictions", @@ -1004,6 +1012,7 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_retiring * tma_info_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_retire" @@ -1207,6 +1216,7 @@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_light_operations * MEM_INST_RETIRED.ANY / INST_RETIRED.ANY", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_memory_operations", @@ -1277,6 +1287,7 @@ }, { "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_other_light_ops", diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json index 
1ef772b40e04..b736fec164d0 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json @@ -282,6 +282,7 @@ }, { "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector", "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_fp_arith", @@ -386,6 +387,7 @@ }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;BrMispredicts;tma_issueBM", "MetricName": "tma_info_branch_misprediction_cost", @@ -431,6 +433,7 @@ }, { "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)", "MetricGroup": "Cor;SMT", "MetricName": "tma_info_core_bound_likely", @@ -483,6 +486,7 @@ }, { "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))", "MetricGroup": "DSBmiss;Fed;tma_issueFB", "MetricName": "tma_info_dsb_misses", @@ -564,6 +568,7 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code", "MetricGroup": "Fed;FetchBW;Frontend", "MetricName": "tma_info_instruction_fetch_bw", @@ -948,6 +953,7 @@ }, { "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", "MetricName": "tma_info_memory_data_tlbs", @@ -956,6 +962,7 @@ }, { "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + 
tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))", "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", "MetricName": "tma_info_memory_latency", @@ -964,6 +971,7 @@ }, { "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", "MetricName": "tma_info_mispredictions", @@ -1027,6 +1035,7 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_retiring * tma_info_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_retire" @@ -1230,6 +1239,7 @@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_light_operations * MEM_INST_RETIRED.ANY / INST_RETIRED.ANY", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_memory_operations", @@ -1300,6 +1310,7 @@ }, { "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_other_light_ops", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json index 620fc5bd2217..4308e2483112 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json @@ -290,6 +290,7 @@ }, { "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector + tma_fp_amx", "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_fp_arith", @@ -412,6 +413,7 @@ }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;BrMispredicts;tma_issueBM", "MetricName": "tma_info_branch_misprediction_cost", @@ -457,6 +459,7 @@ }, { "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", + 
"MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)", "MetricGroup": "Cor;SMT", "MetricName": "tma_info_core_bound_likely", @@ -509,6 +512,7 @@ }, { "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))", "MetricGroup": "DSBmiss;Fed;tma_issueFB", "MetricName": "tma_info_dsb_misses", @@ -590,6 +594,7 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code", "MetricGroup": "Fed;FetchBW;Frontend", "MetricName": "tma_info_instruction_fetch_bw", @@ -998,6 +1003,7 @@ }, { "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", "MetricName": "tma_info_memory_data_tlbs", @@ -1006,6 +1012,7 @@ }, { "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))", "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", "MetricName": "tma_info_memory_latency", @@ -1014,6 +1021,7 @@ }, { "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", "MetricName": "tma_info_mispredictions", @@ -1054,6 +1062,7 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_retiring * tma_info_slots / 
cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_retire" @@ -1328,6 +1337,7 @@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_slots)", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_memory_operations", @@ -1399,6 +1409,7 @@ }, { "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_other_light_ops", diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json index b442ed4acfbb..ae62bacf9f5e 100644 --- a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json @@ -311,6 +311,7 @@ }, { "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector", "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_fp_arith", @@ -415,6 +416,7 @@ }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;BrMispredicts;tma_issueBM", "MetricName": "tma_info_branch_misprediction_cost", @@ -460,6 +462,7 @@ }, { "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)", "MetricGroup": "Cor;SMT", "MetricName": "tma_info_core_bound_likely", @@ -512,6 +515,7 @@ }, { "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))", "MetricGroup": "DSBmiss;Fed;tma_issueFB", "MetricName": "tma_info_dsb_misses", @@ -593,6 +597,7 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code", "MetricGroup": 
"Fed;FetchBW;Frontend", "MetricName": "tma_info_instruction_fetch_bw", @@ -957,6 +962,7 @@ }, { "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", "MetricName": "tma_info_memory_data_tlbs", @@ -965,6 +971,7 @@ }, { "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))", "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", "MetricName": "tma_info_memory_latency", @@ -973,6 +980,7 @@ }, { "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", "MetricName": "tma_info_mispredictions", @@ -1024,6 +1032,7 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_retiring * tma_info_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_retire" @@ -1221,6 +1230,7 @@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_light_operations * MEM_INST_RETIRED.ANY / INST_RETIRED.ANY", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_memory_operations", @@ -1291,6 +1301,7 @@ }, { "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_other_light_ops", diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index f57a8f274025..487ff01baf1b 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -51,8 +51,8 @@ _json_event_attributes = [ # 
Attributes that are in pmu_metric rather than pmu_event. _json_metric_attributes = [ - 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold', 'desc', - 'long_desc', 'unit', 'compat', 'metricgroup_no_group', 'aggr_mode', + 'pmu', 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold', + 'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group', 'aggr_mode', 'event_grouping' ] # Attributes that are bools or enum int values, encoded as '0', '1',... @@ -391,11 +391,11 @@ def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]: except BaseException as err: print(f"Exception processing {path}") raise - metrics: list[Tuple[str, metric.Expression]] = [] + metrics: list[Tuple[str, str, metric.Expression]] = [] for event in events: event.topic = topic if event.metric_name and '-' not in event.metric_name: - metrics.append((event.metric_name, event.metric_expr)) + metrics.append((event.pmu, event.metric_name, event.metric_expr)) updates = metric.RewriteMetricsInTermsOfOthers(metrics) if updates: for event in events: diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py index 8ec0ba884673..af58b74d1644 100644 --- a/tools/perf/pmu-events/metric.py +++ b/tools/perf/pmu-events/metric.py @@ -552,28 +552,34 @@ def ParsePerfJson(orig: str) -> Expression: return _Constify(eval(compile(parsed, orig, 'eval'))) -def RewriteMetricsInTermsOfOthers(metrics: List[Tuple[str, Expression]] - )-> Dict[str, Expression]: +def RewriteMetricsInTermsOfOthers(metrics: List[Tuple[str, str, Expression]] + )-> Dict[Tuple[str, str], Expression]: """Shorten metrics by rewriting in terms of others. Args: - metrics (list): pairs of metric names and their expressions. + metrics (list): pmus, metric names and their expressions. Returns: - Dict: mapping from a metric name to a shortened expression. + Dict: mapping from a pmu, metric name pair to a shortened expression. 
""" - updates: Dict[str, Expression] = dict() - for outer_name, outer_expression in metrics: + updates: Dict[Tuple[str, str], Expression] = dict() + for outer_pmu, outer_name, outer_expression in metrics: + if outer_pmu is None: + outer_pmu = 'cpu' updated = outer_expression while True: - for inner_name, inner_expression in metrics: + for inner_pmu, inner_name, inner_expression in metrics: + if inner_pmu is None: + inner_pmu = 'cpu' + if inner_pmu.lower() != outer_pmu.lower(): + continue if inner_name.lower() == outer_name.lower(): continue - if inner_name in updates: - inner_expression = updates[inner_name] + if (inner_pmu, inner_name) in updates: + inner_expression = updates[(inner_pmu, inner_name)] updated = updated.Substitute(inner_name, inner_expression) if updated.Equals(outer_expression): break - if outer_name in updates and updated.Equals(updates[outer_name]): + if (outer_pmu, outer_name) in updates and updated.Equals(updates[(outer_pmu, outer_name)]): break - updates[outer_name] = updated + updates[(outer_pmu, outer_name)] = updated return updates diff --git a/tools/perf/pmu-events/metric_test.py b/tools/perf/pmu-events/metric_test.py index 40a3c7d8b2bc..ee22ff43ddd7 100755 --- a/tools/perf/pmu-events/metric_test.py +++ b/tools/perf/pmu-events/metric_test.py @@ -158,9 +158,9 @@ class TestMetricExpressions(unittest.TestCase): def test_RewriteMetricsInTermsOfOthers(self): Expression.__eq__ = lambda e1, e2: e1.Equals(e2) - before = [('m1', ParsePerfJson('a + b + c + d')), - ('m2', ParsePerfJson('a + b + c'))] - after = {'m1': ParsePerfJson('m2 + d')} + before = [('cpu', 'm1', ParsePerfJson('a + b + c + d')), + ('cpu', 'm2', ParsePerfJson('a + b + c'))] + after = {('cpu', 'm1'): ParsePerfJson('m2 + d')} self.assertEqual(RewriteMetricsInTermsOfOthers(before), after) Expression.__eq__ = None diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 80349685cf4d..3549e6971a4d 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -51,6 +51,7 @@ struct pmu_event { }; struct pmu_metric { + const char *pmu; const char *metric_name; const char *metric_group; const char *metric_expr; diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index e94fed901992..15ff86f9da0b 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -4,114 +4,93 @@ #include "parse-events.h" #include "tests.h" #include "debug.h" -#include "pmu.h" -#include "pmu-hybrid.h" -#include <errno.h> #include <linux/kernel.h> static int perf_evsel__roundtrip_cache_name_test(void) { - char name[128]; - int type, op, err = 0, ret = 0, i, idx; - struct evsel *evsel; - struct evlist *evlist = evlist__new(); + int ret = TEST_OK; - if (evlist == NULL) - return -ENOMEM; - - for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { - for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ if (!evsel__is_cache_op_valid(type, op)) continue; - for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); - err = parse_event(evlist, name); - if (err) - ret = err; - } - } - } - - idx = 0; - evsel = evlist__first(evlist); + for (int res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) { + char name[128]; + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + int err; 
- for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { - for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { - /* skip invalid cache type */ - if (!evsel__is_cache_op_valid(type, op)) - continue; + if (evlist == NULL) { + pr_debug("Failed to alloc evlist"); + return TEST_FAIL; + } + __evsel__hw_cache_type_op_res_name(type, op, res, + name, sizeof(name)); - for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); - if (evsel->core.idx != idx) + err = parse_event(evlist, name); + if (err) { + pr_debug("Failure to parse cache event '%s' possibly as PMUs don't support it", + name); + evlist__delete(evlist); continue; - - ++idx; - - if (strcmp(evsel__name(evsel), name)) { - pr_debug("%s != %s\n", evsel__name(evsel), name); - ret = -1; } - - evsel = evsel__next(evsel); + evlist__for_each_entry(evlist, evsel) { + if (strcmp(evsel__name(evsel), name)) { + pr_debug("%s != %s\n", evsel__name(evsel), name); + ret = TEST_FAIL; + } + } + evlist__delete(evlist); } } } - - evlist__delete(evlist); return ret; } -static int __perf_evsel__name_array_test(const char *const names[], int nr_names, - int distance) +static int perf_evsel__name_array_test(const char *const names[], int nr_names) { - int i, err; - struct evsel *evsel; - struct evlist *evlist = evlist__new(); + int ret = TEST_OK; - if (evlist == NULL) - return -ENOMEM; + for (int i = 0; i < nr_names; ++i) { + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + int err; - for (i = 0; i < nr_names; ++i) { + if (evlist == NULL) { + pr_debug("Failed to alloc evlist"); + return TEST_FAIL; + } err = parse_event(evlist, names[i]); if (err) { pr_debug("failed to parse event '%s', err %d\n", names[i], err); - goto out_delete_evlist; + evlist__delete(evlist); + ret = TEST_FAIL; + continue; } - } - - err = 0; - evlist__for_each_entry(evlist, evsel) { - if (strcmp(evsel__name(evsel), names[evsel->core.idx / distance])) { - --err; - pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->core.idx / distance]); + evlist__for_each_entry(evlist, evsel) { + if (strcmp(evsel__name(evsel), names[i])) { + pr_debug("%s != %s\n", evsel__name(evsel), names[i]); + ret = TEST_FAIL; + } } + evlist__delete(evlist); } - -out_delete_evlist: - evlist__delete(evlist); - return err; + return ret; } -#define perf_evsel__name_array_test(names, distance) \ - __perf_evsel__name_array_test(names, ARRAY_SIZE(names), distance) - static int test__perf_evsel__roundtrip_name_test(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - int err = 0, ret = 0; - - if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) - return perf_evsel__name_array_test(evsel__hw_names, 2); + int err = 0, ret = TEST_OK; - err = perf_evsel__name_array_test(evsel__hw_names, 1); + err = perf_evsel__name_array_test(evsel__hw_names, PERF_COUNT_HW_MAX); if (err) ret = err; - err = __perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1, 1); + err = perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1); if (err) ret = err; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 8068cfd89b84..72a10bed84fd 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -6,7 +6,7 @@ #include "tests.h" #include "debug.h" #include "pmu.h" -#include "pmu-hybrid.h" +#include "pmus.h" #include <dirent.h> #include <errno.h> #include "fncache.h" @@ -20,6 +20,26 @@ #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ 
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) +static bool test_config(const struct evsel *evsel, __u64 expected_config) +{ + __u32 type = evsel->core.attr.type; + __u64 config = evsel->core.attr.config; + + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) { + /* + * HARDWARE and HW_CACHE events encode the PMU's extended type + * in the top 32-bits. Mask in order to ignore. + */ + config &= PERF_HW_EVENT_MASK; + } + return config == expected_config; +} + +static bool test_perf_config(const struct perf_evsel *evsel, __u64 expected_config) +{ + return (evsel->attr.config & PERF_HW_EVENT_MASK) == expected_config; +} + #ifdef HAVE_LIBTRACEEVENT #if defined(__s390x__) @@ -82,11 +102,27 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist) static int test__checkevent_raw(struct evlist *evlist) { - struct evsel *evsel = evlist__first(evlist); + struct perf_evsel *evsel; + bool raw_type_match = false; - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); + + perf_evlist__for_each_evsel(&evlist->core, evsel) { + struct perf_pmu *pmu; + bool type_matched = false; + + TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, 0x1a)); + perf_pmus__for_each_pmu(pmu) { + if (pmu->type == evsel->attr.type) { + TEST_ASSERT_VAL("PMU type expected once", !type_matched); + type_matched = true; + if (pmu->type == PERF_TYPE_RAW) + raw_type_match = true; + } + } + TEST_ASSERT_VAL("No PMU found for type", type_matched); + } + TEST_ASSERT_VAL("Raw PMU not matched", raw_type_match); return TEST_OK; } @@ -96,39 +132,41 @@ static int test__checkevent_numeric(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 1)); return TEST_OK; } static int test__checkevent_symbolic_name(struct evlist *evlist) { - struct evsel *evsel = evlist__first(evlist); + struct perf_evsel *evsel; - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); + + perf_evlist__for_each_evsel(&evlist->core, evsel) { + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + test_perf_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + } return TEST_OK; } static int test__checkevent_symbolic_name_config(struct evlist *evlist) { - struct evsel *evsel = evlist__first(evlist); + struct perf_evsel *evsel; - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); - /* - * The period value gets configured within evlist__config, - * while this test executes only parse events method. 
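
The test_config()/test_perf_config() helpers above exist because, as their comment notes, HARDWARE and HW_CACHE events on hybrid systems carry the PMU's extended type in the top 32 bits of attr.config, leaving the generic event id in the low 32 bits selected by PERF_HW_EVENT_MASK. A sketch of the encoding being masked off (the PMU type value is hypothetical):

PERF_PMU_TYPE_SHIFT = 32
PERF_HW_EVENT_MASK = 0xffffffff  # low 32 bits: the generic event id

def encode_config(pmu_type, event_id):
    # attr.config with the extended PMU type in the upper half.
    return (pmu_type << PERF_PMU_TYPE_SHIFT) | event_id

PERF_COUNT_HW_INSTRUCTIONS = 1
cfg = encode_config(10, PERF_COUNT_HW_INSTRUCTIONS)  # 10: made-up PMU type
assert cfg & PERF_HW_EVENT_MASK == PERF_COUNT_HW_INSTRUCTIONS
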
- */ - TEST_ASSERT_VAL("wrong period", - 0 == evsel->core.attr.sample_period); - TEST_ASSERT_VAL("wrong config1", - 0 == evsel->core.attr.config1); - TEST_ASSERT_VAL("wrong config2", - 1 == evsel->core.attr.config2); + TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); + + perf_evlist__for_each_evsel(&evlist->core, evsel) { + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + /* + * The period value gets configured within evlist__config, + * while this test executes only parse events method. + */ + TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); + TEST_ASSERT_VAL("wrong config1", 0 == evsel->attr.config1); + TEST_ASSERT_VAL("wrong config2", 1 == evsel->attr.config2); + } return TEST_OK; } @@ -138,18 +176,20 @@ static int test__checkevent_symbolic_alias(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_SW_PAGE_FAULTS == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_SW_PAGE_FAULTS)); return TEST_OK; } static int test__checkevent_genhw(struct evlist *evlist) { - struct evsel *evsel = evlist__first(evlist); + struct perf_evsel *evsel; - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", (1 << 16) == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); + + perf_evlist__for_each_entry(&evlist->core, evsel) { + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, 1 << 16)); + } return TEST_OK; } @@ -159,7 +199,7 @@ static int test__checkevent_breakpoint(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 0)); TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) == evsel->core.attr.bp_type); TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_4 == @@ -173,7 +213,7 @@ static int test__checkevent_breakpoint_x(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 0)); TEST_ASSERT_VAL("wrong bp_type", HW_BREAKPOINT_X == evsel->core.attr.bp_type); TEST_ASSERT_VAL("wrong bp_len", sizeof(long) == evsel->core.attr.bp_len); @@ -187,7 +227,7 @@ static int test__checkevent_breakpoint_r(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 0)); TEST_ASSERT_VAL("wrong bp_type", HW_BREAKPOINT_R == evsel->core.attr.bp_type); TEST_ASSERT_VAL("wrong bp_len", @@ -202,7 +242,7 @@ static int test__checkevent_breakpoint_w(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 
== evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 0)); TEST_ASSERT_VAL("wrong bp_type", HW_BREAKPOINT_W == evsel->core.attr.bp_type); TEST_ASSERT_VAL("wrong bp_len", @@ -217,7 +257,7 @@ static int test__checkevent_breakpoint_rw(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 0)); TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R|HW_BREAKPOINT_W) == evsel->core.attr.bp_type); TEST_ASSERT_VAL("wrong bp_len", @@ -241,17 +281,15 @@ static int test__checkevent_tracepoint_modifier(struct evlist *evlist) static int test__checkevent_tracepoint_multi_modifier(struct evlist *evlist) { - struct evsel *evsel; + struct perf_evsel *evsel; TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries > 1); - evlist__for_each_entry(evlist, evsel) { - TEST_ASSERT_VAL("wrong exclude_user", - !evsel->core.attr.exclude_user); - TEST_ASSERT_VAL("wrong exclude_kernel", - evsel->core.attr.exclude_kernel); - TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); + perf_evlist__for_each_entry(&evlist->core, evsel) { + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); } return test__checkevent_tracepoint_multi(evlist); @@ -260,25 +298,27 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist) static int test__checkevent_raw_modifier(struct evlist *evlist) { - struct evsel *evsel = evlist__first(evlist); - - TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); - TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); - TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip); + struct perf_evsel *evsel; + perf_evlist__for_each_entry(&evlist->core, evsel) { + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + } return test__checkevent_raw(evlist); } static int test__checkevent_numeric_modifier(struct evlist *evlist) { - struct evsel *evsel = evlist__first(evlist); - - TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); - TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); - TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip); + struct perf_evsel *evsel; + perf_evlist__for_each_entry(&evlist->core, evsel) { + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + } return test__checkevent_numeric(evlist); } @@ -296,21 +336,23 @@ static int 
test__checkevent_symbolic_name_modifier(struct evlist *evlist) static int test__checkevent_exclude_host_modifier(struct evlist *evlist) { - struct evsel *evsel = evlist__first(evlist); - - TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); - TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); + struct perf_evsel *evsel; + perf_evlist__for_each_entry(&evlist->core, evsel) { + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + } return test__checkevent_symbolic_name(evlist); } static int test__checkevent_exclude_guest_modifier(struct evlist *evlist) { - struct evsel *evsel = evlist__first(evlist); - - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); - TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); + struct perf_evsel *evsel; + perf_evlist__for_each_entry(&evlist->core, evsel) { + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + } return test__checkevent_symbolic_name(evlist); } @@ -328,13 +370,14 @@ static int test__checkevent_symbolic_alias_modifier(struct evlist *evlist) static int test__checkevent_genhw_modifier(struct evlist *evlist) { - struct evsel *evsel = evlist__first(evlist); - - TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); - TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); - TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip); + struct perf_evsel *evsel; + perf_evlist__for_each_entry(&evlist->core, evsel) { + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + } return test__checkevent_genhw(evlist); } @@ -446,7 +489,7 @@ static int test__checkevent_pmu(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 10 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 10)); TEST_ASSERT_VAL("wrong config1", 1 == evsel->core.attr.config1); TEST_ASSERT_VAL("wrong config2", 3 == evsel->core.attr.config2); TEST_ASSERT_VAL("wrong config3", 0 == evsel->core.attr.config3); @@ -464,21 +507,23 @@ static int test__checkevent_list(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); - TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong number of entries", 3 <= evlist->core.nr_entries); /* r1 */ - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong config1", 0 == evsel->core.attr.config1); - TEST_ASSERT_VAL("wrong config2", 0 == evsel->core.attr.config2); - TEST_ASSERT_VAL("wrong config3", 0 == evsel->core.attr.config3); - TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); - TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); - TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT != 
evsel->core.attr.type); + while (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) { + TEST_ASSERT_VAL("wrong config", test_config(evsel, 1)); + TEST_ASSERT_VAL("wrong config1", 0 == evsel->core.attr.config1); + TEST_ASSERT_VAL("wrong config2", 0 == evsel->core.attr.config2); + TEST_ASSERT_VAL("wrong config3", 0 == evsel->core.attr.config3); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); + evsel = evsel__next(evsel); + } /* syscalls:sys_enter_openat:k */ - evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type); TEST_ASSERT_VAL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type); @@ -491,7 +536,7 @@ static int test__checkevent_list(struct evlist *evlist) /* 1:1:hp */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 1)); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -508,14 +553,14 @@ static int test__checkevent_pmu_name(struct evlist *evlist) /* cpu/config=1,name=krava/u */ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 1)); TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "krava")); /* cpu/config=2/u" */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 2)); TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "cpu/config=2/u")); @@ -529,7 +574,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) /* cpu/config=1,call-graph=fp,time,period=100000/ */ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 1)); /* * The period, time and callgraph value gets configured within evlist__config, * while this test executes only parse events method. @@ -541,7 +586,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) /* cpu/config=2,call-graph=no,time=0,period=2000/ */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 2)); /* * The period, time and callgraph value gets configured within evlist__config, * while this test executes only parse events method. 
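The recurring edit in these parse-events hunks is the switch from open-coded "N == evsel->core.attr.config" asserts to test_config()/test_perf_config() calls. On hybrid systems the legacy hardware and cache event encodings can carry the PMU's extended type in the upper 32 bits of attr.config, so a plain equality test against the event id no longer holds on every machine. The helper bodies sit outside the visible hunks; a minimal sketch of what they plausibly look like, assuming the PERF_HW_EVENT_MASK encoding from the perf_event uapi header:

    /*
     * Sketch only: assumes <linux/perf_event.h> for PERF_HW_EVENT_MASK and
     * the evsel definitions from util/evsel.h and tools/lib/perf.
     */
    static bool test_config(const struct evsel *evsel, __u64 expected_config)
    {
        __u32 type = evsel->core.attr.type;
        __u64 config = evsel->core.attr.config;

        /*
         * HARDWARE and HW_CACHE events may encode the PMU's extended type
         * in the high config bits, so compare only the event id bits.
         */
        if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)
            config &= PERF_HW_EVENT_MASK;

        return config == expected_config;
    }

    static bool test_perf_config(const struct perf_evsel *evsel, __u64 expected_config)
    {
        return (evsel->attr.config & PERF_HW_EVENT_MASK) == expected_config;
    }

With that masking, asserts such as test_config(evsel, 1 << 16) hold whether or not the kernel folded an extended PMU type into the high config bits.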
@@ -558,7 +603,8 @@ static int test__checkevent_pmu_events(struct evlist *evlist) struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type || + strcmp(evsel->pmu_name, "cpu")); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", @@ -590,7 +636,8 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist) /* cpu/pmu-event/u*/ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type || + strcmp(evsel->pmu_name, "cpu")); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", @@ -661,11 +708,11 @@ static int test__checkterms_simple(struct list_head *terms) */ term = list_entry(term->list.next, struct parse_events_term, list); TEST_ASSERT_VAL("wrong type term", - term->type_term == PARSE_EVENTS__TERM_TYPE_USER); + term->type_term == PARSE_EVENTS__TERM_TYPE_RAW); TEST_ASSERT_VAL("wrong type val", - term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); - TEST_ASSERT_VAL("wrong val", term->val.num == 1); - TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "read")); + term->type_val == PARSE_EVENTS__TERM_TYPE_STR); + TEST_ASSERT_VAL("wrong val", !strcmp(term->val.str, "read")); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "raw")); /* * r0xead @@ -675,11 +722,11 @@ static int test__checkterms_simple(struct list_head *terms) */ term = list_entry(term->list.next, struct parse_events_term, list); TEST_ASSERT_VAL("wrong type term", - term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG); + term->type_term == PARSE_EVENTS__TERM_TYPE_RAW); TEST_ASSERT_VAL("wrong type val", - term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); - TEST_ASSERT_VAL("wrong val", term->val.num == 0xead); - TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config")); + term->type_val == PARSE_EVENTS__TERM_TYPE_STR); + TEST_ASSERT_VAL("wrong val", !strcmp(term->val.str, "r0xead")); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "raw")); return TEST_OK; } @@ -693,8 +740,7 @@ static int test__group1(struct evlist *evlist) /* instructions:k */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -709,8 +755,7 @@ static int test__group1(struct evlist *evlist) /* cycles:upp */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -735,8 
+780,7 @@ static int test__group2(struct evlist *evlist) /* faults + :ku modifier */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_SW_PAGE_FAULTS == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_SW_PAGE_FAULTS)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -751,8 +795,7 @@ static int test__group2(struct evlist *evlist) /* cache-references + :u modifier */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CACHE_REFERENCES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_REFERENCES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -766,8 +809,7 @@ static int test__group2(struct evlist *evlist) /* cycles:k */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -810,8 +852,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) /* group1 cycles:kppp */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -827,8 +868,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) /* group2 cycles + G modifier */ evsel = leader = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -845,7 +885,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) /* group2 1:3 + G modifier */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 3 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 3)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -859,8 +899,7 @@ static int test__group3(struct evlist 
*evlist __maybe_unused) /* instructions:u */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -884,8 +923,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) /* cycles:u + p */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -902,8 +940,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) /* instructions:kp + p */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -928,8 +965,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* cycles + G */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -945,8 +981,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* instructions + G */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -960,8 +995,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* cycles:G */ evsel = leader = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -977,8 +1011,7 @@ static int 
test__group5(struct evlist *evlist __maybe_unused) /* instructions:G */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -991,8 +1024,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* cycles */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1014,8 +1046,7 @@ static int test__group_gh1(struct evlist *evlist) /* cycles + :H group modifier */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1030,8 +1061,7 @@ static int test__group_gh1(struct evlist *evlist) /* cache-misses:G + :H group modifier */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1054,8 +1084,7 @@ static int test__group_gh2(struct evlist *evlist) /* cycles + :G group modifier */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1070,8 +1099,7 @@ static int test__group_gh2(struct evlist *evlist) /* cache-misses:H + :G group modifier */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", 
!evsel->core.attr.exclude_hv); @@ -1094,8 +1122,7 @@ static int test__group_gh3(struct evlist *evlist) /* cycles:G + :u group modifier */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1110,8 +1137,7 @@ static int test__group_gh3(struct evlist *evlist) /* cache-misses:H + :u group modifier */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1134,8 +1160,7 @@ static int test__group_gh4(struct evlist *evlist) /* cycles:G + :uG group modifier */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1150,8 +1175,7 @@ static int test__group_gh4(struct evlist *evlist) /* cache-misses:H + :uG group modifier */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1173,8 +1197,7 @@ static int test__leader_sample1(struct evlist *evlist) /* cycles - sampling group leader */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1188,8 +1211,7 @@ static int test__leader_sample1(struct evlist *evlist) /* cache-misses - not sampling */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong 
exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1202,8 +1224,7 @@ static int test__leader_sample1(struct evlist *evlist) /* branch-misses - not sampling */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1226,8 +1247,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) /* instructions - sampling group leader */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1241,8 +1261,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) /* branch-misses - not sampling */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1278,8 +1297,7 @@ static int test__pinned_group(struct evlist *evlist) /* cycles - group leader */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned); @@ -1287,14 +1305,12 @@ static int test__pinned_group(struct evlist *evlist) /* cache-misses - can not be pinned, but will go on with the leader */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); /* branch-misses - ditto */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); return TEST_OK; @@ -1322,8 +1338,7 @@ static int test__exclusive_group(struct evlist *evlist) /* cycles - group leader */ evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == 
evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive); @@ -1331,14 +1346,12 @@ static int test__exclusive_group(struct evlist *evlist) /* cache-misses - can not be pinned, but will go on with the leader */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); /* branch-misses - ditto */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); return TEST_OK; @@ -1349,7 +1362,7 @@ static int test__checkevent_breakpoint_len(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 0)); TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) == evsel->core.attr.bp_type); TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_1 == @@ -1364,7 +1377,7 @@ static int test__checkevent_breakpoint_len_w(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 0)); TEST_ASSERT_VAL("wrong bp_type", HW_BREAKPOINT_W == evsel->core.attr.bp_type); TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_2 == @@ -1392,8 +1405,7 @@ static int test__checkevent_precise_max_modifier(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", - PERF_COUNT_SW_TASK_CLOCK == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_SW_TASK_CLOCK)); return TEST_OK; } @@ -1426,7 +1438,12 @@ static int test__checkevent_config_cache(struct evlist *evlist) struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong name setting", evsel__name_is(evsel, "cachepmu")); - return TEST_OK; + return test__checkevent_genhw(evlist); +} + +static bool test__pmu_cpu_valid(void) +{ + return !!perf_pmu__find("cpu"); } static bool test__intel_pt_valid(void) @@ -1446,7 +1463,9 @@ static int test__checkevent_complex_name(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); - TEST_ASSERT_VAL("wrong complex name parsing", evsel__name_is(evsel, "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks")); + TEST_ASSERT_VAL("wrong complex name parsing", + evsel__name_is(evsel, + "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks")); return TEST_OK; } @@ -1456,7 +1475,7 @@ static int test__checkevent_raw_pmu(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of 
entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong config", test_config(evsel, 0x1a)); return TEST_OK; } @@ -1465,7 +1484,7 @@ static int test__sym_event_slash(struct evlist *evlist) struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); return TEST_OK; } @@ -1475,11 +1494,31 @@ static int test__sym_event_dc(struct evlist *evlist) struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); return TEST_OK; } +static int test__term_equal_term(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "name") == 0); + return TEST_OK; +} + +static int test__term_equal_legacy(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "l1d") == 0); + return TEST_OK; +} + #ifdef HAVE_LIBTRACEEVENT static int count_tracepoints(void) { @@ -1536,127 +1575,6 @@ static int test__all_tracepoints(struct evlist *evlist) } #endif /* HAVE_LIBTRACEVENT */ -static int test__hybrid_hw_event_with_pmu(struct evlist *evlist) -{ - struct evsel *evsel = evlist__first(evlist); - - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); - return TEST_OK; -} - -static int test__hybrid_hw_group_event(struct evlist *evlist) -{ - struct evsel *evsel, *leader; - - evsel = leader = evlist__first(evlist); - TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); - - evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0xc0 == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); - return TEST_OK; -} - -static int test__hybrid_sw_hw_group_event(struct evlist *evlist) -{ - struct evsel *evsel, *leader; - - evsel = leader = evlist__first(evlist); - TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); - - evsel = evsel__next(evsel); - 
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); - return TEST_OK; -} - -static int test__hybrid_hw_sw_group_event(struct evlist *evlist) -{ - struct evsel *evsel, *leader; - - evsel = leader = evlist__first(evlist); - TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); - - evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); - return TEST_OK; -} - -static int test__hybrid_group_modifier1(struct evlist *evlist) -{ - struct evsel *evsel, *leader; - - evsel = leader = evlist__first(evlist); - TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); - TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); - TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); - - evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0xc0 == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); - TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); - TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); - return TEST_OK; -} - -static int test__hybrid_raw1(struct evlist *evlist) -{ - struct evsel *evsel = evlist__first(evlist); - - if (!perf_pmu__hybrid_mounted("cpu_atom")) { - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); - return TEST_OK; - } - - TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); - - /* The type of second event is randome value */ - evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); - return TEST_OK; -} - -static int test__hybrid_raw2(struct evlist *evlist) -{ - struct evsel *evsel = evlist__first(evlist); - - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); - return TEST_OK; -} - -static int test__hybrid_cache_event(struct evlist *evlist) -{ - struct evsel *evsel = evlist__first(evlist); - - TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x2 == (evsel->core.attr.config & 0xffffffff)); - return TEST_OK; -} - struct evlist_test { const char *name; bool (*valid)(void); @@ -1973,26 +1891,40 @@ static const struct evlist_test test__events[] = { .check = test__exclusive_group, /* 7 */ }, + { + .name = 
"cycles/name=name/", + .check = test__term_equal_term, + /* 8 */ + }, + { + .name = "cycles/name=l1d/", + .check = test__term_equal_legacy, + /* 9 */ + }, }; static const struct evlist_test test__events_pmu[] = { { .name = "cpu/config=10,config1,config2=3,period=1000/u", + .valid = test__pmu_cpu_valid, .check = test__checkevent_pmu, /* 0 */ }, { .name = "cpu/config=1,name=krava/u,cpu/config=2/u", + .valid = test__pmu_cpu_valid, .check = test__checkevent_pmu_name, /* 1 */ }, { .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/", + .valid = test__pmu_cpu_valid, .check = test__checkevent_pmu_partial_time_callgraph, /* 2 */ }, { .name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp", + .valid = test__pmu_cpu_valid, .check = test__checkevent_complex_name, /* 3 */ }, @@ -2006,66 +1938,174 @@ static const struct evlist_test test__events_pmu[] = { .check = test__checkevent_raw_pmu, /* 5 */ }, -}; - -struct terms_test { - const char *str; - int (*check)(struct list_head *terms); -}; - -static const struct terms_test test__terms[] = { - [0] = { - .str = "config=10,config1,config2=3,config3=4,umask=1,read,r0xead", - .check = test__checkterms_simple, + { + .name = "cpu/L1-dcache-load-miss/", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_genhw, + /* 6 */ + }, + { + .name = "cpu/L1-dcache-load-miss/kp", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_genhw_modifier, + /* 7 */ + }, + { + .name = "cpu/L1-dcache-misses,name=cachepmu/", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_config_cache, + /* 8 */ }, -}; - -static const struct evlist_test test__hybrid_events[] = { { - .name = "cpu_core/cpu-cycles/", - .check = test__hybrid_hw_event_with_pmu, + .name = "cpu/instructions/", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_symbolic_name, + /* 9 */ + }, + { + .name = "cpu/cycles,period=100000,config2/", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_symbolic_name_config, /* 0 */ }, { - .name = "{cpu_core/cpu-cycles/,cpu_core/instructions/}", - .check = test__hybrid_hw_group_event, + .name = "cpu/instructions/h", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_symbolic_name_modifier, /* 1 */ }, { - .name = "{cpu-clock,cpu_core/cpu-cycles/}", - .check = test__hybrid_sw_hw_group_event, + .name = "cpu/instructions/G", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_exclude_host_modifier, /* 2 */ }, { - .name = "{cpu_core/cpu-cycles/,cpu-clock}", - .check = test__hybrid_hw_sw_group_event, + .name = "cpu/instructions/H", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_exclude_guest_modifier, /* 3 */ }, { - .name = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}", - .check = test__hybrid_group_modifier1, + .name = "{cpu/instructions/k,cpu/cycles/upp}", + .valid = test__pmu_cpu_valid, + .check = test__group1, /* 4 */ }, { - .name = "r1a", - .check = test__hybrid_raw1, + .name = "{cpu/cycles/u,cpu/instructions/kp}:p", + .valid = test__pmu_cpu_valid, + .check = test__group4, /* 5 */ }, { - .name = "cpu_core/r1a/", - .check = test__hybrid_raw2, + .name = "{cpu/cycles/,cpu/cache-misses/G}:H", + .valid = test__pmu_cpu_valid, + .check = test__group_gh1, /* 6 */ }, { - .name = "cpu_core/config=10,config1,config2=3,period=1000/u", - .check = test__checkevent_pmu, + .name = "{cpu/cycles/,cpu/cache-misses/H}:G", + .valid = test__pmu_cpu_valid, + .check = test__group_gh2, /* 7 */ }, { - .name = "cpu_core/LLC-loads/", - .check = 
test__hybrid_cache_event, + .name = "{cpu/cycles/G,cpu/cache-misses/H}:u", + .valid = test__pmu_cpu_valid, + .check = test__group_gh3, /* 8 */ }, + { + .name = "{cpu/cycles/G,cpu/cache-misses/H}:uG", + .valid = test__pmu_cpu_valid, + .check = test__group_gh4, + /* 9 */ + }, + { + .name = "{cpu/cycles/,cpu/cache-misses/,cpu/branch-misses/}:S", + .valid = test__pmu_cpu_valid, + .check = test__leader_sample1, + /* 0 */ + }, + { + .name = "{cpu/instructions/,cpu/branch-misses/}:Su", + .valid = test__pmu_cpu_valid, + .check = test__leader_sample2, + /* 1 */ + }, + { + .name = "cpu/instructions/uDp", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_pinned_modifier, + /* 2 */ + }, + { + .name = "{cpu/cycles/,cpu/cache-misses/,cpu/branch-misses/}:D", + .valid = test__pmu_cpu_valid, + .check = test__pinned_group, + /* 3 */ + }, + { + .name = "cpu/instructions/I", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_exclude_idle_modifier, + /* 4 */ + }, + { + .name = "cpu/instructions/kIG", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_exclude_idle_modifier_1, + /* 5 */ + }, + { + .name = "cpu/cycles/u", + .valid = test__pmu_cpu_valid, + .check = test__sym_event_slash, + /* 6 */ + }, + { + .name = "cpu/cycles/k", + .valid = test__pmu_cpu_valid, + .check = test__sym_event_dc, + /* 7 */ + }, + { + .name = "cpu/instructions/uep", + .valid = test__pmu_cpu_valid, + .check = test__checkevent_exclusive_modifier, + /* 8 */ + }, + { + .name = "{cpu/cycles/,cpu/cache-misses/,cpu/branch-misses/}:e", + .valid = test__pmu_cpu_valid, + .check = test__exclusive_group, + /* 9 */ + }, + { + .name = "cpu/cycles,name=name/", + .valid = test__pmu_cpu_valid, + .check = test__term_equal_term, + /* 0 */ + }, + { + .name = "cpu/cycles,name=l1d/", + .valid = test__pmu_cpu_valid, + .check = test__term_equal_legacy, + /* 1 */ + }, +}; + +struct terms_test { + const char *str; + int (*check)(struct list_head *terms); +}; + +static const struct terms_test test__terms[] = { + [0] = { + .str = "config=10,config1,config2=3,config3=4,umask=1,read,r0xead", + .check = test__checkterms_simple, + }, }; static int test_event(const struct evlist_test *e) @@ -2091,7 +2131,7 @@ static int test_event(const struct evlist_test *e) e->name, ret, err.str); parse_events_error__print(&err, e->name); ret = TEST_FAIL; - if (strstr(err.str, "can't access trace events")) + if (err.str && strstr(err.str, "can't access trace events")) ret = TEST_SKIP; } else { ret = e->check(evlist); @@ -2113,8 +2153,8 @@ static int test_event_fake_pmu(const char *str) return -ENOMEM; parse_events_error__init(&err); - perf_pmu__test_parse_init(); - ret = __parse_events(evlist, str, &err, &perf_pmu__fake, /*warn_if_reordered=*/true); + ret = __parse_events(evlist, str, /*pmu_filter=*/NULL, &err, + &perf_pmu__fake, /*warn_if_reordered=*/true); if (ret) { pr_debug("failed to parse event '%s', err %d, str '%s'\n", str, ret, err.str); @@ -2167,13 +2207,6 @@ static int test_term(const struct terms_test *t) INIT_LIST_HEAD(&terms); - /* - * The perf_pmu__test_parse_init prepares perf_pmu_events_list - * which gets freed in parse_events_terms. 
- */ - if (perf_pmu__test_parse_init()) - return -1; - ret = parse_events_terms(&terms, t->str); if (ret) { pr_debug("failed to parse terms '%s', err %d\n", @@ -2208,99 +2241,91 @@ static int test__terms2(struct test_suite *test __maybe_unused, int subtest __ma return test_terms(test__terms, ARRAY_SIZE(test__terms)); } -static int test_pmu(void) +static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - struct stat st; - char path[PATH_MAX]; - int ret; + struct perf_pmu *pmu; + int ret = TEST_OK; - snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/format/", - sysfs__mountpoint()); + if (list_empty(&pmus)) + perf_pmu__scan(NULL); - ret = stat(path, &st); - if (ret) - pr_debug("omitting PMU cpu tests\n"); - return !ret; -} + perf_pmus__for_each_pmu(pmu) { + struct stat st; + char path[PATH_MAX]; + struct dirent *ent; + DIR *dir; + int err; -static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused) -{ - struct stat st; - char path[PATH_MAX]; - struct dirent *ent; - DIR *dir; - int ret; + snprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events/", + sysfs__mountpoint(), pmu->name); - if (!test_pmu()) - return TEST_SKIP; + err = stat(path, &st); + if (err) { + pr_debug("skipping PMU %s events tests: %s\n", pmu->name, path); + continue; + } - snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/events/", - sysfs__mountpoint()); + dir = opendir(path); + if (!dir) { + pr_debug("can't open pmu event dir: %s\n", path); + ret = combine_test_results(ret, TEST_SKIP); + continue; + } - ret = stat(path, &st); - if (ret) { - pr_debug("omitting PMU cpu events tests: %s\n", path); - return TEST_OK; - } + while ((ent = readdir(dir))) { + struct evlist_test e = { .name = NULL, }; + char name[2 * NAME_MAX + 1 + 12 + 3]; + int test_ret; - dir = opendir(path); - if (!dir) { - pr_debug("can't open pmu event dir: %s\n", path); - return TEST_FAIL; - } + /* Names containing . are special and cannot be used directly */ + if (strchr(ent->d_name, '.')) + continue; - ret = TEST_OK; - while ((ent = readdir(dir))) { - struct evlist_test e = { .name = NULL, }; - char name[2 * NAME_MAX + 1 + 12 + 3]; - int test_ret; + snprintf(name, sizeof(name), "%s/event=%s/u", pmu->name, ent->d_name); - /* Names containing . are special and cannot be used directly */ - if (strchr(ent->d_name, '.')) - continue; + e.name = name; + e.check = test__checkevent_pmu_events; - snprintf(name, sizeof(name), "cpu/event=%s/u", ent->d_name); + test_ret = test_event(&e); + if (test_ret != TEST_OK) { + pr_debug("Test PMU event failed for '%s'", name); + ret = combine_test_results(ret, test_ret); + } - e.name = name; - e.check = test__checkevent_pmu_events; + if (!is_pmu_core(pmu->name)) + continue; - test_ret = test_event(&e); - if (test_ret != TEST_OK) { - pr_debug("Test PMU event failed for '%s'", name); - ret = combine_test_results(ret, test_ret); - } - /* - * Names containing '-' are recognized as prefixes and suffixes - * due to '-' being a legacy PMU separator. This fails when the - * prefix or suffix collides with an existing legacy token. For - * example, branch-brs has a prefix (branch) that collides with - * a PE_NAME_CACHE_TYPE token causing a parse error as a suffix - * isn't expected after this. As event names in the config - * slashes are allowed a '-' in the name we check this works - * above. 
- */ - if (strchr(ent->d_name, '-')) - continue; + /* + * Names containing '-' are recognized as prefixes and suffixes + * due to '-' being a legacy PMU separator. This fails when the + * prefix or suffix collides with an existing legacy token. For + * example, branch-brs has a prefix (branch) that collides with + * a PE_NAME_CACHE_TYPE token, causing a parse error because a + * suffix isn't expected there. Since a '-' is allowed in event + * names within the config slashes, we check above that this + * works. + */ + if (strchr(ent->d_name, '-')) + continue; - snprintf(name, sizeof(name), "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name); - e.name = name; - e.check = test__checkevent_pmu_events_mix; - test_ret = test_event(&e); - if (test_ret != TEST_OK) { - pr_debug("Test PMU event failed for '%s'", name); - ret = combine_test_results(ret, test_ret); + snprintf(name, sizeof(name), "%s:u,%s/event=%s/u", + ent->d_name, pmu->name, ent->d_name); + e.name = name; + e.check = test__checkevent_pmu_events_mix; + test_ret = test_event(&e); + if (test_ret != TEST_OK) { + pr_debug("Test PMU event failed for '%s'", name); + ret = combine_test_results(ret, test_ret); + } } - } - closedir(dir); + closedir(dir); + } return ret; } static int test__pmu_events2(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - if (!test_pmu()) - return TEST_SKIP; - return test_events(test__events_pmu, ARRAY_SIZE(test__events_pmu)); } @@ -2362,14 +2387,6 @@ static bool test_alias(char **event, char **alias) return false; } -static int test__hybrid(struct test_suite *test __maybe_unused, int subtest __maybe_unused) -{ - if (!perf_pmu__has_hybrid()) - return TEST_SKIP; - - return test_events(test__hybrid_events, ARRAY_SIZE(test__hybrid_events)); -} - static int test__checkevent_pmu_events_alias(struct evlist *evlist) { struct evsel *evsel1 = evlist__first(evlist); @@ -2433,9 +2450,6 @@ static struct test_case tests__parse_events[] = { TEST_CASE_REASON("Test event parsing", events2, "permissions"), - TEST_CASE_REASON("Test parsing of \"hybrid\" CPU events", - hybrid, - "not hybrid"), TEST_CASE_REASON("Parsing of all PMU events from sysfs", pmu_events, "permissions"), diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 1dff863b9711..734004f1a37d 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -776,16 +776,8 @@ static int check_parse_id(const char *id, struct parse_events_error *error, for (cur = strchr(dup, '@') ; cur; cur = strchr(++cur, '@')) *cur = '/'; - if (fake_pmu) { - /* - * Every call to __parse_events will try to initialize the PMU - * state from sysfs and then clean it up at the end. Reset the - * PMU events to the test state so that we don't pick up - * erroneous prefixes and suffixes. - */ - perf_pmu__test_parse_init(); - } - ret = __parse_events(evlist, dup, error, fake_pmu, /*warn_if_reordered=*/true); + ret = __parse_events(evlist, dup, /*pmu_filter=*/NULL, error, fake_pmu, + /*warn_if_reordered=*/true); free(dup); evlist__delete(evlist); diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh index b154fbb15d54..3f1e67795490 100755 --- a/tools/perf/tests/shell/stat.sh +++ b/tools/perf/tests/shell/stat.sh @@ -103,10 +103,54 @@ test_topdown_weak_groups() { echo "Topdown weak groups test [Success]" } +test_cputype() { + # Test --cputype argument. + echo "cputype test" + + # Bogus PMU should fail.
+ if perf stat --cputype="123" -e instructions true > /dev/null 2>&1 + then + echo "cputype test [Bogus PMU didn't fail]" + err=1 + return + fi + + # Find a known PMU for cputype. + pmu="" + for i in cpu cpu_atom armv8_pmuv3_0 + do + if test -d "/sys/devices/$i" + then + pmu="$i" + break + fi + if perf stat -e "$i/instructions/" true > /dev/null 2>&1 + then + pmu="$i" + break + fi + done + if test "x$pmu" = "x" + then + echo "cputype test [Skipped known PMU not found]" + return + fi + + # Test running with cputype produces output. + if ! perf stat --cputype="$pmu" -e instructions true 2>&1 | grep -E -q "instructions" + then + echo "cputype test [Failed count missed with given filter]" + err=1 + return + fi + echo "cputype test [Success]" +} + test_default_stat test_stat_record_report test_stat_record_script test_stat_repeat_weak_groups test_topdown_groups test_topdown_weak_groups +test_cputype exit $err diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 12c3ce530e42..70bad42b807b 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -781,9 +781,9 @@ static int annotate_browser__run(struct annotate_browser *browser, ui_browser__help_window(&browser->b, "UP/DOWN/PGUP\n" "PGDN/SPACE Navigate\n" + "</> Move to prev/next symbol\n" "q/ESC/CTRL+C Exit\n\n" "ENTER Go to target\n" - "ESC Exit\n" "H Go to hottest instruction\n" "TAB/shift+TAB Cycle thru hottest instructions\n" "j Toggle showing jump to target arrows\n" @@ -913,6 +913,8 @@ show_sup_ins: annotation__toggle_full_addr(notes, ms); continue; case K_LEFT: + case '<': + case '>': case K_ESC: case 'q': case CTRL('c'): diff --git a/tools/perf/util/Build b/tools/perf/util/Build index bd18fe5f2719..c146736ead19 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -24,7 +24,6 @@ perf-y += llvm-utils.o perf-y += mmap.o perf-y += memswap.o perf-y += parse-events.o -perf-y += parse-events-hybrid.o perf-y += print-events.o perf-y += tracepoint.o perf-y += perf_regs.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 11992cfe271c..b708bbc49c9e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -558,6 +558,19 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy return -1; *s = '\0'; + + /* + * x86 SIB addressing looks like 0x8(%rax, %rcx, 1), so the + * source operand must keep its closing parenthesis.
+ */ + if (strchr(ops->raw, '(')) { + *s = ','; + s = strchr(ops->raw, ')'); + if (s == NULL || s[1] != ',') + return -1; + *++s = '\0'; + } + ops->source.raw = strdup(ops->raw); *s = ','; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index a86614599269..046fbfcfdaab 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -67,6 +67,39 @@ char dso__symtab_origin(const struct dso *dso) return origin[dso->symtab_type]; } +bool dso__is_object_file(const struct dso *dso) +{ + switch (dso->binary_type) { + case DSO_BINARY_TYPE__KALLSYMS: + case DSO_BINARY_TYPE__GUEST_KALLSYMS: + case DSO_BINARY_TYPE__JAVA_JIT: + case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: + case DSO_BINARY_TYPE__OOL: + return false; + case DSO_BINARY_TYPE__VMLINUX: + case DSO_BINARY_TYPE__GUEST_VMLINUX: + case DSO_BINARY_TYPE__DEBUGLINK: + case DSO_BINARY_TYPE__BUILD_ID_CACHE: + case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: + case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: + case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: + case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: + case DSO_BINARY_TYPE__GUEST_KMODULE: + case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: + case DSO_BINARY_TYPE__KCORE: + case DSO_BINARY_TYPE__GUEST_KCORE: + case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: + case DSO_BINARY_TYPE__NOT_FOUND: + default: + return true; + } +} + int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type, char *root_dir, char *filename, size_t size) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 0b7c7633b9f6..b23a157c914d 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -379,23 +379,25 @@ void dso__reset_find_symbol_cache(struct dso *dso); size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp); size_t dso__fprintf(struct dso *dso, FILE *fp); -static inline bool dso__is_vmlinux(struct dso *dso) +static inline bool dso__is_vmlinux(const struct dso *dso) { return dso->binary_type == DSO_BINARY_TYPE__VMLINUX || dso->binary_type == DSO_BINARY_TYPE__GUEST_VMLINUX; } -static inline bool dso__is_kcore(struct dso *dso) +static inline bool dso__is_kcore(const struct dso *dso) { return dso->binary_type == DSO_BINARY_TYPE__KCORE || dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE; } -static inline bool dso__is_kallsyms(struct dso *dso) +static inline bool dso__is_kallsyms(const struct dso *dso) { return dso->kernel && dso->long_name[0] != '/'; } +bool dso__is_object_file(const struct dso *dso); + void dso__free_a2l(struct dso *dso); enum dso_type dso__type(struct dso *dso, struct machine *machine); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 46cf402add93..e7e5540cc970 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -67,7 +67,6 @@ struct evlist { struct evsel *selected; struct events_stats stats; struct perf_env *env; - const char *hybrid_pmu_name; void (*trace_event_sample_raw)(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index cc80ec554c0a..79e42d66f55b 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -116,6 +116,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, int print_ip = print_opts & EVSEL__PRINT_IP; int print_sym = print_opts & 
EVSEL__PRINT_SYM; int print_dso = print_opts & EVSEL__PRINT_DSO; + int print_dsoff = print_opts & EVSEL__PRINT_DSOFF; int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET; int print_oneline = print_opts & EVSEL__PRINT_ONELINE; int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; @@ -171,11 +172,8 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, } } - if (print_dso && (!sym || !sym->inlined)) { - printed += fprintf(fp, " ("); - printed += map__fprintf_dsoname(map, fp); - printed += fprintf(fp, ")"); - } + if (print_dso && (!sym || !sym->inlined)) + printed += map__fprintf_dsoname_dsoff(map, print_dsoff, addr, fp); if (print_srcline) printed += map__fprintf_srcline(map, addr, "\n ", fp); @@ -209,6 +207,7 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, int print_ip = print_opts & EVSEL__PRINT_IP; int print_sym = print_opts & EVSEL__PRINT_SYM; int print_dso = print_opts & EVSEL__PRINT_DSO; + int print_dsoff = print_opts & EVSEL__PRINT_DSOFF; int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET; int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; @@ -234,11 +233,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, } } - if (print_dso) { - printed += fprintf(fp, " ("); - printed += map__fprintf_dsoname(al->map, fp); - printed += fprintf(fp, ")"); - } + if (print_dso) + printed += map__fprintf_dsoname_dsoff(al->map, print_dsoff, al->addr, fp); if (print_srcline) printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h index 3093d096c29f..c8a9fac2f2dd 100644 --- a/tools/perf/util/evsel_fprintf.h +++ b/tools/perf/util/evsel_fprintf.h @@ -26,6 +26,7 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE #define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) #define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7) #define EVSEL__PRINT_SKIP_IGNORED (1<<8) +#define EVSEL__PRINT_DSOFF (1<<9) struct addr_location; struct perf_event_attr; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index b7f890950909..4d9944bbf5e4 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -431,14 +431,21 @@ size_t map__fprintf(struct map *map, FILE *fp) map__start(map), map__end(map), map__pgoff(map), dso->name); } -size_t map__fprintf_dsoname(struct map *map, FILE *fp) +static bool prefer_dso_long_name(const struct dso *dso, bool print_off) +{ + return dso->long_name && + (symbol_conf.show_kernel_path || + (print_off && (dso->name[0] == '[' || dso__is_kcore(dso)))); +} + +static size_t __map__fprintf_dsoname(struct map *map, bool print_off, FILE *fp) { char buf[symbol_conf.pad_output_len_dso + 1]; const char *dsoname = "[unknown]"; const struct dso *dso = map ? map__dso(map) : NULL; if (dso) { - if (symbol_conf.show_kernel_path && dso->long_name) + if (prefer_dso_long_name(dso, print_off)) dsoname = dso->long_name; else dsoname = dso->name; @@ -452,6 +459,27 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp) return fprintf(fp, "%s", dsoname); } +size_t map__fprintf_dsoname(struct map *map, FILE *fp) +{ + return __map__fprintf_dsoname(map, false, fp); +} + +size_t map__fprintf_dsoname_dsoff(struct map *map, bool print_off, u64 addr, FILE *fp) +{ + const struct dso *dso = map ? 
map__dso(map) : NULL; + int printed = 0; + + if (print_off && (!dso || !dso__is_object_file(dso))) + print_off = false; + printed += fprintf(fp, " ("); + printed += __map__fprintf_dsoname(map, print_off, fp); + if (print_off) + printed += fprintf(fp, "+0x%" PRIx64, addr); + printed += fprintf(fp, ")"); + + return printed; +} + char *map__srcline(struct map *map, u64 addr, struct symbol *sym) { if (map == NULL) diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 823ab7fc0acf..66a87b3d9965 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -194,6 +194,7 @@ static inline void __map__zput(struct map **map) size_t map__fprintf(struct map *map, FILE *fp); size_t map__fprintf_dsoname(struct map *map, FILE *fp); +size_t map__fprintf_dsoname_dsoff(struct map *map, bool print_off, u64 addr, FILE *fp); char *map__srcline(struct map *map, u64 addr, struct symbol *sym); int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, FILE *fp); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 5e9c657dd3f7..80ffd6da70c7 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -123,6 +123,7 @@ struct metric { * within the expression. */ struct expr_parse_ctx *pctx; + const char *pmu; /** The name of the metric such as "IPC". */ const char *metric_name; /** Modifier on the metric such as "u" or NULL for none. */ @@ -216,6 +217,7 @@ static struct metric *metric__new(const struct pmu_metric *pm, if (!m->pctx) goto out_err; + m->pmu = pm->pmu ?: "cpu"; m->metric_name = pm->metric_name; m->modifier = NULL; if (modifier) { @@ -259,11 +261,12 @@ static bool contains_metric_id(struct evsel **metric_events, int num_events, /** * setup_metric_events - Find a group of events in metric_evlist that correspond * to the IDs from a parsed metric expression. + * @pmu: The PMU for the IDs. * @ids: the metric IDs to match. * @metric_evlist: the list of perf events. * @out_metric_events: holds the created metric events array. */ -static int setup_metric_events(struct hashmap *ids, +static int setup_metric_events(const char *pmu, struct hashmap *ids, struct evlist *metric_evlist, struct evsel ***out_metric_events) { @@ -271,6 +274,7 @@ static int setup_metric_events(struct hashmap *ids, const char *metric_id; struct evsel *ev; size_t ids_size, matched_events, i; + bool all_pmus = !strcmp(pmu, "all") || !perf_pmu__is_hybrid(pmu); *out_metric_events = NULL; ids_size = hashmap__size(ids); @@ -283,6 +287,11 @@ static int setup_metric_events(struct hashmap *ids, evlist__for_each_entry(metric_evlist, ev) { struct expr_id_data *val_ptr; + /* Don't match events for the wrong hybrid PMU. */ + if (!all_pmus && ev->pmu_name && + perf_pmu__is_hybrid(ev->pmu_name) && + strcmp(ev->pmu_name, pmu)) + continue; /* * Check for duplicate events with the same name. For * example, uncore_imc/cas_count_read/ will turn into 6 @@ -298,6 +307,7 @@ static int setup_metric_events(struct hashmap *ids, * about this event. 
*/ if (hashmap__find(ids, metric_id, &val_ptr)) { + pr_debug("Matched metric-id %s to %s\n", metric_id, evsel__name(ev)); metric_events[matched_events++] = ev; if (matched_events >= ids_size) @@ -355,8 +365,13 @@ static bool match_metric(const char *n, const char *list) return false; } -static bool match_pm_metric(const struct pmu_metric *pm, const char *metric) +static bool match_pm_metric(const struct pmu_metric *pm, const char *pmu, const char *metric) { + const char *pm_pmu = pm->pmu ?: "cpu"; + + if (strcmp(pmu, "all") && strcmp(pm_pmu, pmu)) + return false; + return match_metric(pm->metric_group, metric) || match_metric(pm->metric_name, metric); } @@ -766,6 +781,7 @@ struct visited_metric { struct metricgroup_add_iter_data { struct list_head *metric_list; + const char *pmu; const char *metric_name; const char *modifier; int *ret; @@ -779,7 +795,8 @@ struct metricgroup_add_iter_data { const struct pmu_metrics_table *table; }; -static bool metricgroup__find_metric(const char *metric, +static bool metricgroup__find_metric(const char *pmu, + const char *metric, const struct pmu_metrics_table *table, struct pmu_metric *pm); @@ -798,6 +815,7 @@ static int add_metric(struct list_head *metric_list, * resolve_metric - Locate metrics within the root metric and recursively add * references to them. * @metric_list: The list the metric is added to. + * @pmu: The PMU name to resolve metrics on, or "all" for all PMUs. * @modifier: if non-null event modifiers like "u". * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the @@ -813,6 +831,7 @@ static int add_metric(struct list_head *metric_list, * architecture perf is running upon. */ static int resolve_metric(struct list_head *metric_list, + const char *pmu, const char *modifier, bool metric_no_group, bool metric_no_threshold, @@ -842,7 +861,7 @@ static int resolve_metric(struct list_head *metric_list, hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { struct pmu_metric pm; - if (metricgroup__find_metric(cur->pkey, table, &pm)) { + if (metricgroup__find_metric(pmu, cur->pkey, table, &pm)) { pending = realloc(pending, (pending_cnt + 1) * sizeof(struct to_resolve)); if (!pending) @@ -993,9 +1012,12 @@ static int __add_metric(struct list_head *metric_list, } if (!ret) { /* Resolve referenced metrics. 
*/ - ret = resolve_metric(metric_list, modifier, metric_no_group, + const char *pmu = pm->pmu ?: "cpu"; + + ret = resolve_metric(metric_list, pmu, modifier, metric_no_group, metric_no_threshold, user_requested_cpu_list, - system_wide, root_metric, &visited_node, table); + system_wide, root_metric, &visited_node, + table); } if (ret) { if (is_root) @@ -1008,6 +1030,7 @@ static int __add_metric(struct list_head *metric_list, } struct metricgroup__find_metric_data { + const char *pmu; const char *metric; struct pmu_metric *pm; }; @@ -1017,6 +1040,10 @@ static int metricgroup__find_metric_callback(const struct pmu_metric *pm, void *vdata) { struct metricgroup__find_metric_data *data = vdata; + const char *pm_pmu = pm->pmu ?: "cpu"; + + if (strcmp(data->pmu, "all") && strcmp(pm_pmu, data->pmu)) + return 0; if (!match_metric(pm->metric_name, data->metric)) return 0; @@ -1025,11 +1052,13 @@ static int metricgroup__find_metric_callback(const struct pmu_metric *pm, return 1; } -static bool metricgroup__find_metric(const char *metric, +static bool metricgroup__find_metric(const char *pmu, + const char *metric, const struct pmu_metrics_table *table, struct pmu_metric *pm) { struct metricgroup__find_metric_data data = { + .pmu = pmu, .metric = metric, .pm = pm, }; @@ -1083,7 +1112,7 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm, struct metricgroup_add_iter_data *d = data; int ret; - if (!match_pm_metric(pm, d->metric_name)) + if (!match_pm_metric(pm, d->pmu, d->metric_name)) return 0; ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group, @@ -1128,6 +1157,7 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, struct metricgroup__add_metric_data { struct list_head *list; + const char *pmu; const char *metric_name; const char *modifier; const char *user_requested_cpu_list; @@ -1144,7 +1174,7 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm, struct metricgroup__add_metric_data *data = vdata; int ret = 0; - if (pm->metric_expr && match_pm_metric(pm, data->metric_name)) { + if (pm->metric_expr && match_pm_metric(pm, data->pmu, data->metric_name)) { bool metric_no_group = data->metric_no_group || match_metric(data->metric_name, pm->metricgroup_no_group); @@ -1159,6 +1189,7 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm, /** * metricgroup__add_metric - Find and add a metric, or a metric group. + * @pmu: The PMU name to search for metrics on, or "all" for all PMUs. * @metric_name: The name of the metric or metric group. For example, "IPC" * could be the name of a metric and "TopDownL1" the name of a * metric group. @@ -1172,7 +1203,7 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm, * @table: The table that is searched for metrics, most commonly the table for the * architecture perf is running upon. 
 */
-static int metricgroup__add_metric(const char *metric_name, const char *modifier,
+static int metricgroup__add_metric(const char *pmu, const char *metric_name, const char *modifier,
 				   bool metric_no_group, bool metric_no_threshold,
 				   const char *user_requested_cpu_list,
 				   bool system_wide,
@@ -1186,6 +1217,7 @@
 {
 	struct metricgroup__add_metric_data data = {
 		.list = &list,
+		.pmu = pmu,
 		.metric_name = metric_name,
 		.modifier = modifier,
 		.metric_no_group = metric_no_group,
@@ -1210,6 +1242,7 @@
 			.fn = metricgroup__add_metric_sys_event_iter,
 			.data = (void *) &(struct metricgroup_add_iter_data) {
 				.metric_list = &list,
+				.pmu = pmu,
 				.metric_name = metric_name,
 				.modifier = modifier,
 				.metric_no_group = metric_no_group,
@@ -1239,6 +1272,7 @@ out:
 /**
  * metricgroup__add_metric_list - Find and add metrics, or metric groups,
  *                                specified in a list.
+ * @pmu: A PMU to restrict the metrics to, or "all" for all PMUs.
  * @list: the list of metrics or metric groups. For example, "IPC,CPI,TopDownL1"
  *        would match the IPC and CPI metrics, and TopDownL1 would match all
  *        the metrics in the TopDownL1 group.
@@ -1251,7 +1285,8 @@ out:
  * @table: The table that is searched for metrics, most commonly the table for the
  * architecture perf is running upon.
  */
-static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
+static int metricgroup__add_metric_list(const char *pmu, const char *list,
+					bool metric_no_group,
 					bool metric_no_threshold,
 					const char *user_requested_cpu_list,
 					bool system_wide, struct list_head *metric_list,
@@ -1270,7 +1305,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
 		if (modifier)
 			*modifier++ = '\0';
 
-		ret = metricgroup__add_metric(metric_name, modifier,
+		ret = metricgroup__add_metric(pmu, metric_name, modifier,
 					      metric_no_group, metric_no_threshold,
 					      user_requested_cpu_list,
 					      system_wide, metric_list, table);
@@ -1441,8 +1476,8 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
 	}
 	pr_debug("Parsing metric events '%s'\n", events.buf);
 	parse_events_error__init(&parse_error);
-	ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu,
-			     /*warn_if_reordered=*/false);
+	ret = __parse_events(parsed_evlist, events.buf, /*pmu_filter=*/NULL,
+			     &parse_error, fake_pmu, /*warn_if_reordered=*/false);
 	if (ret) {
 		parse_events_error__print(&parse_error, events.buf);
 		goto err_out;
@@ -1460,7 +1495,8 @@ err_out:
 	return ret;
 }
 
-static int parse_groups(struct evlist *perf_evlist, const char *str,
+static int parse_groups(struct evlist *perf_evlist,
+			const char *pmu, const char *str,
 			bool metric_no_group,
 			bool metric_no_merge,
 			bool metric_no_threshold,
@@ -1478,7 +1514,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
 	if (metric_events_list->nr_entries == 0)
 		metricgroup__rblist_init(metric_events_list);
 
-	ret = metricgroup__add_metric_list(str, metric_no_group, metric_no_threshold,
+	ret = metricgroup__add_metric_list(pmu, str, metric_no_group, metric_no_threshold,
 					   user_requested_cpu_list,
 					   system_wide, &metric_list, table);
 	if (ret)
@@ -1535,6 +1571,11 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
 			    strcmp(m->modifier, n->modifier)))
 				continue;
 
+			if ((!m->pmu && n->pmu) ||
+			    (m->pmu && !n->pmu) ||
+			    (m->pmu && n->pmu && strcmp(m->pmu, n->pmu)))
+				continue;
+
 			if (expr__subset_of_ids(n->pctx, m->pctx)) {
 				pr_debug("Events in
'%s' fully contained within '%s'\n", m->metric_name, n->metric_name); @@ -1552,9 +1593,10 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, metric_evlist = m->evlist; } - ret = setup_metric_events(m->pctx->ids, metric_evlist, &metric_events); + ret = setup_metric_events(fake_pmu ? "all" : m->pmu, m->pctx->ids, + metric_evlist, &metric_events); if (ret) { - pr_debug("Cannot resolve IDs for %s: %s\n", + pr_err("Cannot resolve IDs for %s: %s\n", m->metric_name, m->metric_expr); goto out; } @@ -1610,6 +1652,7 @@ out: } int metricgroup__parse_groups(struct evlist *perf_evlist, + const char *pmu, const char *str, bool metric_no_group, bool metric_no_merge, @@ -1623,7 +1666,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, if (!table) return -EINVAL; - return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge, + return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge, metric_no_threshold, user_requested_cpu_list, system_wide, /*fake_pmu=*/NULL, metric_events, table); } @@ -1633,7 +1676,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, const char *str, struct rblist *metric_events) { - return parse_groups(evlist, str, + return parse_groups(evlist, "all", str, /*metric_no_group=*/false, /*metric_no_merge=*/false, /*metric_no_threshold=*/false, @@ -1642,28 +1685,32 @@ int metricgroup__parse_groups_test(struct evlist *evlist, &perf_pmu__fake, metric_events, table); } +struct metricgroup__has_metric_data { + const char *pmu; + const char *metric; +}; static int metricgroup__has_metric_callback(const struct pmu_metric *pm, const struct pmu_metrics_table *table __maybe_unused, void *vdata) { - const char *metric = vdata; - - if (match_metric(pm->metric_name, metric) || - match_metric(pm->metric_group, metric)) - return 1; + struct metricgroup__has_metric_data *data = vdata; - return 0; + return match_pm_metric(pm, data->pmu, data->metric) ? 1 : 0; } -bool metricgroup__has_metric(const char *metric) +bool metricgroup__has_metric(const char *pmu, const char *metric) { const struct pmu_metrics_table *table = pmu_metrics_table__find(); + struct metricgroup__has_metric_data data = { + .pmu = pmu, + .metric = metric, + }; if (!table) return false; - return pmu_metrics_table_for_each_metric(table, metricgroup__has_metric_callback, - (void *)metric) ? true : false; + return pmu_metrics_table_for_each_metric(table, metricgroup__has_metric_callback, &data) + ? 
true : false; } static int metricgroup__topdown_max_level_callback(const struct pmu_metric *pm, diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 77472e35705e..bf18274c15df 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -67,6 +67,7 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, bool create); int metricgroup__parse_groups(struct evlist *perf_evlist, + const char *pmu, const char *str, bool metric_no_group, bool metric_no_merge, @@ -80,7 +81,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, struct rblist *metric_events); void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); -bool metricgroup__has_metric(const char *metric); +bool metricgroup__has_metric(const char *pmu, const char *metric); unsigned int metricgroups__topdown_max_level(void); int arch_get_runtimeparam(const struct pmu_metric *pm); void metricgroup__rblist_exit(struct rblist *metric_events); diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c deleted file mode 100644 index 7c9f9150bad5..000000000000 --- a/tools/perf/util/parse-events-hybrid.c +++ /dev/null @@ -1,214 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/err.h> -#include <linux/zalloc.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/param.h> -#include "evlist.h" -#include "evsel.h" -#include "parse-events.h" -#include "parse-events-hybrid.h" -#include "debug.h" -#include "pmu.h" -#include "pmu-hybrid.h" -#include "perf.h" - -static void config_hybrid_attr(struct perf_event_attr *attr, - int type, int pmu_type) -{ - /* - * attr.config layout for type PERF_TYPE_HARDWARE and - * PERF_TYPE_HW_CACHE - * - * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA - * AA: hardware event ID - * EEEEEEEE: PMU type ID - * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB - * BB: hardware cache ID - * CC: hardware cache op ID - * DD: hardware cache op result ID - * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. - */ - attr->type = type; - attr->config = (attr->config & PERF_HW_EVENT_MASK) | - ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT); -} - -static int create_event_hybrid(__u32 config_type, int *idx, - struct list_head *list, - struct perf_event_attr *attr, const char *name, - const char *metric_id, - struct list_head *config_terms, - struct perf_pmu *pmu) -{ - struct evsel *evsel; - __u32 type = attr->type; - __u64 config = attr->config; - - config_hybrid_attr(attr, config_type, pmu->type); - - /* - * Some hybrid hardware cache events are only available on one CPU - * PMU. For example, the 'L1-dcache-load-misses' is only available - * on cpu_core, while the 'L1-icache-loads' is only available on - * cpu_atom. We need to remove "not supported" hybrid cache events. 
- */ - if (attr->type == PERF_TYPE_HW_CACHE - && !is_event_supported(attr->type, attr->config)) - return 0; - - evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, - pmu, config_terms); - if (evsel) { - evsel->pmu_name = strdup(pmu->name); - if (!evsel->pmu_name) - return -ENOMEM; - } else - return -ENOMEM; - attr->type = type; - attr->config = config; - return 0; -} - -static int pmu_cmp(struct parse_events_state *parse_state, - struct perf_pmu *pmu) -{ - if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name) - return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name); - - if (parse_state->hybrid_pmu_name) - return strcmp(parse_state->hybrid_pmu_name, pmu->name); - - return 0; -} - -static int add_hw_hybrid(struct parse_events_state *parse_state, - struct list_head *list, struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms) -{ - struct perf_pmu *pmu; - int ret; - - perf_pmu__for_each_hybrid_pmu(pmu) { - LIST_HEAD(terms); - - if (pmu_cmp(parse_state, pmu)) - continue; - - copy_config_terms(&terms, config_terms); - ret = create_event_hybrid(PERF_TYPE_HARDWARE, - &parse_state->idx, list, attr, name, - metric_id, &terms, pmu); - free_config_terms(&terms); - if (ret) - return ret; - } - - return 0; -} - -static int create_raw_event_hybrid(int *idx, struct list_head *list, - struct perf_event_attr *attr, - const char *name, - const char *metric_id, - struct list_head *config_terms, - struct perf_pmu *pmu) -{ - struct evsel *evsel; - - attr->type = pmu->type; - evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, - pmu, config_terms); - if (evsel) - evsel->pmu_name = strdup(pmu->name); - else - return -ENOMEM; - - return 0; -} - -static int add_raw_hybrid(struct parse_events_state *parse_state, - struct list_head *list, struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms) -{ - struct perf_pmu *pmu; - int ret; - - perf_pmu__for_each_hybrid_pmu(pmu) { - LIST_HEAD(terms); - - if (pmu_cmp(parse_state, pmu)) - continue; - - copy_config_terms(&terms, config_terms); - ret = create_raw_event_hybrid(&parse_state->idx, list, attr, - name, metric_id, &terms, pmu); - free_config_terms(&terms); - if (ret) - return ret; - } - - return 0; -} - -int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, - struct list_head *list, - struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms, - bool *hybrid) -{ - *hybrid = false; - if (attr->type == PERF_TYPE_SOFTWARE) - return 0; - - if (!perf_pmu__has_hybrid()) - return 0; - - *hybrid = true; - if (attr->type != PERF_TYPE_RAW) { - return add_hw_hybrid(parse_state, list, attr, name, metric_id, - config_terms); - } - - return add_raw_hybrid(parse_state, list, attr, name, metric_id, - config_terms); -} - -int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, - const char *name, - const char *metric_id, - struct list_head *config_terms, - bool *hybrid, - struct parse_events_state *parse_state) -{ - struct perf_pmu *pmu; - int ret; - - *hybrid = false; - if (!perf_pmu__has_hybrid()) - return 0; - - *hybrid = true; - perf_pmu__for_each_hybrid_pmu(pmu) { - LIST_HEAD(terms); - - if (pmu_cmp(parse_state, pmu)) - continue; - - copy_config_terms(&terms, config_terms); - ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list, - attr, name, metric_id, &terms, pmu); - free_config_terms(&terms); - if 
(ret) - return ret; - } - - return 0; -} diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h deleted file mode 100644 index cbc05fec02a2..000000000000 --- a/tools/perf/util/parse-events-hybrid.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PERF_PARSE_EVENTS_HYBRID_H -#define __PERF_PARSE_EVENTS_HYBRID_H - -#include <linux/list.h> -#include <stdbool.h> -#include <linux/types.h> -#include <linux/perf_event.h> -#include <string.h> - -int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, - struct list_head *list, - struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms, - bool *hybrid); - -int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms, - bool *hybrid, - struct parse_events_state *parse_state); - -#endif /* __PERF_PARSE_EVENTS_HYBRID_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 34ba840ae19a..b93264f8a37c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -25,38 +25,18 @@ #include "util/parse-branch-options.h" #include "util/evsel_config.h" #include "util/event.h" -#include "util/parse-events-hybrid.h" -#include "util/pmu-hybrid.h" #include "util/bpf-filter.h" #include "util/util.h" #include "tracepoint.h" -#include "thread_map.h" #define MAX_NAME_LEN 100 -struct perf_pmu_event_symbol { - char *symbol; - enum perf_pmu_event_symbol_type type; -}; - #ifdef PARSER_DEBUG extern int parse_events_debug; #endif int parse_events_parse(void *parse_state, void *scanner); static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused); -static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state, - const char *str, char *pmu_name, - struct list_head *list); - -static struct perf_pmu_event_symbol *perf_pmu_events_list; -/* - * The variable indicates the number of supported pmu event symbols. - * 0 means not initialized and ready to init - * -1 means failed to init, don't try anymore - * >0 is the number of supported pmu event symbols - */ -static int perf_pmu_events_list_num; struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { @@ -152,44 +132,6 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { }, }; -bool is_event_supported(u8 type, u64 config) -{ - bool ret = true; - int open_return; - struct evsel *evsel; - struct perf_event_attr attr = { - .type = type, - .config = config, - .disabled = 1, - }; - struct perf_thread_map *tmap = thread_map__new_by_tid(0); - - if (tmap == NULL) - return false; - - evsel = evsel__new(&attr); - if (evsel) { - open_return = evsel__open(evsel, NULL, tmap); - ret = open_return >= 0; - - if (open_return == -EACCES) { - /* - * This happens if the paranoid value - * /proc/sys/kernel/perf_event_paranoid is set to 2 - * Re-run with exclude_kernel set; we don't do that - * by default as some ARM machines do not support it. 
-			 *
-			 */
-			evsel->core.attr.exclude_kernel = 1;
-			ret = evsel__open(evsel, NULL, tmap) >= 0;
-		}
-		evsel__delete(evsel);
-	}
-
-	perf_thread_map__put(tmap);
-	return ret;
-}
-
 const char *event_type(int type)
 {
 	switch (type) {
@@ -236,6 +178,57 @@ static char *get_config_name(struct list_head *head_terms)
 	return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME);
 }
 
+/**
+ * fix_raw - For each raw term see if there is an event (aka alias) in pmu that
+ *           matches the raw's string value. If the string value matches an
+ *           event then change the term to be an event, if not then change it to
+ *           be a config term. For example, "read" may be an event of the PMU or
+ *           a raw hex encoding of 0xead. The fix-up is done late so the PMU of
+ *           the event can be determined and we don't need to scan all PMUs
+ *           ahead-of-time.
+ * @config_terms: the list of terms that may contain a raw term.
+ * @pmu: the PMU to scan for events from.
+ */
+static void fix_raw(struct list_head *config_terms, struct perf_pmu *pmu)
+{
+	struct parse_events_term *term;
+
+	list_for_each_entry(term, config_terms, list) {
+		struct perf_pmu_alias *alias;
+		bool matched = false;
+
+		if (term->type_term != PARSE_EVENTS__TERM_TYPE_RAW)
+			continue;
+
+		list_for_each_entry(alias, &pmu->aliases, list) {
+			if (!strcmp(alias->name, term->val.str)) {
+				free(term->config);
+				term->config = term->val.str;
+				term->type_val = PARSE_EVENTS__TERM_TYPE_NUM;
+				term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
+				term->val.num = 1;
+				term->no_value = true;
+				matched = true;
+				break;
+			}
+		}
+		if (!matched) {
+			u64 num;
+
+			free(term->config);
+			term->config = strdup("config");
+			errno = 0;
+			num = strtoull(term->val.str + 1, NULL, 16);
+			assert(errno == 0);
+			free(term->val.str);
+			term->type_val = PARSE_EVENTS__TERM_TYPE_NUM;
+			term->type_term = PARSE_EVENTS__TERM_TYPE_CONFIG;
+			term->val.num = num;
+			term->no_value = false;
+		}
+	}
+}
+
 static struct evsel *
 __add_event(struct list_head *list, int *idx,
 	    struct perf_event_attr *attr,
@@ -269,6 +262,7 @@ __add_event(struct list_head *list, int *idx,
 	evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
 	evsel->auto_merge_stats = auto_merge_stats;
 	evsel->pmu = pmu;
+	evsel->pmu_name = pmu && pmu->name ? strdup(pmu->name) : NULL;
 
 	if (name)
 		evsel->name = strdup(name);
@@ -328,18 +322,27 @@ static int add_event_tool(struct list_head *list, int *idx,
 	return 0;
 }
 
-static int parse_aliases(char *str, const char *const names[][EVSEL__MAX_ALIASES], int size)
+/**
+ * parse_aliases - search names for entries beginning or equalling str ignoring
+ *                 case. If multiple entries in names match str then the longest
+ *                 is chosen.
+ * @str: The needle to look for.
+ * @names: The haystack to search.
+ * @size: The size of the haystack.
+ * @longest: Out argument giving the length of the matching entry.
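+ *
+ * For example (assuming the usual evsel__hw_cache alias table): for
+ * str = "L1-dcache-load-misses" both "l1-d" and "L1-dcache" match, so
+ * the longer "L1-dcache" wins and *longest is set to 9, leaving the
+ * caller to continue parsing at "load-misses".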
+ */ +static int parse_aliases(const char *str, const char *const names[][EVSEL__MAX_ALIASES], int size, + int *longest) { - int i, j; - int n, longest = -1; + *longest = -1; + for (int i = 0; i < size; i++) { + for (int j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) { + int n = strlen(names[i][j]); - for (i = 0; i < size; i++) { - for (j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) { - n = strlen(names[i][j]); - if (n > longest && !strncasecmp(str, names[i][j], n)) - longest = n; + if (n > *longest && !strncasecmp(str, names[i][j], n)) + *longest = n; } - if (longest > 0) + if (*longest > 0) return i; } @@ -357,52 +360,52 @@ static int config_attr(struct perf_event_attr *attr, struct parse_events_error *err, config_term_func_t config_term); -int parse_events_add_cache(struct list_head *list, int *idx, - char *type, char *op_result1, char *op_result2, - struct parse_events_error *err, - struct list_head *head_config, - struct parse_events_state *parse_state) +/** + * parse_events__decode_legacy_cache - Search name for the legacy cache event + * name composed of 1, 2 or 3 hyphen + * separated sections. The first section is + * the cache type while the others are the + * optional op and optional result. To make + * life hard the names in the table also + * contain hyphens and the longest name + * should always be selected. + */ +int parse_events__decode_legacy_cache(const char *name, int pmu_type, __u64 *config) { - struct perf_event_attr attr; - LIST_HEAD(config_terms); - char name[MAX_NAME_LEN]; - const char *config_name, *metric_id; - int cache_type = -1, cache_op = -1, cache_result = -1; - char *op_result[2] = { op_result1, op_result2 }; - int i, n, ret; - bool hybrid; + int len, cache_type = -1, cache_op = -1, cache_result = -1; + const char *name_end = &name[strlen(name) + 1]; + const char *str = name; - /* - * No fallback - if we cannot get a clear cache type - * then bail out: - */ - cache_type = parse_aliases(type, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX); + cache_type = parse_aliases(str, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX, &len); if (cache_type == -1) return -EINVAL; - - config_name = get_config_name(head_config); - n = snprintf(name, MAX_NAME_LEN, "%s", type); - - for (i = 0; (i < 2) && (op_result[i]); i++) { - char *str = op_result[i]; - - n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str); - - if (cache_op == -1) { + str += len + 1; + + if (str < name_end) { + cache_op = parse_aliases(str, evsel__hw_cache_op, + PERF_COUNT_HW_CACHE_OP_MAX, &len); + if (cache_op >= 0) { + if (!evsel__is_cache_op_valid(cache_type, cache_op)) + return -EINVAL; + str += len + 1; + } else { + cache_result = parse_aliases(str, evsel__hw_cache_result, + PERF_COUNT_HW_CACHE_RESULT_MAX, &len); + if (cache_result >= 0) + str += len + 1; + } + } + if (str < name_end) { + if (cache_op < 0) { cache_op = parse_aliases(str, evsel__hw_cache_op, - PERF_COUNT_HW_CACHE_OP_MAX); + PERF_COUNT_HW_CACHE_OP_MAX, &len); if (cache_op >= 0) { if (!evsel__is_cache_op_valid(cache_type, cache_op)) return -EINVAL; - continue; } - } - - if (cache_result == -1) { + } else if (cache_result < 0) { cache_result = parse_aliases(str, evsel__hw_cache_result, - PERF_COUNT_HW_CACHE_RESULT_MAX); - if (cache_result >= 0) - continue; + PERF_COUNT_HW_CACHE_RESULT_MAX, &len); } } @@ -418,33 +421,73 @@ int parse_events_add_cache(struct list_head *list, int *idx, if (cache_result == -1) cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS; - memset(&attr, 0, sizeof(attr)); - attr.config = cache_type | (cache_op << 8) | 
(cache_result << 16); - attr.type = PERF_TYPE_HW_CACHE; + *config = ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT) | + cache_type | (cache_op << 8) | (cache_result << 16); + return 0; +} - if (head_config) { - if (config_attr(&attr, head_config, err, - config_term_common)) - return -EINVAL; +/** + * parse_events__filter_pmu - returns false if a wildcard PMU should be + * considered, true if it should be filtered. + */ +bool parse_events__filter_pmu(const struct parse_events_state *parse_state, + const struct perf_pmu *pmu) +{ + if (parse_state->pmu_filter == NULL) + return false; - if (get_config_terms(head_config, &config_terms)) + if (pmu->name == NULL) + return true; + + return strcmp(parse_state->pmu_filter, pmu->name) != 0; +} + +int parse_events_add_cache(struct list_head *list, int *idx, const char *name, + struct parse_events_state *parse_state, + struct list_head *head_config) +{ + struct perf_pmu *pmu = NULL; + bool found_supported = false; + const char *config_name = get_config_name(head_config); + const char *metric_id = get_config_metric_id(head_config); + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + LIST_HEAD(config_terms); + struct perf_event_attr attr; + int ret; + + /* Skip unsupported PMUs. */ + if (!perf_pmu__supports_legacy_cache(pmu)) + continue; + + if (parse_events__filter_pmu(parse_state, pmu)) + continue; + + memset(&attr, 0, sizeof(attr)); + attr.type = PERF_TYPE_HW_CACHE; + + ret = parse_events__decode_legacy_cache(name, pmu->type, &attr.config); + if (ret) + return ret; + + found_supported = true; + + if (head_config) { + if (config_attr(&attr, head_config, parse_state->error, config_term_common)) + return -EINVAL; + + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + } + + if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, + metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, + /*cpu_list=*/NULL) == NULL) return -ENOMEM; - } - metric_id = get_config_metric_id(head_config); - ret = parse_events__add_cache_hybrid(list, idx, &attr, - config_name ? : name, - metric_id, - &config_terms, - &hybrid, parse_state); - if (hybrid) - goto out_free_terms; - - ret = add_event(list, idx, &attr, config_name ? : name, metric_id, - &config_terms); -out_free_terms: - free_config_terms(&config_terms); - return ret; + free_config_terms(&config_terms); + } + return found_supported ? 
0 : -EINVAL; } #ifdef HAVE_LIBTRACEEVENT @@ -968,6 +1011,9 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output", [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", [PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id", + [PARSE_EVENTS__TERM_TYPE_RAW] = "raw", + [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache", + [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware", }; static bool config_term_shrinked; @@ -1089,6 +1135,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_METRIC_ID: CHECK_TYPE_VAL(STR); break; + case PARSE_EVENTS__TERM_TYPE_RAW: + CHECK_TYPE_VAL(STR); + break; case PARSE_EVENTS__TERM_TYPE_MAX_STACK: CHECK_TYPE_VAL(NUM); break; @@ -1142,15 +1191,36 @@ static int config_term_pmu(struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_error *err) { + if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) { + const struct perf_pmu *pmu = perf_pmu__find_by_type(attr->type); + + if (perf_pmu__supports_legacy_cache(pmu)) { + attr->type = PERF_TYPE_HW_CACHE; + return parse_events__decode_legacy_cache(term->config, pmu->type, + &attr->config); + } else + term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + } + if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) { + const struct perf_pmu *pmu = perf_pmu__find_by_type(attr->type); + + if (!pmu) { + pr_debug("Failed to find PMU for type %d", attr->type); + return -EINVAL; + } + attr->type = PERF_TYPE_HARDWARE; + attr->config = ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT) | term->val.num; + return 0; + } if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || - term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG) + term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG) { /* * Always succeed for sysfs terms, as we dont know * at this point what type they need to have. */ return 0; - else - return config_term_common(attr, term, err); + } + return config_term_common(attr, term, err); } #ifdef HAVE_LIBTRACEEVENT @@ -1364,15 +1434,14 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, #endif } -int parse_events_add_numeric(struct parse_events_state *parse_state, - struct list_head *list, - u32 type, u64 config, - struct list_head *head_config) +static int __parse_events_add_numeric(struct parse_events_state *parse_state, + struct list_head *list, + struct perf_pmu *pmu, u32 type, u64 config, + struct list_head *head_config) { struct perf_event_attr attr; LIST_HEAD(config_terms); const char *name, *metric_id; - bool hybrid; int ret; memset(&attr, 0, sizeof(attr)); @@ -1390,19 +1459,44 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, name = get_config_name(head_config); metric_id = get_config_metric_id(head_config); - ret = parse_events__add_numeric_hybrid(parse_state, list, &attr, - name, metric_id, - &config_terms, &hybrid); - if (hybrid) - goto out_free_terms; - - ret = add_event(list, &parse_state->idx, &attr, name, metric_id, - &config_terms); -out_free_terms: + ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, + metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, + /*cpu_list=*/NULL) ? 
0 : -ENOMEM; free_config_terms(&config_terms); return ret; } +int parse_events_add_numeric(struct parse_events_state *parse_state, + struct list_head *list, + u32 type, u64 config, + struct list_head *head_config, + bool wildcard) +{ + struct perf_pmu *pmu = NULL; + bool found_supported = false; + + if (!wildcard) + return __parse_events_add_numeric(parse_state, list, /*pmu=*/NULL, + type, config, head_config); + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + int ret; + + if (!perf_pmu__supports_wildcard_numeric(pmu)) + continue; + + if (parse_events__filter_pmu(parse_state, pmu)) + continue; + + found_supported = true; + ret = __parse_events_add_numeric(parse_state, list, pmu, pmu->type, + config, head_config); + if (ret) + return ret; + } + return found_supported ? 0 : -EINVAL; +} + int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, int tool_event) @@ -1422,33 +1516,6 @@ static bool config_term_percore(struct list_head *config_terms) return false; } -static int parse_events__inside_hybrid_pmu(struct parse_events_state *parse_state, - struct list_head *list, char *name, - struct list_head *head_config) -{ - struct parse_events_term *term; - int ret = -1; - - if (parse_state->fake_pmu || !head_config || list_empty(head_config) || - !perf_pmu__is_hybrid(name)) { - return -1; - } - - /* - * More than one term in list. - */ - if (head_config->next && head_config->next->next != head_config) - return -1; - - term = list_first_entry(head_config, struct parse_events_term, list); - if (term && term->config && strcmp(term->config, "event")) { - ret = parse_events__with_hybrid_pmu(parse_state, term->config, - name, list); - } - - return ret; -} - int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, @@ -1485,6 +1552,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, parse_events_error__handle(err, 0, err_str, NULL); return -EINVAL; } + if (head_config) + fix_raw(head_config, pmu); if (pmu->default_config) { memcpy(&attr, pmu->default_config, @@ -1492,20 +1561,15 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, } else { memset(&attr, 0, sizeof(attr)); } + attr.type = pmu->type; if (!head_config) { - attr.type = pmu->type; evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, /*name=*/NULL, /*metric_id=*/NULL, pmu, /*config_terms=*/NULL, auto_merge_stats, /*cpu_list=*/NULL); - if (evsel) { - evsel->pmu_name = name ? strdup(name) : NULL; - return 0; - } else { - return -ENOMEM; - } + return evsel ? 0 : -ENOMEM; } if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info)) @@ -1541,11 +1605,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms)) return -ENOMEM; - if (!parse_events__inside_hybrid_pmu(parse_state, list, name, - head_config)) { - return 0; - } - if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { free_config_terms(&config_terms); return -EINVAL; @@ -1561,7 +1620,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (evsel->name) evsel->use_config_name = true; - evsel->pmu_name = name ? 
strdup(name) : NULL; evsel->percore = config_term_percore(&evsel->config_terms); if (parse_state->fake_pmu) @@ -1617,13 +1675,19 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, while ((pmu = perf_pmu__scan(pmu)) != NULL) { struct perf_pmu_alias *alias; + bool auto_merge_stats; + + if (parse_events__filter_pmu(parse_state, pmu)) + continue; + + auto_merge_stats = perf_pmu__auto_merge_stats(pmu); list_for_each_entry(alias, &pmu->aliases, list) { if (!strcasecmp(alias->name, str)) { parse_events_copy_term_list(head, &orig_head); if (!parse_events_add_pmu(parse_state, list, pmu->name, orig_head, - /*auto_merge_stats=*/true)) { + auto_merge_stats)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; @@ -1875,180 +1939,6 @@ int parse_events_name(struct list_head *list, const char *name) return 0; } -static int -comp_pmu(const void *p1, const void *p2) -{ - struct perf_pmu_event_symbol *pmu1 = (struct perf_pmu_event_symbol *) p1; - struct perf_pmu_event_symbol *pmu2 = (struct perf_pmu_event_symbol *) p2; - - return strcasecmp(pmu1->symbol, pmu2->symbol); -} - -static void perf_pmu__parse_cleanup(void) -{ - if (perf_pmu_events_list_num > 0) { - struct perf_pmu_event_symbol *p; - int i; - - for (i = 0; i < perf_pmu_events_list_num; i++) { - p = perf_pmu_events_list + i; - zfree(&p->symbol); - } - zfree(&perf_pmu_events_list); - perf_pmu_events_list_num = 0; - } -} - -#define SET_SYMBOL(str, stype) \ -do { \ - p->symbol = str; \ - if (!p->symbol) \ - goto err; \ - p->type = stype; \ -} while (0) - -/* - * Read the pmu events list from sysfs - * Save it into perf_pmu_events_list - */ -static void perf_pmu__parse_init(void) -{ - - struct perf_pmu *pmu = NULL; - struct perf_pmu_alias *alias; - int len = 0; - - pmu = NULL; - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - list_for_each_entry(alias, &pmu->aliases, list) { - char *tmp = strchr(alias->name, '-'); - - if (tmp) { - char *tmp2 = NULL; - - tmp2 = strchr(tmp + 1, '-'); - len++; - if (tmp2) - len++; - } - - len++; - } - } - - if (len == 0) { - perf_pmu_events_list_num = -1; - return; - } - perf_pmu_events_list = malloc(sizeof(struct perf_pmu_event_symbol) * len); - if (!perf_pmu_events_list) - return; - perf_pmu_events_list_num = len; - - len = 0; - pmu = NULL; - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - list_for_each_entry(alias, &pmu->aliases, list) { - struct perf_pmu_event_symbol *p = perf_pmu_events_list + len; - char *tmp = strchr(alias->name, '-'); - char *tmp2 = NULL; - - if (tmp) - tmp2 = strchr(tmp + 1, '-'); - if (tmp2) { - SET_SYMBOL(strndup(alias->name, tmp - alias->name), - PMU_EVENT_SYMBOL_PREFIX); - p++; - tmp++; - SET_SYMBOL(strndup(tmp, tmp2 - tmp), PMU_EVENT_SYMBOL_SUFFIX); - p++; - SET_SYMBOL(strdup(++tmp2), PMU_EVENT_SYMBOL_SUFFIX2); - len += 3; - } else if (tmp) { - SET_SYMBOL(strndup(alias->name, tmp - alias->name), - PMU_EVENT_SYMBOL_PREFIX); - p++; - SET_SYMBOL(strdup(++tmp), PMU_EVENT_SYMBOL_SUFFIX); - len += 2; - } else { - SET_SYMBOL(strdup(alias->name), PMU_EVENT_SYMBOL); - len++; - } - } - } - qsort(perf_pmu_events_list, len, - sizeof(struct perf_pmu_event_symbol), comp_pmu); - - return; -err: - perf_pmu__parse_cleanup(); -} - -/* - * This function injects special term in - * perf_pmu_events_list so the test code - * can check on this functionality. 
- */ -int perf_pmu__test_parse_init(void) -{ - struct perf_pmu_event_symbol *list, *tmp, symbols[] = { - {(char *)"read", PMU_EVENT_SYMBOL}, - {(char *)"event", PMU_EVENT_SYMBOL_PREFIX}, - {(char *)"two", PMU_EVENT_SYMBOL_SUFFIX}, - {(char *)"hyphen", PMU_EVENT_SYMBOL_SUFFIX}, - {(char *)"hyph", PMU_EVENT_SYMBOL_SUFFIX2}, - }; - unsigned long i, j; - - tmp = list = malloc(sizeof(*list) * ARRAY_SIZE(symbols)); - if (!list) - return -ENOMEM; - - for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) { - tmp->type = symbols[i].type; - tmp->symbol = strdup(symbols[i].symbol); - if (!tmp->symbol) - goto err_free; - } - - perf_pmu_events_list = list; - perf_pmu_events_list_num = ARRAY_SIZE(symbols); - - qsort(perf_pmu_events_list, ARRAY_SIZE(symbols), - sizeof(struct perf_pmu_event_symbol), comp_pmu); - return 0; - -err_free: - for (j = 0, tmp = list; j < i; j++, tmp++) - zfree(&tmp->symbol); - free(list); - return -ENOMEM; -} - -enum perf_pmu_event_symbol_type -perf_pmu__parse_check(const char *name) -{ - struct perf_pmu_event_symbol p, *r; - - /* scan kernel pmu events from sysfs if needed */ - if (perf_pmu_events_list_num == 0) - perf_pmu__parse_init(); - /* - * name "cpu" could be prefix of cpu-cycles or cpu// events. - * cpu-cycles has been handled by hardcode. - * So it must be cpu// events, not kernel pmu event. - */ - if ((perf_pmu_events_list_num <= 0) || !strcmp(name, "cpu")) - return PMU_EVENT_SYMBOL_ERR; - - p.symbol = strdup(name); - r = bsearch(&p, perf_pmu_events_list, - (size_t) perf_pmu_events_list_num, - sizeof(struct perf_pmu_event_symbol), comp_pmu); - zfree(&p.symbol); - return r ? r->type : PMU_EVENT_SYMBOL_ERR; -} - static int parse_events__scanner(const char *str, struct parse_events_state *parse_state) { @@ -2086,7 +1976,6 @@ int parse_events_terms(struct list_head *terms, const char *str) int ret; ret = parse_events__scanner(str, &parse_state); - perf_pmu__parse_cleanup(); if (!ret) { list_splice(parse_state.terms, terms); @@ -2098,33 +1987,6 @@ int parse_events_terms(struct list_head *terms, const char *str) return ret; } -static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state, - const char *str, char *pmu_name, - struct list_head *list) -{ - struct parse_events_state ps = { - .list = LIST_HEAD_INIT(ps.list), - .stoken = PE_START_EVENTS, - .hybrid_pmu_name = pmu_name, - .idx = parse_state->idx, - }; - int ret; - - ret = parse_events__scanner(str, &ps); - perf_pmu__parse_cleanup(); - - if (!ret) { - if (!list_empty(&ps.list)) { - list_splice(&ps.list, list); - parse_state->idx = ps.idx; - return 0; - } else - return -1; - } - - return ret; -} - __weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs) { /* Order by insertion index. 
*/ @@ -2259,7 +2121,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list) return idx_changed || num_leaders != orig_num_leaders; } -int __parse_events(struct evlist *evlist, const char *str, +int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter, struct parse_events_error *err, struct perf_pmu *fake_pmu, bool warn_if_reordered) { @@ -2270,11 +2132,12 @@ int __parse_events(struct evlist *evlist, const char *str, .evlist = evlist, .stoken = PE_START_EVENTS, .fake_pmu = fake_pmu, + .pmu_filter = pmu_filter, + .match_legacy_cache_terms = true, }; int ret; ret = parse_events__scanner(str, &parse_state); - perf_pmu__parse_cleanup(); if (!ret && list_empty(&parse_state.list)) { WARN_ONCE(true, "WARNING: event parser found nothing\n"); @@ -2451,12 +2314,13 @@ void parse_events_error__print(struct parse_events_error *err, int parse_events_option(const struct option *opt, const char *str, int unset __maybe_unused) { - struct evlist *evlist = *(struct evlist **)opt->value; + struct parse_events_option_args *args = opt->value; struct parse_events_error err; int ret; parse_events_error__init(&err); - ret = parse_events(evlist, str, &err); + ret = __parse_events(*args->evlistp, str, args->pmu_filter, &err, + /*fake_pmu=*/NULL, /*warn_if_reordered=*/true); if (ret) { parse_events_error__print(&err, str); @@ -2469,22 +2333,21 @@ int parse_events_option(const struct option *opt, const char *str, int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset) { - struct evlist **evlistp = opt->value; + struct parse_events_option_args *args = opt->value; int ret; - if (*evlistp == NULL) { - *evlistp = evlist__new(); + if (*args->evlistp == NULL) { + *args->evlistp = evlist__new(); - if (*evlistp == NULL) { + if (*args->evlistp == NULL) { fprintf(stderr, "Not enough memory to create evlist\n"); return -1; } } - ret = parse_events_option(opt, str, unset); if (ret) { - evlist__delete(*evlistp); - *evlistp = NULL; + evlist__delete(*args->evlistp); + *args->evlistp = NULL; } return ret; @@ -2682,29 +2545,13 @@ int parse_events_term__str(struct parse_events_term **term, return new_term(term, &temp, str, 0); } -int parse_events_term__sym_hw(struct parse_events_term **term, - char *config, unsigned idx) +int parse_events_term__term(struct parse_events_term **term, + int term_lhs, int term_rhs, + void *loc_term, void *loc_val) { - struct event_symbol *sym; - char *str; - struct parse_events_term temp = { - .type_val = PARSE_EVENTS__TERM_TYPE_STR, - .type_term = PARSE_EVENTS__TERM_TYPE_USER, - .config = config, - }; - - if (!temp.config) { - temp.config = strdup("event"); - if (!temp.config) - return -ENOMEM; - } - BUG_ON(idx >= PERF_COUNT_HW_MAX); - sym = &event_symbols_hw[idx]; - - str = strdup(sym->symbol); - if (!str) - return -ENOMEM; - return new_term(term, &temp, str, 0); + return parse_events_term__str(term, term_lhs, NULL, + strdup(config_term_names[term_rhs]), + loc_term, loc_val); } int parse_events_term__clone(struct parse_events_term **new, @@ -2855,15 +2702,3 @@ char *parse_events_formats_error_string(char *additional_terms) fail: return NULL; } - -struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, - const char *name, - const char *metric_id, - struct perf_pmu *pmu, - struct list_head *config_terms) -{ - return __add_event(list, idx, attr, /*init_attr=*/true, name, metric_id, - pmu, config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL); -} diff --git 
a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 86ad4438a2aa..2021fe145410 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -18,21 +18,26 @@ struct parse_events_error;
 struct option;
 struct perf_pmu;
 
-bool is_event_supported(u8 type, u64 config);
-
 const char *event_type(int type);
 
+/* Arguments encoded in opt->value. */
+struct parse_events_option_args {
+	struct evlist **evlistp;
+	const char *pmu_filter;
+};
 int parse_events_option(const struct option *opt, const char *str, int unset);
 int parse_events_option_new_evlist(const struct option *opt,
				   const char *str, int unset);
-__attribute__((nonnull(1, 2, 3)))
-int __parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error,
-		   struct perf_pmu *fake_pmu, bool warn_if_reordered);
+__attribute__((nonnull(1, 2, 4)))
+int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter,
+		   struct parse_events_error *error, struct perf_pmu *fake_pmu,
+		   bool warn_if_reordered);
 
 __attribute__((nonnull(1, 2, 3)))
 static inline int parse_events(struct evlist *evlist, const char *str,
			       struct parse_events_error *err)
 {
-	return __parse_events(evlist, str, err, /*fake_pmu=*/NULL, /*warn_if_reordered=*/true);
+	return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/NULL,
+			      /*warn_if_reordered=*/true);
 }
 
 int parse_event(struct evlist *evlist, const char *str);
@@ -41,14 +46,6 @@ int parse_events_terms(struct list_head *terms, const char *str);
 int parse_filter(const struct option *opt, const char *str, int unset);
 int exclude_perf(const struct option *opt, const char *arg, int unset);
 
-enum perf_pmu_event_symbol_type {
-	PMU_EVENT_SYMBOL_ERR,		/* not a PMU EVENT */
-	PMU_EVENT_SYMBOL,		/* normal style PMU event */
-	PMU_EVENT_SYMBOL_PREFIX,	/* prefix of pre-suf style event */
-	PMU_EVENT_SYMBOL_SUFFIX,	/* suffix of pre-suf style event */
-	PMU_EVENT_SYMBOL_SUFFIX2,	/* suffix of pre-suf2 style event */
-};
-
 enum {
 	PARSE_EVENTS__TERM_TYPE_NUM,
 	PARSE_EVENTS__TERM_TYPE_STR,
@@ -78,6 +75,9 @@ enum {
 	PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT,
 	PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE,
 	PARSE_EVENTS__TERM_TYPE_METRIC_ID,
+	PARSE_EVENTS__TERM_TYPE_RAW,
+	PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
+	PARSE_EVENTS__TERM_TYPE_HARDWARE,
 	__PARSE_EVENTS__TERM_TYPE_NR,
 };
 
@@ -128,10 +128,15 @@ struct parse_events_state {
 	struct list_head	  *terms;
 	int			   stoken;
 	struct perf_pmu		  *fake_pmu;
-	char			  *hybrid_pmu_name;
+	/* If non-null, wildcard matching only matches the given PMU. */
+	const char		  *pmu_filter;
+	/* Should PE_LEGACY_CACHE tokens be generated for config terms?
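+	 * A reading inferred from the lexer's lc_str() helper: when true, a
+	 * legacy cache name such as "L1-dcache" scans as PE_LEGACY_CACHE and
+	 * can form a legacy cache event; when false (e.g. when only terms
+	 * are being parsed) it stays a plain PE_NAME string.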
*/ + bool match_legacy_cache_terms; bool wild_card_pmus; }; +bool parse_events__filter_pmu(const struct parse_events_state *parse_state, + const struct perf_pmu *pmu); void parse_events__shrink_config_terms(void); int parse_events__is_hardcoded_term(struct parse_events_term *term); int parse_events_term__num(struct parse_events_term **term, @@ -141,8 +146,9 @@ int parse_events_term__num(struct parse_events_term **term, int parse_events_term__str(struct parse_events_term **term, int type_term, char *config, char *str, void *loc_term, void *loc_val); -int parse_events_term__sym_hw(struct parse_events_term **term, - char *config, unsigned idx); +int parse_events_term__term(struct parse_events_term **term, + int term_lhs, int term_rhs, + void *loc_term, void *loc_val); int parse_events_term__clone(struct parse_events_term **new, struct parse_events_term *term); void parse_events_term__delete(struct parse_events_term *term); @@ -170,15 +176,15 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, - struct list_head *head_config); + struct list_head *head_config, + bool wildcard); int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, int tool_event); -int parse_events_add_cache(struct list_head *list, int *idx, - char *type, char *op_result1, char *op_result2, - struct parse_events_error *error, - struct list_head *head_config, - struct parse_events_state *parse_state); +int parse_events_add_cache(struct list_head *list, int *idx, const char *name, + struct parse_events_state *parse_state, + struct list_head *head_config); +int parse_events__decode_legacy_cache(const char *name, int pmu_type, __u64 *config); int parse_events_add_breakpoint(struct list_head *list, int *idx, u64 addr, char *type, u64 len); int parse_events_add_pmu(struct parse_events_state *parse_state, @@ -198,8 +204,6 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, int parse_events_copy_term_list(struct list_head *old, struct list_head **new); -enum perf_pmu_event_symbol_type -perf_pmu__parse_check(const char *name); void parse_events__set_leader(char *name, struct list_head *list); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); @@ -241,13 +245,4 @@ static inline bool is_sdt_event(char *str __maybe_unused) } #endif /* HAVE_LIBELF_SUPPORT */ -int perf_pmu__test_parse_init(void); - -struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, - const char *name, - const char *metric_id, - struct perf_pmu *pmu, - struct list_head *config_terms); - #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 51fe0a9fb3de..6deb70c25984 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -63,15 +63,9 @@ static int str(yyscan_t scanner, int token) return token; } -static int raw(yyscan_t scanner) +static int lc_str(yyscan_t scanner, const struct parse_events_state *state) { - YYSTYPE *yylval = parse_events_get_lval(scanner); - char *text = parse_events_get_text(scanner); - - if (perf_pmu__parse_check(text) == PMU_EVENT_SYMBOL) - return str(scanner, PE_NAME); - - return __value(yylval, text + 1, 16, PE_RAW); + return str(scanner, state->match_legacy_cache_terms ? 
PE_LEGACY_CACHE : PE_NAME); } static bool isbpf_suffix(char *text) @@ -131,35 +125,6 @@ do { \ yyless(0); \ } while (0) -static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_state) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - char *text = parse_events_get_text(scanner); - - yylval->str = strdup(text); - - /* - * If we're not testing then parse check determines the PMU event type - * which if it isn't a PMU returns PE_NAME. When testing the result of - * parse check can't be trusted so we return PE_PMU_EVENT_FAKE unless - * an '!' is present in which case the text can't be a PMU name. - */ - switch (perf_pmu__parse_check(text)) { - case PMU_EVENT_SYMBOL_PREFIX: - return PE_PMU_EVENT_PRE; - case PMU_EVENT_SYMBOL_SUFFIX: - return PE_PMU_EVENT_SUF; - case PMU_EVENT_SYMBOL_SUFFIX2: - return PE_PMU_EVENT_SUF2; - case PMU_EVENT_SYMBOL: - return parse_state->fake_pmu - ? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT; - default: - return parse_state->fake_pmu && !strchr(text,'!') - ? PE_PMU_EVENT_FAKE : PE_NAME; - } -} - static int sym(yyscan_t scanner, int type, int config) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -184,6 +149,16 @@ static int term(yyscan_t scanner, int type) return PE_TERM; } +static int hw_term(yyscan_t scanner, int config) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + yylval->hardware_term.str = strdup(text); + yylval->hardware_term.num = PERF_TYPE_HARDWARE + config; + return PE_TERM_HW; +} + #define YY_USER_ACTION \ do { \ yylloc->last_column = yylloc->first_column; \ @@ -211,19 +186,20 @@ bpf_source [^,{}]+\.c[a-zA-Z0-9._]* num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ -name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!]* +name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!\-]* name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? 
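/*
 * Editor's note: the lc_str() helper above and the lc_type/lc_op_result
 * patterns just below funnel legacy cache names such as
 * "L1-dcache-load-misses" into a single PE_LEGACY_CACHE token (or PE_NAME
 * when match_legacy_cache_terms is false). The config value such a name
 * ultimately encodes is fixed by the perf_event_open(2) ABI; the sketch
 * below shows that packing in isolation. legacy_cache_config() is a
 * hypothetical helper name; the enums come from <linux/perf_event.h>.
 */
#include <linux/perf_event.h>
#include <linux/types.h>

static __u64 legacy_cache_config(enum perf_hw_cache_id cache,
				 enum perf_hw_cache_op_id op,
				 enum perf_hw_cache_op_result_id result)
{
	/* Byte 0: cache id, byte 1: op, byte 2: op result. */
	return (__u64)cache | ((__u64)op << 8) | ((__u64)result << 16);
}

/*
 * "L1-dcache-load-misses" then corresponds to
 * legacy_cache_config(PERF_COUNT_HW_CACHE_L1D, PERF_COUNT_HW_CACHE_OP_READ,
 *                     PERF_COUNT_HW_CACHE_RESULT_MISS);
 * parse_events__decode_legacy_cache() in this series additionally ORs in
 * the PMU type shifted by PERF_PMU_TYPE_SHIFT, as the print_hwcache_events()
 * hunk later in this diff shows.
 */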
/* If you add a modifier you need to update check_modifier() */ modifier_event [ukhpPGHSDIWeb]+ modifier_bp [rwx]{1,3} +lc_type (L1-dcache|l1-d|l1d|L1-data|L1-icache|l1-i|l1i|L1-instruction|LLC|L2|dTLB|d-tlb|Data-TLB|iTLB|i-tlb|Instruction-TLB|branch|branches|bpu|btb|bpc|node) +lc_op_result (load|loads|read|store|stores|write|prefetch|prefetches|speculative-read|speculative-load|refs|Reference|ops|access|misses|miss) %% %{ struct parse_events_state *_parse_state = parse_events_get_extra(yyscanner); - { int start_token = _parse_state->stoken; @@ -303,10 +279,23 @@ percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } -r{num_raw_hex} { return raw(yyscanner); } -r0x{num_raw_hex} { return raw(yyscanner); } +cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } +stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } +stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } +instructions { return hw_term(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); } +cache-references { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); } +cache-misses { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_MISSES); } +branch-instructions|branches { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } +branch-misses { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); } +bus-cycles { return hw_term(yyscanner, PERF_COUNT_HW_BUS_CYCLES); } +ref-cycles { return hw_term(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); } +r{num_raw_hex} { return str(yyscanner, PE_RAW); } +r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } +{lc_type} { return lc_str(yyscanner, _parse_state); } +{lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } +{lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } {name_minus} { return str(yyscanner, PE_NAME); } \[all\] { return PE_ARRAY_ALL; } "[" { BEGIN(array); return '['; } @@ -359,47 +348,20 @@ system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } - /* - * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately. - * Because the prefix cycles is mixed up with cpu-cycles. 
- * loads and stores are mixed up with cache event - */ -cycles-ct | -cycles-t | -mem-loads | -mem-loads-aux | -mem-stores | -topdown-[a-z-]+ | -tx-capacity-[a-z-]+ | -el-capacity-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } - -L1-dcache|l1-d|l1d|L1-data | -L1-icache|l1-i|l1i|L1-instruction | -LLC|L2 | -dTLB|d-tlb|Data-TLB | -iTLB|i-tlb|Instruction-TLB | -branch|branches|bpu|btb|bpc | -node { return str(yyscanner, PE_NAME_CACHE_TYPE); } - -load|loads|read | -store|stores|write | -prefetch|prefetches | -speculative-read|speculative-load | -refs|Reference|ops|access | -misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); } - +{lc_type} { return str(yyscanner, PE_LEGACY_CACHE); } +{lc_type}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); } +{lc_type}-{lc_op_result}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); } mem: { BEGIN(mem); return PE_PREFIX_MEM; } -r{num_raw_hex} { return raw(yyscanner); } +r{num_raw_hex} { return str(yyscanner, PE_RAW); } {num_dec} { return value(yyscanner, 10); } {num_hex} { return value(yyscanner, 16); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } {bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); } {bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); } -{name} { return pmu_str_check(yyscanner, _parse_state); } +{name} { return str(yyscanner, PE_NAME); } {name_tag} { return str(yyscanner, PE_NAME); } "/" { BEGIN(config); return '/'; } -- { return '-'; } , { BEGIN(event); return ','; } : { return ':'; } "{" { BEGIN(event); return '{'; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 4488443e506e..4e1f5de35be8 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -8,6 +8,7 @@ #define YYDEBUG 1 +#include <errno.h> #include <fnmatch.h> #include <stdio.h> #include <linux/compiler.h> @@ -52,36 +53,36 @@ static void free_list_evsel(struct list_head* list_evsel) %} %token PE_START_EVENTS PE_START_TERMS -%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM +%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM %token PE_VALUE_SYM_TOOL %token PE_EVENT_NAME -%token PE_NAME +%token PE_RAW PE_NAME %token PE_BPF_OBJECT PE_BPF_SOURCE %token PE_MODIFIER_EVENT PE_MODIFIER_BP -%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT +%token PE_LEGACY_CACHE %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR -%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%token PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM +%token PE_TERM_HW %type <num> PE_VALUE %type <num> PE_VALUE_SYM_HW %type <num> PE_VALUE_SYM_SW %type <num> PE_VALUE_SYM_TOOL -%type <num> PE_RAW %type <num> PE_TERM %type <num> value_sym +%type <str> PE_RAW %type <str> PE_NAME %type <str> PE_BPF_OBJECT %type <str> PE_BPF_SOURCE -%type <str> PE_NAME_CACHE_TYPE -%type <str> PE_NAME_CACHE_OP_RESULT +%type <str> PE_LEGACY_CACHE %type <str> PE_MODIFIER_EVENT %type <str> PE_MODIFIER_BP %type <str> PE_EVENT_NAME -%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%type <str> PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %type <str> PE_DRV_CFG_TERM -%type <str> event_pmu_name +%type <str> name_or_raw name_or_legacy %destructor { free ($$); } <str> %type <term> event_term %destructor { parse_events_term__delete ($$); } <term> @@ -112,6 +113,8 @@ static void 
free_list_evsel(struct list_head* list_evsel) %type <array> array_term %type <array> array_terms %destructor { free ($$.ranges); } <array> +%type <hardware_term> PE_TERM_HW +%destructor { free ($$.str); } <hardware_term> %union { @@ -125,6 +128,10 @@ static void free_list_evsel(struct list_head* list_evsel) char *event; } tracepoint_name; struct parse_events_array array; + struct hardware_term { + char *str; + u64 num; + } hardware_term; } %% @@ -273,11 +280,8 @@ event_def: event_pmu | event_legacy_raw sep_dc | event_bpf_file -event_pmu_name: -PE_NAME | PE_PMU_EVENT_PRE - event_pmu: -event_pmu_name opt_pmu_config +PE_NAME opt_pmu_config { struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; @@ -303,25 +307,32 @@ event_pmu_name opt_pmu_config list = alloc_list(); if (!list) CLEANUP_YYABORT; - if (parse_events_add_pmu(_parse_state, list, $1, $2, /*auto_merge_stats=*/false)) { + /* Attempt to add to list assuming $1 is a PMU name. */ + if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false)) { struct perf_pmu *pmu = NULL; int ok = 0; + /* Failure to add, try wildcard expansion of $1 as a PMU name. */ if (asprintf(&pattern, "%s*", $1) < 0) CLEANUP_YYABORT; while ((pmu = perf_pmu__scan(pmu)) != NULL) { char *name = pmu->name; + if (parse_events__filter_pmu(parse_state, pmu)) + continue; + if (!strncmp(name, "uncore_", 7) && strncmp($1, "uncore_", 7)) name += 7; if (!perf_pmu__match(pattern, name, $1) || !perf_pmu__match(pattern, pmu->alias_name, $1)) { + bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu); + if (parse_events_copy_term_list(orig_terms, &terms)) CLEANUP_YYABORT; - if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, - /*auto_merge_stats=*/true)) { + if (!parse_events_add_pmu(parse_state, list, pmu->name, terms, + auto_merge_stats)) { ok++; parse_state->wild_card_pmus = true; } @@ -329,6 +340,12 @@ event_pmu_name opt_pmu_config } } + if (!ok) { + /* Failure to add, assume $1 is an event name. 
*/ + zfree(&list); + ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list); + $2 = NULL; + } if (!ok) CLEANUP_YYABORT; } @@ -352,41 +369,27 @@ PE_KERNEL_PMU_EVENT sep_dc $$ = list; } | -PE_KERNEL_PMU_EVENT opt_pmu_config +PE_NAME sep_dc { struct list_head *list; int err; - /* frees $2 */ - err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list); + err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list); free($1); if (err < 0) YYABORT; $$ = list; } | -PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF '-' PE_PMU_EVENT_SUF2 sep_dc -{ - struct list_head *list; - char pmu_name[128]; - snprintf(pmu_name, sizeof(pmu_name), "%s-%s-%s", $1, $3, $5); - free($1); - free($3); - free($5); - if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0) - YYABORT; - $$ = list; -} -| -PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc +PE_KERNEL_PMU_EVENT opt_pmu_config { struct list_head *list; - char pmu_name[128]; + int err; - snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3); + /* frees $2 */ + err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list); free($1); - free($3); - if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0) + if (err < 0) YYABORT; $$ = list; } @@ -444,7 +447,8 @@ value_sym '/' event_config '/' list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_numeric(_parse_state, list, type, config, $3); + err = parse_events_add_numeric(_parse_state, list, type, config, $3, + /*wildcard=*/false); parse_events_terms__delete($3); if (err) { free_list_evsel(list); @@ -461,7 +465,9 @@ value_sym sep_slash_slash_dc list = alloc_list(); ABORT_ON(!list); - ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, NULL)); + ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, + /*head_config=*/NULL, + /*wildcard=*/false)); $$ = list; } | @@ -476,60 +482,16 @@ PE_VALUE_SYM_TOOL sep_slash_slash_dc } event_legacy_cache: -PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config -{ - struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; - struct list_head *list; - int err; - - list = alloc_list(); - ABORT_ON(!list); - err = parse_events_add_cache(list, &parse_state->idx, $1, $3, $5, error, $6, - parse_state); - parse_events_terms__delete($6); - free($1); - free($3); - free($5); - if (err) { - free_list_evsel(list); - YYABORT; - } - $$ = list; -} -| -PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config +PE_LEGACY_CACHE opt_event_config { struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; struct list_head *list; int err; list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_cache(list, &parse_state->idx, $1, $3, NULL, error, $4, - parse_state); - parse_events_terms__delete($4); - free($1); - free($3); - if (err) { - free_list_evsel(list); - YYABORT; - } - $$ = list; -} -| -PE_NAME_CACHE_TYPE opt_event_config -{ - struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; - struct list_head *list; - int err; + err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2); - list = alloc_list(); - ABORT_ON(!list); - err = parse_events_add_cache(list, &parse_state->idx, $1, NULL, NULL, error, $2, - parse_state); parse_events_terms__delete($2); free($1); if (err) { @@ -633,17 +595,6 @@ tracepoint_name opt_event_config } tracepoint_name: -PE_NAME '-' PE_NAME ':' PE_NAME -{ - struct tracepoint_name 
tracepoint; - - ABORT_ON(asprintf(&tracepoint.sys, "%s-%s", $1, $3) < 0); - tracepoint.event = $5; - free($1); - free($3); - $$ = tracepoint; -} -| PE_NAME ':' PE_NAME { struct tracepoint_name tracepoint = {$1, $3}; @@ -659,7 +610,8 @@ PE_VALUE ':' PE_VALUE opt_event_config list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4); + err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4, + /*wildcard=*/false); parse_events_terms__delete($4); if (err) { free(list); @@ -673,10 +625,16 @@ PE_RAW opt_event_config { struct list_head *list; int err; + u64 num; list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, $1, $2); + errno = 0; + num = strtoull($1 + 1, NULL, 16); + ABORT_ON(errno); + free($1); + err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, num, $2, + /*wildcard=*/true); parse_events_terms__delete($2); if (err) { free(list); @@ -781,17 +739,24 @@ event_term $$ = head; } +name_or_raw: PE_RAW | PE_NAME | PE_LEGACY_CACHE + +name_or_legacy: PE_NAME | PE_LEGACY_CACHE + event_term: PE_RAW { struct parse_events_term *term; - ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_CONFIG, - NULL, $1, false, &@1, NULL)); + if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW, + strdup("raw"), $1, &@1, &@1)) { + free($1); + YYABORT; + } $$ = term; } | -PE_NAME '=' PE_NAME +name_or_raw '=' name_or_legacy { struct parse_events_term *term; @@ -804,7 +769,7 @@ PE_NAME '=' PE_NAME $$ = term; } | -PE_NAME '=' PE_VALUE +name_or_raw '=' PE_VALUE { struct parse_events_term *term; @@ -816,12 +781,25 @@ PE_NAME '=' PE_VALUE $$ = term; } | -PE_NAME '=' PE_VALUE_SYM_HW +name_or_raw '=' PE_TERM_HW +{ + struct parse_events_term *term; + + if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $3.str, &@1, &@3)) { + free($1); + free($3.str); + YYABORT; + } + $$ = term; +} +| +PE_LEGACY_CACHE { struct parse_events_term *term; - int config = $3 & 255; - if (parse_events_term__sym_hw(&term, $1, config)) { + if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, + $1, 1, true, &@1, NULL)) { free($1); YYABORT; } @@ -840,16 +818,19 @@ PE_NAME $$ = term; } | -PE_VALUE_SYM_HW +PE_TERM_HW { struct parse_events_term *term; - int config = $1 & 255; - ABORT_ON(parse_events_term__sym_hw(&term, NULL, config)); + if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE, + $1.str, $1.num & 255, false, &@1, NULL)) { + free($1.str); + YYABORT; + } $$ = term; } | -PE_TERM '=' PE_NAME +PE_TERM '=' name_or_legacy { struct parse_events_term *term; @@ -860,6 +841,25 @@ PE_TERM '=' PE_NAME $$ = term; } | +PE_TERM '=' PE_TERM_HW +{ + struct parse_events_term *term; + + if (parse_events_term__str(&term, (int)$1, NULL, $3.str, &@1, &@3)) { + free($3.str); + YYABORT; + } + $$ = term; +} +| +PE_TERM '=' PE_TERM +{ + struct parse_events_term *term; + + ABORT_ON(parse_events_term__term(&term, (int)$1, (int)$3, &@1, &@3)); + $$ = term; +} +| PE_TERM '=' PE_VALUE { struct parse_events_term *term; @@ -876,7 +876,7 @@ PE_TERM $$ = term; } | -PE_NAME array '=' PE_NAME +name_or_raw array '=' name_or_legacy { struct parse_events_term *term; @@ -891,7 +891,7 @@ PE_NAME array '=' PE_NAME $$ = term; } | -PE_NAME array '=' PE_VALUE +name_or_raw array '=' PE_VALUE { struct parse_events_term *term; diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c index 38628805a952..bc4cb0738c35 100644 --- a/tools/perf/util/pmu-hybrid.c +++ 
b/tools/perf/util/pmu-hybrid.c @@ -50,23 +50,3 @@ bool perf_pmu__is_hybrid(const char *name) { return perf_pmu__find_hybrid_pmu(name) != NULL; } - -char *perf_pmu__hybrid_type_to_pmu(const char *type) -{ - char *pmu_name = NULL; - - if (asprintf(&pmu_name, "cpu_%s", type) < 0) - return NULL; - - if (perf_pmu__is_hybrid(pmu_name)) - return pmu_name; - - /* - * pmu may be not scanned, check the sysfs. - */ - if (perf_pmu__hybrid_mounted(pmu_name)) - return pmu_name; - - free(pmu_name); - return NULL; -} diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h index 2b186c26a43e..206b94931531 100644 --- a/tools/perf/util/pmu-hybrid.h +++ b/tools/perf/util/pmu-hybrid.h @@ -17,7 +17,6 @@ bool perf_pmu__hybrid_mounted(const char *name); struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name); bool perf_pmu__is_hybrid(const char *name); -char *perf_pmu__hybrid_type_to_pmu(const char *type); static inline int perf_pmu__hybrid_pmu_num(void) { diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ad209c88a124..f4f0afbc391c 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1398,7 +1398,6 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, { bool zero = !!pmu->default_config; - attr->type = pmu->type; return perf_pmu__config_terms(pmu->name, &pmu->format, attr, head_terms, zero, err); } @@ -1651,6 +1650,21 @@ bool is_pmu_core(const char *name) return !strcmp(name, "cpu") || is_arm_pmu_core(name); } +bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu) +{ + return is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name); +} + +bool perf_pmu__supports_wildcard_numeric(const struct perf_pmu *pmu) +{ + return is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name); +} + +bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu) +{ + return !perf_pmu__is_hybrid(pmu->name); +} + static bool pmu_alias_is_duplicate(struct sevent *alias_a, struct sevent *alias_b) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index b9a02dedd473..0e0cb6283594 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -220,6 +220,9 @@ void perf_pmu__del_formats(struct list_head *formats); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); bool is_pmu_core(const char *name); +bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu); +bool perf_pmu__supports_wildcard_numeric(const struct perf_pmu *pmu); +bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu); void print_pmu_events(const struct print_callbacks *print_cb, void *print_state); bool pmu_have_event(const char *pname, const char *name); diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index 7f3b93c4d229..140e11f00b29 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -1,5 +1,28 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/list.h> -#include <pmus.h> +#include <string.h> +#include "pmus.h" +#include "pmu.h" LIST_HEAD(pmus); + +const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (!strcmp(pmu->name, str)) + return pmu; + /* Ignore "uncore_" prefix. */ + if (!strncmp(pmu->name, "uncore_", 7)) { + if (!strcmp(pmu->name + 7, str)) + return pmu; + } + /* Ignore "cpu_" prefix on Intel hybrid PMUs. 
*/ + if (!strncmp(pmu->name, "cpu_", 4)) { + if (!strcmp(pmu->name + 4, str)) + return pmu; + } + } + return NULL; +} diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index 5ec12007eb5c..d475e2960c10 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -3,7 +3,10 @@ #define __PMUS_H extern struct list_head pmus; +struct perf_pmu; #define perf_pmus__for_each_pmu(pmu) list_for_each_entry(pmu, &pmus, list) +const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str); + #endif /* __PMUS_H */ diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index ee145cec42c0..69492cbd6921 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -27,6 +27,7 @@ #include "tracepoint.h" #include "pfm.h" #include "pmu-hybrid.h" +#include "thread_map.h" #define MAX_NAME_LEN 100 @@ -228,60 +229,94 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) strlist__delete(sdtlist); } +static bool is_event_supported(u8 type, u64 config) +{ + bool ret = true; + int open_return; + struct evsel *evsel; + struct perf_event_attr attr = { + .type = type, + .config = config, + .disabled = 1, + }; + struct perf_thread_map *tmap = thread_map__new_by_tid(0); + + if (tmap == NULL) + return false; + + evsel = evsel__new(&attr); + if (evsel) { + open_return = evsel__open(evsel, NULL, tmap); + ret = open_return >= 0; + + if (open_return == -EACCES) { + /* + * This happens if the paranoid value + * /proc/sys/kernel/perf_event_paranoid is set to 2 + * Re-run with exclude_kernel set; we don't do that + * by default as some ARM machines do not support it. + * + */ + evsel->core.attr.exclude_kernel = 1; + ret = evsel__open(evsel, NULL, tmap) >= 0; + } + evsel__delete(evsel); + } + + perf_thread_map__put(tmap); + return ret; +} + int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state) { - struct strlist *evt_name_list = strlist__new(NULL, NULL); - struct str_node *nd; + struct perf_pmu *pmu = NULL; + const char *event_type_descriptor = event_type_descriptors[PERF_TYPE_HW_CACHE]; - if (!evt_name_list) { - pr_debug("Failed to allocate new strlist for hwcache events\n"); - return -ENOMEM; - } - for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { - for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { - /* skip invalid cache type */ - if (!evsel__is_cache_op_valid(type, op)) - continue; - - for (int i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - struct perf_pmu *pmu = NULL; - char name[64]; - - __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); - if (!perf_pmu__has_hybrid()) { - if (is_event_supported(PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16))) - strlist__add(evt_name_list, name); + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + /* + * Skip uncore PMUs for performance. PERF_TYPE_HW_CACHE type + * attributes can accept software PMUs in the extended type, so + * also skip. 
+ */ + if (pmu->is_uncore || pmu->type == PERF_TYPE_SOFTWARE) + continue; + + for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + /* skip invalid cache type */ + if (!evsel__is_cache_op_valid(type, op)) continue; - } - perf_pmu__for_each_hybrid_pmu(pmu) { - if (is_event_supported(PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16) | - ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT))) { - char new_name[128]; - snprintf(new_name, sizeof(new_name), - "%s/%s/", pmu->name, name); - strlist__add(evt_name_list, new_name); - } + + for (int res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) { + char name[64]; + char alias_name[128]; + __u64 config; + int ret; + + __evsel__hw_cache_type_op_res_name(type, op, res, + name, sizeof(name)); + + ret = parse_events__decode_legacy_cache(name, pmu->type, + &config); + if (ret || !is_event_supported(PERF_TYPE_HW_CACHE, config)) + continue; + snprintf(alias_name, sizeof(alias_name), "%s/%s/", + pmu->name, name); + print_cb->print_event(print_state, + "cache", + pmu->name, + name, + alias_name, + /*scale_unit=*/NULL, + /*deprecated=*/false, + event_type_descriptor, + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL); } } } } - - strlist__for_each_entry(nd, evt_name_list) { - print_cb->print_event(print_state, - "cache", - /*pmu_name=*/NULL, - nd->s, - /*event_alias=*/NULL, - /*scale_unit=*/NULL, - /*deprecated=*/false, - event_type_descriptors[PERF_TYPE_HW_CACHE], - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); - } - strlist__delete(evt_name_list); return 0; } diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index c294db713677..869738fc06c3 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -36,6 +36,10 @@ if cc_is_clang: vars[var] = sub("-fno-semantic-interposition", "", vars[var]) if not clang_has_option("-ffat-lto-objects"): vars[var] = sub("-ffat-lto-objects", "", vars[var]) + if not clang_has_option("-ftree-loop-distribute-patterns"): + vars[var] = sub("-ftree-loop-distribute-patterns", "", vars[var]) + if not clang_has_option("-gno-variable-location-views"): + vars[var] = sub("-gno-variable-location-views", "", vars[var]) from setuptools import setup, Extension
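A note on the option plumbing earlier in this diff: parse_events_option() now reads opt->value as a struct parse_events_option_args instead of a bare evlist pointer, so callers hand over the evlist and an optional PMU filter together. Below is a minimal sketch of how a tool might register its -e option under the new scheme; rec_evlist and rec_event_args are hypothetical names, while OPT_CALLBACK and OPT_END are the existing tools/lib/subcmd macros.

#include <subcmd/parse-options.h>
#include "util/evlist.h"
#include "util/parse-events.h"

static struct evlist *rec_evlist;

static struct parse_events_option_args rec_event_args = {
	.evlistp = &rec_evlist,
	.pmu_filter = NULL,	/* e.g. "cpu_atom" to confine wildcard matches */
};

static const struct option rec_options[] = {
	OPT_CALLBACK('e', "event", &rec_event_args, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_END()
};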
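Relatedly, PE_RAW now reaches the grammar as text because the scanner no longer consults the PMU list to classify it, so the event_legacy_raw action decodes the hex itself with strtoull(), skipping the leading 'r'. A standalone illustration of that decode step, using a made-up token value:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *tok = "r1a";	/* PE_RAW text keeps the leading 'r' */
	unsigned long long num;

	errno = 0;
	num = strtoull(tok + 1, NULL, 16);	/* parse the hex part */
	if (errno)
		return 1;
	printf("PERF_TYPE_RAW config: %#llx\n", num);	/* prints 0x1a */
	return 0;
}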