From 1a97cee604dcbdba6c75984b7227223d599ddf32 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 11 Feb 2022 02:33:58 -0800 Subject: perf maps: Use a pointer for kmaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit struct maps is reference counted, using a pointer is more idiomatic. Committer notes: Delay: maps = machine__kernel_maps(&vmlinux); To after: machine__init(&vmlinux, "", HOST_KERNEL_ID); To avoid this on f34: In file included from /var/home/acme/git/perf/tools/perf/util/build-id.h:10, from /var/home/acme/git/perf/tools/perf/util/dso.h:13, from tests/vmlinux-kallsyms.c:8: In function ‘machine__kernel_maps’, inlined from ‘test__vmlinux_matches_kallsyms’ at tests/vmlinux-kallsyms.c:122:22: /var/home/acme/git/perf/tools/perf/util/machine.h:86:23: error: ‘vmlinux.kmaps’ is used uninitialized [-Werror=uninitialized] 86 | return machine->kmaps; | ~~~~~~~^~~~~~~ tests/vmlinux-kallsyms.c: In function ‘test__vmlinux_matches_kallsyms’: tests/vmlinux-kallsyms.c:121:34: note: ‘vmlinux’ declared here 121 | struct machine kallsyms, vmlinux; | ^~~~~~~ cc1: all warnings being treated as errors Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: Andi Kleen Cc: Andrew Morton Cc: André Almeida Cc: Andy Shevchenko Cc: Darren Hart Cc: Davidlohr Bueso Cc: Dmitriy Vyukov Cc: Eric Dumazet Cc: German Gomez Cc: Hao Luo Cc: James Clark Cc: Jin Yao Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miaoqian Lin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Shunsuke Nakamura Cc: Song Liu Cc: Stephane Eranian Cc: Stephen Brennan Cc: Steven Rostedt (VMware) Cc: Thomas Gleixner Cc: Thomas Richter Cc: Yury Norov Link: http://lore.kernel.org/lkml/20220211103415.2737789-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/arch') diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index 9b31734ee968..e670f3547581 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -18,7 +18,7 @@ int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, { int rc = 0; struct map *pos; - struct maps *kmaps = &machine->kmaps; + struct maps *kmaps = machine__kernel_maps(machine); union perf_event *event = zalloc(sizeof(event->mmap) + machine->id_hdr_size); -- cgit From 32449b430fe1df81ce1da84475fc48536264f919 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 24 Jan 2022 10:41:37 +0200 Subject: perf intel-pt: pkt-decoder-test: Fix scope of test_data Make test_data 'static' otherwise it will conflict with any global variable of the same name. Signed-off-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220124084201.2699795-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/arch') diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c index 2fc882ab24c1..395de4471840 100644 --- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c +++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c @@ -17,7 +17,7 @@ * @new_ctx: expected new packet context * @ctx_unchanged: the packet context must not change */ -struct test_data { +static struct test_data { int len; u8 bytes[INTEL_PT_PKT_MAX_SZ]; enum intel_pt_pkt_ctx ctx; -- cgit From 2750af50a360b52c6df1f5652ae728878bececc0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 24 Jan 2022 10:41:39 +0200 Subject: perf intel-pt: pkt-decoder: Add CFE and EVD packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As of Intel SDM (https://www.intel.com/sdm) version 076, there is a new Intel PT feature called Event Trace which requires 2 new packets CFE and EVD. Add them to the packet decoder and packet decoder test. Committer notes: I got the "Intel® 64 and IA-32 architectures software developer’s manual combined volumes: 1, 2A, 2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4" PDF at: https://cdrdv2.intel.com/v1/dl/getContent/671200 And these new packets are described in page 3951: 32.2.4 Event Trace is a capability that exposes details about the asynchronous events, when they are generated, and when their corresponding software event handler completes execution. These include: o Interrupts, including NMI and SMI, including the interrupt vector when defined. o Faults, exceptions including the fault vector. — Page faults additionally include the page fault address, when in context. o Event handler returns, including IRET and RSM. o VM exits and VM entries.¹ — VM exits include the values written to the “exit reason” and “exit qualification” VMCS fields. INIT and SIPI events. o TSX aborts, including the abort status returned for the RTM instructions. o Shutdown. Additionally, it provides indication of the status of the Interrupt Flag (IF), to indicate when interrupts are masked. Signed-off-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220124084201.2699795-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/perf/arch') diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c index 395de4471840..0155215c1d41 100644 --- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c +++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c @@ -166,6 +166,14 @@ static struct test_data { {2, {0x02, 0xb3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 }, {2, {0x02, 0x33}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 }, {2, {0x02, 0xb3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 }, + /* Control Flow Event Packet */ + {4, {0x02, 0x13, 0x01, 0x03}, 0, {INTEL_PT_CFE, 1, 3}, 0, 0 }, + {4, {0x02, 0x13, 0x81, 0x03}, 0, {INTEL_PT_CFE_IP, 1, 3}, 0, 0 }, + {4, {0x02, 0x13, 0x1f, 0x00}, 0, {INTEL_PT_CFE, 0x1f, 0}, 0, 0 }, + {4, {0x02, 0x13, 0x9f, 0xff}, 0, {INTEL_PT_CFE_IP, 0x1f, 0xff}, 0, 0 }, + /* */ + {11, {0x02, 0x53, 0x09, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_EVD, 0x09, 0x7060504030201}, 0, 0 }, + {11, {0x02, 0x53, 0x3f, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_EVD, 0x3f, 0x8070605040302}, 0, 0 }, /* Terminator */ {0, {0}, 0, {0, 0, 0}, 0, 0 }, }; -- cgit From f7934477ce36dbb18c722226bca7e1b4ec5024ea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 24 Jan 2022 10:41:40 +0200 Subject: perf intel-pt: pkt-decoder: Add MODE.Exec IFLAG bit As of Intel SDM (https://www.intel.com/sdm) version 076, there is a new Intel PT feature called Event Trace which adds a bit to the existing MODE.Exec packet to record the interrupt flag. Amend the packet decoder and packet decoder test accordingly. Signed-off-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220124084201.2699795-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools/perf/arch') diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c index 0155215c1d41..42237656f453 100644 --- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c +++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c @@ -70,8 +70,11 @@ static struct test_data { {8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0xC0A08060403}, 0, 0 }, /* Mode Exec Packet */ {2, {0x99, 0x00}, 0, {INTEL_PT_MODE_EXEC, 0, 16}, 0, 0 }, - {2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 0, 64}, 0, 0 }, - {2, {0x99, 0x02}, 0, {INTEL_PT_MODE_EXEC, 0, 32}, 0, 0 }, + {2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 1, 64}, 0, 0 }, + {2, {0x99, 0x02}, 0, {INTEL_PT_MODE_EXEC, 2, 32}, 0, 0 }, + {2, {0x99, 0x04}, 0, {INTEL_PT_MODE_EXEC, 4, 16}, 0, 0 }, + {2, {0x99, 0x05}, 0, {INTEL_PT_MODE_EXEC, 5, 64}, 0, 0 }, + {2, {0x99, 0x06}, 0, {INTEL_PT_MODE_EXEC, 6, 32}, 0, 0 }, /* Mode TSX Packet */ {2, {0x99, 0x20}, 0, {INTEL_PT_MODE_TSX, 0, 0}, 0, 0 }, {2, {0x99, 0x21}, 0, {INTEL_PT_MODE_TSX, 0, 1}, 0, 0 }, -- cgit From f2be829e72f90e6d83923398a861f0aaa74952ee Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 24 Jan 2022 10:41:49 +0200 Subject: perf intel-pt: Record Event Trace capability flag The change to the MODE.Exec packet means processing must distinguish between the old and new cases. Record the Event Trace capability flag to make that possible. Signed-off-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220124084201.2699795-14-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools/perf/arch') diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 6df0dc00d73a..8c31578d6f4a 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -306,6 +306,7 @@ intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist) ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) + intel_pt_filter_bytes(filter); + ptr->priv_size += sizeof(u64); /* Cap Event Trace */ return ptr->priv_size; } @@ -335,6 +336,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, unsigned long max_non_turbo_ratio; size_t filter_str_len; const char *filter; + int event_trace; __u64 *info; int err; @@ -357,6 +359,9 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio", "%lu", &max_non_turbo_ratio) != 1) max_non_turbo_ratio = 0; + if (perf_pmu__scan_file(intel_pt_pmu, "caps/event_trace", + "%d", &event_trace) != 1) + event_trace = 0; filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu); filter_str_len = filter ? strlen(filter) : 0; @@ -407,6 +412,8 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, info += len >> 3; } + *info++ = event_trace; + return 0; } -- cgit From aca8af3c2e8cb57662e6035c0ccb3c111a11d97a Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 13 Jan 2022 09:10:52 +0000 Subject: perf cs-etm: Update deduction of TRCCONFIGR register for branch broadcast Now that a config flag for branch broadcast has been added, take it into account when trying to deduce what the driver would have programmed the TRCCONFIGR register to. Reviewed-by: Leo Yan Reviewed-by: Mike Leach Reviewed-by: Suzuki Poulouse Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/20220113091056.1297982-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf/arch') diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 2e8b2c4365a0..cbc555245959 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -510,6 +510,9 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr) if (config_opts & BIT(ETM_OPT_CTXTID2)) config |= BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT); + if (config_opts & BIT(ETM_OPT_BRANCH_BROADCAST)) + config |= BIT(ETM4_CFG_BIT_BB); + return config; } -- cgit From 521f2688c5af8442ace095815702f08c1aabcb45 Mon Sep 17 00:00:00 2001 From: German Gomez Date: Mon, 21 Feb 2022 17:10:42 +0000 Subject: perf arm-spe: Use advertised caps/min_interval as default sample_period When recording SPE traces, the default sample_period is currently being set to 1 in the perf_event_attr fields, instead of the value advertised in '/sys/devices/arm_spe_0/caps/min_interval': Before: $ perf record -e arm_spe// -vv -- sleep 1 [...] { sample_period, sample_freq } 1 [...] Use the value from the above sysfs location as a more sensible default (it was already being read, but the value not being used) After: $ perf record -e arm_spe// -vv -- sleep 1 [...] { sample_period, sample_freq } 1024 [...] Reviewed-by: Leo Yan Signed-off-by: German Gomez Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220221171042.58460-1-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/arm-spe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/arch') diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 2100d46ccf5e..5860bbaea95a 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -158,7 +158,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, return -EINVAL; } evsel->core.attr.freq = 0; - evsel->core.attr.sample_period = 1; + evsel->core.attr.sample_period = arm_spe_pmu->default_config->sample_period; arm_spe_evsel = evsel; opts->full_auxtrace = true; } -- cgit From bc355822f0d9623b632069105d425c822d124cc8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 21 Mar 2022 15:33:44 -0700 Subject: perf parse-events: Move slots only with topdown If slots isn't with a topdown event then moving it is unnecessary. For example {instructions, slots} is re-ordered: $ perf stat -e '{instructions,slots}' -a sleep 1 Performance counter stats for 'system wide': 936,600,825 slots 144,440,968 instructions 1.006061423 seconds time elapsed Which can break tools expecting the command line order to match the printed order. It is necessary to move the slots event first when it appears with topdown events. Add extra checking so that the slots event is only moved in the case of there being a topdown event like: $ perf stat -e '{instructions,slots,topdown-fe-bound}' -a sleep 1 Performance counter stats for 'system wide': 2427568570 slots 300927614 instructions 551021649 topdown-fe-bound 1.001771803 seconds time elapsed Fixes: 94dbfd6781a0e87b ("perf parse-events: Architecture specific leader override") Reported-by: Kan Liang Signed-off-by: Ian Rogers Tested-by: Kan Liang Cc: Alexander Shishkin Cc: Alexandre Torgue Cc: Andi Kleen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Maxime Coquelin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20220321223344.1034479-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/evlist.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'tools/perf/arch') diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index 8d9b55959256..cfc208d71f00 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -20,17 +20,27 @@ int arch_evlist__add_default_attrs(struct evlist *evlist) struct evsel *arch_evlist__leader(struct list_head *list) { - struct evsel *evsel, *first; + struct evsel *evsel, *first, *slots = NULL; + bool has_topdown = false; first = list_first_entry(list, struct evsel, core.node); if (!pmu_have_event("cpu", "slots")) return first; + /* If there is a slots event and a topdown event then the slots event comes first. */ __evlist__for_each_entry(list, evsel) { - if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && - evsel->name && strcasestr(evsel->name, "slots")) - return evsel; + if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && evsel->name) { + if (strcasestr(evsel->name, "slots")) { + slots = evsel; + if (slots == first) + return first; + } + if (!strncasecmp(evsel->name, "topdown", 7)) + has_topdown = true; + if (slots && has_topdown) + return slots; + } } return first; } -- cgit