summaryrefslogtreecommitdiff
path: root/tools/perf/util
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-05 11:56:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-05 11:56:18 -0700
commit27151f177827d478508e756c7657273261aaf8a9 (patch)
tree393ee6f1b12842c87461c471c0da70e4ecd93da8 /tools/perf/util
parent58ca24158758f1784400d32743373d7d6227d018 (diff)
parentc7a3828d98db2730079265b5f51933dfcef8bb5f (diff)
Merge tag 'perf-tools-for-v5.15-2021-09-04' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tool updates from Arnaldo Carvalho de Melo: "New features: - Improvements for the flamegraph python script, including: - Display perf.data header - Display PIDs of user stacks - Added option to change color scheme - Default to blue/green color scheme to improve accessibility - Correctly identify kernel stacks when debuginfo is available - Improvements for 'perf bench futex': - Add --mlockall parameter - Add --broadcast and --pi to the 'requeue' sub benchmark - Add support for PMU aliases. - Introduce an ARM Coresight ETE decoder. - Add a 'perf bench' entry for evlist open/close operations, to help quantify improvements with multithreading 'perf record'. - Allow reporting the [un]throttle PERF_RECORD_ meta event in 'perf script's python scripting. - Add a 'perf test' entry for PMU aliases. - Add a 'perf test' entry for 'perf record/perf report/perf script' pipe mode. Fixes: - perf script dlfilter (API for filtering via dynamically loaded shared object introduced in v5.14) fixes and a 'perf test' entry for it. - Fix get_current_dir_name() compilation on Android. - Fix issues with asciidoc and double dashes uses. - Fix memory leaks in the BTF handling code. - Fix leftover problems in the Documentation from the infrastructure originally lifted from the git codebase. - Fix *probe_vfs_getname.sh 'perf test' failures. - Handle fd gaps in 'perf test's test__dso_data_reopen(). - Make sure to show disasembly warnings for 'perf annotate --stdio'. - Fix output from pipe to file and vice-versa in 'perf record/report/script'. - Correct 'perf data -h' output. - Fix wrong comm in system-wide mode with 'perf record --delay'. - Do not allow --for-each-cgroup without cpu in 'perf stat' - Make 'perf test --skip' work on shell tests. - Fix libperf's verbose printing. Misc improvements: - Preparatory patches for multithreading various 'perf record' phases (synthesizing, opening, recording, etc). - Add sparse context/locking annotations in compiler-types.h, also to help with the multithreading effort. - Optimize the generation of the arch specific erno tables used in 'perf trace'. - Optimize libperf's perf_cpu_map__max(). - Improve ARM's CoreSight warnings. - Report collisions in AUX records. - Improve warnings for the LLVM 'perf test' entry. - Improve the PMU events 'perf test' codebase. - perf test: Do not compare overheads in the zstd comp test - Better support annotation on ARM. - Update 'perf trace's cmd string table to decode sys_bpf() first arg. Vendor events: - Add JSON events and metrics for Intel's Ice Lake, Tiger Lake and Elhart Lake. - Update JSON eventsand metrics for Intel's Cascade Lake and Sky Lake servers. Hardware tracing: - Improvements for the ARM hardware tracing auxtrace support" * tag 'perf-tools-for-v5.15-2021-09-04' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (130 commits) perf tests: Add test for PMU aliases perf pmu: Add PMU alias support perf session: Report collisions in AUX records perf script python: Allow reporting the [un]throttle PERF_RECORD_ meta event perf build: Report failure for testing feature libopencsd perf cs-etm: Show a warning for an unknown magic number perf cs-etm: Print the decoder name perf cs-etm: Create ETE decoder perf cs-etm: Update OpenCSD decoder for ETE perf cs-etm: Fix typo perf cs-etm: Save TRCDEVARCH register perf cs-etm: Refactor out ETMv4 header saving perf cs-etm: Initialise architecture based on TRCIDR1 perf cs-etm: Refactor initialisation of decoder params. tools build: Fix feature detect clean for out of source builds perf evlist: Add evlist__for_each_entry_from() macro perf evsel: Handle precise_ip fallback in evsel__open_cpu() perf evsel: Move bpf_counter__install_pe() to success path in evsel__open_cpu() perf evsel: Move test_attr__open() to success path in evsel__open_cpu() perf evsel: Move ignore_missing_thread() to fallback code ...
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/annotate.c26
-rw-r--r--tools/perf/util/auxtrace.c104
-rw-r--r--tools/perf/util/auxtrace.h49
-rw-r--r--tools/perf/util/config.c5
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c170
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h13
-rw-r--r--tools/perf/util/cs-etm.c238
-rw-r--r--tools/perf/util/cs-etm.h11
-rw-r--r--tools/perf/util/data-convert-bt.c2
-rw-r--r--tools/perf/util/data-convert-json.c2
-rw-r--r--tools/perf/util/debug.h14
-rw-r--r--tools/perf/util/dlfilter.c6
-rw-r--r--tools/perf/util/dlfilter.h2
-rw-r--r--tools/perf/util/dso.h1
-rw-r--r--tools/perf/util/env.c26
-rw-r--r--tools/perf/util/env.h3
-rw-r--r--tools/perf/util/events_stats.h1
-rw-r--r--tools/perf/util/evlist-hybrid.c73
-rw-r--r--tools/perf/util/evlist-hybrid.h1
-rw-r--r--tools/perf/util/evlist.c3
-rw-r--r--tools/perf/util/evlist.h16
-rw-r--r--tools/perf/util/evsel.c397
-rw-r--r--tools/perf/util/evsel.h14
-rw-r--r--tools/perf/util/get_current_dir_name.c3
-rw-r--r--tools/perf/util/header.c14
-rw-r--r--tools/perf/util/header.h2
-rw-r--r--tools/perf/util/llvm-utils.c40
-rw-r--r--tools/perf/util/mmap.h1
-rw-r--r--tools/perf/util/parse-events.y3
-rw-r--r--tools/perf/util/parse-sublevel-options.h2
-rw-r--r--tools/perf/util/perf_dlfilter.h150
-rw-r--r--tools/perf/util/pmu.c80
-rw-r--r--tools/perf/util/pmu.h10
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c32
-rw-r--r--tools/perf/util/session.c20
-rw-r--r--tools/perf/util/session.h12
-rw-r--r--tools/perf/util/synthetic-events.c53
-rw-r--r--tools/perf/util/synthetic-events.h6
-rw-r--r--tools/perf/util/target.h1
-rw-r--r--tools/perf/util/trace-event.h3
40 files changed, 994 insertions, 615 deletions
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index aa04a3655236..0bae061b2d6d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1833,7 +1833,7 @@ static int symbol__disassemble_bpf(struct symbol *sym,
ret = 0;
out:
free(prog_linfo);
- free(btf);
+ btf__free(btf);
fclose(s);
bfd_close(bfdf);
return ret;
@@ -2192,8 +2192,10 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
return errno;
args.arch = arch = arch__find(arch_name);
- if (arch == NULL)
+ if (arch == NULL) {
+ pr_err("%s: unsupported arch %s\n", __func__, arch_name);
return ENOTSUP;
+ }
if (parch)
*parch = arch;
@@ -2787,9 +2789,17 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel,
struct rb_root source_line = RB_ROOT;
struct hists *hists = evsel__hists(evsel);
char buf[1024];
+ int err;
- if (symbol__annotate2(ms, evsel, opts, NULL) < 0)
+ err = symbol__annotate2(ms, evsel, opts, NULL);
+ if (err) {
+ char msg[BUFSIZ];
+
+ dso->annotate_warned = true;
+ symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+ ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
return -1;
+ }
if (opts->print_lines) {
srcline_full_filename = opts->full_path;
@@ -2813,9 +2823,17 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel,
struct dso *dso = ms->map->dso;
struct symbol *sym = ms->sym;
struct rb_root source_line = RB_ROOT;
+ int err;
+
+ err = symbol__annotate(ms, evsel, opts, NULL);
+ if (err) {
+ char msg[BUFSIZ];
- if (symbol__annotate(ms, evsel, opts, NULL) < 0)
+ dso->annotate_warned = true;
+ symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+ ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
return -1;
+ }
symbol__calc_percent(sym, evsel);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index cb19669d2a5b..8d2865b9ade2 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -130,11 +130,6 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
return 0;
}
-#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
- pr_err("Cannot use AUX area tracing mmaps\n");
- return -1;
-#endif
-
pc->aux_offset = mp->offset;
pc->aux_size = mp->len;
@@ -1674,6 +1669,82 @@ int perf_event__process_auxtrace_error(struct perf_session *session,
return 0;
}
+/*
+ * In the compat mode kernel runs in 64-bit and perf tool runs in 32-bit mode,
+ * 32-bit perf tool cannot access 64-bit value atomically, which might lead to
+ * the issues caused by the below sequence on multiple CPUs: when perf tool
+ * accesses either the load operation or the store operation for 64-bit value,
+ * on some architectures the operation is divided into two instructions, one
+ * is for accessing the low 32-bit value and another is for the high 32-bit;
+ * thus these two user operations can give the kernel chances to access the
+ * 64-bit value, and thus leads to the unexpected load values.
+ *
+ * kernel (64-bit) user (32-bit)
+ *
+ * if (LOAD ->aux_tail) { --, LOAD ->aux_head_lo
+ * STORE $aux_data | ,--->
+ * FLUSH $aux_data | | LOAD ->aux_head_hi
+ * STORE ->aux_head --|-------` smp_rmb()
+ * } | LOAD $data
+ * | smp_mb()
+ * | STORE ->aux_tail_lo
+ * `----------->
+ * STORE ->aux_tail_hi
+ *
+ * For this reason, it's impossible for the perf tool to work correctly when
+ * the AUX head or tail is bigger than 4GB (more than 32 bits length); and we
+ * can not simply limit the AUX ring buffer to less than 4GB, the reason is
+ * the pointers can be increased monotonically, whatever the buffer size it is,
+ * at the end the head and tail can be bigger than 4GB and carry out to the
+ * high 32-bit.
+ *
+ * To mitigate the issues and improve the user experience, we can allow the
+ * perf tool working in certain conditions and bail out with error if detect
+ * any overflow cannot be handled.
+ *
+ * For reading the AUX head, it reads out the values for three times, and
+ * compares the high 4 bytes of the values between the first time and the last
+ * time, if there has no change for high 4 bytes injected by the kernel during
+ * the user reading sequence, it's safe for use the second value.
+ *
+ * When compat_auxtrace_mmap__write_tail() detects any carrying in the high
+ * 32 bits, it means there have two store operations in user space and it cannot
+ * promise the atomicity for 64-bit write, so return '-1' in this case to tell
+ * the caller an overflow error has happened.
+ */
+u64 __weak compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+ u64 first, second, last;
+ u64 mask = (u64)(UINT32_MAX) << 32;
+
+ do {
+ first = READ_ONCE(pc->aux_head);
+ /* Ensure all reads are done after we read the head */
+ smp_rmb();
+ second = READ_ONCE(pc->aux_head);
+ /* Ensure all reads are done after we read the head */
+ smp_rmb();
+ last = READ_ONCE(pc->aux_head);
+ } while ((first & mask) != (last & mask));
+
+ return second;
+}
+
+int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+ u64 mask = (u64)(UINT32_MAX) << 32;
+
+ if (tail & mask)
+ return -1;
+
+ /* Ensure all reads are done before we write the tail out */
+ smp_mb();
+ WRITE_ONCE(pc->aux_tail, tail);
+ return 0;
+}
+
static int __auxtrace_mmap__read(struct mmap *map,
struct auxtrace_record *itr,
struct perf_tool *tool, process_auxtrace_t fn,
@@ -1685,15 +1756,13 @@ static int __auxtrace_mmap__read(struct mmap *map,
size_t size, head_off, old_off, len1, len2, padding;
union perf_event ev;
void *data1, *data2;
+ int kernel_is_64_bit = perf_env__kernel_is_64_bit(evsel__env(NULL));
- if (snapshot) {
- head = auxtrace_mmap__read_snapshot_head(mm);
- if (auxtrace_record__find_snapshot(itr, mm->idx, mm, data,
- &head, &old))
- return -1;
- } else {
- head = auxtrace_mmap__read_head(mm);
- }
+ head = auxtrace_mmap__read_head(mm, kernel_is_64_bit);
+
+ if (snapshot &&
+ auxtrace_record__find_snapshot(itr, mm->idx, mm, data, &head, &old))
+ return -1;
if (old == head)
return 0;
@@ -1772,10 +1841,13 @@ static int __auxtrace_mmap__read(struct mmap *map,
mm->prev = head;
if (!snapshot) {
- auxtrace_mmap__write_tail(mm, head);
- if (itr->read_finish) {
- int err;
+ int err;
+ err = auxtrace_mmap__write_tail(mm, head, kernel_is_64_bit);
+ if (err < 0)
+ return err;
+
+ if (itr->read_finish) {
err = itr->read_finish(itr, mm->idx);
if (err < 0)
return err;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index cc1c1b9cec9c..5f383908ca6e 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -440,52 +440,39 @@ struct auxtrace_cache;
#ifdef HAVE_AUXTRACE_SUPPORT
-/*
- * In snapshot mode the mmapped page is read-only which makes using
- * __sync_val_compare_and_swap() problematic. However, snapshot mode expects
- * the buffer is not updated while the snapshot is made (e.g. Intel PT disables
- * the event) so there is not a race anyway.
- */
-static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm)
-{
- struct perf_event_mmap_page *pc = mm->userpg;
- u64 head = READ_ONCE(pc->aux_head);
-
- /* Ensure all reads are done after we read the head */
- smp_rmb();
- return head;
-}
+u64 compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm);
+int compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail);
-static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm,
+ int kernel_is_64_bit __maybe_unused)
{
struct perf_event_mmap_page *pc = mm->userpg;
-#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
- u64 head = READ_ONCE(pc->aux_head);
-#else
- u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0);
+ u64 head;
+
+#if BITS_PER_LONG == 32
+ if (kernel_is_64_bit)
+ return compat_auxtrace_mmap__read_head(mm);
#endif
+ head = READ_ONCE(pc->aux_head);
/* Ensure all reads are done after we read the head */
smp_rmb();
return head;
}
-static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+static inline int auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail,
+ int kernel_is_64_bit __maybe_unused)
{
struct perf_event_mmap_page *pc = mm->userpg;
-#if BITS_PER_LONG != 64 && defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
- u64 old_tail;
-#endif
+#if BITS_PER_LONG == 32
+ if (kernel_is_64_bit)
+ return compat_auxtrace_mmap__write_tail(mm, tail);
+#endif
/* Ensure all reads are done before we write the tail out */
smp_mb();
-#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
- pc->aux_tail = tail;
-#else
- do {
- old_tail = __sync_val_compare_and_swap(&pc->aux_tail, 0, 0);
- } while (!__sync_bool_compare_and_swap(&pc->aux_tail, old_tail, tail));
-#endif
+ WRITE_ONCE(pc->aux_tail, tail);
+ return 0;
}
int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 63d472b336de..4fb5e90d7a57 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -581,7 +581,10 @@ const char *perf_home_perfconfig(void)
static const char *config;
static bool failed;
- config = failed ? NULL : home_perfconfig();
+ if (failed || config)
+ return config;
+
+ config = home_perfconfig();
if (!config)
failed = true;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 3e1a05bc82cc..31fa3b45134a 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -13,8 +13,6 @@
#include <linux/zalloc.h>
#include <stdlib.h>
#include <opencsd/c_api/opencsd_c_api.h>
-#include <opencsd/etmv4/trc_pkt_types_etmv4.h>
-#include <opencsd/ocsd_if_types.h>
#include "cs-etm.h"
#include "cs-etm-decoder.h"
@@ -35,9 +33,11 @@
struct cs_etm_decoder {
void *data;
void (*packet_printer)(const char *msg);
+ bool suppress_printing;
dcd_tree_handle_t dcd_tree;
cs_etm_mem_cb_type mem_access;
ocsd_datapath_resp_t prev_return;
+ const char *decoder_name;
};
static u32
@@ -74,9 +74,10 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
ocsd_datapath_resp_t dp_ret;
decoder->prev_return = OCSD_RESP_CONT;
-
+ decoder->suppress_printing = true;
dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET,
0, 0, NULL, NULL);
+ decoder->suppress_printing = false;
if (OCSD_DATA_RESP_IS_FATAL(dp_ret))
return -1;
@@ -124,6 +125,21 @@ static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params,
return 0;
}
+#define TRCIDR1_TRCARCHMIN_SHIFT 4
+#define TRCIDR1_TRCARCHMIN_MASK GENMASK(7, 4)
+#define TRCIDR1_TRCARCHMIN(x) (((x) & TRCIDR1_TRCARCHMIN_MASK) >> TRCIDR1_TRCARCHMIN_SHIFT)
+
+static enum _ocsd_arch_version cs_etm_decoder__get_etmv4_arch_ver(u32 reg_idr1)
+{
+ /*
+ * For ETMv4 if the trace minor version is 4 or more then we can assume
+ * the architecture is ARCH_AA64 rather than just V8.
+ * ARCH_V8 = V8 architecture
+ * ARCH_AA64 = Min v8r3 plus additional AA64 PE features
+ */
+ return TRCIDR1_TRCARCHMIN(reg_idr1) >= 4 ? ARCH_AA64 : ARCH_V8;
+}
+
static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params,
ocsd_etmv4_cfg *config)
{
@@ -138,7 +154,21 @@ static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params,
config->reg_idr11 = 0;
config->reg_idr12 = 0;
config->reg_idr13 = 0;
- config->arch_ver = ARCH_V8;
+ config->arch_ver = cs_etm_decoder__get_etmv4_arch_ver(params->etmv4.reg_idr1);
+ config->core_prof = profile_CortexA;
+}
+
+static void cs_etm_decoder__gen_ete_config(struct cs_etm_trace_params *params,
+ ocsd_ete_cfg *config)
+{
+ config->reg_configr = params->ete.reg_configr;
+ config->reg_traceidr = params->ete.reg_traceidr;
+ config->reg_idr0 = params->ete.reg_idr0;
+ config->reg_idr1 = params->ete.reg_idr1;
+ config->reg_idr2 = params->ete.reg_idr2;
+ config->reg_idr8 = params->ete.reg_idr8;
+ config->reg_devarch = params->ete.reg_devarch;
+ config->arch_ver = ARCH_AA64;
config->core_prof = profile_CortexA;
}
@@ -146,8 +176,10 @@ static void cs_etm_decoder__print_str_cb(const void *p_context,
const char *msg,
const int str_len)
{
- if (p_context && str_len)
- ((struct cs_etm_decoder *)p_context)->packet_printer(msg);
+ const struct cs_etm_decoder *decoder = p_context;
+
+ if (p_context && str_len && !decoder->suppress_printing)
+ decoder->packet_printer(msg);
}
static int
@@ -223,55 +255,6 @@ cs_etm_decoder__init_raw_frame_logging(
}
#endif
-static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder,
- const char *decoder_name,
- void *trace_config)
-{
- u8 csid;
-
- if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name,
- OCSD_CREATE_FLG_PACKET_PROC,
- trace_config, &csid))
- return -1;
-
- if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0))
- return -1;
-
- return 0;
-}
-
-static int
-cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params,
- struct cs_etm_decoder *decoder)
-{
- const char *decoder_name;
- ocsd_etmv3_cfg config_etmv3;
- ocsd_etmv4_cfg trace_config_etmv4;
- void *trace_config;
-
- switch (t_params->protocol) {
- case CS_ETM_PROTO_ETMV3:
- case CS_ETM_PROTO_PTM:
- cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3);
- decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
- OCSD_BUILTIN_DCD_ETMV3 :
- OCSD_BUILTIN_DCD_PTM;
- trace_config = &config_etmv3;
- break;
- case CS_ETM_PROTO_ETMV4i:
- cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
- decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
- trace_config = &trace_config_etmv4;
- break;
- default:
- return -1;
- }
-
- return cs_etm_decoder__create_packet_printer(decoder,
- decoder_name,
- trace_config);
-}
-
static ocsd_datapath_resp_t
cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
struct cs_etm_packet_queue *packet_queue,
@@ -324,8 +307,11 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
* underflow.
*/
packet_queue->cs_timestamp = 0;
- WARN_ONCE(true, "Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR
- ". Decoding may be improved with --itrace=Z...\n", indx);
+ if (!cs_etm__etmq_is_timeless(etmq))
+ pr_warning_once("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR
+ ". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n",
+ indx);
+
} else if (packet_queue->instr_count > elem->timestamp) {
/*
* Sanity check that the elem->timestamp - packet_queue->instr_count would not
@@ -625,13 +611,14 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
return resp;
}
-static int cs_etm_decoder__create_etm_packet_decoder(
- struct cs_etm_trace_params *t_params,
- struct cs_etm_decoder *decoder)
+static int
+cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
+ struct cs_etm_trace_params *t_params,
+ struct cs_etm_decoder *decoder)
{
- const char *decoder_name;
ocsd_etmv3_cfg config_etmv3;
ocsd_etmv4_cfg trace_config_etmv4;
+ ocsd_ete_cfg trace_config_ete;
void *trace_config;
u8 csid;
@@ -639,51 +626,55 @@ static int cs_etm_decoder__create_etm_packet_decoder(
case CS_ETM_PROTO_ETMV3:
case CS_ETM_PROTO_PTM:
cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3);
- decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
+ decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
OCSD_BUILTIN_DCD_ETMV3 :
OCSD_BUILTIN_DCD_PTM;
trace_config = &config_etmv3;
break;
case CS_ETM_PROTO_ETMV4i:
cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
- decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
+ decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
trace_config = &trace_config_etmv4;
break;
+ case CS_ETM_PROTO_ETE:
+ cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete);
+ decoder->decoder_name = OCSD_BUILTIN_DCD_ETE;
+ trace_config = &trace_config_ete;
+ break;
default:
return -1;
}
- if (ocsd_dt_create_decoder(decoder->dcd_tree,
- decoder_name,
- OCSD_CREATE_FLG_FULL_DECODER,
- trace_config, &csid))
- return -1;
+ if (d_params->operation == CS_ETM_OPERATION_DECODE) {
+ if (ocsd_dt_create_decoder(decoder->dcd_tree,
+ decoder->decoder_name,
+ OCSD_CREATE_FLG_FULL_DECODER,
+ trace_config, &csid))
+ return -1;
- if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree,
- cs_etm_decoder__gen_trace_elem_printer,
- decoder))
- return -1;
+ if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree,
+ cs_etm_decoder__gen_trace_elem_printer,
+ decoder))
+ return -1;
- return 0;
-}
+ return 0;
+ } else if (d_params->operation == CS_ETM_OPERATION_PRINT) {
+ if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name,
+ OCSD_CREATE_FLG_PACKET_PROC,
+ trace_config, &csid))
+ return -1;
-static int
-cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
- struct cs_etm_trace_params *t_params,
- struct cs_etm_decoder *decoder)
-{
- if (d_params->operation == CS_ETM_OPERATION_PRINT)
- return cs_etm_decoder__create_etm_packet_printer(t_params,
- decoder);
- else if (d_params->operation == CS_ETM_OPERATION_DECODE)
- return cs_etm_decoder__create_etm_packet_decoder(t_params,
- decoder);
+ if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0))
+ return -1;
+
+ return 0;
+ }
return -1;
}
struct cs_etm_decoder *
-cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
+cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params,
struct cs_etm_trace_params t_params[])
{
struct cs_etm_decoder *decoder;
@@ -728,7 +719,7 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
/* init raw frame logging if required */
cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
- for (i = 0; i < num_cpu; i++) {
+ for (i = 0; i < decoders; i++) {
ret = cs_etm_decoder__create_etm_decoder(d_params,
&t_params[i],
decoder);
@@ -800,3 +791,8 @@ void cs_etm_decoder__free(struct cs_etm_decoder *decoder)
decoder->dcd_tree = NULL;
free(decoder);
}
+
+const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder)
+{
+ return decoder->decoder_name;
+}
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 11f3391d06f2..92a855fbe5b8 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -37,11 +37,22 @@ struct cs_etmv4_trace_params {
u32 reg_traceidr;
};
+struct cs_ete_trace_params {
+ u32 reg_idr0;
+ u32 reg_idr1;
+ u32 reg_idr2;
+ u32 reg_idr8;
+ u32 reg_configr;
+ u32 reg_traceidr;
+ u32 reg_devarch;
+};
+
struct cs_etm_trace_params {
int protocol;
union {
struct cs_etmv3_trace_params etmv3;
struct cs_etmv4_trace_params etmv4;
+ struct cs_ete_trace_params ete;
};
};
@@ -65,6 +76,7 @@ enum {
CS_ETM_PROTO_ETMV4i,
CS_ETM_PROTO_ETMV4d,
CS_ETM_PROTO_PTM,
+ CS_ETM_PROTO_ETE
};
enum cs_etm_decoder_operation {
@@ -92,5 +104,6 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
struct cs_etm_packet *packet);
int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
+const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder);
#endif /* INCLUDE__CS_ETM_DECODER_H__ */
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index bc1f64873c8f..f323adb1af85 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -62,7 +62,6 @@ struct cs_etm_auxtrace {
u64 instructions_sample_period;
u64 instructions_id;
u64 **metadata;
- u64 kernel_start;
unsigned int pmu_type;
};
@@ -97,7 +96,6 @@ struct cs_etm_queue {
/* RB tree for quick conversion between traceID and metadata pointers */
static struct intlist *traceid_list;
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid);
@@ -462,14 +460,30 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
}
+static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
+ struct cs_etm_auxtrace *etm, int idx)
+{
+ u64 **metadata = etm->metadata;
+
+ t_params[idx].protocol = CS_ETM_PROTO_ETE;
+ t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
+ t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
+ t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
+ t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
+ t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
+ t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
+ t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH];
+}
+
static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm)
+ struct cs_etm_auxtrace *etm,
+ int decoders)
{
int i;
u32 etmidr;
u64 architecture;
- for (i = 0; i < etm->num_cpu; i++) {
+ for (i = 0; i < decoders; i++) {
architecture = etm->metadata[i][CS_ETM_MAGIC];
switch (architecture) {
@@ -480,6 +494,9 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
case __perf_cs_etmv4_magic:
cs_etm__set_trace_param_etmv4(t_params, etm, i);
break;
+ case __perf_cs_ete_magic:
+ cs_etm__set_trace_param_ete(t_params, etm, i);
+ break;
default:
return -EINVAL;
}
@@ -490,7 +507,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
struct cs_etm_queue *etmq,
- enum cs_etm_decoder_operation mode)
+ enum cs_etm_decoder_operation mode,
+ bool formatted)
{
int ret = -EINVAL;
@@ -500,7 +518,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
d_params->packet_printer = cs_etm__packet_dump;
d_params->operation = mode;
d_params->data = etmq;
- d_params->formatted = true;
+ d_params->formatted = formatted;
d_params->fsyncs = false;
d_params->hsyncs = false;
d_params->frame_aligned = true;
@@ -510,44 +528,23 @@ out:
return ret;
}
-static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
+static void cs_etm__dump_event(struct cs_etm_queue *etmq,
struct auxtrace_buffer *buffer)
{
int ret;
const char *color = PERF_COLOR_BLUE;
- struct cs_etm_decoder_params d_params;
- struct cs_etm_trace_params *t_params;
- struct cs_etm_decoder *decoder;
size_t buffer_used = 0;
fprintf(stdout, "\n");
color_fprintf(stdout, color,
- ". ... CoreSight ETM Trace data: size %zu bytes\n",
- buffer->size);
-
- /* Use metadata to fill in trace parameters for trace decoder */
- t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
-
- if (!t_params)
- return;
-
- if (cs_etm__init_trace_params(t_params, etm))
- goto out_free;
+ ". ... CoreSight %s Trace data: size %zu bytes\n",
+ cs_etm_decoder__get_name(etmq->decoder), buffer->size);
- /* Set decoder parameters to simply print the trace packets */
- if (cs_etm__init_decoder_params(&d_params, NULL,
- CS_ETM_OPERATION_PRINT))
- goto out_free;
-
- decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
-
- if (!decoder)
- goto out_free;
do {
size_t consumed;
ret = cs_etm_decoder__process_data_block(
- decoder, buffer->offset,
+ etmq->decoder, buffer->offset,
&((u8 *)buffer->data)[buffer_used],
buffer->size - buffer_used, &consumed);
if (ret)
@@ -556,16 +553,12 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
buffer_used += consumed;
} while (buffer_used < buffer->size);
- cs_etm_decoder__free(decoder);
-
-out_free:
- zfree(&t_params);
+ cs_etm_decoder__reset(etmq->decoder);
}
static int cs_etm__flush_events(struct perf_session *session,
struct perf_tool *tool)
{
- int ret;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
auxtrace);
@@ -575,11 +568,6 @@ static int cs_etm__flush_events(struct perf_session *session,
if (!tool->ordered_events)
return -EINVAL;
- ret = cs_etm__update_queues(etm);
-
- if (ret < 0)
- return ret;
-
if (etm->timeless_decoding)
return cs_etm__process_timeless_queues(etm, -1);
@@ -691,7 +679,7 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
machine = etmq->etm->machine;
- if (address >= etmq->etm->kernel_start) {
+ if (address >= machine__kernel_start(machine)) {
if (machine__is_host(machine))
return PERF_RECORD_MISC_KERNEL;
else
@@ -746,17 +734,32 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size);
- if (len <= 0)
+ if (len <= 0) {
+ ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
+ " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
+ if (!al.map->dso->auxtrace_warned) {
+ pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
+ address,
+ al.map->dso->long_name ? al.map->dso->long_name : "Unknown");
+ al.map->dso->auxtrace_warned = true;
+ }
return 0;
+ }
return len;
}
-static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
+static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
+ bool formatted)
{
struct cs_etm_decoder_params d_params;
struct cs_etm_trace_params *t_params = NULL;
struct cs_etm_queue *etmq;
+ /*
+ * Each queue can only contain data from one CPU when unformatted, so only one decoder is
+ * needed.
+ */
+ int decoders = formatted ? etm->num_cpu : 1;
etmq = zalloc(sizeof(*etmq));
if (!etmq)
@@ -767,20 +770,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
goto out_free;
/* Use metadata to fill in trace parameters for trace decoder */
- t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
+ t_params = zalloc(sizeof(*t_params) * decoders);
if (!t_params)
goto out_free;
- if (cs_etm__init_trace_params(t_params, etm))
+ if (cs_etm__init_trace_params(t_params, etm, decoders))
goto out_free;
/* Set decoder parameters to decode trace packets */
if (cs_etm__init_decoder_params(&d_params, etmq,
- CS_ETM_OPERATION_DECODE))
+ dump_trace ? CS_ETM_OPERATION_PRINT :
+ CS_ETM_OPERATION_DECODE,
+ formatted))
goto out_free;
- etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
+ etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
+ t_params);
if (!etmq->decoder)
goto out_free;
@@ -808,31 +814,35 @@ out_free:
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
struct auxtrace_queue *queue,
- unsigned int queue_nr)
+ unsigned int queue_nr,
+ bool formatted)
{
- int ret = 0;
- unsigned int cs_queue_nr;
- u8 trace_chan_id;
- u64 cs_timestamp;
struct cs_etm_queue *etmq = queue->priv;
if (list_empty(&queue->head) || etmq)
- goto out;
+ return 0;
- etmq = cs_etm__alloc_queue(etm);
+ etmq = cs_etm__alloc_queue(etm, formatted);
- if (!etmq) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!etmq)
+ return -ENOMEM;
queue->priv = etmq;
etmq->etm = etm;
etmq->queue_nr = queue_nr;
etmq->offset = 0;
- if (etm->timeless_decoding)
- goto out;
+ return 0;
+}
+
+static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
+ struct cs_etm_queue *etmq,
+ unsigned int queue_nr)
+{
+ int ret = 0;
+ unsigned int cs_queue_nr;
+ u8 trace_chan_id;
+ u64 cs_timestamp;
/*
* We are under a CPU-wide trace scenario. As such we need to know
@@ -896,33 +906,6 @@ out:
return ret;
}
-static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
-{
- unsigned int i;
- int ret;
-
- if (!etm->kernel_start)
- etm->kernel_start = machine__kernel_start(etm->machine);
-
- for (i = 0; i < etm->queues.nr_queues; i++) {
- ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
-{
- if (etm->queues.new_data) {
- etm->queues.new_data = false;
- return cs_etm__setup_queues(etm);
- }
-
- return 0;
-}
-
static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
struct cs_etm_traceid_queue *tidq)
@@ -2222,13 +2205,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
{
int ret = 0;
- unsigned int cs_queue_nr, queue_nr;
+ unsigned int cs_queue_nr, queue_nr, i;
u8 trace_chan_id;
u64 cs_timestamp;
struct auxtrace_queue *queue;
struct cs_etm_queue *etmq;
struct cs_etm_traceid_queue *tidq;
+ /*
+ * Pre-populate the heap with one entry from each queue so that we can
+ * start processing in time order across all queues.
+ */
+ for (i = 0; i < etm->queues.nr_queues; i++) {
+ etmq = etm->queues.queue_array[i].priv;
+ if (!etmq)
+ continue;
+
+ ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
+ if (ret)
+ return ret;
+ }
+
while (1) {
if (!etm->heap.heap_cnt)
goto out;
@@ -2382,7 +2379,6 @@ static int cs_etm__process_event(struct perf_session *session,
struct perf_sample *sample,
struct perf_tool *tool)
{
- int err = 0;
u64 sample_kernel_timestamp;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
@@ -2401,12 +2397,6 @@ static int cs_etm__process_event(struct perf_session *session,
else
sample_kernel_timestamp = 0;
- if (sample_kernel_timestamp || etm->timeless_decoding) {
- err = cs_etm__update_queues(etm);
- if (err)
- return err;
- }
-
/*
* Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We
* need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because
@@ -2447,7 +2437,7 @@ static void dump_queued_data(struct cs_etm_auxtrace *etm,
for (i = 0; i < etm->queues.nr_queues; ++i)
list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
if (buf->reference == event->reference)
- cs_etm__dump_event(etm, buf);
+ cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
}
static int cs_etm__process_auxtrace_event(struct perf_session *session,
@@ -2463,6 +2453,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
int fd = perf_data__fd(session->data);
bool is_pipe = perf_data__is_pipe(session->data);
int err;
+ int idx = event->auxtrace.idx;
if (is_pipe)
data_offset = 0;
@@ -2477,9 +2468,20 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
if (err)
return err;
+ /*
+ * Knowing if the trace is formatted or not requires a lookup of
+ * the aux record so only works in non-piped mode where data is
+ * queued in cs_etm__queue_aux_records(). Always assume
+ * formatted in piped mode (true).
+ */
+ err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+ idx, true);
+ if (err)
+ return err;
+
if (dump_trace)
if (auxtrace_buffer__get_data(buffer, fd)) {
- cs_etm__dump_event(etm, buffer);
+ cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
auxtrace_buffer__put_data(buffer);
}
} else if (dump_trace)
@@ -2537,6 +2539,7 @@ static const char * const cs_etmv4_priv_fmts[] = {
[CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n",
[CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n",
[CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n",
+ [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n"
};
static const char * const param_unk_fmt =
@@ -2596,10 +2599,15 @@ static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset)
else
fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
}
- } else if (magic == __perf_cs_etmv4_magic) {
+ } else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) {
+ /*
+ * ETE and ETMv4 can be printed in the same block because the number of parameters
+ * is saved and they share the list of parameter names. ETE is also only supported
+ * in V1 files.
+ */
for (j = 0; j < total_params; j++, i++) {
/* if newer record - could be excess params */
- if (j >= CS_ETMV4_PRIV_MAX)
+ if (j >= CS_ETE_PRIV_MAX)
fprintf(stdout, param_unk_fmt, j, val[i]);
else
fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
@@ -2719,6 +2727,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
struct perf_record_auxtrace *auxtrace_event;
union perf_event auxtrace_fragment;
__u64 aux_offset, aux_size;
+ __u32 idx;
+ bool formatted;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
@@ -2780,8 +2790,15 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
" tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
- return auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
- file_offset, NULL);
+ err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
+ file_offset, NULL);
+ if (err)
+ return err;
+
+ idx = auxtrace_event->idx;
+ formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
+ return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+ idx, formatted);
}
/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
@@ -2959,6 +2976,16 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
/* The traceID is our handle */
trcidr_idx = CS_ETMV4_TRCTRACEIDR;
+ } else if (ptr[i] == __perf_cs_ete_magic) {
+ metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
+
+ /* ETE shares first part of metadata with ETMv4 */
+ trcidr_idx = CS_ETMV4_TRCTRACEIDR;
+ } else {
+ ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
+ ptr[i]);
+ err = -EINVAL;
+ goto err_free_metadata;
}
if (!metadata[j]) {
@@ -3070,6 +3097,13 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
goto err_delete_thread;
etm->data_queued = etm->queues.populated;
+ /*
+ * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and
+ * cs_etm__queue_aux_fragment() for details relating to limitations.
+ */
+ if (!etm->data_queued)
+ pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n"
+ "Continuing with best effort decoding in piped mode.\n\n");
return 0;
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index d65c7b19407d..90c83f932d9a 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -77,6 +77,15 @@ enum {
#define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1)
/*
+ * ETE metadata is ETMv4 plus TRCDEVARCH register and doesn't support header V0 since it was
+ * added in header V1
+ */
+enum {
+ CS_ETE_TRCDEVARCH = CS_ETMV4_PRIV_MAX,
+ CS_ETE_PRIV_MAX
+};
+
+/*
* ETMv3 exception encoding number:
* See Embedded Trace Macrocell specification (ARM IHI 0014Q)
* table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors.
@@ -187,8 +196,10 @@ struct cs_etm_packet_queue {
#define __perf_cs_etmv3_magic 0x3030303030303030ULL
#define __perf_cs_etmv4_magic 0x4040404040404040ULL
+#define __perf_cs_ete_magic 0x5050505050505050ULL
#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
+#define CS_ETE_PRIV_SIZE (CS_ETE_PRIV_MAX * sizeof(u64))
#ifdef HAVE_CSTRACE_SUPPORT
int cs_etm__process_auxtrace_info(union perf_event *event,
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index cace349fb700..aa862a26d95c 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1634,7 +1634,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
err = -1;
/* perf.data session */
- session = perf_session__new(&data, 0, &c.tool);
+ session = perf_session__new(&data, &c.tool);
if (IS_ERR(session))
return PTR_ERR(session);
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
index 355cd1948bdf..f1ab6edba446 100644
--- a/tools/perf/util/data-convert-json.c
+++ b/tools/perf/util/data-convert-json.c
@@ -334,7 +334,7 @@ int bt_convert__perf2json(const char *input_name, const char *output_name,
goto err;
}
- session = perf_session__new(&data, false, &c.tool);
+ session = perf_session__new(&data, &c.tool);
if (IS_ERR(session)) {
fprintf(stderr, "Error creating perf session!\n");
goto err_fclose;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 48f631966067..f99468a7f681 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -22,6 +22,13 @@ extern int debug_data_convert;
eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
#define pr_warning(fmt, ...) \
eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning_once(fmt, ...) ({ \
+ static int __warned; \
+ if (unlikely(!__warned)) { \
+ pr_warning(fmt, ##__VA_ARGS__); \
+ __warned = 1; \
+ } \
+})
#define pr_info(fmt, ...) \
eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
#define pr_debug(fmt, ...) \
@@ -55,6 +62,13 @@ void trace_event(union perf_event *event);
int ui__error(const char *format, ...) __printf(1, 2);
int ui__warning(const char *format, ...) __printf(1, 2);
+#define ui__warning_once(format, ...) ({ \
+ static int __warned; \
+ if (unlikely(!__warned)) { \
+ ui__warning(format, ##__VA_ARGS__); \
+ __warned = 1; \
+ } \
+})
void pr_stat(const char *fmt, ...);
diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
index ca33fbc5efde..db964d5a52af 100644
--- a/tools/perf/util/dlfilter.c
+++ b/tools/perf/util/dlfilter.c
@@ -21,7 +21,7 @@
#include "symbol.h"
#include "srcline.h"
#include "dlfilter.h"
-#include "perf_dlfilter.h"
+#include "../include/perf/perf_dlfilter.h"
static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al)
{
@@ -530,8 +530,8 @@ int dlfilter__do_filter_event(struct dlfilter *d,
return ret;
}
-static bool get_filter_desc(const char *dirname, const char *name,
- char **desc, char **long_desc)
+bool get_filter_desc(const char *dirname, const char *name, char **desc,
+ char **long_desc)
{
char path[PATH_MAX];
void *handle;
diff --git a/tools/perf/util/dlfilter.h b/tools/perf/util/dlfilter.h
index 505980442360..cc4bb9657d05 100644
--- a/tools/perf/util/dlfilter.h
+++ b/tools/perf/util/dlfilter.h
@@ -93,5 +93,7 @@ static inline int dlfilter__filter_event_early(struct dlfilter *d,
}
int list_available_dlfilters(const struct option *opt, const char *s, int unset);
+bool get_filter_desc(const char *dirname, const char *name, char **desc,
+ char **long_desc);
#endif
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 52e7101c5609..83723ba11dc8 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -170,6 +170,7 @@ struct dso {
u8 has_srcline:1;
u8 hit:1;
u8 annotate_warned:1;
+ u8 auxtrace_warned:1;
u8 short_name_allocated:1;
u8 long_name_allocated:1;
u8 is_64_bit:1;
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index cec2e6cad8aa..8f7ff0035c41 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -219,13 +219,35 @@ void perf_env__exit(struct perf_env *env)
zfree(&env->hybrid_cpc_nodes);
}
-void perf_env__init(struct perf_env *env __maybe_unused)
+void perf_env__init(struct perf_env *env)
{
#ifdef HAVE_LIBBPF_SUPPORT
env->bpf_progs.infos = RB_ROOT;
env->bpf_progs.btfs = RB_ROOT;
init_rwsem(&env->bpf_progs.lock);
#endif
+ env->kernel_is_64_bit = -1;
+}
+
+static void perf_env__init_kernel_mode(struct perf_env *env)
+{
+ const char *arch = perf_env__raw_arch(env);
+
+ if (!strncmp(arch, "x86_64", 6) || !strncmp(arch, "aarch64", 7) ||
+ !strncmp(arch, "arm64", 5) || !strncmp(arch, "mips64", 6) ||
+ !strncmp(arch, "parisc64", 8) || !strncmp(arch, "riscv64", 7) ||
+ !strncmp(arch, "s390x", 5) || !strncmp(arch, "sparc64", 7))
+ env->kernel_is_64_bit = 1;
+ else
+ env->kernel_is_64_bit = 0;
+}
+
+int perf_env__kernel_is_64_bit(struct perf_env *env)
+{
+ if (env->kernel_is_64_bit == -1)
+ perf_env__init_kernel_mode(env);
+
+ return env->kernel_is_64_bit;
}
int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
@@ -349,7 +371,7 @@ static const char *normalize_arch(char *arch)
return "x86";
if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
return "sparc";
- if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64"))
+ if (!strncmp(arch, "aarch64", 7) || !strncmp(arch, "arm64", 5))
return "arm64";
if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
return "arm";
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 6824a7423a2d..1f5175820a05 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -61,6 +61,7 @@ struct perf_env {
unsigned long long total_mem;
unsigned int msr_pmu_type;
unsigned int max_branches;
+ int kernel_is_64_bit;
int nr_cmdline;
int nr_sibling_cores;
@@ -143,6 +144,8 @@ extern struct perf_env perf_env;
void perf_env__exit(struct perf_env *env);
+int perf_env__kernel_is_64_bit(struct perf_env *env);
+
int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
int perf_env__read_cpuid(struct perf_env *env);
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
index 3480bafd414b..1b0006092265 100644
--- a/tools/perf/util/events_stats.h
+++ b/tools/perf/util/events_stats.h
@@ -30,6 +30,7 @@ struct events_stats {
u64 total_lost_samples;
u64 total_aux_lost;
u64 total_aux_partial;
+ u64 total_aux_collision;
u64 total_invalid_chains;
u32 nr_events[PERF_RECORD_HEADER_MAX];
u32 nr_lost_warned;
diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
index db3f5fbdebe1..7c554234b43d 100644
--- a/tools/perf/util/evlist-hybrid.c
+++ b/tools/perf/util/evlist-hybrid.c
@@ -86,3 +86,76 @@ bool evlist__has_hybrid(struct evlist *evlist)
return false;
}
+
+int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
+{
+ struct perf_cpu_map *cpus;
+ struct evsel *evsel, *tmp;
+ struct perf_pmu *pmu;
+ int ret, unmatched_count = 0, events_nr = 0;
+
+ if (!perf_pmu__has_hybrid() || !cpu_list)
+ return 0;
+
+ cpus = perf_cpu_map__new(cpu_list);
+ if (!cpus)
+ return -1;
+
+ /*
+ * The evsels are created with hybrid pmu's cpus. But now we
+ * need to check and adjust the cpus of evsel by cpu_list because
+ * cpu_list may cause conflicts with cpus of evsel. For example,
+ * cpus of evsel is cpu0-7, but the cpu_list is cpu6-8, we need
+ * to adjust the cpus of evsel to cpu6-7. And then propatate maps
+ * in evlist__create_maps().
+ */
+ evlist__for_each_entry_safe(evlist, tmp, evsel) {
+ struct perf_cpu_map *matched_cpus, *unmatched_cpus;
+ char buf1[128], buf2[128];
+
+ pmu = perf_pmu__find_hybrid_pmu(evsel->pmu_name);
+ if (!pmu)
+ continue;
+
+ ret = perf_pmu__cpus_match(pmu, cpus, &matched_cpus,
+ &unmatched_cpus);
+ if (ret)
+ goto out;
+
+ events_nr++;
+
+ if (matched_cpus->nr > 0 && (unmatched_cpus->nr > 0 ||
+ matched_cpus->nr < cpus->nr ||
+ matched_cpus->nr < pmu->cpus->nr)) {
+ perf_cpu_map__put(evsel->core.cpus);
+ perf_cpu_map__put(evsel->core.own_cpus);
+ evsel->core.cpus = perf_cpu_map__get(matched_cpus);
+ evsel->core.own_cpus = perf_cpu_map__get(matched_cpus);
+
+ if (unmatched_cpus->nr > 0) {
+ cpu_map__snprint(matched_cpus, buf1, sizeof(buf1));
+ pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n",
+ buf1, pmu->name, evsel->name);
+ }
+ }
+
+ if (matched_cpus->nr == 0) {
+ evlist__remove(evlist, evsel);
+ evsel__delete(evsel);
+
+ cpu_map__snprint(cpus, buf1, sizeof(buf1));
+ cpu_map__snprint(pmu->cpus, buf2, sizeof(buf2));
+ pr_warning("WARNING: %s isn't a '%s', please use a CPU list in the '%s' range (%s)\n",
+ buf1, pmu->name, pmu->name, buf2);
+ unmatched_count++;
+ }
+
+ perf_cpu_map__put(matched_cpus);
+ perf_cpu_map__put(unmatched_cpus);
+ }
+
+ ret = (unmatched_count == events_nr) ? -1 : 0;
+out:
+ perf_cpu_map__put(cpus);
+ return ret;
+}
diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h
index 19f74b4c340a..aacdb1b0f948 100644
--- a/tools/perf/util/evlist-hybrid.h
+++ b/tools/perf/util/evlist-hybrid.h
@@ -10,5 +10,6 @@
int evlist__add_default_hybrid(struct evlist *evlist, bool precise);
void evlist__warn_hybrid_group(struct evlist *evlist);
bool evlist__has_hybrid(struct evlist *evlist);
+int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list);
#endif /* __PERF_EVLIST_HYBRID_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 47581a237c7a..5f92319ce258 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -27,6 +27,7 @@
#include "util/perf_api_probe.h"
#include "util/evsel_fprintf.h"
#include "util/evlist-hybrid.h"
+#include "util/pmu.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
@@ -1002,7 +1003,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target)
if (!cpus)
goto out_delete_threads;
- evlist->core.has_user_cpus = !!target->cpu_list;
+ evlist->core.has_user_cpus = !!target->cpu_list && !target->hybrid;
perf_evlist__set_maps(&evlist->core, cpus, threads);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 5c22383489ae..97bfb8d0be4f 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -276,6 +276,22 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel);
__evlist__for_each_entry_continue(&(evlist)->core.entries, evsel)
/**
+ * __evlist__for_each_entry_from - continue iteration from @evsel (included)
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_from(list, evsel) \
+ list_for_each_entry_from(evsel, list, core.node)
+
+/**
+ * evlist__for_each_entry_from - continue iteration from @evsel (included)
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_from(evlist, evsel) \
+ __evlist__for_each_entry_from(&(evlist)->core.entries, evsel)
+
+/**
* __evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
* @list: list_head instance to iterate
* @evsel: struct evsel iterator
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f61e5dd53f5d..54d251327b5b 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1656,7 +1656,7 @@ static int update_fds(struct evsel *evsel,
return 0;
}
-static bool ignore_missing_thread(struct evsel *evsel,
+bool evsel__ignore_missing_thread(struct evsel *evsel,
int nr_cpus, int cpu,
struct perf_thread_map *threads,
int thread, int err)
@@ -1709,59 +1709,43 @@ static void display_attr(struct perf_event_attr *attr)
}
}
-static int perf_event_open(struct evsel *evsel,
- pid_t pid, int cpu, int group_fd,
- unsigned long flags)
+bool evsel__precise_ip_fallback(struct evsel *evsel)
{
- int precise_ip = evsel->core.attr.precise_ip;
- int fd;
-
- while (1) {
- pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
- pid, cpu, group_fd, flags);
-
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, group_fd, flags);
- if (fd >= 0)
- break;
-
- /* Do not try less precise if not requested. */
- if (!evsel->precise_max)
- break;
-
- /*
- * We tried all the precise_ip values, and it's
- * still failing, so leave it to standard fallback.
- */
- if (!evsel->core.attr.precise_ip) {
- evsel->core.attr.precise_ip = precise_ip;
- break;
- }
+ /* Do not try less precise if not requested. */
+ if (!evsel->precise_max)
+ return false;
- pr_debug2_peo("\nsys_perf_event_open failed, error %d\n", -ENOTSUP);
- evsel->core.attr.precise_ip--;
- pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
- display_attr(&evsel->core.attr);
+ /*
+ * We tried all the precise_ip values, and it's
+ * still failing, so leave it to standard fallback.
+ */
+ if (!evsel->core.attr.precise_ip) {
+ evsel->core.attr.precise_ip = evsel->precise_ip_original;
+ return false;
}
- return fd;
+ if (!evsel->precise_ip_original)
+ evsel->precise_ip_original = evsel->core.attr.precise_ip;
+
+ evsel->core.attr.precise_ip--;
+ pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
+ display_attr(&evsel->core.attr);
+ return true;
}
-static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
- struct perf_thread_map *threads,
- int start_cpu, int end_cpu)
+static struct perf_cpu_map *empty_cpu_map;
+static struct perf_thread_map *empty_thread_map;
+
+static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads)
{
- int cpu, thread, nthreads;
- unsigned long flags = PERF_FLAG_FD_CLOEXEC;
- int pid = -1, err, old_errno;
- enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
+ int nthreads;
if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
(perf_missing_features.aux_output && evsel->core.attr.aux_output))
return -EINVAL;
if (cpus == NULL) {
- static struct perf_cpu_map *empty_cpu_map;
-
if (empty_cpu_map == NULL) {
empty_cpu_map = perf_cpu_map__dummy_new();
if (empty_cpu_map == NULL)
@@ -1772,8 +1756,6 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
}
if (threads == NULL) {
- static struct perf_thread_map *empty_thread_map;
-
if (empty_thread_map == NULL) {
empty_thread_map = thread_map__new_by_tid(-1);
if (empty_thread_map == NULL)
@@ -1792,12 +1774,15 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0)
return -ENOMEM;
- if (evsel->cgrp) {
- flags |= PERF_FLAG_PID_CGROUP;
- pid = evsel->cgrp->fd;
- }
+ evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
+ if (evsel->cgrp)
+ evsel->open_flags |= PERF_FLAG_PID_CGROUP;
-fallback_missing_features:
+ return 0;
+}
+
+static void evsel__disable_missing_features(struct evsel *evsel)
+{
if (perf_missing_features.weight_struct) {
evsel__set_sample_bit(evsel, WEIGHT);
evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
@@ -1809,7 +1794,7 @@ fallback_missing_features:
evsel->core.attr.clockid = 0;
}
if (perf_missing_features.cloexec)
- flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
+ evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
if (perf_missing_features.mmap2)
evsel->core.attr.mmap2 = 0;
if (perf_missing_features.exclude_guest)
@@ -1825,119 +1810,26 @@ fallback_missing_features:
evsel->core.attr.bpf_event = 0;
if (perf_missing_features.branch_hw_idx)
evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
-retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->core.attr.sample_id_all = 0;
+}
- display_attr(&evsel->core.attr);
-
- for (cpu = start_cpu; cpu < end_cpu; cpu++) {
-
- for (thread = 0; thread < nthreads; thread++) {
- int fd, group_fd;
-
- if (!evsel->cgrp && !evsel->core.system_wide)
- pid = perf_thread_map__pid(threads, thread);
-
- group_fd = get_group_fd(evsel, cpu, thread);
-retry_open:
- test_attr__ready();
-
- fd = perf_event_open(evsel, pid, cpus->map[cpu],
- group_fd, flags);
-
- FD(evsel, cpu, thread) = fd;
-
- bpf_counter__install_pe(evsel, cpu, fd);
-
- if (unlikely(test_attr__enabled)) {
- test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
- fd, group_fd, flags);
- }
-
- if (fd < 0) {
- err = -errno;
-
- if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
- /*
- * We just removed 1 thread, so take a step
- * back on thread index and lower the upper
- * nthreads limit.
- */
- nthreads--;
- thread--;
-
- /* ... and pretend like nothing have happened. */
- err = 0;
- continue;
- }
-
- pr_debug2_peo("\nsys_perf_event_open failed, error %d\n",
- err);
- goto try_fallback;
- }
-
- pr_debug2_peo(" = %d\n", fd);
-
- if (evsel->bpf_fd >= 0) {
- int evt_fd = fd;
- int bpf_fd = evsel->bpf_fd;
-
- err = ioctl(evt_fd,
- PERF_EVENT_IOC_SET_BPF,
- bpf_fd);
- if (err && errno != EEXIST) {
- pr_err("failed to attach bpf fd %d: %s\n",
- bpf_fd, strerror(errno));
- err = -EINVAL;
- goto out_close;
- }
- }
-
- set_rlimit = NO_CHANGE;
-
- /*
- * If we succeeded but had to kill clockid, fail and
- * have evsel__open_strerror() print us a nice error.
- */
- if (perf_missing_features.clockid ||
- perf_missing_features.clockid_wrong) {
- err = -EINVAL;
- goto out_close;
- }
- }
- }
-
- return 0;
+int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads)
+{
+ int err;
-try_fallback:
- /*
- * perf stat needs between 5 and 22 fds per CPU. When we run out
- * of them try to increase the limits.
- */
- if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
- struct rlimit l;
+ err = __evsel__prepare_open(evsel, cpus, threads);
+ if (err)
+ return err;
- old_errno = errno;
- if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
- if (set_rlimit == NO_CHANGE)
- l.rlim_cur = l.rlim_max;
- else {
- l.rlim_cur = l.rlim_max + 1000;
- l.rlim_max = l.rlim_cur;
- }
- if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
- set_rlimit++;
- errno = old_errno;
- goto retry_open;
- }
- }
- errno = old_errno;
- }
+ evsel__disable_missing_features(evsel);
- if (err != -EINVAL || cpu > 0 || thread > 0)
- goto out_close;
+ return err;
+}
+bool evsel__detect_missing_features(struct evsel *evsel)
+{
/*
* Must probe features in the order they were added to the
* perf_event_attr interface.
@@ -1946,82 +1838,239 @@ try_fallback:
(evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
perf_missing_features.weight_struct = true;
pr_debug2("switching off weight struct support\n");
- goto fallback_missing_features;
+ return true;
} else if (!perf_missing_features.code_page_size &&
(evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) {
perf_missing_features.code_page_size = true;
pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n");
- goto out_close;
+ return false;
} else if (!perf_missing_features.data_page_size &&
(evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) {
perf_missing_features.data_page_size = true;
pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n");
- goto out_close;
+ return false;
} else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
perf_missing_features.cgroup = true;
pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
- goto out_close;
- } else if (!perf_missing_features.branch_hw_idx &&
+ return false;
+ } else if (!perf_missing_features.branch_hw_idx &&
(evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
perf_missing_features.branch_hw_idx = true;
pr_debug2("switching off branch HW index support\n");
- goto fallback_missing_features;
+ return true;
} else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
perf_missing_features.aux_output = true;
pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
- goto out_close;
+ return false;
} else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) {
perf_missing_features.bpf = true;
pr_debug2_peo("switching off bpf_event\n");
- goto fallback_missing_features;
+ return true;
} else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) {
perf_missing_features.ksymbol = true;
pr_debug2_peo("switching off ksymbol\n");
- goto fallback_missing_features;
+ return true;
} else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) {
perf_missing_features.write_backward = true;
pr_debug2_peo("switching off write_backward\n");
- goto out_close;
+ return false;
} else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) {
perf_missing_features.clockid_wrong = true;
pr_debug2_peo("switching off clockid\n");
- goto fallback_missing_features;
+ return true;
} else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) {
perf_missing_features.clockid = true;
pr_debug2_peo("switching off use_clockid\n");
- goto fallback_missing_features;
- } else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
+ return true;
+ } else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) {
perf_missing_features.cloexec = true;
pr_debug2_peo("switching off cloexec flag\n");
- goto fallback_missing_features;
+ return true;
} else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) {
perf_missing_features.mmap2 = true;
pr_debug2_peo("switching off mmap2\n");
- goto fallback_missing_features;
+ return true;
} else if (!perf_missing_features.exclude_guest &&
(evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
perf_missing_features.exclude_guest = true;
pr_debug2_peo("switching off exclude_guest, exclude_host\n");
- goto fallback_missing_features;
+ return true;
} else if (!perf_missing_features.sample_id_all) {
perf_missing_features.sample_id_all = true;
pr_debug2_peo("switching off sample_id_all\n");
- goto retry_sample_id;
+ return true;
} else if (!perf_missing_features.lbr_flags &&
(evsel->core.attr.branch_sample_type &
(PERF_SAMPLE_BRANCH_NO_CYCLES |
PERF_SAMPLE_BRANCH_NO_FLAGS))) {
perf_missing_features.lbr_flags = true;
pr_debug2_peo("switching off branch sample type no (cycles/flags)\n");
- goto fallback_missing_features;
+ return true;
} else if (!perf_missing_features.group_read &&
evsel->core.attr.inherit &&
(evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
evsel__is_group_leader(evsel)) {
perf_missing_features.group_read = true;
pr_debug2_peo("switching off group read\n");
- goto fallback_missing_features;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool evsel__increase_rlimit(enum rlimit_action *set_rlimit)
+{
+ int old_errno;
+ struct rlimit l;
+
+ if (*set_rlimit < INCREASED_MAX) {
+ old_errno = errno;
+
+ if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
+ if (*set_rlimit == NO_CHANGE) {
+ l.rlim_cur = l.rlim_max;
+ } else {
+ l.rlim_cur = l.rlim_max + 1000;
+ l.rlim_max = l.rlim_cur;
+ }
+ if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
+ (*set_rlimit) += 1;
+ errno = old_errno;
+ return true;
+ }
+ }
+ errno = old_errno;
}
+
+ return false;
+}
+
+static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads,
+ int start_cpu, int end_cpu)
+{
+ int cpu, thread, nthreads;
+ int pid = -1, err, old_errno;
+ enum rlimit_action set_rlimit = NO_CHANGE;
+
+ err = __evsel__prepare_open(evsel, cpus, threads);
+ if (err)
+ return err;
+
+ if (cpus == NULL)
+ cpus = empty_cpu_map;
+
+ if (threads == NULL)
+ threads = empty_thread_map;
+
+ if (evsel->core.system_wide)
+ nthreads = 1;
+ else
+ nthreads = threads->nr;
+
+ if (evsel->cgrp)
+ pid = evsel->cgrp->fd;
+
+fallback_missing_features:
+ evsel__disable_missing_features(evsel);
+
+ display_attr(&evsel->core.attr);
+
+ for (cpu = start_cpu; cpu < end_cpu; cpu++) {
+
+ for (thread = 0; thread < nthreads; thread++) {
+ int fd, group_fd;
+retry_open:
+ if (thread >= nthreads)
+ break;
+
+ if (!evsel->cgrp && !evsel->core.system_wide)
+ pid = perf_thread_map__pid(threads, thread);
+
+ group_fd = get_group_fd(evsel, cpu, thread);
+
+ test_attr__ready();
+
+ pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
+ pid, cpus->map[cpu], group_fd, evsel->open_flags);
+
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[cpu],
+ group_fd, evsel->open_flags);
+
+ FD(evsel, cpu, thread) = fd;
+
+ if (fd < 0) {
+ err = -errno;
+
+ pr_debug2_peo("\nsys_perf_event_open failed, error %d\n",
+ err);
+ goto try_fallback;
+ }
+
+ bpf_counter__install_pe(evsel, cpu, fd);
+
+ if (unlikely(test_attr__enabled)) {
+ test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
+ fd, group_fd, evsel->open_flags);
+ }
+
+ pr_debug2_peo(" = %d\n", fd);
+
+ if (evsel->bpf_fd >= 0) {
+ int evt_fd = fd;
+ int bpf_fd = evsel->bpf_fd;
+
+ err = ioctl(evt_fd,
+ PERF_EVENT_IOC_SET_BPF,
+ bpf_fd);
+ if (err && errno != EEXIST) {
+ pr_err("failed to attach bpf fd %d: %s\n",
+ bpf_fd, strerror(errno));
+ err = -EINVAL;
+ goto out_close;
+ }
+ }
+
+ set_rlimit = NO_CHANGE;
+
+ /*
+ * If we succeeded but had to kill clockid, fail and
+ * have evsel__open_strerror() print us a nice error.
+ */
+ if (perf_missing_features.clockid ||
+ perf_missing_features.clockid_wrong) {
+ err = -EINVAL;
+ goto out_close;
+ }
+ }
+ }
+
+ return 0;
+
+try_fallback:
+ if (evsel__precise_ip_fallback(evsel))
+ goto retry_open;
+
+ if (evsel__ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
+ /* We just removed 1 thread, so lower the upper nthreads limit. */
+ nthreads--;
+
+ /* ... and pretend like nothing have happened. */
+ err = 0;
+ goto retry_open;
+ }
+ /*
+ * perf stat needs between 5 and 22 fds per CPU. When we run out
+ * of them try to increase the limits.
+ */
+ if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit))
+ goto retry_open;
+
+ if (err != -EINVAL || cpu > 0 || thread > 0)
+ goto out_close;
+
+ if (evsel__detect_missing_features(evsel))
+ goto fallback_missing_features;
out_close:
if (err)
threads->err_thread = thread;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 80383096d51c..1b3eeab5f188 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -150,6 +150,8 @@ struct evsel {
struct bperf_leader_bpf *leader_skel;
struct bperf_follower_bpf *follower_skel;
};
+ unsigned long open_flags;
+ int precise_ip_original;
};
struct perf_missing_features {
@@ -286,6 +288,18 @@ int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
void evsel__close(struct evsel *evsel);
+int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads);
+bool evsel__detect_missing_features(struct evsel *evsel);
+
+enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX };
+bool evsel__increase_rlimit(enum rlimit_action *set_rlimit);
+
+bool evsel__ignore_missing_thread(struct evsel *evsel,
+ int nr_cpus, int cpu,
+ struct perf_thread_map *threads,
+ int thread, int err);
+bool evsel__precise_ip_fallback(struct evsel *evsel);
struct perf_sample;
diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c
index b205d929245f..e68935e9ac8c 100644
--- a/tools/perf/util/get_current_dir_name.c
+++ b/tools/perf/util/get_current_dir_name.c
@@ -3,8 +3,9 @@
//
#ifndef HAVE_GET_CURRENT_DIR_NAME
#include "get_current_dir_name.h"
+#include <limits.h>
+#include <string.h>
#include <unistd.h>
-#include <stdlib.h>
/* Android's 'bionic' library, for one, doesn't have this */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 44249027507a..d2231cb7c4f7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1284,7 +1284,7 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
dir = opendir(path);
if (!dir) {
- pr_warning("failed: cant' open memory sysfs data\n");
+ pr_warning("failed: can't open memory sysfs data\n");
return -1;
}
@@ -3865,10 +3865,10 @@ static int perf_file_section__process(struct perf_file_section *section,
static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
struct perf_header *ph,
struct perf_data* data,
- bool repipe)
+ bool repipe, int repipe_fd)
{
struct feat_fd ff = {
- .fd = STDOUT_FILENO,
+ .fd = repipe_fd,
.ph = ph,
};
ssize_t ret;
@@ -3891,13 +3891,13 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
return 0;
}
-static int perf_header__read_pipe(struct perf_session *session)
+static int perf_header__read_pipe(struct perf_session *session, int repipe_fd)
{
struct perf_header *header = &session->header;
struct perf_pipe_file_header f_header;
if (perf_file_header__read_pipe(&f_header, header, session->data,
- session->repipe) < 0) {
+ session->repipe, repipe_fd) < 0) {
pr_debug("incompatible file format\n");
return -EINVAL;
}
@@ -3995,7 +3995,7 @@ static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_h
return 0;
}
-int perf_session__read_header(struct perf_session *session)
+int perf_session__read_header(struct perf_session *session, int repipe_fd)
{
struct perf_data *data = session->data;
struct perf_header *header = &session->header;
@@ -4016,7 +4016,7 @@ int perf_session__read_header(struct perf_session *session)
* We can read 'pipe' data event from regular file,
* check for the pipe header regardless of source.
*/
- err = perf_header__read_pipe(session);
+ err = perf_header__read_pipe(session, repipe_fd);
if (!err || perf_data__is_pipe(data)) {
data->is_pipe = true;
return err;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index ae6b1cf19a7d..c9e3265832d9 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -115,7 +115,7 @@ struct perf_session;
struct perf_tool;
union perf_event;
-int perf_session__read_header(struct perf_session *session);
+int perf_session__read_header(struct perf_session *session, int repipe_fd);
int perf_session__write_header(struct perf_session *session,
struct evlist *evlist,
int fd, bool at_exit);
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index cbd9b268f168..96c8ef60f4f8 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -38,6 +38,8 @@ struct llvm_param llvm_param = {
.user_set_param = false,
};
+static void version_notice(void);
+
int perf_llvm_config(const char *var, const char *value)
{
if (!strstarts(var, "llvm."))
@@ -108,6 +110,21 @@ search_program(const char *def, const char *name,
return ret;
}
+static int search_program_and_warn(const char *def, const char *name,
+ char *output)
+{
+ int ret = search_program(def, name, output);
+
+ if (ret) {
+ pr_err("ERROR:\tunable to find %s.\n"
+ "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
+ " \tand '%s-path' option in [llvm] section of ~/.perfconfig.\n",
+ name, name);
+ version_notice();
+ }
+ return ret;
+}
+
#define READ_SIZE 4096
static int
read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
@@ -217,7 +234,7 @@ version_notice(void)
" \t\tgit clone http://llvm.org/git/clang.git\n\n"
" \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n"
" \tdebian/ubuntu:\n"
-" \t\thttp://llvm.org/apt\n\n"
+" \t\thttps://apt.llvm.org/\n\n"
" \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
" \toption in [llvm] section of ~/.perfconfig to:\n\n"
" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n"
@@ -458,16 +475,10 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
if (!template)
template = CLANG_BPF_CMD_DEFAULT_TEMPLATE;
- err = search_program(llvm_param.clang_path,
+ err = search_program_and_warn(llvm_param.clang_path,
"clang", clang_path);
- if (err) {
- pr_err(
-"ERROR:\tunable to find clang.\n"
-"Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
-" \tand 'clang-path' option in [llvm] section of ~/.perfconfig.\n");
- version_notice();
+ if (err)
return -ENOENT;
- }
/*
* This is an optional work. Even it fail we can continue our
@@ -495,14 +506,9 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
force_set_env("WORKING_DIR", kbuild_dir ? : ".");
if (opts) {
- err = search_program(llvm_param.llc_path, "llc", llc_path);
- if (err) {
- pr_err("ERROR:\tunable to find llc.\n"
- "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
- " \tand 'llc-path' option in [llvm] section of ~/.perfconfig.\n");
- version_notice();
+ err = search_program_and_warn(llvm_param.llc_path, "llc", llc_path);
+ if (err)
goto errout;
- }
err = -ENOMEM;
if (asprintf(&pipe_template, "%s -emit-llvm | %s -march=bpf %s -filetype=obj -o -",
@@ -579,5 +585,5 @@ int llvm__search_clang(void)
{
char clang_path[PATH_MAX];
- return search_program(llvm_param.clang_path, "clang", clang_path);
+ return search_program_and_warn(llvm_param.clang_path, "clang", clang_path);
}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 9d5f589f02ae..af33118354dd 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -6,6 +6,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/ring_buffer.h>
+#include <linux/bitops.h>
#include <stdbool.h>
#include <pthread.h> // for cpu_set_t
#ifdef HAVE_AIO_SUPPORT
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 9321bd0e2f76..d94e48e1ff9b 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -316,7 +316,8 @@ event_pmu_name opt_pmu_config
if (!strncmp(name, "uncore_", 7) &&
strncmp($1, "uncore_", 7))
name += 7;
- if (!perf_pmu__match(pattern, name, $1)) {
+ if (!perf_pmu__match(pattern, name, $1) ||
+ !perf_pmu__match(pattern, pmu->alias_name, $1)) {
if (parse_events_copy_term_list(orig_terms, &terms))
CLEANUP_YYABORT;
if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false))
diff --git a/tools/perf/util/parse-sublevel-options.h b/tools/perf/util/parse-sublevel-options.h
index 9b9efcc2aaad..578b18ef03bb 100644
--- a/tools/perf/util/parse-sublevel-options.h
+++ b/tools/perf/util/parse-sublevel-options.h
@@ -8,4 +8,4 @@ struct sublevel_option {
int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts);
-#endif \ No newline at end of file
+#endif
diff --git a/tools/perf/util/perf_dlfilter.h b/tools/perf/util/perf_dlfilter.h
deleted file mode 100644
index 3eef03d661b4..000000000000
--- a/tools/perf/util/perf_dlfilter.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * perf_dlfilter.h: API for perf --dlfilter shared object
- * Copyright (c) 2021, Intel Corporation.
- */
-#ifndef _LINUX_PERF_DLFILTER_H
-#define _LINUX_PERF_DLFILTER_H
-
-#include <linux/perf_event.h>
-#include <linux/types.h>
-
-/* Definitions for perf_dlfilter_sample flags */
-enum {
- PERF_DLFILTER_FLAG_BRANCH = 1ULL << 0,
- PERF_DLFILTER_FLAG_CALL = 1ULL << 1,
- PERF_DLFILTER_FLAG_RETURN = 1ULL << 2,
- PERF_DLFILTER_FLAG_CONDITIONAL = 1ULL << 3,
- PERF_DLFILTER_FLAG_SYSCALLRET = 1ULL << 4,
- PERF_DLFILTER_FLAG_ASYNC = 1ULL << 5,
- PERF_DLFILTER_FLAG_INTERRUPT = 1ULL << 6,
- PERF_DLFILTER_FLAG_TX_ABORT = 1ULL << 7,
- PERF_DLFILTER_FLAG_TRACE_BEGIN = 1ULL << 8,
- PERF_DLFILTER_FLAG_TRACE_END = 1ULL << 9,
- PERF_DLFILTER_FLAG_IN_TX = 1ULL << 10,
- PERF_DLFILTER_FLAG_VMENTRY = 1ULL << 11,
- PERF_DLFILTER_FLAG_VMEXIT = 1ULL << 12,
-};
-
-/*
- * perf sample event information (as per perf script and <linux/perf_event.h>)
- */
-struct perf_dlfilter_sample {
- __u32 size; /* Size of this structure (for compatibility checking) */
- __u16 ins_lat; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
- __u16 p_stage_cyc; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
- __u64 ip;
- __s32 pid;
- __s32 tid;
- __u64 time;
- __u64 addr;
- __u64 id;
- __u64 stream_id;
- __u64 period;
- __u64 weight; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
- __u64 transaction; /* Refer PERF_SAMPLE_TRANSACTION in <linux/perf_event.h> */
- __u64 insn_cnt; /* For instructions-per-cycle (IPC) */
- __u64 cyc_cnt; /* For instructions-per-cycle (IPC) */
- __s32 cpu;
- __u32 flags; /* Refer PERF_DLFILTER_FLAG_* above */
- __u64 data_src; /* Refer PERF_SAMPLE_DATA_SRC in <linux/perf_event.h> */
- __u64 phys_addr; /* Refer PERF_SAMPLE_PHYS_ADDR in <linux/perf_event.h> */
- __u64 data_page_size; /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in <linux/perf_event.h> */
- __u64 code_page_size; /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in <linux/perf_event.h> */
- __u64 cgroup; /* Refer PERF_SAMPLE_CGROUP in <linux/perf_event.h> */
- __u8 cpumode; /* Refer CPUMODE_MASK etc in <linux/perf_event.h> */
- __u8 addr_correlates_sym; /* True => resolve_addr() can be called */
- __u16 misc; /* Refer perf_event_header in <linux/perf_event.h> */
- __u32 raw_size; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
- const void *raw_data; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
- __u64 brstack_nr; /* Number of brstack entries */
- const struct perf_branch_entry *brstack; /* Refer <linux/perf_event.h> */
- __u64 raw_callchain_nr; /* Number of raw_callchain entries */
- const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */
- const char *event;
-};
-
-/*
- * Address location (as per perf script)
- */
-struct perf_dlfilter_al {
- __u32 size; /* Size of this structure (for compatibility checking) */
- __u32 symoff;
- const char *sym;
- __u64 addr; /* Mapped address (from dso) */
- __u64 sym_start;
- __u64 sym_end;
- const char *dso;
- __u8 sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer <elf.h> */
- __u8 is_64_bit; /* Only valid if dso is not NULL */
- __u8 is_kernel_ip; /* True if in kernel space */
- __u32 buildid_size;
- __u8 *buildid;
- /* Below members are only populated by resolve_ip() */
- __u8 filtered; /* True if this sample event will be filtered out */
- const char *comm;
-};
-
-struct perf_dlfilter_fns {
- /* Return information about ip */
- const struct perf_dlfilter_al *(*resolve_ip)(void *ctx);
- /* Return information about addr (if addr_correlates_sym) */
- const struct perf_dlfilter_al *(*resolve_addr)(void *ctx);
- /* Return arguments from --dlarg option */
- char **(*args)(void *ctx, int *dlargc);
- /*
- * Return information about address (al->size must be set before
- * calling). Returns 0 on success, -1 otherwise.
- */
- __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al);
- /* Return instruction bytes and length */
- const __u8 *(*insn)(void *ctx, __u32 *length);
- /* Return source file name and line number */
- const char *(*srcline)(void *ctx, __u32 *line_number);
- /* Return perf_event_attr, refer <linux/perf_event.h> */
- struct perf_event_attr *(*attr)(void *ctx);
- /* Read object code, return numbers of bytes read */
- __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
- /* Reserved */
- void *(*reserved[120])(void *);
-};
-
-/*
- * If implemented, 'start' will be called at the beginning,
- * before any calls to 'filter_event'. Return 0 to indicate success,
- * or return a negative error code. '*data' can be assigned for use
- * by other functions. 'ctx' is needed for calls to perf_dlfilter_fns,
- * but most perf_dlfilter_fns are not valid when called from 'start'.
- */
-int start(void **data, void *ctx);
-
-/*
- * If implemented, 'stop' will be called at the end,
- * after any calls to 'filter_event'. Return 0 to indicate success, or
- * return a negative error code. 'data' is set by start(). 'ctx' is
- * needed for calls to perf_dlfilter_fns, but most perf_dlfilter_fns
- * are not valid when called from 'stop'.
- */
-int stop(void *data, void *ctx);
-
-/*
- * If implemented, 'filter_event' will be called for each sample
- * event. Return 0 to keep the sample event, 1 to filter it out, or
- * return a negative error code. 'data' is set by start(). 'ctx' is
- * needed for calls to perf_dlfilter_fns.
- */
-int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx);
-
-/*
- * The same as 'filter_event' except it is called before internal
- * filtering.
- */
-int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx);
-
-/*
- * If implemented, return a one-line description of the filter, and optionally
- * a longer description.
- */
-const char *filter_description(const char **long_description);
-
-#endif
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index fc683bc41715..bdabd62170d2 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -843,8 +843,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu,
break;
}
- if (pmu_is_uncore(name) &&
- pmu_uncore_alias_match(pname, name))
+ if (pmu->is_uncore && pmu_uncore_alias_match(pname, name))
goto new_alias;
if (strcmp(pname, name))
@@ -927,7 +926,7 @@ static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data)
return 0;
}
-static void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu)
+void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu)
{
struct pmu_sys_event_iter_data idata = {
.head = head,
@@ -946,6 +945,18 @@ perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
return NULL;
}
+char * __weak
+pmu_find_real_name(const char *name)
+{
+ return (char *)name;
+}
+
+char * __weak
+pmu_find_alias_name(const char *name __maybe_unused)
+{
+ return NULL;
+}
+
static int pmu_max_precise(const char *name)
{
char path[PATH_MAX];
@@ -959,13 +970,15 @@ static int pmu_max_precise(const char *name)
return max_precise;
}
-static struct perf_pmu *pmu_lookup(const char *name)
+static struct perf_pmu *pmu_lookup(const char *lookup_name)
{
struct perf_pmu *pmu;
LIST_HEAD(format);
LIST_HEAD(aliases);
__u32 type;
+ char *name = pmu_find_real_name(lookup_name);
bool is_hybrid = perf_pmu__hybrid_mounted(name);
+ char *alias_name;
/*
* Check pmu name for hybrid and the pmu may be invalid in sysfs
@@ -996,6 +1009,16 @@ static struct perf_pmu *pmu_lookup(const char *name)
pmu->cpus = pmu_cpumask(name);
pmu->name = strdup(name);
+ if (!pmu->name)
+ goto err;
+
+ alias_name = pmu_find_alias_name(name);
+ if (alias_name) {
+ pmu->alias_name = strdup(alias_name);
+ if (!pmu->alias_name)
+ goto err;
+ }
+
pmu->type = type;
pmu->is_uncore = pmu_is_uncore(name);
if (pmu->is_uncore)
@@ -1018,15 +1041,22 @@ static struct perf_pmu *pmu_lookup(const char *name)
pmu->default_config = perf_pmu__get_default_config(pmu);
return pmu;
+err:
+ if (pmu->name)
+ free(pmu->name);
+ free(pmu);
+ return NULL;
}
static struct perf_pmu *pmu_find(const char *name)
{
struct perf_pmu *pmu;
- list_for_each_entry(pmu, &pmus, list)
- if (!strcmp(pmu->name, name))
+ list_for_each_entry(pmu, &pmus, list) {
+ if (!strcmp(pmu->name, name) ||
+ (pmu->alias_name && !strcmp(pmu->alias_name, name)))
return pmu;
+ }
return NULL;
}
@@ -1920,6 +1950,9 @@ bool perf_pmu__has_hybrid(void)
int perf_pmu__match(char *pattern, char *name, char *tok)
{
+ if (!name)
+ return -1;
+
if (fnmatch(pattern, name, 0))
return -1;
@@ -1928,3 +1961,38 @@ int perf_pmu__match(char *pattern, char *name, char *tok)
return 0;
}
+
+int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
+ struct perf_cpu_map **mcpus_ptr,
+ struct perf_cpu_map **ucpus_ptr)
+{
+ struct perf_cpu_map *pmu_cpus = pmu->cpus;
+ struct perf_cpu_map *matched_cpus, *unmatched_cpus;
+ int matched_nr = 0, unmatched_nr = 0;
+
+ matched_cpus = perf_cpu_map__default_new();
+ if (!matched_cpus)
+ return -1;
+
+ unmatched_cpus = perf_cpu_map__default_new();
+ if (!unmatched_cpus) {
+ perf_cpu_map__put(matched_cpus);
+ return -1;
+ }
+
+ for (int i = 0; i < cpus->nr; i++) {
+ int cpu;
+
+ cpu = perf_cpu_map__idx(pmu_cpus, cpus->map[i]);
+ if (cpu == -1)
+ unmatched_cpus->map[unmatched_nr++] = cpus->map[i];
+ else
+ matched_cpus->map[matched_nr++] = cpus->map[i];
+ }
+
+ unmatched_cpus->nr = unmatched_nr;
+ matched_cpus->nr = matched_nr;
+ *mcpus_ptr = matched_cpus;
+ *ucpus_ptr = unmatched_cpus;
+ return 0;
+}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 926da483a141..394898b07fd9 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -11,6 +11,7 @@
#include "pmu-events/pmu-events.h"
struct evsel_config_term;
+struct perf_cpu_map;
enum {
PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -21,6 +22,7 @@ enum {
#define PERF_PMU_FORMAT_BITS 64
#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus"
+#define MAX_PMU_NAME_LEN 128
struct perf_event_attr;
@@ -32,6 +34,7 @@ struct perf_pmu_caps {
struct perf_pmu {
char *name;
+ char *alias_name;
char *id;
__u32 type;
bool selectable;
@@ -81,6 +84,7 @@ struct perf_pmu_alias {
struct perf_pmu *perf_pmu__find(const char *name);
struct perf_pmu *perf_pmu__find_by_type(unsigned int type);
+void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu);
int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
struct list_head *head_terms,
struct parse_events_error *error);
@@ -135,4 +139,10 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
bool perf_pmu__has_hybrid(void);
int perf_pmu__match(char *pattern, char *name, char *tok);
+int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
+ struct perf_cpu_map **mcpus_ptr,
+ struct perf_cpu_map **ucpus_ptr);
+
+char *pmu_find_real_name(const char *name);
+char *pmu_find_alias_name(const char *name);
#endif /* __PMU_H */
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 69129e2aa7a1..c0c010350bc2 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1422,6 +1422,37 @@ static void python_process_event(union perf_event *event,
}
}
+static void python_process_throttle(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ const char *handler_name;
+ PyObject *handler, *t;
+
+ if (event->header.type == PERF_RECORD_THROTTLE)
+ handler_name = "throttle";
+ else
+ handler_name = "unthrottle";
+ handler = get_handler(handler_name);
+ if (!handler)
+ return;
+
+ t = tuple_new(6);
+ if (!t)
+ return;
+
+ tuple_set_u64(t, 0, event->throttle.time);
+ tuple_set_u64(t, 1, event->throttle.id);
+ tuple_set_u64(t, 2, event->throttle.stream_id);
+ tuple_set_s32(t, 3, sample->cpu);
+ tuple_set_s32(t, 4, sample->pid);
+ tuple_set_s32(t, 5, sample->tid);
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
static void python_do_process_switch(union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2079,5 +2110,6 @@ struct scripting_ops python_scripting_ops = {
.process_auxtrace_error = python_process_auxtrace_error,
.process_stat = python_process_stat,
.process_stat_interval = python_process_stat_interval,
+ .process_throttle = python_process_throttle,
.generate_script = python_generate_script,
};
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 51f727402912..069c2cfdd3be 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -102,11 +102,11 @@ static int perf_session__deliver_event(struct perf_session *session,
struct perf_tool *tool,
u64 file_offset);
-static int perf_session__open(struct perf_session *session)
+static int perf_session__open(struct perf_session *session, int repipe_fd)
{
struct perf_data *data = session->data;
- if (perf_session__read_header(session) < 0) {
+ if (perf_session__read_header(session, repipe_fd) < 0) {
pr_err("incompatible file format (rerun with -v to learn more)\n");
return -1;
}
@@ -185,8 +185,9 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
session->tool, event->file_offset);
}
-struct perf_session *perf_session__new(struct perf_data *data,
- bool repipe, struct perf_tool *tool)
+struct perf_session *__perf_session__new(struct perf_data *data,
+ bool repipe, int repipe_fd,
+ struct perf_tool *tool)
{
int ret = -ENOMEM;
struct perf_session *session = zalloc(sizeof(*session));
@@ -210,7 +211,7 @@ struct perf_session *perf_session__new(struct perf_data *data,
session->data = data;
if (perf_data__is_read(data)) {
- ret = perf_session__open(session);
+ ret = perf_session__open(session, repipe_fd);
if (ret < 0)
goto out_delete;
@@ -1540,6 +1541,8 @@ static int machines__deliver_event(struct machines *machines,
evlist->stats.total_aux_lost += 1;
if (event->aux.flags & PERF_AUX_FLAG_PARTIAL)
evlist->stats.total_aux_partial += 1;
+ if (event->aux.flags & PERF_AUX_FLAG_COLLISION)
+ evlist->stats.total_aux_collision += 1;
}
return tool->aux(tool, event, sample, machine);
case PERF_RECORD_ITRACE_START:
@@ -1895,6 +1898,13 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
"");
}
+ if (session->tool->aux == perf_event__process_aux &&
+ stats->total_aux_collision != 0) {
+ ui__warning("AUX data detected collision %" PRIu64 " times out of %u!\n\n",
+ stats->total_aux_collision,
+ stats->nr_events[PERF_RECORD_AUX]);
+ }
+
if (stats->nr_unknown_events != 0) {
ui__warning("Found %u unknown events!\n\n"
"Is this an older tool processing a perf.data "
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index e31ba4c92a6c..5d8bd14a0a39 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -54,8 +54,16 @@ struct decomp {
struct perf_tool;
-struct perf_session *perf_session__new(struct perf_data *data,
- bool repipe, struct perf_tool *tool);
+struct perf_session *__perf_session__new(struct perf_data *data,
+ bool repipe, int repipe_fd,
+ struct perf_tool *tool);
+
+static inline struct perf_session *perf_session__new(struct perf_data *data,
+ struct perf_tool *tool)
+{
+ return __perf_session__new(data, false, -1, tool);
+}
+
void perf_session__delete(struct perf_session *session);
void perf_event_header__bswap(struct perf_event_header *hdr);
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 35aa0c0f7cd9..a7e981b2d7de 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include "util/cgroup.h"
+#include "util/data.h"
#include "util/debug.h"
#include "util/dso.h"
#include "util/event.h"
@@ -16,7 +18,6 @@
#include "util/synthetic-events.h"
#include "util/target.h"
#include "util/time-utils.h"
-#include "util/cgroup.h"
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -2179,3 +2180,53 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session
free(ff.buf);
return ret;
}
+
+int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+ struct perf_session *session,
+ struct perf_data *data,
+ perf_event__handler_t process)
+{
+ int err;
+ int ret = 0;
+ struct evlist *evlist = session->evlist;
+
+ /*
+ * We need to synthesize events first, because some
+ * features works on top of them (on report side).
+ */
+ err = perf_event__synthesize_attrs(tool, evlist, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize attrs.\n");
+ return err;
+ }
+ ret += err;
+
+ err = perf_event__synthesize_features(tool, session, evlist, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize features.\n");
+ return err;
+ }
+ ret += err;
+
+ if (have_tracepoints(&evlist->core.entries)) {
+ int fd = perf_data__fd(data);
+
+ /*
+ * FIXME err <= 0 here actually means that
+ * there were no tracepoints so its not really
+ * an error, just that we don't need to
+ * synthesize anything. We really have to
+ * return this more properly and also
+ * propagate errors that now are calling die()
+ */
+ err = perf_event__synthesize_tracing_data(tool, fd, evlist,
+ process);
+ if (err <= 0) {
+ pr_err("Couldn't record tracing data.\n");
+ return err;
+ }
+ ret += err;
+ }
+
+ return ret;
+}
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index e7a3e9589738..c845e2b9b444 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -14,6 +14,7 @@ struct evsel;
struct machine;
struct perf_counts_values;
struct perf_cpu_map;
+struct perf_data;
struct perf_event_attr;
struct perf_event_mmap_page;
struct perf_sample;
@@ -101,4 +102,9 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session
}
#endif // HAVE_LIBBPF_SUPPORT
+int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+ struct perf_session *session,
+ struct perf_data *data,
+ perf_event__handler_t process);
+
#endif // __PERF_SYNTHETIC_EVENTS_H
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 4ff56217f2a6..daec6cba500d 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -17,6 +17,7 @@ struct target {
bool default_per_cpu;
bool per_thread;
bool use_bpf;
+ bool hybrid;
const char *attr_map;
};
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 54aadeedf28c..640981105788 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -90,6 +90,9 @@ struct scripting_ops {
void (*process_stat)(struct perf_stat_config *config,
struct evsel *evsel, u64 tstamp);
void (*process_stat_interval)(u64 tstamp);
+ void (*process_throttle)(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int (*generate_script) (struct tep_handle *pevent, const char *outfile);
};