Diffstat (limited to 'tools/perf/util')
55 files changed, 1760 insertions, 328 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 9a7209a99e16..a51267d88ca9 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -147,6 +147,7 @@ perf-$(CONFIG_LIBBPF) += bpf_map.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
 perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
 perf-$(CONFIG_LIBELF) += symbol-elf.o
 perf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 1a80151baed9..d040406f3314 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -387,26 +387,16 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 }
 
-#define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
-			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
-			 ARM_SPE_REMOTE_ACCESS)
-
-static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
-{
-	if (type & SPE_MEM_TYPE)
-		return true;
-
-	return false;
-}
-
 static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
 {
 	union perf_mem_data_src	data_src = { 0 };
 
 	if (record->op == ARM_SPE_LD)
 		data_src.mem_op = PERF_MEM_OP_LOAD;
-	else
+	else if (record->op == ARM_SPE_ST)
 		data_src.mem_op = PERF_MEM_OP_STORE;
+	else
+		return 0;
 
 	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
 		data_src.mem_lvl = PERF_MEM_LVL_L3;
@@ -510,7 +500,11 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
 			return err;
 	}
 
-	if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
+	/*
+	 * When data_src is zero, the record is not a memory operation;
+	 * skip synthesizing a memory sample in that case.
+	 */
+	if (spe->sample_memory && data_src) {
 		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
 		if (err)
 			return err;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index df1c5bbbaa0d..511dd3caa1bc 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -125,7 +125,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
 	mm->tid = mp->tid;
 	mm->cpu = mp->cpu.cpu;
 
-	if (!mp->len) {
+	if (!mp->len || !mp->mmap_needed) {
 		mm->base = NULL;
 		return 0;
 	}
@@ -168,13 +168,20 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
 }
 
 void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
-				   struct evlist *evlist, int idx,
-				   bool per_cpu)
+				   struct evlist *evlist,
+				   struct evsel *evsel, int idx)
 {
+	bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+
+	mp->mmap_needed = evsel->needs_auxtrace_mmap;
+
+	if (!mp->mmap_needed)
+		return;
+
 	mp->idx = idx;
 
 	if (per_cpu) {
-		mp->cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, idx);
+		mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
 		if (evlist->core.threads)
 			mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
 		else
@@ -636,6 +643,22 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
 	return -EINVAL;
 }
 
+static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
+{
+	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+
+	if (per_cpu_mmaps) {
+		struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
+		int cpu_map_idx = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
+
+		if (cpu_map_idx == -1)
+			return -EINVAL;
+		return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
+	}
+
+	return perf_evsel__enable_thread(&evsel->core, idx);
+}
+
 int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
 {
 	struct evsel *evsel;
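With mmap_needed in place, an AUX area is only mapped for evsels that opt in. A minimal sketch of the opt-in side, assuming it happens during a PMU driver's recording setup as in the per-arch auxtrace code (example_pmu_type is a hypothetical placeholder):

	/* Sketch: mark only the auxtrace evsel as needing an AUX area mmap. */
	static void example_mark_auxtrace_evsel(struct evlist *evlist,
						__u32 example_pmu_type)
	{
		struct evsel *evsel;

		evlist__for_each_entry(evlist, evsel) {
			if (evsel->core.attr.type == example_pmu_type)
				evsel->needs_auxtrace_mmap = true;
			/* other evsels keep mmap_needed == false, so
			 * auxtrace_mmap__mmap() returns early for them */
		}
	}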
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index dc38b6f57232..cd0d25c2751c 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -344,6 +344,10 @@ struct auxtrace_mmap {
  * @idx: index of this mmap
  * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
  *       mmap) otherwise %0
+ * @mmap_needed: set to %false for non-auxtrace events. This is needed because
+ *               auxtrace mmapping is done in the same code path as non-auxtrace
+ *               mmapping but not every evsel that needs non-auxtrace mmapping
+ *               also needs auxtrace mmapping.
  * @cpu: cpu number for a per-cpu mmap otherwise %-1
  */
 struct auxtrace_mmap_params {
@@ -353,6 +357,7 @@ struct auxtrace_mmap_params {
 	int		prot;
 	int		idx;
 	pid_t		tid;
+	bool		mmap_needed;
 	struct perf_cpu	cpu;
 };
 
@@ -490,8 +495,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
 				unsigned int auxtrace_pages,
 				bool auxtrace_overwrite);
 void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
-				   struct evlist *evlist, int idx,
-				   bool per_cpu);
+				   struct evlist *evlist,
+				   struct evsel *evsel, int idx);
 
 typedef int (*process_auxtrace_t)(struct perf_tool *tool,
 				  struct mmap *map,
@@ -863,8 +868,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
 				unsigned int auxtrace_pages,
 				bool auxtrace_overwrite);
 void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
-				   struct evlist *evlist, int idx,
-				   bool per_cpu);
+				   struct evlist *evlist,
+				   struct evsel *evsel, int idx);
 
 #define ITRACE_HELP ""
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 8271ab764eb5..eee64ddb766d 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -35,11 +35,12 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
 }
 #endif
 
-int __weak bpf_prog_load(enum bpf_prog_type prog_type,
-			 const char *prog_name __maybe_unused,
-			 const char *license,
-			 const struct bpf_insn *insns, size_t insn_cnt,
-			 const struct bpf_prog_load_opts *opts)
+#ifndef HAVE_LIBBPF_BPF_PROG_LOAD
+int bpf_prog_load(enum bpf_prog_type prog_type,
+		  const char *prog_name __maybe_unused,
+		  const char *license,
+		  const struct bpf_insn *insns, size_t insn_cnt,
+		  const struct bpf_prog_load_opts *opts)
 {
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@@ -47,8 +48,10 @@ int __weak bpf_prog_load(enum bpf_prog_type prog_type,
 			       opts->kern_version, opts->log_buf, opts->log_size);
 #pragma GCC diagnostic pop
 }
+#endif
 
-struct bpf_program * __weak
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
+struct bpf_program *
 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
 {
 #pragma GCC diagnostic push
@@ -56,8 +59,10 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
 	return bpf_program__next(prev, obj);
 #pragma GCC diagnostic pop
 }
+#endif
 
-struct bpf_map * __weak
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
+struct bpf_map *
 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
 {
 #pragma GCC diagnostic push
@@ -65,8 +70,10 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
 	return bpf_map__next(prev, obj);
 #pragma GCC diagnostic pop
 }
+#endif
 
-const void * __weak
+#ifndef HAVE_LIBBPF_BTF__RAW_DATA
+const void *
 btf__raw_data(const struct btf *btf_ro, __u32 *size)
 {
 #pragma GCC diagnostic push
@@ -74,6 +81,7 @@ btf__raw_data(const struct btf *btf_ro, __u32 *size)
 	return btf__get_raw_data(btf_ro, size);
 #pragma GCC diagnostic pop
 }
+#endif
 
 static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
 {
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index b72cef1ae959..f8ad581ea247 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -99,16 +99,26 @@ static int bpf_perf_object__add(struct bpf_object *obj)
 	return perf_obj ? 0 : -ENOMEM;
 }
 
+static int libbpf_init(void)
+{
+	if (libbpf_initialized)
+		return 0;
+
+	libbpf_set_print(libbpf_perf_print);
+	libbpf_initialized = true;
+	return 0;
+}
+
 struct bpf_object *
 bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
 {
 	LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = name);
 	struct bpf_object *obj;
+	int err;
 
-	if (!libbpf_initialized) {
-		libbpf_set_print(libbpf_perf_print);
-		libbpf_initialized = true;
-	}
+	err = libbpf_init();
+	if (err)
+		return ERR_PTR(err);
 
 	obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
 	if (IS_ERR_OR_NULL(obj)) {
@@ -135,14 +145,13 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
 {
 	LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = filename);
 	struct bpf_object *obj;
+	int err;
 
-	if (!libbpf_initialized) {
-		libbpf_set_print(libbpf_perf_print);
-		libbpf_initialized = true;
-	}
+	err = libbpf_init();
+	if (err)
+		return ERR_PTR(err);
 
 	if (source) {
-		int err;
 		void *obj_buf;
 		size_t obj_buf_sz;
diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c
index e271e05e51bc..80b1d2b3729b 100644
--- a/tools/perf/util/bpf-utils.c
+++ b/tools/perf/util/bpf-utils.c
@@ -149,11 +149,10 @@ get_bpf_prog_info_linear(int fd, __u64 arrays)
 		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
 		size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
 
-		data_len += count * size;
+		data_len += roundup(count * size, sizeof(__u64));
 	}
 
 	/* step 3: allocate continuous memory */
-	data_len = roundup(data_len, sizeof(__u64));
 	info_linear = malloc(sizeof(struct perf_bpil) + data_len);
 	if (!info_linear)
 		return ERR_PTR(-ENOMEM);
@@ -180,7 +179,7 @@ get_bpf_prog_info_linear(int fd, __u64 arrays)
 		bpf_prog_info_set_offset_u64(&info_linear->info,
 					     desc->array_offset,
 					     ptr_to_u64(ptr));
-		ptr += count * size;
+		ptr += roundup(count * size, sizeof(__u64));
 	}
 
 	/* step 5: call syscall again to get required arrays */
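The alignment change above is easiest to see with concrete numbers; a small illustration (array sizes invented):

	/* Illustration: two arrays of 12 and 8 bytes inside the linear buffer.
	 * Old: data_len = roundup(12 + 8, 8) = 24, but the second array
	 *      started at offset 12, i.e. not 8-byte aligned.
	 * New: data_len = roundup(12, 8) + roundup(8, 8) = 16 + 8 = 24,
	 *      and the second array starts at the aligned offset 16.
	 */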
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 3ce8d03cb7ec..ef1c15e4aeba 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -224,25 +224,25 @@ static int bpf_program_profiler__disable(struct evsel *evsel)
 
 static int bpf_program_profiler__read(struct evsel *evsel)
 {
-	// perf_cpu_map uses /sys/devices/system/cpu/online
-	int num_cpu = evsel__nr_cpus(evsel);
 	// BPF_MAP_TYPE_PERCPU_ARRAY uses /sys/devices/system/cpu/possible
 	// Sometimes possible > online, like on a Ryzen 3900X that has 24
 	// threads but its possible showed 0-31 -acme
 	int num_cpu_bpf = libbpf_num_possible_cpus();
 	struct bpf_perf_event_value values[num_cpu_bpf];
 	struct bpf_counter *counter;
+	struct perf_counts_values *counts;
 	int reading_map_fd;
 	__u32 key = 0;
-	int err, cpu;
+	int err, idx, bpf_cpu;
 
 	if (list_empty(&evsel->bpf_counter_list))
 		return -EAGAIN;
 
-	for (cpu = 0; cpu < num_cpu; cpu++) {
-		perf_counts(evsel->counts, cpu, 0)->val = 0;
-		perf_counts(evsel->counts, cpu, 0)->ena = 0;
-		perf_counts(evsel->counts, cpu, 0)->run = 0;
+	perf_cpu_map__for_each_idx(idx, evsel__cpus(evsel)) {
+		counts = perf_counts(evsel->counts, idx, 0);
+		counts->val = 0;
+		counts->ena = 0;
+		counts->run = 0;
 	}
 	list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
 		struct bpf_prog_profiler_bpf *skel = counter->skel;
@@ -256,10 +256,15 @@ static int bpf_program_profiler__read(struct evsel *evsel)
 			return err;
 		}
 
-		for (cpu = 0; cpu < num_cpu; cpu++) {
-			perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter;
-			perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled;
-			perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running;
+		for (bpf_cpu = 0; bpf_cpu < num_cpu_bpf; bpf_cpu++) {
+			idx = perf_cpu_map__idx(evsel__cpus(evsel),
+						(struct perf_cpu){.cpu = bpf_cpu});
+			if (idx == -1)
+				continue;
+			counts = perf_counts(evsel->counts, idx, 0);
+			counts->val += values[bpf_cpu].counter;
+			counts->ena += values[bpf_cpu].enabled;
+			counts->run += values[bpf_cpu].running;
 		}
 	}
 	return 0;
@@ -307,7 +312,10 @@ static bool bperf_attr_map_compatible(int attr_map_fd)
 	       (map_info.value_size == sizeof(struct perf_event_attr_map_entry));
 }
 
-int __weak
+#ifndef HAVE_LIBBPF_BPF_MAP_CREATE
+LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
+			      int value_size, int max_entries, __u32 map_flags);
+int
 bpf_map_create(enum bpf_map_type map_type,
 	       const char *map_name __maybe_unused,
 	       __u32 key_size,
@@ -320,6 +328,7 @@ bpf_map_create(enum bpf_map_type map_type,
 	return bpf_create_map(map_type, key_size, value_size, max_entries, 0);
 #pragma GCC diagnostic pop
 }
+#endif
 
 static int bperf_lock_attr_map(struct target *target)
 {
@@ -621,6 +630,7 @@ static int bperf__read(struct evsel *evsel)
 	struct bperf_follower_bpf *skel = evsel->follower_skel;
 	__u32 num_cpu_bpf = cpu__max_cpu().cpu;
 	struct bpf_perf_event_value values[num_cpu_bpf];
+	struct perf_counts_values *counts;
 	int reading_map_fd, err = 0;
 	__u32 i;
 	int j;
@@ -639,29 +649,32 @@ static int bperf__read(struct evsel *evsel)
 		case BPERF_FILTER_GLOBAL:
 			assert(i == 0);
 
-			perf_cpu_map__for_each_cpu(entry, j, all_cpu_map) {
-				cpu = entry.cpu;
-				perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter;
-				perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled;
-				perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running;
+			perf_cpu_map__for_each_cpu(entry, j, evsel__cpus(evsel)) {
+				counts = perf_counts(evsel->counts, j, 0);
+				counts->val = values[entry.cpu].counter;
+				counts->ena = values[entry.cpu].enabled;
+				counts->run = values[entry.cpu].running;
 			}
 			break;
 		case BPERF_FILTER_CPU:
-			cpu = evsel->core.cpus->map[i].cpu;
-			perf_counts(evsel->counts, i, 0)->val = values[cpu].counter;
-			perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled;
-			perf_counts(evsel->counts, i, 0)->run = values[cpu].running;
+			cpu = perf_cpu_map__cpu(evsel__cpus(evsel), i).cpu;
+			assert(cpu >= 0);
+			counts = perf_counts(evsel->counts, i, 0);
+			counts->val = values[cpu].counter;
+			counts->ena = values[cpu].enabled;
+			counts->run = values[cpu].running;
 			break;
 		case BPERF_FILTER_PID:
 		case BPERF_FILTER_TGID:
-			perf_counts(evsel->counts, 0, i)->val = 0;
-			perf_counts(evsel->counts, 0, i)->ena = 0;
-			perf_counts(evsel->counts, 0, i)->run = 0;
+			counts = perf_counts(evsel->counts, 0, i);
+			counts->val = 0;
+			counts->ena = 0;
+			counts->run = 0;
 
 			for (cpu = 0; cpu < num_cpu_bpf; cpu++) {
-				perf_counts(evsel->counts, 0, i)->val += values[cpu].counter;
-				perf_counts(evsel->counts, 0, i)->ena += values[cpu].enabled;
-				perf_counts(evsel->counts, 0, i)->run += values[cpu].running;
+				counts->val += values[cpu].counter;
+				counts->ena += values[cpu].enabled;
+				counts->run += values[cpu].running;
 			}
 			break;
 		default:
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
index ac60c08e8e2a..63b9db657442 100644
--- a/tools/perf/util/bpf_counter_cgroup.c
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -46,8 +46,8 @@ static int bperf_load_program(struct evlist *evlist)
 	struct bpf_link *link;
 	struct evsel *evsel;
 	struct cgroup *cgrp, *leader_cgrp;
-	__u32 i, cpu;
-	__u32 nr_cpus = evlist->core.all_cpus->nr;
+	int i, j;
+	struct perf_cpu cpu;
 	int total_cpus = cpu__max_cpu().cpu;
 	int map_size, map_fd;
 	int prog_fd, err;
@@ -93,9 +93,9 @@ static int bperf_load_program(struct evlist *evlist)
 		goto out;
 	}
 
-	for (i = 0; i < nr_cpus; i++) {
+	perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
 		link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch,
-						      FD(cgrp_switch, i));
+						      FD(cgrp_switch, cpu.cpu));
 		if (IS_ERR(link)) {
 			pr_err("Failed to attach cgroup program\n");
 			err = PTR_ERR(link);
@@ -122,10 +122,9 @@ static int bperf_load_program(struct evlist *evlist)
 			}
 
 			map_fd = bpf_map__fd(skel->maps.events);
-			for (cpu = 0; cpu < nr_cpus; cpu++) {
-				int fd = FD(evsel, cpu);
-				__u32 idx = evsel->core.idx * total_cpus +
-					evlist->core.all_cpus->map[cpu].cpu;
+			perf_cpu_map__for_each_cpu(cpu, j, evlist->core.all_cpus) {
+				int fd = FD(evsel, cpu.cpu);
+				__u32 idx = evsel->core.idx * total_cpus + cpu.cpu;
 
 				err = bpf_map_update_elem(map_fd, &idx, &fd,
 							  BPF_ANY);
@@ -207,14 +206,12 @@ static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused,
  */
 static int bperf_cgrp__sync_counters(struct evlist *evlist)
 {
-	int i, cpu;
-	int nr_cpus = evlist->core.all_cpus->nr;
+	struct perf_cpu cpu;
+	int idx;
 	int prog_fd = bpf_program__fd(skel->progs.trigger_read);
 
-	for (i = 0; i < nr_cpus; i++) {
-		cpu = evlist->core.all_cpus->map[i].cpu;
-		bperf_trigger_reading(prog_fd, cpu);
-	}
+	perf_cpu_map__for_each_cpu(cpu, idx, evlist->core.all_cpus)
+		bperf_trigger_reading(prog_fd, cpu.cpu);
 
 	return 0;
 }
@@ -244,12 +241,10 @@ static int bperf_cgrp__disable(struct evsel *evsel)
 static int bperf_cgrp__read(struct evsel *evsel)
 {
 	struct evlist *evlist = evsel->evlist;
-	int i, cpu, nr_cpus = evlist->core.all_cpus->nr;
 	int total_cpus = cpu__max_cpu().cpu;
 	struct perf_counts_values *counts;
 	struct bpf_perf_event_value *values;
 	int reading_map_fd, err = 0;
-	__u32 idx;
 
 	if (evsel->core.idx)
 		return 0;
@@ -263,7 +258,10 @@ static int bperf_cgrp__read(struct evsel *evsel)
 	reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings);
 
 	evlist__for_each_entry(evlist, evsel) {
-		idx = evsel->core.idx;
+		__u32 idx = evsel->core.idx;
+		int i;
+		struct perf_cpu cpu;
+
 		err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
 		if (err) {
 			pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
@@ -271,13 +269,11 @@ static int bperf_cgrp__read(struct evsel *evsel)
 			goto out;
 		}
 
-		for (i = 0; i < nr_cpus; i++) {
-			cpu = evlist->core.all_cpus->map[i].cpu;
-
+		perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
 			counts = perf_counts(evsel->counts, i, 0);
-			counts->val = values[cpu].counter;
-			counts->ena = values[cpu].enabled;
-			counts->run = values[cpu].running;
+			counts->val = values[cpu.cpu].counter;
+			counts->ena = values[cpu.cpu].enabled;
+			counts->run = values[cpu.cpu].running;
 		}
 	}
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
new file mode 100644
index 000000000000..f289b7713598
--- /dev/null
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/bpf_counter.h"
+#include "util/debug.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/off_cpu.h"
+#include "util/perf-hooks.h"
+#include "util/record.h"
+#include "util/session.h"
+#include "util/target.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/cgroup.h"
+#include <bpf/bpf.h>
+
+#include "bpf_skel/off_cpu.skel.h"
+
+#define MAX_STACKS 32
+/* we don't need an actual timestamp; we just want the samples sorted last */
+#define OFF_CPU_TIMESTAMP (~0ull << 32)
+
+static struct off_cpu_bpf *skel;
+
+struct off_cpu_key {
+	u32 pid;
+	u32 tgid;
+	u32 stack_id;
+	u32 state;
+	u64 cgroup_id;
+};
+
+union off_cpu_data {
+	struct perf_event_header hdr;
+	u64 array[1024 / sizeof(u64)];
+};
+
+static int off_cpu_config(struct evlist *evlist)
+{
+	struct evsel *evsel;
+	struct perf_event_attr attr = {
+		.type	= PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+		.size	= sizeof(attr), /* to capture ABI version */
+	};
+	char *evname = strdup(OFFCPU_EVENT);
+
+	if (evname == NULL)
+		return -ENOMEM;
+
+	evsel = evsel__new(&attr);
+	if (!evsel) {
+		free(evname);
+		return -ENOMEM;
+	}
+
+	evsel->core.attr.freq = 1;
+	evsel->core.attr.sample_period = 1;
+	/* off-cpu analysis depends on stack trace */
+	evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
+
+	evlist__add(evlist, evsel);
+
+	free(evsel->name);
+	evsel->name = evname;
+
+	return 0;
+}
+
+static void off_cpu_start(void *arg)
+{
+	struct evlist *evlist = arg;
+
+	/* update task filter for the given workload */
+	if (!skel->bss->has_cpu && !skel->bss->has_task &&
+	    perf_thread_map__pid(evlist->core.threads, 0) != -1) {
+		int fd;
+		u32 pid;
+		u8 val = 1;
+
+		skel->bss->has_task = 1;
+		fd = bpf_map__fd(skel->maps.task_filter);
+		pid = perf_thread_map__pid(evlist->core.threads, 0);
+		bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+	}
+
+	skel->bss->enabled = 1;
+}
+
+static void off_cpu_finish(void *arg __maybe_unused)
+{
+	skel->bss->enabled = 0;
+	off_cpu_bpf__destroy(skel);
+}
+
+/* the v5.18 kernel added the prev_state arg, so we need to check the signature */
+static void check_sched_switch_args(void)
+{
+	const struct btf *btf = bpf_object__btf(skel->obj);
+	const struct btf_type *t1, *t2, *t3;
+	u32 type_id;
+
+	type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch",
+					 BTF_KIND_TYPEDEF);
+	if ((s32)type_id < 0)
+		return;
+
+	t1 = btf__type_by_id(btf, type_id);
+	if (t1 == NULL)
+		return;
+
+	t2 = btf__type_by_id(btf, t1->type);
+	if (t2 == NULL || !btf_is_ptr(t2))
+		return;
+
+	t3 = btf__type_by_id(btf, t2->type);
+	if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) {
+		/* new format: pass prev_state as 4th arg */
+		skel->rodata->has_prev_state = true;
+	}
+}
+
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+		    struct record_opts *opts)
+{
+	int err, fd, i;
+	int ncpus = 1, ntasks = 1, ncgrps = 1;
+
+	if (off_cpu_config(evlist) < 0) {
+		pr_err("Failed to config off-cpu BPF event\n");
+		return -1;
+	}
+
+	skel = off_cpu_bpf__open();
+	if (!skel) {
+		pr_err("Failed to open off-cpu BPF skeleton\n");
+		return -1;
+	}
+
+	/* don't need to set cpu filter for system-wide mode */
+	if (target->cpu_list) {
+		ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
+		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+	}
+
+	if (target__has_task(target)) {
+		ntasks = perf_thread_map__nr(evlist->core.threads);
+		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+	}
+
+	if (evlist__first(evlist)->cgrp) {
+		ncgrps = evlist->core.nr_entries - 1; /* excluding a dummy */
+		bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
+
+		if (!cgroup_is_v2("perf_event"))
+			skel->rodata->uses_cgroup_v1 = true;
+	}
+
+	if (opts->record_cgroup) {
+		skel->rodata->needs_cgroup = true;
+
+		if (!cgroup_is_v2("perf_event"))
+			skel->rodata->uses_cgroup_v1 = true;
+	}
+
+	set_max_rlimit();
+	check_sched_switch_args();
+
+	err = off_cpu_bpf__load(skel);
+	if (err) {
+		pr_err("Failed to load off-cpu skeleton\n");
+		goto out;
+	}
+
+	if (target->cpu_list) {
+		u32 cpu;
+		u8 val = 1;
+
+		skel->bss->has_cpu = 1;
+		fd = bpf_map__fd(skel->maps.cpu_filter);
+
+		for (i = 0; i < ncpus; i++) {
+			cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
+			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+		}
+	}
+
+	if (target__has_task(target)) {
+		u32 pid;
+		u8 val = 1;
+
+		skel->bss->has_task = 1;
+		fd = bpf_map__fd(skel->maps.task_filter);
+
+		for (i = 0; i < ntasks; i++) {
+			pid = perf_thread_map__pid(evlist->core.threads, i);
+			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+		}
+	}
+
+	if (evlist__first(evlist)->cgrp) {
+		struct evsel *evsel;
+		u8 val = 1;
+
+		skel->bss->has_cgroup = 1;
+		fd = bpf_map__fd(skel->maps.cgroup_filter);
+
+		evlist__for_each_entry(evlist, evsel) {
+			struct cgroup *cgrp = evsel->cgrp;
+
+			if (cgrp == NULL)
+				continue;
+
+			if (!cgrp->id && read_cgroup_id(cgrp) < 0) {
+				pr_err("Failed to read cgroup id of %s\n",
+				       cgrp->name);
+				goto out;
+			}
+
+			bpf_map_update_elem(fd, &cgrp->id, &val, BPF_ANY);
+		}
+	}
+
+	err = off_cpu_bpf__attach(skel);
+	if (err) {
+		pr_err("Failed to attach off-cpu BPF skeleton\n");
+		goto out;
+	}
+
+	if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) ||
+	    perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) {
+		pr_err("Failed to attach off-cpu skeleton\n");
+		goto out;
+	}
+
+	return 0;
+
+out:
+	off_cpu_bpf__destroy(skel);
+	return -1;
+}
+
+int off_cpu_write(struct perf_session *session)
+{
+	int bytes = 0, size;
+	int fd, stack;
+	u64 sample_type, val, sid = 0;
+	struct evsel *evsel;
+	struct perf_data_file *file = &session->data->file;
+	struct off_cpu_key prev, key;
+	union off_cpu_data data = {
+		.hdr = {
+			.type = PERF_RECORD_SAMPLE,
+			.misc = PERF_RECORD_MISC_USER,
+		},
+	};
+	u64 tstamp = OFF_CPU_TIMESTAMP;
+
+	skel->bss->enabled = 0;
+
+	evsel = evlist__find_evsel_by_str(session->evlist, OFFCPU_EVENT);
+	if (evsel == NULL) {
+		pr_err("%s evsel not found\n", OFFCPU_EVENT);
+		return 0;
+	}
+
+	sample_type = evsel->core.attr.sample_type;
+
+	if (sample_type & ~OFFCPU_SAMPLE_TYPES) {
+		pr_err("not supported sample type: %llx\n",
+		       (unsigned long long)sample_type);
+		return -1;
+	}
+
+	if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
+		if (evsel->core.id)
+			sid = evsel->core.id[0];
+	}
+
+	fd = bpf_map__fd(skel->maps.off_cpu);
+	stack = bpf_map__fd(skel->maps.stacks);
+	memset(&prev, 0, sizeof(prev));
+
+	while (!bpf_map_get_next_key(fd, &prev, &key)) {
+		int n = 1;  /* start from perf_event_header */
+		int ip_pos = -1;
+
+		bpf_map_lookup_elem(fd, &key, &val);
+
+		if (sample_type & PERF_SAMPLE_IDENTIFIER)
+			data.array[n++] = sid;
+		if (sample_type & PERF_SAMPLE_IP) {
+			ip_pos = n;
+			data.array[n++] = 0;  /* will be updated */
+		}
+		if (sample_type & PERF_SAMPLE_TID)
+			data.array[n++] = (u64)key.pid << 32 | key.tgid;
+		if (sample_type & PERF_SAMPLE_TIME)
+			data.array[n++] = tstamp;
+		if (sample_type & PERF_SAMPLE_ID)
+			data.array[n++] = sid;
+		if (sample_type & PERF_SAMPLE_CPU)
+			data.array[n++] = 0;
+		if (sample_type & PERF_SAMPLE_PERIOD)
+			data.array[n++] = val;
+		if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+			int len = 0;
+
+			/* data.array[n] is callchain->nr (updated later) */
+			data.array[n + 1] = PERF_CONTEXT_USER;
+			data.array[n + 2] = 0;
+
+			bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
+			while (data.array[n + 2 + len])
+				len++;
+
+			/* update length of callchain */
+			data.array[n] = len + 1;
+
+			/* update sample ip with the first callchain entry */
+			if (ip_pos >= 0)
+				data.array[ip_pos] = data.array[n + 2];
+
+			/* calculate sample callchain data array length */
+			n += len + 2;
+		}
+		if (sample_type & PERF_SAMPLE_CGROUP)
+			data.array[n++] = key.cgroup_id;
+
+		size = n * sizeof(u64);
+		data.hdr.size = size;
+		bytes += size;
+
+		if (perf_data_file__write(file, &data, size) < 0) {
+			pr_err("failed to write perf data, error: %m\n");
+			return bytes;
+		}
+
+		prev = key;
+		/* increase dummy timestamp to sort later samples */
+		tstamp++;
+	}
+	return bytes;
+}
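For reference, the u64 array that off_cpu_write() assembles for a sample_type of TID | TIME | PERIOD | CALLCHAIN would be laid out as follows (indices follow the code above; other sample_type combinations shift the layout accordingly):

	/* data.array layout, n starting at 1 (index 0 holds the header):
	 *   [0] perf_event_header { PERF_RECORD_SAMPLE, MISC_USER, size }
	 *   [1] (u64)key.pid << 32 | key.tgid      PERF_SAMPLE_TID
	 *   [2] tstamp                             PERF_SAMPLE_TIME
	 *   [3] val (accumulated off-cpu nsecs)    PERF_SAMPLE_PERIOD
	 *   [4] nr = len + 1                       PERF_SAMPLE_CALLCHAIN
	 *   [5] PERF_CONTEXT_USER
	 *   [6 .. 5+len] stack entries copied from the 'stacks' map
	 */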
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
new file mode 100644
index 000000000000..cc6d7fd55118
--- /dev/null
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/* task->flags for off-cpu analysis */
+#define PF_KTHREAD   0x00200000  /* I am a kernel thread */
+
+/* task->state for off-cpu analysis */
+#define TASK_INTERRUPTIBLE	0x0001
+#define TASK_UNINTERRUPTIBLE	0x0002
+
+#define MAX_STACKS   32
+#define MAX_ENTRIES  102400
+
+struct tstamp_data {
+	__u32 stack_id;
+	__u32 state;
+	__u64 timestamp;
+};
+
+struct offcpu_key {
+	__u32 pid;
+	__u32 tgid;
+	__u32 stack_id;
+	__u32 state;
+	__u64 cgroup_id;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, MAX_STACKS * sizeof(__u64));
+	__uint(max_entries, MAX_ENTRIES);
+} stacks SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct tstamp_data);
+} tstamp SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(struct offcpu_key));
+	__uint(value_size, sizeof(__u64));
+	__uint(max_entries, MAX_ENTRIES);
+} off_cpu SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u64));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} cgroup_filter SEC(".maps");
+
+/* new kernel task_struct definition */
+struct task_struct___new {
+	long __state;
+} __attribute__((preserve_access_index));
+
+/* old kernel task_struct definition */
+struct task_struct___old {
+	long state;
+} __attribute__((preserve_access_index));
+
+int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
+int has_cgroup = 0;
+
+const volatile bool has_prev_state = false;
+const volatile bool needs_cgroup = false;
+const volatile bool uses_cgroup_v1 = false;
+
+/*
+ * Old kernels called it task_struct->state; newer kernels call it '__state'.
+ * Use the BPF CO-RE "ignored suffix rule" to deal with it like below:
+ *
+ * https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes
+ */
+static inline int get_task_state(struct task_struct *t)
+{
+	/* recast pointer to capture new type for compiler */
+	struct task_struct___new *t_new = (void *)t;
+
+	if (bpf_core_field_exists(t_new->__state)) {
+		return BPF_CORE_READ(t_new, __state);
+	} else {
+		/* recast pointer to capture old type for compiler */
+		struct task_struct___old *t_old = (void *)t;
+
+		return BPF_CORE_READ(t_old, state);
+	}
+}
+
+static inline __u64 get_cgroup_id(struct task_struct *t)
+{
+	struct cgroup *cgrp;
+
+	if (uses_cgroup_v1)
+		cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup);
+	else
+		cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp);
+
+	return BPF_CORE_READ(cgrp, kn, id);
+}
+
+static inline int can_record(struct task_struct *t, int state)
+{
+	/* kernel threads don't have user stacks */
+	if (t->flags & PF_KTHREAD)
+		return 0;
+
+	if (state != TASK_INTERRUPTIBLE &&
+	    state != TASK_UNINTERRUPTIBLE)
+		return 0;
+
+	if (has_cpu) {
+		__u32 cpu = bpf_get_smp_processor_id();
+		__u8 *ok;
+
+		ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+		if (!ok)
+			return 0;
+	}
+
+	if (has_task) {
+		__u8 *ok;
+		__u32 pid = t->pid;
+
+		ok = bpf_map_lookup_elem(&task_filter, &pid);
+		if (!ok)
+			return 0;
+	}
+
+	if (has_cgroup) {
+		__u8 *ok;
+		__u64 cgrp_id = get_cgroup_id(t);
+
+		ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id);
+		if (!ok)
+			return 0;
+	}
+
+	return 1;
+}
+
+static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
+			struct task_struct *next, int state)
+{
+	__u64 ts;
+	__u32 stack_id;
+	struct tstamp_data *pelem;
+
+	ts = bpf_ktime_get_ns();
+
+	if (!can_record(prev, state))
+		goto next;
+
+	stack_id = bpf_get_stackid(ctx, &stacks,
+				   BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK);
+
+	pelem = bpf_task_storage_get(&tstamp, prev, NULL,
+				     BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!pelem)
+		goto next;
+
+	pelem->timestamp = ts;
+	pelem->state = state;
+	pelem->stack_id = stack_id;
+
+next:
+	pelem = bpf_task_storage_get(&tstamp, next, NULL, 0);
+
+	if (pelem && pelem->timestamp) {
+		struct offcpu_key key = {
+			.pid = next->pid,
+			.tgid = next->tgid,
+			.stack_id = pelem->stack_id,
+			.state = pelem->state,
+			.cgroup_id = needs_cgroup ? get_cgroup_id(next) : 0,
+		};
+		__u64 delta = ts - pelem->timestamp;
+		__u64 *total;
+
+		total = bpf_map_lookup_elem(&off_cpu, &key);
+		if (total)
+			*total += delta;
+		else
+			bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY);
+
+		/* prevent reusing the timestamp later */
+		pelem->timestamp = 0;
+	}
+
+	return 0;
+}
+
+SEC("tp_btf/sched_switch")
+int on_switch(u64 *ctx)
+{
+	struct task_struct *prev, *next;
+	int prev_state;
+
+	if (!enabled)
+		return 0;
+
+	prev = (struct task_struct *)ctx[1];
+	next = (struct task_struct *)ctx[2];
+
+	if (has_prev_state)
+		prev_state = (int)ctx[3];
+	else
+		prev_state = get_task_state(prev);
+
+	return off_cpu_stat(ctx, prev, next, prev_state);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 82f3d46bea70..328668f38c69 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -872,6 +872,30 @@ out_free:
 	return err;
 }
 
+static int filename__read_build_id_ns(const char *filename,
+				      struct build_id *bid,
+				      struct nsinfo *nsi)
+{
+	struct nscookie nsc;
+	int ret;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	ret = filename__read_build_id(filename, bid);
+	nsinfo__mountns_exit(&nsc);
+
+	return ret;
+}
+
+static bool dso__build_id_mismatch(struct dso *dso, const char *name)
+{
+	struct build_id bid;
+
+	if (filename__read_build_id_ns(name, &bid, dso->nsinfo) < 0)
+		return false;
+
+	return !dso__build_id_equal(dso, &bid);
+}
+
 static int dso__cache_build_id(struct dso *dso, struct machine *machine,
 			       void *priv __maybe_unused)
 {
@@ -886,6 +910,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine,
 		is_kallsyms = true;
 		name = machine->mmap_name;
 	}
+
+	if (!is_kallsyms && dso__build_id_mismatch(dso, name))
+		return 0;
+
 	return build_id_cache__add_b(&dso->bid, name, dso->nsinfo,
 				     is_kallsyms, is_vdso);
 }
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index a5ace2bbc28d..caabeac24c69 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -479,6 +479,20 @@ int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz)
 	return mkdir(buf, S_IRWXU);
 }
 
+bool has_kcore_dir(const char *path)
+{
+	char *kcore_dir;
+	int ret;
+
+	if (asprintf(&kcore_dir, "%s/kcore_dir", path) < 0)
+		return false;
+
+	ret = access(kcore_dir, F_OK);
+
+	free(kcore_dir);
+	return !ret;
+}
+
 char *perf_data__kallsyms_name(struct perf_data *data)
 {
 	char *kallsyms_name;
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index c9de82af5584..7de53d6e2d7f 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -4,6 +4,7 @@
 
 #include <stdio.h>
 #include <stdbool.h>
+#include <linux/types.h>
 
 enum perf_data_mode {
 	PERF_DATA_MODE_WRITE,
@@ -98,6 +99,7 @@ void perf_data__close_dir(struct perf_data *data);
 int perf_data__update_dir(struct perf_data *data);
 unsigned long perf_data__size(struct perf_data *data);
 int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
+bool has_kcore_dir(const char *path);
 char *perf_data__kallsyms_name(struct perf_data *data);
 bool is_perf_data(const char *path);
 #endif /* __PERF_DATA_H */
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 3a9fd4d389b5..97047a11282b 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -196,7 +196,9 @@ struct dso {
 		u32		 status_seen;
 		u64		 file_size;
 		struct list_head open_entry;
+		u64		 elf_base_addr;
 		u64		 debug_frame_offset;
+		u64		 eh_frame_hdr_addr;
 		u64		 eh_frame_hdr_offset;
 	} data;
 	/* bpf prog information */
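has_kcore_dir() gives callers a cheap existence check before copying; a usage sketch (the caller shown is hypothetical, though this series pairs it with perf inject preserving kcore_dir in its output directory):

	/* Sketch: keep kcore_dir when rewriting a perf.data directory. */
	if (has_kcore_dir(input_dir_path)) {
		/* copy <input>/kcore_dir into the output directory */
	}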
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 6439c888ae38..0476bb3a4188 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -683,9 +683,12 @@ static bool check_address_range(struct intlist *addr_list, int addr_range,
 int machine__resolve(struct machine *machine, struct addr_location *al,
 		     struct perf_sample *sample)
 {
-	struct thread *thread = machine__findnew_thread(machine, sample->pid,
-							sample->tid);
+	struct thread *thread;
 
+	if (symbol_conf.guest_code && !machine__is_host(machine))
+		thread = machine__findnew_guest_code(machine, sample->pid);
+	else
+		thread = machine__findnew_thread(machine, sample->pid, sample->tid);
 	if (thread == NULL)
 		return -1;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 52ea004ba01e..48af7d379d82 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -242,14 +242,20 @@ int __evlist__add_default(struct evlist *evlist, bool precise)
 	return 0;
 }
 
-int evlist__add_dummy(struct evlist *evlist)
+static struct evsel *evlist__dummy_event(struct evlist *evlist)
 {
 	struct perf_event_attr attr = {
 		.type	= PERF_TYPE_SOFTWARE,
 		.config	= PERF_COUNT_SW_DUMMY,
 		.size	= sizeof(attr), /* to capture ABI version */
 	};
-	struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries);
+
+	return evsel__new_idx(&attr, evlist->core.nr_entries);
+}
+
+int evlist__add_dummy(struct evlist *evlist)
+{
+	struct evsel *evsel = evlist__dummy_event(evlist);
 
 	if (evsel == NULL)
 		return -ENOMEM;
@@ -258,6 +264,51 @@ int evlist__add_dummy(struct evlist *evlist)
 	return 0;
 }
 
+static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel)
+{
+	evsel->core.system_wide = true;
+
+	/*
+	 * All CPUs.
+	 *
+	 * Note perf_event_open() does not accept CPUs that are not online, so
+	 * in fact this CPU list will include only all online CPUs.
+	 */
+	perf_cpu_map__put(evsel->core.own_cpus);
+	evsel->core.own_cpus = perf_cpu_map__new(NULL);
+	perf_cpu_map__put(evsel->core.cpus);
+	evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
+
+	/* No threads */
+	perf_thread_map__put(evsel->core.threads);
+	evsel->core.threads = perf_thread_map__new_dummy();
+
+	evlist__add(evlist, evsel);
+}
+
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
+{
+	struct evsel *evsel = evlist__dummy_event(evlist);
+
+	if (!evsel)
+		return NULL;
+
+	evsel->core.attr.exclude_kernel = 1;
+	evsel->core.attr.exclude_guest = 1;
+	evsel->core.attr.exclude_hv = 1;
+	evsel->core.attr.freq = 0;
+	evsel->core.attr.sample_period = 1;
+	evsel->no_aux_samples = true;
+	evsel->name = strdup("dummy:u");
+
+	if (system_wide)
+		evlist__add_on_all_cpus(evlist, evsel);
+	else
+		evlist__add(evlist, evsel);
+
+	return evsel;
+}
+
 static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
 {
 	struct evsel *evsel, *n;
@@ -334,14 +385,6 @@ int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name,
 	return 0;
 }
 
-static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel)
-{
-	if (evsel->core.system_wide)
-		return 1;
-	else
-		return perf_thread_map__nr(evlist->core.threads);
-}
-
 struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity)
 {
 	struct evlist_cpu_iterator itr = {
@@ -546,48 +589,6 @@ void evlist__toggle_enable(struct evlist *evlist)
 	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
 }
 
-static int evlist__enable_event_cpu(struct evlist *evlist, struct evsel *evsel, int cpu)
-{
-	int thread;
-	int nr_threads = evlist__nr_threads(evlist, evsel);
-
-	if (!evsel->core.fd)
-		return -EINVAL;
-
-	for (thread = 0; thread < nr_threads; thread++) {
-		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
-		if (err)
-			return err;
-	}
-	return 0;
-}
-
-static int evlist__enable_event_thread(struct evlist *evlist, struct evsel *evsel, int thread)
-{
-	int cpu;
-	int nr_cpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
-
-	if (!evsel->core.fd)
-		return -EINVAL;
-
-	for (cpu = 0; cpu < nr_cpus; cpu++) {
-		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
-		if (err)
-			return err;
-	}
-	return 0;
-}
-
-int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
-{
-	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
-
-	if (per_cpu_mmaps)
-		return evlist__enable_event_cpu(evlist, evsel, idx);
-
-	return evlist__enable_event_thread(evlist, evsel, idx);
-}
-
 int evlist__add_pollfd(struct evlist *evlist, int fd)
 {
 	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default);
@@ -797,13 +798,15 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
 
 static void
 perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
+			 struct perf_evsel *_evsel,
 			 struct perf_mmap_param *_mp,
-			 int idx, bool per_cpu)
+			 int idx)
 {
 	struct evlist *evlist = container_of(_evlist, struct evlist, core);
 	struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
+	struct evsel *evsel = container_of(_evsel, struct evsel, core);
 
-	auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu);
+	auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx);
 }
 
 static struct perf_mmap*
@@ -1790,8 +1793,13 @@ struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *
 		if (evsel__has_leader(c2, leader)) {
 			if (is_open && close)
 				perf_evsel__close(&c2->core);
-			evsel__set_leader(c2, c2);
-			c2->core.nr_members = 0;
+			/*
+			 * We want to close all members of the group and reopen
+			 * them. Some events, like Intel topdown, require being
+			 * in a group and so keep these in the group.
+			 */
+			evsel__remove_from_group(c2, leader);
+
 			/*
 			 * Set this for all former members of the group
 			 * to indicate they get reopened.
@@ -1799,6 +1807,9 @@ struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *
 			c2->reset_group = true;
 		}
 	}
+	/* Reset the leader count if all entries were removed. */
+	if (leader->core.nr_members == 1)
+		leader->core.nr_members = 0;
 	return leader;
 }
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a21daaa5fc1b..1bde9ccf4e7d 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -114,6 +114,11 @@ int arch_evlist__add_default_attrs(struct evlist *evlist);
 struct evsel *arch_evlist__leader(struct list_head *list);
 
 int evlist__add_dummy(struct evlist *evlist);
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide);
+static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist)
+{
+	return evlist__add_aux_dummy(evlist, true);
+}
 
 int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
 			 evsel__sb_cb_t cb, void *data);
@@ -196,8 +201,6 @@ void evlist__toggle_enable(struct evlist *evlist);
 void evlist__disable_evsel(struct evlist *evlist, char *evsel_name);
 void evlist__enable_evsel(struct evlist *evlist, char *evsel_name);
 
-int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx);
-
 void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
 
 int evlist__create_maps(struct evlist *evlist, struct target *target);
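With the helper pair above, adding a system-wide tracking event becomes a one-liner; a usage sketch (marking it as the tracking event is an assumption about the caller):

	struct evsel *dummy = evlist__add_dummy_on_all_cpus(evlist);

	if (!dummy)
		return -ENOMEM;
	dummy->tracking = true;	/* assumed: use it to carry sideband events */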
"page-faults", @@ -597,6 +626,11 @@ static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size) return r + evsel__add_modifiers(evsel, bf + r, size - r); } +static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size) +{ + return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev)); +} + static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) { int r; @@ -622,7 +656,7 @@ static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size) return r + evsel__add_modifiers(evsel, bf + r, size - r); } -const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = { +const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = { { "L1-dcache", "l1-d", "l1d", "L1-data", }, { "L1-icache", "l1-i", "l1i", "L1-instruction", }, { "LLC", "L2", }, @@ -632,13 +666,13 @@ const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = { { "node", }, }; -const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = { +const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = { { "load", "loads", "read", }, { "store", "stores", "write", }, { "prefetch", "prefetches", "speculative-read", "speculative-load", }, }; -const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = { +const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = { { "refs", "Reference", "ops", "access", }, { "misses", "miss", }, }; @@ -654,7 +688,7 @@ const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_AL * L1I : Read and prefetch only * ITLB and BPU : Read-only */ -static unsigned long evsel__hw_cache_stat[C(MAX)] = { +static const unsigned long evsel__hw_cache_stat[C(MAX)] = { [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), @@ -723,12 +757,6 @@ static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size) return ret + evsel__add_modifiers(evsel, bf + ret, size - ret); } -static int evsel__tool_name(char *bf, size_t size) -{ - int ret = scnprintf(bf, size, "duration_time"); - return ret; -} - const char *evsel__name(struct evsel *evsel) { char bf[128]; @@ -753,8 +781,8 @@ const char *evsel__name(struct evsel *evsel) break; case PERF_TYPE_SOFTWARE: - if (evsel->tool_event) - evsel__tool_name(bf, sizeof(bf)); + if (evsel__is_tool(evsel)) + evsel__tool_name(evsel->tool_event, bf, sizeof(bf)); else evsel__sw_name(evsel, bf, sizeof(bf)); break; @@ -786,8 +814,8 @@ const char *evsel__metric_id(const struct evsel *evsel) if (evsel->metric_id) return evsel->metric_id; - if (evsel->core.attr.type == PERF_TYPE_SOFTWARE && evsel->tool_event) - return "duration_time"; + if (evsel__is_tool(evsel)) + return perf_tool_event__to_str(evsel->tool_event); return "unknown"; } @@ -870,7 +898,7 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o "specifying a subset with --user-regs may render DWARF unwinding unreliable, " "so the minimal registers set (IP, SP) is explicitly forced.\n"); } else { - attr->sample_regs_user |= PERF_REGS_MASK; + attr->sample_regs_user |= arch__user_reg_mask(); } attr->sample_stack_user = param->dump_size; attr->exclude_callchain_user = 1; @@ -1075,6 +1103,11 @@ static void evsel__set_default_freq_period(struct record_opts *opts, } } +static bool evsel__is_offcpu_event(struct evsel *evsel) +{ + return evsel__is_bpf_output(evsel) && !strcmp(evsel->name, 
OFFCPU_EVENT); +} + /* * The enable_on_exec/disabled value strategy: * @@ -1339,6 +1372,9 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, */ if (evsel__is_dummy_event(evsel)) evsel__reset_sample_bit(evsel, BRANCH_STACK); + + if (evsel__is_offcpu_event(evsel)) + evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES; } int evsel__set_filter(struct evsel *evsel, const char *filter) @@ -3077,3 +3113,22 @@ int evsel__source_count(const struct evsel *evsel) } return count; } + +bool __weak arch_evsel__must_be_in_group(const struct evsel *evsel __maybe_unused) +{ + return false; +} + +/* + * Remove an event from a given group (leader). + * Some events, e.g., perf metrics Topdown events, + * must always be grouped. Ignore the events. + */ +void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader) +{ + if (!arch_evsel__must_be_in_group(evsel) && evsel != leader) { + evsel__set_leader(evsel, evsel); + evsel->core.nr_members = 0; + leader->core.nr_members--; + } +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 041b42d33bf5..73ea48e94079 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -30,8 +30,18 @@ typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); enum perf_tool_event { PERF_TOOL_NONE = 0, PERF_TOOL_DURATION_TIME = 1, + PERF_TOOL_USER_TIME = 2, + PERF_TOOL_SYSTEM_TIME = 3, + + PERF_TOOL_MAX, }; +const char *perf_tool_event__to_str(enum perf_tool_event ev); +enum perf_tool_event perf_tool_event__from_str(const char *str); + +#define perf_tool_event__for_each_event(ev) \ + for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++) + /** struct evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. @@ -120,6 +130,7 @@ struct evsel { bool merged_stat; bool reset_group; bool errored; + bool needs_auxtrace_mmap; struct hashmap *per_pkg_mask; int err; struct { @@ -253,11 +264,11 @@ static inline bool evsel__is_bpf(struct evsel *evsel) #define EVSEL__MAX_ALIASES 8 -extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES]; -extern const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES]; -extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES]; -extern const char *evsel__hw_names[PERF_COUNT_HW_MAX]; -extern const char *evsel__sw_names[PERF_COUNT_SW_MAX]; +extern const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES]; +extern const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES]; +extern const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES]; +extern const char *const evsel__hw_names[PERF_COUNT_HW_MAX]; +extern const char *const evsel__sw_names[PERF_COUNT_SW_MAX]; extern char *evsel__bpf_counter_events; bool evsel__match_bpf_counter_events(const char *name); @@ -265,6 +276,11 @@ int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size const char *evsel__name(struct evsel *evsel); const char *evsel__metric_id(const struct evsel *evsel); +static inline bool evsel__is_tool(const struct evsel *evsel) +{ + return evsel->tool_event != PERF_TOOL_NONE; +} + const char *evsel__group_name(struct evsel *evsel); int evsel__group_desc(struct evsel *evsel, char *buf, size_t size); @@ -483,6 +499,9 @@ bool evsel__has_leader(struct evsel *evsel, struct evsel *leader); bool evsel__is_leader(struct evsel *evsel); void evsel__set_leader(struct evsel *evsel, struct evsel *leader); int 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 041b42d33bf5..73ea48e94079 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -30,8 +30,18 @@ typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
 enum perf_tool_event {
 	PERF_TOOL_NONE  = 0,
 	PERF_TOOL_DURATION_TIME = 1,
+	PERF_TOOL_USER_TIME = 2,
+	PERF_TOOL_SYSTEM_TIME = 3,
+
+	PERF_TOOL_MAX,
 };
 
+const char *perf_tool_event__to_str(enum perf_tool_event ev);
+enum perf_tool_event perf_tool_event__from_str(const char *str);
+
+#define perf_tool_event__for_each_event(ev)		\
+	for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++)
+
 /** struct evsel - event selector
  *
  * @evlist - evlist this evsel is in, if it is in one.
@@ -120,6 +130,7 @@ struct evsel {
 	bool			merged_stat;
 	bool			reset_group;
 	bool			errored;
+	bool			needs_auxtrace_mmap;
 	struct hashmap		*per_pkg_mask;
 	int			err;
 	struct {
@@ -253,11 +264,11 @@ static inline bool evsel__is_bpf(struct evsel *evsel)
 
 #define EVSEL__MAX_ALIASES 8
 
-extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
-extern const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES];
-extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
-extern const char *evsel__hw_names[PERF_COUNT_HW_MAX];
-extern const char *evsel__sw_names[PERF_COUNT_SW_MAX];
+extern const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_names[PERF_COUNT_HW_MAX];
+extern const char *const evsel__sw_names[PERF_COUNT_SW_MAX];
 extern char *evsel__bpf_counter_events;
 bool evsel__match_bpf_counter_events(const char *name);
 
@@ -265,6 +276,11 @@ int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size
 const char *evsel__name(struct evsel *evsel);
 const char *evsel__metric_id(const struct evsel *evsel);
 
+static inline bool evsel__is_tool(const struct evsel *evsel)
+{
+	return evsel->tool_event != PERF_TOOL_NONE;
+}
+
 const char *evsel__group_name(struct evsel *evsel);
 int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
 
@@ -483,6 +499,9 @@ bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
 bool evsel__is_leader(struct evsel *evsel);
 void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
 int evsel__source_count(const struct evsel *evsel);
+void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader);
+
+bool arch_evsel__must_be_in_group(const struct evsel *evsel);
 
 /*
  * Macro to swap the bit-field postition and size.
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index 0a13eb20c814..4dc8edbfd9ce 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -91,7 +91,7 @@ static int literal(yyscan_t scanner)
 }
 %}
 
-number		([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)
+number		([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)(e-?[0-9]+)?
 
 sch		[-,=]
 spec		\\{sch}
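The extended number pattern now lexes scientific notation in metric expressions; illustrative inputs (note the optional exponent takes only a minus sign, not a plus):

	1e6	-> 1000000
	2.5e-3	-> 0.0025
	.5e2	-> 50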
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 3db3293213a9..ae138afe6c56 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -38,6 +38,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
 #elif defined(__s390x__)
 #define GEN_ELF_ARCH	EM_S390
 #define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__riscv) && __riscv_xlen == 64
+#define GEN_ELF_ARCH	EM_RISCV
+#define GEN_ELF_CLASS	ELFCLASS64
 #else
 #error "unsupported architecture"
 #endif
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a27132e5a5ef..6ad629db63b7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3462,9 +3462,22 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
 	return 0;
 }
 
+struct header_fw {
+	struct feat_writer	fw;
+	struct feat_fd		*ff;
+};
+
+static int feat_writer_cb(struct feat_writer *fw, void *buf, size_t sz)
+{
+	struct header_fw *h = container_of(fw, struct header_fw, fw);
+
+	return do_write(h->ff, buf, sz);
+}
+
 static int do_write_feat(struct feat_fd *ff, int type,
 			 struct perf_file_section **p,
-			 struct evlist *evlist)
+			 struct evlist *evlist,
+			 struct feat_copier *fc)
 {
 	int err;
 	int ret = 0;
@@ -3478,7 +3491,23 @@ static int do_write_feat(struct feat_fd *ff, int type,
 
 		(*p)->offset = lseek(ff->fd, 0, SEEK_CUR);
 
-		err = feat_ops[type].write(ff, evlist);
+		/*
+		 * Hook to let perf inject copy feature sections from the input
+		 * file.
+		 */
+		if (fc && fc->copy) {
+			struct header_fw h = {
+				.fw.write = feat_writer_cb,
+				.ff = ff,
+			};
+
+			/* ->copy() returns 0 if the feature was not copied */
+			err = fc->copy(fc, type, &h.fw);
+		} else {
+			err = 0;
+		}
+		if (!err)
+			err = feat_ops[type].write(ff, evlist);
 		if (err < 0) {
 			pr_debug("failed to write feature %s\n", feat_ops[type].name);
 
@@ -3494,7 +3523,8 @@ static int do_write_feat(struct feat_fd *ff, int type,
 }
 
 static int perf_header__adds_write(struct perf_header *header,
-				   struct evlist *evlist, int fd)
+				   struct evlist *evlist, int fd,
+				   struct feat_copier *fc)
 {
 	int nr_sections;
 	struct feat_fd ff;
@@ -3523,7 +3553,7 @@ static int perf_header__adds_write(struct perf_header *header,
 	lseek(fd, sec_start + sec_size, SEEK_SET);
 
 	for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
-		if (do_write_feat(&ff, feat, &p, evlist))
+		if (do_write_feat(&ff, feat, &p, evlist, fc))
 			perf_header__clear_feat(header, feat);
 	}
 
@@ -3561,9 +3591,10 @@ int perf_header__write_pipe(int fd)
 	return 0;
 }
 
-int perf_session__write_header(struct perf_session *session,
-			       struct evlist *evlist,
-			       int fd, bool at_exit)
+static int perf_session__do_write_header(struct perf_session *session,
+					 struct evlist *evlist,
+					 int fd, bool at_exit,
+					 struct feat_copier *fc)
 {
 	struct perf_file_header f_header;
 	struct perf_file_attr   f_attr;
@@ -3615,7 +3646,7 @@ int perf_session__write_header(struct perf_session *session,
 	header->feat_offset = header->data_offset + header->data_size;
 
 	if (at_exit) {
-		err = perf_header__adds_write(header, evlist, fd);
+		err = perf_header__adds_write(header, evlist, fd, fc);
 		if (err < 0)
 			return err;
 	}
@@ -3648,6 +3679,35 @@ int perf_session__write_header(struct perf_session *session,
 	return 0;
 }
 
+int perf_session__write_header(struct perf_session *session,
+			       struct evlist *evlist,
+			       int fd, bool at_exit)
+{
+	return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
+}
+
+size_t perf_session__data_offset(const struct evlist *evlist)
+{
+	struct evsel *evsel;
+	size_t data_offset;
+
+	data_offset = sizeof(struct perf_file_header);
+	evlist__for_each_entry(evlist, evsel) {
+		data_offset += evsel->core.ids * sizeof(u64);
+	}
+	data_offset += evlist->core.nr_entries * sizeof(struct perf_file_attr);
+
+	return data_offset;
+}
+
+int perf_session__inject_header(struct perf_session *session,
+				struct evlist *evlist,
+				int fd,
+				struct feat_copier *fc)
+{
+	return perf_session__do_write_header(session, evlist, fd, true, fc);
+}
+
 static int perf_header__getbuffer64(struct perf_header *header,
 				    int fd, void *buf, size_t size)
 {
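A feat_copier is a thin callback wrapper around the feat_writer: return 0 when the section is not copied (so it gets regenerated), a negative error on failure, or a positive value once the section has been written via fw->write(). A minimal sketch (the cached-buffer state is hypothetical; only the two structs declared in header.h below are given):

	struct inject_fc {			/* hypothetical wrapper */
		struct feat_copier fc;
		struct feat_buf {		/* hypothetical cache of input sections */
			void	*buf;
			size_t	sz;
		} feats[HEADER_LAST_FEATURE];
	};

	static int inject_fc__copy(struct feat_copier *fc, int feat,
				   struct feat_writer *fw)
	{
		struct inject_fc *c = container_of(fc, struct inject_fc, fc);
		int err;

		if (!c->feats[feat].buf)
			return 0;	/* not copied: fall back to ->write() */

		err = fw->write(fw, c->feats[feat].buf, c->feats[feat].sz);
		return err < 0 ? err : 1;	/* positive: section copied */
	}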
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 0eb4bc29a5a4..56916dabce7b 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -121,6 +121,23 @@ int perf_session__write_header(struct perf_session *session,
 			       int fd, bool at_exit);
 int perf_header__write_pipe(int fd);
 
+/* feat_writer writes a feature section to output */
+struct feat_writer {
+	int (*write)(struct feat_writer *fw, void *buf, size_t sz);
+};
+
+/* feat_copier copies a feature section using feat_writer to output */
+struct feat_copier {
+	int (*copy)(struct feat_copier *fc, int feat, struct feat_writer *fw);
+};
+
+int perf_session__inject_header(struct perf_session *session,
+				struct evlist *evlist,
+				int fd,
+				struct feat_copier *fc);
+
+size_t perf_session__data_offset(const struct evlist *evlist);
+
 void perf_header__set_feat(struct perf_header *header, int feat);
 void perf_header__clear_feat(struct perf_header *header, int feat);
 bool perf_header__has_feat(const struct perf_header *header, int feat);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index e1d8f7504cbe..0ac860c8dd2b 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -137,6 +137,7 @@ struct intel_pt_decoder {
 	bool in_psb;
 	bool hop;
 	bool leap;
+	bool emulated_ptwrite;
 	bool vm_time_correlation;
 	bool vm_tm_corr_dry_run;
 	bool vm_tm_corr_reliable;
@@ -481,6 +482,8 @@ static int intel_pt_ext_err(int code)
 		return INTEL_PT_ERR_LOST;
 	case -ELOOP:
 		return INTEL_PT_ERR_NELOOP;
+	case -ECONNRESET:
+		return INTEL_PT_ERR_EPTW;
 	default:
 		return INTEL_PT_ERR_UNK;
 	}
@@ -497,6 +500,7 @@ static const char *intel_pt_err_msgs[] = {
 	[INTEL_PT_ERR_LOST]   = "Lost trace data",
 	[INTEL_PT_ERR_UNK]    = "Unknown error!",
 	[INTEL_PT_ERR_NELOOP] = "Never-ending loop (refer perf config intel-pt.max-loops)",
+	[INTEL_PT_ERR_EPTW]   = "Broken emulated ptwrite",
 };
 
 int intel_pt__strerror(int code, char *buf, size_t buflen)
@@ -1535,17 +1539,108 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
 	return intel_pt_bug(decoder);
 }
 
+struct eptw_data {
+	int bit_countdown;
+	uint64_t payload;
+};
+
+static int intel_pt_eptw_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+	struct eptw_data *data = pkt_info->data;
+	int nr_bits;
+
+	switch (pkt_info->packet.type) {
+	case INTEL_PT_PAD:
+	case INTEL_PT_MNT:
+	case INTEL_PT_MODE_EXEC:
+	case INTEL_PT_MODE_TSX:
+	case INTEL_PT_MTC:
+	case INTEL_PT_FUP:
+	case INTEL_PT_CYC:
+	case INTEL_PT_CBR:
+	case INTEL_PT_TSC:
+	case INTEL_PT_TMA:
+	case INTEL_PT_PIP:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_PSB:
+	case INTEL_PT_PSBEND:
+	case INTEL_PT_PTWRITE:
+	case INTEL_PT_PTWRITE_IP:
+	case INTEL_PT_EXSTOP:
+	case INTEL_PT_EXSTOP_IP:
+	case INTEL_PT_MWAIT:
+	case INTEL_PT_PWRE:
+	case INTEL_PT_PWRX:
+	case INTEL_PT_BBP:
+	case INTEL_PT_BIP:
+	case INTEL_PT_BEP:
+	case INTEL_PT_BEP_IP:
+	case INTEL_PT_CFE:
+	case INTEL_PT_CFE_IP:
+	case INTEL_PT_EVD:
+		break;
+
+	case INTEL_PT_TNT:
+		nr_bits = data->bit_countdown;
+		if (nr_bits > pkt_info->packet.count)
+			nr_bits = pkt_info->packet.count;
+		data->payload <<= nr_bits;
+		data->payload |= pkt_info->packet.payload >> (64 - nr_bits);
+		data->bit_countdown -= nr_bits;
+		return !data->bit_countdown;
+
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_TIP_PGD:
+	case INTEL_PT_TIP:
+	case INTEL_PT_BAD:
+	case INTEL_PT_OVF:
+	case INTEL_PT_TRACESTOP:
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+static int intel_pt_emulated_ptwrite(struct intel_pt_decoder *decoder)
+{
+	int n = 64 - decoder->tnt.count;
+	struct eptw_data data = {
+		.bit_countdown = n,
+		.payload = decoder->tnt.payload >> n,
+	};
+
+	decoder->emulated_ptwrite = false;
+	intel_pt_log("Emulated ptwrite detected\n");
+
+	intel_pt_pkt_lookahead(decoder, intel_pt_eptw_lookahead_cb, &data);
+	if (data.bit_countdown)
+		return -ECONNRESET;
+
+	decoder->state.type = INTEL_PT_PTW;
+	decoder->state.from_ip = decoder->ip;
+	decoder->state.to_ip = 0;
+	decoder->state.ptw_payload = data.payload;
+	return 0;
+}
+
 static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
 {
 	struct intel_pt_insn intel_pt_insn;
 	int err;
 
 	while (1) {
+		if (decoder->emulated_ptwrite)
+			return intel_pt_emulated_ptwrite(decoder);
 		err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
-		if (err == INTEL_PT_RETURN)
+		if (err == INTEL_PT_RETURN) {
+			decoder->emulated_ptwrite = intel_pt_insn.emulated_ptwrite;
 			return 0;
-		if (err)
+		}
+		if (err) {
+			decoder->emulated_ptwrite = false;
 			return err;
+		}
 
 		if (intel_pt_insn.op == INTEL_PT_OP_RET) {
 			if (!decoder->return_compression) {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index efb2cb3ae0ca..c773028df80e 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -58,6 +58,7 @@ enum {
 	INTEL_PT_ERR_LOST,
 	INTEL_PT_ERR_UNK,
 	INTEL_PT_ERR_NELOOP,
+	INTEL_PT_ERR_EPTW,
 	INTEL_PT_ERR_MAX,
 };
 
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 9d5e65cec89b..1376077183f7 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -32,6 +32,7 @@ static void intel_pt_insn_decoder(struct insn *insn,
 	int ext;
 
 	intel_pt_insn->rel = 0;
+	intel_pt_insn->emulated_ptwrite = false;
 
 	if (insn_is_avx(insn)) {
 		intel_pt_insn->op = INTEL_PT_OP_OTHER;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
index c2861cfdd768..e3338b56a75f 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -37,6 +37,7 @@ enum intel_pt_insn_branch {
 struct intel_pt_insn {
 	enum intel_pt_insn_op		op;
 	enum intel_pt_insn_branch	branch;
+	bool				emulated_ptwrite;
 	int				length;
 	int32_t				rel;
 	unsigned char			buf[INTEL_PT_INSN_BUF_SZ];
intel_pt_log("Emulated ptwrite signature found\n"); + return true; + } + intel_pt_log("Emulated ptwrite signature not found\n"); + return false; +} + static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, @@ -729,11 +759,16 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, cpumode = intel_pt_nr_cpumode(ptq, *ip, nr); if (nr) { - if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL || + if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) || intel_pt_get_guest(ptq)) return -EINVAL; machine = ptq->guest_machine; - thread = ptq->unknown_guest_thread; + thread = ptq->guest_thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL) + return -EINVAL; + thread = ptq->unknown_guest_thread; + } } else { thread = ptq->thread; if (!thread) { @@ -764,6 +799,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, *ip += e->byte_cnt; intel_pt_insn->op = e->op; intel_pt_insn->branch = e->branch; + intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite; intel_pt_insn->length = e->length; intel_pt_insn->rel = e->rel; memcpy(intel_pt_insn->buf, e->insn, @@ -795,8 +831,18 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, insn_cnt += 1; - if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) + if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) { + bool eptw; + u64 offs; + + if (!intel_pt_jmp_16(intel_pt_insn)) + goto out; + /* Check for emulated ptwrite */ + offs = offset + intel_pt_insn->length; + eptw = intel_pt_emulated_ptwrite(al.map->dso, machine, offs); + intel_pt_insn->emulated_ptwrite = eptw; goto out; + } if (max_insn_cnt && insn_cnt >= max_insn_cnt) goto out_no_cache; @@ -1300,6 +1346,7 @@ static void intel_pt_free_queue(void *priv) if (!ptq) return; thread__zput(ptq->thread); + thread__zput(ptq->guest_thread); thread__zput(ptq->unknown_guest_thread); intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); @@ -2372,6 +2419,10 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC; } + /* Ensure guest code maps are set up */ + if (symbol_conf.guest_code && (state->from_nr || state->to_nr)) + intel_pt_get_guest(ptq); + /* * Do PEBS first to allow for the possibility that the PEBS timestamp * precedes the current timestamp. diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 15f60fd09424..014d82159656 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -24,7 +24,7 @@ #include "unwind.h" #include "libunwind-aarch64.h" #define perf_event_arm_regs perf_event_arm64_regs -#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> +#include <../../../arch/arm64/include/uapi/asm/perf_regs.h> #undef perf_event_arm_regs #include "../../arch/arm64/util/unwind-libunwind.c" diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 95391236f5f6..009061852808 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -84,6 +84,14 @@ static int machine__set_mmap_name(struct machine *machine) return machine->mmap_name ? 
0 : -ENOMEM; } +static void thread__set_guest_comm(struct thread *thread, pid_t pid) +{ + char comm[64]; + + snprintf(comm, sizeof(comm), "[guest/%d]", pid); + thread__set_comm(thread, comm, 0); +} + int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { int err = -ENOMEM; @@ -119,13 +127,11 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) if (pid != HOST_KERNEL_ID) { struct thread *thread = machine__findnew_thread(machine, -1, pid); - char comm[64]; if (thread == NULL) goto out; - snprintf(comm, sizeof(comm), "[guest/%d]", pid); - thread__set_comm(thread, comm, 0); + thread__set_guest_comm(thread, pid); thread__put(thread); } @@ -299,6 +305,8 @@ struct machine *machines__add(struct machines *machines, pid_t pid, rb_link_node(&machine->rb_node, parent, p); rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost); + machine->machines = machines; + return machine; } @@ -384,6 +392,93 @@ struct machine *machines__find_guest(struct machines *machines, pid_t pid) return machine; } +/* + * A common case for KVM test programs is that the test program acts as the + * hypervisor, creating, running and destroying the virtual machine, and + * providing the guest object code from its own object code. In this case, + * the VM is not running an OS, but only the functions loaded into it by the + * hypervisor test program, and conveniently, loaded at the same virtual + * addresses. + * + * Normally to resolve addresses, MMAP events are needed to map addresses + * back to the object code and debug symbols for that object code. + * + * Currently, there is no way to get such mapping information from guests + * but, in the scenario described above, the guest has the same mappings + * as the hypervisor, so support for that scenario can be achieved. + * + * To support that, copy the host thread's maps to the guest thread's maps. + * Note, we do not discover the guest until we encounter a guest event, + * which works well because it is not until then that we know that the host + * thread's maps have been set up. + * + * This function returns the guest thread. Apart from keeping the data + * structures sane, using a thread belonging to the guest machine, instead + * of the host thread, allows it to have its own comm (refer + * thread__set_guest_comm()). + */ +static struct thread *findnew_guest_code(struct machine *machine, + struct machine *host_machine, + pid_t pid) +{ + struct thread *host_thread; + struct thread *thread; + int err; + + if (!machine) + return NULL; + + thread = machine__findnew_thread(machine, -1, pid); + if (!thread) + return NULL; + + /* Assume maps are set up if there are any */ + if (thread->maps->nr_maps) + return thread; + + host_thread = machine__find_thread(host_machine, -1, pid); + if (!host_thread) + goto out_err; + + thread__set_guest_comm(thread, pid); + + /* + * Guest code can be found in hypervisor process at the same address + * so copy host maps. 
+ */ + err = maps__clone(thread, host_thread->maps); + thread__put(host_thread); + if (err) + goto out_err; + + return thread; + +out_err: + thread__zput(thread); + return NULL; +} + +struct thread *machines__findnew_guest_code(struct machines *machines, pid_t pid) +{ + struct machine *host_machine = machines__find(machines, HOST_KERNEL_ID); + struct machine *machine = machines__findnew(machines, pid); + + return findnew_guest_code(machine, host_machine, pid); +} + +struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid) +{ + struct machines *machines = machine->machines; + struct machine *host_machine; + + if (!machines) + return NULL; + + host_machine = machines__find(machines, HOST_KERNEL_ID); + + return findnew_guest_code(machine, host_machine, pid); +} + void machines__process_guests(struct machines *machines, machine__process_t process, void *data) { diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 0023165422aa..5d7daf7cb7bc 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -18,6 +18,7 @@ struct symbol; struct target; struct thread; union perf_event; +struct machines; /* Native host kernel uses -1 as pid index in machine */ #define HOST_KERNEL_ID (-1) @@ -59,6 +60,7 @@ struct machine { void *priv; u64 db_id; }; + struct machines *machines; bool trampolines_mapped; }; @@ -162,10 +164,11 @@ void machines__process_guests(struct machines *machines, struct machine *machines__add(struct machines *machines, pid_t pid, const char *root_dir); -struct machine *machines__find_host(struct machines *machines); struct machine *machines__find(struct machines *machines, pid_t pid); struct machine *machines__findnew(struct machines *machines, pid_t pid); struct machine *machines__find_guest(struct machines *machines, pid_t pid); +struct thread *machines__findnew_guest_code(struct machines *machines, pid_t pid); +struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid); void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); void machines__set_comm_exec(struct machines *machines, bool comm_exec); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ed0ab838bcc5..c3c21a9c350b 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -314,6 +314,30 @@ static const char * const mem_hops[] = { "board", }; +static int perf_mem__op_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + u64 op = PERF_MEM_LOCK_NA; + int l; + + if (mem_info) + op = mem_info->data_src.mem_op; + + if (op & PERF_MEM_OP_NA) + l = scnprintf(out, sz, "N/A"); + else if (op & PERF_MEM_OP_LOAD) + l = scnprintf(out, sz, "LOAD"); + else if (op & PERF_MEM_OP_STORE) + l = scnprintf(out, sz, "STORE"); + else if (op & PERF_MEM_OP_PFETCH) + l = scnprintf(out, sz, "PFETCH"); + else if (op & PERF_MEM_OP_EXEC) + l = scnprintf(out, sz, "EXEC"); + else + l = scnprintf(out, sz, "No"); + + return l; +} + int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { size_t i, l = 0; @@ -466,7 +490,10 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in { int i = 0; - i += perf_mem__lvl_scnprintf(out, sz, mem_info); + i += scnprintf(out, sz, "|OP "); + i += perf_mem__op_scnprintf(out + i, sz - i, mem_info); + i += scnprintf(out + i, sz - i, "|LVL "); + i += perf_mem__lvl_scnprintf(out + i, sz, mem_info); i += scnprintf(out + i, sz - i, "|SNP "); i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info); i += scnprintf(out + i, sz - i, 
"|TLB "); @@ -582,6 +609,8 @@ do { \ } if (lvl & P(LVL, MISS)) if (lvl & P(LVL, L1)) stats->st_l1miss++; + if (lvl & P(LVL, NA)) + stats->st_na++; } else { /* unparsable data_src? */ stats->noparse++; @@ -608,6 +637,7 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) stats->st_noadrs += add->st_noadrs; stats->st_l1hit += add->st_l1hit; stats->st_l1miss += add->st_l1miss; + stats->st_na += add->st_na; stats->load += add->load; stats->ld_excl += add->ld_excl; stats->ld_shared += add->ld_shared; diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 916242f8020a..8a8b568baeee 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -63,6 +63,7 @@ struct c2c_stats { u32 st_noadrs; /* cacheable store with no address */ u32 st_l1hit; /* count of stores that hit L1D */ u32 st_l1miss; /* count of stores that miss L1D */ + u32 st_na; /* count of stores with memory level is not available */ u32 load; /* count of all loads in trace */ u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */ u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index d8492e339521..8f7baeabc5cf 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -728,22 +728,23 @@ static int metricgroup__build_event_string(struct strbuf *events, { struct hashmap_entry *cur; size_t bkt; - bool no_group = true, has_duration = false; + bool no_group = true, has_tool_events = false; + bool tool_events[PERF_TOOL_MAX] = {false}; int ret = 0; #define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) hashmap__for_each_entry(ctx->ids, cur, bkt) { const char *sep, *rsep, *id = cur->key; + enum perf_tool_event ev; pr_debug("found event %s\n", id); - /* - * Duration time maps to a software event and can make - * groups not count. Always use it outside a - * group. - */ - if (!strcmp(id, "duration_time")) { - has_duration = true; + + /* Always move tool events outside of the group. */ + ev = perf_tool_event__from_str(id); + if (ev != PERF_TOOL_NONE) { + has_tool_events = true; + tool_events[ev] = true; continue; } /* Separate events with commas and open the group if necessary. */ @@ -802,16 +803,25 @@ static int metricgroup__build_event_string(struct strbuf *events, RETURN_IF_NON_ZERO(ret); } } - if (has_duration) { - if (no_group) { - /* Strange case of a metric of just duration_time. */ - ret = strbuf_addf(events, "duration_time"); - } else if (!has_constraint) - ret = strbuf_addf(events, "}:W,duration_time"); - else - ret = strbuf_addf(events, ",duration_time"); - } else if (!no_group && !has_constraint) + if (!no_group && !has_constraint) { ret = strbuf_addf(events, "}:W"); + RETURN_IF_NON_ZERO(ret); + } + if (has_tool_events) { + int i; + + perf_tool_event__for_each_event(i) { + if (tool_events[i]) { + if (!no_group) { + ret = strbuf_addch(events, ','); + RETURN_IF_NON_ZERO(ret); + } + no_group = false; + ret = strbuf_addstr(events, perf_tool_event__to_str(i)); + RETURN_IF_NON_ZERO(ret); + } + } + } return ret; #undef RETURN_IF_NON_ZERO @@ -1117,7 +1127,7 @@ out: /** * metric_list_cmp - list_sort comparator that sorts metrics with more events to - * the front. duration_time is excluded from the count. + * the front. tool events are excluded from the count. 
*/ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, const struct list_head *r) @@ -1125,15 +1135,19 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, const struct metric *left = container_of(l, struct metric, nd); const struct metric *right = container_of(r, struct metric, nd); struct expr_id_data *data; - int left_count, right_count; + int i, left_count, right_count; left_count = hashmap__size(left->pctx->ids); - if (!expr__get_id(left->pctx, "duration_time", &data)) - left_count--; + perf_tool_event__for_each_event(i) { + if (!expr__get_id(left->pctx, perf_tool_event__to_str(i), &data)) + left_count--; + } right_count = hashmap__size(right->pctx->ids); - if (!expr__get_id(right->pctx, "duration_time", &data)) - right_count--; + perf_tool_event__for_each_event(i) { + if (!expr__get_id(right->pctx, perf_tool_event__to_str(i), &data)) + right_count--; + } return right_count - left_count; } @@ -1270,6 +1284,30 @@ static void metricgroup__free_metrics(struct list_head *metric_list) } /** + * find_tool_events - Search for the pressence of tool events in metric_list. + * @metric_list: List to take metrics from. + * @tool_events: Array of false values, indices corresponding to tool events set + * to true if tool event is found. + */ +static void find_tool_events(const struct list_head *metric_list, + bool tool_events[PERF_TOOL_MAX]) +{ + struct metric *m; + + list_for_each_entry(m, metric_list, nd) { + int i; + + perf_tool_event__for_each_event(i) { + struct expr_id_data *data; + + if (!tool_events[i] && + !expr__get_id(m->pctx, perf_tool_event__to_str(i), &data)) + tool_events[i] = true; + } + } +} + +/** * build_combined_expr_ctx - Make an expr_parse_ctx with all has_constraint * metric IDs, as the IDs are held in a set, * duplicates will be removed. @@ -1318,11 +1356,14 @@ err_out: * @ids: the event identifiers parsed from a metric. * @modifier: any modifiers added to the events. * @has_constraint: false if events should be placed in a weak group. + * @tool_events: entries set true if the tool event of index could be present in + * the overall list of metrics. * @out_evlist: the created list of events. */ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, const char *modifier, - bool has_constraint, struct evlist **out_evlist) + bool has_constraint, const bool tool_events[PERF_TOOL_MAX], + struct evlist **out_evlist) { struct parse_events_error parse_error; struct evlist *parsed_evlist; @@ -1331,26 +1372,38 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, *out_evlist = NULL; if (!metric_no_merge || hashmap__size(ids->ids) == 0) { - char *tmp; + bool added_event = false; + int i; /* - * We may fail to share events between metrics because - * duration_time isn't present in one metric. For example, a - * ratio of cache misses doesn't need duration_time but the same - * events may be used for a misses per second. Events without - * sharing implies multiplexing, that is best avoided, so place - * duration_time in every group. + * We may fail to share events between metrics because a tool + * event isn't present in one metric. For example, a ratio of + * cache misses doesn't need duration_time but the same events + * may be used for a misses per second. Events without sharing + * implies multiplexing, that is best avoided, so place + * all tool events in every group. 
* * Also, there may be no ids/events in the expression parsing * context because of constant evaluation, e.g.: * event1 if #smt_on else 0 - * Add a duration_time event to avoid a parse error on an empty - * string. + * Add a tool event to avoid a parse error on an empty string. */ - tmp = strdup("duration_time"); - if (!tmp) - return -ENOMEM; + perf_tool_event__for_each_event(i) { + if (tool_events[i]) { + char *tmp = strdup(perf_tool_event__to_str(i)); + + if (!tmp) + return -ENOMEM; + ids__insert(ids->ids, tmp); + added_event = true; + } + } + if (!added_event && hashmap__size(ids->ids) == 0) { + char *tmp = strdup("duration_time"); - ids__insert(ids->ids, tmp); + if (!tmp) + return -ENOMEM; + ids__insert(ids->ids, tmp); + } } ret = metricgroup__build_event_string(&events, ids, modifier, has_constraint); @@ -1392,6 +1445,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); struct metric *m; + bool tool_events[PERF_TOOL_MAX] = {false}; int ret; if (metric_events_list->nr_entries == 0) @@ -1407,12 +1461,15 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, if (!metric_no_merge) { struct expr_parse_ctx *combined = NULL; + find_tool_events(&metric_list, tool_events); + ret = build_combined_expr_ctx(&metric_list, &combined); if (!ret && combined && hashmap__size(combined->ids)) { ret = parse_ids(metric_no_merge, fake_pmu, combined, /*modifier=*/NULL, /*has_constraint=*/true, + tool_events, &combined_evlist); } if (combined) @@ -1460,7 +1517,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, } if (!metric_evlist) { ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier, - m->has_constraint, &m->evlist); + m->has_constraint, tool_events, &m->evlist); if (ret) goto out; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 50502b4a7ca4..a4dff881be39 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -62,8 +62,8 @@ void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_u void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused, struct evlist *evlist __maybe_unused, - int idx __maybe_unused, - bool per_cpu __maybe_unused) + struct evsel *evsel __maybe_unused, + int idx __maybe_unused) { } diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h new file mode 100644 index 000000000000..2dd67c60f211 --- /dev/null +++ b/tools/perf/util/off_cpu.h @@ -0,0 +1,38 @@ +#ifndef PERF_UTIL_OFF_CPU_H +#define PERF_UTIL_OFF_CPU_H + +#include <linux/perf_event.h> + +struct evlist; +struct target; +struct perf_session; +struct record_opts; + +#define OFFCPU_EVENT "offcpu-time" + +#define OFFCPU_SAMPLE_TYPES (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \ + PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ID | PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_CGROUP) + + +#ifdef HAVE_BPF_SKEL +int off_cpu_prepare(struct evlist *evlist, struct target *target, + struct record_opts *opts); +int off_cpu_write(struct perf_session *session); +#else +static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused, + struct target *target __maybe_unused, + struct record_opts *opts __maybe_unused) +{ + return -1; +} + +static inline int off_cpu_write(struct perf_session *session __maybe_unused) +{ + return -1; +} +#endif + +#endif /* PERF_UTIL_OFF_CPU_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 
dd84fed698a3..7ed235740431 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -154,6 +154,21 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { }, }; +struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { + [PERF_TOOL_DURATION_TIME] = { + .symbol = "duration_time", + .alias = "", + }, + [PERF_TOOL_USER_TIME] = { + .symbol = "user_time", + .alias = "", + }, + [PERF_TOOL_SYSTEM_TIME] = { + .symbol = "system_time", + .alias = "", + }, +}; + #define __PERF_EVENT_FIELD(config, name) \ ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) @@ -350,7 +365,7 @@ __add_event(struct list_head *list, int *idx, (*idx)++; evsel->core.cpus = cpus; evsel->core.own_cpus = perf_cpu_map__get(cpus); - evsel->core.system_wide = pmu ? pmu->is_uncore : false; + evsel->core.requires_cpu = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; if (name) @@ -402,14 +417,16 @@ static int add_event_tool(struct list_head *list, int *idx, if (!evsel) return -ENOMEM; evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME) { + if (tool_event == PERF_TOOL_DURATION_TIME + || tool_event == PERF_TOOL_USER_TIME + || tool_event == PERF_TOOL_SYSTEM_TIME) { free((char *)evsel->unit); evsel->unit = strdup("ns"); } return 0; } -static int parse_aliases(char *str, const char *names[][EVSEL__MAX_ALIASES], int size) +static int parse_aliases(char *str, const char *const names[][EVSEL__MAX_ALIASES], int size) { int i, j; int n, longest = -1; @@ -3056,21 +3073,34 @@ out_enomem: return evt_num; } -static void print_tool_event(const char *name, const char *event_glob, +static void print_tool_event(const struct event_symbol *syms, const char *event_glob, bool name_only) { - if (event_glob && !strglobmatch(name, event_glob)) + if (syms->symbol == NULL) + return; + + if (event_glob && !(strglobmatch(syms->symbol, event_glob) || + (syms->alias && strglobmatch(syms->alias, event_glob)))) return; + if (name_only) - printf("%s ", name); - else + printf("%s ", syms->symbol); + else { + char name[MAX_NAME_LEN]; + if (syms->alias && strlen(syms->alias)) + snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); + else + strlcpy(name, syms->symbol, MAX_NAME_LEN); printf(" %-50s [%s]\n", name, "Tool event"); - + } } void print_tool_events(const char *event_glob, bool name_only) { - print_tool_event("duration_time", event_glob, name_only); + // Start at 1 because the first enum entry denotes no tool event + for (int i = 1; i < PERF_TOOL_MAX; ++i) { + print_tool_event(event_symbols_tool + i, event_glob, name_only); + } if (pager_in_use()) printf("\n"); } diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 5b6e4b5249cf..3a9ce96c8bce 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -353,6 +353,8 @@ alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_AL emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } +user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); } +system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 
caed0336429f..ce80b79be103 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -86,9 +86,21 @@ bool is_directory(const char *base_path, const struct dirent *dent) char path[PATH_MAX]; struct stat st; - sprintf(path, "%s/%s", base_path, dent->d_name); + snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name); if (stat(path, &st)) return false; return S_ISDIR(st.st_mode); } + +bool is_executable_file(const char *base_path, const struct dirent *dent) +{ + char path[PATH_MAX]; + struct stat st; + + snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name); + if (stat(path, &st)) + return false; + + return !S_ISDIR(st.st_mode) && (st.st_mode & S_IXUSR); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index 083429b7efa3..d94902c22222 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -12,5 +12,6 @@ int path__join3(char *bf, size_t size, const char *path1, const char *path2, con bool is_regular_file(const char *file); bool is_directory(const char *base_path, const struct dirent *dent); +bool is_executable_file(const char *base_path, const struct dirent *dent); #endif /* _PERF_PATH_H */ diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index a982e40ee5a9..872dd3d38782 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -103,6 +103,8 @@ static const char *__perf_reg_name_arm64(int id) return "lr"; case PERF_REG_ARM64_PC: return "pc"; + case PERF_REG_ARM64_VG: + return "vg"; default: return NULL; } diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index a685d20165f7..aa5156c2bcff 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -38,5 +38,6 @@ util/units.c util/affinity.c util/rwsem.c util/hashmap.c +util/perf_regs.c util/pmu-hybrid.c util/fncache.c diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 413f2d19c13f..adba01b7d9dd 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -392,6 +392,18 @@ static const char *get_dsoname(struct map *map) return dsoname; } +static unsigned long get_offset(struct symbol *sym, struct addr_location *al) +{ + unsigned long offset; + + if (al->addr < sym->end) + offset = al->addr - sym->start; + else + offset = al->addr - al->map->start - sym->start; + + return offset; +} + static PyObject *python_process_callchain(struct perf_sample *sample, struct evsel *evsel, struct addr_location *al) @@ -443,6 +455,25 @@ static PyObject *python_process_callchain(struct perf_sample *sample, _PyUnicode_FromStringAndSize(node->ms.sym->name, node->ms.sym->namelen)); pydict_set_item_string_decref(pyelem, "sym", pysym); + + if (node->ms.map) { + struct map *map = node->ms.map; + struct addr_location node_al; + unsigned long offset; + + node_al.addr = map->map_ip(map, node->ip); + node_al.map = map; + offset = get_offset(node->ms.sym, &node_al); + + pydict_set_item_string_decref( + pyelem, "sym_off", + PyLong_FromUnsignedLongLong(offset)); + } + if (node->srcline && strcmp(":0", node->srcline)) { + pydict_set_item_string_decref( + pyelem, "sym_srcline", + _PyUnicode_FromString(node->srcline)); + } } if (node->ms.map) { @@ -520,18 +551,6 @@ exit: return pylist; } -static unsigned long get_offset(struct symbol *sym, struct addr_location *al) -{ - unsigned long offset; - - if (al->addr < sym->end) - offset = al->addr - sym->start; - else - offset = 
al->addr - al->map->start - sym->start; - - return offset; -} - static int get_symoff(struct symbol *sym, struct addr_location *al, bool print_off, char *bf, int size) { @@ -736,12 +755,22 @@ static void set_regs_in_dict(PyObject *dict, } static void set_sym_in_dict(PyObject *dict, struct addr_location *al, - const char *dso_field, const char *sym_field, - const char *symoff_field) + const char *dso_field, const char *dso_bid_field, + const char *dso_map_start, const char *dso_map_end, + const char *sym_field, const char *symoff_field) { + char sbuild_id[SBUILD_ID_SIZE]; + if (al->map) { pydict_set_item_string_decref(dict, dso_field, _PyUnicode_FromString(al->map->dso->name)); + build_id__sprintf(&al->map->dso->bid, sbuild_id); + pydict_set_item_string_decref(dict, dso_bid_field, + _PyUnicode_FromString(sbuild_id)); + pydict_set_item_string_decref(dict, dso_map_start, + PyLong_FromUnsignedLong(al->map->start)); + pydict_set_item_string_decref(dict, dso_map_end, + PyLong_FromUnsignedLong(al->map->end)); } if (al->sym) { pydict_set_item_string_decref(dict, sym_field, @@ -821,7 +850,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, (const char *)sample->raw_data, sample->raw_size)); pydict_set_item_string_decref(dict, "comm", _PyUnicode_FromString(thread__comm_str(al->thread))); - set_sym_in_dict(dict, al, "dso", "symbol", "symoff"); + set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end", + "symbol", "symoff"); pydict_set_item_string_decref(dict, "callchain", callchain); @@ -837,7 +867,9 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, if (addr_al) { pydict_set_item_string_decref(dict_sample, "addr_correlates_sym", PyBool_FromLong(1)); - set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_symbol", "addr_symoff"); + set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid", + "addr_dso_map_start", "addr_dso_map_end", + "addr_symbol", "addr_symoff"); } if (sample->flags) @@ -2074,7 +2106,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile fprintf(ofp, "\t\tfor node in common_callchain:"); fprintf(ofp, "\n\t\t\tif 'sym' in node:"); - fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))"); + fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x] %%s%%s%%s%%s\" %% ("); + fprintf(ofp, "\n\t\t\t\t\tnode['ip'], node['sym']['name'],"); + fprintf(ofp, "\n\t\t\t\t\t\"+0x{:x}\".format(node['sym_off']) if 'sym_off' in node else \"\","); + fprintf(ofp, "\n\t\t\t\t\t\" ({})\".format(node['dso']) if 'dso' in node else \"\","); + fprintf(ofp, "\n\t\t\t\t\t\" \" + node['sym_srcline'] if 'sym_srcline' in node else \"\"))"); fprintf(ofp, "\n\t\t\telse:"); fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n"); fprintf(ofp, "\t\tprint()\n\n"); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a7f93f5a1ac8..0aa818977d2b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1426,6 +1426,13 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, else pid = sample->pid; + /* + * Guest code machine is created as needed and does not use + * DEFAULT_GUEST_KERNEL_ID. 
+ */ + if (symbol_conf.guest_code) + return machines__findnew(machines, pid); + return machines__find_guest(machines, pid); } diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 138e3ab9d638..606f09b09226 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -610,6 +610,19 @@ static bool hybrid_uniquify(struct evsel *evsel) return perf_pmu__has_hybrid() && !is_uncore(evsel); } +static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config, + bool check) +{ + if (hybrid_uniquify(counter)) { + if (check) + return config && config->hybrid_merge; + else + return config && !config->hybrid_merge; + } + + return false; +} + static bool collect_data(struct perf_stat_config *config, struct evsel *counter, void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, bool first), @@ -618,9 +631,9 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter, if (counter->merged_stat) return false; cb(config, counter, data, true); - if (config->no_merge || hybrid_uniquify(counter)) + if (config->no_merge || hybrid_merge(counter, config, false)) uniquify_event_name(counter); - else if (counter->auto_merge_stats) + else if (counter->auto_merge_stats || hybrid_merge(counter, config, true)) collect_all_aliases(config, counter, cb, data); return true; } @@ -751,11 +764,11 @@ static int cmp_val(const void *a, const void *b) static struct perf_aggr_thread_value *sort_aggr_thread( struct evsel *counter, - int nthreads, int ncpus, int *ret, struct target *_target) { - int cpu, thread, i = 0; + int nthreads = perf_thread_map__nr(counter->core.threads); + int i = 0; double uval; struct perf_aggr_thread_value *buf; @@ -763,13 +776,17 @@ static struct perf_aggr_thread_value *sort_aggr_thread( if (!buf) return NULL; - for (thread = 0; thread < nthreads; thread++) { + for (int thread = 0; thread < nthreads; thread++) { + int idx; u64 ena = 0, run = 0, val = 0; - for (cpu = 0; cpu < ncpus; cpu++) { - val += perf_counts(counter->counts, cpu, thread)->val; - ena += perf_counts(counter->counts, cpu, thread)->ena; - run += perf_counts(counter->counts, cpu, thread)->run; + perf_cpu_map__for_each_idx(idx, evsel__cpus(counter)) { + struct perf_counts_values *counts = + perf_counts(counter->counts, idx, thread); + + val += counts->val; + ena += counts->ena; + run += counts->run; } uval = val * counter->scale; @@ -804,13 +821,11 @@ static void print_aggr_thread(struct perf_stat_config *config, struct evsel *counter, char *prefix) { FILE *output = config->output; - int nthreads = perf_thread_map__nr(counter->core.threads); - int ncpus = perf_cpu_map__nr(counter->core.cpus); int thread, sorted_threads; struct aggr_cpu_id id; struct perf_aggr_thread_value *buf; - buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target); + buf = sort_aggr_thread(counter, &sorted_threads, _target); if (!buf) { perror("cannot sort aggr thread"); return; @@ -933,8 +948,6 @@ static void print_no_aggr_metric(struct perf_stat_config *config, struct evsel *counter; bool first = true; - if (prefix) - fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { u64 ena, run, val; double uval; @@ -946,6 +959,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { + if (prefix) + fputs(prefix, config->output); aggr_printout(config, counter, id, 0); first = false; } @@ -957,7 +972,8 @@ static void print_no_aggr_metric(struct perf_stat_config 
*config, printout(config, id, 0, counter, uval, prefix, run, ena, 1.0, &rt_stat); } - fputc('\n', config->output); + if (!first) + fputc('\n', config->output); } } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 10af7804e482..979c8cb918f7 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -26,6 +26,7 @@ struct runtime_stat rt_stat; struct stats walltime_nsecs_stats; +struct rusage_stats ru_stats; struct saved_value { struct rb_node rb_node; @@ -199,6 +200,7 @@ void perf_stat__reset_shadow_stats(void) { reset_stat(&rt_stat); memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); + memset(&ru_stats, 0, sizeof(ru_stats)); } void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) @@ -831,10 +833,31 @@ static int prepare_metric(struct evsel **metric_events, u64 metric_total = 0; int source_count; - if (!strcmp(metric_events[i]->name, "duration_time")) { - stats = &walltime_nsecs_stats; - scale = 1e-9; + if (evsel__is_tool(metric_events[i])) { source_count = 1; + switch (metric_events[i]->tool_event) { + case PERF_TOOL_DURATION_TIME: + stats = &walltime_nsecs_stats; + scale = 1e-9; + break; + case PERF_TOOL_USER_TIME: + stats = &ru_stats.ru_utime_usec_stat; + scale = 1e-6; + break; + case PERF_TOOL_SYSTEM_TIME: + stats = &ru_stats.ru_stime_usec_stat; + scale = 1e-6; + break; + case PERF_TOOL_NONE: + pr_err("Invalid tool event 'none'"); + abort(); + case PERF_TOOL_MAX: + pr_err("Invalid tool event 'max'"); + abort(); + default: + pr_err("Unknown tool event '%s'", evsel__name(metric_events[i])); + abort(); + } } else { v = saved_value_lookup(metric_events[i], cpu_map_idx, false, STAT_NONE, 0, st, diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c1af37e11f98..37ea2d044708 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -117,7 +117,9 @@ static void perf_stat_evsel_id_init(struct evsel *evsel) /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) { - if (!strcmp(evsel__name(evsel), id_str[i])) { + if (!strcmp(evsel__name(evsel), id_str[i]) || + (strstr(evsel__name(evsel), id_str[i]) && evsel->pmu_name + && strstr(evsel__name(evsel), evsel->pmu_name))) { ps->id = i; break; } @@ -235,14 +237,12 @@ void evlist__reset_prev_raw_counts(struct evlist *evlist) static void evsel__copy_prev_raw_counts(struct evsel *evsel) { - int ncpus = evsel__nr_cpus(evsel); - int nthreads = perf_thread_map__nr(evsel->core.threads); + int idx, nthreads = perf_thread_map__nr(evsel->core.threads); for (int thread = 0; thread < nthreads; thread++) { - for (int cpu = 0; cpu < ncpus; cpu++) { - *perf_counts(evsel->counts, cpu, thread) = - *perf_counts(evsel->prev_raw_counts, cpu, - thread); + perf_cpu_map__for_each_idx(idx, evsel__cpus(evsel)) { + *perf_counts(evsel->counts, idx, thread) = + *perf_counts(evsel->prev_raw_counts, idx, thread); } } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 335d19cc3063..b5aeb8e6d34b 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -108,6 +108,11 @@ struct runtime_stat { struct rblist value_list; }; +struct rusage_stats { + struct stats ru_utime_usec_stat; + struct stats ru_stime_usec_stat; +}; + typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu); struct perf_stat_config { @@ -122,6 +127,7 @@ struct perf_stat_config { bool ru_display; bool big_num; bool no_merge; + bool hybrid_merge; bool walltime_run_table; bool all_kernel; bool all_user; @@ 
-148,6 +154,7 @@ struct perf_stat_config { const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; + struct rusage_stats *ru_stats; struct cpu_aggr_map *aggr_map; aggr_get_id_t aggr_get_id; struct cpu_aggr_map *cpus_aggr_map; @@ -177,6 +184,20 @@ static inline void init_stats(struct stats *stats) stats->max = 0; } +static inline void init_rusage_stats(struct rusage_stats *ru_stats) { + init_stats(&ru_stats->ru_utime_usec_stat); + init_stats(&ru_stats->ru_stime_usec_stat); +} + +static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rusage* rusage) { + const u64 us_to_ns = 1000; + const u64 s_to_ns = 1000000000; + update_stats(&ru_stats->ru_utime_usec_stat, + (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns)); + update_stats(&ru_stats->ru_stime_usec_stat, + (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns)); +} + struct evsel; struct evlist; @@ -196,6 +217,7 @@ bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id); extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; +extern struct rusage_stats ru_stats; typedef void (*print_metric_t)(struct perf_stat_config *config, void *ctx, const char *color, const char *unit, diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index a70b3ec09dac..bc3d046fbb63 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -43,7 +43,8 @@ struct symbol_conf { report_individual_block, inline_name, disable_add2line_warn, - buildid_mmap2; + buildid_mmap2, + guest_code; const char *vmlinux_name, *kallsyms_name, *source_prefix, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 27acdc5e5723..84d17bd4efae 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -754,7 +754,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, snprintf(filename, sizeof(filename), "%s/proc/%d/task", machine->root_dir, pid); - n = scandir(filename, &dirent, filter_task, alphasort); + n = scandir(filename, &dirent, filter_task, NULL); if (n < 0) return n; @@ -767,11 +767,12 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (*end) continue; - rc = -1; + /* some threads may exit just after scan, ignore it */ if (perf_event__prepare_comm(comm_event, pid, _pid, machine, &tgid, &ppid, &kernel_thread) != 0) - break; + continue; + rc = -1; if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, ppid, process, machine) < 0) break; @@ -987,7 +988,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, return 0; snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); - n = scandir(proc_path, &dirent, filter_task, alphasort); + n = scandir(proc_path, &dirent, filter_task, NULL); if (n < 0) return err; diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c index 1081b20f9891..a369f84ceb6a 100644 --- a/tools/perf/util/topdown.c +++ b/tools/perf/util/topdown.c @@ -1,18 +1,24 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include "pmu.h" +#include "pmu-hybrid.h" #include "topdown.h" -int topdown_filter_events(const char **attr, char **str, bool use_group) +int topdown_filter_events(const char **attr, char **str, bool use_group, + const char *pmu_name) { int off = 0; int i; int len = 0; char *s; + bool is_hybrid = perf_pmu__is_hybrid(pmu_name); for (i = 0; attr[i]; i++) { - if (pmu_have_event("cpu", attr[i])) { - len += strlen(attr[i]) + 1; + if 
(pmu_have_event(pmu_name, attr[i])) { + if (is_hybrid) + len += strlen(attr[i]) + strlen(pmu_name) + 3; + else + len += strlen(attr[i]) + 1; attr[i - off] = attr[i]; } else off++; @@ -30,7 +36,10 @@ int topdown_filter_events(const char **attr, char **str, bool use_group) if (use_group) *s++ = '{'; for (i = 0; attr[i]; i++) { - strcpy(s, attr[i]); + if (!is_hybrid) + strcpy(s, attr[i]); + else + sprintf(s, "%s/%s/", pmu_name, attr[i]); s += strlen(s); *s++ = ','; } diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h index 2f0d0b887639..118e75281f93 100644 --- a/tools/perf/util/topdown.h +++ b/tools/perf/util/topdown.h @@ -7,6 +7,7 @@ bool arch_topdown_check_group(bool *warn); void arch_topdown_group_warn(void); bool arch_topdown_sample_read(struct evsel *leader); -int topdown_filter_events(const char **attr, char **str, bool use_group); +int topdown_filter_events(const char **attr, char **str, bool use_group, + const char *pmu_name); #endif diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 41e29fc7648a..81b6bd6e1536 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -169,29 +169,63 @@ static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val, __v; \ }) -static u64 elf_section_offset(int fd, const char *name) +static int elf_section_address_and_offset(int fd, const char *name, u64 *address, u64 *offset) { Elf *elf; GElf_Ehdr ehdr; GElf_Shdr shdr; - u64 offset = 0; + int ret = -1; elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); if (elf == NULL) + return -1; + + if (gelf_getehdr(elf, &ehdr) == NULL) + goto out_err; + + if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL)) + goto out_err; + + *address = shdr.sh_addr; + *offset = shdr.sh_offset; + ret = 0; +out_err: + elf_end(elf); + return ret; +} + +#ifndef NO_LIBUNWIND_DEBUG_FRAME +static u64 elf_section_offset(int fd, const char *name) +{ + u64 address, offset = 0; + + if (elf_section_address_and_offset(fd, name, &address, &offset)) return 0; - do { - if (gelf_getehdr(elf, &ehdr) == NULL) - break; + return offset; +} +#endif - if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL)) - break; +static u64 elf_base_address(int fd) +{ + Elf *elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + GElf_Phdr phdr; + u64 retval = 0; + size_t i, phdrnum = 0; - offset = shdr.sh_offset; - } while (0); + if (elf == NULL) + return 0; + (void)elf_getphdrnum(elf, &phdrnum); + /* PT_LOAD segments are sorted by p_vaddr, so the first has the minimum p_vaddr. 
*/ + for (i = 0; i < phdrnum; i++) { + if (gelf_getphdr(elf, i, &phdr) && phdr.p_type == PT_LOAD) { + retval = phdr.p_vaddr & -getpagesize(); + break; + } + } elf_end(elf); - return offset; + return retval; } #ifndef NO_LIBUNWIND_DEBUG_FRAME @@ -248,8 +282,7 @@ struct eh_frame_hdr { } __packed; static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, - u64 offset, u64 *table_data, u64 *segbase, - u64 *fde_count) + u64 offset, u64 *table_data_offset, u64 *fde_count) { struct eh_frame_hdr hdr; u8 *enc = (u8 *) &hdr.enc; @@ -265,35 +298,47 @@ static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc); *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc); - *segbase = offset; - *table_data = (enc - (u8 *) &hdr) + offset; + *table_data_offset = enc - (u8 *) &hdr; return 0; } -static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, +static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, u64 *table_data, u64 *segbase, u64 *fde_count) { - int ret = -EINVAL, fd; - u64 offset = dso->data.eh_frame_hdr_offset; + struct map *map; + u64 base_addr = UINT64_MAX; + int ret, fd; - if (offset == 0) { - fd = dso__data_get_fd(dso, machine); + if (dso->data.eh_frame_hdr_offset == 0) { + fd = dso__data_get_fd(dso, ui->machine); if (fd < 0) return -EINVAL; /* Check the .eh_frame section for unwinding info */ - offset = elf_section_offset(fd, ".eh_frame_hdr"); - dso->data.eh_frame_hdr_offset = offset; + ret = elf_section_address_and_offset(fd, ".eh_frame_hdr", + &dso->data.eh_frame_hdr_addr, + &dso->data.eh_frame_hdr_offset); + dso->data.elf_base_addr = elf_base_address(fd); dso__data_put_fd(dso); + if (ret || dso->data.eh_frame_hdr_offset == 0) + return -EINVAL; } - if (offset) - ret = unwind_spec_ehframe(dso, machine, offset, - table_data, segbase, - fde_count); - - return ret; + maps__for_each_entry(ui->thread->maps, map) { + if (map->dso == dso && map->start < base_addr) + base_addr = map->start; + } + base_addr -= dso->data.elf_base_addr; + /* Address of .eh_frame_hdr */ + *segbase = base_addr + dso->data.eh_frame_hdr_addr; + ret = unwind_spec_ehframe(dso, ui->machine, dso->data.eh_frame_hdr_offset, + table_data, fde_count); + if (ret) + return ret; + /* binary_search_table offset plus .eh_frame_hdr address */ + *table_data += *segbase; + return 0; } #ifndef NO_LIBUNWIND_DEBUG_FRAME @@ -388,14 +433,14 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); /* Check the .eh_frame section for unwinding info */ - if (!read_unwind_spec_eh_frame(map->dso, ui->machine, + if (!read_unwind_spec_eh_frame(map->dso, ui, &table_data, &segbase, &fde_count)) { memset(&di, 0, sizeof(di)); di.format = UNW_INFO_FORMAT_REMOTE_TABLE; di.start_ip = map->start; di.end_ip = map->end; - di.u.rti.segbase = map->start + segbase - map->pgoff; - di.u.rti.table_data = map->start + table_data - map->pgoff; + di.u.rti.segbase = segbase; + di.u.rti.table_data = table_data; di.u.rti.table_len = fde_count * sizeof(struct table_entry) / sizeof(unw_word_t); ret = dwarf_search_unwind_table(as, ip, &di, pi, diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index f8571a66d063..eeb83c80f458 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -430,6 +430,11 @@ void perf_debuginfod_setup(struct perf_debuginfod *di) setenv("DEBUGINFOD_URLS", di->urls, 1); pr_debug("DEBUGINFOD_URLS=%s\n", 
getenv("DEBUGINFOD_URLS")); + +#ifndef HAVE_DEBUGINFOD_SUPPORT + if (di->set) + pr_warning("WARNING: debuginfod support requested, but perf is not built with it\n"); +#endif } /* |