diff options
author | Jiri Kosina <jkosina@suse.cz> | 2022-01-10 09:49:13 +0100 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2022-01-10 09:49:13 +0100 |
commit | 8a2094d679d921d104d3296528d4fa419702ce1c (patch) | |
tree | 5feeb2f8b94eb3632109b778276ccd1007486770 /tools/perf/util | |
parent | 3809fe479861194e310c23ed48b010c7c0f72d22 (diff) | |
parent | b60d3c803d7603432a08aeaf988aff53b3a5ec64 (diff) |
Merge branch 'for-5.17/core' into for-linus
- support for USI style pens (Tero Kristo, Mika Westerberg)
- quirk for devices that need inverted X/Y axes (Alistair Francis)
- small core code cleanups and deduplication (Benjamin Tissoires)
Diffstat (limited to 'tools/perf/util')
80 files changed, 3381 insertions, 1437 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index f2914d5bed6e..2e5bfbb69960 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -138,6 +138,7 @@ perf-y += expr.o perf-y += branch.o perf-y += mem2node.o perf-y += clockid.o +perf-y += list_sort.o perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o @@ -201,6 +202,7 @@ endif perf-y += perf-hooks.o perf-$(CONFIG_LIBBPF) += bpf-event.o +perf-$(CONFIG_LIBBPF) += bpf-utils.o perf-$(CONFIG_CXX) += c++/ @@ -315,3 +317,7 @@ $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE $(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0bae061b2d6d..01900689dc00 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -28,6 +28,7 @@ #include "evsel.h" #include "evlist.h" #include "bpf-event.h" +#include "bpf-utils.h" #include "block-range.h" #include "string2.h" #include "util/event.h" @@ -151,6 +152,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/mips/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" +#include "arch/riscv64/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" #include "arch/sparc/annotate/instructions.c" @@ -183,7 +185,6 @@ static struct arch architectures[] = { .init = x86__annotate_init, .instructions = x86__instructions, .nr_instructions = ARRAY_SIZE(x86__instructions), - .ins_is_fused = x86__ins_is_fused, .objdump = { .comment_char = '#', }, @@ -193,6 +194,10 @@ static struct arch architectures[] = { .init = powerpc__annotate_init, }, { + .name = "riscv64", + .init = riscv64__annotate_init, + }, + { .name = "s390", .init = s390__annotate_init, .objdump = { @@ -1250,6 +1255,17 @@ int 
disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); } +void annotation__init(struct annotation *notes) +{ + pthread_mutex_init(&notes->lock, NULL); +} + +void annotation__exit(struct annotation *notes) +{ + annotated_source__delete(notes->src); + pthread_mutex_destroy(&notes->lock); +} + static void annotation_line__add(struct annotation_line *al, struct list_head *head) { list_add_tail(&al->node, head); @@ -1700,12 +1716,12 @@ static int symbol__disassemble_bpf(struct symbol *sym, { struct annotation *notes = symbol__annotation(sym); struct annotation_options *opts = args->options; - struct bpf_prog_info_linear *info_linear; struct bpf_prog_linfo *prog_linfo = NULL; struct bpf_prog_info_node *info_node; int len = sym->end - sym->start; disassembler_ftype disassemble; struct map *map = args->ms.map; + struct perf_bpil *info_linear; struct disassemble_info info; struct dso *dso = map->dso; int pc = 0, count, sub_id; @@ -3127,7 +3143,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, notes->nr_events = nr_pcnt; annotation__update_column_widths(notes); - sym->annotate2 = true; + sym->annotate2 = 1; return 0; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 3757416bcf46..986f2bbe4870 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -299,6 +299,9 @@ struct annotation { struct annotated_source *src; }; +void annotation__init(struct annotation *notes); +void annotation__exit(struct annotation *notes); + static inline int annotation__cycles_width(struct annotation *notes) { if (notes->have_cycles && notes->options->show_minmax_cycle) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 32fe41835fa6..3fc528c9270c 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -151,6 +151,7 @@ 
static int arm_spe_read_record(struct arm_spe_decoder *decoder) u64 payload, ip; memset(&decoder->record, 0x0, sizeof(decoder->record)); + decoder->record.context_id = (u64)-1; while (1) { err = arm_spe_get_next_packet(decoder); @@ -180,6 +181,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) case ARM_SPE_COUNTER: break; case ARM_SPE_CONTEXT: + decoder->record.context_id = payload; break; case ARM_SPE_OP_TYPE: if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) { diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 59bdb7309674..46a8556a9e95 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -38,6 +38,7 @@ struct arm_spe_record { u64 timestamp; u64 virt_addr; u64 phys_addr; + u64 context_id; }; struct arm_spe_insn; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 2e5eff4f8f03..2f311189c6e8 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -13,7 +13,7 @@ #include "arm-spe-pkt-decoder.h" -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 #define le64_to_cpu bswap_64 diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 58b7069c5a5f..fccac06b573a 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -51,6 +51,7 @@ struct arm_spe { u8 timeless_decoding; u8 data_queued; + u64 sample_type; u8 sample_flc; u8 sample_llc; u8 sample_tlb; @@ -71,6 +72,7 @@ struct arm_spe { u64 kernel_start; unsigned long num_events; + u8 use_ctx_pkt_for_pid; }; struct arm_spe_queue { @@ -100,7 +102,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused, const char *color = PERF_COLOR_BLUE; color_fprintf(stdout, color, - ". ... 
ARM SPE data: size %zu bytes\n", + ". ... ARM SPE data: size %#zx bytes\n", len); while (len) { @@ -226,6 +228,44 @@ static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) PERF_RECORD_MISC_USER; } +static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, + struct auxtrace_queue *queue) +{ + struct arm_spe_queue *speq = queue->priv; + pid_t tid; + + tid = machine__get_current_tid(spe->machine, speq->cpu); + if (tid != -1) { + speq->tid = tid; + thread__zput(speq->thread); + } else + speq->tid = queue->tid; + + if ((!speq->thread) && (speq->tid != -1)) { + speq->thread = machine__find_thread(spe->machine, -1, + speq->tid); + } + + if (speq->thread) { + speq->pid = speq->thread->pid_; + if (queue->cpu == -1) + speq->cpu = speq->thread->cpu; + } +} + +static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) +{ + struct arm_spe *spe = speq->spe; + int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid); + + if (err) + return err; + + arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]); + + return 0; +} + static void arm_spe_prep_sample(struct arm_spe *spe, struct arm_spe_queue *speq, union perf_event *event, @@ -248,6 +288,12 @@ static void arm_spe_prep_sample(struct arm_spe *spe, event->sample.header.size = sizeof(struct perf_event_header); } +static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample); +} + static inline int arm_spe_deliver_synth_event(struct arm_spe *spe, struct arm_spe_queue *speq __maybe_unused, @@ -256,6 +302,12 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, { int ret; + if (spe->synth_opts.inject) { + ret = arm_spe__inject_event(event, sample, spe->sample_type); + if (ret) + return ret; + } + ret = perf_session__deliver_synth_event(spe->session, event, sample); if (ret) pr_err("ARM SPE: failed to deliver event, error %d\n", 
ret); @@ -460,6 +512,19 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) * can correlate samples between Arm SPE trace data and other * perf events with correct time ordering. */ + + /* + * Update pid/tid info. + */ + record = &speq->decoder->record; + if (!spe->timeless_decoding && record->context_id != (u64)-1) { + ret = arm_spe_set_tid(speq, record->context_id); + if (ret) + return ret; + + spe->use_ctx_pkt_for_pid = true; + } + ret = arm_spe_sample(speq); if (ret) return ret; @@ -586,31 +651,6 @@ static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) return timeless_decoding; } -static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, - struct auxtrace_queue *queue) -{ - struct arm_spe_queue *speq = queue->priv; - pid_t tid; - - tid = machine__get_current_tid(spe->machine, speq->cpu); - if (tid != -1) { - speq->tid = tid; - thread__zput(speq->thread); - } else - speq->tid = queue->tid; - - if ((!speq->thread) && (speq->tid != -1)) { - speq->thread = machine__find_thread(spe->machine, -1, - speq->tid); - } - - if (speq->thread) { - speq->pid = speq->thread->pid_; - if (queue->cpu == -1) - speq->cpu = speq->thread->cpu; - } -} - static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) { unsigned int queue_nr; @@ -641,7 +681,12 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) ts = timestamp; } - arm_spe_set_pid_tid_cpu(spe, queue); + /* + * A previous context-switch event has set pid/tid in the machine's context, so + * here we need to update the pid/tid in the thread and SPE queue. 
+ */ + if (!spe->use_ctx_pkt_for_pid) + arm_spe_set_pid_tid_cpu(spe, queue); ret = arm_spe_run_decoder(speq, &ts); if (ret < 0) { @@ -681,6 +726,25 @@ static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, return 0; } +static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, + struct perf_sample *sample) +{ + pid_t pid, tid; + int cpu; + + if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT)) + return 0; + + pid = event->context_switch.next_prev_pid; + tid = event->context_switch.next_prev_tid; + cpu = sample->cpu; + + if (tid == -1) + pr_warning("context_switch event has no tid\n"); + + return machine__set_current_tid(spe->machine, cpu, pid, tid); +} + static int arm_spe_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -718,6 +782,13 @@ static int arm_spe_process_event(struct perf_session *session, } } else if (timestamp) { err = arm_spe_process_queues(spe, timestamp); + if (err) + return err; + + if (!spe->use_ctx_pkt_for_pid && + (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || + event->header.type == PERF_RECORD_SWITCH)) + err = arm_spe_context_switch(spe, event, sample); } return err; @@ -783,7 +854,15 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, return arm_spe_process_timeless_queues(spe, -1, MAX_TIMESTAMP - 1); - return arm_spe_process_queues(spe, MAX_TIMESTAMP); + ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); + if (ret) + return ret; + + if (!spe->use_ctx_pkt_for_pid) + ui__warning("Arm SPE CONTEXT packets not found in the traces.\n" + "Matching of TIDs to SPE events could be inaccurate.\n"); + + return 0; } static void arm_spe_free_queue(void *priv) @@ -920,6 +999,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) else attr.sample_type |= PERF_SAMPLE_TIME; + spe->sample_type = attr.sample_type; + attr.exclude_user = evsel->core.attr.exclude_user; attr.exclude_kernel = 
evsel->core.attr.exclude_kernel; attr.exclude_hv = evsel->core.attr.exclude_hv; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 8d2865b9ade2..c679394b898d 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1564,6 +1564,9 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, case 'q': synth_opts->quick += 1; break; + case 'A': + synth_opts->approx_ipc = true; + break; case 'Z': synth_opts->timeless_decoding = true; break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 5f383908ca6e..bbf0d78c6401 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -59,6 +59,7 @@ enum itrace_period_type { #define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a')) #define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a')) +#define AUXTRACE_LOG_FLG_USE_STDOUT (1 << ('o' - 'a')) /** * struct itrace_synth_opts - AUX area tracing synthesis options. @@ -84,6 +85,7 @@ enum itrace_period_type { * @thread_stack: feed branches to the thread_stack * @last_branch: add branch context to 'instruction' events * @add_last_branch: add branch context to existing event records + * @approx_ipc: approximate IPC * @flc: whether to synthesize first level cache events * @llc: whether to synthesize last level cache events * @tlb: whether to synthesize TLB events @@ -127,6 +129,7 @@ struct itrace_synth_opts { bool thread_stack; bool last_branch; bool add_last_branch; + bool approx_ipc; bool flc; bool llc; bool tlb; @@ -639,6 +642,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " d[flags]: create a debug log\n" \ " each flag must be preceded by + or -\n" \ " log flags are: a (all perf events)\n" \ +" o (output to stdout)\n" \ " f: synthesize first level cache events\n" \ " m: synthesize last level cache events\n" \ " t: synthesize TLB events\n" \ @@ -649,6 +653,8 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " L[len]: synthesize last branch entries on 
existing event records\n" \ " sNUMBER: skip initial number of events\n" \ " q: quicker (less detailed) decoding\n" \ +" A: approximate IPC\n" \ +" Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. Default is ibxwpe or cewp\n" diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 388847bab6d9..a517eaa51eb3 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -10,6 +10,7 @@ #include <internal/lib.h> #include <symbol/kallsyms.h> #include "bpf-event.h" +#include "bpf-utils.h" #include "debug.h" #include "dso.h" #include "symbol.h" @@ -32,7 +33,32 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id) return err ? ERR_PTR(err) : btf; } -#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) +struct bpf_program * __weak +bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) +{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return bpf_program__next(prev, obj); +#pragma GCC diagnostic pop +} + +struct bpf_map * __weak +bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) +{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return bpf_map__next(prev, obj); +#pragma GCC diagnostic pop +} + +const void * __weak +btf__raw_data(const struct btf *btf_ro, __u32 *size) +{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return btf__get_raw_data(btf_ro, size); +#pragma GCC diagnostic pop +} static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) { @@ -48,9 +74,9 @@ static int machine__process_bpf_event_load(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { - struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; struct perf_env *env = machine->env; 
+ struct perf_bpil *info_linear; int id = event->bpf.id; unsigned int i; @@ -120,7 +146,11 @@ static int perf_env__fetch_btf(struct perf_env *env, node->data_size = data_size; memcpy(node->data, data, data_size); - perf_env__insert_btf(env, node); + if (!perf_env__insert_btf(env, node)) { + /* Insertion failed because of a duplicate. */ + free(node); + return -1; + } return 0; } @@ -175,9 +205,9 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, { struct perf_record_ksymbol *ksymbol_event = &event->ksymbol; struct perf_record_bpf_event *bpf_event = &event->bpf; - struct bpf_prog_info_linear *info_linear; struct perf_tool *tool = session->tool; struct bpf_prog_info_node *info_node; + struct perf_bpil *info_linear; struct bpf_prog_info *info; struct btf *btf = NULL; struct perf_env *env; @@ -191,15 +221,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, */ env = session->data ? &session->header.env : &perf_env; - arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; - arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; - arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; - arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; - arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; - arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; - arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; + arrays = 1UL << PERF_BPIL_JITED_KSYMS; + arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; + arrays |= 1UL << PERF_BPIL_FUNC_INFO; + arrays |= 1UL << PERF_BPIL_PROG_TAGS; + arrays |= 1UL << PERF_BPIL_JITED_INSNS; + arrays |= 1UL << PERF_BPIL_LINE_INFO; + arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; - info_linear = bpf_program__get_prog_info_linear(fd, arrays); + info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { info_linear = NULL; pr_debug("%s: failed to get BPF program info. 
aborting\n", __func__); @@ -452,8 +482,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, static void perf_env__add_bpf_info(struct perf_env *env, u32 id) { - struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; + struct perf_bpil *info_linear; struct btf *btf = NULL; u64 arrays; u32 btf_id; @@ -463,15 +493,15 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) if (fd < 0) return; - arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; - arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; - arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; - arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; - arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; - arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; - arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; + arrays = 1UL << PERF_BPIL_JITED_KSYMS; + arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; + arrays |= 1UL << PERF_BPIL_FUNC_INFO; + arrays |= 1UL << PERF_BPIL_PROG_TAGS; + arrays |= 1UL << PERF_BPIL_JITED_INSNS; + arrays |= 1UL << PERF_BPIL_LINE_INFO; + arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; - info_linear = bpf_program__get_prog_info_linear(fd, arrays); + info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("%s: failed to get BPF program info. 
aborting\n", __func__); goto out; @@ -576,7 +606,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, 0); fprintf(fp, "# bpf_prog_info %u: %s addr 0x%llx size %u\n", info->id, name, prog_addrs[0], prog_lens[0]); - return; + goto out; } fprintf(fp, "# bpf_prog_info %u:\n", info->id); @@ -586,4 +616,6 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, fprintf(fp, "# \tsub_prog %u: %s addr 0x%llx size %u\n", i, name, prog_addrs[i], prog_lens[i]); } +out: + btf__free(btf); } diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 68f315c3df5b..144a8a24cc69 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -19,7 +19,7 @@ struct evlist; struct target; struct bpf_prog_info_node { - struct bpf_prog_info_linear *info_linear; + struct perf_bpil *info_linear; struct rb_node rb_node; }; diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c new file mode 100644 index 000000000000..e271e05e51bc --- /dev/null +++ b/tools/perf/util/bpf-utils.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <errno.h> +#include <stdlib.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <bpf/bpf.h> +#include "bpf-utils.h" +#include "debug.h" + +struct bpil_array_desc { + int array_offset; /* e.g. offset of jited_prog_insns */ + int count_offset; /* e.g. 
offset of jited_prog_len */ + int size_offset; /* > 0: offset of rec size, + * < 0: fix size of -size_offset + */ +}; + +static struct bpil_array_desc bpil_array_desc[] = { + [PERF_BPIL_JITED_INSNS] = { + offsetof(struct bpf_prog_info, jited_prog_insns), + offsetof(struct bpf_prog_info, jited_prog_len), + -1, + }, + [PERF_BPIL_XLATED_INSNS] = { + offsetof(struct bpf_prog_info, xlated_prog_insns), + offsetof(struct bpf_prog_info, xlated_prog_len), + -1, + }, + [PERF_BPIL_MAP_IDS] = { + offsetof(struct bpf_prog_info, map_ids), + offsetof(struct bpf_prog_info, nr_map_ids), + -(int)sizeof(__u32), + }, + [PERF_BPIL_JITED_KSYMS] = { + offsetof(struct bpf_prog_info, jited_ksyms), + offsetof(struct bpf_prog_info, nr_jited_ksyms), + -(int)sizeof(__u64), + }, + [PERF_BPIL_JITED_FUNC_LENS] = { + offsetof(struct bpf_prog_info, jited_func_lens), + offsetof(struct bpf_prog_info, nr_jited_func_lens), + -(int)sizeof(__u32), + }, + [PERF_BPIL_FUNC_INFO] = { + offsetof(struct bpf_prog_info, func_info), + offsetof(struct bpf_prog_info, nr_func_info), + offsetof(struct bpf_prog_info, func_info_rec_size), + }, + [PERF_BPIL_LINE_INFO] = { + offsetof(struct bpf_prog_info, line_info), + offsetof(struct bpf_prog_info, nr_line_info), + offsetof(struct bpf_prog_info, line_info_rec_size), + }, + [PERF_BPIL_JITED_LINE_INFO] = { + offsetof(struct bpf_prog_info, jited_line_info), + offsetof(struct bpf_prog_info, nr_jited_line_info), + offsetof(struct bpf_prog_info, jited_line_info_rec_size), + }, + [PERF_BPIL_PROG_TAGS] = { + offsetof(struct bpf_prog_info, prog_tags), + offsetof(struct bpf_prog_info, nr_prog_tags), + -(int)sizeof(__u8) * BPF_TAG_SIZE, + }, + +}; + +static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, + int offset) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u32)]; + return -(int)offset; +} + +static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, + int offset) +{ + __u64 *array = (__u64 *)info; + + if 
(offset >= 0) + return array[offset / sizeof(__u64)]; + return -(int)offset; +} + +static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset, + __u32 val) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + array[offset / sizeof(__u32)] = val; +} + +static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset, + __u64 val) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + array[offset / sizeof(__u64)] = val; +} + +struct perf_bpil * +get_bpf_prog_info_linear(int fd, __u64 arrays) +{ + struct bpf_prog_info info = {}; + struct perf_bpil *info_linear; + __u32 info_len = sizeof(info); + __u32 data_len = 0; + int i, err; + void *ptr; + + if (arrays >> PERF_BPIL_LAST_ARRAY) + return ERR_PTR(-EINVAL); + + /* step 1: get array dimensions */ + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + return ERR_PTR(-EFAULT); + } + + /* step 2: calculate total size of all arrays */ + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + bool include_array = (arrays & (1UL << i)) > 0; + struct bpil_array_desc *desc; + __u32 count, size; + + desc = bpil_array_desc + i; + + /* kernel is too old to support this field */ + if (info_len < desc->array_offset + sizeof(__u32) || + info_len < desc->count_offset + sizeof(__u32) || + (desc->size_offset > 0 && info_len < (__u32)desc->size_offset)) + include_array = false; + + if (!include_array) { + arrays &= ~(1UL << i); /* clear the bit */ + continue; + } + + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + + data_len += count * size; + } + + /* step 3: allocate continuous memory */ + data_len = roundup(data_len, sizeof(__u64)); + info_linear = malloc(sizeof(struct perf_bpil) + data_len); + if (!info_linear) + return ERR_PTR(-ENOMEM); + + /* step 4: fill data to info_linear->info */ + info_linear->arrays = arrays; + 
memset(&info_linear->info, 0, sizeof(info)); + ptr = info_linear->data; + + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u32 count, size; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpil_array_desc + i; + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->count_offset, count); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->size_offset, size); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, + ptr_to_u64(ptr)); + ptr += count * size; + } + + /* step 5: call syscall again to get required arrays */ + err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + free(info_linear); + return ERR_PTR(-EFAULT); + } + + /* step 6: verify the data */ + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u32 v1, v2; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpil_array_desc + i; + v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->count_offset); + if (v1 != v2) + pr_warning("%s: mismatch in element count\n", __func__); + + v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->size_offset); + if (v1 != v2) + pr_warning("%s: mismatch in rec size\n", __func__); + } + + /* step 7: update info_len and data_len */ + info_linear->info_len = sizeof(struct bpf_prog_info); + info_linear->data_len = data_len; + + return info_linear; +} + +void bpil_addr_to_offs(struct perf_bpil *info_linear) +{ + int i; + + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << 
i)) == 0) + continue; + + desc = bpil_array_desc + i; + addr = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + offs = addr - ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, offs); + } +} + +void bpil_offs_to_addr(struct perf_bpil *info_linear) +{ + int i; + + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpil_array_desc + i; + offs = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + addr = offs + ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, addr); + } +} diff --git a/tools/perf/util/bpf-utils.h b/tools/perf/util/bpf-utils.h new file mode 100644 index 000000000000..86a5055cdfad --- /dev/null +++ b/tools/perf/util/bpf-utils.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __PERF_BPF_UTILS_H +#define __PERF_BPF_UTILS_H + +#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) + +#ifdef HAVE_LIBBPF_SUPPORT + +#include <bpf/libbpf.h> + +/* + * Get bpf_prog_info in continuous memory + * + * struct bpf_prog_info has multiple arrays. The user has option to choose + * arrays to fetch from kernel. The following APIs provide an uniform way to + * fetch these data. All arrays in bpf_prog_info are stored in a single + * continuous memory region. This makes it easy to store the info in a + * file. + * + * Before writing perf_bpil to files, it is necessary to + * translate pointers in bpf_prog_info to offsets. Helper functions + * bpil_addr_to_offs() and bpil_offs_to_addr() + * are introduced to switch between pointers and offsets. 
+ * + * Examples: + * # To fetch map_ids and prog_tags: + * __u64 arrays = (1UL << PERF_BPIL_MAP_IDS) | + * (1UL << PERF_BPIL_PROG_TAGS); + * struct perf_bpil *info_linear = + * get_bpf_prog_info_linear(fd, arrays); + * + * # To save data in file + * bpil_addr_to_offs(info_linear); + * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len); + * + * # To read data from file + * read(f, info_linear, <proper_size>); + * bpil_offs_to_addr(info_linear); + */ +enum perf_bpil_array_types { + PERF_BPIL_FIRST_ARRAY = 0, + PERF_BPIL_JITED_INSNS = 0, + PERF_BPIL_XLATED_INSNS, + PERF_BPIL_MAP_IDS, + PERF_BPIL_JITED_KSYMS, + PERF_BPIL_JITED_FUNC_LENS, + PERF_BPIL_FUNC_INFO, + PERF_BPIL_LINE_INFO, + PERF_BPIL_JITED_LINE_INFO, + PERF_BPIL_PROG_TAGS, + PERF_BPIL_LAST_ARRAY, +}; + +struct perf_bpil { + /* size of struct bpf_prog_info, when the tool is compiled */ + __u32 info_len; + /* total bytes allocated for data, round up to 8 bytes */ + __u32 data_len; + /* which arrays are included in data */ + __u64 arrays; + struct bpf_prog_info info; + __u8 data[]; +}; + +struct perf_bpil * +get_bpf_prog_info_linear(int fd, __u64 arrays); + +void +bpil_addr_to_offs(struct perf_bpil *info_linear); + +void +bpil_offs_to_addr(struct perf_bpil *info_linear); + +#endif /* HAVE_LIBBPF_SUPPORT */ +#endif /* __PERF_BPF_UTILS_H */ diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index ba0f20853651..c17d4a43ce06 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -13,6 +13,7 @@ #include <perf/bpf_perf.h> #include "bpf_counter.h" +#include "bpf-utils.h" #include "counts.h" #include "debug.h" #include "evsel.h" @@ -61,14 +62,13 @@ static int bpf_program_profiler__destroy(struct evsel *evsel) static char *bpf_target_prog_name(int tgt_fd) { - struct bpf_prog_info_linear *info_linear; struct bpf_func_info *func_info; + struct perf_bpil *info_linear; const struct btf_type *t; struct btf *btf = NULL; char *name = NULL; - info_linear = 
bpf_program__get_prog_info_linear( - tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); + info_linear = get_bpf_prog_info_linear(tgt_fd, 1UL << PERF_BPIL_FUNC_INFO); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd); return NULL; @@ -127,9 +127,9 @@ static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id) skel->rodata->num_cpu = evsel__nr_cpus(evsel); - bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel)); - bpf_map__resize(skel->maps.fentry_readings, 1); - bpf_map__resize(skel->maps.accum_readings, 1); + bpf_map__set_max_entries(skel->maps.events, evsel__nr_cpus(evsel)); + bpf_map__set_max_entries(skel->maps.fentry_readings, 1); + bpf_map__set_max_entries(skel->maps.accum_readings, 1); prog_name = bpf_target_prog_name(prog_fd); if (!prog_name) { @@ -399,7 +399,7 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, return -1; } - bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus()); + bpf_map__set_max_entries(skel->maps.events, libbpf_num_possible_cpus()); err = bperf_leader_bpf__load(skel); if (err) { pr_err("Failed to load leader skeleton\n"); diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index 89aa5e71db1a..cbc6c2bca488 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -65,14 +65,14 @@ static int bperf_load_program(struct evlist *evlist) /* we need one copy of events per cpu for reading */ map_size = total_cpus * evlist->core.nr_entries / nr_cgroups; - bpf_map__resize(skel->maps.events, map_size); - bpf_map__resize(skel->maps.cgrp_idx, nr_cgroups); + bpf_map__set_max_entries(skel->maps.events, map_size); + bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups); /* previous result is saved in a per-cpu array */ map_size = evlist->core.nr_entries / nr_cgroups; - bpf_map__resize(skel->maps.prev_readings, map_size); + bpf_map__set_max_entries(skel->maps.prev_readings, map_size); 
/* cgroup result needs all events (per-cpu) */ map_size = evlist->core.nr_entries; - bpf_map__resize(skel->maps.cgrp_readings, map_size); + bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size); set_max_rlimit(); diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h index 2df8a45bd088..d3731a876b6c 100644 --- a/tools/perf/util/c++/clang-c.h +++ b/tools/perf/util/c++/clang-c.h @@ -12,8 +12,9 @@ extern "C" { extern void perf_clang__init(void); extern void perf_clang__cleanup(void); -extern int test__clang_to_IR(void); -extern int test__clang_to_obj(void); +struct test_suite; +extern int test__clang_to_IR(struct test_suite *test, int subtest); +extern int test__clang_to_obj(struct test_suite *test, int subtest); extern int perf_clang__compile_bpf(const char *filename, void **p_obj_buf, @@ -26,9 +27,6 @@ extern int perf_clang__compile_bpf(const char *filename, static inline void perf_clang__init(void) { } static inline void perf_clang__cleanup(void) { } -static inline int test__clang_to_IR(void) { return -1; } -static inline int test__clang_to_obj(void) { return -1;} - static inline int perf_clang__compile_bpf(const char *filename __maybe_unused, void **p_obj_buf __maybe_unused, diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp index 21b23605f78b..a4683ca53697 100644 --- a/tools/perf/util/c++/clang-test.cpp +++ b/tools/perf/util/c++/clang-test.cpp @@ -35,7 +35,8 @@ __test__clang_to_IR(void) } extern "C" { -int test__clang_to_IR(void) +int test__clang_to_IR(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) { perf_clang_scope _scope; @@ -48,7 +49,8 @@ int test__clang_to_IR(void) return -1; } -int test__clang_to_obj(void) +int test__clang_to_obj(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) { perf_clang_scope _scope; diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index c8885dfa3667..df7b18fb6b6e 100644 --- a/tools/perf/util/c++/clang.cpp 
+++ b/tools/perf/util/c++/clang.cpp @@ -43,8 +43,6 @@ createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, "-cc1", "-triple", "bpf-pc-linux", "-fsyntax-only", - "-ferror-limit", "19", - "-fmessage-length", "127", "-O2", "-nostdsysteminc", "-nobuiltininc", @@ -55,7 +53,11 @@ createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, "-x", "c"}; CCArgs.append(CFlags.begin(), CFlags.end()); - CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs); + CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs +#if CLANG_VERSION_MAJOR >= 11 + ,/*BinaryName=*/nullptr +#endif + ); FrontendOptions& Opts = CI->getFrontendOpts(); Opts.Inputs.clear(); @@ -151,13 +153,16 @@ getBPFObjectFromModule(llvm::Module *Module) legacy::PassManager PM; bool NotAdded; -#if CLANG_VERSION_MAJOR < 7 - NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, - TargetMachine::CGFT_ObjectFile); + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream +#if CLANG_VERSION_MAJOR >= 7 + , /*DwoOut=*/nullptr +#endif +#if CLANG_VERSION_MAJOR < 10 + , TargetMachine::CGFT_ObjectFile #else - NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, nullptr, - TargetMachine::CGFT_ObjectFile); + , llvm::CGFT_ObjectFile #endif + ); if (NotAdded) { llvm::errs() << "TargetMachine can't emit a file of this type\n"; return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr); diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index ec77e2a7b3ca..51b429c86f98 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -14,14 +14,16 @@ #include "env.h" #include "pmu-hybrid.h" -#define CORE_SIB_FMT \ +#define PACKAGE_CPUS_FMT \ + "%s/devices/system/cpu/cpu%d/topology/package_cpus_list" +#define PACKAGE_CPUS_FMT_OLD \ "%s/devices/system/cpu/cpu%d/topology/core_siblings_list" -#define DIE_SIB_FMT \ +#define DIE_CPUS_FMT \ "%s/devices/system/cpu/cpu%d/topology/die_cpus_list" -#define THRD_SIB_FMT \ - 
"%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" -#define THRD_SIB_FMT_NEW \ +#define CORE_CPUS_FMT \ "%s/devices/system/cpu/cpu%d/topology/core_cpus_list" +#define CORE_CPUS_FMT_OLD \ + "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" #define NODE_ONLINE_FMT \ "%s/devices/system/node/online" #define NODE_MEMINFO_FMT \ @@ -39,8 +41,12 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) u32 i = 0; int ret = -1; - scnprintf(filename, MAXPATHLEN, CORE_SIB_FMT, + scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT, sysfs__mountpoint(), cpu); + if (access(filename, F_OK) == -1) { + scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT_OLD, + sysfs__mountpoint(), cpu); + } fp = fopen(filename, "r"); if (!fp) goto try_dies; @@ -54,23 +60,23 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) if (p) *p = '\0'; - for (i = 0; i < tp->core_sib; i++) { - if (!strcmp(buf, tp->core_siblings[i])) + for (i = 0; i < tp->package_cpus_lists; i++) { + if (!strcmp(buf, tp->package_cpus_list[i])) break; } - if (i == tp->core_sib) { - tp->core_siblings[i] = buf; - tp->core_sib++; + if (i == tp->package_cpus_lists) { + tp->package_cpus_list[i] = buf; + tp->package_cpus_lists++; buf = NULL; len = 0; } ret = 0; try_dies: - if (!tp->die_siblings) + if (!tp->die_cpus_list) goto try_threads; - scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, + scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT, sysfs__mountpoint(), cpu); fp = fopen(filename, "r"); if (!fp) @@ -85,23 +91,23 @@ try_dies: if (p) *p = '\0'; - for (i = 0; i < tp->die_sib; i++) { - if (!strcmp(buf, tp->die_siblings[i])) + for (i = 0; i < tp->die_cpus_lists; i++) { + if (!strcmp(buf, tp->die_cpus_list[i])) break; } - if (i == tp->die_sib) { - tp->die_siblings[i] = buf; - tp->die_sib++; + if (i == tp->die_cpus_lists) { + tp->die_cpus_list[i] = buf; + tp->die_cpus_lists++; buf = NULL; len = 0; } ret = 0; try_threads: - scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW, + scnprintf(filename, MAXPATHLEN, 
CORE_CPUS_FMT, sysfs__mountpoint(), cpu); if (access(filename, F_OK) == -1) { - scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, + scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT_OLD, sysfs__mountpoint(), cpu); } fp = fopen(filename, "r"); @@ -115,13 +121,13 @@ try_threads: if (p) *p = '\0'; - for (i = 0; i < tp->thread_sib; i++) { - if (!strcmp(buf, tp->thread_siblings[i])) + for (i = 0; i < tp->core_cpus_lists; i++) { + if (!strcmp(buf, tp->core_cpus_list[i])) break; } - if (i == tp->thread_sib) { - tp->thread_siblings[i] = buf; - tp->thread_sib++; + if (i == tp->core_cpus_lists) { + tp->core_cpus_list[i] = buf; + tp->core_cpus_lists++; buf = NULL; } ret = 0; @@ -139,16 +145,14 @@ void cpu_topology__delete(struct cpu_topology *tp) if (!tp) return; - for (i = 0 ; i < tp->core_sib; i++) - zfree(&tp->core_siblings[i]); + for (i = 0 ; i < tp->package_cpus_lists; i++) + zfree(&tp->package_cpus_list[i]); - if (tp->die_sib) { - for (i = 0 ; i < tp->die_sib; i++) - zfree(&tp->die_siblings[i]); - } + for (i = 0 ; i < tp->die_cpus_lists; i++) + zfree(&tp->die_cpus_list[i]); - for (i = 0 ; i < tp->thread_sib; i++) - zfree(&tp->thread_siblings[i]); + for (i = 0 ; i < tp->core_cpus_lists; i++) + zfree(&tp->core_cpus_list[i]); free(tp); } @@ -164,7 +168,7 @@ static bool has_die_topology(void) if (strncmp(uts.machine, "x86_64", 6)) return false; - scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, + scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT, sysfs__mountpoint(), 0); if (access(filename, F_OK) == -1) return false; @@ -205,13 +209,13 @@ struct cpu_topology *cpu_topology__new(void) tp = addr; addr += sizeof(*tp); - tp->core_siblings = addr; + tp->package_cpus_list = addr; addr += sz; if (has_die) { - tp->die_siblings = addr; + tp->die_cpus_list = addr; addr += sz; } - tp->thread_siblings = addr; + tp->core_cpus_list = addr; for (i = 0; i < nr; i++) { if (!cpu_map__has(map, i)) diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h index d9af97177068..854e18f9041e 100644 --- 
a/tools/perf/util/cputopo.h +++ b/tools/perf/util/cputopo.h @@ -5,12 +5,33 @@ #include <linux/types.h> struct cpu_topology { - u32 core_sib; - u32 die_sib; - u32 thread_sib; - char **core_siblings; - char **die_siblings; - char **thread_siblings; + /* The number of unique package_cpus_lists below. */ + u32 package_cpus_lists; + /* The number of unique die_cpu_lists below. */ + u32 die_cpus_lists; + /* The number of unique core_cpu_lists below. */ + u32 core_cpus_lists; + /* + * An array of strings where each string is unique and read from + * /sys/devices/system/cpu/cpuX/topology/package_cpus_list. From the ABI + * each of these is a human-readable list of CPUs sharing the same + * physical_package_id. The format is like 0-3, 8-11, 14,17. + */ + const char **package_cpus_list; + /* + * An array of string where each string is unique and from + * /sys/devices/system/cpu/cpuX/topology/die_cpus_list. From the ABI + * each of these is a human-readable list of CPUs within the same die. + * The format is like 0-3, 8-11, 14,17. + */ + const char **die_cpus_list; + /* + * An array of string where each string is unique and from + * /sys/devices/system/cpu/cpuX/topology/core_cpus_list. From the ABI + * each of these is a human-readable list of CPUs within the same + * core. The format is like 0-3, 8-11, 14,17. + */ + const char **core_cpus_list; }; struct numa_topology_node { diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f323adb1af85..4f672f7d008c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -537,7 +537,7 @@ static void cs_etm__dump_event(struct cs_etm_queue *etmq, fprintf(stdout, "\n"); color_fprintf(stdout, color, - ". ... CoreSight %s Trace data: size %zu bytes\n", + ". ... 
CoreSight %s Trace data: size %#zx bytes\n", cs_etm_decoder__get_name(etmq->decoder), buffer->size); do { diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index aa862a26d95c..8f7705bbc2da 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1437,7 +1437,7 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) goto err; -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN); #else bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2c06abf6dcd2..c7a9fa0ffae9 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -24,6 +24,16 @@ #include "util/parse-sublevel-options.h" #include <linux/ctype.h> +#include <traceevent/event-parse.h> + +#define MAKE_LIBTRACEEVENT_VERSION(a, b, c) ((a)*255*255+(b)*255+(c)) +#ifndef LIBTRACEEVENT_VERSION +/* + * If LIBTRACEEVENT_VERSION wasn't computed then set to version 1.1.0 that ships + * with the Linux kernel tools. + */ +#define LIBTRACEEVENT_VERSION MAKE_LIBTRACEEVENT_VERSION(1, 1, 0) +#endif int verbose; int debug_peo_args; @@ -228,6 +238,15 @@ int perf_debug_option(const char *str) /* Allow only verbose value in range (0, 10), otherwise set 0. */ verbose = (verbose < 0) || (verbose > 10) ? 
0 : verbose; +#if MAKE_LIBTRACEEVENT_VERSION(1, 3, 0) <= LIBTRACEEVENT_VERSION + if (verbose == 1) + tep_set_loglevel(TEP_LOG_INFO); + else if (verbose == 2) + tep_set_loglevel(TEP_LOG_DEBUG); + else if (verbose >= 3) + tep_set_loglevel(TEP_LOG_ALL); +#endif + return 0; } diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 9ed9a5676d35..9cc8a1772b4b 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -14,6 +14,7 @@ #ifdef HAVE_LIBBPF_SUPPORT #include <bpf/libbpf.h> #include "bpf-event.h" +#include "bpf-utils.h" #endif #include "compress.h" #include "env.h" diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 83723ba11dc8..011da3924fc1 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -193,7 +193,7 @@ struct dso { int fd; int status; u32 status_seen; - size_t file_size; + u64 file_size; struct list_head open_entry; u64 debug_frame_offset; u64 eh_frame_hdr_offset; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index cf773f0dec38..b9904896eb97 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -16,6 +16,7 @@ struct perf_env perf_env; #ifdef HAVE_LIBBPF_SUPPORT #include "bpf-event.h" +#include "bpf-utils.h" #include <bpf/libbpf.h> void perf_env__insert_bpf_prog_info(struct perf_env *env, @@ -74,12 +75,13 @@ out: return node; } -void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) +bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) { struct rb_node *parent = NULL; __u32 btf_id = btf_node->id; struct btf_node *node; struct rb_node **p; + bool ret = true; down_write(&env->bpf_progs.lock); p = &env->bpf_progs.btfs.rb_node; @@ -93,6 +95,7 @@ void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) p = &(*p)->rb_right; } else { pr_debug("duplicated btf %u\n", btf_id); + ret = false; goto out; } } @@ -102,6 +105,7 @@ void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) env->bpf_progs.btfs_cnt++; out: 
up_write(&env->bpf_progs.lock); + return ret; } struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1383876f72b3..163e5ec503a2 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -167,7 +167,7 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); -void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); +bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); int perf_env__numa_node(struct perf_env *env, int cpu); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index ac706304afe9..fe24801f8e9f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -57,6 +57,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", [PERF_RECORD_CGROUP] = "CGROUP", [PERF_RECORD_TEXT_POKE] = "TEXT_POKE", + [PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -237,6 +238,14 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused, return machine__process_itrace_start_event(machine, event); } +int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_aux_output_hw_id_event(machine, event); +} + int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -407,6 +416,12 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) event->itrace_start.pid, event->itrace_start.tid); } +size_t 
perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " hw_id: %#"PRI_lx64"\n", + event->aux_output_hw_id.hw_id); +} + size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) { bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; @@ -534,6 +549,9 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL case PERF_RECORD_TEXT_POKE: ret += perf_event__fprintf_text_poke(event, machine, fp); break; + case PERF_RECORD_AUX_OUTPUT_HW_ID: + ret += perf_event__fprintf_aux_output_hw_id(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 19ad64f2bd83..95ffed66369c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -330,6 +330,10 @@ int perf_event__process_itrace_start(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_aux_output_hw_id(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_switch(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -397,6 +401,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp); size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbfeceb2546c..ac0127be0459 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -241,7 +241,7 @@ void 
evsel__init(struct evsel *evsel, { perf_evsel__init(&evsel->core, attr, idx); evsel->tracking = !idx; - evsel->unit = ""; + evsel->unit = strdup(""); evsel->scale = 1.0; evsel->max_events = ULONG_MAX; evsel->evlist = NULL; @@ -276,13 +276,8 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) } if (evsel__is_clock(evsel)) { - /* - * The evsel->unit points to static alias->unit - * so it's ok to use static string in here. - */ - static const char *unit = "msec"; - - evsel->unit = unit; + free((char *)evsel->unit); + evsel->unit = strdup("msec"); evsel->scale = 1e-6; } @@ -294,7 +289,7 @@ static bool perf_event_can_profile_kernel(void) return perf_event_paranoid_check(1); } -struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) +struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config) { struct perf_event_attr attr = { .type = type, @@ -305,18 +300,16 @@ struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) event_attr_init(&attr); - if (!precise) - goto new_event; - /* * Now let the usual logic to set up the perf_event_attr defaults * to kick in when we return and before perf_evsel__open() is called. 
*/ -new_event: evsel = evsel__new(&attr); if (evsel == NULL) goto out; + arch_evsel__fixup_new_cycles(&evsel->core.attr); + evsel->precise_max = true; /* use asprintf() because free(evsel) assumes name is allocated */ @@ -410,6 +403,11 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel->filter == NULL) goto out_err; } + if (orig->metric_id) { + evsel->metric_id = strdup(orig->metric_id); + if (evsel->metric_id == NULL) + goto out_err; + } evsel->cgrp = cgroup__get(orig->cgrp); evsel->tp_format = orig->tp_format; evsel->handler = orig->handler; @@ -417,7 +415,11 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->max_events = orig->max_events; evsel->tool_event = orig->tool_event; - evsel->unit = orig->unit; + free((char *)evsel->unit); + evsel->unit = strdup(orig->unit); + if (evsel->unit == NULL) + goto out_err; + evsel->scale = orig->scale; evsel->snapshot = orig->snapshot; evsel->per_pkg = orig->per_pkg; @@ -779,6 +781,17 @@ out_unknown: return "unknown"; } +const char *evsel__metric_id(const struct evsel *evsel) +{ + if (evsel->metric_id) + return evsel->metric_id; + + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE && evsel->tool_event) + return "duration_time"; + + return "unknown"; +} + const char *evsel__group_name(struct evsel *evsel) { return evsel->group_name ?: "anon group"; @@ -1047,6 +1060,10 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel) evsel__set_sample_bit(evsel, WEIGHT); } +void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused) +{ +} + /* * The enable_on_exec/disabled value strategy: * @@ -1423,6 +1440,8 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); + zfree(&evsel->unit); + zfree(&evsel->metric_id); evsel__zero_per_pkg(evsel); hashmap__free(evsel->per_pkg_mask); evsel->per_pkg_mask = NULL; @@ -1807,7 +1826,7 @@ static void evsel__disable_missing_features(struct evsel *evsel) evsel->open_flags &= ~(unsigned 
long)PERF_FLAG_FD_CLOEXEC; if (perf_missing_features.mmap2) evsel->core.attr.mmap2 = 0; - if (perf_missing_features.exclude_guest) + if (evsel->pmu && evsel->pmu->missing_features.exclude_guest) evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0; if (perf_missing_features.lbr_flags) evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS | @@ -1900,10 +1919,27 @@ bool evsel__detect_missing_features(struct evsel *evsel) perf_missing_features.mmap2 = true; pr_debug2_peo("switching off mmap2\n"); return true; - } else if (!perf_missing_features.exclude_guest && - (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) { - perf_missing_features.exclude_guest = true; - pr_debug2_peo("switching off exclude_guest, exclude_host\n"); + } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) && + (evsel->pmu == NULL || evsel->pmu->missing_features.exclude_guest)) { + if (evsel->pmu == NULL) { + evsel->pmu = evsel__find_pmu(evsel); + if (evsel->pmu) + evsel->pmu->missing_features.exclude_guest = true; + else { + /* we cannot find PMU, disable attrs now */ + evsel->core.attr.exclude_host = false; + evsel->core.attr.exclude_guest = false; + } + } + + if (evsel->exclude_GH) { + pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n"); + return false; + } + if (!perf_missing_features.exclude_guest) { + perf_missing_features.exclude_guest = true; + pr_debug2_peo("switching off exclude_guest, exclude_host\n"); + } return true; } else if (!perf_missing_features.sample_id_all) { perf_missing_features.sample_id_all = true; @@ -2221,6 +2257,54 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data, data->weight = *array; } +u64 evsel__bitfield_swap_branch_flags(u64 value) +{ + u64 new_val = 0; + + /* + * branch_flags + * union { + * u64 values; + * struct { + * mispred:1 //target mispredicted + * predicted:1 //target predicted + * in_tx:1 //in transaction + * abort:1 //transaction abort + * 
cycles:16 //cycle count to last branch + * type:4 //branch type + * reserved:40 + * } + * } + * + * Avoid bswap64() the entire branch_flag.value, + * as it has variable bit-field sizes. Instead the + * macro takes the bit-field position/size, + * swaps it based on the host endianness. + * + * tep_is_bigendian() is used here instead of + * bigendian() to avoid python test fails. + */ + if (tep_is_bigendian()) { + new_val = bitfield_swap(value, 0, 1); + new_val |= bitfield_swap(value, 1, 1); + new_val |= bitfield_swap(value, 2, 1); + new_val |= bitfield_swap(value, 3, 1); + new_val |= bitfield_swap(value, 4, 16); + new_val |= bitfield_swap(value, 20, 4); + new_val |= bitfield_swap(value, 24, 40); + } else { + new_val = bitfield_swap(value, 63, 1); + new_val |= bitfield_swap(value, 62, 1); + new_val |= bitfield_swap(value, 61, 1); + new_val |= bitfield_swap(value, 60, 1); + new_val |= bitfield_swap(value, 44, 16); + new_val |= bitfield_swap(value, 40, 4); + new_val |= bitfield_swap(value, 0, 40); + } + + return new_val; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -2408,6 +2492,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_BRANCH_STACK) { const u64 max_branch_nr = UINT64_MAX / sizeof(struct branch_entry); + struct branch_entry *e; + unsigned int i; OVERFLOW_CHECK_u64(array); data->branch_stack = (struct branch_stack *)array++; @@ -2416,10 +2502,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, return -EFAULT; sz = data->branch_stack->nr * sizeof(struct branch_entry); - if (evsel__has_branch_hw_idx(evsel)) + if (evsel__has_branch_hw_idx(evsel)) { sz += sizeof(u64); - else + e = &data->branch_stack->entries[0]; + } else { data->no_hw_idx = true; + /* + * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied, + * only nr and entries[] will be output by kernel. 
+ */ + e = (struct branch_entry *)&data->branch_stack->hw_idx; + } + + if (swapped) { + /* + * struct branch_flag does not have endian + * specific bit field definition. And bswap + * will not resolve the issue, since these + * are bit fields. + * + * evsel__bitfield_swap_branch_flags() uses a + * bitfield_swap macro to swap the bit position + * based on the host endians. + */ + for (i = 0; i < data->branch_stack->nr; i++, e++) + e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value); + } + OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; } @@ -2928,3 +3037,15 @@ void evsel__set_leader(struct evsel *evsel, struct evsel *leader) { evsel->core.leader = &leader->core; } + +int evsel__source_count(const struct evsel *evsel) +{ + struct evsel *pos; + int count = 0; + + evlist__for_each_entry(evsel->evlist, pos) { + if (pos->metric_leader == evsel) + count++; + } + return count; +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1f7edfa8568a..29d49a8c1e92 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -22,6 +22,7 @@ struct target; struct hashmap; struct bperf_leader_bpf; struct bperf_follower_bpf; +struct perf_pmu; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -68,6 +69,7 @@ struct evsel { double scale; const char *unit; struct cgroup *cgrp; + const char *metric_id; enum perf_tool_event tool_event; /* parse modifier helper */ int exclude_GH; @@ -152,6 +154,9 @@ struct evsel { }; unsigned long open_flags; int precise_ip_original; + + /* for missing_features */ + struct perf_pmu *pmu; }; struct perf_missing_features { @@ -261,6 +266,7 @@ bool evsel__match_bpf_counter_events(const char *name); int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *evsel__name(struct evsel *evsel); +const char *evsel__metric_id(const struct evsel *evsel); const char *evsel__group_name(struct evsel *evsel); int evsel__group_desc(struct evsel *evsel, 
char *buf, size_t size); @@ -277,6 +283,7 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); void arch_evsel__set_sample_weight(struct evsel *evsel); +void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr); int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char *filter); @@ -482,4 +489,18 @@ struct evsel *evsel__leader(struct evsel *evsel); bool evsel__has_leader(struct evsel *evsel, struct evsel *leader); bool evsel__is_leader(struct evsel *evsel); void evsel__set_leader(struct evsel *evsel, struct evsel *leader); +int evsel__source_count(const struct evsel *evsel); + +/* + * Macro to swap the bit-field postition and size. + * Used when, + * - dont need to swap the entire u64 && + * - when u64 has variable bit-field sizes && + * - when presented in a host endian which is different + * than the source endian of the perf.data file + */ +#define bitfield_swap(src, pos, size) \ + ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1))) + +u64 evsel__bitfield_swap_branch_flags(u64 value); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index bfedd7b23521..8c2ea8001329 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -11,6 +11,7 @@ #include "strlist.h" #include "symbol.h" #include "srcline.h" +#include "dso.h" static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) 
{ @@ -144,12 +145,17 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (print_arrow && !first) printed += fprintf(fp, " <-"); - if (print_ip) - printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); - if (map) addr = map->map_ip(map, node->ip); + if (print_ip) { + /* Show binary offset for userspace addr */ + if (map && !map->dso->kernel) + printed += fprintf(fp, "%c%16" PRIx64, s, addr); + else + printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); + } + if (print_sym) { printed += fprintf(fp, " "); node_al.addr = addr; diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index a850fd0be3ee..1d532b9fed29 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -5,13 +5,17 @@ #include <stdlib.h> #include <string.h> #include "metricgroup.h" +#include "cpumap.h" +#include "cputopo.h" #include "debug.h" #include "expr.h" #include "expr-bison.h" #include "expr-flex.h" +#include "smt.h" #include <linux/kernel.h> #include <linux/zalloc.h> #include <ctype.h> +#include <math.h> #ifdef PARSER_DEBUG extern int expr_debug; @@ -19,13 +23,15 @@ extern int expr_debug; struct expr_id_data { union { - double val; + struct { + double val; + int source_count; + } val; struct { double val; const char *metric_name; const char *metric_expr; } ref; - struct expr_id *parent; }; enum { @@ -35,8 +41,6 @@ struct expr_id_data { EXPR_ID_DATA__REF, /* A reference but the value has been computed. */ EXPR_ID_DATA__REF_VALUE, - /* A parent is remembered for the recursion check. 
*/ - EXPR_ID_DATA__PARENT, } kind; }; @@ -59,21 +63,34 @@ static bool key_equal(const void *key1, const void *key2, return !strcmp((const char *)key1, (const char *)key2); } -/* Caller must make sure id is allocated */ -int expr__add_id(struct expr_parse_ctx *ctx, const char *id) +struct hashmap *ids__new(void) +{ + return hashmap__new(key_hash, key_equal, NULL); +} + +void ids__free(struct hashmap *ids) +{ + struct hashmap_entry *cur; + size_t bkt; + + if (ids == NULL) + return; + + hashmap__for_each_entry(ids, cur, bkt) { + free((char *)cur->key); + free(cur->value); + } + + hashmap__free(ids); +} + +int ids__insert(struct hashmap *ids, const char *id) { struct expr_id_data *data_ptr = NULL, *old_data = NULL; char *old_key = NULL; int ret; - data_ptr = malloc(sizeof(*data_ptr)); - if (!data_ptr) - return -ENOMEM; - - data_ptr->parent = ctx->parent; - data_ptr->kind = EXPR_ID_DATA__PARENT; - - ret = hashmap__set(&ctx->ids, id, data_ptr, + ret = hashmap__set(ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -82,9 +99,58 @@ int expr__add_id(struct expr_parse_ctx *ctx, const char *id) return ret; } +struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2) +{ + size_t bkt; + struct hashmap_entry *cur; + int ret; + struct expr_id_data *old_data = NULL; + char *old_key = NULL; + + if (!ids1) + return ids2; + + if (!ids2) + return ids1; + + if (hashmap__size(ids1) < hashmap__size(ids2)) { + struct hashmap *tmp = ids1; + + ids1 = ids2; + ids2 = tmp; + } + hashmap__for_each_entry(ids2, cur, bkt) { + ret = hashmap__set(ids1, cur->key, cur->value, + (const void **)&old_key, (void **)&old_data); + free(old_key); + free(old_data); + + if (ret) { + hashmap__free(ids1); + hashmap__free(ids2); + return NULL; + } + } + hashmap__free(ids2); + return ids1; +} + +/* Caller must make sure id is allocated */ +int expr__add_id(struct expr_parse_ctx *ctx, const char *id) +{ + return ids__insert(ctx->ids, id); +} + /* Caller must 
make sure id is allocated */ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) { + return expr__add_id_val_source_count(ctx, id, val, /*source_count=*/1); +} + +/* Caller must make sure id is allocated */ +int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, + double val, int source_count) +{ struct expr_id_data *data_ptr = NULL, *old_data = NULL; char *old_key = NULL; int ret; @@ -92,10 +158,11 @@ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) data_ptr = malloc(sizeof(*data_ptr)); if (!data_ptr) return -ENOMEM; - data_ptr->val = val; + data_ptr->val.val = val; + data_ptr->val.source_count = source_count; data_ptr->kind = EXPR_ID_DATA__VALUE; - ret = hashmap__set(&ctx->ids, id, data_ptr, + ret = hashmap__set(ctx->ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -140,7 +207,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) data_ptr->ref.metric_expr = ref->metric_expr; data_ptr->kind = EXPR_ID_DATA__REF; - ret = hashmap__set(&ctx->ids, name, data_ptr, + ret = hashmap__set(ctx->ids, name, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -156,9 +223,24 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data) { - return hashmap__find(&ctx->ids, id, (void **)data) ? 0 : -1; + return hashmap__find(ctx->ids, id, (void **)data) ? 
0 : -1; +} + +bool expr__subset_of_ids(struct expr_parse_ctx *haystack, + struct expr_parse_ctx *needles) +{ + struct hashmap_entry *cur; + size_t bkt; + struct expr_id_data *data; + + hashmap__for_each_entry(needles->ids, cur, bkt) { + if (expr__get_id(haystack, cur->key, &data)) + return false; + } + return true; } + int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **datap) { @@ -173,21 +255,18 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, switch (data->kind) { case EXPR_ID_DATA__VALUE: - pr_debug2("lookup(%s): val %f\n", id, data->val); - break; - case EXPR_ID_DATA__PARENT: - pr_debug2("lookup(%s): parent %s\n", id, data->parent->id); + pr_debug2("lookup(%s): val %f\n", id, data->val.val); break; case EXPR_ID_DATA__REF: pr_debug2("lookup(%s): ref metric name %s\n", id, data->ref.metric_name); pr_debug("processing metric: %s ENTRY\n", id); data->kind = EXPR_ID_DATA__REF_VALUE; - if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr, 1)) { + if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr)) { pr_debug("%s failed to count\n", id); return -1; } - pr_debug("processing metric: %s EXIT: %f\n", id, data->val); + pr_debug("processing metric: %s EXIT: %f\n", id, data->ref.val); break; case EXPR_ID_DATA__REF_VALUE: pr_debug2("lookup(%s): ref val %f metric name %s\n", id, @@ -205,15 +284,24 @@ void expr__del_id(struct expr_parse_ctx *ctx, const char *id) struct expr_id_data *old_val = NULL; char *old_key = NULL; - hashmap__delete(&ctx->ids, id, + hashmap__delete(ctx->ids, id, (const void **)&old_key, (void **)&old_val); free(old_key); free(old_val); } -void expr__ctx_init(struct expr_parse_ctx *ctx) +struct expr_parse_ctx *expr__ctx_new(void) { - hashmap__init(&ctx->ids, key_hash, key_equal, NULL); + struct expr_parse_ctx *ctx; + + ctx = malloc(sizeof(struct expr_parse_ctx)); + if (!ctx) + return NULL; + + ctx->ids = hashmap__new(key_hash, key_equal, NULL); + ctx->runtime = 0; + + return ctx; } void 
expr__ctx_clear(struct expr_parse_ctx *ctx) @@ -221,20 +309,32 @@ void expr__ctx_clear(struct expr_parse_ctx *ctx) struct hashmap_entry *cur; size_t bkt; - hashmap__for_each_entry((&ctx->ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { free((char *)cur->key); free(cur->value); } - hashmap__clear(&ctx->ids); + hashmap__clear(ctx->ids); +} + +void expr__ctx_free(struct expr_parse_ctx *ctx) +{ + struct hashmap_entry *cur; + size_t bkt; + + hashmap__for_each_entry(ctx->ids, cur, bkt) { + free((char *)cur->key); + free(cur->value); + } + hashmap__free(ctx->ids); + free(ctx); } static int __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, - int start, int runtime) + bool compute_ids) { struct expr_scanner_ctx scanner_ctx = { - .start_token = start, - .runtime = runtime, + .runtime = ctx->runtime, }; YY_BUFFER_STATE buffer; void *scanner; @@ -253,7 +353,7 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, expr_set_debug(1, scanner); #endif - ret = expr_parse(val, ctx, scanner); + ret = expr_parse(val, ctx, compute_ids, scanner); expr__flush_buffer(buffer, scanner); expr__delete_buffer(buffer, scanner); @@ -262,15 +362,15 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, } int expr__parse(double *final_val, struct expr_parse_ctx *ctx, - const char *expr, int runtime) + const char *expr) { - return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0; + return __expr__parse(final_val, ctx, expr, /*compute_ids=*/false) ? 
-1 : 0; } -int expr__find_other(const char *expr, const char *one, - struct expr_parse_ctx *ctx, int runtime) +int expr__find_ids(const char *expr, const char *one, + struct expr_parse_ctx *ctx) { - int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime); + int ret = __expr__parse(NULL, ctx, expr, /*compute_ids=*/true); if (one) expr__del_id(ctx, one); @@ -281,13 +381,47 @@ int expr__find_other(const char *expr, const char *one, double expr_id_data__value(const struct expr_id_data *data) { if (data->kind == EXPR_ID_DATA__VALUE) - return data->val; + return data->val.val; assert(data->kind == EXPR_ID_DATA__REF_VALUE); return data->ref.val; } -struct expr_id *expr_id_data__parent(struct expr_id_data *data) +double expr_id_data__source_count(const struct expr_id_data *data) { - assert(data->kind == EXPR_ID_DATA__PARENT); - return data->parent; + assert(data->kind == EXPR_ID_DATA__VALUE); + return data->val.source_count; +} + +double expr__get_literal(const char *literal) +{ + static struct cpu_topology *topology; + + if (!strcmp("#smt_on", literal)) + return smt_on() > 0 ? 1.0 : 0.0; + + if (!strcmp("#num_cpus", literal)) + return cpu__max_present_cpu(); + + /* + * Assume that topology strings are consistent, such as CPUs "0-1" + * wouldn't be listed as "0,1", and so after deduplication the number of + * these strings gives an indication of the number of packages, dies, + * etc. 
+ */ + if (!topology) { + topology = cpu_topology__new(); + if (!topology) { + pr_err("Error creating CPU topology"); + return NAN; + } + } + if (!strcmp("#num_packages", literal)) + return topology->package_cpus_lists; + if (!strcmp("#num_dies", literal)) + return topology->die_cpus_lists; + if (!strcmp("#num_cores", literal)) + return topology->core_cpus_lists; + + pr_err("Unrecognized literal '%s'", literal); + return NAN; } diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 85df3e4771e4..bd2116983bbb 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -13,39 +13,51 @@ struct metric_ref; -struct expr_id { - char *id; - struct expr_id *parent; -}; - struct expr_parse_ctx { - struct hashmap ids; - struct expr_id *parent; + struct hashmap *ids; + int runtime; }; struct expr_id_data; struct expr_scanner_ctx { - int start_token; int runtime; }; -void expr__ctx_init(struct expr_parse_ctx *ctx); +struct hashmap *ids__new(void); +void ids__free(struct hashmap *ids); +int ids__insert(struct hashmap *ids, const char *id); +/* + * Union two sets of ids (hashmaps) and construct a third, freeing ids1 and + * ids2. 
+ */ +struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2); + +struct expr_parse_ctx *expr__ctx_new(void); void expr__ctx_clear(struct expr_parse_ctx *ctx); +void expr__ctx_free(struct expr_parse_ctx *ctx); + void expr__del_id(struct expr_parse_ctx *ctx, const char *id); int expr__add_id(struct expr_parse_ctx *ctx, const char *id); int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val); +int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, + double val, int source_count); int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref); int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data); +bool expr__subset_of_ids(struct expr_parse_ctx *haystack, + struct expr_parse_ctx *needles); int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **datap); + int expr__parse(double *final_val, struct expr_parse_ctx *ctx, - const char *expr, int runtime); -int expr__find_other(const char *expr, const char *one, - struct expr_parse_ctx *ids, int runtime); + const char *expr); + +int expr__find_ids(const char *expr, const char *one, + struct expr_parse_ctx *ids); double expr_id_data__value(const struct expr_id_data *data); -struct expr_id *expr_id_data__parent(struct expr_id_data *data); +double expr_id_data__source_count(const struct expr_id_data *data); +double expr__get_literal(const char *literal); #endif diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index 13e5e3c75f56..0a13eb20c814 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -6,6 +6,7 @@ #include <linux/compiler.h> #include "expr.h" #include "expr-bison.h" +#include <math.h> char *expr_get_text(yyscan_t yyscanner); YYSTYPE *expr_get_lval(yyscan_t yyscanner); @@ -41,11 +42,9 @@ static char *normalize(char *str, int runtime) char *dst = str; while (*str) { - if (*str == '@') - *dst++ = '/'; - else if (*str == '\\') + if (*str == '\\') *dst++ = *++str; - 
else if (*str == '?') { + else if (*str == '?') { char *paramval; int i = 0; int size = asprintf(&paramval, "%d", runtime); @@ -79,6 +78,17 @@ static int str(yyscan_t scanner, int token, int runtime) yylval->str = normalize(yylval->str, runtime); return token; } + +static int literal(yyscan_t scanner) +{ + YYSTYPE *yylval = expr_get_lval(scanner); + + yylval->num = expr__get_literal(expr_get_text(scanner)); + if (isnan(yylval->num)) + return EXPR_ERROR; + + return LITERAL; +} %} number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+) @@ -87,25 +97,18 @@ sch [-,=] spec \\{sch} sym [0-9a-zA-Z_\.:@?]+ symbol ({spec}|{sym})+ +literal #[0-9a-zA-Z_\.\-]+ %% struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner); - { - int start_token = sctx->start_token; - - if (sctx->start_token) { - sctx->start_token = 0; - return start_token; - } - } - d_ratio { return D_RATIO; } max { return MAX; } min { return MIN; } if { return IF; } else { return ELSE; } -#smt_on { return SMT_ON; } +source_count { return SOURCE_COUNT; } +{literal} { return literal(yyscanner); } {number} { return value(yyscanner); } {symbol} { return str(yyscanner, ID, sctx->runtime); } "|" { return '|'; } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index b2ada8f8309a..a30b825adb7b 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -1,42 +1,43 @@ /* Simple expression parser */ %{ #define YYDEBUG 1 -#include <stdio.h> -#include "util.h" +#include <assert.h> +#include <math.h> +#include <stdlib.h> #include "util/debug.h" -#include <stdlib.h> // strtod() #define IN_EXPR_Y 1 #include "expr.h" -#include "smt.h" -#include <string.h> - -static double d_ratio(double val0, double val1) -{ - if (val1 == 0) { - return 0; - } - return val0 / val1; -} - %} %define api.pure full %parse-param { double *final_val } %parse-param { struct expr_parse_ctx *ctx } +%parse-param { bool compute_ids } %parse-param {void *scanner} %lex-param {void* scanner} %union { double num; char *str; + struct ids { + /* + * When 
creating ids, holds the working set of event ids. NULL + * implies the set is empty. + */ + struct hashmap *ids; + /* + * The metric value. When not creating ids this is the value + * read from a counter, a constant or some computed value. When + * creating ids the value is either a constant or BOTTOM. NAN is + * used as the special BOTTOM value, representing a "set of all + * values" case. + */ + double val; + } ids; } -%token EXPR_PARSE EXPR_OTHER EXPR_ERROR -%token <num> NUMBER -%token <str> ID -%destructor { free ($$); } <str> -%token MIN MAX IF ELSE SMT_ON D_RATIO +%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT EXPR_ERROR %left MIN MAX IF %left '|' %left '^' @@ -45,83 +46,256 @@ static double d_ratio(double val0, double val1) %left '-' '+' %left '*' '/' '%' %left NEG NOT -%type <num> expr if_expr +%type <num> NUMBER LITERAL +%type <str> ID +%destructor { free ($$); } <str> +%type <ids> expr if_expr +%destructor { ids__free($$.ids); } <ids> %{ static void expr_error(double *final_val __maybe_unused, struct expr_parse_ctx *ctx __maybe_unused, + bool compute_ids __maybe_unused, void *scanner, const char *s) { pr_debug("%s\n", s); } +/* + * During compute ids, the special "bottom" value uses NAN to represent the set + * of all values. NAN is selected as it isn't a useful constant value. + */ +#define BOTTOM NAN + +/* During computing ids, does val represent a constant (non-BOTTOM) value? */ +static bool is_const(double val) +{ + return isfinite(val); +} + +static struct ids union_expr(struct ids ids1, struct ids ids2) +{ + struct ids result = { + .val = BOTTOM, + .ids = ids__union(ids1.ids, ids2.ids), + }; + return result; +} + +static struct ids handle_id(struct expr_parse_ctx *ctx, char *id, + bool compute_ids, bool source_count) +{ + struct ids result; + + if (!compute_ids) { + /* + * Compute the event's value from ID. If the ID isn't known then + * it isn't used to compute the formula so set to NAN. 
+ */ + struct expr_id_data *data; + + result.val = NAN; + if (expr__resolve_id(ctx, id, &data) == 0) { + result.val = source_count + ? expr_id_data__source_count(data) + : expr_id_data__value(data); + } + result.ids = NULL; + free(id); + } else { + /* + * Set the value to BOTTOM to show that any value is possible + * when the event is computed. Create a set of just the ID. + */ + result.val = BOTTOM; + result.ids = ids__new(); + if (!result.ids || ids__insert(result.ids, id)) { + pr_err("Error creating IDs for '%s'", id); + free(id); + } + } + return result; +} + +/* + * If we're not computing ids or $1 and $3 are constants, compute the new + * constant value using OP. Its invariant that there are no ids. If computing + * ids for non-constants union the set of IDs that must be computed. + */ +#define BINARY_LONG_OP(RESULT, OP, LHS, RHS) \ + if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \ + assert(LHS.ids == NULL); \ + assert(RHS.ids == NULL); \ + RESULT.val = (long)LHS.val OP (long)RHS.val; \ + RESULT.ids = NULL; \ + } else { \ + RESULT = union_expr(LHS, RHS); \ + } + +#define BINARY_OP(RESULT, OP, LHS, RHS) \ + if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \ + assert(LHS.ids == NULL); \ + assert(RHS.ids == NULL); \ + RESULT.val = LHS.val OP RHS.val; \ + RESULT.ids = NULL; \ + } else { \ + RESULT = union_expr(LHS, RHS); \ + } + %} %% -start: -EXPR_PARSE all_expr -| -EXPR_OTHER all_other +start: if_expr +{ + if (compute_ids) + ctx->ids = ids__union($1.ids, ctx->ids); + + if (final_val) + *final_val = $1.val; +} +; -all_other: all_other other -| +if_expr: expr IF expr ELSE expr +{ + if (fpclassify($3.val) == FP_ZERO) { + /* + * The IF expression evaluated to 0 so treat as false, take the + * ELSE and discard everything else. + */ + $$.val = $5.val; + $$.ids = $5.ids; + ids__free($1.ids); + ids__free($3.ids); + } else if (!compute_ids || is_const($3.val)) { + /* + * If ids aren't computed then treat the expression as true. 
If + * ids are being computed and the IF expr is a non-zero + * constant, then also evaluate the true case. + */ + $$.val = $1.val; + $$.ids = $1.ids; + ids__free($3.ids); + ids__free($5.ids); + } else if ($1.val == $5.val) { + /* + * LHS == RHS, so both are an identical constant. No need to + * evaluate any events. + */ + $$.val = $1.val; + $$.ids = NULL; + ids__free($1.ids); + ids__free($3.ids); + ids__free($5.ids); + } else { + /* + * Value is either the LHS or RHS and we need the IF expression + * to compute it. + */ + $$ = union_expr($1, union_expr($3, $5)); + } +} +| expr +; -other: ID +expr: NUMBER +{ + $$.val = $1; + $$.ids = NULL; +} +| ID { $$ = handle_id(ctx, $1, compute_ids, /*source_count=*/false); } +| SOURCE_COUNT '(' ID ')' { $$ = handle_id(ctx, $3, compute_ids, /*source_count=*/true); } +| expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); } +| expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); } +| expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); } +| expr '<' expr { BINARY_OP($$, <, $1, $3); } +| expr '>' expr { BINARY_OP($$, >, $1, $3); } +| expr '+' expr { BINARY_OP($$, +, $1, $3); } +| expr '-' expr { BINARY_OP($$, -, $1, $3); } +| expr '*' expr { BINARY_OP($$, *, $1, $3); } +| expr '/' expr +{ + if (fpclassify($3.val) == FP_ZERO) { + pr_debug("division by zero\n"); + YYABORT; + } else if (!compute_ids || (is_const($1.val) && is_const($3.val))) { + assert($1.ids == NULL); + assert($3.ids == NULL); + $$.val = $1.val / $3.val; + $$.ids = NULL; + } else { + /* LHS and/or RHS need computing from event IDs so union. */ + $$ = union_expr($1, $3); + } +} +| expr '%' expr +{ + if (fpclassify($3.val) == FP_ZERO) { + pr_debug("division by zero\n"); + YYABORT; + } else if (!compute_ids || (is_const($1.val) && is_const($3.val))) { + assert($1.ids == NULL); + assert($3.ids == NULL); + $$.val = (long)$1.val % (long)$3.val; + $$.ids = NULL; + } else { + /* LHS and/or RHS need computing from event IDs so union. 
*/ + $$ = union_expr($1, $3); + } +} +| D_RATIO '(' expr ',' expr ')' +{ + if (fpclassify($5.val) == FP_ZERO) { + /* + * Division by constant zero always yields zero and no events + * are necessary. + */ + assert($5.ids == NULL); + $$.val = 0.0; + $$.ids = NULL; + ids__free($3.ids); + } else if (!compute_ids || (is_const($3.val) && is_const($5.val))) { + assert($3.ids == NULL); + assert($5.ids == NULL); + $$.val = $3.val / $5.val; + $$.ids = NULL; + } else { + /* LHS and/or RHS need computing from event IDs so union. */ + $$ = union_expr($3, $5); + } +} +| '-' expr %prec NEG +{ + $$.val = -$2.val; + $$.ids = $2.ids; +} +| '(' if_expr ')' { - expr__add_id(ctx, $1); -} -| -MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ',' -| -'<' | '>' | D_RATIO - -all_expr: if_expr { *final_val = $1; } - ; - -if_expr: - expr IF expr ELSE expr { $$ = $3 ? $1 : $5; } - | expr - ; - -expr: NUMBER - | ID { - struct expr_id_data *data; - - if (expr__resolve_id(ctx, $1, &data)) { - free($1); - YYABORT; - } - - $$ = expr_id_data__value(data); - free($1); - } - | expr '|' expr { $$ = (long)$1 | (long)$3; } - | expr '&' expr { $$ = (long)$1 & (long)$3; } - | expr '^' expr { $$ = (long)$1 ^ (long)$3; } - | expr '<' expr { $$ = $1 < $3; } - | expr '>' expr { $$ = $1 > $3; } - | expr '+' expr { $$ = $1 + $3; } - | expr '-' expr { $$ = $1 - $3; } - | expr '*' expr { $$ = $1 * $3; } - | expr '/' expr { if ($3 == 0) { - pr_debug("division by zero\n"); - YYABORT; - } - $$ = $1 / $3; - } - | expr '%' expr { if ((long)$3 == 0) { - pr_debug("division by zero\n"); - YYABORT; - } - $$ = (long)$1 % (long)$3; - } - | '-' expr %prec NEG { $$ = -$2; } - | '(' if_expr ')' { $$ = $2; } - | MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; } - | MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? 
$3 : $5; } - | SMT_ON { $$ = smt_on() > 0; } - | D_RATIO '(' expr ',' expr ')' { $$ = d_ratio($3,$5); } - ; + $$ = $2; +} +| MIN '(' expr ',' expr ')' +{ + if (!compute_ids) { + $$.val = $3.val < $5.val ? $3.val : $5.val; + $$.ids = NULL; + } else { + $$ = union_expr($3, $5); + } +} +| MAX '(' expr ',' expr ')' +{ + if (!compute_ids) { + $$.val = $3.val > $5.val ? $3.val : $5.val; + $$.ids = NULL; + } else { + $$ = union_expr($3, $5); + } +} +| LITERAL +{ + $$.val = $1; + $$.ids = NULL; +} +; %% diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index d4137559be05..3db3293213a9 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -42,7 +42,7 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #error "unsupported architecture" #endif -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define GEN_ELF_ENDIAN ELFDATA2MSB #else #define GEN_ELF_ENDIAN ELFDATA2LSB diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1c7414f66655..79cce216727e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -48,6 +48,7 @@ #include "util/util.h" // perf_exe() #include "cputopo.h" #include "bpf-event.h" +#include "bpf-utils.h" #include "clockid.h" #include "pmu-hybrid.h" @@ -582,21 +583,21 @@ static int write_cpu_topology(struct feat_fd *ff, if (!tp) return -1; - ret = do_write(ff, &tp->core_sib, sizeof(tp->core_sib)); + ret = do_write(ff, &tp->package_cpus_lists, sizeof(tp->package_cpus_lists)); if (ret < 0) goto done; - for (i = 0; i < tp->core_sib; i++) { - ret = do_write_string(ff, tp->core_siblings[i]); + for (i = 0; i < tp->package_cpus_lists; i++) { + ret = do_write_string(ff, tp->package_cpus_list[i]); if (ret < 0) goto done; } - ret = do_write(ff, &tp->thread_sib, sizeof(tp->thread_sib)); + ret = do_write(ff, &tp->core_cpus_lists, sizeof(tp->core_cpus_lists)); if (ret < 0) goto done; - for (i = 0; i < tp->thread_sib; i++) { - ret = do_write_string(ff, 
tp->thread_siblings[i]); + for (i = 0; i < tp->core_cpus_lists; i++) { + ret = do_write_string(ff, tp->core_cpus_list[i]); if (ret < 0) break; } @@ -616,15 +617,15 @@ static int write_cpu_topology(struct feat_fd *ff, return ret; } - if (!tp->die_sib) + if (!tp->die_cpus_lists) goto done; - ret = do_write(ff, &tp->die_sib, sizeof(tp->die_sib)); + ret = do_write(ff, &tp->die_cpus_lists, sizeof(tp->die_cpus_lists)); if (ret < 0) goto done; - for (i = 0; i < tp->die_sib; i++) { - ret = do_write_string(ff, tp->die_siblings[i]); + for (i = 0; i < tp->die_cpus_lists; i++) { + ret = do_write_string(ff, tp->die_cpus_list[i]); if (ret < 0) goto done; } @@ -1006,17 +1007,17 @@ static int write_bpf_prog_info(struct feat_fd *ff, node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); - len = sizeof(struct bpf_prog_info_linear) + + len = sizeof(struct perf_bpil) + node->info_linear->data_len; /* before writing to file, translate address to offset */ - bpf_program__bpil_addr_to_offs(node->info_linear); + bpil_addr_to_offs(node->info_linear); ret = do_write(ff, node->info_linear, len); /* * translate back to address even when do_write() fails, * so that this function never changes the data. 
*/ - bpf_program__bpil_offs_to_addr(node->info_linear); + bpil_offs_to_addr(node->info_linear); if (ret < 0) goto out; } @@ -3018,9 +3019,9 @@ static int process_dir_format(struct feat_fd *ff, #ifdef HAVE_LIBBPF_SUPPORT static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) { - struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; struct perf_env *env = &ff->ph->env; + struct perf_bpil *info_linear; u32 count, i; int err = -1; @@ -3049,7 +3050,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) goto out; } - info_linear = malloc(sizeof(struct bpf_prog_info_linear) + + info_linear = malloc(sizeof(struct perf_bpil) + data_len); if (!info_linear) goto out; @@ -3071,7 +3072,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) goto out; /* after reading from file, translate offset to address */ - bpf_program__bpil_offs_to_addr(info_linear); + bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; perf_env__insert_bpf_prog_info(env, info_node); } @@ -4256,9 +4257,11 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, switch (ev->type) { case PERF_EVENT_UPDATE__UNIT: + free((char *)evsel->unit); evsel->unit = strdup(ev->data); break; case PERF_EVENT_UPDATE__NAME: + free(evsel->name); evsel->name = strdup(ev->data); break; case PERF_EVENT_UPDATE__SCALE: @@ -4267,11 +4270,11 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, break; case PERF_EVENT_UPDATE__CPUS: ev_cpus = (struct perf_record_event_update_cpus *)ev->data; - map = cpu_map__new_data(&ev_cpus->cpus); - if (map) + if (map) { + perf_cpu_map__put(evsel->core.own_cpus); evsel->core.own_cpus = map; - else + } else pr_err("failed to get event_update cpus\n"); default: break; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 65fe65ba03c2..b776465e04ef 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ 
-289,15 +289,10 @@ static long hist_time(unsigned long htime) return htime; } -static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 weight, u64 ins_lat, u64 p_stage_cyc) +static void he_stat__add_period(struct he_stat *he_stat, u64 period) { - he_stat->period += period; - he_stat->weight += weight; he_stat->nr_events += 1; - he_stat->ins_lat += ins_lat; - he_stat->p_stage_cyc += p_stage_cyc; } static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) @@ -308,9 +303,6 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->period_guest_sys += src->period_guest_sys; dest->period_guest_us += src->period_guest_us; dest->nr_events += src->nr_events; - dest->weight += src->weight; - dest->ins_lat += src->ins_lat; - dest->p_stage_cyc += src->p_stage_cyc; } static void he_stat__decay(struct he_stat *he_stat) @@ -598,9 +590,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; u64 period = entry->stat.period; - u64 weight = entry->stat.weight; - u64 ins_lat = entry->stat.ins_lat; - u64 p_stage_cyc = entry->stat.p_stage_cyc; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -619,11 +608,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc); + he_stat__add_period(&he->stat, period); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc); + he_stat__add_period(he->stat_acc, period); /* * This mem info was allocated from sample__resolve_mem @@ -733,9 +722,6 @@ __hists__add_entry(struct hists *hists, .stat = { .nr_events = 1, .period = sample->period, - .weight = sample->weight, - .ins_lat = sample->ins_lat, - .p_stage_cyc = sample->p_stage_cyc, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | 
al->filtered, @@ -748,6 +734,9 @@ __hists__add_entry(struct hists *hists, .raw_size = sample->raw_size, .ops = ops, .time = hist_time(sample->time), + .weight = sample->weight, + .ins_lat = sample->ins_lat, + .p_stage_cyc = sample->p_stage_cyc, }, *he = hists__findnew_entry(hists, &entry, al, sample_self); if (!hists->has_callchains && he && he->callchain_size != 0) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5343b62476e6..621f35ae1efa 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -369,7 +369,6 @@ enum { }; void perf_hpp__init(void); -void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__cancel_cumulate(void); void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index af1e78d76228..2c8147a62203 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -35,7 +35,7 @@ #define INTEL_BTS_ERR_NOINSN 5 #define INTEL_BTS_ERR_LOST 9 -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define le64_to_cpu bswap_64 #else #define le64_to_cpu diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 5ab631702769..5f83937bf8f3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -608,6 +608,7 @@ static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder) { decoder->sample_timestamp = decoder->timestamp; decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + decoder->state.cycles = decoder->tot_cyc_cnt; } static void intel_pt_reposition(struct intel_pt_decoder *decoder) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 4b5e79fcf557..8fd68f7a0963 100644 --- 
a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -218,6 +218,7 @@ struct intel_pt_state { uint64_t to_ip; uint64_t tot_insn_cnt; uint64_t tot_cyc_cnt; + uint64_t cycles; uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 593f20e9774c..9d5e65cec89b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -143,7 +143,7 @@ static void intel_pt_insn_decoder(struct insn *insn, if (branch == INTEL_PT_BR_CONDITIONAL || branch == INTEL_PT_BR_UNCONDITIONAL) { -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ switch (insn->immediate.nbytes) { case 1: intel_pt_insn->rel = insn->immediate.value; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index 09feb5b07d32..5f5dfc8753f3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -82,10 +82,10 @@ static int intel_pt_log_open(void) if (f) return 0; - if (!log_name[0]) - return -1; - - f = fopen(log_name, "w+"); + if (log_name[0]) + f = fopen(log_name, "w+"); + else + f = stdout; if (!f) { intel_pt_enable_logging = false; return -1; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 02a3395d6ce3..4bd154848cad 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -16,7 +16,7 @@ #define BIT63 ((uint64_t)1 << 63) -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 #define le64_to_cpu bswap_64 diff --git a/tools/perf/util/intel-pt.c 
b/tools/perf/util/intel-pt.c index 6f852b305e92..556a893508da 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -111,6 +111,7 @@ struct intel_pt { u64 cbr_id; u64 psb_id; + bool single_pebs; bool sample_pebs; struct evsel *pebs_evsel; @@ -148,6 +149,14 @@ enum switch_state { INTEL_PT_SS_EXPECTING_SWITCH_IP, }; +/* applicable_counters is 64-bits */ +#define INTEL_PT_MAX_PEBS 64 + +struct intel_pt_pebs_event { + struct evsel *evsel; + u64 id; +}; + struct intel_pt_queue { struct intel_pt *pt; unsigned int queue_nr; @@ -163,6 +172,7 @@ struct intel_pt_queue { bool step_through_buffers; bool use_buffer_pid_tid; bool sync_switch; + bool sample_ipc; pid_t pid, tid; int cpu; int switch_state; @@ -189,6 +199,7 @@ struct intel_pt_queue { u64 last_br_cyc_cnt; unsigned int cbr_seen; char insn[INTEL_PT_INSN_BUF_SZ]; + struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS]; }; static void intel_pt_dump(struct intel_pt *pt __maybe_unused, @@ -1571,7 +1582,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } - if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + if (ptq->sample_ipc) sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; @@ -1622,7 +1633,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) else sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; - if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + if (ptq->sample_ipc) sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; @@ -1978,15 +1989,13 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) { const struct intel_pt_blk_items *items = 
&ptq->state->items; struct perf_sample sample = { .ip = 0, }; union perf_event *event = ptq->event_buf; struct intel_pt *pt = ptq->pt; - struct evsel *evsel = pt->pebs_evsel; u64 sample_type = evsel->core.attr.sample_type; - u64 id = evsel->core.id[0]; u8 cpumode; u64 regs[8 * sizeof(sample.intr_regs.mask)]; @@ -2112,6 +2121,45 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); } +static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + struct evsel *evsel = pt->pebs_evsel; + u64 id = evsel->core.id[0]; + + return intel_pt_do_synth_pebs_sample(ptq, evsel, id); +} + +static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +{ + const struct intel_pt_blk_items *items = &ptq->state->items; + struct intel_pt_pebs_event *pe; + struct intel_pt *pt = ptq->pt; + int err = -EINVAL; + int hw_id; + + if (!items->has_applicable_counters || !items->applicable_counters) { + if (!pt->single_pebs) + pr_err("PEBS-via-PT record with no applicable_counters\n"); + return intel_pt_synth_single_pebs_sample(ptq); + } + + for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) { + pe = &ptq->pebs[hw_id]; + if (!pe->evsel) { + if (!pt->single_pebs) + pr_err("PEBS-via-PT record with no matching event, hw_id %d\n", + hw_id); + return intel_pt_synth_single_pebs_sample(ptq); + } + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + if (err) + return err; + } + + return err; +} + static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, pid_t pid, pid_t tid, u64 ip, u64 timestamp) { @@ -2198,8 +2246,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; - ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + if (pt->synth_opts.approx_ipc) { + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = 
ptq->state->cycles; + ptq->sample_ipc = true; + } else { + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC; + } /* * Do PEBS first to allow for the possibility that the PEBS timestamp @@ -2882,6 +2937,30 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample) +{ + u64 hw_id = event->aux_output_hw_id.hw_id; + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + struct evsel *evsel; + + queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); + evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id); + if (!queue || !queue->priv || !evsel || hw_id > INTEL_PT_MAX_PEBS) { + pr_err("Bad AUX output hardware ID\n"); + return -EINVAL; + } + + ptq = queue->priv; + + ptq->pebs[hw_id].evsel = evsel; + ptq->pebs[hw_id].id = sample->id; + + return 0; +} + static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al) { @@ -3009,6 +3088,8 @@ static int intel_pt_process_event(struct perf_session *session, err = intel_pt_process_switch(pt, sample); else if (event->header.type == PERF_RECORD_ITRACE_START) err = intel_pt_process_itrace_start(pt, event, sample); + else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) + err = intel_pt_process_aux_output_hw_id(pt, event, sample); else if (event->header.type == PERF_RECORD_SWITCH || event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); @@ -3393,9 +3474,13 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) evlist__for_each_entry(pt->session->evlist, evsel) { if (evsel->core.attr.aux_output && evsel->core.id) { + if (pt->single_pebs) { + pt->single_pebs = false; + return; + } + pt->single_pebs = true; pt->sample_pebs = true; pt->pebs_evsel = 
evsel; - return; } } } @@ -3651,8 +3736,6 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (err) goto err_free; - intel_pt_log_set_name(INTEL_PT_PMU_NAME); - if (session->itrace_synth_opts->set) { pt->synth_opts = *session->itrace_synth_opts; } else { @@ -3667,6 +3750,9 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->synth_opts.thread_stack = opts->thread_stack; } + if (!(pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT)) + intel_pt_log_set_name(INTEL_PT_PMU_NAME); + pt->session = session; pt->machine = &session->machines.host; /* No kvm support */ pt->auxtrace_type = auxtrace_info->type; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 44e40bad0e33..fb8496df8432 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -755,6 +755,14 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused, return 0; } +int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_aux_output_hw_id(event, stdout); + return 0; +} + int machine__process_switch_event(struct machine *machine __maybe_unused, union perf_event *event) { @@ -2028,6 +2036,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_bpf(machine, event, sample); break; case PERF_RECORD_TEXT_POKE: ret = machine__process_text_poke(machine, event, sample); break; + case PERF_RECORD_AUX_OUTPUT_HW_ID: + ret = machine__process_aux_output_hw_id_event(machine, event); break; default: ret = -1; break; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 7377ed6efdf1..a143087eeb47 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -124,6 +124,8 @@ int machine__process_aux_event(struct machine *machine, union perf_event *event); int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); +int 
machine__process_aux_output_hw_id_event(struct machine *machine, + union perf_event *event); int machine__process_switch_event(struct machine *machine, union perf_event *event); int machine__process_namespaces_event(struct machine *machine, diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 29b747ac31c1..fffe02aae3ed 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -11,6 +11,7 @@ #include "evsel.h" #include "strbuf.h" #include "pmu.h" +#include "pmu-hybrid.h" #include "expr.h" #include "rblist.h" #include <string.h> @@ -18,6 +19,7 @@ #include "strlist.h" #include <assert.h> #include <linux/ctype.h> +#include <linux/list_sort.h> #include <linux/string.h> #include <linux/zalloc.h> #include <subcmd/parse-options.h> @@ -84,6 +86,7 @@ static void metric_event_delete(struct rblist *rblist __maybe_unused, struct metric_expr *expr, *tmp; list_for_each_entry_safe(expr, tmp, &me->head, nd) { + free((char *)expr->metric_name); free(expr->metric_refs); free(expr->metric_events); free(expr); @@ -116,289 +119,207 @@ struct metric_ref_node { struct list_head list; }; +/** + * The metric under construction. The data held here will be placed in a + * metric_expr. + */ struct metric { struct list_head nd; - struct expr_parse_ctx pctx; + /** + * The expression parse context importantly holding the IDs contained + * within the expression. + */ + struct expr_parse_ctx *pctx; + /** The name of the metric such as "IPC". */ const char *metric_name; + /** Modifier on the metric such as "u" or NULL for none. */ + const char *modifier; + /** The expression to parse, for example, "instructions/cycles". */ const char *metric_expr; + /** + * The "ScaleUnit" that scales and adds a unit to the metric during + * output. + */ const char *metric_unit; - struct list_head metric_refs; - int metric_refs_cnt; - int runtime; + /** Optional null terminated array of referenced metrics. 
*/ + struct metric_ref *metric_refs; + /** + * Is there a constraint on the group of events? In which case the + * events won't be grouped. + */ bool has_constraint; + /** + * Parsed events for the metric. Optional as events may be taken from a + * different metric whose group contains all the IDs necessary for this + * one. + */ + struct evlist *evlist; }; -#define RECURSION_ID_MAX 1000 +static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) +{ + static bool violate_nmi_constraint; -struct expr_ids { - struct expr_id id[RECURSION_ID_MAX]; - int cnt; -}; + if (!foot) { + pr_warning("Splitting metric group %s into standalone metrics.\n", name); + violate_nmi_constraint = true; + return; + } -static struct expr_id *expr_ids__alloc(struct expr_ids *ids) + if (!violate_nmi_constraint) + return; + + pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n" + " echo 0 > /proc/sys/kernel/nmi_watchdog\n" + " perf stat ...\n" + " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); +} + +static bool metricgroup__has_constraint(const struct pmu_event *pe) { - if (ids->cnt >= RECURSION_ID_MAX) - return NULL; - return &ids->id[ids->cnt++]; + if (!pe->metric_constraint) + return false; + + if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && + sysctl__nmi_watchdog_enabled()) { + metricgroup___watchdog_constraint_hint(pe->metric_name, false); + return true; + } + + return false; } -static void expr_ids__exit(struct expr_ids *ids) +static struct metric *metric__new(const struct pmu_event *pe, + const char *modifier, + bool metric_no_group, + int runtime) { - int i; + struct metric *m; + + m = zalloc(sizeof(*m)); + if (!m) + return NULL; + + m->pctx = expr__ctx_new(); + if (!m->pctx) { + free(m); + return NULL; + } - for (i = 0; i < ids->cnt; i++) - free(ids->id[i].id); + m->metric_name = pe->metric_name; + m->modifier = modifier ? 
strdup(modifier) : NULL; + if (modifier && !m->modifier) { + free(m); + expr__ctx_free(m->pctx); + return NULL; + } + m->metric_expr = pe->metric_expr; + m->metric_unit = pe->unit; + m->pctx->runtime = runtime; + m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); + m->metric_refs = NULL; + m->evlist = NULL; + + return m; } -static bool contains_event(struct evsel **metric_events, int num_events, - const char *event_name) +static void metric__free(struct metric *m) +{ + free(m->metric_refs); + expr__ctx_free(m->pctx); + free((char *)m->modifier); + evlist__delete(m->evlist); + free(m); +} + +static bool contains_metric_id(struct evsel **metric_events, int num_events, + const char *metric_id) { int i; for (i = 0; i < num_events; i++) { - if (!strcmp(metric_events[i]->name, event_name)) + if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) return true; } return false; } -static bool evsel_same_pmu_or_none(struct evsel *ev1, struct evsel *ev2) -{ - if (!ev1->pmu_name || !ev2->pmu_name) - return true; - - return !strcmp(ev1->pmu_name, ev2->pmu_name); -} - /** - * Find a group of events in perf_evlist that correspond to those from a parsed - * metric expression. Note, as find_evsel_group is called in the same order as - * perf_evlist was constructed, metric_no_merge doesn't need to test for - * underfilling a group. - * @perf_evlist: a list of events something like: {metric1 leader, metric1 - * sibling, metric1 sibling}:W,duration_time,{metric2 leader, metric2 sibling, - * metric2 sibling}:W,duration_time - * @pctx: the parse context for the metric expression. - * @metric_no_merge: don't attempt to share events for the metric with other - * metrics. - * @has_constraint: is there a constraint on the group of events? In which case - * the events won't be grouped. - * @metric_events: out argument, null terminated array of evsel's associated - * with the metric. - * @evlist_used: in/out argument, bitmap tracking which evlist events are used. 
- * @return the first metric event or NULL on failure. + * setup_metric_events - Find a group of events in metric_evlist that correspond + * to the IDs from a parsed metric expression. + * @ids: the metric IDs to match. + * @metric_evlist: the list of perf events. + * @out_metric_events: holds the created metric events array. */ -static struct evsel *find_evsel_group(struct evlist *perf_evlist, - struct expr_parse_ctx *pctx, - bool metric_no_merge, - bool has_constraint, - struct evsel **metric_events, - unsigned long *evlist_used) +static int setup_metric_events(struct hashmap *ids, + struct evlist *metric_evlist, + struct evsel ***out_metric_events) { - struct evsel *ev, *current_leader = NULL; - struct expr_id_data *val_ptr; - int i = 0, matched_events = 0, events_to_match; - const int idnum = (int)hashmap__size(&pctx->ids); + struct evsel **metric_events; + const char *metric_id; + struct evsel *ev; + size_t ids_size, matched_events, i; - /* - * duration_time is always grouped separately, when events are grouped - * (ie has_constraint is false) then ignore it in the matching loop and - * add it to metric_events at the end. - */ - if (!has_constraint && - hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr)) - events_to_match = idnum - 1; - else - events_to_match = idnum; + *out_metric_events = NULL; + ids_size = hashmap__size(ids); + + metric_events = calloc(sizeof(void *), ids_size + 1); + if (!metric_events) + return -ENOMEM; + + matched_events = 0; + evlist__for_each_entry(metric_evlist, ev) { + struct expr_id_data *val_ptr; - evlist__for_each_entry (perf_evlist, ev) { /* - * Events with a constraint aren't grouped and match the first - * events available. + * Check for duplicate events with the same name. For + * example, uncore_imc/cas_count_read/ will turn into 6 + * events per socket on skylakex. Only the first such + * event is placed in metric_events. 
*/ - if (has_constraint && ev->weak_group) - continue; - /* Ignore event if already used and merging is disabled. */ - if (metric_no_merge && test_bit(ev->core.idx, evlist_used)) + metric_id = evsel__metric_id(ev); + if (contains_metric_id(metric_events, matched_events, metric_id)) continue; - if (!has_constraint && !evsel__has_leader(ev, current_leader)) { - /* - * Start of a new group, discard the whole match and - * start again. - */ - matched_events = 0; - memset(metric_events, 0, - sizeof(struct evsel *) * idnum); - current_leader = evsel__leader(ev); - } /* - * Check for duplicate events with the same name. For example, - * uncore_imc/cas_count_read/ will turn into 6 events per socket - * on skylakex. Only the first such event is placed in - * metric_events. If events aren't grouped then this also - * ensures that the same event in different sibling groups - * aren't both added to metric_events. + * Does this event belong to the parse context? For + * combined or shared groups, this metric may not care + * about this event. */ - if (contains_event(metric_events, matched_events, ev->name)) - continue; - /* Does this event belong to the parse context? */ - if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) + if (hashmap__find(ids, metric_id, (void **)&val_ptr)) { metric_events[matched_events++] = ev; - if (matched_events == events_to_match) - break; - } - - if (events_to_match != idnum) { - /* Add the first duration_time. */ - evlist__for_each_entry(perf_evlist, ev) { - if (!strcmp(ev->name, "duration_time")) { - metric_events[matched_events++] = ev; + if (matched_events >= ids_size) break; - } } } - - if (matched_events != idnum) { - /* Not a whole match */ - return NULL; + if (matched_events < ids_size) { + free(metric_events); + return -EINVAL; } - - metric_events[idnum] = NULL; - - for (i = 0; i < idnum; i++) { + for (i = 0; i < ids_size; i++) { ev = metric_events[i]; - /* Don't free the used events. 
*/ - set_bit(ev->core.idx, evlist_used); + ev->collect_stat = true; + /* - * The metric leader points to the identically named event in - * metric_events. + * The metric leader points to the identically named + * event in metric_events. */ ev->metric_leader = ev; /* - * Mark two events with identical names in the same group (or - * globally) as being in use as uncore events may be duplicated - * for each pmu. Set the metric leader of such events to be the - * event that appears in metric_events. + * Mark two events with identical names in the same + * group (or globally) as being in use as uncore events + * may be duplicated for each pmu. Set the metric leader + * of such events to be the event that appears in + * metric_events. */ - evlist__for_each_entry_continue(perf_evlist, ev) { - /* - * If events are grouped then the search can terminate - * when then group is left. - */ - if (!has_constraint && - ev->core.leader != metric_events[i]->core.leader && - evsel_same_pmu_or_none(evsel__leader(ev), evsel__leader(metric_events[i]))) - break; - if (!strcmp(metric_events[i]->name, ev->name)) { - set_bit(ev->core.idx, evlist_used); + metric_id = evsel__metric_id(ev); + evlist__for_each_entry_continue(metric_evlist, ev) { + if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) ev->metric_leader = metric_events[i]; - } } } - - return metric_events[0]; -} - -static int metricgroup__setup_events(struct list_head *groups, - bool metric_no_merge, - struct evlist *perf_evlist, - struct rblist *metric_events_list) -{ - struct metric_event *me; - struct metric_expr *expr; - int i = 0; - int ret = 0; - struct metric *m; - struct evsel *evsel, *tmp; - unsigned long *evlist_used; - - evlist_used = bitmap_zalloc(perf_evlist->core.nr_entries); - if (!evlist_used) - return -ENOMEM; - - list_for_each_entry (m, groups, nd) { - struct evsel **metric_events; - struct metric_ref *metric_refs = NULL; - - metric_events = calloc(sizeof(void *), - hashmap__size(&m->pctx.ids) + 1); - if 
(!metric_events) { - ret = -ENOMEM; - break; - } - evsel = find_evsel_group(perf_evlist, &m->pctx, - metric_no_merge, - m->has_constraint, metric_events, - evlist_used); - if (!evsel) { - pr_debug("Cannot resolve %s: %s\n", - m->metric_name, m->metric_expr); - free(metric_events); - continue; - } - for (i = 0; metric_events[i]; i++) - metric_events[i]->collect_stat = true; - me = metricgroup__lookup(metric_events_list, evsel, true); - if (!me) { - ret = -ENOMEM; - free(metric_events); - break; - } - expr = malloc(sizeof(struct metric_expr)); - if (!expr) { - ret = -ENOMEM; - free(metric_events); - break; - } - - /* - * Collect and store collected nested expressions - * for metric processing. - */ - if (m->metric_refs_cnt) { - struct metric_ref_node *ref; - - metric_refs = zalloc(sizeof(struct metric_ref) * (m->metric_refs_cnt + 1)); - if (!metric_refs) { - ret = -ENOMEM; - free(metric_events); - free(expr); - break; - } - - i = 0; - list_for_each_entry(ref, &m->metric_refs, list) { - /* - * Intentionally passing just const char pointers, - * originally from 'struct pmu_event' object. - * We don't need to change them, so there's no - * need to create our own copy. 
- */ - metric_refs[i].metric_name = ref->metric_name; - metric_refs[i].metric_expr = ref->metric_expr; - i++; - } - } - - expr->metric_refs = metric_refs; - expr->metric_expr = m->metric_expr; - expr->metric_name = m->metric_name; - expr->metric_unit = m->metric_unit; - expr->metric_events = metric_events; - expr->runtime = m->runtime; - list_add(&expr->nd, &me->head); - } - - evlist__for_each_entry_safe(perf_evlist, tmp, evsel) { - if (!test_bit(evsel->core.idx, evlist_used)) { - evlist__remove(perf_evlist, evsel); - evsel__delete(evsel); - } - } - bitmap_free(evlist_used); - - return ret; + *out_metric_events = metric_events; + return 0; } static bool match_metric(const char *n, const char *list) @@ -422,7 +343,7 @@ static bool match_metric(const char *n, const char *list) return false; } -static bool match_pe_metric(struct pmu_event *pe, const char *metric) +static bool match_pe_metric(const struct pmu_event *pe, const char *metric) { return match_metric(pe->metric_group, metric) || match_metric(pe->metric_name, metric); @@ -506,7 +427,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) putchar('\n'); } -static int metricgroup__print_pmu_event(struct pmu_event *pe, +static int metricgroup__print_pmu_event(const struct pmu_event *pe, bool metricgroups, char *filter, bool raw, bool details, struct rblist *groups, @@ -581,14 +502,14 @@ struct metricgroup_print_sys_idata { bool details; }; -typedef int (*metricgroup_sys_event_iter_fn)(struct pmu_event *pe, void *); +typedef int (*metricgroup_sys_event_iter_fn)(const struct pmu_event *pe, void *); struct metricgroup_iter_data { metricgroup_sys_event_iter_fn fn; void *data; }; -static int metricgroup__sys_event_iter(struct pmu_event *pe, void *data) +static int metricgroup__sys_event_iter(const struct pmu_event *pe, void *data) { struct metricgroup_iter_data *d = data; struct perf_pmu *pmu = NULL; @@ -607,7 +528,7 @@ static int metricgroup__sys_event_iter(struct pmu_event *pe, void *data) 
return 0; } -static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data) +static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, void *data) { struct metricgroup_print_sys_idata *d = data; @@ -616,10 +537,10 @@ static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data) } void metricgroup__print(bool metrics, bool metricgroups, char *filter, - bool raw, bool details) + bool raw, bool details, const char *pmu_name) { - struct pmu_events_map *map = pmu_events_map__find(); - struct pmu_event *pe; + const struct pmu_events_map *map = pmu_events_map__find(); + const struct pmu_event *pe; int i; struct rblist groups; struct rb_node *node, *next; @@ -642,6 +563,10 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, break; if (!pe->metric_expr) continue; + if (pmu_name && perf_pmu__is_hybrid(pe->pmu) && + strcmp(pmu_name, pe->pmu)) { + continue; + } if (metricgroup__print_pmu_event(pe, metricgroups, filter, raw, details, &groups, metriclist) < 0) @@ -686,150 +611,391 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, strlist__delete(metriclist); } -static void metricgroup__add_metric_weak_group(struct strbuf *events, - struct expr_parse_ctx *ctx) +static const char *code_characters = ",-=@"; + +static int encode_metric_id(struct strbuf *sb, const char *x) { - struct hashmap_entry *cur; - size_t bkt; - bool no_group = true, has_duration = false; + char *c; + int ret = 0; - hashmap__for_each_entry((&ctx->ids), cur, bkt) { - pr_debug("found event %s\n", (const char *)cur->key); - /* - * Duration time maps to a software event and can make - * groups not count. Always use it outside a - * group. 
- */ - if (!strcmp(cur->key, "duration_time")) { - has_duration = true; - continue; + for (; *x; x++) { + c = strchr(code_characters, *x); + if (c) { + ret = strbuf_addch(sb, '!'); + if (ret) + break; + + ret = strbuf_addch(sb, '0' + (c - code_characters)); + if (ret) + break; + } else { + ret = strbuf_addch(sb, *x); + if (ret) + break; } - strbuf_addf(events, "%s%s", - no_group ? "{" : ",", - (const char *)cur->key); - no_group = false; } - if (!no_group) { - strbuf_addf(events, "}:W"); - if (has_duration) - strbuf_addf(events, ",duration_time"); - } else if (has_duration) - strbuf_addf(events, "duration_time"); + return ret; } -static void metricgroup__add_metric_non_group(struct strbuf *events, - struct expr_parse_ctx *ctx) +static int decode_metric_id(struct strbuf *sb, const char *x) { - struct hashmap_entry *cur; - size_t bkt; - bool first = true; + const char *orig = x; + size_t i; + char c; + int ret; - hashmap__for_each_entry((&ctx->ids), cur, bkt) { - if (!first) - strbuf_addf(events, ","); - strbuf_addf(events, "%s", (const char *)cur->key); - first = false; + for (; *x; x++) { + c = *x; + if (*x == '!') { + x++; + i = *x - '0'; + if (i > strlen(code_characters)) { + pr_err("Bad metric-id encoding in: '%s'", orig); + return -1; + } + c = code_characters[i]; + } + ret = strbuf_addch(sb, c); + if (ret) + return ret; } + return 0; } -static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) +static int decode_all_metric_ids(struct evlist *perf_evlist, const char *modifier) { - static bool violate_nmi_constraint; + struct evsel *ev; + struct strbuf sb = STRBUF_INIT; + char *cur; + int ret = 0; - if (!foot) { - pr_warning("Splitting metric group %s into standalone metrics.\n", name); - violate_nmi_constraint = true; - return; - } + evlist__for_each_entry(perf_evlist, ev) { + if (!ev->metric_id) + continue; - if (!violate_nmi_constraint) - return; + ret = strbuf_setlen(&sb, 0); + if (ret) + break; - pr_warning("Try disabling the NMI 
watchdog to comply NO_NMI_WATCHDOG metric constraint:\n" - " echo 0 > /proc/sys/kernel/nmi_watchdog\n" - " perf stat ...\n" - " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); + ret = decode_metric_id(&sb, ev->metric_id); + if (ret) + break; + + free((char *)ev->metric_id); + ev->metric_id = strdup(sb.buf); + if (!ev->metric_id) { + ret = -ENOMEM; + break; + } + /* + * If the name is just the parsed event, use the metric-id to + * give a more friendly display version. + */ + if (strstr(ev->name, "metric-id=")) { + bool has_slash = false; + + free(ev->name); + for (cur = strchr(sb.buf, '@') ; cur; cur = strchr(++cur, '@')) { + *cur = '/'; + has_slash = true; + } + + if (modifier) { + if (!has_slash && !strchr(sb.buf, ':')) { + ret = strbuf_addch(&sb, ':'); + if (ret) + break; + } + ret = strbuf_addstr(&sb, modifier); + if (ret) + break; + } + ev->name = strdup(sb.buf); + if (!ev->name) { + ret = -ENOMEM; + break; + } + } + } + strbuf_release(&sb); + return ret; } -static bool metricgroup__has_constraint(struct pmu_event *pe) +static int metricgroup__build_event_string(struct strbuf *events, + const struct expr_parse_ctx *ctx, + const char *modifier, + bool has_constraint) { - if (!pe->metric_constraint) - return false; + struct hashmap_entry *cur; + size_t bkt; + bool no_group = true, has_duration = false; + int ret = 0; - if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && - sysctl__nmi_watchdog_enabled()) { - metricgroup___watchdog_constraint_hint(pe->metric_name, false); - return true; +#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) + + hashmap__for_each_entry(ctx->ids, cur, bkt) { + const char *sep, *rsep, *id = cur->key; + + pr_debug("found event %s\n", id); + /* + * Duration time maps to a software event and can make + * groups not count. Always use it outside a + * group. + */ + if (!strcmp(id, "duration_time")) { + has_duration = true; + continue; + } + /* Separate events with commas and open the group if necessary. 
*/ + if (no_group) { + if (!has_constraint) { + ret = strbuf_addch(events, '{'); + RETURN_IF_NON_ZERO(ret); + } + + no_group = false; + } else { + ret = strbuf_addch(events, ','); + RETURN_IF_NON_ZERO(ret); + } + /* + * Encode the ID as an event string. Add a qualifier for + * metric_id that is the original name except with characters + * that parse-events can't parse replaced. For example, + * 'msr@tsc@' gets added as msr/tsc,metric-id=msr!3tsc!3/ + */ + sep = strchr(id, '@'); + if (sep != NULL) { + ret = strbuf_add(events, id, sep - id); + RETURN_IF_NON_ZERO(ret); + ret = strbuf_addch(events, '/'); + RETURN_IF_NON_ZERO(ret); + rsep = strrchr(sep, '@'); + ret = strbuf_add(events, sep + 1, rsep - sep - 1); + RETURN_IF_NON_ZERO(ret); + ret = strbuf_addstr(events, ",metric-id="); + RETURN_IF_NON_ZERO(ret); + sep = rsep; + } else { + sep = strchr(id, ':'); + if (sep != NULL) { + ret = strbuf_add(events, id, sep - id); + RETURN_IF_NON_ZERO(ret); + } else { + ret = strbuf_addstr(events, id); + RETURN_IF_NON_ZERO(ret); + } + ret = strbuf_addstr(events, "/metric-id="); + RETURN_IF_NON_ZERO(ret); + } + ret = encode_metric_id(events, id); + RETURN_IF_NON_ZERO(ret); + ret = strbuf_addstr(events, "/"); + RETURN_IF_NON_ZERO(ret); + + if (sep != NULL) { + ret = strbuf_addstr(events, sep + 1); + RETURN_IF_NON_ZERO(ret); + } + if (modifier) { + ret = strbuf_addstr(events, modifier); + RETURN_IF_NON_ZERO(ret); + } } + if (has_duration) { + if (no_group) { + /* Strange case of a metric of just duration_time. 
*/ + ret = strbuf_addf(events, "duration_time"); + } else if (!has_constraint) + ret = strbuf_addf(events, "}:W,duration_time"); + else + ret = strbuf_addf(events, ",duration_time"); + } else if (!no_group && !has_constraint) + ret = strbuf_addf(events, "}:W"); - return false; + return ret; +#undef RETURN_IF_NON_ZERO } -int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused) +int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused) { return 1; } +/* + * A singly linked list on the stack of the names of metrics being + * processed. Used to identify recursion. + */ +struct visited_metric { + const char *name; + const struct visited_metric *parent; +}; + struct metricgroup_add_iter_data { struct list_head *metric_list; - const char *metric; - struct expr_ids *ids; + const char *metric_name; + const char *modifier; int *ret; bool *has_match; bool metric_no_group; + struct metric *root_metric; + const struct visited_metric *visited; + const struct pmu_events_map *map; }; +static int add_metric(struct list_head *metric_list, + const struct pmu_event *pe, + const char *modifier, + bool metric_no_group, + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map); + +/** + * resolve_metric - Locate metrics within the root metric and recursively add + * references to them. + * @metric_list: The list the metric is added to. + * @modifier: if non-null event modifiers like "u". + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @root_metric: Metrics may reference other metrics to form a tree. In this + * case the root_metric holds all the IDs and a list of referenced + * metrics. When adding a root this argument is NULL. + * @visited: A singly linked list of metric names being added that is used to + * detect recursion. 
+ * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. + */ +static int resolve_metric(struct list_head *metric_list, + const char *modifier, + bool metric_no_group, + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map) +{ + struct hashmap_entry *cur; + size_t bkt; + struct to_resolve { + /* The metric to resolve. */ + const struct pmu_event *pe; + /* + * The key in the IDs map, this may differ from in case, + * etc. from pe->metric_name. + */ + const char *key; + } *pending = NULL; + int i, ret = 0, pending_cnt = 0; + + /* + * Iterate all the parsed IDs and if there's a matching metric and it to + * the pending array. + */ + hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { + const struct pmu_event *pe; + + pe = metricgroup__find_metric(cur->key, map); + if (pe) { + pending = realloc(pending, + (pending_cnt + 1) * sizeof(struct to_resolve)); + if (!pending) + return -ENOMEM; + + pending[pending_cnt].pe = pe; + pending[pending_cnt].key = cur->key; + pending_cnt++; + } + } + + /* Remove the metric IDs from the context. */ + for (i = 0; i < pending_cnt; i++) + expr__del_id(root_metric->pctx, pending[i].key); + + /* + * Recursively add all the metrics, IDs are added to the root metric's + * context. + */ + for (i = 0; i < pending_cnt; i++) { + ret = add_metric(metric_list, pending[i].pe, modifier, metric_no_group, + root_metric, visited, map); + if (ret) + break; + } + + free(pending); + return ret; +} + +/** + * __add_metric - Add a metric to metric_list. + * @metric_list: The list the metric is added to. + * @pe: The pmu_event containing the metric to be added. + * @modifier: if non-null event modifiers like "u". + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @runtime: A special argument for the parser only known at runtime. 
+ * @root_metric: Metrics may reference other metrics to form a tree. In this + * case the root_metric holds all the IDs and a list of referenced + * metrics. When adding a root this argument is NULL. + * @visited: A singly linked list of metric names being added that is used to + * detect recursion. + * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. + */ static int __add_metric(struct list_head *metric_list, - struct pmu_event *pe, + const struct pmu_event *pe, + const char *modifier, bool metric_no_group, int runtime, - struct metric **mp, - struct expr_id *parent, - struct expr_ids *ids) + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map) { - struct metric_ref_node *ref; - struct metric *m; + const struct visited_metric *vm; + int ret; + bool is_root = !root_metric; + struct visited_metric visited_node = { + .name = pe->metric_name, + .parent = visited, + }; - if (*mp == NULL) { + for (vm = visited; vm; vm = vm->parent) { + if (!strcmp(pe->metric_name, vm->name)) { + pr_err("failed: recursion detected for %s\n", pe->metric_name); + return -1; + } + } + + if (is_root) { /* - * We got in here for the parent group, - * allocate it and put it on the list. + * This metric is the root of a tree and may reference other + * metrics that are added recursively. 
*/ - m = zalloc(sizeof(*m)); - if (!m) + root_metric = metric__new(pe, modifier, metric_no_group, runtime); + if (!root_metric) return -ENOMEM; - expr__ctx_init(&m->pctx); - m->metric_name = pe->metric_name; - m->metric_expr = pe->metric_expr; - m->metric_unit = pe->unit; - m->runtime = runtime; - m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); - INIT_LIST_HEAD(&m->metric_refs); - m->metric_refs_cnt = 0; - - parent = expr_ids__alloc(ids); - if (!parent) { - free(m); - return -EINVAL; - } - - parent->id = strdup(pe->metric_name); - if (!parent->id) { - free(m); - return -ENOMEM; - } - *mp = m; } else { + int cnt = 0; + /* - * We got here for the referenced metric, via the - * recursive metricgroup__add_metric call, add - * it to the parent group. + * This metric was referenced in a metric higher in the + * tree. Check if the same metric is already resolved in the + * metric_refs list. */ - m = *mp; + if (root_metric->metric_refs) { + for (; root_metric->metric_refs[cnt].metric_name; cnt++) { + if (!strcmp(pe->metric_name, + root_metric->metric_refs[cnt].metric_name)) + return 0; + } + } - ref = malloc(sizeof(*ref)); - if (!ref) + /* Create reference. Need space for the entry and the terminator. */ + root_metric->metric_refs = realloc(root_metric->metric_refs, + (cnt + 2) * sizeof(struct metric_ref)); + if (!root_metric->metric_refs) return -ENOMEM; /* @@ -838,54 +1004,35 @@ static int __add_metric(struct list_head *metric_list, * need to change them, so there's no need to create * our own copy. */ - ref->metric_name = pe->metric_name; - ref->metric_expr = pe->metric_expr; + root_metric->metric_refs[cnt].metric_name = pe->metric_name; + root_metric->metric_refs[cnt].metric_expr = pe->metric_expr; - list_add(&ref->list, &m->metric_refs); - m->metric_refs_cnt++; + /* Null terminate array. 
*/ + root_metric->metric_refs[cnt+1].metric_name = NULL; + root_metric->metric_refs[cnt+1].metric_expr = NULL; } - /* Force all found IDs in metric to have us as parent ID. */ - WARN_ON_ONCE(!parent); - m->pctx.parent = parent; - /* * For both the parent and referenced metrics, we parse - * all the metric's IDs and add it to the parent context. + * all the metric's IDs and add it to the root context. */ - if (expr__find_other(pe->metric_expr, NULL, &m->pctx, runtime) < 0) { - if (m->metric_refs_cnt == 0) { - expr__ctx_clear(&m->pctx); - free(m); - *mp = NULL; - } - return -EINVAL; + if (expr__find_ids(pe->metric_expr, NULL, root_metric->pctx) < 0) { + /* Broken metric. */ + ret = -EINVAL; + } else { + /* Resolve referenced metrics. */ + ret = resolve_metric(metric_list, modifier, metric_no_group, root_metric, + &visited_node, map); } - /* - * We add new group only in the 'parent' call, - * so bail out for referenced metric case. - */ - if (m->metric_refs_cnt) - return 0; - - if (list_empty(metric_list)) - list_add(&m->nd, metric_list); - else { - struct list_head *pos; - - /* Place the largest groups at the front. 
*/ - list_for_each_prev(pos, metric_list) { - struct metric *old = list_entry(pos, struct metric, nd); + if (ret) { + if (is_root) + metric__free(root_metric); - if (hashmap__size(&m->pctx.ids) <= - hashmap__size(&old->pctx.ids)) - break; - } - list_add(&m->nd, pos); - } + } else if (is_root) + list_add(&root_metric->nd, metric_list); - return 0; + return ret; } #define map_for_each_event(__pe, __idx, __map) \ @@ -900,10 +1047,10 @@ static int __add_metric(struct list_head *metric_list, (match_metric(__pe->metric_group, __metric) || \ match_metric(__pe->metric_name, __metric))) -struct pmu_event *metricgroup__find_metric(const char *metric, - struct pmu_events_map *map) +const struct pmu_event *metricgroup__find_metric(const char *metric, + const struct pmu_events_map *map) { - struct pmu_event *pe; + const struct pmu_event *pe; int i; map_for_each_event(pe, i, map) { @@ -914,136 +1061,21 @@ struct pmu_event *metricgroup__find_metric(const char *metric, return NULL; } -static int recursion_check(struct metric *m, const char *id, struct expr_id **parent, - struct expr_ids *ids) -{ - struct expr_id_data *data; - struct expr_id *p; - int ret; - - /* - * We get the parent referenced by 'id' argument and - * traverse through all the parent object IDs to check - * if we already processed 'id', if we did, it's recursion - * and we fail. - */ - ret = expr__get_id(&m->pctx, id, &data); - if (ret) - return ret; - - p = expr_id_data__parent(data); - - while (p->parent) { - if (!strcmp(p->id, id)) { - pr_err("failed: recursion detected for %s\n", id); - return -1; - } - p = p->parent; - } - - /* - * If we are over the limit of static entris, the metric - * is too difficult/nested to process, fail as well. - */ - p = expr_ids__alloc(ids); - if (!p) { - pr_err("failed: too many nested metrics\n"); - return -EINVAL; - } - - p->id = strdup(id); - p->parent = expr_id_data__parent(data); - *parent = p; - - return p->id ? 
0 : -ENOMEM; -} - static int add_metric(struct list_head *metric_list, - struct pmu_event *pe, + const struct pmu_event *pe, + const char *modifier, bool metric_no_group, - struct metric **mp, - struct expr_id *parent, - struct expr_ids *ids); - -static int __resolve_metric(struct metric *m, - bool metric_no_group, - struct list_head *metric_list, - struct pmu_events_map *map, - struct expr_ids *ids) + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map) { - struct hashmap_entry *cur; - size_t bkt; - bool all; - int ret; - - /* - * Iterate all the parsed IDs and if there's metric, - * add it to the context. - */ - do { - all = true; - hashmap__for_each_entry((&m->pctx.ids), cur, bkt) { - struct expr_id *parent; - struct pmu_event *pe; - - pe = metricgroup__find_metric(cur->key, map); - if (!pe) - continue; - - ret = recursion_check(m, cur->key, &parent, ids); - if (ret) - return ret; - - all = false; - /* The metric key itself needs to go out.. */ - expr__del_id(&m->pctx, cur->key); - - /* ... and it gets resolved to the parent context. */ - ret = add_metric(metric_list, pe, metric_no_group, &m, parent, ids); - if (ret) - return ret; - - /* - * We added new metric to hashmap, so we need - * to break the iteration and start over. 
- */ - break; - } - } while (!all); - - return 0; -} - -static int resolve_metric(bool metric_no_group, - struct list_head *metric_list, - struct pmu_events_map *map, - struct expr_ids *ids) -{ - struct metric *m; - int err; - - list_for_each_entry(m, metric_list, nd) { - err = __resolve_metric(m, metric_no_group, metric_list, map, ids); - if (err) - return err; - } - return 0; -} - -static int add_metric(struct list_head *metric_list, - struct pmu_event *pe, - bool metric_no_group, - struct metric **m, - struct expr_id *parent, - struct expr_ids *ids) -{ - struct metric *orig = *m; int ret = 0; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); if (!strstr(pe->metric_expr, "?")) { - ret = __add_metric(metric_list, pe, metric_no_group, 1, m, parent, ids); + ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0, + root_metric, visited, map); } else { int j, count; @@ -1054,29 +1086,25 @@ static int add_metric(struct list_head *metric_list, * those events to metric_list. 
*/ - for (j = 0; j < count && !ret; j++, *m = orig) - ret = __add_metric(metric_list, pe, metric_no_group, j, m, parent, ids); + for (j = 0; j < count && !ret; j++) + ret = __add_metric(metric_list, pe, modifier, metric_no_group, j, + root_metric, visited, map); } return ret; } -static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe, +static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe, void *data) { struct metricgroup_add_iter_data *d = data; - struct metric *m = NULL; int ret; - if (!match_pe_metric(pe, d->metric)) + if (!match_pe_metric(pe, d->metric_name)) return 0; - ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids); - if (ret) - goto out; - - ret = resolve_metric(d->metric_no_group, - d->metric_list, NULL, d->ids); + ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group, + d->root_metric, d->visited, d->map); if (ret) goto out; @@ -1087,32 +1115,47 @@ out: return ret; } -static int metricgroup__add_metric(const char *metric, bool metric_no_group, - struct strbuf *events, +static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, + const struct list_head *r) +{ + const struct metric *left = container_of(l, struct metric, nd); + const struct metric *right = container_of(r, struct metric, nd); + + return hashmap__size(right->pctx->ids) - hashmap__size(left->pctx->ids); +} + +/** + * metricgroup__add_metric - Find and add a metric, or a metric group. + * @metric_name: The name of the metric or metric group. For example, "IPC" + * could be the name of a metric and "TopDownL1" the name of a + * metric group. + * @modifier: if non-null event modifiers like "u". + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @metric_list: The list that the metric or metric group are added to. 
+ * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. + */ +static int metricgroup__add_metric(const char *metric_name, const char *modifier, + bool metric_no_group, struct list_head *metric_list, - struct pmu_events_map *map) + const struct pmu_events_map *map) { - struct expr_ids ids = { .cnt = 0, }; - struct pmu_event *pe; - struct metric *m; + const struct pmu_event *pe; LIST_HEAD(list); int i, ret; bool has_match = false; - map_for_each_metric(pe, i, map, metric) { + /* + * Iterate over all metrics seeing if metric matches either the name or + * group. When it does add the metric to the list. + */ + map_for_each_metric(pe, i, map, metric_name) { has_match = true; - m = NULL; - - ret = add_metric(&list, pe, metric_no_group, &m, NULL, &ids); - if (ret) - goto out; - - /* - * Process any possible referenced metrics - * included in the expression. - */ - ret = resolve_metric(metric_no_group, - &list, map, &ids); + ret = add_metric(&list, pe, modifier, metric_no_group, + /*root_metric=*/NULL, + /*visited_metrics=*/NULL, map); if (ret) goto out; } @@ -1122,34 +1165,20 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, .fn = metricgroup__add_metric_sys_event_iter, .data = (void *) &(struct metricgroup_add_iter_data) { .metric_list = &list, - .metric = metric, + .metric_name = metric_name, + .modifier = modifier, .metric_no_group = metric_no_group, - .ids = &ids, .has_match = &has_match, .ret = &ret, + .map = map, }, }; pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); } /* End of pmu events. 
*/ - if (!has_match) { + if (!has_match) ret = -EINVAL; - goto out; - } - - list_for_each_entry(m, &list, nd) { - if (events->len > 0) - strbuf_addf(events, ","); - - if (m->has_constraint) { - metricgroup__add_metric_non_group(events, - &m->pctx); - } else { - metricgroup__add_metric_weak_group(events, - &m->pctx); - } - } out: /* @@ -1157,95 +1186,315 @@ out: * even if it's failed */ list_splice(&list, metric_list); - expr_ids__exit(&ids); return ret; } +/** + * metricgroup__add_metric_list - Find and add metrics, or metric groups, + * specified in a list. + * @list: the list of metrics or metric groups. For example, "IPC,CPI,TopDownL1" + * would match the IPC and CPI metrics, and TopDownL1 would match all + * the metrics in the TopDownL1 group. + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @metric_list: The list that metrics are added to. + * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. 
+ */ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, - struct strbuf *events, struct list_head *metric_list, - struct pmu_events_map *map) + const struct pmu_events_map *map) { - char *llist, *nlist, *p; - int ret = -EINVAL; + char *list_itr, *list_copy, *metric_name, *modifier; + int ret, count = 0; - nlist = strdup(list); - if (!nlist) + list_copy = strdup(list); + if (!list_copy) return -ENOMEM; - llist = nlist; + list_itr = list_copy; + + while ((metric_name = strsep(&list_itr, ",")) != NULL) { + modifier = strchr(metric_name, ':'); + if (modifier) + *modifier++ = '\0'; - strbuf_init(events, 100); - strbuf_addf(events, "%s", ""); + ret = metricgroup__add_metric(metric_name, modifier, + metric_no_group, metric_list, + map); + if (ret == -EINVAL) + pr_err("Cannot find metric or group `%s'\n", metric_name); - while ((p = strsep(&llist, ",")) != NULL) { - ret = metricgroup__add_metric(p, metric_no_group, events, - metric_list, map); - if (ret == -EINVAL) { - fprintf(stderr, "Cannot find metric or group `%s'\n", - p); + if (ret) break; - } + + count++; } - free(nlist); + free(list_copy); - if (!ret) + if (!ret) { + /* + * Warn about nmi_watchdog if any parsed metrics had the + * NO_NMI_WATCHDOG constraint. + */ metricgroup___watchdog_constraint_hint(NULL, true); - + /* No metrics. 
*/ + if (count == 0) + return -EINVAL; + } return ret; } -static void metric__free_refs(struct metric *metric) +static void metricgroup__free_metrics(struct list_head *metric_list) { - struct metric_ref_node *ref, *tmp; + struct metric *m, *tmp; - list_for_each_entry_safe(ref, tmp, &metric->metric_refs, list) { - list_del(&ref->list); - free(ref); + list_for_each_entry_safe (m, tmp, metric_list, nd) { + list_del_init(&m->nd); + metric__free(m); } } -static void metricgroup__free_metrics(struct list_head *metric_list) +/** + * build_combined_expr_ctx - Make an expr_parse_ctx with all has_constraint + * metric IDs, as the IDs are held in a set, + * duplicates will be removed. + * @metric_list: List to take metrics from. + * @combined: Out argument for result. + */ +static int build_combined_expr_ctx(const struct list_head *metric_list, + struct expr_parse_ctx **combined) { - struct metric *m, *tmp; + struct hashmap_entry *cur; + size_t bkt; + struct metric *m; + char *dup; + int ret; - list_for_each_entry_safe (m, tmp, metric_list, nd) { - metric__free_refs(m); - expr__ctx_clear(&m->pctx); - list_del_init(&m->nd); - free(m); + *combined = expr__ctx_new(); + if (!*combined) + return -ENOMEM; + + list_for_each_entry(m, metric_list, nd) { + if (m->has_constraint && !m->modifier) { + hashmap__for_each_entry(m->pctx->ids, cur, bkt) { + dup = strdup(cur->key); + if (!dup) { + ret = -ENOMEM; + goto err_out; + } + ret = expr__add_id(*combined, dup); + if (ret) + goto err_out; + } + } } + return 0; +err_out: + expr__ctx_free(*combined); + *combined = NULL; + return ret; +} + +/** + * parse_ids - Build the event string for the ids and parse them creating an + * evlist. The encoded metric_ids are decoded. + * @fake_pmu: used when testing metrics not supported by the current CPU. + * @ids: the event identifiers parsed from a metric. + * @modifier: any modifiers added to the events. + * @has_constraint: false if events should be placed in a weak group. 
+ * @out_evlist: the created list of events. + */ +static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, + const char *modifier, bool has_constraint, struct evlist **out_evlist) +{ + struct parse_events_error parse_error; + struct evlist *parsed_evlist; + struct strbuf events = STRBUF_INIT; + int ret; + + *out_evlist = NULL; + if (hashmap__size(ids->ids) == 0) { + char *tmp; + /* + * No ids/events in the expression parsing context. Events may + * have been removed because of constant evaluation, e.g.: + * event1 if #smt_on else 0 + * Add a duration_time event to avoid a parse error on an empty + * string. + */ + tmp = strdup("duration_time"); + if (!tmp) + return -ENOMEM; + + ids__insert(ids->ids, tmp); + } + ret = metricgroup__build_event_string(&events, ids, modifier, + has_constraint); + if (ret) + return ret; + + parsed_evlist = evlist__new(); + if (!parsed_evlist) { + ret = -ENOMEM; + goto err_out; + } + pr_debug("Parsing metric events '%s'\n", events.buf); + parse_events_error__init(&parse_error); + ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu); + if (ret) { + parse_events_error__print(&parse_error, events.buf); + goto err_out; + } + ret = decode_all_metric_ids(parsed_evlist, modifier); + if (ret) + goto err_out; + + *out_evlist = parsed_evlist; + parsed_evlist = NULL; +err_out: + parse_events_error__exit(&parse_error); + evlist__delete(parsed_evlist); + strbuf_release(&events); + return ret; } static int parse_groups(struct evlist *perf_evlist, const char *str, bool metric_no_group, bool metric_no_merge, struct perf_pmu *fake_pmu, - struct rblist *metric_events, - struct pmu_events_map *map) + struct rblist *metric_events_list, + const struct pmu_events_map *map) { - struct parse_events_error parse_error; - struct strbuf extra_events; + struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); + struct metric *m; int ret; - if (metric_events->nr_entries == 0) - metricgroup__rblist_init(metric_events); + if 
(metric_events_list->nr_entries == 0) + metricgroup__rblist_init(metric_events_list); ret = metricgroup__add_metric_list(str, metric_no_group, - &extra_events, &metric_list, map); + &metric_list, map); if (ret) goto out; - pr_debug("adding %s\n", extra_events.buf); - bzero(&parse_error, sizeof(parse_error)); - ret = __parse_events(perf_evlist, extra_events.buf, &parse_error, fake_pmu); - if (ret) { - parse_events_print_error(&parse_error, extra_events.buf); - goto out; + + /* Sort metrics from largest to smallest. */ + list_sort(NULL, &metric_list, metric_list_cmp); + + if (!metric_no_merge) { + struct expr_parse_ctx *combined = NULL; + + ret = build_combined_expr_ctx(&metric_list, &combined); + + if (!ret && combined && hashmap__size(combined->ids)) { + ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL, + /*has_constraint=*/true, + &combined_evlist); + } + if (combined) + expr__ctx_free(combined); + + if (ret) + goto out; } - ret = metricgroup__setup_events(&metric_list, metric_no_merge, - perf_evlist, metric_events); + + list_for_each_entry(m, &metric_list, nd) { + struct metric_event *me; + struct evsel **metric_events; + struct evlist *metric_evlist = NULL; + struct metric *n; + struct metric_expr *expr; + + if (combined_evlist && m->has_constraint) { + metric_evlist = combined_evlist; + } else if (!metric_no_merge) { + /* + * See if the IDs for this metric are a subset of an + * earlier metric. 
+ */ + list_for_each_entry(n, &metric_list, nd) { + if (m == n) + break; + + if (n->evlist == NULL) + continue; + + if ((!m->modifier && n->modifier) || + (m->modifier && !n->modifier) || + (m->modifier && n->modifier && + strcmp(m->modifier, n->modifier))) + continue; + + if (expr__subset_of_ids(n->pctx, m->pctx)) { + pr_debug("Events in '%s' fully contained within '%s'\n", + m->metric_name, n->metric_name); + metric_evlist = n->evlist; + break; + } + + } + } + if (!metric_evlist) { + ret = parse_ids(fake_pmu, m->pctx, m->modifier, + m->has_constraint, &m->evlist); + if (ret) + goto out; + + metric_evlist = m->evlist; + } + ret = setup_metric_events(m->pctx->ids, metric_evlist, &metric_events); + if (ret) { + pr_debug("Cannot resolve IDs for %s: %s\n", + m->metric_name, m->metric_expr); + goto out; + } + + me = metricgroup__lookup(metric_events_list, metric_events[0], true); + + expr = malloc(sizeof(struct metric_expr)); + if (!expr) { + ret = -ENOMEM; + free(metric_events); + goto out; + } + + expr->metric_refs = m->metric_refs; + m->metric_refs = NULL; + expr->metric_expr = m->metric_expr; + if (m->modifier) { + char *tmp; + + if (asprintf(&tmp, "%s:%s", m->metric_name, m->modifier) < 0) + expr->metric_name = NULL; + else + expr->metric_name = tmp; + } else + expr->metric_name = strdup(m->metric_name); + + if (!expr->metric_name) { + ret = -ENOMEM; + free(metric_events); + goto out; + } + expr->metric_unit = m->metric_unit; + expr->metric_events = metric_events; + expr->runtime = m->pctx->runtime; + list_add(&expr->nd, &me->head); + } + + + if (combined_evlist) { + evlist__splice_list_tail(perf_evlist, &combined_evlist->core.entries); + evlist__delete(combined_evlist); + } + + list_for_each_entry(m, &metric_list, nd) { + if (m->evlist) + evlist__splice_list_tail(perf_evlist, &m->evlist->core.entries); + } + out: metricgroup__free_metrics(&metric_list); - strbuf_release(&extra_events); return ret; } @@ -1256,14 +1505,14 @@ int metricgroup__parse_groups(const 
struct option *opt, struct rblist *metric_events) { struct evlist *perf_evlist = *(struct evlist **)opt->value; - struct pmu_events_map *map = pmu_events_map__find(); + const struct pmu_events_map *map = pmu_events_map__find(); return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge, NULL, metric_events, map); } int metricgroup__parse_groups_test(struct evlist *evlist, - struct pmu_events_map *map, + const struct pmu_events_map *map, const char *str, bool metric_no_group, bool metric_no_merge, @@ -1275,8 +1524,8 @@ int metricgroup__parse_groups_test(struct evlist *evlist, bool metricgroup__has_metric(const char *metric) { - struct pmu_events_map *map = pmu_events_map__find(); - struct pmu_event *pe; + const struct pmu_events_map *map = pmu_events_map__find(); + const struct pmu_event *pe; int i; if (!map) @@ -1328,7 +1577,10 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, return -ENOMEM; new_expr->metric_expr = old_expr->metric_expr; - new_expr->metric_name = old_expr->metric_name; + new_expr->metric_name = strdup(old_expr->metric_name); + if (!new_expr->metric_name) + return -ENOMEM; + new_expr->metric_unit = old_expr->metric_unit; new_expr->runtime = old_expr->runtime; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index cc4a92492a61..2b42b778d1bf 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -14,24 +14,51 @@ struct rblist; struct pmu_events_map; struct cgroup; +/** + * A node in a rblist keyed by the evsel. The global rblist of metric events + * generally exists in perf_stat_config. The evsel is looked up in the rblist + * yielding a list of metric_expr. + */ struct metric_event { struct rb_node nd; struct evsel *evsel; struct list_head head; /* list of metric_expr */ }; +/** + * A metric referenced by a metric_expr. When parsing a metric expression IDs + * will be looked up, matching either a value (from metric_events) or a + * metric_ref. 
A metric_ref will then be parsed recursively. The metric_refs and + * metric_events need to be known before parsing so that their values may be + * placed in the parse context for lookup. + */ struct metric_ref { const char *metric_name; const char *metric_expr; }; +/** + * One in a list of metric_expr associated with an evsel. The data is used to + * generate a metric value during stat output. + */ struct metric_expr { struct list_head nd; + /** The expression to parse, for example, "instructions/cycles". */ const char *metric_expr; + /** The name of the meric such as "IPC". */ const char *metric_name; + /** + * The "ScaleUnit" that scales and adds a unit to the metric during + * output. For example, "6.4e-05MiB" means to scale the resulting metric + * by 6.4e-05 (typically converting a unit like cache lines to something + * more human intelligible) and then add "MiB" afterward when displayed. + */ const char *metric_unit; + /** Null terminated array of events used by the metric. */ struct evsel **metric_events; + /** Null terminated array of referenced metrics. */ struct metric_ref *metric_refs; + /** A value substituted for '?' during parsing. 
*/ int runtime; }; @@ -43,19 +70,19 @@ int metricgroup__parse_groups(const struct option *opt, bool metric_no_group, bool metric_no_merge, struct rblist *metric_events); -struct pmu_event *metricgroup__find_metric(const char *metric, - struct pmu_events_map *map); +const struct pmu_event *metricgroup__find_metric(const char *metric, + const struct pmu_events_map *map); int metricgroup__parse_groups_test(struct evlist *evlist, - struct pmu_events_map *map, + const struct pmu_events_map *map, const char *str, bool metric_no_group, bool metric_no_merge, struct rblist *metric_events); void metricgroup__print(bool metrics, bool groups, char *filter, - bool raw, bool details); + bool raw, bool details, const char *pmu_name); bool metricgroup__has_metric(const char *metric); -int arch_get_runtimeparam(struct pmu_event *pe __maybe_unused); +int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused); void metricgroup__rblist_exit(struct rblist *metric_events); int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 512dc8b9c168..23ecdba9e670 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -350,3 +350,14 @@ int perf_mmap__push(struct mmap *md, void *to, out: return rc; } + +int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone) +{ + clone->nbits = original->nbits; + clone->bits = bitmap_zalloc(original->nbits); + if (!clone->bits) + return -ENOMEM; + + memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original)); + return 0; +} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index af33118354dd..8e259b9610f8 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -64,4 +64,7 @@ size_t mmap__mmap_len(struct mmap *map); void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag); +int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, + struct mmap_cpu_mask *clone); + 
#endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index b234d95fb10a..9fc86971027b 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -38,7 +38,8 @@ static void config_hybrid_attr(struct perf_event_attr *attr, static int create_event_hybrid(__u32 config_type, int *idx, struct list_head *list, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, const char *name, + const char *metric_id, struct list_head *config_terms, struct perf_pmu *pmu) { @@ -47,7 +48,7 @@ static int create_event_hybrid(__u32 config_type, int *idx, __u64 config = attr->config; config_hybrid_attr(attr, config_type, pmu->type); - evsel = parse_events__add_event_hybrid(list, idx, attr, name, + evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, pmu, config_terms); if (evsel) evsel->pmu_name = strdup(pmu->name); @@ -70,7 +71,8 @@ static int pmu_cmp(struct parse_events_state *parse_state, static int add_hw_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms) + const char *name, const char *metric_id, + struct list_head *config_terms) { struct perf_pmu *pmu; int ret; @@ -84,7 +86,7 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HARDWARE, &parse_state->idx, list, attr, name, - &terms, pmu); + metric_id, &terms, pmu); free_config_terms(&terms); if (ret) return ret; @@ -94,14 +96,16 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, } static int create_raw_event_hybrid(int *idx, struct list_head *list, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, + const char *name, + const char *metric_id, struct list_head *config_terms, struct perf_pmu *pmu) { struct evsel *evsel; attr->type = pmu->type; - evsel = 
parse_events__add_event_hybrid(list, idx, attr, name, + evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, pmu, config_terms); if (evsel) evsel->pmu_name = strdup(pmu->name); @@ -113,7 +117,8 @@ static int create_raw_event_hybrid(int *idx, struct list_head *list, static int add_raw_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms) + const char *name, const char *metric_id, + struct list_head *config_terms) { struct perf_pmu *pmu; int ret; @@ -126,7 +131,7 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, copy_config_terms(&terms, config_terms); ret = create_raw_event_hybrid(&parse_state->idx, list, attr, - name, &terms, pmu); + name, metric_id, &terms, pmu); free_config_terms(&terms); if (ret) return ret; @@ -138,7 +143,8 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms, + const char *name, const char *metric_id, + struct list_head *config_terms, bool *hybrid) { *hybrid = false; @@ -150,16 +156,18 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, *hybrid = true; if (attr->type != PERF_TYPE_RAW) { - return add_hw_hybrid(parse_state, list, attr, name, + return add_hw_hybrid(parse_state, list, attr, name, metric_id, config_terms); } - return add_raw_hybrid(parse_state, list, attr, name, + return add_raw_hybrid(parse_state, list, attr, name, metric_id, config_terms); } int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, + const char *name, + const char *metric_id, struct list_head *config_terms, bool *hybrid, struct parse_events_state *parse_state) @@ -180,7 +188,7 @@ int parse_events__add_cache_hybrid(struct list_head 
*list, int *idx, copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list, - attr, name, &terms, pmu); + attr, name, metric_id, &terms, pmu); free_config_terms(&terms); if (ret) return ret; diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h index f33bd67aa851..cbc05fec02a2 100644 --- a/tools/perf/util/parse-events-hybrid.h +++ b/tools/perf/util/parse-events-hybrid.h @@ -11,11 +11,13 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms, + const char *name, const char *metric_id, + struct list_head *config_terms, bool *hybrid); int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, + const char *name, const char *metric_id, struct list_head *config_terms, bool *hybrid, struct parse_events_state *parse_state); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 51a2219df601..ba74fdf74af9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -19,8 +19,6 @@ #include <subcmd/exec-cmd.h> #include "string2.h" #include "strlist.h" -#include "symbol.h" -#include "header.h" #include "bpf-loader.h" #include "debug.h" #include <api/fs/tracing_path.h> @@ -193,39 +191,6 @@ static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir) #define MAX_EVENT_LENGTH 512 -void parse_events__handle_error(struct parse_events_error *err, int idx, - char *str, char *help) -{ - if (WARN(!str, "WARNING: failed to provide error string\n")) { - free(help); - return; - } - switch (err->num_errors) { - case 0: - err->idx = idx; - err->str = str; - err->help = help; - break; - case 1: - err->first_idx = err->idx; - err->idx = idx; - err->first_str = err->str; - err->str = str; - err->first_help = err->help; - err->help = help; - break; - 
default: - pr_debug("Multiple errors dropping message: %s (%s)\n", - err->str, err->help); - free(err->str); - err->str = str; - free(err->help); - err->help = help; - break; - } - err->num_errors++; -} - struct tracepoint_path *tracepoint_id_to_path(u64 config) { struct tracepoint_path *path = NULL; @@ -334,12 +299,7 @@ const char *event_type(int type) return "unknown"; } -static int parse_events__is_name_term(struct parse_events_term *term) -{ - return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; -} - -static char *get_config_name(struct list_head *head_terms) +static char *get_config_str(struct list_head *head_terms, int type_term) { struct parse_events_term *term; @@ -347,17 +307,27 @@ static char *get_config_name(struct list_head *head_terms) return NULL; list_for_each_entry(term, head_terms, list) - if (parse_events__is_name_term(term)) + if (term->type_term == type_term) return term->val.str; return NULL; } +static char *get_config_metric_id(struct list_head *head_terms) +{ + return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_METRIC_ID); +} + +static char *get_config_name(struct list_head *head_terms) +{ + return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME); +} + static struct evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, bool init_attr, - char *name, struct perf_pmu *pmu, + const char *name, const char *metric_id, struct perf_pmu *pmu, struct list_head *config_terms, bool auto_merge_stats, const char *cpu_list) { @@ -386,6 +356,9 @@ __add_event(struct list_head *list, int *idx, if (name) evsel->name = strdup(name); + if (metric_id) + evsel->metric_id = strdup(metric_id); + if (config_terms) list_splice_init(config_terms, &evsel->config_terms); @@ -396,18 +369,21 @@ __add_event(struct list_head *list, int *idx, } struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu) + const char *name, const char *metric_id, + struct perf_pmu *pmu) { - 
return __add_event(NULL, &idx, attr, false, name, pmu, NULL, false, - NULL); + return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, + metric_id, pmu, /*config_terms=*/NULL, + /*auto_merge_stats=*/false, /*cpu_list=*/NULL); } static int add_event(struct list_head *list, int *idx, - struct perf_event_attr *attr, char *name, - struct list_head *config_terms) + struct perf_event_attr *attr, const char *name, + const char *metric_id, struct list_head *config_terms) { - return __add_event(list, idx, attr, true, name, NULL, config_terms, - false, NULL) ? 0 : -ENOMEM; + return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, + /*pmu=*/NULL, config_terms, + /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM; } static int add_event_tool(struct list_head *list, int *idx, @@ -419,13 +395,17 @@ static int add_event_tool(struct list_head *list, int *idx, .config = PERF_COUNT_SW_DUMMY, }; - evsel = __add_event(list, idx, &attr, true, NULL, NULL, NULL, false, - "0"); + evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL, + /*metric_id=*/NULL, /*pmu=*/NULL, + /*config_terms=*/NULL, /*auto_merge_stats=*/false, + /*cpu_list=*/"0"); if (!evsel) return -ENOMEM; evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME) - evsel->unit = "ns"; + if (tool_event == PERF_TOOL_DURATION_TIME) { + free((char *)evsel->unit); + evsel->unit = strdup("ns"); + } return 0; } @@ -466,7 +446,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, { struct perf_event_attr attr; LIST_HEAD(config_terms); - char name[MAX_NAME_LEN], *config_name; + char name[MAX_NAME_LEN]; + const char *config_name, *metric_id; int cache_type = -1, cache_op = -1, cache_result = -1; char *op_result[2] = { op_result1, op_result2 }; int i, n, ret; @@ -531,13 +512,17 @@ int parse_events_add_cache(struct list_head *list, int *idx, return -ENOMEM; } + metric_id = get_config_metric_id(head_config); ret = 
parse_events__add_cache_hybrid(list, idx, &attr, - config_name ? : name, &config_terms, + config_name ? : name, + metric_id, + &config_terms, &hybrid, parse_state); if (hybrid) goto out_free_terms; - ret = add_event(list, idx, &attr, config_name ? : name, &config_terms); + ret = add_event(list, idx, &attr, config_name ? : name, metric_id, + &config_terms); out_free_terms: free_config_terms(&config_terms); return ret; @@ -571,7 +556,7 @@ static void tracepoint_error(struct parse_events_error *e, int err, } tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name); - parse_events__handle_error(e, 0, strdup(str), strdup(help)); + parse_events_error__handle(e, 0, strdup(str), strdup(help)); } static int add_tracepoint(struct list_head *list, int *idx, @@ -795,7 +780,7 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, return 0; errout: - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -815,7 +800,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state, int err; if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) { - parse_events__handle_error(parse_state->error, term->err_term, + parse_events_error__handle(parse_state->error, term->err_term, strdup("Invalid config term for BPF object"), NULL); return -EINVAL; @@ -835,7 +820,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state, else idx = term->err_term + error_pos; - parse_events__handle_error(parse_state->error, idx, + parse_events_error__handle(parse_state->error, idx, strdup(errbuf), strdup( "Hint:\tValid config terms:\n" @@ -907,7 +892,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, -err, errbuf, sizeof(errbuf)); - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -931,7 +916,7 @@ int 
parse_events_load_bpf_obj(struct parse_events_state *parse_state, struct bpf_object *obj __maybe_unused, struct list_head *head_config __maybe_unused) { - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup("BPF support is not compiled"), strdup("Make sure libbpf-devel is available at build time.")); return -ENOTSUP; @@ -943,7 +928,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, bool source __maybe_unused, struct list_head *head_config __maybe_unused) { - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup("BPF support is not compiled"), strdup("Make sure libbpf-devel is available at build time.")); return -ENOTSUP; @@ -1014,7 +999,8 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, attr.type = PERF_TYPE_BREAKPOINT; attr.sample_period = 1; - return add_event(list, idx, &attr, NULL, NULL); + return add_event(list, idx, &attr, /*name=*/NULL, /*mertic_id=*/NULL, + /*config_terms=*/NULL); } static int check_type_val(struct parse_events_term *term, @@ -1025,7 +1011,7 @@ static int check_type_val(struct parse_events_term *term, return 0; if (err) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, type == PARSE_EVENTS__TERM_TYPE_NUM ? 
strdup("expected numeric value") : strdup("expected string value"), @@ -1059,6 +1045,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output", [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", + [PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id", }; static bool config_term_shrinked; @@ -1069,7 +1056,7 @@ config_term_avail(int term_type, struct parse_events_error *err) char *err_str; if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) { - parse_events__handle_error(err, -1, + parse_events_error__handle(err, -1, strdup("Invalid term_type"), NULL); return false; } @@ -1081,6 +1068,7 @@ config_term_avail(int term_type, struct parse_events_error *err) case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_NAME: + case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_PERCORE: return true; @@ -1091,7 +1079,7 @@ config_term_avail(int term_type, struct parse_events_error *err) /* term_type is validated so indexing is safe */ if (asprintf(&err_str, "'%s' is not usable in 'perf stat'", config_term_names[term_type]) >= 0) - parse_events__handle_error(err, -1, err_str, NULL); + parse_events_error__handle(err, -1, err_str, NULL); return false; } } @@ -1135,7 +1123,7 @@ do { \ if (strcmp(term->val.str, "no") && parse_branch_str(term->val.str, &attr->branch_sample_type)) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("invalid branch sample type"), NULL); return -EINVAL; @@ -1144,7 +1132,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_TIME: CHECK_TYPE_VAL(NUM); if (term->val.num > 1) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -1171,6 +1159,9 @@ do { \ case 
PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; + case PARSE_EVENTS__TERM_TYPE_METRIC_ID: + CHECK_TYPE_VAL(STR); + break; case PARSE_EVENTS__TERM_TYPE_MAX_STACK: CHECK_TYPE_VAL(NUM); break; @@ -1180,7 +1171,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_PERCORE: CHECK_TYPE_VAL(NUM); if ((unsigned int)term->val.num > 1) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -1192,14 +1183,14 @@ do { \ case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: CHECK_TYPE_VAL(NUM); if (term->val.num > UINT_MAX) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("too big"), NULL); return -EINVAL; } break; default: - parse_events__handle_error(err, term->err_term, + parse_events_error__handle(err, term->err_term, strdup("unknown term"), parse_events_formats_error_string(NULL)); return -EINVAL; @@ -1253,7 +1244,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, return config_term_common(attr, term, err); default: if (err) { - parse_events__handle_error(err, term->err_term, + parse_events_error__handle(err, term->err_term, strdup("unknown term"), strdup("valid terms: call-graph,stack-size\n")); } @@ -1440,6 +1431,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, { struct perf_event_attr attr; LIST_HEAD(config_terms); + const char *name, *metric_id; bool hybrid; int ret; @@ -1456,14 +1448,16 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, return -ENOMEM; } + name = get_config_name(head_config); + metric_id = get_config_metric_id(head_config); ret = parse_events__add_numeric_hybrid(parse_state, list, &attr, - get_config_name(head_config), + name, metric_id, &config_terms, &hybrid); if (hybrid) goto out_free_terms; - ret = add_event(list, &parse_state->idx, &attr, - get_config_name(head_config), &config_terms); + ret = add_event(list, &parse_state->idx, &attr, name, 
metric_id, + &config_terms); out_free_terms: free_config_terms(&config_terms); return ret; @@ -1471,7 +1465,7 @@ out_free_terms: int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, - enum perf_tool_event tool_event) + int tool_event) { return add_event_tool(list, &parse_state->idx, tool_event); } @@ -1549,7 +1543,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (asprintf(&err_str, "Cannot find PMU `%s'. Missing kernel support?", name) >= 0) - parse_events__handle_error(err, 0, err_str, NULL); + parse_events_error__handle(err, 0, err_str, NULL); return -EINVAL; } @@ -1564,8 +1558,11 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &parse_state->idx, &attr, true, NULL, - pmu, NULL, auto_merge_stats, NULL); + evsel = __add_event(list, &parse_state->idx, &attr, + /*init_attr=*/true, /*name=*/NULL, + /*metric_id=*/NULL, pmu, + /*config_terms=*/NULL, auto_merge_stats, + /*cpu_list=*/NULL); if (evsel) { evsel->pmu_name = name ? 
strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; @@ -1618,9 +1615,10 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return -EINVAL; } - evsel = __add_event(list, &parse_state->idx, &attr, true, - get_config_name(head_config), pmu, - &config_terms, auto_merge_stats, NULL); + evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, + get_config_name(head_config), + get_config_metric_id(head_config), pmu, + &config_terms, auto_merge_stats, /*cpu_list=*/NULL); if (!evsel) return -ENOMEM; @@ -1634,7 +1632,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (parse_state->fake_pmu) return 0; - evsel->unit = info.unit; + free((char *)evsel->unit); + evsel->unit = strdup(info.unit); evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; @@ -1644,44 +1643,50 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, } int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - char *str, struct list_head **listp) + char *str, struct list_head *head, + struct list_head **listp) { struct parse_events_term *term; - struct list_head *list; + struct list_head *list = NULL; struct perf_pmu *pmu = NULL; int ok = 0; + char *config; *listp = NULL; + + if (!head) { + head = malloc(sizeof(struct list_head)); + if (!head) + goto out_err; + + INIT_LIST_HEAD(head); + } + config = strdup(str); + if (!config) + goto out_err; + + if (parse_events_term__num(&term, + PARSE_EVENTS__TERM_TYPE_USER, + config, 1, false, &config, + NULL) < 0) { + free(config); + goto out_err; + } + list_add_tail(&term->list, head); + + /* Add it for all PMUs that support the alias */ list = malloc(sizeof(struct list_head)); if (!list) - return -1; + goto out_err; + INIT_LIST_HEAD(list); + while ((pmu = perf_pmu__scan(pmu)) != NULL) { struct perf_pmu_alias *alias; list_for_each_entry(alias, &pmu->aliases, list) { if (!strcasecmp(alias->name, str)) { - struct list_head *head; - char 
*config; - - head = malloc(sizeof(struct list_head)); - if (!head) - return -1; - INIT_LIST_HEAD(head); - config = strdup(str); - if (!config) - return -1; - if (parse_events_term__num(&term, - PARSE_EVENTS__TERM_TYPE_USER, - config, 1, false, &config, - NULL) < 0) { - free(list); - free(config); - return -1; - } - list_add_tail(&term->list, head); - if (!parse_events_add_pmu(parse_state, list, pmu->name, head, true, true)) { @@ -1689,17 +1694,17 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, pmu->name, alias->str); ok++; } - - parse_events_terms__delete(head); } } } - if (!ok) { +out_err: + if (ok) + *listp = list; + else free(list); - return -1; - } - *listp = list; - return 0; + + parse_events_terms__delete(head); + return ok ? 0 : -1; } int parse_events__modifier_group(struct list_head *list, @@ -2029,7 +2034,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) return 0; } -int parse_events_name(struct list_head *list, char *name) +int parse_events_name(struct list_head *list, const char *name) { struct evsel *evsel; @@ -2299,6 +2304,52 @@ int __parse_events(struct evlist *evlist, const char *str, return ret; } +void parse_events_error__init(struct parse_events_error *err) +{ + bzero(err, sizeof(*err)); +} + +void parse_events_error__exit(struct parse_events_error *err) +{ + zfree(&err->str); + zfree(&err->help); + zfree(&err->first_str); + zfree(&err->first_help); +} + +void parse_events_error__handle(struct parse_events_error *err, int idx, + char *str, char *help) +{ + if (WARN(!str, "WARNING: failed to provide error string\n")) { + free(help); + return; + } + switch (err->num_errors) { + case 0: + err->idx = idx; + err->str = str; + err->help = help; + break; + case 1: + err->first_idx = err->idx; + err->idx = idx; + err->first_str = err->str; + err->str = str; + err->first_help = err->help; + err->help = help; + break; + default: + pr_debug("Multiple errors dropping message: %s (%s)\n", + err->str, 
err->help); + free(err->str); + err->str = str; + free(err->help); + err->help = help; + break; + } + err->num_errors++; +} + #define MAX_WIDTH 1000 static int get_term_width(void) { @@ -2308,8 +2359,8 @@ static int get_term_width(void) return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col; } -static void __parse_events_print_error(int err_idx, const char *err_str, - const char *err_help, const char *event) +static void __parse_events_error__print(int err_idx, const char *err_str, + const char *err_help, const char *event) { const char *str = "invalid or unsupported event: "; char _buf[MAX_WIDTH]; @@ -2363,22 +2414,18 @@ static void __parse_events_print_error(int err_idx, const char *err_str, } } -void parse_events_print_error(struct parse_events_error *err, - const char *event) +void parse_events_error__print(struct parse_events_error *err, + const char *event) { if (!err->num_errors) return; - __parse_events_print_error(err->idx, err->str, err->help, event); - zfree(&err->str); - zfree(&err->help); + __parse_events_error__print(err->idx, err->str, err->help, event); if (err->num_errors > 1) { fputs("\nInitial error:\n", stderr); - __parse_events_print_error(err->first_idx, err->first_str, + __parse_events_error__print(err->first_idx, err->first_str, err->first_help, event); - zfree(&err->first_str); - zfree(&err->first_help); } } @@ -2391,13 +2438,14 @@ int parse_events_option(const struct option *opt, const char *str, struct parse_events_error err; int ret; - bzero(&err, sizeof(err)); + parse_events_error__init(&err); ret = parse_events(evlist, str, &err); if (ret) { - parse_events_print_error(&err, str); + parse_events_error__print(&err, str); fprintf(stderr, "Run 'perf list' for a list of valid events\n"); } + parse_events_error__exit(&err); return ret; } @@ -2703,7 +2751,7 @@ next: return 0; } -static bool is_event_supported(u8 type, unsigned config) +static bool is_event_supported(u8 type, u64 config) { bool ret = true; int open_return; @@ -2823,10 +2871,18 
@@ void print_sdt_events(const char *subsys_glob, const char *event_glob, int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, evt_i = 0, evt_num = 0; - char name[64]; - char **evt_list = NULL; + unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0; + char name[64], new_name[128]; + char **evt_list = NULL, **evt_pmus = NULL; bool evt_num_known = false; + struct perf_pmu *pmu = NULL; + + if (perf_pmu__has_hybrid()) { + npmus = perf_pmu__hybrid_pmu_num(); + evt_pmus = zalloc(sizeof(char *) * npmus); + if (!evt_pmus) + goto out_enomem; + } restart: if (evt_num_known) { @@ -2842,20 +2898,61 @@ restart: continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + unsigned int hybrid_supported = 0, j; + bool supported; + __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; - if (!is_event_supported(PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16))) - continue; + if (!perf_pmu__has_hybrid()) { + if (!is_event_supported(PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16))) { + continue; + } + } else { + perf_pmu__for_each_hybrid_pmu(pmu) { + if (!evt_num_known) { + evt_num++; + continue; + } + + supported = is_event_supported( + PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16) | + ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT)); + if (supported) { + snprintf(new_name, sizeof(new_name), "%s/%s/", + pmu->name, name); + evt_pmus[hybrid_supported] = strdup(new_name); + hybrid_supported++; + } + } + + if (hybrid_supported == 0) + continue; + } if (!evt_num_known) { evt_num++; continue; } - evt_list[evt_i] = strdup(name); + if ((hybrid_supported == 0) || + (hybrid_supported == npmus)) { + evt_list[evt_i] = strdup(name); + if (npmus > 0) { + for (j = 0; j < npmus; j++) + zfree(&evt_pmus[j]); + } + } else { + for (j = 0; j < hybrid_supported; j++) { + evt_list[evt_i++] = evt_pmus[j]; + evt_pmus[j] = NULL; + } + continue; + } + if 
(evt_list[evt_i] == NULL) goto out_enomem; evt_i++; @@ -2867,6 +2964,13 @@ restart: evt_num_known = true; goto restart; } + + for (evt_i = 0; evt_i < evt_num; evt_i++) { + if (!evt_list[evt_i]) + break; + } + + evt_num = evt_i; qsort(evt_list, evt_num, sizeof(char *), cmp_string); evt_i = 0; while (evt_i < evt_num) { @@ -2885,6 +2989,10 @@ out_free: for (evt_i = 0; evt_i < evt_num; evt_i++) zfree(&evt_list[evt_i]); zfree(&evt_list); + + for (evt_i = 0; evt_i < npmus; evt_i++) + zfree(&evt_pmus[evt_i]); + zfree(&evt_pmus); return evt_num; out_enomem: @@ -2994,7 +3102,8 @@ out_enomem: * Print the help text for the event symbols: */ void print_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated) + bool long_desc, bool details_flag, bool deprecated, + const char *pmu_name) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -3006,7 +3115,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_hwcache_events(event_glob, name_only); print_pmu_events(event_glob, name_only, quiet_flag, long_desc, - details_flag, deprecated); + details_flag, deprecated, pmu_name); if (event_glob != NULL) return; @@ -3032,7 +3141,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_sdt_events(NULL, NULL, name_only); - metricgroup__print(true, true, NULL, name_only, details_flag); + metricgroup__print(true, true, NULL, name_only, details_flag, + pmu_name); print_libpfm_events(name_only, long_desc); } @@ -3083,7 +3193,7 @@ int parse_events_term__num(struct parse_events_term **term, struct parse_events_term temp = { .type_val = PARSE_EVENTS__TERM_TYPE_NUM, .type_term = type_term, - .config = config, + .config = config ? : strdup(config_term_names[type_term]), .no_value = no_value, .err_term = loc_term ? loc_term->first_column : 0, .err_val = loc_val ? 
loc_val->first_column : 0, @@ -3227,7 +3337,7 @@ void parse_events_evlist_error(struct parse_events_state *parse_state, if (!parse_state->error) return; - parse_events__handle_error(parse_state->error, idx, strdup(str), NULL); + parse_events_error__handle(parse_state->error, idx, strdup(str), NULL); } static void config_terms_list(char *buf, size_t buf_sz) @@ -3286,9 +3396,12 @@ fail: struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu, + const char *name, + const char *metric_id, + struct perf_pmu *pmu, struct list_head *config_terms) { - return __add_event(list, idx, attr, true, name, pmu, - config_terms, false, NULL); + return __add_event(list, idx, attr, /*init_attr=*/true, name, metric_id, + pmu, config_terms, /*auto_merge_stats=*/false, + /*cpu_list=*/NULL); } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index bf6e41aa9b6a..c7fc93f54577 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -87,6 +87,7 @@ enum { PARSE_EVENTS__TERM_TYPE_PERCORE, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE, + PARSE_EVENTS__TERM_TYPE_METRIC_ID, __PARSE_EVENTS__TERM_TYPE_NR, }; @@ -141,8 +142,6 @@ struct parse_events_state { char *hybrid_pmu_name; }; -void parse_events__handle_error(struct parse_events_error *err, int idx, - char *str, char *help); void parse_events__shrink_config_terms(void); int parse_events__is_hardcoded_term(struct parse_events_term *term); int parse_events_term__num(struct parse_events_term **term, @@ -162,7 +161,7 @@ void parse_events_terms__purge(struct list_head *terms); void parse_events__clear_array(struct parse_events_array *a); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); -int parse_events_name(struct list_head *list, char *name); +int parse_events_name(struct 
list_head *list, const char *name); int parse_events_add_tracepoint(struct list_head *list, int *idx, const char *sys, const char *event, struct parse_events_error *error, @@ -182,10 +181,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, struct list_head *head_config); -enum perf_tool_event; int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, - enum perf_tool_event tool_event); + int tool_event); int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2, struct parse_events_error *error, @@ -200,10 +198,12 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, bool use_alias); struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu); + const char *name, const char *metric_id, + struct perf_pmu *pmu); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, + struct list_head *head_config, struct list_head **listp); int parse_events_copy_term_list(struct list_head *old, @@ -219,7 +219,8 @@ void parse_events_evlist_error(struct parse_events_state *parse_state, int idx, const char *str); void print_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc, bool details_flag, bool deprecated); + bool long_desc, bool details_flag, bool deprecated, + const char *pmu_name); struct event_symbol { const char *symbol; @@ -241,8 +242,12 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); -void parse_events_print_error(struct parse_events_error *err, - const char *event); +void parse_events_error__init(struct parse_events_error *err); +void parse_events_error__exit(struct parse_events_error *err); +void parse_events_error__handle(struct parse_events_error *err, int idx, + char *str, char *help); +void 
parse_events_error__print(struct parse_events_error *err, + const char *event); #ifdef HAVE_LIBELF_SUPPORT /* @@ -267,7 +272,9 @@ int perf_pmu__test_parse_init(void); struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu, + const char *name, + const char *metric_id, + struct perf_pmu *pmu, struct list_head *config_terms); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 923849024b15..4efe9872c667 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -12,7 +12,6 @@ #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> -#include "../perf.h" #include "parse-events.h" #include "parse-events-bison.h" #include "evsel.h" @@ -139,18 +138,23 @@ static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_stat yylval->str = strdup(text); - if (parse_state->fake_pmu) - return PE_PMU_EVENT_FAKE; - + /* + * If we're not testing then parse check determines the PMU event type + * which if it isn't a PMU returns PE_NAME. When testing the result of + * parse check can't be trusted so we return PE_PMU_EVENT_FAKE unless + * an '!' is present in which case the text can't be a PMU name. + */ switch (perf_pmu__parse_check(text)) { case PMU_EVENT_SYMBOL_PREFIX: return PE_PMU_EVENT_PRE; case PMU_EVENT_SYMBOL_SUFFIX: return PE_PMU_EVENT_SUF; case PMU_EVENT_SYMBOL: - return PE_KERNEL_PMU_EVENT; + return parse_state->fake_pmu + ? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT; default: - return PE_NAME; + return parse_state->fake_pmu && !strchr(text,'!') + ? 
PE_PMU_EVENT_FAKE : PE_NAME; } } @@ -205,7 +209,7 @@ bpf_source [^,{}]+\.c[a-zA-Z0-9._]* num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ -name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]* +name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!]* name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? @@ -295,6 +299,7 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } +metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } r{num_raw_hex} { return raw(yyscanner); } r0x{num_raw_hex} { return raw(yyscanner); } , { return ','; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d94e48e1ff9b..174158982fae 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -183,6 +183,11 @@ group_def ':' PE_MODIFIER_EVENT err = parse_events__modifier_group(list, $3); free($3); if (err) { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; + + parse_events_error__handle(error, @3.first_column, + strdup("Bad modifier"), NULL); free_list_evsel(list); YYABORT; } @@ -240,6 +245,11 @@ event_name PE_MODIFIER_EVENT err = parse_events__modifier_event(list, $2, false); free($2); if (err) { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; + + parse_events_error__handle(error, @2.first_column, + strdup("Bad modifier"), NULL); free_list_evsel(list); YYABORT; } @@ -342,7 +352,20 @@ PE_KERNEL_PMU_EVENT sep_dc struct list_head *list; int err; - err = parse_events_multi_pmu_add(_parse_state, $1, &list); + err = parse_events_multi_pmu_add(_parse_state, $1, 
NULL, &list); + free($1); + if (err < 0) + YYABORT; + $$ = list; +} +| +PE_KERNEL_PMU_EVENT opt_pmu_config +{ + struct list_head *list; + int err; + + /* frees $2 */ + err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list); free($1); if (err < 0) YYABORT; @@ -357,7 +380,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3); free($1); free($3); - if (parse_events_multi_pmu_add(_parse_state, pmu_name, &list) < 0) + if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0) YYABORT; $$ = list; } diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index 756295dedccc..f0bcfcab1a93 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -87,7 +87,8 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, pmu = perf_pmu__find_by_type((unsigned int)attr.type); evsel = parse_events__add_event(evlist->core.nr_entries, - &attr, q, pmu); + &attr, q, /*metric_id=*/NULL, + pmu); if (evsel == NULL) goto error; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index bdabd62170d2..6ae58406f4fc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -315,7 +315,7 @@ static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, - char *desc, char *val, struct pmu_event *pe) + char *desc, char *val, const struct pmu_event *pe) { struct parse_events_term *term; struct perf_pmu_alias *alias; @@ -710,9 +710,9 @@ static char *perf_pmu__getcpuid(struct perf_pmu *pmu) return cpuid; } -struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) +const struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) { - struct pmu_events_map *map; + const struct pmu_events_map *map; char *cpuid = perf_pmu__getcpuid(pmu); int i; @@ -737,7 +737,7 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } -struct pmu_events_map *__weak pmu_events_map__find(void) 
+const struct pmu_events_map *__weak pmu_events_map__find(void) { return perf_pmu__find_map(NULL); } @@ -824,7 +824,7 @@ out: * as aliases. */ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, - struct pmu_events_map *map) + const struct pmu_events_map *map) { int i; const char *name = pmu->name; @@ -834,7 +834,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, i = 0; while (1) { const char *cpu_name = is_arm_pmu_core(name) ? name : "cpu"; - struct pmu_event *pe = &map->table[i++]; + const struct pmu_event *pe = &map->table[i++]; const char *pname = pe->pmu ? pe->pmu : cpu_name; if (!pe->name) { @@ -859,7 +859,7 @@ new_alias: static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) { - struct pmu_events_map *map; + const struct pmu_events_map *map; map = perf_pmu__find_map(pmu); if (!map) @@ -873,7 +873,7 @@ void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data) int i = 0; while (1) { - struct pmu_sys_events *event_table; + const struct pmu_sys_events *event_table; int j = 0; event_table = &pmu_sys_event_tables[i++]; @@ -882,7 +882,7 @@ void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data) break; while (1) { - struct pmu_event *pe = &event_table->table[j++]; + const struct pmu_event *pe = &event_table->table[j++]; int ret; if (!pe->name && !pe->metric_group && !pe->metric_name) @@ -900,7 +900,7 @@ struct pmu_sys_event_iter_data { struct perf_pmu *pmu; }; -static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data) +static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, void *data) { struct pmu_sys_event_iter_data *idata = data; struct perf_pmu *pmu = idata->pmu; @@ -1283,7 +1283,7 @@ static int pmu_config_term(const char *pmu_name, unknown_term = NULL; help_msg = parse_events_formats_error_string(pmu_term); if (err) { - parse_events__handle_error(err, term->err_term, + parse_events_error__handle(err, term->err_term, unknown_term, help_msg); } 
else { @@ -1316,7 +1316,7 @@ static int pmu_config_term(const char *pmu_name, if (term->no_value && bitmap_weight(format->bits, PERF_PMU_FORMAT_BITS) > 1) { if (err) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("no value assigned for term"), NULL); } @@ -1331,7 +1331,7 @@ static int pmu_config_term(const char *pmu_name, term->config, term->val.str); } if (err) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("expected numeric value"), NULL); } @@ -1348,7 +1348,7 @@ static int pmu_config_term(const char *pmu_name, if (err) { char *err_str; - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, asprintf(&err_str, "value too big for format, maximum is %llu", (unsigned long long)max_val) < 0 @@ -1608,6 +1608,7 @@ static int cmp_sevent(const void *a, const void *b) { const struct sevent *as = a; const struct sevent *bs = b; + int ret; /* Put extra events last */ if (!!as->desc != !!bs->desc) @@ -1623,7 +1624,13 @@ static int cmp_sevent(const void *a, const void *b) if (as->is_cpu != bs->is_cpu) return bs->is_cpu - as->is_cpu; - return strcmp(as->name, bs->name); + ret = strcmp(as->name, bs->name); + if (!ret) { + if (as->pmu && bs->pmu) + return strcmp(as->pmu, bs->pmu); + } + + return ret; } static void wordwrap(char *s, int start, int max, int corr) @@ -1653,7 +1660,8 @@ bool is_pmu_core(const char *name) } void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated) + bool long_desc, bool details_flag, bool deprecated, + const char *pmu_name) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1679,10 +1687,16 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (pmu_name && perf_pmu__is_hybrid(pmu->name) && + strcmp(pmu_name, 
pmu->name)) { + continue; + } + list_for_each_entry(alias, &pmu->aliases, list) { char *name = alias->desc ? alias->name : format_alias(buf, sizeof(buf), pmu, alias); - bool is_cpu = is_pmu_core(pmu->name); + bool is_cpu = is_pmu_core(pmu->name) || + perf_pmu__is_hybrid(pmu->name); if (alias->deprecated && !deprecated) continue; @@ -1730,8 +1744,13 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { /* Skip duplicates */ - if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name)) - continue; + if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name)) { + if (!aliases[j].pmu || !aliases[j - 1].pmu || + !strcmp(aliases[j].pmu, aliases[j - 1].pmu)) { + continue; + } + } + if (name_only) { printf("%s ", aliases[j].name); continue; @@ -1906,7 +1925,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) } void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, - char *name) + const char *name) { struct perf_pmu_format *format; __u64 masks = 0, bits; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 394898b07fd9..541889fa9f9c 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -49,6 +49,10 @@ struct perf_pmu { struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ struct list_head list; /* ELEM */ struct list_head hybrid_list; + + struct { + bool exclude_guest; + } missing_features; }; extern struct perf_pmu perf_pmu__fake; @@ -111,7 +115,7 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); bool is_pmu_core(const char *name); void print_pmu_events(const char *event_glob, bool name_only, bool quiet, bool long_desc, bool details_flag, - bool deprecated); + bool deprecated, const char *pmu_name); bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) 
__scanf(3, 4); @@ -120,21 +124,21 @@ int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, - struct pmu_events_map *map); + const struct pmu_events_map *map); -struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); -struct pmu_events_map *pmu_events_map__find(void); +const struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); +const struct pmu_events_map *pmu_events_map__find(void); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); void perf_pmu_free_alias(struct perf_pmu_alias *alias); -typedef int (*pmu_sys_event_iter_fn)(struct pmu_event *pe, void *data); +typedef int (*pmu_sys_event_iter_fn)(const struct pmu_event *pe, void *data); void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, - char *name); + const char *name); bool perf_pmu__has_hybrid(void); int perf_pmu__match(char *pattern, char *name, char *tok); diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index d7c976671e3a..a685d20165f7 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -18,6 +18,7 @@ util/mmap.c util/namespaces.c ../lib/bitmap.c ../lib/find_bit.c +../lib/list_sort.c ../lib/hweight.c ../lib/string.c ../lib/vsprintf.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 8feef3a05af7..563a9ba8954f 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -70,6 +70,18 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } /* + * This one is needed not to drag the PMU bandwagon, jevents generated + * pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for + * doing per PMU 
perf_event_attr.exclude_guest handling, not really needed, so + * far, for the perf python binding known usecases, revisit if this become + * necessary. + */ +struct perf_pmu *evsel__find_pmu(struct evsel *evsel __maybe_unused) +{ + return NULL; +} + +/* * Add this one here not to drag util/metricgroup.c */ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 68f471d9a88b..ef6c2715fdd9 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -77,6 +77,7 @@ struct record_opts { int ctl_fd; int ctl_fd_ack; bool ctl_fd_close; + int synth; }; extern const char * const *record_usage; diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 8130b56aa04b..f3fdad28a852 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -244,7 +244,7 @@ static bool s390_cpumsf_basic_show(const char *color, size_t pos, struct hws_basic_entry *basicp) { struct hws_basic_entry *basic = basicp; -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ struct hws_basic_entry local; unsigned long long word = be64toh(*(unsigned long long *)basicp); @@ -288,7 +288,7 @@ static bool s390_cpumsf_diag_show(const char *color, size_t pos, struct hws_diag_entry *diagp) { struct hws_diag_entry *diag = diagp; -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ struct hws_diag_entry local; unsigned long long word = be64toh(*(unsigned long long *)diagp); @@ -322,7 +322,7 @@ static unsigned long long trailer_timestamp(struct hws_trailer_entry *te, static bool s390_cpumsf_trailer_show(const char *color, size_t pos, struct hws_trailer_entry *te) { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ struct hws_trailer_entry local; const unsigned long long flags = be64toh(te->flags); @@ -552,7 +552,7 @@ static unsigned long long get_trailer_time(const unsigned 
char *buf) te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ - sizeof(*te)); -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ clock_base = be64toh(te->progusage[0]) >> 63 & 0x1; progusage2 = be64toh(te->progusage[1]); #else diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 08ec3c3ae0ee..cd3a34840389 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -135,12 +135,12 @@ static int get_counterset_start(int setnr) * the name of this counter. * If no match is found a NULL pointer is returned. */ -static const char *get_counter_name(int set, int nr, struct pmu_events_map *map) +static const char *get_counter_name(int set, int nr, const struct pmu_events_map *map) { int rc, event_nr, wanted = get_counterset_start(set) + nr; if (map) { - struct pmu_event *evp = map->table; + const struct pmu_event *evp = map->table; for (; evp->name || evp->event || evp->desc; ++evp) { if (evp->name == NULL || evp->event == NULL) @@ -159,7 +159,7 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample) unsigned char *buf = sample->raw_data; const char *color = PERF_COLOR_BLUE; struct cf_ctrset_entry *cep, ce; - struct pmu_events_map *map; + const struct pmu_events_map *map; u64 *p; map = pmu_events_map__find(); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 352f16076e01..d8857d1b6d7c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -44,7 +44,7 @@ static int perf_session__process_compressed_event(struct perf_session *session, size_t decomp_size, src_size; u64 decomp_last_rem = 0; size_t mmap_len, decomp_len = session->header.env.comp_mmap_len; - struct decomp *decomp, *decomp_last = session->decomp_last; + struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last; if (decomp_last) { decomp_last_rem = decomp_last->size - decomp_last->head; @@ -71,7 +71,7 @@ static int 
perf_session__process_compressed_event(struct perf_session *session, src = (void *)event + sizeof(struct perf_record_compressed); src_size = event->pack.header.size - sizeof(struct perf_record_compressed); - decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size, + decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size, &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem); if (!decomp_size) { munmap(decomp, mmap_len); @@ -81,13 +81,12 @@ static int perf_session__process_compressed_event(struct perf_session *session, decomp->size += decomp_size; - if (session->decomp == NULL) { - session->decomp = decomp; - session->decomp_last = decomp; - } else { - session->decomp_last->next = decomp; - session->decomp_last = decomp; - } + if (session->active_decomp->decomp == NULL) + session->active_decomp->decomp = decomp; + else + session->active_decomp->decomp_last->next = decomp; + + session->active_decomp->decomp_last = decomp; pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size); @@ -197,6 +196,8 @@ struct perf_session *__perf_session__new(struct perf_data *data, session->repipe = repipe; session->tool = tool; + session->decomp_data.zstd_decomp = &session->zstd_data; + session->active_decomp = &session->decomp_data; INIT_LIST_HEAD(&session->auxtrace_index); machines__init(&session->machines); ordered_events__init(&session->ordered_events, @@ -276,11 +277,11 @@ static void perf_session__delete_threads(struct perf_session *session) machine__delete_threads(&session->machines.host); } -static void perf_session__release_decomp_events(struct perf_session *session) +static void perf_decomp__release_events(struct decomp *next) { - struct decomp *next, *decomp; + struct decomp *decomp; size_t mmap_len; - next = session->decomp; + do { decomp = next; if (decomp == NULL) @@ -299,7 +300,7 @@ void perf_session__delete(struct perf_session *session) auxtrace_index__free(&session->auxtrace_index); 
perf_session__destroy_kernel_maps(session); perf_session__delete_threads(session); - perf_session__release_decomp_events(session); + perf_decomp__release_events(session->decomp_data.decomp); perf_env__exit(&session->header.env); machines__exit(&session->machines); if (session->data) { @@ -509,6 +510,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->bpf = perf_event__process_bpf; if (tool->text_poke == NULL) tool->text_poke = perf_event__process_text_poke; + if (tool->aux_output_hw_id == NULL) + tool->aux_output_hw_id = perf_event__process_aux_output_hw_id; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -1000,6 +1003,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, + [PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -1556,6 +1560,8 @@ static int machines__deliver_event(struct machines *machines, return tool->bpf(tool, event, sample, machine); case PERF_RECORD_TEXT_POKE: return tool->text_poke(tool, event, sample, machine); + case PERF_RECORD_AUX_OUTPUT_HW_ID: + return tool->aux_output_hw_id(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; @@ -2117,7 +2123,7 @@ static int __perf_session__process_decomp_events(struct perf_session *session) { s64 skip; u64 size; - struct decomp *decomp = session->decomp_last; + struct decomp *decomp = session->active_decomp->decomp_last; if (!decomp) return 0; @@ -2171,35 +2177,55 @@ struct reader { u64 data_offset; reader_cb_t process; bool in_place_update; + char *mmaps[NUM_MMAPS]; + size_t mmap_size; + int mmap_idx; + char *mmap_cur; + u64 file_pos; + u64 file_offset; + u64 
head; + struct zstd_data zstd_data; + struct decomp_data decomp_data; }; static int -reader__process_events(struct reader *rd, struct perf_session *session, - struct ui_progress *prog) +reader__init(struct reader *rd, bool *one_mmap) { u64 data_size = rd->data_size; - u64 head, page_offset, file_offset, file_pos, size; - int err = 0, mmap_prot, mmap_flags, map_idx = 0; - size_t mmap_size; - char *buf, *mmaps[NUM_MMAPS]; - union perf_event *event; - s64 skip; - - page_offset = page_size * (rd->data_offset / page_size); - file_offset = page_offset; - head = rd->data_offset - page_offset; - - ui_progress__init_size(prog, data_size, "Processing events..."); + char **mmaps = rd->mmaps; + rd->head = rd->data_offset; data_size += rd->data_offset; - mmap_size = MMAP_SIZE; - if (mmap_size > data_size) { - mmap_size = data_size; - session->one_mmap = true; + rd->mmap_size = MMAP_SIZE; + if (rd->mmap_size > data_size) { + rd->mmap_size = data_size; + if (one_mmap) + *one_mmap = true; } - memset(mmaps, 0, sizeof(mmaps)); + memset(mmaps, 0, sizeof(rd->mmaps)); + + if (zstd_init(&rd->zstd_data, 0)) + return -1; + rd->decomp_data.zstd_decomp = &rd->zstd_data; + + return 0; +} + +static void +reader__release_decomp(struct reader *rd) +{ + perf_decomp__release_events(rd->decomp_data.decomp); + zstd_fini(&rd->zstd_data); +} + +static int +reader__mmap(struct reader *rd, struct perf_session *session) +{ + int mmap_prot, mmap_flags; + char *buf, **mmaps = rd->mmaps; + u64 page_offset; mmap_prot = PROT_READ; mmap_flags = MAP_SHARED; @@ -2210,47 +2236,63 @@ reader__process_events(struct reader *rd, struct perf_session *session, mmap_prot |= PROT_WRITE; mmap_flags = MAP_PRIVATE; } -remap: - buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd, - file_offset); + + if (mmaps[rd->mmap_idx]) { + munmap(mmaps[rd->mmap_idx], rd->mmap_size); + mmaps[rd->mmap_idx] = NULL; + } + + page_offset = page_size * (rd->head / page_size); + rd->file_offset += page_offset; + rd->head -= page_offset; + 
+ buf = mmap(NULL, rd->mmap_size, mmap_prot, mmap_flags, rd->fd, + rd->file_offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); - err = -errno; - goto out; + return -errno; } - mmaps[map_idx] = buf; - map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); - file_pos = file_offset + head; + mmaps[rd->mmap_idx] = rd->mmap_cur = buf; + rd->mmap_idx = (rd->mmap_idx + 1) & (ARRAY_SIZE(rd->mmaps) - 1); + rd->file_pos = rd->file_offset + rd->head; if (session->one_mmap) { session->one_mmap_addr = buf; - session->one_mmap_offset = file_offset; + session->one_mmap_offset = rd->file_offset; } -more: - event = fetch_mmaped_event(head, mmap_size, buf, session->header.needs_swap); + return 0; +} + +enum { + READER_OK, + READER_NODATA, +}; + +static int +reader__read_event(struct reader *rd, struct perf_session *session, + struct ui_progress *prog) +{ + u64 size; + int err = READER_OK; + union perf_event *event; + s64 skip; + + event = fetch_mmaped_event(rd->head, rd->mmap_size, rd->mmap_cur, + session->header.needs_swap); if (IS_ERR(event)) return PTR_ERR(event); - if (!event) { - if (mmaps[map_idx]) { - munmap(mmaps[map_idx], mmap_size); - mmaps[map_idx] = NULL; - } - - page_offset = page_size * (head / page_size); - file_offset += page_offset; - head -= page_offset; - goto remap; - } + if (!event) + return READER_NODATA; size = event->header.size; skip = -EINVAL; if (size < sizeof(struct perf_event_header) || - (skip = rd->process(session, event, file_pos)) < 0) { + (skip = rd->process(session, event, rd->file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n", - file_offset + head, event->header.size, + rd->file_offset + rd->head, event->header.size, event->header.type, strerror(-skip)); err = skip; goto out; @@ -2259,8 +2301,8 @@ more: if (skip) size += skip; - head += size; - file_pos += size; + rd->head += size; + rd->file_pos += size; err = __perf_session__process_decomp_events(session); if (err) @@ -2268,13 +2310,48 @@ more: 
ui_progress__update(prog, size); +out: + return err; +} + +static inline bool +reader__eof(struct reader *rd) +{ + return (rd->file_pos >= rd->data_size + rd->data_offset); +} + +static int +reader__process_events(struct reader *rd, struct perf_session *session, + struct ui_progress *prog) +{ + int err; + + err = reader__init(rd, &session->one_mmap); + if (err) + goto out; + + session->active_decomp = &rd->decomp_data; + +remap: + err = reader__mmap(rd, session); + if (err) + goto out; + +more: + err = reader__read_event(rd, session, prog); + if (err < 0) + goto out; + else if (err == READER_NODATA) + goto remap; + if (session_done()) goto out; - if (file_pos < data_size) + if (!reader__eof(rd)) goto more; out: + session->active_decomp = &session->decomp_data; return err; } @@ -2327,6 +2404,7 @@ out_err: */ ordered_events__reinit(&session->ordered_events); auxtrace__free_events(session); + reader__release_decomp(&rd); session->one_mmap = false; return err; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 5d8bd14a0a39..46c854292ad6 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -20,6 +20,12 @@ struct thread; struct auxtrace; struct itrace_synth_opts; +struct decomp_data { + struct decomp *decomp; + struct decomp *decomp_last; + struct zstd_data *zstd_decomp; +}; + struct perf_session { struct perf_header header; struct machines machines; @@ -39,8 +45,8 @@ struct perf_session { u64 bytes_transferred; u64 bytes_compressed; struct zstd_data zstd_data; - struct decomp *decomp; - struct decomp *decomp_last; + struct decomp_data decomp_data; + struct decomp_data *active_decomp; }; struct decomp { diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 568a88c001c6..a111065b484e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1325,88 +1325,68 @@ struct sort_entry sort_mispredict = { .se_width_idx = HISTC_MISPREDICT, }; -static u64 he_weight(struct hist_entry *he) -{ - return 
he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0; -} - static int64_t -sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right) +sort__weight_cmp(struct hist_entry *left, struct hist_entry *right) { - return he_weight(left) - he_weight(right); + return left->weight - right->weight; } static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he)); + return repsep_snprintf(bf, size, "%-*llu", width, he->weight); } struct sort_entry sort_local_weight = { .se_header = "Local Weight", - .se_cmp = sort__local_weight_cmp, + .se_cmp = sort__weight_cmp, .se_snprintf = hist_entry__local_weight_snprintf, .se_width_idx = HISTC_LOCAL_WEIGHT, }; -static int64_t -sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return left->stat.weight - right->stat.weight; -} - static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight); + return repsep_snprintf(bf, size, "%-*llu", width, + he->weight * he->stat.nr_events); } struct sort_entry sort_global_weight = { .se_header = "Weight", - .se_cmp = sort__global_weight_cmp, + .se_cmp = sort__weight_cmp, .se_snprintf = hist_entry__global_weight_snprintf, .se_width_idx = HISTC_GLOBAL_WEIGHT, }; -static u64 he_ins_lat(struct hist_entry *he) -{ - return he->stat.nr_events ? 
he->stat.ins_lat / he->stat.nr_events : 0; -} - static int64_t -sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) +sort__ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) { - return he_ins_lat(left) - he_ins_lat(right); + return left->ins_lat - right->ins_lat; } static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he_ins_lat(he)); + return repsep_snprintf(bf, size, "%-*u", width, he->ins_lat); } struct sort_entry sort_local_ins_lat = { .se_header = "Local INSTR Latency", - .se_cmp = sort__local_ins_lat_cmp, + .se_cmp = sort__ins_lat_cmp, .se_snprintf = hist_entry__local_ins_lat_snprintf, .se_width_idx = HISTC_LOCAL_INS_LAT, }; -static int64_t -sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return left->stat.ins_lat - right->stat.ins_lat; -} - static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->stat.ins_lat); + return repsep_snprintf(bf, size, "%-*u", width, + he->ins_lat * he->stat.nr_events); } struct sort_entry sort_global_ins_lat = { .se_header = "INSTR Latency", - .se_cmp = sort__global_ins_lat_cmp, + .se_cmp = sort__ins_lat_cmp, .se_snprintf = hist_entry__global_ins_lat_snprintf, .se_width_idx = HISTC_GLOBAL_INS_LAT, }; @@ -1414,13 +1394,13 @@ struct sort_entry sort_global_ins_lat = { static int64_t sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { - return left->stat.p_stage_cyc - right->stat.p_stage_cyc; + return left->p_stage_cyc - right->p_stage_cyc; } static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc); + return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); } struct sort_entry 
sort_p_stage_cyc = { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index b67c469aba79..7b7145501933 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -49,9 +49,6 @@ struct he_stat { u64 period_us; u64 period_guest_sys; u64 period_guest_us; - u64 weight; - u64 ins_lat; - u64 p_stage_cyc; u32 nr_events; }; @@ -109,6 +106,9 @@ struct hist_entry { s32 socket; s32 cpu; u64 code_page_size; + u64 weight; + u64 ins_lat; + u64 p_stage_cyc; u8 cpumode; u8 depth; diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 5b7d6c16d33f..af468e3bb6fa 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include <inttypes.h> +#include <signal.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/types.h> #include <linux/kernel.h> #include <linux/string.h> @@ -15,6 +17,7 @@ #include "srcline.h" #include "string2.h" #include "symbol.h" +#include "subcmd/run-command.h" bool srcline_full_filename; @@ -119,6 +122,8 @@ static struct symbol *new_inline_sym(struct dso *dso, return inline_sym; } +#define MAX_INLINE_NEST 1024 + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -273,8 +278,6 @@ static void addr2line_cleanup(struct a2l_data *a2l) free(a2l); } -#define MAX_INLINE_NEST 1024 - static int inline_list__append_dso_a2l(struct dso *dso, struct inline_node *node, struct symbol *sym) @@ -361,26 +364,14 @@ void dso__free_a2l(struct dso *dso) dso->a2l = NULL; } -static struct inline_node *addr2inlines(const char *dso_name, u64 addr, - struct dso *dso, struct symbol *sym) -{ - struct inline_node *node; - - node = zalloc(sizeof(*node)); - if (node == NULL) { - perror("not enough memory for the inline node"); - return NULL; - } - - INIT_LIST_HEAD(&node->val); - node->addr = addr; - - addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym); - return node; -} - #else /* HAVE_LIBBFD_SUPPORT */ +struct a2l_subprocess { + struct child_process addr2line; + FILE 
*to_child; + FILE *from_child; +}; + static int filename_split(char *filename, unsigned int *line_nr) { char *sep; @@ -402,114 +393,285 @@ static int filename_split(char *filename, unsigned int *line_nr) return 0; } -static int addr2line(const char *dso_name, u64 addr, - char **file, unsigned int *line_nr, - struct dso *dso __maybe_unused, - bool unwind_inlines __maybe_unused, - struct inline_node *node __maybe_unused, - struct symbol *sym __maybe_unused) +static void addr2line_subprocess_cleanup(struct a2l_subprocess *a2l) { - FILE *fp; - char cmd[PATH_MAX]; - char *filename = NULL; - size_t len; - int ret = 0; + if (a2l->addr2line.pid != -1) { + kill(a2l->addr2line.pid, SIGKILL); + finish_command(&a2l->addr2line); /* ignore result, we don't care */ + a2l->addr2line.pid = -1; + } - scnprintf(cmd, sizeof(cmd), "addr2line -e %s %016"PRIx64, - dso_name, addr); + if (a2l->to_child != NULL) { + fclose(a2l->to_child); + a2l->to_child = NULL; + } - fp = popen(cmd, "r"); - if (fp == NULL) { - pr_warning("popen failed for %s\n", dso_name); - return 0; + if (a2l->from_child != NULL) { + fclose(a2l->from_child); + a2l->from_child = NULL; + } + + free(a2l); +} + +static struct a2l_subprocess *addr2line_subprocess_init(const char *path) +{ + const char *argv[] = { "addr2line", "-e", path, "-i", "-f", NULL }; + struct a2l_subprocess *a2l = zalloc(sizeof(*a2l)); + int start_command_status = 0; + + if (a2l == NULL) + goto out; + + a2l->to_child = NULL; + a2l->from_child = NULL; + + a2l->addr2line.pid = -1; + a2l->addr2line.in = -1; + a2l->addr2line.out = -1; + a2l->addr2line.no_stderr = 1; + + a2l->addr2line.argv = argv; + start_command_status = start_command(&a2l->addr2line); + a2l->addr2line.argv = NULL; /* it's not used after start_command; avoid dangling pointers */ + + if (start_command_status != 0) { + pr_warning("could not start addr2line for %s: start_command return code %d\n", + path, + start_command_status); + goto out; } - if (getline(&filename, &len, fp) < 0 || !len) 
{ - pr_warning("addr2line has no output for %s\n", dso_name); + a2l->to_child = fdopen(a2l->addr2line.in, "w"); + if (a2l->to_child == NULL) { + pr_warning("could not open write-stream to addr2line of %s\n", path); goto out; } - ret = filename_split(filename, line_nr); - if (ret != 1) { - free(filename); + a2l->from_child = fdopen(a2l->addr2line.out, "r"); + if (a2l->from_child == NULL) { + pr_warning("could not open read-stream from addr2line of %s\n", path); goto out; } - *file = filename; + return a2l; out: - pclose(fp); - return ret; + if (a2l) + addr2line_subprocess_cleanup(a2l); + + return NULL; } -void dso__free_a2l(struct dso *dso __maybe_unused) +static int read_addr2line_record(struct a2l_subprocess *a2l, + char **function, + char **filename, + unsigned int *line_nr) { + /* + * Returns: + * -1 ==> error + * 0 ==> sentinel (or other ill-formed) record read + * 1 ==> a genuine record read + */ + char *line = NULL; + size_t line_len = 0; + unsigned int dummy_line_nr = 0; + int ret = -1; + + if (function != NULL) + zfree(function); + + if (filename != NULL) + zfree(filename); + + if (line_nr != NULL) + *line_nr = 0; + + if (getline(&line, &line_len, a2l->from_child) < 0 || !line_len) + goto error; + + if (function != NULL) + *function = strdup(strim(line)); + + zfree(&line); + line_len = 0; + + if (getline(&line, &line_len, a2l->from_child) < 0 || !line_len) + goto error; + + if (filename_split(line, line_nr == NULL ? 
&dummy_line_nr : line_nr) == 0) { + ret = 0; + goto error; + } + + if (filename != NULL) + *filename = strdup(line); + + zfree(&line); + line_len = 0; + + return 1; + +error: + free(line); + if (function != NULL) + zfree(function); + if (filename != NULL) + zfree(filename); + return ret; } -static struct inline_node *addr2inlines(const char *dso_name, u64 addr, - struct dso *dso __maybe_unused, - struct symbol *sym) +static int inline_list__append_record(struct dso *dso, + struct inline_node *node, + struct symbol *sym, + const char *function, + const char *filename, + unsigned int line_nr) { - FILE *fp; - char cmd[PATH_MAX]; - struct inline_node *node; - char *filename = NULL; - char *funcname = NULL; - size_t filelen, funclen; - unsigned int line_nr = 0; + struct symbol *inline_sym = new_inline_sym(dso, sym, function); - scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i -f %016"PRIx64, - dso_name, addr); + return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node); +} - fp = popen(cmd, "r"); - if (fp == NULL) { - pr_err("popen failed for %s\n", dso_name); - return NULL; +static int addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line_nr, + struct dso *dso, + bool unwind_inlines, + struct inline_node *node, + struct symbol *sym __maybe_unused) +{ + struct a2l_subprocess *a2l = dso->a2l; + char *record_function = NULL; + char *record_filename = NULL; + unsigned int record_line_nr = 0; + int record_status = -1; + int ret = 0; + size_t inline_count = 0; + + if (!a2l) { + dso->a2l = addr2line_subprocess_init(dso_name); + a2l = dso->a2l; } - node = zalloc(sizeof(*node)); - if (node == NULL) { - perror("not enough memory for the inline node"); + if (a2l == NULL) { + if (!symbol_conf.disable_add2line_warn) + pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name); goto out; } - INIT_LIST_HEAD(&node->val); - node->addr = addr; - - /* addr2line -f generates two lines for each inlined functions */ - while 
(getline(&funcname, &funclen, fp) != -1) { - char *srcline; - struct symbol *inline_sym; + /* + * Send our request and then *deliberately* send something that can't be interpreted as + * a valid address to ask addr2line about (namely, ","). This causes addr2line to first + * write out the answer to our request, in an unbounded/unknown number of records, and + * then to write out the lines "??" and "??:0", so that we can detect when it has + * finished giving us anything useful. We have to be careful about the first record, + * though, because it may be genuinely unknown, in which case we'll get two sets of + * "??"/"??:0" lines. + */ + if (fprintf(a2l->to_child, "%016"PRIx64"\n,\n", addr) < 0 || fflush(a2l->to_child) != 0) { + pr_warning("%s %s: could not send request\n", __func__, dso_name); + goto out; + } - strim(funcname); + switch (read_addr2line_record(a2l, &record_function, &record_filename, &record_line_nr)) { + case -1: + pr_warning("%s %s: could not read first record\n", __func__, dso_name); + goto out; + case 0: + /* + * The first record was invalid, so return failure, but first read another + * record, since we asked a junk question and have to clear the answer out. + */ + switch (read_addr2line_record(a2l, NULL, NULL, NULL)) { + case -1: + pr_warning("%s %s: could not read delimiter record\n", __func__, dso_name); + break; + case 0: + /* As expected. 
*/ + break; + default: + pr_warning("%s %s: unexpected record instead of sentinel", + __func__, dso_name); + break; + } + goto out; + default: + break; + } - if (getline(&filename, &filelen, fp) == -1) - goto out; + if (file) { + *file = strdup(record_filename); + ret = 1; + } + if (line_nr) + *line_nr = record_line_nr; - if (filename_split(filename, &line_nr) != 1) + if (unwind_inlines) { + if (node && inline_list__append_record(dso, node, sym, + record_function, + record_filename, + record_line_nr)) { + ret = 0; goto out; + } + } - srcline = srcline_from_fileline(filename, line_nr); - inline_sym = new_inline_sym(dso, sym, funcname); - - if (inline_list__append(inline_sym, srcline, node) != 0) { - free(srcline); - if (inline_sym && inline_sym->inlined) - symbol__delete(inline_sym); - goto out; + /* We have to read the records even if we don't care about the inline info. */ + while ((record_status = read_addr2line_record(a2l, + &record_function, + &record_filename, + &record_line_nr)) == 1) { + if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) { + if (inline_list__append_record(dso, node, sym, + record_function, + record_filename, + record_line_nr)) { + ret = 0; + goto out; + } + ret = 1; /* found at least one inline frame */ } } out: - pclose(fp); - free(filename); - free(funcname); + free(record_function); + free(record_filename); + return ret; +} - return node; +void dso__free_a2l(struct dso *dso) +{ + struct a2l_subprocess *a2l = dso->a2l; + + if (!a2l) + return; + + addr2line_subprocess_cleanup(a2l); + + dso->a2l = NULL; } #endif /* HAVE_LIBBFD_SUPPORT */ +static struct inline_node *addr2inlines(const char *dso_name, u64 addr, + struct dso *dso, struct symbol *sym) +{ + struct inline_node *node; + + node = zalloc(sizeof(*node)); + if (node == NULL) { + perror("not enough memory for the inline node"); + return NULL; + } + + INIT_LIST_HEAD(&node->val); + node->addr = addr; + + addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym); + return 
node; +} + /* * Number of addr2line failures (without success) before disabling it for that * dso. diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 34a7f5c1fff7..5c7308efa768 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 +#include <math.h> #include <stdio.h> #include "evsel.h" #include "stat.h" #include "color.h" +#include "debug.h" #include "pmu.h" #include "rblist.h" #include "evlist.h" @@ -370,12 +372,16 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) { struct evsel *counter, *leader, **metric_events, *oc; bool found; - struct expr_parse_ctx ctx; + struct expr_parse_ctx *ctx; struct hashmap_entry *cur; size_t bkt; int i; - expr__ctx_init(&ctx); + ctx = expr__ctx_new(); + if (!ctx) { + pr_debug("expr__ctx_new failed"); + return; + } evlist__for_each_entry(evsel_list, counter) { bool invalid = false; @@ -383,25 +389,25 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) if (!counter->metric_expr) continue; - expr__ctx_clear(&ctx); + expr__ctx_clear(ctx); metric_events = counter->metric_events; if (!metric_events) { - if (expr__find_other(counter->metric_expr, - counter->name, - &ctx, 1) < 0) + if (expr__find_ids(counter->metric_expr, + counter->name, + ctx) < 0) continue; metric_events = calloc(sizeof(struct evsel *), - hashmap__size(&ctx.ids) + 1); + hashmap__size(ctx->ids) + 1); if (!metric_events) { - expr__ctx_clear(&ctx); + expr__ctx_free(ctx); return; } counter->metric_events = metric_events; } i = 0; - hashmap__for_each_entry((&ctx.ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { const char *metric_name = (const char *)cur->key; found = false; @@ -438,6 +444,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) "Add %s event to groups to get metric expression for %s\n", metric_name, counter->name); + free(printed); printed = strdup(metric_name); } invalid = true; @@ -453,7 +460,7 
@@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) counter->metric_expr = NULL; } } - expr__ctx_clear(&ctx); + expr__ctx_free(ctx); } static double runtime_stat_avg(struct runtime_stat *st, @@ -815,18 +822,19 @@ static int prepare_metric(struct evsel **metric_events, struct runtime_stat *st) { double scale; - char *n, *pn; + char *n; int i, j, ret; - expr__ctx_init(pctx); for (i = 0; metric_events[i]; i++) { struct saved_value *v; struct stats *stats; u64 metric_total = 0; + int source_count; if (!strcmp(metric_events[i]->name, "duration_time")) { stats = &walltime_nsecs_stats; scale = 1e-9; + source_count = 1; } else { v = saved_value_lookup(metric_events[i], cpu, false, STAT_NONE, 0, st, @@ -835,27 +843,18 @@ static int prepare_metric(struct evsel **metric_events, break; stats = &v->stats; scale = 1.0; + source_count = evsel__source_count(metric_events[i]); if (v->metric_other) metric_total = v->metric_total; } - - n = strdup(metric_events[i]->name); + n = strdup(evsel__metric_id(metric_events[i])); if (!n) return -ENOMEM; - /* - * This display code with --no-merge adds [cpu] postfixes. - * These are not supported by the parser. Remove everything - * after the space. - */ - pn = strchr(n, ' '); - if (pn) - *pn = 0; - - if (metric_total) - expr__add_id_val(pctx, n, metric_total); - else - expr__add_id_val(pctx, n, avg_stats(stats)*scale); + + expr__add_id_val_source_count(pctx, n, + metric_total ? 
: avg_stats(stats) * scale, + source_count); } for (j = 0; metric_refs && metric_refs[j].metric_name; j++) { @@ -880,17 +879,23 @@ static void generic_metric(struct perf_stat_config *config, struct runtime_stat *st) { print_metric_t print_metric = out->print_metric; - struct expr_parse_ctx pctx; + struct expr_parse_ctx *pctx; double ratio, scale; int i; void *ctxp = out->ctx; - i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st); - if (i < 0) + pctx = expr__ctx_new(); + if (!pctx) return; + pctx->runtime = runtime; + i = prepare_metric(metric_events, metric_refs, pctx, cpu, st); + if (i < 0) { + expr__ctx_free(pctx); + return; + } if (!metric_events[i]) { - if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) { + if (expr__parse(&ratio, pctx, metric_expr) == 0) { char *unit; char metric_bf[64]; @@ -926,22 +931,26 @@ static void generic_metric(struct perf_stat_config *config, (metric_name ? metric_name : name) : "", 0); } - expr__ctx_clear(&pctx); + expr__ctx_free(pctx); } double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st) { - struct expr_parse_ctx pctx; + struct expr_parse_ctx *pctx; double ratio = 0.0; - if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0) + pctx = expr__ctx_new(); + if (!pctx) + return NAN; + + if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu, st) < 0) goto out; - if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1)) + if (expr__parse(&ratio, pctx, mexp->metric_expr)) ratio = 0.0; out: - expr__ctx_clear(&pctx); + expr__ctx_free(pctx); return ratio; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0fc9a5410739..b2ed3140a1fa 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -274,7 +274,7 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char * if (symbol_conf.priv_size) { if (symbol_conf.init_annotation) { struct annotation *notes = (void *)sym; - pthread_mutex_init(&notes->lock, 
NULL); + annotation__init(notes); } sym = ((void *)sym) + symbol_conf.priv_size; } @@ -294,6 +294,13 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char * void symbol__delete(struct symbol *sym) { + if (symbol_conf.priv_size) { + if (symbol_conf.init_annotation) { + struct annotation *notes = symbol__annotation(sym); + + annotation__exit(notes); + } + } free(((void *)sym) - symbol_conf.priv_size); } @@ -702,6 +709,10 @@ static int map__process_kallsym_symbol(void *arg, const char *name, if (!symbol_type__filter(type)) return 0; + /* Ignore local symbols for ARM modules */ + if (name[0] == '$') + return 0; + /* * module symbols are not sorted so we add all * symbols, setting length to 0, and rely on @@ -2630,3 +2641,25 @@ struct mem_info *mem_info__new(void) refcount_set(&mi->refcnt, 1); return mi; } + +/* + * Checks that user supplied symbol kernel files are accessible because + * the default mechanism for accessing elf files fails silently. i.e. if + * debug syms for a build ID aren't found perf carries on normally. When + * they are user supplied we should assume that the user doesn't want to + * silently fail. + */ +int symbol__validate_sym_arguments(void) +{ + if (symbol_conf.vmlinux_name && + access(symbol_conf.vmlinux_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); + return -EINVAL; + } + if (symbol_conf.kallsyms_name && + access(symbol_conf.kallsyms_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); + return -EINVAL; + } + return 0; +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 954d6a049ee2..fbf866d82dcc 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -40,22 +40,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, GElf_Shdr *shp, const char *name, size_t *idx); #endif -/** struct symbol - symtab entry - * - * @ignore - resolvable but tools ignore it (e.g. idle routines) +/** + * A symtab entry. 
When allocated this may be preceded by an annotation (see + * symbol__annotation), a browser_index (see symbol__browser_index) and rb_node + * to sort by name (see struct symbol_name_rb_node). */ struct symbol { struct rb_node rb_node; + /** Range of symbol [start, end). */ u64 start; u64 end; + /** Length of the string name. */ u16 namelen; + /** ELF symbol type as defined for st_info. E.g STT_OBJECT or STT_FUNC. */ u8 type:4; + /** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */ u8 binding:4; + /** Set true for kernel symbols of idle routines. */ u8 idle:1; + /** Resolvable but tools ignore it (e.g. idle routines). */ u8 ignore:1; + /** Symbol for an inlined function. */ u8 inlined:1; + /** Has symbol__annotate2 been performed. */ + u8 annotate2:1; + /** Architecture specific. Unused except on PPC where it holds st_other. */ u8 arch_sym; - bool annotate2; + /** The name of length namelen associated with the symbol. */ char name[]; }; @@ -286,4 +297,6 @@ static inline void __mem_info__zput(struct mem_info **mi) #define mem_info__zput(mi) __mem_info__zput(&mi) +int symbol__validate_sym_arguments(void); + #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index a7e981b2d7de..198982109f0f 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -715,7 +715,8 @@ static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *fork_event, union perf_event *namespaces_event, pid_t pid, int full, perf_event__handler_t process, - struct perf_tool *tool, struct machine *machine, bool mmap_data) + struct perf_tool *tool, struct machine *machine, + bool needs_mmap, bool mmap_data) { char filename[PATH_MAX]; struct dirent **dirent; @@ -739,7 +740,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, * send mmap only for thread group leader * see thread__init_maps() */ - if (pid == tgid && + if (pid == tgid && 
needs_mmap && perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data)) return -1; @@ -786,7 +787,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, break; rc = 0; - if (_pid == pid && !kernel_thread) { + if (_pid == pid && !kernel_thread && needs_mmap) { /* process the parent's maps too */ rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data); @@ -806,7 +807,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, - bool mmap_data) + bool needs_mmap, bool mmap_data) { union perf_event *comm_event, *mmap_event, *fork_event; union perf_event *namespaces_event; @@ -836,7 +837,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, fork_event, namespaces_event, perf_thread_map__pid(threads, thread), 0, process, tool, machine, - mmap_data)) { + needs_mmap, mmap_data)) { err = -1; break; } @@ -862,7 +863,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, fork_event, namespaces_event, comm_event->comm.pid, 0, process, tool, machine, - mmap_data)) { + needs_mmap, mmap_data)) { err = -1; break; } @@ -882,6 +883,7 @@ out: static int __perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, + bool needs_mmap, bool mmap_data, struct dirent **dirent, int start, @@ -926,7 +928,7 @@ static int __perf_event__synthesize_threads(struct perf_tool *tool, */ __event__synthesize_thread(comm_event, mmap_event, fork_event, namespaces_event, pid, 1, process, - tool, machine, mmap_data); + tool, machine, needs_mmap, mmap_data); } err = 0; @@ -945,6 +947,7 @@ struct synthesize_threads_arg { struct perf_tool *tool; perf_event__handler_t process; struct machine *machine; + bool needs_mmap; bool mmap_data; struct dirent **dirent; int num; @@ -956,7 +959,8 @@ static void *synthesize_threads_worker(void *arg) struct 
synthesize_threads_arg *args = arg; __perf_event__synthesize_threads(args->tool, args->process, - args->machine, args->mmap_data, + args->machine, + args->needs_mmap, args->mmap_data, args->dirent, args->start, args->num); return NULL; @@ -965,7 +969,7 @@ static void *synthesize_threads_worker(void *arg) int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, - bool mmap_data, + bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize) { struct synthesize_threads_arg *args = NULL; @@ -994,7 +998,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (thread_nr <= 1) { err = __perf_event__synthesize_threads(tool, process, - machine, mmap_data, + machine, + needs_mmap, mmap_data, dirent, base, n); goto free_dirent; } @@ -1015,6 +1020,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, args[i].tool = tool; args[i].process = process; args[i].machine = machine; + args[i].needs_mmap = needs_mmap; args[i].mmap_data = mmap_data; args[i].dirent = dirent; } @@ -1775,26 +1781,27 @@ out_err: int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool data_mmap, - unsigned int nr_threads_synthesize) + perf_event__handler_t process, bool needs_mmap, + bool data_mmap, unsigned int nr_threads_synthesize) { if (target__has_task(target)) - return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); + return perf_event__synthesize_thread_map(tool, threads, process, machine, + needs_mmap, data_mmap); else if (target__has_cpu(target)) - return perf_event__synthesize_threads(tool, process, - machine, data_mmap, + return perf_event__synthesize_threads(tool, process, machine, + needs_mmap, data_mmap, nr_threads_synthesize); /* command specified */ return 0; } int machine__synthesize_threads(struct machine *machine, struct target *target, - struct 
perf_thread_map *threads, bool data_mmap, - unsigned int nr_threads_synthesize) + struct perf_thread_map *threads, bool needs_mmap, + bool data_mmap, unsigned int nr_threads_synthesize) { return __machine__synthesize_threads(machine, NULL, target, threads, - perf_event__process, data_mmap, - nr_threads_synthesize); + perf_event__process, needs_mmap, + data_mmap, nr_threads_synthesize); } static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id) @@ -2230,3 +2237,31 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool, return ret; } + +int parse_synth_opt(char *synth) +{ + char *p, *q; + int ret = 0; + + if (synth == NULL) + return -1; + + for (q = synth; (p = strsep(&q, ",")); p = q) { + if (!strcasecmp(p, "no") || !strcasecmp(p, "none")) + return 0; + + if (!strcasecmp(p, "all")) + return PERF_SYNTH_ALL; + + if (!strcasecmp(p, "task")) + ret |= PERF_SYNTH_TASK; + else if (!strcasecmp(p, "mmap")) + ret |= PERF_SYNTH_TASK | PERF_SYNTH_MMAP; + else if (!strcasecmp(p, "cgroup")) + ret |= PERF_SYNTH_CGROUP; + else + return -1; + } + + return ret; +} diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index c845e2b9b444..c931433bacbf 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -27,6 +27,18 @@ struct target; union perf_event; +enum perf_record_synth { + PERF_SYNTH_TASK = 1 << 0, + PERF_SYNTH_MMAP = 1 << 1, + PERF_SYNTH_CGROUP = 1 << 2, + + /* last element */ + PERF_SYNTH_MAX = 1 << 3, +}; +#define PERF_SYNTH_ALL (PERF_SYNTH_MAX - 1) + +int parse_synth_opt(char *str); + typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); @@ -53,8 +65,8 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); int 
perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool mmap_data); -int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, unsigned int nr_threads_synthesize); +int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data); +int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize); int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process); int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine); @@ -65,10 +77,10 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool data_mmap, + perf_event__handler_t process, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize); int machine__synthesize_threads(struct machine *machine, struct target *target, - struct perf_thread_map *threads, bool data_mmap, + struct 
perf_thread_map *threads, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize); #ifdef HAVE_AUXTRACE_SUPPORT diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index bbbc0dcd461f..ef873f2cc38f 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -53,6 +53,7 @@ struct perf_tool { lost_samples, aux, itrace_start, + aux_output_hw_id, context_switch, throttle, unthrottle, diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 37a9492edb3e..df3c4671be72 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -379,32 +379,32 @@ fetch_kernel_version(unsigned int *puint, char *str, return 0; } -const char *perf_tip(const char *dirpath) +int perf_tip(char **strp, const char *dirpath) { struct strlist *tips; struct str_node *node; - char *tip = NULL; struct strlist_config conf = { .dirname = dirpath, .file_only = true, }; + int ret = 0; + *strp = NULL; tips = strlist__new("tips.txt", &conf); if (tips == NULL) - return errno == ENOENT ? NULL : - "Tip: check path of tips.txt or get more memory! ;-p"; + return -errno; if (strlist__nr_entries(tips) == 0) goto out; node = strlist__entry(tips, random() % strlist__nr_entries(tips)); - if (asprintf(&tip, "Tip: %s", node->s) < 0) - tip = (char *)"Tip: get more memory! ;-)"; + if (asprintf(strp, "Tip: %s", node->s) < 0) + ret = -ENOMEM; out: strlist__delete(tips); - return tip; + return ret; } char *perf_exe(char *buf, int len) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ad737052e597..9f0d36ba77f2 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -39,7 +39,7 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_FMT "%d.%d.%d" #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) -const char *perf_tip(const char *dirpath); +int perf_tip(char **strp, const char *dirpath); #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); |