Diffstat (limited to 'tools/perf/util')
51 files changed, 1675 insertions, 772 deletions
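The central data-structure change in the annotate.c/annotate.h hunks below is that struct disasm_line now embeds struct annotation_line as its last member: generic code walks lists of annotation_line, while instruction-aware code recovers the enclosing disasm_line via container_of(), and each line is allocated with a caller-defined private area placed in front of it. A minimal standalone sketch of that layout and accessor follows; the field set is trimmed for illustration and the helper names here (annotation_line_alloc/annotation_line_free) are simplified stand-ins for the patch's annotation_line__new()/annotation_line__delete(), not the real perf definitions.

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

struct annotation_line {
	long   offset;		/* -1 for non-disassembly lines */
	char  *line;
	size_t privsize;	/* size of the private area in front */
	int    samples_nr;
};

struct disasm_line {
	const char *ins_name;
	/* must stay the last member, see disasm_line() below */
	struct annotation_line al;
};

/* recover the disasm_line wrapping a generic annotation_line,
 * mirroring the container_of()-based inline added in annotate.h */
static struct disasm_line *disasm_line(struct annotation_line *al)
{
	return al ? (struct disasm_line *)((char *)al - offsetof(struct disasm_line, al)) : NULL;
}

/*
 * Allocation mirrors the scheme in the patch:
 *   [ privsize bytes | struct annotation_line ]
 * The returned pointer is advanced past the private area and privsize
 * is remembered so the original allocation can be freed later.
 */
static struct annotation_line *annotation_line_alloc(size_t privsize, const char *text)
{
	struct annotation_line *al = calloc(1, privsize + sizeof(*al));

	if (al == NULL)
		return NULL;
	al = (void *)al + privsize;
	al->privsize = privsize;
	al->line = strdup(text);
	return al;
}

static void annotation_line_free(struct annotation_line *al)
{
	void *ptr = (void *)al - al->privsize;

	free(al->line);
	free(ptr);
}

Usage follows the same pattern as the patch: allocate with privsize = user private space plus offsetof(struct disasm_line, al) when a disasm_line is wanted, then convert back with disasm_line() wherever instruction fields (ins, ops) are needed.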
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 3369c7830260..28b233c3dcbe 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -26,7 +26,6 @@ #include <pthread.h> #include <linux/bitops.h> #include <linux/kernel.h> -#include <sys/utsname.h> #include "sane_ctype.h" @@ -322,6 +321,8 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) return 0; *addrp = strtoull(comment, &endptr, 16); + if (endptr == comment) + return 0; name = strchr(endptr, '<'); if (name == NULL) return -1; @@ -435,8 +436,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m return 0; comment = ltrim(comment); - comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name); - comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); return 0; @@ -480,7 +481,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops return 0; comment = ltrim(comment); - comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); return 0; } @@ -878,32 +879,99 @@ out_free_name: return -1; } -static struct disasm_line *disasm_line__new(s64 offset, char *line, - size_t privsize, int line_nr, - struct arch *arch, - struct map *map) +struct annotate_args { + size_t privsize; + struct arch *arch; + struct map *map; + struct perf_evsel *evsel; + s64 offset; + char *line; + int line_nr; +}; + +static void annotation_line__delete(struct annotation_line *al) { - struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); + void *ptr = (void *) al - al->privsize; + + free_srcline(al->path); + zfree(&al->line); + free(ptr); +} + +/* + * Allocating the annotation line data with following + * structure: + * + * -------------------------------------- + * private space | struct annotation_line + * -------------------------------------- + * + * Size of the private space is stored in 'struct annotation_line'. + * + */ +static struct annotation_line * +annotation_line__new(struct annotate_args *args, size_t privsize) +{ + struct annotation_line *al; + struct perf_evsel *evsel = args->evsel; + size_t size = privsize + sizeof(*al); + int nr = 1; + + if (perf_evsel__is_group_event(evsel)) + nr = evsel->nr_members; + + size += sizeof(al->samples[0]) * nr; + + al = zalloc(size); + if (al) { + al = (void *) al + privsize; + al->privsize = privsize; + al->offset = args->offset; + al->line = strdup(args->line); + al->line_nr = args->line_nr; + al->samples_nr = nr; + } + + return al; +} + +/* + * Allocating the disasm annotation line data with + * following structure: + * + * ------------------------------------------------------------ + * privsize space | struct disasm_line | struct annotation_line + * ------------------------------------------------------------ + * + * We have 'struct annotation_line' member as last member + * of 'struct disasm_line' to have an easy access. 
+ * + */ +static struct disasm_line *disasm_line__new(struct annotate_args *args) +{ + struct disasm_line *dl = NULL; + struct annotation_line *al; + size_t privsize = args->privsize + offsetof(struct disasm_line, al); + + al = annotation_line__new(args, privsize); + if (al != NULL) { + dl = disasm_line(al); - if (dl != NULL) { - dl->offset = offset; - dl->line = strdup(line); - dl->line_nr = line_nr; - if (dl->line == NULL) + if (dl->al.line == NULL) goto out_delete; - if (offset != -1) { - if (disasm_line__parse(dl->line, &dl->ins.name, &dl->ops.raw) < 0) + if (args->offset != -1) { + if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl, arch, map); + disasm_line__init_ins(dl, args->arch, args->map); } } return dl; out_free_line: - zfree(&dl->line); + zfree(&dl->al.line); out_delete: free(dl); return NULL; @@ -911,14 +979,13 @@ out_delete: void disasm_line__free(struct disasm_line *dl) { - zfree(&dl->line); if (dl->ins.ops && dl->ins.ops->free) dl->ins.ops->free(&dl->ops); else ins__delete(&dl->ops); free((void *)dl->ins.name); dl->ins.name = NULL; - free(dl); + annotation_line__delete(&dl->al); } int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) @@ -929,12 +996,13 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r return ins__scnprintf(&dl->ins, bf, size, &dl->ops); } -static void disasm__add(struct list_head *head, struct disasm_line *line) +static void annotation_line__add(struct annotation_line *al, struct list_head *head) { - list_add_tail(&line->node, head); + list_add_tail(&al->node, head); } -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head) { list_for_each_entry_continue(pos, head, node) if (pos->offset >= 0) @@ -943,50 +1011,6 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa return NULL; } -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, struct sym_hist_entry *sample) -{ - struct source_line *src_line = notes->src->lines; - double percent = 0.0; - - sample->nr_samples = sample->period = 0; - - if (src_line) { - size_t sizeof_src_line = sizeof(*src_line) + - sizeof(src_line->samples) * (src_line->nr_pcnt - 1); - - while (offset < end) { - src_line = (void *)notes->src->lines + - (sizeof_src_line * offset); - - if (*path == NULL) - *path = src_line->path; - - percent += src_line->samples[evidx].percent; - sample->nr_samples += src_line->samples[evidx].nr; - offset++; - } - } else { - struct sym_hist *h = annotation__histogram(notes, evidx); - unsigned int hits = 0; - u64 period = 0; - - while (offset < end) { - hits += h->addr[offset].nr_samples; - period += h->addr[offset].period; - ++offset; - } - - if (h->nr_samples) { - sample->period = period; - sample->nr_samples = hits; - percent = 100.0 * hits / h->nr_samples; - } - } - - return percent; -} - static const char *annotate__address_color(struct block_range *br) { double cov = block_range__coverage(br); @@ -1069,50 +1093,39 @@ static void annotate__branch_printf(struct block_range *br, u64 addr) } } +static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width) +{ + s64 offset = dl->al.offset; + const u64 addr = start + offset; + struct block_range *br; + + br = block_range__find(addr); + color_fprintf(stdout, annotate__address_color(br), 
" %*" PRIx64 ":", addr_fmt_width, addr); + color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); + annotate__branch_printf(br, addr); + return 0; +} -static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start, - struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct disasm_line *queue) +static int +annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, + struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, + int max_lines, struct annotation_line *queue, int addr_fmt_width) { + struct disasm_line *dl = container_of(al, struct disasm_line, al); static const char *prev_line; static const char *prev_color; - if (dl->offset != -1) { - const char *path = NULL; - double percent, max_percent = 0.0; - double *ppercents = &percent; - struct sym_hist_entry sample; - struct sym_hist_entry *psamples = &sample; + if (al->offset != -1) { + double max_percent = 0.0; int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); - s64 offset = dl->offset; - const u64 addr = start + offset; - struct disasm_line *next; - struct block_range *br; - - next = disasm__get_next_ip_line(¬es->src->source, dl); - - if (perf_evsel__is_group_event(evsel)) { - nr_percent = evsel->nr_members; - ppercents = calloc(nr_percent, sizeof(double)); - psamples = calloc(nr_percent, sizeof(struct sym_hist_entry)); - if (ppercents == NULL || psamples == NULL) { - return -1; - } - } - for (i = 0; i < nr_percent; i++) { - percent = disasm__calc_percent(notes, - notes->src->lines ? i : evsel->idx + i, - offset, - next ? next->offset : (s64) len, - &path, &sample); - - ppercents[i] = percent; - psamples[i] = sample; - if (percent > max_percent) - max_percent = percent; + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample = &al->samples[i]; + + if (sample->percent > max_percent) + max_percent = sample->percent; } if (max_percent < min_pcnt) @@ -1123,10 +1136,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (queue != NULL) { list_for_each_entry_from(queue, ¬es->src->source, node) { - if (queue == dl) + if (queue == al) break; - disasm_line__print(queue, sym, start, evsel, len, - 0, 0, 1, NULL); + annotation_line__print(queue, sym, start, evsel, len, + 0, 0, 1, NULL, addr_fmt_width); } } @@ -1137,44 +1150,34 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * the same color than the percentage. 
Don't print it * twice for close colored addr with the same filename:line */ - if (path) { - if (!prev_line || strcmp(prev_line, path) + if (al->path) { + if (!prev_line || strcmp(prev_line, al->path) || color != prev_color) { - color_fprintf(stdout, color, " %s", path); - prev_line = path; + color_fprintf(stdout, color, " %s", al->path); + prev_line = al->path; prev_color = color; } } for (i = 0; i < nr_percent; i++) { - percent = ppercents[i]; - sample = psamples[i]; - color = get_percent_color(percent); + struct annotation_data *sample = &al->samples[i]; + + color = get_percent_color(sample->percent); if (symbol_conf.show_total_period) color_fprintf(stdout, color, " %11" PRIu64, - sample.period); + sample->he.period); else if (symbol_conf.show_nr_samples) color_fprintf(stdout, color, " %7" PRIu64, - sample.nr_samples); + sample->he.nr_samples); else - color_fprintf(stdout, color, " %7.2f", percent); + color_fprintf(stdout, color, " %7.2f", sample->percent); } - printf(" : "); + printf(" : "); - br = block_range__find(addr); - color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); - color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line); - annotate__branch_printf(br, addr); + disasm_line__print(dl, start, addr_fmt_width); printf("\n"); - - if (ppercents != &percent) - free(ppercents); - - if (psamples != &sample) - free(psamples); - } else if (max_lines && printed >= max_lines) return 1; else { @@ -1186,10 +1189,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (perf_evsel__is_group_event(evsel)) width *= evsel->nr_members; - if (!*dl->line) + if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %s\n", width, " ", dl->line); + printf(" %*s: %*s %s\n", width, " ", addr_fmt_width, " ", al->line); } return 0; @@ -1215,11 +1218,11 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * means that it's not a disassembly line so should be treated differently. * The ops.raw part will be parsed further according to type of the instruction. 
*/ -static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - struct arch *arch, - FILE *file, size_t privsize, +static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, + struct annotate_args *args, int *line_nr) { + struct map *map = args->map; struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; char *line = NULL, *parsed_line, *tmp, *tmp2; @@ -1263,7 +1266,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map); + args->offset = offset; + args->line = parsed_line; + args->line_nr = *line_nr; + + dl = disasm_line__new(args); free(line); (*line_nr)++; @@ -1288,7 +1295,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, dl->ops.target.name = strdup(target.sym->name); } - disasm__add(¬es->src->source, dl); + annotation_line__add(&dl->al, ¬es->src->source); return 0; } @@ -1305,19 +1312,19 @@ static void delete_last_nop(struct symbol *sym) struct disasm_line *dl; while (!list_empty(list)) { - dl = list_entry(list->prev, struct disasm_line, node); + dl = list_entry(list->prev, struct disasm_line, al.node); if (dl->ins.ops) { if (dl->ins.ops != &nop_ops) return; } else { - if (!strstr(dl->line, " nop ") && - !strstr(dl->line, " nopl ") && - !strstr(dl->line, " nopw ")) + if (!strstr(dl->al.line, " nop ") && + !strstr(dl->al.line, " nopl ") && + !strstr(dl->al.line, " nopw ")) return; } - list_del(&dl->node); + list_del(&dl->al.node); disasm_line__free(dl); } } @@ -1412,25 +1419,11 @@ fallback: return 0; } -static const char *annotate__norm_arch(const char *arch_name) -{ - struct utsname uts; - - if (!arch_name) { /* Assume we are annotating locally. */ - if (uname(&uts) < 0) - return NULL; - arch_name = uts.machine; - } - return normalize_arch((char *)arch_name); -} - -int symbol__disassemble(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, - struct arch **parch, char *cpuid) +static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { + struct map *map = args->map; struct dso *dso = map->dso; char command[PATH_MAX * 2]; - struct arch *arch = NULL; FILE *file; char symfs_filename[PATH_MAX]; struct kcore_extract kce; @@ -1444,25 +1437,6 @@ int symbol__disassemble(struct symbol *sym, struct map *map, if (err) return err; - arch_name = annotate__norm_arch(arch_name); - if (!arch_name) - return -1; - - arch = arch__find(arch_name); - if (arch == NULL) - return -ENOTSUP; - - if (parch) - *parch = arch; - - if (arch->init) { - err = arch->init(arch, cpuid); - if (err) { - pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); - return err; - } - } - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); @@ -1546,8 +1520,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. 
*/ - if (symbol__parse_objdump_line(sym, map, arch, file, privsize, - &lineno) < 0) + if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0) break; nline++; } @@ -1580,21 +1553,110 @@ out_close_stdout: goto out_remove_tmp; } -static void insert_source_line(struct rb_root *root, struct source_line *src_line) +static void calc_percent(struct sym_hist *hist, + struct annotation_data *sample, + s64 offset, s64 end) +{ + unsigned int hits = 0; + u64 period = 0; + + while (offset < end) { + hits += hist->addr[offset].nr_samples; + period += hist->addr[offset].period; + ++offset; + } + + if (hist->nr_samples) { + sample->he.period = period; + sample->he.nr_samples = hits; + sample->percent = 100.0 * hits / hist->nr_samples; + } +} + +static void annotation__calc_percent(struct annotation *notes, + struct perf_evsel *evsel, s64 len) +{ + struct annotation_line *al, *next; + + list_for_each_entry(al, ¬es->src->source, node) { + s64 end; + int i; + + if (al->offset == -1) + continue; + + next = annotation_line__next(al, ¬es->src->source); + end = next ? next->offset : len; + + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample; + struct sym_hist *hist; + + hist = annotation__histogram(notes, evsel->idx + i); + sample = &al->samples[i]; + + calc_percent(hist, sample, al->offset, end); + } + } +} + +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) +{ + struct annotation *notes = symbol__annotation(sym); + + annotation__calc_percent(notes, evsel, symbol__size(sym)); +} + +int symbol__annotate(struct symbol *sym, struct map *map, + struct perf_evsel *evsel, size_t privsize, + struct arch **parch) { - struct source_line *iter; + struct annotate_args args = { + .privsize = privsize, + .map = map, + .evsel = evsel, + }; + struct perf_env *env = perf_evsel__env(evsel); + const char *arch_name = perf_env__arch(env); + struct arch *arch; + int err; + + if (!arch_name) + return -1; + + args.arch = arch = arch__find(arch_name); + if (arch == NULL) + return -ENOTSUP; + + if (parch) + *parch = arch; + + if (arch->init) { + err = arch->init(arch, env ? 
env->cpuid : NULL); + if (err) { + pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); + return err; + } + } + + return symbol__disassemble(sym, &args); +} + +static void insert_source_line(struct rb_root *root, struct annotation_line *al) +{ + struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; int i, ret; while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct source_line, node); + iter = rb_entry(parent, struct annotation_line, rb_node); - ret = strcmp(iter->path, src_line->path); + ret = strcmp(iter->path, al->path); if (ret == 0) { - for (i = 0; i < src_line->nr_pcnt; i++) - iter->samples[i].percent_sum += src_line->samples[i].percent; + for (i = 0; i < al->samples_nr; i++) + iter->samples[i].percent_sum += al->samples[i].percent; return; } @@ -1604,18 +1666,18 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin p = &(*p)->rb_right; } - for (i = 0; i < src_line->nr_pcnt; i++) - src_line->samples[i].percent_sum = src_line->samples[i].percent; + for (i = 0; i < al->samples_nr; i++) + al->samples[i].percent_sum = al->samples[i].percent; - rb_link_node(&src_line->node, parent, p); - rb_insert_color(&src_line->node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } -static int cmp_source_line(struct source_line *a, struct source_line *b) +static int cmp_source_line(struct annotation_line *a, struct annotation_line *b) { int i; - for (i = 0; i < a->nr_pcnt; i++) { + for (i = 0; i < a->samples_nr; i++) { if (a->samples[i].percent_sum == b->samples[i].percent_sum) continue; return a->samples[i].percent_sum > b->samples[i].percent_sum; @@ -1624,135 +1686,47 @@ static int cmp_source_line(struct source_line *a, struct source_line *b) return 0; } -static void __resort_source_line(struct rb_root *root, struct source_line *src_line) +static void __resort_source_line(struct rb_root *root, struct annotation_line *al) { - struct source_line *iter; + struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct source_line, node); + iter = rb_entry(parent, struct annotation_line, rb_node); - if (cmp_source_line(src_line, iter)) + if (cmp_source_line(al, iter)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - rb_link_node(&src_line->node, parent, p); - rb_insert_color(&src_line->node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root) { - struct source_line *src_line; + struct annotation_line *al; struct rb_node *node; node = rb_first(src_root); while (node) { struct rb_node *next; - src_line = rb_entry(node, struct source_line, node); + al = rb_entry(node, struct annotation_line, rb_node); next = rb_next(node); rb_erase(node, src_root); - __resort_source_line(dest_root, src_line); + __resort_source_line(dest_root, al); node = next; } } -static void symbol__free_source_line(struct symbol *sym, int len) -{ - struct annotation *notes = symbol__annotation(sym); - struct source_line *src_line = notes->src->lines; - size_t sizeof_src_line; - int i; - - sizeof_src_line = sizeof(*src_line) + - (sizeof(src_line->samples) * (src_line->nr_pcnt - 1)); - - for (i = 0; i < len; i++) { - free_srcline(src_line->path); - src_line = (void *)src_line + sizeof_src_line; - } - - zfree(¬es->src->lines); -} - -/* Get the filename:line for the colored 
entries */ -static int symbol__get_source_line(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, - struct rb_root *root, int len) -{ - u64 start; - int i, k; - int evidx = evsel->idx; - struct source_line *src_line; - struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evidx); - struct rb_root tmp_root = RB_ROOT; - int nr_pcnt = 1; - u64 nr_samples = h->nr_samples; - size_t sizeof_src_line = sizeof(struct source_line); - - if (perf_evsel__is_group_event(evsel)) { - for (i = 1; i < evsel->nr_members; i++) { - h = annotation__histogram(notes, evidx + i); - nr_samples += h->nr_samples; - } - nr_pcnt = evsel->nr_members; - sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples); - } - - if (!nr_samples) - return 0; - - src_line = notes->src->lines = calloc(len, sizeof_src_line); - if (!notes->src->lines) - return -1; - - start = map__rip_2objdump(map, sym->start); - - for (i = 0; i < len; i++) { - u64 offset; - double percent_max = 0.0; - - src_line->nr_pcnt = nr_pcnt; - - for (k = 0; k < nr_pcnt; k++) { - double percent = 0.0; - - h = annotation__histogram(notes, evidx + k); - nr_samples = h->addr[i].nr_samples; - if (h->nr_samples) - percent = 100.0 * nr_samples / h->nr_samples; - - if (percent > percent_max) - percent_max = percent; - src_line->samples[k].percent = percent; - src_line->samples[k].nr = nr_samples; - } - - if (percent_max <= 0.5) - goto next; - - offset = start + i; - src_line->path = get_srcline(map->dso, offset, NULL, - false, true); - insert_source_line(&tmp_root, src_line); - - next: - src_line = (void *)src_line + sizeof_src_line; - } - - resort_source_line(root, &tmp_root); - return 0; -} - static void print_summary(struct rb_root *root, const char *filename) { - struct source_line *src_line; + struct annotation_line *al; struct rb_node *node; printf("\nSorted summary for file %s\n", filename); @@ -1770,9 +1744,9 @@ static void print_summary(struct rb_root *root, const char *filename) char *path; int i; - src_line = rb_entry(node, struct source_line, node); - for (i = 0; i < src_line->nr_pcnt; i++) { - percent = src_line->samples[i].percent_sum; + al = rb_entry(node, struct annotation_line, rb_node); + for (i = 0; i < al->samples_nr; i++) { + percent = al->samples[i].percent_sum; color = get_percent_color(percent); color_fprintf(stdout, color, " %7.2f", percent); @@ -1780,7 +1754,7 @@ static void print_summary(struct rb_root *root, const char *filename) percent_max = percent; } - path = src_line->path; + path = al->path; color = get_percent_color(percent_max); color_fprintf(stdout, color, " %s\n", path); @@ -1801,6 +1775,19 @@ static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel) printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples); } +static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) +{ + char bf[32]; + struct annotation_line *line; + + list_for_each_entry_reverse(line, lines, node) { + if (line->offset != -1) + return scnprintf(bf, sizeof(bf), "%" PRIx64, start + line->offset); + } + + return 0; +} + int symbol__annotate_printf(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool full_paths, int min_pcnt, int max_lines, int context) @@ -1811,9 +1798,9 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, const char *evsel_name = perf_evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evsel->idx); - struct disasm_line 
*pos, *queue = NULL; + struct annotation_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); - int printed = 2, queue_len = 0; + int printed = 2, queue_len = 0, addr_fmt_width; int more = 0; u64 len; int width = symbol_conf.show_total_period ? 12 : 8; @@ -1844,15 +1831,21 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose > 0) symbol__annotate_hits(sym, evsel); + addr_fmt_width = annotated_source__addr_fmt_width(¬es->src->source, start); + list_for_each_entry(pos, ¬es->src->source, node) { + int err; + if (context && queue == NULL) { queue = pos; queue_len = 0; } - switch (disasm_line__print(pos, sym, start, evsel, len, - min_pcnt, printed, max_lines, - queue)) { + err = annotation_line__print(pos, sym, start, evsel, len, + min_pcnt, printed, max_lines, + queue, addr_fmt_width); + + switch (err) { case 0: ++printed; if (context) { @@ -1907,13 +1900,13 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) } } -void disasm__purge(struct list_head *head) +void annotated_source__purge(struct annotated_source *as) { - struct disasm_line *pos, *n; + struct annotation_line *al, *n; - list_for_each_entry_safe(pos, n, head, node) { - list_del(&pos->node); - disasm_line__free(pos); + list_for_each_entry_safe(al, n, &as->source, node) { + list_del(&al->node); + disasm_line__free(disasm_line(al)); } } @@ -1921,10 +1914,10 @@ static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp) { size_t printed; - if (dl->offset == -1) - return fprintf(fp, "%s\n", dl->line); + if (dl->al.offset == -1) + return fprintf(fp, "%s\n", dl->al.line); - printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->ins.name); + printed = fprintf(fp, "%#" PRIx64 " %s", dl->al.offset, dl->ins.name); if (dl->ops.raw[0] != '\0') { printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ", @@ -1939,38 +1932,73 @@ size_t disasm__fprintf(struct list_head *head, FILE *fp) struct disasm_line *pos; size_t printed = 0; - list_for_each_entry(pos, head, node) + list_for_each_entry(pos, head, al.node) printed += disasm_line__fprintf(pos, fp); return printed; } +static void annotation__calc_lines(struct annotation *notes, struct map *map, + struct rb_root *root, u64 start) +{ + struct annotation_line *al; + struct rb_root tmp_root = RB_ROOT; + + list_for_each_entry(al, ¬es->src->source, node) { + double percent_max = 0.0; + int i; + + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample; + + sample = &al->samples[i]; + + if (sample->percent > percent_max) + percent_max = sample->percent; + } + + if (percent_max <= 0.5) + continue; + + al->path = get_srcline(map->dso, start + al->offset, NULL, + false, true, start + al->offset); + insert_source_line(&tmp_root, al); + } + + resort_source_line(root, &tmp_root); +} + +static void symbol__calc_lines(struct symbol *sym, struct map *map, + struct rb_root *root) +{ + struct annotation *notes = symbol__annotation(sym); + u64 start = map__rip_2objdump(map, sym->start); + + annotation__calc_lines(notes, map, root, start); +} + int symbol__tty_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool print_lines, bool full_paths, int min_pcnt, int max_lines) { struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - u64 len; - if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, evsel, 0, NULL) < 0) return -1; - len = symbol__size(sym); + symbol__calc_percent(sym, evsel); if (print_lines) { srcline_full_filename = 
full_paths; - symbol__get_source_line(sym, map, evsel, &source_line, len); + symbol__calc_lines(sym, map, &source_line); print_summary(&source_line, dso->long_name); } symbol__annotate_printf(sym, map, evsel, full_paths, min_pcnt, max_lines, 0); - if (print_lines) - symbol__free_source_line(sym, len); - disasm__purge(&symbol__annotation(sym)->src->source); + annotated_source__purge(symbol__annotation(sym)->src); return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index f6ba3560de5e..ce427445671f 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,33 +59,55 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); struct annotation; +struct sym_hist_entry { + u64 nr_samples; + u64 period; +}; + +struct annotation_data { + double percent; + double percent_sum; + struct sym_hist_entry he; +}; + +struct annotation_line { + struct list_head node; + struct rb_node rb_node; + s64 offset; + char *line; + int line_nr; + float ipc; + u64 cycles; + size_t privsize; + char *path; + int samples_nr; + struct annotation_data samples[0]; +}; + struct disasm_line { - struct list_head node; - s64 offset; - char *line; - struct ins ins; - int line_nr; - float ipc; - u64 cycles; - struct ins_operands ops; + struct ins ins; + struct ins_operands ops; + + /* This needs to be at the end. */ + struct annotation_line al; }; +static inline struct disasm_line *disasm_line(struct annotation_line *al) +{ + return al ? container_of(al, struct disasm_line, al) : NULL; +} + static inline bool disasm_line__has_offset(const struct disasm_line *dl) { return dl->ops.target.offset_avail; } -struct sym_hist_entry { - u64 nr_samples; - u64 period; -}; - void disasm_line__free(struct disasm_line *dl); -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos); +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, struct sym_hist_entry *sample); +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); struct sym_hist { u64 nr_samples; @@ -104,19 +126,6 @@ struct cyc_hist { u16 reset; }; -struct source_line_samples { - double percent; - double percent_sum; - u64 nr; -}; - -struct source_line { - struct rb_node node; - char *path; - int nr_pcnt; - struct source_line_samples samples[1]; -}; - /** struct annotated_source - symbols with hits have this attached as in sannotation * * @histogram: Array of addr hit histograms per event being monitored @@ -132,7 +141,6 @@ struct source_line { */ struct annotated_source { struct list_head source; - struct source_line *lines; int nr_histograms; size_t sizeof_sym_hist; struct cyc_hist *cycles_hist; @@ -169,9 +177,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *samp int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__disassemble(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, - struct arch **parch, char *cpuid); +int symbol__annotate(struct symbol *sym, struct map *map, + struct perf_evsel *evsel, size_t privsize, + struct arch **parch); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, @@ -198,7 +206,7 @@ int 
symbol__annotate_printf(struct symbol *sym, struct map *map, int min_pcnt, int max_lines, int context); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); -void disasm__purge(struct list_head *head); +void annotated_source__purge(struct annotated_source *as); bool ui__has_annotation(void); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a33491416400..c76687e42344 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -31,9 +31,6 @@ #include <sys/param.h> #include <stdlib.h> #include <stdio.h> -#include <string.h> -#include <limits.h> -#include <errno.h> #include <linux/list.h> #include "../perf.h" diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6276b340f893..6d311868d850 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include "cpumap.h" #include "env.h" +#include "sane_ctype.h" #include "util.h" #include <errno.h> +#include <sys/utsname.h> struct perf_env perf_env; @@ -93,3 +95,48 @@ void cpu_cache_level__free(struct cpu_cache_level *cache) free(cache->map); free(cache->size); } + +/* + * Return architecture name in a normalized form. + * The conversion logic comes from the Makefile. + */ +static const char *normalize_arch(char *arch) +{ + if (!strcmp(arch, "x86_64")) + return "x86"; + if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6') + return "x86"; + if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) + return "sparc"; + if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) + return "arm64"; + if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) + return "arm"; + if (!strncmp(arch, "s390", 4)) + return "s390"; + if (!strncmp(arch, "parisc", 6)) + return "parisc"; + if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3)) + return "powerpc"; + if (!strncmp(arch, "mips", 4)) + return "mips"; + if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) + return "sh"; + + return arch; +} + +const char *perf_env__arch(struct perf_env *env) +{ + struct utsname uts; + char *arch_name; + + if (!env) { /* Assume local operation */ + if (uname(&uts) < 0) + return NULL; + arch_name = uts.machine; + } else + arch_name = env->arch; + + return normalize_arch(arch_name); +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1eb35b190b34..bf970f57dce0 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -65,4 +65,6 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); + +const char *perf_env__arch(struct perf_env *env); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 97a8ef9980db..44e603c27944 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1435,6 +1435,11 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) event->context_switch.next_prev_tid); } +static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -1467,6 +1472,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_SWITCH_CPU_WIDE: ret += perf_event__fprintf_switch(event, fp); break; + case PERF_RECORD_LOST: + ret += 
perf_event__fprintf_lost(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 1ae95efbfb95..e5fbd6dd1b01 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -205,6 +205,7 @@ struct perf_sample { u32 flags; u16 insn_len; u8 cpumode; + u16 misc; char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b62e523a7035..f0a5e09c4071 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -125,7 +125,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist) { zfree(&evlist->mmap); - zfree(&evlist->backward_mmap); + zfree(&evlist->overwrite_mmap); fdarray__exit(&evlist->pollfd); } @@ -675,11 +675,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) { int i; - if (!evlist->backward_mmap) + if (!evlist->overwrite_mmap) return 0; for (i = 0; i < evlist->nr_mmaps; i++) { - int fd = evlist->backward_mmap[i].fd; + int fd = evlist->overwrite_mmap[i].fd; int err; if (fd < 0) @@ -711,7 +711,7 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int * No need for read-write ring buffer: kernel stop outputting when * it hit md->prev (perf_mmap__consume()). */ - return perf_mmap__read_forward(md, evlist->overwrite); + return perf_mmap__read_forward(md); } union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) @@ -738,7 +738,7 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) { - perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); + perf_mmap__consume(&evlist->mmap[idx], false); } static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) @@ -749,16 +749,16 @@ static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) for (i = 0; i < evlist->nr_mmaps; i++) perf_mmap__munmap(&evlist->mmap[i]); - if (evlist->backward_mmap) + if (evlist->overwrite_mmap) for (i = 0; i < evlist->nr_mmaps; i++) - perf_mmap__munmap(&evlist->backward_mmap[i]); + perf_mmap__munmap(&evlist->overwrite_mmap[i]); } void perf_evlist__munmap(struct perf_evlist *evlist) { perf_evlist__munmap_nofree(evlist); zfree(&evlist->mmap); - zfree(&evlist->backward_mmap); + zfree(&evlist->overwrite_mmap); } static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) @@ -800,7 +800,7 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, struct mmap_params *mp, int cpu_idx, - int thread, int *_output, int *_output_backward) + int thread, int *_output, int *_output_overwrite) { struct perf_evsel *evsel; int revent; @@ -812,18 +812,20 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, int fd; int cpu; + mp->prot = PROT_READ | PROT_WRITE; if (evsel->attr.write_backward) { - output = _output_backward; - maps = evlist->backward_mmap; + output = _output_overwrite; + maps = evlist->overwrite_mmap; if (!maps) { maps = perf_evlist__alloc_mmap(evlist); if (!maps) return -1; - evlist->backward_mmap = maps; + evlist->overwrite_mmap = maps; if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } + mp->prot &= ~PROT_WRITE; } if (evsel->system_wide && thread) @@ -884,14 +886,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist 
*evlist, pr_debug2("perf event ring buffer mmapped per cpu\n"); for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; - int output_backward = -1; + int output_overwrite = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, true); for (thread = 0; thread < nr_threads; thread++) { if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, - thread, &output, &output_backward)) + thread, &output, &output_overwrite)) goto out_unmap; } } @@ -912,13 +914,13 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per thread\n"); for (thread = 0; thread < nr_threads; thread++) { int output = -1; - int output_backward = -1; + int output_overwrite = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, false); if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, - &output, &output_backward)) + &output, &output_overwrite)) goto out_unmap; } @@ -1052,15 +1054,18 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, * Return: %0 on success, negative error code otherwise. */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, - bool overwrite, unsigned int auxtrace_pages, + unsigned int auxtrace_pages, bool auxtrace_overwrite) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; const struct thread_map *threads = evlist->threads; - struct mmap_params mp = { - .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), - }; + /* + * Delay setting mp.prot: set it before calling perf_mmap__mmap. + * Its value is decided by evsel's write_backward. + * So &mp should not be passed through const pointer. + */ + struct mmap_params mp; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist); @@ -1070,7 +1075,6 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - evlist->overwrite = overwrite; evlist->mmap_len = perf_evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->mmap_len); mp.mask = evlist->mmap_len - page_size - 1; @@ -1091,10 +1095,9 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_per_cpu(evlist, &mp); } -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, - bool overwrite) +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); + return perf_evlist__mmap_ex(evlist, pages, 0, false); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) @@ -1102,7 +1105,8 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) struct cpu_map *cpus; struct thread_map *threads; - threads = thread_map__new_str(target->pid, target->tid, target->uid); + threads = thread_map__new_str(target->pid, target->tid, target->uid, + target->per_thread); if (!threads) return -1; @@ -1582,6 +1586,17 @@ int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *even return perf_evsel__parse_sample(evsel, event, sample); } +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, + union perf_event *event, + u64 *timestamp) +{ + struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (!evsel) + return -EFAULT; + return perf_evsel__parse_sample_timestamp(evsel, event, timestamp); +} + size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) { struct perf_evsel *evsel; @@ -1739,7 +1754,7 @@ void 
perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, RESUME, } action = NONE; - if (!evlist->backward_mmap) + if (!evlist->overwrite_mmap) return; switch (old_state) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 491f69542920..e7fbca69cbac 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -31,7 +31,6 @@ struct perf_evlist { int nr_entries; int nr_groups; int nr_mmaps; - bool overwrite; bool enabled; bool has_user_cpus; size_t mmap_len; @@ -45,12 +44,14 @@ struct perf_evlist { } workload; struct fdarray pollfd; struct perf_mmap *mmap; - struct perf_mmap *backward_mmap; + struct perf_mmap *overwrite_mmap; struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; struct events_stats stats; struct perf_env *env; + u64 first_sample_time; + u64 last_sample_time; }; struct perf_evsel_str_handler { @@ -169,10 +170,9 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, - bool overwrite, unsigned int auxtrace_pages, + unsigned int auxtrace_pages, bool auxtrace_overwrite); -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, - bool overwrite); +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); size_t perf_evlist__mmap_size(unsigned long pages); @@ -205,6 +205,10 @@ u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample); +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, + union perf_event *event, + u64 *timestamp); + bool perf_evlist__valid_sample_type(struct perf_evlist *evlist); bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); bool perf_evlist__valid_read_format(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d5fbcf8c7aa7..d934f04e3110 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -36,6 +36,7 @@ #include "debug.h" #include "trace-event.h" #include "stat.h" +#include "memswap.h" #include "util/parse-branch-options.h" #include "sane_ctype.h" @@ -779,6 +780,8 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_OVERWRITE: attr->write_backward = term->val.overwrite ? 
1 : 0; break; + case PERF_EVSEL__CONFIG_TERM_DRV_CFG: + BUG_ON(1); default: break; } @@ -1574,6 +1577,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(use_clockid, p_unsigned); PRINT_ATTRf(context_switch, p_unsigned); PRINT_ATTRf(write_backward, p_unsigned); + PRINT_ATTRf(namespaces, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); @@ -1596,10 +1600,46 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, return fprintf(fp, " %-32s %s\n", name, val); } +static void perf_evsel__remove_fd(struct perf_evsel *pos, + int nr_cpus, int nr_threads, + int thread_idx) +{ + for (int cpu = 0; cpu < nr_cpus; cpu++) + for (int thread = thread_idx; thread < nr_threads - 1; thread++) + FD(pos, cpu, thread) = FD(pos, cpu, thread + 1); +} + +static int update_fds(struct perf_evsel *evsel, + int nr_cpus, int cpu_idx, + int nr_threads, int thread_idx) +{ + struct perf_evsel *pos; + + if (cpu_idx >= nr_cpus || thread_idx >= nr_threads) + return -EINVAL; + + evlist__for_each_entry(evsel->evlist, pos) { + nr_cpus = pos != evsel ? nr_cpus : cpu_idx; + + perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); + + /* + * Since fds for next evsel has not been created, + * there is no need to iterate whole event list. + */ + if (pos == evsel) + break; + } + return 0; +} + static bool ignore_missing_thread(struct perf_evsel *evsel, + int nr_cpus, int cpu, struct thread_map *threads, int thread, int err) { + pid_t ignore_pid = thread_map__pid(threads, thread); + if (!evsel->ignore_missing_thread) return false; @@ -1615,11 +1655,18 @@ static bool ignore_missing_thread(struct perf_evsel *evsel, if (threads->nr == 1) return false; + /* + * We should remove fd for missing_thread first + * because thread_map__remove() will decrease threads->nr. + */ + if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread)) + return false; + if (thread_map__remove(threads, thread)) return false; pr_warning("WARNING: Ignored open failure for pid %d\n", - thread_map__pid(threads, thread)); + ignore_pid); return true; } @@ -1724,7 +1771,7 @@ retry_open: if (fd < 0) { err = -errno; - if (ignore_missing_thread(evsel, threads, thread, err)) { + if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { /* * We just removed 1 thread, so take a step * back on thread index and lower the upper @@ -1960,6 +2007,20 @@ static inline bool overflow(const void *endp, u16 max_size, const void *offset, #define OVERFLOW_CHECK_u64(offset) \ OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64)) +static int +perf_event__check_size(union perf_event *event, unsigned int sample_size) +{ + /* + * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes + * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to + * check the format does not go past the end of the event. 
+ */ + if (sample_size + sizeof(event->header) > event->header.size) + return -EFAULT; + + return 0; +} + int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -1981,6 +2042,9 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + data->misc = event->header.misc; + data->id = -1ULL; + data->data_src = PERF_MEM_DATA_SRC_NONE; if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->attr.sample_id_all) @@ -1990,15 +2054,9 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array = event->sample.array; - /* - * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes - * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to - * check the format does not go past the end of the event. - */ - if (evsel->sample_size + sizeof(event->header) > event->header.size) + if (perf_event__check_size(event, evsel->sample_size)) return -EFAULT; - data->id = -1ULL; if (type & PERF_SAMPLE_IDENTIFIER) { data->id = *array; array++; @@ -2028,7 +2086,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } - data->addr = 0; if (type & PERF_SAMPLE_ADDR) { data->addr = *array; array++; @@ -2120,14 +2177,27 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_RAW) { OVERFLOW_CHECK_u64(array); u.val64 = *array; - if (WARN_ONCE(swapped, - "Endianness of raw data not corrected!\n")) { - /* undo swap of u64, then swap on individual u32s */ + + /* + * Undo swap of u64, then swap on individual u32s, + * get the size of the raw area and undo all of the + * swap. The pevent interface handles endianity by + * itself. + */ + if (swapped) { u.val64 = bswap_64(u.val64); u.val32[0] = bswap_32(u.val32[0]); u.val32[1] = bswap_32(u.val32[1]); } data->raw_size = u.val32[0]; + + /* + * The raw data is aligned on 64bits including the + * u32 size, so it's safe to use mem_bswap_64. 
+ */ + if (swapped) + mem_bswap_64((void *) array, data->raw_size); + array = (void *)array + sizeof(u32); OVERFLOW_CHECK(array, data->raw_size, max_size); @@ -2192,14 +2262,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } - data->data_src = PERF_MEM_DATA_SRC_NONE; if (type & PERF_SAMPLE_DATA_SRC) { OVERFLOW_CHECK_u64(array); data->data_src = *array; array++; } - data->transaction = 0; if (type & PERF_SAMPLE_TRANSACTION) { OVERFLOW_CHECK_u64(array); data->transaction = *array; @@ -2232,6 +2300,50 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, return 0; } +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, + union perf_event *event, + u64 *timestamp) +{ + u64 type = evsel->attr.sample_type; + const u64 *array; + + if (!(type & PERF_SAMPLE_TIME)) + return -1; + + if (event->header.type != PERF_RECORD_SAMPLE) { + struct perf_sample data = { + .time = -1ULL, + }; + + if (!evsel->attr.sample_id_all) + return -1; + if (perf_evsel__parse_id_sample(evsel, event, &data)) + return -1; + + *timestamp = data.time; + return 0; + } + + array = event->sample.array; + + if (perf_event__check_size(event, evsel->sample_size)) + return -EFAULT; + + if (type & PERF_SAMPLE_IDENTIFIER) + array++; + + if (type & PERF_SAMPLE_IP) + array++; + + if (type & PERF_SAMPLE_TID) + array++; + + if (type & PERF_SAMPLE_TIME) + *timestamp = *array; + + return 0; +} + size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format) { @@ -2743,8 +2855,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, break; case EOPNOTSUPP: if (evsel->attr.sample_period != 0) - return scnprintf(msg, size, "%s", - "PMU Hardware doesn't support sampling/overflow-interrupts."); + return scnprintf(msg, size, + "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'", + perf_evsel__name(evsel)); if (evsel->attr.precise_ip) return scnprintf(msg, size, "%s", "\'precise\' request may not be supported. Try removing 'p' modifier."); @@ -2781,16 +2894,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, perf_evsel__name(evsel)); } -char *perf_evsel__env_arch(struct perf_evsel *evsel) -{ - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->arch; - return NULL; -} - -char *perf_evsel__env_cpuid(struct perf_evsel *evsel) +struct perf_env *perf_evsel__env(struct perf_evsel *evsel) { - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->cpuid; + if (evsel && evsel->evlist) + return evsel->evlist->env; return NULL; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 157f49e8a772..846e41644525 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -38,7 +38,7 @@ struct cgroup_sel; * It is allocated within event parsing and attached to * perf_evsel::config_terms list head. 
*/ -enum { +enum term_type { PERF_EVSEL__CONFIG_TERM_PERIOD, PERF_EVSEL__CONFIG_TERM_FREQ, PERF_EVSEL__CONFIG_TERM_TIME, @@ -49,12 +49,11 @@ enum { PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, - PERF_EVSEL__CONFIG_TERM_MAX, }; struct perf_evsel_config_term { struct list_head list; - int type; + enum term_type type; union { u64 period; u64 freq; @@ -339,6 +338,10 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel, int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample); +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, + union perf_event *event, + u64 *timestamp); + static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) { return list_entry(evsel->node.next, struct perf_evsel, node); @@ -443,7 +446,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); -char *perf_evsel__env_arch(struct perf_evsel *evsel); -char *perf_evsel__env_cpuid(struct perf_evsel *evsel); +struct perf_env *perf_evsel__env(struct perf_evsel *evsel); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 7c0e9d587bfa..a326e0d8b5b6 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -15,9 +15,8 @@ #include <linux/bitops.h> #include <linux/stringify.h> #include <sys/stat.h> -#include <sys/types.h> #include <sys/utsname.h> -#include <unistd.h> +#include <linux/time64.h> #include "evlist.h" #include "evsel.h" @@ -37,6 +36,7 @@ #include <api/fs/fs.h> #include "asm/bug.h" #include "tool.h" +#include "time-utils.h" #include "sane_ctype.h" @@ -1182,6 +1182,20 @@ static int write_stat(struct feat_fd *ff __maybe_unused, return 0; } +static int write_sample_time(struct feat_fd *ff, + struct perf_evlist *evlist) +{ + int ret; + + ret = do_write(ff, &evlist->first_sample_time, + sizeof(evlist->first_sample_time)); + if (ret < 0) + return ret; + + return do_write(ff, &evlist->last_sample_time, + sizeof(evlist->last_sample_time)); +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1507,6 +1521,28 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp) } } +static void print_sample_time(struct feat_fd *ff, FILE *fp) +{ + struct perf_session *session; + char time_buf[32]; + double d; + + session = container_of(ff->ph, struct perf_session, header); + + timestamp__scnprintf_usec(session->evlist->first_sample_time, + time_buf, sizeof(time_buf)); + fprintf(fp, "# time of first sample : %s\n", time_buf); + + timestamp__scnprintf_usec(session->evlist->last_sample_time, + time_buf, sizeof(time_buf)); + fprintf(fp, "# time of last sample : %s\n", time_buf); + + d = (double)(session->evlist->last_sample_time - + session->evlist->first_sample_time) / NSEC_PER_MSEC; + + fprintf(fp, "# sample duration : %10.3f ms\n", d); +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct perf_session *session) @@ -2148,6 +2184,27 @@ out_free_caches: return -1; } +static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused) +{ + struct perf_session *session; + u64 first_sample_time, last_sample_time; + int ret; + + session = container_of(ff->ph, struct perf_session, header); + + ret = do_read_u64(ff, &first_sample_time); + if (ret) + return -1; + + ret = 
do_read_u64(ff, &last_sample_time); + if (ret) + return -1; + + session->evlist->first_sample_time = first_sample_time; + session->evlist->last_sample_time = last_sample_time; + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2205,6 +2262,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(AUXTRACE, auxtrace, false), FEAT_OPN(STAT, stat, false), FEAT_OPN(CACHE, cache, true), + FEAT_OPR(SAMPLE_TIME, sample_time, false), }; struct header_print_data { @@ -3258,6 +3316,74 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, return err; } +static bool has_unit(struct perf_evsel *counter) +{ + return counter->unit && *counter->unit; +} + +static bool has_scale(struct perf_evsel *counter) +{ + return counter->scale != 1; +} + +int perf_event__synthesize_extra_attr(struct perf_tool *tool, + struct perf_evlist *evsel_list, + perf_event__handler_t process, + bool is_pipe) +{ + struct perf_evsel *counter; + int err; + + /* + * Synthesize other events stuff not carried within + * attr event - unit, scale, name + */ + evlist__for_each_entry(evsel_list, counter) { + if (!counter->supported) + continue; + + /* + * Synthesize unit and scale only if it's defined. + */ + if (has_unit(counter)) { + err = perf_event__synthesize_event_update_unit(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel unit.\n"); + return err; + } + } + + if (has_scale(counter)) { + err = perf_event__synthesize_event_update_scale(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel counter.\n"); + return err; + } + } + + if (counter->own_cpus) { + err = perf_event__synthesize_event_update_cpus(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel cpus.\n"); + return err; + } + } + + /* + * Name is needed only for pipe output, + * perf.data carries event names. 
+ */ + if (is_pipe) { + err = perf_event__synthesize_event_update_name(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel name.\n"); + return err; + } + } + } + return 0; +} + int perf_event__process_attr(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_evlist **pevlist) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 29ccbfdf8724..f28aaaa3a440 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -9,6 +9,7 @@ #include <linux/types.h> #include "event.h" #include "env.h" +#include "pmu.h" enum { HEADER_RESERVED = 0, /* always cleared */ @@ -34,6 +35,7 @@ enum { HEADER_AUXTRACE, HEADER_STAT, HEADER_CACHE, + HEADER_SAMPLE_TIME, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -107,6 +109,11 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_extra_attr(struct perf_tool *tool, + struct perf_evlist *evsel_list, + perf_event__handler_t process, + bool is_pipe); + int perf_event__process_feature(struct perf_tool *tool, union perf_event *event, struct perf_session *session); @@ -166,5 +173,5 @@ int write_padded(struct feat_fd *fd, const void *bf, */ int get_cpuid(char *buffer, size_t sz); -char *get_cpuid_str(void); +char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 10e0814bb8d2..1b704fbea9de 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -11,15 +11,21 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c @(diff -I 2>&1 | grep -q 'option requires an argument' && \ - test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \ - diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ - diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ - diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ - || echo "Warning: Intel PT: x86 instruction decoder differs from kernel" >&2 )) || true + test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ + ((diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/insn.c' differs from latest version at 'arch/x86/lib/insn.c'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/inat.c' differs from latest version at 'arch/x86/lib/inat.c'" >&2)) && \ + ((diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null) || \ 
+ (echo "Warning: Intel PT: x86 instruction decoder map file at 'tools/perf/util/intel-pt-decoder/x86-opcode-map.txt' differs from latest version at 'arch/x86/lib/x86-opcode-map.txt'" >&2)) && \ + ((diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder script at 'tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk' differs from latest version at 'arch/x86/tools/gen-insn-attr-x86.awk'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/insn.h' differs from latest version at 'arch/x86/include/asm/insn.h'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat_types.h' differs from latest version at 'arch/x86/include/asm/inat_types.h'" >&2)))) || true $(call rule_mkdir) $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 270f3223c6df..b05a67464c03 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1726,7 +1726,7 @@ static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip) bool show_addr = callchain_param.key == CCKEY_ADDRESS; srcline = get_srcline(map->dso, map__rip_2objdump(map, ip), - sym, show_sym, show_addr); + sym, show_sym, show_addr, ip); srcline__tree_insert(&map->dso->srclines, ip, srcline); } @@ -2204,7 +2204,7 @@ int thread__resolve_callchain(struct thread *thread, { int ret = 0; - callchain_cursor_reset(&callchain_cursor); + callchain_cursor_reset(cursor); if (callchain_param.order == ORDER_CALLEE) { ret = thread__resolve_callchain_sample(thread, cursor, diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 6d40efd74402..8fe57031e1a8 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -419,7 +419,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { srcline = get_srcline(map->dso, map__rip_2objdump(map, addr), NULL, - true, true); + true, true, addr); if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 0ddd9c199227..1ddc3d1d0147 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -20,12 +20,10 @@ #include "pmu.h" #include "expr.h" #include "rblist.h" -#include "pmu.h" #include <string.h> #include <stdbool.h> #include <errno.h> #include "pmu-events/pmu-events.h" -#include "strbuf.h" #include "strlist.h" #include <assert.h> #include <ctype.h> @@ -38,6 +36,10 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct metric_event me = { .evsel = evsel }; + + if (!metric_events) + return NULL; + nd = rblist__find(metric_events, &me); if (nd) return container_of(nd, struct metric_event, nd); @@ -270,7 +272,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) void metricgroup__print(bool metrics, bool 
metricgroups, char *filter, bool raw) { - struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; int i; struct rblist groups; @@ -368,7 +370,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { - struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; int ret = -EINVAL; int i, j; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 9fe5f9c7d577..05076e683938 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -21,33 +21,13 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) } /* When check_messup is true, 'end' must points to a good entry */ -static union perf_event *perf_mmap__read(struct perf_mmap *map, bool check_messup, +static union perf_event *perf_mmap__read(struct perf_mmap *map, u64 start, u64 end, u64 *prev) { unsigned char *data = map->base + page_size; union perf_event *event = NULL; int diff = end - start; - if (check_messup) { - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and mess up the samples under us. - * - * If we somehow ended up ahead of the 'end', we got messed up. - * - * In either case, truncate and restart at 'end'. - */ - if (diff > map->mask / 2 || diff < 0) { - fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); - - /* - * 'end' points to a known good entry, start there. - */ - start = end; - diff = 0; - } - } - if (diff >= (int)sizeof(event->header)) { size_t size; @@ -89,7 +69,7 @@ broken_event: return event; } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup) +union perf_event *perf_mmap__read_forward(struct perf_mmap *map) { u64 head; u64 old = map->prev; @@ -102,7 +82,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_mess head = perf_mmap__read_head(map); - return perf_mmap__read(map, check_messup, old, head, &map->prev); + return perf_mmap__read(map, old, head, &map->prev); } union perf_event *perf_mmap__read_backward(struct perf_mmap *map) @@ -138,7 +118,7 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map) else end = head + map->mask + 1; - return perf_mmap__read(map, false, start, end, &map->prev); + return perf_mmap__read(map, start, end, &map->prev); } void perf_mmap__read_catchup(struct perf_mmap *map) @@ -254,18 +234,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) return 0; } -static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) { struct perf_event_header *pheader; u64 evt_head = head; int size = mask + 1; - pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); + pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); pheader = (struct perf_event_header *)(buf + (head & mask)); *start = head; while (true) { if (evt_head - head >= (unsigned int)size) { - pr_debug("Finished reading backward ring buffer: rewind\n"); + pr_debug("Finished reading overwrite ring buffer: rewind\n"); if (evt_head - head > (unsigned int)size) evt_head -= pheader->size; *end = evt_head; @@ -275,7 +255,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 pheader = (struct perf_event_header 
*)(buf + (evt_head & mask)); if (pheader->size == 0) { - pr_debug("Finished reading backward ring buffer: get start\n"); + pr_debug("Finished reading overwrite ring buffer: get start\n"); *end = evt_head; return 0; } @@ -287,19 +267,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 return -1; } -static int rb_find_range(void *data, int mask, u64 head, u64 old, - u64 *start, u64 *end, bool backward) -{ - if (!backward) { - *start = old; - *end = head; - return 0; - } - - return backward_rb_find_range(data, mask, head, start, end); -} - -int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool overwrite, void *to, int push(void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); @@ -310,19 +278,28 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, void *buf; int rc = 0; - if (rb_find_range(data, md->mask, head, old, &start, &end, backward)) - return -1; + start = overwrite ? head : old; + end = overwrite ? old : head; if (start == end) return 0; size = end - start; if (size > (unsigned long)(md->mask) + 1) { - WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); + if (!overwrite) { + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); - md->prev = head; - perf_mmap__consume(md, overwrite || backward); - return 0; + md->prev = head; + perf_mmap__consume(md, overwrite); + return 0; + } + + /* + * Backward ring buffer is full. We still have a chance to read + * most of data from it. + */ + if (overwrite_rb_find_range(data, md->mask, head, &start, &end)) + return -1; } if ((start & md->mask) + size != (end & md->mask)) { @@ -346,7 +323,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, } md->prev = head; - perf_mmap__consume(md, overwrite || backward); + perf_mmap__consume(md, overwrite); out: return rc; } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 3a5cb5a6e94a..e43d7b55a55f 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -86,10 +86,10 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) pc->data_tail = tail; } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); +union perf_event *perf_mmap__read_forward(struct perf_mmap *map); union perf_event *perf_mmap__read_backward(struct perf_mmap *map); -int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool backward, void *to, int push(void *to, void *buf, size_t size)); size_t perf_mmap__mmap_len(struct perf_mmap *map); diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 8e09fd2d842f..bad9e0296e9a 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -157,9 +157,8 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve } int ordered_events__queue(struct ordered_events *oe, union perf_event *event, - struct perf_sample *sample, u64 file_offset) + u64 timestamp, u64 file_offset) { - u64 timestamp = sample->time; struct ordered_event *oevent; if (!timestamp || timestamp == ~0ULL) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 96e5292d88e2..8c7a2948593e 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -45,7 +45,7 @@ struct ordered_events { }; int ordered_events__queue(struct ordered_events *oe, union perf_event *event, - 
struct perf_sample *sample, u64 file_offset); + u64 timestamp, u64 file_offset); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 933f5c6bffb4..ca56ba2dd3da 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -18,6 +18,7 @@ #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> +#include <dirent.h> #include <unistd.h> static char bad_path[] = "/bad-path/"; @@ -77,3 +78,16 @@ bool is_regular_file(const char *file) return S_ISREG(st.st_mode); } + +/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ +bool is_directory(const char *base_path, const struct dirent *dent) +{ + char path[PATH_MAX]; + struct stat st; + + sprintf(path, "%s/%s", base_path, dent->d_name); + if (stat(path, &st)) + return false; + + return S_ISDIR(st.st_mode); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index 14a254ada7eb..f014f905df50 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -2,9 +2,12 @@ #ifndef _PERF_PATH_H #define _PERF_PATH_H +struct dirent; + int path__join(char *bf, size_t size, const char *path1, const char *path2); int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3); bool is_regular_file(const char *file); +bool is_directory(const char *base_path, const struct dirent *dent); #endif /* _PERF_PATH_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 80fb1593913a..57e38fdf0b34 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,6 +12,7 @@ #include <dirent.h> #include <api/fs/fs.h> #include <locale.h> +#include <regex.h> #include "util.h" #include "pmu.h" #include "parse-events.h" @@ -537,17 +538,45 @@ static bool pmu_is_uncore(const char *name) } /* + * PMU CORE devices have different name other than cpu in sysfs on some + * platforms. looking for possible sysfs files to identify as core device. + */ +static int is_pmu_core(const char *name) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + + /* Look for cpu sysfs (x86 and others) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs); + if ((stat(path, &st) == 0) && + (strncmp(name, "cpu", strlen("cpu")) == 0)) + return 1; + + /* Look for cpu sysfs (specific to arm) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", + sysfs, name); + if (stat(path, &st) == 0) + return 1; + + return 0; +} + +/* * Return the CPU id as a raw string. * * Each architecture should provide a more precise id string that * can be use to match the architecture's "mapfile". 
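With this change, perf_pmu__find_map() (just below) no longer requires an exact string compare: each mapfile cpuid entry is treated as a POSIX extended regular expression, and a candidate only matches when the expression covers the whole cpuid string. A standalone sketch of that matching rule, assuming <regex.h>; the helper name is invented for illustration:

	#include <regex.h>
	#include <stdbool.h>
	#include <string.h>

	/* Illustrative helper, not part of the patch: true when 'pattern' (a
	 * mapfile cpuid regex) matches the entire 'cpuid' string. */
	static bool cpuid_full_match(const char *pattern, const char *cpuid)
	{
		regex_t re;
		regmatch_t m[1];
		bool match = false;

		if (regcomp(&re, pattern, REG_EXTENDED) != 0)
			return false;	/* invalid expression: treat as no match */

		if (!regexec(&re, cpuid, 1, m, 0)) {
			size_t len = m[0].rm_eo - m[0].rm_so;

			/* accept only a match spanning the whole cpuid */
			match = (m[0].rm_so == 0 && len == strlen(cpuid));
		}

		regfree(&re);
		return match;
	}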
*/ -char * __weak get_cpuid_str(void) +char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { return NULL; } -static char *perf_pmu__getcpuid(void) +static char *perf_pmu__getcpuid(struct perf_pmu *pmu) { char *cpuid; static bool printed; @@ -556,7 +585,7 @@ static char *perf_pmu__getcpuid(void) if (cpuid) cpuid = strdup(cpuid); if (!cpuid) - cpuid = get_cpuid_str(); + cpuid = get_cpuid_str(pmu); if (!cpuid) return NULL; @@ -567,22 +596,45 @@ static char *perf_pmu__getcpuid(void) return cpuid; } -struct pmu_events_map *perf_pmu__find_map(void) +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) { struct pmu_events_map *map; - char *cpuid = perf_pmu__getcpuid(); + char *cpuid = perf_pmu__getcpuid(pmu); int i; + /* on some platforms which uses cpus map, cpuid can be NULL for + * PMUs other than CORE PMUs. + */ + if (!cpuid) + return NULL; + i = 0; for (;;) { + regex_t re; + regmatch_t pmatch[1]; + int match; + map = &pmu_events_map[i++]; if (!map->table) { map = NULL; break; } - if (!strcmp(map->cpuid, cpuid)) + if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", map->cpuid); break; + } + + match = !regexec(&re, cpuid, 1, pmatch, 0); + regfree(&re); + if (match) { + size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + + /* Verify the entire string matched. */ + if (match_len == strlen(cpuid)) + break; + } } free(cpuid); return map; @@ -593,13 +645,14 @@ struct pmu_events_map *perf_pmu__find_map(void) * to the current running CPU. Then, add all PMU events from that table * as aliases. */ -static void pmu_add_cpu_aliases(struct list_head *head, const char *name) +static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) { int i; struct pmu_events_map *map; struct pmu_event *pe; + const char *name = pmu->name; - map = perf_pmu__find_map(); + map = perf_pmu__find_map(pmu); if (!map) return; @@ -608,7 +661,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) */ i = 0; while (1) { - const char *pname; pe = &map->table[i++]; if (!pe->name) { @@ -617,9 +669,13 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) break; } - pname = pe->pmu ? 
pe->pmu : "cpu"; - if (strncmp(pname, name, strlen(pname))) - continue; + if (!is_pmu_core(name)) { + /* check for uncore devices */ + if (pe->pmu == NULL) + continue; + if (strncmp(pe->pmu, name, strlen(pe->pmu))) + continue; + } /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe->name, @@ -661,21 +717,20 @@ static struct perf_pmu *pmu_lookup(const char *name) if (pmu_aliases(name, &aliases)) return NULL; - pmu_add_cpu_aliases(&aliases, name); pmu = zalloc(sizeof(*pmu)); if (!pmu) return NULL; pmu->cpus = pmu_cpumask(name); - + pmu->name = strdup(name); + pmu->type = type; pmu->is_uncore = pmu_is_uncore(name); + pmu_add_cpu_aliases(&aliases, pmu); INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); list_splice(&format, &pmu->format); list_splice(&aliases, &pmu->aliases); - pmu->name = strdup(name); - pmu->type = type; list_add_tail(&pmu->list, &pmus); pmu->default_config = perf_pmu__get_default_config(pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 27c75e635866..76fecec7b3f9 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -92,6 +92,6 @@ int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); -struct pmu_events_map *perf_pmu__find_map(void); +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); #endif /* __PMU_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b7aaf9b2294d..e1dbc9821617 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1325,27 +1325,30 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev) { char *ptr; - ptr = strchr(*arg, ':'); + ptr = strpbrk_esc(*arg, ":"); if (ptr) { *ptr = '\0'; if (!pev->sdt && !is_c_func_name(*arg)) goto ng_name; - pev->group = strdup(*arg); + pev->group = strdup_esc(*arg); if (!pev->group) return -ENOMEM; *arg = ptr + 1; } else pev->group = NULL; - if (!pev->sdt && !is_c_func_name(*arg)) { + + pev->event = strdup_esc(*arg); + if (pev->event == NULL) + return -ENOMEM; + + if (!pev->sdt && !is_c_func_name(pev->event)) { + zfree(&pev->event); ng_name: + zfree(&pev->group); semantic_error("%s is bad for event name -it must " "follow C symbol-naming rule.\n", *arg); return -EINVAL; } - pev->event = strdup(*arg); - if (pev->event == NULL) - return -ENOMEM; - return 0; } @@ -1373,7 +1376,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg++; } - ptr = strpbrk(arg, ";=@+%"); + ptr = strpbrk_esc(arg, ";=@+%"); if (pev->sdt) { if (ptr) { if (*ptr != '@') { @@ -1387,7 +1390,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) pev->target = build_id_cache__origname(tmp); free(tmp); } else - pev->target = strdup(ptr + 1); + pev->target = strdup_esc(ptr + 1); if (!pev->target) return -ENOMEM; *ptr = '\0'; @@ -1421,13 +1424,14 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) * * Otherwise, we consider arg to be a function specification. */ - if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) { + if (!strpbrk_esc(arg, "+@%")) { + ptr = strpbrk_esc(arg, ";:"); /* This is a file spec if it includes a '.' 
before ; or : */ - if (memchr(arg, '.', ptr - arg)) + if (ptr && memchr(arg, '.', ptr - arg)) file_spec = true; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1436,7 +1440,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (arg[0] == '\0') tmp = NULL; else { - tmp = strdup(arg); + tmp = strdup_esc(arg); if (tmp == NULL) return -ENOMEM; } @@ -1469,12 +1473,12 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg = ptr; c = nc; if (c == ';') { /* Lazy pattern must be the last part */ - pp->lazy_line = strdup(arg); + pp->lazy_line = strdup(arg); /* let leave escapes */ if (pp->lazy_line == NULL) return -ENOMEM; break; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1501,7 +1505,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("SRC@SRC is not allowed.\n"); return -EINVAL; } - pp->file = strdup(arg); + pp->file = strdup_esc(arg); if (pp->file == NULL) return -ENOMEM; break; @@ -2573,7 +2577,8 @@ int show_perf_probe_events(struct strfilter *filter) } static int get_new_event_name(char *buf, size_t len, const char *base, - struct strlist *namelist, bool allow_suffix) + struct strlist *namelist, bool ret_event, + bool allow_suffix) { int i, ret; char *p, *nbase; @@ -2584,13 +2589,13 @@ static int get_new_event_name(char *buf, size_t len, const char *base, if (!nbase) return -ENOMEM; - /* Cut off the dot suffixes (e.g. .const, .isra)*/ - p = strchr(nbase, '.'); + /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */ + p = strpbrk(nbase, ".@"); if (p && p != nbase) *p = '\0'; /* Try no suffix number */ - ret = e_snprintf(buf, len, "%s", nbase); + ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? 
"__return" : ""); if (ret < 0) { pr_debug("snprintf() failed: %d\n", ret); goto out; @@ -2625,6 +2630,14 @@ static int get_new_event_name(char *buf, size_t len, const char *base, out: free(nbase); + + /* Final validation */ + if (ret >= 0 && !is_c_func_name(buf)) { + pr_warning("Internal error: \"%s\" is an invalid event name.\n", + buf); + ret = -EINVAL; + } + return ret; } @@ -2681,8 +2694,8 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, group = PERFPROBE_GROUP; /* Get an unused new event name */ - ret = get_new_event_name(buf, 64, event, - namelist, allow_suffix); + ret = get_new_event_name(buf, 64, event, namelist, + tev->point.retprobe, allow_suffix); if (ret < 0) return ret; @@ -2792,16 +2805,40 @@ static int find_probe_functions(struct map *map, char *name, int found = 0; struct symbol *sym; struct rb_node *tmp; + const char *norm, *ver; + char *buf = NULL; + bool cut_version = true; if (map__load(map) < 0) return 0; + /* If user gives a version, don't cut off the version from symbols */ + if (strchr(name, '@')) + cut_version = false; + map__for_each_symbol(map, sym, tmp) { - if (strglobmatch(sym->name, name)) { + norm = arch__normalize_symbol_name(sym->name); + if (!norm) + continue; + + if (cut_version) { + /* We don't care about default symbol or not */ + ver = strchr(norm, '@'); + if (ver) { + buf = strndup(norm, ver - norm); + if (!buf) + return -ENOMEM; + norm = buf; + } + } + + if (strglobmatch(norm, name)) { found++; if (syms && found < probe_conf.max_probes) syms[found - 1] = sym; } + if (buf) + zfree(&buf); } return found; @@ -2847,7 +2884,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, * same name but different addresses, this lists all the symbols. */ num_matched_functions = find_probe_functions(map, pp->function, syms); - if (num_matched_functions == 0) { + if (num_matched_functions <= 0) { pr_err("Failed to find symbol %s in %s\n", pp->function, pev->target ? 
: "kernel"); ret = -ENOENT; diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index b4f2f06722a7..7aa0ea64544e 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c +util/memswap.c util/mmap.c util/namespaces.c ../lib/bitmap.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 8e49d9cafcfc..b1e999bd21ef 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -864,7 +864,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, &pages, &overwrite)) return NULL; - if (perf_evlist__mmap(evlist, pages, overwrite) < 0) { + if (perf_evlist__mmap(evlist, pages) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c index 0dfe27d99458..0efc3258c648 100644 --- a/tools/perf/util/rblist.c +++ b/tools/perf/util/rblist.c @@ -101,16 +101,21 @@ void rblist__init(struct rblist *rblist) return; } +void rblist__exit(struct rblist *rblist) +{ + struct rb_node *pos, *next = rb_first(&rblist->entries); + + while (next) { + pos = next; + next = rb_next(pos); + rblist__remove_node(rblist, pos); + } +} + void rblist__delete(struct rblist *rblist) { if (rblist != NULL) { - struct rb_node *pos, *next = rb_first(&rblist->entries); - - while (next) { - pos = next; - next = rb_next(pos); - rblist__remove_node(rblist, pos); - } + rblist__exit(rblist); free(rblist); } } diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h index 4c8638a22571..76df15c27f5f 100644 --- a/tools/perf/util/rblist.h +++ b/tools/perf/util/rblist.h @@ -29,6 +29,7 @@ struct rblist { }; void rblist__init(struct rblist *rblist); +void rblist__exit(struct rblist *rblist); void rblist__delete(struct rblist *rblist); int rblist__add_node(struct rblist *rblist, const void *new_entry); void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c7187f067d31..c1848b543f27 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -43,7 +43,6 @@ #include "../db-export.h" #include "../thread-stack.h" #include "../trace-event.h" -#include "../machine.h" #include "../call-path.h" #include "thread_map.h" #include "cpumap.h" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5c412310f266..8d0fa2f8da16 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -27,7 +27,6 @@ static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset); @@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session) static int ordered_events__deliver_event(struct ordered_events *oe, struct ordered_event *event) { - struct perf_sample sample; struct perf_session *session = container_of(oe, struct perf_session, ordered_events); - int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample); - - if (ret) { - pr_err("Can't parse sample, err = %d\n", ret); - return ret; - } - return perf_session__deliver_event(session, event->event, &sample, + return perf_session__deliver_event(session, event->event, session->tool, event->file_offset); } @@ -873,9 +865,9 @@ static int process_finished_round(struct perf_tool *tool 
__maybe_unused, } int perf_session__queue_event(struct perf_session *s, union perf_event *event, - struct perf_sample *sample, u64 file_offset) + u64 timestamp, u64 file_offset) { - return ordered_events__queue(&s->ordered_events, event, sample, file_offset); + return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset); } static void callchain__lbr_callstack_printf(struct perf_sample *sample) @@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines, static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset) { + struct perf_sample sample; int ret; - ret = auxtrace__process_event(session, event, sample, tool); + ret = perf_evlist__parse_sample(session->evlist, event, &sample); + if (ret) { + pr_err("Can't parse sample, err = %d\n", ret); + return ret; + } + + ret = auxtrace__process_event(session, event, &sample, tool); if (ret < 0) return ret; if (ret > 0) return 0; return machines__deliver_event(&session->machines, session->evlist, - event, sample, tool, file_offset); + event, &sample, tool, file_offset); } static s64 perf_session__process_user_event(struct perf_session *session, @@ -1350,10 +1348,11 @@ static s64 perf_session__process_user_event(struct perf_session *session, { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; + struct perf_sample sample = { .time = 0, }; int fd = perf_data__fd(session->data); int err; - dump_event(session->evlist, event, file_offset, NULL); + dump_event(session->evlist, event, file_offset, &sample); /* These events are processed right away */ switch (event->header.type) { @@ -1495,7 +1494,6 @@ static s64 perf_session__process_event(struct perf_session *session, { struct perf_evlist *evlist = session->evlist; struct perf_tool *tool = session->tool; - struct perf_sample sample; int ret; if (session->header.needs_swap) @@ -1509,21 +1507,19 @@ static s64 perf_session__process_event(struct perf_session *session, if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, file_offset); - /* - * For all kernel events we get the sample data - */ - ret = perf_evlist__parse_sample(evlist, event, &sample); - if (ret) - return ret; - if (tool->ordered_events) { - ret = perf_session__queue_event(session, event, &sample, file_offset); + u64 timestamp; + + ret = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp); + if (ret) + return ret; + + ret = perf_session__queue_event(session, event, timestamp, file_offset); if (ret != -ETIME) return ret; } - return perf_session__deliver_event(session, event, &sample, tool, - file_offset); + return perf_session__deliver_event(session, event, tool, file_offset); } void perf_event_header__bswap(struct perf_event_header *hdr) @@ -1777,7 +1773,8 @@ done: err = perf_session__flush_thread_stacks(session); out_err: free(buf); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); auxtrace__free_events(session); return err; @@ -1933,7 +1930,8 @@ out: err = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); /* * We may switching perf.data output, make ordered_events * reusable. 
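Both perf_session__warn_about_errors() call sites are now gated on the tool's no_warn flag, so a caller that knows the warnings would be noise can suppress the summary. A hedged sketch of the opt-out, as a fragment of a tool definition (other callbacks omitted; only the no_warn field is what this hunk actually tests):

	static struct perf_tool tool = {
		.ordered_events	= true,
		/* new in this series: skip perf_session__warn_about_errors() */
		.no_warn	= true,
	};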
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index da1434a7c120..da40b4b380ca 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -53,7 +53,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, int perf_session__process_events(struct perf_session *session); int perf_session__queue_event(struct perf_session *s, union perf_event *event, - struct perf_sample *sample, u64 file_offset); + u64 timestamp, u64 file_offset); void perf_tool__fill_defaults(struct perf_tool *tool); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a00eacdf02ed..211e7f326b9f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -336,7 +336,7 @@ char *hist_entry__get_srcline(struct hist_entry *he) return SRCLINE_UNKNOWN; return get_srcline(map->dso, map__rip_2objdump(map, he->ip), - he->ms.sym, true, true); + he->ms.sym, true, true, he->ip); } static int64_t @@ -380,7 +380,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, left->branch_info->from.al_addr), left->branch_info->from.sym, - true, true); + true, true, + left->branch_info->from.al_addr); } if (!right->branch_info->srcline_from) { struct map *map = right->branch_info->from.map; @@ -391,7 +392,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, right->branch_info->from.al_addr), right->branch_info->from.sym, - true, true); + true, true, + right->branch_info->from.al_addr); } return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); } @@ -423,7 +425,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, left->branch_info->to.al_addr), left->branch_info->from.sym, - true, true); + true, true, + left->branch_info->to.al_addr); } if (!right->branch_info->srcline_to) { struct map *map = right->branch_info->to.map; @@ -434,7 +437,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, right->branch_info->to.al_addr), right->branch_info->to.sym, - true, true); + true, true, + right->branch_info->to.al_addr); } return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); } @@ -465,7 +469,7 @@ static char *hist_entry__get_srcfile(struct hist_entry *e) return no_srcfile; sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), - e->ms.sym, false, true, true); + e->ms.sym, false, true, true, e->ip); if (!strcmp(sf, SRCLINE_UNKNOWN)) return no_srcfile; p = strchr(sf, ':'); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index d19f05c56de6..3c21fd059b64 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -496,7 +496,8 @@ out: #define A2L_FAIL_LIMIT 123 char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr, bool unwind_inlines) + bool show_sym, bool show_addr, bool unwind_inlines, + u64 ip) { char *file = NULL; unsigned line = 0; @@ -536,7 +537,7 @@ out: if (sym) { if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? 
sym->name : "", - addr - sym->start) < 0) + ip - sym->start) < 0) return SRCLINE_UNKNOWN; } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0) return SRCLINE_UNKNOWN; @@ -550,9 +551,9 @@ void free_srcline(char *srcline) } char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr) + bool show_sym, bool show_addr, u64 ip) { - return __get_srcline(dso, addr, sym, show_sym, show_addr, false); + return __get_srcline(dso, addr, sym, show_sym, show_addr, false, ip); } struct srcline_node { diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 847b7086182c..b2bb5502fd62 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -11,9 +11,10 @@ struct symbol; extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr); + bool show_sym, bool show_addr, u64 ip); char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr, bool unwind_inlines); + bool show_sym, bool show_addr, bool unwind_inlines, + u64 ip); void free_srcline(char *srcline); /* insert the srcline into the DSO, which will take ownership */ diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 855e35cbb1dc..594d14a02b67 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -9,17 +9,6 @@ #include "expr.h" #include "metricgroup.h" -enum { - CTX_BIT_USER = 1 << 0, - CTX_BIT_KERNEL = 1 << 1, - CTX_BIT_HV = 1 << 2, - CTX_BIT_HOST = 1 << 3, - CTX_BIT_IDLE = 1 << 4, - CTX_BIT_MAX = 1 << 5, -}; - -#define NUM_CTX CTX_BIT_MAX - /* * AGGR_GLOBAL: Use CPU 0 * AGGR_SOCKET: Use first CPU of socket @@ -27,36 +16,18 @@ enum { * AGGR_NONE: Use matching CPU * AGGR_THREAD: Not supported? 
*/ -static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; -static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; -static struct rblist runtime_saved_values; static bool have_frontend_stalled; +struct runtime_stat rt_stat; struct stats walltime_nsecs_stats; struct saved_value { struct rb_node rb_node; struct perf_evsel *evsel; + enum stat_type type; + int ctx; int cpu; + struct runtime_stat *stat; struct stats stats; }; @@ -69,6 +40,30 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) if (a->cpu != b->cpu) return a->cpu - b->cpu; + + /* + * Previously the rbtree was used to link generic metrics. + * The keys were evsel/cpu. Now the rbtree is extended to support + * per-thread shadow stats. For shadow stats case, the keys + * are cpu/type/ctx/stat (evsel is NULL). For generic metrics + * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL). 
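In short, shadow statistics are now keyed by (type, ctx, cpu) with a NULL evsel, while generic-metric entries keep the old evsel/cpu key. A hedged usage sketch of the resulting flow, using the file-local helpers added further down in stat-shadow.c (update_runtime_stat(), runtime_stat_avg(), runtime_stat_n()); the counts are invented:

	static void runtime_stat_example(void)
	{
		struct runtime_stat st;

		runtime_stat__init(&st);

		/* two cycle counts for ctx 0 on cpu 0 */
		update_runtime_stat(&st, STAT_CYCLES, 0, 0, 1000000);
		update_runtime_stat(&st, STAT_CYCLES, 0, 0, 1200000);

		if (runtime_stat_n(&st, STAT_CYCLES, 0, 0))	/* n == 2 */
			pr_debug("avg cycles: %f\n",
				 runtime_stat_avg(&st, STAT_CYCLES, 0, 0));	/* 1100000.0 */

		runtime_stat__exit(&st);
	}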
+ */ + if (a->type != b->type) + return a->type - b->type; + + if (a->ctx != b->ctx) + return a->ctx - b->ctx; + + if (a->evsel == NULL && b->evsel == NULL) { + if (a->stat == b->stat) + return 0; + + if ((char *)a->stat < (char *)b->stat) + return -1; + + return 1; + } + if (a->evsel == b->evsel) return 0; if ((char *)a->evsel < (char *)b->evsel) @@ -87,34 +82,66 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, return &nd->rb_node; } +static void saved_value_delete(struct rblist *rblist __maybe_unused, + struct rb_node *rb_node) +{ + struct saved_value *v; + + BUG_ON(!rb_node); + v = container_of(rb_node, struct saved_value, rb_node); + free(v); +} + static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, int cpu, - bool create) + bool create, + enum stat_type type, + int ctx, + struct runtime_stat *st) { + struct rblist *rblist; struct rb_node *nd; struct saved_value dm = { .cpu = cpu, .evsel = evsel, + .type = type, + .ctx = ctx, + .stat = st, }; - nd = rblist__find(&runtime_saved_values, &dm); + + rblist = &st->value_list; + + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); if (create) { - rblist__add_node(&runtime_saved_values, &dm); - nd = rblist__find(&runtime_saved_values, &dm); + rblist__add_node(rblist, &dm); + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); } return NULL; } +void runtime_stat__init(struct runtime_stat *st) +{ + struct rblist *rblist = &st->value_list; + + rblist__init(rblist); + rblist->node_cmp = saved_value_cmp; + rblist->node_new = saved_value_new; + rblist->node_delete = saved_value_delete; +} + +void runtime_stat__exit(struct runtime_stat *st) +{ + rblist__exit(&st->value_list); +} + void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); - rblist__init(&runtime_saved_values); - runtime_saved_values.node_cmp = saved_value_cmp; - runtime_saved_values.node_new = saved_value_new; - /* No delete for now */ + runtime_stat__init(&rt_stat); } static int evsel_context(struct perf_evsel *evsel) @@ -135,36 +162,13 @@ static int evsel_context(struct perf_evsel *evsel) return ctx; } -void perf_stat__reset_shadow_stats(void) +static void reset_stat(struct runtime_stat *st) { + struct rblist *rblist; struct rb_node *pos, *next; - memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); - memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); - memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); - memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); - memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); - memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); - memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); - memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); - memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); - memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); - memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); - memset(runtime_cycles_in_tx_stats, 0, - sizeof(runtime_cycles_in_tx_stats)); - memset(runtime_transaction_stats, 0, - sizeof(runtime_transaction_stats)); - memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); - memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); - memset(runtime_topdown_total_slots, 0, 
sizeof(runtime_topdown_total_slots)); - memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); - memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); - memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); - memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); - memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); - memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); - - next = rb_first(&runtime_saved_values.entries); + rblist = &st->value_list; + next = rb_first(&rblist->entries); while (next) { pos = next; next = rb_next(pos); @@ -174,13 +178,35 @@ void perf_stat__reset_shadow_stats(void) } } +void perf_stat__reset_shadow_stats(void) +{ + reset_stat(&rt_stat); + memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); +} + +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) +{ + reset_stat(st); +} + +static void update_runtime_stat(struct runtime_stat *st, + enum stat_type type, + int ctx, int cpu, u64 count) +{ + struct saved_value *v = saved_value_lookup(NULL, cpu, true, + type, ctx, st); + + if (v) + update_stats(&v->stats, count); +} + /* * Update various tracking values we maintain to print * more semantic information such as miss/hit ratios, * instruction rates, etc: */ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, - int cpu) + int cpu, struct runtime_stat *st) { int ctx = evsel_context(counter); @@ -188,50 +214,58 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) - update_stats(&runtime_nsecs_stats[cpu], count); + update_runtime_stat(st, STAT_NSECS, 0, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_stats(&runtime_transaction_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_stats(&runtime_elision_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) - update_stats(&runtime_topdown_total_slots[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) - update_stats(&runtime_topdown_slots_issued[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) - update_stats(&runtime_topdown_slots_retired[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) - update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) - update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, + ctx, cpu, count); else if 
(perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_stats(&runtime_smi_num_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); else if (perf_stat_evsel__is(counter, APERF)) - update_stats(&runtime_aperf_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_APERF, ctx, cpu, count); if (counter->collect_stat) { - struct saved_value *v = saved_value_lookup(counter, cpu, true); + struct saved_value *v = saved_value_lookup(counter, cpu, true, + STAT_NONE, 0, st); update_stats(&v->stats, count); } } @@ -352,15 +386,40 @@ void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) } } +static double runtime_stat_avg(struct runtime_stat *st, + enum stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return avg_stats(&v->stats); +} + +static double runtime_stat_n(struct runtime_stat *st, + enum stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return v->stats.n; +} + static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -376,13 +435,14 @@ static void print_stalled_cycles_frontend(int cpu, static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = 
evsel_context(evsel); - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -395,13 +455,14 @@ static void print_stalled_cycles_backend(int cpu, static void print_branch_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_branches_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -414,13 +475,15 @@ static void print_branch_misses(int cpu, static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -433,13 +496,15 @@ static void print_l1_dcache_misses(int cpu, static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -451,13 +516,14 @@ static void print_l1_icache_misses(int cpu, static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -469,13 +535,14 @@ static void print_dtlb_cache_misses(int cpu, static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -487,13 +554,14 @@ static void print_itlb_cache_misses(int cpu, static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -551,68 +619,72 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int ctx, int cpu) +static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) { - return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); } -static double td_bad_spec(int ctx, int cpu) +static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) { double bad_spec = 0; double total_slots; 
double total; - total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - - avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + - avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); - total_slots = td_total_slots(ctx, cpu); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + + total_slots = td_total_slots(ctx, cpu, st); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int ctx, int cpu) +static double td_retiring(int ctx, int cpu, struct runtime_stat *st) { double retiring = 0; - double total_slots = td_total_slots(ctx, cpu); - double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int ctx, int cpu) +static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) { double fe_bound = 0; - double total_slots = td_total_slots(ctx, cpu); - double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int ctx, int cpu) +static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) { - double sum = (td_fe_bound(ctx, cpu) + - td_bad_spec(ctx, cpu) + - td_retiring(ctx, cpu)); + double sum = (td_fe_bound(ctx, cpu, st) + + td_bad_spec(ctx, cpu, st) + + td_retiring(ctx, cpu, st)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); } static void print_smi_cost(int cpu, struct perf_evsel *evsel, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double smi_num, aperf, cycles, cost = 0.0; int ctx = evsel_context(evsel); const char *color = NULL; - smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); - aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); - cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); + aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); + cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if ((cycles == 0) || (aperf == 0)) return; @@ -632,7 +704,8 @@ static void generic_metric(const char *metric_expr, const char *metric_name, double avg, int cpu, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { print_metric_t print_metric = out->print_metric; struct parse_ctx pctx; @@ -651,7 +724,8 @@ static void generic_metric(const char *metric_expr, stats = &walltime_nsecs_stats; scale = 1e-9; } else { - v = saved_value_lookup(metric_events[i], cpu, false); + v = saved_value_lookup(metric_events[i], cpu, false, + STAT_NONE, 0, st); if (!v) break; stats = &v->stats; @@ -679,7 +753,8 @@ static void generic_metric(const char *metric_expr, void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, - struct rblist *metric_events) + struct rblist *metric_events, + struct runtime_stat *st) { void *ctxp = out->ctx; print_metric_t print_metric = out->print_metric; @@ -690,7 +765,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, int num = 1; if 
(perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) { ratio = avg / total; print_metric(ctxp, NULL, "%7.2f ", @@ -698,8 +774,13 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, } else { print_metric(ctxp, NULL, NULL, "insn per cycle", 0); } - total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); - total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu); + + total = max(total, runtime_stat_avg(st, + STAT_STALLED_CYCLES_BACK, + ctx, cpu)); if (total && avg) { out->new_line(ctxp); @@ -712,8 +793,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, "stalled cycles per insn", 0); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_branches_stats[ctx][cpu].n != 0) - print_branch_misses(cpu, evsel, avg, out); + if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) + print_branch_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -721,8 +802,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_dcache_stats[ctx][cpu].n != 0) - print_l1_dcache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) + print_l1_dcache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( @@ -730,8 +812,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_icache_stats[ctx][cpu].n != 0) - print_l1_icache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) + print_l1_icache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( @@ -739,8 +822,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) - print_dtlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) + print_dtlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( @@ -748,8 +832,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_itlb_cache_stats[ctx][cpu].n != 0) - print_itlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) + print_itlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( @@ -757,27 +842,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_ll_cache_stats[ctx][cpu].n != 0) - print_ll_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) 
!= 0) + print_ll_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); if (total) ratio = avg * 100 / total; - if (runtime_cacherefs_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(cpu, evsel, avg, out); + print_stalled_cycles_frontend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(cpu, evsel, avg, out); + print_stalled_cycles_backend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { ratio = avg / total; @@ -786,7 +872,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) print_metric(ctxp, NULL, "%7.2f%%", "transactional cycles", @@ -795,8 +882,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); - total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); + if (total2 < avg) total2 = avg; if (total) @@ -805,19 +893,21 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; - if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(ctxp, NULL, NULL, "cycles / transaction", - 0); + 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; @@ -831,28 +921,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(ctx, cpu); + double fe_bound = td_fe_bound(ctx, cpu, st); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(ctx, cpu); + double retiring = td_retiring(ctx, cpu, st); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double 
bad_spec = td_bad_spec(ctx, cpu); + double bad_spec = td_bad_spec(ctx, cpu, st); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(ctx, cpu); + double be_bound = td_be_bound(ctx, cpu, st); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -865,19 +955,19 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(ctx, cpu) > 0) + if (td_total_slots(ctx, cpu, st) > 0) print_metric(ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, avg, cpu, out); - } else if (runtime_nsecs_stats[cpu].n != 0) { + evsel->metric_name, avg, cpu, out, st); + } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; char unit_buf[10]; - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) ratio = 1000.0 * avg / total; @@ -888,7 +978,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(cpu, evsel, out); + print_smi_cost(cpu, evsel, out, st); } else { num = 0; } @@ -901,7 +991,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, out->new_line(ctxp); generic_metric(mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, - avg, cpu, out); + avg, cpu, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 151e9efd7286..32235657c1ac 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -278,9 +278,16 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); if (config->aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->val, cpu); - if (config->aggr_mode == AGGR_THREAD) - perf_stat__update_shadow_stats(evsel, count->val, 0); + perf_stat__update_shadow_stats(evsel, count->val, cpu, + &rt_stat); + if (config->aggr_mode == AGGR_THREAD) { + if (config->stats) + perf_stat__update_shadow_stats(evsel, + count->val, 0, &config->stats[thread]); + else + perf_stat__update_shadow_stats(evsel, + count->val, 0, &rt_stat); + } break; case AGGR_GLOBAL: aggr->val += count->val; @@ -362,7 +369,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, /* * Save the full runtime - to allow normalization during printout: */ - perf_stat__update_shadow_stats(counter, *count, 0); + perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); return 0; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index eefca5c981fd..dbc6f7134f61 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -5,6 +5,7 @@ #include <linux/types.h> #include <stdio.h> #include "xyarray.h" +#include "rblist.h" struct stats { @@ -43,11 +44,54 @@ enum aggr_mode { AGGR_UNSET, }; +enum { + CTX_BIT_USER = 1 << 0, + CTX_BIT_KERNEL = 1 << 1, + CTX_BIT_HV = 1 << 2, + CTX_BIT_HOST = 1 << 3, + CTX_BIT_IDLE = 1 << 4, + CTX_BIT_MAX = 1 << 5, +}; + +#define NUM_CTX CTX_BIT_MAX + +enum stat_type { + STAT_NONE = 0, + STAT_NSECS, + 
STAT_CYCLES, + STAT_STALLED_CYCLES_FRONT, + STAT_STALLED_CYCLES_BACK, + STAT_BRANCHES, + STAT_CACHEREFS, + STAT_L1_DCACHE, + STAT_L1_ICACHE, + STAT_LL_CACHE, + STAT_ITLB_CACHE, + STAT_DTLB_CACHE, + STAT_CYCLES_IN_TX, + STAT_TRANSACTION, + STAT_ELISION, + STAT_TOPDOWN_TOTAL_SLOTS, + STAT_TOPDOWN_SLOTS_ISSUED, + STAT_TOPDOWN_SLOTS_RETIRED, + STAT_TOPDOWN_FETCH_BUBBLES, + STAT_TOPDOWN_RECOVERY_BUBBLES, + STAT_SMI_NUM, + STAT_APERF, + STAT_MAX +}; + +struct runtime_stat { + struct rblist value_list; +}; + struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; FILE *output; unsigned int interval; + struct runtime_stat *stats; + int stats_num; }; void update_stats(struct stats *stats, u64 val); @@ -67,6 +111,15 @@ static inline void init_stats(struct stats *stats) struct perf_evsel; struct perf_evlist; +struct perf_aggr_thread_value { + struct perf_evsel *counter; + int id; + double uval; + u64 val; + u64 run; + u64 ena; +}; + bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id); @@ -75,16 +128,20 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, void perf_stat_evsel_id_init(struct perf_evsel *evsel); +extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, const char *fmt, double val); typedef void (*new_line_t )(void *ctx); +void runtime_stat__init(struct runtime_stat *st); +void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, - int cpu); + int cpu, struct runtime_stat *st); struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; @@ -92,11 +149,11 @@ struct perf_stat_output_ctx { bool force_header; }; -struct rblist; void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, - struct rblist *metric_events); + struct rblist *metric_events, + struct runtime_stat *st); void perf_stat__collect_metric_expr(struct perf_evlist *); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index aaa08ee8c717..d8bfd0c4d2cb 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -396,3 +396,49 @@ out_err_overflow: free(expr); return NULL; } + +/* Like strpbrk(), but not break if it is right after a backslash (escaped) */ +char *strpbrk_esc(char *str, const char *stopset) +{ + char *ptr; + + do { + ptr = strpbrk(str, stopset); + if (ptr == str || + (ptr == str + 1 && *(ptr - 1) != '\\')) + break; + str = ptr + 1; + } while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\'); + + return ptr; +} + +/* Like strdup, but do not copy a single backslash */ +char *strdup_esc(const char *str) +{ + char *s, *d, *p, *ret = strdup(str); + + if (!ret) + return NULL; + + d = strchr(ret, '\\'); + if (!d) + return ret; + + s = d + 1; + do { + if (*s == '\0') { + *d = '\0'; + break; + } + p = strchr(s + 1, '\\'); + if (p) { + memmove(d, s, p - s); + d += p - s; + s = p + 1; + } else + memmove(d, s, strlen(s) + 1); + } while (p); + + return ret; +} diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index ee14ca5451ab..4c68a09b97e8 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -39,5 +39,7 @@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t 
nints, int return asprintf_expr_inout_ints(var, false, nints, ints); } +char *strpbrk_esc(char *str, const char *stopset); +char *strdup_esc(const char *str); #endif /* PERF_STRING_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1b67a8639dfe..cc065d4bfafc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -94,6 +94,11 @@ static int prefix_underscores_count(const char *str) return tail - str; } +const char * __weak arch__normalize_symbol_name(const char *name) +{ + return name; +} + int __weak arch__compare_symbol_names(const char *namea, const char *nameb) { return strcmp(namea, nameb); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index a4f0075b4e5c..0563f33c1eb3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -349,6 +349,7 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); void arch__sym_update(struct symbol *s, GElf_Sym *sym); #endif +const char *arch__normalize_symbol_name(const char *name); #define SYMBOL_A 0 #define SYMBOL_B 1 diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 6eea7cff3d4e..303bdb84ab5a 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -26,6 +26,10 @@ #include <asm/syscalls_64.c> const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID; static const char **syscalltbl_native = syscalltbl_x86_64; +#elif defined(__s390x__) +#include <asm/syscalls_64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_s390_64; #endif struct syscall { diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 446aa7a56f25..6ef01a83b24e 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -64,6 +64,11 @@ static inline bool target__none(struct target *target) return !target__has_task(target) && !target__has_cpu(target); } +static inline bool target__has_per_thread(struct target *target) +{ + return target->system_wide && target->per_thread; +} + static inline bool target__uses_dummy_map(struct target *target) { bool use_dummy = false; @@ -73,6 +78,8 @@ static inline bool target__uses_dummy_map(struct target *target) else if (target__has_task(target) || (!target__has_cpu(target) && !target->uses_mmap)) use_dummy = true; + else if (target__has_per_thread(target)) + use_dummy = true; return use_dummy; } diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index be0d5a736dea..3e1038f6491c 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -92,7 +92,7 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) return threads; } -struct thread_map *thread_map__new_by_uid(uid_t uid) +static struct thread_map *__thread_map__new_all_cpus(uid_t uid) { DIR *proc; int max_threads = 32, items, i; @@ -113,7 +113,6 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) while ((dirent = readdir(proc)) != NULL) { char *end; bool grow = false; - struct stat st; pid_t pid = strtol(dirent->d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ @@ -121,11 +120,12 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); - if (stat(path, &st) != 0) - continue; + if (uid != UINT_MAX) { + struct stat st; - if (st.st_uid != uid) - continue; + if (stat(path, &st) != 0 || st.st_uid != uid) + continue; + } snprintf(path, sizeof(path), "/proc/%d/task", pid); items = scandir(path, &namelist, filter, NULL); @@ -178,6 +178,16 @@ out_free_closedir: 
goto out_closedir; } +struct thread_map *thread_map__new_all_cpus(void) +{ + return __thread_map__new_all_cpus(UINT_MAX); +} + +struct thread_map *thread_map__new_by_uid(uid_t uid) +{ + return __thread_map__new_all_cpus(uid); +} + struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) { if (pid != -1) @@ -313,7 +323,7 @@ out_free_threads: } struct thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid) + uid_t uid, bool per_thread) { if (pid) return thread_map__new_by_pid_str(pid); @@ -321,6 +331,9 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, if (!tid && uid != UINT_MAX) return thread_map__new_by_uid(uid); + if (per_thread) + return thread_map__new_all_cpus(); + return thread_map__new_by_tid_str(tid); } diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index f15803985435..0a806b99e73c 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -23,6 +23,7 @@ struct thread_map *thread_map__new_dummy(void); struct thread_map *thread_map__new_by_pid(pid_t pid); struct thread_map *thread_map__new_by_tid(pid_t tid); struct thread_map *thread_map__new_by_uid(uid_t uid); +struct thread_map *thread_map__new_all_cpus(void); struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); struct thread_map *thread_map__new_event(struct thread_map_event *event); @@ -30,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map); void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid); + const char *tid, uid_t uid, bool per_thread); struct thread_map *thread_map__new_by_tid_str(const char *tid_str); diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 81927d027417..3f7f18f06982 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -6,6 +6,7 @@ #include <time.h> #include <errno.h> #include <inttypes.h> +#include <math.h> #include "perf.h" #include "debug.h" @@ -60,11 +61,10 @@ static int parse_timestr_sec_nsec(struct perf_time_interval *ptime, return 0; } -int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +static int split_start_end(char **start, char **end, const char *ostr, char ch) { char *start_str, *end_str; char *d, *str; - int rc = 0; if (ostr == NULL || *ostr == '\0') return 0; @@ -74,25 +74,35 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) if (str == NULL) return -ENOMEM; - ptime->start = 0; - ptime->end = 0; - - /* str has the format: <start>,<stop> - * variations: <start>, - * ,<stop> - * , - */ start_str = str; - d = strchr(start_str, ','); + d = strchr(start_str, ch); if (d) { *d = '\0'; ++d; } end_str = d; + *start = start_str; + *end = end_str; + + return 0; +} + +int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +{ + char *start_str = NULL, *end_str; + int rc; + + rc = split_start_end(&start_str, &end_str, ostr, ','); + if (rc || !start_str) + return rc; + + ptime->start = 0; + ptime->end = 0; + rc = parse_timestr_sec_nsec(ptime, start_str, end_str); - free(str); + free(start_str); /* make sure end time is after start time if it was given */ if (rc == 0 && ptime->end && ptime->end < ptime->start) @@ -104,6 +114,177 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) return rc; } +static int parse_percent(double *pcnt, char *str) +{ + char *c; + + c = strchr(str, '%'); + if (c) + *c = '\0'; + else + return -1; + + *pcnt = 
atof(str) / 100.0; + + return 0; +} + +static int percent_slash_split(char *str, struct perf_time_interval *ptime, + u64 start, u64 end) +{ + char *p, *end_str; + double pcnt, start_pcnt, end_pcnt; + u64 total = end - start; + int i; + + /* + * Example: + * 10%/2: select the second 10% slice and the third 10% slice + */ + + /* We can modify this string since the original one is copied */ + p = strchr(str, '/'); + if (!p) + return -1; + + *p = '\0'; + if (parse_percent(&pcnt, str) < 0) + return -1; + + p++; + i = (int)strtol(p, &end_str, 10); + if (*end_str) + return -1; + + if (pcnt <= 0.0) + return -1; + + start_pcnt = pcnt * (i - 1); + end_pcnt = pcnt * i; + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + +static int percent_dash_split(char *str, struct perf_time_interval *ptime, + u64 start, u64 end) +{ + char *start_str = NULL, *end_str; + double start_pcnt, end_pcnt; + u64 total = end - start; + int ret; + + /* + * Example: 0%-10% + */ + + ret = split_start_end(&start_str, &end_str, str, '-'); + if (ret || !start_str) + return ret; + + if ((parse_percent(&start_pcnt, start_str) != 0) || + (parse_percent(&end_pcnt, end_str) != 0)) { + free(start_str); + return -1; + } + + free(start_str); + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0 || + start_pcnt > end_pcnt) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + +typedef int (*time_pecent_split)(char *, struct perf_time_interval *, + u64 start, u64 end); + +static int percent_comma_split(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end, + time_pecent_split func) +{ + char *str, *p1, *p2; + int len, ret, i = 0; + + str = strdup(ostr); + if (str == NULL) + return -ENOMEM; + + len = strlen(str); + p1 = str; + + while (p1 < str + len) { + if (i >= num) { + free(str); + return -1; + } + + p2 = strchr(p1, ','); + if (p2) + *p2 = '\0'; + + ret = (func)(p1, &ptime_buf[i], start, end); + if (ret < 0) { + free(str); + return -1; + } + + pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start); + pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end); + + i++; + + if (p2) + p1 = p2 + 1; + else + break; + } + + free(str); + return i; +} + +int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end) +{ + char *c; + + /* + * ostr example: + * 10%/2,10%/3: select the second 10% slice and the third 10% slice + * 0%-10%,30%-40%: multiple time range + */ + + memset(ptime_buf, 0, sizeof(*ptime_buf) * num); + + c = strchr(ostr, '/'); + if (c) { + return percent_comma_split(ptime_buf, num, ostr, start, + end, percent_slash_split); + } + + c = strchr(ostr, '-'); + if (c) { + return percent_comma_split(ptime_buf, num, ostr, start, + end, percent_dash_split); + } + + return -1; +} + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) { /* if time is not set don't drop sample */ @@ -119,6 +300,34 @@ bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) return false; } +bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, + int num, u64 timestamp) +{ + struct perf_time_interval *ptime; + int i; + + if ((timestamp == 0) || (num == 0)) + return false; + + if (num == 1) + return 
perf_time__skip_sample(&ptime_buf[0], timestamp);
+
+	/*
+	 * start/end of multiple time ranges must be valid.
+	 */
+	for (i = 0; i < num; i++) {
+		ptime = &ptime_buf[i];
+
+		if (timestamp >= ptime->start &&
+		    ((timestamp < ptime->end && i < num - 1) ||
+		     (timestamp <= ptime->end && i == num - 1))) {
+			break;
+		}
+	}
+
+	return (i == num) ? true : false;
+}
+
 int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)
 {
 	u64 sec = timestamp / NSEC_PER_SEC;
diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h
index 15b475c50ccf..34d5eba26bf5 100644
--- a/tools/perf/util/time-utils.h
+++ b/tools/perf/util/time-utils.h
@@ -13,8 +13,14 @@ int parse_nsec_time(const char *str, u64 *ptime);
 
 int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr);
 
+int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
+				 const char *ostr, u64 start, u64 end);
+
 bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp);
 
+bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
+				   int num, u64 timestamp);
+
 int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);
 
 int fetch_current_timestamp(char *buf, size_t sz);
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 2532b558099b..183c91453522 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -76,6 +76,7 @@ struct perf_tool {
 	bool		ordered_events;
 	bool		ordering_requires_timestamps;
 	bool		namespace_events;
+	bool		no_warn;
 	enum show_feature_header show_feat_hdr;
 };
 
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index 647a1e6b4c7b..b029a5e9ae49 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -3,7 +3,7 @@
 #include "thread.h"
 #include "session.h"
 #include "debug.h"
-#include "arch/common.h"
+#include "env.h"
 
 struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
 struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
@@ -39,7 +39,7 @@ int unwind__prepare_access(struct thread *thread, struct map *map,
 	if (dso_type == DSO__TYPE_UNKNOWN)
 		return 0;
 
-	arch = normalize_arch(thread->mg->machine->env->arch);
+	arch = perf_env__arch(thread->mg->machine->env);
 
 	if (!strcmp(arch, "x86")) {
 		if (dso_type != DSO__TYPE_64BIT)
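
The stat-shadow hunks above replace the file-scope runtime_*_stats[ctx][cpu] arrays with a 'struct runtime_stat' that every helper now takes explicitly, which is what lets AGGR_THREAD keep a separate set of shadow counters per thread (config->stats[thread]) instead of sharing globals. The sketch below is a minimal standalone illustration of that keying pattern only; the type, array bounds and helper names (runtime_stat_demo, rt_update, rt_avg) are simplified stand-ins, not perf's rblist-based implementation.

#include <stdio.h>

enum stat_type { STAT_NSECS, STAT_CYCLES, STAT_BRANCHES, STAT_TYPE_MAX };

#define NUM_CTX	8
#define NUM_CPU	4

struct stats { double mean; unsigned long n; };

/* stand-in for perf's rblist-backed struct runtime_stat */
struct runtime_stat_demo {
	struct stats val[STAT_TYPE_MAX][NUM_CTX][NUM_CPU];
};

static void update_stats(struct stats *s, double v)
{
	s->n++;
	s->mean += (v - s->mean) / s->n;	/* incremental running mean */
}

static void rt_update(struct runtime_stat_demo *st, enum stat_type type,
		      int ctx, int cpu, double count)
{
	update_stats(&st->val[type][ctx][cpu], count);
}

static double rt_avg(struct runtime_stat_demo *st, enum stat_type type,
		     int ctx, int cpu)
{
	return st->val[type][ctx][cpu].mean;
}

int main(void)
{
	/* one shadow-stat instance per aggregation unit, e.g. per thread */
	static struct runtime_stat_demo per_thread[2];

	rt_update(&per_thread[0], STAT_CYCLES, 0, 0, 1200000.0);
	rt_update(&per_thread[1], STAT_CYCLES, 0, 0,  800000.0);

	printf("thread 0 cycles avg: %.0f\n", rt_avg(&per_thread[0], STAT_CYCLES, 0, 0));
	printf("thread 1 cycles avg: %.0f\n", rt_avg(&per_thread[1], STAT_CYCLES, 0, 0));
	return 0;
}

perf keeps the saved values in an rblist so the (event, ctx, cpu) space stays sparse; a dense array is used here purely to keep the example short.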
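
strpbrk_esc() and strdup_esc() from the util/string.c hunk are self-contained, so their behaviour is easy to check in isolation. The two helpers below are copied from the patch; the main() and the expected output in the comments are illustrative additions, not part of the change.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Like strpbrk(), but not break if it is right after a backslash (escaped) */
char *strpbrk_esc(char *str, const char *stopset)
{
	char *ptr;

	do {
		ptr = strpbrk(str, stopset);
		if (ptr == str ||
		    (ptr == str + 1 && *(ptr - 1) != '\\'))
			break;
		str = ptr + 1;
	} while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\');

	return ptr;
}

/* Like strdup, but do not copy a single backslash */
char *strdup_esc(const char *str)
{
	char *s, *d, *p, *ret = strdup(str);

	if (!ret)
		return NULL;

	d = strchr(ret, '\\');
	if (!d)
		return ret;

	s = d + 1;
	do {
		if (*s == '\0') {
			*d = '\0';
			break;
		}
		p = strchr(s + 1, '\\');
		if (p) {
			memmove(d, s, p - s);
			d += p - s;
			s = p + 1;
		} else
			memmove(d, s, strlen(s) + 1);
	} while (p);

	return ret;
}

int main(void)
{
	char probe[] = "foo\\,bar,baz";	/* the first ',' is escaped */
	char *sep = strpbrk_esc(probe, ",");
	char *clean = strdup_esc("foo\\,bar");

	printf("separator found at: \"%s\"\n", sep);	/* ",baz" */
	printf("unescaped copy:     \"%s\"\n", clean);	/* "foo,bar" */
	free(clean);
	return 0;
}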
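
perf_time__percent_parse_str() accepts percentage selectors relative to the recorded time window: "10%/2" means the second 10% slice, while "0%-10%,30%-40%" lists explicit ranges. The mapping is a linear interpolation over [start, end]. The standalone sketch below (the function name and the sample timestamps are mine, not perf's) reproduces the percent_slash_split() arithmetic for a hypothetical 100-second session; compile with -lm for round().

#include <stdio.h>
#include <stdint.h>
#include <math.h>

/*
 * Mirror of the "a%/n" mapping used by percent_slash_split():
 * slice number n, each slice a% wide, over the window [start, end]
 * (times in nanoseconds).
 */
static void percent_slice(double pcnt, int n, uint64_t start, uint64_t end,
			  uint64_t *slice_start, uint64_t *slice_end)
{
	uint64_t total = end - start;

	*slice_start = start + (uint64_t)round(pcnt * (n - 1) * total);
	*slice_end   = start + (uint64_t)round(pcnt * n * total);
}

int main(void)
{
	uint64_t first = 1000000000ULL;		/* first sample at 1s */
	uint64_t last  = 101000000000ULL;	/* last sample at 101s */
	uint64_t s, e;

	/* "10%/2": the second 10% slice of the 100s window */
	percent_slice(0.10, 2, first, last, &s, &e);

	printf("selected range: %.1fs - %.1fs\n", s / 1e9, e / 1e9);
	/* prints: selected range: 11.0s - 21.0s */
	return 0;
}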
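
perf_time__ranges_skip_sample() drops a sample unless it falls inside one of the parsed ranges, and the end of a range is exclusive for every range except the last. The standalone re-statement below mirrors that check with a simplified interval type (struct time_interval is a stand-in for perf's struct perf_time_interval; the num == 1 fallback to perf_time__skip_sample() is left out), so the boundary behaviour can be seen directly.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* stand-in for perf's struct perf_time_interval */
struct time_interval { uint64_t start, end; };

/* same in-range test as perf_time__ranges_skip_sample() for num > 1 */
static bool ranges_skip_sample(struct time_interval *r, int num, uint64_t ts)
{
	int i;

	if (!ts || !num)
		return false;

	for (i = 0; i < num; i++) {
		if (ts >= r[i].start &&
		    ((ts < r[i].end && i < num - 1) ||
		     (ts <= r[i].end && i == num - 1)))
			break;
	}

	return i == num;	/* true means: skip this sample */
}

int main(void)
{
	struct time_interval r[2] = { { 10, 20 }, { 30, 40 } };

	printf("ts=15 skipped: %d\n", ranges_skip_sample(r, 2, 15)); /* 0 */
	printf("ts=20 skipped: %d\n", ranges_skip_sample(r, 2, 20)); /* 1: first end exclusive */
	printf("ts=40 skipped: %d\n", ranges_skip_sample(r, 2, 40)); /* 0: last end inclusive */
	return 0;
}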