diff options
Diffstat (limited to 'tools/perf/util/hist.c')
| -rw-r--r-- | tools/perf/util/hist.c | 569 |
1 files changed, 386 insertions, 183 deletions
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b6e4b4edde43..ef4b569f7df4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -4,10 +4,12 @@ #include "dso.h" #include "build-id.h" #include "hist.h" +#include "kvm-stat.h" #include "map.h" #include "map_symbol.h" #include "branch.h" #include "mem-events.h" +#include "mem-info.h" #include "session.h" #include "namespaces.h" #include "cgroup.h" @@ -30,6 +32,9 @@ #include <linux/time64.h> #include <linux/zalloc.h> +static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); +static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); + static bool hists__filter_entry_by_dso(struct hists *hists, struct hist_entry *he); static bool hists__filter_entry_by_thread(struct hists *hists, @@ -38,6 +43,8 @@ static bool hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he); static bool hists__filter_entry_by_socket(struct hists *hists, struct hist_entry *he); +static bool hists__filter_entry_by_parallelism(struct hists *hists, + struct hist_entry *he); u16 hists__col_len(struct hists *hists, enum hist_column col) { @@ -105,7 +112,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__set_col_len(hists, HISTC_THREAD, len + 8); if (h->ms.map) { - len = dso__name_len(h->ms.map->dso); + len = dso__name_len(map__dso(h->ms.map)); hists__new_col_len(hists, HISTC_DSO, len); } @@ -119,7 +126,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) symlen += BITS_PER_LONG / 4 + 2 + 3; hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); - symlen = dso__name_len(h->branch_info->from.ms.map->dso); + symlen = dso__name_len(map__dso(h->branch_info->from.ms.map)); hists__new_col_len(hists, HISTC_DSO_FROM, symlen); } else { symlen = unresolved_col_width + 4 + 2; @@ -134,7 +141,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) symlen += BITS_PER_LONG / 4 + 2 + 3; hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); - symlen = dso__name_len(h->branch_info->to.ms.map->dso); + symlen = dso__name_len(map__dso(h->branch_info->to.ms.map)); hists__new_col_len(hists, HISTC_DSO_TO, symlen); } else { symlen = unresolved_col_width + 4 + 2; @@ -152,8 +159,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) } if (h->mem_info) { - if (h->mem_info->daddr.ms.sym) { - symlen = (int)h->mem_info->daddr.ms.sym->namelen + 4 + if (mem_info__daddr(h->mem_info)->ms.sym) { + symlen = (int)mem_info__daddr(h->mem_info)->ms.sym->namelen + 4 + unresolved_col_width + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); @@ -167,8 +174,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) symlen); } - if (h->mem_info->iaddr.ms.sym) { - symlen = (int)h->mem_info->iaddr.ms.sym->namelen + 4 + if (mem_info__iaddr(h->mem_info)->ms.sym) { + symlen = (int)mem_info__iaddr(h->mem_info)->ms.sym->namelen + 4 + unresolved_col_width + 2; hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, symlen); @@ -178,8 +185,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) symlen); } - if (h->mem_info->daddr.ms.map) { - symlen = dso__name_len(h->mem_info->daddr.ms.map->dso); + if (mem_info__daddr(h->mem_info)->ms.map) { + symlen = dso__name_len(map__dso(mem_info__daddr(h->mem_info)->ms.map)); hists__new_col_len(hists, HISTC_MEM_DADDR_DSO, symlen); } else { @@ -202,12 +209,13 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_CGROUP, 6); hists__new_col_len(hists, HISTC_CGROUP_ID, 20); + hists__new_col_len(hists, HISTC_PARALLELISM, 11); hists__new_col_len(hists, HISTC_CPU, 3); hists__new_col_len(hists, HISTC_SOCKET, 6); hists__new_col_len(hists, HISTC_MEM_LOCKED, 6); hists__new_col_len(hists, HISTC_MEM_TLB, 22); hists__new_col_len(hists, HISTC_MEM_SNOOP, 12); - hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3); + hists__new_col_len(hists, HISTC_MEM_LVL, 36 + 3); hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); @@ -216,6 +224,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_PREDICTED, 9); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_ABORT, 5); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_CYCLES, 6); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); @@ -240,7 +251,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) if (h->cgroup) { const char *cgrp_name = "unknown"; - struct cgroup *cgrp = cgroup__find(h->ms.maps->machine->env, + struct cgroup *cgrp = cgroup__find(maps__machine(h->ms.maps)->env, h->cgroup); if (cgrp != NULL) cgrp_name = cgrp->name; @@ -294,9 +305,10 @@ static long hist_time(unsigned long htime) return htime; } -static void he_stat__add_period(struct he_stat *he_stat, u64 period) +static void he_stat__add_period(struct he_stat *he_stat, u64 period, u64 latency) { he_stat->period += period; + he_stat->latency += latency; he_stat->nr_events += 1; } @@ -307,14 +319,84 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->period_us += src->period_us; dest->period_guest_sys += src->period_guest_sys; dest->period_guest_us += src->period_guest_us; + dest->weight1 += src->weight1; + dest->weight2 += src->weight2; + dest->weight3 += src->weight3; dest->nr_events += src->nr_events; + dest->latency += src->latency; } static void he_stat__decay(struct he_stat *he_stat) { he_stat->period = (he_stat->period * 7) / 8; he_stat->nr_events = (he_stat->nr_events * 7) / 8; - /* XXX need decay for weight too? */ + he_stat->weight1 = (he_stat->weight1 * 7) / 8; + he_stat->weight2 = (he_stat->weight2 * 7) / 8; + he_stat->weight3 = (he_stat->weight3 * 7) / 8; + he_stat->latency = (he_stat->latency * 7) / 8; +} + +static int hists__update_mem_stat(struct hists *hists, struct hist_entry *he, + struct mem_info *mi, u64 period) +{ + if (hists->nr_mem_stats == 0) + return 0; + + if (he->mem_stat == NULL) { + he->mem_stat = calloc(hists->nr_mem_stats, sizeof(*he->mem_stat)); + if (he->mem_stat == NULL) + return -1; + } + + for (int i = 0; i < hists->nr_mem_stats; i++) { + int idx = mem_stat_index(hists->mem_stat_types[i], + mem_info__const_data_src(mi)->val); + + assert(0 <= idx && idx < MEM_STAT_LEN); + he->mem_stat[i].entries[idx] += period; + hists->mem_stat_total[i].entries[idx] += period; + } + return 0; +} + +static void hists__add_mem_stat(struct hists *hists, struct hist_entry *dst, + struct hist_entry *src) +{ + if (hists->nr_mem_stats == 0) + return; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + dst->mem_stat[i].entries[k] += src->mem_stat[i].entries[k]; + } +} + +static int hists__clone_mem_stat(struct hists *hists, struct hist_entry *dst, + struct hist_entry *src) +{ + if (hists->nr_mem_stats == 0) + return 0; + + dst->mem_stat = calloc(hists->nr_mem_stats, sizeof(*dst->mem_stat)); + if (dst->mem_stat == NULL) + return -1; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + dst->mem_stat[i].entries[k] = src->mem_stat[i].entries[k]; + } + return 0; +} + +static void hists__decay_mem_stat(struct hists *hists, struct hist_entry *he) +{ + if (hists->nr_mem_stats == 0) + return; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + he->mem_stat[i].entries[k] = (he->mem_stat[i].entries[k] * 7) / 8; + } } static void hists__delete_entry(struct hists *hists, struct hist_entry *he); @@ -322,7 +404,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he); static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) { u64 prev_period = he->stat.period; - u64 diff; + u64 prev_latency = he->stat.latency; if (prev_period == 0) return true; @@ -331,13 +413,18 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) if (symbol_conf.cumulate_callchain) he_stat__decay(he->stat_acc); decay_callchain(he->callchain); - - diff = prev_period - he->stat.period; + hists__decay_mem_stat(hists, he); if (!he->depth) { - hists->stats.total_period -= diff; - if (!he->filtered) - hists->stats.total_non_filtered_period -= diff; + u64 period_diff = prev_period - he->stat.period; + u64 latency_diff = prev_latency - he->stat.latency; + + hists->stats.total_period -= period_diff; + hists->stats.total_latency -= latency_diff; + if (!he->filtered) { + hists->stats.total_non_filtered_period -= period_diff; + hists->stats.total_non_filtered_latency -= latency_diff; + } } if (!he->leaf) { @@ -352,7 +439,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) } } - return he->stat.period == 0; + return he->stat.period == 0 && he->stat.latency == 0; } static void hists__delete_entry(struct hists *hists, struct hist_entry *he) @@ -449,7 +536,8 @@ static int hist_entry__init(struct hist_entry *he, memset(&he->stat, 0, sizeof(he->stat)); } - map__get(he->ms.map); + he->ms.maps = maps__get(he->ms.maps); + he->ms.map = map__get(he->ms.map); if (he->branch_info) { /* @@ -464,13 +552,16 @@ static int hist_entry__init(struct hist_entry *he, memcpy(he->branch_info, template->branch_info, sizeof(*he->branch_info)); - map__get(he->branch_info->from.ms.map); - map__get(he->branch_info->to.ms.map); + he->branch_info->from.ms.maps = maps__get(he->branch_info->from.ms.maps); + he->branch_info->from.ms.map = map__get(he->branch_info->from.ms.map); + he->branch_info->to.ms.maps = maps__get(he->branch_info->to.ms.maps); + he->branch_info->to.ms.map = map__get(he->branch_info->to.ms.map); } if (he->mem_info) { - map__get(he->mem_info->iaddr.ms.map); - map__get(he->mem_info->daddr.ms.map); + he->mem_info = mem_info__clone(template->mem_info); + if (he->mem_info == NULL) + goto err_infos; } if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) @@ -482,21 +573,21 @@ static int hist_entry__init(struct hist_entry *he, goto err_infos; } - if (he->srcline) { + if (he->srcline && he->srcline != SRCLINE_UNKNOWN) { he->srcline = strdup(he->srcline); if (he->srcline == NULL) goto err_rawdata; } if (symbol_conf.res_sample) { - he->res_samples = calloc(sizeof(struct res_sample), - symbol_conf.res_sample); + he->res_samples = calloc(symbol_conf.res_sample, + sizeof(struct res_sample)); if (!he->res_samples) goto err_srcline; } INIT_LIST_HEAD(&he->pairs.node); - thread__get(he->thread); + he->thread = thread__get(he->thread); he->hroot_in = RB_ROOT_CACHED; he->hroot_out = RB_ROOT_CACHED; @@ -513,16 +604,14 @@ err_rawdata: err_infos: if (he->branch_info) { - map__put(he->branch_info->from.ms.map); - map__put(he->branch_info->to.ms.map); + map_symbol__exit(&he->branch_info->from.ms); + map_symbol__exit(&he->branch_info->to.ms); zfree(&he->branch_info); } - if (he->mem_info) { - map__put(he->mem_info->iaddr.ms.map); - map__put(he->mem_info->daddr.ms.map); - } + if (he->mem_info) + mem_info__zput(he->mem_info); err: - map__zput(he->ms.map); + map_symbol__exit(&he->ms); zfree(&he->stat_acc); return -ENOMEM; } @@ -564,30 +653,32 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, he = NULL; } } - return he; } -static u8 symbol__parent_filter(const struct symbol *parent) +static filter_mask_t symbol__parent_filter(const struct symbol *parent) { if (symbol_conf.exclude_other && parent == NULL) return 1 << HIST_FILTER__PARENT; return 0; } -static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period) +static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period, u64 latency) { if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain) return; he->hists->callchain_period += period; - if (!he->filtered) + he->hists->callchain_latency += latency; + if (!he->filtered) { he->hists->callchain_non_filtered_period += period; + he->hists->callchain_non_filtered_latency += latency; + } } static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *entry, - struct addr_location *al, + const struct addr_location *al, bool sample_self) { struct rb_node **p; @@ -595,6 +686,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; u64 period = entry->stat.period; + u64 latency = entry->stat.latency; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -610,22 +702,17 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, * keys were used. */ cmp = hist_entry__cmp(he, entry); - if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period); - hist_entry__add_callchain_period(he, period); + he_stat__add_stat(&he->stat, &entry->stat); + hist_entry__add_callchain_period(he, period, latency); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period); + he_stat__add_period(he->stat_acc, period, latency); - /* - * This mem info was allocated from sample__resolve_mem - * and will not be used anymore. - */ - mem_info__zput(entry->mem_info); + block_info__delete(entry->block_info); - block_info__zput(entry->block_info); + kvm_info__zput(entry->kvm_info); /* If the map of an existing hist_entry has * become out-of-date due to an exec() or @@ -633,7 +720,12 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, * mis-adjust symbol addresses when computing * the history counter to increment. */ - if (he->ms.map != entry->ms.map) { + if (hists__has(hists, sym) && he->ms.map != entry->ms.map) { + if (he->ms.sym) { + u64 addr = he->ms.sym->start; + he->ms.sym = map__find_symbol(entry->ms.map, addr); + } + map__put(he->ms.map); he->ms.map = map__get(entry->ms.map); } @@ -653,7 +745,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, return NULL; if (sample_self) - hist_entry__add_callchain_period(he, period); + hist_entry__add_callchain_period(he, period, latency); hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); @@ -663,6 +755,10 @@ out: he_stat__add_cpumode_period(&he->stat, al->cpumode, period); if (symbol_conf.cumulate_callchain) he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period); + if (hists__update_mem_stat(hists, he, entry->mem_info, period) < 0) { + hist_entry__delete(he); + return NULL; + } return he; } @@ -698,6 +794,7 @@ __hists__add_entry(struct hists *hists, struct symbol *sym_parent, struct branch_info *bi, struct mem_info *mi, + struct kvm_info *ki, struct block_info *block_info, struct perf_sample *sample, bool sample_self, @@ -724,15 +821,21 @@ __hists__add_entry(struct hists *hists, .ip = al->addr, .level = al->level, .code_page_size = sample->code_page_size, + .parallelism = al->parallelism, .stat = { .nr_events = 1, .period = sample->period, + .weight1 = sample->weight, + .weight2 = sample->ins_lat, + .weight3 = sample->weight3, + .latency = al->latency, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, .hists = hists, .branch_info = bi, .mem_info = mi, + .kvm_info = ki, .block_info = block_info, .transaction = sample->transaction, .raw_data = sample->raw_data, @@ -741,7 +844,8 @@ __hists__add_entry(struct hists *hists, .time = hist_time(sample->time), .weight = sample->weight, .ins_lat = sample->ins_lat, - .p_stage_cyc = sample->p_stage_cyc, + .weight3 = sample->weight3, + .simd_flags = sample->simd_flags, }, *he = hists__findnew_entry(hists, &entry, al, sample_self); if (!hists->has_callchains && he && he->callchain_size != 0) @@ -756,10 +860,11 @@ struct hist_entry *hists__add_entry(struct hists *hists, struct symbol *sym_parent, struct branch_info *bi, struct mem_info *mi, + struct kvm_info *ki, struct perf_sample *sample, bool sample_self) { - return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL, + return __hists__add_entry(hists, al, sym_parent, bi, mi, ki, NULL, sample, sample_self, NULL); } @@ -769,10 +874,11 @@ struct hist_entry *hists__add_entry_ops(struct hists *hists, struct symbol *sym_parent, struct branch_info *bi, struct mem_info *mi, + struct kvm_info *ki, struct perf_sample *sample, bool sample_self) { - return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL, + return __hists__add_entry(hists, al, sym_parent, bi, mi, ki, NULL, sample, sample_self, ops); } @@ -817,7 +923,7 @@ iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) if (mi == NULL) return -ENOMEM; - iter->priv = mi; + iter->mi = mi; return 0; } @@ -825,7 +931,7 @@ static int iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) { u64 cost; - struct mem_info *mi = iter->priv; + struct mem_info *mi = iter->mi; struct hists *hists = evsel__hists(iter->evsel); struct perf_sample *sample = iter->sample; struct hist_entry *he; @@ -846,7 +952,7 @@ iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al */ sample->period = cost; - he = hists__add_entry(hists, al, iter->parent, NULL, mi, + he = hists__add_entry(hists, al, iter->parent, NULL, mi, NULL, sample, true); if (!he) return -ENOMEM; @@ -872,12 +978,7 @@ iter_finish_mem_entry(struct hist_entry_iter *iter, err = hist_entry__append_callchain(he, iter->sample); out: - /* - * We don't need to free iter->priv (mem_info) here since the mem info - * was either already freed in hists__findnew_entry() or passed to a - * new hist entry by hist_entry__new(). - */ - iter->priv = NULL; + mem_info__zput(iter->mi); iter->he = NULL; return err; @@ -896,7 +997,7 @@ iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al iter->curr = 0; iter->total = sample->branch_stack->nr; - iter->priv = bi; + iter->bi = bi; return 0; } @@ -910,7 +1011,7 @@ iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused, static int iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) { - struct branch_info *bi = iter->priv; + struct branch_info *bi = iter->bi; int i = iter->curr; if (bi == NULL) @@ -919,8 +1020,10 @@ iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) if (iter->curr >= iter->total) return 0; - al->maps = bi[i].to.ms.maps; - al->map = bi[i].to.ms.map; + maps__put(al->maps); + al->maps = maps__get(bi[i].to.ms.maps); + map__put(al->map); + al->map = map__get(bi[i].to.ms.map); al->sym = bi[i].to.ms.sym; al->addr = bi[i].to.addr; return 1; @@ -937,7 +1040,7 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a int i = iter->curr; int err = 0; - bi = iter->priv; + bi = iter->bi; if (iter->hide_unresolved && !(bi[i].from.ms.sym && bi[i].to.ms.sym)) goto out; @@ -949,24 +1052,39 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a sample->period = 1; sample->weight = bi->flags.cycles ? bi->flags.cycles : 1; - he = hists__add_entry(hists, al, iter->parent, &bi[i], NULL, + he = hists__add_entry(hists, al, iter->parent, &bi[i], NULL, NULL, sample, true); if (he == NULL) return -ENOMEM; - hists__inc_nr_samples(hists, he->filtered); - out: iter->he = he; iter->curr++; return err; } +static void branch_info__exit(struct branch_info *bi) +{ + map_symbol__exit(&bi->from.ms); + map_symbol__exit(&bi->to.ms); + zfree_srcline(&bi->srcline_from); + zfree_srcline(&bi->srcline_to); +} + static int iter_finish_branch_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { - zfree(&iter->priv); + struct evsel *evsel = iter->evsel; + struct hists *hists = evsel__hists(evsel); + + for (int i = 0; i < iter->total; i++) + branch_info__exit(&iter->bi[i]); + + if (iter->he) + hists__inc_nr_samples(hists, iter->he->filtered); + + zfree(&iter->bi); iter->he = NULL; return iter->curr >= iter->total ? 0 : -1; @@ -987,7 +1105,7 @@ iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location struct hist_entry *he; he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL, - sample, true); + NULL, sample, true); if (he == NULL) return -ENOMEM; @@ -1018,19 +1136,23 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { struct hist_entry **he_cache; + struct callchain_cursor *cursor = get_tls_callchain_cursor(); - callchain_cursor_commit(&callchain_cursor); + if (cursor == NULL) + return -ENOMEM; + + callchain_cursor_commit(cursor); /* * This is for detecting cycles or recursions so that they're * cumulated only one time to prevent entries more than 100% * overhead. */ - he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1)); + he_cache = malloc(sizeof(*he_cache) * (cursor->nr + 1)); if (he_cache == NULL) return -ENOMEM; - iter->priv = he_cache; + iter->he_cache = he_cache; iter->curr = 0; return 0; @@ -1043,11 +1165,11 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, struct evsel *evsel = iter->evsel; struct hists *hists = evsel__hists(evsel); struct perf_sample *sample = iter->sample; - struct hist_entry **he_cache = iter->priv; + struct hist_entry **he_cache = iter->he_cache; struct hist_entry *he; int err = 0; - he = hists__add_entry(hists, al, iter->parent, NULL, NULL, + he = hists__add_entry(hists, al, iter->parent, NULL, NULL, NULL, sample, true); if (he == NULL) return -ENOMEM; @@ -1061,7 +1183,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, * We need to re-initialize the cursor since callchain_append() * advanced the cursor to the end. */ - callchain_cursor_commit(&callchain_cursor); + callchain_cursor_commit(get_tls_callchain_cursor()); hists__inc_nr_samples(hists, he->filtered); @@ -1074,7 +1196,7 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter, { struct callchain_cursor_node *node; - node = callchain_cursor_current(&callchain_cursor); + node = callchain_cursor_current(get_tls_callchain_cursor()); if (node == NULL) return 0; @@ -1101,7 +1223,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, { struct evsel *evsel = iter->evsel; struct perf_sample *sample = iter->sample; - struct hist_entry **he_cache = iter->priv; + struct hist_entry **he_cache = iter->he_cache; struct hist_entry *he; struct hist_entry he_tmp = { .hists = evsel__hists(evsel), @@ -1120,12 +1242,15 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, .raw_size = sample->raw_size, }; int i; - struct callchain_cursor cursor; + struct callchain_cursor cursor, *tls_cursor = get_tls_callchain_cursor(); bool fast = hists__has(he_tmp.hists, sym); - callchain_cursor_snapshot(&cursor, &callchain_cursor); + if (tls_cursor == NULL) + return -ENOMEM; - callchain_cursor_advance(&callchain_cursor); + callchain_cursor_snapshot(&cursor, tls_cursor); + + callchain_cursor_advance(tls_cursor); /* * Check if there's duplicate entries in the callchain. @@ -1148,7 +1273,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, } he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL, - sample, false); + NULL, sample, false); if (he == NULL) return -ENOMEM; @@ -1164,7 +1289,9 @@ static int iter_finish_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { - zfree(&iter->priv); + mem_info__zput(iter->mi); + zfree(&iter->bi); + zfree(&iter->he_cache); iter->he = NULL; return 0; @@ -1211,7 +1338,7 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, if (al) alm = map__get(al->map); - err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, + err = sample__resolve_callchain(iter->sample, get_tls_callchain_cursor(), &iter->parent, iter->evsel, al, max_stack_depth); if (err) { map__put(alm); @@ -1254,19 +1381,35 @@ out: return err; } -int64_t -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) +static int64_t +hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left, + struct hist_entry *right, unsigned long fn_offset, + bool ignore_dynamic, bool ignore_skipped) { struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; - int64_t cmp = 0; + perf_hpp_fmt_cmp_t *fn; + int64_t cmp; - hists__for_each_sort_list(hists, fmt) { - if (perf_hpp__is_dynamic_entry(fmt) && + /* + * Never collapse filtered and non-filtered entries. + * Note this is not the same as having an extra (invisible) fmt + * that corresponds to the filtered status. + */ + cmp = (int64_t)!!left->filtered - (int64_t)!!right->filtered; + if (cmp) + return cmp; + + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { + if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) && !perf_hpp__defined_dynamic_entry(fmt, hists)) continue; - cmp = fmt->cmp(fmt, left, right); + if (ignore_skipped && perf_hpp__should_skip(fmt, hists)) + continue; + + fn = (void *)fmt + fn_offset; + cmp = (*fn)(fmt, left, right); if (cmp) break; } @@ -1275,57 +1418,78 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) } int64_t -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { - struct hists *hists = left->hists; - struct perf_hpp_fmt *fmt; - int64_t cmp = 0; + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, + offsetof(struct perf_hpp_fmt, cmp), true, false); +} - hists__for_each_sort_list(hists, fmt) { - if (perf_hpp__is_dynamic_entry(fmt) && - !perf_hpp__defined_dynamic_entry(fmt, hists)) - continue; +static int64_t +hist_entry__sort(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, + offsetof(struct perf_hpp_fmt, sort), false, true); +} - cmp = fmt->collapse(fmt, left, right); - if (cmp) - break; - } +int64_t +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, + offsetof(struct perf_hpp_fmt, collapse), true, false); +} - return cmp; +static int64_t +hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list, + struct hist_entry *left, + struct hist_entry *right) +{ + return hist_entry__cmp_impl(hpp_list, left, right, + offsetof(struct perf_hpp_fmt, collapse), false, false); } void hist_entry__delete(struct hist_entry *he) { struct hist_entry_ops *ops = he->ops; + if (symbol_conf.report_hierarchy) { + struct rb_root *root = &he->hroot_out.rb_root; + struct hist_entry *child, *tmp; + + rbtree_postorder_for_each_entry_safe(child, tmp, root, rb_node) + hist_entry__delete(child); + + *root = RB_ROOT; + } + thread__zput(he->thread); - map__zput(he->ms.map); + map_symbol__exit(&he->ms); if (he->branch_info) { - map__zput(he->branch_info->from.ms.map); - map__zput(he->branch_info->to.ms.map); - free_srcline(he->branch_info->srcline_from); - free_srcline(he->branch_info->srcline_to); + branch_info__exit(he->branch_info); zfree(&he->branch_info); } if (he->mem_info) { - map__zput(he->mem_info->iaddr.ms.map); - map__zput(he->mem_info->daddr.ms.map); + map_symbol__exit(&mem_info__iaddr(he->mem_info)->ms); + map_symbol__exit(&mem_info__daddr(he->mem_info)->ms); mem_info__zput(he->mem_info); } if (he->block_info) - block_info__zput(he->block_info); + block_info__delete(he->block_info); + + if (he->kvm_info) + kvm_info__zput(he->kvm_info); zfree(&he->res_samples); zfree(&he->stat_acc); - free_srcline(he->srcline); + zfree_srcline(&he->srcline); if (he->srcfile && he->srcfile[0]) zfree(&he->srcfile); free_callchain(he->callchain); zfree(&he->trace_output); zfree(&he->raw_data); + zfree(&he->mem_stat); ops->free(he); } @@ -1388,6 +1552,10 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he, if (symbol_conf.sym_list == NULL) return; break; + case HIST_FILTER__PARALLELISM: + if (__bitmap_weight(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1) == 0) + return; + break; case HIST_FILTER__PARENT: case HIST_FILTER__GUEST: case HIST_FILTER__HOST: @@ -1446,6 +1614,9 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he) hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL, perf_hpp__is_sym_entry); + hist_entry__check_and_remove_filter(he, HIST_FILTER__PARALLELISM, + perf_hpp__is_parallelism_entry); + hists__apply_filters(he->hists, he); } @@ -1465,16 +1636,10 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, while (*p != NULL) { parent = *p; iter = rb_entry(parent, struct hist_entry, rb_node_in); - - cmp = 0; - perf_hpp_list__for_each_sort_list(hpp_list, fmt) { - cmp = fmt->collapse(fmt, iter, he); - if (cmp) - break; - } - + cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he); if (!cmp) { he_stat__add_stat(&iter->stat, &he->stat); + hists__add_mem_stat(hists, iter, he); return iter; } @@ -1516,6 +1681,11 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, new->srcfile = NULL; } + if (hists__clone_mem_stat(hists, new, he) < 0) { + hist_entry__delete(new); + return NULL; + } + rb_link_node(&new->rb_node_in, parent, p); rb_insert_color_cached(&new->rb_node_in, root, leftmost); return new; @@ -1553,8 +1723,13 @@ static int hists__hierarchy_insert_entry(struct hists *hists, if (hist_entry__has_callchains(new_he) && symbol_conf.use_callchain) { - callchain_cursor_reset(&callchain_cursor); - if (callchain_merge(&callchain_cursor, + struct callchain_cursor *cursor = get_tls_callchain_cursor(); + + if (cursor == NULL) + return -1; + + callchain_cursor_reset(cursor); + if (callchain_merge(cursor, new_he->callchain, he->callchain) < 0) ret = -1; @@ -1593,13 +1768,18 @@ static int hists__collapse_insert_entry(struct hists *hists, he_stat__add_stat(&iter->stat, &he->stat); if (symbol_conf.cumulate_callchain) he_stat__add_stat(iter->stat_acc, he->stat_acc); + hists__add_mem_stat(hists, iter, he); if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) { - callchain_cursor_reset(&callchain_cursor); - if (callchain_merge(&callchain_cursor, - iter->callchain, - he->callchain) < 0) - ret = -1; + struct callchain_cursor *cursor = get_tls_callchain_cursor(); + + if (cursor != NULL) { + callchain_cursor_reset(cursor); + if (callchain_merge(cursor, iter->callchain, he->callchain) < 0) + ret = -1; + } else { + ret = 0; + } } hist_entry__delete(he); return ret; @@ -1640,6 +1820,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) hists__filter_entry_by_thread(hists, he); hists__filter_entry_by_symbol(hists, he); hists__filter_entry_by_socket(hists, he); + hists__filter_entry_by_parallelism(hists, he); } int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) @@ -1683,34 +1864,18 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) return 0; } -static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b) -{ - struct hists *hists = a->hists; - struct perf_hpp_fmt *fmt; - int64_t cmp = 0; - - hists__for_each_sort_list(hists, fmt) { - if (perf_hpp__should_skip(fmt, a->hists)) - continue; - - cmp = fmt->sort(fmt, a, b); - if (cmp) - break; - } - - return cmp; -} - static void hists__reset_filter_stats(struct hists *hists) { hists->nr_non_filtered_entries = 0; hists->stats.total_non_filtered_period = 0; + hists->stats.total_non_filtered_latency = 0; } void hists__reset_stats(struct hists *hists) { hists->nr_entries = 0; hists->stats.total_period = 0; + hists->stats.total_latency = 0; hists__reset_filter_stats(hists); } @@ -1719,6 +1884,7 @@ static void hists__inc_filter_stats(struct hists *hists, struct hist_entry *h) { hists->nr_non_filtered_entries++; hists->stats.total_non_filtered_period += h->stat.period; + hists->stats.total_non_filtered_latency += h->stat.latency; } void hists__inc_stats(struct hists *hists, struct hist_entry *h) @@ -1728,6 +1894,7 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) hists->nr_entries++; hists->stats.total_period += h->stat.period; + hists->stats.total_latency += h->stat.latency; } static void hierarchy_recalc_total_periods(struct hists *hists) @@ -1739,6 +1906,8 @@ static void hierarchy_recalc_total_periods(struct hists *hists) hists->stats.total_period = 0; hists->stats.total_non_filtered_period = 0; + hists->stats.total_latency = 0; + hists->stats.total_non_filtered_latency = 0; /* * recalculate total period using top-level entries only @@ -1750,8 +1919,11 @@ static void hierarchy_recalc_total_periods(struct hists *hists) node = rb_next(node); hists->stats.total_period += he->stat.period; - if (!he->filtered) + hists->stats.total_latency += he->stat.latency; + if (!he->filtered) { hists->stats.total_non_filtered_period += he->stat.period; + hists->stats.total_non_filtered_latency += he->stat.latency; + } } } @@ -2099,7 +2271,7 @@ static bool hists__filter_entry_by_dso(struct hists *hists, struct hist_entry *he) { if (hists->dso_filter != NULL && - (he->ms.map == NULL || he->ms.map->dso != hists->dso_filter)) { + (he->ms.map == NULL || !RC_CHK_EQUAL(map__dso(he->ms.map), hists->dso_filter))) { he->filtered |= (1 << HIST_FILTER__DSO); return true; } @@ -2111,7 +2283,7 @@ static bool hists__filter_entry_by_thread(struct hists *hists, struct hist_entry *he) { if (hists->thread_filter != NULL && - he->thread != hists->thread_filter) { + !RC_CHK_EQUAL(he->thread, hists->thread_filter)) { he->filtered |= (1 << HIST_FILTER__THREAD); return true; } @@ -2144,6 +2316,16 @@ static bool hists__filter_entry_by_socket(struct hists *hists, return false; } +static bool hists__filter_entry_by_parallelism(struct hists *hists, + struct hist_entry *he) +{ + if (test_bit(he->parallelism, hists->parallelism_filter)) { + he->filtered |= (1 << HIST_FILTER__PARALLELISM); + return true; + } + return false; +} + typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he); static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter) @@ -2313,6 +2495,16 @@ void hists__filter_by_socket(struct hists *hists) hists__filter_entry_by_socket); } +void hists__filter_by_parallelism(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__PARALLELISM, + hists->parallelism_filter); + else + hists__filter_by_type(hists, HIST_FILTER__PARALLELISM, + hists__filter_entry_by_parallelism); +} + void events_stats__inc(struct events_stats *stats, u32 type) { ++stats->nr_events[0]; @@ -2341,6 +2533,11 @@ void hists__inc_nr_lost_samples(struct hists *hists, u32 lost) hists->stats.nr_lost_samples += lost; } +void hists__inc_nr_dropped_samples(struct hists *hists, u32 lost) +{ + hists->stats.nr_dropped_samples += lost; +} + static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *pair) { @@ -2397,21 +2594,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; - struct perf_hpp_fmt *fmt; bool leftmost = true; p = &root->rb_root.rb_node; while (*p != NULL) { - int64_t cmp = 0; + int64_t cmp; parent = *p; he = rb_entry(parent, struct hist_entry, rb_node_in); - - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { - cmp = fmt->collapse(fmt, he, pair); - if (cmp) - break; - } + cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair); if (!cmp) goto out; @@ -2469,16 +2660,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo while (n) { struct hist_entry *iter; - struct perf_hpp_fmt *fmt; - int64_t cmp = 0; + int64_t cmp; iter = rb_entry(n, struct hist_entry, rb_node_in); - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { - cmp = fmt->collapse(fmt, iter, he); - if (cmp) - break; - } - + cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he); if (cmp < 0) n = n->rb_left; else if (cmp > 0) @@ -2638,15 +2823,13 @@ int hists__unlink(struct hists *hists) void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, struct perf_sample *sample, bool nonany_branch_mode, - u64 *total_cycles) + u64 *total_cycles, struct evsel *evsel) { struct branch_info *bi; struct branch_entry *entries = perf_sample__branch_entries(sample); /* If we have branch cycles always annotate them. */ if (bs && bs->nr && entries[0].flags.cycles) { - int i; - bi = sample__resolve_bstack(sample, al); if (bi) { struct addr_map_symbol *prev = NULL; @@ -2661,39 +2844,50 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, * Note that perf stores branches reversed from * program order! */ - for (i = bs->nr - 1; i >= 0; i--) { + for (int i = bs->nr - 1; i >= 0; i--) { addr_map_symbol__account_cycles(&bi[i].from, nonany_branch_mode ? NULL : prev, - bi[i].flags.cycles); + bi[i].flags.cycles, evsel, + bi[i].branch_stack_cntr); prev = &bi[i].to; if (total_cycles) *total_cycles += bi[i].flags.cycles; } + for (unsigned int i = 0; i < bs->nr; i++) { + map_symbol__exit(&bi[i].to.ms); + map_symbol__exit(&bi[i].from.ms); + } free(bi); } } } -size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp, - bool skip_empty) +size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) { struct evsel *pos; size_t ret = 0; evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); + u64 total_samples = hists->stats.nr_samples; - if (skip_empty && !hists->stats.nr_samples && !hists->stats.nr_lost_samples) + total_samples += hists->stats.nr_lost_samples; + total_samples += hists->stats.nr_dropped_samples; + + if (symbol_conf.skip_empty && total_samples == 0) continue; ret += fprintf(fp, "%s stats:\n", evsel__name(pos)); if (hists->stats.nr_samples) - ret += fprintf(fp, "%16s events: %10d\n", + ret += fprintf(fp, "%20s events: %10d\n", "SAMPLE", hists->stats.nr_samples); if (hists->stats.nr_lost_samples) - ret += fprintf(fp, "%16s events: %10d\n", + ret += fprintf(fp, "%20s events: %10d\n", "LOST_SAMPLES", hists->stats.nr_lost_samples); + if (hists->stats.nr_dropped_samples) + ret += fprintf(fp, "%20s events: %10d\n", + "LOST_SAMPLES (BPF)", hists->stats.nr_dropped_samples); } return ret; @@ -2706,6 +2900,12 @@ u64 hists__total_period(struct hists *hists) hists->stats.total_period; } +u64 hists__total_latency(struct hists *hists) +{ + return symbol_conf.filter_relative ? hists->stats.total_non_filtered_latency : + hists->stats.total_latency; +} + int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq) { char unit; @@ -2767,17 +2967,17 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh if (hists__has(hists, thread)) { printed += scnprintf(bf + printed, size - printed, ", Thread: %s(%d)", - (thread->comm_set ? thread__comm_str(thread) : ""), - thread->tid); + (thread__comm_set(thread) ? thread__comm_str(thread) : ""), + thread__tid(thread)); } else { printed += scnprintf(bf + printed, size - printed, ", Thread: %s", - (thread->comm_set ? thread__comm_str(thread) : "")); + (thread__comm_set(thread) ? thread__comm_str(thread) : "")); } } if (dso) printed += scnprintf(bf + printed, size - printed, - ", DSO: %s", dso->short_name); + ", DSO: %s", dso__short_name(dso)); if (socket_id > -1) printed += scnprintf(bf + printed, size - printed, ", Processor Socket: %d", socket_id); @@ -2817,6 +3017,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) hists->entries = RB_ROOT_CACHED; mutex_init(&hists->lock); hists->socket_filter = -1; + hists->parallelism_filter = symbol_conf.parallelism_filter; hists->hpp_list = hpp_list; INIT_LIST_HEAD(&hists->hpp_formats); return 0; @@ -2851,6 +3052,8 @@ static void hists_evsel__exit(struct evsel *evsel) struct perf_hpp_list_node *node, *tmp; hists__delete_all_entries(hists); + zfree(&hists->mem_stat_types); + zfree(&hists->mem_stat_total); list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) { perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) { |
