summaryrefslogtreecommitdiff
path: root/tools/perf/util/callchain.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-04 08:39:02 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-04 08:39:02 -0700
commit9657752cb5039c7498d4b27c4a75530f93b87d9b (patch)
treeef4198ba427da0ef5e1cb8fb4ec62843b645aed9 /tools/perf/util/callchain.c
parent0081a0ce809b611c1f37da5d6ae5bc8027ffd1c4 (diff)
parent1b2f76d77a277bb70d38ad0991ed7f16bbc115a9 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Kernel side changes: - Add branch type profiling/tracing support. (Jin Yao) - Add the PERF_SAMPLE_PHYS_ADDR ABI to allow the tracing/profiling of physical memory addresses, where the PMU supports it. (Kan Liang) - Export some PMU capability details in the new /sys/bus/event_source/devices/cpu/caps/ sysfs directory. (Andi Kleen) - Aux data fixes and updates (Will Deacon) - kprobes fixes and updates (Masami Hiramatsu) - AMD uncore PMU driver fixes and updates (Janakarajan Natarajan) On the tooling side, here's a (limited!) list of highlights - there were many other changes that I could not list, see the shortlog and git history for details: UI improvements: - Implement a visual marker for fused x86 instructions in the annotate TUI browser, available now in 'perf report', more work needed to have it available as well in 'perf top' (Jin Yao) Further explanation from one of Jin's patches: │ ┌──cmpl $0x0,argp_program_version_hook 81.93 │ ├──je 20 │ │ lock cmpxchg %esi,0x38a9a4(%rip) │ │↓ jne 29 │ │↓ jmp 43 11.47 │20:└─→cmpxch %esi,0x38a999(%rip) That means the cmpl+je is a fused instruction pair and they should be considered together. - Record the branch type and then show statistics and info about in callchain entries (Jin Yao) Example from one of Jin's patches: # perf record -g -j any,save_type # perf report --branch-history --stdio --no-children 38.50% div.c:45 [.] main div | ---main div.c:42 (RET CROSS_2M cycles:2) compute_flag div.c:28 (cycles:2) compute_flag div.c:27 (RET CROSS_2M cycles:1) rand rand.c:28 (cycles:1) rand rand.c:28 (RET CROSS_2M cycles:1) __random random.c:298 (cycles:1) __random random.c:297 (COND_BWD CROSS_2M cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (COND_BWD CROSS_2M cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (RET CROSS_2M cycles:9) namespaces support: - Add initial support for namespaces, using setns to access files in namespaces, grabbing their build-ids, etc. (Krister Johansen) perf trace enhancements: - Beautify pkey_{alloc,free,mprotect} arguments in 'perf trace' (Arnaldo Carvalho de Melo) - Add initial 'clone' syscall args beautifier in 'perf trace' (Arnaldo Carvalho de Melo) - Ignore 'fd' and 'offset' args for MAP_ANONYMOUS in 'perf trace' (Arnaldo Carvalho de Melo) - Beautifiers for the 'cmd' arg of several ioctl types, including: sound, DRM, KVM, vhost virtio and perf_events. (Arnaldo Carvalho de Melo) - Add PERF_SAMPLE_CALLCHAIN and PERF_RECORD_MMAP[2] to 'perf data' CTF conversion, allowing CTF trace visualization tools to show callchains and to resolve symbols (Geneviève Bastien) - Beautify the fcntl syscall, which is an interesting one in the sense that infrastructure had to be put in place to change the formatters of some arguments according to the value in a previous one, i.e. cmd dictates how arg and the syscall return will be formatted. (Arnaldo Carvalho de Melo perf stat enhancements: - Use group read for event groups in 'perf stat', reducing overhead when groups are defined in the event specification, i.e. when using {} to enclose a list of events, asking them to be read at the same time, e.g.: "perf stat -e '{cycles,instructions}'" (Jiri Olsa) pipe mode improvements: - Process tracing data in 'perf annotate' pipe mode (David Carrillo-Cisneros) - Add header record types to pipe-mode, now this command: $ perf record -o - -e cycles sleep 1 | perf report --stdio --header Will show the same as in non-pipe mode, i.e. involving a perf.data file (David Carrillo-Cisneros) Vendor specific hardware event support updates/enhancements: - Update POWER9 vendor events tables (Sukadev Bhattiprolu) - Add POWER9 PMU events Sukadev (Bhattiprolu) - Support additional POWER8+ PVR in PMU mapfile (Shriya) - Add Skylake server uncore JSON vendor events (Andi Kleen) - Support exporting Intel PT data to sqlite3 with python perf scripts, this is in addition to the postgresql support that was already there (Adrian Hunter)" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (253 commits) perf symbols: Fix plt entry calculation for ARM and AARCH64 perf probe: Fix kprobe blacklist checking condition perf/x86: Fix caps/ for !Intel perf/core, x86: Add PERF_SAMPLE_PHYS_ADDR perf/core, pt, bts: Get rid of itrace_started perf trace beauty: Beautify pkey_{alloc,free,mprotect} arguments tools headers: Sync cpu features kernel ABI headers with tooling headers perf tools: Pass full path of FEATURES_DUMP perf tools: Robustify detection of clang binary tools lib: Allow external definition of CC, AR and LD perf tools: Allow external definition of flex and bison binary names tools build tests: Don't hardcode gcc name perf report: Group stat values on global event id perf values: Zero value buffers perf values: Fix allocation check perf values: Fix thread index bug perf report: Add dump_read function perf record: Set read_format for inherit_stat perf c2c: Fix remote HITM detection for Skylake perf tools: Fix static build with newer toolchains ...
Diffstat (limited to 'tools/perf/util/callchain.c')
-rw-r--r--tools/perf/util/callchain.c249
1 files changed, 157 insertions, 92 deletions
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index b4204b43ed58..f320b0777e0d 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -23,6 +23,7 @@
#include "sort.h"
#include "machine.h"
#include "callchain.h"
+#include "branch.h"
#define CALLCHAIN_PARAM_DEFAULT \
.mode = CHAIN_GRAPH_ABS, \
@@ -303,7 +304,7 @@ int perf_callchain_config(const char *var, const char *value)
{
char *endptr;
- if (prefixcmp(var, "call-graph."))
+ if (!strstarts(var, "call-graph."))
return 0;
var += sizeof("call-graph.") - 1;
@@ -562,15 +563,33 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
if (cursor_node->branch) {
call->branch_count = 1;
- if (cursor_node->branch_flags.predicted)
- call->predicted_count = 1;
-
- if (cursor_node->branch_flags.abort)
- call->abort_count = 1;
-
- call->cycles_count = cursor_node->branch_flags.cycles;
- call->iter_count = cursor_node->nr_loop_iter;
- call->samples_count = cursor_node->samples;
+ if (cursor_node->branch_from) {
+ /*
+ * branch_from is set with value somewhere else
+ * to imply it's "to" of a branch.
+ */
+ call->brtype_stat.branch_to = true;
+
+ if (cursor_node->branch_flags.predicted)
+ call->predicted_count = 1;
+
+ if (cursor_node->branch_flags.abort)
+ call->abort_count = 1;
+
+ branch_type_count(&call->brtype_stat,
+ &cursor_node->branch_flags,
+ cursor_node->branch_from,
+ cursor_node->ip);
+ } else {
+ /*
+ * It's "from" of a branch
+ */
+ call->brtype_stat.branch_to = false;
+ call->cycles_count =
+ cursor_node->branch_flags.cycles;
+ call->iter_count = cursor_node->nr_loop_iter;
+ call->samples_count = cursor_node->samples;
+ }
}
list_add_tail(&call->list, &node->val);
@@ -679,15 +698,32 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
if (node->branch) {
cnode->branch_count++;
- if (node->branch_flags.predicted)
- cnode->predicted_count++;
-
- if (node->branch_flags.abort)
- cnode->abort_count++;
-
- cnode->cycles_count += node->branch_flags.cycles;
- cnode->iter_count += node->nr_loop_iter;
- cnode->samples_count += node->samples;
+ if (node->branch_from) {
+ /*
+ * It's "to" of a branch
+ */
+ cnode->brtype_stat.branch_to = true;
+
+ if (node->branch_flags.predicted)
+ cnode->predicted_count++;
+
+ if (node->branch_flags.abort)
+ cnode->abort_count++;
+
+ branch_type_count(&cnode->brtype_stat,
+ &node->branch_flags,
+ node->branch_from,
+ node->ip);
+ } else {
+ /*
+ * It's "from" of a branch
+ */
+ cnode->brtype_stat.branch_to = false;
+ cnode->cycles_count +=
+ node->branch_flags.cycles;
+ cnode->iter_count += node->nr_loop_iter;
+ cnode->samples_count += node->samples;
+ }
}
return MATCH_EQ;
@@ -922,7 +958,7 @@ merge_chain_branch(struct callchain_cursor *cursor,
list_for_each_entry_safe(list, next_list, &src->val, list) {
callchain_cursor_append(cursor, list->ip,
list->ms.map, list->ms.sym,
- false, NULL, 0, 0);
+ false, NULL, 0, 0, 0);
list_del(&list->list);
map__zput(list->ms.map);
free(list);
@@ -962,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor,
int callchain_cursor_append(struct callchain_cursor *cursor,
u64 ip, struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
- int nr_loop_iter, int samples)
+ int nr_loop_iter, int samples, u64 branch_from)
{
struct callchain_cursor_node *node = *cursor->last;
@@ -986,6 +1022,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
memcpy(&node->branch_flags, flags,
sizeof(struct branch_flags));
+ node->branch_from = branch_from;
cursor->nr++;
cursor->last = &node->next;
@@ -998,11 +1035,11 @@ int sample__resolve_callchain(struct perf_sample *sample,
struct perf_evsel *evsel, struct addr_location *al,
int max_stack)
{
- if (sample->callchain == NULL)
+ if (sample->callchain == NULL && !symbol_conf.show_branchflag_count)
return 0;
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
- perf_hpp_list.parent) {
+ perf_hpp_list.parent || symbol_conf.show_branchflag_count) {
return thread__resolve_callchain(al->thread, cursor, evsel, sample,
parent, al, max_stack);
}
@@ -1011,7 +1048,8 @@ int sample__resolve_callchain(struct perf_sample *sample,
int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample)
{
- if (!symbol_conf.use_callchain || sample->callchain == NULL)
+ if ((!symbol_conf.use_callchain || sample->callchain == NULL) &&
+ !symbol_conf.show_branchflag_count)
return 0;
return callchain_append(he->callchain, &callchain_cursor, sample->period);
}
@@ -1214,95 +1252,120 @@ int callchain_branch_counts(struct callchain_root *root,
cycles_count);
}
-static int counts_str_build(char *bf, int bfsize,
- u64 branch_count, u64 predicted_count,
- u64 abort_count, u64 cycles_count,
- u64 iter_count, u64 samples_count)
+static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize)
{
- double predicted_percent = 0.0;
- const char *null_str = "";
- char iter_str[32];
- char cycle_str[32];
- char *istr, *cstr;
- u64 cycles;
+ int printed;
- if (branch_count == 0)
- return scnprintf(bf, bfsize, " (calltrace)");
+ printed = scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value);
- cycles = cycles_count / branch_count;
+ return printed;
+}
- if (iter_count && samples_count) {
- if (cycles > 0)
- scnprintf(iter_str, sizeof(iter_str),
- " iterations:%" PRId64 "",
- iter_count / samples_count);
- else
- scnprintf(iter_str, sizeof(iter_str),
- "iterations:%" PRId64 "",
- iter_count / samples_count);
- istr = iter_str;
- } else
- istr = (char *)null_str;
+static int count_float_printf(int idx, const char *str, float value,
+ char *bf, int bfsize, float threshold)
+{
+ int printed;
- if (cycles > 0) {
- scnprintf(cycle_str, sizeof(cycle_str),
- "cycles:%" PRId64 "", cycles);
- cstr = cycle_str;
- } else
- cstr = (char *)null_str;
+ if (threshold != 0.0 && value < threshold)
+ return 0;
- predicted_percent = predicted_count * 100.0 / branch_count;
+ printed = scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value);
- if ((predicted_count == branch_count) && (abort_count == 0)) {
- if ((cycles > 0) || (istr != (char *)null_str))
- return scnprintf(bf, bfsize, " (%s%s)", cstr, istr);
- else
- return scnprintf(bf, bfsize, "%s", (char *)null_str);
+ return printed;
+}
+
+static int branch_to_str(char *bf, int bfsize,
+ u64 branch_count, u64 predicted_count,
+ u64 abort_count,
+ struct branch_type_stat *brtype_stat)
+{
+ int printed, i = 0;
+
+ printed = branch_type_str(brtype_stat, bf, bfsize);
+ if (printed)
+ i++;
+
+ if (predicted_count < branch_count) {
+ printed += count_float_printf(i++, "predicted",
+ predicted_count * 100.0 / branch_count,
+ bf + printed, bfsize - printed, 0.0);
}
- if ((predicted_count < branch_count) && (abort_count == 0)) {
- if ((cycles > 0) || (istr != (char *)null_str))
- return scnprintf(bf, bfsize,
- " (predicted:%.1f%% %s%s)",
- predicted_percent, cstr, istr);
- else {
- return scnprintf(bf, bfsize,
- " (predicted:%.1f%%)",
- predicted_percent);
- }
+ if (abort_count) {
+ printed += count_float_printf(i++, "abort",
+ abort_count * 100.0 / branch_count,
+ bf + printed, bfsize - printed, 0.1);
}
- if ((predicted_count == branch_count) && (abort_count > 0)) {
- if ((cycles > 0) || (istr != (char *)null_str))
- return scnprintf(bf, bfsize,
- " (abort:%" PRId64 " %s%s)",
- abort_count, cstr, istr);
- else
- return scnprintf(bf, bfsize,
- " (abort:%" PRId64 ")",
- abort_count);
+ if (i)
+ printed += scnprintf(bf + printed, bfsize - printed, ")");
+
+ return printed;
+}
+
+static int branch_from_str(char *bf, int bfsize,
+ u64 branch_count,
+ u64 cycles_count, u64 iter_count,
+ u64 samples_count)
+{
+ int printed = 0, i = 0;
+ u64 cycles;
+
+ cycles = cycles_count / branch_count;
+ if (cycles) {
+ printed += count_pri64_printf(i++, "cycles",
+ cycles,
+ bf + printed, bfsize - printed);
+ }
+
+ if (iter_count && samples_count) {
+ printed += count_pri64_printf(i++, "iterations",
+ iter_count / samples_count,
+ bf + printed, bfsize - printed);
+ }
+
+ if (i)
+ printed += scnprintf(bf + printed, bfsize - printed, ")");
+
+ return printed;
+}
+
+static int counts_str_build(char *bf, int bfsize,
+ u64 branch_count, u64 predicted_count,
+ u64 abort_count, u64 cycles_count,
+ u64 iter_count, u64 samples_count,
+ struct branch_type_stat *brtype_stat)
+{
+ int printed;
+
+ if (branch_count == 0)
+ return scnprintf(bf, bfsize, " (calltrace)");
+
+ if (brtype_stat->branch_to) {
+ printed = branch_to_str(bf, bfsize, branch_count,
+ predicted_count, abort_count, brtype_stat);
+ } else {
+ printed = branch_from_str(bf, bfsize, branch_count,
+ cycles_count, iter_count, samples_count);
}
- if ((cycles > 0) || (istr != (char *)null_str))
- return scnprintf(bf, bfsize,
- " (predicted:%.1f%% abort:%" PRId64 " %s%s)",
- predicted_percent, abort_count, cstr, istr);
+ if (!printed)
+ bf[0] = 0;
- return scnprintf(bf, bfsize,
- " (predicted:%.1f%% abort:%" PRId64 ")",
- predicted_percent, abort_count);
+ return printed;
}
static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count,
- u64 iter_count, u64 samples_count)
+ u64 iter_count, u64 samples_count,
+ struct branch_type_stat *brtype_stat)
{
- char str[128];
+ char str[256];
counts_str_build(str, sizeof(str), branch_count,
predicted_count, abort_count, cycles_count,
- iter_count, samples_count);
+ iter_count, samples_count, brtype_stat);
if (fp)
return fprintf(fp, "%s", str);
@@ -1334,7 +1397,8 @@ int callchain_list_counts__printf_value(struct callchain_node *node,
return callchain_counts_printf(fp, bf, bfsize, branch_count,
predicted_count, abort_count,
- cycles_count, iter_count, samples_count);
+ cycles_count, iter_count, samples_count,
+ &clist->brtype_stat);
}
static void free_callchain_node(struct callchain_node *node)
@@ -1459,7 +1523,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
node->branch, &node->branch_flags,
- node->nr_loop_iter, node->samples);
+ node->nr_loop_iter, node->samples,
+ node->branch_from);
if (rc)
break;