From c4ee06251d4212a0d55e2371f2db464f6a1e0901 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Mon, 7 Aug 2017 21:05:15 +0800
Subject: perf report: Calculate the average cycles of iterations

The branch history code has a loop detection function. With this, we can
get the number of iterations by calculating the removed loops.

It would also be nice to know the average cycles per iteration.
This patch adds up the cycles in the branch entries of removed loops and
saves the result to the next branch entry (e.g. branch entry A).

Finally it will display the iteration number and average cycles at the
"from" of branch entry A.

For example:
perf record -g -j any,save_type ./div
perf report --branch-history --no-children --stdio

--22.63%--main div.c:42 (RET CROSS_2M)
          compute_flag div.c:28 (cycles:2 iter:173115 avg_cycles:2)
          |
           --10.73%--compute_flag div.c:27 (RET CROSS_2M)
                     rand rand.c:28 (cycles:1)
                     rand rand.c:28 (RET CROSS_2M)
                     __random random.c:298 (cycles:1)
                     __random random.c:297 (COND_BWD CROSS_2M)
                     __random random.c:295 (cycles:1)
                     __random random.c:295 (COND_BWD CROSS_2M)
                     __random random.c:295 (cycles:1)
                     __random random.c:295 (RET CROSS_2M)

Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1502111115-18305-1-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c |  8 +---
 tools/perf/ui/stdio/hist.c     | 10 ++---
 tools/perf/util/callchain.c    | 49 +++++++++++------------
 tools/perf/util/callchain.h    |  9 ++---
 tools/perf/util/machine.c      | 88 +++++++++++++++++++++++++-----------------
 5 files changed, 85 insertions(+), 79 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f4bc2462bc2c..13dfb0a0bdeb 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
 				       browser->show_dso);
 
 	if (symbol_conf.show_branchflag_count) {
-		if (need_percent)
-			callchain_list_counts__printf_value(node, chain, NULL,
-							    buf, sizeof(buf));
-		else
-			callchain_list_counts__printf_value(NULL, chain, NULL,
-							    buf, sizeof(buf));
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
 
 		if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
 			str = "Not enough memory!";
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 5c95b8301c67..8bdb7a500181 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
 	str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
 
 	if (symbol_conf.show_branchflag_count) {
-		if (!period)
-			callchain_list_counts__printf_value(node, chain, NULL,
-							    buf, sizeof(buf));
-		else
-			callchain_list_counts__printf_value(NULL, chain, NULL,
-							    buf, sizeof(buf));
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
 
 		if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
 			str = "Not enough memory!";
@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 
 			if (symbol_conf.show_branchflag_count)
 				ret += callchain_list_counts__printf_value(
-						NULL, chain, fp, NULL, 0);
+						chain, fp, NULL, 0);
 			ret += fprintf(fp, "\n");
 
 			if (++entries_printed == callchain_param.print_limit)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f320b0777e0d..510b513e0f01 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 				call->cycles_count =
 					cursor_node->branch_flags.cycles;
 				call->iter_count = cursor_node->nr_loop_iter;
-				call->samples_count = cursor_node->samples;
+				call->iter_cycles = cursor_node->iter_cycles;
 			}
 		}
 
@@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 				cnode->cycles_count +=
 					node->branch_flags.cycles;
 				cnode->iter_count += node->nr_loop_iter;
-				cnode->samples_count += node->samples;
+				cnode->iter_cycles += node->iter_cycles;
 			}
 		}
 
@@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor,
 int callchain_cursor_append(struct callchain_cursor *cursor,
 			    u64 ip, struct map *map, struct symbol *sym,
 			    bool branch, struct branch_flags *flags,
-			    int nr_loop_iter, int samples, u64 branch_from)
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from)
 {
 	struct callchain_cursor_node *node = *cursor->last;
 
@@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
 	node->sym = sym;
 	node->branch = branch;
 	node->nr_loop_iter = nr_loop_iter;
-	node->samples = samples;
+	node->iter_cycles = iter_cycles;
 
 	if (flags)
 		memcpy(&node->branch_flags, flags,
@@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize,
 static int branch_from_str(char *bf, int bfsize,
 			   u64 branch_count,
 			   u64 cycles_count, u64 iter_count,
-			   u64 samples_count)
+			   u64 iter_cycles)
 {
 	int printed = 0, i = 0;
 	u64 cycles;
@@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize,
 				bf + printed, bfsize - printed);
 	}
 
-	if (iter_count && samples_count) {
-		printed += count_pri64_printf(i++, "iterations",
-				iter_count / samples_count,
+	if (iter_count) {
+		printed += count_pri64_printf(i++, "iter",
+				iter_count,
+				bf + printed, bfsize - printed);
+
+		printed += count_pri64_printf(i++, "avg_cycles",
+				iter_cycles / iter_count,
 				bf + printed, bfsize - printed);
 	}
 
@@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize,
 static int counts_str_build(char *bf, int bfsize,
 			     u64 branch_count, u64 predicted_count,
 			     u64 abort_count, u64 cycles_count,
-			     u64 iter_count, u64 samples_count,
+			     u64 iter_count, u64 iter_cycles,
 			     struct branch_type_stat *brtype_stat)
 {
 	int printed;
@@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize,
 				predicted_count, abort_count, brtype_stat);
 	} else {
 		printed = branch_from_str(bf, bfsize, branch_count,
-				cycles_count, iter_count, samples_count);
+				cycles_count, iter_count, iter_cycles);
 	}
 
 	if (!printed)
@@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize,
 static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 				   u64 branch_count, u64 predicted_count,
 				   u64 abort_count, u64 cycles_count,
-				   u64 iter_count, u64 samples_count,
+				   u64 iter_count, u64 iter_cycles,
 				   struct branch_type_stat *brtype_stat)
 {
 	char str[256];
 
 	counts_str_build(str, sizeof(str), branch_count,
 			 predicted_count, abort_count, cycles_count,
-			 iter_count, samples_count, brtype_stat);
+			 iter_count, iter_cycles, brtype_stat);
 
 	if (fp)
 		return fprintf(fp, "%s", str);
@@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 	return scnprintf(bf, bfsize, "%s", str);
 }
 
-int callchain_list_counts__printf_value(struct callchain_node *node,
-					struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
 					FILE *fp, char *bf, int bfsize)
 {
 	u64 branch_count, predicted_count;
 	u64 abort_count, cycles_count;
-	u64 iter_count = 0, samples_count = 0;
+	u64 iter_count, iter_cycles;
 
 	branch_count = clist->branch_count;
 	predicted_count = clist->predicted_count;
 	abort_count = clist->abort_count;
 	cycles_count = clist->cycles_count;
-
-	if (node) {
-		struct callchain_list *call;
-
-		list_for_each_entry(call, &node->val, list) {
-			iter_count += call->iter_count;
-			samples_count += call->samples_count;
-		}
-	}
+	iter_count = clist->iter_count;
+	iter_cycles = clist->iter_cycles;
 
 	return callchain_counts_printf(fp, bf, bfsize, branch_count,
 				       predicted_count, abort_count,
-				       cycles_count, iter_count, samples_count,
+				       cycles_count, iter_count, iter_cycles,
 				       &clist->brtype_stat);
 }
 
@@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
 
 		rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
 					     node->branch, &node->branch_flags,
-					     node->nr_loop_iter, node->samples,
+					     node->nr_loop_iter,
+					     node->iter_cycles,
 					     node->branch_from);
 		if (rc)
 			break;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 97738201464a..1ed6fc61d0a5 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -119,7 +119,7 @@ struct callchain_list {
 	u64			abort_count;
 	u64			cycles_count;
 	u64			iter_count;
-	u64			samples_count;
+	u64			iter_cycles;
 	struct branch_type_stat brtype_stat;
 	char		       *srcline;
 	struct list_head	list;
@@ -139,7 +139,7 @@ struct callchain_cursor_node {
 	struct branch_flags		branch_flags;
 	u64				branch_from;
 	int				nr_loop_iter;
-	int				samples;
+	u64				iter_cycles;
 	struct callchain_cursor_node	*next;
 };
 
@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
 			    struct map *map, struct symbol *sym,
 			    bool branch, struct branch_flags *flags,
-			    int nr_loop_iter, int samples, u64 branch_from);
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from);
 
 /* Close a cursor writing session. Initialize for the reader */
 static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
 int callchain_node__fprintf_value(struct callchain_node *node,
 				  FILE *fp, u64 total);
 
-int callchain_list_counts__printf_value(struct callchain_node *node,
-					struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
 					FILE *fp, char *bf, int bfsize);
 
 void free_callchain(struct callchain_root *root);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5c8eacaca4f4..9eaa95302c86 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1675,6 +1675,11 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 	return mi;
 }
 
+struct iterations {
+	int nr_loop_iter;
+	u64 cycles;
+};
+
 static int add_callchain_ip(struct thread *thread,
 			    struct callchain_cursor *cursor,
 			    struct symbol **parent,
@@ -1683,11 +1688,12 @@ static int add_callchain_ip(struct thread *thread,
 			    u64 ip,
 			    bool branch,
 			    struct branch_flags *flags,
-			    int nr_loop_iter,
-			    int samples,
+			    struct iterations *iter,
 			    u64 branch_from)
 {
 	struct addr_location al;
+	int nr_loop_iter = 0;
+	u64 iter_cycles = 0;
 
 	al.filtered = 0;
 	al.sym = NULL;
@@ -1737,9 +1743,15 @@ static int add_callchain_ip(struct thread *thread,
 
 	if (symbol_conf.hide_unresolved && al.sym == NULL)
 		return 0;
+
+	if (iter) {
+		nr_loop_iter = iter->nr_loop_iter;
+		iter_cycles = iter->cycles;
+	}
+
 	return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
-				       branch, flags, nr_loop_iter, samples,
-				       branch_from);
+				       branch, flags, nr_loop_iter,
+				       iter_cycles, branch_from);
 }
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1760,6 +1772,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 	return bi;
 }
 
+static void save_iterations(struct iterations *iter,
+			    struct branch_entry *be, int nr)
+{
+	int i;
+
+	iter->nr_loop_iter = nr;
+	iter->cycles = 0;
+
+	for (i = 0; i < nr; i++)
+		iter->cycles += be[i].flags.cycles;
+}
+
 #define CHASHSZ 127
 #define CHASHBITS 7
 #define NO_ENTRY 0xff
@@ -1767,7 +1791,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 #define PERF_MAX_BRANCH_DEPTH 127
 
 /* Remove loops. */
-static int remove_loops(struct branch_entry *l, int nr)
+static int remove_loops(struct branch_entry *l, int nr,
+			struct iterations *iter)
 {
 	int i, j, off;
 	unsigned char chash[CHASHSZ];
@@ -1792,8 +1817,18 @@ static int remove_loops(struct branch_entry *l, int nr)
 					break;
 				}
 			if (is_loop) {
-				memmove(l + i, l + i + off,
-					(nr - (i + off)) * sizeof(*l));
+				j = nr - (i + off);
+				if (j > 0) {
+					save_iterations(iter + i + off,
+						l + i, off);
+
+					memmove(iter + i, iter + i + off,
+						j * sizeof(*iter));
+
+					memmove(l + i, l + i + off,
+						j * sizeof(*l));
+				}
+
 				nr -= off;
 			}
 		}
@@ -1883,7 +1918,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 
 			err = add_callchain_ip(thread, cursor, parent,
 					       root_al, &cpumode, ip,
-					       branch, flags, 0, 0,
+					       branch, flags, NULL,
 					       branch_from);
 			if (err)
 				return (err < 0) ? err : 0;
@@ -1909,7 +1944,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i, j, err, nr_entries;
 	int skip_idx = -1;
 	int first_call = 0;
-	int nr_loop_iter;
 
 	if (chain)
 		chain_nr = chain->nr;
@@ -1942,6 +1976,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	if (branch && callchain_param.branch_callstack) {
 		int nr = min(max_stack, (int)branch->nr);
 		struct branch_entry be[nr];
+		struct iterations iter[nr];
 
 		if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
 			pr_warning("corrupted branch chain. skipping...\n");
@@ -1972,38 +2007,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 				be[i] = branch->entries[branch->nr - i - 1];
 		}
 
-		nr_loop_iter = nr;
-		nr = remove_loops(be, nr);
-
-		/*
-		 * Get the number of iterations.
-		 * It's only approximation, but good enough in practice.
-		 */
-		if (nr_loop_iter > nr)
-			nr_loop_iter = nr_loop_iter - nr + 1;
-		else
-			nr_loop_iter = 0;
+		memset(iter, 0, sizeof(struct iterations) * nr);
+		nr = remove_loops(be, nr, iter);
 
 		for (i = 0; i < nr; i++) {
-			if (i == nr - 1)
-				err = add_callchain_ip(thread, cursor, parent,
-						       root_al,
-						       NULL, be[i].to,
-						       true, &be[i].flags,
-						       nr_loop_iter, 1,
-						       be[i].from);
-			else
-				err = add_callchain_ip(thread, cursor, parent,
-						       root_al,
-						       NULL, be[i].to,
-						       true, &be[i].flags,
-						       0, 0, be[i].from);
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al,
+					       NULL, be[i].to,
+					       true, &be[i].flags,
+					       NULL, be[i].from);
 
 			if (!err)
 				err = add_callchain_ip(thread, cursor, parent, root_al,
 						       NULL, be[i].from,
 						       true, &be[i].flags,
-						       0, 0, 0);
+						       &iter[i], 0);
 			if (err == -EINVAL)
 				break;
 			if (err)
@@ -2037,7 +2055,7 @@ check_calls:
 
 		err = add_callchain_ip(thread, cursor, parent,
 				       root_al, &cpumode, ip,
-				       false, NULL, 0, 0, 0);
+				       false, NULL, NULL, 0);
 
 		if (err)
 			return (err < 0) ? err : 0;
-- 
cgit 


From 89be3f8ab701180fc0329eff1b076528d64ac56b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 31 Aug 2017 11:46:49 -0300
Subject: perf syscalltbl: Support glob matching on syscall names

Add two new methods: one to find the first match, returning its syscall
id and its index in whatever internal database the syscall table keeps
its entries in, and one to find the next match, if any.

Implemented only on arches where we actually read the syscall table from
the kernel sources, i.e. x86-64 for now, all the others use the libaudit
method for which this returns -1, i.e. just stubs were added, with the
actual implementation using whatever libaudit functions for matching
that may be available.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-i0sj4rxk1a63pfe9gl8z8irs@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/syscalltbl.c | 33 +++++++++++++++++++++++++++++++++
 tools/perf/util/syscalltbl.h |  3 +++
 2 files changed, 36 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index bbb4c1957578..19e5db90394c 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -19,6 +19,7 @@
 #ifdef HAVE_SYSCALL_TABLE
 #include <linux/compiler.h>
 #include <string.h>
+#include "string2.h"
 #include "util.h"
 
 #if defined(__x86_64__)
@@ -105,6 +106,27 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
 	return sc ? sc->id : -1;
 }
 
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+	int i;
+	struct syscall *syscalls = tbl->syscalls.entries;
+
+	for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) {
+		if (strglobmatch(syscalls[i].name, syscall_glob)) {
+			*idx = i;
+			return syscalls[i].id;
+		}
+	}
+
+	return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+	*idx = -1;
+	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
+
 #else /* HAVE_SYSCALL_TABLE */
 
 #include <libaudit.h>
@@ -131,4 +153,15 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
 {
 	return audit_name_to_syscall(name, tbl->audit_machine);
 }
+
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
+				  const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
+{
+	return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
 #endif /* HAVE_SYSCALL_TABLE */
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
index e2951510484f..e9fb8786da7c 100644
--- a/tools/perf/util/syscalltbl.h
+++ b/tools/perf/util/syscalltbl.h
@@ -17,4 +17,7 @@ void syscalltbl__delete(struct syscalltbl *tbl);
 const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
 int syscalltbl__id(struct syscalltbl *tbl, const char *name);
 
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+
 #endif /* __PERF_SYSCALLTBL_H */
-- 
cgit 


From 27702bcfe8a125a1feeeb5f07526d63b20cac47f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 31 Aug 2017 11:50:04 -0300
Subject: perf trace: Support syscall name globbing

So now we can use:

  # perf trace -e pkey_*
   532.784 ( 0.006 ms): pkey/16018 pkey_alloc(init_val: DISABLE_WRITE) = -1 EINVAL Invalid argument
   532.795 ( 0.004 ms): pkey/16018 pkey_mprotect(start: 0x7f380d0a6000, len: 4096, prot: READ|WRITE, pkey: -1) = 0
   532.801 ( 0.002 ms): pkey/16018 pkey_free(pkey: -1                ) = -1 EINVAL Invalid argument
  ^C[root@jouet ~]#

Or '-e epoll*', '-e *msg*', etc.

Combining syscall names with perf events, tracepoints, etc, continues to
be valid, i.e. this is possible:

  # perf probe -L sys_nanosleep
  <SyS_nanosleep@/home/acme/git/linux/kernel/time/hrtimer.c:0>
      0  SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
                        struct timespec __user *, rmtp)
         {
                struct timespec64 tu;

      5         if (get_timespec64(&tu, rqtp))
      6                 return -EFAULT;

                if (!timespec64_valid(&tu))
      9                 return -EINVAL;

     11         current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
     12         current->restart_block.nanosleep.rmtp = rmtp;
     13         return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
         }

  # perf probe my_probe="sys_nanosleep:12 rmtp"
  Added new event:
    probe:my_probe       (on sys_nanosleep:12 with rmtp)

  You can now use it in all perf tools, such as:

	perf record -e probe:my_probe -aR sleep 1

  #
  # perf trace -e probe:my_probe/max-stack=5/,*sleep sleep 1
     0.427 ( 0.003 ms): sleep/16690 nanosleep(rqtp: 0x7ffefc245090) ...
     0.430 (         ): probe:my_probe:(ffffffffbd112923) rmtp=0)
                                       sys_nanosleep ([kernel.kallsyms])
                                       do_syscall_64 ([kernel.kallsyms])
                                       return_from_SYSCALL_64 ([kernel.kallsyms])
                                       __nanosleep_nocancel (/usr/lib64/libc-2.25.so)
     0.427 (1000.208 ms): sleep/16690  ... [continued]: nanosleep()) = 0
  #

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-elycoi8wy6y0w9dkj7ox1mzz@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-trace.txt |  2 +-
 tools/perf/builtin-trace.c              | 39 +++++++++++++++++++++++++++++----
 2 files changed, 36 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index c1e3288a2dfb..d53bea6bd571 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -37,7 +37,7 @@ OPTIONS
 --expr::
 --event::
 	List of syscalls and other perf events (tracepoints, HW cache events,
-	etc) to show.
+	etc) to show. Globbing is supported, e.g.: "epoll_*", "*msg*", etc.
 	See 'perf list' for a complete list of events.
 	Prefixing with ! shows all syscalls but the ones specified.  You may
 	need to escape it.
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d59cdadf3a79..771ddab94bb0 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1261,6 +1261,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 static int trace__validate_ev_qualifier(struct trace *trace)
 {
 	int err = 0, i;
+	size_t nr_allocated;
 	struct str_node *pos;
 
 	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
@@ -1274,13 +1275,18 @@ static int trace__validate_ev_qualifier(struct trace *trace)
 		goto out;
 	}
 
+	nr_allocated = trace->ev_qualifier_ids.nr;
 	i = 0;
 
 	strlist__for_each_entry(pos, trace->ev_qualifier) {
 		const char *sc = pos->s;
-		int id = syscalltbl__id(trace->sctbl, sc);
+		int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
 
 		if (id < 0) {
+			id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
+			if (id >= 0)
+				goto matches;
+
 			if (err == 0) {
 				fputs("Error:\tInvalid syscall ", trace->output);
 				err = -EINVAL;
@@ -1290,13 +1296,37 @@ static int trace__validate_ev_qualifier(struct trace *trace)
 
 			fputs(sc, trace->output);
 		}
-
+matches:
 		trace->ev_qualifier_ids.entries[i++] = id;
+		if (match_next == -1)
+			continue;
+
+		while (1) {
+			id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
+			if (id < 0)
+				break;
+			if (nr_allocated == trace->ev_qualifier_ids.nr) {
+				void *entries;
+
+				nr_allocated += 8;
+				entries = realloc(trace->ev_qualifier_ids.entries,
+						  nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
+				if (entries == NULL) {
+					err = -ENOMEM;
+					fputs("\nError:\t Not enough memory for parsing\n", trace->output);
+					goto out_free;
+				}
+				trace->ev_qualifier_ids.entries = entries;
+			}
+			trace->ev_qualifier_ids.nr++;
+			trace->ev_qualifier_ids.entries[i++] = id;
+		}
 	}
 
 	if (err < 0) {
 		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
 		      "\nHint:\tand: 'man syscalls'\n", trace->output);
+out_free:
 		zfree(&trace->ev_qualifier_ids.entries);
 		trace->ev_qualifier_ids.nr = 0;
 	}
@@ -2814,7 +2844,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 	struct trace *trace = (struct trace *)opt->value;
 	const char *s = str;
 	char *sep = NULL, *lists[2] = { NULL, NULL, };
-	int len = strlen(str) + 1, err = -1, list;
+	int len = strlen(str) + 1, err = -1, list, idx;
 	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
 	char group_name[PATH_MAX];
 
@@ -2831,7 +2861,8 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 			*sep = '\0';
 
 		list = 0;
-		if (syscalltbl__id(trace->sctbl, s) >= 0) {
+		if (syscalltbl__id(trace->sctbl, s) >= 0 ||
+		    syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
 			list = 1;
 		} else {
 			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
-- 
cgit 


From 9a805d8648ee09c136130fe4114a09574bc0b1ef Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Thu, 31 Aug 2017 14:44:56 +0530
Subject: perf test powerpc: Fix 'Object code reading' test

'Object code reading' test always fails on powerpc guest. Two reasons
for the failure are:

1. When an ELF section is too big (size beyond the 'unsigned int' max
value), objdump fails to disassemble from such a section. This was fixed
by commit 0f6329bd7fc ("binutils/objdump: Fix disassemble for huge elf
sections") in binutils.

2. When the sample is from the hypervisor, its symbols cannot be
resolved within the guest and thus thread__find_addr_map() fails for such
symbols. Fix this by ignoring hypervisor symbols in the test.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1504170896-7876-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/code-reading.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 761c5a448c56..466a462b26d1 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -237,6 +237,11 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 
 	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
 	if (!al.map || !al.map->dso) {
+		if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
+			pr_debug("Hypervisor address can not be resolved - skipping\n");
+			return 0;
+		}
+
 		pr_debug("thread__find_addr_map failed\n");
 		return -1;
 	}
-- 
cgit 


From 4fb205392022ba99a45dd01a62c6e2df046e400a Mon Sep 17 00:00:00 2001
From: Jack Henschel <jackdev@mailbox.org>
Date: Thu, 31 Aug 2017 10:05:35 +0200
Subject: perf intel-pt: Fix syntax in documentation of config option

As specified in tools/perf/Documentation/perf-config.txt, perf
configuration items must be in 'key = value' format, otherwise the
following error message occurs:

  $ perf record -e intel_pt//u -- ls
  bad config file line 2 in ~/.perfconfig
  $ cat .perfconfig
  [intel-pt]
      mispred-all

Changing to assigning a value to the key 'mispred-all' fixes the issue:

  $ perf record -e intel_pt//u -- ls
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.031 MB perf.data]
  $ cat .perfconfig
  [intel-pt]
      mispred-all = true

Signed-off-by: Jack Henschel <jackdev@mailbox.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170831080535.2157-1-jackdev@mailbox.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/intel-pt.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index ab1b0825130a..76971d2e4164 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -873,7 +873,7 @@ amended to take the number of elements as a parameter.
 
 	$ cat ~/.perfconfig
 	[intel-pt]
-		mispred-all
+		mispred-all = on
 
 	$ perf record -e intel_pt//u ./sort 3000
 	Bubble sorting array of 3000 elements
-- 
cgit 


From 2a118e1bd22cad57318520d37e3a184b8846c6a2 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Wed, 30 Aug 2017 21:42:23 -0400
Subject: perf vendor events powerpc: Remove duplicate events

Some POWER PMU event names have multiple/alternate event codes. These
alternate event codes were listed in the POWER9 JSON files for
reference.

But the perf tool does not seem to handle duplicates cleanly. 'perf
list' shows such duplicate events only once, but 'perf stat' ends up
counting the first event code twice, multiplexing if necessary and we
end up with double the event counts.

Remove the duplicate event codes from the JSON files for now.

Reported-by: Michael Petlan <mpetlan@redhat.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Anton Blanchard <anton@au1.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Link: http://lkml.kernel.org/r/20170830231506.GB20351@us.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../pmu-events/arch/powerpc/power9/frontend.json   |   7 +-
 .../perf/pmu-events/arch/powerpc/power9/other.json | 120 ---------------------
 .../pmu-events/arch/powerpc/power9/pipeline.json   |   7 +-
 tools/perf/pmu-events/arch/powerpc/power9/pmc.json |   7 +-
 4 files changed, 3 insertions(+), 138 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
index 7e62c46d7a20..c63a919eda98 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
@@ -79,11 +79,6 @@
     "EventName": "PM_LD_MISS_L1",
     "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
   },
-  {,
-    "EventCode": "0x400F0",
-    "EventName": "PM_LD_MISS_L1",
-    "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
-  },
   {,
     "EventCode": "0x2E01A",
     "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT",
@@ -374,4 +369,4 @@
     "EventName": "PM_IPTEG_FROM_L31_ECO_MOD",
     "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request"
   }
-]
\ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json
index 00f3d2a21f31..54cc3be00fc2 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/other.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json
@@ -604,11 +604,6 @@
     "EventName": "PM_L2_RTY_LD",
     "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
   },
-  {,
-    "EventCode": "0x3689E",
-    "EventName": "PM_L2_RTY_LD",
-    "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
-  },
   {,
     "EventCode": "0xE08C",
     "EventName": "PM_LSU0_ERAT_HIT",
@@ -714,11 +709,6 @@
     "EventName": "PM_L3_RD0_BUSY",
     "BriefDescription": "Lifetime, sample of RD machine 0 valid"
   },
-  {,
-    "EventCode": "0x468B4",
-    "EventName": "PM_L3_RD0_BUSY",
-    "BriefDescription": "Lifetime, sample of RD machine 0 valid"
-  },
   {,
     "EventCode": "0x46080",
     "EventName": "PM_L2_DISP_ALL_L2MISS",
@@ -849,21 +839,11 @@
     "EventName": "PM_RC0_BUSY",
     "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
   },
-  {,
-    "EventCode": "0x2608C",
-    "EventName": "PM_RC0_BUSY",
-    "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
-  },
   {,
     "EventCode": "0x36082",
     "EventName": "PM_L2_LD_DISP",
     "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)."
   },
-  {,
-    "EventCode": "0x1609E",
-    "EventName": "PM_L2_LD_DISP",
-    "BriefDescription": "All successful D side load dispatches for this thread (L2 miss + L2 hits)"
-  },
   {,
     "EventCode": "0xF8B0",
     "EventName": "PM_L3_SW_PREF",
@@ -1039,11 +1019,6 @@
     "EventName": "PM_L3_CO_MEPF",
     "BriefDescription": "L3 castouts in Mepf state for this thread"
   },
-  {,
-    "EventCode": "0x168A0",
-    "EventName": "PM_L3_CO_MEPF",
-    "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory).  The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request"
-  },
   {,
     "EventCode": "0x460A2",
     "EventName": "PM_L3_LAT_CI_HIT",
@@ -1149,11 +1124,6 @@
     "EventName": "PM_L2_RTY_ST",
     "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
   },
-  {,
-    "EventCode": "0x4689E",
-    "EventName": "PM_L2_RTY_ST",
-    "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
-  },
   {,
     "EventCode": "0x24040",
     "EventName": "PM_INST_FROM_L2_MEPF",
@@ -1254,11 +1224,6 @@
     "EventName": "PM_CO0_BUSY",
     "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
   },
-  {,
-    "EventCode": "0x4608C",
-    "EventName": "PM_CO0_BUSY",
-    "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
-  },
   {,
     "EventCode": "0x2C122",
     "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC",
@@ -1394,11 +1359,6 @@
     "EventName": "PM_IPTEG_FROM_LMEM",
     "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request"
   },
-  {,
-    "EventCode": "0x40006",
-    "EventName": "PM_ISLB_MISS",
-    "BriefDescription": "Number of ISLB misses for this thread"
-  },
   {,
     "EventCode": "0xD8A8",
     "EventName": "PM_ISLB_MISS",
@@ -1514,11 +1474,6 @@
     "EventName": "PM_L2_INST",
     "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)."
   },
-  {,
-    "EventCode": "0x3609E",
-    "EventName": "PM_L2_INST",
-    "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
-  },
   {,
     "EventCode": "0x3504C",
     "EventName": "PM_IPTEG_FROM_DL4",
@@ -1689,11 +1644,6 @@
     "EventName": "PM_L2_LD_HIT",
     "BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)"
   },
-  {,
-    "EventCode": "0x2609E",
-    "EventName": "PM_L2_LD_HIT",
-    "BriefDescription": "All successful D side load dispatches for this thread that were L2 hits for this thread"
-  },
   {,
     "EventCode": "0x168AC",
     "EventName": "PM_L3_CI_USAGE",
@@ -1794,21 +1744,11 @@
     "EventName": "PM_L3_WI0_BUSY",
     "BriefDescription": "Rotating sample of 8 WI valid"
   },
-  {,
-    "EventCode": "0x260B6",
-    "EventName": "PM_L3_WI0_BUSY",
-    "BriefDescription": "Rotating sample of 8 WI valid (duplicate)"
-  },
   {,
     "EventCode": "0x368AC",
     "EventName": "PM_L3_CO0_BUSY",
     "BriefDescription": "Lifetime, sample of CO machine 0 valid"
   },
-  {,
-    "EventCode": "0x468AC",
-    "EventName": "PM_L3_CO0_BUSY",
-    "BriefDescription": "Lifetime, sample of CO machine 0 valid"
-  },
   {,
     "EventCode": "0x2E040",
     "EventName": "PM_DPTEG_FROM_L2_MEPF",
@@ -1839,11 +1779,6 @@
     "EventName": "PM_L3_P0_PF_RTY",
     "BriefDescription": "L3 PF received retry port 0, every retry counted"
   },
-  {,
-    "EventCode": "0x260AE",
-    "EventName": "PM_L3_P0_PF_RTY",
-    "BriefDescription": "L3 PF received retry port 0, every retry counted"
-  },
   {,
     "EventCode": "0x268B2",
     "EventName": "PM_L3_LOC_GUESS_WRONG",
@@ -1894,11 +1829,6 @@
     "EventName": "PM_L3_SN0_BUSY",
     "BriefDescription": "Lifetime, sample of snooper machine 0 valid"
   },
-  {,
-    "EventCode": "0x460AC",
-    "EventName": "PM_L3_SN0_BUSY",
-    "BriefDescription": "Lifetime, sample of snooper machine 0 valid"
-  },
   {,
     "EventCode": "0x3005C",
     "EventName": "PM_BFU_BUSY",
@@ -1934,11 +1864,6 @@
     "EventName": "PM_L3_PF0_BUSY",
     "BriefDescription": "Lifetime, sample of PF machine 0 valid"
   },
-  {,
-    "EventCode": "0x460B4",
-    "EventName": "PM_L3_PF0_BUSY",
-    "BriefDescription": "Lifetime, sample of PF machine 0 valid"
-  },
   {,
     "EventCode": "0xC0B0",
     "EventName": "PM_LSU_FLUSH_UE",
@@ -2084,11 +2009,6 @@
     "EventName": "PM_L3_P1_CO_RTY",
     "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted"
   },
-  {,
-    "EventCode": "0x468AE",
-    "EventName": "PM_L3_P1_CO_RTY",
-    "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted"
-  },
   {,
     "EventCode": "0xC0AC",
     "EventName": "PM_LSU_FLUSH_EMSH",
@@ -2194,11 +2114,6 @@
     "EventName": "PM_L2_SN_M_WR_DONE",
     "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
   },
-  {,
-    "EventCode": "0x46886",
-    "EventName": "PM_L2_SN_M_WR_DONE",
-    "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
-  },
   {,
     "EventCode": "0x489C",
     "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL",
@@ -2289,21 +2204,11 @@
     "EventName": "PM_SN0_BUSY",
     "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
   },
-  {,
-    "EventCode": "0x26090",
-    "EventName": "PM_SN0_BUSY",
-    "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
-  },
   {,
     "EventCode": "0x360AE",
     "EventName": "PM_L3_P0_CO_RTY",
     "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
   },
-  {,
-    "EventCode": "0x460AE",
-    "EventName": "PM_L3_P0_CO_RTY",
-    "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
-  },
   {,
     "EventCode": "0x168A8",
     "EventName": "PM_L3_WI_USAGE",
@@ -2339,26 +2244,11 @@
     "EventName": "PM_L3_P1_PF_RTY",
     "BriefDescription": "L3 PF received retry port 1, every retry counted"
   },
-  {,
-    "EventCode": "0x268AE",
-    "EventName": "PM_L3_P1_PF_RTY",
-    "BriefDescription": "L3 PF received retry port 3, every retry counted"
-  },
   {,
     "EventCode": "0x46082",
     "EventName": "PM_L2_ST_DISP",
     "BriefDescription": "All successful D-side store dispatches for this thread "
   },
-  {,
-    "EventCode": "0x1689E",
-    "EventName": "PM_L2_ST_DISP",
-    "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)"
-  },
-  {,
-    "EventCode": "0x36880",
-    "EventName": "PM_L2_INST_MISS",
-    "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
-  },
   {,
     "EventCode": "0x4609E",
     "EventName": "PM_L2_INST_MISS",
@@ -2429,11 +2319,6 @@
     "EventName": "PM_INST_DISP",
     "BriefDescription": "# PPC Dispatched"
   },
-  {,
-    "EventCode": "0x300F2",
-    "EventName": "PM_INST_DISP",
-    "BriefDescription": "# PPC Dispatched"
-  },
   {,
     "EventCode": "0x4E05E",
     "EventName": "PM_TM_OUTER_TBEGIN_DISP",
@@ -2459,11 +2344,6 @@
     "EventName": "PM_L2_ST_HIT",
     "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits"
   },
-  {,
-    "EventCode": "0x2689E",
-    "EventName": "PM_L2_ST_HIT",
-    "BriefDescription": "All successful D-side store dispatches that were L2 hits for this thread"
-  },
   {,
     "EventCode": "0x360A8",
     "EventName": "PM_L3_CO",
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
index 47a82568a8df..bc2db636dabf 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
@@ -419,11 +419,6 @@
     "EventName": "PM_INST_GRP_PUMP_MPRED_RTY",
     "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch"
   },
-  {,
-    "EventCode": "0x10016",
-    "EventName": "PM_DSLB_MISS",
-    "BriefDescription": "Data SLB Miss - Total of all segment sizes"
-  },
   {,
     "EventCode": "0xD0A8",
     "EventName": "PM_DSLB_MISS",
@@ -554,4 +549,4 @@
     "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC",
     "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load"
   }
-]
\ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
index a2c95a99e168..3ef8a10aac86 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
@@ -4,11 +4,6 @@
     "EventName": "PM_BR_2PATH",
     "BriefDescription": "Branches that are not strongly biased"
   },
-  {,
-    "EventCode": "0x40036",
-    "EventName": "PM_BR_2PATH",
-    "BriefDescription": "Branches that are not strongly biased"
-  },
   {,
     "EventCode": "0x40056",
     "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH",
@@ -124,4 +119,4 @@
     "EventName": "PM_1FLOP_CMPL",
     "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed"
   }
-]
\ No newline at end of file
+]
-- 
cgit 


From 3b0a5daa061076b2b75ffc294e74483ad9bf241a Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Tue, 29 Aug 2017 13:11:08 -0400
Subject: perf tools: Support new sample type for physical address

Support the new sample type PERF_SAMPLE_PHYS_ADDR for physical addresses.

Add a new option, --phys-data, to record the sample's physical address.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1504026672-7304-2-git-send-email-kan.liang@intel.com
[ Added missing printing in evsel.c patch sent by Jiri Olsa ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/perf_event.h    |  4 +++-
 tools/perf/Documentation/perf-record.txt |  5 ++++-
 tools/perf/builtin-record.c              |  2 ++
 tools/perf/perf.h                        |  1 +
 tools/perf/util/event.h                  |  1 +
 tools/perf/util/evsel.c                  | 19 ++++++++++++++++++-
 6 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 2a37ae925d85..140ae638cfd6 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -139,8 +139,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
 	PERF_SAMPLE_TRANSACTION			= 1U << 17,
 	PERF_SAMPLE_REGS_INTR			= 1U << 18,
+	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 
-	PERF_SAMPLE_MAX = 1U << 19,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
 };
 
 /*
@@ -814,6 +815,7 @@ enum perf_event_type {
 	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
 	 *	{ u64			abi; # enum perf_sample_regs_abi
 	 *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
+	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 9bdea047c5db..e397453e5a46 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -249,7 +249,10 @@ OPTIONS
 
 -d::
 --data::
-	Record the sample addresses.
+	Record the sample virtual addresses.
+
+--phys-data::
+	Record the sample physical addresses.
 
 -T::
 --timestamp::
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 36d7117a7562..56f8142ff97f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1604,6 +1604,8 @@ static struct option __record_options[] = {
 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
 		    "per thread counts"),
 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
+	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
+		    "Record the sample physical addresses"),
 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
 			&record.opts.sample_time_set,
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 2c010dd6a79d..dc442ba21bf6 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -43,6 +43,7 @@ struct record_opts {
 	bool	     no_samples;
 	bool	     raw_samples;
 	bool	     sample_address;
+	bool	     sample_phys_addr;
 	bool	     sample_weight;
 	bool	     sample_time;
 	bool	     sample_time_set;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 423ac82605f3..ee7bcc898d35 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -200,6 +200,7 @@ struct perf_sample {
 	u32 cpu;
 	u32 raw_size;
 	u64 data_src;
+	u64 phys_addr;
 	u32 flags;
 	u16 insn_len;
 	u8  cpumode;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d9bd632ed7db..4bb89373eb52 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -955,6 +955,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 	if (opts->sample_address)
 		perf_evsel__set_sample_bit(evsel, DATA_SRC);
 
+	if (opts->sample_phys_addr)
+		perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
+
 	if (opts->no_buffering) {
 		attr->watermark = 0;
 		attr->wakeup_events = 1;
@@ -1464,7 +1467,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
 		bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
 		bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
 		bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
-		bit_name(WEIGHT),
+		bit_name(WEIGHT), bit_name(PHYS_ADDR),
 		{ .name = NULL, }
 	};
 #undef bit_name
@@ -2206,6 +2209,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 		}
 	}
 
+	data->phys_addr = 0;
+	if (type & PERF_SAMPLE_PHYS_ADDR) {
+		data->phys_addr = *array;
+		array++;
+	}
+
 	return 0;
 }
 
@@ -2311,6 +2320,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
 		}
 	}
 
+	if (type & PERF_SAMPLE_PHYS_ADDR)
+		result += sizeof(u64);
+
 	return result;
 }
 
@@ -2500,6 +2512,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
 		}
 	}
 
+	if (type & PERF_SAMPLE_PHYS_ADDR) {
+		*array = sample->phys_addr;
+		array++;
+	}
+
 	return 0;
 }
 
-- 
cgit 


From 8780fb25ab060bafa5a8149e79b703e0fc7ee847 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Tue, 29 Aug 2017 13:11:09 -0400
Subject: perf sort: Add sort option for physical address

Add a new sort option "phys_daddr" for --mem-mode sort.  With this
option applied, perf can sort and report by the sample's physical address.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1504026672-7304-3-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-report.txt |  1 +
 tools/perf/util/hist.c                   |  4 +++
 tools/perf/util/hist.h                   |  1 +
 tools/perf/util/machine.c                |  8 ++++--
 tools/perf/util/session.c                |  3 +++
 tools/perf/util/sort.c                   | 42 ++++++++++++++++++++++++++++++++
 tools/perf/util/sort.h                   |  1 +
 tools/perf/util/symbol.h                 |  1 +
 8 files changed, 59 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 9fa84617181e..383a98d992ed 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -137,6 +137,7 @@ OPTIONS
 	- mem: type of memory access for the data at the time of the sample
 	- snoop: type of snoop (if any) for the data at the time of the sample
 	- dcacheline: the cacheline the data address is on at the time of the sample
+	- phys_daddr: physical address of the data being accessed at the time of the sample
 
 	And the default sort keys are changed to local_weight, mem, sym, dso,
 	symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 9453b2e27015..e60d8d8ea4c2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -167,6 +167,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 			symlen = unresolved_col_width + 4 + 2;
 			hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
 		}
+
+		hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
+				   unresolved_col_width + 4 + 2);
+
 	} else {
 		symlen = unresolved_col_width + 4 + 2;
 		hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ee3670a388df..e60dda26a920 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -47,6 +47,7 @@ enum hist_column {
 	HISTC_GLOBAL_WEIGHT,
 	HISTC_MEM_DADDR_SYMBOL,
 	HISTC_MEM_DADDR_DSO,
+	HISTC_MEM_PHYS_DADDR,
 	HISTC_MEM_LOCKED,
 	HISTC_MEM_TLB,
 	HISTC_MEM_LVL,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 9eaa95302c86..df709363ef69 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1635,10 +1635,12 @@ static void ip__resolve_ams(struct thread *thread,
 	ams->al_addr = al.addr;
 	ams->sym = al.sym;
 	ams->map = al.map;
+	ams->phys_addr = 0;
 }
 
 static void ip__resolve_data(struct thread *thread,
-			     u8 m, struct addr_map_symbol *ams, u64 addr)
+			     u8 m, struct addr_map_symbol *ams,
+			     u64 addr, u64 phys_addr)
 {
 	struct addr_location al;
 
@@ -1658,6 +1660,7 @@ static void ip__resolve_data(struct thread *thread,
 	ams->al_addr = al.addr;
 	ams->sym = al.sym;
 	ams->map = al.map;
+	ams->phys_addr = phys_addr;
 }
 
 struct mem_info *sample__resolve_mem(struct perf_sample *sample,
@@ -1669,7 +1672,8 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 		return NULL;
 
 	ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
-	ip__resolve_data(al->thread, al->cpumode, &mi->daddr, sample->addr);
+	ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
+			 sample->addr, sample->phys_addr);
 	mi->data_src.val = sample->data_src;
 
 	return mi;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ac863691605f..a7ebd9fe8e40 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1120,6 +1120,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
 
+	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+		printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
+
 	if (sample_type & PERF_SAMPLE_TRANSACTION)
 		printf("... transaction: %" PRIx64 "\n", sample->transaction);
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 12359bd986db..eb3ab902a1c0 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1315,6 +1315,47 @@ struct sort_entry sort_mem_dcacheline = {
 	.se_width_idx	= HISTC_MEM_DCACHELINE,
 };
 
+static int64_t
+sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t l = 0, r = 0;
+
+	if (left->mem_info)
+		l = left->mem_info->daddr.phys_addr;
+	if (right->mem_info)
+		r = right->mem_info->daddr.phys_addr;
+
+	return (int64_t)(r - l);
+}
+
+static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
+					   size_t size, unsigned int width)
+{
+	uint64_t addr = 0;
+	size_t ret = 0;
+	size_t len = BITS_PER_LONG / 4;
+
+	addr = he->mem_info->daddr.phys_addr;
+
+	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level);
+
+	ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, addr);
+
+	ret += repsep_snprintf(bf + ret, size - ret, "%-*s", width - ret, "");
+
+	if (ret > width)
+		bf[width] = '\0';
+
+	return width;
+}
+
+struct sort_entry sort_mem_phys_daddr = {
+	.se_header	= "Data Physical Address",
+	.se_cmp		= sort__phys_daddr_cmp,
+	.se_snprintf	= hist_entry__phys_daddr_snprintf,
+	.se_width_idx	= HISTC_MEM_PHYS_DADDR,
+};
+
 static int64_t
 sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -1547,6 +1588,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
 	DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
 	DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
 	DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
+	DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
 };
 
 #undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index b7c75597e18f..f36dc4980a6c 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -245,6 +245,7 @@ enum sort_type {
 	SORT_MEM_SNOOP,
 	SORT_MEM_DCACHELINE,
 	SORT_MEM_IADDR_SYMBOL,
+	SORT_MEM_PHYS_DADDR,
 };
 
 /*
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index d00a012cfdfb..2bd6a1f01a1c 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -186,6 +186,7 @@ struct addr_map_symbol {
 	struct symbol *sym;
 	u64	      addr;
 	u64	      al_addr;
+	u64	      phys_addr;
 };
 
 struct branch_info {
-- 
cgit 


From c35aeb9dfe512422ca9ea28aae692c8f1d052b2d Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Tue, 29 Aug 2017 13:11:10 -0400
Subject: perf mem: Support physical address

Add the option --phys-data (-p) to "perf mem" to record/report the
physical address. When it is given, the default mem sort order is
extended with the physical address sort key (phys_daddr).

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1504026672-7304-4-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-mem.txt |  4 ++
 tools/perf/builtin-mem.c              | 97 +++++++++++++++++++++++++----------
 2 files changed, 75 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 73496320fca3..4be08a1e3f8d 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -59,6 +59,10 @@ OPTIONS
 --ldload::
 	Specify desired latency for loads event.
 
+-p::
+--phys-data::
+	Record/Report sample physical addresses
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index e001c0290793..0f15634ef82c 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -23,6 +23,7 @@ struct perf_mem {
 	bool			hide_unresolved;
 	bool			dump_raw;
 	bool			force;
+	bool			phys_addr;
 	int			operation;
 	const char		*cpu_list;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -101,6 +102,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 
 	rec_argv[i++] = "-d";
 
+	if (mem->phys_addr)
+		rec_argv[i++] = "--phys-data";
+
 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
 		if (!perf_mem_events[j].record)
 			continue;
@@ -161,30 +165,60 @@ dump_raw_samples(struct perf_tool *tool,
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
 
-	if (symbol_conf.field_sep) {
-		fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
-		      "%s0x%"PRIx64"%s%s:%s\n";
+	if (mem->phys_addr) {
+		if (symbol_conf.field_sep) {
+			fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64
+			      "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
+		} else {
+			fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
+			      "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64
+			      "%s%s:%s\n";
+			symbol_conf.field_sep = " ";
+		}
+
+		printf(fmt,
+			sample->pid,
+			symbol_conf.field_sep,
+			sample->tid,
+			symbol_conf.field_sep,
+			sample->ip,
+			symbol_conf.field_sep,
+			sample->addr,
+			symbol_conf.field_sep,
+			sample->phys_addr,
+			symbol_conf.field_sep,
+			sample->weight,
+			symbol_conf.field_sep,
+			sample->data_src,
+			symbol_conf.field_sep,
+			al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+			al.sym ? al.sym->name : "???");
 	} else {
-		fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
-		      "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
-		symbol_conf.field_sep = " ";
-	}
+		if (symbol_conf.field_sep) {
+			fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
+			      "%s0x%"PRIx64"%s%s:%s\n";
+		} else {
+			fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
+			      "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
+			symbol_conf.field_sep = " ";
+		}
 
-	printf(fmt,
-		sample->pid,
-		symbol_conf.field_sep,
-		sample->tid,
-		symbol_conf.field_sep,
-		sample->ip,
-		symbol_conf.field_sep,
-		sample->addr,
-		symbol_conf.field_sep,
-		sample->weight,
-		symbol_conf.field_sep,
-		sample->data_src,
-		symbol_conf.field_sep,
-		al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
-		al.sym ? al.sym->name : "???");
+		printf(fmt,
+			sample->pid,
+			symbol_conf.field_sep,
+			sample->tid,
+			symbol_conf.field_sep,
+			sample->ip,
+			symbol_conf.field_sep,
+			sample->addr,
+			symbol_conf.field_sep,
+			sample->weight,
+			symbol_conf.field_sep,
+			sample->data_src,
+			symbol_conf.field_sep,
+			al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+			al.sym ? al.sym->name : "???");
+	}
 out_put:
 	addr_location__put(&al);
 	return 0;
@@ -224,7 +258,10 @@ static int report_raw_events(struct perf_mem *mem)
 	if (ret < 0)
 		goto out_delete;
 
-	printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+	if (mem->phys_addr)
+		printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+	else
+		printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
 
 	ret = perf_session__process_events(session);
 
@@ -254,9 +291,16 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
 	 * there is no weight (cost) associated with stores, so don't print
 	 * the column
 	 */
-	if (!(mem->operation & MEM_OPERATION_LOAD))
-		rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
-				"dso_daddr,tlb,locked";
+	if (!(mem->operation & MEM_OPERATION_LOAD)) {
+		if (mem->phys_addr)
+			rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
+					"dso_daddr,tlb,locked,phys_daddr";
+		else
+			rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
+					"dso_daddr,tlb,locked";
+	} else if (mem->phys_addr)
+		rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr,"
+				"dso_daddr,snoop,tlb,locked,phys_daddr";
 
 	for (j = 1; j < argc; j++, i++)
 		rep_argv[i] = argv[j];
@@ -373,6 +417,7 @@ int cmd_mem(int argc, const char **argv)
 		   "separator for columns, no spaces will be added"
 		   " between columns '.' is reserved."),
 	OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
+	OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
 	OPT_END()
 	};
 	const char *const mem_subcommands[] = { "record", "report", NULL };
-- 
cgit 


From 49d58f04eb6cdc18b3747fc4243a7114364f5420 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Tue, 29 Aug 2017 13:11:11 -0400
Subject: perf script: Support physical address

Display the physical address at the tail if it is available.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1504026672-7304-5-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt |  2 +-
 tools/perf/builtin-script.c              | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 5ee8796be96e..18dfcfa38454 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
         srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff,
-        callindent, insn, insnlen, synth.
+        callindent, insn, insnlen, synth, phys_addr.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 378f76cdf923..3d4c3b5e1868 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -87,6 +87,7 @@ enum perf_output_field {
 	PERF_OUTPUT_BRSTACKINSN	    = 1U << 23,
 	PERF_OUTPUT_BRSTACKOFF	    = 1U << 24,
 	PERF_OUTPUT_SYNTH           = 1U << 25,
+	PERF_OUTPUT_PHYS_ADDR       = 1U << 26,
 };
 
 struct output_option {
@@ -119,6 +120,7 @@ struct output_option {
 	{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
 	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
 	{.str = "synth", .field = PERF_OUTPUT_SYNTH},
+	{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
 };
 
 enum {
@@ -175,7 +177,8 @@ static struct {
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
 			      PERF_OUTPUT_PERIOD |  PERF_OUTPUT_ADDR |
-			      PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT,
+			      PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT |
+			      PERF_OUTPUT_PHYS_ADDR,
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
@@ -382,6 +385,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 					PERF_OUTPUT_IREGS))
 		return -EINVAL;
 
+	if (PRINT_FIELD(PHYS_ADDR) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
+					PERF_OUTPUT_PHYS_ADDR))
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -1446,6 +1454,9 @@ static void process_event(struct perf_script *script,
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
 	print_insn(sample, attr, thread, machine);
+
+	if (PRINT_FIELD(PHYS_ADDR))
+		printf("%16" PRIx64, sample->phys_addr);
 	printf("\n");
 }
 
@@ -2729,7 +2740,7 @@ int cmd_script(int argc, const char **argv)
 		     "Valid types: hw,sw,trace,raw,synth. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-		     "bpf-output,callindent,insn,insnlen,brstackinsn,synth",
+		     "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
 		     parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
-- 
cgit 


From fc33dccba39584e403436b9cda3edc9c34b62bce Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Tue, 29 Aug 2017 13:11:12 -0400
Subject: perf test: Add test case for PERF_SAMPLE_PHYS_ADDR

Extend the sample-parsing test cases to support the new sample type
PERF_SAMPLE_PHYS_ADDR.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1504026672-7304-6-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/sample-parsing.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 6d028f42b3cf..c3858487159d 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -141,6 +141,9 @@ static bool samples_same(const struct perf_sample *s1,
 		}
 	}
 
+	if (type & PERF_SAMPLE_PHYS_ADDR)
+		COMP(phys_addr);
+
 	return true;
 }
 
@@ -206,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
 			.mask	= sample_regs,
 			.regs	= regs,
 		},
+		.phys_addr	= 113,
 	};
 	struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},};
 	struct perf_sample sample_out;
@@ -305,7 +309,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
 	 * were added.  Please actually update the test rather than just change
 	 * the condition below.
 	 */
-	if (PERF_SAMPLE_MAX > PERF_SAMPLE_REGS_INTR << 1) {
+	if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) {
 		pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
 		return -1;
 	}
-- 
cgit 


From 63ce8449bc1081711eef1add68909e9bd758de62 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 31 Aug 2017 15:32:18 -0300
Subject: perf stat: Only auto-merge events that are PMU aliases

Peter reported that when he explicitly asked for multiple events with
the same name on the command line they got coalesced into just one line,
i.e.:

   # perf stat -e cycles -e cycles -e cycles usleep 1

   Performance counter stats for 'usleep 1':

         3,269,652      cycles

       0.000884123 seconds time elapsed

  #

And while there is the --no-merge option to disable that auto-merging,
this is a blunt change in behaviour for such an explicit request, so
change the code so that this auto-merging is done only when handling the
multi PMU aliases with the same name that introduced this coalescing,
restoring the previous behaviour for the explicit case:

  # perf stat -e cycles -e cycles -e cycles usleep 1

   Performance counter stats for 'usleep 1':

         1,472,837      cycles
         1,472,837      cycles
         1,472,837      cycles

       0.001764870 seconds time elapsed

  #

Reported-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: 430daf2dc7af ("perf stat: Collapse identically named events")
Link: http://lkml.kernel.org/r/20170831184122.GK4831@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c      |  2 +-
 tools/perf/util/evsel.h        |  1 +
 tools/perf/util/parse-events.c | 24 ++++++++++++++++--------
 3 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 866da7aa54bf..85e992d9215b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1257,7 +1257,7 @@ static bool collect_data(struct perf_evsel *counter,
 	if (counter->merged_stat)
 		return false;
 	cb(counter, data, true);
-	if (!no_merge)
+	if (!no_merge && counter->auto_merge_stats)
 		collect_all_aliases(counter, cb, data);
 	return true;
 }
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 351d3b2d8887..dd2c4b5112a5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -131,6 +131,7 @@ struct perf_evsel {
 	bool			cmdline_group_boundary;
 	struct list_head	config_terms;
 	int			bpf_fd;
+	bool			auto_merge_stats;
 	bool			merged_stat;
 	const char *		metric_expr;
 	const char *		metric_name;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f44aeba51d1f..f6257fb4f08c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -310,7 +310,7 @@ static struct perf_evsel *
 __add_event(struct list_head *list, int *idx,
 	    struct perf_event_attr *attr,
 	    char *name, struct cpu_map *cpus,
-	    struct list_head *config_terms)
+	    struct list_head *config_terms, bool auto_merge_stats)
 {
 	struct perf_evsel *evsel;
 
@@ -324,6 +324,7 @@ __add_event(struct list_head *list, int *idx,
 	evsel->cpus        = cpu_map__get(cpus);
 	evsel->own_cpus    = cpu_map__get(cpus);
 	evsel->system_wide = !!cpus;
+	evsel->auto_merge_stats = auto_merge_stats;
 
 	if (name)
 		evsel->name = strdup(name);
@@ -339,7 +340,7 @@ static int add_event(struct list_head *list, int *idx,
 		     struct perf_event_attr *attr, char *name,
 		     struct list_head *config_terms)
 {
-	return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM;
+	return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM;
 }
 
 static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
@@ -1209,9 +1210,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
 			 get_config_name(head_config), &config_terms);
 }
 
-int parse_events_add_pmu(struct parse_events_state *parse_state,
+static int __parse_events_add_pmu(struct parse_events_state *parse_state,
 			 struct list_head *list, char *name,
-			 struct list_head *head_config)
+			 struct list_head *head_config, bool auto_merge_stats)
 {
 	struct perf_event_attr attr;
 	struct perf_pmu_info info;
@@ -1232,7 +1233,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 
 	if (!head_config) {
 		attr.type = pmu->type;
-		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL);
+		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats);
 		return evsel ? 0 : -ENOMEM;
 	}
 
@@ -1254,7 +1255,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 
 	evsel = __add_event(list, &parse_state->idx, &attr,
 			    get_config_name(head_config), pmu->cpus,
-			    &config_terms);
+			    &config_terms, auto_merge_stats);
 	if (evsel) {
 		evsel->unit = info.unit;
 		evsel->scale = info.scale;
@@ -1267,6 +1268,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 	return evsel ? 0 : -ENOMEM;
 }
 
+int parse_events_add_pmu(struct parse_events_state *parse_state,
+			 struct list_head *list, char *name,
+			 struct list_head *head_config)
+{
+	return __parse_events_add_pmu(parse_state, list, name, head_config, false);
+}
+
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 			       char *str, struct list_head **listp)
 {
@@ -1296,8 +1304,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 					return -1;
 				list_add_tail(&term->list, head);
 
-				if (!parse_events_add_pmu(parse_state, list,
-						  pmu->name, head)) {
+				if (!__parse_events_add_pmu(parse_state, list,
+							    pmu->name, head, true)) {
 					pr_debug("%s -> %s/%s/\n", str,
 						 pmu->name, alias->str);
 					ok++;
-- 
cgit 


From eba9fac017617e685d648339e29a1453a30cb065 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 1 Sep 2017 14:55:40 -0300
Subject: perf annotate browser: Help for cycling thru hottest instructions
 with TAB/shift+TAB

The popup help accessed via 'h' didn't mention TAB and shift+TAB, only
'H', which goes to the hottest line, while the former two are the
hotkeys that actually cycle thru the hottest lines.

Reported-by: Flavio Bruno Leitner <fbl@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-5ppym6odizfj1ifa4t7neiku@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/annotate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index ba0aee576a2b..786fecaf578e 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -829,7 +829,8 @@ static int annotate_browser__run(struct annotate_browser *browser,
 		"q/ESC/CTRL+C  Exit\n\n"
 		"ENTER         Go to target\n"
 		"ESC           Exit\n"
-		"H             Cycle thru hottest instructions\n"
+		"H             Go to hottest instruction\n"
+		"TAB/shift+TAB Cycle thru hottest instructions\n"
 		"j             Toggle showing jump to target arrows\n"
 		"J             Toggle showing number of jump sources on targets\n"
 		"n             Search next string\n"
-- 
cgit