summary refs log tree commit diff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2017-09-12 11:28:13 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-09-12 11:28:13 -0700
commit e6328a7abe7f8fcd32e9d3bcbd14ff2161bf71c9 (patch)
tree 12df1e085c05269a9289a5b7bcdad864993e29ad
parent 33f82bda010224e908e23e59150b4d36904affe9 (diff)
parent 770e96125515daf1c7bc179323f2e0d488dfe6ac (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf tooling updates from Ingo Molnar:
 "Perf tooling updates and fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf annotate browser: Help for cycling thru hottest instructions with TAB/shift+TAB
  perf stat: Only auto-merge events that are PMU aliases
  perf test: Add test case for PERF_SAMPLE_PHYS_ADDR
  perf script: Support physical address
  perf mem: Support physical address
  perf sort: Add sort option for physical address
  perf tools: Support new sample type for physical address
  perf vendor events powerpc: Remove duplicate events
  perf intel-pt: Fix syntax in documentation of config option
  perf test powerpc: Fix 'Object code reading' test
  perf trace: Support syscall name globbing
  perf syscalltbl: Support glob matching on syscall names
  perf report: Calculate the average cycles of iterations
-rw-r--r-- tools/include/uapi/linux/perf_event.h 4
-rw-r--r-- tools/perf/Documentation/intel-pt.txt 2
-rw-r--r-- tools/perf/Documentation/perf-mem.txt 4
-rw-r--r-- tools/perf/Documentation/perf-record.txt 5
-rw-r--r-- tools/perf/Documentation/perf-report.txt 1
-rw-r--r-- tools/perf/Documentation/perf-script.txt 2
-rw-r--r-- tools/perf/Documentation/perf-trace.txt 2
-rw-r--r-- tools/perf/builtin-mem.c 97
-rw-r--r-- tools/perf/builtin-record.c 2
-rw-r--r-- tools/perf/builtin-script.c 15
-rw-r--r-- tools/perf/builtin-stat.c 2
-rw-r--r-- tools/perf/builtin-trace.c 39
-rw-r--r-- tools/perf/perf.h 1
-rw-r--r-- tools/perf/pmu-events/arch/powerpc/power9/frontend.json 7
-rw-r--r-- tools/perf/pmu-events/arch/powerpc/power9/other.json 120
-rw-r--r-- tools/perf/pmu-events/arch/powerpc/power9/pipeline.json 7
-rw-r--r-- tools/perf/pmu-events/arch/powerpc/power9/pmc.json 7
-rw-r--r-- tools/perf/tests/code-reading.c 5
-rw-r--r-- tools/perf/tests/sample-parsing.c 6
-rw-r--r-- tools/perf/ui/browsers/annotate.c 3
-rw-r--r-- tools/perf/ui/browsers/hists.c 8
-rw-r--r-- tools/perf/ui/stdio/hist.c 10
-rw-r--r-- tools/perf/util/callchain.c 49
-rw-r--r-- tools/perf/util/callchain.h 9
-rw-r--r-- tools/perf/util/event.h 1
-rw-r--r-- tools/perf/util/evsel.c 19
-rw-r--r-- tools/perf/util/evsel.h 1
-rw-r--r-- tools/perf/util/hist.c 4
-rw-r--r-- tools/perf/util/hist.h 1
-rw-r--r-- tools/perf/util/machine.c 96
-rw-r--r-- tools/perf/util/parse-events.c 24
-rw-r--r-- tools/perf/util/session.c 3
-rw-r--r-- tools/perf/util/sort.c 42
-rw-r--r-- tools/perf/util/sort.h 1
-rw-r--r-- tools/perf/util/symbol.h 1
-rw-r--r-- tools/perf/util/syscalltbl.c 33
-rw-r--r-- tools/perf/util/syscalltbl.h 3
37 files changed, 368 insertions, 268 deletions
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 2a37ae925d85..140ae638cfd6 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -139,8 +139,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_IDENTIFIER = 1U << 16,
PERF_SAMPLE_TRANSACTION = 1U << 17,
PERF_SAMPLE_REGS_INTR = 1U << 18,
+ PERF_SAMPLE_PHYS_ADDR = 1U << 19,
- PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
};
/*
@@ -814,6 +815,7 @@ enum perf_event_type {
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
+ * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
* };
*/
PERF_RECORD_SAMPLE = 9,
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index ab1b0825130a..76971d2e4164 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -873,7 +873,7 @@ amended to take the number of elements as a parameter.
$ cat ~/.perfconfig
[intel-pt]
- mispred-all
+ mispred-all = on
$ perf record -e intel_pt//u ./sort 3000
Bubble sorting array of 3000 elements
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 73496320fca3..4be08a1e3f8d 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -59,6 +59,10 @@ OPTIONS
--ldload::
Specify desired latency for loads event.
+-p::
+--phys-data::
+ Record/Report sample physical addresses
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 9bdea047c5db..e397453e5a46 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -249,7 +249,10 @@ OPTIONS
-d::
--data::
- Record the sample addresses.
+ Record the sample virtual addresses.
+
+--phys-data::
+ Record the sample physical addresses.
-T::
--timestamp::
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 9fa84617181e..383a98d992ed 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -137,6 +137,7 @@ OPTIONS
- mem: type of memory access for the data at the time of the sample
- snoop: type of snoop (if any) for the data at the time of the sample
- dcacheline: the cacheline the data address is on at the time of the sample
+ - phys_daddr: physical address of data being executed on at the time of sample
And the default sort keys are changed to local_weight, mem, sym, dso,
symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 5ee8796be96e..18dfcfa38454 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff,
- callindent, insn, insnlen, synth.
+ callindent, insn, insnlen, synth, phys_addr.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index c1e3288a2dfb..d53bea6bd571 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -37,7 +37,7 @@ OPTIONS
--expr::
--event::
List of syscalls and other perf events (tracepoints, HW cache events,
- etc) to show.
+ etc) to show. Globbing is supported, e.g.: "epoll_*", "*msg*", etc.
See 'perf list' for a complete list of events.
Prefixing with ! shows all syscalls but the ones specified. You may
need to escape it.
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index e001c0290793..0f15634ef82c 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -23,6 +23,7 @@ struct perf_mem {
bool hide_unresolved;
bool dump_raw;
bool force;
+ bool phys_addr;
int operation;
const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -101,6 +102,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
rec_argv[i++] = "-d";
+ if (mem->phys_addr)
+ rec_argv[i++] = "--phys-data";
+
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
if (!perf_mem_events[j].record)
continue;
@@ -161,30 +165,60 @@ dump_raw_samples(struct perf_tool *tool,
if (al.map != NULL)
al.map->dso->hit = 1;
- if (symbol_conf.field_sep) {
- fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
- "%s0x%"PRIx64"%s%s:%s\n";
+ if (mem->phys_addr) {
+ if (symbol_conf.field_sep) {
+ fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64
+ "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
+ } else {
+ fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
+ "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64
+ "%s%s:%s\n";
+ symbol_conf.field_sep = " ";
+ }
+
+ printf(fmt,
+ sample->pid,
+ symbol_conf.field_sep,
+ sample->tid,
+ symbol_conf.field_sep,
+ sample->ip,
+ symbol_conf.field_sep,
+ sample->addr,
+ symbol_conf.field_sep,
+ sample->phys_addr,
+ symbol_conf.field_sep,
+ sample->weight,
+ symbol_conf.field_sep,
+ sample->data_src,
+ symbol_conf.field_sep,
+ al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+ al.sym ? al.sym->name : "???");
} else {
- fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
- "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
- symbol_conf.field_sep = " ";
- }
+ if (symbol_conf.field_sep) {
+ fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
+ "%s0x%"PRIx64"%s%s:%s\n";
+ } else {
+ fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
+ "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
+ symbol_conf.field_sep = " ";
+ }
- printf(fmt,
- sample->pid,
- symbol_conf.field_sep,
- sample->tid,
- symbol_conf.field_sep,
- sample->ip,
- symbol_conf.field_sep,
- sample->addr,
- symbol_conf.field_sep,
- sample->weight,
- symbol_conf.field_sep,
- sample->data_src,
- symbol_conf.field_sep,
- al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
- al.sym ? al.sym->name : "???");
+ printf(fmt,
+ sample->pid,
+ symbol_conf.field_sep,
+ sample->tid,
+ symbol_conf.field_sep,
+ sample->ip,
+ symbol_conf.field_sep,
+ sample->addr,
+ symbol_conf.field_sep,
+ sample->weight,
+ symbol_conf.field_sep,
+ sample->data_src,
+ symbol_conf.field_sep,
+ al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+ al.sym ? al.sym->name : "???");
+ }
out_put:
addr_location__put(&al);
return 0;
@@ -224,7 +258,10 @@ static int report_raw_events(struct perf_mem *mem)
if (ret < 0)
goto out_delete;
- printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+ if (mem->phys_addr)
+ printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+ else
+ printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
ret = perf_session__process_events(session);
@@ -254,9 +291,16 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
* there is no weight (cost) associated with stores, so don't print
* the column
*/
- if (!(mem->operation & MEM_OPERATION_LOAD))
- rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
- "dso_daddr,tlb,locked";
+ if (!(mem->operation & MEM_OPERATION_LOAD)) {
+ if (mem->phys_addr)
+ rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
+ "dso_daddr,tlb,locked,phys_daddr";
+ else
+ rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
+ "dso_daddr,tlb,locked";
+ } else if (mem->phys_addr)
+ rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr,"
+ "dso_daddr,snoop,tlb,locked,phys_daddr";
for (j = 1; j < argc; j++, i++)
rep_argv[i] = argv[j];
@@ -373,6 +417,7 @@ int cmd_mem(int argc, const char **argv)
"separator for columns, no spaces will be added"
" between columns '.' is reserved."),
OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
+ OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
OPT_END()
};
const char *const mem_subcommands[] = { "record", "report", NULL };
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 36d7117a7562..56f8142ff97f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1604,6 +1604,8 @@ static struct option __record_options[] = {
OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
"per thread counts"),
OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
+ OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
+ "Record the sample physical addresses"),
OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
&record.opts.sample_time_set,
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 378f76cdf923..3d4c3b5e1868 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -87,6 +87,7 @@ enum perf_output_field {
PERF_OUTPUT_BRSTACKINSN = 1U << 23,
PERF_OUTPUT_BRSTACKOFF = 1U << 24,
PERF_OUTPUT_SYNTH = 1U << 25,
+ PERF_OUTPUT_PHYS_ADDR = 1U << 26,
};
struct output_option {
@@ -119,6 +120,7 @@ struct output_option {
{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
{.str = "synth", .field = PERF_OUTPUT_SYNTH},
+ {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
};
enum {
@@ -175,7 +177,8 @@ static struct {
PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR |
- PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT,
+ PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT |
+ PERF_OUTPUT_PHYS_ADDR,
.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
},
@@ -382,6 +385,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
PERF_OUTPUT_IREGS))
return -EINVAL;
+ if (PRINT_FIELD(PHYS_ADDR) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
+ PERF_OUTPUT_PHYS_ADDR))
+ return -EINVAL;
+
return 0;
}
@@ -1446,6 +1454,9 @@ static void process_event(struct perf_script *script,
if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
print_sample_bpf_output(sample);
print_insn(sample, attr, thread, machine);
+
+ if (PRINT_FIELD(PHYS_ADDR))
+ printf("%16" PRIx64, sample->phys_addr);
printf("\n");
}
@@ -2729,7 +2740,7 @@ int cmd_script(int argc, const char **argv)
"Valid types: hw,sw,trace,raw,synth. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,period,iregs,brstack,brstacksym,flags,"
- "bpf-output,callindent,insn,insnlen,brstackinsn,synth",
+ "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 866da7aa54bf..85e992d9215b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1257,7 +1257,7 @@ static bool collect_data(struct perf_evsel *counter,
if (counter->merged_stat)
return false;
cb(counter, data, true);
- if (!no_merge)
+ if (!no_merge && counter->auto_merge_stats)
collect_all_aliases(counter, cb, data);
return true;
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d59cdadf3a79..771ddab94bb0 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1261,6 +1261,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
static int trace__validate_ev_qualifier(struct trace *trace)
{
int err = 0, i;
+ size_t nr_allocated;
struct str_node *pos;
trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
@@ -1274,13 +1275,18 @@ static int trace__validate_ev_qualifier(struct trace *trace)
goto out;
}
+ nr_allocated = trace->ev_qualifier_ids.nr;
i = 0;
strlist__for_each_entry(pos, trace->ev_qualifier) {
const char *sc = pos->s;
- int id = syscalltbl__id(trace->sctbl, sc);
+ int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
if (id < 0) {
+ id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
+ if (id >= 0)
+ goto matches;
+
if (err == 0) {
fputs("Error:\tInvalid syscall ", trace->output);
err = -EINVAL;
@@ -1290,13 +1296,37 @@ static int trace__validate_ev_qualifier(struct trace *trace)
fputs(sc, trace->output);
}
-
+matches:
trace->ev_qualifier_ids.entries[i++] = id;
+ if (match_next == -1)
+ continue;
+
+ while (1) {
+ id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
+ if (id < 0)
+ break;
+ if (nr_allocated == trace->ev_qualifier_ids.nr) {
+ void *entries;
+
+ nr_allocated += 8;
+ entries = realloc(trace->ev_qualifier_ids.entries,
+ nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
+ if (entries == NULL) {
+ err = -ENOMEM;
+ fputs("\nError:\t Not enough memory for parsing\n", trace->output);
+ goto out_free;
+ }
+ trace->ev_qualifier_ids.entries = entries;
+ }
+ trace->ev_qualifier_ids.nr++;
+ trace->ev_qualifier_ids.entries[i++] = id;
+ }
}
if (err < 0) {
fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
"\nHint:\tand: 'man syscalls'\n", trace->output);
+out_free:
zfree(&trace->ev_qualifier_ids.entries);
trace->ev_qualifier_ids.nr = 0;
}
@@ -2814,7 +2844,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
struct trace *trace = (struct trace *)opt->value;
const char *s = str;
char *sep = NULL, *lists[2] = { NULL, NULL, };
- int len = strlen(str) + 1, err = -1, list;
+ int len = strlen(str) + 1, err = -1, list, idx;
char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
char group_name[PATH_MAX];
@@ -2831,7 +2861,8 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
*sep = '\0';
list = 0;
- if (syscalltbl__id(trace->sctbl, s) >= 0) {
+ if (syscalltbl__id(trace->sctbl, s) >= 0 ||
+ syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
list = 1;
} else {
path__join(group_name, sizeof(group_name), strace_groups_dir, s);
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 2c010dd6a79d..dc442ba21bf6 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -43,6 +43,7 @@ struct record_opts {
bool no_samples;
bool raw_samples;
bool sample_address;
+ bool sample_phys_addr;
bool sample_weight;
bool sample_time;
bool sample_time_set;
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
index 7e62c46d7a20..c63a919eda98 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
@@ -80,11 +80,6 @@
"BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
},
{,
- "EventCode": "0x400F0",
- "EventName": "PM_LD_MISS_L1",
- "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
- },
- {,
"EventCode": "0x2E01A",
"EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT",
"BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete"
@@ -374,4 +369,4 @@
"EventName": "PM_IPTEG_FROM_L31_ECO_MOD",
"BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json
index 00f3d2a21f31..54cc3be00fc2 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/other.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json
@@ -605,11 +605,6 @@
"BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
},
{,
- "EventCode": "0x3689E",
- "EventName": "PM_L2_RTY_LD",
- "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
- },
- {,
"EventCode": "0xE08C",
"EventName": "PM_LSU0_ERAT_HIT",
"BriefDescription": "Primary ERAT hit. There is no secondary ERAT"
@@ -715,11 +710,6 @@
"BriefDescription": "Lifetime, sample of RD machine 0 valid"
},
{,
- "EventCode": "0x468B4",
- "EventName": "PM_L3_RD0_BUSY",
- "BriefDescription": "Lifetime, sample of RD machine 0 valid"
- },
- {,
"EventCode": "0x46080",
"EventName": "PM_L2_DISP_ALL_L2MISS",
"BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)"
@@ -850,21 +840,11 @@
"BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
},
{,
- "EventCode": "0x2608C",
- "EventName": "PM_RC0_BUSY",
- "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
- },
- {,
"EventCode": "0x36082",
"EventName": "PM_L2_LD_DISP",
"BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)."
},
{,
- "EventCode": "0x1609E",
- "EventName": "PM_L2_LD_DISP",
- "BriefDescription": "All successful D side load dispatches for this thread (L2 miss + L2 hits)"
- },
- {,
"EventCode": "0xF8B0",
"EventName": "PM_L3_SW_PREF",
"BriefDescription": "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest"
@@ -1040,11 +1020,6 @@
"BriefDescription": "L3 castouts in Mepf state for this thread"
},
{,
- "EventCode": "0x168A0",
- "EventName": "PM_L3_CO_MEPF",
- "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request"
- },
- {,
"EventCode": "0x460A2",
"EventName": "PM_L3_LAT_CI_HIT",
"BriefDescription": "L3 Lateral Castins Hit"
@@ -1150,11 +1125,6 @@
"BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
},
{,
- "EventCode": "0x4689E",
- "EventName": "PM_L2_RTY_ST",
- "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
- },
- {,
"EventCode": "0x24040",
"EventName": "PM_INST_FROM_L2_MEPF",
"BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)"
@@ -1255,11 +1225,6 @@
"BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
},
{,
- "EventCode": "0x4608C",
- "EventName": "PM_CO0_BUSY",
- "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
- },
- {,
"EventCode": "0x2C122",
"EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC",
"BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load"
@@ -1395,11 +1360,6 @@
"BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request"
},
{,
- "EventCode": "0x40006",
- "EventName": "PM_ISLB_MISS",
- "BriefDescription": "Number of ISLB misses for this thread"
- },
- {,
"EventCode": "0xD8A8",
"EventName": "PM_ISLB_MISS",
"BriefDescription": "Instruction SLB miss - Total of all segment sizes"
@@ -1515,11 +1475,6 @@
"BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)."
},
{,
- "EventCode": "0x3609E",
- "EventName": "PM_L2_INST",
- "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
- },
- {,
"EventCode": "0x3504C",
"EventName": "PM_IPTEG_FROM_DL4",
"BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request"
@@ -1690,11 +1645,6 @@
"BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)"
},
{,
- "EventCode": "0x2609E",
- "EventName": "PM_L2_LD_HIT",
- "BriefDescription": "All successful D side load dispatches for this thread that were L2 hits for this thread"
- },
- {,
"EventCode": "0x168AC",
"EventName": "PM_L3_CI_USAGE",
"BriefDescription": "Rotating sample of 16 CI or CO actives"
@@ -1795,21 +1745,11 @@
"BriefDescription": "Rotating sample of 8 WI valid"
},
{,
- "EventCode": "0x260B6",
- "EventName": "PM_L3_WI0_BUSY",
- "BriefDescription": "Rotating sample of 8 WI valid (duplicate)"
- },
- {,
"EventCode": "0x368AC",
"EventName": "PM_L3_CO0_BUSY",
"BriefDescription": "Lifetime, sample of CO machine 0 valid"
},
{,
- "EventCode": "0x468AC",
- "EventName": "PM_L3_CO0_BUSY",
- "BriefDescription": "Lifetime, sample of CO machine 0 valid"
- },
- {,
"EventCode": "0x2E040",
"EventName": "PM_DPTEG_FROM_L2_MEPF",
"BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
@@ -1840,11 +1780,6 @@
"BriefDescription": "L3 PF received retry port 0, every retry counted"
},
{,
- "EventCode": "0x260AE",
- "EventName": "PM_L3_P0_PF_RTY",
- "BriefDescription": "L3 PF received retry port 0, every retry counted"
- },
- {,
"EventCode": "0x268B2",
"EventName": "PM_L3_LOC_GUESS_WRONG",
"BriefDescription": "Initial scope=node (LNS) but data from out side local node (near or far or rem). Prediction too Low"
@@ -1895,11 +1830,6 @@
"BriefDescription": "Lifetime, sample of snooper machine 0 valid"
},
{,
- "EventCode": "0x460AC",
- "EventName": "PM_L3_SN0_BUSY",
- "BriefDescription": "Lifetime, sample of snooper machine 0 valid"
- },
- {,
"EventCode": "0x3005C",
"EventName": "PM_BFU_BUSY",
"BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity"
@@ -1935,11 +1865,6 @@
"BriefDescription": "Lifetime, sample of PF machine 0 valid"
},
{,
- "EventCode": "0x460B4",
- "EventName": "PM_L3_PF0_BUSY",
- "BriefDescription": "Lifetime, sample of PF machine 0 valid"
- },
- {,
"EventCode": "0xC0B0",
"EventName": "PM_LSU_FLUSH_UE",
"BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time"
@@ -2085,11 +2010,6 @@
"BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted"
},
{,
- "EventCode": "0x468AE",
- "EventName": "PM_L3_P1_CO_RTY",
- "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted"
- },
- {,
"EventCode": "0xC0AC",
"EventName": "PM_LSU_FLUSH_EMSH",
"BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address"
@@ -2195,11 +2115,6 @@
"BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
},
{,
- "EventCode": "0x46886",
- "EventName": "PM_L2_SN_M_WR_DONE",
- "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
- },
- {,
"EventCode": "0x489C",
"EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL",
"BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time"
@@ -2290,21 +2205,11 @@
"BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
},
{,
- "EventCode": "0x26090",
- "EventName": "PM_SN0_BUSY",
- "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
- },
- {,
"EventCode": "0x360AE",
"EventName": "PM_L3_P0_CO_RTY",
"BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
},
{,
- "EventCode": "0x460AE",
- "EventName": "PM_L3_P0_CO_RTY",
- "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
- },
- {,
"EventCode": "0x168A8",
"EventName": "PM_L3_WI_USAGE",
"BriefDescription": "Lifetime, sample of Write Inject machine 0 valid"
@@ -2340,26 +2245,11 @@
"BriefDescription": "L3 PF received retry port 1, every retry counted"
},
{,
- "EventCode": "0x268AE",
- "EventName": "PM_L3_P1_PF_RTY",
- "BriefDescription": "L3 PF received retry port 3, every retry counted"
- },
- {,
"EventCode": "0x46082",
"EventName": "PM_L2_ST_DISP",
"BriefDescription": "All successful D-side store dispatches for this thread "
},
{,
- "EventCode": "0x1689E",
- "EventName": "PM_L2_ST_DISP",
- "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)"
- },
- {,
- "EventCode": "0x36880",
- "EventName": "PM_L2_INST_MISS",
- "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
- },
- {,
"EventCode": "0x4609E",
"EventName": "PM_L2_INST_MISS",
"BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
@@ -2430,11 +2320,6 @@
"BriefDescription": "# PPC Dispatched"
},
{,
- "EventCode": "0x300F2",
- "EventName": "PM_INST_DISP",
- "BriefDescription": "# PPC Dispatched"
- },
- {,
"EventCode": "0x4E05E",
"EventName": "PM_TM_OUTER_TBEGIN_DISP",
"BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions"
@@ -2460,11 +2345,6 @@
"BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits"
},
{,
- "EventCode": "0x2689E",
- "EventName": "PM_L2_ST_HIT",
- "BriefDescription": "All successful D-side store dispatches that were L2 hits for this thread"
- },
- {,
"EventCode": "0x360A8",
"EventName": "PM_L3_CO",
"BriefDescription": "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))"
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
index 47a82568a8df..bc2db636dabf 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
@@ -420,11 +420,6 @@
"BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch"
},
{,
- "EventCode": "0x10016",
- "EventName": "PM_DSLB_MISS",
- "BriefDescription": "Data SLB Miss - Total of all segment sizes"
- },
- {,
"EventCode": "0xD0A8",
"EventName": "PM_DSLB_MISS",
"BriefDescription": "Data SLB Miss - Total of all segment sizes"
@@ -554,4 +549,4 @@
"EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC",
"BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
index a2c95a99e168..3ef8a10aac86 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
@@ -5,11 +5,6 @@
"BriefDescription": "Branches that are not strongly biased"
},
{,
- "EventCode": "0x40036",
- "EventName": "PM_BR_2PATH",
- "BriefDescription": "Branches that are not strongly biased"
- },
- {,
"EventCode": "0x40056",
"EventName": "PM_MEM_LOC_THRESH_LSU_HIGH",
"BriefDescription": "Local memory above threshold for LSU medium"
@@ -124,4 +119,4 @@
"EventName": "PM_1FLOP_CMPL",
"BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 761c5a448c56..466a462b26d1 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -237,6 +237,11 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
if (!al.map || !al.map->dso) {
+ if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
+ pr_debug("Hypervisor address can not be resolved - skipping\n");
+ return 0;
+ }
+
pr_debug("thread__find_addr_map failed\n");
return -1;
}
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 6d028f42b3cf..c3858487159d 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -141,6 +141,9 @@ static bool samples_same(const struct perf_sample *s1,
}
}
+ if (type & PERF_SAMPLE_PHYS_ADDR)
+ COMP(phys_addr);
+
return true;
}
@@ -206,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.mask = sample_regs,
.regs = regs,
},
+ .phys_addr = 113,
};
struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},};
struct perf_sample sample_out;
@@ -305,7 +309,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
* were added. Please actually update the test rather than just change
* the condition below.
*/
- if (PERF_SAMPLE_MAX > PERF_SAMPLE_REGS_INTR << 1) {
+ if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) {
pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
return -1;
}
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index ba0aee576a2b..786fecaf578e 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -829,7 +829,8 @@ static int annotate_browser__run(struct annotate_browser *browser,
"q/ESC/CTRL+C Exit\n\n"
"ENTER Go to target\n"
"ESC Exit\n"
- "H Cycle thru hottest instructions\n"
+ "H Go to hottest instruction\n"
+ "TAB/shift+TAB Cycle thru hottest instructions\n"
"j Toggle showing jump to target arrows\n"
"J Toggle showing number of jump sources on targets\n"
"n Search next string\n"
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f4bc2462bc2c..13dfb0a0bdeb 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
browser->show_dso);
if (symbol_conf.show_branchflag_count) {
- if (need_percent)
- callchain_list_counts__printf_value(node, chain, NULL,
- buf, sizeof(buf));
- else
- callchain_list_counts__printf_value(NULL, chain, NULL,
- buf, sizeof(buf));
+ callchain_list_counts__printf_value(chain, NULL,
+ buf, sizeof(buf));
if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
str = "Not enough memory!";
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 5c95b8301c67..8bdb7a500181 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
if (symbol_conf.show_branchflag_count) {
- if (!period)
- callchain_list_counts__printf_value(node, chain, NULL,
- buf, sizeof(buf));
- else
- callchain_list_counts__printf_value(NULL, chain, NULL,
- buf, sizeof(buf));
+ callchain_list_counts__printf_value(chain, NULL,
+ buf, sizeof(buf));
if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
str = "Not enough memory!";
@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
if (symbol_conf.show_branchflag_count)
ret += callchain_list_counts__printf_value(
- NULL, chain, fp, NULL, 0);
+ chain, fp, NULL, 0);
ret += fprintf(fp, "\n");
if (++entries_printed == callchain_param.print_limit)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f320b0777e0d..510b513e0f01 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
call->cycles_count =
cursor_node->branch_flags.cycles;
call->iter_count = cursor_node->nr_loop_iter;
- call->samples_count = cursor_node->samples;
+ call->iter_cycles = cursor_node->iter_cycles;
}
}
@@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
cnode->cycles_count +=
node->branch_flags.cycles;
cnode->iter_count += node->nr_loop_iter;
- cnode->samples_count += node->samples;
+ cnode->iter_cycles += node->iter_cycles;
}
}
@@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor,
int callchain_cursor_append(struct callchain_cursor *cursor,
u64 ip, struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
- int nr_loop_iter, int samples, u64 branch_from)
+ int nr_loop_iter, u64 iter_cycles, u64 branch_from)
{
struct callchain_cursor_node *node = *cursor->last;
@@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
node->sym = sym;
node->branch = branch;
node->nr_loop_iter = nr_loop_iter;
- node->samples = samples;
+ node->iter_cycles = iter_cycles;
if (flags)
memcpy(&node->branch_flags, flags,
@@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize,
static int branch_from_str(char *bf, int bfsize,
u64 branch_count,
u64 cycles_count, u64 iter_count,
- u64 samples_count)
+ u64 iter_cycles)
{
int printed = 0, i = 0;
u64 cycles;
@@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize,
bf + printed, bfsize - printed);
}
- if (iter_count && samples_count) {
- printed += count_pri64_printf(i++, "iterations",
- iter_count / samples_count,
+ if (iter_count) {
+ printed += count_pri64_printf(i++, "iter",
+ iter_count,
+ bf + printed, bfsize - printed);
+
+ printed += count_pri64_printf(i++, "avg_cycles",
+ iter_cycles / iter_count,
bf + printed, bfsize - printed);
}
@@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize,
static int counts_str_build(char *bf, int bfsize,
u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count,
- u64 iter_count, u64 samples_count,
+ u64 iter_count, u64 iter_cycles,
struct branch_type_stat *brtype_stat)
{
int printed;
@@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize,
predicted_count, abort_count, brtype_stat);
} else {
printed = branch_from_str(bf, bfsize, branch_count,
- cycles_count, iter_count, samples_count);
+ cycles_count, iter_count, iter_cycles);
}
if (!printed)
@@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize,
static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count,
- u64 iter_count, u64 samples_count,
+ u64 iter_count, u64 iter_cycles,
struct branch_type_stat *brtype_stat)
{
char str[256];
counts_str_build(str, sizeof(str), branch_count,
predicted_count, abort_count, cycles_count,
- iter_count, samples_count, brtype_stat);
+ iter_count, iter_cycles, brtype_stat);
if (fp)
return fprintf(fp, "%s", str);
@@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
return scnprintf(bf, bfsize, "%s", str);
}
-int callchain_list_counts__printf_value(struct callchain_node *node,
- struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
FILE *fp, char *bf, int bfsize)
{
u64 branch_count, predicted_count;
u64 abort_count, cycles_count;
- u64 iter_count = 0, samples_count = 0;
+ u64 iter_count, iter_cycles;
branch_count = clist->branch_count;
predicted_count = clist->predicted_count;
abort_count = clist->abort_count;
cycles_count = clist->cycles_count;
-
- if (node) {
- struct callchain_list *call;
-
- list_for_each_entry(call, &node->val, list) {
- iter_count += call->iter_count;
- samples_count += call->samples_count;
- }
- }
+ iter_count = clist->iter_count;
+ iter_cycles = clist->iter_cycles;
return callchain_counts_printf(fp, bf, bfsize, branch_count,
predicted_count, abort_count,
- cycles_count, iter_count, samples_count,
+ cycles_count, iter_count, iter_cycles,
&clist->brtype_stat);
}
@@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
node->branch, &node->branch_flags,
- node->nr_loop_iter, node->samples,
+ node->nr_loop_iter,
+ node->iter_cycles,
node->branch_from);
if (rc)
break;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 97738201464a..1ed6fc61d0a5 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -119,7 +119,7 @@ struct callchain_list {
u64 abort_count;
u64 cycles_count;
u64 iter_count;
- u64 samples_count;
+ u64 iter_cycles;
struct branch_type_stat brtype_stat;
char *srcline;
struct list_head list;
@@ -139,7 +139,7 @@ struct callchain_cursor_node {
struct branch_flags branch_flags;
u64 branch_from;
int nr_loop_iter;
- int samples;
+ u64 iter_cycles;
struct callchain_cursor_node *next;
};
@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
- int nr_loop_iter, int samples, u64 branch_from);
+ int nr_loop_iter, u64 iter_cycles, u64 branch_from);
/* Close a cursor writing session. Initialize for the reader */
static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
int callchain_node__fprintf_value(struct callchain_node *node,
FILE *fp, u64 total);
-int callchain_list_counts__printf_value(struct callchain_node *node,
- struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
FILE *fp, char *bf, int bfsize);
void free_callchain(struct callchain_root *root);
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 423ac82605f3..ee7bcc898d35 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -200,6 +200,7 @@ struct perf_sample {
u32 cpu;
u32 raw_size;
u64 data_src;
+ u64 phys_addr;
u32 flags;
u16 insn_len;
u8 cpumode;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d9bd632ed7db..4bb89373eb52 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -955,6 +955,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
if (opts->sample_address)
perf_evsel__set_sample_bit(evsel, DATA_SRC);
+ if (opts->sample_phys_addr)
+ perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
+
if (opts->no_buffering) {
attr->watermark = 0;
attr->wakeup_events = 1;
@@ -1464,7 +1467,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
- bit_name(WEIGHT),
+ bit_name(WEIGHT), bit_name(PHYS_ADDR),
{ .name = NULL, }
};
#undef bit_name
@@ -2206,6 +2209,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
}
}
+ data->phys_addr = 0;
+ if (type & PERF_SAMPLE_PHYS_ADDR) {
+ data->phys_addr = *array;
+ array++;
+ }
+
return 0;
}
@@ -2311,6 +2320,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
}
}
+ if (type & PERF_SAMPLE_PHYS_ADDR)
+ result += sizeof(u64);
+
return result;
}
@@ -2500,6 +2512,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
}
}
+ if (type & PERF_SAMPLE_PHYS_ADDR) {
+ *array = sample->phys_addr;
+ array++;
+ }
+
return 0;
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 351d3b2d8887..dd2c4b5112a5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -131,6 +131,7 @@ struct perf_evsel {
bool cmdline_group_boundary;
struct list_head config_terms;
int bpf_fd;
+ bool auto_merge_stats;
bool merged_stat;
const char * metric_expr;
const char * metric_name;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 9453b2e27015..e60d8d8ea4c2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -167,6 +167,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
symlen = unresolved_col_width + 4 + 2;
hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
}
+
+ hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
+ unresolved_col_width + 4 + 2);
+
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ee3670a388df..e60dda26a920 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -47,6 +47,7 @@ enum hist_column {
HISTC_GLOBAL_WEIGHT,
HISTC_MEM_DADDR_SYMBOL,
HISTC_MEM_DADDR_DSO,
+ HISTC_MEM_PHYS_DADDR,
HISTC_MEM_LOCKED,
HISTC_MEM_TLB,
HISTC_MEM_LVL,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5c8eacaca4f4..df709363ef69 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1635,10 +1635,12 @@ static void ip__resolve_ams(struct thread *thread,
ams->al_addr = al.addr;
ams->sym = al.sym;
ams->map = al.map;
+ ams->phys_addr = 0;
}
static void ip__resolve_data(struct thread *thread,
- u8 m, struct addr_map_symbol *ams, u64 addr)
+ u8 m, struct addr_map_symbol *ams,
+ u64 addr, u64 phys_addr)
{
struct addr_location al;
@@ -1658,6 +1660,7 @@ static void ip__resolve_data(struct thread *thread,
ams->al_addr = al.addr;
ams->sym = al.sym;
ams->map = al.map;
+ ams->phys_addr = phys_addr;
}
struct mem_info *sample__resolve_mem(struct perf_sample *sample,
@@ -1669,12 +1672,18 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
return NULL;
ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
- ip__resolve_data(al->thread, al->cpumode, &mi->daddr, sample->addr);
+ ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
+ sample->addr, sample->phys_addr);
mi->data_src.val = sample->data_src;
return mi;
}
+struct iterations {
+ int nr_loop_iter;
+ u64 cycles;
+};
+
static int add_callchain_ip(struct thread *thread,
struct callchain_cursor *cursor,
struct symbol **parent,
@@ -1683,11 +1692,12 @@ static int add_callchain_ip(struct thread *thread,
u64 ip,
bool branch,
struct branch_flags *flags,
- int nr_loop_iter,
- int samples,
+ struct iterations *iter,
u64 branch_from)
{
struct addr_location al;
+ int nr_loop_iter = 0;
+ u64 iter_cycles = 0;
al.filtered = 0;
al.sym = NULL;
@@ -1737,9 +1747,15 @@ static int add_callchain_ip(struct thread *thread,
if (symbol_conf.hide_unresolved && al.sym == NULL)
return 0;
+
+ if (iter) {
+ nr_loop_iter = iter->nr_loop_iter;
+ iter_cycles = iter->cycles;
+ }
+
return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
- branch, flags, nr_loop_iter, samples,
- branch_from);
+ branch, flags, nr_loop_iter,
+ iter_cycles, branch_from);
}
struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1760,6 +1776,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
return bi;
}
+static void save_iterations(struct iterations *iter,
+ struct branch_entry *be, int nr)
+{
+ int i;
+
+ iter->nr_loop_iter = nr;
+ iter->cycles = 0;
+
+ for (i = 0; i < nr; i++)
+ iter->cycles += be[i].flags.cycles;
+}
+
#define CHASHSZ 127
#define CHASHBITS 7
#define NO_ENTRY 0xff
@@ -1767,7 +1795,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
#define PERF_MAX_BRANCH_DEPTH 127
/* Remove loops. */
-static int remove_loops(struct branch_entry *l, int nr)
+static int remove_loops(struct branch_entry *l, int nr,
+ struct iterations *iter)
{
int i, j, off;
unsigned char chash[CHASHSZ];
@@ -1792,8 +1821,18 @@ static int remove_loops(struct branch_entry *l, int nr)
break;
}
if (is_loop) {
- memmove(l + i, l + i + off,
- (nr - (i + off)) * sizeof(*l));
+ j = nr - (i + off);
+ if (j > 0) {
+ save_iterations(iter + i + off,
+ l + i, off);
+
+ memmove(iter + i, iter + i + off,
+ j * sizeof(*iter));
+
+ memmove(l + i, l + i + off,
+ j * sizeof(*l));
+ }
+
nr -= off;
}
}
@@ -1883,7 +1922,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
- branch, flags, 0, 0,
+ branch, flags, NULL,
branch_from);
if (err)
return (err < 0) ? err : 0;
@@ -1909,7 +1948,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int i, j, err, nr_entries;
int skip_idx = -1;
int first_call = 0;
- int nr_loop_iter;
if (chain)
chain_nr = chain->nr;
@@ -1942,6 +1980,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
if (branch && callchain_param.branch_callstack) {
int nr = min(max_stack, (int)branch->nr);
struct branch_entry be[nr];
+ struct iterations iter[nr];
if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
pr_warning("corrupted branch chain. skipping...\n");
@@ -1972,38 +2011,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
be[i] = branch->entries[branch->nr - i - 1];
}
- nr_loop_iter = nr;
- nr = remove_loops(be, nr);
-
- /*
- * Get the number of iterations.
- * It's only approximation, but good enough in practice.
- */
- if (nr_loop_iter > nr)
- nr_loop_iter = nr_loop_iter - nr + 1;
- else
- nr_loop_iter = 0;
+ memset(iter, 0, sizeof(struct iterations) * nr);
+ nr = remove_loops(be, nr, iter);
for (i = 0; i < nr; i++) {
- if (i == nr - 1)
- err = add_callchain_ip(thread, cursor, parent,
- root_al,
- NULL, be[i].to,
- true, &be[i].flags,
- nr_loop_iter, 1,
- be[i].from);
- else
- err = add_callchain_ip(thread, cursor, parent,
- root_al,
- NULL, be[i].to,
- true, &be[i].flags,
- 0, 0, be[i].from);
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al,
+ NULL, be[i].to,
+ true, &be[i].flags,
+ NULL, be[i].from);
if (!err)
err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from,
true, &be[i].flags,
- 0, 0, 0);
+ &iter[i], 0);
if (err == -EINVAL)
break;
if (err)
@@ -2037,7 +2059,7 @@ check_calls:
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
- false, NULL, 0, 0, 0);
+ false, NULL, NULL, 0);
if (err)
return (err < 0) ? err : 0;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f44aeba51d1f..f6257fb4f08c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -310,7 +310,7 @@ static struct perf_evsel *
__add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr,
char *name, struct cpu_map *cpus,
- struct list_head *config_terms)
+ struct list_head *config_terms, bool auto_merge_stats)
{
struct perf_evsel *evsel;
@@ -324,6 +324,7 @@ __add_event(struct list_head *list, int *idx,
evsel->cpus = cpu_map__get(cpus);
evsel->own_cpus = cpu_map__get(cpus);
evsel->system_wide = !!cpus;
+ evsel->auto_merge_stats = auto_merge_stats;
if (name)
evsel->name = strdup(name);
@@ -339,7 +340,7 @@ static int add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr, char *name,
struct list_head *config_terms)
{
- return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM;
+ return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM;
}
static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
@@ -1209,9 +1210,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
get_config_name(head_config), &config_terms);
}
-int parse_events_add_pmu(struct parse_events_state *parse_state,
+static int __parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
- struct list_head *head_config)
+ struct list_head *head_config, bool auto_merge_stats)
{
struct perf_event_attr attr;
struct perf_pmu_info info;
@@ -1232,7 +1233,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
if (!head_config) {
attr.type = pmu->type;
- evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL);
+ evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats);
return evsel ? 0 : -ENOMEM;
}
@@ -1254,7 +1255,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel = __add_event(list, &parse_state->idx, &attr,
get_config_name(head_config), pmu->cpus,
- &config_terms);
+ &config_terms, auto_merge_stats);
if (evsel) {
evsel->unit = info.unit;
evsel->scale = info.scale;
@@ -1267,6 +1268,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
return evsel ? 0 : -ENOMEM;
}
+int parse_events_add_pmu(struct parse_events_state *parse_state,
+ struct list_head *list, char *name,
+ struct list_head *head_config)
+{
+ return __parse_events_add_pmu(parse_state, list, name, head_config, false);
+}
+
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
char *str, struct list_head **listp)
{
@@ -1296,8 +1304,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
return -1;
list_add_tail(&term->list, head);
- if (!parse_events_add_pmu(parse_state, list,
- pmu->name, head)) {
+ if (!__parse_events_add_pmu(parse_state, list,
+ pmu->name, head, true)) {
pr_debug("%s -> %s/%s/\n", str,
pmu->name, alias->str);
ok++;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ac863691605f..a7ebd9fe8e40 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1120,6 +1120,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
if (sample_type & PERF_SAMPLE_DATA_SRC)
printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+ printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
+
if (sample_type & PERF_SAMPLE_TRANSACTION)
printf("... transaction: %" PRIx64 "\n", sample->transaction);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 12359bd986db..eb3ab902a1c0 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1316,6 +1316,47 @@ struct sort_entry sort_mem_dcacheline = {
};
static int64_t
+sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = 0, r = 0;
+
+ if (left->mem_info)
+ l = left->mem_info->daddr.phys_addr;
+ if (right->mem_info)
+ r = right->mem_info->daddr.phys_addr;
+
+ return (int64_t)(r - l);
+}
+
+static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ uint64_t addr = 0;
+ size_t ret = 0;
+ size_t len = BITS_PER_LONG / 4;
+
+ addr = he->mem_info->daddr.phys_addr;
+
+ ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level);
+
+ ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, addr);
+
+ ret += repsep_snprintf(bf + ret, size - ret, "%-*s", width - ret, "");
+
+ if (ret > width)
+ bf[width] = '\0';
+
+ return width;
+}
+
+struct sort_entry sort_mem_phys_daddr = {
+ .se_header = "Data Physical Address",
+ .se_cmp = sort__phys_daddr_cmp,
+ .se_snprintf = hist_entry__phys_daddr_snprintf,
+ .se_width_idx = HISTC_MEM_PHYS_DADDR,
+};
+
+static int64_t
sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
{
if (!left->branch_info || !right->branch_info)
@@ -1547,6 +1588,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
+ DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
};
#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index b7c75597e18f..f36dc4980a6c 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -245,6 +245,7 @@ enum sort_type {
SORT_MEM_SNOOP,
SORT_MEM_DCACHELINE,
SORT_MEM_IADDR_SYMBOL,
+ SORT_MEM_PHYS_DADDR,
};
/*
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index d00a012cfdfb..2bd6a1f01a1c 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -186,6 +186,7 @@ struct addr_map_symbol {
struct symbol *sym;
u64 addr;
u64 al_addr;
+ u64 phys_addr;
};
struct branch_info {
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index bbb4c1957578..19e5db90394c 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -19,6 +19,7 @@
#ifdef HAVE_SYSCALL_TABLE
#include <linux/compiler.h>
#include <string.h>
+#include "string2.h"
#include "util.h"
#if defined(__x86_64__)
@@ -105,6 +106,27 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
return sc ? sc->id : -1;
}
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+ int i;
+ struct syscall *syscalls = tbl->syscalls.entries;
+
+ for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) {
+ if (strglobmatch(syscalls[i].name, syscall_glob)) {
+ *idx = i;
+ return syscalls[i].id;
+ }
+ }
+
+ return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+ *idx = -1;
+ return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
+
#else /* HAVE_SYSCALL_TABLE */
#include <libaudit.h>
@@ -131,4 +153,15 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
{
return audit_name_to_syscall(name, tbl->audit_machine);
}
+
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
+ const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
+{
+ return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+ return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
#endif /* HAVE_SYSCALL_TABLE */
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
index e2951510484f..e9fb8786da7c 100644
--- a/tools/perf/util/syscalltbl.h
+++ b/tools/perf/util/syscalltbl.h
@@ -17,4 +17,7 @@ void syscalltbl__delete(struct syscalltbl *tbl);
const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
int syscalltbl__id(struct syscalltbl *tbl, const char *name);
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+
#endif /* __PERF_SYSCALLTBL_H */