summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tools/perf/Documentation/perf-report.txt6
-rw-r--r--tools/perf/util/event.h1
-rw-r--r--tools/perf/util/evsel.c4
-rw-r--r--tools/perf/util/hist.c12
-rw-r--r--tools/perf/util/hist.h2
-rw-r--r--tools/perf/util/intel-pt.c5
-rw-r--r--tools/perf/util/session.c8
-rw-r--r--tools/perf/util/sort.c47
-rw-r--r--tools/perf/util/sort.h3
-rw-r--r--tools/perf/util/synthetic-events.c4
10 files changed, 81 insertions, 11 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index b9686a131ed8..f546b5e9db05 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -109,6 +109,9 @@ OPTIONS
- time: Separate the samples by time stamp with the resolution specified by
--time-quantum (default 100ms). Specify with overhead and before it.
- code_page_size: the code page size of sampled code address (ip)
+ - ins_lat: Instruction latency in core cycles. This is the global instruction
+ latency, i.e. the latency summed over all samples of the entry.
+ - local_ins_lat: Local instruction latency, i.e. the average per sample.
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
@@ -155,7 +158,8 @@ OPTIONS
- blocked: reason of blocked load access for the data at the time of the sample
And the default sort keys are changed to local_weight, mem, sym, dso,
- symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, see '--mem-mode'.
+ symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat,
+ see '--mem-mode'.
If the data file has tracepoint event(s), following (dynamic) sort keys
are also available:
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 2afea7247dd3..60752e4c9727 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -142,6 +142,7 @@ struct perf_sample {
u16 insn_len;
u8 cpumode;
u16 misc;
+ u16 ins_lat;
bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index fa49d15edc35..844aebd9c306 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2352,8 +2352,10 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
weight.full = *array;
if (type & PERF_SAMPLE_WEIGHT)
data->weight = weight.full;
- else
+ else {
data->weight = weight.var1_dw;
+ data->ins_lat = weight.var2_w;
+ }
array++;
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 4038b086cb80..c82f5fc26af8 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -209,6 +209,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
+ hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
+ hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
else
@@ -287,12 +289,13 @@ static long hist_time(unsigned long htime)
}
static void he_stat__add_period(struct he_stat *he_stat, u64 period,
- u64 weight)
+ u64 weight, u64 ins_lat)
{
he_stat->period += period;
he_stat->weight += weight;
he_stat->nr_events += 1;
+ he_stat->ins_lat += ins_lat;
}
static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
@@ -304,6 +307,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
dest->period_guest_us += src->period_guest_us;
dest->nr_events += src->nr_events;
dest->weight += src->weight;
+ dest->ins_lat += src->ins_lat;
}
static void he_stat__decay(struct he_stat *he_stat)
@@ -592,6 +596,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
int64_t cmp;
u64 period = entry->stat.period;
u64 weight = entry->stat.weight;
+ u64 ins_lat = entry->stat.ins_lat;
bool leftmost = true;
p = &hists->entries_in->rb_root.rb_node;
@@ -610,11 +615,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
if (!cmp) {
if (sample_self) {
- he_stat__add_period(&he->stat, period, weight);
+ he_stat__add_period(&he->stat, period, weight, ins_lat);
hist_entry__add_callchain_period(he, period);
}
if (symbol_conf.cumulate_callchain)
- he_stat__add_period(he->stat_acc, period, weight);
+ he_stat__add_period(he->stat_acc, period, weight, ins_lat);
/*
* This mem info was allocated from sample__resolve_mem
@@ -725,6 +730,7 @@ __hists__add_entry(struct hists *hists,
.nr_events = 1,
.period = sample->period,
.weight = sample->weight,
+ .ins_lat = sample->ins_lat,
},
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent) | al->filtered,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 3788391c50c7..3c537232294b 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -73,6 +73,8 @@ enum hist_column {
HISTC_DSO_SIZE,
HISTC_SYMBOL_IPC,
HISTC_MEM_BLOCKED,
+ HISTC_LOCAL_INS_LAT,
+ HISTC_GLOBAL_INS_LAT,
HISTC_NR_COLS, /* Last entry */
};
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index a929f6dbdf43..c9477d021687 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1871,9 +1871,10 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
* cycles. Use latency >> 32 to distinguish the
* different format of the mem access latency field.
*/
- if (weight > 0)
+ if (weight > 0) {
sample.weight = weight & 0xffff;
- else
+ sample.ins_lat = items->mem_access_latency & 0xffff;
+ } else
sample.weight = items->mem_access_latency;
}
if (!sample.weight && items->has_tsx_aux_info) {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 053c08c8c850..f4aeb1af05d8 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1300,8 +1300,12 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
if (sample_type & PERF_SAMPLE_STACK_USER)
stack_user__printf(&sample->user_stack);
- if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
- printf("... weight: %" PRIu64 "\n", sample->weight);
+ if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+ printf("... weight: %" PRIu64 "", sample->weight);
+ if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+ printf(",0x%"PRIx16"", sample->ins_lat);
+ printf("\n");
+ }
if (sample_type & PERF_SAMPLE_DATA_SRC)
printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index e29a24b41b67..0d5ad42812b9 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -36,7 +36,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault";
const char *parent_pattern = default_parent_pattern;
const char *default_sort_order = "comm,dso,symbol";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
-const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked";
+const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
const char default_tracepoint_sort_order[] = "trace";
@@ -1365,6 +1365,49 @@ struct sort_entry sort_global_weight = {
.se_width_idx = HISTC_GLOBAL_WEIGHT,
};
+static u64 he_ins_lat(struct hist_entry *he)
+{
+ return he->stat.nr_events ? he->stat.ins_lat / he->stat.nr_events : 0;
+}
+
+static int64_t
+sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return he_ins_lat(left) - he_ins_lat(right);
+}
+
+/*
+ * Format the local instruction latency of @he (as computed by he_ins_lat())
+ * into @bf, left-justified in a field of @width characters. Returns the
+ * value returned by repsep_snprintf().
+ *
+ * he_ins_lat() yields a u64, so it is printed with %llu through an explicit
+ * cast; a plain %u conversion would not match the argument type.
+ */
+static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*llu", width,
+ (unsigned long long)he_ins_lat(he));
+}
+
+struct sort_entry sort_local_ins_lat = {
+ .se_header = "Local INSTR Latency",
+ .se_cmp = sort__local_ins_lat_cmp,
+ .se_snprintf = hist_entry__local_ins_lat_snprintf,
+ .se_width_idx = HISTC_LOCAL_INS_LAT,
+};
+
+static int64_t
+sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->stat.ins_lat - right->stat.ins_lat;
+}
+
+/*
+ * Format the accumulated instruction latency of @he (he->stat.ins_lat) into
+ * @bf, left-justified in a field of @width characters. Returns the value
+ * returned by repsep_snprintf().
+ *
+ * The counter is a u64, so it is printed with %llu through an explicit cast;
+ * a plain %u conversion would not match the argument type.
+ */
+static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*llu", width,
+ (unsigned long long)he->stat.ins_lat);
+}
+
+struct sort_entry sort_global_ins_lat = {
+ .se_header = "INSTR Latency",
+ .se_cmp = sort__global_ins_lat_cmp,
+ .se_snprintf = hist_entry__global_ins_lat_snprintf,
+ .se_width_idx = HISTC_GLOBAL_INS_LAT,
+};
+
struct sort_entry sort_mem_daddr_sym = {
.se_header = "Data Symbol",
.se_cmp = sort__daddr_cmp,
@@ -1796,6 +1839,8 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null),
DIM(SORT_TIME, "time", sort_time),
DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
+ DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
+ DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
};
#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 984e54533ae1..63f67a3f3630 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -50,6 +50,7 @@ struct he_stat {
u64 period_guest_sys;
u64 period_guest_us;
u64 weight;
+ u64 ins_lat;
u32 nr_events;
};
@@ -231,6 +232,8 @@ enum sort_type {
SORT_SYM_IPC_NULL,
SORT_TIME,
SORT_CODE_PAGE_SIZE,
+ SORT_LOCAL_INS_LAT,
+ SORT_GLOBAL_INS_LAT,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 4e9266f75175..c6f9db3faf83 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1644,8 +1644,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
if (type & PERF_SAMPLE_WEIGHT_TYPE) {
*array = sample->weight;
- if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+ if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
*array &= 0xffffffff;
+ *array |= ((u64)sample->ins_lat << 32);
+ }
array++;
}