summaryrefslogtreecommitdiff
path: root/tools/perf/util/sort.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-02-22 13:59:43 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2021-02-22 13:59:43 -0800
commit3a36281a17199737b468befb826d4a23eb774445 (patch)
tree18bbdfcb0c30215883809f248d0334b7ea84d5ba /tools/perf/util/sort.c
parent7c70f3a7488d2fa62d32849d138bf2b8420fe788 (diff)
parent3027ce36ccbae74f2e7c1afbfc3f69fee0c2a996 (diff)
Merge tag 'perf-tools-for-v5.12-2020-02-19' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tool updates from Arnaldo Carvalho de Melo: "New features: - Support instruction latency in 'perf report', with both memory latency (weight) and instruction latency information, users can locate expensive load instructions and understand time spent in different stages. - Extend 'perf c2c' to display the number of loads which were blocked by data or address conflict. - Add 'perf stat' support for L2 topdown events in systems such as Intel's Sapphire rapids server. - Add support for PERF_SAMPLE_CODE_PAGE_SIZE in various tools, as a sort key, for instance: perf report --stdio --sort=comm,symbol,code_page_size - New 'perf daemon' command to run long running sessions while providing a way to control the enablement of events without restarting a traditional 'perf record' session. - Enable counting events for BPF programs in 'perf stat' just like for other targets (tid, cgroup, cpu, etc), e.g.: # perf stat -e ref-cycles,cycles -b 254 -I 1000 1.487903822 115,200 ref-cycles 1.487903822 86,012 cycles 2.489147029 80,560 ref-cycles 2.489147029 73,784 cycles ^C The example above counts 'cycles' and 'ref-cycles' of BPF program of id 254. It is similar to bpftool-prog-profile command, but more flexible. - Support the new layout for PERF_RECORD_MMAP2 to carry the DSO build-id using infrastructure generalised from the eBPF subsystem, removing the need for traversing the perf.data file to collect build-ids at the end of 'perf record' sessions and helping with long running sessions where binaries can get replaced in updates, leading to possible mis-resolution of symbols. - Support filtering by hex address in 'perf script'. - Support DSO filter in 'perf script', like in other perf tools. - Add namespaces support to 'perf inject' - Add support for SDT (Dtrace Style Markers) events on ARM64. perf record: - Fix handling of eventfd() when draining a buffer in 'perf record'. - Improvements to the generation of metadata events for pre-existing threads (mmaps, comm, etc), speeding up the work done at the start of system wide or per CPU 'perf record' sessions. Hardware tracing: - Initial support for tracing KVM with Intel PT. - Intel PT fixes for IPC - Support Intel PT PSB (synchronization packets) events. - Automatically group aux-output events to overcome --filter syntax. - Enable PERF_SAMPLE_DATA_SRC on ARMs SPE. - Update ARM's CoreSight hardware tracing OpenCSD library to v1.0.0. perf annotate TUI: - Fix handling of 'k' ("show line number") hotkey - Fix jump parsing for C++ code. perf probe: - Add protection to avoid endless loop. cgroups: - Avoid reading cgroup mountpoint multiple times, caching it. - Fix handling of cgroup v1/v2 in mixed hierarchy. Symbol resolving: - Add OCaml symbol demangling. - Further fixes for handling PE executables when using perf with Wine and .exe/.dll files. - Fix 'perf unwind' DSO handling. - Resolve symbols against debug file first, to deal with artifacts related to LTO. - Fix gap between kernel end and module start on powerpc. Reporting tools: - The DSO filter shouldn't show samples in unresolved maps. - Improve debuginfod support in various tools. build ids: - Fix 16-byte build ids in 'perf buildid-cache', add a 'perf test' entry for that case. perf test: - Support for PERF_SAMPLE_WEIGHT_STRUCT. - Add test case for PERF_SAMPLE_CODE_PAGE_SIZE. - Shell based tests for 'perf daemon's commands ('start', 'stop, 'reconfig', 'list', etc). - ARM cs-etm 'perf test' fixes. - Add parse-metric memory bandwidth testcase. Compiler related: - Fix 'perf probe' kretprobe issue caused by gcc 11 bug when used with -fpatchable-function-entry. - Fix ARM64 build with gcc 11's -Wformat-overflow. - Fix unaligned access in sample parsing test. - Fix printf conversion specifier for IP addresses on arm64, s390 and powerpc. Arch specific: - Support exposing Performance Monitor Counter SPRs as part of extended regs on powerpc. - Add JSON 'perf stat' metrics for ARM64's imx8mp, imx8mq and imx8mn DDR, fix imx8mm ones. - Fix common and uarch events for ARM64's A76 and Ampere eMag" * tag 'perf-tools-for-v5.12-2020-02-19' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (148 commits) perf buildid-cache: Don't skip 16-byte build-ids perf buildid-cache: Add test for 16-byte build-id perf symbol: Remove redundant libbfd checks perf test: Output the sub testing result in cs-etm perf test: Suppress logs in cs-etm testing perf tools: Fix arm64 build error with gcc-11 perf intel-pt: Add documentation for tracing virtual machines perf intel-pt: Split VM-Entry and VM-Exit branches perf intel-pt: Adjust sample flags for VM-Exit perf intel-pt: Allow for a guest kernel address filter perf intel-pt: Support decoding of guest kernel perf machine: Factor out machine__idle_thread() perf machine: Factor out machines__find_guest() perf intel-pt: Amend decoder to track the NR flag perf intel-pt: Retain the last PIP packet payload as is perf intel_pt: Add vmlaunch and vmresume as branches perf script: Add branch types for VM-Entry and VM-Exit perf auxtrace: Automatically group aux-output events perf test: Fix unaligned access in sample parsing test perf tools: Support arch specific PERF_SAMPLE_WEIGHT_STRUCT processing ...
Diffstat (limited to 'tools/perf/util/sort.c')
-rw-r--r--tools/perf/util/sort.c109
1 files changed, 108 insertions, 1 deletions
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 80907bc32683..0d5ad42812b9 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -36,7 +36,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault";
const char *parent_pattern = default_parent_pattern;
const char *default_sort_order = "comm,dso,symbol";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
-const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
+const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
const char default_tracepoint_sort_order[] = "trace";
@@ -1365,6 +1365,49 @@ struct sort_entry sort_global_weight = {
.se_width_idx = HISTC_GLOBAL_WEIGHT,
};
+static u64 he_ins_lat(struct hist_entry *he)
+{
+ return he->stat.nr_events ? he->stat.ins_lat / he->stat.nr_events : 0;
+}
+
+static int64_t
+sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return he_ins_lat(left) - he_ins_lat(right);
+}
+
+static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width, he_ins_lat(he));
+}
+
+struct sort_entry sort_local_ins_lat = {
+ .se_header = "Local INSTR Latency",
+ .se_cmp = sort__local_ins_lat_cmp,
+ .se_snprintf = hist_entry__local_ins_lat_snprintf,
+ .se_width_idx = HISTC_LOCAL_INS_LAT,
+};
+
+static int64_t
+sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->stat.ins_lat - right->stat.ins_lat;
+}
+
+static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width, he->stat.ins_lat);
+}
+
+struct sort_entry sort_global_ins_lat = {
+ .se_header = "INSTR Latency",
+ .se_cmp = sort__global_ins_lat_cmp,
+ .se_snprintf = hist_entry__global_ins_lat_snprintf,
+ .se_width_idx = HISTC_GLOBAL_INS_LAT,
+};
+
struct sort_entry sort_mem_daddr_sym = {
.se_header = "Data Symbol",
.se_cmp = sort__daddr_cmp,
@@ -1422,6 +1465,41 @@ struct sort_entry sort_mem_dcacheline = {
};
static int64_t
+sort__blocked_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ union perf_mem_data_src data_src_l;
+ union perf_mem_data_src data_src_r;
+
+ if (left->mem_info)
+ data_src_l = left->mem_info->data_src;
+ else
+ data_src_l.mem_blk = PERF_MEM_BLK_NA;
+
+ if (right->mem_info)
+ data_src_r = right->mem_info->data_src;
+ else
+ data_src_r.mem_blk = PERF_MEM_BLK_NA;
+
+ return (int64_t)(data_src_r.mem_blk - data_src_l.mem_blk);
+}
+
+static int hist_entry__blocked_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char out[16];
+
+ perf_mem__blk_scnprintf(out, sizeof(out), he->mem_info);
+ return repsep_snprintf(bf, size, "%.*s", width, out);
+}
+
+struct sort_entry sort_mem_blocked = {
+ .se_header = "Blocked",
+ .se_cmp = sort__blocked_cmp,
+ .se_snprintf = hist_entry__blocked_snprintf,
+ .se_width_idx = HISTC_MEM_BLOCKED,
+};
+
+static int64_t
sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
uint64_t l = 0, r = 0;
@@ -1492,6 +1570,31 @@ struct sort_entry sort_mem_data_page_size = {
};
static int64_t
+sort__code_page_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = left->code_page_size;
+ uint64_t r = right->code_page_size;
+
+ return (int64_t)(r - l);
+}
+
+static int hist_entry__code_page_size_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char str[PAGE_SIZE_NAME_LEN];
+
+ return repsep_snprintf(bf, size, "%-*s", width,
+ get_page_size_name(he->code_page_size, str));
+}
+
+struct sort_entry sort_code_page_size = {
+ .se_header = "Code Page Size",
+ .se_cmp = sort__code_page_size_cmp,
+ .se_snprintf = hist_entry__code_page_size_snprintf,
+ .se_width_idx = HISTC_CODE_PAGE_SIZE,
+};
+
+static int64_t
sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
{
if (!left->branch_info || !right->branch_info)
@@ -1735,6 +1838,9 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null),
DIM(SORT_TIME, "time", sort_time),
+ DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
+ DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
+ DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
};
#undef DIM
@@ -1770,6 +1876,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size),
+ DIM(SORT_MEM_BLOCKED, "blocked", sort_mem_blocked),
};
#undef DIM