summaryrefslogtreecommitdiff
path: root/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2019-05-18 10:24:43 +0200
committerIngo Molnar <mingo@kernel.org>2019-05-18 10:24:43 +0200
commit62e1c09418fc16d27720b128275cac61367e2c1b (patch)
tree4759aa6662b1398e2b93696ace58f6f309722b06 /tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
parent01be377c62210a8d8fef35be906f9349591bb7cd (diff)
parent4fc4d8dfa056dfd48afe73b9ea3b7570ceb80b9c (diff)
Merge tag 'perf-core-for-mingo-5.2-20190517' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: perf.data: Alexey Budankov: - Streaming compression of perf ring buffer into PERF_RECORD_COMPRESSED user space records, resulting in ~3-5x perf.data file size reduction on variety of tested workloads what saves storage space on larger server systems where perf.data size can easily reach several tens or even hundreds of GiBs, especially when profiling with DWARF-based stacks and tracing of context switches. perf record: Arnaldo Carvalho de Melo - Improve -user-regs/intr-regs suggestions to overcome errors. perf annotate: Jin Yao: - Remove hist__account_cycles() from callback, speeding up branch processing (perf record -b). perf stat: - Add a 'percore' event qualifier, e.g.: -e cpu/event=0,umask=0x3,percore=1/, that sums up the event counts for both hardware threads in a core. We can already do this with --per-core, but it's often useful to do this together with other metrics that are collected per hardware thread. I.e. now its possible to do this per-event, and have it mixed with other events not aggregated by core. core libraries: Donald Yandt: - Check for errors when doing fgets(/proc/version). Jiri Olsa: - Speed up report for perf compiled with linbunwind. tools headers: Arnaldo Carvalho de Melo - Update memcpy_64.S, x86's kvm.h and pt_regs.h. arm64: Florian Fainelli: - Map Brahma-B53 CPUID to cortex-a53 events. - Add Cortex-A57 and Cortex-A72 events. csky: Mao Han: - Add DWARF register mappings for libdw, allowing --call-graph=dwarf to work on the C-SKY arch. x86: Andi Kleen/Kan Liang: - Add support for recording and printing XMM registers, available, for instance, on Icelake. Kan Liang: - Add uncore_upi (Intel's "Ultra Path Interconnect" events) JSON support. UPI replaced the Intel QuickPath Interconnect (QPI) in Xeon Skylake-SP. Intel PT: Adrian Hunter . Fix instructions sampling rate. . Timestamp fixes. . Improve exported-sql-viewer GUI, allowing, for instance, to copy'n'paste the trees, useful for e-mailing. Documentation: Thomas Richter: - Add description for 'perf --debug stderr=1', which redirects stderr to stdout. libtraceevent: Tzvetomir Stoyanov: - Add man pages for the various APIs. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/util/intel-pt-decoder/intel-pt-decoder.c')
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c31
1 files changed, 24 insertions, 7 deletions
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 872fab163585..f4c3c84b090f 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -58,6 +58,7 @@ enum intel_pt_pkt_state {
INTEL_PT_STATE_NO_IP,
INTEL_PT_STATE_ERR_RESYNC,
INTEL_PT_STATE_IN_SYNC,
+ INTEL_PT_STATE_TNT_CONT,
INTEL_PT_STATE_TNT,
INTEL_PT_STATE_TIP,
INTEL_PT_STATE_TIP_PGD,
@@ -72,8 +73,9 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
case INTEL_PT_STATE_NO_IP:
case INTEL_PT_STATE_ERR_RESYNC:
case INTEL_PT_STATE_IN_SYNC:
- case INTEL_PT_STATE_TNT:
+ case INTEL_PT_STATE_TNT_CONT:
return true;
+ case INTEL_PT_STATE_TNT:
case INTEL_PT_STATE_TIP:
case INTEL_PT_STATE_TIP_PGD:
case INTEL_PT_STATE_FUP:
@@ -888,16 +890,20 @@ static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
masked_timestamp = timestamp & decoder->period_mask;
if (decoder->continuous_period) {
- if (masked_timestamp != decoder->last_masked_timestamp)
+ if (masked_timestamp > decoder->last_masked_timestamp)
return 1;
} else {
timestamp += 1;
masked_timestamp = timestamp & decoder->period_mask;
- if (masked_timestamp != decoder->last_masked_timestamp) {
+ if (masked_timestamp > decoder->last_masked_timestamp) {
decoder->last_masked_timestamp = masked_timestamp;
decoder->continuous_period = true;
}
}
+
+ if (masked_timestamp < decoder->last_masked_timestamp)
+ return decoder->period_ticks;
+
return decoder->period_ticks - (timestamp - masked_timestamp);
}
@@ -926,7 +932,10 @@ static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
case INTEL_PT_PERIOD_TICKS:
timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
masked_timestamp = timestamp & decoder->period_mask;
- decoder->last_masked_timestamp = masked_timestamp;
+ if (masked_timestamp > decoder->last_masked_timestamp)
+ decoder->last_masked_timestamp = masked_timestamp;
+ else
+ decoder->last_masked_timestamp += decoder->period_ticks;
break;
case INTEL_PT_PERIOD_NONE:
case INTEL_PT_PERIOD_MTC:
@@ -1254,7 +1263,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
return -ENOENT;
}
decoder->tnt.count -= 1;
- if (!decoder->tnt.count)
+ if (decoder->tnt.count)
+ decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+ else
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
decoder->tnt.payload <<= 1;
decoder->state.from_ip = decoder->ip;
@@ -1285,7 +1296,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
decoder->tnt.count -= 1;
- if (!decoder->tnt.count)
+ if (decoder->tnt.count)
+ decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+ else
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
if (decoder->tnt.payload & BIT63) {
decoder->tnt.payload <<= 1;
@@ -1305,8 +1318,11 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
return 0;
}
decoder->ip += intel_pt_insn.length;
- if (!decoder->tnt.count)
+ if (!decoder->tnt.count) {
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
return -EAGAIN;
+ }
decoder->tnt.payload <<= 1;
continue;
}
@@ -2365,6 +2381,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
err = intel_pt_walk_trace(decoder);
break;
case INTEL_PT_STATE_TNT:
+ case INTEL_PT_STATE_TNT_CONT:
err = intel_pt_walk_tnt(decoder);
if (err == -EAGAIN)
err = intel_pt_walk_trace(decoder);