From e1717e0485af4f47fc4da1e979ac817f9ad61b0f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 20 Jul 2016 12:00:06 +0300 Subject: perf intel-pt: Fix ip compression The June 2015 Intel SDM introduced IP Compression types 4 and 6. Refer to section 36.4.2.2 Target IP (TIP) Packet - IP Compression. Existing Intel PT packet decoder did not support type 4, and got type 6 wrong. Because type 3 and type 4 have the same number of bytes, the packet 'count' has been changed from being the number of ip bytes to being the type code. That allows the Intel PT decoder to correctly decide whether to sign-extend or use the last ip. However that also meant the code had to be adjusted in a number of places. Currently hardware is not using the new compression types, so this fix has no effect on existing hardware. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1469005206-3049-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 44 +++++++++++----------- .../util/intel-pt-decoder/intel-pt-pkt-decoder.c | 24 ++++++++---- 2 files changed, 40 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 9c8f15da86ce..8ff6c6a61291 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -123,8 +123,6 @@ struct intel_pt_decoder { bool have_calc_cyc_to_tsc; int exec_mode; unsigned int insn_bytes; - uint64_t sign_bit; - uint64_t sign_bits; uint64_t period; enum intel_pt_period_type period_type; uint64_t tot_insn_cnt; @@ -191,9 +189,6 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->data = params->data; decoder->return_compression = params->return_compression; - decoder->sign_bit = (uint64_t)1 << 47; - decoder->sign_bits = ~(((uint64_t)1 << 48) - 1); - decoder->period = params->period; decoder->period_type = params->period_type; @@ -362,21 +357,30 @@ int intel_pt__strerror(int code, char *buf, size_t buflen) return 0; } -static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, - const struct intel_pt_pkt *packet, +static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet, uint64_t last_ip) { uint64_t ip; switch (packet->count) { - case 2: + case 1: ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) | packet->payload; break; - case 4: + case 2: ip = (last_ip & (uint64_t)0xffffffff00000000ULL) | packet->payload; break; + case 3: + ip = packet->payload; + /* Sign-extend 6-byte ip */ + if (ip & (uint64_t)0x800000000000ULL) + ip |= (uint64_t)0xffff000000000000ULL; + break; + case 4: + ip = (last_ip & (uint64_t)0xffff000000000000ULL) | + packet->payload; + break; case 6: ip = packet->payload; break; @@ -384,16 +388,12 @@ static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, return 0; } - if (ip & decoder->sign_bit) - return ip | decoder->sign_bits; - return ip; } static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) { - decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet, - decoder->last_ip); + decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip); } static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) @@ -1657,6 +1657,12 @@ next: } } +static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder) +{ + return decoder->last_ip || decoder->packet.count == 0 || + decoder->packet.count == 3 || decoder->packet.count == 6; +} + /* Walk PSB+ packets to get in sync. */ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) { @@ -1677,8 +1683,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - if (decoder->last_ip || decoder->packet.count == 6 || - decoder->packet.count == 0) { + if (intel_pt_have_ip(decoder)) { uint64_t current_ip = decoder->ip; intel_pt_set_ip(decoder); @@ -1767,8 +1772,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; - if (decoder->last_ip || decoder->packet.count == 6 || - decoder->packet.count == 0) + if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (decoder->ip) return 0; @@ -1776,9 +1780,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: if (decoder->overflow) { - if (decoder->last_ip || - decoder->packet.count == 6 || - decoder->packet.count == 0) + if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (decoder->ip) return 0; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index b1257c816310..4f7b32020487 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -292,36 +292,46 @@ static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { - switch (byte >> 5) { + int ip_len; + + packet->count = byte >> 5; + + switch (packet->count) { case 0: - packet->count = 0; + ip_len = 0; break; case 1: if (len < 3) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 2; + ip_len = 2; packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); break; case 2: if (len < 5) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 4; + ip_len = 4; packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1)); break; case 3: - case 6: + case 4: if (len < 7) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 6; + ip_len = 6; memcpy_le64(&packet->payload, buf + 1, 6); break; + case 6: + if (len < 9) + return INTEL_PT_NEED_MORE_BYTES; + ip_len = 8; + packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1)); + break; default: return INTEL_PT_BAD_PACKET; } packet->type = type; - return packet->count + 1; + return ip_len + 1; } static int intel_pt_get_mode(const unsigned char *buf, size_t len, -- cgit From 33da54fa86e29b87fe1e83bd0f15b4ef2be53ecb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 11 Aug 2016 10:50:57 +0200 Subject: perf tools mem: Fix -t store option for record command Michael reported 'perf mem -t store record' being broken. The reason is latest rework of this area: commit acbe613e0c03 ("perf tools: Add monitored events array") We don't mark perf_mem_events store record when -t store option is specified. Committer notes: Before: # perf mem -t store record usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data (7 samples) ] # perf evlist cycles:ppp # After: # perf mem -t store record usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data (7 samples) ] # perf evlist cpu/mem-stores/P # Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: acbe613e0c03 ("perf tools: Add monitored events array") Link: http://lkml.kernel.org/r/1470905457-18311-1-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index d608a2c9e48c..d1ce29be560e 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -88,6 +88,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (mem->operation & MEM_OPERATION_LOAD) perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; + if (mem->operation & MEM_OPERATION_STORE) + perf_mem_events[PERF_MEM_EVENTS__STORE].record = true; + if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) rec_argv[i++] = "-W"; -- cgit From f046f3df665361d2f89e660f8c79ba164069f02a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 11 Aug 2016 14:43:59 -0300 Subject: perf ppc64le: Fix build failure when libelf is not present arch__post_process_probe_trace_events() calls get_target_map() to prepare symbol table. get_target_map() is defined inside util/probe-event.c. probe-event.c will only get included in perf binary if CONFIG_LIBELF is set. Hence arch__post_process_probe_trace_events() needs to be defined inside #ifdef HAVE_LIBELF_SUPPORT to solve compilation error. Reported-and-Tested-by: Anton Blanchard Tested-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Alexander Shishkin Cc: Balbir Singh Cc: Naveen N. Rao Cc: Ananth N Mavinakayanahalli Cc: Masami Hiramatsu Cc: Wang Nan Cc: Namhyung Kim Link: http://lkml.kernel.org/r/57ABFF88.8030905@linux.vnet.ibm.com [ Thunderbird MUA mangled it, fix that ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/sym-handling.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 8d4dc97d80ba..35745a733100 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -97,6 +97,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, } } +#ifdef HAVE_LIBELF_SUPPORT void arch__post_process_probe_trace_events(struct perf_probe_event *pev, int ntevs) { @@ -118,5 +119,6 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev, } } } +#endif /* HAVE_LIBELF_SUPPORT */ #endif -- cgit From 49a7f01064a94c4fd6afb6b4f7e3ccc37d0edf99 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2016 01:12:10 -0300 Subject: perf jitdump: Add the right header to get the major()/minor() definitions Noticed on Fedora Rawhide: $ gcc --version gcc (GCC) 6.1.1 20160721 (Red Hat 6.1.1-4) $ rpm -q glibc glibc-2.24.90-1.fc26.x86_64 $ CC /tmp/build/perf/util/jitdump.o util/jitdump.c: In function 'jit_repipe_code_load': util/jitdump.c:428:2: error: '__major_from_sys_types' is deprecated: In the GNU C Library, `major' is defined by . For historical compatibility, it is currently defined by as well, but we plan to remove this soon. To use `major', include directly. If you did not intend to use a system-defined macro `major', you should #undef it after including . [-Werror=deprecated-declarations] event->mmap2.maj = major(st.st_dev); ^~~~~ In file included from /usr/include/features.h:397:0, from /usr/include/sys/types.h:25, from util/jitdump.c:1: /usr/include/sys/sysmacros.h:87:1: note: declared here __SYSMACROS_DEFINE_MAJOR (__SYSMACROS_FST_IMPL_TEMPL) Fix it following that recomendation. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-3majvd0adhfr25rvx4v5e9te@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/jitdump.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 9f3305f6b6d5..95f0884aae02 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -1,3 +1,4 @@ +#include #include #include #include -- cgit From 88ded4d8d94a550624e1827478e13fecf97a7b0a Mon Sep 17 00:00:00 2001 From: He Kuang Date: Thu, 4 Aug 2016 11:25:42 +0000 Subject: perf script: Show proper message when failed list scripts Perf shows the usage message when perf scripts folder failed to open, which misleads users to let them think the command is being mistyped. This patch shows a proper message and guides users to check the PERF_EXEC_PATH environment variable in that case. Before: $ perf script --list Usage: perf script [] or: perf script [] record