From aeafd623f866c429307e3a4a39998f5f06b4f00e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Dec 2016 14:18:51 +0100 Subject: perf tools: Move headers check into bash script To make it nicer and easily maintainable. Also moving the check into fixdep sub make, so its output is not scattered around the build output. Removing extra $$ from mman*.h checks. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481030331-31944-5-git-send-email-jolsa@kernel.org [ Use /bin/sh, and 'function check() {' -> 'check () {' to make it work with busybox, in Alpine Linux, for instance ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 94 +-------------------------------------------- tools/perf/check-headers.sh | 59 ++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 93 deletions(-) create mode 100755 tools/perf/check-headers.sh (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8f1c258b151a..e9ec531131ca 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -201,6 +201,7 @@ goals := $(filter-out all sub-make, $(MAKECMDGOALS)) $(goals) all: sub-make sub-make: fixdep + @./check-headers.sh $(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals) else # force_fixdep @@ -404,99 +405,6 @@ export JEVENTS build := -f $(srctree)/tools/build/Makefile.build dir=. obj $(PERF_IN): prepare FORCE - @(test -f ../../include/uapi/linux/perf_event.h && ( \ - (diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/perf_event.h differs from kernel" >&2 )) || true - @(test -f ../../include/linux/hash.h && ( \ - (diff -B ../include/linux/hash.h ../../include/linux/hash.h >/dev/null) \ - || echo "Warning: tools/include/linux/hash.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/linux/hw_breakpoint.h && ( \ - (diff -B ../include/uapi/linux/hw_breakpoint.h ../../include/uapi/linux/hw_breakpoint.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/hw_breakpoint.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/disabled-features.h && ( \ - (diff -B ../arch/x86/include/asm/disabled-features.h ../../arch/x86/include/asm/disabled-features.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/disabled-features.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/required-features.h && ( \ - (diff -B ../arch/x86/include/asm/required-features.h ../../arch/x86/include/asm/required-features.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/required-features.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/cpufeatures.h && ( \ - (diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/lib/memcpy_64.S && ( \ - (diff -B -I "^EXPORT_SYMBOL" -I "^#include " ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \ - || echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/lib/memset_64.S && ( \ - (diff -B -I "^EXPORT_SYMBOL" -I "^#include " ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \ - || echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true - @(test -f ../../arch/arm/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/arm/include/uapi/asm/perf_regs.h ../../arch/arm/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/arm/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm64/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/arm64/include/uapi/asm/perf_regs.h ../../arch/arm64/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/arm64/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/powerpc/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/powerpc/include/uapi/asm/perf_regs.h ../../arch/powerpc/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/powerpc/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/perf_regs.h ../../arch/x86/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/kvm.h ../../arch/x86/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/kvm_perf.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/kvm_perf.h ../../arch/x86/include/uapi/asm/kvm_perf.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/svm.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/svm.h ../../arch/x86/include/uapi/asm/svm.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/svm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/vmx.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/vmx.h ../../arch/x86/include/uapi/asm/vmx.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/vmx.h differs from kernel" >&2 )) || true - @(test -f ../../arch/powerpc/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/powerpc/include/uapi/asm/kvm.h ../../arch/powerpc/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/powerpc/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/kvm.h ../../arch/s390/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/kvm_perf.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/kvm_perf.h ../../arch/s390/include/uapi/asm/kvm_perf.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/sie.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/sie.h ../../arch/s390/include/uapi/asm/sie.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/sie.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/arm/include/uapi/asm/kvm.h ../../arch/arm/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/arm/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm64/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/arm64/include/uapi/asm/kvm.h ../../arch/arm64/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/arm64/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/arch_hweight.h && ( \ - (diff -B ../include/asm-generic/bitops/arch_hweight.h ../../include/asm-generic/bitops/arch_hweight.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/arch_hweight.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/const_hweight.h && ( \ - (diff -B ../include/asm-generic/bitops/const_hweight.h ../../include/asm-generic/bitops/const_hweight.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/const_hweight.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/__fls.h && ( \ - (diff -B ../include/asm-generic/bitops/__fls.h ../../include/asm-generic/bitops/__fls.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/__fls.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/fls.h && ( \ - (diff -B ../include/asm-generic/bitops/fls.h ../../include/asm-generic/bitops/fls.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/fls.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/fls64.h && ( \ - (diff -B ../include/asm-generic/bitops/fls64.h ../../include/asm-generic/bitops/fls64.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/fls64.h differs from kernel" >&2 )) || true - @(test -f ../../include/linux/coresight-pmu.h && ( \ - (diff -B ../include/linux/coresight-pmu.h ../../include/linux/coresight-pmu.h >/dev/null) \ - || echo "Warning: tools/include/linux/coresight-pmu.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/asm-generic/mman-common.h && ( \ - (diff -B ../include/uapi/asm-generic/mman-common.h ../../include/uapi/asm-generic/mman-common.h >/dev/null) \ - || echo "Warning: tools/include/uapi/asm-generic/mman-common.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/asm-generic/mman.h && ( \ - (diff -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>$$" ../include/uapi/asm-generic/mman.h ../../include/uapi/asm-generic/mman.h >/dev/null) \ - || echo "Warning: tools/include/uapi/asm-generic/mman.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/linux/mman.h && ( \ - (diff -B -I "^#include <\(uapi/\)*asm/mman.h>$$" ../include/uapi/linux/mman.h ../../include/uapi/linux/mman.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true $(Q)$(MAKE) $(build)=perf $(JEVENTS_IN): FORCE diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh new file mode 100755 index 000000000000..c747bfd7f14d --- /dev/null +++ b/tools/perf/check-headers.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +HEADERS=' +include/uapi/linux/perf_event.h +include/linux/hash.h +include/uapi/linux/hw_breakpoint.h +arch/x86/include/asm/disabled-features.h +arch/x86/include/asm/required-features.h +arch/x86/include/asm/cpufeatures.h +arch/arm/include/uapi/asm/perf_regs.h +arch/arm64/include/uapi/asm/perf_regs.h +arch/powerpc/include/uapi/asm/perf_regs.h +arch/x86/include/uapi/asm/perf_regs.h +arch/x86/include/uapi/asm/kvm.h +arch/x86/include/uapi/asm/kvm_perf.h +arch/x86/include/uapi/asm/svm.h +arch/x86/include/uapi/asm/vmx.h +arch/powerpc/include/uapi/asm/kvm.h +arch/s390/include/uapi/asm/kvm.h +arch/s390/include/uapi/asm/kvm_perf.h +arch/s390/include/uapi/asm/sie.h +arch/arm/include/uapi/asm/kvm.h +arch/arm64/include/uapi/asm/kvm.h +include/asm-generic/bitops/arch_hweight.h +include/asm-generic/bitops/const_hweight.h +include/asm-generic/bitops/__fls.h +include/asm-generic/bitops/fls.h +include/asm-generic/bitops/fls64.h +include/linux/coresight-pmu.h +include/uapi/asm-generic/mman-common.h +' + +check () { + file=$1 + opts= + + shift + while [ -n "$*" ]; do + opts="$opts \"$1\"" + shift + done + + cmd="diff $opts ../$file ../../$file > /dev/null" + + test -f ../../$file && + eval $cmd || echo "Warning: $file differs from kernel" >&2 +} + + +# simple diff check +for i in $HEADERS; do + check $i -B +done + +# diff with extra ignore lines +check arch/x86/lib/memcpy_64.S -B -I "^EXPORT_SYMBOL" -I "^#include " +check arch/x86/lib/memset_64.S -B -I "^EXPORT_SYMBOL" -I "^#include " +check include/uapi/asm-generic/mman.h -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>" +check include/uapi/linux/mman.h -B -I "^#include <\(uapi/\)*asm/mman.h>" -- cgit From 96039c7c52e03b7d6dd773664e74b79e3ae9856a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:50 +0900 Subject: perf sched timehist: Split is_idle_sample() The is_idle_sample() function actually does more than determining whether sample come from idle task. Split the callchain part into save_task_callchain() to make it clearer. Also checking prev_pid from trace data looks preferred than just checking sample->pid since it's possible, although rare, to have invalid 0 pid/tid on scheduling an exiting task. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-2-namhyung@kernel.org [ Remove some needless () in some return statements ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 1a3f1be93372..966eddce1609 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1939,39 +1939,40 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, r->total_run_time += r->dt_run; } -static bool is_idle_sample(struct perf_sched *sched, - struct perf_sample *sample, - struct perf_evsel *evsel, - struct machine *machine) +static bool is_idle_sample(struct perf_sample *sample, + struct perf_evsel *evsel) { - struct thread *thread; - struct callchain_cursor *cursor = &callchain_cursor; - /* pid 0 == swapper == idle task */ - if (sample->pid == 0) - return true; + if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) + return perf_evsel__intval(evsel, sample, "prev_pid") == 0; - if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) { - if (perf_evsel__intval(evsel, sample, "prev_pid") == 0) - return true; - } + return sample->pid == 0; +} + +static void save_task_callchain(struct perf_sched *sched, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct callchain_cursor *cursor = &callchain_cursor; + struct thread *thread; /* want main thread for process - has maps */ thread = machine__findnew_thread(machine, sample->pid, sample->pid); if (thread == NULL) { pr_debug("Failed to get thread for pid %d.\n", sample->pid); - return false; + return; } if (!symbol_conf.use_callchain || sample->callchain == NULL) - return false; + return; if (thread__resolve_callchain(thread, cursor, evsel, sample, NULL, NULL, sched->max_stack + 2) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); - return false; + return; } callchain_cursor_commit(cursor); @@ -1994,8 +1995,6 @@ static bool is_idle_sample(struct perf_sched *sched, callchain_cursor_advance(cursor); } - - return false; } /* @@ -2111,7 +2110,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, { struct thread *thread; - if (is_idle_sample(sched, sample, evsel, machine)) { + if (is_idle_sample(sample, evsel)) { thread = get_idle_thread(sample->cpu); if (thread == NULL) pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); @@ -2124,6 +2123,8 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, pr_debug("Failed to get thread for tid %d. skipping sample.\n", sample->tid); } + + save_task_callchain(sched, sample, evsel, machine); } return thread; -- cgit From 3bc2fa9cb829ccf6527e7117d9af769d93ee6d39 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:51 +0900 Subject: perf sched timehist: Introduce struct idle_time_data The struct idle_time_data is to keep idle stats with callchains entering to the idle task. The normal thread_runtime calculation is done transparently since it extends the struct thread_runtime. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-3-namhyung@kernel.org [ Align struct field names ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 966eddce1609..e108b0f6a246 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -230,6 +230,15 @@ struct evsel_runtime { u32 ncpu; /* highest cpu slot allocated */ }; +/* per cpu idle time data */ +struct idle_thread_runtime { + struct thread_runtime tr; + struct thread *last_thread; + struct rb_root sorted_root; + struct callchain_root callchain; + struct callchain_cursor cursor; +}; + /* track idle times per cpu */ static struct thread **idle_threads; static int idle_max_cpu; @@ -1997,13 +2006,31 @@ static void save_task_callchain(struct perf_sched *sched, } } +static int init_idle_thread(struct thread *thread) +{ + struct idle_thread_runtime *itr; + + thread__set_comm(thread, idle_comm, 0); + + itr = zalloc(sizeof(*itr)); + if (itr == NULL) + return -ENOMEM; + + init_stats(&itr->tr.run_stats); + callchain_init(&itr->callchain); + callchain_cursor_reset(&itr->cursor); + thread__set_priv(thread, itr); + + return 0; +} + /* * Track idle stats per cpu by maintaining a local thread * struct for the idle task on each cpu. */ static int init_idle_threads(int ncpu) { - int i; + int i, ret; idle_threads = zalloc(ncpu * sizeof(struct thread *)); if (!idle_threads) @@ -2017,7 +2044,9 @@ static int init_idle_threads(int ncpu) if (idle_threads[i] == NULL) return -ENOMEM; - thread__set_comm(idle_threads[i], idle_comm, 0); + ret = init_idle_thread(idle_threads[i]); + if (ret < 0) + return ret; } return 0; @@ -2064,8 +2093,8 @@ static struct thread *get_idle_thread(int cpu) idle_threads[cpu] = thread__new(0, 0); if (idle_threads[cpu]) { - idle_threads[cpu]->tid = 0; - thread__set_comm(idle_threads[cpu], idle_comm, 0); + if (init_idle_thread(idle_threads[cpu]) < 0) + return NULL; } } -- cgit From 699b5b920db04a6ff5c03a519e4c182aeb350952 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:52 +0900 Subject: perf sched timehist: Save callchain when entering idle In order to investigate the idleness reason, it is necessary to keep the callchains when entering idle. This can be identified by the sched:sched_switch event having the next_pid field as 0. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-4-namhyung@kernel.org Link: http://lkml.kernel.org/r/20161213080632.19099-1-namhyung@kernel.org [ Merged fix from Namhyung, see second Link: tag ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index e108b0f6a246..dc83b803ca54 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -200,6 +200,7 @@ struct perf_sched { /* options for timehist command */ bool summary; bool summary_only; + bool idle_hist; bool show_callchain; unsigned int max_stack; bool show_cpu_visual; @@ -2101,6 +2102,15 @@ static struct thread *get_idle_thread(int cpu) return idle_threads[cpu]; } +static void save_idle_callchain(struct idle_thread_runtime *itr, + struct perf_sample *sample) +{ + if (!symbol_conf.use_callchain || sample->callchain == NULL) + return; + + callchain_cursor__copy(&itr->cursor, &callchain_cursor); +} + /* * handle runtime stats saved per thread */ @@ -2154,6 +2164,26 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, } save_task_callchain(sched, sample, evsel, machine); + if (sched->idle_hist) { + struct thread *idle; + struct idle_thread_runtime *itr; + + idle = get_idle_thread(sample->cpu); + if (idle == NULL) { + pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); + return NULL; + } + + itr = thread__priv(idle); + if (itr == NULL) + return NULL; + + itr->last_thread = thread; + + /* copy task callchain when entering to idle */ + if (perf_evsel__intval(evsel, sample, "next_pid") == 0) + save_idle_callchain(itr, sample); + } } return thread; -- cgit From a4b2b6f56e0cfe729cf89318d44b6a875b31d95a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:53 +0900 Subject: perf sched timehist: Skip non-idle events when necessary Sometimes it only focuses on idle-related events like upcoming idle-hist feature. In this case we don't want to see other event to reduce noise. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index dc83b803ca54..c8e7848e71a7 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2190,7 +2190,9 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, } static bool timehist_skip_sample(struct perf_sched *sched, - struct thread *thread) + struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample) { bool rc = false; @@ -2199,10 +2201,19 @@ static bool timehist_skip_sample(struct perf_sched *sched, sched->skipped_samples++; } + if (sched->idle_hist) { + if (strcmp(perf_evsel__name(evsel), "sched:sched_switch")) + rc = true; + else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 && + perf_evsel__intval(evsel, sample, "next_pid") != 0) + rc = true; + } + return rc; } static void timehist_print_wakeup_event(struct perf_sched *sched, + struct perf_evsel *evsel, struct perf_sample *sample, struct machine *machine, struct thread *awakened) @@ -2215,8 +2226,8 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, return; /* show wakeup unless both awakee and awaker are filtered */ - if (timehist_skip_sample(sched, thread) && - timehist_skip_sample(sched, awakened)) { + if (timehist_skip_sample(sched, thread, evsel, sample) && + timehist_skip_sample(sched, awakened, evsel, sample)) { return; } @@ -2261,7 +2272,7 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool, /* show wakeups if requested */ if (sched->show_wakeups && !perf_time__skip_sample(&sched->ptime, sample->time)) - timehist_print_wakeup_event(sched, sample, machine, thread); + timehist_print_wakeup_event(sched, evsel, sample, machine, thread); return 0; } @@ -2288,8 +2299,8 @@ static void timehist_print_migration_event(struct perf_sched *sched, if (thread == NULL) return; - if (timehist_skip_sample(sched, thread) && - timehist_skip_sample(sched, migrated)) { + if (timehist_skip_sample(sched, thread, evsel, sample) && + timehist_skip_sample(sched, migrated, evsel, sample)) { return; } @@ -2374,7 +2385,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, goto out; } - if (timehist_skip_sample(sched, thread)) + if (timehist_skip_sample(sched, thread, evsel, sample)) goto out; tr = thread__get_runtime(thread); -- cgit From 07235f84ece6b66f43334881806aad3467cf3d84 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:54 +0900 Subject: perf sched timehist: Add -I/--idle-hist option The --idle-hist option is to analyze system idle state so which process makes cpu to go idle. If this option is specified, non-idle events will be skipped and processes switching to/from idle will be shown. This option is mostly useful when used with --summary(-only) option. In the idle-time summary view, idle time is accounted to previous thread which is run before idle task. The example output looks like following: Idle-time summary comm parent sched-out idle-time min-idle avg-idle max-idle stddev migrations (count) (msec) (msec) (msec) (msec) % -------------------------------------------------------------------------------------------- rcu_preempt[7] 2 95 550.872 0.011 5.798 23.146 7.63 0 migration/1[16] 2 1 15.558 15.558 15.558 15.558 0.00 0 khugepaged[39] 2 1 3.062 3.062 3.062 3.062 0.00 0 kworker/0:1H[124] 2 2 4.728 0.611 2.364 4.116 74.12 0 systemd-journal[167] 1 1 4.510 4.510 4.510 4.510 0.00 0 kworker/u16:3[558] 2 13 74.737 0.080 5.749 12.960 21.96 0 irq/34-iwlwifi[628] 2 21 118.403 0.032 5.638 23.990 24.00 0 kworker/u17:0[673] 2 1 3.523 3.523 3.523 3.523 0.00 0 dbus-daemon[722] 1 1 6.743 6.743 6.743 6.743 0.00 0 ifplugd[741] 1 1 58.826 58.826 58.826 58.826 0.00 0 wpa_supplicant[1490] 1 1 13.302 13.302 13.302 13.302 0.00 0 wpa_actiond[1492] 1 2 4.064 0.168 2.032 3.896 91.72 0 dockerd[1500] 1 1 0.055 0.055 0.055 0.055 0.00 0 ... Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-6-namhyung@kernel.org Link: http://lkml.kernel.org/r/20161213080632.19099-2-namhyung@kernel.org [ Merged fix sent by Namhyumg, as posted in the second Link: tag ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-sched.txt | 4 +++ tools/perf/builtin-sched.c | 46 +++++++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 7775b1eb2bee..76173969ab80 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist' --migrations:: Show migration events. +-I:: +--idle-hist:: + Show idle-related events only. + --time:: Only analyze samples within given time window: ,. Times have the format seconds.microseconds. If start is not given (i.e., time diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c8e7848e71a7..0b14265432df 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2421,7 +2421,36 @@ static int timehist_sched_change_event(struct perf_tool *tool, t = ptime->end; } - timehist_update_runtime_stats(tr, t, tprev); + if (!sched->idle_hist || thread->tid == 0) { + timehist_update_runtime_stats(tr, t, tprev); + + if (sched->idle_hist) { + struct idle_thread_runtime *itr = (void *)tr; + struct thread_runtime *last_tr; + + BUG_ON(thread->tid != 0); + + if (itr->last_thread == NULL) + goto out; + + /* add current idle time as last thread's runtime */ + last_tr = thread__get_runtime(itr->last_thread); + if (last_tr == NULL) + goto out; + + timehist_update_runtime_stats(last_tr, t, tprev); + /* + * remove delta time of last thread as it's not updated + * and otherwise it will show an invalid value next + * time. we only care total run time and run stat. + */ + last_tr->dt_run = 0; + last_tr->dt_wait = 0; + last_tr->dt_delay = 0; + + itr->last_thread = NULL; + } + } if (!sched->summary_only) timehist_print_sample(sched, sample, &al, thread, t); @@ -2543,9 +2572,15 @@ static void timehist_print_summary(struct perf_sched *sched, if (comm_width < 30) comm_width = 30; - printf("\nRuntime summary\n"); - printf("%*s parent sched-in ", comm_width, "comm"); - printf(" run-time min-run avg-run max-run stddev migrations\n"); + if (sched->idle_hist) { + printf("\nIdle-time summary\n"); + printf("%*s parent sched-out ", comm_width, "comm"); + printf(" idle-time min-idle avg-idle max-idle stddev migrations\n"); + } else { + printf("\nRuntime summary\n"); + printf("%*s parent sched-in ", comm_width, "comm"); + printf(" run-time min-run avg-run max-run stddev migrations\n"); + } printf("%*s (count) ", comm_width, ""); printf(" (msec) (msec) (msec) (msec) %%\n"); printf("%.117s\n", graph_dotted_line); @@ -2561,7 +2596,7 @@ static void timehist_print_summary(struct perf_sched *sched, printf("\n"); /* CPU idle stats not tracked when samples were skipped */ - if (sched->skipped_samples) + if (sched->skipped_samples && !sched->idle_hist) return; printf("\nIdle stats:\n"); @@ -3107,6 +3142,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), + OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), OPT_STRING(0, "time", &sched.time_str, "str", "Time span for analysis (start,stop)"), OPT_PARENT(sched_options) -- cgit From ba957ebb54893acaf3dc846031073a63f021cee1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:55 +0900 Subject: perf sched timehist: Show callchains for idle stat When --idle-hist option is used with --summary, it now shows idle stats with callchains like below: Idle stats by callchain: CPU 0: 902.195 msec Idle time (msec) Count Callchains ---------------- ------- -------------------------------------------------- 370.589 69 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- entry_SYSCALL_64_fastpath 178.799 17 worker_thread <- kthread <- ret_from_fork 128.352 17 schedule_timeout <- rcu_gp_kthread <- kthread <- ret_from_fork 125.111 19 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_select <- core_sys_select 71.599 50 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_poll 23.146 1 rcu_gp_kthread <- kthread <- ret_from_fork 4.510 1 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- do_syscall_64 0.085 1 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- do_restart_poll ... Committer notes: Extra testing: # uname -a Linux jouet 4.8.8-300.fc25.x86_64 #1 SMP Tue Nov 15 18:10:06 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux 1) Run 'perf sched record -g' 2) Run 'perf sched timehist --idle --summary' Idle stats by callchain: CPU 0: 13456.840 msec Idle time (msec) Count Callchains ---------------- ----- -------------------------------------------------- 5386.637 3283 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_poll 2750.238 2299 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- do_syscall_64 1275.672 1287 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- entry_SYSCALL_64_fastpath 936.322 452 worker_thread <- kthread <- ret_from_fork 741.311 385 rcu_nocb_kthread <- kthread <- ret_from_fork 729.385 248 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_ppoll 365.386 229 irq_thread <- kthread <- ret_from_fork 338.934 265 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- entry_SYSCALL_64_fastpath 219.488 201 schedule_timeout <- rcu_gp_kthread <- kthread <- ret_from_fork 186.839 410 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- do_syscall_64 142.541 59 kvm_vcpu_block <- kvm_arch_vcpu_ioctl_run <- kvm_vcpu_ioctl <- do_vfs_ioctl <- sys_ioctl 83.887 92 smpboot_thread_fn <- kthread <- ret_from_fork 62.722 96 do_exit <- do_group_exit <- 0x2a5594 <- entry_SYSCALL_64_fastpath 47.894 83 pipe_wait <- pipe_read <- __vfs_read <- vfs_read <- sys_read 46.554 61 rcu_gp_kthread <- kthread <- ret_from_fork 34.337 21 schedule_timeout <- intel_fbc_work_fn <- process_one_work <- worker_thread <- kthread 29.521 14 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_select <- core_sys_select 20.274 10 schedule_timeout <- io_schedule_timeout <- bit_wait_io <- __wait_on_bit <- out_of_line_wait_on_bit 15.085 55 schedule_timeout <- unix_stream_read_generic <- unix_stream_recvmsg <- sock_recvmsg <- SYSC_recvfrom Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 0b14265432df..c1c07bfe132c 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2448,6 +2448,9 @@ static int timehist_sched_change_event(struct perf_tool *tool, last_tr->dt_wait = 0; last_tr->dt_delay = 0; + if (itr->cursor.nr) + callchain_append(&itr->callchain, &itr->cursor, t - tprev); + itr->last_thread = NULL; } } @@ -2557,6 +2560,60 @@ static int show_deadthread_runtime(struct thread *t, void *priv) return __show_thread_runtime(t, priv); } +static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node) +{ + const char *sep = " <- "; + struct callchain_list *chain; + size_t ret = 0; + char bf[1024]; + bool first; + + if (node == NULL) + return 0; + + ret = callchain__fprintf_folded(fp, node->parent); + first = (ret == 0); + + list_for_each_entry(chain, &node->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + if (chain->ms.sym && chain->ms.sym->ignore) + continue; + ret += fprintf(fp, "%s%s", first ? "" : sep, + callchain_list__sym_name(chain, bf, sizeof(bf), + false)); + first = false; + } + + return ret; +} + +static size_t timehist_print_idlehist_callchain(struct rb_root *root) +{ + size_t ret = 0; + FILE *fp = stdout; + struct callchain_node *chain; + struct rb_node *rb_node = rb_first(root); + + printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); + printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, + graph_dotted_line); + + while (rb_node) { + chain = rb_entry(rb_node, struct callchain_node, rb_node); + rb_node = rb_next(rb_node); + + ret += fprintf(fp, " "); + print_sched_time(chain->hit, 12); + ret += 16; /* print_sched_time returns 2nd arg + 4 */ + ret += fprintf(fp, " %8d ", chain->count); + ret += callchain__fprintf_folded(fp, chain); + ret += fprintf(fp, "\n"); + } + + return ret; +} + static void timehist_print_summary(struct perf_sched *sched, struct perf_session *session) { @@ -2615,6 +2672,35 @@ static void timehist_print_summary(struct perf_sched *sched, printf(" CPU %2d idle entire time window\n", i); } + if (sched->idle_hist && symbol_conf.use_callchain) { + callchain_param.mode = CHAIN_FOLDED; + callchain_param.value = CCVAL_PERIOD; + + callchain_register_param(&callchain_param); + + printf("\nIdle stats by callchain:\n"); + for (i = 0; i < idle_max_cpu; ++i) { + struct idle_thread_runtime *itr; + + t = idle_threads[i]; + if (!t) + continue; + + itr = thread__priv(t); + if (itr == NULL) + continue; + + callchain_param.sort(&itr->sorted_root, &itr->callchain, + 0, &callchain_param); + + printf(" CPU %2d:", i); + print_sched_time(itr->tr.total_run_time, 6); + printf(" msec\n"); + timehist_print_idlehist_callchain(&itr->sorted_root); + printf("\n"); + } + } + printf("\n" " Total number of unique tasks: %" PRIu64 "\n" "Total number of context switches: %" PRIu64 "\n" -- cgit From 7e6a79981b7a797b37b1dbeca3fd6ae1cb6d881f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 Dec 2016 10:52:10 -0300 Subject: perf tools: Remove some needless __maybe_unused I.e. those parameters/functions _are_ used, so ditch that misleading attribute. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-13cqtjh0yojg5gzvpq1zzpl0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 13 ++++++------- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-stat.c | 6 +++--- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 4b419631753d..f8ca7a4ebabc 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -208,7 +208,7 @@ static void compute_stats(struct c2c_hist_entry *c2c_he, static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, struct machine *machine) { struct c2c_hists *c2c_hists = &c2c.hists; @@ -379,7 +379,7 @@ static int symbol_width(struct hists *hists, struct sort_entry *se) static int c2c_width(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp __maybe_unused, - struct hists *hists __maybe_unused) + struct hists *hists) { struct c2c_fmt *c2c_fmt; struct c2c_dimension *dim; @@ -1127,7 +1127,7 @@ MEAN_ENTRY(mean_lcl_entry, lcl_hitm); MEAN_ENTRY(mean_load_entry, load); static int -cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cpucnt_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { struct c2c_hist_entry *c2c_he; @@ -1141,7 +1141,7 @@ cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, } static int -cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cl_idx_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { struct c2c_hist_entry *c2c_he; @@ -1155,7 +1155,7 @@ cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, } static int -cl_idx_empty_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cl_idx_empty_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { int width = c2c_width(fmt, hpp, he->hists); @@ -1779,7 +1779,6 @@ static int c2c_hists__init(struct c2c_hists *hists, return hpp_list__parse(&hists->list, NULL, sort); } -__maybe_unused static int c2c_hists__reinit(struct c2c_hists *c2c_hists, const char *output, const char *sort) @@ -2658,7 +2657,7 @@ out: return err; } -static int parse_record_events(const struct option *opt __maybe_unused, +static int parse_record_events(const struct option *opt, const char *str, int unset __maybe_unused) { bool *event_set = (bool *) opt->value; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d2afbe4a240d..06cc759a4597 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -648,7 +648,7 @@ report_parse_ignore_callees_opt(const struct option *opt __maybe_unused, } static int -parse_branch_mode(const struct option *opt __maybe_unused, +parse_branch_mode(const struct option *opt, const char *str __maybe_unused, int unset) { int *branch_mode = opt->value; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 688dea7cb08f..a02f2e965628 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2195,7 +2195,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, } static -int process_stat_config_event(struct perf_tool *tool __maybe_unused, +int process_stat_config_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { @@ -2238,7 +2238,7 @@ static int set_maps(struct perf_stat *st) } static -int process_thread_map_event(struct perf_tool *tool __maybe_unused, +int process_thread_map_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { @@ -2257,7 +2257,7 @@ int process_thread_map_event(struct perf_tool *tool __maybe_unused, } static -int process_cpu_map_event(struct perf_tool *tool __maybe_unused, +int process_cpu_map_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { -- cgit From 631ac41b46d293fb3ee43a809776c1663de8d9c6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:39 +0100 Subject: perf mem: Fix --all-user/--all-kernel options Removing extra '--' prefix. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: ad16511b0e40 ("perf mem: Add -U/-K (--all-user/--all-kernel) options") Link: http://lkml.kernel.org/r/1481538943-21874-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index d1ce29be560e..cd7bc4d104e2 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -70,8 +70,8 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), - OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"), - OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"), + OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"), + OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"), OPT_END() }; -- cgit From 83c2e4f3968d6871eed295f2f5675d3d70b01afa Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:40 +0100 Subject: perf evsel: Use variable instead of repeating lengthy FD macro It's more readable and will ease up following patches. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481538943-21874-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b2365a63db45..fd61ebd77c26 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1474,7 +1474,7 @@ retry_sample_id: for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < nthreads; thread++) { - int group_fd; + int fd, group_fd; if (!evsel->cgrp && !evsel->system_wide) pid = thread_map__pid(threads, thread); @@ -1484,21 +1484,22 @@ retry_open: pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, cpus->map[cpu], group_fd, flags); - FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, - pid, - cpus->map[cpu], - group_fd, flags); - if (FD(evsel, cpu, thread) < 0) { + fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], + group_fd, flags); + + FD(evsel, cpu, thread) = fd; + + if (fd < 0) { err = -errno; pr_debug2("\nsys_perf_event_open failed, error %d\n", err); goto try_fallback; } - pr_debug2(" = %d\n", FD(evsel, cpu, thread)); + pr_debug2(" = %d\n", fd); if (evsel->bpf_fd >= 0) { - int evt_fd = FD(evsel, cpu, thread); + int evt_fd = fd; int bpf_fd = evsel->bpf_fd; err = ioctl(evt_fd, -- cgit From 38af91f01de0e160c17ae380acb5bab5d51066f4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:41 +0100 Subject: perf thread_map: Add thread_map__remove function Add thread_map__remove function to remove thread from thread map. Add automated test also. Committer notes: Testing it: # perf test "Remove thread map" 39: Remove thread map : Ok # perf test -v "Remove thread map" 39: Remove thread map : --- start --- test child forked, pid 4483 2 threads: 4482, 4483 1 thread: 4483 0 thread: test child finished with 0 ---- end ---- Remove thread map: Ok # Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481538943-21874-4-git-send-email-jolsa@kernel.org [ Added stdlib.h, to get the free() declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 4 ++++ tools/perf/tests/tests.h | 1 + tools/perf/tests/thread-map.c | 44 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/thread_map.c | 22 +++++++++++++++++++++ tools/perf/util/thread_map.h | 1 + 5 files changed, 72 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 23605202d4a1..a77dcc0d24e3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -185,6 +185,10 @@ static struct test generic_tests[] = { .desc = "Synthesize thread map", .func = test__thread_map_synthesize, }, + { + .desc = "Remove thread map", + .func = test__thread_map_remove, + }, { .desc = "Synthesize cpu map", .func = test__cpu_map_synthesize, diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 0d7b251305af..a512f0c8ff5b 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -80,6 +80,7 @@ const char *test__bpf_subtest_get_desc(int subtest); int test__bpf_subtest_get_nr(void); int test_session_topology(int subtest); int test__thread_map_synthesize(int subtest); +int test__thread_map_remove(int subtest); int test__cpu_map_synthesize(int subtest); int test__synthesize_stat_config(int subtest); int test__synthesize_stat(int subtest); diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index cee2a2cdc933..a4a4b4625ac3 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -93,3 +94,46 @@ int test__thread_map_synthesize(int subtest __maybe_unused) return 0; } + +int test__thread_map_remove(int subtest __maybe_unused) +{ + struct thread_map *threads; + char *str; + int i; + + TEST_ASSERT_VAL("failed to allocate map string", + asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); + + threads = thread_map__new_str(str, NULL, 0); + + TEST_ASSERT_VAL("failed to allocate thread_map", + threads); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to remove thread", + !thread_map__remove(threads, 0)); + + TEST_ASSERT_VAL("thread_map count != 1", threads->nr == 1); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to remove thread", + !thread_map__remove(threads, 0)); + + TEST_ASSERT_VAL("thread_map count != 0", threads->nr == 0); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to not remove thread", + thread_map__remove(threads, 0)); + + for (i = 0; i < threads->nr; i++) + free(threads->map[i].comm); + + free(threads); + return 0; +} diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 40585f5b7027..f9eab200fd75 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -448,3 +448,25 @@ bool thread_map__has(struct thread_map *threads, pid_t pid) return false; } + +int thread_map__remove(struct thread_map *threads, int idx) +{ + int i; + + if (threads->nr < 1) + return -EINVAL; + + if (idx >= threads->nr) + return -EINVAL; + + /* + * Free the 'idx' item and shift the rest up. + */ + free(threads->map[idx].comm); + + for (i = idx; i < threads->nr - 1; i++) + threads->map[i] = threads->map[i + 1]; + + threads->nr--; + return 0; +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index bd3b971588da..ea0ef08c6303 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -58,4 +58,5 @@ static inline char *thread_map__comm(struct thread_map *map, int thread) void thread_map__read_comms(struct thread_map *threads); bool thread_map__has(struct thread_map *threads, pid_t pid); +int thread_map__remove(struct thread_map *threads, int idx); #endif /* __PERF_THREAD_MAP_H */ -- cgit From a359c17a7e1a9c99384499cf7b43d80867080789 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Dec 2016 08:46:22 +0100 Subject: perf evsel: Allow to ignore missing pid Adding perf_evsel::ignore_missing_cpu_thread bool. When set true, it allows perf to ignore error of missing pid of perf event syscall. We remove missing thread id from the thread_map, so the rest of the processing like ioctl and mmap won't get disturbed with -1 fd. The reason for supporting this is to ease up monitoring group of pids, that 'disappear' before perf opens their event. This currently leads perf to report error and exit and makes perf record's -u option unusable under certain setup. With this change we will allow this race and ignore such failure with following warning: WARNING: Ignored open failure for pid 8605 Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161213074622.GA3084@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.h | 1 + tools/perf/util/evsel.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 1 + 3 files changed, 46 insertions(+) (limited to 'tools') diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 9a0236a4cf95..1c27d947c2fe 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -55,6 +55,7 @@ struct record_opts { bool all_user; bool tail_synthesize; bool overwrite; + bool ignore_missing_thread; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index fd61ebd77c26..04e536ae4d88 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -990,6 +990,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, * it overloads any global configuration. */ apply_config_terms(evsel, opts); + + evsel->ignore_missing_thread = opts->ignore_missing_thread; } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -1419,6 +1421,33 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, return fprintf(fp, " %-32s %s\n", name, val); } +static bool ignore_missing_thread(struct perf_evsel *evsel, + struct thread_map *threads, + int thread, int err) +{ + if (!evsel->ignore_missing_thread) + return false; + + /* The system wide setup does not work with threads. */ + if (evsel->system_wide) + return false; + + /* The -ESRCH is perf event syscall errno for pid's not found. */ + if (err != -ESRCH) + return false; + + /* If there's only one thread, let it fail. */ + if (threads->nr == 1) + return false; + + if (thread_map__remove(threads, thread)) + return false; + + pr_warning("WARNING: Ignored open failure for pid %d\n", + thread_map__pid(threads, thread)); + return true; +} + static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { @@ -1491,6 +1520,21 @@ retry_open: if (fd < 0) { err = -errno; + + if (ignore_missing_thread(evsel, threads, thread, err)) { + /* + * We just removed 1 thread, so take a step + * back on thread index and lower the upper + * nthreads limit. + */ + nthreads--; + thread--; + + /* ... and pretend like nothing have happened. */ + err = 0; + continue; + } + pr_debug2("\nsys_perf_event_open failed, error %d\n", err); goto try_fallback; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6abb89cd27f9..06ef6f29efa1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -120,6 +120,7 @@ struct perf_evsel { bool tracking; bool per_pkg; bool precise_max; + bool ignore_missing_thread; /* parse modifier helper */ int exclude_GH; int nr_members; -- cgit From 23dc4f1586159470aac707caae526824dd077e25 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:43 +0100 Subject: perf record: Force ignore_missing_thread for uid option Enable perf_evsel::ignore_missing_thread for -u option to ignore complete failure if any of the user's processes die between its enumeration and time we open the event. Committer notes: While doing a 'make -j4 allmodconfig' we sometimes get into the race: Before: # perf record -u acme Error: The sys_perf_event_open() syscall returned with 3 (No such process) for event (cycles:ppp). /bin/dmesg may provide additional information. No CONFIG_PERF_EVENTS=y kernel support configured? # After: [root@jouet ~]# perf record -u acme WARNING: Ignored open failure for pid 9888 WARNING: Ignored open failure for pid 18059 [root@jouet ~]# Which is an improvement, with the races not preventing the remaining threads for the specified user from being monitored, but the message probably needs further clarification. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481538943-21874-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index fa26865364b6..74d6a035133a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1687,6 +1687,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) goto out; } + /* Enable ignoring missing threads when -u option is defined. */ + rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX; + err = -ENOMEM; if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); -- cgit From 3ee2eb6da20db1edad31070da38996e8e0f8adfa Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 5 Dec 2016 21:26:46 +0530 Subject: perf annotate: Support jump instruction with target as second operand Architectures like PowerPC have jump instructions that includes a target address as a second operand. For example, 'bne cr7,0xc0000000000f6154'. Add support for such instruction in perf annotate. objdump o/p: c0000000000f6140: ld r9,1032(r31) c0000000000f6144: cmpdi cr7,r9,0 c0000000000f6148: bne cr7,0xc0000000000f6154 c0000000000f614c: ld r9,2312(r30) c0000000000f6150: std r9,1032(r31) c0000000000f6154: ld r9,88(r31) Corresponding perf annotate o/p: Before patch: ld r9,1032(r31) cmpdi cr7,r9,0 v bne 3ffffffffff09f2c ld r9,2312(r30) std r9,1032(r31) 74: ld r9,88(r31) After patch: ld r9,1032(r31) cmpdi cr7,r9,0 v bne 74 ld r9,2312(r30) std r9,1032(r31) 74: ld r9,88(r31) Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Chris Riyder Cc: Kim Phillips Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Taeung Song Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1480953407-7605-2-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ea7e0de4b9c1..590244e5781e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -223,8 +223,12 @@ bool ins__is_call(const struct ins *ins) static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) { const char *s = strchr(ops->raw, '+'); + const char *c = strchr(ops->raw, ','); - ops->target.addr = strtoull(ops->raw, NULL, 16); + if (c++ != NULL) + ops->target.addr = strtoull(c, NULL, 16); + else + ops->target.addr = strtoull(ops->raw, NULL, 16); if (s++ != NULL) ops->target.offset = strtoull(s, NULL, 16); -- cgit From e216874cc1946d28084fa90e495e02725a29e25f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 5 Dec 2016 21:26:47 +0530 Subject: perf annotate: Fix jump target outside of function address range If jump target is outside of function range, perf is not handling it correctly. Especially when target address is lesser than function start address, target offset will be negative. But, target address declared to be unsigned, converts negative number into 2's complement. See below example. Here target of 'jumpq' instruction at 34cf8 is 34ac0 which is lesser than function start address(34cf0). 34ac0 - 34cf0 = -0x230 = 0xfffffffffffffdd0 Objdump output: 0000000000034cf0 <__sigaction>: __GI___sigaction(): 34cf0: lea -0x20(%rdi),%eax 34cf3: cmp -bashx1,%eax 34cf6: jbe 34d00 <__sigaction+0x10> 34cf8: jmpq 34ac0 <__GI___libc_sigaction> 34cfd: nopl (%rax) 34d00: mov 0x386161(%rip),%rax # 3bae68 <_DYNAMIC+0x2e8> 34d07: movl -bashx16,%fs:(%rax) 34d0e: mov -bashxffffffff,%eax 34d13: retq perf annotate before applying patch: __GI___sigaction /usr/lib64/libc-2.22.so lea -0x20(%rdi),%eax cmp -bashx1,%eax v jbe 10 v jmpq fffffffffffffdd0 nop 10: mov _DYNAMIC+0x2e8,%rax movl -bashx16,%fs:(%rax) mov -bashxffffffff,%eax retq perf annotate after applying patch: __GI___sigaction /usr/lib64/libc-2.22.so lea -0x20(%rdi),%eax cmp -bashx1,%eax v jbe 10 ^ jmpq 34ac0 <__GI___libc_sigaction> nop 10: mov _DYNAMIC+0x2e8,%rax movl -bashx16,%fs:(%rax) mov -bashxffffffff,%eax retq Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Chris Riyder Cc: Kim Phillips Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Taeung Song Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1480953407-7605-3-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 5 +++-- tools/perf/util/annotate.c | 14 +++++++++----- tools/perf/util/annotate.h | 5 +++-- 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ec7a30fad149..ba36aac340bc 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -215,7 +215,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__set_color(browser, color); if (dl->ins.ops && dl->ins.ops->scnprintf) { if (ins__is_jump(&dl->ins)) { - bool fwd = dl->ops.target.offset > (u64)dl->offset; + bool fwd = dl->ops.target.offset > dl->offset; ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : SLSMG_UARROW_CHAR); @@ -245,7 +245,8 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy { if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins) || !disasm_line__has_offset(dl) - || dl->ops.target.offset >= symbol__size(sym)) + || dl->ops.target.offset < 0 + || dl->ops.target.offset >= (s64)symbol__size(sym)) return false; return true; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 590244e5781e..c81a3950a7fe 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -230,10 +230,12 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op else ops->target.addr = strtoull(ops->raw, NULL, 16); - if (s++ != NULL) + if (s++ != NULL) { ops->target.offset = strtoull(s, NULL, 16); - else - ops->target.offset = UINT64_MAX; + ops->target.offset_avail = true; + } else { + ops->target.offset_avail = false; + } return 0; } @@ -241,7 +243,7 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op static int jump__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - if (!ops->target.addr) + if (!ops->target.addr || ops->target.offset < 0) return ins__raw_scnprintf(ins, bf, size, ops); return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset); @@ -1209,9 +1211,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, if (dl == NULL) return -1; - if (dl->ops.target.offset == UINT64_MAX) + if (!disasm_line__has_offset(dl)) { dl->ops.target.offset = dl->ops.target.addr - map__rip_2objdump(map, sym->start); + dl->ops.target.offset_avail = true; + } /* kcore has no symbols, so add the call target name */ if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.name) { diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 87e4cadc5d27..09776b5af991 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -24,7 +24,8 @@ struct ins_operands { char *raw; char *name; u64 addr; - u64 offset; + s64 offset; + bool offset_avail; } target; union { struct { @@ -68,7 +69,7 @@ struct disasm_line { static inline bool disasm_line__has_offset(const struct disasm_line *dl) { - return dl->ops.target.offset != UINT64_MAX; + return dl->ops.target.offset_avail; } void disasm_line__free(struct disasm_line *dl); -- cgit From 0cb34dc2a37290f4069c5b01735c9725dc0a1b5c Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Dec 2016 18:46:14 -0800 Subject: tools lib bpf: Sync {tools,}/include/uapi/linux/bpf.h The tools version of this header is out of date; update it to the latest version from the kernel headers. Signed-off-by: Joe Stringer Acked-by: Wang Nan Cc: Alexei Starovoitov Cc: Daniel Borkmann Link: http://lkml.kernel.org/r/20161209024620.31660-2-joe@ovn.org [ Sync it harder, after merging with what was in net-next via perf/urgent via torvalds/master to get BPG_PROG_(AT|DE)TACH, etc ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/bpf.h | 593 +++++++++++++++++++++++++---------------- 1 file changed, 364 insertions(+), 229 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9e5fc168c8a3..0eb0e87dbe9f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -73,6 +73,8 @@ enum bpf_cmd { BPF_PROG_LOAD, BPF_OBJ_PIN, BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, }; enum bpf_map_type { @@ -85,6 +87,8 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_CGROUP_ARRAY, + BPF_MAP_TYPE_LRU_HASH, + BPF_MAP_TYPE_LRU_PERCPU_HASH, }; enum bpf_prog_type { @@ -95,8 +99,23 @@ enum bpf_prog_type { BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_TRACEPOINT, BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_PERF_EVENT, + BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, + BPF_PROG_TYPE_LWT_IN, + BPF_PROG_TYPE_LWT_OUT, + BPF_PROG_TYPE_LWT_XMIT, }; +enum bpf_attach_type { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, + __MAX_BPF_ATTACH_TYPE +}; + +#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -105,6 +124,13 @@ enum bpf_prog_type { #define BPF_EXIST 2 /* update existing element */ #define BPF_F_NO_PREALLOC (1U << 0) +/* Instead of having one common LRU list in the + * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list + * which can scale and perform better. + * Note, the LRU nodes (including free nodes) cannot be moved + * across different LRU lists. + */ +#define BPF_F_NO_COMMON_LRU (1U << 1) union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ @@ -140,243 +166,327 @@ union bpf_attr { __aligned_u64 pathname; __u32 bpf_fd; }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ + __u32 target_fd; /* container object to attach to */ + __u32 attach_bpf_fd; /* eBPF program to attach */ + __u32 attach_type; + }; } __attribute__((aligned(8))); +/* BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(&map, &key) + * Return: Map value or NULL + * + * int bpf_map_update_elem(&map, &key, &value, flags) + * Return: 0 on success or negative error + * + * int bpf_map_delete_elem(&map, &key) + * Return: 0 on success or negative error + * + * int bpf_probe_read(void *dst, int size, void *src) + * Return: 0 on success or negative error + * + * u64 bpf_ktime_get_ns(void) + * Return: current ktime + * + * int bpf_trace_printk(const char *fmt, int fmt_size, ...) + * Return: length of buffer written or negative error + * + * u32 bpf_prandom_u32(void) + * Return: random value + * + * u32 bpf_raw_smp_processor_id(void) + * Return: SMP processor ID + * + * int bpf_skb_store_bytes(skb, offset, from, len, flags) + * store bytes into packet + * @skb: pointer to skb + * @offset: offset within packet from skb->mac_header + * @from: pointer where to copy bytes from + * @len: number of bytes to store into packet + * @flags: bit 0 - if true, recompute skb->csum + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l3_csum_replace(skb, offset, from, to, flags) + * recompute IP checksum + * @skb: pointer to skb + * @offset: offset within packet where IP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l4_csum_replace(skb, offset, from, to, flags) + * recompute TCP/UDP checksum + * @skb: pointer to skb + * @offset: offset within packet where TCP/UDP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * bit 4 - is pseudo header + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_tail_call(ctx, prog_array_map, index) + * jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: index inside array that selects specific program to run + * Return: 0 on success or negative error + * + * int bpf_clone_redirect(skb, ifindex, flags) + * redirect to another netdev + * @skb: pointer to skb + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: 0 on success or negative error + * + * u64 bpf_get_current_pid_tgid(void) + * Return: current->tgid << 32 | current->pid + * + * u64 bpf_get_current_uid_gid(void) + * Return: current_gid << 32 | current_uid + * + * int bpf_get_current_comm(char *buf, int size_of_buf) + * stores current->comm into buf + * Return: 0 on success or negative error + * + * u32 bpf_get_cgroup_classid(skb) + * retrieve a proc's classid + * @skb: pointer to skb + * Return: classid if != 0 + * + * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) + * Return: 0 on success or negative error + * + * int bpf_skb_vlan_pop(skb) + * Return: 0 on success or negative error + * + * int bpf_skb_get_tunnel_key(skb, key, size, flags) + * int bpf_skb_set_tunnel_key(skb, key, size, flags) + * retrieve or populate tunnel metadata + * @skb: pointer to skb + * @key: pointer to 'struct bpf_tunnel_key' + * @size: size of 'struct bpf_tunnel_key' + * @flags: room for future extensions + * Return: 0 on success or negative error + * + * u64 bpf_perf_event_read(&map, index) + * Return: Number events read or error code + * + * int bpf_redirect(ifindex, flags) + * redirect to another netdev + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: TC_ACT_REDIRECT + * + * u32 bpf_get_route_realm(skb) + * retrieve a dst's tclassid + * @skb: pointer to skb + * Return: realm if != 0 + * + * int bpf_perf_event_output(ctx, map, index, data, size) + * output perf raw sample + * @ctx: struct pt_regs* + * @map: pointer to perf_event_array map + * @index: index of event in the map + * @data: data on stack to be output as raw data + * @size: size of data + * Return: 0 on success or negative error + * + * int bpf_get_stackid(ctx, map, flags) + * walk user or kernel stack and return id + * @ctx: struct pt_regs* + * @map: pointer to stack_trace map + * @flags: bits 0-7 - numer of stack frames to skip + * bit 8 - collect user stack instead of kernel + * bit 9 - compare stacks by hash only + * bit 10 - if two different stacks hash into the same stackid + * discard old + * other bits - reserved + * Return: >= 0 stackid on success or negative error + * + * s64 bpf_csum_diff(from, from_size, to, to_size, seed) + * calculate csum diff + * @from: raw from buffer + * @from_size: length of from buffer + * @to: raw to buffer + * @to_size: length of to buffer + * @seed: optional seed + * Return: csum result or negative error code + * + * int bpf_skb_get_tunnel_opt(skb, opt, size) + * retrieve tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: option size + * + * int bpf_skb_set_tunnel_opt(skb, opt, size) + * populate tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: 0 on success or negative error + * + * int bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is v4 -> v6, + * v6 -> v4 transitions. The helper will also resize the skb. eBPF + * program is expected to fill the new headers via skb_store_bytes + * and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + * + * int bpf_skb_under_cgroup(skb, map, index) + * Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + * + * u32 bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + * + * u64 bpf_get_current_task(void) + * Returns current task_struct + * Return: current + * + * int bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + * + * int bpf_current_task_under_cgroup(map, index) + * Check cgroup2 membership of current task + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 current failed the cgroup2 descendant test + * == 1 current succeeded the cgroup2 descendant test + * < 0 error + * + * int bpf_skb_change_tail(skb, len, flags) + * The helper will resize the skb to the given new size, to be used f.e. + * with control messages. + * @skb: pointer to skb + * @len: new skb length + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the skb is non-linear + * and not all of len are part of the linear section. Only needed for + * read/write with direct packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + * + * s64 bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. + * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + * + * void bpf_set_hash_invalid(skb) + * Invalidate current skb->hash. + * @skb: pointer to skb + * + * int bpf_get_numa_node_id() + * Return: Id of current NUMA node. + * + * int bpf_skb_change_head() + * Grows headroom of skb and adjusts MAC header offset accordingly. + * Will extends/reallocae as required automatically. + * May change skb data pointer and will thus invalidate any check + * performed for direct packet access. + * @skb: pointer to skb + * @len: length of header to be pushed in front + * @flags: Flags (unused for now) + * Return: 0 on success or negative error + * + * int bpf_xdp_adjust_head(xdp_md, delta) + * Adjust the xdp_md.data by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data + * Return: 0 on success or negative on error + */ +#define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ + FN(map_lookup_elem), \ + FN(map_update_elem), \ + FN(map_delete_elem), \ + FN(probe_read), \ + FN(ktime_get_ns), \ + FN(trace_printk), \ + FN(get_prandom_u32), \ + FN(get_smp_processor_id), \ + FN(skb_store_bytes), \ + FN(l3_csum_replace), \ + FN(l4_csum_replace), \ + FN(tail_call), \ + FN(clone_redirect), \ + FN(get_current_pid_tgid), \ + FN(get_current_uid_gid), \ + FN(get_current_comm), \ + FN(get_cgroup_classid), \ + FN(skb_vlan_push), \ + FN(skb_vlan_pop), \ + FN(skb_get_tunnel_key), \ + FN(skb_set_tunnel_key), \ + FN(perf_event_read), \ + FN(redirect), \ + FN(get_route_realm), \ + FN(perf_event_output), \ + FN(skb_load_bytes), \ + FN(get_stackid), \ + FN(csum_diff), \ + FN(skb_get_tunnel_opt), \ + FN(skb_set_tunnel_opt), \ + FN(skb_change_proto), \ + FN(skb_change_type), \ + FN(skb_under_cgroup), \ + FN(get_hash_recalc), \ + FN(get_current_task), \ + FN(probe_write_user), \ + FN(current_task_under_cgroup), \ + FN(skb_change_tail), \ + FN(skb_pull_data), \ + FN(csum_update), \ + FN(set_hash_invalid), \ + FN(get_numa_node_id), \ + FN(skb_change_head), \ + FN(xdp_adjust_head), + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ +#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x enum bpf_func_id { - BPF_FUNC_unspec, - BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ - BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ - BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ - BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ - BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ - BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ - BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ - BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ - - /** - * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet - * @skb: pointer to skb - * @offset: offset within packet from skb->mac_header - * @from: pointer where to copy bytes from - * @len: number of bytes to store into packet - * @flags: bit 0 - if true, recompute skb->csum - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_skb_store_bytes, - - /** - * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum - * @skb: pointer to skb - * @offset: offset within packet where IP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l3_csum_replace, - - /** - * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum - * @skb: pointer to skb - * @offset: offset within packet where TCP/UDP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * bit 4 - is pseudo header - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l4_csum_replace, - - /** - * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program - * @ctx: context pointer passed to next program - * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run - * Return: 0 on success - */ - BPF_FUNC_tail_call, - - /** - * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev - * @skb: pointer to skb - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_clone_redirect, - - /** - * u64 bpf_get_current_pid_tgid(void) - * Return: current->tgid << 32 | current->pid - */ - BPF_FUNC_get_current_pid_tgid, - - /** - * u64 bpf_get_current_uid_gid(void) - * Return: current_gid << 32 | current_uid - */ - BPF_FUNC_get_current_uid_gid, - - /** - * bpf_get_current_comm(char *buf, int size_of_buf) - * stores current->comm into buf - * Return: 0 on success - */ - BPF_FUNC_get_current_comm, - - /** - * bpf_get_cgroup_classid(skb) - retrieve a proc's classid - * @skb: pointer to skb - * Return: classid if != 0 - */ - BPF_FUNC_get_cgroup_classid, - BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ - BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ - - /** - * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags) - * retrieve or populate tunnel metadata - * @skb: pointer to skb - * @key: pointer to 'struct bpf_tunnel_key' - * @size: size of 'struct bpf_tunnel_key' - * @flags: room for future extensions - * Retrun: 0 on success - */ - BPF_FUNC_skb_get_tunnel_key, - BPF_FUNC_skb_set_tunnel_key, - BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ - /** - * bpf_redirect(ifindex, flags) - redirect to another netdev - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT - */ - BPF_FUNC_redirect, - - /** - * bpf_get_route_realm(skb) - retrieve a dst's tclassid - * @skb: pointer to skb - * Return: realm if != 0 - */ - BPF_FUNC_get_route_realm, - - /** - * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample - * @ctx: struct pt_regs* - * @map: pointer to perf_event_array map - * @index: index of event in the map - * @data: data on stack to be output as raw data - * @size: size of data - * Return: 0 on success - */ - BPF_FUNC_perf_event_output, - BPF_FUNC_skb_load_bytes, - - /** - * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id - * @ctx: struct pt_regs* - * @map: pointer to stack_trace map - * @flags: bits 0-7 - numer of stack frames to skip - * bit 8 - collect user stack instead of kernel - * bit 9 - compare stacks by hash only - * bit 10 - if two different stacks hash into the same stackid - * discard old - * other bits - reserved - * Return: >= 0 stackid on success or negative error - */ - BPF_FUNC_get_stackid, - - /** - * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff - * @from: raw from buffer - * @from_size: length of from buffer - * @to: raw to buffer - * @to_size: length of to buffer - * @seed: optional seed - * Return: csum result - */ - BPF_FUNC_csum_diff, - - /** - * bpf_skb_[gs]et_tunnel_opt(skb, opt, size) - * retrieve or populate tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: 0 on success for set, option size for get - */ - BPF_FUNC_skb_get_tunnel_opt, - BPF_FUNC_skb_set_tunnel_opt, - - /** - * bpf_skb_change_proto(skb, proto, flags) - * Change protocol of the skb. Currently supported is - * v4 -> v6, v6 -> v4 transitions. The helper will also - * resize the skb. eBPF program is expected to fill the - * new headers via skb_store_bytes and lX_csum_replace. - * @skb: pointer to skb - * @proto: new skb->protocol type - * @flags: reserved - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_proto, - - /** - * bpf_skb_change_type(skb, type) - * Change packet type of skb. - * @skb: pointer to skb - * @type: new skb->pkt_type type - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_type, - - /** - * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb - * @skb: pointer to skb - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 skb failed the cgroup2 descendant test - * == 1 skb succeeded the cgroup2 descendant test - * < 0 error - */ - BPF_FUNC_skb_under_cgroup, - - /** - * bpf_get_hash_recalc(skb) - * Retrieve and possibly recalculate skb->hash. - * @skb: pointer to skb - * Return: hash - */ - BPF_FUNC_get_hash_recalc, - - /** - * u64 bpf_get_current_task(void) - * Returns current task_struct - * Return: current - */ - BPF_FUNC_get_current_task, - - /** - * bpf_probe_write_user(void *dst, void *src, int len) - * safely attempt to write to a location - * @dst: destination address in userspace - * @src: source address on stack - * @len: number of bytes to copy - * Return: 0 on success or negative error - */ - BPF_FUNC_probe_write_user, - + __BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; +#undef __BPF_ENUM_FN /* All flags used by eBPF helper functions, placed here. */ @@ -450,6 +560,31 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* Generic BPF return codes which all BPF program types may support. + * The values are binary compatible with their TC_ACT_* counter-part to + * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT + * programs. + * + * XDP is handled seprately, see XDP_*. + */ +enum bpf_ret_code { + BPF_OK = 0, + /* 1 reserved */ + BPF_DROP = 2, + /* 3-6 reserved */ + BPF_REDIRECT = 7, + /* >127 are reserved for prog type specific return codes */ +}; + +struct bpf_sock { + __u32 bound_dev_if; + __u32 family; + __u32 type; + __u32 protocol; +}; + +#define XDP_PACKET_HEADROOM 256 + /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other * return codes are reserved for future use. Unknown return codes will result -- cgit From 83d994d02bf4594d6bfa9b2b67befb6cff7f9380 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Dec 2016 18:46:15 -0800 Subject: tools lib bpf: use __u32 from linux/types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following issue when building without access to 'u32' type: ./tools/lib/bpf/bpf.h:27:23: error: unknown type name ‘u32’ Signed-off-by: Joe Stringer Acked-by: Wang Nan Cc: Alexei Starovoitov Cc: Daniel Borkmann Link: http://lkml.kernel.org/r/20161209024620.31660-3-joe@ovn.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/bpf.c | 4 ++-- tools/lib/bpf/bpf.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 8143536b462a..89e8e8e5b60e 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -70,7 +70,7 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, size_t insns_cnt, char *license, - u32 kern_version, char *log_buf, size_t log_buf_sz) + __u32 kern_version, char *log_buf, size_t log_buf_sz) { int fd; union bpf_attr attr; @@ -98,7 +98,7 @@ int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, } int bpf_map_update_elem(int fd, void *key, void *value, - u64 flags) + __u64 flags) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 253c3dbb06b4..61130170a6ad 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -30,11 +30,11 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, #define BPF_LOG_BUF_SIZE 65536 int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, size_t insns_cnt, char *license, - u32 kern_version, char *log_buf, + __u32 kern_version, char *log_buf, size_t log_buf_sz); int bpf_map_update_elem(int fd, void *key, void *value, - u64 flags); + __u64 flags); int bpf_map_lookup_elem(int fd, void *key, void *value); int bpf_map_delete_elem(int fd, void *key); -- cgit From a5580c7f7a6d078696458d283df5d6547ad1c740 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Dec 2016 18:46:16 -0800 Subject: tools lib bpf: Add flags to bpf_create_map() Commit 6c905981743 ("bpf: pre-allocate hash map elements") introduces map_flags to bpf_attr for BPF_MAP_CREATE command. Expose this new parameter in libbpf. By exposing it, users can access flags such as whether or not to preallocate the map. Signed-off-by: Joe Stringer Acked-by: Wang Nan Cc: Alexei Starovoitov Cc: Daniel Borkmann Link: http://lkml.kernel.org/r/20161209024620.31660-4-joe@ovn.org [ Added clarifying comment made by Wang Nan ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/bpf.c | 3 ++- tools/lib/bpf/bpf.h | 2 +- tools/lib/bpf/libbpf.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 89e8e8e5b60e..d0afb26c2e0f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -54,7 +54,7 @@ static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, } int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries) + int value_size, int max_entries, __u32 map_flags) { union bpf_attr attr; @@ -64,6 +64,7 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, attr.key_size = key_size; attr.value_size = value_size; attr.max_entries = max_entries; + attr.map_flags = map_flags; return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 61130170a6ad..7fcdce16fd62 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -24,7 +24,7 @@ #include int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, - int max_entries); + int max_entries, __u32 map_flags); /* Recommend log buffer size */ #define BPF_LOG_BUF_SIZE 65536 diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2e974593f3e8..84e6b35da4bd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -854,7 +854,8 @@ bpf_object__create_maps(struct bpf_object *obj) *pfd = bpf_create_map(def->type, def->key_size, def->value_size, - def->max_entries); + def->max_entries, + 0); if (*pfd < 0) { size_t j; int err = *pfd; -- cgit From 2bd42f3aaa53ebe78b9be6f898b7945dd61f9773 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2016 20:56:54 +0100 Subject: perf trace: Check if MAP_32BIT is defined (again) There might be systems where MAP_32BIT is not defined, like some some RHEL7 powerpc versions. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Kyle McMartin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 256763b01741 ("perf trace beauty mmap: Add more conditional defines") Link: http://lkml.kernel.org/r/1481831814-23683-1-git-send-email-jolsa@kernel.org [ Changed the Fixme cset to the one removing the conditional switch case for MAP_32BIT ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mmap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index fd710ab33684..af1cfde6b97b 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -42,7 +42,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(SHARED); P_MMAP_FLAG(PRIVATE); +#ifdef MAP_32BIT P_MMAP_FLAG(32BIT); +#endif P_MMAP_FLAG(ANONYMOUS); P_MMAP_FLAG(DENYWRITE); P_MMAP_FLAG(EXECUTABLE); -- cgit From 9de3ffa1b714e6b8ebc1723f71bc9172a4470f7d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 15 Dec 2016 11:36:24 -0800 Subject: perf bench futex: Fix lock-pi help string Obvious copy/paste typo from the requeue program. Signed-off-by: Davidlohr Bueso Cc: Davidlohr Bueso Link: http://lkml.kernel.org/r/1481830584-30909-1-git-send-email-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-lock-pi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 465012b320ee..6d9d6c40a916 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -48,7 +48,7 @@ static const struct option options[] = { }; static const char * const bench_futex_lock_pi_usage[] = { - "perf bench futex requeue ", + "perf bench futex lock-pi ", NULL }; -- cgit From edee44be59190bf22d5c6e521f3852b7ff16862f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 22 Nov 2016 14:10:50 +0530 Subject: perf annotate: Don't throw error for zero length symbols 'perf report --tui' exits with error when it finds a sample of zero length symbol (i.e. addr == sym->start == sym->end). Actually these are valid samples. Don't exit TUI and show report with such symbols. Reported-and-Tested-by: Anton Blanchard Link: https://lkml.org/lkml/2016/10/8/189 Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Benjamin Herrenschmidt Cc: Chris Riyder Cc: linuxppc-dev@lists.ozlabs.org Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Mackerras Cc: Peter Zijlstra Cc: stable@kernel.org # v4.9+ Link: http://lkml.kernel.org/r/1479804050-5028-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index c81a3950a7fe..06cc04e5806a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -647,7 +647,8 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); - if (addr < sym->start || addr >= sym->end) { + if ((addr < sym->start || addr >= sym->end) && + (addr != sym->end || sym->start != sym->end)) { pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, __LINE__, sym->name, sym->start, addr, sym->end); return -ERANGE; -- cgit From ed6c166cc7dc329736cace3affd2df984fb22ec8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 13 Dec 2016 10:29:44 -0500 Subject: perf diff: Do not overwrite valid build id Fixes a perf diff regression issue which was introduced by commit 5baecbcd9c9a ("perf symbols: we can now read separate debug-info files based on a build ID") The binary name could be same when perf diff different binaries. Build id is used to distinguish between them. However, the previous patch assumes the same binary name has same build id. So it overwrites the build id according to the binary name, regardless of whether the build id is set or not. Check the has_build_id in dso__load. If the build id is already set, use it. Before the fix: $ perf diff 1.perf.data 2.perf.data # Event 'cycles' # # Baseline Delta Shared Object Symbol # ........ ....... ................ ............................. # 99.83% -99.80% tchain_edit [.] f2 0.12% +99.81% tchain_edit [.] f3 0.02% -0.01% [ixgbe] [k] ixgbe_read_reg After the fix: $ perf diff 1.perf.data 2.perf.data # Event 'cycles' # # Baseline Delta Shared Object Symbol # ........ ....... ................ ............................. # 99.83% +0.10% tchain_edit [.] f3 0.12% -0.08% tchain_edit [.] f2 Signed-off-by: Kan Liang Cc: Andi Kleen CC: Dima Kogan Cc: Jiri Olsa Cc: Namhyung Kim Fixes: 5baecbcd9c9a ("perf symbols: we can now read separate debug-info files based on a build ID") Link: http://lkml.kernel.org/r/1481642984-13593-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index df2482b2ba45..dc93940de351 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1459,7 +1459,8 @@ int dso__load(struct dso *dso, struct map *map) * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (is_regular_file(dso->long_name) && + if (!dso->has_build_id && + is_regular_file(dso->long_name) && filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) dso__set_build_id(dso, build_id); -- cgit From 5dc880de6e7c8566fbc8bc5dfc3a922d2d1c5ee3 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 14 Dec 2016 14:05:26 -0800 Subject: tools lib bpf: Add bpf_prog_{attach,detach} Commit d8c5b17f2bc0 ("samples: bpf: add userspace example for attaching eBPF programs to cgroups") added these functions to samples/libbpf, but during this merge all of the samples libbpf functionality is shifting to tools/lib/bpf. Shift these functions there. Committer notes: Use bzero + attr.FIELD = value instead of 'attr = { .FIELD = value, just like the other wrapper calls to sys_bpf with bpf_attr to make this build in older toolchais, such as the ones in CentOS 5 and 6. Signed-off-by: Joe Stringer Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-au2zvtsh55vqeo3v3uw7jr4c@git.kernel.org Link: https://github.com/joestringer/linux/commit/353e6f298c3d0a92fa8bfa61ff898c5050261a12.patch Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/bpf.c | 23 +++++++++++++++++++++++ tools/lib/bpf/bpf.h | 3 +++ 2 files changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index d0afb26c2e0f..3ddb58a36d3c 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -167,3 +167,26 @@ int bpf_obj_get(const char *pathname) return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); } + +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); +} + +int bpf_prog_detach(int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 7fcdce16fd62..a2f9853dd882 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -41,5 +41,8 @@ int bpf_map_delete_elem(int fd, void *key); int bpf_map_get_next_key(int fd, void *key, void *next_key); int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_get(const char *pathname); +int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); +int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); + #endif -- cgit From 0e6758e8231d5905c2e267566e9bd679a68a7b00 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:48 +0900 Subject: perf sched timehist: Honour 'comm_width' when aligning the headers Current default value is 20, but that may change in the future, so make places where we have 20 hardcoded use 'comm_width'. Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161222060350.17655-1-namhyung@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c1c07bfe132c..020f9c9cef7b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1817,7 +1817,7 @@ static void timehist_header(struct perf_sched *sched) printf(" "); } - printf(" %-20s %9s %9s %9s", + printf(" %-*s %9s %9s %9s", comm_width, "task name", "wait time", "sch delay", "run time"); printf("\n"); @@ -1830,7 +1830,8 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %*s ", ncpus, ""); - printf(" %-20s %9s %9s %9s\n", "[tid/pid]", "(msec)", "(msec)", "(msec)"); + printf(" %-*s %9s %9s %9s\n", comm_width, + "[tid/pid]", "(msec)", "(msec)", "(msec)"); /* * separator @@ -1840,7 +1841,7 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %.*s ", ncpus, graph_dotted_line); - printf(" %.20s %.9s %.9s %.9s", + printf(" %.*s %.9s %.9s %.9s", comm_width, graph_dotted_line, graph_dotted_line, graph_dotted_line, graph_dotted_line); -- cgit From 9b8087d72086b249ff744cee237ad12cc5e9255d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:48 +0900 Subject: perf sched timehist: Enlarge default 'comm_width' Current default value is 20 but it's easily changed to a bigger value as task has a long name and different tid and pid. And it makes the output not aligned. So change it to have a large value as summary shows. Committer notes: Before: # perf sched record ^C # perf sched timehist 40602.770537 [0001] rcuos/2[29] 7.970 0.002 0.020 40602.771512 [0003] 0.003 0.000 0.986 40602.771586 [0001] 0.020 0.000 1.049 40602.771606 [0001] qemu-system-x86[3593/3510] 0.000 0.002 0.020 40602.771629 [0003] qemu-system-x86[3510] 0.000 0.003 0.116 40602.771776 [0000] 0.001 0.000 1.892 After: # perf sched timehist 40602.770537 [0001] rcuos/2[29] 7.970 0.002 0.020 40602.771512 [0003] 0.003 0.000 0.986 40602.771586 [0001] 0.020 0.000 1.049 40602.771606 [0001] qemu-system-x86[3593/3510] 0.000 0.002 0.020 40602.771629 [0003] qemu-system-x86[3510] 0.000 0.003 0.116 Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161222060350.17655-1-namhyung@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 020f9c9cef7b..c65f16cbae62 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1775,7 +1775,7 @@ static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) return r->last_time[cpu]; } -static int comm_width = 20; +static int comm_width = 30; static char *timehist_get_commstr(struct thread *thread) { -- cgit From 4fa0d1aa2711a5053fb2422331bdf6bce99b5f87 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:48 +0900 Subject: perf sched timehist: Remove hardcoded 'comm_width' check at print_summary Now that the default 'comm_width' value is 30, no need to check that at print_summary, Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161222060350.17655-1-namhyung@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c65f16cbae62..5052caa91caa 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2627,9 +2627,6 @@ static void timehist_print_summary(struct perf_sched *sched, memset(&totals, 0, sizeof(totals)); - if (comm_width < 30) - comm_width = 30; - if (sched->idle_hist) { printf("\nIdle-time summary\n"); printf("%*s parent sched-out ", comm_width, "comm"); -- cgit From bdd75729e5d279d734e8d3fb41ef4818ac1598ab Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:49 +0900 Subject: perf sched timehist: Fix invalid period calculation When --time option is given with a value outside recorded time, the last sample time (tprev) was set to that value and run time calculation might be incorrect. This is a problem of the first samples for each cpus since it would skip the runtime update when tprev is 0. But with --time option it had non-zero (which is invalid) value so the calculation is also incorrect. For example, let's see the followging: $ perf sched timehist time cpu task name wait time sch delay run time [tid/pid] (msec) (msec) (msec) --------------- ------ ------------------------------ --------- --------- --------- 3195.968367 [0003] 0.000 0.000 0.000 3195.968386 [0002] Timer[4306/4277] 0.000 0.000 0.018 3195.968397 [0002] Web Content[4277] 0.000 0.000 0.000 3195.968595 [0001] JS Helper[4302/4277] 0.000 0.000 0.000 3195.969217 [0000] 0.000 0.000 0.621 3195.969251 [0001] kworker/1:1H[291] 0.000 0.000 0.033 The sample starts at 3195.968367 but when I gave a time interval from 3194 to 3196 (in sec) it will calculate the whole 2 second as runtime. In below, 2 cpus accounted it as runtime, other 2 cpus accounted it as idle time. Before: $ perf sched timehist --time 3194,3196 -s | tail Idle stats: CPU 0 idle for 1995.991 msec CPU 1 idle for 20.793 msec CPU 2 idle for 30.191 msec CPU 3 idle for 1999.852 msec Total number of unique tasks: 23 Total number of context switches: 128 Total run time (msec): 3724.940 After: $ perf sched timehist --time 3194,3196 -s | tail Idle stats: CPU 0 idle for 10.811 msec CPU 1 idle for 20.793 msec CPU 2 idle for 30.191 msec CPU 3 idle for 18.337 msec Total number of unique tasks: 23 Total number of context switches: 128 Total run time (msec): 18.139 Committer notes: Further testing: Before: Idle stats: CPU 0 idle for 229.785 msec CPU 1 idle for 937.944 msec CPU 2 idle for 188.931 msec CPU 3 idle for 986.185 msec After: # perf sched timehist --time 40602,40603 -s | tail Idle stats: CPU 0 idle for 229.785 msec CPU 1 idle for 175.407 msec CPU 2 idle for 188.931 msec CPU 3 idle for 223.657 msec Total number of unique tasks: 68 Total number of context switches: 814 Total run time (msec): 97.688 # for cpu in `seq 0 3` ; do echo -n "CPU $cpu idle for " ; perf sched timehist --time 40602,40603 | grep "\[000${cpu}\].*\" | tr -s ' ' | cut -d' ' -f7 | awk '{entries++ ; s+=$1} END {print s " msec (entries: " entries ")"}' ; done CPU 0 idle for 229.721 msec (entries: 123) CPU 1 idle for 175.381 msec (entries: 65) CPU 2 idle for 188.903 msec (entries: 56) CPU 3 idle for 223.61 msec (entries: 102) Difference due to the idle stats being accounted at nanoseconds precision while the entries in 'perf sched timehist' are trucated at msec.usec. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Fixes: 853b74071110 ("perf sched timehist: Add option to specify time window of interest") Link: http://lkml.kernel.org/r/20161222060350.17655-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5052caa91caa..d53e706a6f17 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2405,7 +2405,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, if (ptime->start && ptime->start > t) goto out; - if (ptime->start > tprev) + if (tprev && ptime->start > tprev) tprev = ptime->start; /* -- cgit