From 1ea1bdf7faa4d0b5293e605f2e1ef1c2c59f6b53 Mon Sep 17 00:00:00 2001 From: Ari Kauppi Date: Thu, 20 Jan 2011 13:57:18 -0500 Subject: oprofile: Fix usage of CONFIG_HW_PERF_EVENTS for oprofile_perf_init and friends The implementations are flagged in Makefile with CONFIG_HW_PERF_EVENTS instead of CONFIG_PERF_EVENTS. Cc: stable@kernel.org # 37.x Signed-off-by: Ari Kauppi Signed-off-by: Robert Richter --- include/linux/oprofile.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 32fb81212fd1..54c83827f913 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -186,10 +186,10 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val); int oprofile_add_data64(struct op_entry *entry, u64 val); int oprofile_write_commit(struct op_entry *entry); -#ifdef CONFIG_PERF_EVENTS +#ifdef CONFIG_HW_PERF_EVENTS int __init oprofile_perf_init(struct oprofile_operations *ops); void oprofile_perf_exit(void); char *op_name_from_perf_id(void); -#endif /* CONFIG_PERF_EVENTS */ +#endif /* CONFIG_HW_PERF_EVENTS */ #endif /* OPROFILE_H */ -- cgit From d14dd7e20d5e526557f5d3cfef4046a642f80924 Mon Sep 17 00:00:00 2001 From: Ari Kauppi Date: Thu, 20 Jan 2011 13:57:19 -0500 Subject: ARM: oprofile: Fix backtraces in timer mode Always allow backtraces when using oprofile on ARM, even if a PMU isn't present. Restores functionality originally introduced in commit 1b7b56982fdcd9d85effd76f3928cf5d6eb26155 ("oprofile: Always allow backtraces on ARM") by Richard Purdie. It is not that obvious, but there is now only one oprofile_arch_init() function. So the .backtrace callback is available also in timer mode. Implemented by removing code and using stubs for oprofile_perf_{init, exit} provided by <linux/oprofile.h>. This allows cleaning of other architecture specific implementations too. 
Cc: stable@kernel.org # 37.x Signed-off-by: Ari Kauppi Acked-by: Will Deacon Signed-off-by: Robert Richter --- arch/arm/oprofile/common.c | 12 ++---------- include/linux/oprofile.h | 9 +++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 8aa974491dfc..2b663918c464 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -10,8 +10,6 @@ */ #include -#include -#include #include #include #include @@ -46,6 +44,7 @@ char *op_name_from_perf_id(void) return NULL; } } +#endif static int report_trace(struct stackframe *frame, void *d) { @@ -111,6 +110,7 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth) int __init oprofile_arch_init(struct oprofile_operations *ops) { + /* provide backtrace support also in timer mode: */ ops->backtrace = arm_backtrace; return oprofile_perf_init(ops); @@ -120,11 +120,3 @@ void __exit oprofile_arch_exit(void) { oprofile_perf_exit(); } -#else -int __init oprofile_arch_init(struct oprofile_operations *ops) -{ - pr_info("oprofile: hardware counters not available\n"); - return -ENODEV; -} -void __exit oprofile_arch_exit(void) {} -#endif /* CONFIG_HW_PERF_EVENTS */ diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 54c83827f913..1ca64113efe8 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include /* Each escaped entry is prefixed by ESCAPE_CODE @@ -190,6 +192,13 @@ int oprofile_write_commit(struct op_entry *entry); int __init oprofile_perf_init(struct oprofile_operations *ops); void oprofile_perf_exit(void); char *op_name_from_perf_id(void); +#else +static inline int __init oprofile_perf_init(struct oprofile_operations *ops) +{ + pr_info("oprofile: hardware counters not available\n"); + return -ENODEV; +} +static inline void oprofile_perf_exit(void) { } #endif /* CONFIG_HW_PERF_EVENTS */ #endif /* OPROFILE_H */ -- 
cgit From 5651f7f47dbb1cf2b95a60582546db4ff508e2b4 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Wed, 9 Feb 2011 14:02:33 -0500 Subject: watchdog, nmi: Lower the severity of error messages During boot if the hardlockup detector fails to initialize, it complains very loudly. Some failures should be expected under certain situations, ie no lapics, or resource in-use. Tone those error messages down a bit. Keep the rest at a high level. Reported-by: Paul Bolle Tested-by: Paul Bolle Signed-off-by: Don Zickus Cc: Peter Zijlstra LKML-Reference: <1297278153-21111-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f37f974aa81b..18bb15776c57 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -363,8 +363,14 @@ static int watchdog_nmi_enable(int cpu) goto out_save; } - printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n", - cpu, PTR_ERR(event)); + + /* vary the KERN level based on the returned errno */ + if (PTR_ERR(event) == -EOPNOTSUPP) + printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu); + else if (PTR_ERR(event) == -ENOENT) + printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu); + else + printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); return PTR_ERR(event); /* success path */ -- cgit From 401b8e1317d288f28d6e1afd13271dcb08fd9869 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 10 Feb 2011 12:52:47 -0200 Subject: perf tools: Fix thread_map event synthesizing in top and record Jeff Moyer reported these messages: Warning: ... 
trying to fall back to cpu-clock-ticks couldn't open /proc/-1/status couldn't open /proc/-1/maps [ls output] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.008 MB perf.data (~363 samples) ] That led me and David Ahern to see that something was fishy on the thread synthesizing routines, at least for the case where the workload is started from 'perf record', as -1 is the default for target_tid in 'perf record --tid' parameter, so somehow we were trying to synthesize the PERF_RECORD_MMAP and PERF_RECORD_COMM events for the thread -1, a bug. So I investigated this and noticed that when we introduced support for recording a process and its threads using --pid some bugs were introduced and that the way to fix it was, instead of passing the target_tid to the event synthesizing routines, to pass the thread_map that has the list of threads for a --pid or just the single thread for a --tid. Checked in the following ways: On an 8-way machine run cyclictest: [root@emilia ~]# perf record cyclictest -a -t -n -p99 -i100 -d50 policy: fifo: loadavg: 0.00 0.13 0.31 2/139 28798 T: 0 (28791) P:99 I:100 C: 25072 Min: 4 Act: 5 Avg: 6 Max: 122 T: 1 (28792) P:98 I:150 C: 16715 Min: 4 Act: 6 Avg: 5 Max: 27 T: 2 (28793) P:97 I:200 C: 12534 Min: 4 Act: 5 Avg: 4 Max: 8 T: 3 (28794) P:96 I:250 C: 10028 Min: 4 Act: 5 Avg: 5 Max: 96 T: 4 (28795) P:95 I:300 C: 8357 Min: 5 Act: 6 Avg: 5 Max: 12 T: 5 (28796) P:94 I:350 C: 7163 Min: 5 Act: 6 Avg: 5 Max: 12 T: 6 (28797) P:93 I:400 C: 6267 Min: 4 Act: 5 Avg: 5 Max: 9 T: 7 (28798) P:92 I:450 C: 5571 Min: 4 Act: 5 Avg: 5 Max: 9 ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.108 MB perf.data (~4719 samples) ] [root@emilia ~]# This will create one extra thread per CPU: [root@emilia ~]# tuna -t cyclictest -CP thread ctxt_switches pid SCHED_ rtpri affinity voluntary nonvoluntary cmd 28825 OTHER 0 0xff 2169 671 cyclictest 28832 FIFO 93 6 52338 1 cyclictest 28833 FIFO 
92 7 46524 1 cyclictest 28826 FIFO 99 0 209360 1 cyclictest 28827 FIFO 98 1 139577 1 cyclictest 28828 FIFO 97 2 104686 0 cyclictest 28829 FIFO 96 3 83751 1 cyclictest 28830 FIFO 95 4 69794 1 cyclictest 28831 FIFO 94 5 59825 1 cyclictest [root@emilia ~]# So we should expect only samples for the above 9 threads when using the --dump-raw-trace|-D perf report switch to look at the column with the tid: [root@emilia ~]# perf report -D | grep RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort | uniq -c 629 28825 110 28826 491 28827 308 28828 198 28829 621 28830 225 28831 203 28832 89 28833 [root@emilia ~]# So for workloads started by 'perf record' seems to work, now for existing workloads, just run cyclictest first, without 'perf record': [root@emilia ~]# tuna -t cyclictest -CP thread ctxt_switches pid SCHED_ rtpri affinity voluntary nonvoluntary cmd 28859 OTHER 0 0xff 594 200 cyclictest 28864 FIFO 95 4 16587 1 cyclictest 28865 FIFO 94 5 14219 1 cyclictest 28866 FIFO 93 6 12443 0 cyclictest 28867 FIFO 92 7 11062 1 cyclictest 28860 FIFO 99 0 49779 1 cyclictest 28861 FIFO 98 1 33190 1 cyclictest 28862 FIFO 97 2 24895 1 cyclictest 28863 FIFO 96 3 19918 1 cyclictest [root@emilia ~]# and then later did: [root@emilia ~]# perf record --pid 28859 sleep 3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.027 MB perf.data (~1195 samples) ] [root@emilia ~]# To collect 3 seconds worth of samples for pid 28859 and its children: [root@emilia ~]# perf report -D | grep RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort | uniq -c 15 28859 33 28860 19 28861 13 28862 13 28863 10 28864 11 28865 9 28866 255 28867 [root@emilia ~]# Works, last thing is to check if looking at just one of those threads also works: [root@emilia ~]# perf record --tid 28866 sleep 3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.006 MB perf.data (~242 samples) ] [root@emilia ~]# perf report -D | grep RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort | 
uniq -c 3 28866 [root@emilia ~]# Works too. Reported-by: Jeff Moyer Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jeff Moyer Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 ++-- tools/perf/builtin-top.c | 2 +- tools/perf/util/event.c | 18 +++++++++++++----- tools/perf/util/event.h | 6 ++++-- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b2f729fdb317..60cac6f92e8b 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -759,8 +759,8 @@ static int __cmd_record(int argc, const char **argv) perf_session__process_machines(session, event__synthesize_guest_os); if (!system_wide) - event__synthesize_thread(target_tid, process_synthesized_event, - session); + event__synthesize_thread_map(threads, process_synthesized_event, + session); else event__synthesize_threads(process_synthesized_event, session); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b6998e055767..5a29d9cd9486 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1306,7 +1306,7 @@ static int __cmd_top(void) return -ENOMEM; if (target_tid != -1) - event__synthesize_thread(target_tid, event__process, session); + event__synthesize_thread_map(threads, event__process, session); else event__synthesize_threads(event__process, session); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1478ab4ee222..50d0a931497a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -263,11 +263,12 @@ static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event, process, session); } -int event__synthesize_thread(pid_t pid, event__handler_t process, - struct perf_session *session) +int event__synthesize_thread_map(struct thread_map *threads, + event__handler_t process, + struct perf_session 
*session) { event_t *comm_event, *mmap_event; - int err = -1; + int err = -1, thread; comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); if (comm_event == NULL) @@ -277,8 +278,15 @@ int event__synthesize_thread(pid_t pid, event__handler_t process, if (mmap_event == NULL) goto out_free_comm; - err = __event__synthesize_thread(comm_event, mmap_event, pid, - process, session); + err = 0; + for (thread = 0; thread < threads->nr; ++thread) { + if (__event__synthesize_thread(comm_event, mmap_event, + threads->map[thread], + process, session)) { + err = -1; + break; + } + } free(mmap_event); out_free_comm: free(comm_event); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 2b7e91902f10..cc7b52f9b492 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -135,14 +135,16 @@ typedef union event_union { void event__print_totals(void); struct perf_session; +struct thread_map; typedef int (*event__handler_synth_t)(event_t *event, struct perf_session *session); typedef int (*event__handler_t)(event_t *event, struct sample_data *sample, struct perf_session *session); -int event__synthesize_thread(pid_t pid, event__handler_t process, - struct perf_session *session); +int event__synthesize_thread_map(struct thread_map *threads, + event__handler_t process, + struct perf_session *session); int event__synthesize_threads(event__handler_t process, struct perf_session *session); int event__synthesize_kernel_mmap(event__handler_t process, -- cgit From d91309f69b7bdb64aeb30106fde8d18c5dd354b5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Feb 2011 22:07:46 +0100 Subject: x86: Fix text_poke_smp_batch() deadlock Fix this deadlock - we are already holding the mutex: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.38-rc4-test+ #1 ------------------------------------------------------- bash/1850 is trying to acquire lock: (text_mutex){+.+.+.}, at: [] 
return_to_handler+0x0/0x2f but task is already holding lock: (smp_alt){+.+...}, at: [] return_to_handler+0x0/0x2f which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (smp_alt){+.+...}: [] lock_acquire+0xcd/0xf8 [] __mutex_lock_common+0x4c/0x339 [] mutex_lock_nested+0x3e/0x43 [] alternatives_smp_switch+0x77/0x1d8 [] do_boot_cpu+0xd7/0x762 [] native_cpu_up+0xe6/0x16a [] _cpu_up+0x9d/0xee [] cpu_up+0xd3/0xe7 [] kernel_init+0xe8/0x20a [] kernel_thread_helper+0x4/0x10 -> #1 (cpu_hotplug.lock){+.+.+.}: [] lock_acquire+0xcd/0xf8 [] __mutex_lock_common+0x4c/0x339 [] mutex_lock_nested+0x3e/0x43 [] get_online_cpus+0x41/0x55 [] stop_machine+0x1e/0x3e [] text_poke_smp_batch+0x3a/0x3c [] arch_optimize_kprobes+0x10d/0x11c [] kprobe_optimizer+0x152/0x222 [] process_one_work+0x1d3/0x335 [] worker_thread+0x104/0x1a4 [] kthread+0x9d/0xa5 [] kernel_thread_helper+0x4/0x10 -> #0 (text_mutex){+.+.+.}: other info that might help us debug this: 6 locks held by bash/1850: #0: (&buffer->mutex){+.+.+.}, at: [] return_to_handler+0x0/0x2f #1: (s_active#75){.+.+.+}, at: [] return_to_handler+0x0/0x2f #2: (x86_cpu_hotplug_driver_mutex){+.+.+.}, at: [] return_to_handler+0x0/0x2f #3: (cpu_add_remove_lock){+.+.+.}, at: [] return_to_handler+0x0/0x2f #4: (cpu_hotplug.lock){+.+.+.}, at: [] return_to_handler+0x0/0x2f #5: (smp_alt){+.+...}, at: [] return_to_handler+0x0/0x2f stack backtrace: Pid: 1850, comm: bash Not tainted 2.6.38-rc4-test+ #1 Call Trace: [] print_circular_bug+0xa8/0xb7 [] mutex_lock_nested+0x3e/0x43 [] alternatives_smp_unlock+0x3d/0x93 [] alternatives_smp_switch+0x198/0x1d8 [] native_cpu_die+0x65/0x95 [] _cpu_down+0x13e/0x202 [] sysfs_write_file+0x108/0x144 [] vfs_write+0xac/0xff [] sys_write+0x4a/0x6e Reported-by: Steven Rostedt Tested-by: Steven Rostedt Signed-off-by: Peter Zijlstra Cc: mathieu.desnoyers@efficios.com Cc: rusty@rustcorp.com.au Cc: ananth@in.ibm.com Cc: masami.hiramatsu.pt@hitachi.com Cc: fweisbec@gmail.com Cc: 
jbeulich@novell.com Cc: jbaron@redhat.com Cc: mhiramat@redhat.com LKML-Reference: <1297458466.5226.93.camel@laptop> Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 123608531c8f..7038b95d363f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -671,7 +671,7 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) atomic_set(&stop_machine_first, 1); wrote_text = 0; - stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); + __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); } #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) -- cgit