From f4deaf90212c18d4b6d0687f0cba4c22d90b3391 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Sep 2020 13:19:12 -0700 Subject: x86/cpu: Avoid cpuinfo-induced IPI pileups The aperfmperf_snapshot_cpu() function is invoked upon access to /proc/cpuinfo, and it does do an early exit if the specified CPU has recently done a snapshot. Unfortunately, the indication that a snapshot has been completed is set in an IPI handler, and the execution of this handler can be delayed by any number of unfortunate events. This means that a system that starts a number of applications, each of which parses /proc/cpuinfo, can suffer from an smp_call_function_single() storm, especially given that each access to /proc/cpuinfo invokes smp_call_function_single() for all CPUs. Please note that this is not theoretical speculation. Note also that one CPU's pending IPI serves all requests, so there is no point in ever having more than one IPI pending to a given CPU. This commit therefore suppresses duplicate IPIs to a given CPU via a new ->scfpending field in the aperfmperf_sample structure. This field is set to the value one if an IPI is pending to the corresponding CPU and to zero otherwise. The aperfmperf_snapshot_cpu() function uses atomic_xchg() to set this field to the value one and sample the old value. If this function's "wait" parameter is zero, smp_call_function_single() is called only if the old value of the ->scfpending field was zero. The IPI handler uses atomic_set_release() to set this new field to zero just before returning, so that the prior stores into the aperfmperf_sample structure are seen by future requests that get to the atomic_xchg(). Future requests that pass the elapsed-time check are ordered by the fact that on x86 loads act as acquire loads, just as was the case prior to this change. The return value is based off of the age of the prior snapshot, just as before. Reported-by: Dave Jones [ paulmck: Allow /proc/cpuinfo to take advantage of arch_freq_get_on_cpu(). ] [ paulmck: Add comment on memory barrier. ] Signed-off-by: Paul E. McKenney Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: --- arch/x86/kernel/cpu/aperfmperf.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index e2f319dc992d..dd3261dab0fb 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -19,6 +19,7 @@ struct aperfmperf_sample { unsigned int khz; + atomic_t scfpending; ktime_t time; u64 aperf; u64 mperf; @@ -62,17 +63,20 @@ static void aperfmperf_snapshot_khz(void *dummy) s->aperf = aperf; s->mperf = mperf; s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); + atomic_set_release(&s->scfpending, 0); } static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait) { s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu)); + struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu); /* Don't bother re-computing within the cache threshold time. */ if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) return true; - smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait); + if (!atomic_xchg(&s->scfpending, 1) || wait) + smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait); /* Return false if the previous iteration was too long ago. */ return time_delta <= APERFMPERF_STALE_THRESHOLD_MS; @@ -118,6 +122,8 @@ void arch_freq_prepare_all(void) unsigned int arch_freq_get_on_cpu(int cpu) { + struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu); + if (!cpu_khz) return 0; @@ -131,6 +137,8 @@ unsigned int arch_freq_get_on_cpu(int cpu) return per_cpu(samples.khz, cpu); msleep(APERFMPERF_REFRESH_DELAY_MS); + atomic_set(&s->scfpending, 1); + smp_mb(); /* ->scfpending before smp_call_function_single(). */ smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); return per_cpu(samples.khz, cpu); -- cgit From 3fcd6a230fa7d03bffcb831a81b40435c146c12b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 3 Sep 2020 15:23:29 -0700 Subject: x86/cpu: Avoid cpuinfo-induced IPIing of idle CPUs Currently, accessing /proc/cpuinfo sends IPIs to idle CPUs in order to learn their clock frequency. Which is a bit strange, given that waking them from idle likely significantly changes their clock frequency. This commit therefore avoids sending /proc/cpuinfo-induced IPIs to idle CPUs. [ paulmck: Also check for idle in arch_freq_prepare_all(). ] Signed-off-by: Paul E. McKenney Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: --- arch/x86/kernel/cpu/aperfmperf.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index dd3261dab0fb..22911deacb6e 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "cpu.h" @@ -93,6 +94,9 @@ unsigned int aperfmperf_get_khz(int cpu) if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) return 0; + if (rcu_is_idle_cpu(cpu)) + return 0; /* Idle CPUs are completely uninteresting. */ + aperfmperf_snapshot_cpu(cpu, ktime_get(), true); return per_cpu(samples.khz, cpu); } @@ -112,6 +116,8 @@ void arch_freq_prepare_all(void) for_each_online_cpu(cpu) { if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) continue; + if (rcu_is_idle_cpu(cpu)) + continue; /* Idle CPUs are completely uninteresting. */ if (!aperfmperf_snapshot_cpu(cpu, now, false)) wait = true; } -- cgit From 29368e09392123800e5e2bf0f3eda91f16972e52 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 20 Oct 2020 21:13:55 -0700 Subject: x86/smpboot: Move rcu_cpu_starting() earlier The call to rcu_cpu_starting() in mtrr_ap_init() is not early enough in the CPU-hotplug onlining process, which results in lockdep splats as follows: ============================= WARNING: suspicious RCU usage 5.9.0+ #268 Not tainted ----------------------------- kernel/kprobes.c:300 RCU-list traversed in non-reader section!! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/1/0. stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0+ #268 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: dump_stack+0x77/0x97 __is_insn_slot_addr+0x15d/0x170 kernel_text_address+0xba/0xe0 ? get_stack_info+0x22/0xa0 __kernel_text_address+0x9/0x30 show_trace_log_lvl+0x17d/0x380 ? dump_stack+0x77/0x97 dump_stack+0x77/0x97 __lock_acquire+0xdf7/0x1bf0 lock_acquire+0x258/0x3d0 ? vprintk_emit+0x6d/0x2c0 _raw_spin_lock+0x27/0x40 ? vprintk_emit+0x6d/0x2c0 vprintk_emit+0x6d/0x2c0 printk+0x4d/0x69 start_secondary+0x1c/0x100 secondary_startup_64_no_verify+0xb8/0xbb This is avoided by moving the call to rcu_cpu_starting up near the beginning of the start_secondary() function. Note that the raw_smp_processor_id() is required in order to avoid calling into lockdep before RCU has declared the CPU to be watched for readers. Link: https://lore.kernel.org/lkml/160223032121.7002.1269740091547117869.tip-bot2@tip-bot2/ Reported-by: Qian Cai Suggested-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- arch/x86/kernel/cpu/mtrr/mtrr.c | 2 -- arch/x86/kernel/smpboot.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 6a80f36b5d59..5f436cb4f7c4 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -794,8 +794,6 @@ void mtrr_ap_init(void) if (!use_intel() || mtrr_aps_delayed_init) return; - rcu_cpu_starting(smp_processor_id()); - /* * Ideally we should hold mtrr_mutex here to avoid mtrr entries * changed, but this routine will be called in cpu boot time, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index de776b2e6046..99bdcebaedfc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -229,6 +229,7 @@ static void notrace start_secondary(void *unused) #endif cpu_init_exception_handling(); cpu_init(); + rcu_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); preempt_disable(); smp_callin(); -- cgit