summaryrefslogtreecommitdiff
path: root/kernel/smp.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/smp.c')
-rw-r--r--kernel/smp.c56
1 files changed, 43 insertions, 13 deletions
diff --git a/kernel/smp.c b/kernel/smp.c
index f085ebcdf9e7..974f3a3962e8 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -25,6 +25,7 @@
#include <linux/nmi.h>
#include <linux/sched/debug.h>
#include <linux/jump_label.h>
+#include <linux/string_choices.h>
#include <trace/events/ipi.h>
#define CREATE_TRACE_POINTS
@@ -169,9 +170,9 @@ static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
static DEFINE_PER_CPU(void *, cur_csd_info);
static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */
-module_param(csd_lock_timeout, ulong, 0444);
+module_param(csd_lock_timeout, ulong, 0644);
static int panic_on_ipistall; /* CSD panic timeout in milliseconds, 300000 for five minutes. */
-module_param(panic_on_ipistall, int, 0444);
+module_param(panic_on_ipistall, int, 0644);
static atomic_t csd_bug_count = ATOMIC_INIT(0);
@@ -207,12 +208,25 @@ static int csd_lock_wait_getcpu(call_single_data_t *csd)
return -1;
}
+static atomic_t n_csd_lock_stuck;
+
+/**
+ * csd_lock_is_stuck - Has a CSD-lock acquisition been stuck too long?
+ *
+ * Returns @true if a CSD-lock acquisition is stuck and has been stuck
+ * long enough for a "non-responsive CSD lock" message to be printed.
+ */
+bool csd_lock_is_stuck(void)
+{
+ return !!atomic_read(&n_csd_lock_stuck);
+}
+
/*
* Complain if too much time spent waiting. Note that only
* the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
* so waiting on other types gets much less information.
*/
-static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
+static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id, unsigned long *nmessages)
{
int cpu = -1;
int cpux;
@@ -228,15 +242,26 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in
cpu = csd_lock_wait_getcpu(csd);
pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
*bug_id, raw_smp_processor_id(), cpu);
+ atomic_dec(&n_csd_lock_stuck);
return true;
}
- ts2 = sched_clock();
+ ts2 = ktime_get_mono_fast_ns();
/* How long since we last checked for a stuck CSD lock.*/
ts_delta = ts2 - *ts1;
- if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
+ if (likely(ts_delta <= csd_lock_timeout_ns * (*nmessages + 1) *
+ (!*nmessages ? 1 : (ilog2(num_online_cpus()) / 2 + 1)) ||
+ csd_lock_timeout_ns == 0))
return false;
+ if (ts0 > ts2) {
+ /* Our own sched_clock went backward; don't blame another CPU. */
+ ts_delta = ts0 - ts2;
+ pr_alert("sched_clock on CPU %d went backward by %llu ns\n", raw_smp_processor_id(), ts_delta);
+ *ts1 = ts2;
+ return false;
+ }
+
firsttime = !*bug_id;
if (firsttime)
*bug_id = atomic_inc_return(&csd_bug_count);
@@ -248,9 +273,12 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in
cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
/* How long since this CSD lock was stuck. */
ts_delta = ts2 - ts0;
- pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
- firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts_delta,
+ pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %lld ns for CPU#%02d %pS(%ps).\n",
+ firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), (s64)ts_delta,
cpu, csd->func, csd->info);
+ (*nmessages)++;
+ if (firsttime)
+ atomic_inc(&n_csd_lock_stuck);
/*
* If the CSD lock is still stuck after 5 minutes, it is unlikely
* to become unstuck. Use a signed comparison to avoid triggering
@@ -289,12 +317,13 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in
*/
static void __csd_lock_wait(call_single_data_t *csd)
{
+ unsigned long nmessages = 0;
int bug_id = 0;
u64 ts0, ts1;
- ts1 = ts0 = sched_clock();
+ ts1 = ts0 = ktime_get_mono_fast_ns();
for (;;) {
- if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
+ if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id, &nmessages))
break;
cpu_relax();
}
@@ -786,7 +815,8 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
WARN_ON_ONCE(!in_task());
/* Check if we need local execution. */
- if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
+ if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask) &&
+ (!cond_func || cond_func(this_cpu, info)))
run_local = true;
/* Check if we need remote execution, i.e., any CPU excluding this one. */
@@ -839,7 +869,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
send_call_function_ipi_mask(cfd->cpumask_ipi);
}
- if (run_local && (!cond_func || cond_func(this_cpu, info))) {
+ if (run_local) {
unsigned long flags;
local_irq_save(flags);
@@ -982,8 +1012,7 @@ void __init smp_init(void)
num_nodes = num_online_nodes();
num_cpus = num_online_cpus();
pr_info("Brought up %d node%s, %d CPU%s\n",
- num_nodes, (num_nodes > 1 ? "s" : ""),
- num_cpus, (num_cpus > 1 ? "s" : ""));
+ num_nodes, str_plural(num_nodes), num_cpus, str_plural(num_cpus));
/* Any cleanup work */
smp_cpus_done(setup_max_cpus);
@@ -1119,6 +1148,7 @@ int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
queue_work_on(cpu, system_wq, &sscs.work);
wait_for_completion(&sscs.done);
+ destroy_work_on_stack(&sscs.work);
return sscs.ret;
}