summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/locking/locktorture.c15
-rw-r--r--kernel/rcu/rcu.h17
-rw-r--r--kernel/rcu/rcuperf.c14
-rw-r--r--kernel/rcu/rcutorture.c67
-rw-r--r--kernel/rcu/srcutree.c16
-rw-r--r--kernel/rcu/tree.c300
-rw-r--r--kernel/rcu/tree.h4
-rw-r--r--kernel/rcu/tree_exp.h9
-rw-r--r--kernel/rcu/tree_plugin.h4
-rw-r--r--kernel/rcu/tree_stall.h6
-rw-r--r--kernel/rcu/update.c28
-rw-r--r--kernel/torture.c29
12 files changed, 413 insertions, 96 deletions
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 99475a66c94f..5efbfc68ce99 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -618,7 +618,7 @@ static struct lock_torture_ops percpu_rwsem_lock_ops = {
static int lock_torture_writer(void *arg)
{
struct lock_stress_stats *lwsp = arg;
- static DEFINE_TORTURE_RANDOM(rand);
+ DEFINE_TORTURE_RANDOM(rand);
VERBOSE_TOROUT_STRING("lock_torture_writer task started");
set_user_nice(current, MAX_NICE);
@@ -655,7 +655,7 @@ static int lock_torture_writer(void *arg)
static int lock_torture_reader(void *arg)
{
struct lock_stress_stats *lrsp = arg;
- static DEFINE_TORTURE_RANDOM(rand);
+ DEFINE_TORTURE_RANDOM(rand);
VERBOSE_TOROUT_STRING("lock_torture_reader task started");
set_user_nice(current, MAX_NICE);
@@ -696,15 +696,16 @@ static void __torture_print_stats(char *page,
if (statp[i].n_lock_fail)
fail = true;
sum += statp[i].n_lock_acquired;
- if (max < statp[i].n_lock_fail)
- max = statp[i].n_lock_fail;
- if (min > statp[i].n_lock_fail)
- min = statp[i].n_lock_fail;
+ if (max < statp[i].n_lock_acquired)
+ max = statp[i].n_lock_acquired;
+ if (min > statp[i].n_lock_acquired)
+ min = statp[i].n_lock_acquired;
}
page += sprintf(page,
"%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n",
write ? "Writes" : "Reads ",
- sum, max, min, max / 2 > min ? "???" : "",
+ sum, max, min,
+ !onoff_interval && max / 2 > min ? "???" : "",
fail, fail ? "!!!" : "");
if (fail)
atomic_inc(&cxt.n_lock_torture_errors);
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index f6ce173e35f6..00ddc92c5774 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -198,6 +198,13 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
}
#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+extern int rcu_cpu_stall_suppress_at_boot;
+
+static inline bool rcu_stall_is_suppressed_at_boot(void)
+{
+ return rcu_cpu_stall_suppress_at_boot && !rcu_inkernel_boot_has_ended();
+}
+
#ifdef CONFIG_RCU_STALL_COMMON
extern int rcu_cpu_stall_ftrace_dump;
@@ -205,6 +212,11 @@ extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
int rcu_jiffies_till_stall_check(void);
+static inline bool rcu_stall_is_suppressed(void)
+{
+ return rcu_stall_is_suppressed_at_boot() || rcu_cpu_stall_suppress;
+}
+
#define rcu_ftrace_dump_stall_suppress() \
do { \
if (!rcu_cpu_stall_suppress) \
@@ -218,6 +230,11 @@ do { \
} while (0)
#else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */
+
+static inline bool rcu_stall_is_suppressed(void)
+{
+ return rcu_stall_is_suppressed_at_boot();
+}
#define rcu_ftrace_dump_stall_suppress()
#define rcu_ftrace_dump_stall_unsuppress()
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index da94b89cd531..a4a8d097d84d 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/err.h>
@@ -611,6 +612,7 @@ kfree_perf_thread(void *arg)
long me = (long)arg;
struct kfree_obj *alloc_ptr;
u64 start_time, end_time;
+ long long mem_begin, mem_during = 0;
VERBOSE_PERFOUT_STRING("kfree_perf_thread task started");
set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
@@ -626,6 +628,12 @@ kfree_perf_thread(void *arg)
}
do {
+ if (!mem_during) {
+ mem_during = mem_begin = si_mem_available();
+ } else if (loop % (kfree_loops / 4) == 0) {
+ mem_during = (mem_during + si_mem_available()) / 2;
+ }
+
for (i = 0; i < kfree_alloc_num; i++) {
alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
if (!alloc_ptr)
@@ -645,9 +653,11 @@ kfree_perf_thread(void *arg)
else
b_rcu_gp_test_finished = cur_ops->get_gp_seq();
- pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d, batches: %ld\n",
+ pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d, batches: %ld, memory footprint: %lldMB\n",
(unsigned long long)(end_time - start_time), kfree_loops,
- rcuperf_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started));
+ rcuperf_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started),
+ (mem_begin - mem_during) >> (20 - PAGE_SHIFT));
+
if (shutdown) {
smp_mb(); /* Assign before wake. */
wake_up(&shutdown_wq);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 1aeecc165b21..5453bd557f43 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -339,7 +339,7 @@ rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
* period, and we want a long delay occasionally to trigger
* force_quiescent_state. */
- if (!rcu_fwd_cb_nodelay &&
+ if (!READ_ONCE(rcu_fwd_cb_nodelay) &&
!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) {
started = cur_ops->get_gp_seq();
ts = rcu_trace_clock_local();
@@ -375,11 +375,12 @@ rcu_torture_pipe_update_one(struct rcu_torture *rp)
{
int i;
- i = rp->rtort_pipe_count;
+ i = READ_ONCE(rp->rtort_pipe_count);
if (i > RCU_TORTURE_PIPE_LEN)
i = RCU_TORTURE_PIPE_LEN;
atomic_inc(&rcu_torture_wcount[i]);
- if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
+ WRITE_ONCE(rp->rtort_pipe_count, i + 1);
+ if (rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
rp->rtort_mbtest = 0;
return true;
}
@@ -1015,7 +1016,8 @@ rcu_torture_writer(void *arg)
if (i > RCU_TORTURE_PIPE_LEN)
i = RCU_TORTURE_PIPE_LEN;
atomic_inc(&rcu_torture_wcount[i]);
- old_rp->rtort_pipe_count++;
+ WRITE_ONCE(old_rp->rtort_pipe_count,
+ old_rp->rtort_pipe_count + 1);
switch (synctype[torture_random(&rand) % nsynctypes]) {
case RTWS_DEF_FREE:
rcu_torture_writer_state = RTWS_DEF_FREE;
@@ -1067,7 +1069,8 @@ rcu_torture_writer(void *arg)
if (stutter_wait("rcu_torture_writer") &&
!READ_ONCE(rcu_fwd_cb_nodelay) &&
!cur_ops->slow_gps &&
- !torture_must_stop())
+ !torture_must_stop() &&
+ rcu_inkernel_boot_has_ended())
for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
if (list_empty(&rcu_tortures[i].rtort_free) &&
rcu_access_pointer(rcu_torture_current) !=
@@ -1290,7 +1293,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
atomic_inc(&n_rcu_torture_mberror);
rtrsp = rcutorture_loop_extend(&readstate, trsp, rtrsp);
preempt_disable();
- pipe_count = p->rtort_pipe_count;
+ pipe_count = READ_ONCE(p->rtort_pipe_count);
if (pipe_count > RCU_TORTURE_PIPE_LEN) {
/* Should not happen, but... */
pipe_count = RCU_TORTURE_PIPE_LEN;
@@ -1404,14 +1407,15 @@ rcu_torture_stats_print(void)
int i;
long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
+ struct rcu_torture *rtcp;
static unsigned long rtcv_snap = ULONG_MAX;
static bool splatted;
struct task_struct *wtp;
for_each_possible_cpu(cpu) {
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
- pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i];
- batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i];
+ pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
+ batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
}
}
for (i = RCU_TORTURE_PIPE_LEN - 1; i >= 0; i--) {
@@ -1420,9 +1424,10 @@ rcu_torture_stats_print(void)
}
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
+ rtcp = rcu_access_pointer(rcu_torture_current);
pr_cont("rtc: %p %s: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
- rcu_torture_current,
- rcu_torture_current ? "ver" : "VER",
+ rtcp,
+ rtcp && !rcu_stall_is_suppressed_at_boot() ? "ver" : "VER",
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
atomic_read(&n_rcu_torture_alloc),
@@ -1478,7 +1483,8 @@ rcu_torture_stats_print(void)
if (cur_ops->stats)
cur_ops->stats();
if (rtcv_snap == rcu_torture_current_version &&
- rcu_torture_current != NULL) {
+ rcu_access_pointer(rcu_torture_current) &&
+ !rcu_stall_is_suppressed()) {
int __maybe_unused flags = 0;
unsigned long __maybe_unused gp_seq = 0;
@@ -1993,8 +1999,11 @@ static int rcu_torture_fwd_prog(void *args)
schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
WRITE_ONCE(rcu_fwd_emergency_stop, false);
register_oom_notifier(&rcutorture_oom_nb);
- rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);
- rcu_torture_fwd_prog_cr(rfp);
+ if (!IS_ENABLED(CONFIG_TINY_RCU) ||
+ rcu_inkernel_boot_has_ended())
+ rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);
+ if (rcu_inkernel_boot_has_ended())
+ rcu_torture_fwd_prog_cr(rfp);
unregister_oom_notifier(&rcutorture_oom_nb);
/* Avoid slow periods, better to test when busy. */
@@ -2044,6 +2053,14 @@ static void rcu_torture_barrier_cbf(struct rcu_head *rcu)
atomic_inc(&barrier_cbs_invoked);
}
+/* IPI handler to get callback posted on desired CPU, if online. */
+static void rcu_torture_barrier1cb(void *rcu_void)
+{
+ struct rcu_head *rhp = rcu_void;
+
+ cur_ops->call(rhp, rcu_torture_barrier_cbf);
+}
+
/* kthread function to register callbacks used to test RCU barriers. */
static int rcu_torture_barrier_cbs(void *arg)
{
@@ -2067,9 +2084,11 @@ static int rcu_torture_barrier_cbs(void *arg)
* The above smp_load_acquire() ensures barrier_phase load
* is ordered before the following ->call().
*/
- local_irq_disable(); /* Just to test no-irq call_rcu(). */
- cur_ops->call(&rcu, rcu_torture_barrier_cbf);
- local_irq_enable();
+ if (smp_call_function_single(myid, rcu_torture_barrier1cb,
+ &rcu, 1)) {
+ // IPI failed, so use direct call from current CPU.
+ cur_ops->call(&rcu, rcu_torture_barrier_cbf);
+ }
if (atomic_dec_and_test(&barrier_cbs_count))
wake_up(&barrier_wq);
} while (!torture_must_stop());
@@ -2105,7 +2124,21 @@ static int rcu_torture_barrier(void *arg)
pr_err("barrier_cbs_invoked = %d, n_barrier_cbs = %d\n",
atomic_read(&barrier_cbs_invoked),
n_barrier_cbs);
- WARN_ON_ONCE(1);
+ WARN_ON(1);
+ // Wait manually for the remaining callbacks
+ i = 0;
+ do {
+ if (WARN_ON(i++ > HZ))
+ i = INT_MIN;
+ schedule_timeout_interruptible(1);
+ cur_ops->cb_barrier();
+ } while (atomic_read(&barrier_cbs_invoked) !=
+ n_barrier_cbs &&
+ !torture_must_stop());
+ smp_mb(); // Can't trust ordering if broken.
+ if (!torture_must_stop())
+ pr_err("Recovered: barrier_cbs_invoked = %d\n",
+ atomic_read(&barrier_cbs_invoked));
} else {
n_barrier_successes++;
}
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 7ddb29cc7dba..0c71505f0e19 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -450,7 +450,7 @@ static void srcu_gp_start(struct srcu_struct *ssp)
spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */
smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
rcu_seq_start(&ssp->srcu_gp_seq);
- state = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq));
+ state = rcu_seq_state(ssp->srcu_gp_seq);
WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
}
@@ -534,7 +534,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
rcu_seq_end(&ssp->srcu_gp_seq);
gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
- ssp->srcu_gp_seq_needed_exp = gpseq;
+ WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, gpseq);
spin_unlock_irq_rcu_node(ssp);
mutex_unlock(&ssp->srcu_gp_mutex);
/* A new grace period can start at this point. But only one. */
@@ -550,7 +550,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
snp->srcu_have_cbs[idx] = gpseq;
rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
- snp->srcu_gp_seq_needed_exp = gpseq;
+ WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq);
mask = snp->srcu_data_have_cbs[idx];
snp->srcu_data_have_cbs[idx] = 0;
spin_unlock_irq_rcu_node(snp);
@@ -614,7 +614,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
}
spin_lock_irqsave_rcu_node(ssp, flags);
if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
- ssp->srcu_gp_seq_needed_exp = s;
+ WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
spin_unlock_irqrestore_rcu_node(ssp, flags);
}
@@ -660,7 +660,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
if (snp == sdp->mynode)
snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
- snp->srcu_gp_seq_needed_exp = s;
+ WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
spin_unlock_irqrestore_rcu_node(snp, flags);
}
@@ -674,7 +674,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/
}
if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
- ssp->srcu_gp_seq_needed_exp = s;
+ WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
/* If grace period not already done and none in progress, start it. */
if (!rcu_seq_done(&ssp->srcu_gp_seq, s) &&
@@ -1079,7 +1079,7 @@ EXPORT_SYMBOL_GPL(srcu_barrier);
*/
unsigned long srcu_batches_completed(struct srcu_struct *ssp)
{
- return ssp->srcu_idx;
+ return READ_ONCE(ssp->srcu_idx);
}
EXPORT_SYMBOL_GPL(srcu_batches_completed);
@@ -1130,7 +1130,9 @@ static void srcu_advance_state(struct srcu_struct *ssp)
return; /* readers present, retry later. */
}
srcu_flip(ssp);
+ spin_lock_irq_rcu_node(ssp);
rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2);
+ spin_unlock_irq_rcu_node(ssp);
}
if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 35292c8ffd4a..550193a9ce76 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -150,6 +150,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
static void invoke_rcu_core(void);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);
+static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
/* rcuc/rcub kthread realtime priority */
static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
@@ -413,10 +414,15 @@ static long blimit = DEFAULT_RCU_BLIMIT;
static long qhimark = DEFAULT_RCU_QHIMARK;
#define DEFAULT_RCU_QLOMARK 100 /* Once only this many pending, use blimit. */
static long qlowmark = DEFAULT_RCU_QLOMARK;
+#define DEFAULT_RCU_QOVLD_MULT 2
+#define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
+static long qovld = DEFAULT_RCU_QOVLD; /* If this many pending, hammer QS. */
+static long qovld_calc = -1; /* No pre-initialization lock acquisitions! */
module_param(blimit, long, 0444);
module_param(qhimark, long, 0444);
module_param(qlowmark, long, 0444);
+module_param(qovld, long, 0444);
static ulong jiffies_till_first_fqs = ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX;
@@ -1076,7 +1082,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
rnhqp = &per_cpu(rcu_data.rcu_need_heavy_qs, rdp->cpu);
if (!READ_ONCE(*rnhqp) &&
(time_after(jiffies, rcu_state.gp_start + jtsq * 2) ||
- time_after(jiffies, rcu_state.jiffies_resched))) {
+ time_after(jiffies, rcu_state.jiffies_resched) ||
+ rcu_state.cbovld)) {
WRITE_ONCE(*rnhqp, true);
/* Store rcu_need_heavy_qs before rcu_urgent_qs. */
smp_store_release(ruqp, true);
@@ -1093,8 +1100,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
* So hit them over the head with the resched_cpu() hammer!
*/
if (tick_nohz_full_cpu(rdp->cpu) &&
- time_after(jiffies,
- READ_ONCE(rdp->last_fqs_resched) + jtsq * 3)) {
+ (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) ||
+ rcu_state.cbovld)) {
WRITE_ONCE(*ruqp, true);
resched_cpu(rdp->cpu);
WRITE_ONCE(rdp->last_fqs_resched, jiffies);
@@ -1392,7 +1399,7 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
{
bool ret = false;
- bool need_gp;
+ bool need_qs;
const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
rcu_segcblist_is_offloaded(&rdp->cblist);
@@ -1406,10 +1413,13 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
unlikely(READ_ONCE(rdp->gpwrap))) {
if (!offloaded)
ret = rcu_advance_cbs(rnp, rdp); /* Advance CBs. */
+ rdp->core_needs_qs = false;
trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend"));
} else {
if (!offloaded)
ret = rcu_accelerate_cbs(rnp, rdp); /* Recent CBs. */
+ if (rdp->core_needs_qs)
+ rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask);
}
/* Now handle the beginnings of any new-to-this-CPU grace periods. */
@@ -1421,9 +1431,9 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
* go looking for one.
*/
trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart"));
- need_gp = !!(rnp->qsmask & rdp->grpmask);
- rdp->cpu_no_qs.b.norm = need_gp;
- rdp->core_needs_qs = need_gp;
+ need_qs = !!(rnp->qsmask & rdp->grpmask);
+ rdp->cpu_no_qs.b.norm = need_qs;
+ rdp->core_needs_qs = need_qs;
zero_cpu_stall_ticks(rdp);
}
rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */
@@ -1703,8 +1713,9 @@ static void rcu_gp_fqs_loop(void)
*/
static void rcu_gp_cleanup(void)
{
- unsigned long gp_duration;
+ int cpu;
bool needgp = false;
+ unsigned long gp_duration;
unsigned long new_gp_seq;
bool offloaded;
struct rcu_data *rdp;
@@ -1750,6 +1761,12 @@ static void rcu_gp_cleanup(void)
needgp = __note_gp_changes(rnp, rdp) || needgp;
/* smp_mb() provided by prior unlock-lock pair. */
needgp = rcu_future_gp_cleanup(rnp) || needgp;
+ // Reset overload indication for CPUs no longer overloaded
+ if (rcu_is_leaf_node(rnp))
+ for_each_leaf_node_cpu_mask(rnp, cpu, rnp->cbovldmask) {
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ check_cb_ovld_locked(rdp, rnp);
+ }
sq = rcu_nocb_gp_get(rnp);
raw_spin_unlock_irq_rcu_node(rnp);
rcu_nocb_gp_cleanup(sq);
@@ -1987,6 +2004,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
return;
}
mask = rdp->grpmask;
+ if (rdp->cpu == smp_processor_id())
+ rdp->core_needs_qs = false;
if ((rnp->qsmask & mask) == 0) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
} else {
@@ -2294,10 +2313,13 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
struct rcu_data *rdp;
struct rcu_node *rnp;
+ rcu_state.cbovld = rcu_state.cbovldnext;
+ rcu_state.cbovldnext = false;
rcu_for_each_leaf_node(rnp) {
cond_resched_tasks_rcu_qs();
mask = 0;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ rcu_state.cbovldnext |= !!rnp->cbovldmask;
if (rnp->qsmask == 0) {
if (!IS_ENABLED(CONFIG_PREEMPT_RCU) ||
rcu_preempt_blocked_readers_cgp(rnp)) {
@@ -2579,11 +2601,48 @@ static void rcu_leak_callback(struct rcu_head *rhp)
}
/*
- * Helper function for call_rcu() and friends. The cpu argument will
- * normally be -1, indicating "currently running CPU". It may specify
- * a CPU only if that CPU is a no-CBs CPU. Currently, only rcu_barrier()
- * is expected to specify a CPU.
+ * Check and if necessary update the leaf rcu_node structure's
+ * ->cbovldmask bit corresponding to the current CPU based on that CPU's
+ * number of queued RCU callbacks. The caller must hold the leaf rcu_node
+ * structure's ->lock.
+ */
+static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp)
+{
+ raw_lockdep_assert_held_rcu_node(rnp);
+ if (qovld_calc <= 0)
+ return; // Early boot and wildcard value set.
+ if (rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc)
+ WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask | rdp->grpmask);
+ else
+ WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask & ~rdp->grpmask);
+}
+
+/*
+ * Check and if necessary update the leaf rcu_node structure's
+ * ->cbovldmask bit corresponding to the current CPU based on that CPU's
+ * number of queued RCU callbacks. No locks need be held, but the
+ * caller must have disabled interrupts.
+ *
+ * Note that this function ignores the possibility that there are a lot
+ * of callbacks all of which have already seen the end of their respective
+ * grace periods. This omission is due to the need for no-CBs CPUs to
+ * be holding ->nocb_lock to do this check, which is too heavy for a
+ * common-case operation.
*/
+static void check_cb_ovld(struct rcu_data *rdp)
+{
+ struct rcu_node *const rnp = rdp->mynode;
+
+ if (qovld_calc <= 0 ||
+ ((rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) ==
+ !!(READ_ONCE(rnp->cbovldmask) & rdp->grpmask)))
+ return; // Early boot wildcard value or already set correctly.
+ raw_spin_lock_rcu_node(rnp);
+ check_cb_ovld_locked(rdp, rnp);
+ raw_spin_unlock_rcu_node(rnp);
+}
+
+/* Helper function for call_rcu() and friends. */
static void
__call_rcu(struct rcu_head *head, rcu_callback_t func)
{
@@ -2621,9 +2680,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
rcu_segcblist_init(&rdp->cblist);
}
+ check_cb_ovld(rdp);
if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
return; // Enqueued onto ->nocb_bypass, so just leave.
- /* If we get here, rcu_nocb_try_bypass() acquired ->nocb_lock. */
+ // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
rcu_segcblist_enqueue(&rdp->cblist, head);
if (__is_kfree_rcu_offset((unsigned long)func))
trace_rcu_kfree_callback(rcu_state.name, head,
@@ -2689,22 +2749,47 @@ EXPORT_SYMBOL_GPL(call_rcu);
#define KFREE_DRAIN_JIFFIES (HZ / 50)
#define KFREE_N_BATCHES 2
+/*
+ * This macro defines how many entries the "records" array
+ * will contain. It is based on the fact that the size of
+ * kfree_rcu_bulk_data structure becomes exactly one page.
+ */
+#define KFREE_BULK_MAX_ENTR ((PAGE_SIZE / sizeof(void *)) - 3)
+
+/**
+ * struct kfree_rcu_bulk_data - single block to store kfree_rcu() pointers
+ * @nr_records: Number of active pointers in the array
+ * @records: Array of the kfree_rcu() pointers
+ * @next: Next bulk object in the block chain
+ * @head_free_debug: For debug, when CONFIG_DEBUG_OBJECTS_RCU_HEAD is set
+ */
+struct kfree_rcu_bulk_data {
+ unsigned long nr_records;
+ void *records[KFREE_BULK_MAX_ENTR];
+ struct kfree_rcu_bulk_data *next;
+ struct rcu_head *head_free_debug;
+};
+
/**
* struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
* @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
* @head_free: List of kfree_rcu() objects waiting for a grace period
+ * @bhead_free: Bulk-List of kfree_rcu() objects waiting for a grace period
* @krcp: Pointer to @kfree_rcu_cpu structure
*/
struct kfree_rcu_cpu_work {
struct rcu_work rcu_work;
struct rcu_head *head_free;
+ struct kfree_rcu_bulk_data *bhead_free;
struct kfree_rcu_cpu *krcp;
};
/**
* struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
* @head: List of kfree_rcu() objects not yet waiting for a grace period
+ * @bhead: Bulk-List of kfree_rcu() objects not yet waiting for a grace period
+ * @bcached: Keeps at most one object for later reuse when build chain blocks
* @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
* @lock: Synchronize access to this structure
* @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
@@ -2718,6 +2803,8 @@ struct kfree_rcu_cpu_work {
*/
struct kfree_rcu_cpu {
struct rcu_head *head;
+ struct kfree_rcu_bulk_data *bhead;
+ struct kfree_rcu_bulk_data *bcached;
struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
spinlock_t lock;
struct delayed_work monitor_work;
@@ -2727,14 +2814,24 @@ struct kfree_rcu_cpu {
static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
+static __always_inline void
+debug_rcu_head_unqueue_bulk(struct rcu_head *head)
+{
+#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+ for (; head; head = head->next)
+ debug_rcu_head_unqueue(head);
+#endif
+}
+
/*
* This function is invoked in workqueue context after a grace period.
- * It frees all the objects queued on ->head_free.
+ * It frees all the objects queued on ->bhead_free or ->head_free.
*/
static void kfree_rcu_work(struct work_struct *work)
{
unsigned long flags;
struct rcu_head *head, *next;
+ struct kfree_rcu_bulk_data *bhead, *bnext;
struct kfree_rcu_cpu *krcp;
struct kfree_rcu_cpu_work *krwp;
@@ -2744,22 +2841,44 @@ static void kfree_rcu_work(struct work_struct *work)
spin_lock_irqsave(&krcp->lock, flags);
head = krwp->head_free;
krwp->head_free = NULL;
+ bhead = krwp->bhead_free;
+ krwp->bhead_free = NULL;
spin_unlock_irqrestore(&krcp->lock, flags);
- // List "head" is now private, so traverse locklessly.
+ /* "bhead" is now private, so traverse locklessly. */
+ for (; bhead; bhead = bnext) {
+ bnext = bhead->next;
+
+ debug_rcu_head_unqueue_bulk(bhead->head_free_debug);
+
+ rcu_lock_acquire(&rcu_callback_map);
+ trace_rcu_invoke_kfree_bulk_callback(rcu_state.name,
+ bhead->nr_records, bhead->records);
+
+ kfree_bulk(bhead->nr_records, bhead->records);
+ rcu_lock_release(&rcu_callback_map);
+
+ if (cmpxchg(&krcp->bcached, NULL, bhead))
+ free_page((unsigned long) bhead);
+
+ cond_resched_tasks_rcu_qs();
+ }
+
+ /*
+ * Emergency case only. It can happen under low memory
+ * condition when an allocation gets failed, so the "bulk"
+ * path can not be temporary maintained.
+ */
for (; head; head = next) {
unsigned long offset = (unsigned long)head->func;
next = head->next;
- // Potentially optimize with kfree_bulk in future.
debug_rcu_head_unqueue(head);
rcu_lock_acquire(&rcu_callback_map);
trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
- if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) {
- /* Could be optimized with kfree_bulk() in future. */
+ if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset)))
kfree((void *)head - offset);
- }
rcu_lock_release(&rcu_callback_map);
cond_resched_tasks_rcu_qs();
@@ -2774,26 +2893,48 @@ static void kfree_rcu_work(struct work_struct *work)
*/
static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
{
+ struct kfree_rcu_cpu_work *krwp;
+ bool queued = false;
int i;
- struct kfree_rcu_cpu_work *krwp = NULL;
lockdep_assert_held(&krcp->lock);
- for (i = 0; i < KFREE_N_BATCHES; i++)
- if (!krcp->krw_arr[i].head_free) {
- krwp = &(krcp->krw_arr[i]);
- break;
- }
- // If a previous RCU batch is in progress, we cannot immediately
- // queue another one, so return false to tell caller to retry.
- if (!krwp)
- return false;
+ for (i = 0; i < KFREE_N_BATCHES; i++) {
+ krwp = &(krcp->krw_arr[i]);
- krwp->head_free = krcp->head;
- krcp->head = NULL;
- INIT_RCU_WORK(&krwp->rcu_work, kfree_rcu_work);
- queue_rcu_work(system_wq, &krwp->rcu_work);
- return true;
+ /*
+ * Try to detach bhead or head and attach it over any
+ * available corresponding free channel. It can be that
+ * a previous RCU batch is in progress, it means that
+ * immediately to queue another one is not possible so
+ * return false to tell caller to retry.
+ */
+ if ((krcp->bhead && !krwp->bhead_free) ||
+ (krcp->head && !krwp->head_free)) {
+ /* Channel 1. */
+ if (!krwp->bhead_free) {
+ krwp->bhead_free = krcp->bhead;
+ krcp->bhead = NULL;
+ }
+
+ /* Channel 2. */
+ if (!krwp->head_free) {
+ krwp->head_free = krcp->head;
+ krcp->head = NULL;
+ }
+
+ /*
+ * One work is per one batch, so there are two "free channels",
+ * "bhead_free" and "head_free" the batch can handle. It can be
+ * that the work is in the pending state when two channels have
+ * been detached following each other, one by one.
+ */
+ queue_rcu_work(system_wq, &krwp->rcu_work);
+ queued = true;
+ }
+ }
+
+ return queued;
}
static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp,
@@ -2830,19 +2971,65 @@ static void kfree_rcu_monitor(struct work_struct *work)
spin_unlock_irqrestore(&krcp->lock, flags);
}
+static inline bool
+kfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp,
+ struct rcu_head *head, rcu_callback_t func)
+{
+ struct kfree_rcu_bulk_data *bnode;
+
+ if (unlikely(!krcp->initialized))
+ return false;
+
+ lockdep_assert_held(&krcp->lock);
+
+ /* Check if a new block is required. */
+ if (!krcp->bhead ||
+ krcp->bhead->nr_records == KFREE_BULK_MAX_ENTR) {
+ bnode = xchg(&krcp->bcached, NULL);
+ if (!bnode) {
+ WARN_ON_ONCE(sizeof(struct kfree_rcu_bulk_data) > PAGE_SIZE);
+
+ bnode = (struct kfree_rcu_bulk_data *)
+ __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+ }
+
+ /* Switch to emergency path. */
+ if (unlikely(!bnode))
+ return false;
+
+ /* Initialize the new block. */
+ bnode->nr_records = 0;
+ bnode->next = krcp->bhead;
+ bnode->head_free_debug = NULL;
+
+ /* Attach it to the head. */
+ krcp->bhead = bnode;
+ }
+
+#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+ head->func = func;
+ head->next = krcp->bhead->head_free_debug;
+ krcp->bhead->head_free_debug = head;
+#endif
+
+ /* Finally insert. */
+ krcp->bhead->records[krcp->bhead->nr_records++] =
+ (void *) head - (unsigned long) func;
+
+ return true;
+}
+
/*
- * Queue a request for lazy invocation of kfree() after a grace period.
+ * Queue a request for lazy invocation of kfree_bulk()/kfree() after a grace
+ * period. Please note there are two paths are maintained, one is the main one
+ * that uses kfree_bulk() interface and second one is emergency one, that is
+ * used only when the main path can not be maintained temporary, due to memory
+ * pressure.
*
* Each kfree_call_rcu() request is added to a batch. The batch will be drained
- * every KFREE_DRAIN_JIFFIES number of jiffies. All the objects in the batch
- * will be kfree'd in workqueue context. This allows us to:
- *
- * 1. Batch requests together to reduce the number of grace periods during
- * heavy kfree_rcu() load.
- *
- * 2. It makes it possible to use kfree_bulk() on a large number of
- * kfree_rcu() requests thus reducing cache misses and the per-object
- * overhead of kfree().
+ * every KFREE_DRAIN_JIFFIES number of jiffies. All the objects in the batch will
+ * be free'd in workqueue context. This allows us to: batch requests together to
+ * reduce the number of grace periods during heavy kfree_rcu() load.
*/
void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
{
@@ -2861,9 +3048,16 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
__func__, head);
goto unlock_return;
}
- head->func = func;
- head->next = krcp->head;
- krcp->head = head;
+
+ /*
+ * Under high memory pressure GFP_NOWAIT can fail,
+ * in that case the emergency path is maintained.
+ */
+ if (unlikely(!kfree_call_rcu_add_ptr_to_bulk(krcp, head, func))) {
+ head->func = func;
+ head->next = krcp->head;
+ krcp->head = head;
+ }
// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
@@ -3791,8 +3985,11 @@ static void __init kfree_rcu_batch_init(void)
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
spin_lock_init(&krcp->lock);
- for (i = 0; i < KFREE_N_BATCHES; i++)
+ for (i = 0; i < KFREE_N_BATCHES; i++) {
+ INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
krcp->krw_arr[i].krcp = krcp;
+ }
+
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
krcp->initialized = true;
}
@@ -3831,6 +4028,13 @@ void __init rcu_init(void)
rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_par_gp_wq);
srcu_init();
+
+ /* Fill in default value for rcutree.qovld boot parameter. */
+ /* -After- the rcu_node ->lock fields are initialized! */
+ if (qovld < 0)
+ qovld_calc = DEFAULT_RCU_QOVLD_MULT * qhimark;
+ else
+ qovld_calc = qovld;
}
#include "tree_stall.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 0c87e4c161c2..9dc2ec021da5 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -68,6 +68,8 @@ struct rcu_node {
/* Online CPUs for next expedited GP. */
/* Any CPU that has ever been online will */
/* have its bit set. */
+ unsigned long cbovldmask;
+ /* CPUs experiencing callback overload. */
unsigned long ffmask; /* Fully functional CPUs. */
unsigned long grpmask; /* Mask to apply to parent qsmask. */
/* Only one bit will be set in this mask. */
@@ -321,6 +323,8 @@ struct rcu_state {
atomic_t expedited_need_qs; /* # CPUs left to check in. */
struct swait_queue_head expedited_wq; /* Wait for check-ins. */
int ncpus_snap; /* # CPUs seen last time. */
+ u8 cbovld; /* Callback overload now? */
+ u8 cbovldnext; /* ^ ^ next time? */
unsigned long jiffies_force_qs; /* Time at which to invoke */
/* force_quiescent_state(). */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 85b009e05637..1a617b9dffb0 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -485,6 +485,7 @@ static bool synchronize_rcu_expedited_wait_once(long tlimit)
static void synchronize_rcu_expedited_wait(void)
{
int cpu;
+ unsigned long j;
unsigned long jiffies_stall;
unsigned long jiffies_start;
unsigned long mask;
@@ -496,7 +497,7 @@ static void synchronize_rcu_expedited_wait(void)
trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
jiffies_stall = rcu_jiffies_till_stall_check();
jiffies_start = jiffies;
- if (IS_ENABLED(CONFIG_NO_HZ_FULL)) {
+ if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
if (synchronize_rcu_expedited_wait_once(1))
return;
rcu_for_each_leaf_node(rnp) {
@@ -508,12 +509,16 @@ static void synchronize_rcu_expedited_wait(void)
tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
}
}
+ j = READ_ONCE(jiffies_till_first_fqs);
+ if (synchronize_rcu_expedited_wait_once(j + HZ))
+ return;
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT));
}
for (;;) {
if (synchronize_rcu_expedited_wait_once(jiffies_stall))
return;
- if (rcu_cpu_stall_suppress)
+ if (rcu_stall_is_suppressed())
continue;
panic_on_rcu_stall();
pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 36e71c99970a..097635c41135 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -56,6 +56,8 @@ static void __init rcu_bootup_announce_oddness(void)
pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
if (qlowmark != DEFAULT_RCU_QLOMARK)
pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
+ if (qovld != DEFAULT_RCU_QOVLD)
+ pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld);
if (jiffies_till_first_fqs != ULONG_MAX)
pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
if (jiffies_till_next_fqs != ULONG_MAX)
@@ -1077,7 +1079,7 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
(rnp->gp_tasks != NULL &&
rnp->boost_tasks == NULL &&
rnp->qsmask == 0 &&
- ULONG_CMP_GE(jiffies, rnp->boost_time))) {
+ (ULONG_CMP_GE(jiffies, rnp->boost_time) || rcu_state.cbovld))) {
if (rnp->exp_tasks == NULL)
rnp->boost_tasks = rnp->gp_tasks;
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 16ad7ad9a185..119ed6afd20f 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -383,7 +383,7 @@ static void print_other_cpu_stall(unsigned long gp_seq)
/* Kick and suppress, if so configured. */
rcu_stall_kick_kthreads();
- if (rcu_cpu_stall_suppress)
+ if (rcu_stall_is_suppressed())
return;
/*
@@ -452,7 +452,7 @@ static void print_cpu_stall(void)
/* Kick and suppress, if so configured. */
rcu_stall_kick_kthreads();
- if (rcu_cpu_stall_suppress)
+ if (rcu_stall_is_suppressed())
return;
/*
@@ -504,7 +504,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
unsigned long js;
struct rcu_node *rnp;
- if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||
+ if ((rcu_stall_is_suppressed() && !rcu_kick_kthreads) ||
!rcu_gp_in_progress())
return;
rcu_stall_kick_kthreads();
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 6c4b862f57d6..a4ad8e0406c7 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -183,6 +183,8 @@ void rcu_unexpedite_gp(void)
}
EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
+static bool rcu_boot_ended __read_mostly;
+
/*
* Inform RCU of the end of the in-kernel boot sequence.
*/
@@ -191,8 +193,18 @@ void rcu_end_inkernel_boot(void)
rcu_unexpedite_gp();
if (rcu_normal_after_boot)
WRITE_ONCE(rcu_normal, 1);
+ rcu_boot_ended = 1;
}
+/*
+ * Let rcutorture know when it is OK to turn it up to eleven.
+ */
+bool rcu_inkernel_boot_has_ended(void)
+{
+ return rcu_boot_ended;
+}
+EXPORT_SYMBOL_GPL(rcu_inkernel_boot_has_ended);
+
#endif /* #ifndef CONFIG_TINY_RCU */
/*
@@ -464,13 +476,19 @@ EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity);
#ifdef CONFIG_RCU_STALL_COMMON
int rcu_cpu_stall_ftrace_dump __read_mostly;
module_param(rcu_cpu_stall_ftrace_dump, int, 0644);
-int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
+int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings.
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
module_param(rcu_cpu_stall_suppress, int, 0644);
int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_timeout, int, 0644);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+// Suppress boot-time RCU CPU stall warnings and rcutorture writer stall
+// warnings. Also used by rcutorture even if stall warnings are excluded.
+int rcu_cpu_stall_suppress_at_boot __read_mostly; // !0 = suppress boot stalls.
+EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress_at_boot);
+module_param(rcu_cpu_stall_suppress_at_boot, int, 0444);
+
#ifdef CONFIG_TASKS_RCU
/*
@@ -528,7 +546,7 @@ void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
rhp->func = func;
raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
needwake = !rcu_tasks_cbs_head;
- *rcu_tasks_cbs_tail = rhp;
+ WRITE_ONCE(*rcu_tasks_cbs_tail, rhp);
rcu_tasks_cbs_tail = &rhp->next;
raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
/* We can't create the thread unless interrupts are enabled. */
@@ -658,7 +676,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
/* If there were none, wait a bit and start over. */
if (!list) {
wait_event_interruptible(rcu_tasks_cbs_wq,
- rcu_tasks_cbs_head);
+ READ_ONCE(rcu_tasks_cbs_head));
if (!rcu_tasks_cbs_head) {
WARN_ON(signal_pending(current));
schedule_timeout_interruptible(HZ/10);
@@ -801,7 +819,7 @@ static int __init rcu_spawn_tasks_kthread(void)
core_initcall(rcu_spawn_tasks_kthread);
/* Do the srcu_read_lock() for the above synchronize_srcu(). */
-void exit_tasks_rcu_start(void)
+void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
{
preempt_disable();
current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
@@ -809,7 +827,7 @@ void exit_tasks_rcu_start(void)
}
/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
-void exit_tasks_rcu_finish(void)
+void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
{
preempt_disable();
__srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);
diff --git a/kernel/torture.c b/kernel/torture.c
index 7c13f5558b71..8683375dc0c7 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -42,6 +42,9 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
+static bool disable_onoff_at_boot;
+module_param(disable_onoff_at_boot, bool, 0444);
+
static char *torture_type;
static int verbose;
@@ -84,6 +87,7 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
{
unsigned long delta;
int ret;
+ char *s;
unsigned long starttime;
if (!cpu_online(cpu) || !cpu_is_hotpluggable(cpu))
@@ -99,10 +103,16 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
(*n_offl_attempts)++;
ret = cpu_down(cpu);
if (ret) {
+ s = "";
+ if (!rcu_inkernel_boot_has_ended() && ret == -EBUSY) {
+ // PCI probe frequently disables hotplug during boot.
+ (*n_offl_attempts)--;
+ s = " (-EBUSY forgiven during boot)";
+ }
if (verbose)
pr_alert("%s" TORTURE_FLAG
- "torture_onoff task: offline %d failed: errno %d\n",
- torture_type, cpu, ret);
+ "torture_onoff task: offline %d failed%s: errno %d\n",
+ torture_type, cpu, s, ret);
} else {
if (verbose > 1)
pr_alert("%s" TORTURE_FLAG
@@ -137,6 +147,7 @@ bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
{
unsigned long delta;
int ret;
+ char *s;
unsigned long starttime;
if (cpu_online(cpu) || !cpu_is_hotpluggable(cpu))
@@ -150,10 +161,16 @@ bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
(*n_onl_attempts)++;
ret = cpu_up(cpu);
if (ret) {
+ s = "";
+ if (!rcu_inkernel_boot_has_ended() && ret == -EBUSY) {
+ // PCI probe frequently disables hotplug during boot.
+ (*n_onl_attempts)--;
+ s = " (-EBUSY forgiven during boot)";
+ }
if (verbose)
pr_alert("%s" TORTURE_FLAG
- "torture_onoff task: online %d failed: errno %d\n",
- torture_type, cpu, ret);
+ "torture_onoff task: online %d failed%s: errno %d\n",
+ torture_type, cpu, s, ret);
} else {
if (verbose > 1)
pr_alert("%s" TORTURE_FLAG
@@ -215,6 +232,10 @@ torture_onoff(void *arg)
VERBOSE_TOROUT_STRING("torture_onoff end holdoff");
}
while (!torture_must_stop()) {
+ if (disable_onoff_at_boot && !rcu_inkernel_boot_has_ended()) {
+ schedule_timeout_interruptible(HZ / 10);
+ continue;
+ }
cpu = (torture_random(&rand) >> 4) % (maxcpu + 1);
if (!torture_offline(cpu,
&n_offline_attempts, &n_offline_successes,