From 135bd1a230bb69a68c9808a7d25467318900b80a Mon Sep 17 00:00:00 2001
From: Neeraj Upadhyay
Date: Mon, 7 Aug 2017 11:20:10 +0530
Subject: rcu: Fix up pending cbs check in rcu_prepare_for_idle

The pending-callbacks check in rcu_prepare_for_idle() is backwards.
It should accelerate if there are pending callbacks, but the check
rather uselessly accelerates only if there are no callbacks.  This
commit therefore inverts this check.

Fixes: 15fecf89e46a ("srcu: Abstract multi-tail callback list handling")
Signed-off-by: Neeraj Upadhyay
Signed-off-by: Paul E. McKenney
Cc: # 4.12.x
---
 kernel/rcu/tree_plugin.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/rcu')

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index e012b9be777e..fed95fa941e6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1507,7 +1507,7 @@ static void rcu_prepare_for_idle(void)
 	rdtp->last_accelerate = jiffies;
 	for_each_rcu_flavor(rsp) {
 		rdp = this_cpu_ptr(rsp->rda);
-		if (rcu_segcblist_pend_cbs(&rdp->cblist))
+		if (!rcu_segcblist_pend_cbs(&rdp->cblist))
 			continue;
 		rnp = rdp->mynode;
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
-- cgit

From c63eb17ff06dbcf73e771b9b425c531cc0a9c17b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Fri, 11 Aug 2017 12:37:07 -0700
Subject: rcu: Create call_rcu_tasks() kthread at boot time

Currently the call_rcu_tasks() kthread is created upon first invocation
of call_rcu_tasks().  This has the advantage of avoiding creation if
there are never any invocations of call_rcu_tasks() and of
synchronize_rcu_tasks(), but it requires an unreliable heuristic to
determine when it is safe to create the kthread.  For example, it is
not safe to create the kthread when call_rcu_tasks() is invoked with
a spinlock held, but there is no good way to detect this in !PREEMPT
kernels.

This commit therefore creates this kthread unconditionally at
core_initcall() time.  If you don't want this kthread created, then
build with CONFIG_TASKS_RCU=n.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/update.c | 24 +++++------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

(limited to 'kernel/rcu')

diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 5033b66d2753..e9bbedbb8745 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -575,7 +575,6 @@ DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
 static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
 module_param(rcu_task_stall_timeout, int, 0644);

-static void rcu_spawn_tasks_kthread(void);
 static struct task_struct *rcu_tasks_kthread_ptr;

 /**
@@ -600,7 +599,6 @@ void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
 {
 	unsigned long flags;
 	bool needwake;
-	bool havetask = READ_ONCE(rcu_tasks_kthread_ptr);

 	rhp->next = NULL;
 	rhp->func = func;
@@ -610,11 +608,8 @@ void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
 	rcu_tasks_cbs_tail = &rhp->next;
 	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
 	/* We can't create the thread unless interrupts are enabled. */
-	if ((needwake && havetask) ||
-	    (!havetask && !irqs_disabled_flags(flags))) {
-		rcu_spawn_tasks_kthread();
+	if (needwake && READ_ONCE(rcu_tasks_kthread_ptr))
 		wake_up(&rcu_tasks_cbs_wq);
-	}
 }
 EXPORT_SYMBOL_GPL(call_rcu_tasks);

@@ -853,27 +848,18 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 	}
 }

-/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
-static void rcu_spawn_tasks_kthread(void)
+/* Spawn rcu_tasks_kthread() at core_initcall() time. */
+static int __init rcu_spawn_tasks_kthread(void)
 {
-	static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
 	struct task_struct *t;

-	if (READ_ONCE(rcu_tasks_kthread_ptr)) {
-		smp_mb(); /* Ensure caller sees full kthread. */
-		return;
-	}
-	mutex_lock(&rcu_tasks_kthread_mutex);
-	if (rcu_tasks_kthread_ptr) {
-		mutex_unlock(&rcu_tasks_kthread_mutex);
-		return;
-	}
 	t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
 	BUG_ON(IS_ERR(t));
 	smp_mb(); /* Ensure others see full kthread. */
 	WRITE_ONCE(rcu_tasks_kthread_ptr, t);
-	mutex_unlock(&rcu_tasks_kthread_mutex);
+	return 0;
 }
+core_initcall(rcu_spawn_tasks_kthread);

 /* Do the srcu_read_lock() for the above synchronize_srcu(). */
 void exit_tasks_rcu_start(void)
-- cgit
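For readers unfamiliar with the initcall-based spawning pattern that the
commit above switches to, here is a minimal self-contained sketch.  The
my_worker names are hypothetical illustrations, not part of the patch:

#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static struct task_struct *my_worker_task;	/* illustrative only */

static int my_worker(void *arg)
{
	/* Placeholder loop; a real kthread would wait for work here. */
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

/* Spawn the worker unconditionally at boot rather than on first use. */
static int __init my_worker_init(void)
{
	struct task_struct *t;

	t = kthread_run(my_worker, NULL, "my_worker");
	if (IS_ERR(t))
		return PTR_ERR(t);
	my_worker_task = t;
	return 0;
}
core_initcall(my_worker_init);

Spawning from core_initcall() trades a small amount of unconditional boot
work for freedom from the "is it safe to create a kthread here?" heuristic
that the removed code needed.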
From f79c3ad6189624c3de0ad5521610c9e22a1c33cf Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Wed, 30 Nov 2016 06:24:30 -0800
Subject: sched,rcu: Make cond_resched() provide RCU quiescent state

There is some confusion as to which of cond_resched() or
cond_resched_rcu_qs() should be added to long in-kernel loops.
This commit therefore eliminates the decision by adding RCU quiescent
states to cond_resched().  This commit also simplifies the code that
used to interact with cond_resched_rcu_qs(), and that now interacts
with cond_resched(), to reduce its overhead.  This reduction is
necessary to allow the heavier-weight cond_resched_rcu_qs() mechanism
to be invoked everywhere that cond_resched() is invoked.

Part of that reduction in overhead converts the jiffies_till_sched_qs
kernel parameter to read-only at runtime, thus eliminating the need
for bounds checking.

Reported-by: Michal Hocko
Signed-off-by: Paul E. McKenney
Cc: Peter Zijlstra
[ paulmck: Keep PREEMPT=n cond_resched a no-op, per Peter Zijlstra. ]
---
 kernel/rcu/tree.c | 25 +++++--------------------
 1 file changed, 5 insertions(+), 20 deletions(-)

(limited to 'kernel/rcu')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b0ad62b0e7b8..0dda57a28276 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -534,8 +534,8 @@ module_param(rcu_kick_kthreads, bool, 0644);
  * How long the grace period must be before we start recruiting
  * quiescent-state help from rcu_note_context_switch().
  */
-static ulong jiffies_till_sched_qs = HZ / 20;
-module_param(jiffies_till_sched_qs, ulong, 0644);
+static ulong jiffies_till_sched_qs = HZ / 10;
+module_param(jiffies_till_sched_qs, ulong, 0444);

 static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 				  struct rcu_data *rdp);
@@ -1235,7 +1235,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	unsigned long jtsq;
 	bool *rnhqp;
 	bool *ruqp;
-	unsigned long rjtsc;
 	struct rcu_node *rnp;

 	/*
@@ -1252,23 +1251,13 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 		return 1;
 	}

-	/* Compute and saturate jiffies_till_sched_qs. */
-	jtsq = jiffies_till_sched_qs;
-	rjtsc = rcu_jiffies_till_stall_check();
-	if (jtsq > rjtsc / 2) {
-		WRITE_ONCE(jiffies_till_sched_qs, rjtsc);
-		jtsq = rjtsc / 2;
-	} else if (jtsq < 1) {
-		WRITE_ONCE(jiffies_till_sched_qs, 1);
-		jtsq = 1;
-	}
-
 	/*
 	 * Has this CPU encountered a cond_resched_rcu_qs() since the
 	 * beginning of the grace period? For this to be the case,
 	 * the CPU has to have noticed the current grace period. This
 	 * might not be the case for nohz_full CPUs looping in the kernel.
 	 */
+	jtsq = jiffies_till_sched_qs;
 	rnp = rdp->mynode;
 	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
 	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
 	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
 	    READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
 		return 1;
-	} else {
+	} else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) {
 		/* Load rcu_qs_ctr before store to rcu_urgent_qs. */
 		smp_store_release(ruqp, true);
 	}
@@ -1304,10 +1293,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	 * updates are only once every few jiffies, the probability of
 	 * lossage (and thus of slight grace-period extension) is
 	 * quite low.
-	 *
-	 * Note that if the jiffies_till_sched_qs boot/sysfs parameter
-	 * is set too high, we override with half of the RCU CPU stall
-	 * warning delay.
 	 */
 	rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);
 	if (!READ_ONCE(*rnhqp) &&
 	    (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
 	     time_after(jiffies, rdp->rsp->jiffies_resched))) {
 		WRITE_ONCE(*rnhqp, true);
 		/* Store rcu_need_heavy_qs before rcu_urgent_qs. */
 		smp_store_release(ruqp, true);
-		rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
+		rdp->rsp->jiffies_resched += jtsq; /* Re-enable beating. */
 	}

 	/*
-- cgit
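The caller-visible effect of the commit above is that a long in-kernel
loop no longer needs to choose between cond_resched() and
cond_resched_rcu_qs().  A hedged sketch with a hypothetical
walk_all_entries() helper, not taken from this patch:

#include <linux/sched.h>

static unsigned long total;

static void walk_all_entries(unsigned long nr_entries)
{
	unsigned long i;

	for (i = 0; i < nr_entries; i++) {
		total += i;		/* stand-in for real per-entry work */
		cond_resched();		/* now also reports an RCU quiescent state */
	}
}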
From 9b9500da81502738efa1b485a8835f174ff7be6d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Thu, 17 Aug 2017 17:05:59 -0700
Subject: rcu: Make RCU CPU stall warnings check for irq-disabled CPUs

One common question upon seeing an RCU CPU stall warning is "did the
stalled CPUs have interrupts disabled?"  However, the current stall
warnings are silent on this point.  This commit therefore uses irq_work
to check whether stalled CPUs still respond to IPIs, and flags this
state in the RCU CPU stall warning console messages.

Reported-by: Steven Rostedt
Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree.c        | 104 +++++++++++++++++++++++++++++++++++++++++------
 kernel/rcu/tree.h        |   5 +++
 kernel/rcu/tree_plugin.h |   7 +++-
 3 files changed, 103 insertions(+), 13 deletions(-)

(limited to 'kernel/rcu')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0dda57a28276..12838a9a128e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1206,6 +1206,22 @@ static int rcu_is_cpu_rrupt_from_idle(void)
 	return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1;
 }

+/*
+ * We are reporting a quiescent state on behalf of some other CPU, so
+ * it is our responsibility to check for and handle potential overflow
+ * of the rcu_node ->gpnum counter with respect to the rcu_data counters.
+ * After all, the CPU might be in deep idle state, and thus executing no
+ * code whatsoever.
+ */
+static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
+{
+	lockdep_assert_held(&rnp->lock);
+	if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, rnp->gpnum))
+		WRITE_ONCE(rdp->gpwrap, true);
+	if (ULONG_CMP_LT(rdp->rcu_iw_gpnum + ULONG_MAX / 4, rnp->gpnum))
+		rdp->rcu_iw_gpnum = rnp->gpnum + ULONG_MAX / 4;
+}
+
 /*
  * Snapshot the specified CPU's dynticks counter so that we can later
  * credit them with an implicit quiescent state.
Return 1 if this CPU @@ -1216,14 +1232,33 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks); if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); - if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, - rdp->mynode->gpnum)) - WRITE_ONCE(rdp->gpwrap, true); + rcu_gpnum_ovf(rdp->mynode, rdp); return 1; } return 0; } +/* + * Handler for the irq_work request posted when a grace period has + * gone on for too long, but not yet long enough for an RCU CPU + * stall warning. Set state appropriately, but just complain if + * there is unexpected state on entry. + */ +static void rcu_iw_handler(struct irq_work *iwp) +{ + struct rcu_data *rdp; + struct rcu_node *rnp; + + rdp = container_of(iwp, struct rcu_data, rcu_iw); + rnp = rdp->mynode; + raw_spin_lock_rcu_node(rnp); + if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) { + rdp->rcu_iw_gpnum = rnp->gpnum; + rdp->rcu_iw_pending = false; + } + raw_spin_unlock_rcu_node(rnp); +} + /* * Return true if the specified CPU has passed through a quiescent * state by virtue of being in or having passed through an dynticks @@ -1235,7 +1270,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) unsigned long jtsq; bool *rnhqp; bool *ruqp; - struct rcu_node *rnp; + struct rcu_node *rnp = rdp->mynode; /* * If the CPU passed through or entered a dynticks idle phase with @@ -1248,6 +1283,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); rdp->dynticks_fqs++; + rcu_gpnum_ovf(rnp, rdp); return 1; } @@ -1258,12 +1294,12 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * might not be the case for nohz_full CPUs looping in the kernel. */ jtsq = jiffies_till_sched_qs; - rnp = rdp->mynode; ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); if (time_after(jiffies, rdp->rsp->gp_start + jtsq) && READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) && READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc")); + rcu_gpnum_ovf(rnp, rdp); return 1; } else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) { /* Load rcu_qs_ctr before store to rcu_urgent_qs. */ @@ -1274,6 +1310,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp))) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl")); rdp->offline_fqs++; + rcu_gpnum_ovf(rnp, rdp); return 1; } @@ -1305,11 +1342,22 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) } /* - * If more than halfway to RCU CPU stall-warning time, do - * a resched_cpu() to try to loosen things up a bit. + * If more than halfway to RCU CPU stall-warning time, do a + * resched_cpu() to try to loosen things up a bit. Also check to + * see if the CPU is getting hammered with interrupts, but only + * once per grace period, just to keep the IPIs down to a dull roar. 
*/ - if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) + if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) { resched_cpu(rdp->cpu); + if (IS_ENABLED(CONFIG_IRQ_WORK) && + !rdp->rcu_iw_pending && rdp->rcu_iw_gpnum != rnp->gpnum && + (rnp->ffmask & rdp->grpmask)) { + init_irq_work(&rdp->rcu_iw, rcu_iw_handler); + rdp->rcu_iw_pending = true; + rdp->rcu_iw_gpnum = rnp->gpnum; + irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); + } + } return 0; } @@ -1498,6 +1546,7 @@ static void print_cpu_stall(struct rcu_state *rsp) { int cpu; unsigned long flags; + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); struct rcu_node *rnp = rcu_get_root(rsp); long totqlen = 0; @@ -1513,7 +1562,9 @@ static void print_cpu_stall(struct rcu_state *rsp) */ pr_err("INFO: %s self-detected stall on CPU", rsp->name); print_cpu_stall_info_begin(); + raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); print_cpu_stall_info(rsp, smp_processor_id()); + raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, @@ -1907,6 +1958,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, rdp->core_needs_qs = need_gp; zero_cpu_stall_ticks(rdp); WRITE_ONCE(rdp->gpwrap, false); + rcu_gpnum_ovf(rnp, rdp); } return ret; } @@ -3685,6 +3737,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->cpu_no_qs.b.norm = true; rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu); rdp->core_needs_qs = false; + rdp->rcu_iw_pending = false; + rdp->rcu_iw_gpnum = rnp->gpnum - 1; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } @@ -3722,10 +3776,24 @@ static void rcutree_affinity_setting(unsigned int cpu, int outgoing) */ int rcutree_online_cpu(unsigned int cpu) { - sync_sched_exp_online_cleanup(cpu); - rcutree_affinity_setting(cpu, -1); + unsigned long flags; + struct rcu_data *rdp; + struct rcu_node *rnp; + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + rnp = rdp->mynode; + raw_spin_lock_irqsave_rcu_node(rnp, flags); + rnp->ffmask |= rdp->grpmask; + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + } if (IS_ENABLED(CONFIG_TREE_SRCU)) srcu_online_cpu(cpu); + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) + return 0; /* Too early in boot for scheduler work. */ + sync_sched_exp_online_cleanup(cpu); + rcutree_affinity_setting(cpu, -1); return 0; } @@ -3735,6 +3803,19 @@ int rcutree_online_cpu(unsigned int cpu) */ int rcutree_offline_cpu(unsigned int cpu) { + unsigned long flags; + struct rcu_data *rdp; + struct rcu_node *rnp; + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + rnp = rdp->mynode; + raw_spin_lock_irqsave_rcu_node(rnp, flags); + rnp->ffmask &= ~rdp->grpmask; + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + } + rcutree_affinity_setting(cpu, cpu); if (IS_ENABLED(CONFIG_TREE_SRCU)) srcu_offline_cpu(cpu); @@ -4183,8 +4264,7 @@ void __init rcu_init(void) for_each_online_cpu(cpu) { rcutree_prepare_cpu(cpu); rcu_cpu_starting(cpu); - if (IS_ENABLED(CONFIG_TREE_SRCU)) - srcu_online_cpu(cpu); + rcutree_online_cpu(cpu); } } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 8e1f285f0a70..46a5d1991450 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -103,6 +103,7 @@ struct rcu_node { /* Online CPUs for next expedited GP. */ /* Any CPU that has ever been online will */ /* have its bit set. 
 */
+	unsigned long ffmask;		/* Fully functional CPUs. */
 	unsigned long grpmask;		/* Mask to apply to parent qsmask. */
 					/*  Only one bit will be set in this mask. */
 	int grplo;			/* lowest-numbered CPU or group here. */
@@ -285,6 +286,10 @@ struct rcu_data {

 	/* 8) RCU CPU stall data. */
 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
+	/* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
+	struct irq_work rcu_iw;		/* Check for non-irq activity. */
+	bool rcu_iw_pending;		/* Is ->rcu_iw pending? */
+	unsigned long rcu_iw_gpnum;	/* ->gpnum associated with ->rcu_iw. */

 	int cpu;
 	struct rcu_state *rsp;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index e012b9be777e..14977d0470d1 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1671,6 +1671,7 @@ static void print_cpu_stall_info_begin(void)
  */
 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
 {
+	unsigned long delta;
 	char fast_no_hz[72];
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_dynticks *rdtp = rdp->dynticks;
@@ -1685,11 +1686,15 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
 		ticks_value = rsp->gpnum - rdp->gpnum;
 	}
 	print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
-	pr_err("\t%d-%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
+	delta = rdp->mynode->gpnum - rdp->rcu_iw_gpnum;
+	pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
 	       cpu,
 	       "O."[!!cpu_online(cpu)],
 	       "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
 	       "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
+	       !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' :
+			rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :
+			"!."[!delta],
 	       ticks_value, ticks_title,
 	       rcu_dynticks_snap(rdtp) & 0xfff,
 	       rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
-- cgit

From 83b6ca1fede773eebcdfb44f5a94eb410d48b886 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Thu, 31 Aug 2017 16:47:08 -0700
Subject: rcu: Turn off tracing before dumping trace

Currently, RCU allows tracing to continue when it automatically does
ftrace_dump() after detecting an error condition, which can result in
excessively large traces and lost trace events.  This commit therefore
does a tracing_off() before any of these ftrace_dump() calls.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/rcu.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel/rcu')

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index e4b43fef89f5..b8729eb09a5d 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -220,8 +220,10 @@ do { \
 	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
 	\
 	if (!atomic_read(&___rfd_beenhere) && \
-	    !atomic_xchg(&___rfd_beenhere, 1)) \
+	    !atomic_xchg(&___rfd_beenhere, 1)) { \
+		tracing_off(); \
 		ftrace_dump(oops_dump_mode); \
+	} \
 } while (0)

 void rcu_early_boot_tests(void);
-- cgit
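The guard-then-dump pattern that the commit above applies inside the
rcu_ftrace_dump() macro can be open-coded as follows.  This is an
illustrative sketch with a hypothetical dump_trace_once() helper, not
code from the patch:

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>

/* One-shot error dump: freeze the trace buffer before dumping it. */
static void dump_trace_once(void)
{
	static atomic_t beenhere = ATOMIC_INIT(0);

	if (!atomic_read(&beenhere) && !atomic_xchg(&beenhere, 1)) {
		tracing_off();		/* keep new events from overwriting the evidence */
		ftrace_dump(DUMP_ALL);	/* then dump what was captured */
	}
}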
From f22ce09157239aab08eae99c678ef664f71a9097 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Fri, 1 Sep 2017 14:40:54 -0700
Subject: rcu: Suppress RCU CPU stall warnings while dumping trace

Currently, RCU emits RCU CPU stall warnings during its automatically
initiated ftrace_dump() calls after detecting an error condition,
which can result in excessive console output and lost trace events.
This commit therefore suppresses RCU CPU stall warnings across any of
these ftrace_dump() calls.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/rcu.h    | 17 +++++++++++++++++
 kernel/rcu/update.c |  1 +
 2 files changed, 18 insertions(+)

(limited to 'kernel/rcu')

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index b8729eb09a5d..59c471de342a 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -203,6 +203,21 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
 extern int rcu_cpu_stall_suppress;
 int rcu_jiffies_till_stall_check(void);

+#define rcu_ftrace_dump_stall_suppress() \
+do { \
+	if (!rcu_cpu_stall_suppress) \
+		rcu_cpu_stall_suppress = 3; \
+} while (0)
+
+#define rcu_ftrace_dump_stall_unsuppress() \
+do { \
+	if (rcu_cpu_stall_suppress == 3) \
+		rcu_cpu_stall_suppress = 0; \
+} while (0)
+
+#else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */
+#define rcu_ftrace_dump_stall_suppress()
+#define rcu_ftrace_dump_stall_unsuppress()
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */

 /*
@@ -222,7 +237,9 @@ do { \
 	if (!atomic_read(&___rfd_beenhere) && \
 	    !atomic_xchg(&___rfd_beenhere, 1)) { \
 		tracing_off(); \
+		rcu_ftrace_dump_stall_suppress(); \
 		ftrace_dump(oops_dump_mode); \
+		rcu_ftrace_dump_stall_unsuppress(); \
 	} \
 } while (0)
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 5033b66d2753..3dc8efb16dc7 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -494,6 +494,7 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
 #endif

 int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
+EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
 static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
 module_param(rcu_cpu_stall_suppress, int, 0644);
-- cgit

From 2b1516e55f8416acfb48d5f43d41222d180fb5a3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Fri, 18 Aug 2017 16:11:37 -0700
Subject: rcutorture: Add interrupt-disable capability to stall-warning tests

When rcutorture sees the rcutorture.stall_cpu kernel boot parameter,
it loops with preemption disabled, which does in fact normally
generate an RCU CPU stall warning message.  However, there are test
scenarios that need the stalling CPU to have interrupts disabled.
This commit therefore adds an rcutorture.stall_cpu_irqsoff kernel
boot parameter that causes the stalling CPU to disable interrupts.

Signed-off-by: Paul E.
McKenney --- kernel/rcu/rcutorture.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 45f2ffbc1e78..0273bc0a8586 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -89,6 +89,7 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable."); torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable."); torture_param(int, stall_cpu_holdoff, 10, "Time to wait before starting stall (s)."); +torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling."); torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s"); torture_param(int, stutter, 5, "Number of seconds to run/halt test"); @@ -1357,7 +1358,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " "test_boost=%d/%d test_boost_interval=%d " "test_boost_duration=%d shutdown_secs=%d " - "stall_cpu=%d stall_cpu_holdoff=%d " + "stall_cpu=%d stall_cpu_holdoff=%d stall_cpu_irqsoff=%d " "n_barrier_cbs=%d " "onoff_interval=%d onoff_holdoff=%d\n", torture_type, tag, nrealreaders, nfakewriters, @@ -1365,7 +1366,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, test_boost, cur_ops->can_boost, test_boost_interval, test_boost_duration, shutdown_secs, - stall_cpu, stall_cpu_holdoff, + stall_cpu, stall_cpu_holdoff, stall_cpu_irqsoff, n_barrier_cbs, onoff_interval, onoff_holdoff); } @@ -1430,12 +1431,19 @@ static int rcu_torture_stall(void *args) if (!kthread_should_stop()) { stop_at = get_seconds() + stall_cpu; /* RCU CPU stall is expected behavior in following code. */ - pr_alert("rcu_torture_stall start.\n"); rcu_read_lock(); - preempt_disable(); + if (stall_cpu_irqsoff) + local_irq_disable(); + else + preempt_disable(); + pr_alert("rcu_torture_stall start on CPU %d.\n", + smp_processor_id()); while (ULONG_CMP_LT(get_seconds(), stop_at)) continue; /* Induce RCU CPU stall warning. */ - preempt_enable(); + if (stall_cpu_irqsoff) + local_irq_enable(); + else + preempt_enable(); rcu_read_unlock(); pr_alert("rcu_torture_stall end.\n"); } -- cgit From 0032f4e889764d22ccccb6a15742071d6f0d1f5a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 30 Aug 2017 10:40:17 -0700 Subject: rcutorture: Dump writer stack if stalled Right now, rcutorture warns if an rcu_torture_writer() kthread stalls, but this warning is not always all that helpful. This commit therefore makes the first such warning include a stack dump. This in turn requires that sched_show_task() be exported to GPL modules, so this commit makes that change as well. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/rcu') diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 0273bc0a8586..362eb2f78b3c 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "rcu.h" @@ -1240,6 +1241,7 @@ rcu_torture_stats_print(void) long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; static unsigned long rtcv_snap = ULONG_MAX; + static bool splatted; struct task_struct *wtp; for_each_possible_cpu(cpu) { @@ -1325,6 +1327,10 @@ rcu_torture_stats_print(void) gpnum, completed, flags, wtp == NULL ? ~0UL : wtp->state, wtp == NULL ? 
-1 : (int)task_cpu(wtp));
+		if (!splatted && wtp) {
+			sched_show_task(wtp);
+			splatted = true;
+		}
 		show_rcu_gp_kthreads();
 		rcu_ftrace_dump(DUMP_ALL);
 	}
-- cgit

From b88697810d7c1d102a529990f9071b0f14cfe6df Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Wed, 18 Oct 2017 08:33:44 -0700
Subject: rcu: Do not include rtmutex_common.h unconditionally

This commit adjusts include files and provides definitions in
preparation for suppressing lockdep false-positive ->boost_mtx
complaints.  Without this preparation, architectures not supporting
rt_mutex will get build failures.

Reported-by: kbuild test robot
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_plugin.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel/rcu')

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index fed95fa941e6..969eae45f05d 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -54,6 +54,7 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work);
  * This probably needs to be excluded from -rt builds.
  */
 #define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
+#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)

 #endif /* #else #ifdef CONFIG_RCU_BOOST */

@@ -911,8 +912,6 @@ void exit_rcu(void)

 #ifdef CONFIG_RCU_BOOST

-#include "../locking/rtmutex_common.h"
-
 static void rcu_wake_cond(struct task_struct *t, int status)
 {
 	/*
-- cgit

From 02a7c234e54052101164368ff981bd72f7acdd65 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Tue, 19 Sep 2017 15:36:42 -0700
Subject: rcu: Suppress lockdep false-positive ->boost_mtx complaints

RCU priority boosting uses rt_mutex_init_proxy_locked() to initialize
an rt_mutex structure in locked state held by some other task.  When
that other task releases it, lockdep complains (quite accurately, but
a bit uselessly) that the other task never acquired it.  This complaint
can suppress other, more helpful, lockdep complaints, and in any case
it is a false positive.

This commit therefore switches from rt_mutex_unlock() to
rt_mutex_futex_unlock(), thereby avoiding the lockdep annotations.
Of course, if lockdep ever learns about rt_mutex_init_proxy_locked(),
additional adjustments will be required.

Suggested-by: Peter Zijlstra
Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_plugin.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/rcu')

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 969eae45f05d..1eaab96d1a3c 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -531,7 +531,7 @@ void rcu_read_unlock_special(struct task_struct *t)

 		/* Unboost if we were boosted. */
 		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
-			rt_mutex_unlock(&rnp->boost_mtx);
+			rt_mutex_futex_unlock(&rnp->boost_mtx);

 		/*
 		 * If this was the last task on the expedited lists,
-- cgit

From c0da313e090d6e7130d0c3005245176296c24e4a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Fri, 22 Sep 2017 09:58:47 -0700
Subject: rcu: Add extended-quiescent-state testing advice

If you add or remove calls to rcu_idle_enter(), rcu_user_enter(),
rcu_irq_exit(), rcu_irq_exit_irqson(), rcu_idle_exit(),
rcu_user_exit(), rcu_irq_enter(), rcu_irq_enter_irqson(),
rcu_nmi_enter(), or rcu_nmi_exit(), you should run a full set of
tests on a kernel built with CONFIG_RCU_EQS_DEBUG=y.

Signed-off-by: Paul E.
McKenney --- kernel/rcu/tree.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b0ad62b0e7b8..d234356d7afc 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -837,6 +837,9 @@ static void rcu_eqs_enter(bool user) * We crowbar the ->dynticks_nesting field to zero to allow for * the possibility of usermode upcalls having messed up our count * of interrupt nesting level during the prior busy period. + * + * If you add or remove a call to rcu_idle_enter(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_idle_enter(void) { @@ -852,6 +855,9 @@ void rcu_idle_enter(void) * is permitted between this call and rcu_user_exit(). This way the * CPU doesn't need to maintain the tick for RCU maintenance purposes * when the CPU runs in userspace. + * + * If you add or remove a call to rcu_user_enter(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_user_enter(void) { @@ -875,6 +881,9 @@ void rcu_user_enter(void) * Use things like work queues to work around this limitation. * * You have been warned. + * + * If you add or remove a call to rcu_irq_exit(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_irq_exit(void) { @@ -899,6 +908,9 @@ void rcu_irq_exit(void) /* * Wrapper for rcu_irq_exit() where interrupts are enabled. + * + * If you add or remove a call to rcu_irq_exit_irqson(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. */ void rcu_irq_exit_irqson(void) { @@ -971,6 +983,9 @@ static void rcu_eqs_exit(bool user) * allow for the possibility of usermode upcalls messing up our count * of interrupt nesting level during the busy period that is just * now starting. + * + * If you add or remove a call to rcu_idle_exit(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_idle_exit(void) { @@ -987,6 +1002,9 @@ void rcu_idle_exit(void) * * Exit RCU idle mode while entering the kernel because it can * run a RCU read side critical section anytime. + * + * If you add or remove a call to rcu_user_exit(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_user_exit(void) { @@ -1012,6 +1030,9 @@ void rcu_user_exit(void) * Use things like work queues to work around this limitation. * * You have been warned. + * + * If you add or remove a call to rcu_irq_enter(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_irq_enter(void) { @@ -1037,6 +1058,9 @@ void rcu_irq_enter(void) /* * Wrapper for rcu_irq_enter() where interrupts are enabled. + * + * If you add or remove a call to rcu_irq_enter_irqson(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. */ void rcu_irq_enter_irqson(void) { @@ -1055,6 +1079,9 @@ void rcu_irq_enter_irqson(void) * that the CPU is active. This implementation permits nested NMIs, as * long as the nesting level does not overflow an int. (You will probably * run out of stack space first.) + * + * If you add or remove a call to rcu_nmi_enter(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. */ void rcu_nmi_enter(void) { @@ -1087,6 +1114,9 @@ void rcu_nmi_enter(void) * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting * to let the RCU grace-period handling know that the CPU is back to * being RCU-idle. + * + * If you add or remove a call to rcu_nmi_exit(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. 
 */
 void rcu_nmi_exit(void)
 {
-- cgit

From 56628a7fc84a6e3c4c84886d70ec26da3d7f2ce4 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 22 Sep 2017 17:28:06 +0200
Subject: rcu/segcblist: Include rcupdate.h

The RT build on ARM complains about non-existing ULONG_CMP_LT.
This commit therefore includes rcupdate.h into rcu_segcblist.c.

Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Paul E. McKenney
---
 kernel/rcu/rcu_segcblist.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/rcu')

diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 7649fcd2c4c7..88cba7c2956c 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include "rcu_segcblist.h"
-- cgit