From 18fec7d8758dd416904da205375e6fa667defc80 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 4 Jan 2012 11:44:57 -0800
Subject: rcu: Improve synchronize_rcu() diagnostics

Although TREE_PREEMPT_RCU indirectly uses might_sleep() to detect illegal
use of synchronize_sched() and synchronize_rcu_bh() from within an RCU
read-side critical section, this might_sleep() check is bypassed when
there is only a single CPU (for example, when running an SMP kernel on
a single-CPU system).  This patch therefore adds a might_sleep() call
to the rcu_blocking_is_gp() check that is unconditionally invoked from
both synchronize_sched() and synchronize_rcu_bh().

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutree.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 67458468f1a8..73e7195f9997 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -83,6 +83,7 @@ extern void rcu_sched_force_quiescent_state(void);
 /* A context switch is a grace period for RCU-sched and RCU-bh. */
 static inline int rcu_blocking_is_gp(void)
 {
+	might_sleep();  /* Check for RCU read-side critical section. */
 	return num_online_cpus() == 1;
 }
 
-- 
cgit 


From 486e259340fc4c60474f2c14703e3b3634bb58ca Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Fri, 6 Jan 2012 14:11:30 -0800
Subject: rcu: Avoid waking up CPUs having only kfree_rcu() callbacks

When CONFIG_RCU_FAST_NO_HZ is enabled, RCU will allow a given CPU to
enter dyntick-idle mode even if it still has RCU callbacks queued.
RCU avoids system hangs in this case by scheduling a timer for several
jiffies in the future.  However, if all of the callbacks on that CPU
are from kfree_rcu(), there is no reason to wake the CPU up, as it is
not a problem to defer freeing of memory.

This commit therefore tracks the number of callbacks on a given CPU
that are from kfree_rcu(), and avoids scheduling the timer if all of
a given CPU's callbacks are from kfree_rcu().

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h   |  2 +-
 include/linux/rcutiny.h    |  6 +++++
 include/linux/rcutree.h    |  2 ++
 include/trace/events/rcu.h | 63 ++++++++++++++++++++++++++++------------------
 4 files changed, 48 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 81c04f4348ec..a67d5f1072ea 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -841,7 +841,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
 	/* See the kfree_rcu() header comment. */
 	BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
 
-	call_rcu(head, (rcu_callback)offset);
+	kfree_call_rcu(head, (rcu_callback)offset);
 }
 
 /**
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 00b7a5e493d2..51bf29c81485 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -83,6 +83,12 @@ static inline void synchronize_sched_expedited(void)
 	synchronize_sched();
 }
 
+static inline void kfree_call_rcu(struct rcu_head *head,
+				  void (*func)(struct rcu_head *rcu))
+{
+	call_rcu(head, func);
+}
+
 #ifdef CONFIG_TINY_RCU
 
 static inline void rcu_preempt_note_context_switch(void)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 73e7195f9997..73892483fd05 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -61,6 +61,8 @@ extern void synchronize_rcu_bh(void);
 extern void synchronize_sched_expedited(void);
 extern void synchronize_rcu_expedited(void);
 
+void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+
 static inline void synchronize_rcu_bh_expedited(void)
 {
 	synchronize_sched_expedited();
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d2d88bed891b..337099783f37 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -313,19 +313,22 @@ TRACE_EVENT(rcu_prep_idle,
 /*
  * Tracepoint for the registration of a single RCU callback function.
  * The first argument is the type of RCU, the second argument is
- * a pointer to the RCU callback itself, and the third element is the
- * new RCU callback queue length for the current CPU.
+ * a pointer to the RCU callback itself, the third element is the
+ * number of lazy callbacks queued, and the fourth element is the
+ * total number of callbacks queued.
  */
 TRACE_EVENT(rcu_callback,
 
-	TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen),
+	TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen_lazy,
+		 long qlen),
 
-	TP_ARGS(rcuname, rhp, qlen),
+	TP_ARGS(rcuname, rhp, qlen_lazy, qlen),
 
 	TP_STRUCT__entry(
 		__field(char *, rcuname)
 		__field(void *, rhp)
 		__field(void *, func)
+		__field(long, qlen_lazy)
 		__field(long, qlen)
 	),
 
@@ -333,11 +336,13 @@ TRACE_EVENT(rcu_callback,
 		__entry->rcuname = rcuname;
 		__entry->rhp = rhp;
 		__entry->func = rhp->func;
+		__entry->qlen_lazy = qlen_lazy;
 		__entry->qlen = qlen;
 	),
 
-	TP_printk("%s rhp=%p func=%pf %ld",
-		  __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen)
+	TP_printk("%s rhp=%p func=%pf %ld/%ld",
+		  __entry->rcuname, __entry->rhp, __entry->func,
+		  __entry->qlen_lazy, __entry->qlen)
 );
 
 /*
@@ -345,20 +350,21 @@ TRACE_EVENT(rcu_callback,
  * kfree() form.  The first argument is the RCU type, the second argument
  * is a pointer to the RCU callback, the third argument is the offset
  * of the callback within the enclosing RCU-protected data structure,
- * and the fourth argument is the new RCU callback queue length for the
- * current CPU.
+ * the fourth argument is the number of lazy callbacks queued, and the
+ * fifth argument is the total number of callbacks queued.
  */
 TRACE_EVENT(rcu_kfree_callback,
 
 	TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset,
-		 long qlen),
+		 long qlen_lazy, long qlen),
 
-	TP_ARGS(rcuname, rhp, offset, qlen),
+	TP_ARGS(rcuname, rhp, offset, qlen_lazy, qlen),
 
 	TP_STRUCT__entry(
 		__field(char *, rcuname)
 		__field(void *, rhp)
 		__field(unsigned long, offset)
+		__field(long, qlen_lazy)
 		__field(long, qlen)
 	),
 
@@ -366,41 +372,45 @@ TRACE_EVENT(rcu_kfree_callback,
 		__entry->rcuname = rcuname;
 		__entry->rhp = rhp;
 		__entry->offset = offset;
+		__entry->qlen_lazy = qlen_lazy;
 		__entry->qlen = qlen;
 	),
 
-	TP_printk("%s rhp=%p func=%ld %ld",
+	TP_printk("%s rhp=%p func=%ld %ld/%ld",
 		  __entry->rcuname, __entry->rhp, __entry->offset,
-		  __entry->qlen)
+		  __entry->qlen_lazy, __entry->qlen)
 );
 
 /*
  * Tracepoint for marking the beginning rcu_do_batch, performed to start
  * RCU callback invocation.  The first argument is the RCU flavor,
- * the second is the total number of callbacks (including those that
- * are not yet ready to be invoked), and the third argument is the
- * current RCU-callback batch limit.
+ * the second is the number of lazy callbacks queued, the third is
+ * the total number of callbacks queued, and the fourth argument is
+ * the current RCU-callback batch limit.
  */
 TRACE_EVENT(rcu_batch_start,
 
-	TP_PROTO(char *rcuname, long qlen, int blimit),
+	TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit),
 
-	TP_ARGS(rcuname, qlen, blimit),
+	TP_ARGS(rcuname, qlen_lazy, qlen, blimit),
 
 	TP_STRUCT__entry(
 		__field(char *, rcuname)
+		__field(long, qlen_lazy)
 		__field(long, qlen)
 		__field(int, blimit)
 	),
 
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
+		__entry->qlen_lazy = qlen_lazy;
 		__entry->qlen = qlen;
 		__entry->blimit = blimit;
 	),
 
-	TP_printk("%s CBs=%ld bl=%d",
-		  __entry->rcuname, __entry->qlen, __entry->blimit)
+	TP_printk("%s CBs=%ld/%ld bl=%d",
+		  __entry->rcuname, __entry->qlen_lazy, __entry->qlen,
+		  __entry->blimit)
 );
 
 /*
@@ -531,16 +541,21 @@ TRACE_EVENT(rcu_torture_read,
 #else /* #ifdef CONFIG_RCU_TRACE */
 
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
-#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0)
+#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
+				    qsmask) do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
-#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
+#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
+					 grplo, grphi, gp_tasks) do { } \
+	while (0)
 #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
 #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0)
 #define trace_rcu_prep_idle(reason) do { } while (0)
-#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
-#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
-#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
+#define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0)
+#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \
+	do { } while (0)
+#define trace_rcu_batch_start(rcuname, qlen_lazy, qlen, blimit) \
+	do { } while (0)
 #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0)
 #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
 #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
-- 
cgit 


From 768dfffdffbfcc07d6927bdd642c714c0dd64c99 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Wed, 11 Jan 2012 16:33:17 -0800
Subject: rcu: Prevent RCU callbacks from executing before scheduler
 initialized

This is a port of commit #b0d3041 from TREE_RCU to TREE_PREEMPT_RCU.

Under some rare but real combinations of configuration parameters, RCU
callbacks are posted during early boot that use kernel facilities that are
not yet initialized.  Therefore, when these callbacks are invoked, hard
hangs and crashes ensue.  This commit therefore prevents RCU callbacks
from being invoked until after the scheduler is fully up and running,
as in after multiple tasks have been spawned.

It might well turn out that a better approach is to identify the specific
RCU callbacks that are causing this problem, but that discussion will
wait until such time as someone really needs an RCU callback to be invoked
(as opposed to merely registered) during early boot.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutiny.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 51bf29c81485..e93df77176d1 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,13 +27,9 @@
 
 #include <linux/cache.h>
 
-#ifdef CONFIG_RCU_BOOST
 static inline void rcu_init(void)
 {
 }
-#else /* #ifdef CONFIG_RCU_BOOST */
-void rcu_init(void);
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
 
 static inline void rcu_barrier_bh(void)
 {
-- 
cgit 


From 1aa03f1188f7b0b85df2de602b33ee7b6fab8e00 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Wed, 11 Jan 2012 17:25:17 -0800
Subject: rcu: Simplify unboosting checks

This is a port of commit #82e78d80 from TREE_PREEMPT_RCU to
TINY_PREEMPT_RCU.

This commit uses the fact that current->rcu_boost_mutex is set
any time that the RCU_READ_UNLOCK_BOOSTED flag is set in the
current->rcu_read_unlock_special bitmask.  This allows tests of
the bit to be changed to tests of the pointer, which in turn allows
the RCU_READ_UNLOCK_BOOSTED flag to be eliminated.

Please note that the check of current->rcu_read_unlock_special need not
change because any time that RCU_READ_UNLOCK_BOOSTED was set, so was
RCU_READ_UNLOCK_BLOCKED.  Therefore, __rcu_read_unlock() can continue
testing current->rcu_read_unlock_special for non-zero, as before.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/sched.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6bfd88..e692abaf915a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1864,8 +1864,7 @@ extern void task_clear_jobctl_pending(struct task_struct *task,
 #ifdef CONFIG_PREEMPT_RCU
 
 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
-#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */
-#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */
+#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
 
 static inline void rcu_copy_process(struct task_struct *p)
 {
-- 
cgit 


From 50406b98b6372e7de21d903d2cf3914e9d64e094 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 12 Jan 2012 13:49:19 -0800
Subject: rcu: Make rcu_sleep_check() also check rcu_lock_map

Although it is OK to be preempted in an RCU read-side critical section
for TREE_PREEMPT_RCU, it is definitely not OK to be preempted, block,
or might_sleep() within an RCU read-side critical section for TREE_RCU.
Unfortunately, rcu_might_sleep() currently only checks for RCU-bh and
RCU-sched read-side critical sections.  This commit therefore makes
rcu_might_sleep() check for RCU read-side critical sections, but only
in TREE_RCU builds.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a67d5f1072ea..6df0ae197810 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -381,8 +381,22 @@ extern int rcu_my_thread_group_empty(void);
 		}							\
 	} while (0)
 
+#if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU)
+static inline void rcu_preempt_sleep_check(void)
+{
+	rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
+			   "Illegal context switch in RCU read-side "
+			   "critical section");
+}
+#else /* #ifdef CONFIG_PROVE_RCU */
+static inline void rcu_preempt_sleep_check(void)
+{
+}
+#endif /* #else #ifdef CONFIG_PROVE_RCU */
+
 #define rcu_sleep_check()						\
 	do {								\
+		rcu_preempt_sleep_check();				\
 		rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),	\
 				   "Illegal context switch in RCU-bh"	\
 				   " read-side critical section");	\
-- 
cgit 


From 5e1ee6e1016763812018bf5c5e966992821dc47e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 12 Jan 2012 17:21:20 -0800
Subject: rcu: Note that rcu_access_pointer() can be used for teardown

There is no convenient expression for rcu_deference_protected()
when it is used in tearing down multilinked structures following
a grace period.  For example, suppose that an element containing an
RCU-protected pointer to a second element is removed from an enclosing
RCU-protected data structure, then the write-side lock is released,
and finally synchronize_rcu() is invoked to wait for a grace period.
Then it is necessary to traverse the pointer in order to free up the
second element.  But we are not in an RCU read-side critical section
and we are holding no locks, so the usual rcu_dereference_check() and
rcu_dereference_protected() primitives are not appropriate.  Neither
is rcu_dereference_raw(), as it is intended for use in data structures
where the user defines the locking design (for example, list_head).

So this responsibility is added to rcu_access_pointer()'s list, and
this commit updates rcu_assign_pointer()'s header comment accordingly.

Suggested-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: David Howells <dhowells@redhat.com>
---
 include/linux/rcupdate.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 6df0ae197810..f409529ff35a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -484,6 +484,13 @@ static inline void rcu_preempt_sleep_check(void)
  * NULL.  Although rcu_access_pointer() may also be used in cases where
  * update-side locks prevent the value of the pointer from changing, you
  * should instead use rcu_dereference_protected() for this use case.
+ *
+ * It is also permissible to use rcu_access_pointer() when read-side
+ * access to the pointer was removed at least one grace period ago, as
+ * is the case in the context of the RCU callback that is freeing up
+ * the data, or after a synchronize_rcu() returns.  This can be useful
+ * when tearing down multi-linked structures after a grace period
+ * has elapsed.
  */
 #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu)
 
-- 
cgit 


From c0d6d01bffdce19fa19baad6cb8cc3eed7bfd6f5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Mon, 23 Jan 2012 12:41:26 -0800
Subject: rcu: Check for illegal use of RCU from offlined CPUs

Although it is legal to use RCU during early boot, it is anything
but legal to use RCU at runtime from an offlined CPU.  After all, RCU
explicitly ignores offlined CPUs.  This commit therefore adds checks
for runtime use of RCU from offlined CPUs.

These checks are not perfect, in particular, they can be subverted
through use of things like rcu_dereference_raw().  Note that it is not
possible to put checks in rcu_read_lock() and friends due to the fact
that these primitives are used in code that might be used under either
RCU or lock-based protection, which means that checking rcu_read_lock()
gets you fat piles of false positives.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 19 +++++++++++++++++++
 include/linux/srcu.h     | 11 +++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index f409529ff35a..146d37d31778 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -226,6 +226,15 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
 }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
+bool rcu_lockdep_current_cpu_online(void);
+#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
+static inline bool rcu_lockdep_current_cpu_online(void)
+{
+	return 1;
+}
+#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 #ifdef CONFIG_PROVE_RCU
@@ -270,6 +279,9 @@ extern int debug_lockdep_rcu_enabled(void);
  * occur in the same context, for example, it is illegal to invoke
  * rcu_read_unlock() in process context if the matching rcu_read_lock()
  * was invoked from within an irq handler.
+ *
+ * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * offline from an RCU perspective, so check for those as well.
  */
 static inline int rcu_read_lock_held(void)
 {
@@ -277,6 +289,8 @@ static inline int rcu_read_lock_held(void)
 		return 1;
 	if (rcu_is_cpu_idle())
 		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
 	return lock_is_held(&rcu_lock_map);
 }
 
@@ -313,6 +327,9 @@ extern int rcu_read_lock_bh_held(void);
  * notice an extended quiescent state to other CPUs that started a grace
  * period. Otherwise we would delay any grace period as long as we run in
  * the idle task.
+ *
+ * Similarly, we avoid claiming an SRCU read lock held if the current
+ * CPU is offline.
  */
 #ifdef CONFIG_PREEMPT_COUNT
 static inline int rcu_read_lock_sched_held(void)
@@ -323,6 +340,8 @@ static inline int rcu_read_lock_sched_held(void)
 		return 1;
 	if (rcu_is_cpu_idle())
 		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
 	if (debug_locks)
 		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
 	return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index e1b005918bbb..9a323728e60c 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -99,15 +99,18 @@ long srcu_batches_completed(struct srcu_struct *sp);
  * power mode. This way we can notice an extended quiescent state to
  * other CPUs that started a grace period. Otherwise we would delay any
  * grace period as long as we run in the idle task.
+ *
+ * Similarly, we avoid claiming an SRCU read lock held if the current
+ * CPU is offline.
  */
 static inline int srcu_read_lock_held(struct srcu_struct *sp)
 {
-	if (rcu_is_cpu_idle())
-		return 0;
-
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
-
+	if (rcu_is_cpu_idle())
+		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
 	return lock_is_held(&sp->dep_map);
 }
 
-- 
cgit 


From 236fefafe5d3d34b78ed2ccf5510909716112326 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 31 Jan 2012 14:00:41 -0800
Subject: rcu: Call out dangers of expedited RCU primitives

The expedited RCU primitives can be quite useful, but they have some
high costs as well.  This commit updates and creates docbook comments
calling out the costs, and updates the RCU documentation as well.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutree.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 73892483fd05..e8ee5dd0854c 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -63,6 +63,22 @@ extern void synchronize_rcu_expedited(void);
 
 void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
 
+/**
+ * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
+ *
+ * Wait for an RCU-bh grace period to elapse, but use a "big hammer"
+ * approach to force the grace period to end quickly.  This consumes
+ * significant time on all CPUs and is unfriendly to real-time workloads,
+ * so is thus not recommended for any sort of common-case code.  In fact,
+ * if you are using synchronize_rcu_bh_expedited() in a loop, please
+ * restructure your code to batch your updates, and then use a single
+ * synchronize_rcu_bh() instead.
+ *
+ * Note that it is illegal to call this function while holding any lock
+ * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
+ * to call this function from a CPU-hotplug notifier.  Failing to observe
+ * these restriction will result in deadlock.
+ */
 static inline void synchronize_rcu_bh_expedited(void)
 {
 	synchronize_sched_expedited();
-- 
cgit 


From bde23c6892878e48f64de668660778991bc2fb56 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Feb 2012 10:30:46 -0800
Subject: rcu: Convert WARN_ON_ONCE() in rcu_lock_acquire() to lockdep

The WARN_ON_ONCE() in rcu_lock_acquire() results in infinite recursion
on S390, and also doesn't print very much information.  Remove this.

Updated patch to add lockdep-RCU assertions to RCU's read-side primitives.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 14 ++++++++++++--
 include/linux/srcu.h     |  4 ++++
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 146d37d31778..6ee663c8745a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -248,13 +248,11 @@ static inline int rcu_is_cpu_idle(void)
 
 static inline void rcu_lock_acquire(struct lockdep_map *map)
 {
-	WARN_ON_ONCE(rcu_is_cpu_idle());
 	lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
 }
 
 static inline void rcu_lock_release(struct lockdep_map *map)
 {
-	WARN_ON_ONCE(rcu_is_cpu_idle());
 	lock_release(map, 1, _THIS_IP_);
 }
 
@@ -699,6 +697,8 @@ static inline void rcu_read_lock(void)
 	__rcu_read_lock();
 	__acquire(RCU);
 	rcu_lock_acquire(&rcu_lock_map);
+	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+			   "rcu_read_lock() used illegally while idle");
 }
 
 /*
@@ -718,6 +718,8 @@ static inline void rcu_read_lock(void)
  */
 static inline void rcu_read_unlock(void)
 {
+	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+			   "rcu_read_unlock() used illegally while idle");
 	rcu_lock_release(&rcu_lock_map);
 	__release(RCU);
 	__rcu_read_unlock();
@@ -745,6 +747,8 @@ static inline void rcu_read_lock_bh(void)
 	local_bh_disable();
 	__acquire(RCU_BH);
 	rcu_lock_acquire(&rcu_bh_lock_map);
+	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+			   "rcu_read_lock_bh() used illegally while idle");
 }
 
 /*
@@ -754,6 +758,8 @@ static inline void rcu_read_lock_bh(void)
  */
 static inline void rcu_read_unlock_bh(void)
 {
+	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+			   "rcu_read_unlock_bh() used illegally while idle");
 	rcu_lock_release(&rcu_bh_lock_map);
 	__release(RCU_BH);
 	local_bh_enable();
@@ -777,6 +783,8 @@ static inline void rcu_read_lock_sched(void)
 	preempt_disable();
 	__acquire(RCU_SCHED);
 	rcu_lock_acquire(&rcu_sched_lock_map);
+	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+			   "rcu_read_lock_sched() used illegally while idle");
 }
 
 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
@@ -793,6 +801,8 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
  */
 static inline void rcu_read_unlock_sched(void)
 {
+	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+			   "rcu_read_unlock_sched() used illegally while idle");
 	rcu_lock_release(&rcu_sched_lock_map);
 	__release(RCU_SCHED);
 	preempt_enable();
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 9a323728e60c..d3d5fa54f25e 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -172,6 +172,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 	int retval = __srcu_read_lock(sp);
 
 	rcu_lock_acquire(&(sp)->dep_map);
+	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+			   "srcu_read_lock() used illegally while idle");
 	return retval;
 }
 
@@ -185,6 +187,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__releases(sp)
 {
+	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+			   "srcu_read_unlock() used illegally while idle");
 	rcu_lock_release(&(sp)->dep_map);
 	__srcu_read_unlock(sp, idx);
 }
-- 
cgit 


From 8a2ecf474d3ee8dd5d001490349e422cec52f39f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 2 Feb 2012 15:42:04 -0800
Subject: rcu: Add RCU_NONIDLE() for idle-loop RCU read-side critical sections

RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden
in the inner idle loop, that is, between the rcu_idle_enter() and the
rcu_idle_exit() -- RCU will happily ignore any such read-side critical
sections.  However, things like powertop need tracepoints in the inner
idle loop.

This commit therefore provides an RCU_NONIDLE() macro that can be used to
wrap code in the idle loop that requires RCU read-side critical sections.

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Acked-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 6ee663c8745a..937217425c47 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -190,6 +190,33 @@ extern void rcu_idle_exit(void);
 extern void rcu_irq_enter(void);
 extern void rcu_irq_exit(void);
 
+/**
+ * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
+ * @a: Code that RCU needs to pay attention to.
+ *
+ * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden
+ * in the inner idle loop, that is, between the rcu_idle_enter() and
+ * the rcu_idle_exit() -- RCU will happily ignore any such read-side
+ * critical sections.  However, things like powertop need tracepoints
+ * in the inner idle loop.
+ *
+ * This macro provides the way out:  RCU_NONIDLE(do_something_with_RCU())
+ * will tell RCU that it needs to pay attending, invoke its argument
+ * (in this example, a call to the do_something_with_RCU() function),
+ * and then tell RCU to go back to ignoring this CPU.  It is permissible
+ * to nest RCU_NONIDLE() wrappers, but the nesting level is currently
+ * quite limited.  If deeper nesting is required, it will be necessary
+ * to adjust DYNTICK_TASK_NESTING_VALUE accordingly.
+ *
+ * This macro may be used from process-level code only.
+ */
+#define RCU_NONIDLE(a) \
+	do { \
+		rcu_idle_exit(); \
+		do { a; } while (0); \
+		rcu_idle_enter(); \
+	} while (0)
+
 /*
  * Infrastructure to implement the synchronize_() primitives in
  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
-- 
cgit