Merge branch 'for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu

Pull RCU updates from Paul E. McKenney: - Miscellaneous fixes. (Paul E. McKenney, Boqun Feng, Oleg Nesterov, Patrick Marlier) - Improvements to expedited grace periods. (Paul E. McKenney) - Performance improvements to and locktorture tests for percpu-rwsem. (Oleg Nesterov, Paul E. McKenney) - Torture-test changes. (Paul E. McKenney, Davidlohr Bueso) - Documentation updates. (Paul E. McKenney) Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Ingo Molnar <mingo@kernel.org> 2015-10-19 10:09:54 +0200
committer: Ingo Molnar <mingo@kernel.org> 2015-10-19 10:09:54 +0200
commit: c13dc31adb04c3f85d54d2fa13e34206f25742eb (patch)
tree: 09cf55d6f3cc628ac0a303c05dfdcc9af2ad803c /kernel/locking
parent: 7379047d5585187d1288486d4627873170d0005a (diff)
parent: 39cd2dd39a8b92ce91c4dad95f6e979c946a3942 (diff)
2 files changed, 194 insertions, 60 deletions
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 32244186f1f2..8ef1919d63b2 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -17,12 +17,14 @@
  *
  * Copyright (C) IBM Corporation, 2014
  *
- * Author: Paul E. McKenney <paulmck@us.ibm.com>
+ * Authors: Paul E. McKenney <paulmck@us.ibm.com>
+ *          Davidlohr Bueso <dave@stgolabs.net>
  *	Based on kernel/rcu/torture.c.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
+#include <linux/sched/rt.h>
 #include <linux/spinlock.h>
 #include <linux/rwlock.h>
 #include <linux/mutex.h>
@@ -34,6 +36,7 @@
 #include <linux/moduleparam.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/percpu-rwsem.h>
 #include <linux/torture.h>
 
 MODULE_LICENSE("GPL");
@@ -91,11 +94,13 @@ struct lock_torture_ops {
 	void (*init)(void);
 	int (*writelock)(void);
 	void (*write_delay)(struct torture_random_state *trsp);
+	void (*task_boost)(struct torture_random_state *trsp);
 	void (*writeunlock)(void);
 	int (*readlock)(void);
 	void (*read_delay)(struct torture_random_state *trsp);
 	void (*readunlock)(void);
-	unsigned long flags;
+
+	unsigned long flags; /* for irq spinlocks */
 	const char *name;
 };
 
@@ -139,9 +144,15 @@ static void torture_lock_busted_write_unlock(void)
 	  /* BUGGY, do not use in real life!!! */
 }
 
+static void torture_boost_dummy(struct torture_random_state *trsp)
+{
+	/* Only rtmutexes care about priority */
+}
+
 static struct lock_torture_ops lock_busted_ops = {
 	.writelock	= torture_lock_busted_write_lock,
 	.write_delay	= torture_lock_busted_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_lock_busted_write_unlock,
 	.readlock       = NULL,
 	.read_delay     = NULL,
@@ -185,6 +196,7 @@ static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock)
 static struct lock_torture_ops spin_lock_ops = {
 	.writelock	= torture_spin_lock_write_lock,
 	.write_delay	= torture_spin_lock_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_spin_lock_write_unlock,
 	.readlock       = NULL,
 	.read_delay     = NULL,
@@ -211,6 +223,7 @@ __releases(torture_spinlock)
 static struct lock_torture_ops spin_lock_irq_ops = {
 	.writelock	= torture_spin_lock_write_lock_irq,
 	.write_delay	= torture_spin_lock_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_lock_spin_write_unlock_irq,
 	.readlock       = NULL,
 	.read_delay     = NULL,
@@ -275,6 +288,7 @@ static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
 static struct lock_torture_ops rw_lock_ops = {
 	.writelock	= torture_rwlock_write_lock,
 	.write_delay	= torture_rwlock_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_rwlock_write_unlock,
 	.readlock       = torture_rwlock_read_lock,
 	.read_delay     = torture_rwlock_read_delay,
@@ -315,6 +329,7 @@ __releases(torture_rwlock)
 static struct lock_torture_ops rw_lock_irq_ops = {
 	.writelock	= torture_rwlock_write_lock_irq,
 	.write_delay	= torture_rwlock_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_rwlock_write_unlock_irq,
 	.readlock       = torture_rwlock_read_lock_irq,
 	.read_delay     = torture_rwlock_read_delay,
@@ -354,6 +369,7 @@ static void torture_mutex_unlock(void) __releases(torture_mutex)
 static struct lock_torture_ops mutex_lock_ops = {
 	.writelock	= torture_mutex_lock,
 	.write_delay	= torture_mutex_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_mutex_unlock,
 	.readlock       = NULL,
 	.read_delay     = NULL,
@@ -361,6 +377,90 @@ static struct lock_torture_ops mutex_lock_ops = {
 	.name		= "mutex_lock"
 };
 
+#ifdef CONFIG_RT_MUTEXES
+static DEFINE_RT_MUTEX(torture_rtmutex);
+
+static int torture_rtmutex_lock(void) __acquires(torture_rtmutex)
+{
+	rt_mutex_lock(&torture_rtmutex);
+	return 0;
+}
+
+static void torture_rtmutex_boost(struct torture_random_state *trsp)
+{
+	int policy;
+	struct sched_param param;
+	const unsigned int factor = 50000; /* yes, quite arbitrary */
+
+	if (!rt_task(current)) {
+		/*
+		 * (1) Boost priority once every ~50k operations. When the
+		 * task tries to take the lock, the rtmutex it will account
+		 * for the new priority, and do any corresponding pi-dance.
+		 */
+		if (!(torture_random(trsp) %
+		      (cxt.nrealwriters_stress * factor))) {
+			policy = SCHED_FIFO;
+			param.sched_priority = MAX_RT_PRIO - 1;
+		} else /* common case, do nothing */
+			return;
+	} else {
+		/*
+		 * The task will remain boosted for another ~500k operations,
+		 * then restored back to its original prio, and so forth.
+		 *
+		 * When @trsp is nil, we want to force-reset the task for
+		 * stopping the kthread.
+		 */
+		if (!trsp || !(torture_random(trsp) %
+			       (cxt.nrealwriters_stress * factor * 2))) {
+			policy = SCHED_NORMAL;
+			param.sched_priority = 0;
+		} else /* common case, do nothing */
+			return;
+	}
+
+	sched_setscheduler_nocheck(current, policy, &param);
+}
+
+static void torture_rtmutex_delay(struct torture_random_state *trsp)
+{
+	const unsigned long shortdelay_us = 2;
+	const unsigned long longdelay_ms = 100;
+
+	/*
+	 * We want a short delay mostly to emulate likely code, and
+	 * we want a long delay occasionally to force massive contention.
+	 */
+	if (!(torture_random(trsp) %
+	      (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+		mdelay(longdelay_ms);
+	if (!(torture_random(trsp) %
+	      (cxt.nrealwriters_stress * 2 * shortdelay_us)))
+		udelay(shortdelay_us);
+#ifdef CONFIG_PREEMPT
+	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
+		preempt_schedule();  /* Allow test to be preempted. */
+#endif
+}
+
+static void torture_rtmutex_unlock(void) __releases(torture_rtmutex)
+{
+	rt_mutex_unlock(&torture_rtmutex);
+}
+
+static struct lock_torture_ops rtmutex_lock_ops = {
+	.writelock	= torture_rtmutex_lock,
+	.write_delay	= torture_rtmutex_delay,
+	.task_boost     = torture_rtmutex_boost,
+	.writeunlock	= torture_rtmutex_unlock,
+	.readlock       = NULL,
+	.read_delay     = NULL,
+	.readunlock     = NULL,
+	.name		= "rtmutex_lock"
+};
+#endif
+
 static DECLARE_RWSEM(torture_rwsem);
 static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
 {
@@ -419,6 +519,7 @@ static void torture_rwsem_up_read(void) __releases(torture_rwsem)
 static struct lock_torture_ops rwsem_lock_ops = {
 	.writelock	= torture_rwsem_down_write,
 	.write_delay	= torture_rwsem_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_rwsem_up_write,
 	.readlock       = torture_rwsem_down_read,
 	.read_delay     = torture_rwsem_read_delay,
@@ -426,6 +527,48 @@ static struct lock_torture_ops rwsem_lock_ops = {
 	.name		= "rwsem_lock"
 };
 
+#include <linux/percpu-rwsem.h>
+static struct percpu_rw_semaphore pcpu_rwsem;
+
+void torture_percpu_rwsem_init(void)
+{
+	BUG_ON(percpu_init_rwsem(&pcpu_rwsem));
+}
+
+static int torture_percpu_rwsem_down_write(void) __acquires(pcpu_rwsem)
+{
+	percpu_down_write(&pcpu_rwsem);
+	return 0;
+}
+
+static void torture_percpu_rwsem_up_write(void) __releases(pcpu_rwsem)
+{
+	percpu_up_write(&pcpu_rwsem);
+}
+
+static int torture_percpu_rwsem_down_read(void) __acquires(pcpu_rwsem)
+{
+	percpu_down_read(&pcpu_rwsem);
+	return 0;
+}
+
+static void torture_percpu_rwsem_up_read(void) __releases(pcpu_rwsem)
+{
+	percpu_up_read(&pcpu_rwsem);
+}
+
+static struct lock_torture_ops percpu_rwsem_lock_ops = {
+	.init		= torture_percpu_rwsem_init,
+	.writelock	= torture_percpu_rwsem_down_write,
+	.write_delay	= torture_rwsem_write_delay,
+	.task_boost     = torture_boost_dummy,
+	.writeunlock	= torture_percpu_rwsem_up_write,
+	.readlock       = torture_percpu_rwsem_down_read,
+	.read_delay     = torture_rwsem_read_delay,
+	.readunlock     = torture_percpu_rwsem_up_read,
+	.name		= "percpu_rwsem_lock"
+};
+
 /*
  * Lock torture writer kthread.  Repeatedly acquires and releases
  * the lock, checking for duplicate acquisitions.
@@ -442,6 +585,7 @@ static int lock_torture_writer(void *arg)
 		if ((torture_random(&rand) & 0xfffff) == 0)
 			schedule_timeout_uninterruptible(1);
 
+		cxt.cur_ops->task_boost(&rand);
 		cxt.cur_ops->writelock();
 		if (WARN_ON_ONCE(lock_is_write_held))
 			lwsp->n_lock_fail++;
@@ -456,6 +600,8 @@ static int lock_torture_writer(void *arg)
 
 		stutter_wait("lock_torture_writer");
 	} while (!torture_must_stop());
+
+	cxt.cur_ops->task_boost(NULL); /* reset prio */
 	torture_kthread_stopping("lock_torture_writer");
 	return 0;
 }
@@ -642,7 +788,11 @@ static int __init lock_torture_init(void)
 		&spin_lock_ops, &spin_lock_irq_ops,
 		&rw_lock_ops, &rw_lock_irq_ops,
 		&mutex_lock_ops,
+#ifdef CONFIG_RT_MUTEXES
+		&rtmutex_lock_ops,
+#endif
 		&rwsem_lock_ops,
+		&percpu_rwsem_lock_ops,
 	};
 
 	if (!torture_init_begin(torture_type, verbose, &torture_runnable))
@@ -661,11 +811,11 @@ static int __init lock_torture_init(void)
 		for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
 			pr_alert(" %s", torture_ops[i]->name);
 		pr_alert("\n");
-		torture_init_end();
-		return -EINVAL;
+		firsterr = -EINVAL;
+		goto unwind;
 	}
 	if (cxt.cur_ops->init)
-		cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */
+		cxt.cur_ops->init();
 
 	if (nwriters_stress >= 0)
 		cxt.nrealwriters_stress = nwriters_stress;
@@ -676,6 +826,10 @@ static int __init lock_torture_init(void)
 	if (strncmp(torture_type, "mutex", 5) == 0)
 		cxt.debug_lock = true;
 #endif
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+	if (strncmp(torture_type, "rtmutex", 7) == 0)
+		cxt.debug_lock = true;
+#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
 	if ((strncmp(torture_type, "spin", 4) == 0) ||
 	    (strncmp(torture_type, "rw_lock", 7) == 0))
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f32567254867..f231e0bb311c 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -17,50 +17,43 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
 
 	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
 	__init_rwsem(&brw->rw_sem, name, rwsem_key);
-	atomic_set(&brw->write_ctr, 0);
+	rcu_sync_init(&brw->rss, RCU_SCHED_SYNC);
 	atomic_set(&brw->slow_read_ctr, 0);
 	init_waitqueue_head(&brw->write_waitq);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
 
 void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 {
+	/*
+	 * XXX: temporary kludge. The error path in alloc_super()
+	 * assumes that percpu_free_rwsem() is safe after kzalloc().
+	 */
+	if (!brw->fast_read_ctr)
+		return;
+
+	rcu_sync_dtor(&brw->rss);
 	free_percpu(brw->fast_read_ctr);
 	brw->fast_read_ctr = NULL; /* catch use after free bugs */
 }
 
 /*
- * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
- * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
- * serialize with the preempt-disabled section below.
- *
- * The nontrivial part is that we should guarantee acquire/release semantics
- * in case when
- *
- *	R_W: down_write() comes after up_read(), the writer should see all
- *	     changes done by the reader
- * or
- *	W_R: down_read() comes after up_write(), the reader should see all
- *	     changes done by the writer
+ * This is the fast-path for down_read/up_read. If it succeeds we rely
+ * on the barriers provided by rcu_sync_enter/exit; see the comments in
+ * percpu_down_write() and percpu_up_write().
  *
  * If this helper fails the callers rely on the normal rw_semaphore and
  * atomic_dec_and_test(), so in this case we have the necessary barriers.
- *
- * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
- * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
- * reader inside the critical section. See the comments in down_write and
- * up_write below.
  */
 static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 {
-	bool success = false;
+	bool success;
 
 	preempt_disable();
-	if (likely(!atomic_read(&brw->write_ctr))) {
+	success = rcu_sync_is_idle(&brw->rss);
+	if (likely(success))
 		__this_cpu_add(*brw->fast_read_ctr, val);
-		success = true;
-	}
 	preempt_enable();
 
 	return success;
@@ -77,16 +70,17 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 void percpu_down_read(struct percpu_rw_semaphore *brw)
 {
 	might_sleep();
-	if (likely(update_fast_ctr(brw, +1))) {
-		rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+	rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+
+	if (likely(update_fast_ctr(brw, +1)))
 		return;
-	}
 
-	down_read(&brw->rw_sem);
+	/* Avoid rwsem_acquire_read() and rwsem_release() */
+	__down_read(&brw->rw_sem);
 	atomic_inc(&brw->slow_read_ctr);
-	/* avoid up_read()->rwsem_release() */
 	__up_read(&brw->rw_sem);
 }
+EXPORT_SYMBOL_GPL(percpu_down_read);
 
 int percpu_down_read_trylock(struct percpu_rw_semaphore *brw)
 {
@@ -112,6 +106,7 @@ void percpu_up_read(struct percpu_rw_semaphore *brw)
 	if (atomic_dec_and_test(&brw->slow_read_ctr))
 		wake_up_all(&brw->write_waitq);
 }
+EXPORT_SYMBOL_GPL(percpu_up_read);
 
 static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
 {
@@ -126,33 +121,17 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
 	return sum;
 }
 
-/*
- * A writer increments ->write_ctr to force the readers to switch to the
- * slow mode, note the atomic_read() check in update_fast_ctr().
- *
- * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
- * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
- * counter it represents the number of active readers.
- *
- * Finally the writer takes ->rw_sem for writing and blocks the new readers,
- * then waits until the slow counter becomes zero.
- */
 void percpu_down_write(struct percpu_rw_semaphore *brw)
 {
-	/* tell update_fast_ctr() there is a pending writer */
-	atomic_inc(&brw->write_ctr);
 	/*
-	 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
-	 *    so that update_fast_ctr() can't succeed.
-	 *
-	 * 2. Ensures we see the result of every previous this_cpu_add() in
-	 *    update_fast_ctr().
+	 * Make rcu_sync_is_idle() == F and thus disable the fast-path in
+	 * percpu_down_read() and percpu_up_read(), and wait for gp pass.
 	 *
-	 * 3. Ensures that if any reader has exited its critical section via
-	 *    fast-path, it executes a full memory barrier before we return.
-	 *    See R_W case in the comment above update_fast_ctr().
+	 * The latter synchronises us with the preceding readers which used
+	 * the fast-past, so we can not miss the result of __this_cpu_add()
+	 * or anything else inside their criticial sections.
 	 */
-	synchronize_sched_expedited();
+	rcu_sync_enter(&brw->rss);
 
 	/* exclude other writers, and block the new readers completely */
 	down_write(&brw->rw_sem);
@@ -163,16 +142,17 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
 	/* wait for all readers to complete their percpu_up_read() */
 	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
 }
+EXPORT_SYMBOL_GPL(percpu_down_write);
 
 void percpu_up_write(struct percpu_rw_semaphore *brw)
 {
 	/* release the lock, but the readers can't use the fast-path */
 	up_write(&brw->rw_sem);
 	/*
-	 * Insert the barrier before the next fast-path in down_read,
-	 * see W_R case in the comment above update_fast_ctr().
+	 * Enable the fast-path in percpu_down_read() and percpu_up_read()
+	 * but only after another gp pass; this adds the necessary barrier
+	 * to ensure the reader can't miss the changes done by us.
 	 */
-	synchronize_sched_expedited();
-	/* the last writer unblocks update_fast_ctr() */
-	atomic_dec(&brw->write_ctr);
+	rcu_sync_exit(&brw->rss);
 }
+EXPORT_SYMBOL_GPL(percpu_up_write);
author	Ingo Molnar <mingo@kernel.org>	2015-10-19 10:09:54 +0200
committer	Ingo Molnar <mingo@kernel.org>	2015-10-19 10:09:54 +0200
commit	c13dc31adb04c3f85d54d2fa13e34206f25742eb (patch)
tree	09cf55d6f3cc628ac0a303c05dfdcc9af2ad803c /kernel/locking
parent	7379047d5585187d1288486d4627873170d0005a (diff)
parent	39cd2dd39a8b92ce91c4dad95f6e979c946a3942 (diff)