summaryrefslogtreecommitdiff
path: root/kernel/locking
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-10-19 10:09:54 +0200
committerIngo Molnar <mingo@kernel.org>2015-10-19 10:09:54 +0200
commitc13dc31adb04c3f85d54d2fa13e34206f25742eb (patch)
tree09cf55d6f3cc628ac0a303c05dfdcc9af2ad803c /kernel/locking
parent7379047d5585187d1288486d4627873170d0005a (diff)
parent39cd2dd39a8b92ce91c4dad95f6e979c946a3942 (diff)
Merge branch 'for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu
Pull RCU updates from Paul E. McKenney: - Miscellaneous fixes. (Paul E. McKenney, Boqun Feng, Oleg Nesterov, Patrick Marlier) - Improvements to expedited grace periods. (Paul E. McKenney) - Performance improvements to and locktorture tests for percpu-rwsem. (Oleg Nesterov, Paul E. McKenney) - Torture-test changes. (Paul E. McKenney, Davidlohr Bueso) - Documentation updates. (Paul E. McKenney) Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/locking')
-rw-r--r--kernel/locking/locktorture.c164
-rw-r--r--kernel/locking/percpu-rwsem.c90
2 files changed, 194 insertions, 60 deletions
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 32244186f1f2..8ef1919d63b2 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -17,12 +17,14 @@
*
* Copyright (C) IBM Corporation, 2014
*
- * Author: Paul E. McKenney <paulmck@us.ibm.com>
+ * Authors: Paul E. McKenney <paulmck@us.ibm.com>
+ * Davidlohr Bueso <dave@stgolabs.net>
* Based on kernel/rcu/torture.c.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
+#include <linux/sched/rt.h>
#include <linux/spinlock.h>
#include <linux/rwlock.h>
#include <linux/mutex.h>
@@ -34,6 +36,7 @@
#include <linux/moduleparam.h>
#include <linux/delay.h>
#include <linux/slab.h>
+#include <linux/percpu-rwsem.h>
#include <linux/torture.h>
MODULE_LICENSE("GPL");
@@ -91,11 +94,13 @@ struct lock_torture_ops {
void (*init)(void);
int (*writelock)(void);
void (*write_delay)(struct torture_random_state *trsp);
+ void (*task_boost)(struct torture_random_state *trsp);
void (*writeunlock)(void);
int (*readlock)(void);
void (*read_delay)(struct torture_random_state *trsp);
void (*readunlock)(void);
- unsigned long flags;
+
+ unsigned long flags; /* for irq spinlocks */
const char *name;
};
@@ -139,9 +144,15 @@ static void torture_lock_busted_write_unlock(void)
/* BUGGY, do not use in real life!!! */
}
+static void torture_boost_dummy(struct torture_random_state *trsp)
+{
+ /* Only rtmutexes care about priority */
+}
+
static struct lock_torture_ops lock_busted_ops = {
.writelock = torture_lock_busted_write_lock,
.write_delay = torture_lock_busted_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_lock_busted_write_unlock,
.readlock = NULL,
.read_delay = NULL,
@@ -185,6 +196,7 @@ static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock)
static struct lock_torture_ops spin_lock_ops = {
.writelock = torture_spin_lock_write_lock,
.write_delay = torture_spin_lock_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_spin_lock_write_unlock,
.readlock = NULL,
.read_delay = NULL,
@@ -211,6 +223,7 @@ __releases(torture_spinlock)
static struct lock_torture_ops spin_lock_irq_ops = {
.writelock = torture_spin_lock_write_lock_irq,
.write_delay = torture_spin_lock_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_lock_spin_write_unlock_irq,
.readlock = NULL,
.read_delay = NULL,
@@ -275,6 +288,7 @@ static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
static struct lock_torture_ops rw_lock_ops = {
.writelock = torture_rwlock_write_lock,
.write_delay = torture_rwlock_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_rwlock_write_unlock,
.readlock = torture_rwlock_read_lock,
.read_delay = torture_rwlock_read_delay,
@@ -315,6 +329,7 @@ __releases(torture_rwlock)
static struct lock_torture_ops rw_lock_irq_ops = {
.writelock = torture_rwlock_write_lock_irq,
.write_delay = torture_rwlock_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_rwlock_write_unlock_irq,
.readlock = torture_rwlock_read_lock_irq,
.read_delay = torture_rwlock_read_delay,
@@ -354,6 +369,7 @@ static void torture_mutex_unlock(void) __releases(torture_mutex)
static struct lock_torture_ops mutex_lock_ops = {
.writelock = torture_mutex_lock,
.write_delay = torture_mutex_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_mutex_unlock,
.readlock = NULL,
.read_delay = NULL,
@@ -361,6 +377,90 @@ static struct lock_torture_ops mutex_lock_ops = {
.name = "mutex_lock"
};
+#ifdef CONFIG_RT_MUTEXES
+static DEFINE_RT_MUTEX(torture_rtmutex);
+
+static int torture_rtmutex_lock(void) __acquires(torture_rtmutex)
+{
+ rt_mutex_lock(&torture_rtmutex);
+ return 0;
+}
+
+static void torture_rtmutex_boost(struct torture_random_state *trsp)
+{
+ int policy;
+ struct sched_param param;
+ const unsigned int factor = 50000; /* yes, quite arbitrary */
+
+ if (!rt_task(current)) {
+ /*
+ * (1) Boost priority once every ~50k operations. When the
+ * task tries to take the lock, the rtmutex it will account
+ * for the new priority, and do any corresponding pi-dance.
+ */
+ if (!(torture_random(trsp) %
+ (cxt.nrealwriters_stress * factor))) {
+ policy = SCHED_FIFO;
+ param.sched_priority = MAX_RT_PRIO - 1;
+ } else /* common case, do nothing */
+ return;
+ } else {
+ /*
+ * The task will remain boosted for another ~500k operations,
+ * then restored back to its original prio, and so forth.
+ *
+ * When @trsp is nil, we want to force-reset the task for
+ * stopping the kthread.
+ */
+ if (!trsp || !(torture_random(trsp) %
+ (cxt.nrealwriters_stress * factor * 2))) {
+ policy = SCHED_NORMAL;
+ param.sched_priority = 0;
+ } else /* common case, do nothing */
+ return;
+ }
+
+ sched_setscheduler_nocheck(current, policy, &param);
+}
+
+static void torture_rtmutex_delay(struct torture_random_state *trsp)
+{
+ const unsigned long shortdelay_us = 2;
+ const unsigned long longdelay_ms = 100;
+
+ /*
+ * We want a short delay mostly to emulate likely code, and
+ * we want a long delay occasionally to force massive contention.
+ */
+ if (!(torture_random(trsp) %
+ (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+ mdelay(longdelay_ms);
+ if (!(torture_random(trsp) %
+ (cxt.nrealwriters_stress * 2 * shortdelay_us)))
+ udelay(shortdelay_us);
+#ifdef CONFIG_PREEMPT
+ if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
+ preempt_schedule(); /* Allow test to be preempted. */
+#endif
+}
+
+static void torture_rtmutex_unlock(void) __releases(torture_rtmutex)
+{
+ rt_mutex_unlock(&torture_rtmutex);
+}
+
+static struct lock_torture_ops rtmutex_lock_ops = {
+ .writelock = torture_rtmutex_lock,
+ .write_delay = torture_rtmutex_delay,
+ .task_boost = torture_rtmutex_boost,
+ .writeunlock = torture_rtmutex_unlock,
+ .readlock = NULL,
+ .read_delay = NULL,
+ .readunlock = NULL,
+ .name = "rtmutex_lock"
+};
+#endif
+
static DECLARE_RWSEM(torture_rwsem);
static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
{
@@ -419,6 +519,7 @@ static void torture_rwsem_up_read(void) __releases(torture_rwsem)
static struct lock_torture_ops rwsem_lock_ops = {
.writelock = torture_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_rwsem_up_write,
.readlock = torture_rwsem_down_read,
.read_delay = torture_rwsem_read_delay,
@@ -426,6 +527,48 @@ static struct lock_torture_ops rwsem_lock_ops = {
.name = "rwsem_lock"
};
+#include <linux/percpu-rwsem.h>
+static struct percpu_rw_semaphore pcpu_rwsem;
+
+void torture_percpu_rwsem_init(void)
+{
+ BUG_ON(percpu_init_rwsem(&pcpu_rwsem));
+}
+
+static int torture_percpu_rwsem_down_write(void) __acquires(pcpu_rwsem)
+{
+ percpu_down_write(&pcpu_rwsem);
+ return 0;
+}
+
+static void torture_percpu_rwsem_up_write(void) __releases(pcpu_rwsem)
+{
+ percpu_up_write(&pcpu_rwsem);
+}
+
+static int torture_percpu_rwsem_down_read(void) __acquires(pcpu_rwsem)
+{
+ percpu_down_read(&pcpu_rwsem);
+ return 0;
+}
+
+static void torture_percpu_rwsem_up_read(void) __releases(pcpu_rwsem)
+{
+ percpu_up_read(&pcpu_rwsem);
+}
+
+static struct lock_torture_ops percpu_rwsem_lock_ops = {
+ .init = torture_percpu_rwsem_init,
+ .writelock = torture_percpu_rwsem_down_write,
+ .write_delay = torture_rwsem_write_delay,
+ .task_boost = torture_boost_dummy,
+ .writeunlock = torture_percpu_rwsem_up_write,
+ .readlock = torture_percpu_rwsem_down_read,
+ .read_delay = torture_rwsem_read_delay,
+ .readunlock = torture_percpu_rwsem_up_read,
+ .name = "percpu_rwsem_lock"
+};
+
/*
* Lock torture writer kthread. Repeatedly acquires and releases
* the lock, checking for duplicate acquisitions.
@@ -442,6 +585,7 @@ static int lock_torture_writer(void *arg)
if ((torture_random(&rand) & 0xfffff) == 0)
schedule_timeout_uninterruptible(1);
+ cxt.cur_ops->task_boost(&rand);
cxt.cur_ops->writelock();
if (WARN_ON_ONCE(lock_is_write_held))
lwsp->n_lock_fail++;
@@ -456,6 +600,8 @@ static int lock_torture_writer(void *arg)
stutter_wait("lock_torture_writer");
} while (!torture_must_stop());
+
+ cxt.cur_ops->task_boost(NULL); /* reset prio */
torture_kthread_stopping("lock_torture_writer");
return 0;
}
@@ -642,7 +788,11 @@ static int __init lock_torture_init(void)
&spin_lock_ops, &spin_lock_irq_ops,
&rw_lock_ops, &rw_lock_irq_ops,
&mutex_lock_ops,
+#ifdef CONFIG_RT_MUTEXES
+ &rtmutex_lock_ops,
+#endif
&rwsem_lock_ops,
+ &percpu_rwsem_lock_ops,
};
if (!torture_init_begin(torture_type, verbose, &torture_runnable))
@@ -661,11 +811,11 @@ static int __init lock_torture_init(void)
for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
pr_alert(" %s", torture_ops[i]->name);
pr_alert("\n");
- torture_init_end();
- return -EINVAL;
+ firsterr = -EINVAL;
+ goto unwind;
}
if (cxt.cur_ops->init)
- cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */
+ cxt.cur_ops->init();
if (nwriters_stress >= 0)
cxt.nrealwriters_stress = nwriters_stress;
@@ -676,6 +826,10 @@ static int __init lock_torture_init(void)
if (strncmp(torture_type, "mutex", 5) == 0)
cxt.debug_lock = true;
#endif
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+ if (strncmp(torture_type, "rtmutex", 7) == 0)
+ cxt.debug_lock = true;
+#endif
#ifdef CONFIG_DEBUG_SPINLOCK
if ((strncmp(torture_type, "spin", 4) == 0) ||
(strncmp(torture_type, "rw_lock", 7) == 0))
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f32567254867..f231e0bb311c 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -17,50 +17,43 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
__init_rwsem(&brw->rw_sem, name, rwsem_key);
- atomic_set(&brw->write_ctr, 0);
+ rcu_sync_init(&brw->rss, RCU_SCHED_SYNC);
atomic_set(&brw->slow_read_ctr, 0);
init_waitqueue_head(&brw->write_waitq);
return 0;
}
+EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
{
+ /*
+ * XXX: temporary kludge. The error path in alloc_super()
+ * assumes that percpu_free_rwsem() is safe after kzalloc().
+ */
+ if (!brw->fast_read_ctr)
+ return;
+
+ rcu_sync_dtor(&brw->rss);
free_percpu(brw->fast_read_ctr);
brw->fast_read_ctr = NULL; /* catch use after free bugs */
}
/*
- * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
- * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
- * serialize with the preempt-disabled section below.
- *
- * The nontrivial part is that we should guarantee acquire/release semantics
- * in case when
- *
- * R_W: down_write() comes after up_read(), the writer should see all
- * changes done by the reader
- * or
- * W_R: down_read() comes after up_write(), the reader should see all
- * changes done by the writer
+ * This is the fast-path for down_read/up_read. If it succeeds we rely
+ * on the barriers provided by rcu_sync_enter/exit; see the comments in
+ * percpu_down_write() and percpu_up_write().
*
* If this helper fails the callers rely on the normal rw_semaphore and
* atomic_dec_and_test(), so in this case we have the necessary barriers.
- *
- * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
- * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
- * reader inside the critical section. See the comments in down_write and
- * up_write below.
*/
static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
{
- bool success = false;
+ bool success;
preempt_disable();
- if (likely(!atomic_read(&brw->write_ctr))) {
+ success = rcu_sync_is_idle(&brw->rss);
+ if (likely(success))
__this_cpu_add(*brw->fast_read_ctr, val);
- success = true;
- }
preempt_enable();
return success;
@@ -77,16 +70,17 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
void percpu_down_read(struct percpu_rw_semaphore *brw)
{
might_sleep();
- if (likely(update_fast_ctr(brw, +1))) {
- rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+ rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+
+ if (likely(update_fast_ctr(brw, +1)))
return;
- }
- down_read(&brw->rw_sem);
+ /* Avoid rwsem_acquire_read() and rwsem_release() */
+ __down_read(&brw->rw_sem);
atomic_inc(&brw->slow_read_ctr);
- /* avoid up_read()->rwsem_release() */
__up_read(&brw->rw_sem);
}
+EXPORT_SYMBOL_GPL(percpu_down_read);
int percpu_down_read_trylock(struct percpu_rw_semaphore *brw)
{
@@ -112,6 +106,7 @@ void percpu_up_read(struct percpu_rw_semaphore *brw)
if (atomic_dec_and_test(&brw->slow_read_ctr))
wake_up_all(&brw->write_waitq);
}
+EXPORT_SYMBOL_GPL(percpu_up_read);
static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
{
@@ -126,33 +121,17 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
return sum;
}
-/*
- * A writer increments ->write_ctr to force the readers to switch to the
- * slow mode, note the atomic_read() check in update_fast_ctr().
- *
- * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
- * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
- * counter it represents the number of active readers.
- *
- * Finally the writer takes ->rw_sem for writing and blocks the new readers,
- * then waits until the slow counter becomes zero.
- */
void percpu_down_write(struct percpu_rw_semaphore *brw)
{
- /* tell update_fast_ctr() there is a pending writer */
- atomic_inc(&brw->write_ctr);
/*
- * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
- * so that update_fast_ctr() can't succeed.
- *
- * 2. Ensures we see the result of every previous this_cpu_add() in
- * update_fast_ctr().
+ * Make rcu_sync_is_idle() == F and thus disable the fast-path in
+ * percpu_down_read() and percpu_up_read(), and wait for gp pass.
*
- * 3. Ensures that if any reader has exited its critical section via
- * fast-path, it executes a full memory barrier before we return.
- * See R_W case in the comment above update_fast_ctr().
+ * The latter synchronises us with the preceding readers which used
+ * the fast-past, so we can not miss the result of __this_cpu_add()
+ * or anything else inside their criticial sections.
*/
- synchronize_sched_expedited();
+ rcu_sync_enter(&brw->rss);
/* exclude other writers, and block the new readers completely */
down_write(&brw->rw_sem);
@@ -163,16 +142,17 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
/* wait for all readers to complete their percpu_up_read() */
wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
}
+EXPORT_SYMBOL_GPL(percpu_down_write);
void percpu_up_write(struct percpu_rw_semaphore *brw)
{
/* release the lock, but the readers can't use the fast-path */
up_write(&brw->rw_sem);
/*
- * Insert the barrier before the next fast-path in down_read,
- * see W_R case in the comment above update_fast_ctr().
+ * Enable the fast-path in percpu_down_read() and percpu_up_read()
+ * but only after another gp pass; this adds the necessary barrier
+ * to ensure the reader can't miss the changes done by us.
*/
- synchronize_sched_expedited();
- /* the last writer unblocks update_fast_ctr() */
- atomic_dec(&brw->write_ctr);
+ rcu_sync_exit(&brw->rss);
}
+EXPORT_SYMBOL_GPL(percpu_up_write);