summaryrefslogtreecommitdiff
path: root/kernel/rcu/refscale.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu/refscale.c')
-rw-r--r--kernel/rcu/refscale.c816
1 files changed, 767 insertions, 49 deletions
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 435c884c02b5..07a313782dfd 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -28,6 +28,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/reboot.h>
#include <linux/sched.h>
+#include <linux/seq_buf.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/stat.h>
@@ -35,6 +36,7 @@
#include <linux/slab.h>
#include <linux/torture.h>
#include <linux/types.h>
+#include <linux/sched/clock.h>
#include "rcu.h"
@@ -63,6 +65,7 @@ do { \
#define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! " s "\n", scale_type, ## x)
+MODULE_DESCRIPTION("Scalability test for object reference mechanisms");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");
@@ -73,11 +76,16 @@ MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock.");
torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s");
+// Number of seconds to extend warm-up and cool-down for multiple guest OSes
+torture_param(long, guest_os_delay, 0,
+ "Number of seconds to extend warm-up/cool-down for multiple guest OSes.");
// Wait until there are multiple CPUs before starting test.
torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
"Holdoff time before test start (s)");
+// Number of typesafe_lookup structures, that is, the degree of concurrency.
+torture_param(long, lookup_instances, 0, "Number of typesafe_lookup structures.");
// Number of loops per experiment, all readers execute operations concurrently.
-torture_param(long, loops, 10000, "Number of loops per experiment.");
+torture_param(int, loops, 10000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.
torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
// Number of runs.
@@ -124,14 +132,15 @@ static int exp_idx;
// Operations vector for selecting different types of tests.
struct ref_scale_ops {
- void (*init)(void);
+ bool (*init)(void);
void (*cleanup)(void);
void (*readsection)(const int nloops);
void (*delaysection)(const int nloops, const int udl, const int ndl);
+ bool enable_irqs;
const char *name;
};
-static struct ref_scale_ops *cur_ops;
+static const struct ref_scale_ops *cur_ops;
static void un_delay(const int udl, const int ndl)
{
@@ -162,11 +171,12 @@ static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl
}
}
-static void rcu_sync_scale_init(void)
+static bool rcu_sync_scale_init(void)
{
+ return true;
}
-static struct ref_scale_ops rcu_ops = {
+static const struct ref_scale_ops rcu_ops = {
.init = rcu_sync_scale_init,
.readsection = ref_rcu_read_section,
.delaysection = ref_rcu_delay_section,
@@ -175,6 +185,8 @@ static struct ref_scale_ops rcu_ops = {
// Definitions for SRCU ref scale testing.
DEFINE_STATIC_SRCU(srcu_refctl_scale);
+DEFINE_STATIC_SRCU_FAST(srcu_fast_refctl_scale);
+DEFINE_STATIC_SRCU_FAST_UPDOWN(srcu_fast_updown_refctl_scale);
static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale;
static void srcu_ref_scale_read_section(const int nloops)
@@ -200,13 +212,85 @@ static void srcu_ref_scale_delay_section(const int nloops, const int udl, const
}
}
-static struct ref_scale_ops srcu_ops = {
+static const struct ref_scale_ops srcu_ops = {
.init = rcu_sync_scale_init,
.readsection = srcu_ref_scale_read_section,
.delaysection = srcu_ref_scale_delay_section,
.name = "srcu"
};
+static bool srcu_fast_sync_scale_init(void)
+{
+ srcu_ctlp = &srcu_fast_refctl_scale;
+ return true;
+}
+
+static void srcu_fast_ref_scale_read_section(const int nloops)
+{
+ int i;
+ struct srcu_ctr __percpu *scp;
+
+ for (i = nloops; i >= 0; i--) {
+ scp = srcu_read_lock_fast(srcu_ctlp);
+ srcu_read_unlock_fast(srcu_ctlp, scp);
+ }
+}
+
+static void srcu_fast_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+ struct srcu_ctr __percpu *scp;
+
+ for (i = nloops; i >= 0; i--) {
+ scp = srcu_read_lock_fast(srcu_ctlp);
+ un_delay(udl, ndl);
+ srcu_read_unlock_fast(srcu_ctlp, scp);
+ }
+}
+
+static const struct ref_scale_ops srcu_fast_ops = {
+ .init = srcu_fast_sync_scale_init,
+ .readsection = srcu_fast_ref_scale_read_section,
+ .delaysection = srcu_fast_ref_scale_delay_section,
+ .name = "srcu-fast"
+};
+
+static bool srcu_fast_updown_sync_scale_init(void)
+{
+ srcu_ctlp = &srcu_fast_updown_refctl_scale;
+ return true;
+}
+
+static void srcu_fast_updown_ref_scale_read_section(const int nloops)
+{
+ int i;
+ struct srcu_ctr __percpu *scp;
+
+ for (i = nloops; i >= 0; i--) {
+ scp = srcu_read_lock_fast_updown(srcu_ctlp);
+ srcu_read_unlock_fast_updown(srcu_ctlp, scp);
+ }
+}
+
+static void srcu_fast_updown_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+ struct srcu_ctr __percpu *scp;
+
+ for (i = nloops; i >= 0; i--) {
+ scp = srcu_read_lock_fast_updown(srcu_ctlp);
+ un_delay(udl, ndl);
+ srcu_read_unlock_fast_updown(srcu_ctlp, scp);
+ }
+}
+
+static const struct ref_scale_ops srcu_fast_updown_ops = {
+ .init = srcu_fast_updown_sync_scale_init,
+ .readsection = srcu_fast_updown_ref_scale_read_section,
+ .delaysection = srcu_fast_updown_ref_scale_delay_section,
+ .name = "srcu-fast-updown"
+};
+
#ifdef CONFIG_TASKS_RCU
// Definitions for RCU Tasks ref scale testing: Empty read markers.
@@ -227,7 +311,7 @@ static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, c
un_delay(udl, ndl);
}
-static struct ref_scale_ops rcu_tasks_ops = {
+static const struct ref_scale_ops rcu_tasks_ops = {
.init = rcu_sync_scale_init,
.readsection = rcu_tasks_ref_scale_read_section,
.delaysection = rcu_tasks_ref_scale_delay_section,
@@ -266,7 +350,7 @@ static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, c
}
}
-static struct ref_scale_ops rcu_trace_ops = {
+static const struct ref_scale_ops rcu_trace_ops = {
.init = rcu_sync_scale_init,
.readsection = rcu_trace_ref_scale_read_section,
.delaysection = rcu_trace_ref_scale_delay_section,
@@ -284,6 +368,9 @@ static struct ref_scale_ops rcu_trace_ops = {
// Definitions for reference count
static atomic_t refcnt;
+// Definitions acquire-release.
+static DEFINE_PER_CPU(unsigned long, test_acqrel);
+
static void ref_refcnt_section(const int nloops)
{
int i;
@@ -305,19 +392,198 @@ static void ref_refcnt_delay_section(const int nloops, const int udl, const int
}
}
-static struct ref_scale_ops refcnt_ops = {
+static const struct ref_scale_ops refcnt_ops = {
.init = rcu_sync_scale_init,
.readsection = ref_refcnt_section,
.delaysection = ref_refcnt_delay_section,
.name = "refcnt"
};
+static void ref_percpuinc_section(const int nloops)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ this_cpu_inc(test_acqrel);
+ this_cpu_dec(test_acqrel);
+ }
+}
+
+static void ref_percpuinc_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ this_cpu_inc(test_acqrel);
+ un_delay(udl, ndl);
+ this_cpu_dec(test_acqrel);
+ }
+}
+
+static const struct ref_scale_ops percpuinc_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = ref_percpuinc_section,
+ .delaysection = ref_percpuinc_delay_section,
+ .name = "percpuinc"
+};
+
+// Note that this can lose counts in preemptible kernels.
+static void ref_incpercpu_section(const int nloops)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap = this_cpu_ptr(&test_acqrel);
+
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ }
+}
+
+static void ref_incpercpu_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap = this_cpu_ptr(&test_acqrel);
+
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ un_delay(udl, ndl);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ }
+}
+
+static const struct ref_scale_ops incpercpu_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = ref_incpercpu_section,
+ .delaysection = ref_incpercpu_delay_section,
+ .name = "incpercpu"
+};
+
+static void ref_incpercpupreempt_section(const int nloops)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ preempt_disable();
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ preempt_enable();
+ }
+}
+
+static void ref_incpercpupreempt_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ preempt_disable();
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ un_delay(udl, ndl);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ preempt_enable();
+ }
+}
+
+static const struct ref_scale_ops incpercpupreempt_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = ref_incpercpupreempt_section,
+ .delaysection = ref_incpercpupreempt_delay_section,
+ .name = "incpercpupreempt"
+};
+
+static void ref_incpercpubh_section(const int nloops)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ local_bh_disable();
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ local_bh_enable();
+ }
+}
+
+static void ref_incpercpubh_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ local_bh_disable();
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ un_delay(udl, ndl);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ local_bh_enable();
+ }
+}
+
+static const struct ref_scale_ops incpercpubh_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = ref_incpercpubh_section,
+ .delaysection = ref_incpercpubh_delay_section,
+ .enable_irqs = true,
+ .name = "incpercpubh"
+};
+
+static void ref_incpercpuirqsave_section(const int nloops)
+{
+ int i;
+ unsigned long flags;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ local_irq_save(flags);
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ local_irq_restore(flags);
+ }
+}
+
+static void ref_incpercpuirqsave_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+ unsigned long flags;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ local_irq_save(flags);
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ un_delay(udl, ndl);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ local_irq_restore(flags);
+ }
+}
+
+static const struct ref_scale_ops incpercpuirqsave_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = ref_incpercpuirqsave_section,
+ .delaysection = ref_incpercpuirqsave_delay_section,
+ .name = "incpercpuirqsave"
+};
+
// Definitions for rwlock
static rwlock_t test_rwlock;
-static void ref_rwlock_init(void)
+static bool ref_rwlock_init(void)
{
rwlock_init(&test_rwlock);
+ return true;
}
static void ref_rwlock_section(const int nloops)
@@ -341,7 +607,7 @@ static void ref_rwlock_delay_section(const int nloops, const int udl, const int
}
}
-static struct ref_scale_ops rwlock_ops = {
+static const struct ref_scale_ops rwlock_ops = {
.init = ref_rwlock_init,
.readsection = ref_rwlock_section,
.delaysection = ref_rwlock_delay_section,
@@ -351,9 +617,10 @@ static struct ref_scale_ops rwlock_ops = {
// Definitions for rwsem
static struct rw_semaphore test_rwsem;
-static void ref_rwsem_init(void)
+static bool ref_rwsem_init(void)
{
init_rwsem(&test_rwsem);
+ return true;
}
static void ref_rwsem_section(const int nloops)
@@ -377,7 +644,7 @@ static void ref_rwsem_delay_section(const int nloops, const int udl, const int n
}
}
-static struct ref_scale_ops rwsem_ops = {
+static const struct ref_scale_ops rwsem_ops = {
.init = ref_rwsem_init,
.readsection = ref_rwsem_section,
.delaysection = ref_rwsem_delay_section,
@@ -412,7 +679,7 @@ static void ref_lock_delay_section(const int nloops, const int udl, const int nd
preempt_enable();
}
-static struct ref_scale_ops lock_ops = {
+static const struct ref_scale_ops lock_ops = {
.readsection = ref_lock_section,
.delaysection = ref_lock_delay_section,
.name = "lock"
@@ -447,15 +714,12 @@ static void ref_lock_irq_delay_section(const int nloops, const int udl, const in
preempt_enable();
}
-static struct ref_scale_ops lock_irq_ops = {
+static const struct ref_scale_ops lock_irq_ops = {
.readsection = ref_lock_irq_section,
.delaysection = ref_lock_irq_delay_section,
.name = "lock-irq"
};
-// Definitions acquire-release.
-static DEFINE_PER_CPU(unsigned long, test_acqrel);
-
static void ref_acqrel_section(const int nloops)
{
unsigned long x;
@@ -483,7 +747,7 @@ static void ref_acqrel_delay_section(const int nloops, const int udl, const int
preempt_enable();
}
-static struct ref_scale_ops acqrel_ops = {
+static const struct ref_scale_ops acqrel_ops = {
.readsection = ref_acqrel_section,
.delaysection = ref_acqrel_delay_section,
.name = "acqrel"
@@ -491,6 +755,39 @@ static struct ref_scale_ops acqrel_ops = {
static volatile u64 stopopts;
+static void ref_sched_clock_section(const int nloops)
+{
+ u64 x = 0;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--)
+ x += sched_clock();
+ preempt_enable();
+ stopopts = x;
+}
+
+static void ref_sched_clock_delay_section(const int nloops, const int udl, const int ndl)
+{
+ u64 x = 0;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ x += sched_clock();
+ un_delay(udl, ndl);
+ }
+ preempt_enable();
+ stopopts = x;
+}
+
+static const struct ref_scale_ops sched_clock_ops = {
+ .readsection = ref_sched_clock_section,
+ .delaysection = ref_sched_clock_delay_section,
+ .name = "sched-clock"
+};
+
+
static void ref_clock_section(const int nloops)
{
u64 x = 0;
@@ -517,12 +814,402 @@ static void ref_clock_delay_section(const int nloops, const int udl, const int n
stopopts = x;
}
-static struct ref_scale_ops clock_ops = {
+static const struct ref_scale_ops clock_ops = {
.readsection = ref_clock_section,
.delaysection = ref_clock_delay_section,
.name = "clock"
};
+static void ref_jiffies_section(const int nloops)
+{
+ u64 x = 0;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--)
+ x += jiffies;
+ preempt_enable();
+ stopopts = x;
+}
+
+static void ref_jiffies_delay_section(const int nloops, const int udl, const int ndl)
+{
+ u64 x = 0;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ x += jiffies;
+ un_delay(udl, ndl);
+ }
+ preempt_enable();
+ stopopts = x;
+}
+
+static const struct ref_scale_ops jiffies_ops = {
+ .readsection = ref_jiffies_section,
+ .delaysection = ref_jiffies_delay_section,
+ .name = "jiffies"
+};
+
+static void ref_preempt_section(const int nloops)
+{
+ int i;
+
+ migrate_disable();
+ for (i = nloops; i >= 0; i--) {
+ preempt_disable();
+ preempt_enable();
+ }
+ migrate_enable();
+}
+
+static void ref_preempt_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ migrate_disable();
+ for (i = nloops; i >= 0; i--) {
+ preempt_disable();
+ un_delay(udl, ndl);
+ preempt_enable();
+ }
+ migrate_enable();
+}
+
+static const struct ref_scale_ops preempt_ops = {
+ .readsection = ref_preempt_section,
+ .delaysection = ref_preempt_delay_section,
+ .name = "preempt"
+};
+
+static void ref_bh_section(const int nloops)
+{
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_bh_disable();
+ local_bh_enable();
+ }
+ preempt_enable();
+}
+
+static void ref_bh_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_bh_disable();
+ un_delay(udl, ndl);
+ local_bh_enable();
+ }
+ preempt_enable();
+}
+
+static const struct ref_scale_ops bh_ops = {
+ .readsection = ref_bh_section,
+ .delaysection = ref_bh_delay_section,
+ .enable_irqs = true,
+ .name = "bh"
+};
+
+static void ref_irq_section(const int nloops)
+{
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_irq_disable();
+ local_irq_enable();
+ }
+ preempt_enable();
+}
+
+static void ref_irq_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_irq_disable();
+ un_delay(udl, ndl);
+ local_irq_enable();
+ }
+ preempt_enable();
+}
+
+static const struct ref_scale_ops irq_ops = {
+ .readsection = ref_irq_section,
+ .delaysection = ref_irq_delay_section,
+ .name = "irq"
+};
+
+static void ref_irqsave_section(const int nloops)
+{
+ unsigned long flags;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_irq_save(flags);
+ local_irq_restore(flags);
+ }
+ preempt_enable();
+}
+
+static void ref_irqsave_delay_section(const int nloops, const int udl, const int ndl)
+{
+ unsigned long flags;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_irq_save(flags);
+ un_delay(udl, ndl);
+ local_irq_restore(flags);
+ }
+ preempt_enable();
+}
+
+static const struct ref_scale_ops irqsave_ops = {
+ .readsection = ref_irqsave_section,
+ .delaysection = ref_irqsave_delay_section,
+ .name = "irqsave"
+};
+
+////////////////////////////////////////////////////////////////////////
+//
+// Methods leveraging SLAB_TYPESAFE_BY_RCU.
+//
+
+// Item to look up in a typesafe manner. Array of pointers to these.
+struct refscale_typesafe {
+ atomic_t rts_refctr; // Used by all flavors
+ spinlock_t rts_lock;
+ seqlock_t rts_seqlock;
+ unsigned int a;
+ unsigned int b;
+};
+
+static struct kmem_cache *typesafe_kmem_cachep;
+static struct refscale_typesafe **rtsarray;
+static long rtsarray_size;
+static DEFINE_TORTURE_RANDOM_PERCPU(refscale_rand);
+static bool (*rts_acquire)(struct refscale_typesafe *rtsp, unsigned int *start);
+static bool (*rts_release)(struct refscale_typesafe *rtsp, unsigned int start);
+
+// Conditionally acquire an explicit in-structure reference count.
+static bool typesafe_ref_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
+{
+ return atomic_inc_not_zero(&rtsp->rts_refctr);
+}
+
+// Unconditionally release an explicit in-structure reference count.
+static bool typesafe_ref_release(struct refscale_typesafe *rtsp, unsigned int start)
+{
+ if (!atomic_dec_return(&rtsp->rts_refctr)) {
+ WRITE_ONCE(rtsp->a, rtsp->a + 1);
+ kmem_cache_free(typesafe_kmem_cachep, rtsp);
+ }
+ return true;
+}
+
+// Unconditionally acquire an explicit in-structure spinlock.
+static bool typesafe_lock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
+{
+ spin_lock(&rtsp->rts_lock);
+ return true;
+}
+
+// Unconditionally release an explicit in-structure spinlock.
+static bool typesafe_lock_release(struct refscale_typesafe *rtsp, unsigned int start)
+{
+ spin_unlock(&rtsp->rts_lock);
+ return true;
+}
+
+// Unconditionally acquire an explicit in-structure sequence lock.
+static bool typesafe_seqlock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
+{
+ *start = read_seqbegin(&rtsp->rts_seqlock);
+ return true;
+}
+
+// Conditionally release an explicit in-structure sequence lock. Return
+// true if this release was successful, that is, if no retry is required.
+static bool typesafe_seqlock_release(struct refscale_typesafe *rtsp, unsigned int start)
+{
+ return !read_seqretry(&rtsp->rts_seqlock, start);
+}
+
+// Do a read-side critical section with the specified delay in
+// microseconds and nanoseconds inserted so as to increase probability
+// of failure.
+static void typesafe_delay_section(const int nloops, const int udl, const int ndl)
+{
+ unsigned int a;
+ unsigned int b;
+ int i;
+ long idx;
+ struct refscale_typesafe *rtsp;
+ unsigned int start;
+
+ for (i = nloops; i >= 0; i--) {
+ preempt_disable();
+ idx = torture_random(this_cpu_ptr(&refscale_rand)) % rtsarray_size;
+ preempt_enable();
+retry:
+ rcu_read_lock();
+ rtsp = rcu_dereference(rtsarray[idx]);
+ a = READ_ONCE(rtsp->a);
+ if (!rts_acquire(rtsp, &start)) {
+ rcu_read_unlock();
+ goto retry;
+ }
+ if (a != READ_ONCE(rtsp->a)) {
+ (void)rts_release(rtsp, start);
+ rcu_read_unlock();
+ goto retry;
+ }
+ un_delay(udl, ndl);
+ b = READ_ONCE(rtsp->a);
+ // Remember, seqlock read-side release can fail.
+ if (!rts_release(rtsp, start)) {
+ rcu_read_unlock();
+ goto retry;
+ }
+ WARN_ONCE(a != b, "Re-read of ->a changed from %u to %u.\n", a, b);
+ b = rtsp->b;
+ rcu_read_unlock();
+ WARN_ON_ONCE(a * a != b);
+ }
+}
+
+// Because the acquisition and release methods are expensive, there
+// is no point in optimizing away the un_delay() function's two checks.
+// Thus simply define typesafe_read_section() as a simple wrapper around
+// typesafe_delay_section().
+static void typesafe_read_section(const int nloops)
+{
+ typesafe_delay_section(nloops, 0, 0);
+}
+
+// Allocate and initialize one refscale_typesafe structure.
+static struct refscale_typesafe *typesafe_alloc_one(void)
+{
+ struct refscale_typesafe *rtsp;
+
+ rtsp = kmem_cache_alloc(typesafe_kmem_cachep, GFP_KERNEL);
+ if (!rtsp)
+ return NULL;
+ atomic_set(&rtsp->rts_refctr, 1);
+ WRITE_ONCE(rtsp->a, rtsp->a + 1);
+ WRITE_ONCE(rtsp->b, rtsp->a * rtsp->a);
+ return rtsp;
+}
+
+// Slab-allocator constructor for refscale_typesafe structures created
+// out of a new slab of system memory.
+static void refscale_typesafe_ctor(void *rtsp_in)
+{
+ struct refscale_typesafe *rtsp = rtsp_in;
+
+ spin_lock_init(&rtsp->rts_lock);
+ seqlock_init(&rtsp->rts_seqlock);
+ preempt_disable();
+ rtsp->a = torture_random(this_cpu_ptr(&refscale_rand));
+ preempt_enable();
+}
+
+static const struct ref_scale_ops typesafe_ref_ops;
+static const struct ref_scale_ops typesafe_lock_ops;
+static const struct ref_scale_ops typesafe_seqlock_ops;
+
+// Initialize for a typesafe test.
+static bool typesafe_init(void)
+{
+ long idx;
+ long si = lookup_instances;
+
+ typesafe_kmem_cachep = kmem_cache_create("refscale_typesafe",
+ sizeof(struct refscale_typesafe), sizeof(void *),
+ SLAB_TYPESAFE_BY_RCU, refscale_typesafe_ctor);
+ if (!typesafe_kmem_cachep)
+ return false;
+ if (si < 0)
+ si = -si * nr_cpu_ids;
+ else if (si == 0)
+ si = nr_cpu_ids;
+ rtsarray_size = si;
+ rtsarray = kcalloc(si, sizeof(*rtsarray), GFP_KERNEL);
+ if (!rtsarray)
+ return false;
+ for (idx = 0; idx < rtsarray_size; idx++) {
+ rtsarray[idx] = typesafe_alloc_one();
+ if (!rtsarray[idx])
+ return false;
+ }
+ if (cur_ops == &typesafe_ref_ops) {
+ rts_acquire = typesafe_ref_acquire;
+ rts_release = typesafe_ref_release;
+ } else if (cur_ops == &typesafe_lock_ops) {
+ rts_acquire = typesafe_lock_acquire;
+ rts_release = typesafe_lock_release;
+ } else if (cur_ops == &typesafe_seqlock_ops) {
+ rts_acquire = typesafe_seqlock_acquire;
+ rts_release = typesafe_seqlock_release;
+ } else {
+ WARN_ON_ONCE(1);
+ return false;
+ }
+ return true;
+}
+
+// Clean up after a typesafe test.
+static void typesafe_cleanup(void)
+{
+ long idx;
+
+ if (rtsarray) {
+ for (idx = 0; idx < rtsarray_size; idx++)
+ kmem_cache_free(typesafe_kmem_cachep, rtsarray[idx]);
+ kfree(rtsarray);
+ rtsarray = NULL;
+ rtsarray_size = 0;
+ }
+ kmem_cache_destroy(typesafe_kmem_cachep);
+ typesafe_kmem_cachep = NULL;
+ rts_acquire = NULL;
+ rts_release = NULL;
+}
+
+// The typesafe_init() function distinguishes these structures by address.
+static const struct ref_scale_ops typesafe_ref_ops = {
+ .init = typesafe_init,
+ .cleanup = typesafe_cleanup,
+ .readsection = typesafe_read_section,
+ .delaysection = typesafe_delay_section,
+ .name = "typesafe_ref"
+};
+
+static const struct ref_scale_ops typesafe_lock_ops = {
+ .init = typesafe_init,
+ .cleanup = typesafe_cleanup,
+ .readsection = typesafe_read_section,
+ .delaysection = typesafe_delay_section,
+ .name = "typesafe_lock"
+};
+
+static const struct ref_scale_ops typesafe_seqlock_ops = {
+ .init = typesafe_init,
+ .cleanup = typesafe_cleanup,
+ .readsection = typesafe_read_section,
+ .delaysection = typesafe_delay_section,
+ .name = "typesafe_seqlock"
+};
+
static void rcu_scale_one_reader(void)
{
if (readdelay <= 0)
@@ -531,6 +1218,18 @@ static void rcu_scale_one_reader(void)
cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000);
}
+// Warm up cache, or, if needed run a series of rcu_scale_one_reader()
+// to allow multiple rcuscale guest OSes to collect mutually valid data.
+static void rcu_scale_warm_cool(void)
+{
+ unsigned long jdone = jiffies + (guest_os_delay > 0 ? guest_os_delay * HZ : -1);
+
+ do {
+ rcu_scale_one_reader();
+ cond_resched();
+ } while (time_before(jiffies, jdone));
+}
+
// Reader kthread. Repeatedly does empty RCU read-side
// critical section, minimizing update-side interference.
static int
@@ -559,7 +1258,7 @@ repeat:
goto end;
// Make sure that the CPU is affinitized appropriately during testing.
- WARN_ON_ONCE(raw_smp_processor_id() != me);
+ WARN_ON_ONCE(raw_smp_processor_id() != me % nr_cpu_ids);
WRITE_ONCE(rt->start_reader, 0);
if (!atomic_dec_return(&n_started))
@@ -575,15 +1274,18 @@ repeat:
if (!atomic_dec_return(&n_warmedup))
while (atomic_read_acquire(&n_warmedup))
rcu_scale_one_reader();
- // Also keep interrupts disabled. This also has the effect
- // of preventing entries into slow path for rcu_read_unlock().
- local_irq_save(flags);
+ // Also keep interrupts disabled when it is safe to do so, which
+ // it is not for local_bh_enable(). This also has the effect of
+ // preventing entries into slow path for rcu_read_unlock().
+ if (!cur_ops->enable_irqs)
+ local_irq_save(flags);
start = ktime_get_mono_fast_ns();
rcu_scale_one_reader();
duration = ktime_get_mono_fast_ns() - start;
- local_irq_restore(flags);
+ if (!cur_ops->enable_irqs)
+ local_irq_restore(flags);
rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;
// To reduce runtime-skew noise, do maintain-load invocations until
@@ -622,32 +1324,34 @@ static u64 process_durations(int n)
{
int i;
struct reader_task *rt;
- char buf1[64];
+ struct seq_buf s;
char *buf;
u64 sum = 0;
buf = kmalloc(800 + 64, GFP_KERNEL);
if (!buf)
return 0;
- buf[0] = 0;
- sprintf(buf, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)",
- exp_idx);
+ seq_buf_init(&s, buf, 800 + 64);
+
+ seq_buf_printf(&s, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)",
+ exp_idx);
for (i = 0; i < n && !torture_must_stop(); i++) {
rt = &(reader_tasks[i]);
- sprintf(buf1, "%d: %llu\t", i, rt->last_duration_ns);
if (i % 5 == 0)
- strcat(buf, "\n");
- if (strlen(buf) >= 800) {
- pr_alert("%s", buf);
- buf[0] = 0;
+ seq_buf_putc(&s, '\n');
+
+ if (seq_buf_used(&s) >= 800) {
+ pr_alert("%s", seq_buf_str(&s));
+ seq_buf_clear(&s);
}
- strcat(buf, buf1);
+
+ seq_buf_printf(&s, "%d: %llu\t", i, rt->last_duration_ns);
sum += rt->last_duration_ns;
}
- pr_alert("%s\n", buf);
+ pr_alert("%s\n", seq_buf_str(&s));
kfree(buf);
return sum;
@@ -670,7 +1374,7 @@ static int main_func(void *arg)
set_user_nice(current, MAX_NICE);
VERBOSE_SCALEOUT("main_func task started");
- result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL);
+ result_avg = kcalloc(nruns, sizeof(*result_avg), GFP_KERNEL);
buf = kzalloc(800 + 64, GFP_KERNEL);
if (!result_avg || !buf) {
SCALEOUT_ERRSTRING("out of memory");
@@ -685,6 +1389,7 @@ static int main_func(void *arg)
schedule_timeout_uninterruptible(1);
// Start exp readers up per experiment
+ rcu_scale_warm_cool();
for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
if (torture_must_stop())
goto end;
@@ -715,6 +1420,7 @@ static int main_func(void *arg)
result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops);
}
+ rcu_scale_warm_cool();
// Print the average of all experiments
SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");
@@ -754,11 +1460,11 @@ end:
}
static void
-ref_scale_print_module_parms(struct ref_scale_ops *cur_ops, const char *tag)
+ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag)
{
pr_alert("%s" SCALE_FLAG
- "--- %s: verbose=%d shutdown=%d holdoff=%d loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
- verbose, shutdown, holdoff, loops, nreaders, nruns, readdelay);
+ "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
+ verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay);
}
static void
@@ -780,9 +1486,9 @@ ref_scale_cleanup(void)
reader_tasks[i].task);
}
kfree(reader_tasks);
+ reader_tasks = NULL;
torture_stop_kthread("main_task", main_task);
- kfree(main_task);
// Do scale-type-specific cleanup operations.
if (cur_ops->cleanup != NULL)
@@ -795,7 +1501,7 @@ ref_scale_cleanup(void)
static int
ref_scale_shutdown(void *arg)
{
- wait_event(shutdown_wq, shutdown_start);
+ wait_event_idle(shutdown_wq, shutdown_start);
smp_mb(); // Wake before output.
ref_scale_cleanup();
@@ -809,9 +1515,15 @@ ref_scale_init(void)
{
long i;
int firsterr = 0;
- static struct ref_scale_ops *scale_ops[] = {
- &rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
- &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
+ static const struct ref_scale_ops *scale_ops[] = {
+ &rcu_ops, &srcu_ops, &srcu_fast_ops, &srcu_fast_updown_ops,
+ RCU_TRACE_OPS RCU_TASKS_OPS
+ &refcnt_ops, &percpuinc_ops, &incpercpu_ops, &incpercpupreempt_ops,
+ &incpercpubh_ops, &incpercpuirqsave_ops,
+ &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops,
+ &sched_clock_ops, &clock_ops, &jiffies_ops,
+ &preempt_ops, &bh_ops, &irq_ops, &irqsave_ops,
+ &typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
};
if (!torture_init_begin(scale_type, verbose))
@@ -833,7 +1545,10 @@ ref_scale_init(void)
goto unwind;
}
if (cur_ops->init)
- cur_ops->init();
+ if (!cur_ops->init()) {
+ firsterr = -EUCLEAN;
+ goto unwind;
+ }
ref_scale_print_module_parms(cur_ops, "Start of test");
@@ -850,12 +1565,16 @@ ref_scale_init(void)
// Reader tasks (default to ~75% of online CPUs).
if (nreaders < 0)
nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2);
- if (WARN_ONCE(loops <= 0, "%s: loops = %ld, adjusted to 1\n", __func__, loops))
+ if (WARN_ONCE(loops <= 0, "%s: loops = %d, adjusted to 1\n", __func__, loops))
loops = 1;
if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders))
nreaders = 1;
if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns))
nruns = 1;
+ if (WARN_ONCE(loops > INT_MAX / nreaders,
+ "%s: nreaders * loops will overflow, adjusted loops to %d",
+ __func__, INT_MAX / nreaders))
+ loops = INT_MAX / nreaders;
reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
GFP_KERNEL);
if (!reader_tasks) {
@@ -867,12 +1586,11 @@ ref_scale_init(void)
VERBOSE_SCALEOUT("Starting %d reader threads", nreaders);
for (i = 0; i < nreaders; i++) {
+ init_waitqueue_head(&reader_tasks[i].wq);
firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
reader_tasks[i].task);
if (torture_init_error(firsterr))
goto unwind;
-
- init_waitqueue_head(&(reader_tasks[i].wq));
}
// Main Task