summary | refs | log | tree | commit | diff
path: root/kernel/rcu/refscale.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu/refscale.c')
-rw-r--r-- kernel/rcu/refscale.c 453
1 files changed, 424 insertions, 29 deletions
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index aacfcc9838b3..07a313782dfd 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -36,6 +36,7 @@
#include <linux/slab.h>
#include <linux/torture.h>
#include <linux/types.h>
+#include <linux/sched/clock.h>
#include "rcu.h"
@@ -84,7 +85,7 @@ torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
// Number of typesafe_lookup structures, that is, the degree of concurrency.
torture_param(long, lookup_instances, 0, "Number of typesafe_lookup structures.");
// Number of loops per experiment, all readers execute operations concurrently.
-torture_param(long, loops, 10000, "Number of loops per experiment.");
+torture_param(int, loops, 10000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.
torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
// Number of runs.
@@ -135,6 +136,7 @@ struct ref_scale_ops {
void (*cleanup)(void);
void (*readsection)(const int nloops);
void (*delaysection)(const int nloops, const int udl, const int ndl);
+ bool enable_irqs;
const char *name;
};
@@ -183,6 +185,8 @@ static const struct ref_scale_ops rcu_ops = {
// Definitions for SRCU ref scale testing.
DEFINE_STATIC_SRCU(srcu_refctl_scale);
+DEFINE_STATIC_SRCU_FAST(srcu_fast_refctl_scale);
+DEFINE_STATIC_SRCU_FAST_UPDOWN(srcu_fast_updown_refctl_scale);
static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale;
static void srcu_ref_scale_read_section(const int nloops)
@@ -215,34 +219,76 @@ static const struct ref_scale_ops srcu_ops = {
.name = "srcu"
};
-static void srcu_lite_ref_scale_read_section(const int nloops)
+// Init hook for the "srcu-fast" test: point the file-scope srcu_ctlp pointer
+// at srcu_fast_refctl_scale so the SRCU-fast reader sections use that domain.
+// Always returns true.
+static bool srcu_fast_sync_scale_init(void)
+{
+ srcu_ctlp = &srcu_fast_refctl_scale;
+ return true;
+}
+
+// Empty SRCU-fast readers: each pass calls srcu_read_lock_fast() on srcu_ctlp
+// and hands the returned per-CPU counter pointer straight back to
+// srcu_read_unlock_fast().  The loop counts down from nloops through 0,
+// so it executes nloops + 1 passes.
+static void srcu_fast_ref_scale_read_section(const int nloops)
{
int i;
- int idx;
+ struct srcu_ctr __percpu *scp;
for (i = nloops; i >= 0; i--) {
- idx = srcu_read_lock_lite(srcu_ctlp);
- srcu_read_unlock_lite(srcu_ctlp, idx);
+ scp = srcu_read_lock_fast(srcu_ctlp);
+ srcu_read_unlock_fast(srcu_ctlp, scp);
}
}
-static void srcu_lite_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
+// SRCU-fast readers with a delay inside the critical section: each of the
+// nloops + 1 passes calls un_delay(udl, ndl) between srcu_read_lock_fast()
+// and srcu_read_unlock_fast().  un_delay() is defined elsewhere in this file;
+// presumably udl is microseconds and ndl nanoseconds -- confirm there.
+static void srcu_fast_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
int i;
- int idx;
+ struct srcu_ctr __percpu *scp;
for (i = nloops; i >= 0; i--) {
- idx = srcu_read_lock_lite(srcu_ctlp);
+ scp = srcu_read_lock_fast(srcu_ctlp);
un_delay(udl, ndl);
- srcu_read_unlock_lite(srcu_ctlp, idx);
+ srcu_read_unlock_fast(srcu_ctlp, scp);
}
}
-static const struct ref_scale_ops srcu_lite_ops = {
- .init = rcu_sync_scale_init,
- .readsection = srcu_lite_ref_scale_read_section,
- .delaysection = srcu_lite_ref_scale_delay_section,
- .name = "srcu-lite"
+// Operations vector for the "srcu-fast" test; listed in scale_ops[] in
+// ref_scale_init().  Its init hook retargets srcu_ctlp at the SRCU-fast domain.
+static const struct ref_scale_ops srcu_fast_ops = {
+ .init = srcu_fast_sync_scale_init,
+ .readsection = srcu_fast_ref_scale_read_section,
+ .delaysection = srcu_fast_ref_scale_delay_section,
+ .name = "srcu-fast"
+};
+
+// Init hook for the "srcu-fast-updown" test: point the file-scope srcu_ctlp
+// pointer at srcu_fast_updown_refctl_scale.  Always returns true.
+static bool srcu_fast_updown_sync_scale_init(void)
+{
+ srcu_ctlp = &srcu_fast_updown_refctl_scale;
+ return true;
+}
+
+// Empty SRCU-fast-updown readers: each pass pairs
+// srcu_read_lock_fast_updown() with srcu_read_unlock_fast_updown() on
+// srcu_ctlp, passing the returned per-CPU counter pointer to the unlock.
+// The loop counts down from nloops through 0, i.e. nloops + 1 passes.
+static void srcu_fast_updown_ref_scale_read_section(const int nloops)
+{
+ int i;
+ struct srcu_ctr __percpu *scp;
+
+ for (i = nloops; i >= 0; i--) {
+ scp = srcu_read_lock_fast_updown(srcu_ctlp);
+ srcu_read_unlock_fast_updown(srcu_ctlp, scp);
+ }
+}
+
+// SRCU-fast-updown readers with a delay inside the critical section: each
+// of the nloops + 1 passes calls un_delay(udl, ndl) between the lock and
+// unlock.  un_delay() is defined elsewhere in this file; presumably udl is
+// microseconds and ndl nanoseconds -- confirm there.
+static void srcu_fast_updown_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+ struct srcu_ctr __percpu *scp;
+
+ for (i = nloops; i >= 0; i--) {
+ scp = srcu_read_lock_fast_updown(srcu_ctlp);
+ un_delay(udl, ndl);
+ srcu_read_unlock_fast_updown(srcu_ctlp, scp);
+ }
+}
+
+// Operations vector for the "srcu-fast-updown" test; listed in scale_ops[]
+// in ref_scale_init().  Its init hook retargets srcu_ctlp at the
+// SRCU-fast-updown domain.
+static const struct ref_scale_ops srcu_fast_updown_ops = {
+ .init = srcu_fast_updown_sync_scale_init,
+ .readsection = srcu_fast_updown_ref_scale_read_section,
+ .delaysection = srcu_fast_updown_ref_scale_delay_section,
+ .name = "srcu-fast-updown"
};
#ifdef CONFIG_TASKS_RCU
@@ -322,6 +368,9 @@ static const struct ref_scale_ops rcu_trace_ops = {
// Definitions for reference count
static atomic_t refcnt;
+// Per-CPU counter for the per-CPU increment tests and the acquire-release test.
+static DEFINE_PER_CPU(unsigned long, test_acqrel);
+
static void ref_refcnt_section(const int nloops)
{
int i;
@@ -350,6 +399,184 @@ static const struct ref_scale_ops refcnt_ops = {
.name = "refcnt"
};
+// Per-CPU increment test: each pass does this_cpu_inc() followed by
+// this_cpu_dec() on test_acqrel, leaving the counter unchanged on balance.
+// The loop counts down from nloops through 0, i.e. nloops + 1 passes.
+static void ref_percpuinc_section(const int nloops)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ this_cpu_inc(test_acqrel);
+ this_cpu_dec(test_acqrel);
+ }
+}
+
+// As ref_percpuinc_section(), but with un_delay(udl, ndl) between the
+// this_cpu_inc() and the this_cpu_dec() of each of the nloops + 1 passes.
+// un_delay() is defined elsewhere in this file; presumably udl is
+// microseconds and ndl nanoseconds -- confirm there.
+static void ref_percpuinc_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ this_cpu_inc(test_acqrel);
+ un_delay(udl, ndl);
+ this_cpu_dec(test_acqrel);
+ }
+}
+
+// Operations vector for the "percpuinc" test (this_cpu_inc()/this_cpu_dec());
+// listed in scale_ops[] in ref_scale_init().  Uses rcu_sync_scale_init(),
+// defined elsewhere in this file, as its init hook.
+static const struct ref_scale_ops percpuinc_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = ref_percpuinc_section,
+ .delaysection = ref_percpuinc_delay_section,
+ .name = "percpuinc"
+};
+
+// Unprotected per-CPU increment test: each pass fetches this CPU's
+// test_acqrel via this_cpu_ptr() and then increments and decrements it with
+// plain READ_ONCE()/WRITE_ONCE() pairs.  This function itself disables
+// neither preemption, bottom halves, nor interrupts.
+// Note that this can lose counts in preemptible kernels.
+// The loop counts down from nloops through 0, i.e. nloops + 1 passes.
+static void ref_incpercpu_section(const int nloops)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap = this_cpu_ptr(&test_acqrel);
+
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ }
+}
+
+// As ref_incpercpu_section(), but with un_delay(udl, ndl) between the
+// increment and the decrement of each of the nloops + 1 passes.  The pointer
+// from this_cpu_ptr() is reused after the delay with no protection taken in
+// this function.  un_delay() is defined elsewhere in this file; presumably
+// udl is microseconds and ndl nanoseconds -- confirm there.
+static void ref_incpercpu_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap = this_cpu_ptr(&test_acqrel);
+
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ un_delay(udl, ndl);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ }
+}
+
+// Operations vector for the "incpercpu" test (unprotected READ_ONCE()/
+// WRITE_ONCE() updates of a per-CPU counter); listed in scale_ops[] in
+// ref_scale_init().  Uses rcu_sync_scale_init(), defined elsewhere in this
+// file, as its init hook.
+static const struct ref_scale_ops incpercpu_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = ref_incpercpu_section,
+ .delaysection = ref_incpercpu_delay_section,
+ .name = "incpercpu"
+};
+
+// Per-CPU increment under preempt_disable(): each pass disables preemption,
+// fetches this CPU's test_acqrel via this_cpu_ptr(), increments and then
+// decrements it with READ_ONCE()/WRITE_ONCE(), and re-enables preemption.
+// The loop counts down from nloops through 0, i.e. nloops + 1 passes.
+static void ref_incpercpupreempt_section(const int nloops)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ preempt_disable();
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ preempt_enable();
+ }
+}
+
+// As ref_incpercpupreempt_section(), but with un_delay(udl, ndl) between the
+// increment and the decrement, so the delay runs with preemption disabled.
+// Executes nloops + 1 passes.  un_delay() is defined elsewhere in this file;
+// presumably udl is microseconds and ndl nanoseconds -- confirm there.
+static void ref_incpercpupreempt_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ preempt_disable();
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ un_delay(udl, ndl);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ preempt_enable();
+ }
+}
+
+// Operations vector for the "incpercpupreempt" test (per-CPU counter updates
+// under preempt_disable()); listed in scale_ops[] in ref_scale_init().  Uses
+// rcu_sync_scale_init(), defined elsewhere in this file, as its init hook.
+static const struct ref_scale_ops incpercpupreempt_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = ref_incpercpupreempt_section,
+ .delaysection = ref_incpercpupreempt_delay_section,
+ .name = "incpercpupreempt"
+};
+
+// Per-CPU increment under local_bh_disable(): each pass disables bottom
+// halves, fetches this CPU's test_acqrel via this_cpu_ptr(), increments and
+// then decrements it with READ_ONCE()/WRITE_ONCE(), and re-enables bottom
+// halves.  The loop counts down from nloops through 0, i.e. nloops + 1 passes.
+static void ref_incpercpubh_section(const int nloops)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ local_bh_disable();
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ local_bh_enable();
+ }
+}
+
+// As ref_incpercpubh_section(), but with un_delay(udl, ndl) between the
+// increment and the decrement, so the delay runs with bottom halves disabled.
+// Executes nloops + 1 passes.  un_delay() is defined elsewhere in this file;
+// presumably udl is microseconds and ndl nanoseconds -- confirm there.
+static void ref_incpercpubh_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ local_bh_disable();
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ un_delay(udl, ndl);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ local_bh_enable();
+ }
+}
+
+// Operations vector for the "incpercpubh" test (per-CPU counter updates
+// under local_bh_disable()); listed in scale_ops[] in ref_scale_init().
+// enable_irqs is set, which makes the reader skip its local_irq_save()/
+// local_irq_restore() around the timed pass; the reader's own comment says
+// keeping interrupts disabled is not safe for local_bh_enable().
+static const struct ref_scale_ops incpercpubh_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = ref_incpercpubh_section,
+ .delaysection = ref_incpercpubh_delay_section,
+ .enable_irqs = true,
+ .name = "incpercpubh"
+};
+
+// Per-CPU increment under local_irq_save(): each pass saves and disables
+// interrupts, fetches this CPU's test_acqrel via this_cpu_ptr(), increments
+// and then decrements it with READ_ONCE()/WRITE_ONCE(), and restores the
+// saved interrupt state.  The loop counts down from nloops through 0,
+// i.e. nloops + 1 passes.
+static void ref_incpercpuirqsave_section(const int nloops)
+{
+ int i;
+ unsigned long flags;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ local_irq_save(flags);
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ local_irq_restore(flags);
+ }
+}
+
+// As ref_incpercpuirqsave_section(), but with un_delay(udl, ndl) between the
+// increment and the decrement, so the delay runs inside the
+// local_irq_save()/local_irq_restore() pair.  Executes nloops + 1 passes.
+// un_delay() is defined elsewhere in this file; presumably udl is
+// microseconds and ndl nanoseconds -- confirm there.
+static void ref_incpercpuirqsave_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+ unsigned long flags;
+
+ for (i = nloops; i >= 0; i--) {
+ unsigned long *tap;
+
+ local_irq_save(flags);
+ tap = this_cpu_ptr(&test_acqrel);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
+ un_delay(udl, ndl);
+ WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
+ local_irq_restore(flags);
+ }
+}
+
+// Operations vector for the "incpercpuirqsave" test (per-CPU counter updates
+// under local_irq_save()); listed in scale_ops[] in ref_scale_init().  Uses
+// rcu_sync_scale_init(), defined elsewhere in this file, as its init hook.
+static const struct ref_scale_ops incpercpuirqsave_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = ref_incpercpuirqsave_section,
+ .delaysection = ref_incpercpuirqsave_delay_section,
+ .name = "incpercpuirqsave"
+};
+
// Definitions for rwlock
static rwlock_t test_rwlock;
@@ -493,9 +720,6 @@ static const struct ref_scale_ops lock_irq_ops = {
.name = "lock-irq"
};
-// Definitions acquire-release.
-static DEFINE_PER_CPU(unsigned long, test_acqrel);
-
static void ref_acqrel_section(const int nloops)
{
unsigned long x;
@@ -531,6 +755,39 @@ static const struct ref_scale_ops acqrel_ops = {
static volatile u64 stopopts;
+// sched_clock() test: with preemption disabled around the whole loop, call
+// sched_clock() nloops + 1 times (the loop counts down from nloops through 0)
+// and accumulate the results.  The sum is stored to the volatile file-scope
+// stopopts, presumably so the compiler cannot discard the calls.
+static void ref_sched_clock_section(const int nloops)
+{
+ u64 x = 0;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--)
+ x += sched_clock();
+ preempt_enable();
+ stopopts = x;
+}
+
+// As ref_sched_clock_section(), but each of the nloops + 1 passes follows
+// its sched_clock() call with un_delay(udl, ndl), all with preemption
+// disabled.  The accumulated sum is stored to the volatile stopopts.
+// un_delay() is defined elsewhere in this file; presumably udl is
+// microseconds and ndl nanoseconds -- confirm there.
+static void ref_sched_clock_delay_section(const int nloops, const int udl, const int ndl)
+{
+ u64 x = 0;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ x += sched_clock();
+ un_delay(udl, ndl);
+ }
+ preempt_enable();
+ stopopts = x;
+}
+
+// Operations vector for the "sched-clock" test; listed in scale_ops[] in
+// ref_scale_init().  No init or cleanup hook is provided.
+static const struct ref_scale_ops sched_clock_ops = {
+ .readsection = ref_sched_clock_section,
+ .delaysection = ref_sched_clock_delay_section,
+ .name = "sched-clock"
+};
+
+
static void ref_clock_section(const int nloops)
{
u64 x = 0;
@@ -595,6 +852,133 @@ static const struct ref_scale_ops jiffies_ops = {
.name = "jiffies"
};
+// preempt_disable()/preempt_enable() test: with migration disabled around
+// the whole loop, each pass disables and immediately re-enables preemption.
+// The loop counts down from nloops through 0, i.e. nloops + 1 passes.
+static void ref_preempt_section(const int nloops)
+{
+ int i;
+
+ migrate_disable();
+ for (i = nloops; i >= 0; i--) {
+ preempt_disable();
+ preempt_enable();
+ }
+ migrate_enable();
+}
+
+// As ref_preempt_section(), but each of the nloops + 1 passes calls
+// un_delay(udl, ndl) while preemption is disabled.  un_delay() is defined
+// elsewhere in this file; presumably udl is microseconds and ndl
+// nanoseconds -- confirm there.
+static void ref_preempt_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ migrate_disable();
+ for (i = nloops; i >= 0; i--) {
+ preempt_disable();
+ un_delay(udl, ndl);
+ preempt_enable();
+ }
+ migrate_enable();
+}
+
+// Operations vector for the "preempt" test; listed in scale_ops[] in
+// ref_scale_init().  No init or cleanup hook is provided.
+static const struct ref_scale_ops preempt_ops = {
+ .readsection = ref_preempt_section,
+ .delaysection = ref_preempt_delay_section,
+ .name = "preempt"
+};
+
+// local_bh_disable()/local_bh_enable() test: with preemption disabled around
+// the whole loop, each pass disables and immediately re-enables bottom
+// halves.  The loop counts down from nloops through 0, i.e. nloops + 1 passes.
+static void ref_bh_section(const int nloops)
+{
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_bh_disable();
+ local_bh_enable();
+ }
+ preempt_enable();
+}
+
+// As ref_bh_section(), but each of the nloops + 1 passes calls
+// un_delay(udl, ndl) while bottom halves are disabled.  un_delay() is
+// defined elsewhere in this file; presumably udl is microseconds and ndl
+// nanoseconds -- confirm there.
+static void ref_bh_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_bh_disable();
+ un_delay(udl, ndl);
+ local_bh_enable();
+ }
+ preempt_enable();
+}
+
+// Operations vector for the "bh" test; listed in scale_ops[] in
+// ref_scale_init().  enable_irqs is set, which makes the reader skip its
+// local_irq_save()/local_irq_restore() around the timed pass; the reader's
+// own comment says keeping interrupts disabled is not safe for
+// local_bh_enable().
+static const struct ref_scale_ops bh_ops = {
+ .readsection = ref_bh_section,
+ .delaysection = ref_bh_delay_section,
+ .enable_irqs = true,
+ .name = "bh"
+};
+
+// local_irq_disable()/local_irq_enable() test: with preemption disabled
+// around the whole loop, each pass disables and then unconditionally
+// re-enables interrupts.  The loop counts down from nloops through 0,
+// i.e. nloops + 1 passes.
+static void ref_irq_section(const int nloops)
+{
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_irq_disable();
+ local_irq_enable();
+ }
+ preempt_enable();
+}
+
+// As ref_irq_section(), but each of the nloops + 1 passes calls
+// un_delay(udl, ndl) while interrupts are disabled.  un_delay() is defined
+// elsewhere in this file; presumably udl is microseconds and ndl
+// nanoseconds -- confirm there.
+static void ref_irq_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_irq_disable();
+ un_delay(udl, ndl);
+ local_irq_enable();
+ }
+ preempt_enable();
+}
+
+// Operations vector for the "irq" test; listed in scale_ops[] in
+// ref_scale_init().  No init or cleanup hook is provided.
+// NOTE(review): enable_irqs is not set here, so the reader wraps its timed
+// pass in local_irq_save()/local_irq_restore(); if that pass invokes these
+// sections, their local_irq_enable() re-enables interrupts inside that
+// region -- confirm this is intended.
+static const struct ref_scale_ops irq_ops = {
+ .readsection = ref_irq_section,
+ .delaysection = ref_irq_delay_section,
+ .name = "irq"
+};
+
+// local_irq_save()/local_irq_restore() test: with preemption disabled around
+// the whole loop, each pass saves and disables interrupts and then restores
+// the saved state.  The loop counts down from nloops through 0,
+// i.e. nloops + 1 passes.
+static void ref_irqsave_section(const int nloops)
+{
+ unsigned long flags;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_irq_save(flags);
+ local_irq_restore(flags);
+ }
+ preempt_enable();
+}
+
+// As ref_irqsave_section(), but each of the nloops + 1 passes calls
+// un_delay(udl, ndl) between local_irq_save() and local_irq_restore().
+// un_delay() is defined elsewhere in this file; presumably udl is
+// microseconds and ndl nanoseconds -- confirm there.
+static void ref_irqsave_delay_section(const int nloops, const int udl, const int ndl)
+{
+ unsigned long flags;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ local_irq_save(flags);
+ un_delay(udl, ndl);
+ local_irq_restore(flags);
+ }
+ preempt_enable();
+}
+
+// Operations vector for the "irqsave" test; listed in scale_ops[] in
+// ref_scale_init().  No init or cleanup hook is provided.
+static const struct ref_scale_ops irqsave_ops = {
+ .readsection = ref_irqsave_section,
+ .delaysection = ref_irqsave_delay_section,
+ .name = "irqsave"
+};
+
////////////////////////////////////////////////////////////////////////
//
// Methods leveraging SLAB_TYPESAFE_BY_RCU.
@@ -890,15 +1274,18 @@ repeat:
if (!atomic_dec_return(&n_warmedup))
while (atomic_read_acquire(&n_warmedup))
rcu_scale_one_reader();
- // Also keep interrupts disabled. This also has the effect
- // of preventing entries into slow path for rcu_read_unlock().
- local_irq_save(flags);
+ // Also keep interrupts disabled when it is safe to do so, which
+ // it is not for local_bh_enable(). This also has the effect of
+ // preventing entries into slow path for rcu_read_unlock().
+ if (!cur_ops->enable_irqs)
+ local_irq_save(flags);
start = ktime_get_mono_fast_ns();
rcu_scale_one_reader();
duration = ktime_get_mono_fast_ns() - start;
- local_irq_restore(flags);
+ if (!cur_ops->enable_irqs)
+ local_irq_restore(flags);
rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;
// To reduce runtime-skew noise, do maintain-load invocations until
@@ -987,7 +1374,7 @@ static int main_func(void *arg)
set_user_nice(current, MAX_NICE);
VERBOSE_SCALEOUT("main_func task started");
- result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL);
+ result_avg = kcalloc(nruns, sizeof(*result_avg), GFP_KERNEL);
buf = kzalloc(800 + 64, GFP_KERNEL);
if (!result_avg || !buf) {
SCALEOUT_ERRSTRING("out of memory");
@@ -1076,7 +1463,7 @@ static void
ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag)
{
pr_alert("%s" SCALE_FLAG
- "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
+ "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay);
}
@@ -1099,9 +1486,9 @@ ref_scale_cleanup(void)
reader_tasks[i].task);
}
kfree(reader_tasks);
+ reader_tasks = NULL;
torture_stop_kthread("main_task", main_task);
- kfree(main_task);
// Do scale-type-specific cleanup operations.
if (cur_ops->cleanup != NULL)
@@ -1129,10 +1516,14 @@ ref_scale_init(void)
long i;
int firsterr = 0;
static const struct ref_scale_ops *scale_ops[] = {
- &rcu_ops, &srcu_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS
- &refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops,
- &clock_ops, &jiffies_ops, &typesafe_ref_ops, &typesafe_lock_ops,
- &typesafe_seqlock_ops,
+ &rcu_ops, &srcu_ops, &srcu_fast_ops, &srcu_fast_updown_ops,
+ RCU_TRACE_OPS RCU_TASKS_OPS
+ &refcnt_ops, &percpuinc_ops, &incpercpu_ops, &incpercpupreempt_ops,
+ &incpercpubh_ops, &incpercpuirqsave_ops,
+ &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops,
+ &sched_clock_ops, &clock_ops, &jiffies_ops,
+ &preempt_ops, &bh_ops, &irq_ops, &irqsave_ops,
+ &typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
};
if (!torture_init_begin(scale_type, verbose))
@@ -1174,12 +1565,16 @@ ref_scale_init(void)
// Reader tasks (default to ~75% of online CPUs).
if (nreaders < 0)
nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2);
- if (WARN_ONCE(loops <= 0, "%s: loops = %ld, adjusted to 1\n", __func__, loops))
+ if (WARN_ONCE(loops <= 0, "%s: loops = %d, adjusted to 1\n", __func__, loops))
loops = 1;
if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders))
nreaders = 1;
if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns))
nruns = 1;
+ if (WARN_ONCE(loops > INT_MAX / nreaders,
+ "%s: nreaders * loops will overflow, adjusted loops to %d",
+ __func__, INT_MAX / nreaders))
+ loops = INT_MAX / nreaders;
reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
GFP_KERNEL);
if (!reader_tasks) {