summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2017-10-06 09:23:24 +0200
committerIngo Molnar <mingo@kernel.org>2017-10-10 10:14:03 +0200
commitf2cdd9cc6c97e617b95f430f527a6e3165e1bee8 (patch)
tree6e2b49bc9382b9f6840592c5f3625e269787bac5
parentd153b153446f7d8832bb2ebd92309c8a6003b3bb (diff)
sched/core: Address more wake_affine() regressions
The trivial wake_affine_idle() implementation is very good for a number of workloads, but it comes apart at the moment there are no idle CPUs left, IOW. the overloaded case. hackbench: NO_WA_WEIGHT WA_WEIGHT hackbench-20 : 7.362717561 seconds 6.450509391 seconds (win) netperf: NO_WA_WEIGHT WA_WEIGHT TCP_SENDFILE-1 : Avg: 54524.6 Avg: 52224.3 TCP_SENDFILE-10 : Avg: 48185.2 Avg: 46504.3 TCP_SENDFILE-20 : Avg: 29031.2 Avg: 28610.3 TCP_SENDFILE-40 : Avg: 9819.72 Avg: 9253.12 TCP_SENDFILE-80 : Avg: 5355.3 Avg: 4687.4 TCP_STREAM-1 : Avg: 41448.3 Avg: 42254 TCP_STREAM-10 : Avg: 24123.2 Avg: 25847.9 TCP_STREAM-20 : Avg: 15834.5 Avg: 18374.4 TCP_STREAM-40 : Avg: 5583.91 Avg: 5599.57 TCP_STREAM-80 : Avg: 2329.66 Avg: 2726.41 TCP_RR-1 : Avg: 80473.5 Avg: 82638.8 TCP_RR-10 : Avg: 72660.5 Avg: 73265.1 TCP_RR-20 : Avg: 52607.1 Avg: 52634.5 TCP_RR-40 : Avg: 57199.2 Avg: 56302.3 TCP_RR-80 : Avg: 25330.3 Avg: 26867.9 UDP_RR-1 : Avg: 108266 Avg: 107844 UDP_RR-10 : Avg: 95480 Avg: 95245.2 UDP_RR-20 : Avg: 68770.8 Avg: 68673.7 UDP_RR-40 : Avg: 76231 Avg: 75419.1 UDP_RR-80 : Avg: 34578.3 Avg: 35639.1 UDP_STREAM-1 : Avg: 64684.3 Avg: 66606 UDP_STREAM-10 : Avg: 52701.2 Avg: 52959.5 UDP_STREAM-20 : Avg: 30376.4 Avg: 29704 UDP_STREAM-40 : Avg: 15685.8 Avg: 15266.5 UDP_STREAM-80 : Avg: 8415.13 Avg: 7388.97 (wins and losses) sysbench: NO_WA_WEIGHT WA_WEIGHT sysbench-mysql-2 : 2135.17 per sec. 2142.51 per sec. sysbench-mysql-5 : 4809.68 per sec. 4800.19 per sec. sysbench-mysql-10 : 9158.59 per sec. 9157.05 per sec. sysbench-mysql-20 : 14570.70 per sec. 14543.55 per sec. sysbench-mysql-40 : 22130.56 per sec. 22184.82 per sec. sysbench-mysql-80 : 20995.56 per sec. 21904.18 per sec. sysbench-psql-2 : 1679.58 per sec. 1705.06 per sec. sysbench-psql-5 : 3797.69 per sec. 3879.93 per sec. sysbench-psql-10 : 7253.22 per sec. 7258.06 per sec. sysbench-psql-20 : 11166.75 per sec. 11220.00 per sec. sysbench-psql-40 : 17277.28 per sec. 17359.78 per sec. sysbench-psql-80 : 17112.44 per sec. 17221.16 per sec. (increase on the top end) tbench: NO_WA_WEIGHT Throughput 685.211 MB/sec 2 clients 2 procs max_latency=0.123 ms Throughput 1596.64 MB/sec 5 clients 5 procs max_latency=0.119 ms Throughput 2985.47 MB/sec 10 clients 10 procs max_latency=0.262 ms Throughput 4521.15 MB/sec 20 clients 20 procs max_latency=0.506 ms Throughput 9438.1 MB/sec 40 clients 40 procs max_latency=2.052 ms Throughput 8210.5 MB/sec 80 clients 80 procs max_latency=8.310 ms WA_WEIGHT Throughput 697.292 MB/sec 2 clients 2 procs max_latency=0.127 ms Throughput 1596.48 MB/sec 5 clients 5 procs max_latency=0.080 ms Throughput 2975.22 MB/sec 10 clients 10 procs max_latency=0.254 ms Throughput 4575.14 MB/sec 20 clients 20 procs max_latency=0.502 ms Throughput 9468.65 MB/sec 40 clients 40 procs max_latency=2.069 ms Throughput 8631.73 MB/sec 80 clients 80 procs max_latency=8.605 ms (increase on the top end) Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Rik van Riel <riel@redhat.com> Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--kernel/sched/fair.c41
-rw-r--r--kernel/sched/features.h2
2 files changed, 43 insertions, 0 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 28cabed85387..ed2ab474ec93 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5363,6 +5363,10 @@ static int wake_wide(struct task_struct *p)
*
* wake_affine_idle() - only considers 'now', it check if the waking CPU is (or
* will be) idle.
+ *
+ * wake_affine_weight() - considers the weight to reflect the average
+ * scheduling latency of the CPUs. This seems to work
+ * for the overloaded case.
*/
static bool
@@ -5378,6 +5382,40 @@ wake_affine_idle(struct sched_domain *sd, struct task_struct *p,
return false;
}
+static bool
+wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
+ int this_cpu, int prev_cpu, int sync)
+{
+ s64 this_eff_load, prev_eff_load;
+ unsigned long task_load;
+
+ this_eff_load = target_load(this_cpu, sd->wake_idx);
+ prev_eff_load = source_load(prev_cpu, sd->wake_idx);
+
+ if (sync) {
+ unsigned long current_load = task_h_load(current);
+
+ if (current_load > this_eff_load)
+ return true;
+
+ this_eff_load -= current_load;
+ }
+
+ task_load = task_h_load(p);
+
+ this_eff_load += task_load;
+ if (sched_feat(WA_BIAS))
+ this_eff_load *= 100;
+ this_eff_load *= capacity_of(prev_cpu);
+
+ prev_eff_load -= task_load;
+ if (sched_feat(WA_BIAS))
+ prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
+ prev_eff_load *= capacity_of(this_cpu);
+
+ return this_eff_load <= prev_eff_load;
+}
+
static int wake_affine(struct sched_domain *sd, struct task_struct *p,
int prev_cpu, int sync)
{
@@ -5387,6 +5425,9 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
if (sched_feat(WA_IDLE) && !affine)
affine = wake_affine_idle(sd, p, this_cpu, prev_cpu, sync);
+ if (sched_feat(WA_WEIGHT) && !affine)
+ affine = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
+
schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
if (affine) {
schedstat_inc(sd->ttwu_move_affine);
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 0a519f8c224d..319ed0e8a347 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -82,3 +82,5 @@ SCHED_FEAT(LB_MIN, false)
SCHED_FEAT(ATTACH_AGE_LOAD, true)
SCHED_FEAT(WA_IDLE, true)
+SCHED_FEAT(WA_WEIGHT, true)
+SCHED_FEAT(WA_BIAS, true)