summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Low <jason.low2@hp.com>2013-09-13 11:26:52 -0700
committerIngo Molnar <mingo@kernel.org>2013-09-20 12:03:44 +0200
commit9bd721c55c8a886b938a45198aab0ccb52f1f7fa (patch)
tree1c47dcd9afda75ce5a1d4800da33b35076a246a9
parentabfafa54db9aba404e8e6763503f04d35bd07138 (diff)
sched/balancing: Consider max cost of idle balance per sched domain
In this patch, we keep track of the max cost we spend doing idle load balancing for each sched domain. If the avg time the CPU remains idle is less then the time we have already spent on idle balancing + the max cost of idle balancing in the sched domain, then we don't continue to attempt the balance. We also keep a per rq variable, max_idle_balance_cost, which keeps track of the max time spent on newidle load balances throughout all its domains so that we can determine the avg_idle's max value. By using the max, we avoid overrunning the average. This further reduces the chance we attempt balancing when the CPU is not idle for longer than the cost to balance. Signed-off-by: Jason Low <jason.low2@hp.com> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1379096813-3032-3-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/metag/include/asm/topology.h1
-rw-r--r--include/linux/sched.h1
-rw-r--r--include/linux/topology.h3
-rw-r--r--kernel/sched/core.c3
-rw-r--r--kernel/sched/fair.c16
-rw-r--r--kernel/sched/sched.h3
6 files changed, 26 insertions, 1 deletions
diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h
index 23f5118f58db..db192924f4b0 100644
--- a/arch/metag/include/asm/topology.h
+++ b/arch/metag/include/asm/topology.h
@@ -26,6 +26,7 @@
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
+ .max_newidle_lb_cost = 0, \
}
#define cpu_to_node(cpu) ((void)(cpu), 0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6682da36b293..be078ff9157f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -810,6 +810,7 @@ struct sched_domain {
unsigned int nr_balance_failed; /* initialise to 0 */
u64 last_update;
+ u64 max_newidle_lb_cost;
#ifdef CONFIG_SCHEDSTATS
/* load_balance() stats */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index d3cf0d6e7712..e2a2c3da2929 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -106,6 +106,7 @@ int arch_update_cpu_topology(void);
.last_balance = jiffies, \
.balance_interval = 1, \
.smt_gain = 1178, /* 15% */ \
+ .max_newidle_lb_cost = 0, \
}
#endif
#endif /* CONFIG_SCHED_SMT */
@@ -135,6 +136,7 @@ int arch_update_cpu_topology(void);
, \
.last_balance = jiffies, \
.balance_interval = 1, \
+ .max_newidle_lb_cost = 0, \
}
#endif
#endif /* CONFIG_SCHED_MC */
@@ -166,6 +168,7 @@ int arch_update_cpu_topology(void);
, \
.last_balance = jiffies, \
.balance_interval = 1, \
+ .max_newidle_lb_cost = 0, \
}
#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 048f39e45761..c2283c54aed0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1330,7 +1330,7 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
if (rq->idle_stamp) {
u64 delta = rq_clock(rq) - rq->idle_stamp;
- u64 max = 2*sysctl_sched_migration_cost;
+ u64 max = 2*rq->max_idle_balance_cost;
update_avg(&rq->avg_idle, delta);
@@ -6506,6 +6506,7 @@ void __init sched_init(void)
rq->online = 0;
rq->idle_stamp = 0;
rq->avg_idle = 2*sysctl_sched_migration_cost;
+ rq->max_idle_balance_cost = sysctl_sched_migration_cost;
INIT_LIST_HEAD(&rq->cfs_tasks);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0784ab6fcc59..ffc99d8f0a95 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5396,6 +5396,7 @@ void idle_balance(int this_cpu, struct rq *this_rq)
struct sched_domain *sd;
int pulled_task = 0;
unsigned long next_balance = jiffies + HZ;
+ u64 curr_cost = 0;
this_rq->idle_stamp = rq_clock(this_rq);
@@ -5412,15 +5413,27 @@ void idle_balance(int this_cpu, struct rq *this_rq)
for_each_domain(this_cpu, sd) {
unsigned long interval;
int continue_balancing = 1;
+ u64 t0, domain_cost;
if (!(sd->flags & SD_LOAD_BALANCE))
continue;
+ if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+ break;
+
if (sd->flags & SD_BALANCE_NEWIDLE) {
+ t0 = sched_clock_cpu(this_cpu);
+
/* If we've pulled tasks over stop searching: */
pulled_task = load_balance(this_cpu, this_rq,
sd, CPU_NEWLY_IDLE,
&continue_balancing);
+
+ domain_cost = sched_clock_cpu(this_cpu) - t0;
+ if (domain_cost > sd->max_newidle_lb_cost)
+ sd->max_newidle_lb_cost = domain_cost;
+
+ curr_cost += domain_cost;
}
interval = msecs_to_jiffies(sd->balance_interval);
@@ -5442,6 +5455,9 @@ void idle_balance(int this_cpu, struct rq *this_rq)
*/
this_rq->next_balance = next_balance;
}
+
+ if (curr_cost > this_rq->max_idle_balance_cost)
+ this_rq->max_idle_balance_cost = curr_cost;
}
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0d7544c3dba7..e82484db7699 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -476,6 +476,9 @@ struct rq {
u64 age_stamp;
u64 idle_stamp;
u64 avg_idle;
+
+ /* This is used to determine avg_idle's max value */
+ u64 max_idle_balance_cost;
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING