summaryrefslogtreecommitdiff
path: root/kernel/sched/fair.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--kernel/sched/fair.c101
1 files changed, 61 insertions, 40 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7b6535987500..5eb3ffc9be84 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -871,7 +871,7 @@ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
likely(wait_start > prev_wait_start))
wait_start -= prev_wait_start;
- schedstat_set(se->statistics.wait_start, wait_start);
+ __schedstat_set(se->statistics.wait_start, wait_start);
}
static inline void
@@ -893,17 +893,17 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
* time stamp can be adjusted to accumulate wait time
* prior to migration.
*/
- schedstat_set(se->statistics.wait_start, delta);
+ __schedstat_set(se->statistics.wait_start, delta);
return;
}
trace_sched_stat_wait(p, delta);
}
- schedstat_set(se->statistics.wait_max,
+ __schedstat_set(se->statistics.wait_max,
max(schedstat_val(se->statistics.wait_max), delta));
- schedstat_inc(se->statistics.wait_count);
- schedstat_add(se->statistics.wait_sum, delta);
- schedstat_set(se->statistics.wait_start, 0);
+ __schedstat_inc(se->statistics.wait_count);
+ __schedstat_add(se->statistics.wait_sum, delta);
+ __schedstat_set(se->statistics.wait_start, 0);
}
static inline void
@@ -928,10 +928,10 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
delta = 0;
if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
- schedstat_set(se->statistics.sleep_max, delta);
+ __schedstat_set(se->statistics.sleep_max, delta);
- schedstat_set(se->statistics.sleep_start, 0);
- schedstat_add(se->statistics.sum_sleep_runtime, delta);
+ __schedstat_set(se->statistics.sleep_start, 0);
+ __schedstat_add(se->statistics.sum_sleep_runtime, delta);
if (tsk) {
account_scheduler_latency(tsk, delta >> 10, 1);
@@ -945,15 +945,15 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
delta = 0;
if (unlikely(delta > schedstat_val(se->statistics.block_max)))
- schedstat_set(se->statistics.block_max, delta);
+ __schedstat_set(se->statistics.block_max, delta);
- schedstat_set(se->statistics.block_start, 0);
- schedstat_add(se->statistics.sum_sleep_runtime, delta);
+ __schedstat_set(se->statistics.block_start, 0);
+ __schedstat_add(se->statistics.sum_sleep_runtime, delta);
if (tsk) {
if (tsk->in_iowait) {
- schedstat_add(se->statistics.iowait_sum, delta);
- schedstat_inc(se->statistics.iowait_count);
+ __schedstat_add(se->statistics.iowait_sum, delta);
+ __schedstat_inc(se->statistics.iowait_count);
trace_sched_stat_iowait(tsk, delta);
}
@@ -1012,10 +1012,10 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
struct task_struct *tsk = task_of(se);
if (tsk->state & TASK_INTERRUPTIBLE)
- schedstat_set(se->statistics.sleep_start,
+ __schedstat_set(se->statistics.sleep_start,
rq_clock(rq_of(cfs_rq)));
if (tsk->state & TASK_UNINTERRUPTIBLE)
- schedstat_set(se->statistics.block_start,
+ __schedstat_set(se->statistics.block_start,
rq_clock(rq_of(cfs_rq)));
}
}
@@ -5692,27 +5692,31 @@ static int wake_wide(struct task_struct *p)
* scheduling latency of the CPUs. This seems to work
* for the overloaded case.
*/
-
-static bool
-wake_affine_idle(struct sched_domain *sd, struct task_struct *p,
- int this_cpu, int prev_cpu, int sync)
+static int
+wake_affine_idle(int this_cpu, int prev_cpu, int sync)
{
/*
* If this_cpu is idle, it implies the wakeup is from interrupt
* context. Only allow the move if cache is shared. Otherwise an
* interrupt intensive workload could force all tasks onto one
* node depending on the IO topology or IRQ affinity settings.
+ *
+ * If the prev_cpu is idle and cache affine then avoid a migration.
+ * There is no guarantee that the cache hot data from an interrupt
+ * is more important than cache hot data on the prev_cpu and from
+ * a cpufreq perspective, it's better to have higher utilisation
+ * on one CPU.
*/
if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
- return true;
+ return idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
if (sync && cpu_rq(this_cpu)->nr_running == 1)
- return true;
+ return this_cpu;
- return false;
+ return nr_cpumask_bits;
}
-static bool
+static int
wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
int this_cpu, int prev_cpu, int sync)
{
@@ -5726,7 +5730,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
unsigned long current_load = task_h_load(current);
if (current_load > this_eff_load)
- return true;
+ return this_cpu;
this_eff_load -= current_load;
}
@@ -5743,28 +5747,28 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
prev_eff_load *= capacity_of(this_cpu);
- return this_eff_load <= prev_eff_load;
+ return this_eff_load <= prev_eff_load ? this_cpu : nr_cpumask_bits;
}
static int wake_affine(struct sched_domain *sd, struct task_struct *p,
int prev_cpu, int sync)
{
int this_cpu = smp_processor_id();
- bool affine = false;
+ int target = nr_cpumask_bits;
- if (sched_feat(WA_IDLE) && !affine)
- affine = wake_affine_idle(sd, p, this_cpu, prev_cpu, sync);
+ if (sched_feat(WA_IDLE))
+ target = wake_affine_idle(this_cpu, prev_cpu, sync);
- if (sched_feat(WA_WEIGHT) && !affine)
- affine = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
+ if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits)
+ target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
- if (affine) {
- schedstat_inc(sd->ttwu_move_affine);
- schedstat_inc(p->se.statistics.nr_wakeups_affine);
- }
+ if (target == nr_cpumask_bits)
+ return prev_cpu;
- return affine;
+ schedstat_inc(sd->ttwu_move_affine);
+ schedstat_inc(p->se.statistics.nr_wakeups_affine);
+ return target;
}
static inline unsigned long task_util(struct task_struct *p);
@@ -6193,7 +6197,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
static int select_idle_sibling(struct task_struct *p, int prev, int target)
{
struct sched_domain *sd;
- int i;
+ int i, recent_used_cpu;
if (idle_cpu(target))
return target;
@@ -6204,6 +6208,21 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
return prev;
+ /* Check a recently used CPU as a potential idle candidate */
+ recent_used_cpu = p->recent_used_cpu;
+ if (recent_used_cpu != prev &&
+ recent_used_cpu != target &&
+ cpus_share_cache(recent_used_cpu, target) &&
+ idle_cpu(recent_used_cpu) &&
+ cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
+ /*
+ * Replace recent_used_cpu with prev as it is a potential
+ * candidate for the next wake.
+ */
+ p->recent_used_cpu = prev;
+ return recent_used_cpu;
+ }
+
sd = rcu_dereference(per_cpu(sd_llc, target));
if (!sd)
return target;
@@ -6357,8 +6376,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
if (cpu == prev_cpu)
goto pick_cpu;
- if (wake_affine(affine_sd, p, prev_cpu, sync))
- new_cpu = cpu;
+ new_cpu = wake_affine(affine_sd, p, prev_cpu, sync);
}
if (sd && !(sd_flag & SD_BALANCE_FORK)) {
@@ -6372,9 +6390,12 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
if (!sd) {
pick_cpu:
- if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
+ if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+ if (want_affine)
+ current->recent_used_cpu = cpu;
+ }
} else {
new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
}