diff options
| -rw-r--r-- | kernel/sched/fair.c | 152 |
1 files changed, 130 insertions, 22 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 071e07f8cb27..c6e5c64dd332 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -929,6 +929,16 @@ static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq, bool protect) if (cfs_rq->nr_queued == 1) return curr && curr->on_rq ? curr : se; + /* + * Picking the ->next buddy will affect latency but not fairness. + */ + if (sched_feat(PICK_BUDDY) && + cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next)) { + /* ->next will never be delayed */ + WARN_ON_ONCE(cfs_rq->next->sched_delayed); + return cfs_rq->next; + } + if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr))) curr = NULL; @@ -1167,6 +1177,8 @@ static s64 update_se(struct rq *rq, struct sched_entity *se) return delta_exec; } +static void set_next_buddy(struct sched_entity *se); + /* * Used by other classes to account runtime. */ @@ -5466,16 +5478,6 @@ pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq) { struct sched_entity *se; - /* - * Picking the ->next buddy will affect latency but not fairness. - */ - if (sched_feat(PICK_BUDDY) && - cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next)) { - /* ->next will never be delayed */ - WARN_ON_ONCE(cfs_rq->next->sched_delayed); - return cfs_rq->next; - } - se = pick_eevdf(cfs_rq); if (se->sched_delayed) { dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED); @@ -6988,8 +6990,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) hrtick_update(rq); } -static void set_next_buddy(struct sched_entity *se); - /* * Basically dequeue_task_fair(), except it can deal with dequeue_entity() * failing half-way through and resume the dequeue later. @@ -8676,16 +8676,81 @@ static void set_next_buddy(struct sched_entity *se) } } +enum preempt_wakeup_action { + PREEMPT_WAKEUP_NONE, /* No preemption. */ + PREEMPT_WAKEUP_SHORT, /* Ignore slice protection. */ + PREEMPT_WAKEUP_PICK, /* Let __pick_eevdf() decide. */ + PREEMPT_WAKEUP_RESCHED, /* Force reschedule. */ +}; + +static inline bool +set_preempt_buddy(struct cfs_rq *cfs_rq, int wake_flags, + struct sched_entity *pse, struct sched_entity *se) +{ + /* + * Keep existing buddy if the deadline is sooner than pse. + * The older buddy may be cache cold and completely unrelated + * to the current wakeup but that is unpredictable where as + * obeying the deadline is more in line with EEVDF objectives. + */ + if (cfs_rq->next && entity_before(cfs_rq->next, pse)) + return false; + + set_next_buddy(pse); + return true; +} + +/* + * WF_SYNC|WF_TTWU indicates the waker expects to sleep but it is not + * strictly enforced because the hint is either misunderstood or + * multiple tasks must be woken up. + */ +static inline enum preempt_wakeup_action +preempt_sync(struct rq *rq, int wake_flags, + struct sched_entity *pse, struct sched_entity *se) +{ + u64 threshold, delta; + + /* + * WF_SYNC without WF_TTWU is not expected so warn if it happens even + * though it is likely harmless. + */ + WARN_ON_ONCE(!(wake_flags & WF_TTWU)); + + threshold = sysctl_sched_migration_cost; + delta = rq_clock_task(rq) - se->exec_start; + if ((s64)delta < 0) + delta = 0; + + /* + * WF_RQ_SELECTED implies the tasks are stacking on a CPU when they + * could run on other CPUs. Reduce the threshold before preemption is + * allowed to an arbitrary lower value as it is more likely (but not + * guaranteed) the waker requires the wakee to finish. + */ + if (wake_flags & WF_RQ_SELECTED) + threshold >>= 2; + + /* + * As WF_SYNC is not strictly obeyed, allow some runtime for batch + * wakeups to be issued. + */ + if (entity_before(pse, se) && delta >= threshold) + return PREEMPT_WAKEUP_RESCHED; + + return PREEMPT_WAKEUP_NONE; +} + /* * Preempt the current task with a newly woken task if needed: */ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int wake_flags) { + enum preempt_wakeup_action preempt_action = PREEMPT_WAKEUP_PICK; struct task_struct *donor = rq->donor; struct sched_entity *se = &donor->se, *pse = &p->se; struct cfs_rq *cfs_rq = task_cfs_rq(donor); int cse_is_idle, pse_is_idle; - bool do_preempt_short = false; if (unlikely(se == pse)) return; @@ -8699,10 +8764,6 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int if (task_is_throttled(p)) return; - if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK) && !pse->sched_delayed) { - set_next_buddy(pse); - } - /* * We can come here with TIF_NEED_RESCHED already set from new task * wake up path. @@ -8734,7 +8795,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int * When non-idle entity preempt an idle entity, * don't give idle entity slice protection. */ - do_preempt_short = true; + preempt_action = PREEMPT_WAKEUP_SHORT; goto preempt; } @@ -8753,21 +8814,68 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int * If @p has a shorter slice than current and @p is eligible, override * current's slice protection in order to allow preemption. */ - do_preempt_short = sched_feat(PREEMPT_SHORT) && (pse->slice < se->slice); + if (sched_feat(PREEMPT_SHORT) && (pse->slice < se->slice)) { + preempt_action = PREEMPT_WAKEUP_SHORT; + goto pick; + } /* + * Ignore wakee preemption on WF_FORK as it is less likely that + * there is shared data as exec often follow fork. Do not + * preempt for tasks that are sched_delayed as it would violate + * EEVDF to forcibly queue an ineligible task. + */ + if ((wake_flags & WF_FORK) || pse->sched_delayed) + return; + + /* + * If @p potentially is completing work required by current then + * consider preemption. + * + * Reschedule if waker is no longer eligible. */ + if (in_task() && !entity_eligible(cfs_rq, se)) { + preempt_action = PREEMPT_WAKEUP_RESCHED; + goto preempt; + } + + /* Prefer picking wakee soon if appropriate. */ + if (sched_feat(NEXT_BUDDY) && + set_preempt_buddy(cfs_rq, wake_flags, pse, se)) { + + /* + * Decide whether to obey WF_SYNC hint for a new buddy. Old + * buddies are ignored as they may not be relevant to the + * waker and less likely to be cache hot. + */ + if (wake_flags & WF_SYNC) + preempt_action = preempt_sync(rq, wake_flags, pse, se); + } + + switch (preempt_action) { + case PREEMPT_WAKEUP_NONE: + return; + case PREEMPT_WAKEUP_RESCHED: + goto preempt; + case PREEMPT_WAKEUP_SHORT: + fallthrough; + case PREEMPT_WAKEUP_PICK: + break; + } + +pick: + /* * If @p has become the most eligible task, force preemption. */ - if (__pick_eevdf(cfs_rq, !do_preempt_short) == pse) + if (__pick_eevdf(cfs_rq, preempt_action != PREEMPT_WAKEUP_SHORT) == pse) goto preempt; - if (sched_feat(RUN_TO_PARITY) && do_preempt_short) + if (sched_feat(RUN_TO_PARITY)) update_protect_slice(cfs_rq, se); return; preempt: - if (do_preempt_short) + if (preempt_action == PREEMPT_WAKEUP_SHORT) cancel_protect_slice(se); resched_curr_lazy(rq); |
