Diffstat (limited to 'kernel/sched/deadline.c')
-rw-r--r--	kernel/sched/deadline.c	390
1 file changed, 323 insertions, 67 deletions
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 7b7671060bf9..319439fe1870 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -125,20 +125,11 @@ static inline struct dl_bw *dl_bw_of(int i)
static inline int dl_bw_cpus(int i)
{
struct root_domain *rd = cpu_rq(i)->rd;
- int cpus;
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
"sched RCU must be held");
- if (cpumask_subset(rd->span, cpu_active_mask))
- return cpumask_weight(rd->span);
-
- cpus = 0;
-
- for_each_cpu_and(i, rd->span, cpu_active_mask)
- cpus++;
-
- return cpus;
+ return cpumask_weight_and(rd->span, cpu_active_mask);
}
static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
@@ -405,7 +396,7 @@ static void __dl_clear_params(struct sched_dl_entity *dl_se);
* up, and checks if the task is still in the "ACTIVE non contending"
* state or not (in the second case, it updates running_bw).
*/
-static void task_non_contending(struct sched_dl_entity *dl_se)
+static void task_non_contending(struct sched_dl_entity *dl_se, bool dl_task)
{
struct hrtimer *timer = &dl_se->inactive_timer;
struct rq *rq = rq_of_dl_se(dl_se);
@@ -444,10 +435,10 @@ static void task_non_contending(struct sched_dl_entity *dl_se)
} else {
struct task_struct *p = dl_task_of(dl_se);
- if (dl_task(p))
+ if (dl_task)
sub_running_bw(dl_se, dl_rq);
- if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
+ if (!dl_task || READ_ONCE(p->__state) == TASK_DEAD) {
struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
if (READ_ONCE(p->__state) == TASK_DEAD)
@@ -1166,8 +1157,17 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
sched_clock_tick();
update_rq_clock(rq);
- if (!dl_se->dl_runtime)
+ /*
+ * Make sure current has propagated its pending runtime into
+ * any relevant server through calling dl_server_update() and
+ * friends.
+ */
+ rq->donor->sched_class->update_curr(rq);
+
+ if (dl_se->dl_defer_idle) {
+ dl_server_stop(dl_se);
return HRTIMER_NORESTART;
+ }
if (dl_se->dl_defer_armed) {
/*
@@ -1416,10 +1416,11 @@ s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta
}
static inline void
-update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
- int flags);
+update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, int flags);
+
static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
{
+ bool idle = rq->curr == rq->idle;
s64 scaled_delta_exec;
if (unlikely(delta_exec <= 0)) {
@@ -1440,6 +1441,9 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
dl_se->runtime -= scaled_delta_exec;
+ if (dl_se->dl_defer_idle && !idle)
+ dl_se->dl_defer_idle = 0;
+
/*
* The fair server can consume its runtime while throttled (not queued/
* running as regular CFS).
@@ -1450,6 +1454,29 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
*/
if (dl_se->dl_defer && dl_se->dl_throttled && dl_runtime_exceeded(dl_se)) {
/*
+ * Non-servers would never get time accounted while throttled.
+ */
+ WARN_ON_ONCE(!dl_server(dl_se));
+
+ /*
+ * While the server is marked idle, do not push the
+ * activation out any further; instead, wait for the period
+ * timer to lapse and stop the server.
+ */
+ if (dl_se->dl_defer_idle && idle) {
+ /*
+ * The timer is at the zero-laxity point, which means
+ * dl_server_stop() / dl_server_start() can happen while
+ * now < deadline, in which case update_dl_entity() will
+ * not replenish. Additionally, start_dl_timer() will be
+ * set for 'deadline - runtime', and a negative runtime
+ * will not do.
+ */
+ dl_se->runtime = 0;
+ return;
+ }
+
+ /*
* If the server was previously activated - the starving condition
* took place - at this point it went away because the fair scheduler
* was able to get runtime in the background. So return to the initial
@@ -1461,6 +1488,9 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
replenish_dl_new_period(dl_se, dl_se->rq);
+ if (idle)
+ dl_se->dl_defer_idle = 1;
+
/*
* Not being able to start the timer seems problematic. If it could not
* be started for whatever reason, we need to "unthrottle" the DL server
@@ -1543,38 +1573,213 @@ throttle:
* as time available for the fair server, avoiding a penalty for the
* rt scheduler that did not consume that time.
*/
-void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
+void dl_server_update_idle(struct sched_dl_entity *dl_se, s64 delta_exec)
{
- s64 delta_exec;
-
- if (!rq->fair_server.dl_defer)
- return;
-
- /* no need to discount more */
- if (rq->fair_server.runtime < 0)
- return;
-
- delta_exec = rq_clock_task(rq) - p->se.exec_start;
- if (delta_exec < 0)
- return;
-
- rq->fair_server.runtime -= delta_exec;
-
- if (rq->fair_server.runtime < 0) {
- rq->fair_server.dl_defer_running = 0;
- rq->fair_server.runtime = 0;
- }
-
- p->se.exec_start = rq_clock_task(rq);
+ if (dl_se->dl_server_active && dl_se->dl_runtime && dl_se->dl_defer)
+ update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
}
void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
{
/* 0 runtime = fair server disabled */
- if (dl_se->dl_runtime)
+ if (dl_se->dl_server_active && dl_se->dl_runtime)
update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
}
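
With the reworked signatures above, the client classes pass the measured delta themselves rather than having the server derive it from exec_start. A minimal sketch of the intended call pattern follows; the wrapper name and call site are assumptions for illustration, and only dl_server_update(), dl_server_update_idle() and rq->fair_server come from this file:

	/*
	 * Hypothetical helper, not part of the patch: charge @delta_exec ns of
	 * non-fair execution (or idle time) against this CPU's fair server.
	 */
	static inline void fair_server_charge(struct rq *rq, s64 delta_exec, bool from_idle)
	{
		if (from_idle) {
			/* Only depletes an active, deferred server. */
			dl_server_update_idle(&rq->fair_server, delta_exec);
		} else {
			/* Regular client-class accounting, e.g. from the RT class. */
			dl_server_update(&rq->fair_server, delta_exec);
		}
	}
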
+/*
+ * dl_server && dl_defer:
+ *
+ * 6
+ * +--------------------+
+ * v |
+ * +-------------+ 4 +-----------+ 5 +------------------+
+ * +-> | A:init | <--- | D:running | -----> | E:replenish-wait |
+ * | +-------------+ +-----------+ +------------------+
+ * | | | 1 ^ ^ |
+ * | | 1 +----------+ | 3 |
+ * | v | |
+ * | +--------------------------------+ 2 |
+ * | | | ----+ |
+ * | 8 | B:zero_laxity-wait | | |
+ * | | | <---+ |
+ * | +--------------------------------+ |
+ * | | ^ ^ 2 |
+ * | | 7 | 2 +--------------------+
+ * | v |
+ * | +-------------+ |
+ * +-- | C:idle-wait | -+
+ * +-------------+
+ * ^ 7 |
+ * +---------+
+ *
+ *
+ * [A] - init
+ * dl_server_active = 0
+ * dl_throttled = 0
+ * dl_defer_armed = 0
+ * dl_defer_running = 0/1
+ * dl_defer_idle = 0
+ *
+ * [B] - zero_laxity-wait
+ * dl_server_active = 1
+ * dl_throttled = 1
+ * dl_defer_armed = 1
+ * dl_defer_running = 0
+ * dl_defer_idle = 0
+ *
+ * [C] - idle-wait
+ * dl_server_active = 1
+ * dl_throttled = 1
+ * dl_defer_armed = 1
+ * dl_defer_running = 0
+ * dl_defer_idle = 1
+ *
+ * [D] - running
+ * dl_server_active = 1
+ * dl_throttled = 0
+ * dl_defer_armed = 0
+ * dl_defer_running = 1
+ * dl_defer_idle = 0
+ *
+ * [E] - replenish-wait
+ * dl_server_active = 1
+ * dl_throttled = 1
+ * dl_defer_armed = 0
+ * dl_defer_running = 1
+ * dl_defer_idle = 0
+ *
+ *
+ * [1] A->B, A->D
+ * dl_server_start()
+ * dl_server_active = 1;
+ * enqueue_dl_entity()
+ * update_dl_entity(WAKEUP)
+ * if (!dl_defer_running)
+ * dl_defer_armed = 1;
+ * dl_throttled = 1;
+ * if (dl_throttled && start_dl_timer())
+ * return; // [B]
+ * __enqueue_dl_entity();
+ * // [D]
+ *
+ * // deplete server runtime from client-class
+ * [2] B->B, C->B, E->B
+ * dl_server_update()
+ * update_curr_dl_se() // idle = false
+ * if (dl_defer_idle)
+ * dl_defer_idle = 0;
+ * if (dl_defer && dl_throttled && dl_runtime_exceeded())
+ * dl_defer_running = 0;
+ * hrtimer_try_to_cancel(); // stop timer
+ * replenish_dl_new_period()
+ * // fwd period
+ * dl_throttled = 1;
+ * dl_defer_armed = 1;
+ * start_dl_timer(); // restart timer
+ * // [B]
+ *
+ * // timer actually fires means we have runtime
+ * [3] B->D
+ * dl_server_timer()
+ * if (dl_defer_armed)
+ * dl_defer_running = 1;
+ * enqueue_dl_entity(REPLENISH)
+ * replenish_dl_entity()
+ * // fwd period
+ * if (dl_throttled)
+ * dl_throttled = 0;
+ * if (dl_defer_armed)
+ * dl_defer_armed = 0;
+ * __enqueue_dl_entity();
+ * // [D]
+ *
+ * // schedule server
+ * [4] D->A
+ * pick_task_dl()
+ * p = server_pick_task();
+ * if (!p)
+ * dl_server_stop()
+ * dequeue_dl_entity();
+ * hrtimer_try_to_cancel();
+ * dl_defer_armed = 0;
+ * dl_throttled = 0;
+ * dl_server_active = 0;
+ * // [A]
+ * return p;
+ *
+ * // server running
+ * [5] D->E
+ * update_curr_dl_se()
+ * if (dl_runtime_exceeded())
+ * dl_throttled = 1;
+ * dequeue_dl_entity();
+ * start_dl_timer();
+ * // [E]
+ *
+ * // server replenished
+ * [6] E->D
+ * dl_server_timer()
+ * enqueue_dl_entity(REPLENISH)
+ * replenish_dl_entity()
+ * fwd-period
+ * if (dl_throttled)
+ * dl_throttled = 0;
+ * __enqueue_dl_entity();
+ * // [D]
+ *
+ * // deplete server runtime from idle
+ * [7] B->C, C->C
+ * dl_server_update_idle()
+ * update_curr_dl_se() // idle = true
+ * if (dl_defer && dl_throttled && dl_runtime_exceeded())
+ * if (dl_defer_idle)
+ * return;
+ * dl_defer_running = 0;
+ * hrtimer_try_to_cancel();
+ * replenish_dl_new_period()
+ * // fwd period
+ * dl_throttled = 1;
+ * dl_defer_armed = 1;
+ * dl_defer_idle = 1;
+ * start_dl_timer(); // restart timer
+ * // [C]
+ *
+ * // stop idle server
+ * [8] C->A
+ * dl_server_timer()
+ * if (dl_defer_idle)
+ * dl_server_stop();
+ * // [A]
+ *
+ *
+ * digraph dl_server {
+ * "A:init" -> "B:zero_laxity-wait" [label="1:dl_server_start"]
+ * "A:init" -> "D:running" [label="1:dl_server_start"]
+ * "B:zero_laxity-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
+ * "B:zero_laxity-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
+ * "B:zero_laxity-wait" -> "D:running" [label="3:dl_server_timer"]
+ * "C:idle-wait" -> "A:init" [label="8:dl_server_timer"]
+ * "C:idle-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
+ * "C:idle-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
+ * "D:running" -> "A:init" [label="4:pick_task_dl"]
+ * "D:running" -> "E:replenish-wait" [label="5:update_curr_dl_se"]
+ * "E:replenish-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
+ * "E:replenish-wait" -> "D:running" [label="6:dl_server_timer"]
+ * }
+ *
+ *
+ * Notes:
+ *
+ * - When there are fair tasks running, the most likely loop is [2]->[2]:
+ *   the dl_server never actually runs and the timer never fires.
+ *
+ * - When there is actual fair starvation, the timer fires and starts the
+ *   dl_server. It will then throttle and replenish like a normal DL
+ *   task. Notably, it will not 'defer' again.
+ *
+ * - When idle, it will push the activation forward once and then wait
+ *   for the timer to fire or for a non-idle update to restart things.
+ */
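
As a reading aid for the flag table above, here is a small decoder, purely illustrative and not part of the patch, assuming only the sched_dl_entity flags named in this file, that maps a flag combination back to the state labels A-E:

	/*
	 * Illustrative sketch only: decode the dl_server defer state from its
	 * flags, following the [A]..[E] table above.
	 */
	static inline const char *dl_server_state_name(const struct sched_dl_entity *dl_se)
	{
		if (!dl_se->dl_server_active)
			return "A:init";
		if (!dl_se->dl_throttled)
			return "D:running";		/* queued and consuming runtime */
		if (!dl_se->dl_defer_armed)
			return "E:replenish-wait";	/* throttled, waiting for replenishment */
		if (dl_se->dl_defer_idle)
			return "C:idle-wait";		/* deferred while the CPU stayed idle */
		return "B:zero_laxity-wait";		/* deferred, timer armed at the zero-laxity point */
	}
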
void dl_server_start(struct sched_dl_entity *dl_se)
{
struct rq *rq = dl_se->rq;
@@ -1582,6 +1787,11 @@ void dl_server_start(struct sched_dl_entity *dl_se)
if (!dl_server(dl_se) || dl_se->dl_server_active)
return;
+ /*
+ * Update the current task to 'now'.
+ */
+ rq->donor->sched_class->update_curr(rq);
+
if (WARN_ON_ONCE(!cpu_online(cpu_of(rq))))
return;
@@ -1600,6 +1810,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
hrtimer_try_to_cancel(&dl_se->dl_timer);
dl_se->dl_defer_armed = 0;
dl_se->dl_throttled = 0;
+ dl_se->dl_defer_idle = 0;
dl_se->dl_server_active = 0;
}
@@ -1811,7 +2022,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
if (!dl_rq->dl_nr_running) {
dl_rq->earliest_dl.curr = 0;
dl_rq->earliest_dl.next = 0;
- cpudl_clear(&rq->rd->cpudl, rq->cpu);
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, rq->online);
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
} else {
struct rb_node *leftmost = rb_first_cached(&dl_rq->root);
@@ -2048,7 +2259,7 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se, int flags)
* or "inactive")
*/
if (flags & DEQUEUE_SLEEP)
- task_non_contending(dl_se);
+ task_non_contending(dl_se, true);
}
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
@@ -2143,7 +2354,7 @@ static void yield_task_dl(struct rq *rq)
* it and the bandwidth timer will wake it up and will give it
* new scheduling parameters (thanks to dl_yielded=1).
*/
- rq->curr->dl.dl_yielded = 1;
+ rq->donor->dl.dl_yielded = 1;
update_rq_clock(rq);
update_curr_dl(rq);
@@ -2173,7 +2384,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
struct rq *rq;
if (!(flags & WF_TTWU))
- goto out;
+ return cpu;
rq = cpu_rq(cpu);
@@ -2211,7 +2422,6 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
}
rcu_read_unlock();
-out:
return cpu;
}
@@ -2355,7 +2565,7 @@ static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
* __pick_task_dl - Helper to pick the next -deadline task to run.
* @rq: The runqueue to pick the next task from.
*/
-static struct task_struct *__pick_task_dl(struct rq *rq)
+static struct task_struct *__pick_task_dl(struct rq *rq, struct rq_flags *rf)
{
struct sched_dl_entity *dl_se;
struct dl_rq *dl_rq = &rq->dl;
@@ -2369,7 +2579,7 @@ again:
WARN_ON_ONCE(!dl_se);
if (dl_server(dl_se)) {
- p = dl_se->server_pick_task(dl_se);
+ p = dl_se->server_pick_task(dl_se, rf);
if (!p) {
dl_server_stop(dl_se);
goto again;
@@ -2382,9 +2592,9 @@ again:
return p;
}
-static struct task_struct *pick_task_dl(struct rq *rq)
+static struct task_struct *pick_task_dl(struct rq *rq, struct rq_flags *rf)
{
- return __pick_task_dl(rq);
+ return __pick_task_dl(rq, rf);
}
static void put_prev_task_dl(struct rq *rq, struct task_struct *p, struct task_struct *next)
@@ -2465,6 +2675,7 @@ static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu
return NULL;
}
+/* Access rule: only use on the local CPU with preemption disabled */
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
static int find_later_rq(struct task_struct *task)
@@ -2883,9 +3094,10 @@ static void rq_online_dl(struct rq *rq)
if (rq->dl.overloaded)
dl_set_overload(rq);
- cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
if (rq->dl.dl_nr_running > 0)
cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
+ else
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, true);
}
/* Assumes rq->lock is held */
@@ -2894,8 +3106,7 @@ static void rq_offline_dl(struct rq *rq)
if (rq->dl.overloaded)
dl_clear_overload(rq);
- cpudl_clear(&rq->rd->cpudl, rq->cpu);
- cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, false);
}
void __init init_sched_dl_class(void)
@@ -2907,11 +3118,43 @@ void __init init_sched_dl_class(void)
GFP_KERNEL, cpu_to_node(i));
}
+/*
+ * This function always returns a non-empty bitmap in @cpus. This is because
+ * if a root domain has reserved bandwidth for DL tasks, the DL bandwidth
+ * check will prevent CPU hotplug from deactivating all CPUs in that domain.
+ */
+static void dl_get_task_effective_cpus(struct task_struct *p, struct cpumask *cpus)
+{
+ const struct cpumask *hk_msk;
+
+ hk_msk = housekeeping_cpumask(HK_TYPE_DOMAIN);
+ if (housekeeping_enabled(HK_TYPE_DOMAIN)) {
+ if (!cpumask_intersects(p->cpus_ptr, hk_msk)) {
+ /*
+ * CPUs isolated by isolcpus="domain" always belong to
+ * def_root_domain.
+ */
+ cpumask_andnot(cpus, cpu_active_mask, hk_msk);
+ return;
+ }
+ }
+
+ /*
+ * If a root domain holds a DL task, it must have active CPUs. So
+ * active CPUs can always be found by walking the task's cpuset
+ * hierarchy up to the partition root.
+ */
+ cpuset_cpus_allowed_locked(p, cpus);
+}
+
+/* The caller should hold cpuset_mutex */
void dl_add_task_root_domain(struct task_struct *p)
{
struct rq_flags rf;
struct rq *rq;
struct dl_bw *dl_b;
+ unsigned int cpu;
+ struct cpumask *msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
if (!dl_task(p) || dl_entity_is_special(&p->dl)) {
@@ -2919,16 +3162,25 @@ void dl_add_task_root_domain(struct task_struct *p)
return;
}
- rq = __task_rq_lock(p, &rf);
-
+ /*
+ * Get an active rq, whose rq->rd points to the correct root
+ * domain.
+ * Ideally this would be done under the cpuset reader lock until rq->rd
+ * is fetched. However, sleepable locks cannot nest inside pi_lock, so we
+ * rely on the caller of dl_add_task_root_domain() holding 'cpuset_mutex'
+ * to guarantee the CPU stays in the cpuset.
+ */
+ dl_get_task_effective_cpus(p, msk);
+ cpu = cpumask_first_and(cpu_active_mask, msk);
+ BUG_ON(cpu >= nr_cpu_ids);
+ rq = cpu_rq(cpu);
dl_b = &rq->rd->dl_bw;
- raw_spin_lock(&dl_b->lock);
+ /* End of fetching rd */
+ raw_spin_lock(&dl_b->lock);
__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
-
raw_spin_unlock(&dl_b->lock);
-
- task_rq_unlock(rq, p, &rf);
+ raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
}
void dl_clear_root_domain(struct root_domain *rd)
@@ -2973,7 +3225,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
* will reset the task parameters.
*/
if (task_on_rq_queued(p) && p->dl.dl_runtime)
- task_non_contending(&p->dl);
+ task_non_contending(&p->dl, false);
/*
* In case a task is setscheduled out from SCHED_DEADLINE we need to
@@ -3045,23 +3297,24 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
}
}
+static u64 get_prio_dl(struct rq *rq, struct task_struct *p)
+{
+ return p->dl.deadline;
+}
+
/*
* If the scheduling parameters of a -deadline task changed,
* a push or pull operation might be needed.
*/
-static void prio_changed_dl(struct rq *rq, struct task_struct *p,
- int oldprio)
+static void prio_changed_dl(struct rq *rq, struct task_struct *p, u64 old_deadline)
{
if (!task_on_rq_queued(p))
return;
- /*
- * This might be too much, but unfortunately
- * we don't have the old deadline value, and
- * we can't argue if the task is increasing
- * or lowering its prio, so...
- */
- if (!rq->dl.overloaded)
+ if (p->dl.deadline == old_deadline)
+ return;
+
+ if (dl_time_before(old_deadline, p->dl.deadline))
deadline_queue_pull_task(rq);
if (task_current_donor(rq, p)) {
@@ -3094,6 +3347,8 @@ static int task_is_throttled_dl(struct task_struct *p, int cpu)
DEFINE_SCHED_CLASS(dl) = {
+ .queue_mask = 8,
+
.enqueue_task = enqueue_task_dl,
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
@@ -3116,6 +3371,7 @@ DEFINE_SCHED_CLASS(dl) = {
.task_tick = task_tick_dl,
.task_fork = task_fork_dl,
+ .get_prio = get_prio_dl,
.prio_changed = prio_changed_dl,
.switched_from = switched_from_dl,
.switched_to = switched_to_dl,