summaryrefslogtreecommitdiff
path: root/kernel/sched/deadline.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/deadline.c')
-rw-r--r--kernel/sched/deadline.c1730
1 files changed, 1203 insertions, 527 deletions
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 58b542bf2893..319439fe1870 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -17,6 +17,10 @@
*/
#include <linux/cpuset.h>
+#include <linux/sched/clock.h>
+#include <uapi/linux/sched/types.h>
+#include "sched.h"
+#include "pelt.h"
/*
* Default limits for DL period; on the top end we guard against small util
@@ -26,7 +30,7 @@
static unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
static unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
#ifdef CONFIG_SYSCTL
-static struct ctl_table sched_dl_sysctls[] = {
+static const struct ctl_table sched_dl_sysctls[] = {
{
.procname = "sched_deadline_period_max_us",
.data = &sysctl_sched_dl_period_max,
@@ -43,7 +47,6 @@ static struct ctl_table sched_dl_sysctls[] = {
.proc_handler = proc_douintvec_minmax,
.extra2 = (void *)&sysctl_sched_dl_period_max,
},
- {}
};
static int __init sched_dl_sysctl_init(void)
@@ -52,10 +55,16 @@ static int __init sched_dl_sysctl_init(void)
return 0;
}
late_initcall(sched_dl_sysctl_init);
-#endif
+#endif /* CONFIG_SYSCTL */
+
+static bool dl_server(struct sched_dl_entity *dl_se)
+{
+ return dl_se->dl_server;
+}
static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
{
+ BUG_ON(dl_server(dl_se));
return container_of(dl_se, struct task_struct, dl);
}
@@ -64,12 +73,19 @@ static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
return container_of(dl_rq, struct rq, dl);
}
-static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
+static inline struct rq *rq_of_dl_se(struct sched_dl_entity *dl_se)
{
- struct task_struct *p = dl_task_of(dl_se);
- struct rq *rq = task_rq(p);
+ struct rq *rq = dl_se->rq;
- return &rq->dl;
+ if (!dl_server(dl_se))
+ rq = task_rq(dl_task_of(dl_se));
+
+ return rq;
+}
+
+static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
+{
+ return &rq_of_dl_se(dl_se)->dl;
}
static inline int on_dl_rq(struct sched_dl_entity *dl_se)
@@ -87,7 +103,7 @@ static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
{
return pi_of(dl_se) != dl_se;
}
-#else
+#else /* !CONFIG_RT_MUTEXES: */
static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
{
return dl_se;
@@ -97,9 +113,8 @@ static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
{
return false;
}
-#endif
+#endif /* !CONFIG_RT_MUTEXES */
-#ifdef CONFIG_SMP
static inline struct dl_bw *dl_bw_of(int i)
{
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
@@ -110,20 +125,11 @@ static inline struct dl_bw *dl_bw_of(int i)
static inline int dl_bw_cpus(int i)
{
struct root_domain *rd = cpu_rq(i)->rd;
- int cpus;
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
"sched RCU must be held");
- if (cpumask_subset(rd->span, cpu_active_mask))
- return cpumask_weight(rd->span);
-
- cpus = 0;
-
- for_each_cpu_and(i, rd->span, cpu_active_mask)
- cpus++;
-
- return cpus;
+ return cpumask_weight_and(rd->span, cpu_active_mask);
}
static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
@@ -132,7 +138,7 @@ static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
int i;
for_each_cpu_and(i, mask, cpu_active_mask)
- cap += capacity_orig_of(i);
+ cap += arch_scale_cpu_capacity(i);
return cap;
}
@@ -144,7 +150,7 @@ static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
static inline unsigned long dl_bw_capacity(int i)
{
if (!sched_asym_cpucap_active() &&
- capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
+ arch_scale_cpu_capacity(i) == SCHED_CAPACITY_SCALE) {
return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
} else {
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
@@ -154,14 +160,14 @@ static inline unsigned long dl_bw_capacity(int i)
}
}
-static inline bool dl_bw_visited(int cpu, u64 gen)
+bool dl_bw_visited(int cpu, u64 cookie)
{
struct root_domain *rd = cpu_rq(cpu)->rd;
- if (rd->visit_gen == gen)
+ if (rd->visit_cookie == cookie)
return true;
- rd->visit_gen = gen;
+ rd->visit_cookie = cookie;
return false;
}
@@ -179,35 +185,6 @@ void __dl_update(struct dl_bw *dl_b, s64 bw)
rq->dl.extra_bw += bw;
}
}
-#else
-static inline struct dl_bw *dl_bw_of(int i)
-{
- return &cpu_rq(i)->dl.dl_bw;
-}
-
-static inline int dl_bw_cpus(int i)
-{
- return 1;
-}
-
-static inline unsigned long dl_bw_capacity(int i)
-{
- return SCHED_CAPACITY_SCALE;
-}
-
-static inline bool dl_bw_visited(int cpu, u64 gen)
-{
- return false;
-}
-
-static inline
-void __dl_update(struct dl_bw *dl_b, s64 bw)
-{
- struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
-
- dl->extra_bw += bw;
-}
-#endif
static inline
void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
@@ -237,8 +214,8 @@ void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
dl_rq->running_bw += dl_bw;
- SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
- SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
+ WARN_ON_ONCE(dl_rq->running_bw < old); /* overflow */
+ WARN_ON_ONCE(dl_rq->running_bw > dl_rq->this_bw);
/* kick cpufreq (see the comment in kernel/sched/sched.h). */
cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
}
@@ -250,7 +227,7 @@ void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
dl_rq->running_bw -= dl_bw;
- SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
+ WARN_ON_ONCE(dl_rq->running_bw > old); /* underflow */
if (dl_rq->running_bw > old)
dl_rq->running_bw = 0;
/* kick cpufreq (see the comment in kernel/sched/sched.h). */
@@ -264,7 +241,7 @@ void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
dl_rq->this_bw += dl_bw;
- SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
+ WARN_ON_ONCE(dl_rq->this_bw < old); /* overflow */
}
static inline
@@ -274,10 +251,10 @@ void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
dl_rq->this_bw -= dl_bw;
- SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
+ WARN_ON_ONCE(dl_rq->this_bw > old); /* underflow */
if (dl_rq->this_bw > old)
dl_rq->this_bw = 0;
- SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
+ WARN_ON_ONCE(dl_rq->running_bw > dl_rq->this_bw);
}
static inline
@@ -308,19 +285,12 @@ void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
__sub_running_bw(dl_se->dl_bw, dl_rq);
}
-static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+static void dl_rq_change_utilization(struct rq *rq, struct sched_dl_entity *dl_se, u64 new_bw)
{
- struct rq *rq;
-
- WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
-
- if (task_on_rq_queued(p))
- return;
+ if (dl_se->dl_non_contending) {
+ sub_running_bw(dl_se, &rq->dl);
+ dl_se->dl_non_contending = 0;
- rq = task_rq(p);
- if (p->dl.dl_non_contending) {
- sub_running_bw(&p->dl, &rq->dl);
- p->dl.dl_non_contending = 0;
/*
* If the timer handler is currently running and the
* timer cannot be canceled, inactive_task_timer()
@@ -328,13 +298,50 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
* will not touch the rq's active utilization,
* so we are still safe.
*/
- if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
- put_task_struct(p);
+ if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) {
+ if (!dl_server(dl_se))
+ put_task_struct(dl_task_of(dl_se));
+ }
}
- __sub_rq_bw(p->dl.dl_bw, &rq->dl);
+ __sub_rq_bw(dl_se->dl_bw, &rq->dl);
__add_rq_bw(new_bw, &rq->dl);
}
+static __always_inline
+void cancel_dl_timer(struct sched_dl_entity *dl_se, struct hrtimer *timer)
+{
+ /*
+ * If the timer callback was running (hrtimer_try_to_cancel == -1),
+ * it will eventually call put_task_struct().
+ */
+ if (hrtimer_try_to_cancel(timer) == 1 && !dl_server(dl_se))
+ put_task_struct(dl_task_of(dl_se));
+}
+
+static __always_inline
+void cancel_replenish_timer(struct sched_dl_entity *dl_se)
+{
+ cancel_dl_timer(dl_se, &dl_se->dl_timer);
+}
+
+static __always_inline
+void cancel_inactive_timer(struct sched_dl_entity *dl_se)
+{
+ cancel_dl_timer(dl_se, &dl_se->inactive_timer);
+}
+
+static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+{
+ WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
+
+ if (task_on_rq_queued(p))
+ return;
+
+ dl_rq_change_utilization(task_rq(p), &p->dl, new_bw);
+}
+
+static void __dl_clear_params(struct sched_dl_entity *dl_se);
+
/*
* The utilization of a task cannot be immediately removed from
* the rq active utilization (running_bw) when the task blocks.
@@ -389,12 +396,11 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
* up, and checks if the task is still in the "ACTIVE non contending"
* state or not (in the second case, it updates running_bw).
*/
-static void task_non_contending(struct task_struct *p)
+static void task_non_contending(struct sched_dl_entity *dl_se, bool dl_task)
{
- struct sched_dl_entity *dl_se = &p->dl;
struct hrtimer *timer = &dl_se->inactive_timer;
- struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
- struct rq *rq = rq_of_dl_rq(dl_rq);
+ struct rq *rq = rq_of_dl_se(dl_se);
+ struct dl_rq *dl_rq = &rq->dl;
s64 zerolag_time;
/*
@@ -424,24 +430,33 @@ static void task_non_contending(struct task_struct *p)
* utilization now, instead of starting a timer
*/
if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
- if (dl_task(p))
+ if (dl_server(dl_se)) {
sub_running_bw(dl_se, dl_rq);
- if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
- struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
-
- if (READ_ONCE(p->__state) == TASK_DEAD)
- sub_rq_bw(&p->dl, &rq->dl);
- raw_spin_lock(&dl_b->lock);
- __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
- raw_spin_unlock(&dl_b->lock);
- __dl_clear_params(p);
+ } else {
+ struct task_struct *p = dl_task_of(dl_se);
+
+ if (dl_task)
+ sub_running_bw(dl_se, dl_rq);
+
+ if (!dl_task || READ_ONCE(p->__state) == TASK_DEAD) {
+ struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+
+ if (READ_ONCE(p->__state) == TASK_DEAD)
+ sub_rq_bw(dl_se, &rq->dl);
+ raw_spin_lock(&dl_b->lock);
+ __dl_sub(dl_b, dl_se->dl_bw, dl_bw_cpus(task_cpu(p)));
+ raw_spin_unlock(&dl_b->lock);
+ __dl_clear_params(dl_se);
+ }
}
return;
}
dl_se->dl_non_contending = 1;
- get_task_struct(p);
+ if (!dl_server(dl_se))
+ get_task_struct(dl_task_of(dl_se));
+
hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD);
}
@@ -468,8 +483,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
* will not touch the rq's active utilization,
* so we are still safe.
*/
- if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
- put_task_struct(dl_task_of(dl_se));
+ cancel_inactive_timer(dl_se);
} else {
/*
* Since "dl_non_contending" is not set, the
@@ -482,10 +496,8 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
}
}
-static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
+static inline int is_leftmost(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
- struct sched_dl_entity *dl_se = &p->dl;
-
return rb_first_cached(&dl_rq->root) == &dl_se->rb_node;
}
@@ -505,24 +517,17 @@ void init_dl_rq(struct dl_rq *dl_rq)
{
dl_rq->root = RB_ROOT_CACHED;
-#ifdef CONFIG_SMP
/* zero means no -deadline tasks */
dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
- dl_rq->dl_nr_migratory = 0;
dl_rq->overloaded = 0;
dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
-#else
- init_dl_bw(&dl_rq->dl_bw);
-#endif
dl_rq->running_bw = 0;
dl_rq->this_bw = 0;
init_dl_rq_bw_ratio(dl_rq);
}
-#ifdef CONFIG_SMP
-
static inline int dl_overloaded(struct rq *rq)
{
return atomic_read(&rq->rd->dlo_count);
@@ -553,39 +558,6 @@ static inline void dl_clear_overload(struct rq *rq)
cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
}
-static void update_dl_migration(struct dl_rq *dl_rq)
-{
- if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
- if (!dl_rq->overloaded) {
- dl_set_overload(rq_of_dl_rq(dl_rq));
- dl_rq->overloaded = 1;
- }
- } else if (dl_rq->overloaded) {
- dl_clear_overload(rq_of_dl_rq(dl_rq));
- dl_rq->overloaded = 0;
- }
-}
-
-static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
-{
- struct task_struct *p = dl_task_of(dl_se);
-
- if (p->nr_cpus_allowed > 1)
- dl_rq->dl_nr_migratory++;
-
- update_dl_migration(dl_rq);
-}
-
-static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
-{
- struct task_struct *p = dl_task_of(dl_se);
-
- if (p->nr_cpus_allowed > 1)
- dl_rq->dl_nr_migratory--;
-
- update_dl_migration(dl_rq);
-}
-
#define __node_2_pdl(node) \
rb_entry((node), struct task_struct, pushable_dl_tasks)
@@ -594,6 +566,11 @@ static inline bool __pushable_less(struct rb_node *a, const struct rb_node *b)
return dl_entity_preempt(&__node_2_pdl(a)->dl, &__node_2_pdl(b)->dl);
}
+static inline int has_pushable_dl_tasks(struct rq *rq)
+{
+ return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
+}
+
/*
* The list of pushable -deadline task is not a plist, like in
* sched_rt.c, it is an rb-tree with tasks ordered by deadline.
@@ -609,6 +586,11 @@ static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
__pushable_less);
if (leftmost)
rq->dl.earliest_dl.next = p->dl.deadline;
+
+ if (!rq->dl.overloaded) {
+ dl_set_overload(rq);
+ rq->dl.overloaded = 1;
+ }
}
static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
@@ -625,11 +607,11 @@ static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
dl_rq->earliest_dl.next = __node_2_pdl(leftmost)->dl.deadline;
RB_CLEAR_NODE(&p->pushable_dl_tasks);
-}
-static inline int has_pushable_dl_tasks(struct rq *rq)
-{
- return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
+ if (!has_pushable_dl_tasks(rq) && rq->dl.overloaded) {
+ dl_clear_overload(rq);
+ rq->dl.overloaded = 0;
+ }
}
static int push_dl_task(struct rq *rq);
@@ -710,7 +692,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
}
/*
- * And we finally need to fixup root_domain(s) bandwidth accounting,
+ * And we finally need to fix up root_domain(s) bandwidth accounting,
* since p is still hanging out in the old (now moved to default) root
* domain.
*/
@@ -730,40 +712,11 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
return later_rq;
}
-#else
-
-static inline
-void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
-{
-}
-
-static inline
-void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
-{
-}
-
-static inline
-void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
-{
-}
-
-static inline
-void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
-{
-}
-
-static inline void deadline_queue_push_tasks(struct rq *rq)
-{
-}
-
-static inline void deadline_queue_pull_task(struct rq *rq)
-{
-}
-#endif /* CONFIG_SMP */
-
+static void
+enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags);
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
-static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
-static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
+static void dequeue_dl_entity(struct sched_dl_entity *dl_se, int flags);
+static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p, int flags);
static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
struct rq *rq)
@@ -771,6 +724,15 @@ static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
/* for non-boosted task, pi_of(dl_se) == dl_se */
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
dl_se->runtime = pi_of(dl_se)->dl_runtime;
+
+ /*
+ * If it is a deferred reservation, and the server
+ * is not handling an starvation case, defer it.
+ */
+ if (dl_se->dl_defer && !dl_se->dl_defer_running) {
+ dl_se->dl_throttled = 1;
+ dl_se->dl_defer_armed = 1;
+ }
}
/*
@@ -790,6 +752,8 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
+ update_rq_clock(rq);
+
WARN_ON(is_dl_boosted(dl_se));
WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
@@ -809,6 +773,9 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
replenish_dl_new_period(dl_se, rq);
}
+static int start_dl_timer(struct sched_dl_entity *dl_se);
+static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t);
+
/*
* Pure Earliest Deadline First (EDF) scheduling does not deal with the
* possibility of a entity lasting more than what it declared, and thus
@@ -837,9 +804,18 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
/*
* This could be the case for a !-dl task that is boosted.
* Just go with full inherited parameters.
+ *
+ * Or, it could be the case of a deferred reservation that
+ * was not able to consume its runtime in background and
+ * reached this point with current u > U.
+ *
+ * In both cases, set a new period.
*/
- if (dl_se->dl_deadline == 0)
- replenish_dl_new_period(dl_se, rq);
+ if (dl_se->dl_deadline == 0 ||
+ (dl_se->dl_defer_armed && dl_entity_overflow(dl_se, rq_clock(rq)))) {
+ dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+ dl_se->runtime = pi_of(dl_se)->dl_runtime;
+ }
if (dl_se->dl_yielded && dl_se->runtime > 0)
dl_se->runtime = 0;
@@ -873,6 +849,44 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
dl_se->dl_yielded = 0;
if (dl_se->dl_throttled)
dl_se->dl_throttled = 0;
+
+ /*
+ * If this is the replenishment of a deferred reservation,
+ * clear the flag and return.
+ */
+ if (dl_se->dl_defer_armed) {
+ dl_se->dl_defer_armed = 0;
+ return;
+ }
+
+ /*
+ * A this point, if the deferred server is not armed, and the deadline
+ * is in the future, if it is not running already, throttle the server
+ * and arm the defer timer.
+ */
+ if (dl_se->dl_defer && !dl_se->dl_defer_running &&
+ dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) {
+ if (!is_dl_boosted(dl_se)) {
+
+ /*
+ * Set dl_se->dl_defer_armed and dl_throttled variables to
+ * inform the start_dl_timer() that this is a deferred
+ * activation.
+ */
+ dl_se->dl_defer_armed = 1;
+ dl_se->dl_throttled = 1;
+ if (!start_dl_timer(dl_se)) {
+ /*
+ * If for whatever reason (delays), a previous timer was
+ * queued but not serviced, cancel it and clean the
+ * deferrable server variables intended for start_dl_timer().
+ */
+ hrtimer_try_to_cancel(&dl_se->dl_timer);
+ dl_se->dl_defer_armed = 0;
+ dl_se->dl_throttled = 0;
+ }
+ }
+ }
}
/*
@@ -992,7 +1006,7 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
* is detected, the runtime and deadline need to be updated.
*
* If the task has an implicit deadline, i.e., deadline == period, the Original
- * CBS is applied. the runtime is replenished and a new absolute deadline is
+ * CBS is applied. The runtime is replenished and a new absolute deadline is
* set, as in the previous cases.
*
* However, the Original CBS does not work properly for tasks with
@@ -1010,8 +1024,7 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
*/
static void update_dl_entity(struct sched_dl_entity *dl_se)
{
- struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
- struct rq *rq = rq_of_dl_rq(dl_rq);
+ struct rq *rq = rq_of_dl_se(dl_se);
if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
dl_entity_overflow(dl_se, rq_clock(rq))) {
@@ -1024,6 +1037,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
}
replenish_dl_new_period(dl_se, rq);
+ } else if (dl_server(dl_se) && dl_se->dl_defer) {
+ /*
+ * The server can still use its previous deadline, so check if
+ * it left the dl_defer_running state.
+ */
+ if (!dl_se->dl_defer_running) {
+ dl_se->dl_defer_armed = 1;
+ dl_se->dl_throttled = 1;
+ }
}
}
@@ -1042,11 +1064,11 @@ static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
* actually started or not (i.e., the replenishment instant is in
* the future or in the past).
*/
-static int start_dl_timer(struct task_struct *p)
+static int start_dl_timer(struct sched_dl_entity *dl_se)
{
- struct sched_dl_entity *dl_se = &p->dl;
struct hrtimer *timer = &dl_se->dl_timer;
- struct rq *rq = task_rq(p);
+ struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+ struct rq *rq = rq_of_dl_rq(dl_rq);
ktime_t now, act;
s64 delta;
@@ -1056,8 +1078,21 @@ static int start_dl_timer(struct task_struct *p)
* We want the timer to fire at the deadline, but considering
* that it is actually coming from rq->clock and not from
* hrtimer's time base reading.
+ *
+ * The deferred reservation will have its timer set to
+ * (deadline - runtime). At that point, the CBS rule will decide
+ * if the current deadline can be used, or if a replenishment is
+ * required to avoid add too much pressure on the system
+ * (current u > U).
*/
- act = ns_to_ktime(dl_next_period(dl_se));
+ if (dl_se->dl_defer_armed) {
+ WARN_ON_ONCE(!dl_se->dl_throttled);
+ act = ns_to_ktime(dl_se->deadline - dl_se->runtime);
+ } else {
+ /* act = deadline - rel-deadline + period */
+ act = ns_to_ktime(dl_next_period(dl_se));
+ }
+
now = hrtimer_cb_get_time(timer);
delta = ktime_to_ns(now) - rq_clock(rq);
act = ktime_add_ns(act, delta);
@@ -1080,13 +1115,91 @@ static int start_dl_timer(struct task_struct *p)
* and observe our state.
*/
if (!hrtimer_is_queued(timer)) {
- get_task_struct(p);
+ if (!dl_server(dl_se))
+ get_task_struct(dl_task_of(dl_se));
hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
}
return 1;
}
+static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
+{
+ /*
+ * Queueing this task back might have overloaded rq, check if we need
+ * to kick someone away.
+ */
+ if (has_pushable_dl_tasks(rq)) {
+ /*
+ * Nothing relies on rq->lock after this, so its safe to drop
+ * rq->lock.
+ */
+ rq_unpin_lock(rq, rf);
+ push_dl_task(rq);
+ rq_repin_lock(rq, rf);
+ }
+}
+
+/* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
+static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
+
+static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
+{
+ struct rq *rq = rq_of_dl_se(dl_se);
+ u64 fw;
+
+ scoped_guard (rq_lock, rq) {
+ struct rq_flags *rf = &scope.rf;
+
+ if (!dl_se->dl_throttled || !dl_se->dl_runtime)
+ return HRTIMER_NORESTART;
+
+ sched_clock_tick();
+ update_rq_clock(rq);
+
+ /*
+ * Make sure current has propagated its pending runtime into
+ * any relevant server through calling dl_server_update() and
+ * friends.
+ */
+ rq->donor->sched_class->update_curr(rq);
+
+ if (dl_se->dl_defer_idle) {
+ dl_server_stop(dl_se);
+ return HRTIMER_NORESTART;
+ }
+
+ if (dl_se->dl_defer_armed) {
+ /*
+ * First check if the server could consume runtime in background.
+ * If so, it is possible to push the defer timer for this amount
+ * of time. The dl_server_min_res serves as a limit to avoid
+ * forwarding the timer for a too small amount of time.
+ */
+ if (dl_time_before(rq_clock(dl_se->rq),
+ (dl_se->deadline - dl_se->runtime - dl_server_min_res))) {
+
+ /* reset the defer timer */
+ fw = dl_se->deadline - rq_clock(dl_se->rq) - dl_se->runtime;
+
+ hrtimer_forward_now(timer, ns_to_ktime(fw));
+ return HRTIMER_RESTART;
+ }
+
+ dl_se->dl_defer_running = 1;
+ }
+
+ enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH);
+
+ if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &dl_se->rq->curr->dl))
+ resched_curr(rq);
+
+ __push_dl_task(rq, rf);
+ }
+
+ return HRTIMER_NORESTART;
+}
+
/*
* This is the bandwidth enforcement timer callback. If here, we know
* a task is not on its dl_rq, since the fact that the timer was running
@@ -1105,10 +1218,14 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
struct sched_dl_entity *dl_se = container_of(timer,
struct sched_dl_entity,
dl_timer);
- struct task_struct *p = dl_task_of(dl_se);
+ struct task_struct *p;
struct rq_flags rf;
struct rq *rq;
+ if (dl_server(dl_se))
+ return dl_server_timer(timer, dl_se);
+
+ p = dl_task_of(dl_se);
rq = task_rq_lock(p, &rf);
/*
@@ -1154,7 +1271,6 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
goto unlock;
}
-#ifdef CONFIG_SMP
if (unlikely(!rq->online)) {
/*
* If the runqueue is no longer available, migrate the
@@ -1171,29 +1287,14 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
* there.
*/
}
-#endif
enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
- if (dl_task(rq->curr))
- check_preempt_curr_dl(rq, p, 0);
+ if (dl_task(rq->donor))
+ wakeup_preempt_dl(rq, p, 0);
else
resched_curr(rq);
-#ifdef CONFIG_SMP
- /*
- * Queueing this task back might have overloaded rq, check if we need
- * to kick someone away.
- */
- if (has_pushable_dl_tasks(rq)) {
- /*
- * Nothing relies on rq->lock after this, so its safe to drop
- * rq->lock.
- */
- rq_unpin_lock(rq, &rf);
- push_dl_task(rq);
- rq_repin_lock(rq, &rf);
- }
-#endif
+ __push_dl_task(rq, &rf);
unlock:
task_rq_unlock(rq, p, &rf);
@@ -1207,12 +1308,11 @@ unlock:
return HRTIMER_NORESTART;
}
-void init_dl_task_timer(struct sched_dl_entity *dl_se)
+static void init_dl_task_timer(struct sched_dl_entity *dl_se)
{
struct hrtimer *timer = &dl_se->dl_timer;
- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
- timer->function = dl_task_timer;
+ hrtimer_setup(timer, dl_task_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
}
/*
@@ -1235,12 +1335,11 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
*/
static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
{
- struct task_struct *p = dl_task_of(dl_se);
- struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
+ struct rq *rq = rq_of_dl_se(dl_se);
if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
- if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
+ if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(dl_se)))
return;
dl_se->dl_throttled = 1;
if (dl_se->runtime > 0)
@@ -1266,7 +1365,7 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
* Since rq->dl.running_bw and rq->dl.this_bw contain utilizations multiplied
* by 2^BW_SHIFT, the result has to be shifted right by BW_SHIFT.
* Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT, dl_bw
- * is multiped by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
+ * is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
* Since delta is a 64 bit variable, to have an overflow its value should be
* larger than 2^(64 - 20 - 8), which is more than 64 seconds. So, overflow is
* not an issue here.
@@ -1291,46 +1390,9 @@ static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
return (delta * u_act) >> BW_SHIFT;
}
-/*
- * Update the current task's runtime statistics (provided it is still
- * a -deadline task and has not been removed from the dl_rq).
- */
-static void update_curr_dl(struct rq *rq)
+s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
{
- struct task_struct *curr = rq->curr;
- struct sched_dl_entity *dl_se = &curr->dl;
- u64 delta_exec, scaled_delta_exec;
- int cpu = cpu_of(rq);
- u64 now;
-
- if (!dl_task(curr) || !on_dl_rq(dl_se))
- return;
-
- /*
- * Consumed budget is computed considering the time as
- * observed by schedulable tasks (excluding time spent
- * in hardirq context, etc.). Deadlines are instead
- * computed using hard walltime. This seems to be the more
- * natural solution, but the full ramifications of this
- * approach need further study.
- */
- now = rq_clock_task(rq);
- delta_exec = now - curr->se.exec_start;
- if (unlikely((s64)delta_exec <= 0)) {
- if (unlikely(dl_se->dl_yielded))
- goto throttle;
- return;
- }
-
- schedstat_set(curr->stats.exec_max,
- max(curr->stats.exec_max, delta_exec));
-
- trace_sched_stat_runtime(curr, delta_exec, 0);
-
- update_current_exec_runtime(curr, now, delta_exec);
-
- if (dl_entity_is_special(dl_se))
- return;
+ s64 scaled_delta_exec;
/*
* For tasks that participate in GRUB, we implement GRUB-PA: the
@@ -1340,10 +1402,9 @@ static void update_curr_dl(struct rq *rq)
* according to current frequency and CPU maximum capacity.
*/
if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
- scaled_delta_exec = grub_reclaim(delta_exec,
- rq,
- &curr->dl);
+ scaled_delta_exec = grub_reclaim(delta_exec, rq, dl_se);
} else {
+ int cpu = cpu_of(rq);
unsigned long scale_freq = arch_scale_freq_capacity(cpu);
unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
@@ -1351,8 +1412,96 @@ static void update_curr_dl(struct rq *rq)
scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
}
+ return scaled_delta_exec;
+}
+
+static inline void
+update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, int flags);
+
+static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
+{
+ bool idle = rq->curr == rq->idle;
+ s64 scaled_delta_exec;
+
+ if (unlikely(delta_exec <= 0)) {
+ if (unlikely(dl_se->dl_yielded))
+ goto throttle;
+ return;
+ }
+
+ if (dl_server(dl_se) && dl_se->dl_throttled && !dl_se->dl_defer)
+ return;
+
+ if (dl_entity_is_special(dl_se))
+ return;
+
+ scaled_delta_exec = delta_exec;
+ if (!dl_server(dl_se))
+ scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec);
+
dl_se->runtime -= scaled_delta_exec;
+ if (dl_se->dl_defer_idle && !idle)
+ dl_se->dl_defer_idle = 0;
+
+ /*
+ * The fair server can consume its runtime while throttled (not queued/
+ * running as regular CFS).
+ *
+ * If the server consumes its entire runtime in this state. The server
+ * is not required for the current period. Thus, reset the server by
+ * starting a new period, pushing the activation.
+ */
+ if (dl_se->dl_defer && dl_se->dl_throttled && dl_runtime_exceeded(dl_se)) {
+ /*
+ * Non-servers would never get time accounted while throttled.
+ */
+ WARN_ON_ONCE(!dl_server(dl_se));
+
+ /*
+ * While the server is marked idle, do not push out the
+ * activation further, instead wait for the period timer
+ * to lapse and stop the server.
+ */
+ if (dl_se->dl_defer_idle && idle) {
+ /*
+ * The timer is at the zero-laxity point, this means
+ * dl_server_stop() / dl_server_start() can happen
+ * while now < deadline. This means update_dl_entity()
+ * will not replenish. Additionally start_dl_timer()
+ * will be set for 'deadline - runtime'. Negative
+ * runtime will not do.
+ */
+ dl_se->runtime = 0;
+ return;
+ }
+
+ /*
+ * If the server was previously activated - the starving condition
+ * took place, it this point it went away because the fair scheduler
+ * was able to get runtime in background. So return to the initial
+ * state.
+ */
+ dl_se->dl_defer_running = 0;
+
+ hrtimer_try_to_cancel(&dl_se->dl_timer);
+
+ replenish_dl_new_period(dl_se, dl_se->rq);
+
+ if (idle)
+ dl_se->dl_defer_idle = 1;
+
+ /*
+ * Not being able to start the timer seems problematic. If it could not
+ * be started for whatever reason, we need to "unthrottle" the DL server
+ * and queue right away. Otherwise nothing might queue it. That's similar
+ * to what enqueue_dl_entity() does on start_dl_timer==0. For now, just warn.
+ */
+ WARN_ON_ONCE(!start_dl_timer(dl_se));
+
+ return;
+ }
+
throttle:
if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
dl_se->dl_throttled = 1;
@@ -1362,15 +1511,34 @@ throttle:
(dl_se->flags & SCHED_FLAG_DL_OVERRUN))
dl_se->dl_overrun = 1;
- __dequeue_task_dl(rq, curr, 0);
- if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
- enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
+ dequeue_dl_entity(dl_se, 0);
+ if (!dl_server(dl_se)) {
+ update_stats_dequeue_dl(&rq->dl, dl_se, 0);
+ dequeue_pushable_dl_task(rq, dl_task_of(dl_se));
+ }
- if (!is_leftmost(curr, &rq->dl))
+ if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(dl_se))) {
+ if (dl_server(dl_se)) {
+ replenish_dl_new_period(dl_se, rq);
+ start_dl_timer(dl_se);
+ } else {
+ enqueue_task_dl(rq, dl_task_of(dl_se), ENQUEUE_REPLENISH);
+ }
+ }
+
+ if (!is_leftmost(dl_se, &rq->dl))
resched_curr(rq);
}
/*
+ * The fair server (sole dl_server) does not account for real-time
+ * workload because it is running fair work.
+ */
+ if (dl_se == &rq->fair_server)
+ return;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ /*
* Because -- for now -- we share the rt bandwidth, we need to
* account our runtime there too, otherwise actual rt tasks
* would be able to exceed the shared quota.
@@ -1394,6 +1562,372 @@ throttle:
rt_rq->rt_time += delta_exec;
raw_spin_unlock(&rt_rq->rt_runtime_lock);
}
+#endif /* CONFIG_RT_GROUP_SCHED */
+}
+
+/*
+ * In the non-defer mode, the idle time is not accounted, as the
+ * server provides a guarantee.
+ *
+ * If the dl_server is in defer mode, the idle time is also considered
+ * as time available for the fair server, avoiding a penalty for the
+ * rt scheduler that did not consumed that time.
+ */
+void dl_server_update_idle(struct sched_dl_entity *dl_se, s64 delta_exec)
+{
+ if (dl_se->dl_server_active && dl_se->dl_runtime && dl_se->dl_defer)
+ update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
+}
+
+void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
+{
+ /* 0 runtime = fair server disabled */
+ if (dl_se->dl_server_active && dl_se->dl_runtime)
+ update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
+}
+
+/*
+ * dl_server && dl_defer:
+ *
+ * 6
+ * +--------------------+
+ * v |
+ * +-------------+ 4 +-----------+ 5 +------------------+
+ * +-> | A:init | <--- | D:running | -----> | E:replenish-wait |
+ * | +-------------+ +-----------+ +------------------+
+ * | | | 1 ^ ^ |
+ * | | 1 +----------+ | 3 |
+ * | v | |
+ * | +--------------------------------+ 2 |
+ * | | | ----+ |
+ * | 8 | B:zero_laxity-wait | | |
+ * | | | <---+ |
+ * | +--------------------------------+ |
+ * | | ^ ^ 2 |
+ * | | 7 | 2 +--------------------+
+ * | v |
+ * | +-------------+ |
+ * +-- | C:idle-wait | -+
+ * +-------------+
+ * ^ 7 |
+ * +---------+
+ *
+ *
+ * [A] - init
+ * dl_server_active = 0
+ * dl_throttled = 0
+ * dl_defer_armed = 0
+ * dl_defer_running = 0/1
+ * dl_defer_idle = 0
+ *
+ * [B] - zero_laxity-wait
+ * dl_server_active = 1
+ * dl_throttled = 1
+ * dl_defer_armed = 1
+ * dl_defer_running = 0
+ * dl_defer_idle = 0
+ *
+ * [C] - idle-wait
+ * dl_server_active = 1
+ * dl_throttled = 1
+ * dl_defer_armed = 1
+ * dl_defer_running = 0
+ * dl_defer_idle = 1
+ *
+ * [D] - running
+ * dl_server_active = 1
+ * dl_throttled = 0
+ * dl_defer_armed = 0
+ * dl_defer_running = 1
+ * dl_defer_idle = 0
+ *
+ * [E] - replenish-wait
+ * dl_server_active = 1
+ * dl_throttled = 1
+ * dl_defer_armed = 0
+ * dl_defer_running = 1
+ * dl_defer_idle = 0
+ *
+ *
+ * [1] A->B, A->D
+ * dl_server_start()
+ * dl_server_active = 1;
+ * enqueue_dl_entity()
+ * update_dl_entity(WAKEUP)
+ * if (!dl_defer_running)
+ * dl_defer_armed = 1;
+ * dl_throttled = 1;
+ * if (dl_throttled && start_dl_timer())
+ * return; // [B]
+ * __enqueue_dl_entity();
+ * // [D]
+ *
+ * // deplete server runtime from client-class
+ * [2] B->B, C->B, E->B
+ * dl_server_update()
+ * update_curr_dl_se() // idle = false
+ * if (dl_defer_idle)
+ * dl_defer_idle = 0;
+ * if (dl_defer && dl_throttled && dl_runtime_exceeded())
+ * dl_defer_running = 0;
+ * hrtimer_try_to_cancel(); // stop timer
+ * replenish_dl_new_period()
+ * // fwd period
+ * dl_throttled = 1;
+ * dl_defer_armed = 1;
+ * start_dl_timer(); // restart timer
+ * // [B]
+ *
+ * // timer actually fires means we have runtime
+ * [3] B->D
+ * dl_server_timer()
+ * if (dl_defer_armed)
+ * dl_defer_running = 1;
+ * enqueue_dl_entity(REPLENISH)
+ * replenish_dl_entity()
+ * // fwd period
+ * if (dl_throttled)
+ * dl_throttled = 0;
+ * if (dl_defer_armed)
+ * dl_defer_armed = 0;
+ * __enqueue_dl_entity();
+ * // [D]
+ *
+ * // schedule server
+ * [4] D->A
+ * pick_task_dl()
+ * p = server_pick_task();
+ * if (!p)
+ * dl_server_stop()
+ * dequeue_dl_entity();
+ * hrtimer_try_to_cancel();
+ * dl_defer_armed = 0;
+ * dl_throttled = 0;
+ * dl_server_active = 0;
+ * // [A]
+ * return p;
+ *
+ * // server running
+ * [5] D->E
+ * update_curr_dl_se()
+ * if (dl_runtime_exceeded())
+ * dl_throttled = 1;
+ * dequeue_dl_entity();
+ * start_dl_timer();
+ * // [E]
+ *
+ * // server replenished
+ * [6] E->D
+ * dl_server_timer()
+ * enqueue_dl_entity(REPLENISH)
+ * replenish_dl_entity()
+ * fwd-period
+ * if (dl_throttled)
+ * dl_throttled = 0;
+ * __enqueue_dl_entity();
+ * // [D]
+ *
+ * // deplete server runtime from idle
+ * [7] B->C, C->C
+ * dl_server_update_idle()
+ * update_curr_dl_se() // idle = true
+ * if (dl_defer && dl_throttled && dl_runtime_exceeded())
+ * if (dl_defer_idle)
+ * return;
+ * dl_defer_running = 0;
+ * hrtimer_try_to_cancel();
+ * replenish_dl_new_period()
+ * // fwd period
+ * dl_throttled = 1;
+ * dl_defer_armed = 1;
+ * dl_defer_idle = 1;
+ * start_dl_timer(); // restart timer
+ * // [C]
+ *
+ * // stop idle server
+ * [8] C->A
+ * dl_server_timer()
+ * if (dl_defer_idle)
+ * dl_server_stop();
+ * // [A]
+ *
+ *
+ * digraph dl_server {
+ * "A:init" -> "B:zero_laxity-wait" [label="1:dl_server_start"]
+ * "A:init" -> "D:running" [label="1:dl_server_start"]
+ * "B:zero_laxity-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
+ * "B:zero_laxity-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
+ * "B:zero_laxity-wait" -> "D:running" [label="3:dl_server_timer"]
+ * "C:idle-wait" -> "A:init" [label="8:dl_server_timer"]
+ * "C:idle-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
+ * "C:idle-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
+ * "D:running" -> "A:init" [label="4:pick_task_dl"]
+ * "D:running" -> "E:replenish-wait" [label="5:update_curr_dl_se"]
+ * "E:replenish-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
+ * "E:replenish-wait" -> "D:running" [label="6:dl_server_timer"]
+ * }
+ *
+ *
+ * Notes:
+ *
+ * - When there are fair tasks running the most likely loop is [2]->[2].
+ * the dl_server never actually runs, the timer never fires.
+ *
+ * - When there is actual fair starvation; the timer fires and starts the
+ * dl_server. This will then throttle and replenish like a normal DL
+ * task. Notably it will not 'defer' again.
+ *
+ * - When idle it will push the actication forward once, and then wait
+ * for the timer to hit or a non-idle update to restart things.
+ */
+void dl_server_start(struct sched_dl_entity *dl_se)
+{
+ struct rq *rq = dl_se->rq;
+
+ if (!dl_server(dl_se) || dl_se->dl_server_active)
+ return;
+
+ /*
+ * Update the current task to 'now'.
+ */
+ rq->donor->sched_class->update_curr(rq);
+
+ if (WARN_ON_ONCE(!cpu_online(cpu_of(rq))))
+ return;
+
+ dl_se->dl_server_active = 1;
+ enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
+ if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl))
+ resched_curr(dl_se->rq);
+}
+
+void dl_server_stop(struct sched_dl_entity *dl_se)
+{
+ if (!dl_server(dl_se) || !dl_server_active(dl_se))
+ return;
+
+ dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
+ hrtimer_try_to_cancel(&dl_se->dl_timer);
+ dl_se->dl_defer_armed = 0;
+ dl_se->dl_throttled = 0;
+ dl_se->dl_defer_idle = 0;
+ dl_se->dl_server_active = 0;
+}
+
+void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
+ dl_server_pick_f pick_task)
+{
+ dl_se->rq = rq;
+ dl_se->server_pick_task = pick_task;
+}
+
+void sched_init_dl_servers(void)
+{
+ int cpu;
+ struct rq *rq;
+ struct sched_dl_entity *dl_se;
+
+ for_each_online_cpu(cpu) {
+ u64 runtime = 50 * NSEC_PER_MSEC;
+ u64 period = 1000 * NSEC_PER_MSEC;
+
+ rq = cpu_rq(cpu);
+
+ guard(rq_lock_irq)(rq);
+
+ dl_se = &rq->fair_server;
+
+ WARN_ON(dl_server(dl_se));
+
+ dl_server_apply_params(dl_se, runtime, period, 1);
+
+ dl_se->dl_server = 1;
+ dl_se->dl_defer = 1;
+ setup_new_dl_entity(dl_se);
+ }
+}
+
+void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq)
+{
+ u64 new_bw = dl_se->dl_bw;
+ int cpu = cpu_of(rq);
+ struct dl_bw *dl_b;
+
+ dl_b = dl_bw_of(cpu_of(rq));
+ guard(raw_spinlock)(&dl_b->lock);
+
+ if (!dl_bw_cpus(cpu))
+ return;
+
+ __dl_add(dl_b, new_bw, dl_bw_cpus(cpu));
+}
+
+int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 period, bool init)
+{
+ u64 old_bw = init ? 0 : to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+ u64 new_bw = to_ratio(period, runtime);
+ struct rq *rq = dl_se->rq;
+ int cpu = cpu_of(rq);
+ struct dl_bw *dl_b;
+ unsigned long cap;
+ int retval = 0;
+ int cpus;
+
+ dl_b = dl_bw_of(cpu);
+ guard(raw_spinlock)(&dl_b->lock);
+
+ cpus = dl_bw_cpus(cpu);
+ cap = dl_bw_capacity(cpu);
+
+ if (__dl_overflow(dl_b, cap, old_bw, new_bw))
+ return -EBUSY;
+
+ if (init) {
+ __add_rq_bw(new_bw, &rq->dl);
+ __dl_add(dl_b, new_bw, cpus);
+ } else {
+ __dl_sub(dl_b, dl_se->dl_bw, cpus);
+ __dl_add(dl_b, new_bw, cpus);
+
+ dl_rq_change_utilization(rq, dl_se, new_bw);
+ }
+
+ dl_se->dl_runtime = runtime;
+ dl_se->dl_deadline = period;
+ dl_se->dl_period = period;
+
+ dl_se->runtime = 0;
+ dl_se->deadline = 0;
+
+ dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+ dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
+
+ return retval;
+}
+
+/*
+ * Update the current task's runtime statistics (provided it is still
+ * a -deadline task and has not been removed from the dl_rq).
+ */
+static void update_curr_dl(struct rq *rq)
+{
+ struct task_struct *donor = rq->donor;
+ struct sched_dl_entity *dl_se = &donor->dl;
+ s64 delta_exec;
+
+ if (!dl_task(donor) || !on_dl_rq(dl_se))
+ return;
+
+ /*
+ * Consumed budget is computed considering the time as
+ * observed by schedulable tasks (excluding time spent
+ * in hardirq context, etc.). Deadlines are instead
+ * computed using hard walltime. This seems to be the more
+ * natural solution, but the full ramifications of this
+ * approach need further study.
+ */
+ delta_exec = update_curr_common(rq);
+ update_curr_dl_se(rq, dl_se, delta_exec);
}
static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
@@ -1401,15 +1935,24 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
struct sched_dl_entity *dl_se = container_of(timer,
struct sched_dl_entity,
inactive_timer);
- struct task_struct *p = dl_task_of(dl_se);
+ struct task_struct *p = NULL;
struct rq_flags rf;
struct rq *rq;
- rq = task_rq_lock(p, &rf);
+ if (!dl_server(dl_se)) {
+ p = dl_task_of(dl_se);
+ rq = task_rq_lock(p, &rf);
+ } else {
+ rq = dl_se->rq;
+ rq_lock(rq, &rf);
+ }
sched_clock_tick();
update_rq_clock(rq);
+ if (dl_server(dl_se))
+ goto no_task;
+
if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
@@ -1422,35 +1965,39 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
raw_spin_lock(&dl_b->lock);
__dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
raw_spin_unlock(&dl_b->lock);
- __dl_clear_params(p);
+ __dl_clear_params(dl_se);
goto unlock;
}
+
+no_task:
if (dl_se->dl_non_contending == 0)
goto unlock;
sub_running_bw(dl_se, &rq->dl);
dl_se->dl_non_contending = 0;
unlock:
- task_rq_unlock(rq, p, &rf);
- put_task_struct(p);
+
+ if (!dl_server(dl_se)) {
+ task_rq_unlock(rq, p, &rf);
+ put_task_struct(p);
+ } else {
+ rq_unlock(rq, &rf);
+ }
return HRTIMER_NORESTART;
}
-void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
+static void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
{
struct hrtimer *timer = &dl_se->inactive_timer;
- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
- timer->function = inactive_task_timer;
+ hrtimer_setup(timer, inactive_task_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
}
#define __node_2_dle(node) \
rb_entry((node), struct sched_dl_entity, rb_node)
-#ifdef CONFIG_SMP
-
static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
{
struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -1475,7 +2022,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
if (!dl_rq->dl_nr_running) {
dl_rq->earliest_dl.curr = 0;
dl_rq->earliest_dl.next = 0;
- cpudl_clear(&rq->rd->cpudl, rq->cpu);
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, rq->online);
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
} else {
struct rb_node *leftmost = rb_first_cached(&dl_rq->root);
@@ -1486,39 +2033,29 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
}
}
-#else
-
-static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
-static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
-
-#endif /* CONFIG_SMP */
-
static inline
void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
- int prio = dl_task_of(dl_se)->prio;
u64 deadline = dl_se->deadline;
- WARN_ON(!dl_prio(prio));
dl_rq->dl_nr_running++;
- add_nr_running(rq_of_dl_rq(dl_rq), 1);
+
+ if (!dl_server(dl_se))
+ add_nr_running(rq_of_dl_rq(dl_rq), 1);
inc_dl_deadline(dl_rq, deadline);
- inc_dl_migration(dl_se, dl_rq);
}
static inline
void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
- int prio = dl_task_of(dl_se)->prio;
-
- WARN_ON(!dl_prio(prio));
WARN_ON(!dl_rq->dl_nr_running);
dl_rq->dl_nr_running--;
- sub_nr_running(rq_of_dl_rq(dl_rq), 1);
+
+ if (!dl_server(dl_se))
+ sub_nr_running(rq_of_dl_rq(dl_rq), 1);
dec_dl_deadline(dl_rq, dl_se->deadline);
- dec_dl_migration(dl_se, dl_rq);
}
static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
@@ -1526,46 +2063,40 @@ static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
}
-static inline struct sched_statistics *
+static __always_inline struct sched_statistics *
__schedstats_from_dl_se(struct sched_dl_entity *dl_se)
{
+ if (!schedstat_enabled())
+ return NULL;
+
+ if (dl_server(dl_se))
+ return NULL;
+
return &dl_task_of(dl_se)->stats;
}
static inline void
update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
{
- struct sched_statistics *stats;
-
- if (!schedstat_enabled())
- return;
-
- stats = __schedstats_from_dl_se(dl_se);
- __update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+ struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+ if (stats)
+ __update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
}
static inline void
update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
{
- struct sched_statistics *stats;
-
- if (!schedstat_enabled())
- return;
-
- stats = __schedstats_from_dl_se(dl_se);
- __update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+ struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+ if (stats)
+ __update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
}
static inline void
update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
{
- struct sched_statistics *stats;
-
- if (!schedstat_enabled())
- return;
-
- stats = __schedstats_from_dl_se(dl_se);
- __update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+ struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+ if (stats)
+ __update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
}
static inline void
@@ -1635,6 +2166,41 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags);
/*
+ * Check if a constrained deadline task was activated
+ * after the deadline but before the next period.
+ * If that is the case, the task will be throttled and
+ * the replenishment timer will be set to the next period.
+ */
+ if (!dl_se->dl_throttled && !dl_is_implicit(dl_se))
+ dl_check_constrained_dl(dl_se);
+
+ if (flags & (ENQUEUE_RESTORE|ENQUEUE_MIGRATING)) {
+ struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+
+ add_rq_bw(dl_se, dl_rq);
+ add_running_bw(dl_se, dl_rq);
+ }
+
+ /*
+ * If p is throttled, we do not enqueue it. In fact, if it exhausted
+ * its budget it needs a replenishment and, since it now is on
+ * its rq, the bandwidth timer callback (which clearly has not
+ * run yet) will take care of this.
+ * However, the active utilization does not depend on the fact
+ * that the task is on the runqueue or not (but depends on the
+ * task's state - in GRUB parlance, "inactive" vs "active contending").
+ * In other words, even if a task is throttled its utilization must
+ * be counted in the active utilization; hence, we need to call
+ * add_running_bw().
+ */
+ if (!dl_se->dl_defer && dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
+ if (flags & ENQUEUE_WAKEUP)
+ task_contending(dl_se, flags);
+
+ return;
+ }
+
+ /*
* If this is a wakeup or a new instance, the scheduling
* parameters of the task might need updating. Otherwise,
* we want a replenishment of its runtime.
@@ -1645,17 +2211,55 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
} else if (flags & ENQUEUE_REPLENISH) {
replenish_dl_entity(dl_se);
} else if ((flags & ENQUEUE_RESTORE) &&
- dl_time_before(dl_se->deadline,
- rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
+ !is_dl_boosted(dl_se) &&
+ dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_se(dl_se)))) {
setup_new_dl_entity(dl_se);
}
+ /*
+ * If the reservation is still throttled, e.g., it got replenished but is a
+ * deferred task and still got to wait, don't enqueue.
+ */
+ if (dl_se->dl_throttled && start_dl_timer(dl_se))
+ return;
+
+ /*
+ * We're about to enqueue, make sure we're not ->dl_throttled!
+ * In case the timer was not started, say because the defer time
+ * has passed, mark as not throttled and mark unarmed.
+ * Also cancel earlier timers, since letting those run is pointless.
+ */
+ if (dl_se->dl_throttled) {
+ hrtimer_try_to_cancel(&dl_se->dl_timer);
+ dl_se->dl_defer_armed = 0;
+ dl_se->dl_throttled = 0;
+ }
+
__enqueue_dl_entity(dl_se);
}
-static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
+static void dequeue_dl_entity(struct sched_dl_entity *dl_se, int flags)
{
__dequeue_dl_entity(dl_se);
+
+ if (flags & (DEQUEUE_SAVE|DEQUEUE_MIGRATING)) {
+ struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+
+ sub_running_bw(dl_se, dl_rq);
+ sub_rq_bw(dl_se, dl_rq);
+ }
+
+ /*
+ * This check allows to start the inactive timer (or to immediately
+ * decrease the active utilization, if needed) in two cases:
+ * when the task blocks and when it is terminating
+ * (p->state == TASK_DEAD). We can handle the two cases in the same
+ * way, because from GRUB's point of view the same thing is happening
+ * (the task moves from "active contending" to "active non contending"
+ * or "inactive")
+ */
+ if (flags & DEQUEUE_SLEEP)
+ task_non_contending(dl_se, true);
}
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
@@ -1679,7 +2283,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
* problem if it fires concurrently: boosted threads
* are ignored in dl_task_timer().
*/
- hrtimer_try_to_cancel(&p->dl.dl_timer);
+ cancel_replenish_timer(&p->dl);
p->dl.dl_throttled = 0;
}
} else if (!dl_prio(p->normal_prio)) {
@@ -1700,76 +2304,36 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
return;
}
- /*
- * Check if a constrained deadline task was activated
- * after the deadline but before the next period.
- * If that is the case, the task will be throttled and
- * the replenishment timer will be set to the next period.
- */
- if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
- dl_check_constrained_dl(&p->dl);
+ check_schedstat_required();
+ update_stats_wait_start_dl(dl_rq_of_se(&p->dl), &p->dl);
- if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
- add_rq_bw(&p->dl, &rq->dl);
- add_running_bw(&p->dl, &rq->dl);
- }
+ if (p->on_rq == TASK_ON_RQ_MIGRATING)
+ flags |= ENQUEUE_MIGRATING;
- /*
- * If p is throttled, we do not enqueue it. In fact, if it exhausted
- * its budget it needs a replenishment and, since it now is on
- * its rq, the bandwidth timer callback (which clearly has not
- * run yet) will take care of this.
- * However, the active utilization does not depend on the fact
- * that the task is on the runqueue or not (but depends on the
- * task's state - in GRUB parlance, "inactive" vs "active contending").
- * In other words, even if a task is throttled its utilization must
- * be counted in the active utilization; hence, we need to call
- * add_running_bw().
- */
- if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
- if (flags & ENQUEUE_WAKEUP)
- task_contending(&p->dl, flags);
+ enqueue_dl_entity(&p->dl, flags);
+ if (dl_server(&p->dl))
return;
- }
-
- check_schedstat_required();
- update_stats_wait_start_dl(dl_rq_of_se(&p->dl), &p->dl);
- enqueue_dl_entity(&p->dl, flags);
+ if (task_is_blocked(p))
+ return;
- if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+ if (!task_current(rq, p) && !p->dl.dl_throttled && p->nr_cpus_allowed > 1)
enqueue_pushable_dl_task(rq, p);
}
-static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
-{
- update_stats_dequeue_dl(&rq->dl, &p->dl, flags);
- dequeue_dl_entity(&p->dl);
- dequeue_pushable_dl_task(rq, p);
-}
-
-static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+static bool dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
update_curr_dl(rq);
- __dequeue_task_dl(rq, p, flags);
- if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
- sub_running_bw(&p->dl, &rq->dl);
- sub_rq_bw(&p->dl, &rq->dl);
- }
+ if (p->on_rq == TASK_ON_RQ_MIGRATING)
+ flags |= DEQUEUE_MIGRATING;
- /*
- * This check allows to start the inactive timer (or to immediately
- * decrease the active utilization, if needed) in two cases:
- * when the task blocks and when it is terminating
- * (p->state == TASK_DEAD). We can handle the two cases in the same
- * way, because from GRUB's point of view the same thing is happening
- * (the task moves from "active contending" to "active non contending"
- * or "inactive")
- */
- if (flags & DEQUEUE_SLEEP)
- task_non_contending(p);
+ dequeue_dl_entity(&p->dl, flags);
+ if (!p->dl.dl_throttled && !dl_server(&p->dl))
+ dequeue_pushable_dl_task(rq, p);
+
+ return true;
}
/*
@@ -1790,7 +2354,7 @@ static void yield_task_dl(struct rq *rq)
* it and the bandwidth timer will wake it up and will give it
* new scheduling parameters (thanks to dl_yielded=1).
*/
- rq->curr->dl.dl_yielded = 1;
+ rq->donor->dl.dl_yielded = 1;
update_rq_clock(rq);
update_curr_dl(rq);
@@ -1802,8 +2366,6 @@ static void yield_task_dl(struct rq *rq)
rq_clock_skip_update(rq);
}
-#ifdef CONFIG_SMP
-
static inline bool dl_task_is_earliest_deadline(struct task_struct *p,
struct rq *rq)
{
@@ -1817,17 +2379,18 @@ static int find_later_rq(struct task_struct *task);
static int
select_task_rq_dl(struct task_struct *p, int cpu, int flags)
{
- struct task_struct *curr;
+ struct task_struct *curr, *donor;
bool select_rq;
struct rq *rq;
if (!(flags & WF_TTWU))
- goto out;
+ return cpu;
rq = cpu_rq(cpu);
rcu_read_lock();
curr = READ_ONCE(rq->curr); /* unlocked access */
+ donor = READ_ONCE(rq->donor);
/*
* If we are dealing with a -deadline task, we must
@@ -1838,9 +2401,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
* other hand, if it has a shorter deadline, we
* try to make it stay here, it might be important.
*/
- select_rq = unlikely(dl_task(curr)) &&
+ select_rq = unlikely(dl_task(donor)) &&
(curr->nr_cpus_allowed < 2 ||
- !dl_entity_preempt(&p->dl, &curr->dl)) &&
+ !dl_entity_preempt(&p->dl, &donor->dl)) &&
p->nr_cpus_allowed > 1;
/*
@@ -1859,7 +2422,6 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
}
rcu_read_unlock();
-out:
return cpu;
}
@@ -1889,8 +2451,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
* will not touch the rq's active utilization,
* so we are still safe.
*/
- if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
- put_task_struct(p);
+ cancel_inactive_timer(&p->dl);
}
sub_rq_bw(&p->dl, &rq->dl);
rq_unlock(rq, &rf);
@@ -1903,7 +2464,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
* let's hope p can move out.
*/
if (rq->curr->nr_cpus_allowed == 1 ||
- !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
+ !cpudl_find(&rq->rd->cpudl, rq->donor, NULL))
return;
/*
@@ -1933,41 +2494,38 @@ static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
return sched_stop_runnable(rq) || sched_dl_runnable(rq);
}
-#endif /* CONFIG_SMP */
/*
* Only called when both the current and waking task are -deadline
* tasks.
*/
-static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
+static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
int flags)
{
- if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
+ if (dl_entity_preempt(&p->dl, &rq->donor->dl)) {
resched_curr(rq);
return;
}
-#ifdef CONFIG_SMP
/*
* In the unlikely case current and p have the same deadline
* let us try to decide what's the best thing to do...
*/
- if ((p->dl.deadline == rq->curr->dl.deadline) &&
+ if ((p->dl.deadline == rq->donor->dl.deadline) &&
!test_tsk_need_resched(rq->curr))
check_preempt_equal_dl(rq, p);
-#endif /* CONFIG_SMP */
}
#ifdef CONFIG_SCHED_HRTICK
-static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
+static void start_hrtick_dl(struct rq *rq, struct sched_dl_entity *dl_se)
{
- hrtick_start(rq, p->dl.runtime);
+ hrtick_start(rq, dl_se->runtime);
}
-#else /* !CONFIG_SCHED_HRTICK */
-static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
+#else /* !CONFIG_SCHED_HRTICK: */
+static void start_hrtick_dl(struct rq *rq, struct sched_dl_entity *dl_se)
{
}
-#endif
+#endif /* !CONFIG_SCHED_HRTICK */
static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
{
@@ -1984,13 +2542,13 @@ static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
if (!first)
return;
- if (hrtick_enabled_dl(rq))
- start_hrtick_dl(rq, p);
-
- if (rq->curr->sched_class != &dl_sched_class)
+ if (rq->donor->sched_class != &dl_sched_class)
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
deadline_queue_push_tasks(rq);
+
+ if (hrtick_enabled_dl(rq))
+ start_hrtick_dl(rq, &p->dl);
}
static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
@@ -2003,34 +2561,43 @@ static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
return __node_2_dle(left);
}
-static struct task_struct *pick_task_dl(struct rq *rq)
+/*
+ * __pick_next_task_dl - Helper to pick the next -deadline task to run.
+ * @rq: The runqueue to pick the next task from.
+ */
+static struct task_struct *__pick_task_dl(struct rq *rq, struct rq_flags *rf)
{
struct sched_dl_entity *dl_se;
struct dl_rq *dl_rq = &rq->dl;
struct task_struct *p;
+again:
if (!sched_dl_runnable(rq))
return NULL;
dl_se = pick_next_dl_entity(dl_rq);
WARN_ON_ONCE(!dl_se);
- p = dl_task_of(dl_se);
+
+ if (dl_server(dl_se)) {
+ p = dl_se->server_pick_task(dl_se, rf);
+ if (!p) {
+ dl_server_stop(dl_se);
+ goto again;
+ }
+ rq->dl_server = dl_se;
+ } else {
+ p = dl_task_of(dl_se);
+ }
return p;
}
-static struct task_struct *pick_next_task_dl(struct rq *rq)
+static struct task_struct *pick_task_dl(struct rq *rq, struct rq_flags *rf)
{
- struct task_struct *p;
-
- p = pick_task_dl(rq);
- if (p)
- set_next_task_dl(rq, p, true);
-
- return p;
+ return __pick_task_dl(rq, rf);
}
-static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
+static void put_prev_task_dl(struct rq *rq, struct task_struct *p, struct task_struct *next)
{
struct sched_dl_entity *dl_se = &p->dl;
struct dl_rq *dl_rq = &rq->dl;
@@ -2041,6 +2608,10 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
update_curr_dl(rq);
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
+
+ if (task_is_blocked(p))
+ return;
+
if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
enqueue_pushable_dl_task(rq, p);
}
@@ -2064,8 +2635,8 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
* be set and schedule() will start a new hrtick for the next task.
*/
if (hrtick_enabled_dl(rq) && queued && p->dl.runtime > 0 &&
- is_leftmost(p, &rq->dl))
- start_hrtick_dl(rq, p);
+ is_leftmost(&p->dl, &rq->dl))
+ start_hrtick_dl(rq, &p->dl);
}
static void task_fork_dl(struct task_struct *p)
@@ -2076,19 +2647,9 @@ static void task_fork_dl(struct task_struct *p)
*/
}
-#ifdef CONFIG_SMP
-
/* Only try algorithms three times */
#define DL_MAX_TRIES 3
-static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
-{
- if (!task_on_cpu(rq, p) &&
- cpumask_test_cpu(cpu, &p->cpus_mask))
- return 1;
- return 0;
-}
-
/*
* Return the earliest pushable rq's task, which is suitable to be executed
* on the CPU, NULL otherwise:
@@ -2102,21 +2663,19 @@ static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu
return NULL;
next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root);
-
-next_node:
- if (next_node) {
+ while (next_node) {
p = __node_2_pdl(next_node);
- if (pick_dl_task(rq, p, cpu))
+ if (task_is_pushable(rq, p, cpu))
return p;
next_node = rb_next(next_node);
- goto next_node;
}
return NULL;
}
+/* Access rule: must be called on local CPU with preemption disabled */
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
static int find_later_rq(struct task_struct *task)
@@ -2206,6 +2765,25 @@ static int find_later_rq(struct task_struct *task)
return -1;
}
+static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
+{
+ struct task_struct *p;
+
+ if (!has_pushable_dl_tasks(rq))
+ return NULL;
+
+ p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
+
+ WARN_ON_ONCE(rq->cpu != task_cpu(p));
+ WARN_ON_ONCE(task_current(rq, p));
+ WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
+
+ WARN_ON_ONCE(!task_on_rq_queued(p));
+ WARN_ON_ONCE(!dl_task(p));
+
+ return p;
+}
+
/* Locks the rq it finds */
static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
{
@@ -2233,12 +2811,37 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
/* Retry if something changed. */
if (double_lock_balance(rq, later_rq)) {
- if (unlikely(task_rq(task) != rq ||
+ /*
+ * double_lock_balance had to release rq->lock, in the
+ * meantime, task may no longer be fit to be migrated.
+ * Check the following to ensure that the task is
+ * still suitable for migration:
+ * 1. It is possible the task was scheduled,
+ * migrate_disabled was set and then got preempted,
+ * so we must check the task migration disable
+ * flag.
+ * 2. The CPU picked is in the task's affinity.
+ * 3. For throttled task (dl_task_offline_migration),
+ * check the following:
+ * - the task is not on the rq anymore (it was
+ * migrated)
+ * - the task is not on CPU anymore
+ * - the task is still a dl task
+ * - the task is not queued on the rq anymore
+ * 4. For the non-throttled task (push_dl_task), the
+ * check to ensure that this task is still at the
+ * head of the pushable tasks list is enough.
+ */
+ if (unlikely(is_migration_disabled(task) ||
!cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
- task_on_cpu(rq, task) ||
- !dl_task(task) ||
- is_migration_disabled(task) ||
- !task_on_rq_queued(task))) {
+ (task->dl.dl_throttled &&
+ (task_rq(task) != rq ||
+ task_on_cpu(rq, task) ||
+ !dl_task(task) ||
+ !task_on_rq_queued(task))) ||
+ (!task->dl.dl_throttled &&
+ task != pick_next_pushable_dl_task(rq)))) {
+
double_unlock_balance(rq, later_rq);
later_rq = NULL;
break;
@@ -2261,25 +2864,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
return later_rq;
}
-static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
-{
- struct task_struct *p;
-
- if (!has_pushable_dl_tasks(rq))
- return NULL;
-
- p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
-
- WARN_ON_ONCE(rq->cpu != task_cpu(p));
- WARN_ON_ONCE(task_current(rq, p));
- WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
-
- WARN_ON_ONCE(!task_on_rq_queued(p));
- WARN_ON_ONCE(!dl_task(p));
-
- return p;
-}
-
/*
* See if the non running -deadline tasks on this rq
* can be sent to some other CPU where they can preempt
@@ -2291,9 +2875,6 @@ static int push_dl_task(struct rq *rq)
struct rq *later_rq;
int ret = 0;
- if (!rq->dl.overloaded)
- return 0;
-
next_task = pick_next_pushable_dl_task(rq);
if (!next_task)
return 0;
@@ -2304,8 +2885,8 @@ retry:
* can move away, it makes sense to just reschedule
* without going further in pushing next_task.
*/
- if (dl_task(rq->curr) &&
- dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
+ if (dl_task(rq->donor) &&
+ dl_time_before(next_task->dl.deadline, rq->donor->dl.deadline) &&
rq->curr->nr_cpus_allowed > 1) {
resched_curr(rq);
return 0;
@@ -2348,9 +2929,7 @@ retry:
goto retry;
}
- deactivate_task(rq, next_task, 0);
- set_task_cpu(next_task, later_rq->cpu);
- activate_task(later_rq, next_task, 0);
+ move_queued_task_locked(rq, later_rq, next_task);
ret = 1;
resched_curr(later_rq);
@@ -2394,7 +2973,7 @@ static void pull_dl_task(struct rq *this_rq)
src_rq = cpu_rq(cpu);
/*
- * It looks racy, abd it is! However, as in sched_rt.c,
+ * It looks racy, and it is! However, as in sched_rt.c,
* we are fine with this.
*/
if (this_rq->dl.dl_nr_running &&
@@ -2430,15 +3009,13 @@ static void pull_dl_task(struct rq *this_rq)
* deadline than the current task of its runqueue.
*/
if (dl_time_before(p->dl.deadline,
- src_rq->curr->dl.deadline))
+ src_rq->donor->dl.deadline))
goto skip;
if (is_migration_disabled(p)) {
push_task = get_push_task(src_rq);
} else {
- deactivate_task(src_rq, p, 0);
- set_task_cpu(p, this_cpu);
- activate_task(this_rq, p, 0);
+ move_queued_task_locked(src_rq, this_rq, p);
dmin = p->dl.deadline;
resched = true;
}
@@ -2449,9 +3026,11 @@ skip:
double_unlock_balance(this_rq, src_rq);
if (push_task) {
+ preempt_disable();
raw_spin_rq_unlock(this_rq);
stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
push_task, &src_rq->push_work);
+ preempt_enable();
raw_spin_rq_lock(this_rq);
}
}
@@ -2469,9 +3048,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
if (!task_on_cpu(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
p->nr_cpus_allowed > 1 &&
- dl_task(rq->curr) &&
+ dl_task(rq->donor) &&
(rq->curr->nr_cpus_allowed < 2 ||
- !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
+ !dl_entity_preempt(&p->dl, &rq->donor->dl))) {
push_dl_tasks(rq);
}
}
@@ -2515,9 +3094,10 @@ static void rq_online_dl(struct rq *rq)
if (rq->dl.overloaded)
dl_set_overload(rq);
- cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
if (rq->dl.dl_nr_running > 0)
cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
+ else
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, true);
}
/* Assumes rq->lock is held */
@@ -2526,8 +3106,7 @@ static void rq_offline_dl(struct rq *rq)
if (rq->dl.overloaded)
dl_clear_overload(rq);
- cpudl_clear(&rq->rd->cpudl, rq->cpu);
- cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, false);
}
void __init init_sched_dl_class(void)
@@ -2539,40 +3118,101 @@ void __init init_sched_dl_class(void)
GFP_KERNEL, cpu_to_node(i));
}
+/*
+ * This function always returns a non-empty bitmap in @cpus. This is because
+ * if a root domain has reserved bandwidth for DL tasks, the DL bandwidth
+ * check will prevent CPU hotplug from deactivating all CPUs in that domain.
+ */
+static void dl_get_task_effective_cpus(struct task_struct *p, struct cpumask *cpus)
+{
+ const struct cpumask *hk_msk;
+
+ hk_msk = housekeeping_cpumask(HK_TYPE_DOMAIN);
+ if (housekeeping_enabled(HK_TYPE_DOMAIN)) {
+ if (!cpumask_intersects(p->cpus_ptr, hk_msk)) {
+ /*
+ * CPUs isolated by isolcpu="domain" always belong to
+ * def_root_domain.
+ */
+ cpumask_andnot(cpus, cpu_active_mask, hk_msk);
+ return;
+ }
+ }
+
+ /*
+ * If a root domain holds a DL task, it must have active CPUs. So
+ * active CPUs can always be found by walking up the task's cpuset
+ * hierarchy up to the partition root.
+ */
+ cpuset_cpus_allowed_locked(p, cpus);
+}
+
+/* The caller should hold cpuset_mutex */
void dl_add_task_root_domain(struct task_struct *p)
{
struct rq_flags rf;
struct rq *rq;
struct dl_bw *dl_b;
+ unsigned int cpu;
+ struct cpumask *msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
- if (!dl_task(p)) {
+ if (!dl_task(p) || dl_entity_is_special(&p->dl)) {
raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
return;
}
- rq = __task_rq_lock(p, &rf);
-
+ /*
+ * Get an active rq, whose rq->rd traces the correct root
+ * domain.
+ * Ideally this would be under cpuset reader lock until rq->rd is
+ * fetched. However, sleepable locks cannot nest inside pi_lock, so we
+ * rely on the caller of dl_add_task_root_domain() holds 'cpuset_mutex'
+ * to guarantee the CPU stays in the cpuset.
+ */
+ dl_get_task_effective_cpus(p, msk);
+ cpu = cpumask_first_and(cpu_active_mask, msk);
+ BUG_ON(cpu >= nr_cpu_ids);
+ rq = cpu_rq(cpu);
dl_b = &rq->rd->dl_bw;
- raw_spin_lock(&dl_b->lock);
+ /* End of fetching rd */
+ raw_spin_lock(&dl_b->lock);
__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
-
raw_spin_unlock(&dl_b->lock);
-
- task_rq_unlock(rq, p, &rf);
+ raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
}
void dl_clear_root_domain(struct root_domain *rd)
{
- unsigned long flags;
+ int i;
- raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
+ guard(raw_spinlock_irqsave)(&rd->dl_bw.lock);
+
+ /*
+ * Reset total_bw to zero and extra_bw to max_bw so that next
+ * loop will add dl-servers contributions back properly,
+ */
rd->dl_bw.total_bw = 0;
- raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
+ for_each_cpu(i, rd->span)
+ cpu_rq(i)->dl.extra_bw = cpu_rq(i)->dl.max_bw;
+
+ /*
+ * dl_servers are not tasks. Since dl_add_task_root_domain ignores
+ * them, we need to account for them here explicitly.
+ */
+ for_each_cpu(i, rd->span) {
+ struct sched_dl_entity *dl_se = &cpu_rq(i)->fair_server;
+
+ if (dl_server(dl_se) && cpu_active(i))
+ __dl_add(&rd->dl_bw, dl_se->dl_bw, dl_bw_cpus(i));
+ }
}
-#endif /* CONFIG_SMP */
+void dl_clear_root_domain_cpu(int cpu)
+{
+ dl_clear_root_domain(cpu_rq(cpu)->rd);
+}
static void switched_from_dl(struct rq *rq, struct task_struct *p)
{
@@ -2585,7 +3225,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
* will reset the task parameters.
*/
if (task_on_rq_queued(p) && p->dl.dl_runtime)
- task_non_contending(p);
+ task_non_contending(&p->dl, false);
/*
* In case a task is setscheduled out from SCHED_DEADLINE we need to
@@ -2630,8 +3270,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
*/
static void switched_to_dl(struct rq *rq, struct task_struct *p)
{
- if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
- put_task_struct(p);
+ cancel_inactive_timer(&p->dl);
/*
* In case a task is setscheduled to SCHED_DEADLINE we need to keep
@@ -2646,13 +3285,11 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
return;
}
- if (rq->curr != p) {
-#ifdef CONFIG_SMP
+ if (rq->donor != p) {
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
deadline_queue_push_tasks(rq);
-#endif
- if (dl_task(rq->curr))
- check_preempt_curr_dl(rq, p, 0);
+ if (dl_task(rq->donor))
+ wakeup_preempt_dl(rq, p, 0);
else
resched_curr(rq);
} else {
@@ -2660,27 +3297,27 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
}
}
+static u64 get_prio_dl(struct rq *rq, struct task_struct *p)
+{
+ return p->dl.deadline;
+}
+
/*
* If the scheduling parameters of a -deadline task changed,
* a push or pull operation might be needed.
*/
-static void prio_changed_dl(struct rq *rq, struct task_struct *p,
- int oldprio)
+static void prio_changed_dl(struct rq *rq, struct task_struct *p, u64 old_deadline)
{
if (!task_on_rq_queued(p))
return;
-#ifdef CONFIG_SMP
- /*
- * This might be too much, but unfortunately
- * we don't have the old deadline value, and
- * we can't argue if the task is increasing
- * or lowering its prio, so...
- */
- if (!rq->dl.overloaded)
+ if (p->dl.deadline == old_deadline)
+ return;
+
+ if (dl_time_before(old_deadline, p->dl.deadline))
deadline_queue_pull_task(rq);
- if (task_current(rq, p)) {
+ if (task_current_donor(rq, p)) {
/*
* If we now have a earlier deadline task than p,
* then reschedule, provided p is still on this
@@ -2699,13 +3336,6 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
resched_curr(rq);
}
-#else
- /*
- * We don't know if p has a earlier or later deadline, so let's blindly
- * set a (maybe not needed) rescheduling point.
- */
- resched_curr(rq);
-#endif
}
#ifdef CONFIG_SCHED_CORE
@@ -2717,19 +3347,19 @@ static int task_is_throttled_dl(struct task_struct *p, int cpu)
DEFINE_SCHED_CLASS(dl) = {
+ .queue_mask = 8,
+
.enqueue_task = enqueue_task_dl,
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
- .check_preempt_curr = check_preempt_curr_dl,
+ .wakeup_preempt = wakeup_preempt_dl,
- .pick_next_task = pick_next_task_dl,
+ .pick_task = pick_task_dl,
.put_prev_task = put_prev_task_dl,
.set_next_task = set_next_task_dl,
-#ifdef CONFIG_SMP
.balance = balance_dl,
- .pick_task = pick_task_dl,
.select_task_rq = select_task_rq_dl,
.migrate_task_rq = migrate_task_rq_dl,
.set_cpus_allowed = set_cpus_allowed_dl,
@@ -2737,11 +3367,11 @@ DEFINE_SCHED_CLASS(dl) = {
.rq_offline = rq_offline_dl,
.task_woken = task_woken_dl,
.find_lock_rq = find_lock_later_rq,
-#endif
.task_tick = task_tick_dl,
.task_fork = task_fork_dl,
+ .get_prio = get_prio_dl,
.prio_changed = prio_changed_dl,
.switched_from = switched_from_dl,
.switched_to = switched_to_dl,
@@ -2752,15 +3382,18 @@ DEFINE_SCHED_CLASS(dl) = {
#endif
};
-/* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
-static u64 dl_generation;
+/*
+ * Used for dl_bw check and update, used under sched_rt_handler()::mutex and
+ * sched_domains_mutex.
+ */
+u64 dl_cookie;
int sched_dl_global_validate(void)
{
u64 runtime = global_rt_runtime();
u64 period = global_rt_period();
u64 new_bw = to_ratio(period, runtime);
- u64 gen = ++dl_generation;
+ u64 cookie = ++dl_cookie;
struct dl_bw *dl_b;
int cpu, cpus, ret = 0;
unsigned long flags;
@@ -2770,10 +3403,10 @@ int sched_dl_global_validate(void)
* value smaller than the currently allocated bandwidth in
* any of the root_domains.
*/
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
rcu_read_lock_sched();
- if (dl_bw_visited(cpu, gen))
+ if (dl_bw_visited(cpu, cookie))
goto next;
dl_b = dl_bw_of(cpu);
@@ -2810,7 +3443,7 @@ static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
void sched_dl_do_global(void)
{
u64 new_bw = -1;
- u64 gen = ++dl_generation;
+ u64 cookie = ++dl_cookie;
struct dl_bw *dl_b;
int cpu;
unsigned long flags;
@@ -2818,10 +3451,13 @@ void sched_dl_do_global(void)
if (global_rt_runtime() != RUNTIME_INF)
new_bw = to_ratio(global_rt_period(), global_rt_runtime());
+ for_each_possible_cpu(cpu)
+ init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
+
for_each_possible_cpu(cpu) {
rcu_read_lock_sched();
- if (dl_bw_visited(cpu, gen)) {
+ if (dl_bw_visited(cpu, cookie)) {
rcu_read_unlock_sched();
continue;
}
@@ -2833,7 +3469,6 @@ void sched_dl_do_global(void)
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
rcu_read_unlock_sched();
- init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
}
}
@@ -2993,10 +3628,8 @@ bool __checkparam_dl(const struct sched_attr *attr)
/*
* This function clears the sched_dl_entity static params.
*/
-void __dl_clear_params(struct task_struct *p)
+static void __dl_clear_params(struct sched_dl_entity *dl_se)
{
- struct sched_dl_entity *dl_se = &p->dl;
-
dl_se->dl_runtime = 0;
dl_se->dl_deadline = 0;
dl_se->dl_period = 0;
@@ -3008,12 +3641,21 @@ void __dl_clear_params(struct task_struct *p)
dl_se->dl_yielded = 0;
dl_se->dl_non_contending = 0;
dl_se->dl_overrun = 0;
+ dl_se->dl_server = 0;
#ifdef CONFIG_RT_MUTEXES
dl_se->pi_se = dl_se;
#endif
}
+void init_dl_entity(struct sched_dl_entity *dl_se)
+{
+ RB_CLEAR_NODE(&dl_se->rb_node);
+ init_dl_task_timer(dl_se);
+ init_dl_inactive_task_timer(dl_se);
+ __dl_clear_params(dl_se);
+}
+
bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
{
struct sched_dl_entity *dl_se = &p->dl;
@@ -3027,7 +3669,6 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
return false;
}
-#ifdef CONFIG_SMP
int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
const struct cpumask *trial)
{
@@ -3048,29 +3689,31 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
}
enum dl_bw_request {
- dl_bw_req_check_overflow = 0,
+ dl_bw_req_deactivate = 0,
dl_bw_req_alloc,
dl_bw_req_free
};
static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
{
- unsigned long flags;
+ unsigned long flags, cap;
struct dl_bw *dl_b;
bool overflow = 0;
+ u64 fair_server_bw = 0;
rcu_read_lock_sched();
dl_b = dl_bw_of(cpu);
raw_spin_lock_irqsave(&dl_b->lock, flags);
- if (req == dl_bw_req_free) {
+ cap = dl_bw_capacity(cpu);
+ switch (req) {
+ case dl_bw_req_free:
__dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
- } else {
- unsigned long cap = dl_bw_capacity(cpu);
-
+ break;
+ case dl_bw_req_alloc:
overflow = __dl_overflow(dl_b, cap, 0, dl_bw);
- if (req == dl_bw_req_alloc && !overflow) {
+ if (!overflow) {
/*
* We reserve space in the destination
* root_domain, as we can't fail after this point.
@@ -3079,6 +3722,42 @@ static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
*/
__dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
}
+ break;
+ case dl_bw_req_deactivate:
+ /*
+ * cpu is not off yet, but we need to do the math by
+ * considering it off already (i.e., what would happen if we
+ * turn cpu off?).
+ */
+ cap -= arch_scale_cpu_capacity(cpu);
+
+ /*
+ * cpu is going offline and NORMAL tasks will be moved away
+ * from it. We can thus discount dl_server bandwidth
+ * contribution as it won't need to be servicing tasks after
+ * the cpu is off.
+ */
+ if (cpu_rq(cpu)->fair_server.dl_server)
+ fair_server_bw = cpu_rq(cpu)->fair_server.dl_bw;
+
+ /*
+ * Not much to check if no DEADLINE bandwidth is present.
+ * dl_servers we can discount, as tasks will be moved out the
+ * offlined CPUs anyway.
+ */
+ if (dl_b->total_bw - fair_server_bw > 0) {
+ /*
+ * Leaving at least one CPU for DEADLINE tasks seems a
+ * wise thing to do. As said above, cpu is not offline
+ * yet, so account for that.
+ */
+ if (dl_bw_cpus(cpu) - 1)
+ overflow = __dl_overflow(dl_b, cap, fair_server_bw, 0);
+ else
+ overflow = 1;
+ }
+
+ break;
}
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
@@ -3087,9 +3766,9 @@ static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
return overflow ? -EBUSY : 0;
}
-int dl_bw_check_overflow(int cpu)
+int dl_bw_deactivate(int cpu)
{
- return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0);
+ return dl_bw_manage(dl_bw_req_deactivate, cpu, 0);
}
int dl_bw_alloc(int cpu, u64 dl_bw)
@@ -3101,11 +3780,8 @@ void dl_bw_free(int cpu, u64 dl_bw)
{
dl_bw_manage(dl_bw_req_free, cpu, dl_bw);
}
-#endif
-#ifdef CONFIG_SCHED_DEBUG
void print_dl_stats(struct seq_file *m, int cpu)
{
print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
}
-#endif /* CONFIG_SCHED_DEBUG */