summaryrefslogtreecommitdiff
path: root/kernel/sched/deadline.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/deadline.c')
-rw-r--r--kernel/sched/deadline.c1745
1 files changed, 1188 insertions, 557 deletions
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 75686c6d4436..ff4df16b5186 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -15,13 +15,52 @@
* Michael Trimarchi <michael@amarulasolutions.com>,
* Fabio Checconi <fchecconi@gmail.com>
*/
-#include "sched.h"
-#include "pelt.h"
-struct dl_bandwidth def_dl_bandwidth;
+#include <linux/cpuset.h>
+
+/*
+ * Default limits for DL period; on the top end we guard against small util
+ * tasks still getting ridiculously long effective runtimes, on the bottom end we
+ * guard against timer DoS.
+ */
+static unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
+static unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
+#ifdef CONFIG_SYSCTL
+static const struct ctl_table sched_dl_sysctls[] = {
+ {
+ .procname = "sched_deadline_period_max_us",
+ .data = &sysctl_sched_dl_period_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = (void *)&sysctl_sched_dl_period_min,
+ },
+ {
+ .procname = "sched_deadline_period_min_us",
+ .data = &sysctl_sched_dl_period_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra2 = (void *)&sysctl_sched_dl_period_max,
+ },
+};
+
+static int __init sched_dl_sysctl_init(void)
+{
+ register_sysctl_init("kernel", sched_dl_sysctls);
+ return 0;
+}
+late_initcall(sched_dl_sysctl_init);
+#endif
+
+static bool dl_server(struct sched_dl_entity *dl_se)
+{
+ return dl_se->dl_server;
+}
static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
{
+ BUG_ON(dl_server(dl_se));
return container_of(dl_se, struct task_struct, dl);
}
@@ -30,12 +69,19 @@ static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
return container_of(dl_rq, struct rq, dl);
}
-static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
+static inline struct rq *rq_of_dl_se(struct sched_dl_entity *dl_se)
{
- struct task_struct *p = dl_task_of(dl_se);
- struct rq *rq = task_rq(p);
+ struct rq *rq = dl_se->rq;
+
+ if (!dl_server(dl_se))
+ rq = task_rq(dl_task_of(dl_se));
- return &rq->dl;
+ return rq;
+}
+
+static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
+{
+ return &rq_of_dl_se(dl_se)->dl;
}
static inline int on_dl_rq(struct sched_dl_entity *dl_se)
@@ -92,16 +138,13 @@ static inline int dl_bw_cpus(int i)
return cpus;
}
-static inline unsigned long __dl_bw_capacity(int i)
+static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
{
- struct root_domain *rd = cpu_rq(i)->rd;
unsigned long cap = 0;
+ int i;
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
- "sched RCU must be held");
-
- for_each_cpu_and(i, rd->span, cpu_active_mask)
- cap += capacity_orig_of(i);
+ for_each_cpu_and(i, mask, cpu_active_mask)
+ cap += arch_scale_cpu_capacity(i);
return cap;
}
@@ -112,11 +155,14 @@ static inline unsigned long __dl_bw_capacity(int i)
*/
static inline unsigned long dl_bw_capacity(int i)
{
- if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
- capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
+ if (!sched_asym_cpucap_active() &&
+ arch_scale_cpu_capacity(i) == SCHED_CAPACITY_SCALE) {
return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
} else {
- return __dl_bw_capacity(i);
+ RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+ "sched RCU must be held");
+
+ return __dl_bw_capacity(cpu_rq(i)->rd->span);
}
}
@@ -130,6 +176,21 @@ static inline bool dl_bw_visited(int cpu, u64 gen)
rd->visit_gen = gen;
return false;
}
+
+static inline
+void __dl_update(struct dl_bw *dl_b, s64 bw)
+{
+ struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
+ int i;
+
+ RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+ "sched RCU must be held");
+ for_each_cpu_and(i, rd->span, cpu_active_mask) {
+ struct rq *rq = cpu_rq(i);
+
+ rq->dl.extra_bw += bw;
+ }
+}
#else
static inline struct dl_bw *dl_bw_of(int i)
{
@@ -150,14 +211,43 @@ static inline bool dl_bw_visited(int cpu, u64 gen)
{
return false;
}
+
+static inline
+void __dl_update(struct dl_bw *dl_b, s64 bw)
+{
+ struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
+
+ dl->extra_bw += bw;
+}
#endif
static inline
+void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
+{
+ dl_b->total_bw -= tsk_bw;
+ __dl_update(dl_b, (s32)tsk_bw / cpus);
+}
+
+static inline
+void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
+{
+ dl_b->total_bw += tsk_bw;
+ __dl_update(dl_b, -((s32)tsk_bw / cpus));
+}
+
+static inline bool
+__dl_overflow(struct dl_bw *dl_b, unsigned long cap, u64 old_bw, u64 new_bw)
+{
+ return dl_b->bw != -1 &&
+ cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
+}
+
+static inline
void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
{
u64 old = dl_rq->running_bw;
- lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+ lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
dl_rq->running_bw += dl_bw;
SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
@@ -170,7 +260,7 @@ void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
{
u64 old = dl_rq->running_bw;
- lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+ lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
dl_rq->running_bw -= dl_bw;
SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
if (dl_rq->running_bw > old)
@@ -184,7 +274,7 @@ void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
{
u64 old = dl_rq->this_bw;
- lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+ lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
dl_rq->this_bw += dl_bw;
SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
}
@@ -194,7 +284,7 @@ void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
{
u64 old = dl_rq->this_bw;
- lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+ lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
dl_rq->this_bw -= dl_bw;
SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
if (dl_rq->this_bw > old)
@@ -230,33 +320,63 @@ void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
__sub_running_bw(dl_se->dl_bw, dl_rq);
}
-static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+static void dl_rq_change_utilization(struct rq *rq, struct sched_dl_entity *dl_se, u64 new_bw)
{
- struct rq *rq;
-
- BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
-
- if (task_on_rq_queued(p))
- return;
+ if (dl_se->dl_non_contending) {
+ sub_running_bw(dl_se, &rq->dl);
+ dl_se->dl_non_contending = 0;
- rq = task_rq(p);
- if (p->dl.dl_non_contending) {
- sub_running_bw(&p->dl, &rq->dl);
- p->dl.dl_non_contending = 0;
/*
* If the timer handler is currently running and the
- * timer cannot be cancelled, inactive_task_timer()
+ * timer cannot be canceled, inactive_task_timer()
* will see that dl_not_contending is not set, and
* will not touch the rq's active utilization,
* so we are still safe.
*/
- if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
- put_task_struct(p);
+ if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) {
+ if (!dl_server(dl_se))
+ put_task_struct(dl_task_of(dl_se));
+ }
}
- __sub_rq_bw(p->dl.dl_bw, &rq->dl);
+ __sub_rq_bw(dl_se->dl_bw, &rq->dl);
__add_rq_bw(new_bw, &rq->dl);
}
+static __always_inline
+void cancel_dl_timer(struct sched_dl_entity *dl_se, struct hrtimer *timer)
+{
+ /*
+ * If the timer callback was running (hrtimer_try_to_cancel == -1),
+ * it will eventually call put_task_struct().
+ */
+ if (hrtimer_try_to_cancel(timer) == 1 && !dl_server(dl_se))
+ put_task_struct(dl_task_of(dl_se));
+}
+
+static __always_inline
+void cancel_replenish_timer(struct sched_dl_entity *dl_se)
+{
+ cancel_dl_timer(dl_se, &dl_se->dl_timer);
+}
+
+static __always_inline
+void cancel_inactive_timer(struct sched_dl_entity *dl_se)
+{
+ cancel_dl_timer(dl_se, &dl_se->inactive_timer);
+}
+
+static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+{
+ WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
+
+ if (task_on_rq_queued(p))
+ return;
+
+ dl_rq_change_utilization(task_rq(p), &p->dl, new_bw);
+}
+
+static void __dl_clear_params(struct sched_dl_entity *dl_se);
+
/*
* The utilization of a task cannot be immediately removed from
* the rq active utilization (running_bw) when the task blocks.
@@ -267,7 +387,7 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
* fires.
*
* If the task wakes up again before the inactive timer fires,
- * the timer is cancelled, whereas if the task wakes up after the
+ * the timer is canceled, whereas if the task wakes up after the
* inactive timer fired (and running_bw has been decreased) the
* task's utilization has to be added to running_bw again.
* A flag in the deadline scheduling entity (dl_non_contending)
@@ -311,12 +431,11 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
* up, and checks if the task is still in the "ACTIVE non contending"
* state or not (in the second case, it updates running_bw).
*/
-static void task_non_contending(struct task_struct *p)
+static void task_non_contending(struct sched_dl_entity *dl_se)
{
- struct sched_dl_entity *dl_se = &p->dl;
struct hrtimer *timer = &dl_se->inactive_timer;
- struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
- struct rq *rq = rq_of_dl_rq(dl_rq);
+ struct rq *rq = rq_of_dl_se(dl_se);
+ struct dl_rq *dl_rq = &rq->dl;
s64 zerolag_time;
/*
@@ -346,24 +465,33 @@ static void task_non_contending(struct task_struct *p)
* utilization now, instead of starting a timer
*/
if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
- if (dl_task(p))
+ if (dl_server(dl_se)) {
sub_running_bw(dl_se, dl_rq);
- if (!dl_task(p) || p->state == TASK_DEAD) {
- struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
-
- if (p->state == TASK_DEAD)
- sub_rq_bw(&p->dl, &rq->dl);
- raw_spin_lock(&dl_b->lock);
- __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
- __dl_clear_params(p);
- raw_spin_unlock(&dl_b->lock);
+ } else {
+ struct task_struct *p = dl_task_of(dl_se);
+
+ if (dl_task(p))
+ sub_running_bw(dl_se, dl_rq);
+
+ if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
+ struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+
+ if (READ_ONCE(p->__state) == TASK_DEAD)
+ sub_rq_bw(dl_se, &rq->dl);
+ raw_spin_lock(&dl_b->lock);
+ __dl_sub(dl_b, dl_se->dl_bw, dl_bw_cpus(task_cpu(p)));
+ raw_spin_unlock(&dl_b->lock);
+ __dl_clear_params(dl_se);
+ }
}
return;
}
dl_se->dl_non_contending = 1;
- get_task_struct(p);
+ if (!dl_server(dl_se))
+ get_task_struct(dl_task_of(dl_se));
+
hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD);
}
@@ -385,13 +513,12 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
dl_se->dl_non_contending = 0;
/*
* If the timer handler is currently running and the
- * timer cannot be cancelled, inactive_task_timer()
+ * timer cannot be canceled, inactive_task_timer()
* will see that dl_not_contending is not set, and
* will not touch the rq's active utilization,
* so we are still safe.
*/
- if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
- put_task_struct(dl_task_of(dl_se));
+ cancel_inactive_timer(dl_se);
} else {
/*
* Since "dl_non_contending" is not set, the
@@ -404,31 +531,20 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
}
}
-static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
+static inline int is_leftmost(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
- struct sched_dl_entity *dl_se = &p->dl;
-
- return dl_rq->root.rb_leftmost == &dl_se->rb_node;
+ return rb_first_cached(&dl_rq->root) == &dl_se->rb_node;
}
static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
-void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
-{
- raw_spin_lock_init(&dl_b->dl_runtime_lock);
- dl_b->dl_period = period;
- dl_b->dl_runtime = runtime;
-}
-
void init_dl_bw(struct dl_bw *dl_b)
{
raw_spin_lock_init(&dl_b->lock);
- raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
if (global_rt_runtime() == RUNTIME_INF)
dl_b->bw = -1;
else
dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
- raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
dl_b->total_bw = 0;
}
@@ -440,7 +556,6 @@ void init_dl_rq(struct dl_rq *dl_rq)
/* zero means no -deadline tasks */
dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
- dl_rq->dl_nr_migratory = 0;
dl_rq->overloaded = 0;
dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
#else
@@ -484,37 +599,17 @@ static inline void dl_clear_overload(struct rq *rq)
cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
}
-static void update_dl_migration(struct dl_rq *dl_rq)
-{
- if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
- if (!dl_rq->overloaded) {
- dl_set_overload(rq_of_dl_rq(dl_rq));
- dl_rq->overloaded = 1;
- }
- } else if (dl_rq->overloaded) {
- dl_clear_overload(rq_of_dl_rq(dl_rq));
- dl_rq->overloaded = 0;
- }
-}
+#define __node_2_pdl(node) \
+ rb_entry((node), struct task_struct, pushable_dl_tasks)
-static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+static inline bool __pushable_less(struct rb_node *a, const struct rb_node *b)
{
- struct task_struct *p = dl_task_of(dl_se);
-
- if (p->nr_cpus_allowed > 1)
- dl_rq->dl_nr_migratory++;
-
- update_dl_migration(dl_rq);
+ return dl_entity_preempt(&__node_2_pdl(a)->dl, &__node_2_pdl(b)->dl);
}
-static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+static inline int has_pushable_dl_tasks(struct rq *rq)
{
- struct task_struct *p = dl_task_of(dl_se);
-
- if (p->nr_cpus_allowed > 1)
- dl_rq->dl_nr_migratory--;
-
- update_dl_migration(dl_rq);
+ return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
}
/*
@@ -523,58 +618,41 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
*/
static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
- struct dl_rq *dl_rq = &rq->dl;
- struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_root.rb_node;
- struct rb_node *parent = NULL;
- struct task_struct *entry;
- bool leftmost = true;
-
- BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
-
- while (*link) {
- parent = *link;
- entry = rb_entry(parent, struct task_struct,
- pushable_dl_tasks);
- if (dl_entity_preempt(&p->dl, &entry->dl))
- link = &parent->rb_left;
- else {
- link = &parent->rb_right;
- leftmost = false;
- }
- }
+ struct rb_node *leftmost;
+
+ WARN_ON_ONCE(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
+ leftmost = rb_add_cached(&p->pushable_dl_tasks,
+ &rq->dl.pushable_dl_tasks_root,
+ __pushable_less);
if (leftmost)
- dl_rq->earliest_dl.next = p->dl.deadline;
+ rq->dl.earliest_dl.next = p->dl.deadline;
- rb_link_node(&p->pushable_dl_tasks, parent, link);
- rb_insert_color_cached(&p->pushable_dl_tasks,
- &dl_rq->pushable_dl_tasks_root, leftmost);
+ if (!rq->dl.overloaded) {
+ dl_set_overload(rq);
+ rq->dl.overloaded = 1;
+ }
}
static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
struct dl_rq *dl_rq = &rq->dl;
+ struct rb_root_cached *root = &dl_rq->pushable_dl_tasks_root;
+ struct rb_node *leftmost;
if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
return;
- if (dl_rq->pushable_dl_tasks_root.rb_leftmost == &p->pushable_dl_tasks) {
- struct rb_node *next_node;
-
- next_node = rb_next(&p->pushable_dl_tasks);
- if (next_node) {
- dl_rq->earliest_dl.next = rb_entry(next_node,
- struct task_struct, pushable_dl_tasks)->dl.deadline;
- }
- }
+ leftmost = rb_erase_cached(&p->pushable_dl_tasks, root);
+ if (leftmost)
+ dl_rq->earliest_dl.next = __node_2_pdl(leftmost)->dl.deadline;
- rb_erase_cached(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
RB_CLEAR_NODE(&p->pushable_dl_tasks);
-}
-static inline int has_pushable_dl_tasks(struct rq *rq)
-{
- return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
+ if (!has_pushable_dl_tasks(rq) && rq->dl.overloaded) {
+ dl_clear_overload(rq);
+ rq->dl.overloaded = 0;
+ }
}
static int push_dl_task(struct rq *rq);
@@ -584,8 +662,8 @@ static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
return rq->online && dl_task(prev);
}
-static DEFINE_PER_CPU(struct callback_head, dl_push_head);
-static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
+static DEFINE_PER_CPU(struct balance_callback, dl_push_head);
+static DEFINE_PER_CPU(struct balance_callback, dl_pull_head);
static void push_dl_tasks(struct rq *);
static void pull_dl_task(struct rq *);
@@ -624,7 +702,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
* Failed to find any suitable CPU.
* The task will never come back!
*/
- BUG_ON(dl_bandwidth_enabled());
+ WARN_ON_ONCE(dl_bandwidth_enabled());
/*
* If admission control is disabled we
@@ -655,7 +733,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
}
/*
- * And we finally need to fixup root_domain(s) bandwidth accounting,
+ * And we finally need to fix up root_domain(s) bandwidth accounting,
* since p is still hanging out in the old (now moved to default) root
* domain.
*/
@@ -697,15 +775,6 @@ void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
}
-static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
-{
- return false;
-}
-
-static inline void pull_dl_task(struct rq *rq)
-{
-}
-
static inline void deadline_queue_push_tasks(struct rq *rq)
{
}
@@ -715,9 +784,28 @@ static inline void deadline_queue_pull_task(struct rq *rq)
}
#endif /* CONFIG_SMP */
+static void
+enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags);
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
-static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
-static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
+static void dequeue_dl_entity(struct sched_dl_entity *dl_se, int flags);
+static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p, int flags);
+
+static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
+ struct rq *rq)
+{
+ /* for non-boosted task, pi_of(dl_se) == dl_se */
+ dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+ dl_se->runtime = pi_of(dl_se)->dl_runtime;
+
+ /*
+ * If it is a deferred reservation, and the server
+ * is not handling an starvation case, defer it.
+ */
+ if (dl_se->dl_defer && !dl_se->dl_defer_running) {
+ dl_se->dl_throttled = 1;
+ dl_se->dl_defer_armed = 1;
+ }
+}
/*
* We are being explicitly informed that a new instance is starting,
@@ -752,10 +840,12 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
* future; in fact, we must consider execution overheads (time
* spent on hardirq context, etc.).
*/
- dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
- dl_se->runtime = dl_se->dl_runtime;
+ replenish_dl_new_period(dl_se, rq);
}
+static int start_dl_timer(struct sched_dl_entity *dl_se);
+static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t);
+
/*
* Pure Earliest Deadline First (EDF) scheduling does not deal with the
* possibility of a entity lasting more than what it declared, and thus
@@ -779,13 +869,20 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
- BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
+ WARN_ON_ONCE(pi_of(dl_se)->dl_runtime <= 0);
/*
* This could be the case for a !-dl task that is boosted.
* Just go with full inherited parameters.
+ *
+ * Or, it could be the case of a deferred reservation that
+ * was not able to consume its runtime in background and
+ * reached this point with current u > U.
+ *
+ * In both cases, set a new period.
*/
- if (dl_se->dl_deadline == 0) {
+ if (dl_se->dl_deadline == 0 ||
+ (dl_se->dl_defer_armed && dl_entity_overflow(dl_se, rq_clock(rq)))) {
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
dl_se->runtime = pi_of(dl_se)->dl_runtime;
}
@@ -815,14 +912,51 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
*/
if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
printk_deferred_once("sched: DL replenish lagged too much\n");
- dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
- dl_se->runtime = pi_of(dl_se)->dl_runtime;
+ replenish_dl_new_period(dl_se, rq);
}
if (dl_se->dl_yielded)
dl_se->dl_yielded = 0;
if (dl_se->dl_throttled)
dl_se->dl_throttled = 0;
+
+ /*
+ * If this is the replenishment of a deferred reservation,
+ * clear the flag and return.
+ */
+ if (dl_se->dl_defer_armed) {
+ dl_se->dl_defer_armed = 0;
+ return;
+ }
+
+ /*
+ * A this point, if the deferred server is not armed, and the deadline
+ * is in the future, if it is not running already, throttle the server
+ * and arm the defer timer.
+ */
+ if (dl_se->dl_defer && !dl_se->dl_defer_running &&
+ dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) {
+ if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) {
+
+ /*
+ * Set dl_se->dl_defer_armed and dl_throttled variables to
+ * inform the start_dl_timer() that this is a deferred
+ * activation.
+ */
+ dl_se->dl_defer_armed = 1;
+ dl_se->dl_throttled = 1;
+ if (!start_dl_timer(dl_se)) {
+ /*
+ * If for whatever reason (delays), a previous timer was
+ * queued but not serviced, cancel it and clean the
+ * deferrable server variables intended for start_dl_timer().
+ */
+ hrtimer_try_to_cancel(&dl_se->dl_timer);
+ dl_se->dl_defer_armed = 0;
+ dl_se->dl_throttled = 0;
+ }
+ }
+ }
}
/*
@@ -942,7 +1076,7 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
* is detected, the runtime and deadline need to be updated.
*
* If the task has an implicit deadline, i.e., deadline == period, the Original
- * CBS is applied. the runtime is replenished and a new absolute deadline is
+ * CBS is applied. The runtime is replenished and a new absolute deadline is
* set, as in the previous cases.
*
* However, the Original CBS does not work properly for tasks with
@@ -960,8 +1094,7 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
*/
static void update_dl_entity(struct sched_dl_entity *dl_se)
{
- struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
- struct rq *rq = rq_of_dl_rq(dl_rq);
+ struct rq *rq = rq_of_dl_se(dl_se);
if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
dl_entity_overflow(dl_se, rq_clock(rq))) {
@@ -973,8 +1106,16 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
return;
}
- dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
- dl_se->runtime = pi_of(dl_se)->dl_runtime;
+ replenish_dl_new_period(dl_se, rq);
+ } else if (dl_server(dl_se) && dl_se->dl_defer) {
+ /*
+ * The server can still use its previous deadline, so check if
+ * it left the dl_defer_running state.
+ */
+ if (!dl_se->dl_defer_running) {
+ dl_se->dl_defer_armed = 1;
+ dl_se->dl_throttled = 1;
+ }
}
}
@@ -993,22 +1134,35 @@ static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
* actually started or not (i.e., the replenishment instant is in
* the future or in the past).
*/
-static int start_dl_timer(struct task_struct *p)
+static int start_dl_timer(struct sched_dl_entity *dl_se)
{
- struct sched_dl_entity *dl_se = &p->dl;
struct hrtimer *timer = &dl_se->dl_timer;
- struct rq *rq = task_rq(p);
+ struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+ struct rq *rq = rq_of_dl_rq(dl_rq);
ktime_t now, act;
s64 delta;
- lockdep_assert_held(&rq->lock);
+ lockdep_assert_rq_held(rq);
/*
* We want the timer to fire at the deadline, but considering
* that it is actually coming from rq->clock and not from
* hrtimer's time base reading.
+ *
+ * The deferred reservation will have its timer set to
+ * (deadline - runtime). At that point, the CBS rule will decide
+ * if the current deadline can be used, or if a replenishment is
+ * required to avoid add too much pressure on the system
+ * (current u > U).
*/
- act = ns_to_ktime(dl_next_period(dl_se));
+ if (dl_se->dl_defer_armed) {
+ WARN_ON_ONCE(!dl_se->dl_throttled);
+ act = ns_to_ktime(dl_se->deadline - dl_se->runtime);
+ } else {
+ /* act = deadline - rel-deadline + period */
+ act = ns_to_ktime(dl_next_period(dl_se));
+ }
+
now = hrtimer_cb_get_time(timer);
delta = ktime_to_ns(now) - rq_clock(rq);
act = ktime_add_ns(act, delta);
@@ -1031,13 +1185,89 @@ static int start_dl_timer(struct task_struct *p)
* and observe our state.
*/
if (!hrtimer_is_queued(timer)) {
- get_task_struct(p);
+ if (!dl_server(dl_se))
+ get_task_struct(dl_task_of(dl_se));
hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
}
return 1;
}
+static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
+{
+#ifdef CONFIG_SMP
+ /*
+ * Queueing this task back might have overloaded rq, check if we need
+ * to kick someone away.
+ */
+ if (has_pushable_dl_tasks(rq)) {
+ /*
+ * Nothing relies on rq->lock after this, so its safe to drop
+ * rq->lock.
+ */
+ rq_unpin_lock(rq, rf);
+ push_dl_task(rq);
+ rq_repin_lock(rq, rf);
+ }
+#endif
+}
+
+/* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
+static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
+
+static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
+{
+ struct rq *rq = rq_of_dl_se(dl_se);
+ u64 fw;
+
+ scoped_guard (rq_lock, rq) {
+ struct rq_flags *rf = &scope.rf;
+
+ if (!dl_se->dl_throttled || !dl_se->dl_runtime)
+ return HRTIMER_NORESTART;
+
+ sched_clock_tick();
+ update_rq_clock(rq);
+
+ if (!dl_se->dl_runtime)
+ return HRTIMER_NORESTART;
+
+ if (!dl_se->server_has_tasks(dl_se)) {
+ replenish_dl_entity(dl_se);
+ return HRTIMER_NORESTART;
+ }
+
+ if (dl_se->dl_defer_armed) {
+ /*
+ * First check if the server could consume runtime in background.
+ * If so, it is possible to push the defer timer for this amount
+ * of time. The dl_server_min_res serves as a limit to avoid
+ * forwarding the timer for a too small amount of time.
+ */
+ if (dl_time_before(rq_clock(dl_se->rq),
+ (dl_se->deadline - dl_se->runtime - dl_server_min_res))) {
+
+ /* reset the defer timer */
+ fw = dl_se->deadline - rq_clock(dl_se->rq) - dl_se->runtime;
+
+ hrtimer_forward_now(timer, ns_to_ktime(fw));
+ return HRTIMER_RESTART;
+ }
+
+ dl_se->dl_defer_running = 1;
+ }
+
+ enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH);
+
+ if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &dl_se->rq->curr->dl))
+ resched_curr(rq);
+
+ __push_dl_task(rq, rf);
+ }
+
+ return HRTIMER_NORESTART;
+}
+
/*
* This is the bandwidth enforcement timer callback. If here, we know
* a task is not on its dl_rq, since the fact that the timer was running
@@ -1056,10 +1286,14 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
struct sched_dl_entity *dl_se = container_of(timer,
struct sched_dl_entity,
dl_timer);
- struct task_struct *p = dl_task_of(dl_se);
+ struct task_struct *p;
struct rq_flags rf;
struct rq *rq;
+ if (dl_server(dl_se))
+ return dl_server_timer(timer, dl_se);
+
+ p = dl_task_of(dl_se);
rq = task_rq_lock(p, &rf);
/*
@@ -1111,9 +1345,9 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
* If the runqueue is no longer available, migrate the
* task elsewhere. This necessarily changes rq.
*/
- lockdep_unpin_lock(&rq->lock, rf.cookie);
+ lockdep_unpin_lock(__rq_lockp(rq), rf.cookie);
rq = dl_task_offline_migration(rq, p);
- rf.cookie = lockdep_pin_lock(&rq->lock);
+ rf.cookie = lockdep_pin_lock(__rq_lockp(rq));
update_rq_clock(rq);
/*
@@ -1125,26 +1359,12 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
#endif
enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
- if (dl_task(rq->curr))
- check_preempt_curr_dl(rq, p, 0);
+ if (dl_task(rq->donor))
+ wakeup_preempt_dl(rq, p, 0);
else
resched_curr(rq);
-#ifdef CONFIG_SMP
- /*
- * Queueing this task back might have overloaded rq, check if we need
- * to kick someone away.
- */
- if (has_pushable_dl_tasks(rq)) {
- /*
- * Nothing relies on rq->lock after this, so its safe to drop
- * rq->lock.
- */
- rq_unpin_lock(rq, &rf);
- push_dl_task(rq);
- rq_repin_lock(rq, &rf);
- }
-#endif
+ __push_dl_task(rq, &rf);
unlock:
task_rq_unlock(rq, p, &rf);
@@ -1158,7 +1378,7 @@ unlock:
return HRTIMER_NORESTART;
}
-void init_dl_task_timer(struct sched_dl_entity *dl_se)
+static void init_dl_task_timer(struct sched_dl_entity *dl_se)
{
struct hrtimer *timer = &dl_se->dl_timer;
@@ -1186,12 +1406,11 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
*/
static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
{
- struct task_struct *p = dl_task_of(dl_se);
- struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
+ struct rq *rq = rq_of_dl_se(dl_se);
if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
- if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
+ if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(dl_se)))
return;
dl_se->dl_throttled = 1;
if (dl_se->runtime > 0)
@@ -1205,91 +1424,46 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
return (dl_se->runtime <= 0);
}
-extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
-
/*
- * This function implements the GRUB accounting rule:
- * according to the GRUB reclaiming algorithm, the runtime is
- * not decreased as "dq = -dt", but as
- * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
+ * This function implements the GRUB accounting rule. According to the
+ * GRUB reclaiming algorithm, the runtime is not decreased as "dq = -dt",
+ * but as "dq = -(max{u, (Umax - Uinact - Uextra)} / Umax) dt",
* where u is the utilization of the task, Umax is the maximum reclaimable
* utilization, Uinact is the (per-runqueue) inactive utilization, computed
* as the difference between the "total runqueue utilization" and the
- * runqueue active utilization, and Uextra is the (per runqueue) extra
+ * "runqueue active utilization", and Uextra is the (per runqueue) extra
* reclaimable utilization.
- * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
- * multiplied by 2^BW_SHIFT, the result has to be shifted right by
- * BW_SHIFT.
- * Since rq->dl.bw_ratio contains 1 / Umax multipled by 2^RATIO_SHIFT,
- * dl_bw is multiped by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
- * Since delta is a 64 bit variable, to have an overflow its value
- * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
- * So, overflow is not an issue here.
+ * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations multiplied
+ * by 2^BW_SHIFT, the result has to be shifted right by BW_SHIFT.
+ * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT, dl_bw
+ * is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
+ * Since delta is a 64 bit variable, to have an overflow its value should be
+ * larger than 2^(64 - 20 - 8), which is more than 64 seconds. So, overflow is
+ * not an issue here.
*/
static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
{
- u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
u64 u_act;
- u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
+ u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
/*
- * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
- * we compare u_inact + rq->dl.extra_bw with
- * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
- * u_inact + rq->dl.extra_bw can be larger than
- * 1 * (so, 1 - u_inact - rq->dl.extra_bw would be negative
- * leading to wrong results)
+ * Instead of computing max{u, (u_max - u_inact - u_extra)}, we
+ * compare u_inact + u_extra with u_max - u, because u_inact + u_extra
+ * can be larger than u_max. So, u_max - u_inact - u_extra would be
+ * negative leading to wrong results.
*/
- if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
- u_act = u_act_min;
+ if (u_inact + rq->dl.extra_bw > rq->dl.max_bw - dl_se->dl_bw)
+ u_act = dl_se->dl_bw;
else
- u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
+ u_act = rq->dl.max_bw - u_inact - rq->dl.extra_bw;
+ u_act = (u_act * rq->dl.bw_ratio) >> RATIO_SHIFT;
return (delta * u_act) >> BW_SHIFT;
}
-/*
- * Update the current task's runtime statistics (provided it is still
- * a -deadline task and has not been removed from the dl_rq).
- */
-static void update_curr_dl(struct rq *rq)
+s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
{
- struct task_struct *curr = rq->curr;
- struct sched_dl_entity *dl_se = &curr->dl;
- u64 delta_exec, scaled_delta_exec;
- int cpu = cpu_of(rq);
- u64 now;
-
- if (!dl_task(curr) || !on_dl_rq(dl_se))
- return;
-
- /*
- * Consumed budget is computed considering the time as
- * observed by schedulable tasks (excluding time spent
- * in hardirq context, etc.). Deadlines are instead
- * computed using hard walltime. This seems to be the more
- * natural solution, but the full ramifications of this
- * approach need further study.
- */
- now = rq_clock_task(rq);
- delta_exec = now - curr->se.exec_start;
- if (unlikely((s64)delta_exec <= 0)) {
- if (unlikely(dl_se->dl_yielded))
- goto throttle;
- return;
- }
-
- schedstat_set(curr->se.statistics.exec_max,
- max(curr->se.statistics.exec_max, delta_exec));
-
- curr->se.sum_exec_runtime += delta_exec;
- account_group_exec_runtime(curr, delta_exec);
-
- curr->se.exec_start = now;
- cgroup_account_cputime(curr, delta_exec);
-
- if (dl_entity_is_special(dl_se))
- return;
+ s64 scaled_delta_exec;
/*
* For tasks that participate in GRUB, we implement GRUB-PA: the
@@ -1299,10 +1473,9 @@ static void update_curr_dl(struct rq *rq)
* according to current frequency and CPU maximum capacity.
*/
if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
- scaled_delta_exec = grub_reclaim(delta_exec,
- rq,
- &curr->dl);
+ scaled_delta_exec = grub_reclaim(delta_exec, rq, dl_se);
} else {
+ int cpu = cpu_of(rq);
unsigned long scale_freq = arch_scale_freq_capacity(cpu);
unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
@@ -1310,8 +1483,64 @@ static void update_curr_dl(struct rq *rq)
scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
}
+ return scaled_delta_exec;
+}
+
+static inline void
+update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
+ int flags);
+static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
+{
+ s64 scaled_delta_exec;
+
+ if (unlikely(delta_exec <= 0)) {
+ if (unlikely(dl_se->dl_yielded))
+ goto throttle;
+ return;
+ }
+
+ if (dl_server(dl_se) && dl_se->dl_throttled && !dl_se->dl_defer)
+ return;
+
+ if (dl_entity_is_special(dl_se))
+ return;
+
+ scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec);
+
dl_se->runtime -= scaled_delta_exec;
+ /*
+ * The fair server can consume its runtime while throttled (not queued/
+ * running as regular CFS).
+ *
+ * If the server consumes its entire runtime in this state. The server
+ * is not required for the current period. Thus, reset the server by
+ * starting a new period, pushing the activation.
+ */
+ if (dl_se->dl_defer && dl_se->dl_throttled && dl_runtime_exceeded(dl_se)) {
+ /*
+ * If the server was previously activated - the starving condition
+ * took place, it this point it went away because the fair scheduler
+ * was able to get runtime in background. So return to the initial
+ * state.
+ */
+ dl_se->dl_defer_running = 0;
+
+ hrtimer_try_to_cancel(&dl_se->dl_timer);
+
+ replenish_dl_new_period(dl_se, dl_se->rq);
+
+ /*
+ * Not being able to start the timer seems problematic. If it could not
+ * be started for whatever reason, we need to "unthrottle" the DL server
+ * and queue right away. Otherwise nothing might queue it. That's similar
+ * to what enqueue_dl_entity() does on start_dl_timer==0. For now, just warn.
+ */
+ WARN_ON_ONCE(!start_dl_timer(dl_se));
+
+ return;
+ }
+
throttle:
if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
dl_se->dl_throttled = 1;
@@ -1321,15 +1550,32 @@ throttle:
(dl_se->flags & SCHED_FLAG_DL_OVERRUN))
dl_se->dl_overrun = 1;
- __dequeue_task_dl(rq, curr, 0);
- if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
- enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
+ dequeue_dl_entity(dl_se, 0);
+ if (!dl_server(dl_se)) {
+ update_stats_dequeue_dl(&rq->dl, dl_se, 0);
+ dequeue_pushable_dl_task(rq, dl_task_of(dl_se));
+ }
+
+ if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(dl_se))) {
+ if (dl_server(dl_se))
+ enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH);
+ else
+ enqueue_task_dl(rq, dl_task_of(dl_se), ENQUEUE_REPLENISH);
+ }
- if (!is_leftmost(curr, &rq->dl))
+ if (!is_leftmost(dl_se, &rq->dl))
resched_curr(rq);
}
/*
+ * The fair server (sole dl_server) does not account for real-time
+ * workload because it is running fair work.
+ */
+ if (dl_se == &rq->fair_server)
+ return;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ /*
* Because -- for now -- we share the rt bandwidth, we need to
* account our runtime there too, otherwise actual rt tasks
* would be able to exceed the shared quota.
@@ -1353,6 +1599,182 @@ throttle:
rt_rq->rt_time += delta_exec;
raw_spin_unlock(&rt_rq->rt_runtime_lock);
}
+#endif
+}
+
+/*
+ * In the non-defer mode, the idle time is not accounted, as the
+ * server provides a guarantee.
+ *
+ * If the dl_server is in defer mode, the idle time is also considered
+ * as time available for the fair server, avoiding a penalty for the
+ * rt scheduler that did not consumed that time.
+ */
+void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
+{
+ s64 delta_exec, scaled_delta_exec;
+
+ if (!rq->fair_server.dl_defer)
+ return;
+
+ /* no need to discount more */
+ if (rq->fair_server.runtime < 0)
+ return;
+
+ delta_exec = rq_clock_task(rq) - p->se.exec_start;
+ if (delta_exec < 0)
+ return;
+
+ scaled_delta_exec = dl_scaled_delta_exec(rq, &rq->fair_server, delta_exec);
+
+ rq->fair_server.runtime -= scaled_delta_exec;
+
+ if (rq->fair_server.runtime < 0) {
+ rq->fair_server.dl_defer_running = 0;
+ rq->fair_server.runtime = 0;
+ }
+
+ p->se.exec_start = rq_clock_task(rq);
+}
+
+void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
+{
+ /* 0 runtime = fair server disabled */
+ if (dl_se->dl_runtime)
+ update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
+}
+
+void dl_server_start(struct sched_dl_entity *dl_se)
+{
+ struct rq *rq = dl_se->rq;
+
+ /*
+ * XXX: the apply do not work fine at the init phase for the
+ * fair server because things are not yet set. We need to improve
+ * this before getting generic.
+ */
+ if (!dl_server(dl_se)) {
+ u64 runtime = 50 * NSEC_PER_MSEC;
+ u64 period = 1000 * NSEC_PER_MSEC;
+
+ dl_server_apply_params(dl_se, runtime, period, 1);
+
+ dl_se->dl_server = 1;
+ dl_se->dl_defer = 1;
+ setup_new_dl_entity(dl_se);
+ }
+
+ if (!dl_se->dl_runtime)
+ return;
+
+ dl_se->dl_server_active = 1;
+ enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
+ if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl))
+ resched_curr(dl_se->rq);
+}
+
+void dl_server_stop(struct sched_dl_entity *dl_se)
+{
+ if (!dl_se->dl_runtime)
+ return;
+
+ dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
+ hrtimer_try_to_cancel(&dl_se->dl_timer);
+ dl_se->dl_defer_armed = 0;
+ dl_se->dl_throttled = 0;
+ dl_se->dl_server_active = 0;
+}
+
+void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
+ dl_server_has_tasks_f has_tasks,
+ dl_server_pick_f pick_task)
+{
+ dl_se->rq = rq;
+ dl_se->server_has_tasks = has_tasks;
+ dl_se->server_pick_task = pick_task;
+}
+
+void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq)
+{
+ u64 new_bw = dl_se->dl_bw;
+ int cpu = cpu_of(rq);
+ struct dl_bw *dl_b;
+
+ dl_b = dl_bw_of(cpu_of(rq));
+ guard(raw_spinlock)(&dl_b->lock);
+
+ if (!dl_bw_cpus(cpu))
+ return;
+
+ __dl_add(dl_b, new_bw, dl_bw_cpus(cpu));
+}
+
+int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 period, bool init)
+{
+ u64 old_bw = init ? 0 : to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+ u64 new_bw = to_ratio(period, runtime);
+ struct rq *rq = dl_se->rq;
+ int cpu = cpu_of(rq);
+ struct dl_bw *dl_b;
+ unsigned long cap;
+ int retval = 0;
+ int cpus;
+
+ dl_b = dl_bw_of(cpu);
+ guard(raw_spinlock)(&dl_b->lock);
+
+ cpus = dl_bw_cpus(cpu);
+ cap = dl_bw_capacity(cpu);
+
+ if (__dl_overflow(dl_b, cap, old_bw, new_bw))
+ return -EBUSY;
+
+ if (init) {
+ __add_rq_bw(new_bw, &rq->dl);
+ __dl_add(dl_b, new_bw, cpus);
+ } else {
+ __dl_sub(dl_b, dl_se->dl_bw, cpus);
+ __dl_add(dl_b, new_bw, cpus);
+
+ dl_rq_change_utilization(rq, dl_se, new_bw);
+ }
+
+ dl_se->dl_runtime = runtime;
+ dl_se->dl_deadline = period;
+ dl_se->dl_period = period;
+
+ dl_se->runtime = 0;
+ dl_se->deadline = 0;
+
+ dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+ dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
+
+ return retval;
+}
+
+/*
+ * Update the current task's runtime statistics (provided it is still
+ * a -deadline task and has not been removed from the dl_rq).
+ */
+static void update_curr_dl(struct rq *rq)
+{
+ struct task_struct *donor = rq->donor;
+ struct sched_dl_entity *dl_se = &donor->dl;
+ s64 delta_exec;
+
+ if (!dl_task(donor) || !on_dl_rq(dl_se))
+ return;
+
+ /*
+ * Consumed budget is computed considering the time as
+ * observed by schedulable tasks (excluding time spent
+ * in hardirq context, etc.). Deadlines are instead
+ * computed using hard walltime. This seems to be the more
+ * natural solution, but the full ramifications of this
+ * approach need further study.
+ */
+ delta_exec = update_curr_common(rq);
+ update_curr_dl_se(rq, dl_se, delta_exec);
}
static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
@@ -1360,19 +1782,28 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
struct sched_dl_entity *dl_se = container_of(timer,
struct sched_dl_entity,
inactive_timer);
- struct task_struct *p = dl_task_of(dl_se);
+ struct task_struct *p = NULL;
struct rq_flags rf;
struct rq *rq;
- rq = task_rq_lock(p, &rf);
+ if (!dl_server(dl_se)) {
+ p = dl_task_of(dl_se);
+ rq = task_rq_lock(p, &rf);
+ } else {
+ rq = dl_se->rq;
+ rq_lock(rq, &rf);
+ }
sched_clock_tick();
update_rq_clock(rq);
- if (!dl_task(p) || p->state == TASK_DEAD) {
+ if (dl_server(dl_se))
+ goto no_task;
+
+ if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
- if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
+ if (READ_ONCE(p->__state) == TASK_DEAD && dl_se->dl_non_contending) {
sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
dl_se->dl_non_contending = 0;
@@ -1381,23 +1812,30 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
raw_spin_lock(&dl_b->lock);
__dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
raw_spin_unlock(&dl_b->lock);
- __dl_clear_params(p);
+ __dl_clear_params(dl_se);
goto unlock;
}
+
+no_task:
if (dl_se->dl_non_contending == 0)
goto unlock;
sub_running_bw(dl_se, &rq->dl);
dl_se->dl_non_contending = 0;
unlock:
- task_rq_unlock(rq, p, &rf);
- put_task_struct(p);
+
+ if (!dl_server(dl_se)) {
+ task_rq_unlock(rq, p, &rf);
+ put_task_struct(p);
+ } else {
+ rq_unlock(rq, &rf);
+ }
return HRTIMER_NORESTART;
}
-void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
+static void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
{
struct hrtimer *timer = &dl_se->inactive_timer;
@@ -1405,6 +1843,9 @@ void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
timer->function = inactive_task_timer;
}
+#define __node_2_dle(node) \
+ rb_entry((node), struct sched_dl_entity, rb_node)
+
#ifdef CONFIG_SMP
static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
@@ -1434,10 +1875,9 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
cpudl_clear(&rq->rd->cpudl, rq->cpu);
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
} else {
- struct rb_node *leftmost = dl_rq->root.rb_leftmost;
- struct sched_dl_entity *entry;
+ struct rb_node *leftmost = rb_first_cached(&dl_rq->root);
+ struct sched_dl_entity *entry = __node_2_dle(leftmost);
- entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
dl_rq->earliest_dl.curr = entry->deadline;
cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
}
@@ -1453,54 +1893,106 @@ static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
static inline
void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
- int prio = dl_task_of(dl_se)->prio;
u64 deadline = dl_se->deadline;
- WARN_ON(!dl_prio(prio));
dl_rq->dl_nr_running++;
add_nr_running(rq_of_dl_rq(dl_rq), 1);
inc_dl_deadline(dl_rq, deadline);
- inc_dl_migration(dl_se, dl_rq);
}
static inline
void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
- int prio = dl_task_of(dl_se)->prio;
-
- WARN_ON(!dl_prio(prio));
WARN_ON(!dl_rq->dl_nr_running);
dl_rq->dl_nr_running--;
sub_nr_running(rq_of_dl_rq(dl_rq), 1);
dec_dl_deadline(dl_rq, dl_se->deadline);
- dec_dl_migration(dl_se, dl_rq);
+}
+
+static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
+{
+ return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
+}
+
+static __always_inline struct sched_statistics *
+__schedstats_from_dl_se(struct sched_dl_entity *dl_se)
+{
+ if (!schedstat_enabled())
+ return NULL;
+
+ if (dl_server(dl_se))
+ return NULL;
+
+ return &dl_task_of(dl_se)->stats;
+}
+
+static inline void
+update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
+{
+ struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+ if (stats)
+ __update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+}
+
+static inline void
+update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
+{
+ struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+ if (stats)
+ __update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+}
+
+static inline void
+update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
+{
+ struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+ if (stats)
+ __update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+}
+
+static inline void
+update_stats_enqueue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
+ int flags)
+{
+ if (!schedstat_enabled())
+ return;
+
+ if (flags & ENQUEUE_WAKEUP)
+ update_stats_enqueue_sleeper_dl(dl_rq, dl_se);
+}
+
+static inline void
+update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
+ int flags)
+{
+ struct task_struct *p = dl_task_of(dl_se);
+
+ if (!schedstat_enabled())
+ return;
+
+ if ((flags & DEQUEUE_SLEEP)) {
+ unsigned int state;
+
+ state = READ_ONCE(p->__state);
+ if (state & TASK_INTERRUPTIBLE)
+ __schedstat_set(p->stats.sleep_start,
+ rq_clock(rq_of_dl_rq(dl_rq)));
+
+ if (state & TASK_UNINTERRUPTIBLE)
+ __schedstat_set(p->stats.block_start,
+ rq_clock(rq_of_dl_rq(dl_rq)));
+ }
}
static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
- struct rb_node **link = &dl_rq->root.rb_root.rb_node;
- struct rb_node *parent = NULL;
- struct sched_dl_entity *entry;
- int leftmost = 1;
-
- BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
-
- while (*link) {
- parent = *link;
- entry = rb_entry(parent, struct sched_dl_entity, rb_node);
- if (dl_time_before(dl_se->deadline, entry->deadline))
- link = &parent->rb_left;
- else {
- link = &parent->rb_right;
- leftmost = 0;
- }
- }
- rb_link_node(&dl_se->rb_node, parent, link);
- rb_insert_color_cached(&dl_se->rb_node, &dl_rq->root, leftmost);
+ WARN_ON_ONCE(!RB_EMPTY_NODE(&dl_se->rb_node));
+
+ rb_add_cached(&dl_se->rb_node, &dl_rq->root, __dl_less);
inc_dl_tasks(dl_se, dl_rq);
}
@@ -1513,6 +2005,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
return;
rb_erase_cached(&dl_se->rb_node, &dl_rq->root);
+
RB_CLEAR_NODE(&dl_se->rb_node);
dec_dl_tasks(dl_se, dl_rq);
@@ -1521,7 +2014,44 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
static void
enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
{
- BUG_ON(on_dl_rq(dl_se));
+ WARN_ON_ONCE(on_dl_rq(dl_se));
+
+ update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags);
+
+ /*
+ * Check if a constrained deadline task was activated
+ * after the deadline but before the next period.
+ * If that is the case, the task will be throttled and
+ * the replenishment timer will be set to the next period.
+ */
+ if (!dl_se->dl_throttled && !dl_is_implicit(dl_se))
+ dl_check_constrained_dl(dl_se);
+
+ if (flags & (ENQUEUE_RESTORE|ENQUEUE_MIGRATING)) {
+ struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+
+ add_rq_bw(dl_se, dl_rq);
+ add_running_bw(dl_se, dl_rq);
+ }
+
+ /*
+ * If p is throttled, we do not enqueue it. In fact, if it exhausted
+ * its budget it needs a replenishment and, since it now is on
+ * its rq, the bandwidth timer callback (which clearly has not
+ * run yet) will take care of this.
+ * However, the active utilization does not depend on the fact
+ * that the task is on the runqueue or not (but depends on the
+ * task's state - in GRUB parlance, "inactive" vs "active contending").
+ * In other words, even if a task is throttled its utilization must
+ * be counted in the active utilization; hence, we need to call
+ * add_running_bw().
+ */
+ if (!dl_se->dl_defer && dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
+ if (flags & ENQUEUE_WAKEUP)
+ task_contending(dl_se, flags);
+
+ return;
+ }
/*
* If this is a wakeup or a new instance, the scheduling
@@ -1534,17 +2064,55 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
} else if (flags & ENQUEUE_REPLENISH) {
replenish_dl_entity(dl_se);
} else if ((flags & ENQUEUE_RESTORE) &&
- dl_time_before(dl_se->deadline,
- rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
+ !is_dl_boosted(dl_se) &&
+ dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_se(dl_se)))) {
setup_new_dl_entity(dl_se);
}
+ /*
+ * If the reservation is still throttled, e.g., it got replenished but is a
+ * deferred task and still got to wait, don't enqueue.
+ */
+ if (dl_se->dl_throttled && start_dl_timer(dl_se))
+ return;
+
+ /*
+ * We're about to enqueue, make sure we're not ->dl_throttled!
+ * In case the timer was not started, say because the defer time
+ * has passed, mark as not throttled and mark unarmed.
+ * Also cancel earlier timers, since letting those run is pointless.
+ */
+ if (dl_se->dl_throttled) {
+ hrtimer_try_to_cancel(&dl_se->dl_timer);
+ dl_se->dl_defer_armed = 0;
+ dl_se->dl_throttled = 0;
+ }
+
__enqueue_dl_entity(dl_se);
}
-static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
+static void dequeue_dl_entity(struct sched_dl_entity *dl_se, int flags)
{
__dequeue_dl_entity(dl_se);
+
+ if (flags & (DEQUEUE_SAVE|DEQUEUE_MIGRATING)) {
+ struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+
+ sub_running_bw(dl_se, dl_rq);
+ sub_rq_bw(dl_se, dl_rq);
+ }
+
+ /*
+ * This check allows to start the inactive timer (or to immediately
+ * decrease the active utilization, if needed) in two cases:
+ * when the task blocks and when it is terminating
+ * (p->state == TASK_DEAD). We can handle the two cases in the same
+ * way, because from GRUB's point of view the same thing is happening
+ * (the task moves from "active contending" to "active non contending"
+ * or "inactive")
+ */
+ if (flags & DEQUEUE_SLEEP)
+ task_non_contending(dl_se);
}
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
@@ -1568,7 +2136,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
* problem if it fires concurrently: boosted threads
* are ignored in dl_task_timer().
*/
- hrtimer_try_to_cancel(&p->dl.dl_timer);
+ cancel_replenish_timer(&p->dl);
p->dl.dl_throttled = 0;
}
} else if (!dl_prio(p->normal_prio)) {
@@ -1582,76 +2150,40 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
* the throttle.
*/
p->dl.dl_throttled = 0;
- BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
+ if (!(flags & ENQUEUE_REPLENISH))
+ printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
+ task_pid_nr(p));
+
return;
}
- /*
- * Check if a constrained deadline task was activated
- * after the deadline but before the next period.
- * If that is the case, the task will be throttled and
- * the replenishment timer will be set to the next period.
- */
- if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
- dl_check_constrained_dl(&p->dl);
+ check_schedstat_required();
+ update_stats_wait_start_dl(dl_rq_of_se(&p->dl), &p->dl);
- if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
- add_rq_bw(&p->dl, &rq->dl);
- add_running_bw(&p->dl, &rq->dl);
- }
+ if (p->on_rq == TASK_ON_RQ_MIGRATING)
+ flags |= ENQUEUE_MIGRATING;
- /*
- * If p is throttled, we do not enqueue it. In fact, if it exhausted
- * its budget it needs a replenishment and, since it now is on
- * its rq, the bandwidth timer callback (which clearly has not
- * run yet) will take care of this.
- * However, the active utilization does not depend on the fact
- * that the task is on the runqueue or not (but depends on the
- * task's state - in GRUB parlance, "inactive" vs "active contending").
- * In other words, even if a task is throttled its utilization must
- * be counted in the active utilization; hence, we need to call
- * add_running_bw().
- */
- if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
- if (flags & ENQUEUE_WAKEUP)
- task_contending(&p->dl, flags);
+ enqueue_dl_entity(&p->dl, flags);
+ if (dl_server(&p->dl))
return;
- }
- enqueue_dl_entity(&p->dl, flags);
-
- if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+ if (!task_current(rq, p) && !p->dl.dl_throttled && p->nr_cpus_allowed > 1)
enqueue_pushable_dl_task(rq, p);
}
-static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
-{
- dequeue_dl_entity(&p->dl);
- dequeue_pushable_dl_task(rq, p);
-}
-
-static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+static bool dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
update_curr_dl(rq);
- __dequeue_task_dl(rq, p, flags);
- if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
- sub_running_bw(&p->dl, &rq->dl);
- sub_rq_bw(&p->dl, &rq->dl);
- }
+ if (p->on_rq == TASK_ON_RQ_MIGRATING)
+ flags |= DEQUEUE_MIGRATING;
- /*
- * This check allows to start the inactive timer (or to immediately
- * decrease the active utilization, if needed) in two cases:
- * when the task blocks and when it is terminating
- * (p->state == TASK_DEAD). We can handle the two cases in the same
- * way, because from GRUB's point of view the same thing is happening
- * (the task moves from "active contending" to "active non contending"
- * or "inactive")
- */
- if (flags & DEQUEUE_SLEEP)
- task_non_contending(p);
+ dequeue_dl_entity(&p->dl, flags);
+ if (!p->dl.dl_throttled && !dl_server(&p->dl))
+ dequeue_pushable_dl_task(rq, p);
+
+ return true;
}
/*
@@ -1686,12 +2218,20 @@ static void yield_task_dl(struct rq *rq)
#ifdef CONFIG_SMP
+static inline bool dl_task_is_earliest_deadline(struct task_struct *p,
+ struct rq *rq)
+{
+ return (!rq->dl.dl_nr_running ||
+ dl_time_before(p->dl.deadline,
+ rq->dl.earliest_dl.curr));
+}
+
static int find_later_rq(struct task_struct *task);
static int
select_task_rq_dl(struct task_struct *p, int cpu, int flags)
{
- struct task_struct *curr;
+ struct task_struct *curr, *donor;
bool select_rq;
struct rq *rq;
@@ -1702,6 +2242,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
rcu_read_lock();
curr = READ_ONCE(rq->curr); /* unlocked access */
+ donor = READ_ONCE(rq->donor);
/*
* If we are dealing with a -deadline task, we must
@@ -1712,25 +2253,23 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
* other hand, if it has a shorter deadline, we
* try to make it stay here, it might be important.
*/
- select_rq = unlikely(dl_task(curr)) &&
+ select_rq = unlikely(dl_task(donor)) &&
(curr->nr_cpus_allowed < 2 ||
- !dl_entity_preempt(&p->dl, &curr->dl)) &&
+ !dl_entity_preempt(&p->dl, &donor->dl)) &&
p->nr_cpus_allowed > 1;
/*
* Take the capacity of the CPU into account to
* ensure it fits the requirement of the task.
*/
- if (static_branch_unlikely(&sched_asym_cpucapacity))
+ if (sched_asym_cpucap_active())
select_rq |= !dl_task_fits_capacity(p, cpu);
if (select_rq) {
int target = find_later_rq(p);
if (target != -1 &&
- (dl_time_before(p->dl.deadline,
- cpu_rq(target)->dl.earliest_dl.curr) ||
- (cpu_rq(target)->dl.dl_nr_running == 0)))
+ dl_task_is_earliest_deadline(p, cpu_rq(target)))
cpu = target;
}
rcu_read_unlock();
@@ -1741,9 +2280,10 @@ out:
static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
{
+ struct rq_flags rf;
struct rq *rq;
- if (p->state != TASK_WAKING)
+ if (READ_ONCE(p->__state) != TASK_WAKING)
return;
rq = task_rq(p);
@@ -1752,22 +2292,22 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
* from try_to_wake_up(). Hence, p->pi_lock is locked, but
* rq->lock is not... So, lock it
*/
- raw_spin_lock(&rq->lock);
+ rq_lock(rq, &rf);
if (p->dl.dl_non_contending) {
+ update_rq_clock(rq);
sub_running_bw(&p->dl, &rq->dl);
p->dl.dl_non_contending = 0;
/*
* If the timer handler is currently running and the
- * timer cannot be cancelled, inactive_task_timer()
+ * timer cannot be canceled, inactive_task_timer()
* will see that dl_not_contending is not set, and
* will not touch the rq's active utilization,
* so we are still safe.
*/
- if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
- put_task_struct(p);
+ cancel_inactive_timer(&p->dl);
}
sub_rq_bw(&p->dl, &rq->dl);
- raw_spin_unlock(&rq->lock);
+ rq_unlock(rq, &rf);
}
static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
@@ -1777,7 +2317,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
* let's hope p can move out.
*/
if (rq->curr->nr_cpus_allowed == 1 ||
- !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
+ !cpudl_find(&rq->rd->cpudl, rq->donor, NULL))
return;
/*
@@ -1813,10 +2353,10 @@ static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
* Only called when both the current and waking task are -deadline
* tasks.
*/
-static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
+static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
int flags)
{
- if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
+ if (dl_entity_preempt(&p->dl, &rq->donor->dl)) {
resched_curr(rq);
return;
}
@@ -1826,26 +2366,31 @@ static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
* In the unlikely case current and p have the same deadline
* let us try to decide what's the best thing to do...
*/
- if ((p->dl.deadline == rq->curr->dl.deadline) &&
+ if ((p->dl.deadline == rq->donor->dl.deadline) &&
!test_tsk_need_resched(rq->curr))
check_preempt_equal_dl(rq, p);
#endif /* CONFIG_SMP */
}
#ifdef CONFIG_SCHED_HRTICK
-static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
+static void start_hrtick_dl(struct rq *rq, struct sched_dl_entity *dl_se)
{
- hrtick_start(rq, p->dl.runtime);
+ hrtick_start(rq, dl_se->runtime);
}
#else /* !CONFIG_SCHED_HRTICK */
-static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
+static void start_hrtick_dl(struct rq *rq, struct sched_dl_entity *dl_se)
{
}
#endif
static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
{
+ struct sched_dl_entity *dl_se = &p->dl;
+ struct dl_rq *dl_rq = &rq->dl;
+
p->se.exec_start = rq_clock_task(rq);
+ if (on_dl_rq(&p->dl))
+ update_stats_wait_end_dl(dl_rq, dl_se);
/* You can't push away the running task */
dequeue_pushable_dl_task(rq, p);
@@ -1853,44 +2398,72 @@ static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
if (!first)
return;
- if (hrtick_enabled(rq))
- start_hrtick_dl(rq, p);
-
- if (rq->curr->sched_class != &dl_sched_class)
+ if (rq->donor->sched_class != &dl_sched_class)
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
deadline_queue_push_tasks(rq);
+
+ if (hrtick_enabled_dl(rq))
+ start_hrtick_dl(rq, &p->dl);
}
-static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
- struct dl_rq *dl_rq)
+static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
{
struct rb_node *left = rb_first_cached(&dl_rq->root);
if (!left)
return NULL;
- return rb_entry(left, struct sched_dl_entity, rb_node);
+ return __node_2_dle(left);
}
-static struct task_struct *pick_next_task_dl(struct rq *rq)
+/*
+ * __pick_next_task_dl - Helper to pick the next -deadline task to run.
+ * @rq: The runqueue to pick the next task from.
+ */
+static struct task_struct *__pick_task_dl(struct rq *rq)
{
struct sched_dl_entity *dl_se;
struct dl_rq *dl_rq = &rq->dl;
struct task_struct *p;
+again:
if (!sched_dl_runnable(rq))
return NULL;
- dl_se = pick_next_dl_entity(rq, dl_rq);
- BUG_ON(!dl_se);
- p = dl_task_of(dl_se);
- set_next_task_dl(rq, p, true);
+ dl_se = pick_next_dl_entity(dl_rq);
+ WARN_ON_ONCE(!dl_se);
+
+ if (dl_server(dl_se)) {
+ p = dl_se->server_pick_task(dl_se);
+ if (!p) {
+ if (dl_server_active(dl_se)) {
+ dl_se->dl_yielded = 1;
+ update_curr_dl_se(rq, dl_se, 0);
+ }
+ goto again;
+ }
+ rq->dl_server = dl_se;
+ } else {
+ p = dl_task_of(dl_se);
+ }
+
return p;
}
-static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
+static struct task_struct *pick_task_dl(struct rq *rq)
{
+ return __pick_task_dl(rq);
+}
+
+static void put_prev_task_dl(struct rq *rq, struct task_struct *p, struct task_struct *next)
+{
+ struct sched_dl_entity *dl_se = &p->dl;
+ struct dl_rq *dl_rq = &rq->dl;
+
+ if (on_dl_rq(&p->dl))
+ update_stats_wait_start_dl(dl_rq, dl_se);
+
update_curr_dl(rq);
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
@@ -1916,9 +2489,9 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
* not being the leftmost task anymore. In that case NEED_RESCHED will
* be set and schedule() will start a new hrtick for the next task.
*/
- if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
- is_leftmost(p, &rq->dl))
- start_hrtick_dl(rq, p);
+ if (hrtick_enabled_dl(rq) && queued && p->dl.runtime > 0 &&
+ is_leftmost(&p->dl, &rq->dl))
+ start_hrtick_dl(rq, &p->dl);
}
static void task_fork_dl(struct task_struct *p)
@@ -1934,35 +2507,26 @@ static void task_fork_dl(struct task_struct *p)
/* Only try algorithms three times */
#define DL_MAX_TRIES 3
-static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
-{
- if (!task_running(rq, p) &&
- cpumask_test_cpu(cpu, &p->cpus_mask))
- return 1;
- return 0;
-}
-
/*
* Return the earliest pushable rq's task, which is suitable to be executed
* on the CPU, NULL otherwise:
*/
static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
{
- struct rb_node *next_node = rq->dl.pushable_dl_tasks_root.rb_leftmost;
struct task_struct *p = NULL;
+ struct rb_node *next_node;
if (!has_pushable_dl_tasks(rq))
return NULL;
-next_node:
- if (next_node) {
- p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);
+ next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root);
+ while (next_node) {
+ p = __node_2_pdl(next_node);
- if (pick_dl_task(rq, p, cpu))
+ if (task_is_pushable(rq, p, cpu))
return p;
next_node = rb_next(next_node);
- goto next_node;
}
return NULL;
@@ -2072,9 +2636,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
later_rq = cpu_rq(cpu);
- if (later_rq->dl.dl_nr_running &&
- !dl_time_before(task->dl.deadline,
- later_rq->dl.earliest_dl.curr)) {
+ if (!dl_task_is_earliest_deadline(task, later_rq)) {
/*
* Target rq has tasks of equal or earlier deadline,
* retrying does not release any lock and is unlikely
@@ -2088,8 +2650,9 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
if (double_lock_balance(rq, later_rq)) {
if (unlikely(task_rq(task) != rq ||
!cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
- task_running(rq, task) ||
+ task_on_cpu(rq, task) ||
!dl_task(task) ||
+ is_migration_disabled(task) ||
!task_on_rq_queued(task))) {
double_unlock_balance(rq, later_rq);
later_rq = NULL;
@@ -2102,9 +2665,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
* its earliest one has a later deadline than our
* task, the rq is a good one.
*/
- if (!later_rq->dl.dl_nr_running ||
- dl_time_before(task->dl.deadline,
- later_rq->dl.earliest_dl.curr))
+ if (dl_task_is_earliest_deadline(task, later_rq))
break;
/* Otherwise we try again. */
@@ -2122,15 +2683,14 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
if (!has_pushable_dl_tasks(rq))
return NULL;
- p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost,
- struct task_struct, pushable_dl_tasks);
+ p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
- BUG_ON(rq->cpu != task_cpu(p));
- BUG_ON(task_current(rq, p));
- BUG_ON(p->nr_cpus_allowed <= 1);
+ WARN_ON_ONCE(rq->cpu != task_cpu(p));
+ WARN_ON_ONCE(task_current(rq, p));
+ WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
- BUG_ON(!task_on_rq_queued(p));
- BUG_ON(!dl_task(p));
+ WARN_ON_ONCE(!task_on_rq_queued(p));
+ WARN_ON_ONCE(!dl_task(p));
return p;
}
@@ -2146,32 +2706,29 @@ static int push_dl_task(struct rq *rq)
struct rq *later_rq;
int ret = 0;
- if (!rq->dl.overloaded)
- return 0;
-
next_task = pick_next_pushable_dl_task(rq);
if (!next_task)
return 0;
retry:
- if (is_migration_disabled(next_task))
- return 0;
-
- if (WARN_ON(next_task == rq->curr))
- return 0;
-
/*
* If next_task preempts rq->curr, and rq->curr
* can move away, it makes sense to just reschedule
* without going further in pushing next_task.
*/
- if (dl_task(rq->curr) &&
- dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
+ if (dl_task(rq->donor) &&
+ dl_time_before(next_task->dl.deadline, rq->donor->dl.deadline) &&
rq->curr->nr_cpus_allowed > 1) {
resched_curr(rq);
return 0;
}
+ if (is_migration_disabled(next_task))
+ return 0;
+
+ if (WARN_ON(next_task == rq->curr))
+ return 0;
+
/* We might release rq lock */
get_task_struct(next_task);
@@ -2203,15 +2760,7 @@ retry:
goto retry;
}
- deactivate_task(rq, next_task, 0);
- set_task_cpu(next_task, later_rq->cpu);
-
- /*
- * Update the later_rq clock here, because the clock is used
- * by the cpufreq_update_util() inside __add_running_bw().
- */
- update_rq_clock(later_rq);
- activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
+ move_queued_task_locked(rq, later_rq, next_task);
ret = 1;
resched_curr(later_rq);
@@ -2255,7 +2804,7 @@ static void pull_dl_task(struct rq *this_rq)
src_rq = cpu_rq(cpu);
/*
- * It looks racy, abd it is! However, as in sched_rt.c,
+ * It looks racy, and it is! However, as in sched_rt.c,
* we are fine with this.
*/
if (this_rq->dl.dl_nr_running &&
@@ -2282,9 +2831,7 @@ static void pull_dl_task(struct rq *this_rq)
* - it will preempt the last one we pulled (if any).
*/
if (p && dl_time_before(p->dl.deadline, dmin) &&
- (!this_rq->dl.dl_nr_running ||
- dl_time_before(p->dl.deadline,
- this_rq->dl.earliest_dl.curr))) {
+ dl_task_is_earliest_deadline(p, this_rq)) {
WARN_ON(p == src_rq->curr);
WARN_ON(!task_on_rq_queued(p));
@@ -2293,15 +2840,13 @@ static void pull_dl_task(struct rq *this_rq)
* deadline than the current task of its runqueue.
*/
if (dl_time_before(p->dl.deadline,
- src_rq->curr->dl.deadline))
+ src_rq->donor->dl.deadline))
goto skip;
if (is_migration_disabled(p)) {
push_task = get_push_task(src_rq);
} else {
- deactivate_task(src_rq, p, 0);
- set_task_cpu(p, this_cpu);
- activate_task(this_rq, p, 0);
+ move_queued_task_locked(src_rq, this_rq, p);
dmin = p->dl.deadline;
resched = true;
}
@@ -2312,10 +2857,12 @@ skip:
double_unlock_balance(this_rq, src_rq);
if (push_task) {
- raw_spin_unlock(&this_rq->lock);
+ preempt_disable();
+ raw_spin_rq_unlock(this_rq);
stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
push_task, &src_rq->push_work);
- raw_spin_lock(&this_rq->lock);
+ preempt_enable();
+ raw_spin_rq_lock(this_rq);
}
}
@@ -2329,24 +2876,23 @@ skip:
*/
static void task_woken_dl(struct rq *rq, struct task_struct *p)
{
- if (!task_running(rq, p) &&
+ if (!task_on_cpu(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
p->nr_cpus_allowed > 1 &&
- dl_task(rq->curr) &&
+ dl_task(rq->donor) &&
(rq->curr->nr_cpus_allowed < 2 ||
- !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
+ !dl_entity_preempt(&p->dl, &rq->donor->dl))) {
push_dl_tasks(rq);
}
}
static void set_cpus_allowed_dl(struct task_struct *p,
- const struct cpumask *new_mask,
- u32 flags)
+ struct affinity_context *ctx)
{
struct root_domain *src_rd;
struct rq *rq;
- BUG_ON(!dl_task(p));
+ WARN_ON_ONCE(!dl_task(p));
rq = task_rq(p);
src_rd = rq->rd;
@@ -2356,7 +2902,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,
* update. We already made space for us in the destination
* domain (see cpuset_can_attach()).
*/
- if (!cpumask_intersects(src_rd->span, new_mask)) {
+ if (!cpumask_intersects(src_rd->span, ctx->new_mask)) {
struct dl_bw *src_dl_b;
src_dl_b = dl_bw_of(cpu_of(rq));
@@ -2370,7 +2916,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,
raw_spin_unlock(&src_dl_b->lock);
}
- set_cpus_allowed_common(p, new_mask, flags);
+ set_cpus_allowed_common(p, ctx);
}
/* Assumes rq->lock is held */
@@ -2409,9 +2955,13 @@ void dl_add_task_root_domain(struct task_struct *p)
struct rq *rq;
struct dl_bw *dl_b;
- rq = task_rq_lock(p, &rf);
- if (!dl_task(p))
- goto unlock;
+ raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
+ if (!dl_task(p)) {
+ raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
+ return;
+ }
+
+ rq = __task_rq_lock(p, &rf);
dl_b = &rq->rd->dl_bw;
raw_spin_lock(&dl_b->lock);
@@ -2420,17 +2970,27 @@ void dl_add_task_root_domain(struct task_struct *p)
raw_spin_unlock(&dl_b->lock);
-unlock:
task_rq_unlock(rq, p, &rf);
}
void dl_clear_root_domain(struct root_domain *rd)
{
- unsigned long flags;
+ int i;
- raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
+ guard(raw_spinlock_irqsave)(&rd->dl_bw.lock);
rd->dl_bw.total_bw = 0;
- raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
+
+ /*
+ * dl_server bandwidth is only restored when CPUs are attached to root
+ * domains (after domains are created or CPUs moved back to the
+ * default root doamin).
+ */
+ for_each_cpu(i, rd->span) {
+ struct sched_dl_entity *dl_se = &cpu_rq(i)->fair_server;
+
+ if (dl_server(dl_se) && cpu_active(i))
+ rd->dl_bw.total_bw += dl_se->dl_bw;
+ }
}
#endif /* CONFIG_SMP */
@@ -2446,7 +3006,13 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
* will reset the task parameters.
*/
if (task_on_rq_queued(p) && p->dl.dl_runtime)
- task_non_contending(p);
+ task_non_contending(&p->dl);
+
+ /*
+ * In case a task is setscheduled out from SCHED_DEADLINE we need to
+ * keep track of that on its cpuset (for correct bandwidth tracking).
+ */
+ dec_dl_tasks_cs(p);
if (!task_on_rq_queued(p)) {
/*
@@ -2485,8 +3051,13 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
*/
static void switched_to_dl(struct rq *rq, struct task_struct *p)
{
- if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
- put_task_struct(p);
+ cancel_inactive_timer(&p->dl);
+
+ /*
+ * In case a task is setscheduled to SCHED_DEADLINE we need to keep
+ * track of that on its cpuset (for correct bandwidth tracking).
+ */
+ inc_dl_tasks_cs(p);
/* If p is not queued we will update its parameters at next wakeup. */
if (!task_on_rq_queued(p)) {
@@ -2495,15 +3066,17 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
return;
}
- if (rq->curr != p) {
+ if (rq->donor != p) {
#ifdef CONFIG_SMP
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
deadline_queue_push_tasks(rq);
#endif
- if (dl_task(rq->curr))
- check_preempt_curr_dl(rq, p, 0);
+ if (dl_task(rq->donor))
+ wakeup_preempt_dl(rq, p, 0);
else
resched_curr(rq);
+ } else {
+ update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
}
}
@@ -2514,17 +3087,20 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
static void prio_changed_dl(struct rq *rq, struct task_struct *p,
int oldprio)
{
- if (task_on_rq_queued(p) || rq->curr == p) {
+ if (!task_on_rq_queued(p))
+ return;
+
#ifdef CONFIG_SMP
- /*
- * This might be too much, but unfortunately
- * we don't have the old deadline value, and
- * we can't argue if the task is increasing
- * or lowering its prio, so...
- */
- if (!rq->dl.overloaded)
- deadline_queue_pull_task(rq);
+ /*
+ * This might be too much, but unfortunately
+ * we don't have the old deadline value, and
+ * we can't argue if the task is increasing
+ * or lowering its prio, so...
+ */
+ if (!rq->dl.overloaded)
+ deadline_queue_pull_task(rq);
+ if (task_current_donor(rq, p)) {
/*
* If we now have a earlier deadline task than p,
* then reschedule, provided p is still on this
@@ -2532,16 +3108,32 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
*/
if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
resched_curr(rq);
-#else
+ } else {
/*
- * Again, we don't know if p has a earlier
- * or later deadline, so let's blindly set a
- * (maybe not needed) rescheduling point.
+ * Current may not be deadline in case p was throttled but we
+ * have just replenished it (e.g. rt_mutex_setprio()).
+ *
+ * Otherwise, if p was given an earlier deadline, reschedule.
*/
- resched_curr(rq);
-#endif /* CONFIG_SMP */
+ if (!dl_task(rq->curr) ||
+ dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
+ resched_curr(rq);
}
+#else
+ /*
+ * We don't know if p has a earlier or later deadline, so let's blindly
+ * set a (maybe not needed) rescheduling point.
+ */
+ resched_curr(rq);
+#endif
+}
+
+#ifdef CONFIG_SCHED_CORE
+static int task_is_throttled_dl(struct task_struct *p, int cpu)
+{
+ return p->dl.dl_throttled;
}
+#endif
DEFINE_SCHED_CLASS(dl) = {
@@ -2549,9 +3141,9 @@ DEFINE_SCHED_CLASS(dl) = {
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
- .check_preempt_curr = check_preempt_curr_dl,
+ .wakeup_preempt = wakeup_preempt_dl,
- .pick_next_task = pick_next_task_dl,
+ .pick_task = pick_task_dl,
.put_prev_task = put_prev_task_dl,
.set_next_task = set_next_task_dl,
@@ -2574,6 +3166,9 @@ DEFINE_SCHED_CLASS(dl) = {
.switched_to = switched_to_dl,
.update_curr = update_curr_dl,
+#ifdef CONFIG_SCHED_CORE
+ .task_is_throttled = task_is_throttled_dl,
+#endif
};
/* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
@@ -2594,7 +3189,7 @@ int sched_dl_global_validate(void)
* value smaller than the currently allocated bandwidth in
* any of the root_domains.
*/
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
rcu_read_lock_sched();
if (dl_bw_visited(cpu, gen))
@@ -2622,12 +3217,12 @@ static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
{
if (global_rt_runtime() == RUNTIME_INF) {
dl_rq->bw_ratio = 1 << RATIO_SHIFT;
- dl_rq->extra_bw = 1 << BW_SHIFT;
+ dl_rq->max_bw = dl_rq->extra_bw = 1 << BW_SHIFT;
} else {
dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
- dl_rq->extra_bw = to_ratio(global_rt_period(),
- global_rt_runtime());
+ dl_rq->max_bw = dl_rq->extra_bw =
+ to_ratio(global_rt_period(), global_rt_runtime());
}
}
@@ -2639,9 +3234,6 @@ void sched_dl_do_global(void)
int cpu;
unsigned long flags;
- def_dl_bandwidth.dl_period = global_rt_period();
- def_dl_bandwidth.dl_runtime = global_rt_runtime();
-
if (global_rt_runtime() != RUNTIME_INF)
new_bw = to_ratio(global_rt_period(), global_rt_runtime());
@@ -2745,7 +3337,7 @@ void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
dl_se->dl_runtime = attr->sched_runtime;
dl_se->dl_deadline = attr->sched_deadline;
dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
- dl_se->flags = attr->sched_flags;
+ dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS;
dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
}
@@ -2758,18 +3350,11 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
attr->sched_runtime = dl_se->dl_runtime;
attr->sched_deadline = dl_se->dl_deadline;
attr->sched_period = dl_se->dl_period;
- attr->sched_flags = dl_se->flags;
+ attr->sched_flags &= ~SCHED_DL_FLAGS;
+ attr->sched_flags |= dl_se->flags;
}
/*
- * Default limits for DL period; on the top end we guard against small util
- * tasks still getting rediculous long effective runtimes, on the bottom end we
- * guard against timer DoS.
- */
-unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
-unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
-
-/*
* This function validates the new parameters of a -deadline task.
* We ask for the deadline not being zero, and greater or equal
* than the runtime, as well as the period of being zero or
@@ -2827,10 +3412,8 @@ bool __checkparam_dl(const struct sched_attr *attr)
/*
* This function clears the sched_dl_entity static params.
*/
-void __dl_clear_params(struct task_struct *p)
+static void __dl_clear_params(struct sched_dl_entity *dl_se)
{
- struct sched_dl_entity *dl_se = &p->dl;
-
dl_se->dl_runtime = 0;
dl_se->dl_deadline = 0;
dl_se->dl_period = 0;
@@ -2842,12 +3425,21 @@ void __dl_clear_params(struct task_struct *p)
dl_se->dl_yielded = 0;
dl_se->dl_non_contending = 0;
dl_se->dl_overrun = 0;
+ dl_se->dl_server = 0;
#ifdef CONFIG_RT_MUTEXES
dl_se->pi_se = dl_se;
#endif
}
+void init_dl_entity(struct sched_dl_entity *dl_se)
+{
+ RB_CLEAR_NODE(&dl_se->rb_node);
+ init_dl_task_timer(dl_se);
+ init_dl_inactive_task_timer(dl_se);
+ __dl_clear_params(dl_se);
+}
+
bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
{
struct sched_dl_entity *dl_se = &p->dl;
@@ -2855,62 +3447,25 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
if (dl_se->dl_runtime != attr->sched_runtime ||
dl_se->dl_deadline != attr->sched_deadline ||
dl_se->dl_period != attr->sched_period ||
- dl_se->flags != attr->sched_flags)
+ dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS))
return true;
return false;
}
#ifdef CONFIG_SMP
-int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
-{
- unsigned long flags, cap;
- unsigned int dest_cpu;
- struct dl_bw *dl_b;
- bool overflow;
- int ret;
-
- dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
-
- rcu_read_lock_sched();
- dl_b = dl_bw_of(dest_cpu);
- raw_spin_lock_irqsave(&dl_b->lock, flags);
- cap = dl_bw_capacity(dest_cpu);
- overflow = __dl_overflow(dl_b, cap, 0, p->dl.dl_bw);
- if (overflow) {
- ret = -EBUSY;
- } else {
- /*
- * We reserve space for this task in the destination
- * root_domain, as we can't fail after this point.
- * We will free resources in the source root_domain
- * later on (see set_cpus_allowed_dl()).
- */
- int cpus = dl_bw_cpus(dest_cpu);
-
- __dl_add(dl_b, p->dl.dl_bw, cpus);
- ret = 0;
- }
- raw_spin_unlock_irqrestore(&dl_b->lock, flags);
- rcu_read_unlock_sched();
-
- return ret;
-}
-
int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
const struct cpumask *trial)
{
- int ret = 1, trial_cpus;
+ unsigned long flags, cap;
struct dl_bw *cur_dl_b;
- unsigned long flags;
+ int ret = 1;
rcu_read_lock_sched();
cur_dl_b = dl_bw_of(cpumask_any(cur));
- trial_cpus = cpumask_weight(trial);
-
+ cap = __dl_bw_capacity(trial);
raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
- if (cur_dl_b->bw != -1 &&
- cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
+ if (__dl_overflow(cur_dl_b, cap, 0, 0))
ret = 0;
raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
rcu_read_unlock_sched();
@@ -2918,21 +3473,97 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
return ret;
}
-bool dl_cpu_busy(unsigned int cpu)
+enum dl_bw_request {
+ dl_bw_req_deactivate = 0,
+ dl_bw_req_alloc,
+ dl_bw_req_free
+};
+
+static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
{
unsigned long flags, cap;
struct dl_bw *dl_b;
- bool overflow;
+ bool overflow = 0;
+ u64 fair_server_bw = 0;
rcu_read_lock_sched();
dl_b = dl_bw_of(cpu);
raw_spin_lock_irqsave(&dl_b->lock, flags);
+
cap = dl_bw_capacity(cpu);
- overflow = __dl_overflow(dl_b, cap, 0, 0);
+ switch (req) {
+ case dl_bw_req_free:
+ __dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
+ break;
+ case dl_bw_req_alloc:
+ overflow = __dl_overflow(dl_b, cap, 0, dl_bw);
+
+ if (!overflow) {
+ /*
+ * We reserve space in the destination
+ * root_domain, as we can't fail after this point.
+ * We will free resources in the source root_domain
+ * later on (see set_cpus_allowed_dl()).
+ */
+ __dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
+ }
+ break;
+ case dl_bw_req_deactivate:
+ /*
+ * cpu is not off yet, but we need to do the math by
+ * considering it off already (i.e., what would happen if we
+ * turn cpu off?).
+ */
+ cap -= arch_scale_cpu_capacity(cpu);
+
+ /*
+ * cpu is going offline and NORMAL tasks will be moved away
+ * from it. We can thus discount dl_server bandwidth
+ * contribution as it won't need to be servicing tasks after
+ * the cpu is off.
+ */
+ if (cpu_rq(cpu)->fair_server.dl_server)
+ fair_server_bw = cpu_rq(cpu)->fair_server.dl_bw;
+
+ /*
+ * Not much to check if no DEADLINE bandwidth is present.
+ * dl_servers we can discount, as tasks will be moved out the
+ * offlined CPUs anyway.
+ */
+ if (dl_b->total_bw - fair_server_bw > 0) {
+ /*
+ * Leaving at least one CPU for DEADLINE tasks seems a
+ * wise thing to do. As said above, cpu is not offline
+ * yet, so account for that.
+ */
+ if (dl_bw_cpus(cpu) - 1)
+ overflow = __dl_overflow(dl_b, cap, fair_server_bw, 0);
+ else
+ overflow = 1;
+ }
+
+ break;
+ }
+
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
rcu_read_unlock_sched();
- return overflow;
+ return overflow ? -EBUSY : 0;
+}
+
+int dl_bw_deactivate(int cpu)
+{
+ return dl_bw_manage(dl_bw_req_deactivate, cpu, 0);
+}
+
+int dl_bw_alloc(int cpu, u64 dl_bw)
+{
+ return dl_bw_manage(dl_bw_req_alloc, cpu, dl_bw);
+}
+
+void dl_bw_free(int cpu, u64 dl_bw)
+{
+ dl_bw_manage(dl_bw_req_free, cpu, dl_bw);
}
#endif