Diffstat (limited to 'kernel/sched/rt.c')
-rw-r--r--  kernel/sched/rt.c  716
1 file changed, 301 insertions(+), 415 deletions(-)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ed2a47e4ddae..f1867fe8e5c5 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -4,19 +4,18 @@
* policies)
*/
+#include "sched.h"
+#include "pelt.h"
+
int sched_rr_timeslice = RR_TIMESLICE;
/* More than 4 hours if BW_SHIFT equals 20. */
static const u64 max_rt_runtime = MAX_BW;
-static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
-
-struct rt_bandwidth def_rt_bandwidth;
-
/*
* period over which we measure -rt task CPU usage in us.
* default: 1s
*/
-unsigned int sysctl_sched_rt_period = 1000000;
+int sysctl_sched_rt_period = 1000000;
/*
* part of the period that we allow rt tasks to run in us.
@@ -25,18 +24,20 @@ unsigned int sysctl_sched_rt_period = 1000000;
int sysctl_sched_rt_runtime = 950000;
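
Taken together, the two defaults above cap RT-class CPU usage at 95% of every one-second period, leaving the remainder for non-RT housekeeping. A minimal user-space sketch of that arithmetic (illustration only, nothing kernel-specific):

```c
#include <stdio.h>

int main(void)
{
        long period_us  = 1000000;      /* sysctl_sched_rt_period default  */
        long runtime_us =  950000;      /* sysctl_sched_rt_runtime default */

        printf("RT utilization cap: %.1f%%\n",
               100.0 * runtime_us / period_us);        /* prints 95.0% */
        return 0;
}
```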
#ifdef CONFIG_SYSCTL
-static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
-static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
+static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ;
+static int sched_rt_handler(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos);
-static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
+static int sched_rr_handler(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos);
-static struct ctl_table sched_rt_sysctls[] = {
+static const struct ctl_table sched_rt_sysctls[] = {
{
.procname = "sched_rt_period_us",
.data = &sysctl_sched_rt_period,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(int),
.mode = 0644,
.proc_handler = sched_rt_handler,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "sched_rt_runtime_us",
@@ -44,6 +45,8 @@ static struct ctl_table sched_rt_sysctls[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = sched_rt_handler,
+ .extra1 = SYSCTL_NEG_ONE,
+ .extra2 = (void *)&sysctl_sched_rt_period,
},
{
.procname = "sched_rr_timeslice_ms",
@@ -52,7 +55,6 @@ static struct ctl_table sched_rt_sysctls[] = {
.mode = 0644,
.proc_handler = sched_rr_handler,
},
- {}
};
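
Once registered, the table above lands under /proc/sys/kernel/. A small sketch that reads the three knobs back from user space (paths are the standard sysctl mount points; error handling kept minimal):

```c
#include <stdio.h>

int main(void)
{
        const char *paths[] = {
                "/proc/sys/kernel/sched_rt_period_us",
                "/proc/sys/kernel/sched_rt_runtime_us",
                "/proc/sys/kernel/sched_rr_timeslice_ms",
        };

        for (int i = 0; i < 3; i++) {
                FILE *f = fopen(paths[i], "r");
                int val;

                if (f && fscanf(f, "%d", &val) == 1)
                        printf("%s = %d\n", paths[i], val);
                if (f)
                        fclose(f);
        }
        return 0;
}
```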
static int __init sched_rt_sysctl_init(void)
@@ -61,7 +63,40 @@ static int __init sched_rt_sysctl_init(void)
return 0;
}
late_initcall(sched_rt_sysctl_init);
+#endif /* CONFIG_SYSCTL */
+
+void init_rt_rq(struct rt_rq *rt_rq)
+{
+ struct rt_prio_array *array;
+ int i;
+
+ array = &rt_rq->active;
+ for (i = 0; i < MAX_RT_PRIO; i++) {
+ INIT_LIST_HEAD(array->queue + i);
+ __clear_bit(i, array->bitmap);
+ }
+ /* delimiter for bitsearch: */
+ __set_bit(MAX_RT_PRIO, array->bitmap);
+
+ rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
+ rt_rq->highest_prio.next = MAX_RT_PRIO-1;
+ rt_rq->overloaded = 0;
+ plist_head_init(&rt_rq->pushable_tasks);
+ /* We start in dequeued state, because no RT tasks are queued */
+ rt_rq->rt_queued = 0;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ rt_rq->rt_time = 0;
+ rt_rq->rt_throttled = 0;
+ rt_rq->rt_runtime = 0;
+ raw_spin_lock_init(&rt_rq->rt_runtime_lock);
+ rt_rq->tg = &root_task_group;
#endif
+}
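
The sentinel bit set just past the last valid priority is what lets the bit search over an otherwise empty bitmap terminate without bounds checks. A byte-per-bit toy of the idea (not the kernel's bitmap API; MAX_RT_PRIO is 100 upstream):

```c
#include <stdio.h>

#define MAX_RT_PRIO 100

int main(void)
{
        /* One byte per bit; MAX_RT_PRIO + 1 entries to hold the sentinel. */
        unsigned char bitmap[MAX_RT_PRIO + 1] = { 0 };
        int i;

        bitmap[MAX_RT_PRIO] = 1;        /* delimiter for bitsearch */
        bitmap[42] = 1;                 /* pretend prio 42 has queued tasks */

        for (i = 0; bitmap[i] == 0; i++)
                ;                       /* sentinel guarantees termination */
        printf("first set bit: %d\n", i); /* 42; MAX_RT_PRIO when empty */
        return 0;
}
```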
+
+#ifdef CONFIG_RT_GROUP_SCHED
+
+static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
@@ -94,9 +129,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
raw_spin_lock_init(&rt_b->rt_runtime_lock);
- hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL_HARD);
- rt_b->rt_period_timer.function = sched_rt_period_timer;
+ hrtimer_setup(&rt_b->rt_period_timer, sched_rt_period_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_HARD);
}
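
The conversion above folds the old two-step pattern (init, then assign the callback) into one hrtimer_setup() call. A toy equivalent of that folding, assuming only that hrtimer_setup() has this combined-init semantic; types and names here are made up:

```c
struct toy_timer {
        int clockid;
        int mode;
        void (*function)(struct toy_timer *);
};

/* Old style: two steps, and the callback assignment can be forgotten. */
static void toy_timer_init(struct toy_timer *t, int clockid, int mode)
{
        t->clockid = clockid;
        t->mode = mode;
        t->function = 0;
}

/* New style: one call wires up the callback at initialization time. */
static void toy_timer_setup(struct toy_timer *t,
                            void (*fn)(struct toy_timer *),
                            int clockid, int mode)
{
        toy_timer_init(t, clockid, mode);
        t->function = fn;
}
```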
static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b)
@@ -127,36 +161,6 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
do_start_rt_bandwidth(rt_b);
}
-void init_rt_rq(struct rt_rq *rt_rq)
-{
- struct rt_prio_array *array;
- int i;
-
- array = &rt_rq->active;
- for (i = 0; i < MAX_RT_PRIO; i++) {
- INIT_LIST_HEAD(array->queue + i);
- __clear_bit(i, array->bitmap);
- }
- /* delimiter for bitsearch: */
- __set_bit(MAX_RT_PRIO, array->bitmap);
-
-#if defined CONFIG_SMP
- rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
- rt_rq->highest_prio.next = MAX_RT_PRIO-1;
- rt_rq->rt_nr_migratory = 0;
- rt_rq->overloaded = 0;
- plist_head_init(&rt_rq->pushable_tasks);
-#endif /* CONFIG_SMP */
- /* We start is dequeued state, because no RT tasks are queued */
- rt_rq->rt_queued = 0;
-
- rt_rq->rt_time = 0;
- rt_rq->rt_throttled = 0;
- rt_rq->rt_runtime = 0;
- raw_spin_lock_init(&rt_rq->rt_runtime_lock);
-}
-
-#ifdef CONFIG_RT_GROUP_SCHED
static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
{
hrtimer_cancel(&rt_b->rt_period_timer);
@@ -166,19 +170,21 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
-#ifdef CONFIG_SCHED_DEBUG
WARN_ON_ONCE(!rt_entity_is_task(rt_se));
-#endif
+
return container_of(rt_se, struct task_struct, rt);
}
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
+ /* Cannot fold with non-CONFIG_RT_GROUP_SCHED version, layout */
+ WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group);
return rt_rq->rq;
}
static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
+ WARN_ON(!rt_group_sched_enabled() && rt_se->rt_rq->tg != &root_task_group);
return rt_se->rt_rq;
}
@@ -186,20 +192,26 @@ static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
struct rt_rq *rt_rq = rt_se->rt_rq;
+ WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group);
return rt_rq->rq;
}
void unregister_rt_sched_group(struct task_group *tg)
{
+ if (!rt_group_sched_enabled())
+ return;
+
if (tg->rt_se)
destroy_rt_bandwidth(&tg->rt_bandwidth);
-
}
void free_rt_sched_group(struct task_group *tg)
{
int i;
+ if (!rt_group_sched_enabled())
+ return;
+
for_each_possible_cpu(i) {
if (tg->rt_rq)
kfree(tg->rt_rq[i]);
@@ -244,6 +256,9 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
struct sched_rt_entity *rt_se;
int i;
+ if (!rt_group_sched_enabled())
+ return 1;
+
tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
if (!tg->rt_rq)
goto err;
@@ -251,8 +266,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
if (!tg->rt_se)
goto err;
- init_rt_bandwidth(&tg->rt_bandwidth,
- ktime_to_ns(def_rt_bandwidth.rt_period), 0);
+ init_rt_bandwidth(&tg->rt_bandwidth, ktime_to_ns(global_rt_period()), 0);
for_each_possible_cpu(i) {
rt_rq = kzalloc_node(sizeof(struct rt_rq),
@@ -278,7 +292,7 @@ err:
return 0;
}
-#else /* CONFIG_RT_GROUP_SCHED */
+#else /* !CONFIG_RT_GROUP_SCHED: */
#define rt_entity_is_task(rt_se) (1)
@@ -314,9 +328,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
return 1;
}
-#endif /* CONFIG_RT_GROUP_SCHED */
-
-#ifdef CONFIG_SMP
+#endif /* !CONFIG_RT_GROUP_SCHED */
static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
@@ -358,53 +370,6 @@ static inline void rt_clear_overload(struct rq *rq)
cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}
-static void update_rt_migration(struct rt_rq *rt_rq)
-{
- if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
- if (!rt_rq->overloaded) {
- rt_set_overload(rq_of_rt_rq(rt_rq));
- rt_rq->overloaded = 1;
- }
- } else if (rt_rq->overloaded) {
- rt_clear_overload(rq_of_rt_rq(rt_rq));
- rt_rq->overloaded = 0;
- }
-}
-
-static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
- struct task_struct *p;
-
- if (!rt_entity_is_task(rt_se))
- return;
-
- p = rt_task_of(rt_se);
- rt_rq = &rq_of_rt_rq(rt_rq)->rt;
-
- rt_rq->rt_nr_total++;
- if (p->nr_cpus_allowed > 1)
- rt_rq->rt_nr_migratory++;
-
- update_rt_migration(rt_rq);
-}
-
-static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
- struct task_struct *p;
-
- if (!rt_entity_is_task(rt_se))
- return;
-
- p = rt_task_of(rt_se);
- rt_rq = &rq_of_rt_rq(rt_rq)->rt;
-
- rt_rq->rt_nr_total--;
- if (p->nr_cpus_allowed > 1)
- rt_rq->rt_nr_migratory--;
-
- update_rt_migration(rt_rq);
-}
-
static inline int has_pushable_tasks(struct rq *rq)
{
return !plist_head_empty(&rq->rt.pushable_tasks);
@@ -438,6 +403,11 @@ static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
/* Update the highest prio pushable task */
if (p->prio < rq->rt.highest_prio.next)
rq->rt.highest_prio.next = p->prio;
+
+ if (!rq->rt.overloaded) {
+ rt_set_overload(rq);
+ rq->rt.overloaded = 1;
+ }
}
static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
@@ -451,33 +421,13 @@ static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
rq->rt.highest_prio.next = p->prio;
} else {
rq->rt.highest_prio.next = MAX_RT_PRIO-1;
- }
-}
-
-#else
-
-static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
-{
-}
-
-static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
-{
-}
-static inline
-void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
-}
-
-static inline
-void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
-}
-
-static inline void rt_queue_push_tasks(struct rq *rq)
-{
+ if (rq->rt.overloaded) {
+ rt_clear_overload(rq);
+ rq->rt.overloaded = 0;
+ }
+ }
}
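
With rt_nr_migratory and the inc/dec_rt_migration() bookkeeping gone, overload state now keys directly off the pushable list: set when the first pushable task arrives, cleared when the list drains. A toy model of that invariant (illustrative types, not kernel state):

```c
#include <assert.h>
#include <stdbool.h>

struct toy_rt_rq {
        int nr_pushable;
        bool overloaded;
};

static void toy_enqueue_pushable(struct toy_rt_rq *rt)
{
        rt->nr_pushable++;
        if (!rt->overloaded)
                rt->overloaded = true;          /* rt_set_overload() */
}

static void toy_dequeue_pushable(struct toy_rt_rq *rt)
{
        if (--rt->nr_pushable == 0 && rt->overloaded)
                rt->overloaded = false;         /* rt_clear_overload() */
}

int main(void)
{
        struct toy_rt_rq rt = { 0 };

        toy_enqueue_pushable(&rt);
        assert(rt.overloaded);
        toy_dequeue_pushable(&rt);
        assert(!rt.overloaded);
        return 0;
}
```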
-#endif /* CONFIG_SMP */
static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
@@ -515,24 +465,21 @@ static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
min_cap = uclamp_eff_value(p, UCLAMP_MIN);
max_cap = uclamp_eff_value(p, UCLAMP_MAX);
- cpu_cap = capacity_orig_of(cpu);
+ cpu_cap = arch_scale_cpu_capacity(cpu);
return cpu_cap >= min(min_cap, max_cap);
}
-#else
+#else /* !CONFIG_UCLAMP_TASK: */
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
return true;
}
-#endif
+#endif /* !CONFIG_UCLAMP_TASK */
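
A worked example of the fitness test above, with made-up capacities on the usual [0, 1024] scale: a task clamped to uclamp.min = 512 fits any CPU whose capacity reaches min(512, uclamp.max):

```c
#include <stdbool.h>
#include <stdio.h>

static bool toy_fits_capacity(unsigned long min_cap, unsigned long max_cap,
                              unsigned long cpu_cap)
{
        unsigned long need = min_cap < max_cap ? min_cap : max_cap;

        return cpu_cap >= need;
}

int main(void)
{
        /* Illustrative big.LITTLE-style capacities. */
        printf("little (446):  %d\n", toy_fits_capacity(512, 1024, 446));  /* 0 */
        printf("big   (1024):  %d\n", toy_fits_capacity(512, 1024, 1024)); /* 1 */
        return 0;
}
```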
#ifdef CONFIG_RT_GROUP_SCHED
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
- if (!rt_rq->tg)
- return RUNTIME_INF;
-
return rt_rq->rt_runtime;
}
@@ -545,6 +492,11 @@ typedef struct task_group *rt_rq_iter_t;
static inline struct task_group *next_task_group(struct task_group *tg)
{
+ if (!rt_group_sched_enabled()) {
+ WARN_ON(tg != &root_task_group);
+ return NULL;
+ }
+
do {
tg = list_entry_rcu(tg->list.next,
typeof(struct task_group), list);
@@ -557,9 +509,9 @@ static inline struct task_group *next_task_group(struct task_group *tg)
}
#define for_each_rt_rq(rt_rq, iter, rq) \
- for (iter = container_of(&task_groups, typeof(*iter), list); \
- (iter = next_task_group(iter)) && \
- (rt_rq = iter->rt_rq[cpu_of(rq)]);)
+ for (iter = &root_task_group; \
+ iter && (rt_rq = iter->rt_rq[cpu_of(rq)]); \
+ iter = next_task_group(iter))
#define for_each_sched_rt_entity(rt_se) \
for (; rt_se; rt_se = rt_se->parent)
@@ -574,7 +526,7 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
- struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
+ struct task_struct *donor = rq_of_rt_rq(rt_rq)->donor;
struct rq *rq = rq_of_rt_rq(rt_rq);
struct sched_rt_entity *rt_se;
@@ -588,7 +540,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
else if (!on_rt_rq(rt_se))
enqueue_rt_entity(rt_se, 0);
- if (rt_rq->highest_prio.curr < curr->prio)
+ if (rt_rq->highest_prio.curr < donor->prio)
resched_curr(rq);
}
}
@@ -626,17 +578,10 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
return p->prio != p->normal_prio;
}
-#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
return this_rq()->rd->span;
}
-#else
-static inline const struct cpumask *sched_rt_period_mask(void)
-{
- return cpu_online_mask;
-}
-#endif
static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
@@ -649,70 +594,6 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
return &rt_rq->tg->rt_bandwidth;
}
-#else /* !CONFIG_RT_GROUP_SCHED */
-
-static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
-{
- return rt_rq->rt_runtime;
-}
-
-static inline u64 sched_rt_period(struct rt_rq *rt_rq)
-{
- return ktime_to_ns(def_rt_bandwidth.rt_period);
-}
-
-typedef struct rt_rq *rt_rq_iter_t;
-
-#define for_each_rt_rq(rt_rq, iter, rq) \
- for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
-
-#define for_each_sched_rt_entity(rt_se) \
- for (; rt_se; rt_se = NULL)
-
-static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
-{
- return NULL;
-}
-
-static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
-{
- struct rq *rq = rq_of_rt_rq(rt_rq);
-
- if (!rt_rq->rt_nr_running)
- return;
-
- enqueue_top_rt_rq(rt_rq);
- resched_curr(rq);
-}
-
-static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
-{
- dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
-}
-
-static inline int rt_rq_throttled(struct rt_rq *rt_rq)
-{
- return rt_rq->rt_throttled;
-}
-
-static inline const struct cpumask *sched_rt_period_mask(void)
-{
- return cpu_online_mask;
-}
-
-static inline
-struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
-{
- return &cpu_rq(cpu)->rt;
-}
-
-static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
-{
- return &def_rt_bandwidth;
-}
-
-#endif /* CONFIG_RT_GROUP_SCHED */
-
bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
{
struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
@@ -721,7 +602,6 @@ bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
rt_rq->rt_time < rt_b->rt_runtime);
}
-#ifdef CONFIG_SMP
/*
* We ran out of runtime, see if we can borrow some from our neighbours.
*/
@@ -894,9 +774,6 @@ static void balance_runtime(struct rt_rq *rt_rq)
raw_spin_lock(&rt_rq->rt_runtime_lock);
}
}
-#else /* !CONFIG_SMP */
-static inline void balance_runtime(struct rt_rq *rt_rq) {}
-#endif /* CONFIG_SMP */
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
@@ -904,7 +781,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
const struct cpumask *span;
span = sched_rt_period_mask();
-#ifdef CONFIG_RT_GROUP_SCHED
+
/*
* FIXME: isolated CPUs should really leave the root task group,
* whether they are isolcpus or were isolated via cpusets, lest
@@ -916,7 +793,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
*/
if (rt_b == &root_task_group.rt_bandwidth)
span = cpu_online_mask;
-#endif
+
for_each_cpu(i, span) {
int enqueue = 0;
struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
@@ -953,7 +830,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
/*
* When we're idle and a woken (rt) task is
- * throttled check_preempt_curr() will set
+ * throttled wakeup_preempt() will set
* skip_update and the time between the wakeup
* and this unthrottle will get accounted as
* 'runtime'.
@@ -983,18 +860,6 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
return idle;
}
-static inline int rt_se_prio(struct sched_rt_entity *rt_se)
-{
-#ifdef CONFIG_RT_GROUP_SCHED
- struct rt_rq *rt_rq = group_rt_rq(rt_se);
-
- if (rt_rq)
- return rt_rq->highest_prio.curr;
-#endif
-
- return rt_task_of(rt_se)->prio;
-}
-
static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
u64 runtime = sched_rt_runtime(rt_rq);
@@ -1038,31 +903,88 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
return 0;
}
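
A toy model of the group-scheduling throttle/replenish cycle implemented by sched_rt_runtime_exceeded() and the period timer; field names mirror the kernel's but the behaviour is deliberately simplified:

```c
#include <stdbool.h>
#include <stdio.h>

struct toy_rt_rq {
        unsigned long long rt_time;     /* ns consumed this period */
        unsigned long long rt_runtime;  /* ns of budget per period */
        bool rt_throttled;
};

static bool toy_runtime_exceeded(struct toy_rt_rq *rt)
{
        if (rt->rt_time > rt->rt_runtime) {
                rt->rt_throttled = true;        /* dequeued until refresh */
                return true;
        }
        return false;
}

/* Period-timer side: refund one budget's worth and maybe unthrottle. */
static void toy_do_period_timer(struct toy_rt_rq *rt)
{
        rt->rt_time -= rt->rt_time > rt->rt_runtime ? rt->rt_runtime
                                                    : rt->rt_time;
        if (rt->rt_time < rt->rt_runtime)
                rt->rt_throttled = false;
}

int main(void)
{
        struct toy_rt_rq rt = { .rt_runtime = 950000000ULL };

        rt.rt_time = 960000000ULL;              /* overran the budget */
        printf("throttled: %d\n", toy_runtime_exceeded(&rt));   /* 1 */
        toy_do_period_timer(&rt);
        printf("throttled: %d\n", rt.rt_throttled);             /* 0 */
        return 0;
}
```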
+#else /* !CONFIG_RT_GROUP_SCHED: */
+
+typedef struct rt_rq *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+ for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
+
+#define for_each_sched_rt_entity(rt_se) \
+ for (; rt_se; rt_se = NULL)
+
+static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
+{
+ return NULL;
+}
+
+static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+{
+ struct rq *rq = rq_of_rt_rq(rt_rq);
+
+ if (!rt_rq->rt_nr_running)
+ return;
+
+ enqueue_top_rt_rq(rt_rq);
+ resched_curr(rq);
+}
+
+static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
+{
+ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
+}
+
+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+ return false;
+}
+
+static inline const struct cpumask *sched_rt_period_mask(void)
+{
+ return cpu_online_mask;
+}
+
+static inline
+struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
+{
+ return &cpu_rq(cpu)->rt;
+}
+
+static void __enable_runtime(struct rq *rq) { }
+static void __disable_runtime(struct rq *rq) { }
+
+#endif /* !CONFIG_RT_GROUP_SCHED */
+
+static inline int rt_se_prio(struct sched_rt_entity *rt_se)
+{
+#ifdef CONFIG_RT_GROUP_SCHED
+ struct rt_rq *rt_rq = group_rt_rq(rt_se);
+
+ if (rt_rq)
+ return rt_rq->highest_prio.curr;
+#endif
+
+ return rt_task_of(rt_se)->prio;
+}
+
/*
* Update the current task's runtime statistics. Skip current tasks that
* are not in our scheduling class.
*/
static void update_curr_rt(struct rq *rq)
{
- struct task_struct *curr = rq->curr;
- struct sched_rt_entity *rt_se = &curr->rt;
- u64 delta_exec;
- u64 now;
+ struct task_struct *donor = rq->donor;
+ s64 delta_exec;
- if (curr->sched_class != &rt_sched_class)
+ if (donor->sched_class != &rt_sched_class)
return;
- now = rq_clock_task(rq);
- delta_exec = now - curr->se.exec_start;
- if (unlikely((s64)delta_exec <= 0))
+ delta_exec = update_curr_common(rq);
+ if (unlikely(delta_exec <= 0))
return;
- schedstat_set(curr->stats.exec_max,
- max(curr->stats.exec_max, delta_exec));
-
- trace_sched_stat_runtime(curr, delta_exec, 0);
-
- update_current_exec_runtime(curr, now, delta_exec);
+#ifdef CONFIG_RT_GROUP_SCHED
+ struct sched_rt_entity *rt_se = &donor->rt;
if (!rt_bandwidth_enabled())
return;
@@ -1082,6 +1004,7 @@ static void update_curr_rt(struct rq *rq)
do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
}
}
+#endif /* CONFIG_RT_GROUP_SCHED */
}
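
A hedged sketch of the accounting this function now delegates to update_curr_common(); the only assumption is that it charges wall-clock runtime since exec_start and returns the signed delta, so a non-positive value means the clock did not advance:

```c
/* Illustrative names; not the kernel implementation. */
static long long toy_update_curr(long long now_ns,
                                 long long *exec_start_ns,
                                 long long *sum_exec_runtime_ns)
{
        long long delta = now_ns - *exec_start_ns;

        if (delta <= 0)
                return delta;

        *exec_start_ns = now_ns;
        *sum_exec_runtime_ns += delta;
        return delta;
}
```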
static void
@@ -1123,20 +1046,17 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
cpufreq_update_util(rq, 0);
}
-#if defined CONFIG_SMP
-
static void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
struct rq *rq = rq_of_rt_rq(rt_rq);
-#ifdef CONFIG_RT_GROUP_SCHED
/*
* Change rq's cpupri only if rt_rq is the top queue.
*/
- if (&rq->rt != rt_rq)
+ if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq)
return;
-#endif
+
if (rq->online && prio < prev_prio)
cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
}
@@ -1146,27 +1066,16 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
struct rq *rq = rq_of_rt_rq(rt_rq);
-#ifdef CONFIG_RT_GROUP_SCHED
/*
* Change rq's cpupri only if rt_rq is the top queue.
*/
- if (&rq->rt != rt_rq)
+ if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq)
return;
-#endif
+
if (rq->online && rt_rq->highest_prio.curr != prev_prio)
cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}
-#else /* CONFIG_SMP */
-
-static inline
-void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
-static inline
-void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
-
-#endif /* CONFIG_SMP */
-
-#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
static void
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
@@ -1189,7 +1098,7 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
/*
* This may have been our highest task, and therefore
- * we may have some recomputation to do
+ * we may have some re-computation to do
*/
if (prio == prev_prio) {
struct rt_prio_array *array = &rt_rq->active;
@@ -1205,13 +1114,6 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
dec_rt_prio_smp(rt_rq, prio, prev_prio);
}
-#else
-
-static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
-static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
-
-#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
-
#ifdef CONFIG_RT_GROUP_SCHED
static void
@@ -1220,8 +1122,7 @@ inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
if (rt_se_boosted(rt_se))
rt_rq->rt_nr_boosted++;
- if (rt_rq->tg)
- start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
+ start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
}
static void
@@ -1233,18 +1134,17 @@ dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
}
-#else /* CONFIG_RT_GROUP_SCHED */
+#else /* !CONFIG_RT_GROUP_SCHED: */
static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
- start_rt_bandwidth(&def_rt_bandwidth);
}
static inline
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
-#endif /* CONFIG_RT_GROUP_SCHED */
+#endif /* !CONFIG_RT_GROUP_SCHED */
static inline
unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
@@ -1281,7 +1181,6 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
inc_rt_prio(rt_rq, prio);
- inc_rt_migration(rt_se, rt_rq);
inc_rt_group(rt_se, rt_rq);
}
@@ -1294,7 +1193,6 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
dec_rt_prio(rt_rq, rt_se_prio(rt_se));
- dec_rt_migration(rt_se, rt_rq);
dec_rt_group(rt_se, rt_rq);
}
@@ -1324,11 +1222,9 @@ static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_arr
static inline struct sched_statistics *
__schedstats_from_rt_se(struct sched_rt_entity *rt_se)
{
-#ifdef CONFIG_RT_GROUP_SCHED
/* schedstats is not supported for rt group. */
if (!rt_entity_is_task(rt_se))
return NULL;
-#endif
return &rt_task_of(rt_se)->stats;
}
@@ -1544,11 +1440,14 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
enqueue_rt_entity(rt_se, flags);
+ if (task_is_blocked(p))
+ return;
+
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
}
-static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
+static bool dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct sched_rt_entity *rt_se = &p->rt;
@@ -1556,6 +1455,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
dequeue_rt_entity(rt_se, flags);
dequeue_pushable_task(rq, p);
+
+ return true;
}
/*
@@ -1589,16 +1490,15 @@ static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
static void yield_task_rt(struct rq *rq)
{
- requeue_task_rt(rq, rq->curr, 0);
+ requeue_task_rt(rq, rq->donor, 0);
}
-#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);
static int
select_task_rq_rt(struct task_struct *p, int cpu, int flags)
{
- struct task_struct *curr;
+ struct task_struct *curr, *donor;
struct rq *rq;
bool test;
@@ -1610,6 +1510,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags)
rcu_read_lock();
curr = READ_ONCE(rq->curr); /* unlocked access */
+ donor = READ_ONCE(rq->donor);
/*
* If the current task on @p's runqueue is an RT task, then
@@ -1627,7 +1528,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags)
*
* For equal prio tasks, we just let the scheduler sort it out.
*
- * Otherwise, just let it ride on the affined RQ and the
+ * Otherwise, just let it ride on the affine RQ and the
* post-schedule router will push the preempted task away
*
* This test is optimistic, if we get it wrong the load-balancer
@@ -1638,8 +1539,8 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags)
* systems like big.LITTLE.
*/
test = curr &&
- unlikely(rt_task(curr)) &&
- (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
+ unlikely(rt_task(donor)) &&
+ (curr->nr_cpus_allowed < 2 || donor->prio <= p->prio);
if (test || !rt_task_fits_capacity(p, cpu)) {
int target = find_lowest_rq(p);
@@ -1669,12 +1570,8 @@ out:
static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
- /*
- * Current can't be migrated, useless to reschedule,
- * let's hope p can move out.
- */
if (rq->curr->nr_cpus_allowed == 1 ||
- !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
+ !cpupri_find(&rq->rd->cpupri, rq->donor, NULL))
return;
/*
@@ -1710,19 +1607,19 @@ static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
}
-#endif /* CONFIG_SMP */
/*
* Preempt the current task with a newly woken task if needed:
*/
-static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
+static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
{
- if (p->prio < rq->curr->prio) {
+ struct task_struct *donor = rq->donor;
+
+ if (p->prio < donor->prio) {
resched_curr(rq);
return;
}
-#ifdef CONFIG_SMP
/*
* If:
*
@@ -1735,9 +1632,8 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag
* to move current somewhere else, making room for our non-migratable
* task.
*/
- if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
+ if (p->prio == donor->prio && !test_tsk_need_resched(rq->curr))
check_preempt_equal_prio(rq, p);
-#endif
}
static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
@@ -1760,7 +1656,7 @@ static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool f
* utilization. We only care of the case where we start to schedule a
* rt task
*/
- if (rq->curr->sched_class != &rt_sched_class)
+ if (rq->donor->sched_class != &rt_sched_class)
update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
rt_queue_push_tasks(rq);
@@ -1777,6 +1673,8 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
BUG_ON(idx >= MAX_RT_PRIO);
queue = array->queue + idx;
+ if (WARN_ON_ONCE(list_empty(queue)))
+ return NULL;
next = list_entry(queue->next, struct sched_rt_entity, run_list);
return next;
@@ -1789,14 +1687,15 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
do {
rt_se = pick_next_rt_entity(rt_rq);
- BUG_ON(!rt_se);
+ if (unlikely(!rt_se))
+ return NULL;
rt_rq = group_rt_rq(rt_se);
} while (rt_rq);
return rt_task_of(rt_se);
}
-static struct task_struct *pick_task_rt(struct rq *rq)
+static struct task_struct *pick_task_rt(struct rq *rq, struct rq_flags *rf)
{
struct task_struct *p;
@@ -1808,17 +1707,7 @@ static struct task_struct *pick_task_rt(struct rq *rq)
return p;
}
-static struct task_struct *pick_next_task_rt(struct rq *rq)
-{
- struct task_struct *p = pick_task_rt(rq);
-
- if (p)
- set_next_task_rt(rq, p, true);
-
- return p;
-}
-
-static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
+static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct task_struct *next)
{
struct sched_rt_entity *rt_se = &p->rt;
struct rt_rq *rt_rq = &rq->rt;
@@ -1830,6 +1719,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
+ if (task_is_blocked(p))
+ return;
/*
* The previous task needs to be made eligible for pushing
* if it is still active
@@ -1838,20 +1729,9 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
enqueue_pushable_task(rq, p);
}
-#ifdef CONFIG_SMP
-
/* Only try algorithms three times */
#define RT_MAX_TRIES 3
-static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
-{
- if (!task_on_cpu(rq, p) &&
- cpumask_test_cpu(cpu, &p->cpus_mask))
- return 1;
-
- return 0;
-}
-
/*
* Return the highest pushable rq's task, which is suitable to be executed
* on the CPU, NULL otherwise
@@ -1865,7 +1745,7 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
return NULL;
plist_for_each_entry(p, head, pushable_tasks) {
- if (pick_rt_task(rq, p, cpu))
+ if (task_is_pushable(rq, p, cpu))
return p;
}
@@ -1965,6 +1845,27 @@ static int find_lowest_rq(struct task_struct *task)
return -1;
}
+static struct task_struct *pick_next_pushable_task(struct rq *rq)
+{
+ struct task_struct *p;
+
+ if (!has_pushable_tasks(rq))
+ return NULL;
+
+ p = plist_first_entry(&rq->rt.pushable_tasks,
+ struct task_struct, pushable_tasks);
+
+ BUG_ON(rq->cpu != task_cpu(p));
+ BUG_ON(task_current(rq, p));
+ BUG_ON(task_current_donor(rq, p));
+ BUG_ON(p->nr_cpus_allowed <= 1);
+
+ BUG_ON(!task_on_rq_queued(p));
+ BUG_ON(!rt_task(p));
+
+ return p;
+}
+
/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
@@ -1995,14 +1896,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
/*
* We had to unlock the run queue. In
* the meantime, task could have
- * migrated already or had its affinity changed.
- * Also make sure that it wasn't scheduled on its rq.
+ * migrated already or had its affinity changed,
+ * therefore check if the task is still at the
+ * head of the pushable tasks list.
+ * It is possible the task was scheduled, set
+ * "migrate_disabled" and then got preempted, so we must
+ * check the task migration disable flag here too.
*/
- if (unlikely(task_rq(task) != rq ||
+ if (unlikely(is_migration_disabled(task) ||
!cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
- task_on_cpu(rq, task) ||
- !rt_task(task) ||
- !task_on_rq_queued(task))) {
+ task != pick_next_pushable_task(rq))) {
double_unlock_balance(rq, lowest_rq);
lowest_rq = NULL;
@@ -2022,26 +1925,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
return lowest_rq;
}
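
The rewritten staleness check above collapses several separate tests into one: after the locks were dropped and retaken, the task is trusted only if it is still exactly what pick_next_pushable_task() would hand back and is still allowed to migrate. A boolean sketch of that predicate (toy types, not kernel state):

```c
#include <stdbool.h>

struct toy_task {
        bool migration_disabled;
        bool allowed_on_lowest_cpu;
};

static bool toy_still_pushable(const struct toy_task *task,
                               const struct toy_task *head)
{
        return !task->migration_disabled &&
               task->allowed_on_lowest_cpu &&
               task == head;    /* still first on rt.pushable_tasks */
}
```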
-static struct task_struct *pick_next_pushable_task(struct rq *rq)
-{
- struct task_struct *p;
-
- if (!has_pushable_tasks(rq))
- return NULL;
-
- p = plist_first_entry(&rq->rt.pushable_tasks,
- struct task_struct, pushable_tasks);
-
- BUG_ON(rq->cpu != task_cpu(p));
- BUG_ON(task_current(rq, p));
- BUG_ON(p->nr_cpus_allowed <= 1);
-
- BUG_ON(!task_on_rq_queued(p));
- BUG_ON(!rt_task(p));
-
- return p;
-}
-
/*
* If the current CPU has more than one RT task, see if the non
* running task can migrate over to a CPU that is running a task
@@ -2066,7 +1949,7 @@ retry:
* higher priority than current. If that's the case
* just reschedule current.
*/
- if (unlikely(next_task->prio < rq->curr->prio)) {
+ if (unlikely(next_task->prio < rq->donor->prio)) {
resched_curr(rq);
return 0;
}
@@ -2087,7 +1970,7 @@ retry:
* Note that the stoppers are masqueraded as SCHED_FIFO
* (cf. sched_set_stop_task()), so we can't rely on rt_task().
*/
- if (rq->curr->sched_class != &rt_sched_class)
+ if (rq->donor->sched_class != &rt_sched_class)
return 0;
cpu = find_lowest_rq(rq->curr);
@@ -2102,9 +1985,11 @@ retry:
*/
push_task = get_push_task(rq);
if (push_task) {
+ preempt_disable();
raw_spin_rq_unlock(rq);
stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
push_task, &rq->push_work);
+ preempt_enable();
raw_spin_rq_lock(rq);
}
@@ -2152,9 +2037,7 @@ retry:
goto retry;
}
- deactivate_task(rq, next_task, 0);
- set_task_cpu(next_task, lowest_rq->cpu);
- activate_task(lowest_rq, next_task, 0);
+ move_queued_task_locked(rq, lowest_rq, next_task);
resched_curr(lowest_rq);
ret = 1;
@@ -2194,14 +2077,14 @@ static void push_rt_tasks(struct rq *rq)
* if it's the only CPU with multiple RT tasks queued, and a large number
* of CPUs scheduling a lower priority task at the same time.
*
- * Each root domain has its own irq work function that can iterate over
+ * Each root domain has its own IRQ work function that can iterate over
* all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
* task must be checked if there's one or many CPUs that are lowering
- * their priority, there's a single irq work iterator that will try to
+ * their priority, there's a single IRQ work iterator that will try to
* push off RT tasks that are waiting to run.
*
* When a CPU schedules a lower priority task, it will kick off the
- * irq work iterator that will jump to each CPU with overloaded RT tasks.
+ * IRQ work iterator that will jump to each CPU with overloaded RT tasks.
* As it only takes the first CPU that schedules a lower priority task
* to start the process, the rto_start variable is incremented and if
* the atomic result is one, then that CPU will try to take the rto_lock.
@@ -2209,7 +2092,7 @@ static void push_rt_tasks(struct rq *rq)
* CPUs scheduling lower priority tasks.
*
* All CPUs that are scheduling a lower priority task will increment the
- * rt_loop_next variable. This will make sure that the irq work iterator
+ * rt_loop_next variable. This will make sure that the IRQ work iterator
* checks all RT overloaded CPUs whenever a CPU schedules a new lower
* priority task, even if the iterator is in the middle of a scan. Incrementing
* the rt_loop_next will cause the iterator to perform another scan.
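
A toy of the hand-off described above, using C11 atomics rather than the kernel's primitives (the real logic lives in tell_cpu_to_push() and rto_next_cpu(); names here are illustrative). Every CPU that lowers its priority bumps loop_next, but only the 0 -> 1 winner on rto_start launches the IRQ work iterator:

```c
#include <stdatomic.h>
#include <stdbool.h>

struct toy_root_domain {
        atomic_int rto_start;           /* non-zero while an iterator runs */
        atomic_int rto_loop_next;       /* bumped to request another pass */
};

static bool toy_tell_cpu_to_push(struct toy_root_domain *rd)
{
        int expected = 0;

        /* Ask any in-flight iterator to rescan all overloaded CPUs. */
        atomic_fetch_add(&rd->rto_loop_next, 1);

        /* Only the first CPU to lower its priority starts the IRQ work. */
        return atomic_compare_exchange_strong(&rd->rto_start, &expected, 1);
}
```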
@@ -2289,7 +2172,7 @@ static void tell_cpu_to_push(struct rq *rq)
* The rto_cpu is updated under the lock, if it has a valid CPU
* then the IPI is still running and will continue due to the
* update to loop_next, and nothing needs to be done here.
- * Otherwise it is finishing up and an ipi needs to be sent.
+ * Otherwise it is finishing up and an IPI needs to be sent.
*/
if (rq->rd->rto_cpu < 0)
cpu = rto_next_cpu(rq->rd);
@@ -2419,15 +2302,13 @@ static void pull_rt_task(struct rq *this_rq)
* p if it is lower in priority than the
* current task on the run queue
*/
- if (p->prio < src_rq->curr->prio)
+ if (p->prio < src_rq->donor->prio)
goto skip;
if (is_migration_disabled(p)) {
push_task = get_push_task(src_rq);
} else {
- deactivate_task(src_rq, p, 0);
- set_task_cpu(p, this_cpu);
- activate_task(this_rq, p, 0);
+ move_queued_task_locked(src_rq, this_rq, p);
resched = true;
}
/*
@@ -2441,9 +2322,11 @@ skip:
double_unlock_balance(this_rq, src_rq);
if (push_task) {
+ preempt_disable();
raw_spin_rq_unlock(this_rq);
stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
push_task, &src_rq->push_work);
+ preempt_enable();
raw_spin_rq_lock(this_rq);
}
}
@@ -2461,9 +2344,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
bool need_to_push = !task_on_cpu(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
p->nr_cpus_allowed > 1 &&
- (dl_task(rq->curr) || rt_task(rq->curr)) &&
+ (dl_task(rq->donor) || rt_task(rq->donor)) &&
(rq->curr->nr_cpus_allowed < 2 ||
- rq->curr->prio <= p->prio);
+ rq->donor->prio <= p->prio);
if (need_to_push)
push_rt_tasks(rq);
@@ -2519,7 +2402,6 @@ void __init init_sched_rt_class(void)
GFP_KERNEL, cpu_to_node(i));
}
}
-#endif /* CONFIG_SMP */
/*
* When switching a task to RT, we may overload the runqueue
@@ -2543,11 +2425,9 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
* then see if we can move to another run queue.
*/
if (task_on_rq_queued(p)) {
-#ifdef CONFIG_SMP
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
rt_queue_push_tasks(rq);
-#endif /* CONFIG_SMP */
- if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
+ if (p->prio < rq->donor->prio && cpu_online(cpu_of(rq)))
resched_curr(rq);
}
}
@@ -2557,13 +2437,15 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
* us to initiate a push or pull.
*/
static void
-prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
+prio_changed_rt(struct rq *rq, struct task_struct *p, u64 oldprio)
{
if (!task_on_rq_queued(p))
return;
- if (task_current(rq, p)) {
-#ifdef CONFIG_SMP
+ if (p->prio == oldprio)
+ return;
+
+ if (task_current_donor(rq, p)) {
/*
* If our priority decreases while running, we
* may need to pull tasks to this runqueue.
@@ -2577,18 +2459,13 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
*/
if (p->prio > rq->rt.highest_prio.curr)
resched_curr(rq);
-#else
- /* For UP simply resched on drop of prio */
- if (oldprio < p->prio)
- resched_curr(rq);
-#endif /* CONFIG_SMP */
} else {
/*
* This task is not running, but if it is
* greater than the current running task
* then reschedule.
*/
- if (p->prio < rq->curr->prio)
+ if (p->prio < rq->donor->prio)
resched_curr(rq);
}
}
@@ -2617,9 +2494,9 @@ static void watchdog(struct rq *rq, struct task_struct *p)
}
}
}
-#else
+#else /* !CONFIG_POSIX_TIMERS: */
static inline void watchdog(struct rq *rq, struct task_struct *p) { }
-#endif
+#endif /* !CONFIG_POSIX_TIMERS */
/*
* scheduler tick hitting a task of our scheduling class.
@@ -2639,7 +2516,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
watchdog(rq, p);
/*
- * RR tasks need a special form of timeslice management.
+ * RR tasks need a special form of time-slice management.
* FIFO tasks have no timeslices.
*/
if (p->policy != SCHED_RR)
@@ -2674,21 +2551,37 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
return 0;
}
+#ifdef CONFIG_SCHED_CORE
+static int task_is_throttled_rt(struct task_struct *p, int cpu)
+{
+ struct rt_rq *rt_rq;
+
+#ifdef CONFIG_RT_GROUP_SCHED // XXX maybe add task_rt_rq(), see also sched_rt_period_rt_rq
+ rt_rq = task_group(p)->rt_rq[cpu];
+ WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group);
+#else
+ rt_rq = &cpu_rq(cpu)->rt;
+#endif
+
+ return rt_rq_throttled(rt_rq);
+}
+#endif /* CONFIG_SCHED_CORE */
+
DEFINE_SCHED_CLASS(rt) = {
+ .queue_mask = 4,
+
.enqueue_task = enqueue_task_rt,
.dequeue_task = dequeue_task_rt,
.yield_task = yield_task_rt,
- .check_preempt_curr = check_preempt_curr_rt,
+ .wakeup_preempt = wakeup_preempt_rt,
- .pick_next_task = pick_next_task_rt,
+ .pick_task = pick_task_rt,
.put_prev_task = put_prev_task_rt,
.set_next_task = set_next_task_rt,
-#ifdef CONFIG_SMP
.balance = balance_rt,
- .pick_task = pick_task_rt,
.select_task_rq = select_task_rq_rt,
.set_cpus_allowed = set_cpus_allowed_common,
.rq_online = rq_online_rt,
@@ -2696,17 +2589,20 @@ DEFINE_SCHED_CLASS(rt) = {
.task_woken = task_woken_rt,
.switched_from = switched_from_rt,
.find_lock_rq = find_lock_lowest_rq,
-#endif
.task_tick = task_tick_rt,
.get_rr_interval = get_rr_interval_rt,
- .prio_changed = prio_changed_rt,
.switched_to = switched_to_rt,
+ .prio_changed = prio_changed_rt,
.update_curr = update_curr_rt,
+#ifdef CONFIG_SCHED_CORE
+ .task_is_throttled = task_is_throttled_rt,
+#endif
+
#ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1,
#endif
@@ -2772,6 +2668,9 @@ static int tg_rt_schedulable(struct task_group *tg, void *data)
tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
return -EBUSY;
+ if (WARN_ON(!rt_group_sched_enabled() && tg != &root_task_group))
+ return -EBUSY;
+
total = to_ratio(period, runtime);
/*
@@ -2926,42 +2825,26 @@ static int sched_rt_global_constraints(void)
int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
{
- /* Don't accept realtime tasks when there is no way for them to run */
- if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+ /* Don't accept real-time tasks when there is no way for them to run */
+ if (rt_group_sched_enabled() && rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
return 0;
return 1;
}
-#else /* !CONFIG_RT_GROUP_SCHED */
+#else /* !CONFIG_RT_GROUP_SCHED: */
#ifdef CONFIG_SYSCTL
static int sched_rt_global_constraints(void)
{
- unsigned long flags;
- int i;
-
- raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
- for_each_possible_cpu(i) {
- struct rt_rq *rt_rq = &cpu_rq(i)->rt;
-
- raw_spin_lock(&rt_rq->rt_runtime_lock);
- rt_rq->rt_runtime = global_rt_runtime();
- raw_spin_unlock(&rt_rq->rt_runtime_lock);
- }
- raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
-
return 0;
}
#endif /* CONFIG_SYSCTL */
-#endif /* CONFIG_RT_GROUP_SCHED */
+#endif /* !CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_SYSCTL
static int sched_rt_global_validate(void)
{
- if (sysctl_sched_rt_period <= 0)
- return -EINVAL;
-
if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
((u64)sysctl_sched_rt_runtime *
@@ -2973,15 +2856,9 @@ static int sched_rt_global_validate(void)
static void sched_rt_do_global(void)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
- def_rt_bandwidth.rt_runtime = global_rt_runtime();
- def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
- raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
}
-static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
+static int sched_rt_handler(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
int old_period, old_runtime;
@@ -2989,10 +2866,11 @@ static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
int ret;
mutex_lock(&mutex);
+ sched_domains_mutex_lock();
old_period = sysctl_sched_rt_period;
old_runtime = sysctl_sched_rt_runtime;
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write) {
ret = sched_rt_global_validate();
@@ -3015,12 +2893,19 @@ undo:
sysctl_sched_rt_period = old_period;
sysctl_sched_rt_runtime = old_runtime;
}
+ sched_domains_mutex_unlock();
mutex_unlock(&mutex);
+ /*
+ * After changing maximum available bandwidth for DEADLINE, we need to
+ * recompute per-root-domain and per-CPU variables accordingly.
+ */
+ rebuild_sched_domains();
+
return ret;
}
-static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
+static int sched_rr_handler(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
int ret;
@@ -3030,12 +2915,15 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
ret = proc_dointvec(table, write, buffer, lenp, ppos);
/*
* Make sure that internally we keep jiffies.
- * Also, writing zero resets the timeslice to default:
+ * Also, writing zero resets the time-slice to default:
*/
if (!ret && write) {
sched_rr_timeslice =
sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
msecs_to_jiffies(sysctl_sched_rr_timeslice);
+
+ if (sysctl_sched_rr_timeslice <= 0)
+ sysctl_sched_rr_timeslice = jiffies_to_msecs(RR_TIMESLICE);
}
mutex_unlock(&mutex);
@@ -3043,7 +2931,6 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
}
#endif /* CONFIG_SYSCTL */
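
A worked example of the unit bookkeeping in sched_rr_handler(), assuming HZ = 250 (one jiffy = 4 ms) purely for illustration: the sysctl speaks milliseconds, sched_rr_timeslice is kept in jiffies, and a write of zero or less restores the RR_TIMESLICE default:

```c
#include <stdio.h>

#define HZ              250                     /* assumed for illustration */
#define RR_TIMESLICE    (100 * HZ / 1000)       /* default: 100 ms in jiffies */

static int toy_msecs_to_jiffies(int ms)
{
        return ms * HZ / 1000;
}

int main(void)
{
        int sysctl_ms = 30;     /* value an admin wrote to the sysctl */
        int timeslice = sysctl_ms <= 0 ? RR_TIMESLICE
                                       : toy_msecs_to_jiffies(sysctl_ms);

        printf("timeslice = %d jiffies\n", timeslice);  /* 7 with HZ=250 */
        return 0;
}
```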
-#ifdef CONFIG_SCHED_DEBUG
void print_rt_stats(struct seq_file *m, int cpu)
{
rt_rq_iter_t iter;
@@ -3054,4 +2941,3 @@ void print_rt_stats(struct seq_file *m, int cpu)
print_rt_rq(m, cpu, rt_rq);
rcu_read_unlock();
}
-#endif /* CONFIG_SCHED_DEBUG */