diff options
Diffstat (limited to 'kernel/sched/rt.c')
-rw-r--r-- | kernel/sched/rt.c | 474 |
1 files changed, 217 insertions, 257 deletions
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 3261b067b67e..e40422c37033 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -8,10 +8,6 @@ int sched_rr_timeslice = RR_TIMESLICE; /* More than 4 hours if BW_SHIFT equals 20. */ static const u64 max_rt_runtime = MAX_BW; -static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); - -struct rt_bandwidth def_rt_bandwidth; - /* * period over which we measure -rt task CPU usage in us. * default: 1s @@ -26,11 +22,11 @@ int sysctl_sched_rt_runtime = 950000; #ifdef CONFIG_SYSCTL static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ; -static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rt_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rr_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -static struct ctl_table sched_rt_sysctls[] = { +static const struct ctl_table sched_rt_sysctls[] = { { .procname = "sched_rt_period_us", .data = &sysctl_sched_rt_period, @@ -56,7 +52,6 @@ static struct ctl_table sched_rt_sysctls[] = { .mode = 0644, .proc_handler = sched_rr_handler, }, - {} }; static int __init sched_rt_sysctl_init(void) @@ -67,6 +62,41 @@ static int __init sched_rt_sysctl_init(void) late_initcall(sched_rt_sysctl_init); #endif +void init_rt_rq(struct rt_rq *rt_rq) +{ + struct rt_prio_array *array; + int i; + + array = &rt_rq->active; + for (i = 0; i < MAX_RT_PRIO; i++) { + INIT_LIST_HEAD(array->queue + i); + __clear_bit(i, array->bitmap); + } + /* delimiter for bitsearch: */ + __set_bit(MAX_RT_PRIO, array->bitmap); + +#if defined CONFIG_SMP + rt_rq->highest_prio.curr = MAX_RT_PRIO-1; + rt_rq->highest_prio.next = MAX_RT_PRIO-1; + rt_rq->overloaded = 0; + plist_head_init(&rt_rq->pushable_tasks); +#endif /* CONFIG_SMP */ + /* We start is dequeued state, because no RT tasks are queued */ + rt_rq->rt_queued = 0; + +#ifdef CONFIG_RT_GROUP_SCHED + rt_rq->rt_time = 0; + rt_rq->rt_throttled = 0; + rt_rq->rt_runtime = 0; + raw_spin_lock_init(&rt_rq->rt_runtime_lock); + rt_rq->tg = &root_task_group; +#endif +} + +#ifdef CONFIG_RT_GROUP_SCHED + +static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); + static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer) { struct rt_bandwidth *rt_b = @@ -98,9 +128,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) raw_spin_lock_init(&rt_b->rt_runtime_lock); - hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_HARD); - rt_b->rt_period_timer.function = sched_rt_period_timer; + hrtimer_setup(&rt_b->rt_period_timer, sched_rt_period_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_HARD); } static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b) @@ -131,35 +160,6 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) do_start_rt_bandwidth(rt_b); } -void init_rt_rq(struct rt_rq *rt_rq) -{ - struct rt_prio_array *array; - int i; - - array = &rt_rq->active; - for (i = 0; i < MAX_RT_PRIO; i++) { - INIT_LIST_HEAD(array->queue + i); - __clear_bit(i, array->bitmap); - } - /* delimiter for bitsearch: */ - __set_bit(MAX_RT_PRIO, array->bitmap); - -#if defined CONFIG_SMP - rt_rq->highest_prio.curr = MAX_RT_PRIO-1; - rt_rq->highest_prio.next = MAX_RT_PRIO-1; - rt_rq->overloaded = 0; - plist_head_init(&rt_rq->pushable_tasks); -#endif /* CONFIG_SMP */ - /* We start is dequeued state, because no RT tasks are queued */ - rt_rq->rt_queued = 0; - - rt_rq->rt_time = 0; - rt_rq->rt_throttled = 0; - rt_rq->rt_runtime = 0; - raw_spin_lock_init(&rt_rq->rt_runtime_lock); -} - -#ifdef CONFIG_RT_GROUP_SCHED static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) { hrtimer_cancel(&rt_b->rt_period_timer); @@ -169,19 +169,21 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se) { -#ifdef CONFIG_SCHED_DEBUG WARN_ON_ONCE(!rt_entity_is_task(rt_se)); -#endif + return container_of(rt_se, struct task_struct, rt); } static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) { + /* Cannot fold with non-CONFIG_RT_GROUP_SCHED version, layout */ + WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group); return rt_rq->rq; } static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) { + WARN_ON(!rt_group_sched_enabled() && rt_se->rt_rq->tg != &root_task_group); return rt_se->rt_rq; } @@ -189,20 +191,26 @@ static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se) { struct rt_rq *rt_rq = rt_se->rt_rq; + WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group); return rt_rq->rq; } void unregister_rt_sched_group(struct task_group *tg) { + if (!rt_group_sched_enabled()) + return; + if (tg->rt_se) destroy_rt_bandwidth(&tg->rt_bandwidth); - } void free_rt_sched_group(struct task_group *tg) { int i; + if (!rt_group_sched_enabled()) + return; + for_each_possible_cpu(i) { if (tg->rt_rq) kfree(tg->rt_rq[i]); @@ -247,6 +255,9 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) struct sched_rt_entity *rt_se; int i; + if (!rt_group_sched_enabled()) + return 1; + tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL); if (!tg->rt_rq) goto err; @@ -254,8 +265,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) if (!tg->rt_se) goto err; - init_rt_bandwidth(&tg->rt_bandwidth, - ktime_to_ns(def_rt_bandwidth.rt_period), 0); + init_rt_bandwidth(&tg->rt_bandwidth, ktime_to_ns(global_rt_period()), 0); for_each_possible_cpu(i) { rt_rq = kzalloc_node(sizeof(struct rt_rq), @@ -486,9 +496,6 @@ static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu) static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) { - if (!rt_rq->tg) - return RUNTIME_INF; - return rt_rq->rt_runtime; } @@ -501,6 +508,11 @@ typedef struct task_group *rt_rq_iter_t; static inline struct task_group *next_task_group(struct task_group *tg) { + if (!rt_group_sched_enabled()) { + WARN_ON(tg != &root_task_group); + return NULL; + } + do { tg = list_entry_rcu(tg->list.next, typeof(struct task_group), list); @@ -513,9 +525,9 @@ static inline struct task_group *next_task_group(struct task_group *tg) } #define for_each_rt_rq(rt_rq, iter, rq) \ - for (iter = container_of(&task_groups, typeof(*iter), list); \ - (iter = next_task_group(iter)) && \ - (rt_rq = iter->rt_rq[cpu_of(rq)]);) + for (iter = &root_task_group; \ + iter && (rt_rq = iter->rt_rq[cpu_of(rq)]); \ + iter = next_task_group(iter)) #define for_each_sched_rt_entity(rt_se) \ for (; rt_se; rt_se = rt_se->parent) @@ -530,7 +542,7 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags) static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { - struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; + struct task_struct *donor = rq_of_rt_rq(rt_rq)->donor; struct rq *rq = rq_of_rt_rq(rt_rq); struct sched_rt_entity *rt_se; @@ -544,7 +556,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) else if (!on_rt_rq(rt_se)) enqueue_rt_entity(rt_se, 0); - if (rt_rq->highest_prio.curr < curr->prio) + if (rt_rq->highest_prio.curr < donor->prio) resched_curr(rq); } } @@ -605,70 +617,6 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) return &rt_rq->tg->rt_bandwidth; } -#else /* !CONFIG_RT_GROUP_SCHED */ - -static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) -{ - return rt_rq->rt_runtime; -} - -static inline u64 sched_rt_period(struct rt_rq *rt_rq) -{ - return ktime_to_ns(def_rt_bandwidth.rt_period); -} - -typedef struct rt_rq *rt_rq_iter_t; - -#define for_each_rt_rq(rt_rq, iter, rq) \ - for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL) - -#define for_each_sched_rt_entity(rt_se) \ - for (; rt_se; rt_se = NULL) - -static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) -{ - return NULL; -} - -static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) -{ - struct rq *rq = rq_of_rt_rq(rt_rq); - - if (!rt_rq->rt_nr_running) - return; - - enqueue_top_rt_rq(rt_rq); - resched_curr(rq); -} - -static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) -{ - dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); -} - -static inline int rt_rq_throttled(struct rt_rq *rt_rq) -{ - return rt_rq->rt_throttled; -} - -static inline const struct cpumask *sched_rt_period_mask(void) -{ - return cpu_online_mask; -} - -static inline -struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) -{ - return &cpu_rq(cpu)->rt; -} - -static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) -{ - return &def_rt_bandwidth; -} - -#endif /* CONFIG_RT_GROUP_SCHED */ - bool sched_rt_bandwidth_account(struct rt_rq *rt_rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); @@ -860,7 +808,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) const struct cpumask *span; span = sched_rt_period_mask(); -#ifdef CONFIG_RT_GROUP_SCHED + /* * FIXME: isolated CPUs should really leave the root task group, * whether they are isolcpus or were isolated via cpusets, lest @@ -872,7 +820,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) */ if (rt_b == &root_task_group.rt_bandwidth) span = cpu_online_mask; -#endif + for_each_cpu(i, span) { int enqueue = 0; struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); @@ -939,18 +887,6 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) return idle; } -static inline int rt_se_prio(struct sched_rt_entity *rt_se) -{ -#ifdef CONFIG_RT_GROUP_SCHED - struct rt_rq *rt_rq = group_rt_rq(rt_se); - - if (rt_rq) - return rt_rq->highest_prio.curr; -#endif - - return rt_task_of(rt_se)->prio; -} - static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) { u64 runtime = sched_rt_runtime(rt_rq); @@ -994,23 +930,91 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) return 0; } +#else /* !CONFIG_RT_GROUP_SCHED */ + +typedef struct rt_rq *rt_rq_iter_t; + +#define for_each_rt_rq(rt_rq, iter, rq) \ + for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL) + +#define for_each_sched_rt_entity(rt_se) \ + for (; rt_se; rt_se = NULL) + +static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) +{ + return NULL; +} + +static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) +{ + struct rq *rq = rq_of_rt_rq(rt_rq); + + if (!rt_rq->rt_nr_running) + return; + + enqueue_top_rt_rq(rt_rq); + resched_curr(rq); +} + +static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) +{ + dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); +} + +static inline int rt_rq_throttled(struct rt_rq *rt_rq) +{ + return false; +} + +static inline const struct cpumask *sched_rt_period_mask(void) +{ + return cpu_online_mask; +} + +static inline +struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) +{ + return &cpu_rq(cpu)->rt; +} + +#ifdef CONFIG_SMP +static void __enable_runtime(struct rq *rq) { } +static void __disable_runtime(struct rq *rq) { } +#endif + +#endif /* CONFIG_RT_GROUP_SCHED */ + +static inline int rt_se_prio(struct sched_rt_entity *rt_se) +{ +#ifdef CONFIG_RT_GROUP_SCHED + struct rt_rq *rt_rq = group_rt_rq(rt_se); + + if (rt_rq) + return rt_rq->highest_prio.curr; +#endif + + return rt_task_of(rt_se)->prio; +} + /* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class. */ static void update_curr_rt(struct rq *rq) { - struct task_struct *curr = rq->curr; - struct sched_rt_entity *rt_se = &curr->rt; + struct task_struct *donor = rq->donor; s64 delta_exec; - if (curr->sched_class != &rt_sched_class) + if (donor->sched_class != &rt_sched_class) return; delta_exec = update_curr_common(rq); if (unlikely(delta_exec <= 0)) return; +#ifdef CONFIG_RT_GROUP_SCHED + struct sched_rt_entity *rt_se = &donor->rt; + if (!rt_bandwidth_enabled()) return; @@ -1029,6 +1033,7 @@ static void update_curr_rt(struct rq *rq) do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq)); } } +#endif } static void @@ -1077,13 +1082,12 @@ inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) { struct rq *rq = rq_of_rt_rq(rt_rq); -#ifdef CONFIG_RT_GROUP_SCHED /* * Change rq's cpupri only if rt_rq is the top queue. */ - if (&rq->rt != rt_rq) + if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq) return; -#endif + if (rq->online && prio < prev_prio) cpupri_set(&rq->rd->cpupri, rq->cpu, prio); } @@ -1093,13 +1097,12 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) { struct rq *rq = rq_of_rt_rq(rt_rq); -#ifdef CONFIG_RT_GROUP_SCHED /* * Change rq's cpupri only if rt_rq is the top queue. */ - if (&rq->rt != rt_rq) + if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq) return; -#endif + if (rq->online && rt_rq->highest_prio.curr != prev_prio) cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); } @@ -1136,7 +1139,7 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio) /* * This may have been our highest task, and therefore - * we may have some recomputation to do + * we may have some re-computation to do */ if (prio == prev_prio) { struct rt_prio_array *array = &rt_rq->active; @@ -1167,8 +1170,7 @@ inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) if (rt_se_boosted(rt_se)) rt_rq->rt_nr_boosted++; - if (rt_rq->tg) - start_rt_bandwidth(&rt_rq->tg->rt_bandwidth); + start_rt_bandwidth(&rt_rq->tg->rt_bandwidth); } static void @@ -1185,7 +1187,6 @@ dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) static void inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { - start_rt_bandwidth(&def_rt_bandwidth); } static inline @@ -1269,11 +1270,9 @@ static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_arr static inline struct sched_statistics * __schedstats_from_rt_se(struct sched_rt_entity *rt_se) { -#ifdef CONFIG_RT_GROUP_SCHED /* schedstats is not supported for rt group. */ if (!rt_entity_is_task(rt_se)) return NULL; -#endif return &rt_task_of(rt_se)->stats; } @@ -1493,7 +1492,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) enqueue_pushable_task(rq, p); } -static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) +static bool dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) { struct sched_rt_entity *rt_se = &p->rt; @@ -1501,6 +1500,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) dequeue_rt_entity(rt_se, flags); dequeue_pushable_task(rq, p); + + return true; } /* @@ -1543,7 +1544,7 @@ static int find_lowest_rq(struct task_struct *task); static int select_task_rq_rt(struct task_struct *p, int cpu, int flags) { - struct task_struct *curr; + struct task_struct *curr, *donor; struct rq *rq; bool test; @@ -1555,6 +1556,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags) rcu_read_lock(); curr = READ_ONCE(rq->curr); /* unlocked access */ + donor = READ_ONCE(rq->donor); /* * If the current task on @p's runqueue is an RT task, then @@ -1572,7 +1574,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags) * * For equal prio tasks, we just let the scheduler sort it out. * - * Otherwise, just let it ride on the affined RQ and the + * Otherwise, just let it ride on the affine RQ and the * post-schedule router will push the preempted task away * * This test is optimistic, if we get it wrong the load-balancer @@ -1583,8 +1585,8 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags) * systems like big.LITTLE. */ test = curr && - unlikely(rt_task(curr)) && - (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio); + unlikely(rt_task(donor)) && + (curr->nr_cpus_allowed < 2 || donor->prio <= p->prio); if (test || !rt_task_fits_capacity(p, cpu)) { int target = find_lowest_rq(p); @@ -1614,12 +1616,8 @@ out: static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { - /* - * Current can't be migrated, useless to reschedule, - * let's hope p can move out. - */ if (rq->curr->nr_cpus_allowed == 1 || - !cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) + !cpupri_find(&rq->rd->cpupri, rq->donor, NULL)) return; /* @@ -1662,7 +1660,9 @@ static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf) */ static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags) { - if (p->prio < rq->curr->prio) { + struct task_struct *donor = rq->donor; + + if (p->prio < donor->prio) { resched_curr(rq); return; } @@ -1680,7 +1680,7 @@ static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags) * to move current somewhere else, making room for our non-migratable * task. */ - if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr)) + if (p->prio == donor->prio && !test_tsk_need_resched(rq->curr)) check_preempt_equal_prio(rq, p); #endif } @@ -1705,7 +1705,7 @@ static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool f * utilization. We only care of the case where we start to schedule a * rt task */ - if (rq->curr->sched_class != &rt_sched_class) + if (rq->donor->sched_class != &rt_sched_class) update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0); rt_queue_push_tasks(rq); @@ -1722,7 +1722,7 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq) BUG_ON(idx >= MAX_RT_PRIO); queue = array->queue + idx; - if (SCHED_WARN_ON(list_empty(queue))) + if (WARN_ON_ONCE(list_empty(queue))) return NULL; next = list_entry(queue->next, struct sched_rt_entity, run_list); @@ -1756,17 +1756,7 @@ static struct task_struct *pick_task_rt(struct rq *rq) return p; } -static struct task_struct *pick_next_task_rt(struct rq *rq) -{ - struct task_struct *p = pick_task_rt(rq); - - if (p) - set_next_task_rt(rq, p, true); - - return p; -} - -static void put_prev_task_rt(struct rq *rq, struct task_struct *p) +static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct task_struct *next) { struct sched_rt_entity *rt_se = &p->rt; struct rt_rq *rt_rq = &rq->rt; @@ -1791,15 +1781,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) /* Only try algorithms three times */ #define RT_MAX_TRIES 3 -static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) -{ - if (!task_on_cpu(rq, p) && - cpumask_test_cpu(cpu, &p->cpus_mask)) - return 1; - - return 0; -} - /* * Return the highest pushable rq's task, which is suitable to be executed * on the CPU, NULL otherwise @@ -1813,7 +1794,7 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) return NULL; plist_for_each_entry(p, head, pushable_tasks) { - if (pick_rt_task(rq, p, cpu)) + if (task_is_pushable(rq, p, cpu)) return p; } @@ -1913,6 +1894,27 @@ static int find_lowest_rq(struct task_struct *task) return -1; } +static struct task_struct *pick_next_pushable_task(struct rq *rq) +{ + struct task_struct *p; + + if (!has_pushable_tasks(rq)) + return NULL; + + p = plist_first_entry(&rq->rt.pushable_tasks, + struct task_struct, pushable_tasks); + + BUG_ON(rq->cpu != task_cpu(p)); + BUG_ON(task_current(rq, p)); + BUG_ON(task_current_donor(rq, p)); + BUG_ON(p->nr_cpus_allowed <= 1); + + BUG_ON(!task_on_rq_queued(p)); + BUG_ON(!rt_task(p)); + + return p; +} + /* Will lock the rq it finds */ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) { @@ -1943,18 +1945,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) /* * We had to unlock the run queue. In * the mean time, task could have - * migrated already or had its affinity changed. - * Also make sure that it wasn't scheduled on its rq. + * migrated already or had its affinity changed, + * therefore check if the task is still at the + * head of the pushable tasks list. * It is possible the task was scheduled, set * "migrate_disabled" and then got preempted, so we must * check the task migration disable flag here too. */ - if (unlikely(task_rq(task) != rq || + if (unlikely(is_migration_disabled(task) || !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) || - task_on_cpu(rq, task) || - !rt_task(task) || - is_migration_disabled(task) || - !task_on_rq_queued(task))) { + task != pick_next_pushable_task(rq))) { double_unlock_balance(rq, lowest_rq); lowest_rq = NULL; @@ -1974,26 +1974,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) return lowest_rq; } -static struct task_struct *pick_next_pushable_task(struct rq *rq) -{ - struct task_struct *p; - - if (!has_pushable_tasks(rq)) - return NULL; - - p = plist_first_entry(&rq->rt.pushable_tasks, - struct task_struct, pushable_tasks); - - BUG_ON(rq->cpu != task_cpu(p)); - BUG_ON(task_current(rq, p)); - BUG_ON(p->nr_cpus_allowed <= 1); - - BUG_ON(!task_on_rq_queued(p)); - BUG_ON(!rt_task(p)); - - return p; -} - /* * If the current CPU has more than one RT task, see if the non * running task can migrate over to a CPU that is running a task @@ -2018,7 +1998,7 @@ retry: * higher priority than current. If that's the case * just reschedule current. */ - if (unlikely(next_task->prio < rq->curr->prio)) { + if (unlikely(next_task->prio < rq->donor->prio)) { resched_curr(rq); return 0; } @@ -2039,7 +2019,7 @@ retry: * Note that the stoppers are masqueraded as SCHED_FIFO * (cf. sched_set_stop_task()), so we can't rely on rt_task(). */ - if (rq->curr->sched_class != &rt_sched_class) + if (rq->donor->sched_class != &rt_sched_class) return 0; cpu = find_lowest_rq(rq->curr); @@ -2106,9 +2086,7 @@ retry: goto retry; } - deactivate_task(rq, next_task, 0); - set_task_cpu(next_task, lowest_rq->cpu); - activate_task(lowest_rq, next_task, 0); + move_queued_task_locked(rq, lowest_rq, next_task); resched_curr(lowest_rq); ret = 1; @@ -2148,14 +2126,14 @@ static void push_rt_tasks(struct rq *rq) * if its the only CPU with multiple RT tasks queued, and a large number * of CPUs scheduling a lower priority task at the same time. * - * Each root domain has its own irq work function that can iterate over + * Each root domain has its own IRQ work function that can iterate over * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT * task must be checked if there's one or many CPUs that are lowering - * their priority, there's a single irq work iterator that will try to + * their priority, there's a single IRQ work iterator that will try to * push off RT tasks that are waiting to run. * * When a CPU schedules a lower priority task, it will kick off the - * irq work iterator that will jump to each CPU with overloaded RT tasks. + * IRQ work iterator that will jump to each CPU with overloaded RT tasks. * As it only takes the first CPU that schedules a lower priority task * to start the process, the rto_start variable is incremented and if * the atomic result is one, then that CPU will try to take the rto_lock. @@ -2163,7 +2141,7 @@ static void push_rt_tasks(struct rq *rq) * CPUs scheduling lower priority tasks. * * All CPUs that are scheduling a lower priority task will increment the - * rt_loop_next variable. This will make sure that the irq work iterator + * rt_loop_next variable. This will make sure that the IRQ work iterator * checks all RT overloaded CPUs whenever a CPU schedules a new lower * priority task, even if the iterator is in the middle of a scan. Incrementing * the rt_loop_next will cause the iterator to perform another scan. @@ -2243,7 +2221,7 @@ static void tell_cpu_to_push(struct rq *rq) * The rto_cpu is updated under the lock, if it has a valid CPU * then the IPI is still running and will continue due to the * update to loop_next, and nothing needs to be done here. - * Otherwise it is finishing up and an ipi needs to be sent. + * Otherwise it is finishing up and an IPI needs to be sent. */ if (rq->rd->rto_cpu < 0) cpu = rto_next_cpu(rq->rd); @@ -2373,15 +2351,13 @@ static void pull_rt_task(struct rq *this_rq) * p if it is lower in priority than the * current task on the run queue */ - if (p->prio < src_rq->curr->prio) + if (p->prio < src_rq->donor->prio) goto skip; if (is_migration_disabled(p)) { push_task = get_push_task(src_rq); } else { - deactivate_task(src_rq, p, 0); - set_task_cpu(p, this_cpu); - activate_task(this_rq, p, 0); + move_queued_task_locked(src_rq, this_rq, p); resched = true; } /* @@ -2417,9 +2393,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) bool need_to_push = !task_on_cpu(rq, p) && !test_tsk_need_resched(rq->curr) && p->nr_cpus_allowed > 1 && - (dl_task(rq->curr) || rt_task(rq->curr)) && + (dl_task(rq->donor) || rt_task(rq->donor)) && (rq->curr->nr_cpus_allowed < 2 || - rq->curr->prio <= p->prio); + rq->donor->prio <= p->prio); if (need_to_push) push_rt_tasks(rq); @@ -2503,7 +2479,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) rt_queue_push_tasks(rq); #endif /* CONFIG_SMP */ - if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq))) + if (p->prio < rq->donor->prio && cpu_online(cpu_of(rq))) resched_curr(rq); } } @@ -2518,7 +2494,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) if (!task_on_rq_queued(p)) return; - if (task_current(rq, p)) { + if (task_current_donor(rq, p)) { #ifdef CONFIG_SMP /* * If our priority decreases while running, we @@ -2544,7 +2520,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) * greater than the current running task * then reschedule. */ - if (p->prio < rq->curr->prio) + if (p->prio < rq->donor->prio) resched_curr(rq); } } @@ -2595,7 +2571,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) watchdog(rq, p); /* - * RR tasks need a special form of timeslice management. + * RR tasks need a special form of time-slice management. * FIFO tasks have no timeslices. */ if (p->policy != SCHED_RR) @@ -2635,8 +2611,9 @@ static int task_is_throttled_rt(struct task_struct *p, int cpu) { struct rt_rq *rt_rq; -#ifdef CONFIG_RT_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED // XXX maybe add task_rt_rq(), see also sched_rt_period_rt_rq rt_rq = task_group(p)->rt_rq[cpu]; + WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group); #else rt_rq = &cpu_rq(cpu)->rt; #endif @@ -2653,13 +2630,12 @@ DEFINE_SCHED_CLASS(rt) = { .wakeup_preempt = wakeup_preempt_rt, - .pick_next_task = pick_next_task_rt, + .pick_task = pick_task_rt, .put_prev_task = put_prev_task_rt, .set_next_task = set_next_task_rt, #ifdef CONFIG_SMP .balance = balance_rt, - .pick_task = pick_task_rt, .select_task_rq = select_task_rq_rt, .set_cpus_allowed = set_cpus_allowed_common, .rq_online = rq_online_rt, @@ -2747,6 +2723,9 @@ static int tg_rt_schedulable(struct task_group *tg, void *data) tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg)) return -EBUSY; + if (WARN_ON(!rt_group_sched_enabled() && tg != &root_task_group)) + return -EBUSY; + total = to_ratio(period, runtime); /* @@ -2901,8 +2880,8 @@ static int sched_rt_global_constraints(void) int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) { - /* Don't accept realtime tasks when there is no way for them to run */ - if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0) + /* Don't accept real-time tasks when there is no way for them to run */ + if (rt_group_sched_enabled() && rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0) return 0; return 1; @@ -2913,19 +2892,6 @@ int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) #ifdef CONFIG_SYSCTL static int sched_rt_global_constraints(void) { - unsigned long flags; - int i; - - raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); - for_each_possible_cpu(i) { - struct rt_rq *rt_rq = &cpu_rq(i)->rt; - - raw_spin_lock(&rt_rq->rt_runtime_lock); - rt_rq->rt_runtime = global_rt_runtime(); - raw_spin_unlock(&rt_rq->rt_runtime_lock); - } - raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); - return 0; } #endif /* CONFIG_SYSCTL */ @@ -2945,15 +2911,9 @@ static int sched_rt_global_validate(void) static void sched_rt_do_global(void) { - unsigned long flags; - - raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); - def_rt_bandwidth.rt_runtime = global_rt_runtime(); - def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period()); - raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); } -static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rt_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int old_period, old_runtime; @@ -2961,6 +2921,7 @@ static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, int ret; mutex_lock(&mutex); + sched_domains_mutex_lock(); old_period = sysctl_sched_rt_period; old_runtime = sysctl_sched_rt_runtime; @@ -2987,12 +2948,13 @@ undo: sysctl_sched_rt_period = old_period; sysctl_sched_rt_runtime = old_runtime; } + sched_domains_mutex_unlock(); mutex_unlock(&mutex); return ret; } -static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rr_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -3002,7 +2964,7 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, ret = proc_dointvec(table, write, buffer, lenp, ppos); /* * Make sure that internally we keep jiffies. - * Also, writing zero resets the timeslice to default: + * Also, writing zero resets the time-slice to default: */ if (!ret && write) { sched_rr_timeslice = @@ -3018,7 +2980,6 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, } #endif /* CONFIG_SYSCTL */ -#ifdef CONFIG_SCHED_DEBUG void print_rt_stats(struct seq_file *m, int cpu) { rt_rq_iter_t iter; @@ -3029,4 +2990,3 @@ void print_rt_stats(struct seq_file *m, int cpu) print_rt_rq(m, cpu, rt_rq); rcu_read_unlock(); } -#endif /* CONFIG_SCHED_DEBUG */ |