diff options
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c | 6 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 35 | ||||
-rw-r--r-- | kernel/sched/ext.c | 6 | ||||
-rw-r--r-- | kernel/sched/ext_idle.c | 28 | ||||
-rw-r--r-- | kernel/sched/fair.c | 9 | ||||
-rw-r--r-- | kernel/sched/sched.h | 41 |
6 files changed, 69 insertions, 56 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index be00629f0ba4..ec33b0353027 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4472,7 +4472,7 @@ int wake_up_state(struct task_struct *p, unsigned int state) * __sched_fork() is basic setup which is also used by sched_init() to * initialize the boot CPU's idle task. */ -static void __sched_fork(unsigned long clone_flags, struct task_struct *p) +static void __sched_fork(u64 clone_flags, struct task_struct *p) { p->on_rq = 0; @@ -4707,7 +4707,7 @@ late_initcall(sched_core_sysctl_init); /* * fork()/clone()-time setup: */ -int sched_fork(unsigned long clone_flags, struct task_struct *p) +int sched_fork(u64 clone_flags, struct task_struct *p) { __sched_fork(clone_flags, p); /* @@ -9551,7 +9551,7 @@ static unsigned long tg_weight(struct task_group *tg) #ifdef CONFIG_FAIR_GROUP_SCHED return scale_load_down(tg->shares); #else - return sched_weight_from_cgroup(tg->scx_weight); + return sched_weight_from_cgroup(tg->scx.weight); #endif } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index f25301267e47..72c1f72463c7 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -875,7 +875,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se) */ if (dl_se->dl_defer && !dl_se->dl_defer_running && dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) { - if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) { + if (!is_dl_boosted(dl_se)) { /* * Set dl_se->dl_defer_armed and dl_throttled variables to @@ -1152,8 +1152,6 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf) /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */ static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC; -static bool dl_server_stopped(struct sched_dl_entity *dl_se); - static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se) { struct rq *rq = rq_of_dl_se(dl_se); @@ -1171,12 +1169,6 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_ if (!dl_se->dl_runtime) return HRTIMER_NORESTART; - if (!dl_se->server_has_tasks(dl_se)) { - replenish_dl_entity(dl_se); - dl_server_stopped(dl_se); - return HRTIMER_NORESTART; - } - if (dl_se->dl_defer_armed) { /* * First check if the server could consume runtime in background. @@ -1579,10 +1571,8 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p) void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec) { /* 0 runtime = fair server disabled */ - if (dl_se->dl_runtime) { - dl_se->dl_server_idle = 0; + if (dl_se->dl_runtime) update_curr_dl_se(dl_se->rq, dl_se, delta_exec); - } } void dl_server_start(struct sched_dl_entity *dl_se) @@ -1610,26 +1600,10 @@ void dl_server_stop(struct sched_dl_entity *dl_se) dl_se->dl_server_active = 0; } -static bool dl_server_stopped(struct sched_dl_entity *dl_se) -{ - if (!dl_se->dl_server_active) - return true; - - if (dl_se->dl_server_idle) { - dl_server_stop(dl_se); - return true; - } - - dl_se->dl_server_idle = 1; - return false; -} - void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, - dl_server_has_tasks_f has_tasks, dl_server_pick_f pick_task) { dl_se->rq = rq; - dl_se->server_has_tasks = has_tasks; dl_se->server_pick_task = pick_task; } @@ -2394,10 +2368,7 @@ again: if (dl_server(dl_se)) { p = dl_se->server_pick_task(dl_se); if (!p) { - if (!dl_server_stopped(dl_se)) { - dl_se->dl_yielded = 1; - update_curr_dl_se(rq, dl_se, 0); - } + dl_server_stop(dl_se); goto again; } rq->dl_server = dl_se; diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 4ae32ef179dd..088ceff38c8a 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6788,12 +6788,8 @@ __bpf_kfunc u32 scx_bpf_reenqueue_local(void) * CPUs disagree, they use %ENQUEUE_RESTORE which is bypassed to * the current local DSQ for running tasks and thus are not * visible to the BPF scheduler. - * - * Also skip re-enqueueing tasks that can only run on this - * CPU, as they would just be re-added to the same local - * DSQ without any benefit. */ - if (p->migration_pending || is_migration_disabled(p) || p->nr_cpus_allowed == 1) + if (p->migration_pending) continue; dispatch_dequeue(rq, p); diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index 7174e1c1a392..537c6992bb63 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -856,6 +856,32 @@ static bool check_builtin_idle_enabled(void) return false; } +/* + * Determine whether @p is a migration-disabled task in the context of BPF + * code. + * + * We can't simply check whether @p->migration_disabled is set in a + * sched_ext callback, because migration is always disabled for the current + * task while running BPF code. + * + * The prolog (__bpf_prog_enter) and epilog (__bpf_prog_exit) respectively + * disable and re-enable migration. For this reason, the current task + * inside a sched_ext callback is always a migration-disabled task. + * + * Therefore, when @p->migration_disabled == 1, check whether @p is the + * current task or not: if it is, then migration was not disabled before + * entering the callback, otherwise migration was disabled. + * + * Returns true if @p is migration-disabled, false otherwise. + */ +static bool is_bpf_migration_disabled(const struct task_struct *p) +{ + if (p->migration_disabled == 1) + return p != current; + else + return p->migration_disabled; +} + static s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags, const struct cpumask *allowed, u64 flags) { @@ -898,7 +924,7 @@ static s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_f * selection optimizations and simply check whether the previously * used CPU is idle and within the allowed cpumask. */ - if (p->nr_cpus_allowed == 1 || is_migration_disabled(p)) { + if (p->nr_cpus_allowed == 1 || is_bpf_migration_disabled(p)) { if (cpumask_test_cpu(prev_cpu, allowed ?: p->cpus_ptr) && scx_idle_test_and_clear_cpu(prev_cpu)) cpu = prev_cpu; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b173a059315c..426287930497 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3542,7 +3542,7 @@ out: } } -void init_numa_balancing(unsigned long clone_flags, struct task_struct *p) +void init_numa_balancing(u64 clone_flags, struct task_struct *p) { int mm_users = 0; struct mm_struct *mm = p->mm; @@ -8859,11 +8859,6 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq, struct task_stru return pick_next_task_fair(rq, prev, NULL); } -static bool fair_server_has_tasks(struct sched_dl_entity *dl_se) -{ - return !!dl_se->rq->cfs.nr_queued; -} - static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se) { return pick_task_fair(dl_se->rq); @@ -8875,7 +8870,7 @@ void fair_server_init(struct rq *rq) init_dl_entity(dl_se); - dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick_task); + dl_server_init(dl_se, rq, fair_server_pick_task); } /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index be9745d104f7..69458b9b46dc 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -365,25 +365,50 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6 * * dl_se::rq -- runqueue we belong to. * - * dl_se::server_has_tasks() -- used on bandwidth enforcement; we 'stop' the - * server when it runs out of tasks to run. - * * dl_se::server_pick() -- nested pick_next_task(); we yield the period if this * returns NULL. * * dl_server_update() -- called from update_curr_common(), propagates runtime * to the server. * - * dl_server_start() - * dl_server_stop() -- start/stop the server when it has (no) tasks. + * dl_server_start() -- start the server when it has tasks; it will stop + * automatically when there are no more tasks, per + * dl_se::server_pick() returning NULL. + * + * dl_server_stop() -- (force) stop the server; use when updating + * parameters. * * dl_server_init() -- initializes the server. + * + * When started the dl_server will (per dl_defer) schedule a timer for its + * zero-laxity point -- that is, unlike regular EDF tasks which run ASAP, a + * server will run at the very end of its period. + * + * This is done such that any runtime from the target class can be accounted + * against the server -- through dl_server_update() above -- such that when it + * becomes time to run, it might already be out of runtime and get deferred + * until the next period. In this case dl_server_timer() will alternate + * between defer and replenish but never actually enqueue the server. + * + * Only when the target class does not manage to exhaust the server's runtime + * (there's actualy starvation in the given period), will the dl_server get on + * the runqueue. Once queued it will pick tasks from the target class and run + * them until either its runtime is exhaused, at which point its back to + * dl_server_timer, or until there are no more tasks to run, at which point + * the dl_server stops itself. + * + * By stopping at this point the dl_server retains bandwidth, which, if a new + * task wakes up imminently (starting the server again), can be used -- + * subject to CBS wakeup rules -- without having to wait for the next period. + * + * Additionally, because of the dl_defer behaviour the start/stop behaviour is + * naturally thottled to once per period, avoiding high context switch + * workloads from spamming the hrtimer program/cancel paths. */ extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec); extern void dl_server_start(struct sched_dl_entity *dl_se); extern void dl_server_stop(struct sched_dl_entity *dl_se); extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, - dl_server_has_tasks_f has_tasks, dl_server_pick_f pick_task); extern void sched_init_dl_servers(void); @@ -1935,12 +1960,12 @@ extern void sched_setnuma(struct task_struct *p, int node); extern int migrate_task_to(struct task_struct *p, int cpu); extern int migrate_swap(struct task_struct *p, struct task_struct *t, int cpu, int scpu); -extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p); +extern void init_numa_balancing(u64 clone_flags, struct task_struct *p); #else /* !CONFIG_NUMA_BALANCING: */ static inline void -init_numa_balancing(unsigned long clone_flags, struct task_struct *p) +init_numa_balancing(u64 clone_flags, struct task_struct *p) { } |