diff options
Diffstat (limited to 'kernel/sched/pelt.h')
| -rw-r--r-- | kernel/sched/pelt.h | 175 |
1 files changed, 146 insertions, 29 deletions
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index 7e56b489ff32..f921302dc40f 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -1,10 +1,36 @@ -#ifdef CONFIG_SMP +// SPDX-License-Identifier: GPL-2.0 +#ifndef _KERNEL_SCHED_PELT_H +#define _KERNEL_SCHED_PELT_H +#include "sched.h" -int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se); -int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se); -int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq); +#include "sched-pelt.h" + +int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); +int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); +int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); +bool update_other_load_avgs(struct rq *rq); + +#ifdef CONFIG_SCHED_HW_PRESSURE +int update_hw_load_avg(u64 now, struct rq *rq, u64 capacity); + +static inline u64 hw_load_avg(struct rq *rq) +{ + return READ_ONCE(rq->avg_hw.load_avg); +} +#else /* !CONFIG_SCHED_HW_PRESSURE: */ +static inline int +update_hw_load_avg(u64 now, struct rq *rq, u64 capacity) +{ + return 0; +} + +static inline u64 hw_load_avg(struct rq *rq) +{ + return 0; +} +#endif /* !CONFIG_SCHED_HW_PRESSURE */ #ifdef CONFIG_HAVE_SCHED_AVG_IRQ int update_irq_load_avg(struct rq *rq, u64 running); @@ -16,14 +42,12 @@ update_irq_load_avg(struct rq *rq, u64 running) } #endif -/* - * When a task is dequeued, its estimated utilization should not be update if - * its util_avg has not been updated at least once. - * This flag is used to synchronize util_avg updates with util_est updates. - * We map this information into the LSB bit of the utilization saved at - * dequeue time (i.e. util_est.dequeued). 
- */ -#define UTIL_AVG_UNCHANGED 0x1 +#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024) + +static inline u32 get_pelt_divider(struct sched_avg *avg) +{ + return PELT_MIN_DIVIDER + avg->period_contrib; +} static inline void cfs_se_util_change(struct sched_avg *avg) { @@ -32,41 +56,134 @@ static inline void cfs_se_util_change(struct sched_avg *avg) if (!sched_feat(UTIL_EST)) return; - /* Avoid store if the flag has been already set */ - enqueued = avg->util_est.enqueued; + /* Avoid store if the flag has been already reset */ + enqueued = avg->util_est; if (!(enqueued & UTIL_AVG_UNCHANGED)) return; /* Reset flag to report util_avg has been updated */ enqueued &= ~UTIL_AVG_UNCHANGED; - WRITE_ONCE(avg->util_est.enqueued, enqueued); + WRITE_ONCE(avg->util_est, enqueued); } -#else +static inline u64 rq_clock_pelt(struct rq *rq) +{ + lockdep_assert_rq_held(rq); + assert_clock_updated(rq); -static inline int -update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) + return rq->clock_pelt - rq->lost_idle_time; +} + +/* The rq is idle, we can sync to clock_task */ +static inline void _update_idle_rq_clock_pelt(struct rq *rq) { - return 0; + rq->clock_pelt = rq_clock_task(rq); + + u64_u32_store(rq->clock_idle, rq_clock(rq)); + /* Paired with smp_rmb in migrate_se_pelt_lag() */ + smp_wmb(); + u64_u32_store(rq->clock_pelt_idle, rq_clock_pelt(rq)); } -static inline int -update_rt_rq_load_avg(u64 now, struct rq *rq, int running) +/* + * The clock_pelt scales the time to reflect the effective amount of + * computation done during the running delta time but then sync back to + * clock_task when rq is idle. 
+ * + * + * absolute time | 1| 2| 3| 4| 5| 6| 7| 8| 9|10|11|12|13|14|15|16 + * @ max capacity ------******---------------******--------------- + * @ half capacity ------************---------************--------- + * clock pelt | 1| 2| 3| 4| 7| 8| 9| 10| 11|14|15|16 + * + */ +static inline void update_rq_clock_pelt(struct rq *rq, s64 delta) { - return 0; + if (unlikely(is_idle_task(rq->curr))) { + _update_idle_rq_clock_pelt(rq); + return; + } + + /* + * When a rq runs at a lower compute capacity, it will need + * more time to do the same amount of work than at max + * capacity. In order to be invariant, we scale the delta to + * reflect how much work has been really done. + * Running longer results in stealing idle time that will + * disturb the load signal compared to max capacity. This + * stolen idle time will be automatically reflected when the + * rq will be idle and the clock will be synced with + * rq_clock_task. + */ + + /* + * Scale the elapsed time to reflect the real amount of + * computation + */ + delta = cap_scale(delta, arch_scale_cpu_capacity(cpu_of(rq))); + delta = cap_scale(delta, arch_scale_freq_capacity(cpu_of(rq))); + + rq->clock_pelt += delta; } -static inline int -update_dl_rq_load_avg(u64 now, struct rq *rq, int running) +/* + * When rq becomes idle, we have to check if it has lost idle time + * because it was fully busy. A rq is fully used when the /Sum util_sum + * is greater or equal to: + * (LOAD_AVG_MAX - 1024 + rq->cfs.avg.period_contrib) << SCHED_CAPACITY_SHIFT; + * For optimization and computing rounding purpose, we don't take into account + * the position in the current window (period_contrib) and we use the higher + * bound of util_sum to decide. 
+ */ +static inline void update_idle_rq_clock_pelt(struct rq *rq) { - return 0; + u32 divider = ((LOAD_AVG_MAX - 1024) << SCHED_CAPACITY_SHIFT) - LOAD_AVG_MAX; + u32 util_sum = rq->cfs.avg.util_sum; + util_sum += rq->avg_rt.util_sum; + util_sum += rq->avg_dl.util_sum; + + /* + * Reflecting stolen time makes sense only if the idle + * phase would be present at max capacity. As soon as the + * utilization of a rq has reached the maximum value, it is + * considered as an always running rq without idle time to + * steal. This potential idle time is considered as lost in + * this case. We keep track of this lost idle time compared to + * rq's clock_task. + */ + if (util_sum >= divider) + rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt; + + _update_idle_rq_clock_pelt(rq); } -static inline int -update_irq_load_avg(struct rq *rq, u64 running) +#ifdef CONFIG_CFS_BANDWIDTH +static inline void update_idle_cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) { - return 0; + u64 throttled; + + if (unlikely(cfs_rq->pelt_clock_throttled)) + throttled = U64_MAX; + else + throttled = cfs_rq->throttled_clock_pelt_time; + + u64_u32_store(cfs_rq->throttled_pelt_idle, throttled); } -#endif +/* rq's PELT clock normalized against any time this cfs_rq has spent throttled */ +static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) +{ + if (unlikely(cfs_rq->pelt_clock_throttled)) + return cfs_rq->throttled_clock_pelt - cfs_rq->throttled_clock_pelt_time; + + return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_pelt_time; +} +#else /* !CONFIG_CFS_BANDWIDTH: */ +static inline void update_idle_cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) { } +static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) +{ + return rq_clock_pelt(rq_of(cfs_rq)); +} +#endif /* !CONFIG_CFS_BANDWIDTH */ +#endif /* _KERNEL_SCHED_PELT_H */ |
