summaryrefslogtreecommitdiff
path: root/kernel/sched/pelt.c
diff options
context:
space:
mode:
author: Vincent Guittot <vincent.guittot@linaro.org> 2020-02-24 09:52:18 +0000
committer: Ingo Molnar <mingo@kernel.org> 2020-02-24 11:36:36 +0100
commit: 9f68395333ad7f5bfe2f83473fed363d4229f11c (patch)
tree: 888575956e952ea9b0037e16c35c04e3ddbbd21a /kernel/sched/pelt.c
parent: 0dacee1bfa70e171be3a12a30414c228453048d2 (diff)
sched/pelt: Add a new runnable average signal
Now that runnable_load_avg has been removed, we can replace it by a new signal that will highlight the runnable pressure on a cfs_rq. This signal tracks the waiting time of tasks on the rq and can help to better define the state of rqs. For now, only util_avg is used to define the state of a rq: A rq with more than around 80% of utilization and more than 1 task is considered as overloaded. But the util_avg signal of a rq can become temporarily low after a task has migrated onto another rq, which can bias the classification of the rq. When tasks compete for the same rq, their runnable average signal will be higher than util_avg as it will include the waiting time, and we can use this signal to better classify cfs_rqs. The new runnable_avg will track the runnable time of a task, which simply adds the waiting time to the running time. The runnable_avg of a cfs_rq will be the \Sum of its se's runnable_avg, and the runnable_avg of a group entity will follow the one of the rq, similarly to util_avg. Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Signed-off-by: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: "Dietmar Eggemann <dietmar.eggemann@arm.com>" Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Valentin Schneider <valentin.schneider@arm.com> Cc: Phil Auld <pauld@redhat.com> Cc: Hillf Danton <hdanton@sina.com> Link: https://lore.kernel.org/r/20200224095223.13361-9-mgorman@techsingularity.net
Diffstat (limited to 'kernel/sched/pelt.c')
-rw-r--r-- kernel/sched/pelt.c 39
1 files changed, 28 insertions, 11 deletions
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index 3eb0ed333dcb..c40d57a2a248 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -108,7 +108,7 @@ static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
*/
static __always_inline u32
accumulate_sum(u64 delta, struct sched_avg *sa,
- unsigned long load, int running)
+ unsigned long load, unsigned long runnable, int running)
{
u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
u64 periods;
@@ -121,6 +121,8 @@ accumulate_sum(u64 delta, struct sched_avg *sa,
*/
if (periods) {
sa->load_sum = decay_load(sa->load_sum, periods);
+ sa->runnable_sum =
+ decay_load(sa->runnable_sum, periods);
sa->util_sum = decay_load((u64)(sa->util_sum), periods);
/*
@@ -146,6 +148,8 @@ accumulate_sum(u64 delta, struct sched_avg *sa,
if (load)
sa->load_sum += load * contrib;
+ if (runnable)
+ sa->runnable_sum += runnable * contrib << SCHED_CAPACITY_SHIFT;
if (running)
sa->util_sum += contrib << SCHED_CAPACITY_SHIFT;
@@ -182,7 +186,7 @@ accumulate_sum(u64 delta, struct sched_avg *sa,
*/
static __always_inline int
___update_load_sum(u64 now, struct sched_avg *sa,
- unsigned long load, int running)
+ unsigned long load, unsigned long runnable, int running)
{
u64 delta;
@@ -218,7 +222,7 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
* Also see the comment in accumulate_sum().
*/
if (!load)
- running = 0;
+ runnable = running = 0;
/*
* Now we know we crossed measurement unit boundaries. The *_avg
@@ -227,7 +231,7 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
* Step 1: accumulate *_sum since last_update_time. If we haven't
* crossed period boundaries, finish.
*/
- if (!accumulate_sum(delta, sa, load, running))
+ if (!accumulate_sum(delta, sa, load, runnable, running))
return 0;
return 1;
@@ -242,6 +246,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load)
* Step 2: update *_avg.
*/
sa->load_avg = div_u64(load * sa->load_sum, divider);
+ sa->runnable_avg = div_u64(sa->runnable_sum, divider);
WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
}
@@ -250,24 +255,30 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load)
*
* task:
* se_weight() = se->load.weight
+ * se_runnable() = !!on_rq
*
* group: [ see update_cfs_group() ]
* se_weight() = tg->weight * grq->load_avg / tg->load_avg
+ * se_runnable() = grq->h_nr_running
+ *
+ * runnable_sum = se_runnable() * runnable = grq->runnable_sum
+ * runnable_avg = runnable_sum
*
* load_sum := runnable
* load_avg = se_weight(se) * load_sum
*
- * XXX collapse load_sum and runnable_load_sum
- *
* cfq_rq:
*
+ * runnable_sum = \Sum se->avg.runnable_sum
+ * runnable_avg = \Sum se->avg.runnable_avg
+ *
* load_sum = \Sum se_weight(se) * se->avg.load_sum
* load_avg = \Sum se->avg.load_avg
*/
int __update_load_avg_blocked_se(u64 now, struct sched_entity *se)
{
- if (___update_load_sum(now, &se->avg, 0, 0)) {
+ if (___update_load_sum(now, &se->avg, 0, 0, 0)) {
___update_load_avg(&se->avg, se_weight(se));
trace_pelt_se_tp(se);
return 1;
@@ -278,7 +289,8 @@ int __update_load_avg_blocked_se(u64 now, struct sched_entity *se)
int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- if (___update_load_sum(now, &se->avg, !!se->on_rq, cfs_rq->curr == se)) {
+ if (___update_load_sum(now, &se->avg, !!se->on_rq, se_runnable(se),
+ cfs_rq->curr == se)) {
___update_load_avg(&se->avg, se_weight(se));
cfs_se_util_change(&se->avg);
@@ -293,6 +305,7 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
{
if (___update_load_sum(now, &cfs_rq->avg,
scale_load_down(cfs_rq->load.weight),
+ cfs_rq->h_nr_running,
cfs_rq->curr != NULL)) {
___update_load_avg(&cfs_rq->avg, 1);
@@ -310,7 +323,7 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
* util_sum = cpu_scale * load_sum
* runnable_sum = util_sum
*
- * load_avg is not supported and meaningless.
+ * load_avg and runnable_avg are not supported and meaningless.
*
*/
@@ -318,6 +331,7 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
{
if (___update_load_sum(now, &rq->avg_rt,
running,
+ running,
running)) {
___update_load_avg(&rq->avg_rt, 1);
@@ -335,7 +349,7 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
* util_sum = cpu_scale * load_sum
* runnable_sum = util_sum
*
- * load_avg is not supported and meaningless.
+ * load_avg and runnable_avg are not supported and meaningless.
*
*/
@@ -343,6 +357,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
{
if (___update_load_sum(now, &rq->avg_dl,
running,
+ running,
running)) {
___update_load_avg(&rq->avg_dl, 1);
@@ -361,7 +376,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
* util_sum = cpu_scale * load_sum
* runnable_sum = util_sum
*
- * load_avg is not supported and meaningless.
+ * load_avg and runnable_avg are not supported and meaningless.
*
*/
@@ -390,9 +405,11 @@ int update_irq_load_avg(struct rq *rq, u64 running)
*/
ret = ___update_load_sum(rq->clock - running, &rq->avg_irq,
0,
+ 0,
0);
ret += ___update_load_sum(rq->clock, &rq->avg_irq,
1,
+ 1,
1);
if (ret) {