From ebc0f83c78a2d26384401ecf2d2fa48063c0ee27 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Sat, 11 Jan 2020 04:53:39 -0500 Subject: timers/nohz: Update NOHZ load in remote tick The way loadavg is tracked during nohz only pays attention to the load upon entering nohz. This can be particularly noticeable if full nohz is entered while non-idle, and then the cpu goes idle and stays that way for a long time. Use the remote tick to ensure that full nohz cpus report their deltas within a reasonable time. [ swood: Added changelog and removed recheck of stopped tick. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Scott Wood Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/1578736419-14628-3-git-send-email-swood@redhat.com --- include/linux/sched/nohz.h | 2 ++ kernel/sched/core.c | 4 +++- kernel/sched/loadavg.c | 33 +++++++++++++++++++++++---------- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 1abe91ff6e4a..6d67e9a5af6b 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -15,9 +15,11 @@ static inline void nohz_balance_enter_idle(int cpu) { } #ifdef CONFIG_NO_HZ_COMMON void calc_load_nohz_start(void); +void calc_load_nohz_remote(struct rq *rq); void calc_load_nohz_stop(void); #else static inline void calc_load_nohz_start(void) { } +static inline void calc_load_nohz_remote(struct rq *rq) { } static inline void calc_load_nohz_stop(void) { } #endif /* CONFIG_NO_HZ_COMMON */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cf8b33dc4513..4ff03c27779e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3677,6 +3677,7 @@ static void sched_tick_remote(struct work_struct *work) if (cpu_is_offline(cpu)) goto out_unlock; + curr = rq->curr; update_rq_clock(rq); if (!is_idle_task(curr)) { @@ -3689,10 +3690,11 @@ static void sched_tick_remote(struct work_struct *work) } curr->sched_class->task_tick(rq, curr, 0); + calc_load_nohz_remote(rq); out_unlock: rq_unlock_irq(rq, &rf); - out_requeue: + /* * Run the remote tick once per second (1Hz). This arbitrary * frequency is large enough to avoid overload but short enough diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index 28a516575c18..de22da666ac7 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -231,16 +231,11 @@ static inline int calc_load_read_idx(void) return calc_load_idx & 1; } -void calc_load_nohz_start(void) +static void calc_load_nohz_fold(struct rq *rq) { - struct rq *this_rq = this_rq(); long delta; - /* - * We're going into NO_HZ mode, if there's any pending delta, fold it - * into the pending NO_HZ delta. - */ - delta = calc_load_fold_active(this_rq, 0); + delta = calc_load_fold_active(rq, 0); if (delta) { int idx = calc_load_write_idx(); @@ -248,6 +243,24 @@ void calc_load_nohz_start(void) } } +void calc_load_nohz_start(void) +{ + /* + * We're going into NO_HZ mode, if there's any pending delta, fold it + * into the pending NO_HZ delta. + */ + calc_load_nohz_fold(this_rq()); +} + +/* + * Keep track of the load for NOHZ_FULL, must be called between + * calc_load_nohz_{start,stop}(). + */ +void calc_load_nohz_remote(struct rq *rq) +{ + calc_load_nohz_fold(rq); +} + void calc_load_nohz_stop(void) { struct rq *this_rq = this_rq(); @@ -268,7 +281,7 @@ void calc_load_nohz_stop(void) this_rq->calc_load_update += LOAD_FREQ; } -static long calc_load_nohz_fold(void) +static long calc_load_nohz_read(void) { int idx = calc_load_read_idx(); long delta = 0; @@ -323,7 +336,7 @@ static void calc_global_nohz(void) } #else /* !CONFIG_NO_HZ_COMMON */ -static inline long calc_load_nohz_fold(void) { return 0; } +static inline long calc_load_nohz_read(void) { return 0; } static inline void calc_global_nohz(void) { } #endif /* CONFIG_NO_HZ_COMMON */ @@ -346,7 +359,7 @@ void calc_global_load(unsigned long ticks) /* * Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs. */ - delta = calc_load_nohz_fold(); + delta = calc_load_nohz_read(); if (delta) atomic_long_add(delta, &calc_load_tasks); -- cgit