summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/sched.h8
-rw-r--r--include/linux/tick.h92
-rw-r--r--kernel/time/tick-sched.c150
-rw-r--r--kernel/time/tick-sched.h1
4 files changed, 244 insertions, 7 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a10494a94cc3..307e48375f21 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -719,6 +719,10 @@ struct signal_struct {
/* Earliest-expiration cache. */
struct task_cputime cputime_expires;
+#ifdef CONFIG_NO_HZ_FULL
+ unsigned long tick_dep_mask;
+#endif
+
struct list_head cpu_timers[3];
struct pid *tty_old_pgrp;
@@ -1542,6 +1546,10 @@ struct task_struct {
VTIME_SYS,
} vtime_snap_whence;
#endif
+
+#ifdef CONFIG_NO_HZ_FULL
+ unsigned long tick_dep_mask;
+#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time; /* monotonic time in nsec */
u64 real_start_time; /* boot based time in nsec */
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 97fd4e543846..d60e5df8ec28 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -97,6 +97,18 @@ static inline void tick_broadcast_exit(void)
tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
}
+enum tick_dep_bits {
+ TICK_DEP_BIT_POSIX_TIMER = 0,
+ TICK_DEP_BIT_PERF_EVENTS = 1,
+ TICK_DEP_BIT_SCHED = 2,
+ TICK_DEP_BIT_CLOCK_UNSTABLE = 3
+};
+
+#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER)
+#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS)
+#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED)
+#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
+
#ifdef CONFIG_NO_HZ_COMMON
extern int tick_nohz_enabled;
extern int tick_nohz_tick_stopped(void);
@@ -154,6 +166,72 @@ static inline int housekeeping_any_cpu(void)
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
}
+extern void tick_nohz_dep_set(enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_task(struct task_struct *tsk,
+ enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
+ enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+ enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
+ enum tick_dep_bits bit);
+
+/*
+ * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
+ * on top of static keys.
+ */
+static inline void tick_dep_set(enum tick_dep_bits bit)
+{
+ if (tick_nohz_full_enabled())
+ tick_nohz_dep_set(bit);
+}
+
+static inline void tick_dep_clear(enum tick_dep_bits bit)
+{
+ if (tick_nohz_full_enabled())
+ tick_nohz_dep_clear(bit);
+}
+
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+ if (tick_nohz_full_cpu(cpu))
+ tick_nohz_dep_set_cpu(cpu, bit);
+}
+
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+ if (tick_nohz_full_cpu(cpu))
+ tick_nohz_dep_clear_cpu(cpu, bit);
+}
+
+static inline void tick_dep_set_task(struct task_struct *tsk,
+ enum tick_dep_bits bit)
+{
+ if (tick_nohz_full_enabled())
+ tick_nohz_dep_set_task(tsk, bit);
+}
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+ enum tick_dep_bits bit)
+{
+ if (tick_nohz_full_enabled())
+ tick_nohz_dep_clear_task(tsk, bit);
+}
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+ enum tick_dep_bits bit)
+{
+ if (tick_nohz_full_enabled())
+ tick_nohz_dep_set_signal(signal, bit);
+}
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+ enum tick_dep_bits bit)
+{
+ if (tick_nohz_full_enabled())
+ tick_nohz_dep_clear_signal(signal, bit);
+}
+
extern void tick_nohz_full_kick(void);
extern void tick_nohz_full_kick_cpu(int cpu);
extern void tick_nohz_full_kick_all(void);
@@ -166,6 +244,20 @@ static inline int housekeeping_any_cpu(void)
static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
+
+static inline void tick_dep_set(enum tick_dep_bits bit) { }
+static inline void tick_dep_clear(enum tick_dep_bits bit) { }
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_set_task(struct task_struct *tsk,
+ enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+ enum tick_dep_bits bit) { }
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+ enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+ enum tick_dep_bits bit) { }
+
static inline void tick_nohz_full_kick_cpu(int cpu) { }
static inline void tick_nohz_full_kick(void) { }
static inline void tick_nohz_full_kick_all(void) { }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 548a4e2551a9..74ab7dbdc023 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -158,11 +158,53 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
cpumask_var_t tick_nohz_full_mask;
cpumask_var_t housekeeping_mask;
bool tick_nohz_full_running;
+static unsigned long tick_dep_mask;
-static bool can_stop_full_tick(void)
+static void trace_tick_dependency(unsigned long dep)
+{
+ if (dep & TICK_DEP_MASK_POSIX_TIMER) {
+ trace_tick_stop(0, "posix timers running\n");
+ return;
+ }
+
+ if (dep & TICK_DEP_MASK_PERF_EVENTS) {
+ trace_tick_stop(0, "perf events running\n");
+ return;
+ }
+
+ if (dep & TICK_DEP_MASK_SCHED) {
+ trace_tick_stop(0, "more than 1 task in runqueue\n");
+ return;
+ }
+
+ if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
+ trace_tick_stop(0, "unstable sched clock\n");
+}
+
+static bool can_stop_full_tick(struct tick_sched *ts)
{
WARN_ON_ONCE(!irqs_disabled());
+ if (tick_dep_mask) {
+ trace_tick_dependency(tick_dep_mask);
+ return false;
+ }
+
+ if (ts->tick_dep_mask) {
+ trace_tick_dependency(ts->tick_dep_mask);
+ return false;
+ }
+
+ if (current->tick_dep_mask) {
+ trace_tick_dependency(current->tick_dep_mask);
+ return false;
+ }
+
+ if (current->signal->tick_dep_mask) {
+ trace_tick_dependency(current->signal->tick_dep_mask);
+ return false;
+ }
+
if (!sched_can_stop_tick()) {
trace_tick_stop(0, "more than 1 task in runqueue\n");
return false;
@@ -178,9 +220,10 @@ static bool can_stop_full_tick(void)
return false;
}
- /* sched_clock_tick() needs us? */
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
+ * sched_clock_tick() needs us?
+ *
* TODO: kick full dynticks CPUs when
* sched_clock_stable is set.
*/
@@ -199,13 +242,13 @@ static bool can_stop_full_tick(void)
return true;
}
-static void nohz_full_kick_work_func(struct irq_work *work)
+static void nohz_full_kick_func(struct irq_work *work)
{
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
}
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
- .func = nohz_full_kick_work_func,
+ .func = nohz_full_kick_func,
};
/*
@@ -251,6 +294,95 @@ void tick_nohz_full_kick_all(void)
preempt_enable();
}
+static void tick_nohz_dep_set_all(unsigned long *dep,
+ enum tick_dep_bits bit)
+{
+ unsigned long prev;
+
+ prev = fetch_or(dep, BIT_MASK(bit));
+ if (!prev)
+ tick_nohz_full_kick_all();
+}
+
+/*
+ * Set a global tick dependency. Used by perf events that rely on freq and
+ * by unstable clock.
+ */
+void tick_nohz_dep_set(enum tick_dep_bits bit)
+{
+ tick_nohz_dep_set_all(&tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear(enum tick_dep_bits bit)
+{
+ clear_bit(bit, &tick_dep_mask);
+}
+
+/*
+ * Set per-CPU tick dependency. Used by scheduler and perf events in order to
+ * manage events throttling.
+ */
+void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+ unsigned long prev;
+ struct tick_sched *ts;
+
+ ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+ prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
+ if (!prev) {
+ preempt_disable();
+ /* Perf needs local kick that is NMI safe */
+ if (cpu == smp_processor_id()) {
+ tick_nohz_full_kick();
+ } else {
+ /* Remote irq work not NMI-safe */
+ if (!WARN_ON_ONCE(in_nmi()))
+ tick_nohz_full_kick_cpu(cpu);
+ }
+ preempt_enable();
+ }
+}
+
+void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+ struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+ clear_bit(bit, &ts->tick_dep_mask);
+}
+
+/*
+ * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
+ * per task timers.
+ */
+void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+ /*
+ * We could optimize this with just kicking the target running the task
+ * if that noise matters for nohz full users.
+ */
+ tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+ clear_bit(bit, &tsk->tick_dep_mask);
+}
+
+/*
+ * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
+ * per process timers.
+ */
+void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+ tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+ clear_bit(bit, &sig->tick_dep_mask);
+}
+
/*
* Re-evaluate the need for the tick as we switch the current task.
* It might need the tick due to per task/process properties:
@@ -259,15 +391,19 @@ void tick_nohz_full_kick_all(void)
void __tick_nohz_task_switch(void)
{
unsigned long flags;
+ struct tick_sched *ts;
local_irq_save(flags);
if (!tick_nohz_full_cpu(smp_processor_id()))
goto out;
- if (tick_nohz_tick_stopped() && !can_stop_full_tick())
- tick_nohz_full_kick();
+ ts = this_cpu_ptr(&tick_cpu_sched);
+ if (ts->tick_stopped) {
+ if (current->tick_dep_mask || current->signal->tick_dep_mask)
+ tick_nohz_full_kick();
+ }
out:
local_irq_restore(flags);
}
@@ -736,7 +872,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;
- if (can_stop_full_tick())
+ if (can_stop_full_tick(ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index a4a8d4e9baa1..eb4e32566a83 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -60,6 +60,7 @@ struct tick_sched {
u64 next_timer;
ktime_t idle_expires;
int do_timer_last;
+ unsigned long tick_dep_mask;
};
extern struct tick_sched *tick_get_tick_sched(int cpu);