summaryrefslogtreecommitdiff
path: root/include/linux/sched.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-28 12:14:19 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-28 12:14:19 -0700
commit54a728dc5e4feb0a9278ad62b19f34ad21ed0ee4 (patch)
tree2737c23d4dbc6426d6d9467626a7634cbbb40fcd /include/linux/sched.h
parent28a27cbd86076c1a6be311c751b421c4c17a7dd9 (diff)
parentadf3c31e18b765ea24eba7b0c1efc076b8ee3d55 (diff)
Merge tag 'sched-core-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler udpates from Ingo Molnar: - Changes to core scheduling facilities: - Add "Core Scheduling" via CONFIG_SCHED_CORE=y, which enables coordinated scheduling across SMT siblings. This is a much requested feature for cloud computing platforms, to allow the flexible utilization of SMT siblings, without exposing untrusted domains to information leaks & side channels, plus to ensure more deterministic computing performance on SMT systems used by heterogenous workloads. There are new prctls to set core scheduling groups, which allows more flexible management of workloads that can share siblings. - Fix task->state access anti-patterns that may result in missed wakeups and rename it to ->__state in the process to catch new abuses. - Load-balancing changes: - Tweak newidle_balance for fair-sched, to improve 'memcache'-like workloads. - "Age" (decay) average idle time, to better track & improve workloads such as 'tbench'. - Fix & improve energy-aware (EAS) balancing logic & metrics. - Fix & improve the uclamp metrics. - Fix task migration (taskset) corner case on !CONFIG_CPUSET. - Fix RT and deadline utilization tracking across policy changes - Introduce a "burstable" CFS controller via cgroups, which allows bursty CPU-bound workloads to borrow a bit against their future quota to improve overall latencies & batching. Can be tweaked via /sys/fs/cgroup/cpu/<X>/cpu.cfs_burst_us. - Rework assymetric topology/capacity detection & handling. - Scheduler statistics & tooling: - Disable delayacct by default, but add a sysctl to enable it at runtime if tooling needs it. Use static keys and other optimizations to make it more palatable. - Use sched_clock() in delayacct, instead of ktime_get_ns(). - Misc cleanups and fixes. * tag 'sched-core-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (72 commits) sched/doc: Update the CPU capacity asymmetry bits sched/topology: Rework CPU capacity asymmetry detection sched/core: Introduce SD_ASYM_CPUCAPACITY_FULL sched_domain flag psi: Fix race between psi_trigger_create/destroy sched/fair: Introduce the burstable CFS controller sched/uclamp: Fix uclamp_tg_restrict() sched/rt: Fix Deadline utilization tracking during policy change sched/rt: Fix RT utilization tracking during policy change sched: Change task_struct::state sched,arch: Remove unused TASK_STATE offsets sched,timer: Use __set_current_state() sched: Add get_current_state() sched,perf,kvm: Fix preemption condition sched: Introduce task_is_running() sched: Unbreak wakeups sched/fair: Age the average idle time sched/cpufreq: Consider reduced CPU capacity in energy calculation sched/fair: Take thermal pressure into account while estimating energy thermal/cpufreq_cooling: Update offline CPUs per-cpu thermal_pressure sched/fair: Return early from update_tg_cfs_load() if delta == 0 ...
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--include/linux/sched.h50
1 files changed, 35 insertions, 15 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 32813c345115..31aaf9d7b3e9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -113,11 +113,13 @@ struct task_group;
__TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
TASK_PARKED)
-#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
+#define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)
-#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
+#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
-#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+#define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
+
+#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
@@ -132,14 +134,14 @@ struct task_group;
do { \
WARN_ON_ONCE(is_special_task_state(state_value));\
current->task_state_change = _THIS_IP_; \
- current->state = (state_value); \
+ WRITE_ONCE(current->__state, (state_value)); \
} while (0)
#define set_current_state(state_value) \
do { \
WARN_ON_ONCE(is_special_task_state(state_value));\
current->task_state_change = _THIS_IP_; \
- smp_store_mb(current->state, (state_value)); \
+ smp_store_mb(current->__state, (state_value)); \
} while (0)
#define set_special_state(state_value) \
@@ -148,7 +150,7 @@ struct task_group;
WARN_ON_ONCE(!is_special_task_state(state_value)); \
raw_spin_lock_irqsave(&current->pi_lock, flags); \
current->task_state_change = _THIS_IP_; \
- current->state = (state_value); \
+ WRITE_ONCE(current->__state, (state_value)); \
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
#else
@@ -190,10 +192,10 @@ struct task_group;
* Also see the comments of try_to_wake_up().
*/
#define __set_current_state(state_value) \
- current->state = (state_value)
+ WRITE_ONCE(current->__state, (state_value))
#define set_current_state(state_value) \
- smp_store_mb(current->state, (state_value))
+ smp_store_mb(current->__state, (state_value))
/*
* set_special_state() should be used for those states when the blocking task
@@ -205,12 +207,14 @@ struct task_group;
do { \
unsigned long flags; /* may shadow */ \
raw_spin_lock_irqsave(&current->pi_lock, flags); \
- current->state = (state_value); \
+ WRITE_ONCE(current->__state, (state_value)); \
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
#endif
+#define get_current_state() READ_ONCE(current->__state)
+
/* Task command name length: */
#define TASK_COMM_LEN 16
@@ -662,8 +666,7 @@ struct task_struct {
*/
struct thread_info thread_info;
#endif
- /* -1 unrunnable, 0 runnable, >0 stopped: */
- volatile long state;
+ unsigned int __state;
/*
* This begins the randomizable portion of task_struct. Only
@@ -708,10 +711,17 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
+ struct sched_dl_entity dl;
+
+#ifdef CONFIG_SCHED_CORE
+ struct rb_node core_node;
+ unsigned long core_cookie;
+ unsigned int core_occupation;
+#endif
+
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
#endif
- struct sched_dl_entity dl;
#ifdef CONFIG_UCLAMP_TASK
/*
@@ -1520,7 +1530,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
static inline unsigned int task_state_index(struct task_struct *tsk)
{
- unsigned int tsk_state = READ_ONCE(tsk->state);
+ unsigned int tsk_state = READ_ONCE(tsk->__state);
unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
@@ -1828,10 +1838,10 @@ static __always_inline void scheduler_ipi(void)
*/
preempt_fold_need_resched();
}
-extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
+extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
#else
static inline void scheduler_ipi(void) { }
-static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state)
+static inline unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
{
return 1;
}
@@ -2179,4 +2189,14 @@ int sched_trace_rq_nr_running(struct rq *rq);
const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
+#ifdef CONFIG_SCHED_CORE
+extern void sched_core_free(struct task_struct *tsk);
+extern void sched_core_fork(struct task_struct *p);
+extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
+ unsigned long uaddr);
+#else
+static inline void sched_core_free(struct task_struct *tsk) { }
+static inline void sched_core_fork(struct task_struct *p) { }
+#endif
+
#endif