summaryrefslogtreecommitdiff
path: root/include/linux/sched.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-06-22 15:52:04 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-22 15:52:04 -0700
commit23b7776290b10297fe2cae0fb5f166a4f2c68121 (patch)
tree73d1e76644a20bc7bff80fbfdb08e8b9a9f28420 /include/linux/sched.h
parent6bc4c3ad3619e1bcb4a6330e030007ace8ca465e (diff)
parent6fab54101923044712baee429ff573f03b99fc47 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: "The main changes are: - lockless wakeup support for futexes and IPC message queues (Davidlohr Bueso, Peter Zijlstra) - Replace spinlocks with atomics in thread_group_cputimer(), to improve scalability (Jason Low) - NUMA balancing improvements (Rik van Riel) - SCHED_DEADLINE improvements (Wanpeng Li) - clean up and reorganize preemption helpers (Frederic Weisbecker) - decouple page fault disabling machinery from the preemption counter, to improve debuggability and robustness (David Hildenbrand) - SCHED_DEADLINE documentation updates (Luca Abeni) - topology CPU masks cleanups (Bartosz Golaszewski) - /proc/sched_debug improvements (Srikar Dronamraju)" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (79 commits) sched/deadline: Remove needless parameter in dl_runtime_exceeded() sched: Remove superfluous resetting of the p->dl_throttled flag sched/deadline: Drop duplicate init_sched_dl_class() declaration sched/deadline: Reduce rq lock contention by eliminating locking of non-feasible target sched/deadline: Make init_sched_dl_class() __init sched/deadline: Optimize pull_dl_task() sched/preempt: Add static_key() to preempt_notifiers sched/preempt: Fix preempt notifiers documentation about hlist_del() within unsafe iteration sched/stop_machine: Fix deadlock between multiple stop_two_cpus() sched/debug: Add sum_sleep_runtime to /proc/<pid>/sched sched/debug: Replace vruntime with wait_sum in /proc/sched_debug sched/debug: Properly format runnable tasks in /proc/sched_debug sched/numa: Only consider less busy nodes as numa balancing destinations Revert 095bebf61a46 ("sched/numa: Do not move past the balance point if unbalanced") sched/fair: Prevent throttling in early pick_next_task_fair() preempt: Reorganize the notrace definitions a bit preempt: Use preempt_schedule_context() as the official tracing preemption point sched: Make preempt_schedule_context() function-tracing safe x86: Remove cpu_sibling_mask() and cpu_core_mask() x86: Replace cpu_**_mask() with topology_**_cpumask() ...
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--include/linux/sched.h118
1 files changed, 91 insertions, 27 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 659f5729cacc..d4193d5613cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -25,7 +25,7 @@ struct sched_param {
#include <linux/errno.h>
#include <linux/nodemask.h>
#include <linux/mm_types.h>
-#include <linux/preempt_mask.h>
+#include <linux/preempt.h>
#include <asm/page.h>
#include <asm/ptrace.h>
@@ -174,7 +174,12 @@ extern unsigned long nr_iowait_cpu(int cpu);
extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
extern void calc_global_load(unsigned long ticks);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void update_cpu_load_nohz(void);
+#else
+static inline void update_cpu_load_nohz(void) { }
+#endif
extern unsigned long get_parent_ip(unsigned long addr);
@@ -214,9 +219,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
#define TASK_WAKEKILL 128
#define TASK_WAKING 256
#define TASK_PARKED 512
-#define TASK_STATE_MAX 1024
+#define TASK_NOLOAD 1024
+#define TASK_STATE_MAX 2048
-#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP"
+#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
extern char ___assert_task_state[1 - 2*!!(
sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
@@ -226,6 +232,8 @@ extern char ___assert_task_state[1 - 2*!!(
#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)
+#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
+
/* Convenience macros for the sake of wake_up */
#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
@@ -241,7 +249,8 @@ extern char ___assert_task_state[1 - 2*!!(
((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
#define task_contributes_to_load(task) \
((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
- (task->flags & PF_FROZEN) == 0)
+ (task->flags & PF_FROZEN) == 0 && \
+ (task->state & TASK_NOLOAD) == 0)
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
@@ -568,6 +577,23 @@ struct task_cputime {
.sum_exec_runtime = 0, \
}
+/*
+ * This is the atomic variant of task_cputime, which can be used for
+ * storing and updating task_cputime statistics without locking.
+ */
+struct task_cputime_atomic {
+ atomic64_t utime;
+ atomic64_t stime;
+ atomic64_t sum_exec_runtime;
+};
+
+#define INIT_CPUTIME_ATOMIC \
+ (struct task_cputime_atomic) { \
+ .utime = ATOMIC64_INIT(0), \
+ .stime = ATOMIC64_INIT(0), \
+ .sum_exec_runtime = ATOMIC64_INIT(0), \
+ }
+
#ifdef CONFIG_PREEMPT_COUNT
#define PREEMPT_DISABLED (1 + PREEMPT_ENABLED)
#else
@@ -585,18 +611,16 @@ struct task_cputime {
/**
* struct thread_group_cputimer - thread group interval timer counts
- * @cputime: thread group interval timers.
+ * @cputime_atomic: atomic thread group interval timers.
* @running: non-zero when there are timers running and
* @cputime receives updates.
- * @lock: lock for fields in this struct.
*
* This structure contains the version of task_cputime, above, that is
* used for thread group CPU timer calculations.
*/
struct thread_group_cputimer {
- struct task_cputime cputime;
+ struct task_cputime_atomic cputime_atomic;
int running;
- raw_spinlock_t lock;
};
#include <linux/rwsem.h>
@@ -901,6 +925,50 @@ enum cpu_idle_type {
#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
/*
+ * Wake-queues are lists of tasks with a pending wakeup, whose
+ * callers have already marked the task as woken internally,
+ * and can thus carry on. A common use case is being able to
+ * do the wakeups once the corresponding user lock as been
+ * released.
+ *
+ * We hold reference to each task in the list across the wakeup,
+ * thus guaranteeing that the memory is still valid by the time
+ * the actual wakeups are performed in wake_up_q().
+ *
+ * One per task suffices, because there's never a need for a task to be
+ * in two wake queues simultaneously; it is forbidden to abandon a task
+ * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
+ * already in a wake queue, the wakeup will happen soon and the second
+ * waker can just skip it.
+ *
+ * The WAKE_Q macro declares and initializes the list head.
+ * wake_up_q() does NOT reinitialize the list; it's expected to be
+ * called near the end of a function, where the fact that the queue is
+ * not used again will be easy to see by inspection.
+ *
+ * Note that this can cause spurious wakeups. schedule() callers
+ * must ensure the call is done inside a loop, confirming that the
+ * wakeup condition has in fact occurred.
+ */
+struct wake_q_node {
+ struct wake_q_node *next;
+};
+
+struct wake_q_head {
+ struct wake_q_node *first;
+ struct wake_q_node **lastp;
+};
+
+#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
+
+#define WAKE_Q(name) \
+ struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
+
+extern void wake_q_add(struct wake_q_head *head,
+ struct task_struct *task);
+extern void wake_up_q(struct wake_q_head *head);
+
+/*
* sched-domains (multiprocessor balancing) declarations:
*/
#ifdef CONFIG_SMP
@@ -1335,8 +1403,6 @@ struct task_struct {
int rcu_read_lock_nesting;
union rcu_special rcu_read_unlock_special;
struct list_head rcu_node_entry;
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-#ifdef CONFIG_PREEMPT_RCU
struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TASKS_RCU
@@ -1367,7 +1433,7 @@ struct task_struct {
int exit_state;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
- unsigned int jobctl; /* JOBCTL_*, siglock protected */
+ unsigned long jobctl; /* JOBCTL_*, siglock protected */
/* Used for emulating ABI behavior of previous Linux versions */
unsigned int personality;
@@ -1513,6 +1579,8 @@ struct task_struct {
/* Protection of the PI data structures: */
raw_spinlock_t pi_lock;
+ struct wake_q_node wake_q;
+
#ifdef CONFIG_RT_MUTEXES
/* PI waiters blocked on a rt_mutex held by this task */
struct rb_root pi_waiters;
@@ -1726,6 +1794,7 @@ struct task_struct {
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
#endif
+ int pagefault_disabled;
};
/* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -2079,22 +2148,22 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */
#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */
-#define JOBCTL_STOP_DEQUEUED (1 << JOBCTL_STOP_DEQUEUED_BIT)
-#define JOBCTL_STOP_PENDING (1 << JOBCTL_STOP_PENDING_BIT)
-#define JOBCTL_STOP_CONSUME (1 << JOBCTL_STOP_CONSUME_BIT)
-#define JOBCTL_TRAP_STOP (1 << JOBCTL_TRAP_STOP_BIT)
-#define JOBCTL_TRAP_NOTIFY (1 << JOBCTL_TRAP_NOTIFY_BIT)
-#define JOBCTL_TRAPPING (1 << JOBCTL_TRAPPING_BIT)
-#define JOBCTL_LISTENING (1 << JOBCTL_LISTENING_BIT)
+#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT)
+#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT)
+#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT)
+#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT)
+#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT)
+#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT)
+#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT)
#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
extern bool task_set_jobctl_pending(struct task_struct *task,
- unsigned int mask);
+ unsigned long mask);
extern void task_clear_jobctl_trapping(struct task_struct *task);
extern void task_clear_jobctl_pending(struct task_struct *task,
- unsigned int mask);
+ unsigned long mask);
static inline void rcu_copy_process(struct task_struct *p)
{
@@ -2964,11 +3033,6 @@ static __always_inline bool need_resched(void)
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
-static inline void thread_group_cputime_init(struct signal_struct *sig)
-{
- raw_spin_lock_init(&sig->cputimer.lock);
-}
-
/*
* Reevaluate whether the task has signals pending delivery.
* Wake the task if so.
@@ -3082,13 +3146,13 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
static inline unsigned long task_rlimit(const struct task_struct *tsk,
unsigned int limit)
{
- return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
+ return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
}
static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
unsigned int limit)
{
- return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max);
+ return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
}
static inline unsigned long rlimit(unsigned int limit)