summaryrefslogtreecommitdiff
path: root/include/linux/sched.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--include/linux/sched.h187
1 files changed, 105 insertions, 82 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04b5ada460b4..eeb5066a44fb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -384,6 +384,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic;
+extern unsigned int hardlockup_panic;
void lockup_detector_init(void);
#else
static inline void touch_softlockup_watchdog(void)
@@ -530,39 +531,49 @@ struct cpu_itimer {
};
/**
- * struct cputime - snaphsot of system and user cputime
+ * struct prev_cputime - snaphsot of system and user cputime
* @utime: time spent in user mode
* @stime: time spent in system mode
+ * @lock: protects the above two fields
*
- * Gathers a generic snapshot of user and system time.
+ * Stores previous user/system time values such that we can guarantee
+ * monotonicity.
*/
-struct cputime {
+struct prev_cputime {
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
cputime_t utime;
cputime_t stime;
+ raw_spinlock_t lock;
+#endif
};
+static inline void prev_cputime_init(struct prev_cputime *prev)
+{
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ prev->utime = prev->stime = 0;
+ raw_spin_lock_init(&prev->lock);
+#endif
+}
+
/**
* struct task_cputime - collected CPU time counts
* @utime: time spent in user mode, in &cputime_t units
* @stime: time spent in kernel mode, in &cputime_t units
* @sum_exec_runtime: total time spent on the CPU, in nanoseconds
*
- * This is an extension of struct cputime that includes the total runtime
- * spent by the task from the scheduler point of view.
- *
- * As a result, this structure groups together three kinds of CPU time
- * that are tracked for threads and thread groups. Most things considering
- * CPU time want to group these counts together and treat all three
- * of them in parallel.
+ * This structure groups together three kinds of CPU time that are tracked for
+ * threads and thread groups. Most things considering CPU time want to group
+ * these counts together and treat all three of them in parallel.
*/
struct task_cputime {
cputime_t utime;
cputime_t stime;
unsigned long long sum_exec_runtime;
};
+
/* Alternate field names when used to cache expirations. */
-#define prof_exp stime
#define virt_exp utime
+#define prof_exp stime
#define sched_exp sum_exec_runtime
#define INIT_CPUTIME \
@@ -589,33 +600,42 @@ struct task_cputime_atomic {
.sum_exec_runtime = ATOMIC64_INIT(0), \
}
-#ifdef CONFIG_PREEMPT_COUNT
-#define PREEMPT_DISABLED (1 + PREEMPT_ENABLED)
-#else
-#define PREEMPT_DISABLED PREEMPT_ENABLED
-#endif
+#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
/*
- * Disable preemption until the scheduler is running.
- * Reset by start_kernel()->sched_init()->init_idle().
+ * Disable preemption until the scheduler is running -- use an unconditional
+ * value so that it also works on !PREEMPT_COUNT kernels.
*
- * We include PREEMPT_ACTIVE to avoid cond_resched() from working
- * before the scheduler is active -- see should_resched().
+ * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count().
*/
-#define INIT_PREEMPT_COUNT (PREEMPT_DISABLED + PREEMPT_ACTIVE)
+#define INIT_PREEMPT_COUNT PREEMPT_OFFSET
+
+/*
+ * Initial preempt_count value; reflects the preempt_count schedule invariant
+ * which states that during context switches:
+ *
+ * preempt_count() == 2*PREEMPT_DISABLE_OFFSET
+ *
+ * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels.
+ * Note: See finish_task_switch().
+ */
+#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
/**
* struct thread_group_cputimer - thread group interval timer counts
* @cputime_atomic: atomic thread group interval timers.
- * @running: non-zero when there are timers running and
- * @cputime receives updates.
+ * @running: true when there are timers running and
+ * @cputime_atomic receives updates.
+ * @checking_timer: true when a thread in the group is in the
+ * process of checking for thread group timers.
*
* This structure contains the version of task_cputime, above, that is
* used for thread group CPU timer calculations.
*/
struct thread_group_cputimer {
struct task_cputime_atomic cputime_atomic;
- int running;
+ bool running;
+ bool checking_timer;
};
#include <linux/rwsem.h>
@@ -715,9 +735,7 @@ struct signal_struct {
cputime_t utime, stime, cutime, cstime;
cputime_t gtime;
cputime_t cgtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- struct cputime prev_cputime;
-#endif
+ struct prev_cputime prev_cputime;
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
unsigned long inblock, oublock, cinblock, coublock;
@@ -820,7 +838,7 @@ struct user_struct {
struct hlist_node uidhash_node;
kuid_t uid;
-#ifdef CONFIG_PERF_EVENTS
+#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
atomic_long_t locked_vm;
#endif
};
@@ -1119,8 +1137,6 @@ struct sched_domain_topology_level {
#endif
};
-extern struct sched_domain_topology_level *sched_domain_topology;
-
extern void set_sched_topology(struct sched_domain_topology_level *tl);
extern void wake_up_if_idle(int cpu);
@@ -1167,29 +1183,24 @@ struct load_weight {
u32 inv_weight;
};
+/*
+ * The load_avg/util_avg accumulates an infinite geometric series.
+ * 1) load_avg factors frequency scaling into the amount of time that a
+ * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the
+ * aggregated such weights of all runnable and blocked sched_entities.
+ * 2) util_avg factors frequency and cpu scaling into the amount of time
+ * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE].
+ * For cfs_rq, it is the aggregated such times of all runnable and
+ * blocked sched_entities.
+ * The 64 bit load_sum can:
+ * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with
+ * the highest weight (=88761) always runnable, we should not overflow
+ * 2) for entity, support any load.weight always runnable
+ */
struct sched_avg {
- u64 last_runnable_update;
- s64 decay_count;
- /*
- * utilization_avg_contrib describes the amount of time that a
- * sched_entity is running on a CPU. It is based on running_avg_sum
- * and is scaled in the range [0..SCHED_LOAD_SCALE].
- * load_avg_contrib described the amount of time that a sched_entity
- * is runnable on a rq. It is based on both runnable_avg_sum and the
- * weight of the task.
- */
- unsigned long load_avg_contrib, utilization_avg_contrib;
- /*
- * These sums represent an infinite geometric series and so are bound
- * above by 1024/(1-y). Thus we only need a u32 to store them for all
- * choices of y < 1-2^(-32)*1024.
- * running_avg_sum reflects the time that the sched_entity is
- * effectively running on the CPU.
- * runnable_avg_sum represents the amount of time a sched_entity is on
- * a runqueue which includes the running time that is monitored by
- * running_avg_sum.
- */
- u32 runnable_avg_sum, avg_period, running_avg_sum;
+ u64 last_update_time, load_sum;
+ u32 util_sum, period_contrib;
+ unsigned long load_avg, util_avg;
};
#ifdef CONFIG_SCHEDSTATS
@@ -1255,7 +1266,7 @@ struct sched_entity {
#endif
#ifdef CONFIG_SMP
- /* Per-entity load-tracking */
+ /* Per entity load average tracking */
struct sched_avg avg;
#endif
};
@@ -1327,10 +1338,12 @@ struct sched_dl_entity {
union rcu_special {
struct {
- bool blocked;
- bool need_qs;
- } b;
- short s;
+ u8 blocked;
+ u8 need_qs;
+ u8 exp_need_qs;
+ u8 pad; /* Otherwise the compiler can store garbage here. */
+ } b; /* Bits. */
+ u32 s; /* Set of bits. */
};
struct rcu_node;
@@ -1341,6 +1354,25 @@ enum perf_event_task_context {
perf_nr_task_contexts,
};
+/* Track pages that require TLB flushes */
+struct tlbflush_unmap_batch {
+ /*
+ * Each bit set is a CPU that potentially has a TLB entry for one of
+ * the PFNs being flushed. See set_tlb_ubc_flush_pending().
+ */
+ struct cpumask cpumask;
+
+ /* True if any bit in cpumask is set */
+ bool flush_required;
+
+ /*
+ * If true then the PTE was dirty when unmapped. The entry must be
+ * flushed before IO is initiated or a stale TLB entry potentially
+ * allows an update without redirtying the page.
+ */
+ bool writable;
+};
+
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
@@ -1351,9 +1383,9 @@ struct task_struct {
#ifdef CONFIG_SMP
struct llist_node wake_entry;
int on_cpu;
- struct task_struct *last_wakee;
- unsigned long wakee_flips;
+ unsigned int wakee_flips;
unsigned long wakee_flip_decay_ts;
+ struct task_struct *last_wakee;
int wake_cpu;
#endif
@@ -1429,7 +1461,9 @@ struct task_struct {
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
unsigned sched_migrated:1;
-
+#ifdef CONFIG_MEMCG
+ unsigned memcg_may_oom:1;
+#endif
#ifdef CONFIG_MEMCG_KMEM
unsigned memcg_kmem_skip_account:1;
#endif
@@ -1481,9 +1515,7 @@ struct task_struct {
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- struct cputime prev_cputime;
-#endif
+ struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
seqlock_t vtime_seqlock;
unsigned long long vtime_snap;
@@ -1699,6 +1731,10 @@ struct task_struct {
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ struct tlbflush_unmap_batch tlb_ubc;
+#endif
+
struct rcu_head rcu;
/*
@@ -1758,12 +1794,12 @@ struct task_struct {
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_MEMCG
- struct memcg_oom_info {
- struct mem_cgroup *memcg;
- gfp_t gfp_mask;
- int order;
- unsigned int may_oom:1;
- } memcg_oom;
+ struct mem_cgroup *memcg_in_oom;
+ gfp_t memcg_oom_gfp_mask;
+ int memcg_oom_order;
+
+ /* number of pages to reclaim on returning to userland */
+ unsigned int memcg_nr_pages_over_high;
#endif
#ifdef CONFIG_UPROBES
struct uprobe_task *utask;
@@ -2214,13 +2250,6 @@ static inline void calc_load_enter_idle(void) { }
static inline void calc_load_exit_idle(void) { }
#endif /* CONFIG_NO_HZ_COMMON */
-#ifndef CONFIG_CPUMASK_OFFSTACK
-static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
-{
- return set_cpus_allowed_ptr(p, &new_mask);
-}
-#endif
-
/*
* Do not use outside of architecture code which knows its limitations.
*
@@ -2897,12 +2926,6 @@ extern int _cond_resched(void);
extern int __cond_resched_lock(spinlock_t *lock);
-#ifdef CONFIG_PREEMPT_COUNT
-#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
-#else
-#define PREEMPT_LOCK_OFFSET 0
-#endif
-
#define cond_resched_lock(lock) ({ \
___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
__cond_resched_lock(lock); \