diff options
Diffstat (limited to 'include/linux/sched.h')
| -rw-r--r-- | include/linux/sched.h | 439 |
1 files changed, 324 insertions, 115 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index d380bffee2ef..d395f2810fac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,19 +34,24 @@ #include <linux/sched/prio.h> #include <linux/sched/types.h> #include <linux/signal_types.h> +#include <linux/spinlock.h> #include <linux/syscall_user_dispatch_types.h> #include <linux/mm_types_task.h> #include <linux/netdevice_xmit.h> #include <linux/task_io_accounting.h> #include <linux/posix-timers_types.h> #include <linux/restart_block.h> -#include <uapi/linux/rseq.h> +#include <linux/rseq_types.h> #include <linux/seqlock_types.h> #include <linux/kcsan.h> #include <linux/rv.h> -#include <linux/livepatch_sched.h> #include <linux/uidgid_types.h> +#include <linux/tracepoint-defs.h> +#include <linux/unwind_deferred_types.h> #include <asm/kmap_size.h> +#ifndef COMPILE_OFFSETS +#include <generated/rq-offsets.h> +#endif /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -65,6 +70,7 @@ struct mempolicy; struct nameidata; struct nsproxy; struct perf_event_context; +struct perf_ctx_data; struct pid_namespace; struct pipe_inode_info; struct rcu_node; @@ -186,6 +192,12 @@ struct user_event_mm; # define debug_rtlock_wait_restore_state() do { } while (0) #endif +#define trace_set_current_state(state_value) \ + do { \ + if (tracepoint_enabled(sched_set_state_tp)) \ + __trace_set_current_state(state_value); \ + } while (0) + /* * set_current_state() includes a barrier so that the write of current->__state * is correctly serialised wrt the caller's subsequent test of whether to @@ -226,12 +238,14 @@ struct user_event_mm; #define __set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ + trace_set_current_state(state_value); \ WRITE_ONCE(current->__state, (state_value)); \ } while (0) #define set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ + trace_set_current_state(state_value); \ 
smp_store_mb(current->__state, (state_value)); \ } while (0) @@ -247,6 +261,7 @@ struct user_event_mm; \ raw_spin_lock_irqsave(&current->pi_lock, flags); \ debug_special_state_change((state_value)); \ + trace_set_current_state(state_value); \ WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \ } while (0) @@ -282,6 +297,7 @@ struct user_event_mm; raw_spin_lock(&current->pi_lock); \ current->saved_state = current->__state; \ debug_rtlock_wait_set_state(); \ + trace_set_current_state(TASK_RTLOCK_WAIT); \ WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \ raw_spin_unlock(&current->pi_lock); \ } while (0); @@ -291,6 +307,7 @@ struct user_event_mm; lockdep_assert_irqs_disabled(); \ raw_spin_lock(&current->pi_lock); \ debug_rtlock_wait_restore_state(); \ + trace_set_current_state(current->saved_state); \ WRITE_ONCE(current->__state, current->saved_state); \ current->saved_state = TASK_RUNNING; \ raw_spin_unlock(&current->pi_lock); \ @@ -327,6 +344,12 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); +/* wrapper functions to trace from this header file */ +DECLARE_TRACEPOINT(sched_set_state_tp); +extern void __trace_set_current_state(int state_value); +DECLARE_TRACEPOINT(sched_set_need_resched_tp); +extern void __trace_set_need_resched(struct task_struct *curr, int tif); + /** * struct prev_cputime - snapshot of system and user cputime * @utime: time spent in user mode @@ -379,10 +402,10 @@ enum uclamp_id { UCLAMP_CNT }; -#ifdef CONFIG_SMP extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; -#endif +extern void sched_domains_mutex_lock(void); +extern void sched_domains_mutex_unlock(void); struct sched_param { int sched_priority; @@ -398,6 +421,12 @@ struct sched_info { /* Time spent waiting on a runqueue: */ unsigned long long run_delay; + /* Max time spent waiting on a runqueue: */ + unsigned long long max_run_delay; + + /* Min time spent waiting on a
runqueue: */ + unsigned long long min_run_delay; + /* Timestamps: */ /* When did we last run on a CPU? */ @@ -557,7 +586,15 @@ struct sched_entity { u64 sum_exec_runtime; u64 prev_sum_exec_runtime; u64 vruntime; - s64 vlag; + union { + /* + * When !@on_rq this field is vlag. + * When cfs_rq->curr == se (which implies @on_rq) + * this field is vprot. See protect_slice(). + */ + s64 vlag; + u64 vprot; + }; u64 slice; u64 nr_migrations; @@ -573,7 +610,6 @@ struct sched_entity { unsigned long runnable_weight; #endif -#ifdef CONFIG_SMP /* * Per entity load average tracking. * @@ -581,7 +617,6 @@ struct sched_entity { * collide with read-mostly values above. */ struct sched_avg avg; -#endif }; struct sched_rt_entity { @@ -602,8 +637,8 @@ struct sched_rt_entity { #endif } __randomize_layout; -typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *); -typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *); +struct rq_flags; +typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *, struct rq_flags *rf); struct sched_dl_entity { struct rb_node rb_node; @@ -650,6 +685,12 @@ struct sched_dl_entity { * * @dl_server tells if this is a server entity. * + * @dl_server_active tells if the dlserver is active(started). + * dlserver is started on first cfs enqueue on an idle runqueue + * and is stopped when a dequeue results in 0 cfs tasks on the + * runqueue. In other words, dlserver is active only when cpu's + * runqueue has atleast one cfs task. + * * @dl_defer tells if this is a deferred or regular server. For * now only defer server exists. * @@ -658,15 +699,19 @@ struct sched_dl_entity { * * @dl_defer_running tells if the deferrable server is actually * running, skipping the defer phase. 
+ * + * @dl_defer_idle tracks idle state */ unsigned int dl_throttled : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; unsigned int dl_server : 1; + unsigned int dl_server_active : 1; unsigned int dl_defer : 1; unsigned int dl_defer_armed : 1; unsigned int dl_defer_running : 1; + unsigned int dl_defer_idle : 1; /* * Bandwidth enforcement timer. Each -deadline task has its @@ -688,12 +733,8 @@ struct sched_dl_entity { * dl_server_update(). * * @rq the runqueue this server is for - * - * @server_has_tasks() returns true if @server_pick return a - * runnable task. */ struct rq *rq; - dl_server_has_tasks_f server_has_tasks; dl_server_pick_f server_pick_task; #ifdef CONFIG_RT_MUTEXES @@ -804,7 +845,6 @@ struct task_struct { struct alloc_tag *alloc_tag; #endif -#ifdef CONFIG_SMP int on_cpu; struct __call_single_node wake_entry; unsigned int wakee_flips; @@ -820,7 +860,6 @@ struct task_struct { */ int recent_used_cpu; int wake_cpu; -#endif int on_rq; int prio; @@ -845,6 +884,11 @@ struct task_struct { #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; +#ifdef CONFIG_CFS_BANDWIDTH + struct callback_head sched_throttle_work; + struct list_head throttle_node; + bool throttled; +#endif #endif @@ -879,9 +923,7 @@ struct task_struct { cpumask_t *user_cpus_ptr; cpumask_t cpus_mask; void *migration_pending; -#ifdef CONFIG_SMP unsigned short migration_disabled; -#endif unsigned short migration_flags; #ifdef CONFIG_PREEMPT_RCU @@ -913,10 +955,8 @@ struct task_struct { struct sched_info sched_info; struct list_head tasks; -#ifdef CONFIG_SMP struct plist_node pushable_tasks; struct rb_node pushable_dl_tasks; -#endif struct mm_struct *mm; struct mm_struct *active_mm; @@ -937,6 +977,7 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; + unsigned sched_task_hot:1; /* Force alignment to the next boundary: */ unsigned :0; @@ -1008,6 +1049,7 @@ struct 
task_struct { /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif + unsigned in_nf_duplicate:1; #ifdef CONFIG_PREEMPT_RT struct netdev_xmit net_xmit; #endif @@ -1198,9 +1240,14 @@ struct task_struct { struct rt_mutex_waiter *pi_blocked_on; #endif -#ifdef CONFIG_DEBUG_MUTEXES - /* Mutex deadlock detection: */ - struct mutex_waiter *blocked_on; + struct mutex *blocked_on; /* lock we're blocked on */ + +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER + /* + * Encoded lock address causing task block (lower 2 bits = type from + * <linux/hung_task.h>). Accessed via hung_task_*() helpers. + */ + unsigned long blocker; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP @@ -1277,7 +1324,10 @@ struct task_struct { struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock: */ struct list_head cg_list; -#endif +#ifdef CONFIG_PREEMPT_RT + struct llist_node cg_dead_lnode; +#endif /* CONFIG_PREEMPT_RT */ +#endif /* CONFIG_CGROUPS */ #ifdef CONFIG_X86_CPU_RESCTRL u32 closid; u32 rmid; @@ -1297,6 +1347,7 @@ struct task_struct { struct perf_event_context *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; + struct perf_ctx_data __rcu *perf_ctx_data; #endif #ifdef CONFIG_DEBUG_PREEMPT unsigned long preempt_disable_ip; @@ -1358,24 +1409,8 @@ struct task_struct { unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_RSEQ - struct rseq __user *rseq; - u32 rseq_len; - u32 rseq_sig; - /* - * RmW on rseq_event_mask must be performed atomically - * with respect to preemption. 
- */ - unsigned long rseq_event_mask; -#endif - -#ifdef CONFIG_SCHED_MM_CID - int mm_cid; /* Current cid in mm */ - int last_mm_cid; /* Most recent cid in mm */ - int migrate_from_cpu; - int mm_cid_active; /* Whether cid bitmap is active */ - struct callback_head cid_work; -#endif + struct rseq_data rseq; + struct sched_mm_cid mm_cid; struct tlbflush_unmap_batch tlb_ubc; @@ -1549,8 +1584,10 @@ struct task_struct { /* Used by BPF for per-TASK xdp storage */ struct bpf_net_context *bpf_net_context; -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE unsigned long lowest_stack; +#endif +#ifdef CONFIG_KSTACK_ERASE_METRICS unsigned long prev_lowest_stack; #endif @@ -1584,34 +1621,42 @@ struct task_struct { #ifdef CONFIG_RV /* - * Per-task RV monitor. Nowadays fixed in RV_PER_TASK_MONITORS. - * If we find justification for more monitors, we can think - * about adding more or developing a dynamic method. So far, - * none of these are justified. + * Per-task RV monitor, fixed in CONFIG_RV_PER_TASK_MONITORS. + * If memory becomes a concern, we can think about a dynamic method. */ - union rv_task_monitor rv[RV_PER_TASK_MONITORS]; + union rv_task_monitor rv[CONFIG_RV_PER_TASK_MONITORS]; #endif #ifdef CONFIG_USER_EVENTS struct user_event_mm *user_event_mm; #endif - /* - * New fields for task_struct should be added above here, so that - * they are included in the randomized portion of task_struct. - */ - randomized_struct_fields_end +#ifdef CONFIG_UNWIND_USER + struct unwind_task_info unwind_info; +#endif /* CPU-specific state of this task: */ struct thread_struct thread; /* - * WARNING: on x86, 'thread_struct' contains a variable-sized - * structure. It *MUST* be at the end of 'task_struct'. - * - * Do not put anything below here! + * New fields for task_struct should be added above here, so that + * they are included in the randomized portion of task_struct. 
*/ -}; + randomized_struct_fields_end +} __attribute__ ((aligned (64))); + +#ifdef CONFIG_SCHED_PROXY_EXEC +DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec); +static inline bool sched_proxy_exec(void) +{ + return static_branch_likely(&__sched_proxy_exec); +} +#else +static inline bool sched_proxy_exec(void) +{ + return false; +} +#endif #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) @@ -1630,8 +1675,9 @@ static inline unsigned int __task_state_index(unsigned int tsk_state, * We're lying here, but rather than expose a completely new task state * to userspace, we can make this appear as if the task has gone through * a regular rt_mutex_lock() call. + * Report frozen tasks as uninterruptible. */ - if (tsk_state & TASK_RTLOCK_WAIT) + if ((tsk_state & TASK_RTLOCK_WAIT) || (tsk_state & TASK_FROZEN)) state = TASK_UNINTERRUPTIBLE; return fls(state); @@ -1677,7 +1723,7 @@ extern struct pid *cad_pid; #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ -#define PF__HOLE__00010000 0x00010000 +#define PF_KCOMPACTD 0x00010000 /* I am kcompactd */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */ @@ -1726,12 +1772,8 @@ extern struct pid *cad_pid; static __always_inline bool is_percpu_thread(void) { -#ifdef CONFIG_SMP return (current->flags & PF_NO_SETAFFINITY) && (current->nr_cpus_allowed == 1); -#else - return true; -#endif } /* Per-process atomic flags. 
*/ @@ -1796,10 +1838,9 @@ extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpu extern int task_can_attach(struct task_struct *p); extern int dl_bw_alloc(int cpu, u64 dl_bw); extern void dl_bw_free(int cpu, u64 dl_bw); -#ifdef CONFIG_SMP -/* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */ -extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); +/* set_cpus_allowed_force() - consider using set_cpus_allowed_ptr() instead */ +extern void set_cpus_allowed_force(struct task_struct *p, const struct cpumask *new_mask); /** * set_cpus_allowed_ptr - set CPU affinity mask of a task @@ -1814,33 +1855,6 @@ extern void release_user_cpus_ptr(struct task_struct *p); extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask); extern void force_compatible_cpus_allowed_ptr(struct task_struct *p); extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p); -#else -static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -{ -} -static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -{ - /* Opencoded cpumask_test_cpu(0, new_mask) to avoid dependency on cpumask.h */ - if ((*cpumask_bits(new_mask) & 1) == 0) - return -EINVAL; - return 0; -} -static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) -{ - if (src->user_cpus_ptr) - return -EINVAL; - return 0; -} -static inline void release_user_cpus_ptr(struct task_struct *p) -{ - WARN_ON(p->user_cpus_ptr); -} - -static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask) -{ - return 0; -} -#endif extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); @@ -1865,6 +1879,7 @@ extern int sched_setscheduler(struct task_struct *, int, const struct sched_para extern int sched_setscheduler_nocheck(struct task_struct *, int, 
const struct sched_param *); extern void sched_set_fifo(struct task_struct *p); extern void sched_set_fifo_low(struct task_struct *p); +extern void sched_set_fifo_secondary(struct task_struct *p); extern void sched_set_normal(struct task_struct *p, int nice); extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *); @@ -1929,18 +1944,13 @@ extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); -#ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); -#else -static inline void kick_process(struct task_struct *tsk) { } -#endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); - -static inline void set_task_comm(struct task_struct *tsk, const char *from) -{ - __set_task_comm(tsk, from, false); -} +#define set_task_comm(tsk, from) ({ \ + BUILD_BUG_ON(sizeof(from) != TASK_COMM_LEN); \ + __set_task_comm(tsk, from, false); \ +}) /* * - Why not use task_lock()? 
@@ -1961,7 +1971,6 @@ static inline void set_task_comm(struct task_struct *tsk, const char *from) buf; \ }) -#ifdef CONFIG_SMP static __always_inline void scheduler_ipi(void) { /* @@ -1971,9 +1980,6 @@ static __always_inline void scheduler_ipi(void) */ preempt_fold_need_resched(); } -#else -static inline void scheduler_ipi(void) { } -#endif extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state); @@ -2014,6 +2020,9 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) static inline void set_tsk_need_resched(struct task_struct *tsk) { + if (tracepoint_enabled(sched_set_need_resched_tp) && + !test_tsk_thread_flag(tsk, TIF_NEED_RESCHED)) + __trace_set_need_resched(tsk, TIF_NEED_RESCHED); set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); } @@ -2028,6 +2037,13 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } +static inline void set_need_resched_current(void) +{ + lockdep_assert_irqs_disabled(); + set_tsk_need_resched(current); + set_preempt_need_resched(); +} + /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. 
The return @@ -2039,9 +2055,6 @@ extern int __cond_resched(void); #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -void sched_dynamic_klp_enable(void); -void sched_dynamic_klp_disable(void); - DECLARE_STATIC_CALL(cond_resched, __cond_resched); static __always_inline int _cond_resched(void) @@ -2062,7 +2075,6 @@ static __always_inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return __cond_resched(); } @@ -2072,7 +2084,6 @@ static inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return 0; } @@ -2121,6 +2132,72 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock); __cond_resched_rwlock_write(lock); \ }) +#ifndef CONFIG_PREEMPT_RT +static inline struct mutex *__get_task_blocked_on(struct task_struct *p) +{ + struct mutex *m = p->blocked_on; + + if (m) + lockdep_assert_held_once(&m->wait_lock); + return m; +} + +static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) +{ + struct mutex *blocked_on = READ_ONCE(p->blocked_on); + + WARN_ON_ONCE(!m); + /* The task should only be setting itself as blocked */ + WARN_ON_ONCE(p != current); + /* Currently we serialize blocked_on under the mutex::wait_lock */ + lockdep_assert_held_once(&m->wait_lock); + /* + * Check ensure we don't overwrite existing mutex value + * with a different mutex. Note, setting it to the same + * lock repeatedly is ok. 
+ */ + WARN_ON_ONCE(blocked_on && blocked_on != m); + WRITE_ONCE(p->blocked_on, m); +} + +static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) +{ + guard(raw_spinlock_irqsave)(&m->wait_lock); + __set_task_blocked_on(p, m); +} + +static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m) +{ + if (m) { + struct mutex *blocked_on = READ_ONCE(p->blocked_on); + + /* Currently we serialize blocked_on under the mutex::wait_lock */ + lockdep_assert_held_once(&m->wait_lock); + /* + * There may be cases where we re-clear already cleared + * blocked_on relationships, but make sure we are not + * clearing the relationship with a different lock. + */ + WARN_ON_ONCE(blocked_on && blocked_on != m); + } + WRITE_ONCE(p->blocked_on, NULL); +} + +static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m) +{ + guard(raw_spinlock_irqsave)(&m->wait_lock); + __clear_task_blocked_on(p, m); +} +#else +static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m) +{ +} + +static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m) +{ +} +#endif /* !CONFIG_PREEMPT_RT */ + static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); @@ -2160,8 +2237,6 @@ extern bool sched_task_on_rq(struct task_struct *p); extern unsigned long get_wchan(struct task_struct *p); extern struct task_struct *cpu_curr_snapshot(int cpu); -#include <linux/spinlock.h> - /* * In order to reduce various lock holder preemption latencies provide an * interface to see if a vCPU is currently running or not. 
@@ -2184,7 +2259,6 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask); #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -#ifdef CONFIG_SMP static inline bool owner_on_cpu(struct task_struct *owner) { /* @@ -2196,7 +2270,6 @@ static inline bool owner_on_cpu(struct task_struct *owner) /* Returns effective CPU energy utilization, as seen by the scheduler */ unsigned long sched_cpu_util(int cpu); -#endif /* CONFIG_SMP */ #ifdef CONFIG_SCHED_CORE extern void sched_core_free(struct task_struct *tsk); @@ -2231,4 +2304,140 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo #define alloc_tag_restore(_tag, _old) do {} while (0) #endif +/* Avoids recursive inclusion hell */ +#ifdef CONFIG_SCHED_MM_CID +void sched_mm_cid_before_execve(struct task_struct *t); +void sched_mm_cid_after_execve(struct task_struct *t); +void sched_mm_cid_fork(struct task_struct *t); +void sched_mm_cid_exit(struct task_struct *t); +static __always_inline int task_mm_cid(struct task_struct *t) +{ + return t->mm_cid.cid & ~(MM_CID_ONCPU | MM_CID_TRANSIT); +} +#else +static inline void sched_mm_cid_before_execve(struct task_struct *t) { } +static inline void sched_mm_cid_after_execve(struct task_struct *t) { } +static inline void sched_mm_cid_fork(struct task_struct *t) { } +static inline void sched_mm_cid_exit(struct task_struct *t) { } +static __always_inline int task_mm_cid(struct task_struct *t) +{ + /* + * Use the processor id as a fall-back when the mm cid feature is + * disabled. This provides functional per-cpu data structure accesses + * in user-space, althrough it won't provide the memory usage benefits. 
+ */ + return task_cpu(t); +} +#endif + +#ifndef MODULE +#ifndef COMPILE_OFFSETS + +extern void ___migrate_enable(void); + +struct rq; +DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); + +/* + * The "struct rq" is not available here, so we can't access the + * "runqueues" with this_cpu_ptr(), as the compilation will fail in + * this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr(): + * typeof((ptr) + 0) + * + * So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here. + */ +#ifdef CONFIG_SMP +#define this_rq_raw() arch_raw_cpu_ptr(&runqueues) +#else +#define this_rq_raw() PERCPU_PTR(&runqueues) +#endif +#define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned)) + +static inline void __migrate_enable(void) +{ + struct task_struct *p = current; + +#ifdef CONFIG_DEBUG_PREEMPT + /* + * Check both overflow from migrate_disable() and superfluous + * migrate_enable(). + */ + if (WARN_ON_ONCE((s16)p->migration_disabled <= 0)) + return; +#endif + + if (p->migration_disabled > 1) { + p->migration_disabled--; + return; + } + + /* + * Ensure stop_task runs either before or after this, and that + * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). + */ + guard(preempt)(); + if (unlikely(p->cpus_ptr != &p->cpus_mask)) + ___migrate_enable(); + /* + * Mustn't clear migration_disabled() until cpus_ptr points back at the + * regular cpus_mask, otherwise things that race (eg. + * select_fallback_rq) get confused. + */ + barrier(); + p->migration_disabled = 0; + this_rq_pinned()--; +} + +static inline void __migrate_disable(void) +{ + struct task_struct *p = current; + + if (p->migration_disabled) { +#ifdef CONFIG_DEBUG_PREEMPT + /* + *Warn about overflow half-way through the range. 
+ */ + WARN_ON_ONCE((s16)p->migration_disabled < 0); +#endif + p->migration_disabled++; + return; + } + + guard(preempt)(); + this_rq_pinned()++; + p->migration_disabled = 1; +} +#else /* !COMPILE_OFFSETS */ +static inline void __migrate_disable(void) { } +static inline void __migrate_enable(void) { } +#endif /* !COMPILE_OFFSETS */ + +/* + * So that it is possible to not export the runqueues variable, define and + * export migrate_enable/migrate_disable in kernel/sched/core.c too, and use + * them for the modules. The macro "INSTANTIATE_EXPORTED_MIGRATE_DISABLE" will + * be defined in kernel/sched/core.c. + */ +#ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE +static __always_inline void migrate_disable(void) +{ + __migrate_disable(); +} + +static __always_inline void migrate_enable(void) +{ + __migrate_enable(); +} +#else /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */ +extern void migrate_disable(void); +extern void migrate_enable(void); +#endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */ + +#else /* MODULE */ +extern void migrate_disable(void); +extern void migrate_enable(void); +#endif /* MODULE */ + +DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) + #endif |
