Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--	kernel/sched/core.c	221
1 file changed, 172 insertions, 49 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 25b582b6ee5f..af017e038b48 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -152,7 +152,7 @@ __read_mostly int scheduler_running;
 DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
 
 /* kernel prio, less is more */
-static inline int __task_prio(struct task_struct *p)
+static inline int __task_prio(const struct task_struct *p)
 {
 	if (p->sched_class == &stop_sched_class) /* trumps deadline */
 		return -2;
@@ -174,7 +174,8 @@ static inline int __task_prio(struct task_struct *p)
  */
 
 /* real prio, less is less */
-static inline bool prio_less(struct task_struct *a, struct task_struct *b, bool in_fi)
+static inline bool prio_less(const struct task_struct *a,
+			     const struct task_struct *b, bool in_fi)
 {
 
 	int pa = __task_prio(a), pb = __task_prio(b);
@@ -194,7 +195,8 @@ static inline bool prio_less(struct task_struct *a, struct task_struct *b, bool
 	return false;
 }
 
-static inline bool __sched_core_less(struct task_struct *a, struct task_struct *b)
+static inline bool __sched_core_less(const struct task_struct *a,
+				     const struct task_struct *b)
 {
 	if (a->core_cookie < b->core_cookie)
 		return true;
@@ -2604,27 +2606,71 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 		.user_mask = NULL,
 		.flags     = SCA_USER,	/* clear the user requested mask */
 	};
+	union cpumask_rcuhead {
+		cpumask_t cpumask;
+		struct rcu_head rcu;
+	};
 
 	__do_set_cpus_allowed(p, &ac);
-	kfree(ac.user_mask);
+
+	/*
+	 * Because this is called with p->pi_lock held, it is not possible
+	 * to use kfree() here (when PREEMPT_RT=y), therefore punt to using
+	 * kfree_rcu().
+	 */
+	kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu);
+}
+
+static cpumask_t *alloc_user_cpus_ptr(int node)
+{
+	/*
+	 * See do_set_cpus_allowed() above for the rcu_head usage.
+	 */
+	int size = max_t(int, cpumask_size(), sizeof(struct rcu_head));
+
+	return kmalloc_node(size, GFP_KERNEL, node);
 }
 
 int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
 		      int node)
 {
+	cpumask_t *user_mask;
 	unsigned long flags;
 
-	if (!src->user_cpus_ptr)
+	/*
+	 * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's
+	 * may differ by now due to racing.
+	 */
+	dst->user_cpus_ptr = NULL;
+
+	/*
+	 * This check is racy and losing the race is a valid situation.
+	 * It is not worth the extra overhead of taking the pi_lock on
+	 * every fork/clone.
+	 */
+	if (data_race(!src->user_cpus_ptr))
 		return 0;
 
-	dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node);
-	if (!dst->user_cpus_ptr)
+	user_mask = alloc_user_cpus_ptr(node);
+	if (!user_mask)
 		return -ENOMEM;
 
-	/* Use pi_lock to protect content of user_cpus_ptr */
+	/*
+	 * Use pi_lock to protect content of user_cpus_ptr
+	 *
+	 * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent
+	 * do_set_cpus_allowed().
+	 */
 	raw_spin_lock_irqsave(&src->pi_lock, flags);
-	cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
+	if (src->user_cpus_ptr) {
+		swap(dst->user_cpus_ptr, user_mask);
+		cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
+	}
 	raw_spin_unlock_irqrestore(&src->pi_lock, flags);
+
+	if (unlikely(user_mask))
+		kfree(user_mask);
+
 	return 0;
 }
 
@@ -2907,8 +2953,11 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
 	}
 
 	if (!(ctx->flags & SCA_MIGRATE_ENABLE)) {
-		if (cpumask_equal(&p->cpus_mask, ctx->new_mask))
+		if (cpumask_equal(&p->cpus_mask, ctx->new_mask)) {
+			if (ctx->flags & SCA_USER)
+				swap(p->user_cpus_ptr, ctx->user_mask);
 			goto out;
+		}
 
 		if (WARN_ON_ONCE(p == current &&
 				 is_migration_disabled(p) &&
@@ -3581,6 +3630,11 @@ static inline bool rq_has_pinned_tasks(struct rq *rq)
 	return false;
 }
 
+static inline cpumask_t *alloc_user_cpus_ptr(int node)
+{
+	return NULL;
+}
+
 #endif /* !CONFIG_SMP */
 
 static void
@@ -3623,14 +3677,39 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 }
 
 /*
- * Mark the task runnable and perform wakeup-preemption.
+ * Mark the task runnable.
  */
-static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
-			   struct rq_flags *rf)
+static inline void ttwu_do_wakeup(struct task_struct *p)
 {
-	check_preempt_curr(rq, p, wake_flags);
 	WRITE_ONCE(p->__state, TASK_RUNNING);
 	trace_sched_wakeup(p);
+}
+
+static void
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
+		 struct rq_flags *rf)
+{
+	int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
+
+	lockdep_assert_rq_held(rq);
+
+	if (p->sched_contributes_to_load)
+		rq->nr_uninterruptible--;
+
+#ifdef CONFIG_SMP
+	if (wake_flags & WF_MIGRATED)
+		en_flags |= ENQUEUE_MIGRATED;
+	else
+#endif
+	if (p->in_iowait) {
+		delayacct_blkio_end(p);
+		atomic_dec(&task_rq(p)->nr_iowait);
+	}
+
+	activate_task(rq, p, en_flags);
+	check_preempt_curr(rq, p, wake_flags);
+
+	ttwu_do_wakeup(p);
 
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_woken) {
@@ -3660,31 +3739,6 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
 #endif
 }
 
-static void
-ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
-		 struct rq_flags *rf)
-{
-	int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
-
-	lockdep_assert_rq_held(rq);
-
-	if (p->sched_contributes_to_load)
-		rq->nr_uninterruptible--;
-
-#ifdef CONFIG_SMP
-	if (wake_flags & WF_MIGRATED)
-		en_flags |= ENQUEUE_MIGRATED;
-	else
-#endif
-	if (p->in_iowait) {
-		delayacct_blkio_end(p);
-		atomic_dec(&task_rq(p)->nr_iowait);
-	}
-
-	activate_task(rq, p, en_flags);
-	ttwu_do_wakeup(rq, p, wake_flags, rf);
-}
-
 /*
  * Consider @p being inside a wait loop:
  *
@@ -3718,9 +3772,15 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
 
 	rq = __task_rq_lock(p, &rf);
 	if (task_on_rq_queued(p)) {
-		/* check_preempt_curr() may use rq clock */
-		update_rq_clock(rq);
-		ttwu_do_wakeup(rq, p, wake_flags, &rf);
+		if (!task_on_cpu(rq, p)) {
+			/*
+			 * When on_rq && !on_cpu the task is preempted, see if
+			 * it should preempt the task that is current now.
+			 */
+			update_rq_clock(rq);
+			check_preempt_curr(rq, p, wake_flags);
+		}
+		ttwu_do_wakeup(p);
 		ret = 1;
 	}
 	__task_rq_unlock(rq, &rf);
@@ -4086,8 +4146,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 			goto out;
 
 		trace_sched_waking(p);
-		WRITE_ONCE(p->__state, TASK_RUNNING);
-		trace_sched_wakeup(p);
+		ttwu_do_wakeup(p);
 		goto out;
 	}
 
@@ -5052,6 +5111,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
 	sched_info_switch(rq, prev, next);
 	perf_event_task_sched_out(prev, next);
 	rseq_preempt(prev);
+	switch_mm_cid(prev, next);
 	fire_sched_out_preempt_notifiers(prev, next);
 	kmap_local_sched_out();
 	prepare_task(next);
@@ -5282,6 +5342,11 @@ bool single_task_running(void)
 }
 EXPORT_SYMBOL(single_task_running);
 
+unsigned long long nr_context_switches_cpu(int cpu)
+{
+	return cpu_rq(cpu)->nr_switches;
+}
+
 unsigned long long nr_context_switches(void)
 {
 	int i;
@@ -5504,7 +5569,9 @@ void scheduler_tick(void)
 	unsigned long thermal_pressure;
 	u64 resched_latency;
 
-	arch_scale_freq_tick();
+	if (housekeeping_cpu(cpu, HK_TYPE_TICK))
+		arch_scale_freq_tick();
+
 	sched_clock_tick();
 
 	rq_lock(rq, &rf);
@@ -6206,7 +6273,7 @@ static bool steal_cookie_task(int cpu, struct sched_domain *sd)
 {
 	int i;
 
-	for_each_cpu_wrap(i, sched_domain_span(sd), cpu) {
+	for_each_cpu_wrap(i, sched_domain_span(sd), cpu + 1) {
 		if (i == cpu)
 			continue;
 
@@ -8239,12 +8306,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	if (retval)
 		goto out_put_task;
 
-	user_mask = kmalloc(cpumask_size(), GFP_KERNEL);
-	if (!user_mask) {
+	/*
+	 * With non-SMP configs, user_cpus_ptr/user_mask isn't used and
+	 * alloc_user_cpus_ptr() returns NULL.
+	 */
+	user_mask = alloc_user_cpus_ptr(NUMA_NO_NODE);
+	if (user_mask) {
+		cpumask_copy(user_mask, in_mask);
+	} else if (IS_ENABLED(CONFIG_SMP)) {
 		retval = -ENOMEM;
 		goto out_put_task;
 	}
-	cpumask_copy(user_mask, in_mask);
+
 	ac = (struct affinity_context){
 		.new_mask  = in_mask,
 		.user_mask = user_mask,
@@ -11305,3 +11378,53 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
 {
         trace_sched_update_nr_running_tp(rq, count);
 }
+
+#ifdef CONFIG_SCHED_MM_CID
+void sched_mm_cid_exit_signals(struct task_struct *t)
+{
+	struct mm_struct *mm = t->mm;
+	unsigned long flags;
+
+	if (!mm)
+		return;
+	local_irq_save(flags);
+	mm_cid_put(mm, t->mm_cid);
+	t->mm_cid = -1;
+	t->mm_cid_active = 0;
+	local_irq_restore(flags);
+}
+
+void sched_mm_cid_before_execve(struct task_struct *t)
+{
+	struct mm_struct *mm = t->mm;
+	unsigned long flags;
+
+	if (!mm)
+		return;
+	local_irq_save(flags);
+	mm_cid_put(mm, t->mm_cid);
+	t->mm_cid = -1;
+	t->mm_cid_active = 0;
+	local_irq_restore(flags);
+}
+
+void sched_mm_cid_after_execve(struct task_struct *t)
+{
+	struct mm_struct *mm = t->mm;
+	unsigned long flags;
+
+	if (!mm)
+		return;
+	local_irq_save(flags);
+	t->mm_cid = mm_cid_get(mm);
+	t->mm_cid_active = 1;
+	local_irq_restore(flags);
+	rseq_set_notify_resume(t);
+}
+
+void sched_mm_cid_fork(struct task_struct *t)
+{
+	WARN_ON_ONCE(!t->mm || t->mm_cid != -1);
+	t->mm_cid_active = 1;
+}
+#endif
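Note: in the do_set_cpus_allowed() / alloc_user_cpus_ptr() hunks above, the user mask is freed with kfree_rcu() because kfree() cannot be called while p->pi_lock is held on PREEMPT_RT. For that to work, the allocation must be large enough to be reused as a struct rcu_head once the mask is dead, hence the max_t(int, cpumask_size(), sizeof(struct rcu_head)) sizing and the union cpumask_rcuhead cast. The userspace C sketch below only illustrates that sizing idea; it is not kernel code, and the payload/free_hdr/defer_free names are invented for the example.

#include <stdlib.h>
#include <string.h>

/* Stand-ins for cpumask_t and struct rcu_head; the sizes are arbitrary. */
struct payload  { unsigned long bits[4]; };             /* plays "cpumask_t" */
struct free_hdr { void (*func)(void *); void *next; };  /* plays "rcu_head" */

/* One buffer, two lifetimes: payload while live, free header once dead. */
union payload_free_hdr {
	struct payload  data;
	struct free_hdr hdr;
};

/* Allocate enough for whichever member is larger, like alloc_user_cpus_ptr(). */
static struct payload *alloc_payload(void)
{
	size_t size = sizeof(struct payload);

	if (size < sizeof(struct free_hdr))
		size = sizeof(struct free_hdr);
	return malloc(size);
}

/* "Deferred" free: reuse the dead payload's memory as the free header. */
static void defer_free(struct payload *p)
{
	union payload_free_hdr *u = (union payload_free_hdr *)p;

	if (!p)
		return;
	u->hdr.func = free;	/* the payload bits are gone from here on */
	u->hdr.next = NULL;
	u->hdr.func(u);		/* a real implementation would queue this */
}

int main(void)
{
	struct payload *p = alloc_payload();

	if (!p)
		return 1;
	memset(p->bits, 0xff, sizeof(p->bits));
	defer_free(p);
	return 0;
}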
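Note: the dup_user_cpus_ptr() hunk follows a common pattern for duplicating an optional, lock-protected pointer without allocating under the lock: a racy NULL check first (annotated with data_race()), allocation outside the lock, publication via swap() only if the source is still set once pi_lock is held, and a kfree() of the buffer afterwards if it went unused. The pthread sketch below is a rough userspace analogue under those assumptions; struct src_obj and clone_opt_buf() are made-up names, not kernel or libc APIs.

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#define BUF_SZ 128

struct src_obj {
	pthread_spinlock_t lock;	/* plays the role of p->pi_lock */
	char *opt_buf;			/* optional; may be cleared concurrently */
};

/* Copy src->opt_buf into *dst without ever allocating under the lock. */
static int clone_opt_buf(struct src_obj *src, char **dst)
{
	char *tmp;

	*dst = NULL;

	/* Racy fast path: losing the race only costs a wasted allocation. */
	if (!src->opt_buf)
		return 0;

	tmp = malloc(BUF_SZ);
	if (!tmp)
		return -1;

	pthread_spin_lock(&src->lock);
	if (src->opt_buf) {
		/* Publish the new buffer and copy while the lock is held. */
		*dst = tmp;
		tmp = NULL;
		memcpy(*dst, src->opt_buf, BUF_SZ);
	}
	pthread_spin_unlock(&src->lock);

	/* The source vanished in between: drop the unused allocation. */
	free(tmp);
	return 0;
}

int main(void)
{
	struct src_obj src;
	char *copy = NULL;

	pthread_spin_init(&src.lock, PTHREAD_PROCESS_PRIVATE);
	src.opt_buf = calloc(1, BUF_SZ);
	clone_opt_buf(&src, &copy);
	free(copy);
	free(src.opt_buf);
	pthread_spin_destroy(&src.lock);
	return 0;
}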
