summaryrefslogtreecommitdiff
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c76
1 files changed, 45 insertions, 31 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3da7a2444a91..bf724c1952ea 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1630,16 +1630,16 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
#ifdef CONFIG_SMP
if (cpu == rq->cpu) {
- schedstat_inc(rq->ttwu_local);
- schedstat_inc(p->se.statistics.nr_wakeups_local);
+ __schedstat_inc(rq->ttwu_local);
+ __schedstat_inc(p->se.statistics.nr_wakeups_local);
} else {
struct sched_domain *sd;
- schedstat_inc(p->se.statistics.nr_wakeups_remote);
+ __schedstat_inc(p->se.statistics.nr_wakeups_remote);
rcu_read_lock();
for_each_domain(rq->cpu, sd) {
if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
- schedstat_inc(sd->ttwu_wake_remote);
+ __schedstat_inc(sd->ttwu_wake_remote);
break;
}
}
@@ -1647,14 +1647,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
}
if (wake_flags & WF_MIGRATED)
- schedstat_inc(p->se.statistics.nr_wakeups_migrate);
+ __schedstat_inc(p->se.statistics.nr_wakeups_migrate);
#endif /* CONFIG_SMP */
- schedstat_inc(rq->ttwu_count);
- schedstat_inc(p->se.statistics.nr_wakeups);
+ __schedstat_inc(rq->ttwu_count);
+ __schedstat_inc(p->se.statistics.nr_wakeups);
if (wake_flags & WF_SYNC)
- schedstat_inc(p->se.statistics.nr_wakeups_sync);
+ __schedstat_inc(p->se.statistics.nr_wakeups_sync);
}
static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
@@ -2461,6 +2461,7 @@ void wake_up_new_task(struct task_struct *p)
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
* as we're not fully set-up yet.
*/
+ p->recent_used_cpu = task_cpu(p);
__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
#endif
rq = __task_rq_lock(p, &rf);
@@ -2698,23 +2699,27 @@ static struct rq *finish_task_switch(struct task_struct *prev)
prev_state = prev->state;
vtime_task_switch(prev);
perf_event_task_sched_in(prev, current);
- /*
- * The membarrier system call requires a full memory barrier
- * after storing to rq->curr, before going back to user-space.
- *
- * TODO: This smp_mb__after_unlock_lock can go away if PPC end
- * up adding a full barrier to switch_mm(), or we should figure
- * out if a smp_mb__after_unlock_lock is really the proper API
- * to use.
- */
- smp_mb__after_unlock_lock();
finish_task(prev);
finish_lock_switch(rq);
finish_arch_post_lock_switch();
fire_sched_in_preempt_notifiers(current);
- if (mm)
+ /*
+ * When switching through a kernel thread, the loop in
+ * membarrier_{private,global}_expedited() may have observed that
+ * kernel thread and not issued an IPI. It is therefore possible to
+ * schedule between user->kernel->user threads without passing though
+ * switch_mm(). Membarrier requires a barrier after storing to
+ * rq->curr, before returning to userspace, so provide them here:
+ *
+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
+ * provided by mmdrop(),
+ * - a sync_core for SYNC_CORE.
+ */
+ if (mm) {
+ membarrier_mm_sync_core_before_usermode(mm);
mmdrop(mm);
+ }
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
@@ -2818,6 +2823,13 @@ context_switch(struct rq *rq, struct task_struct *prev,
*/
arch_start_context_switch(prev);
+ /*
+ * If mm is non-NULL, we pass through switch_mm(). If mm is
+ * NULL, we will pass through mmdrop() in finish_task_switch().
+ * Both of these contain the full memory barrier required by
+ * membarrier after storing to rq->curr, before returning to
+ * user-space.
+ */
if (!mm) {
next->active_mm = oldmm;
mmgrab(oldmm);
@@ -3354,6 +3366,9 @@ static void __sched notrace __schedule(bool preempt)
* Make sure that signal_pending_state()->signal_pending() below
* can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
* done by the caller to avoid the race with signal_wake_up().
+ *
+ * The membarrier system call requires a full memory barrier
+ * after coming from user-space, before storing to rq->curr.
*/
rq_lock(rq, &rf);
smp_mb__after_spinlock();
@@ -3401,17 +3416,16 @@ static void __sched notrace __schedule(bool preempt)
/*
* The membarrier system call requires each architecture
* to have a full memory barrier after updating
- * rq->curr, before returning to user-space. For TSO
- * (e.g. x86), the architecture must provide its own
- * barrier in switch_mm(). For weakly ordered machines
- * for which spin_unlock() acts as a full memory
- * barrier, finish_lock_switch() in common code takes
- * care of this barrier. For weakly ordered machines for
- * which spin_unlock() acts as a RELEASE barrier (only
- * arm64 and PowerPC), arm64 has a full barrier in
- * switch_to(), and PowerPC has
- * smp_mb__after_unlock_lock() before
- * finish_lock_switch().
+ * rq->curr, before returning to user-space.
+ *
+ * Here are the schemes providing that barrier on the
+ * various architectures:
+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC.
+ * - finish_lock_switch() for weakly-ordered
+ * architectures where spin_unlock is a full barrier,
+ * - switch_to() for arm64 (weakly-ordered, spin_unlock
+ * is a RELEASE barrier),
*/
++*switch_count;
@@ -4853,7 +4867,7 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
ret = sched_getaffinity(pid, mask);
if (ret == 0) {
- size_t retlen = min_t(size_t, len, cpumask_size());
+ unsigned int retlen = min(len, cpumask_size());
if (copy_to_user(user_mask_ptr, mask, retlen))
ret = -EFAULT;