summaryrefslogtreecommitdiff
path: root/kernel/sched
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-05-13 10:53:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-05-13 10:53:08 -0700
commit66e1c94db3cd4e094de66a6be68c3ab6d17e0c52 (patch)
tree920eecb13e08704407ce3aa9739699366b3ef130 /kernel/sched
parent86a4ac433b927a610c09aa6cfb1926d94a6b37b7 (diff)
parente0f6d1a526b6adfa9ca3b336b83ece0eed345033 (diff)
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/pti updates from Thomas Gleixner: "A mixed bag of fixes and updates for the ghosts which are hunting us. The scheduler fixes have been pulled into that branch to avoid conflicts. - A set of fixes to address a khread_parkme() race which caused lost wakeups and loss of state. - A deadlock fix for stop_machine() solved by moving the wakeups outside of the stopper_lock held region. - A set of Spectre V1 array access restrictions. The possible problematic spots were discuvered by Dan Carpenters new checks in smatch. - Removal of an unused file which was forgotten when the rest of that functionality was removed" * 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/vdso: Remove unused file perf/x86/cstate: Fix possible Spectre-v1 indexing for pkg_msr perf/x86/msr: Fix possible Spectre-v1 indexing in the MSR driver perf/x86: Fix possible Spectre-v1 indexing for x86_pmu::event_map() perf/x86: Fix possible Spectre-v1 indexing for hw_perf_event cache_* perf/core: Fix possible Spectre-v1 indexing for ->aux_pages[] sched/autogroup: Fix possible Spectre-v1 indexing for sched_prio_to_weight[] sched/core: Fix possible Spectre-v1 indexing for sched_prio_to_weight[] sched/core: Introduce set_special_state() kthread, sched/wait: Fix kthread_parkme() completion issue kthread, sched/wait: Fix kthread_parkme() wait-loop sched/fair: Fix the update of blocked load when newly idle stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/autogroup.c7
-rw-r--r--kernel/sched/core.c56
-rw-r--r--kernel/sched/fair.c2
3 files changed, 34 insertions, 31 deletions
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 6be6c575b6cd..2d4ff5353ded 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -2,6 +2,7 @@
/*
* Auto-group scheduling implementation:
*/
+#include <linux/nospec.h>
#include "sched.h"
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
static unsigned long next = INITIAL_JIFFIES;
struct autogroup *ag;
unsigned long shares;
- int err;
+ int err, idx;
if (nice < MIN_NICE || nice > MAX_NICE)
return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
next = HZ / 10 + jiffies;
ag = autogroup_task_get(p);
- shares = scale_load(sched_prio_to_weight[nice + 20]);
+
+ idx = array_index_nospec(nice + 20, 40);
+ shares = scale_load(sched_prio_to_weight[idx]);
down_write(&ag->lock);
err = sched_group_set_shares(ag->tg, shares);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5e10aaeebfcc..092f7c4de903 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,6 +7,9 @@
*/
#include "sched.h"
+#include <linux/kthread.h>
+#include <linux/nospec.h>
+
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -2718,20 +2721,28 @@ static struct rq *finish_task_switch(struct task_struct *prev)
membarrier_mm_sync_core_before_usermode(mm);
mmdrop(mm);
}
- if (unlikely(prev_state == TASK_DEAD)) {
- if (prev->sched_class->task_dead)
- prev->sched_class->task_dead(prev);
+ if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
+ switch (prev_state) {
+ case TASK_DEAD:
+ if (prev->sched_class->task_dead)
+ prev->sched_class->task_dead(prev);
- /*
- * Remove function-return probe instances associated with this
- * task and put them back on the free list.
- */
- kprobe_flush_task(prev);
+ /*
+ * Remove function-return probe instances associated with this
+ * task and put them back on the free list.
+ */
+ kprobe_flush_task(prev);
+
+ /* Task is done with its stack. */
+ put_task_stack(prev);
- /* Task is done with its stack. */
- put_task_stack(prev);
+ put_task_struct(prev);
+ break;
- put_task_struct(prev);
+ case TASK_PARKED:
+ kthread_park_complete(prev);
+ break;
+ }
}
tick_nohz_task_switch();
@@ -3498,23 +3509,8 @@ static void __sched notrace __schedule(bool preempt)
void __noreturn do_task_dead(void)
{
- /*
- * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
- * when the following two conditions become true.
- * - There is race condition of mmap_sem (It is acquired by
- * exit_mm()), and
- * - SMI occurs before setting TASK_RUNINNG.
- * (or hypervisor of virtual machine switches to other guest)
- * As a result, we may become TASK_RUNNING after becoming TASK_DEAD
- *
- * To avoid it, we have to wait for releasing tsk->pi_lock which
- * is held by try_to_wake_up()
- */
- raw_spin_lock_irq(&current->pi_lock);
- raw_spin_unlock_irq(&current->pi_lock);
-
/* Causes final put_task_struct in finish_task_switch(): */
- __set_current_state(TASK_DEAD);
+ set_special_state(TASK_DEAD);
/* Tell freezer to ignore us: */
current->flags |= PF_NOFREEZE;
@@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
struct cftype *cft, s64 nice)
{
unsigned long weight;
+ int idx;
if (nice < MIN_NICE || nice > MAX_NICE)
return -ERANGE;
- weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+ idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
+ idx = array_index_nospec(idx, 40);
+ weight = sched_prio_to_weight[idx];
+
return sched_group_set_shares(css_tg(css), scale_load(weight));
}
#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f43627c6bb3d..79f574dba096 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9792,6 +9792,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
if (curr_cost > this_rq->max_idle_balance_cost)
this_rq->max_idle_balance_cost = curr_cost;
+out:
/*
* While browsing the domains, we released the rq lock, a task could
* have been enqueued in the meantime. Since we're not going idle,
@@ -9800,7 +9801,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
if (this_rq->cfs.h_nr_running && !pulled_task)
pulled_task = 1;
-out:
/* Move the next balance forward */
if (time_after(this_rq->next_balance, next_balance))
this_rq->next_balance = next_balance;