Diffstat (limited to 'kernel/cgroup/cpuset.c')
-rw-r--r--  kernel/cgroup/cpuset.c | 41
 1 file changed, 11 insertions(+), 30 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ae643412948a..df403e97b073 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -63,6 +63,7 @@
#include <linux/cgroup.h>
#include <linux/wait.h>
+DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
/* See "Frequency meter" comments, below. */
@@ -1038,40 +1039,25 @@ static void cpuset_post_attach(void)
* @tsk: the task to change
* @newmems: the new set of memory nodes for the task
*
- * In order to avoid seeing no nodes if the old and new nodes are disjoint,
- * we structure updates as setting all new allowed nodes, then clearing newly
- * disallowed ones.
+ * We use the mems_allowed_seq seqlock to safely update both tsk->mems_allowed
+ * and rebind the task's mempolicy, if it has one. If the task is allocating
+ * in parallel, it may temporarily see an empty intersection, which results in
+ * a seqlock check and retry before OOM or allocation failure.
*/
static void cpuset_change_task_nodemask(struct task_struct *tsk,
nodemask_t *newmems)
{
- bool need_loop;
-
task_lock(tsk);
- /*
- * Determine if a loop is necessary if another thread is doing
- * read_mems_allowed_begin(). If at least one node remains unchanged and
- * tsk does not have a mempolicy, then an empty nodemask will not be
- * possible when mems_allowed is larger than a word.
- */
- need_loop = task_has_mempolicy(tsk) ||
- !nodes_intersects(*newmems, tsk->mems_allowed);
- if (need_loop) {
- local_irq_disable();
- write_seqcount_begin(&tsk->mems_allowed_seq);
- }
+ local_irq_disable();
+ write_seqcount_begin(&tsk->mems_allowed_seq);
nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
- mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
-
- mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
+ mpol_rebind_task(tsk, newmems);
tsk->mems_allowed = *newmems;
- if (need_loop) {
- write_seqcount_end(&tsk->mems_allowed_seq);
- local_irq_enable();
- }
+ write_seqcount_end(&tsk->mems_allowed_seq);
+ local_irq_enable();
task_unlock(tsk);
}
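
For context, the reader side of mems_allowed_seq that the new comment refers to is the read_mems_allowed_begin()/read_mems_allowed_retry() pair from include/linux/cpuset.h. Below is a minimal sketch of that retry pattern; the sample_mems_allowed() helper is hypothetical and not part of this diff.

#include <linux/cpuset.h>
#include <linux/nodemask.h>
#include <linux/sched.h>

/* Hypothetical helper, for illustration only: take a snapshot of
 * current->mems_allowed that is consistent against a concurrent
 * cpuset_change_task_nodemask(). If the writer updates the nodemask in
 * parallel, the seqcount changes and the loop retries instead of acting
 * on a transiently empty nodemask. */
static nodemask_t sample_mems_allowed(void)
{
	nodemask_t nodes;
	unsigned int seq;

	do {
		seq = read_mems_allowed_begin();
		nodes = current->mems_allowed;
	} while (read_mems_allowed_retry(seq));

	return nodes;
}

This is essentially the check-and-retry the page allocator performs before concluding that no nodes are allowed, which is why the writer above can drop the old need_loop special case and always take the seqcount write side.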
@@ -1906,6 +1892,7 @@ static struct cftype files[] = {
{
.name = "memory_pressure",
.read_u64 = cpuset_read_u64,
+ .private = FILE_MEMORY_PRESSURE,
},
{
@@ -2357,13 +2344,7 @@ void cpuset_update_active_cpus(void)
* We're inside the CPU hotplug critical region, which usually nests
* inside cgroup synchronization. Bounce the actual hotplug processing
* to a work item to avoid a reverse locking order.
- *
- * We still need to do partition_sched_domains() synchronously;
- * otherwise, the scheduler will get confused and put tasks to the
- * dead CPU. Fall back to the default single domain.
- * cpuset_hotplug_workfn() will rebuild it as necessary.
*/
- partition_sched_domains(1, NULL, NULL);
schedule_work(&cpuset_hotplug_work);
}
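
The remaining schedule_work() call relies on the usual workqueue deferral pattern; cpuset_hotplug_work and cpuset_hotplug_workfn() already exist in cpuset.c. A minimal sketch of how such a work item is wired up, with an illustrative (empty) handler body:

#include <linux/workqueue.h>

static void cpuset_hotplug_workfn(struct work_struct *work);

/* Statically bind the work item to its handler. */
static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);

static void cpuset_hotplug_workfn(struct work_struct *work)
{
	/* Runs later in process context, outside the CPU hotplug
	 * critical region, so it can take cgroup and cpuset locks in
	 * the normal order and rebuild sched domains as needed. */
}

DECLARE_WORK() ties the item to its handler at compile time, so the hotplug path itself only needs the cheap, non-blocking schedule_work() call shown in the hunk above.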