summaryrefslogtreecommitdiff
path: root/kernel/cgroup
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup')
-rw-r--r--kernel/cgroup/cgroup-internal.h3
-rw-r--r--kernel/cgroup/cgroup.c69
-rw-r--r--kernel/cgroup/cpuset.c24
3 files changed, 54 insertions, 42 deletions
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index c167a40278e6..5151ff256c29 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -33,6 +33,9 @@ struct cgroup_taskset {
struct list_head src_csets;
struct list_head dst_csets;
+ /* the number of tasks in the set */
+ int nr_tasks;
+
/* the subsys currently being processed */
int ssid;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 1591e9b20122..4f2196a00953 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2148,6 +2148,8 @@ static void cgroup_migrate_add_task(struct task_struct *task,
if (!cset->mg_src_cgrp)
return;
+ mgctx->tset.nr_tasks++;
+
list_move_tail(&task->cg_list, &cset->mg_tasks);
if (list_empty(&cset->mg_node))
list_add_tail(&cset->mg_node,
@@ -2236,21 +2238,19 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
struct css_set *cset, *tmp_cset;
int ssid, failed_ssid, ret;
- /* methods shouldn't be called if no task is actually migrating */
- if (list_empty(&tset->src_csets))
- return 0;
-
/* check that we can legitimately attach to the cgroup */
- do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
- if (ss->can_attach) {
- tset->ssid = ssid;
- ret = ss->can_attach(tset);
- if (ret) {
- failed_ssid = ssid;
- goto out_cancel_attach;
+ if (tset->nr_tasks) {
+ do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
+ if (ss->can_attach) {
+ tset->ssid = ssid;
+ ret = ss->can_attach(tset);
+ if (ret) {
+ failed_ssid = ssid;
+ goto out_cancel_attach;
+ }
}
- }
- } while_each_subsys_mask();
+ } while_each_subsys_mask();
+ }
/*
* Now that we're guaranteed success, proceed to move all tasks to
@@ -2279,25 +2279,29 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
*/
tset->csets = &tset->dst_csets;
- do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
- if (ss->attach) {
- tset->ssid = ssid;
- ss->attach(tset);
- }
- } while_each_subsys_mask();
+ if (tset->nr_tasks) {
+ do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
+ if (ss->attach) {
+ tset->ssid = ssid;
+ ss->attach(tset);
+ }
+ } while_each_subsys_mask();
+ }
ret = 0;
goto out_release_tset;
out_cancel_attach:
- do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
- if (ssid == failed_ssid)
- break;
- if (ss->cancel_attach) {
- tset->ssid = ssid;
- ss->cancel_attach(tset);
- }
- } while_each_subsys_mask();
+ if (tset->nr_tasks) {
+ do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
+ if (ssid == failed_ssid)
+ break;
+ if (ss->cancel_attach) {
+ tset->ssid = ssid;
+ ss->cancel_attach(tset);
+ }
+ } while_each_subsys_mask();
+ }
out_release_tset:
spin_lock_irq(&css_set_lock);
list_splice_init(&tset->dst_csets, &tset->src_csets);
@@ -3104,11 +3108,11 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
cgrp->subtree_control &= ~disable;
ret = cgroup_apply_control(cgrp);
-
cgroup_finalize_control(cgrp, ret);
+ if (ret)
+ goto out_unlock;
kernfs_activate(cgrp->kn);
- ret = 0;
out_unlock:
cgroup_kn_unlock(of->kn);
return ret ?: nbytes;
@@ -4600,9 +4604,6 @@ static void offline_css(struct cgroup_subsys_state *css)
if (!(css->flags & CSS_ONLINE))
return;
- if (ss->css_reset)
- ss->css_reset(css);
-
if (ss->css_offline)
ss->css_offline(css);
@@ -5220,6 +5221,10 @@ int __init cgroup_init(void)
if (ss->bind)
ss->bind(init_css_set.subsys[ssid]);
+
+ mutex_lock(&cgroup_mutex);
+ css_populate_dir(init_css_set.subsys[ssid]);
+ mutex_unlock(&cgroup_mutex);
}
/* init_css_set.subsys[] has been updated, re-hash */
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f3539a41c49d..67230ecf2ce1 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -56,6 +56,7 @@
#include <linux/time64.h>
#include <linux/backing-dev.h>
#include <linux/sort.h>
+#include <linux/oom.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
@@ -63,6 +64,7 @@
#include <linux/cgroup.h>
#include <linux/wait.h>
+DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
/* See "Frequency meter" comments, below. */
@@ -585,6 +587,13 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
rcu_read_unlock();
}
+/* Must be called with cpuset_mutex held. */
+static inline int nr_cpusets(void)
+{
+ /* jump label reference count + the top-level cpuset */
+ return static_key_count(&cpusets_enabled_key.key) + 1;
+}
+
/*
* generate_sched_domains()
*
@@ -1898,6 +1907,7 @@ static struct cftype files[] = {
{
.name = "memory_pressure",
.read_u64 = cpuset_read_u64,
+ .private = FILE_MEMORY_PRESSURE,
},
{
@@ -2349,13 +2359,7 @@ void cpuset_update_active_cpus(void)
* We're inside cpu hotplug critical region which usually nests
* inside cgroup synchronization. Bounce actual hotplug processing
* to a work item to avoid reverse locking order.
- *
- * We still need to do partition_sched_domains() synchronously;
- * otherwise, the scheduler will get confused and put tasks to the
- * dead CPU. Fall back to the default single domain.
- * cpuset_hotplug_workfn() will rebuild it as necessary.
*/
- partition_sched_domains(1, NULL, NULL);
schedule_work(&cpuset_hotplug_work);
}
@@ -2504,12 +2508,12 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* If we're in interrupt, yes, we can always allocate. If @node is set in
* current's mems_allowed, yes. If it's not a __GFP_HARDWALL request and this
* node is set in the nearest hardwalled cpuset ancestor to current's cpuset,
- * yes. If current has access to memory reserves due to TIF_MEMDIE, yes.
+ * yes. If current has access to memory reserves as an oom victim, yes.
* Otherwise, no.
*
* GFP_USER allocations are marked with the __GFP_HARDWALL bit,
* and do not allow allocations outside the current tasks cpuset
- * unless the task has been OOM killed as is marked TIF_MEMDIE.
+ * unless the task has been OOM killed.
* GFP_KERNEL allocations are not so marked, so can escape to the
* nearest enclosing hardwalled ancestor cpuset.
*
@@ -2532,7 +2536,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* affect that:
* in_interrupt - any node ok (current task context irrelevant)
* GFP_ATOMIC - any node ok
- * TIF_MEMDIE - any node ok
+ * tsk_is_oom_victim - any node ok
* GFP_KERNEL - any node in enclosing hardwalled cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok.
*/
@@ -2550,7 +2554,7 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
* Allow tasks that have access to memory reserves because they have
* been OOM killed to get memory anywhere.
*/
- if (unlikely(test_thread_flag(TIF_MEMDIE)))
+ if (unlikely(tsk_is_oom_victim(current)))
return true;
if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
return false;