summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst109
-rw-r--r--kernel/cgroup/cpuset.c48
2 files changed, 149 insertions, 8 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 476722b7b636..01b70f69304e 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -56,11 +56,13 @@ v1 is available under Documentation/cgroup-v1/.
5-3-3-2. IO Latency Interface Files
5-4. PID
5-4-1. PID Interface Files
- 5-5. Device
- 5-6. RDMA
- 5-6-1. RDMA Interface Files
- 5-7. Misc
- 5-7-1. perf_event
+ 5-5. Cpuset
+ 5.5-1. Cpuset Interface Files
+ 5-6. Device
+ 5-7. RDMA
+ 5-7-1. RDMA Interface Files
+ 5-8. Misc
+ 5-8-1. perf_event
5-N. Non-normative information
5-N-1. CPU controller root cgroup process behaviour
5-N-2. IO controller root cgroup process behaviour
@@ -1610,6 +1612,103 @@ through fork() or clone(). These will return -EAGAIN if the creation
of a new process would cause a cgroup policy to be violated.
+Cpuset
+------
+
+The "cpuset" controller provides a mechanism for constraining
+the CPU and memory node placement of tasks to only the resources
+specified in the cpuset interface files in a task's current cgroup.
+This is especially valuable on large NUMA systems where placing jobs
+on properly sized subsets of the systems with careful processor and
+memory placement to reduce cross-node memory access and contention
+can improve overall system performance.
+
+The "cpuset" controller is hierarchical. That means the controller
+cannot use CPUs or memory nodes not allowed in its parent.
+
+
+Cpuset Interface Files
+~~~~~~~~~~~~~~~~~~~~~~
+
+ cpuset.cpus
+ A read-write multiple values file which exists on non-root
+ cpuset-enabled cgroups.
+
+ It lists the requested CPUs to be used by tasks within this
+ cgroup. The actual list of CPUs to be granted, however, is
+ subjected to constraints imposed by its parent and can differ
+ from the requested CPUs.
+
+ The CPU numbers are comma-separated numbers or ranges.
+ For example:
+
+ # cat cpuset.cpus
+ 0-4,6,8-10
+
+ An empty value indicates that the cgroup is using the same
+ setting as the nearest cgroup ancestor with a non-empty
+ "cpuset.cpus" or all the available CPUs if none is found.
+
+ The value of "cpuset.cpus" stays constant until the next update
+ and won't be affected by any CPU hotplug events.
+
+ cpuset.cpus.effective
+ A read-only multiple values file which exists on non-root
+ cpuset-enabled cgroups.
+
+ It lists the onlined CPUs that are actually granted to this
+ cgroup by its parent. These CPUs are allowed to be used by
+ tasks within the current cgroup.
+
+ If "cpuset.cpus" is empty, the "cpuset.cpus.effective" file shows
+ all the CPUs from the parent cgroup that can be available to
+ be used by this cgroup. Otherwise, it should be a subset of
+ "cpuset.cpus" unless none of the CPUs listed in "cpuset.cpus"
+ can be granted. In this case, it will be treated just like an
+ empty "cpuset.cpus".
+
+ Its value will be affected by CPU hotplug events.
+
+ cpuset.mems
+ A read-write multiple values file which exists on non-root
+ cpuset-enabled cgroups.
+
+ It lists the requested memory nodes to be used by tasks within
+ this cgroup. The actual list of memory nodes granted, however,
+ is subjected to constraints imposed by its parent and can differ
+ from the requested memory nodes.
+
+ The memory node numbers are comma-separated numbers or ranges.
+ For example:
+
+ # cat cpuset.mems
+ 0-1,3
+
+ An empty value indicates that the cgroup is using the same
+ setting as the nearest cgroup ancestor with a non-empty
+ "cpuset.mems" or all the available memory nodes if none
+ is found.
+
+ The value of "cpuset.mems" stays constant until the next update
+ and won't be affected by any memory nodes hotplug events.
+
+ cpuset.mems.effective
+ A read-only multiple values file which exists on non-root
+ cpuset-enabled cgroups.
+
+ It lists the onlined memory nodes that are actually granted to
+ this cgroup by its parent. These memory nodes are allowed to
+ be used by tasks within the current cgroup.
+
+ If "cpuset.mems" is empty, it shows all the memory nodes from the
+ parent cgroup that will be available to be used by this cgroup.
+ Otherwise, it should be a subset of "cpuset.mems" unless none of
+ the memory nodes listed in "cpuset.mems" can be granted. In this
+ case, it will be treated just like an empty "cpuset.mems".
+
+ Its value will be affected by memory nodes hotplug events.
+
+
Device controller
-----------------
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 266f10cb7222..2b5c4477d969 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1824,12 +1824,11 @@ static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
return 0;
}
-
/*
* for the common functions, 'private' gives the type of file
*/
-static struct cftype files[] = {
+static struct cftype legacy_files[] = {
{
.name = "cpus",
.seq_show = cpuset_common_seq_show,
@@ -1932,6 +1931,47 @@ static struct cftype files[] = {
};
/*
+ * This is currently a minimal set for the default hierarchy. It can be
+ * expanded later on by migrating more features and control files from v1.
+ */
+static struct cftype dfl_files[] = {
+ {
+ .name = "cpus",
+ .seq_show = cpuset_common_seq_show,
+ .write = cpuset_write_resmask,
+ .max_write_len = (100U + 6 * NR_CPUS),
+ .private = FILE_CPULIST,
+ .flags = CFTYPE_NOT_ON_ROOT,
+ },
+
+ {
+ .name = "mems",
+ .seq_show = cpuset_common_seq_show,
+ .write = cpuset_write_resmask,
+ .max_write_len = (100U + 6 * MAX_NUMNODES),
+ .private = FILE_MEMLIST,
+ .flags = CFTYPE_NOT_ON_ROOT,
+ },
+
+ {
+ .name = "cpus.effective",
+ .seq_show = cpuset_common_seq_show,
+ .private = FILE_EFFECTIVE_CPULIST,
+ .flags = CFTYPE_NOT_ON_ROOT,
+ },
+
+ {
+ .name = "mems.effective",
+ .seq_show = cpuset_common_seq_show,
+ .private = FILE_EFFECTIVE_MEMLIST,
+ .flags = CFTYPE_NOT_ON_ROOT,
+ },
+
+ { } /* terminate */
+};
+
+
+/*
* cpuset_css_alloc - allocate a cpuset css
* cgrp: control group that the new cpuset will be part of
*/
@@ -2105,8 +2145,10 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
.fork = cpuset_fork,
- .legacy_cftypes = files,
+ .legacy_cftypes = legacy_files,
+ .dfl_cftypes = dfl_files,
.early_init = true,
+ .threaded = true,
};
/**