Diffstat (limited to 'include/linux/cgroup.h')
-rw-r--r--  include/linux/cgroup.h  1417
1 file changed, 676 insertions, 741 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 297462b9f41a..bc892e3b37ee 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_CGROUP_H
#define _LINUX_CGROUP_H
/*
@@ -9,643 +10,393 @@
*/
#include <linux/sched.h>
-#include <linux/cpumask.h>
#include <linux/nodemask.h>
-#include <linux/rcupdate.h>
+#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/cgroupstats.h>
-#include <linux/prio_heap.h>
-#include <linux/rwsem.h>
-#include <linux/idr.h>
-#include <linux/workqueue.h>
-#include <linux/xattr.h>
#include <linux/fs.h>
-#include <linux/percpu-refcount.h>
-
-#ifdef CONFIG_CGROUPS
-
-struct cgroupfs_root;
-struct cgroup_subsys;
-struct inode;
-struct cgroup;
-struct css_id;
-struct eventfd_ctx;
-
-extern int cgroup_init_early(void);
-extern int cgroup_init(void);
-extern void cgroup_fork(struct task_struct *p);
-extern void cgroup_post_fork(struct task_struct *p);
-extern void cgroup_exit(struct task_struct *p, int run_callbacks);
-extern int cgroupstats_build(struct cgroupstats *stats,
- struct dentry *dentry);
-extern int cgroup_load_subsys(struct cgroup_subsys *ss);
-extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
-
-extern int proc_cgroup_show(struct seq_file *, void *);
+#include <linux/seq_file.h>
+#include <linux/kernfs.h>
+#include <linux/jump_label.h>
+#include <linux/types.h>
+#include <linux/notifier.h>
+#include <linux/ns_common.h>
+#include <linux/nsproxy.h>
+#include <linux/user_namespace.h>
+#include <linux/refcount.h>
+#include <linux/kernel_stat.h>
+
+#include <linux/cgroup-defs.h>
+#include <linux/cgroup_namespace.h>
+
+struct kernel_clone_args;
/*
- * Define the enumeration of all cgroup subsystems.
- *
- * We define ids for builtin subsystems and then modular ones.
+ * All weight knobs on the default hierarchy should use the following min,
+ * default and max values. The default value is the logarithmic center of
+ * MIN and MAX and allows 100x to be expressed in both directions.
*/
-#define SUBSYS(_x) _x ## _subsys_id,
-enum cgroup_subsys_id {
-#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
-#include <linux/cgroup_subsys.h>
-#undef IS_SUBSYS_ENABLED
- CGROUP_BUILTIN_SUBSYS_COUNT,
+#define CGROUP_WEIGHT_MIN 1
+#define CGROUP_WEIGHT_DFL 100
+#define CGROUP_WEIGHT_MAX 10000
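A controller exposing a weight knob on the default hierarchy is expected to accept exactly this window; a minimal validation sketch (example_set_weight() is illustrative, not a kernel API):

    static int example_set_weight(u64 weight)
    {
            /* reject anything outside the common [1, 10000] window */
            if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
                    return -ERANGE;
            /*
             * CGROUP_WEIGHT_DFL (100) is the logarithmic center, so a
             * 100x boost and a 100x reduction are both expressible.
             */
            return 0;
    }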
- __CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1,
+#ifdef CONFIG_CGROUPS
-#define IS_SUBSYS_ENABLED(option) IS_MODULE(option)
-#include <linux/cgroup_subsys.h>
-#undef IS_SUBSYS_ENABLED
- CGROUP_SUBSYS_COUNT,
+enum css_task_iter_flags {
+ CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */
+ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */
+ CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */
};
-#undef SUBSYS
-/* Per-subsystem/per-cgroup state maintained by the system. */
-struct cgroup_subsys_state {
- /*
- * The cgroup that this subsystem is attached to. Useful
- * for subsystems that want to know about the cgroup
- * hierarchy structure
- */
- struct cgroup *cgroup;
+/* a css_task_iter should be treated as an opaque object */
+struct css_task_iter {
+ struct cgroup_subsys *ss;
+ unsigned int flags;
+
+ struct list_head *cset_pos;
+ struct list_head *cset_head;
- /* reference count - access via css_[try]get() and css_put() */
- struct percpu_ref refcnt;
+ struct list_head *tcset_pos;
+ struct list_head *tcset_head;
- unsigned long flags;
- /* ID for this css, if possible */
- struct css_id __rcu *id;
+ struct list_head *task_pos;
- /* Used to put @cgroup->dentry on the last css_put() */
- struct work_struct dput_work;
+ struct list_head *cur_tasks_head;
+ struct css_set *cur_cset;
+ struct css_set *cur_dcset;
+ struct task_struct *cur_task;
+ struct list_head iters_node; /* css_set->task_iters */
};
-/* bits in struct cgroup_subsys_state flags field */
-enum {
- CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem */
- CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */
+enum cgroup_lifetime_events {
+ CGROUP_LIFETIME_ONLINE,
+ CGROUP_LIFETIME_OFFLINE,
};
-/**
- * css_get - obtain a reference on the specified css
- * @css: target css
- *
- * The caller must already have a reference.
- */
-static inline void css_get(struct cgroup_subsys_state *css)
-{
- /* We don't need to reference count the root state */
- if (!(css->flags & CSS_ROOT))
- percpu_ref_get(&css->refcnt);
-}
+extern struct file_system_type cgroup_fs_type;
+extern struct cgroup_root cgrp_dfl_root;
+extern struct css_set init_css_set;
+extern spinlock_t css_set_lock;
+extern struct blocking_notifier_head cgroup_lifetime_notifier;
+
+#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
+#include <linux/cgroup_subsys.h>
+#undef SUBSYS
+
+#define SUBSYS(_x) \
+ extern struct static_key_true _x ## _cgrp_subsys_enabled_key; \
+ extern struct static_key_true _x ## _cgrp_subsys_on_dfl_key;
+#include <linux/cgroup_subsys.h>
+#undef SUBSYS
/**
- * css_tryget - try to obtain a reference on the specified css
- * @css: target css
- *
- * Obtain a reference on @css if it's alive. The caller naturally needs to
- * ensure that @css is accessible but doesn't have to be holding a
- * reference on it - IOW, RCU protected access is good enough for this
- * function. Returns %true if a reference count was successfully obtained;
- * %false otherwise.
+ * cgroup_subsys_enabled - fast test on whether a subsys is enabled
+ * @ss: subsystem in question
*/
-static inline bool css_tryget(struct cgroup_subsys_state *css)
-{
- if (css->flags & CSS_ROOT)
- return true;
- return percpu_ref_tryget(&css->refcnt);
-}
+#define cgroup_subsys_enabled(ss) \
+ static_branch_likely(&ss ## _enabled_key)
/**
- * css_put - put a css reference
- * @css: target css
- *
- * Put a reference obtained via css_get() and css_tryget().
+ * cgroup_subsys_on_dfl - fast test on whether a subsys is on default hierarchy
+ * @ss: subsystem in question
*/
-static inline void css_put(struct cgroup_subsys_state *css)
-{
- if (!(css->flags & CSS_ROOT))
- percpu_ref_put(&css->refcnt);
-}
-
-/* bits in struct cgroup flags field */
-enum {
- /* Control Group is dead */
- CGRP_DEAD,
- /*
- * Control Group has previously had a child cgroup or a task,
- * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set)
- */
- CGRP_RELEASABLE,
- /* Control Group requires release notifications to userspace */
- CGRP_NOTIFY_ON_RELEASE,
- /*
- * Clone the parent's configuration when creating a new child
- * cpuset cgroup. For historical reasons, this option can be
- * specified at mount time and thus is implemented here.
- */
- CGRP_CPUSET_CLONE_CHILDREN,
- /* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
- CGRP_SANE_BEHAVIOR,
-};
-
-struct cgroup_name {
- struct rcu_head rcu_head;
- char name[];
-};
-
-struct cgroup {
- unsigned long flags; /* "unsigned long" so bitops work */
-
- int id; /* ida allocated in-hierarchy ID */
-
- /*
- * We link our 'sibling' struct into our parent's 'children'.
- * Our children link their 'sibling' into our 'children'.
- */
- struct list_head sibling; /* my parent's children */
- struct list_head children; /* my children */
- struct list_head files; /* my files */
-
- struct cgroup *parent; /* my parent */
- struct dentry *dentry; /* cgroup fs entry, RCU protected */
-
- /*
- * Monotonically increasing unique serial number which defines a
- * uniform order among all cgroups. It's guaranteed that all
- * ->children lists are in the ascending order of ->serial_nr.
- * It's used to allow interrupting and resuming iterations.
- */
- u64 serial_nr;
-
- /*
- * This is a copy of dentry->d_name, and it's needed because
- * we can't use dentry->d_name in cgroup_path().
- *
- * You must acquire rcu_read_lock() to access cgrp->name, and
- * the only place that can change it is rename(), which is
- * protected by parent dir's i_mutex.
- *
- * Normally you should use cgroup_name() wrapper rather than
- * access it directly.
- */
- struct cgroup_name __rcu *name;
-
- /* Private pointers for each registered subsystem */
- struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
-
- struct cgroupfs_root *root;
-
- /*
- * List of cgrp_cset_links pointing at css_sets with tasks in this
- * cgroup. Protected by css_set_lock.
- */
- struct list_head cset_links;
-
- /*
- * Linked list running through all cgroups that can
- * potentially be reaped by the release agent. Protected by
- * release_list_lock
- */
- struct list_head release_list;
-
- /*
- * list of pidlists, up to two for each namespace (one for procs, one
- * for tasks); created on demand.
- */
- struct list_head pidlists;
- struct mutex pidlist_mutex;
-
- /* For css percpu_ref killing and RCU-protected deletion */
- struct rcu_head rcu_head;
- struct work_struct destroy_work;
- atomic_t css_kill_cnt;
-
- /* List of events which userspace want to receive */
- struct list_head event_list;
- spinlock_t event_list_lock;
+#define cgroup_subsys_on_dfl(ss) \
+ static_branch_likely(&ss ## _on_dfl_key)
+
+bool css_has_online_children(struct cgroup_subsys_state *css);
+struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
+struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
+ struct cgroup_subsys *ss);
+struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
+ struct cgroup_subsys *ss);
+struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
+ struct cgroup_subsys *ss);
+
+struct cgroup *cgroup_get_from_path(const char *path);
+struct cgroup *cgroup_get_from_fd(int fd);
+struct cgroup *cgroup_v1v2_get_from_fd(int fd);
- /* directory xattrs */
- struct simple_xattrs xattrs;
-};
-
-#define MAX_CGROUP_ROOT_NAMELEN 64
-
-/* cgroupfs_root->flags */
-enum {
- /*
- * Unfortunately, cgroup core and various controllers are riddled
- * with idiosyncrasies and pointless options. The following flag,
- * when set, will force sane behavior - some options are forced on,
- * others are disallowed, and some controllers will change their
- * hierarchical or other behaviors.
- *
- * The set of behaviors affected by this flag are still being
- * determined and developed and the mount option for this flag is
- * prefixed with __DEVEL__. The prefix will be dropped once we
- * reach the point where all behaviors are compatible with the
- * planned unified hierarchy, which will automatically turn on this
- * flag.
- *
- * The following are the behaviors currently affected by this flag.
- *
- * - Mount options "noprefix" and "clone_children" are disallowed.
- * Also, cgroupfs file cgroup.clone_children is not created.
- *
- * - When mounting an existing superblock, mount options should
- * match.
- *
- * - Remount is disallowed.
- *
- * - rename(2) is disallowed.
- *
- * - "tasks" is removed. Everything should be at process
- * granularity. Use "cgroup.procs" instead.
- *
- * - "release_agent" and "notify_on_release" are removed.
- * Replacement notification mechanism will be implemented.
- *
- * - cpuset: tasks will be kept in empty cpusets when hotplug happens
- * and take masks of ancestors with non-empty cpus/mems, instead of
- * being moved to an ancestor.
- *
- * - cpuset: a task can be moved into an empty cpuset, and again it
- * takes masks of ancestors.
- *
- * - memcg: use_hierarchy is on by default and the cgroup file for
- * the flag is not created.
- *
- * - blkcg: blk-throttle becomes properly hierarchical.
- */
- CGRP_ROOT_SANE_BEHAVIOR = (1 << 0),
-
- CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */
- CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */
-
- /* mount options live below bit 16 */
- CGRP_ROOT_OPTION_MASK = (1 << 16) - 1,
+int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
+int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
- CGRP_ROOT_SUBSYS_BOUND = (1 << 16), /* subsystems finished binding */
-};
+int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_rm_cftypes(struct cftype *cfts);
+void cgroup_file_notify(struct cgroup_file *cfile);
+void cgroup_file_show(struct cgroup_file *cfile, bool show);
+
+int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
+int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *tsk);
+
+void cgroup_fork(struct task_struct *p);
+extern int cgroup_can_fork(struct task_struct *p,
+ struct kernel_clone_args *kargs);
+extern void cgroup_cancel_fork(struct task_struct *p,
+ struct kernel_clone_args *kargs);
+extern void cgroup_post_fork(struct task_struct *p,
+ struct kernel_clone_args *kargs);
+void cgroup_task_exit(struct task_struct *p);
+void cgroup_task_dead(struct task_struct *p);
+void cgroup_task_release(struct task_struct *p);
+void cgroup_task_free(struct task_struct *p);
+
+int cgroup_init_early(void);
+int cgroup_init(void);
+
+int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v);
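cgroup_parse_float() parses a decimal string and stores it in *@v scaled by 10^@dec_shift; a hedged usage sketch for a write handler accepting two decimal places (buf is the assumed user-supplied string):

    s64 val;

    /* "12.5" with dec_shift == 2 parses to val == 1250 */
    if (cgroup_parse_float(buf, 2, &val))
            return -EINVAL;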
/*
- * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
- * associated with a superblock to form an active hierarchy. This is
- * internal to cgroup core. Don't access directly from controllers.
+ * Iteration helpers and macros.
*/
-struct cgroupfs_root {
- struct super_block *sb;
-
- /* The bitmask of subsystems attached to this hierarchy */
- unsigned long subsys_mask;
-
- /* Unique id for this hierarchy. */
- int hierarchy_id;
- /* A list running through the attached subsystems */
- struct list_head subsys_list;
+struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
+ struct cgroup_subsys_state *parent);
+struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos,
+ struct cgroup_subsys_state *css);
+struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state *pos);
+struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos,
+ struct cgroup_subsys_state *css);
- /* The root cgroup for this hierarchy */
- struct cgroup top_cgroup;
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+ struct cgroup_subsys_state **dst_cssp);
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+ struct cgroup_subsys_state **dst_cssp);
- /* Tracks how many cgroups are currently defined in hierarchy. */
- int number_of_cgroups;
+void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
+ struct css_task_iter *it);
+struct task_struct *css_task_iter_next(struct css_task_iter *it);
+void css_task_iter_end(struct css_task_iter *it);
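A typical traversal brackets css_task_iter_next() between _start and _end; for instance, walking only the thread-group leaders attached to an assumed valid @css:

    struct css_task_iter it;
    struct task_struct *task;

    css_task_iter_start(css, CSS_TASK_ITER_PROCS, &it);
    while ((task = css_task_iter_next(&it)))
            pr_info("leader pid %d\n", task_pid_nr(task));
    css_task_iter_end(&it);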
- /* A list running through the active hierarchies */
- struct list_head root_list;
-
- /* Hierarchy-specific flags */
- unsigned long flags;
-
- /* IDs for cgroups in this hierarchy */
- struct ida cgroup_ida;
-
- /* The path to use for release notifications. */
- char release_agent_path[PATH_MAX];
-
- /* The name for this hierarchy - may be empty */
- char name[MAX_CGROUP_ROOT_NAMELEN];
-};
-
-/*
- * A css_set is a structure holding pointers to a set of
- * cgroup_subsys_state objects. This saves space in the task struct
- * object and speeds up fork()/exit(), since a single inc/dec and a
- * list_add()/del() can bump the reference count on the entire cgroup
- * set for a task.
+/**
+ * css_for_each_child - iterate through children of a css
+ * @pos: the css * to use as the loop cursor
+ * @parent: css whose children to walk
+ *
+ * Walk @parent's children. Must be called under rcu_read_lock().
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal. It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ *
+ * It is allowed to temporarily drop RCU read lock during iteration. The
+ * caller is responsible for ensuring that @pos remains accessible until
+ * the start of the next iteration by, for example, bumping the css refcnt.
*/
+#define css_for_each_child(pos, parent) \
+ for ((pos) = css_next_child(NULL, (parent)); (pos); \
+ (pos) = css_next_child((pos), (parent)))
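As a usage sketch, counting online children under the required RCU read lock:

    struct cgroup_subsys_state *child;
    int nr_online = 0;

    rcu_read_lock();
    css_for_each_child(child, parent_css)
            if (css_is_online(child))
                    nr_online++;
    rcu_read_unlock();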
-struct css_set {
-
- /* Reference count */
- atomic_t refcount;
-
- /*
- * List running through all cgroup groups in the same hash
- * slot. Protected by css_set_lock
- */
- struct hlist_node hlist;
-
- /*
- * List running through all tasks using this cgroup
- * group. Protected by css_set_lock
- */
- struct list_head tasks;
-
- /*
- * List of cgrp_cset_links pointing at cgroups referenced from this
- * css_set. Protected by css_set_lock.
- */
- struct list_head cgrp_links;
-
- /*
- * Set of subsystem states, one for each subsystem. This array
- * is immutable after creation apart from the init_css_set
- * during subsystem registration (at boot time) and modular subsystem
- * loading/unloading.
- */
- struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
-
- /* For RCU-protected deletion */
- struct rcu_head rcu_head;
-};
-
-/*
- * cgroup_map_cb is an abstract callback API for reporting map-valued
- * control files
+/**
+ * css_for_each_descendant_pre - pre-order walk of a css's descendants
+ * @pos: the css * to use as the loop cursor
+ * @root: css whose descendants to walk
+ *
+ * Walk @root's descendants. @root is included in the iteration and the
+ * first node to be visited. Must be called under rcu_read_lock().
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal. It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ *
+ * For example, the following guarantees that a descendant can't escape
+ * state updates of its ancestors.
+ *
+ * my_online(@css)
+ * {
+ * Lock @css's parent and @css;
+ * Inherit state from the parent;
+ * Unlock both.
+ * }
+ *
+ * my_update_state(@css)
+ * {
+ * css_for_each_descendant_pre(@pos, @css) {
+ * Lock @pos;
+ * if (@pos == @css)
+ * Update @css's state;
+ * else
+ * Verify @pos is alive and inherit state from its parent;
+ * Unlock @pos;
+ * }
+ * }
+ *
+ * As long as the inheriting step, including checking the parent state, is
+ * enclosed inside @pos locking, double-locking the parent isn't necessary
+ * while inheriting. The state update to the parent is guaranteed to be
+ * visible by walking order and, as long as inheriting operations to the
+ * same @pos are atomic to each other, multiple updates racing each other
+ * still result in the correct state. It's guaranteed that at least one
+ * inheritance happens for any css after the latest update to its parent.
+ *
+ * If checking parent's state requires locking the parent, each inheriting
+ * iteration should lock and unlock both @pos->parent and @pos.
+ *
+ * Alternatively, a subsystem may choose to use a single global lock to
+ * synchronize ->css_online() and ->css_offline() against tree-walking
+ * operations.
+ *
+ * It is allowed to temporarily drop RCU read lock during iteration. The
+ * caller is responsible for ensuring that @pos remains accessible until
+ * the start of the next iteration by, for example, bumping the css refcnt.
*/
+#define css_for_each_descendant_pre(pos, css) \
+ for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \
+ (pos) = css_next_descendant_pre((pos), (css)))
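Rendered as C, the my_update_state() pattern above might look like the following sketch, where ex_css, its lock and to_ex_css() are hypothetical per-subsystem constructs:

    struct ex_css {                         /* hypothetical per-css state */
            struct cgroup_subsys_state css;
            spinlock_t lock;
            int state;
    };

    static inline struct ex_css *to_ex_css(struct cgroup_subsys_state *css)
    {
            return container_of(css, struct ex_css, css);
    }

    static void example_update_state(struct cgroup_subsys_state *css, int new_state)
    {
            struct cgroup_subsys_state *pos;

            rcu_read_lock();
            css_for_each_descendant_pre(pos, css) {
                    struct ex_css *ec = to_ex_css(pos);

                    spin_lock(&ec->lock);
                    if (pos == css)
                            ec->state = new_state;
                    else
                            /* the parent was visited first; its state is current */
                            ec->state = to_ex_css(pos->parent)->state;
                    spin_unlock(&ec->lock);
            }
            rcu_read_unlock();
    }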
-struct cgroup_map_cb {
- int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value);
- void *state;
-};
-
-/*
- * struct cftype: handler definitions for cgroup control files
+/**
+ * css_for_each_descendant_post - post-order walk of a css's descendants
+ * @pos: the css * to use as the loop cursor
+ * @css: css whose descendants to walk
*
- * When reading/writing to a file:
- * - the cgroup to use is file->f_dentry->d_parent->d_fsdata
- * - the 'cftype' of the file is file->f_dentry->d_fsdata
+ * Similar to css_for_each_descendant_pre() but performs post-order
+ * traversal instead. @root is included in the iteration and the last
+ * node to be visited.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal. It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ *
+ * Note that the walk visibility guarantee example described for the
+ * pre-order walk doesn't apply to post-order walks.
*/
+#define css_for_each_descendant_post(pos, css) \
+ for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \
+ (pos) = css_next_descendant_post((pos), (css)))
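Post-order visits children before their parents, which suits leaves-first teardown; a short sketch reusing the hypothetical to_ex_css() above, with example_drain() equally hypothetical:

    rcu_read_lock();
    css_for_each_descendant_post(pos, css)
            example_drain(to_ex_css(pos));
    rcu_read_unlock();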
-/* cftype->flags */
-enum {
- CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cg */
- CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cg */
- CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */
-};
-
-#define MAX_CFTYPE_NAME 64
-
-struct cftype {
- /*
- * By convention, the name should begin with the name of the
- * subsystem, followed by a period. Zero length string indicates
- * end of cftype array.
- */
- char name[MAX_CFTYPE_NAME];
- int private;
- /*
- * If not 0, file mode is set to this value, otherwise it will
- * be figured out automatically
- */
- umode_t mode;
-
- /*
- * If non-zero, defines the maximum length of string that can
- * be passed to write_string; defaults to 64
- */
- size_t max_write_len;
-
- /* CFTYPE_* flags */
- unsigned int flags;
-
- int (*open)(struct inode *inode, struct file *file);
- ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- char __user *buf, size_t nbytes, loff_t *ppos);
- /*
- * read_u64() is a shortcut for the common case of returning a
- * single integer. Use it in place of read()
- */
- u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
- /*
- * read_s64() is a signed version of read_u64()
- */
- s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
- /*
- * read_map() is used for defining a map of key/value
- * pairs. It should call cb->fill(cb, key, value) for each
- * entry. The key/value pairs (and their ordering) should not
- * change between reboots.
- */
- int (*read_map)(struct cgroup *cgrp, struct cftype *cft,
- struct cgroup_map_cb *cb);
- /*
- * read_seq_string() is used for outputting a simple sequence
- * using seqfile.
- */
- int (*read_seq_string)(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *m);
-
- ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- const char __user *buf, size_t nbytes, loff_t *ppos);
-
- /*
- * write_u64() is a shortcut for the common case of accepting
- * a single integer (as parsed by simple_strtoull) from
- * userspace. Use in place of write(); return 0 or error.
- */
- int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
- /*
- * write_s64() is a signed version of write_u64()
- */
- int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
-
- /*
- * write_string() is passed a nul-terminated kernelspace
- * buffer of maximum length determined by max_write_len.
- * Returns 0 or -ve error code.
- */
- int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
- const char *buffer);
- /*
- * trigger() callback can be used to get some kick from the
- * userspace, when the actual string written is not important
- * at all. The private field can be used to determine the
- * kick type for multiplexing.
- */
- int (*trigger)(struct cgroup *cgrp, unsigned int event);
-
- int (*release)(struct inode *inode, struct file *file);
-
- /*
- * register_event() callback will be used to add new userspace
- * waiter for changes related to the cftype. Implement it if
- * you want to provide this functionality. Use eventfd_signal()
- * on eventfd to send notification to userspace.
- */
- int (*register_event)(struct cgroup *cgrp, struct cftype *cft,
- struct eventfd_ctx *eventfd, const char *args);
- /*
- * unregister_event() callback will be called when userspace
- * closes the eventfd or when the cgroup is removed.
- * This callback must be implemented if you want to provide
- * notification functionality.
- */
- void (*unregister_event)(struct cgroup *cgrp, struct cftype *cft,
- struct eventfd_ctx *eventfd);
-};
-
-/*
- * cftype_sets describe cftypes belonging to a subsystem and are chained at
- * cgroup_subsys->cftsets. Each cftset points to an array of cftypes
- * terminated by zero length name.
+/**
+ * cgroup_taskset_for_each - iterate cgroup_taskset
+ * @task: the loop cursor
+ * @dst_css: the destination css
+ * @tset: taskset to iterate
+ *
+ * @tset may contain multiple tasks and they may belong to multiple
+ * processes.
+ *
+ * On the v2 hierarchy, there may be tasks from multiple processes and they
+ * may not share the source or destination csses.
+ *
+ * On traditional hierarchies, when there are multiple tasks in @tset, if a
+ * task of a process is in @tset, all tasks of the process are in @tset.
+ * Also, all are guaranteed to share the same source and destination csses.
+ *
+ * Iteration is not in any specific order.
*/
-struct cftype_set {
- struct list_head node; /* chained at subsys->cftsets */
- struct cftype *cfts;
-};
+#define cgroup_taskset_for_each(task, dst_css, tset) \
+ for ((task) = cgroup_taskset_first((tset), &(dst_css)); \
+ (task); \
+ (task) = cgroup_taskset_next((tset), &(dst_css)))
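The taskset iterators are intended for the migration methods; a sketch of a ->can_attach() implementation that vetoes the migration based on a hypothetical per-task predicate:

    static int example_can_attach(struct cgroup_taskset *tset)
    {
            struct cgroup_subsys_state *dst_css;
            struct task_struct *task;

            cgroup_taskset_for_each(task, dst_css, tset)
                    if (!example_task_allowed(task, dst_css))   /* hypothetical */
                            return -EPERM;
            return 0;
    }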
-struct cgroup_scanner {
- struct cgroup *cg;
- int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
- void (*process_task)(struct task_struct *p,
- struct cgroup_scanner *scan);
- struct ptr_heap *heap;
- void *data;
-};
+/**
+ * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
+ * @leader: the loop cursor
+ * @dst_css: the destination css
+ * @tset: taskset to iterate
+ *
+ * Iterate threadgroup leaders of @tset. For single-task migrations, @tset
+ * may not contain any.
+ */
+#define cgroup_taskset_for_each_leader(leader, dst_css, tset) \
+ for ((leader) = cgroup_taskset_first((tset), &(dst_css)); \
+ (leader); \
+ (leader) = cgroup_taskset_next((tset), &(dst_css))) \
+ if ((leader) != (leader)->group_leader) \
+ ; \
+ else
/*
- * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This
- * function can be called as long as @cgrp is accessible.
+ * Inline functions.
*/
-static inline bool cgroup_sane_behavior(const struct cgroup *cgrp)
-{
- return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR;
-}
-/* Caller should hold rcu_read_lock() */
-static inline const char *cgroup_name(const struct cgroup *cgrp)
+#ifdef CONFIG_DEBUG_CGROUP_REF
+void css_get(struct cgroup_subsys_state *css);
+void css_get_many(struct cgroup_subsys_state *css, unsigned int n);
+bool css_tryget(struct cgroup_subsys_state *css);
+bool css_tryget_online(struct cgroup_subsys_state *css);
+void css_put(struct cgroup_subsys_state *css);
+void css_put_many(struct cgroup_subsys_state *css, unsigned int n);
+#else
+#define CGROUP_REF_FN_ATTRS static inline
+#define CGROUP_REF_EXPORT(fn)
+#include <linux/cgroup_refcnt.h>
+#endif
+
+static inline u64 cgroup_id(const struct cgroup *cgrp)
{
- return rcu_dereference(cgrp->name)->name;
+ return cgrp->kn->id;
}
-int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
-int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
-
-bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
-
-int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
-int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
- char *buf, size_t buflen);
-
-int cgroup_task_count(const struct cgroup *cgrp);
-
-/*
- * Control Group taskset, used to pass around set of tasks to cgroup_subsys
- * methods.
- */
-struct cgroup_taskset;
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
-struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset);
-int cgroup_taskset_size(struct cgroup_taskset *tset);
-
/**
- * cgroup_taskset_for_each - iterate cgroup_taskset
- * @task: the loop cursor
- * @skip_cgrp: skip if task's cgroup matches this, %NULL to iterate through all
- * @tset: taskset to iterate
- */
-#define cgroup_taskset_for_each(task, skip_cgrp, tset) \
- for ((task) = cgroup_taskset_first((tset)); (task); \
- (task) = cgroup_taskset_next((tset))) \
- if (!(skip_cgrp) || \
- cgroup_taskset_cur_cgroup((tset)) != (skip_cgrp))
-
-/*
- * Control Group subsystem type.
- * See Documentation/cgroups/cgroups.txt for details
+ * css_is_dying - test whether the specified css is dying
+ * @css: target css
+ *
+ * Test whether @css is in the process of offlining or already offline. In
+ * most cases, ->css_online() and ->css_offline() callbacks should be
+ * enough; however, the actual offline operations are RCU delayed and this
+ * test returns %true also when @css is scheduled to be offlined.
+ *
+ * This is useful, for example, when the use case requires synchronous
+ * behavior with respect to cgroup removal. cgroup removal schedules css
+ * offlining but the css can seem alive while the operation is being
+ * delayed. If the delay affects user visible semantics, this test can be
+ * used to resolve the situation.
*/
+static inline bool css_is_dying(struct cgroup_subsys_state *css)
+{
+ return css->flags & CSS_DYING;
+}
-struct cgroup_subsys {
- struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp);
- int (*css_online)(struct cgroup *cgrp);
- void (*css_offline)(struct cgroup *cgrp);
- void (*css_free)(struct cgroup *cgrp);
-
- int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
- void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
- void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
- void (*fork)(struct task_struct *task);
- void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp,
- struct task_struct *task);
- void (*bind)(struct cgroup *root);
-
- int subsys_id;
- int disabled;
- int early_init;
- /*
- * True if this subsys uses ID. ID is not available before cgroup_init()
- * (not available in early_init time.)
- */
- bool use_id;
+static inline bool css_is_online(struct cgroup_subsys_state *css)
+{
+ return css->flags & CSS_ONLINE;
+}
- /*
- * If %false, this subsystem is properly hierarchical -
- * configuration, resource accounting and restriction on a parent
- * cgroup cover those of its children. If %true, hierarchy support
- * is broken in some ways - some subsystems ignore hierarchy
- * completely while others are only implemented half-way.
- *
- * It's now disallowed to create nested cgroups if the subsystem is
- * broken and cgroup core will emit a warning message on such
- * cases. Eventually, all subsystems will be made properly
- * hierarchical and this will go away.
- */
- bool broken_hierarchy;
- bool warned_broken_hierarchy;
+static inline bool css_is_self(struct cgroup_subsys_state *css)
+{
+ if (css == &css->cgroup->self) {
+ /* cgroup::self should not have subsystem association */
+ WARN_ON(css->ss != NULL);
+ return true;
+ }
-#define MAX_CGROUP_TYPE_NAMELEN 32
- const char *name;
+ return false;
+}
- /*
- * Link to parent, and list entry in parent's children.
- * Protected by cgroup_lock()
- */
- struct cgroupfs_root *root;
- struct list_head sibling;
- /* used when use_id == true */
- struct idr idr;
- spinlock_t id_lock;
+static inline void cgroup_get(struct cgroup *cgrp)
+{
+ css_get(&cgrp->self);
+}
- /* list of cftype_sets */
- struct list_head cftsets;
+static inline bool cgroup_tryget(struct cgroup *cgrp)
+{
+ return css_tryget(&cgrp->self);
+}
- /* base cftypes, automatically [de]registered with subsys itself */
- struct cftype *base_cftypes;
- struct cftype_set base_cftset;
+static inline void cgroup_put(struct cgroup *cgrp)
+{
+ css_put(&cgrp->self);
+}
- /* should be defined only by modular subsystems */
- struct module *module;
-};
+extern struct mutex cgroup_mutex;
-#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
-#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
-#include <linux/cgroup_subsys.h>
-#undef IS_SUBSYS_ENABLED
-#undef SUBSYS
+static inline void cgroup_lock(void)
+{
+ mutex_lock(&cgroup_mutex);
+}
-static inline struct cgroup_subsys_state *cgroup_subsys_state(
- struct cgroup *cgrp, int subsys_id)
+static inline void cgroup_unlock(void)
{
- return cgrp->subsys[subsys_id];
+ mutex_unlock(&cgroup_mutex);
}
/**
@@ -662,18 +413,19 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
* as locks used during the cgroup_subsys::attach() methods.
*/
#ifdef CONFIG_PROVE_RCU
-extern struct mutex cgroup_mutex;
#define task_css_set_check(task, __c) \
rcu_dereference_check((task)->cgroups, \
- lockdep_is_held(&(task)->alloc_lock) || \
- lockdep_is_held(&cgroup_mutex) || (__c))
+ rcu_read_lock_sched_held() || \
+ lockdep_is_held(&cgroup_mutex) || \
+ lockdep_is_held(&css_set_lock) || \
+ ((task)->flags & PF_EXITING) || (__c))
#else
#define task_css_set_check(task, __c) \
rcu_dereference((task)->cgroups)
#endif
/**
- * task_subsys_state_check - obtain css for (task, subsys) w/ extra access conds
+ * task_css_check - obtain css for (task, subsys) w/ extra access conds
* @task: the target task
* @subsys_id: the target subsystem ID
* @__c: extra condition expression to be passed to rcu_dereference_check()
@@ -681,7 +433,7 @@ extern struct mutex cgroup_mutex;
* Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The
* synchronization rules are the same as task_css_set_check().
*/
-#define task_subsys_state_check(task, subsys_id, __c) \
+#define task_css_check(task, subsys_id, __c) \
task_css_set_check((task), (__c))->subsys[(subsys_id)]
/**
@@ -696,212 +448,395 @@ static inline struct css_set *task_css_set(struct task_struct *task)
}
/**
- * task_subsys_state - obtain css for (task, subsys)
+ * task_css - obtain css for (task, subsys)
* @task: the target task
* @subsys_id: the target subsystem ID
*
- * See task_subsys_state_check().
+ * See task_css_check().
*/
-static inline struct cgroup_subsys_state *
-task_subsys_state(struct task_struct *task, int subsys_id)
+static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
+ int subsys_id)
{
- return task_subsys_state_check(task, subsys_id, false);
+ return task_css_check(task, subsys_id, false);
}
-static inline struct cgroup* task_cgroup(struct task_struct *task,
- int subsys_id)
+/**
+ * task_get_css - find and get the css for (task, subsys)
+ * @task: the target task
+ * @subsys_id: the target subsystem ID
+ *
+ * Find the css for the (@task, @subsys_id) combination, increment a
+ * reference on and return it. This function is guaranteed to return a
+ * valid css. The returned css may already have been offlined.
+ */
+static inline struct cgroup_subsys_state *
+task_get_css(struct task_struct *task, int subsys_id)
{
- return task_subsys_state(task, subsys_id)->cgroup;
+ struct cgroup_subsys_state *css;
+
+ rcu_read_lock();
+ while (true) {
+ css = task_css(task, subsys_id);
+ /*
+ * Can't use css_tryget_online() here. A task which has
+ * PF_EXITING set may stay associated with an offline css.
+ * If such task calls this function, css_tryget_online()
+ * will keep failing.
+ */
+ if (likely(css_tryget(css)))
+ break;
+ cpu_relax();
+ }
+ rcu_read_unlock();
+ return css;
}
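Callers pair task_get_css() with css_put() once done; e.g., pinning the cpu controller's css for the current task:

    struct cgroup_subsys_state *css;

    css = task_get_css(current, cpu_cgrp_id);
    /* css is guaranteed valid here, though it may already be offline */
    css_put(css);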
-struct cgroup *cgroup_next_sibling(struct cgroup *pos);
-
/**
- * cgroup_for_each_child - iterate through children of a cgroup
- * @pos: the cgroup * to use as the loop cursor
- * @cgrp: cgroup whose children to walk
- *
- * Walk @cgrp's children. Must be called under rcu_read_lock(). A child
- * cgroup which hasn't finished ->css_online() or already has finished
- * ->css_offline() may show up during traversal and it's each subsystem's
- * responsibility to verify that each @pos is alive.
- *
- * If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, a cgroup which finished ->css_online() is
- * guaranteed to be visible in the future iterations.
+ * task_css_is_root - test whether a task belongs to the root css
+ * @task: the target task
+ * @subsys_id: the target subsystem ID
*
- * It is allowed to temporarily drop RCU read lock during iteration. The
- * caller is responsible for ensuring that @pos remains accessible until
- * the start of the next iteration by, for example, bumping the css refcnt.
+ * Test whether @task belongs to the root css on the specified subsystem.
+ * May be invoked in any context.
*/
-#define cgroup_for_each_child(pos, cgrp) \
- for ((pos) = list_first_or_null_rcu(&(cgrp)->children, \
- struct cgroup, sibling); \
- (pos); (pos) = cgroup_next_sibling((pos)))
+static inline bool task_css_is_root(struct task_struct *task, int subsys_id)
+{
+ return task_css_check(task, subsys_id, true) ==
+ init_css_set.subsys[subsys_id];
+}
-struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
- struct cgroup *cgroup);
-struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
+static inline struct cgroup *task_cgroup(struct task_struct *task,
+ int subsys_id)
+{
+ return task_css(task, subsys_id)->cgroup;
+}
+
+static inline struct cgroup *task_dfl_cgroup(struct task_struct *task)
+{
+ return task_css_set(task)->dfl_cgrp;
+}
+
+static inline struct cgroup *cgroup_parent(struct cgroup *cgrp)
+{
+ struct cgroup_subsys_state *parent_css = cgrp->self.parent;
+
+ if (parent_css)
+ return container_of(parent_css, struct cgroup, self);
+ return NULL;
+}
/**
- * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
- * @pos: the cgroup * to use as the loop cursor
- * @cgroup: cgroup whose descendants to walk
- *
- * Walk @cgroup's descendants. Must be called under rcu_read_lock(). A
- * descendant cgroup which hasn't finished ->css_online() or already has
- * finished ->css_offline() may show up during traversal and it's each
- * subsystem's responsibility to verify that each @pos is alive.
- *
- * If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, and synchronizes against @pos on each
- * iteration, any descendant cgroup which finished ->css_online() is
- * guaranteed to be visible in the future iterations.
- *
- * In other words, the following guarantees that a descendant can't escape
- * state updates of its ancestors.
- *
- * my_online(@cgrp)
- * {
- * Lock @cgrp->parent and @cgrp;
- * Inherit state from @cgrp->parent;
- * Unlock both.
- * }
- *
- * my_update_state(@cgrp)
- * {
- * Lock @cgrp;
- * Update @cgrp's state;
- * Unlock @cgrp;
- *
- * cgroup_for_each_descendant_pre(@pos, @cgrp) {
- * Lock @pos;
- * Verify @pos is alive and inherit state from @pos->parent;
- * Unlock @pos;
- * }
- * }
- *
- * As long as the inheriting step, including checking the parent state, is
- * enclosed inside @pos locking, double-locking the parent isn't necessary
- * while inheriting. The state update to the parent is guaranteed to be
- * visible by walking order and, as long as inheriting operations to the
- * same @pos are atomic to each other, multiple updates racing each other
- * still result in the correct state. It's guaranteed that at least one
- * inheritance happens for any cgroup after the latest update to its
- * parent.
+ * cgroup_is_descendant - test ancestry
+ * @cgrp: the cgroup to be tested
+ * @ancestor: possible ancestor of @cgrp
*
- * If checking parent's state requires locking the parent, each inheriting
- * iteration should lock and unlock both @pos->parent and @pos.
+ * Test whether @cgrp is a descendant of @ancestor. It also returns %true
+ * if @cgrp == @ancestor. This function is safe to call as long as @cgrp
+ * and @ancestor are accessible.
+ */
+static inline bool cgroup_is_descendant(struct cgroup *cgrp,
+ struct cgroup *ancestor)
+{
+ if (cgrp->root != ancestor->root || cgrp->level < ancestor->level)
+ return false;
+ return cgrp->ancestors[ancestor->level] == ancestor;
+}
+
+/**
+ * cgroup_ancestor - find ancestor of cgroup
+ * @cgrp: cgroup to find ancestor of
+ * @ancestor_level: level of ancestor to find starting from root
*
- * Alternatively, a subsystem may choose to use a single global lock to
- * synchronize ->css_online() and ->css_offline() against tree-walking
- * operations.
+ * Find ancestor of cgroup at specified level starting from root if it exists
+ * and return pointer to it. Return NULL if @cgrp doesn't have ancestor at
+ * @ancestor_level.
*
- * It is allowed to temporarily drop RCU read lock during iteration. The
- * caller is responsible for ensuring that @pos remains accessible until
- * the start of the next iteration by, for example, bumping the css refcnt.
+ * This function is safe to call as long as @cgrp is accessible.
*/
-#define cgroup_for_each_descendant_pre(pos, cgroup) \
- for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \
- pos = cgroup_next_descendant_pre((pos), (cgroup)))
-
-struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
- struct cgroup *cgroup);
+static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
+ int ancestor_level)
+{
+ if (ancestor_level < 0 || ancestor_level > cgrp->level)
+ return NULL;
+ return cgrp->ancestors[ancestor_level];
+}
/**
- * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants
- * @pos: the cgroup * to use as the loop cursor
- * @cgroup: cgroup whose descendants to walk
+ * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
+ * @task: the task to be tested
+ * @ancestor: possible ancestor of @task's cgroup
*
- * Similar to cgroup_for_each_descendant_pre() but performs post-order
- * traversal instead. Note that the walk visibility guarantee described
- * for the pre-order walk doesn't apply to post-order walks.
+ * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
+ * It follows all the same rules as cgroup_is_descendant, and only applies
+ * to the default hierarchy.
*/
-#define cgroup_for_each_descendant_post(pos, cgroup) \
- for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \
- pos = cgroup_next_descendant_post((pos), (cgroup)))
-
-/* A cgroup_iter should be treated as an opaque object */
-struct cgroup_iter {
- struct list_head *cset_link;
- struct list_head *task;
-};
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+ struct cgroup *ancestor)
+{
+ struct css_set *cset = task_css_set(task);
+
+ return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
+}
+
+/* no synchronization, the result can only be used as a hint */
+static inline bool cgroup_is_populated(struct cgroup *cgrp)
+{
+ return cgrp->nr_populated_csets + cgrp->nr_populated_domain_children +
+ cgrp->nr_populated_threaded_children;
+}
+
+/* returns ino associated with a cgroup */
+static inline ino_t cgroup_ino(struct cgroup *cgrp)
+{
+ return kernfs_ino(cgrp->kn);
+}
+
+/* cft/css accessors for cftype->write() operation */
+static inline struct cftype *of_cft(struct kernfs_open_file *of)
+{
+ return of->kn->priv;
+}
+
+struct cgroup_subsys_state *of_css(struct kernfs_open_file *of);
+
+/* cft/css accessors for cftype->seq_*() operations */
+static inline struct cftype *seq_cft(struct seq_file *seq)
+{
+ return of_cft(seq->private);
+}
+
+static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
+{
+ return of_css(seq->private);
+}
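These accessors let a cftype's seq_show() recover its css; an illustrative read handler reusing the hypothetical ex_css type sketched earlier:

    static int example_seq_show(struct seq_file *seq, void *v)
    {
            struct ex_css *ec = to_ex_css(seq_css(seq));

            seq_printf(seq, "%d\n", ec->state);
            return 0;
    }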
/*
- * To iterate across the tasks in a cgroup:
- *
- * 1) call cgroup_iter_start to initialize an iterator
- *
- * 2) call cgroup_iter_next() to retrieve member tasks until it
- * returns NULL or until you want to end the iteration
- *
- * 3) call cgroup_iter_end() to destroy the iterator.
- *
- * Or, call cgroup_scan_tasks() to iterate through every task in a
- * cgroup - cgroup_scan_tasks() holds the css_set_lock when calling
- * the test_task() callback, but not while calling the process_task()
- * callback.
+ * Name / path handling functions. All are thin wrappers around the kernfs
+ * counterparts and can be called under any context.
*/
-void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it);
-struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
- struct cgroup_iter *it);
-void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
-int cgroup_scan_tasks(struct cgroup_scanner *scan);
-int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
-int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
+static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen)
+{
+ return kernfs_name(cgrp->kn, buf, buflen);
+}
+
+static inline int cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen)
+{
+ return kernfs_path(cgrp->kn, buf, buflen);
+}
+
+static inline void pr_cont_cgroup_name(struct cgroup *cgrp)
+{
+ pr_cont_kernfs_name(cgrp->kn);
+}
+
+static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
+{
+ pr_cont_kernfs_path(cgrp->kn);
+}
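Because these wrap kernfs, cgroup_path() follows snprintf-like semantics: it returns the full path length (or a negative error), which can exceed @buflen when the result is truncated. A sketch:

    char buf[128];
    int len;

    len = cgroup_path(cgrp, buf, sizeof(buf));
    if (len >= 0 && len < (int)sizeof(buf))
            pr_info("cgroup: %s\n", buf);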
+
+bool cgroup_psi_enabled(void);
+
+static inline void cgroup_init_kthreadd(void)
+{
+ /*
+ * kthreadd is inherited by all kthreads, keep it in the root so
+ * that the new kthreads are guaranteed to stay in the root until
+ * initialization is finished.
+ */
+ current->no_cgroup_migration = 1;
+}
+
+static inline void cgroup_kthread_ready(void)
+{
+ /*
+ * This kthread finished initialization. The creator should have
+ * set PF_NO_SETAFFINITY if this kthread should stay in the root.
+ */
+ current->no_cgroup_migration = 0;
+}
+
+void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen);
+struct cgroup *__cgroup_get_from_id(u64 id);
+struct cgroup *cgroup_get_from_id(u64 id);
+#else /* !CONFIG_CGROUPS */
+
+struct cgroup_subsys_state;
+struct cgroup;
+
+static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
+static inline void css_get(struct cgroup_subsys_state *css) {}
+static inline void css_put(struct cgroup_subsys_state *css) {}
+static inline void cgroup_lock(void) {}
+static inline void cgroup_unlock(void) {}
+static inline int cgroup_attach_task_all(struct task_struct *from,
+ struct task_struct *t) { return 0; }
+static inline int cgroupstats_build(struct cgroupstats *stats,
+ struct dentry *dentry) { return -EINVAL; }
+
+static inline void cgroup_fork(struct task_struct *p) {}
+static inline int cgroup_can_fork(struct task_struct *p,
+ struct kernel_clone_args *kargs) { return 0; }
+static inline void cgroup_cancel_fork(struct task_struct *p,
+ struct kernel_clone_args *kargs) {}
+static inline void cgroup_post_fork(struct task_struct *p,
+ struct kernel_clone_args *kargs) {}
+static inline void cgroup_task_exit(struct task_struct *p) {}
+static inline void cgroup_task_dead(struct task_struct *p) {}
+static inline void cgroup_task_release(struct task_struct *p) {}
+static inline void cgroup_task_free(struct task_struct *p) {}
+
+static inline int cgroup_init_early(void) { return 0; }
+static inline int cgroup_init(void) { return 0; }
+static inline void cgroup_init_kthreadd(void) {}
+static inline void cgroup_kthread_ready(void) {}
+
+static inline struct cgroup *cgroup_parent(struct cgroup *cgrp)
+{
+ return NULL;
+}
+
+static inline bool cgroup_psi_enabled(void)
+{
+ return false;
+}
+
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+ struct cgroup *ancestor)
+{
+ return true;
+}
+
+static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
+{}
+#endif /* !CONFIG_CGROUPS */
+
+#ifdef CONFIG_CGROUPS
/*
- * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
- * if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
- * CSS ID is assigned at cgroup allocation (create) automatically
- * and removed when subsys calls free_css_id() function. This is because
- * the lifetime of cgroup_subsys_state is subsys's matter.
- *
- * Looking up and scanning function should be called under rcu_read_lock().
- * Taking cgroup_mutex is not necessary for following calls.
- * But the css returned by this routine can be "not populated yet" or "being
- * destroyed". The caller should check css and cgroup's status.
+ * cgroup scalable recursive statistics.
*/
+void css_rstat_updated(struct cgroup_subsys_state *css, int cpu);
+void css_rstat_flush(struct cgroup_subsys_state *css);
/*
- * Typically Called at ->destroy(), or somewhere the subsys frees
- * cgroup_subsys_state.
+ * Basic resource stats.
*/
-void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
+#ifdef CONFIG_CGROUP_CPUACCT
+void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
+#else
+static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+static inline void cpuacct_account_field(struct task_struct *tsk, int index,
+ u64 val) {}
+#endif
-/* Find a cgroup_subsys_state which has given ID */
+void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec);
+void __cgroup_account_cputime_field(struct cgroup *cgrp,
+ enum cpu_usage_stat index, u64 delta_exec);
-struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
+static inline void cgroup_account_cputime(struct task_struct *task,
+ u64 delta_exec)
+{
+ struct cgroup *cgrp;
-/* Returns true if root is ancestor of cg */
-bool css_is_ancestor(struct cgroup_subsys_state *cg,
- const struct cgroup_subsys_state *root);
+ cpuacct_charge(task, delta_exec);
-/* Get id and depth of css */
-unsigned short css_id(struct cgroup_subsys_state *css);
-struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
+ cgrp = task_dfl_cgroup(task);
+ if (cgroup_parent(cgrp))
+ __cgroup_account_cputime(cgrp, delta_exec);
+}
-#else /* !CONFIG_CGROUPS */
+static inline void cgroup_account_cputime_field(struct task_struct *task,
+ enum cpu_usage_stat index,
+ u64 delta_exec)
+{
+ struct cgroup *cgrp;
-static inline int cgroup_init_early(void) { return 0; }
-static inline int cgroup_init(void) { return 0; }
-static inline void cgroup_fork(struct task_struct *p) {}
-static inline void cgroup_post_fork(struct task_struct *p) {}
-static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
+ cpuacct_account_field(task, index, delta_exec);
-static inline int cgroupstats_build(struct cgroupstats *stats,
- struct dentry *dentry)
+ cgrp = task_dfl_cgroup(task);
+ if (cgroup_parent(cgrp))
+ __cgroup_account_cputime_field(cgrp, index, delta_exec);
+}
+
+#else /* CONFIG_CGROUPS */
+
+static inline void cgroup_account_cputime(struct task_struct *task,
+ u64 delta_exec) {}
+static inline void cgroup_account_cputime_field(struct task_struct *task,
+ enum cpu_usage_stat index,
+ u64 delta_exec) {}
+
+#endif /* CONFIG_CGROUPS */
+
+/*
+ * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data
+ * definition in cgroup-defs.h.
+ */
+#ifdef CONFIG_SOCK_CGROUP_DATA
+
+void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
+void cgroup_sk_clone(struct sock_cgroup_data *skcd);
+void cgroup_sk_free(struct sock_cgroup_data *skcd);
+
+static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
{
- return -EINVAL;
+ return skcd->cgroup;
}
-/* No cgroups - nothing to do */
-static inline int cgroup_attach_task_all(struct task_struct *from,
- struct task_struct *t)
+#else /* !CONFIG_SOCK_CGROUP_DATA */
+
+static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {}
+static inline void cgroup_sk_clone(struct sock_cgroup_data *skcd) {}
+static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {}
+
+#endif /* CONFIG_SOCK_CGROUP_DATA */
+
+#ifdef CONFIG_CGROUPS
+
+void cgroup_enter_frozen(void);
+void cgroup_leave_frozen(bool always_leave);
+void cgroup_update_frozen(struct cgroup *cgrp);
+void cgroup_freeze(struct cgroup *cgrp, bool freeze);
+void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src,
+ struct cgroup *dst);
+
+static inline bool cgroup_task_frozen(struct task_struct *task)
{
- return 0;
+ return task->frozen;
+}
+
+#else /* !CONFIG_CGROUPS */
+
+static inline void cgroup_enter_frozen(void) { }
+static inline void cgroup_leave_frozen(bool always_leave) { }
+static inline bool cgroup_task_frozen(struct task_struct *task)
+{
+ return false;
}
#endif /* !CONFIG_CGROUPS */
+#ifdef CONFIG_CGROUP_BPF
+static inline void cgroup_bpf_get(struct cgroup *cgrp)
+{
+ percpu_ref_get(&cgrp->bpf.refcnt);
+}
+
+static inline void cgroup_bpf_put(struct cgroup *cgrp)
+{
+ percpu_ref_put(&cgrp->bpf.refcnt);
+}
+
+#else /* CONFIG_CGROUP_BPF */
+
+static inline void cgroup_bpf_get(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
+
+#endif /* CONFIG_CGROUP_BPF */
+
+struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id);
+
+struct cgroup_of_peak *of_peak(struct kernfs_open_file *of);
+
#endif /* _LINUX_CGROUP_H */