summaryrefslogtreecommitdiff
path: root/include/linux/cgroup.h
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2012-11-09 09:12:29 -0800
committerTejun Heo <tj@kernel.org>2012-11-09 09:12:29 -0800
commit574bd9f7c7c1d32f52dea5488018a6ff79031e22 (patch)
treef43657afb59dd12fe1eca329acc6caf885507727 /include/linux/cgroup.h
parenteb6fd5040ee799009173daa49c3e7aa0362167c9 (diff)
cgroup: implement generic child / descendant walk macros
Currently, cgroup doesn't provide any generic helper for walking a given cgroup's children or descendants. This patch adds the following three macros. * cgroup_for_each_child() - walk immediate children of a cgroup. * cgroup_for_each_descendant_pre() - visit all descendants of a cgroup in pre-order tree traversal. * cgroup_for_each_descendant_post() - visit all descendants of a cgroup in post-order tree traversal. All three only require the user to hold RCU read lock during traversal. Verifying that each iterated cgroup is online is the responsibility of the user. When used with proper synchronization, cgroup_for_each_descendant_pre() can be used to propagate state updates to descendants in reliable way. See comments for details. v2: s/config/state/ in commit message and comments per Michal. More documentation on synchronization rules. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujisu.com> Reviewed-by: Michal Hocko <mhocko@suse.cz> Acked-by: Li Zefan <lizefan@huawei.com>
Diffstat (limited to 'include/linux/cgroup.h')
-rw-r--r--include/linux/cgroup.h94
1 files changed, 94 insertions, 0 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 90c33ebdd6bc..8f64b459fbd4 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -534,6 +534,100 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
return task_subsys_state(task, subsys_id)->cgroup;
}
+/**
+ * cgroup_for_each_child - iterate through children of a cgroup
+ * @pos: the cgroup * to use as the loop cursor
+ * @cgroup: cgroup whose children to walk
+ *
+ * Walk @cgroup's children. Must be called under rcu_read_lock(). A child
+ * cgroup which hasn't finished ->post_create() or already has finished
+ * ->pre_destroy() may show up during traversal and it's each subsystem's
+ * responsibility to verify that each @pos is alive.
+ *
+ * If a subsystem synchronizes against the parent in its ->post_create()
+ * and before starting iterating, a cgroup which finished ->post_create()
+ * is guaranteed to be visible in the future iterations.
+ */
+#define cgroup_for_each_child(pos, cgroup) \
+ list_for_each_entry_rcu(pos, &(cgroup)->children, sibling)
+
+struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
+ struct cgroup *cgroup);
+
+/**
+ * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
+ * @pos: the cgroup * to use as the loop cursor
+ * @cgroup: cgroup whose descendants to walk
+ *
+ * Walk @cgroup's descendants. Must be called under rcu_read_lock(). A
+ * descendant cgroup which hasn't finished ->post_create() or already has
+ * finished ->pre_destroy() may show up during traversal and it's each
+ * subsystem's responsibility to verify that each @pos is alive.
+ *
+ * If a subsystem synchronizes against the parent in its ->post_create()
+ * and before starting iterating, and synchronizes against @pos on each
+ * iteration, any descendant cgroup which finished ->post_create() is
+ * guaranteed to be visible in the future iterations.
+ *
+ * In other words, the following guarantees that a descendant can't escape
+ * state updates of its ancestors.
+ *
+ * my_post_create(@cgrp)
+ * {
+ * Lock @cgrp->parent and @cgrp;
+ * Inherit state from @cgrp->parent;
+ * Unlock both.
+ * }
+ *
+ * my_update_state(@cgrp)
+ * {
+ * Lock @cgrp;
+ * Update @cgrp's state;
+ * Unlock @cgrp;
+ *
+ * cgroup_for_each_descendant_pre(@pos, @cgrp) {
+ * Lock @pos;
+ * Verify @pos is alive and inherit state from @pos->parent;
+ * Unlock @pos;
+ * }
+ * }
+ *
+ * As long as the inheriting step, including checking the parent state, is
+ * enclosed inside @pos locking, double-locking the parent isn't necessary
+ * while inheriting. The state update to the parent is guaranteed to be
+ * visible by walking order and, as long as inheriting operations to the
+ * same @pos are atomic to each other, multiple updates racing each other
+ * still result in the correct state. It's guaranateed that at least one
+ * inheritance happens for any cgroup after the latest update to its
+ * parent.
+ *
+ * If checking parent's state requires locking the parent, each inheriting
+ * iteration should lock and unlock both @pos->parent and @pos.
+ *
+ * Alternatively, a subsystem may choose to use a single global lock to
+ * synchronize ->post_create() and ->pre_destroy() against tree-walking
+ * operations.
+ */
+#define cgroup_for_each_descendant_pre(pos, cgroup) \
+ for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \
+ pos = cgroup_next_descendant_pre((pos), (cgroup)))
+
+struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
+ struct cgroup *cgroup);
+
+/**
+ * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants
+ * @pos: the cgroup * to use as the loop cursor
+ * @cgroup: cgroup whose descendants to walk
+ *
+ * Similar to cgroup_for_each_descendant_pre() but performs post-order
+ * traversal instead. Note that the walk visibility guarantee described in
+ * pre-order walk doesn't apply the same to post-order walks.
+ */
+#define cgroup_for_each_descendant_post(pos, cgroup) \
+ for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \
+ pos = cgroup_next_descendant_post((pos), (cgroup)))
+
/* A cgroup_iter should be treated as an opaque object */
struct cgroup_iter {
struct list_head *cg_link;