summaryrefslogtreecommitdiff
path: root/include/linux/cgroup.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/cgroup.h')
-rw-r--r--include/linux/cgroup.h171
1 files changed, 148 insertions, 23 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 665fa70e4094..ed4ba111bc8d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -28,6 +28,7 @@ struct css_id;
extern int cgroup_init_early(void);
extern int cgroup_init(void);
extern void cgroup_lock(void);
+extern int cgroup_lock_is_held(void);
extern bool cgroup_lock_live_group(struct cgroup *cgrp);
extern void cgroup_unlock(void);
extern void cgroup_fork(struct task_struct *p);
@@ -36,16 +37,24 @@ extern void cgroup_post_fork(struct task_struct *p);
extern void cgroup_exit(struct task_struct *p, int run_callbacks);
extern int cgroupstats_build(struct cgroupstats *stats,
struct dentry *dentry);
+extern int cgroup_load_subsys(struct cgroup_subsys *ss);
+extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
-extern struct file_operations proc_cgroup_operations;
+extern const struct file_operations proc_cgroup_operations;
-/* Define the enumeration of all cgroup subsystems */
+/* Define the enumeration of all builtin cgroup subsystems */
#define SUBSYS(_x) _x ## _subsys_id,
enum cgroup_subsys_id {
#include <linux/cgroup_subsys.h>
- CGROUP_SUBSYS_COUNT
+ CGROUP_BUILTIN_SUBSYS_COUNT
};
#undef SUBSYS
+/*
+ * This define indicates the maximum number of subsystems that can be loaded
+ * at once. We limit to this many since cgroupfs_root has subsys_bits to keep
+ * track of all of them.
+ */
+#define CGROUP_SUBSYS_COUNT (BITS_PER_BYTE*sizeof(unsigned long))
/* Per-subsystem/per-cgroup state maintained by the system. */
struct cgroup_subsys_state {
@@ -66,7 +75,7 @@ struct cgroup_subsys_state {
unsigned long flags;
/* ID for this css, if possible */
- struct css_id *id;
+ struct css_id __rcu *id;
};
/* bits in struct cgroup_subsys_state flags field */
@@ -75,6 +84,12 @@ enum {
CSS_REMOVED, /* This CSS is dead */
};
+/* Caller must verify that the css is not for root cgroup */
+static inline void __css_get(struct cgroup_subsys_state *css, int count)
+{
+ atomic_add(count, &css->refcnt);
+}
+
/*
* Call css_get() to hold a reference on the css; it can be used
* for a reference obtained via:
@@ -86,7 +101,7 @@ static inline void css_get(struct cgroup_subsys_state *css)
{
/* We don't need to reference count the root state */
if (!test_bit(CSS_ROOT, &css->flags))
- atomic_inc(&css->refcnt);
+ __css_get(css, 1);
}
static inline bool css_is_removed(struct cgroup_subsys_state *css)
@@ -117,11 +132,11 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
* css_get() or css_tryget()
*/
-extern void __css_put(struct cgroup_subsys_state *css);
+extern void __css_put(struct cgroup_subsys_state *css, int count);
static inline void css_put(struct cgroup_subsys_state *css)
{
if (!test_bit(CSS_ROOT, &css->flags))
- __css_put(css);
+ __css_put(css, 1);
}
/* bits in struct cgroup flags field */
@@ -139,6 +154,42 @@ enum {
* A thread in rmdir() is wating for this cgroup.
*/
CGRP_WAIT_ON_RMDIR,
+ /*
+ * Clone cgroup values when creating a new child cgroup
+ */
+ CGRP_CLONE_CHILDREN,
+};
+
+/* which pidlist file are we talking about? */
+enum cgroup_filetype {
+ CGROUP_FILE_PROCS,
+ CGROUP_FILE_TASKS,
+};
+
+/*
+ * A pidlist is a list of pids that virtually represents the contents of one
+ * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
+ * a pair (one each for procs, tasks) for each pid namespace that's relevant
+ * to the cgroup.
+ */
+struct cgroup_pidlist {
+ /*
+ * used to find which pidlist is wanted. doesn't change as long as
+ * this particular list stays in the list.
+ */
+ struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
+ /* array of xids */
+ pid_t *list;
+ /* how many elements the above list has */
+ int length;
+ /* how many files are using the current array */
+ int use_count;
+ /* each of these stored in a list by its cgroup */
+ struct list_head links;
+ /* pointer to the cgroup we belong to, for list removal purposes */
+ struct cgroup *owner;
+ /* protects the other fields */
+ struct rw_semaphore mutex;
};
struct cgroup {
@@ -158,7 +209,7 @@ struct cgroup {
struct list_head children; /* my children */
struct cgroup *parent; /* my parent */
- struct dentry *dentry; /* cgroup fs entry, RCU protected */
+ struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */
/* Private pointers for each registered subsystem */
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
@@ -179,17 +230,19 @@ struct cgroup {
*/
struct list_head release_list;
- /* pids_mutex protects the fields below */
- struct rw_semaphore pids_mutex;
- /* Array of process ids in the cgroup */
- pid_t *tasks_pids;
- /* How many files are using the current tasks_pids array */
- int pids_use_count;
- /* Length of the current tasks_pids array */
- int pids_length;
+ /*
+ * list of pidlists, up to two for each namespace (one for procs, one
+ * for tasks); created on demand.
+ */
+ struct list_head pidlists;
+ struct mutex pidlist_mutex;
/* For RCU-protected deletion */
struct rcu_head rcu_head;
+
+ /* List of events which userspace want to recieve */
+ struct list_head event_list;
+ spinlock_t event_list_lock;
};
/*
@@ -227,9 +280,13 @@ struct css_set {
/*
* Set of subsystem states, one for each subsystem. This array
* is immutable after creation apart from the init_css_set
- * during subsystem registration (at boot time).
+ * during subsystem registration (at boot time) and modular subsystem
+ * loading/unloading.
*/
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+ /* For RCU-protected deletion */
+ struct rcu_head rcu_head;
};
/*
@@ -329,6 +386,23 @@ struct cftype {
int (*trigger)(struct cgroup *cgrp, unsigned int event);
int (*release)(struct inode *inode, struct file *file);
+
+ /*
+ * register_event() callback will be used to add new userspace
+ * waiter for changes related to the cftype. Implement it if
+ * you want to provide this functionality. Use eventfd_signal()
+ * on eventfd to send notification to userspace.
+ */
+ int (*register_event)(struct cgroup *cgrp, struct cftype *cft,
+ struct eventfd_ctx *eventfd, const char *args);
+ /*
+ * unregister_event() callback will be called when userspace
+ * closes the eventfd or on cgroup removing.
+ * This callback must be implemented, if you want provide
+ * notification functionality.
+ */
+ void (*unregister_event)(struct cgroup *cgrp, struct cftype *cft,
+ struct eventfd_ctx *eventfd);
};
struct cgroup_scanner {
@@ -366,6 +440,23 @@ int cgroup_task_count(const struct cgroup *cgrp);
int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
/*
+ * When the subsys has to access css and may add permanent refcnt to css,
+ * it should take care of racy conditions with rmdir(). Following set of
+ * functions, is for stop/restart rmdir if necessary.
+ * Because these will call css_get/put, "css" should be alive css.
+ *
+ * cgroup_exclude_rmdir();
+ * ...do some jobs which may access arbitrary empty cgroup
+ * cgroup_release_and_wakeup_rmdir();
+ *
+ * When someone removes a cgroup while cgroup_exclude_rmdir() holds it,
+ * it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up.
+ */
+
+void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
+void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);
+
+/*
* Control Group subsystem type.
* See Documentation/cgroups/cgroups.txt for details
*/
@@ -375,10 +466,13 @@ struct cgroup_subsys {
struct cgroup *cgrp);
int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
- int (*can_attach)(struct cgroup_subsys *ss,
- struct cgroup *cgrp, struct task_struct *tsk);
+ int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct task_struct *tsk, bool threadgroup);
+ void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct task_struct *tsk, bool threadgroup);
void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cgrp, struct task_struct *tsk);
+ struct cgroup *old_cgrp, struct task_struct *tsk,
+ bool threadgroup);
void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
int (*populate)(struct cgroup_subsys *ss,
@@ -420,6 +514,9 @@ struct cgroup_subsys {
/* used when use_id == true */
struct idr idr;
spinlock_t id_lock;
+
+ /* should be defined only by modular subsystems */
+ struct module *module;
};
#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
@@ -432,10 +529,21 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
return cgrp->subsys[subsys_id];
}
-static inline struct cgroup_subsys_state *task_subsys_state(
- struct task_struct *task, int subsys_id)
+/*
+ * function to get the cgroup_subsys_state which allows for extra
+ * rcu_dereference_check() conditions, such as locks used during the
+ * cgroup_subsys::attach() methods.
+ */
+#define task_subsys_state_check(task, subsys_id, __c) \
+ rcu_dereference_check(task->cgroups->subsys[subsys_id], \
+ rcu_read_lock_held() || \
+ lockdep_is_held(&task->alloc_lock) || \
+ cgroup_lock_is_held() || (__c))
+
+static inline struct cgroup_subsys_state *
+task_subsys_state(struct task_struct *task, int subsys_id)
{
- return rcu_dereference(task->cgroups->subsys[subsys_id]);
+ return task_subsys_state_check(task, subsys_id, false);
}
static inline struct cgroup* task_cgroup(struct task_struct *task,
@@ -474,6 +582,12 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
int cgroup_scan_tasks(struct cgroup_scanner *scan);
int cgroup_attach_task(struct cgroup *, struct task_struct *);
+int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
+
+static inline int cgroup_attach_task_current_cg(struct task_struct *tsk)
+{
+ return cgroup_attach_task_all(current, tsk);
+}
/*
* CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
@@ -530,6 +644,17 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
return -EINVAL;
}
+/* No cgroups - nothing to do */
+static inline int cgroup_attach_task_all(struct task_struct *from,
+ struct task_struct *t)
+{
+ return 0;
+}
+static inline int cgroup_attach_task_current_cg(struct task_struct *t)
+{
+ return 0;
+}
+
#endif /* !CONFIG_CGROUPS */
#endif /* _LINUX_CGROUP_H */