From 03b1cde6b22f625ae832b939bc7379ec1466aec5 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Thu, 23 Aug 2012 16:53:30 -0400 Subject: cgroup: add xattr support This is one of the items in the plumber's wish list. For use cases: >> What would the use case be for this? > > Attaching meta information to services, in an easily discoverable > way. For example, in systemd we create one cgroup for each service, and > could then store data like the main pid of the specific service as an > xattr on the cgroup itself. That way we'd have almost all service state > in the cgroupfs, which would make it possible to terminate systemd and > later restart it without losing any state information. But there's more: > for example, some very peculiar services cannot be terminated on > shutdown (i.e. fakeraid DM stuff) and it would be really nice if the > services in question could just mark that on their cgroup, by setting an > xattr. On the more desktopy side of things there are other > possibilities: for example there are plans defining what an application > is along the lines of a cgroup (i.e. an app being a collection of > processes). With xattrs one could then attach an icon or human readable > program name on the cgroup. > > The key idea is that this would allow attaching runtime meta information > to cgroups and everything they model (services, apps, vms), that doesn't > need any complex userspace infrastructure, has good access control > (i.e. because the file system enforces that anyway, and there's the > "trusted." xattr namespace), notifications (inotify), and can easily be > shared among applications. > > Lennart v7: - no changes v6: - remove user xattr namespace, only allow trusted and security v5: - check for capabilities before setting/removing xattrs v4: - no changes v3: - instead of config option, use mount option to enable xattr support Original-patch-by: Li Zefan Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c90eaa803440..145901f5ef99 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_CGROUPS @@ -216,6 +217,9 @@ struct cgroup { /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; + + /* directory xattrs */ + struct simple_xattrs xattrs; }; /* @@ -309,6 +313,9 @@ struct cftype { /* CFTYPE_* flags */ unsigned int flags; + /* file xattrs */ + struct simple_xattrs xattrs; + int (*open)(struct inode *inode, struct file *file); ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -394,7 +401,7 @@ struct cftype { */ struct cftype_set { struct list_head node; /* chained at subsys->cftsets */ - const struct cftype *cfts; + struct cftype *cfts; }; struct cgroup_scanner { @@ -406,8 +413,8 @@ struct cgroup_scanner { void *data; }; -int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); -int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); +int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_is_removed(const struct cgroup *cgrp); -- cgit From be45c900fdc2c66baad5a7703fb8136991d88aeb Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 13 Sep 2012 09:50:55 +0200 Subject: cgroup: Remove CGROUP_BUILTIN_SUBSYS_COUNT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CGROUP_BUILTIN_SUBSYS_COUNT is used as start index or stop index when looping over the subsys array looking either at the builtin or the module subsystems. Since all the builtin subsystems have an id which is lower then CGROUP_BUILTIN_SUBSYS_COUNT we know that any module will have an id larger than CGROUP_BUILTIN_SUBSYS_COUNT. In short the ids are sorted. We are about to change id assignment to happen only at compile time later in this series. That means we can't rely on the above trick since all ids will always be defined at compile time. Furthermore, ordering the builtin subsystems and the module subsystems is not really necessary. So we need a different way to know which subsystem is a builtin or a module one. We can use the subsys[]->module pointer for this. Any place where we need to know if a subsys is module we just check for the pointer. If it is NULL then the subsystem is a builtin one. With this we are able to drop the CGROUP_BUILTIN_SUBSYS_COUNT enum. Though we need to introduce a temporary placeholder so that we don't get a compilation error when only CONFIG_CGROUP is selected and no single controller. An empty enum definition is not valid. Later in this series we are able to remove the placeholder again. And with this change we get a fix for this: kernel/cgroup.c: In function ‘cgroup_load_subsys’: kernel/cgroup.c:4326:38: warning: array subscript is below array bounds [-Warray-bounds] when CONFIG_CGROUP=y and no built in controller was enabled. Signed-off-by: Daniel Wagner Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Cc: Gao feng Cc: Jamal Hadi Salim Cc: John Fastabend Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org --- include/linux/cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 145901f5ef99..1916cdb071dc 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -48,7 +48,7 @@ extern const struct file_operations proc_cgroup_operations; #define SUBSYS(_x) _x ## _subsys_id, enum cgroup_subsys_id { #include - CGROUP_BUILTIN_SUBSYS_COUNT + __CGROUP_TEMPORARY_PLACEHOLDER }; #undef SUBSYS /* -- cgit From 5fc0b02544b3b9bd3db5a8156b5f3e7350f8e797 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 Sep 2012 16:12:05 +0200 Subject: cgroup: Wrap subsystem selection macro Before we are able to define all subsystem ids at compile time we need a more fine grained control what gets defined when we include cgroup_subsys.h. For example we define the enums for the subsystems or to declare for struct cgroup_subsys (builtin subsystem) by including cgroup_subsys.h and defining SUBSYS accordingly. Currently, the decision if a subsys is used is defined inside the header by testing if CONFIG_*=y is true. By moving this test outside of cgroup_subsys.h we are able to control it on the include level. This is done by introducing IS_SUBSYS_ENABLED which then is defined according the task, e.g. is CONFIG_*=y or CONFIG_*=m. Signed-off-by: Daniel Wagner Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Cc: Gao feng Cc: Jamal Hadi Salim Cc: John Fastabend Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org --- include/linux/cgroup.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1916cdb071dc..a5ab5651441b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -46,10 +46,12 @@ extern const struct file_operations proc_cgroup_operations; /* Define the enumeration of all builtin cgroup subsystems */ #define SUBSYS(_x) _x ## _subsys_id, +#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) enum cgroup_subsys_id { #include __CGROUP_TEMPORARY_PLACEHOLDER }; +#undef IS_SUBSYS_ENABLED #undef SUBSYS /* * This define indicates the maximum number of subsystems that can be loaded @@ -528,7 +530,9 @@ struct cgroup_subsys { }; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; +#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) #include +#undef IS_SUBSYS_ENABLED #undef SUBSYS static inline struct cgroup_subsys_state *cgroup_subsys_state( -- cgit From 8a8e04df4747661daaee77e98e102d99c9e09b98 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 Sep 2012 16:12:07 +0200 Subject: cgroup: Assign subsystem IDs during compile time WARNING: With this change it is impossible to load external built controllers anymore. In case where CONFIG_NETPRIO_CGROUP=m and CONFIG_NET_CLS_CGROUP=m is set, corresponding subsys_id should also be a constant. Up to now, net_prio_subsys_id and net_cls_subsys_id would be of the type int and the value would be assigned during runtime. By switching the macro definition IS_SUBSYS_ENABLED from IS_BUILTIN to IS_ENABLED, all *_subsys_id will have constant value. That means we need to remove all the code which assumes a value can be assigned to net_prio_subsys_id and net_cls_subsys_id. A close look is necessary on the RCU part which was introduces by following patch: commit f845172531fb7410c7fb7780b1a6e51ee6df7d52 Author: Herbert Xu Mon May 24 09:12:34 2010 Committer: David S. Miller Mon May 24 09:12:34 2010 cls_cgroup: Store classid in struct sock Tis code was added to init_cgroup_cls() /* We can't use rcu_assign_pointer because this is an int. */ smp_wmb(); net_cls_subsys_id = net_cls_subsys.subsys_id; respectively to exit_cgroup_cls() net_cls_subsys_id = -1; synchronize_rcu(); and in module version of task_cls_classid() rcu_read_lock(); id = rcu_dereference(net_cls_subsys_id); if (id >= 0) classid = container_of(task_subsys_state(p, id), struct cgroup_cls_state, css)->classid; rcu_read_unlock(); Without an explicit explaination why the RCU part is needed. (The rcu_deference was fixed by exchanging it to rcu_derefence_index_check() in a later commit, but that is a minor detail.) So here is my pondering why it was introduced and why it safe to remove it now. Note that this code was copied over to net_prio the reasoning holds for that subsystem too. The idea behind the RCU use for net_cls_subsys_id is to make sure we get a valid pointer back from task_subsys_state(). task_subsys_state() is just blindly accessing the subsys array and returning the pointer. Obviously, passing in -1 as id into task_subsys_state() returns an invalid value (out of lower bound). So this code makes sure that only after module is loaded and the subsystem registered, the id is assigned. Before unregistering the module all old readers must have left the critical section. This is done by assigning -1 to the id and issuing a synchronized_rcu(). Any new readers wont call task_subsys_state() anymore and therefore it is safe to unregister the subsystem. The new code relies on the same trick, but it looks at the subsys pointer return by task_subsys_state() (remember the id is constant and therefore we allways have a valid index into the subsys array). No precautions need to be taken during module loading module. Eventually, all CPUs will get a valid pointer back from task_subsys_state() because rebind_subsystem() which is called after the module init() function will assigned subsys[net_cls_subsys_id] the newly loaded module subsystem pointer. When the subsystem is about to be removed, rebind_subsystem() will called before the module exit() function. In this case, rebind_subsys() will assign subsys[net_cls_subsys_id] a NULL pointer and then it calls synchronize_rcu(). All old readers have left by then the critical section. Any new reader wont access the subsystem anymore. At this point we are safe to unregister the subsystem. No synchronize_rcu() call is needed. Signed-off-by: Daniel Wagner Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Cc: "David S. Miller" Cc: "Paul E. McKenney" Cc: Andrew Morton Cc: Eric Dumazet Cc: Gao feng Cc: Glauber Costa Cc: Herbert Xu Cc: Jamal Hadi Salim Cc: John Fastabend Cc: Kamezawa Hiroyuki Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org --- include/linux/cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a5ab5651441b..018f819405c8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -46,7 +46,7 @@ extern const struct file_operations proc_cgroup_operations; /* Define the enumeration of all builtin cgroup subsystems */ #define SUBSYS(_x) _x ## _subsys_id, -#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) +#define IS_SUBSYS_ENABLED(option) IS_ENABLED(option) enum cgroup_subsys_id { #include __CGROUP_TEMPORARY_PLACEHOLDER -- cgit From a6f00298b2ceaf50b4ab00e6ee3eb0206ac72fac Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 Sep 2012 16:12:08 +0200 Subject: cgroup: Define CGROUP_SUBSYS_COUNT according the configuration Since we know exactly how many subsystems exists at compile time we are able to define CGROUP_SUBSYS_COUNT correctly. CGROUP_SUBSYS_COUNT will be at max 12 (all controllers enabled). Depending on the architecture we safe either 32 - 12 pointers (80 bytes) or 64 - 12 pointers (416 bytes) per cgroup. With this change we can also remove the temporary placeholder to avoid compilation errors. Signed-off-by: Daniel Wagner Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Cc: Gao feng Cc: Jamal Hadi Salim Cc: John Fastabend Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org --- include/linux/cgroup.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 018f819405c8..df354ae079c1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -49,16 +49,10 @@ extern const struct file_operations proc_cgroup_operations; #define IS_SUBSYS_ENABLED(option) IS_ENABLED(option) enum cgroup_subsys_id { #include - __CGROUP_TEMPORARY_PLACEHOLDER + CGROUP_SUBSYS_COUNT, }; #undef IS_SUBSYS_ENABLED #undef SUBSYS -/* - * This define indicates the maximum number of subsystems that can be loaded - * at once. We limit to this many since cgroupfs_root has subsys_bits to keep - * track of all of them. - */ -#define CGROUP_SUBSYS_COUNT (BITS_PER_BYTE*sizeof(unsigned long)) /* Per-subsystem/per-cgroup state maintained by the system. */ struct cgroup_subsys_state { -- cgit