From 387ad9674b0013c8756ad20d854ff005b0c313ad Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Mon, 20 Feb 2017 12:19:00 +0200 Subject: kernel: convert cgroup_namespace.count from atomic_t to refcount_t The refcount_t type and its corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This helps avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/cgroup/cgroup.c') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0125589c7428..8ee78688e36d 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -189,7 +189,7 @@ static u16 have_canfork_callback __read_mostly; /* cgroup namespace for init task */ struct cgroup_namespace init_cgroup_ns = { - .count = { .counter = 2, }, + .count = REFCOUNT_INIT(2), .user_ns = &init_user_ns, .ns.ops = &cgroupns_operations, .ns.inum = PROC_CGROUP_INIT_INO, -- cgit From 4b9502e63b5e2b1b5ef491919d3219b9440fe0b3 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Wed, 8 Mar 2017 10:00:40 +0200 Subject: kernel: convert css_set.refcount from atomic_t to refcount_t The refcount_t type and its corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This helps avoid accidental refcounter overflows that might lead to use-after-free situations. 
Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/cgroup/cgroup.c') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 8ee78688e36d..b1cc1c306668 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -554,7 +554,7 @@ EXPORT_SYMBOL_GPL(of_css); * haven't been created. */ struct css_set init_css_set = { - .refcount = ATOMIC_INIT(1), + .refcount = REFCOUNT_INIT(1), .tasks = LIST_HEAD_INIT(init_css_set.tasks), .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), @@ -724,7 +724,7 @@ void put_css_set_locked(struct css_set *cset) lockdep_assert_held(&css_set_lock); - if (!atomic_dec_and_test(&cset->refcount)) + if (!refcount_dec_and_test(&cset->refcount)) return; /* This css_set is dead. unlink it and release cgroup and css refs */ @@ -977,7 +977,7 @@ static struct css_set *find_css_set(struct css_set *old_cset, return NULL; } - atomic_set(&cset->refcount, 1); + refcount_set(&cset->refcount, 1); INIT_LIST_HEAD(&cset->tasks); INIT_LIST_HEAD(&cset->mg_tasks); INIT_LIST_HEAD(&cset->task_iters); -- cgit From a590b90d472f2c176c140576ee3ab44df7f67839 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Apr 2017 15:14:55 -0400 Subject: cgroup: fix spurious warnings on cgroup_is_dead() from cgroup_sk_alloc() cgroup_get() is expected to be called only on live cgroups and triggers a warning on a dead cgroup; however, cgroup_sk_alloc() may be called while cloning a socket which is left in an empty and removed cgroup and thus may legitimately duplicate its reference on a dead cgroup. This currently triggers the following warning spuriously. WARNING: CPU: 14 PID: 0 at kernel/cgroup.c:490 cgroup_get+0x55/0x60 ... 
[] __warn+0xd3/0xf0 [] warn_slowpath_null+0x1e/0x20 [] cgroup_get+0x55/0x60 [] cgroup_sk_alloc+0x51/0xe0 [] sk_clone_lock+0x2db/0x390 [] inet_csk_clone_lock+0x16/0xc0 [] tcp_create_openreq_child+0x23/0x4b0 [] tcp_v6_syn_recv_sock+0x91/0x670 [] tcp_check_req+0x3a6/0x4e0 [] tcp_v6_rcv+0x693/0xa00 [] ip6_input_finish+0x59/0x3e0 [] ip6_input+0x32/0xb0 [] ip6_rcv_finish+0x57/0xa0 [] ipv6_rcv+0x318/0x4d0 [] __netif_receive_skb_core+0x2d7/0x9a0 [] __netif_receive_skb+0x16/0x70 [] netif_receive_skb_internal+0x23/0x80 [] napi_gro_frags+0x208/0x270 [] mlx4_en_process_rx_cq+0x74c/0xf40 [] mlx4_en_poll_rx_cq+0x30/0x90 [] net_rx_action+0x210/0x350 [] __do_softirq+0x106/0x2c7 [] irq_exit+0x9d/0xa0 [] do_IRQ+0x54/0xd0 [] common_interrupt+0x7f/0x7f [] cpuidle_enter+0x17/0x20 [] cpu_startup_entry+0x2a9/0x2f0 [] start_secondary+0xf1/0x100 This patch renames the existing cgroup_get() with the dead cgroup warning to cgroup_get_live() after cgroup_kn_lock_live() and introduces the new cgroup_get() which doesn't check whether the cgroup is live or dead. All existing cgroup_get() users except for cgroup_sk_alloc() are converted to use cgroup_get_live(). 
Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") Cc: stable@vger.kernel.org # v4.5+ Cc: Johannes Weiner Reported-by: Chris Mason Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'kernel/cgroup/cgroup.c') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index b1cc1c306668..10951d5e35d2 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -437,6 +437,11 @@ out_unlock: } static void cgroup_get(struct cgroup *cgrp) +{ + css_get(&cgrp->self); +} + +static void cgroup_get_live(struct cgroup *cgrp) { WARN_ON_ONCE(cgroup_is_dead(cgrp)); css_get(&cgrp->self); @@ -932,7 +937,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset, list_add_tail(&link->cgrp_link, &cset->cgrp_links); if (cgroup_parent(cgrp)) - cgroup_get(cgrp); + cgroup_get_live(cgrp); } /** @@ -1802,7 +1807,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, return ERR_PTR(-EINVAL); } cgrp_dfl_visible = true; - cgroup_get(&cgrp_dfl_root.cgrp); + cgroup_get_live(&cgrp_dfl_root.cgrp); dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, CGROUP2_SUPER_MAGIC, ns); @@ -2575,7 +2580,7 @@ restart: if (!css || !percpu_ref_is_dying(&css->refcnt)) continue; - cgroup_get(dsct); + cgroup_get_live(dsct); prepare_to_wait(&dsct->offline_waitq, &wait, TASK_UNINTERRUPTIBLE); @@ -3946,7 +3951,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, { lockdep_assert_held(&cgroup_mutex); - cgroup_get(cgrp); + cgroup_get_live(cgrp); memset(css, 0, sizeof(*css)); css->cgroup = cgrp; @@ -4122,7 +4127,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent) /* allocation complete, commit to creation */ list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children); atomic_inc(&root->nr_cgrps); - cgroup_get(parent); + cgroup_get_live(parent); /* * @cgrp is now fully operational. 
If something fails after this @@ -4946,7 +4951,7 @@ struct cgroup *cgroup_get_from_path(const char *path) if (kn) { if (kernfs_type(kn) == KERNFS_DIR) { cgrp = kn->priv; - cgroup_get(cgrp); + cgroup_get_live(cgrp); } else { cgrp = ERR_PTR(-ENOTDIR); } @@ -5026,6 +5031,11 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) /* Socket clone path */ if (skcd->val) { + /* + * We might be cloning a socket which is left in an empty + * cgroup and the cgroup might have already been rmdir'd. + * Don't use cgroup_get_live(). + */ cgroup_get(sock_cgroup_ptr(skcd)); return; } -- cgit From 9732adc5d6520238223df16630f1f8cad2269317 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Wed, 19 Apr 2017 10:15:59 +0800 Subject: cgroup: avoid attaching a cgroup root to two different superblocks, take 2 Commit bfb0b80db5f9 ("cgroup: avoid attaching a cgroup root to two different superblocks") is broken. Now we try to fix the race by delaying the initialization of cgroup root refcnt until a superblock has been allocated. 
Reported-by: Dmitry Vyukov Reported-by: Andrei Vagin Tested-by: Andrei Vagin Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/cgroup/cgroup.c') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 10951d5e35d2..38d9386f46e7 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1645,7 +1645,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } -int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) +int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; @@ -1661,8 +1661,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) root_cgrp->id = ret; root_cgrp->ancestor_ids[0] = ret; - ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0, - GFP_KERNEL); + ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, + ref_flags, GFP_KERNEL); if (ret) goto out; @@ -4517,7 +4517,7 @@ int __init cgroup_init(void) hash_add(css_set_table, &init_css_set.hlist, css_set_hash(init_css_set.subsys)); - BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0)); + BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0)); mutex_unlock(&cgroup_mutex); -- cgit From 310b4816a5d8082416b4ab83e5a7b3cb92883a4d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 May 2017 15:24:14 -0400 Subject: cgroup: mark cgroup_get() with __maybe_unused a590b90d472f ("cgroup: fix spurious warnings on cgroup_is_dead() from cgroup_sk_alloc()") converted most cgroup_get() usages to cgroup_get_live() leaving cgroup_sk_alloc() the sole user of cgroup_get(). When !CONFIG_SOCK_CGROUP_DATA, this ends up triggering an unused-function warning for cgroup_get(). Silence the warning by adding __maybe_unused to cgroup_get(). 
Reported-by: Stephen Rothwell Link: http://lkml.kernel.org/r/20170501145340.17e8ef86@canb.auug.org.au Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/cgroup/cgroup.c') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 38d9386f46e7..f2bcc11b85be 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -436,7 +436,7 @@ out_unlock: return css; } -static void cgroup_get(struct cgroup *cgrp) +static void __maybe_unused cgroup_get(struct cgroup *cgrp) { css_get(&cgrp->self); } -- cgit