From 0a01f2cc390e10633a54f72c608cc3fe19a50c3d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2012 10:33:47 -0700 Subject: pidns: Make the pidns proc mount/umount logic obvious. Track the number of pids in the proc hash table. When the number of pids goes to 0 schedule work to unmount the kernel mount of proc. Move the mount of proc into alloc_pid when we allocate the pid for init. Remove the surprising calls of pid_ns_release proc in fork and proc_flush_task. Those code paths really shouldn't know about proc namespace implementation details and people have demonstrated several times that finding and understanding those code paths is difficult and non-obvious. Because of the call path detach pid is alwasy called with the rtnl_lock held free_pid is not allowed to sleep, so the work to unmounting proc is moved to a work queue. This has the side benefit of not blocking the entire world waiting for the unnecessary rcu_barrier in deactivate_locked_super. In the process of making the code clear and obvious this fixes a bug reported by Gao feng where we would leak a mount of proc during clone(CLONE_NEWPID|CLONE_NEWNET) if copy_pid_ns succeeded and copy_net_ns failed. Acked-by: "Serge E. Hallyn" Signed-off-by: "Eric W. Biederman" --- kernel/pid_namespace.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/pid_namespace.c') diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index b2604950aa50..84591cfeefc1 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -72,6 +72,12 @@ err_alloc: return NULL; } +static void proc_cleanup_work(struct work_struct *work) +{ + struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); + pid_ns_release_proc(ns); +} + /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ #define MAX_PID_NS_LEVEL 32 @@ -105,6 +111,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); ns->user_ns = get_user_ns(user_ns); + INIT_WORK(&ns->proc_work, proc_cleanup_work); set_bit(0, ns->pidmap[0].page); atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); @@ -112,15 +119,8 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns for (i = 1; i < PIDMAP_ENTRIES; i++) atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); - err = pid_ns_prepare_proc(ns); - if (err) - goto out_put_parent_pid_ns; - return ns; -out_put_parent_pid_ns: - put_pid_ns(parent_pid_ns); - put_user_ns(user_ns); out_free_map: kfree(ns->pidmap[0].page); out_free: -- cgit