diff options
Diffstat (limited to 'ipc/namespace.c')
| -rw-r--r-- | ipc/namespace.c | 187 |
1 files changed, 123 insertions, 64 deletions
diff --git a/ipc/namespace.c b/ipc/namespace.c index 7ee61bf44933..535f16ea40e1 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/ipc/namespace.c * Copyright (C) 2006 Pavel Emelyanov <xemul@openvz.org> OpenVZ, SWsoft Inc. @@ -9,56 +10,105 @@ #include <linux/rcupdate.h> #include <linux/nsproxy.h> #include <linux/slab.h> +#include <linux/cred.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/user_namespace.h> #include <linux/proc_ns.h> +#include <linux/nstree.h> +#include <linux/sched/task.h> #include "util.h" +/* + * The work queue is used to avoid the cost of synchronize_rcu in kern_unmount. + */ +static void free_ipc(struct work_struct *unused); +static DECLARE_WORK(free_ipc_work, free_ipc); + +static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns) +{ + return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES); +} + +static void dec_ipc_namespaces(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_IPC_NAMESPACES); +} + static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, struct ipc_namespace *old_ns) { struct ipc_namespace *ns; + struct ucounts *ucounts; int err; - ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); + err = -ENOSPC; + again: + ucounts = inc_ipc_namespaces(user_ns); + if (!ucounts) { + /* + * IPC namespaces are freed asynchronously, by free_ipc_work. + * If frees were pending, flush_work will wait, and + * return true. Fail the allocation if no frees are pending. + */ + if (flush_work(&free_ipc_work)) + goto again; + goto fail; + } + + err = -ENOMEM; + ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL_ACCOUNT); if (ns == NULL) - return ERR_PTR(-ENOMEM); + goto fail_dec; - err = proc_alloc_inum(&ns->proc_inum); - if (err) { - kfree(ns); - return ERR_PTR(err); - } + err = ns_common_init(ns); + if (err) + goto fail_free; + + ns_tree_gen_id(ns); + ns->user_ns = get_user_ns(user_ns); + ns->ucounts = ucounts; - atomic_set(&ns->count, 1); err = mq_init_ns(ns); - if (err) { - proc_free_inum(ns->proc_inum); - kfree(ns); - return ERR_PTR(err); - } - atomic_inc(&nr_ipc_ns); + if (err) + goto fail_put; - sem_init_ns(ns); - msg_init_ns(ns); - shm_init_ns(ns); + err = -ENOMEM; + if (!setup_mq_sysctls(ns)) + goto fail_mq_mount; - /* - * msgmni has already been computed for the new ipc ns. - * Thus, do the ipcns creation notification before registering that - * new ipcns in the chain. - */ - ipcns_notify(IPCNS_CREATED); - register_ipcns_notifier(ns); + if (!setup_ipc_sysctls(ns)) + goto fail_mq_sysctls; - ns->user_ns = get_user_ns(user_ns); + err = msg_init_ns(ns); + if (err) + goto fail_ipc; + + sem_init_ns(ns); + shm_init_ns(ns); + ns_tree_add_raw(ns); return ns; + +fail_ipc: + retire_ipc_sysctls(ns); +fail_mq_sysctls: + retire_mq_sysctls(ns); +fail_mq_mount: + mntput(ns->mq_mnt); +fail_put: + put_user_ns(ns->user_ns); + ns_common_free(ns); +fail_free: + kfree(ns); +fail_dec: + dec_ipc_namespaces(ucounts); +fail: + return ERR_PTR(err); } -struct ipc_namespace *copy_ipcs(unsigned long flags, +struct ipc_namespace *copy_ipcs(u64 flags, struct user_namespace *user_ns, struct ipc_namespace *ns) { if (!(flags & CLONE_NEWIPC)) @@ -81,7 +131,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, int next_id; int total, in_use; - down_write(&ids->rw_mutex); + down_write(&ids->rwsem); in_use = ids->in_use; @@ -89,39 +139,50 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, perm = idr_find(&ids->ipcs_idr, next_id); if (perm == NULL) continue; - ipc_lock_by_ptr(perm); + rcu_read_lock(); + ipc_lock_object(perm); free(ns, perm); total++; } - up_write(&ids->rw_mutex); + up_write(&ids->rwsem); } static void free_ipc_ns(struct ipc_namespace *ns) { /* - * Unregistering the hotplug notifier at the beginning guarantees - * that the ipc namespace won't be freed while we are inside the - * callback routine. Since the blocking_notifier_chain_XXX routines - * hold a rw lock on the notifier list, unregister_ipcns_notifier() - * won't take the rw lock before blocking_notifier_call_chain() has - * released the rd lock. + * Caller needs to wait for an RCU grace period to have passed + * after making the mount point inaccessible to new accesses. */ - unregister_ipcns_notifier(ns); + mntput(ns->mq_mnt); sem_exit_ns(ns); msg_exit_ns(ns); shm_exit_ns(ns); - atomic_dec(&nr_ipc_ns); - /* - * Do the ipcns removal notification after decrementing nr_ipc_ns in - * order to have a correct value when recomputing msgmni. - */ - ipcns_notify(IPCNS_REMOVED); + retire_mq_sysctls(ns); + retire_ipc_sysctls(ns); + + dec_ipc_namespaces(ns->ucounts); put_user_ns(ns->user_ns); - proc_free_inum(ns->proc_inum); + ns_common_free(ns); kfree(ns); } +static LLIST_HEAD(free_ipc_list); +static void free_ipc(struct work_struct *unused) +{ + struct llist_node *node = llist_del_all(&free_ipc_list); + struct ipc_namespace *n, *t; + + llist_for_each_entry_safe(n, t, node, mnt_llist) + mnt_make_shortterm(n->mq_mnt); + + /* Wait for any last users to have gone away. */ + synchronize_rcu(); + + llist_for_each_entry_safe(n, t, node, mnt_llist) + free_ipc_ns(n); +} + /* * put_ipc_ns - drop a reference to an ipc namespace. * @ns: the namespace to put @@ -140,59 +201,57 @@ static void free_ipc_ns(struct ipc_namespace *ns) */ void put_ipc_ns(struct ipc_namespace *ns) { - if (atomic_dec_and_lock(&ns->count, &mq_lock)) { + if (ns_ref_put_and_lock(ns, &mq_lock)) { mq_clear_sbinfo(ns); spin_unlock(&mq_lock); - mq_put_mnt(ns); - free_ipc_ns(ns); + + ns_tree_remove(ns); + if (llist_add(&ns->mnt_llist, &free_ipc_list)) + schedule_work(&free_ipc_work); } } -static void *ipcns_get(struct task_struct *task) +static struct ns_common *ipcns_get(struct task_struct *task) { struct ipc_namespace *ns = NULL; struct nsproxy *nsproxy; - rcu_read_lock(); - nsproxy = task_nsproxy(task); + task_lock(task); + nsproxy = task->nsproxy; if (nsproxy) ns = get_ipc_ns(nsproxy->ipc_ns); - rcu_read_unlock(); + task_unlock(task); - return ns; + return ns ? &ns->ns : NULL; } -static void ipcns_put(void *ns) +static void ipcns_put(struct ns_common *ns) { - return put_ipc_ns(ns); + return put_ipc_ns(to_ipc_ns(ns)); } -static int ipcns_install(struct nsproxy *nsproxy, void *new) +static int ipcns_install(struct nsset *nsset, struct ns_common *new) { - struct ipc_namespace *ns = new; + struct nsproxy *nsproxy = nsset->nsproxy; + struct ipc_namespace *ns = to_ipc_ns(new); if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || - !nsown_capable(CAP_SYS_ADMIN)) + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) return -EPERM; - /* Ditch state from the old ipc namespace */ - exit_sem(current); put_ipc_ns(nsproxy->ipc_ns); nsproxy->ipc_ns = get_ipc_ns(ns); return 0; } -static unsigned int ipcns_inum(void *vp) +static struct user_namespace *ipcns_owner(struct ns_common *ns) { - struct ipc_namespace *ns = vp; - - return ns->proc_inum; + return to_ipc_ns(ns)->user_ns; } const struct proc_ns_operations ipcns_operations = { .name = "ipc", - .type = CLONE_NEWIPC, .get = ipcns_get, .put = ipcns_put, .install = ipcns_install, - .inum = ipcns_inum, + .owner = ipcns_owner, }; |
