diff options
Diffstat (limited to 'kernel/nsproxy.c')
| -rw-r--r-- | kernel/nsproxy.c | 462 |
1 files changed, 396 insertions, 66 deletions
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index f6c5d330059a..259c4b4f1eeb 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2006 IBM Corporation * * Author: Serge Hallyn <serue@us.ibm.com> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. - * * Jun 2006 - namespaces support * OpenVZ, SWsoft Inc. * Pavel Emelianov <xemul@openvz.org> @@ -22,16 +18,20 @@ #include <linux/pid_namespace.h> #include <net/net_namespace.h> #include <linux/ipc_namespace.h> +#include <linux/time_namespace.h> +#include <linux/fs_struct.h> +#include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/file.h> #include <linux/syscalls.h> #include <linux/cgroup.h> #include <linux/perf_event.h> +#include <linux/nstree.h> static struct kmem_cache *nsproxy_cachep; struct nsproxy init_nsproxy = { - .count = ATOMIC_INIT(1), + .count = REFCOUNT_INIT(1), .uts_ns = &init_uts_ns, #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) .ipc_ns = &init_ipc_ns, @@ -44,6 +44,10 @@ struct nsproxy init_nsproxy = { #ifdef CONFIG_CGROUPS .cgroup_ns = &init_cgroup_ns, #endif +#ifdef CONFIG_TIME_NS + .time_ns = &init_time_ns, + .time_ns_for_children = &init_time_ns, +#endif }; static inline struct nsproxy *create_nsproxy(void) @@ -52,16 +56,35 @@ static inline struct nsproxy *create_nsproxy(void) nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); if (nsproxy) - atomic_set(&nsproxy->count, 1); + refcount_set(&nsproxy->count, 1); return nsproxy; } +static inline void nsproxy_free(struct nsproxy *ns) +{ + put_mnt_ns(ns->mnt_ns); + put_uts_ns(ns->uts_ns); + put_ipc_ns(ns->ipc_ns); + put_pid_ns(ns->pid_ns_for_children); + put_time_ns(ns->time_ns); + put_time_ns(ns->time_ns_for_children); + put_cgroup_ns(ns->cgroup_ns); + put_net(ns->net_ns); + kmem_cache_free(nsproxy_cachep, ns); +} + +void deactivate_nsproxy(struct nsproxy *ns) +{ + nsproxy_ns_active_put(ns); + nsproxy_free(ns); +} + /* * Create new nsproxy and all of its the associated namespaces. * Return the newly created nsproxy. Do not attach this to the task, * leave it to the caller to do proper locking and attach it to task. */ -static struct nsproxy *create_new_namespaces(unsigned long flags, +static struct nsproxy *create_new_namespaces(u64 flags, struct task_struct *tsk, struct user_namespace *user_ns, struct fs_struct *new_fs) { @@ -110,22 +133,28 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_net; } + new_nsp->time_ns_for_children = copy_time_ns(flags, user_ns, + tsk->nsproxy->time_ns_for_children); + if (IS_ERR(new_nsp->time_ns_for_children)) { + err = PTR_ERR(new_nsp->time_ns_for_children); + goto out_time; + } + new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns); + return new_nsp; +out_time: + put_net(new_nsp->net_ns); out_net: put_cgroup_ns(new_nsp->cgroup_ns); out_cgroup: - if (new_nsp->pid_ns_for_children) - put_pid_ns(new_nsp->pid_ns_for_children); + put_pid_ns(new_nsp->pid_ns_for_children); out_pid: - if (new_nsp->ipc_ns) - put_ipc_ns(new_nsp->ipc_ns); + put_ipc_ns(new_nsp->ipc_ns); out_ipc: - if (new_nsp->uts_ns) - put_uts_ns(new_nsp->uts_ns); + put_uts_ns(new_nsp->uts_ns); out_uts: - if (new_nsp->mnt_ns) - put_mnt_ns(new_nsp->mnt_ns); + put_mnt_ns(new_nsp->mnt_ns); out_ns: kmem_cache_free(nsproxy_cachep, new_nsp); return ERR_PTR(err); @@ -135,7 +164,7 @@ out_ns: * called from clone. This now handles copy for nsproxy and all * namespaces therein. */ -int copy_namespaces(unsigned long flags, struct task_struct *tsk) +int copy_namespaces(u64 flags, struct task_struct *tsk) { struct nsproxy *old_ns = tsk->nsproxy; struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns); @@ -143,12 +172,13 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET | - CLONE_NEWCGROUP)))) { - get_nsproxy(old_ns); - return 0; - } - - if (!ns_capable(user_ns, CAP_SYS_ADMIN)) + CLONE_NEWCGROUP | CLONE_NEWTIME)))) { + if ((flags & CLONE_VM) || + likely(old_ns->time_ns_for_children == old_ns->time_ns)) { + get_nsproxy(old_ns); + return 0; + } + } else if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; /* @@ -159,32 +189,21 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) * it along with CLONE_NEWIPC. */ if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) == - (CLONE_NEWIPC | CLONE_SYSVSEM)) + (CLONE_NEWIPC | CLONE_SYSVSEM)) return -EINVAL; new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs); if (IS_ERR(new_ns)) return PTR_ERR(new_ns); + if ((flags & CLONE_VM) == 0) + timens_on_fork(new_ns, tsk); + + nsproxy_ns_active_get(new_ns); tsk->nsproxy = new_ns; return 0; } -void free_nsproxy(struct nsproxy *ns) -{ - if (ns->mnt_ns) - put_mnt_ns(ns->mnt_ns); - if (ns->uts_ns) - put_uts_ns(ns->uts_ns); - if (ns->ipc_ns) - put_ipc_ns(ns->ipc_ns); - if (ns->pid_ns_for_children) - put_pid_ns(ns->pid_ns_for_children); - put_cgroup_ns(ns->cgroup_ns); - put_net(ns->net_ns); - kmem_cache_free(nsproxy_cachep, ns); -} - /* * Called from unshare. Unshare all the namespaces part of nsproxy. * On success, returns the new nsproxy. @@ -196,7 +215,8 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, int err = 0; if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP))) + CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP | + CLONE_NEWTIME))) return 0; user_ns = new_cred ? new_cred->user_ns : current_user_ns(); @@ -220,58 +240,368 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) might_sleep(); + if (new) + nsproxy_ns_active_get(new); + task_lock(p); ns = p->nsproxy; p->nsproxy = new; task_unlock(p); - if (ns && atomic_dec_and_test(&ns->count)) - free_nsproxy(ns); + if (ns) + put_nsproxy(ns); } -void exit_task_namespaces(struct task_struct *p) +void exit_nsproxy_namespaces(struct task_struct *p) { switch_task_namespaces(p, NULL); } -SYSCALL_DEFINE2(setns, int, fd, int, nstype) +void switch_cred_namespaces(const struct cred *old, const struct cred *new) +{ + ns_ref_active_get(new->user_ns); + ns_ref_active_put(old->user_ns); +} + +void get_cred_namespaces(struct task_struct *tsk) +{ + ns_ref_active_get(tsk->real_cred->user_ns); +} + +void exit_cred_namespaces(struct task_struct *tsk) +{ + ns_ref_active_put(tsk->real_cred->user_ns); +} + +int exec_task_namespaces(void) { struct task_struct *tsk = current; - struct nsproxy *new_nsproxy; - struct file *file; - struct ns_common *ns; - int err; + struct nsproxy *new; - file = proc_ns_fget(fd); - if (IS_ERR(file)) - return PTR_ERR(file); + if (tsk->nsproxy->time_ns_for_children == tsk->nsproxy->time_ns) + return 0; - err = -EINVAL; - ns = get_proc_ns(file_inode(file)); - if (nstype && (ns->ops->type != nstype)) - goto out; + new = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); + if (IS_ERR(new)) + return PTR_ERR(new); - new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); - if (IS_ERR(new_nsproxy)) { - err = PTR_ERR(new_nsproxy); + timens_on_fork(new, tsk); + switch_task_namespaces(tsk, new); + return 0; +} + +static int check_setns_flags(unsigned long flags) +{ + if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | + CLONE_NEWNET | CLONE_NEWTIME | CLONE_NEWUSER | + CLONE_NEWPID | CLONE_NEWCGROUP))) + return -EINVAL; + +#ifndef CONFIG_USER_NS + if (flags & CLONE_NEWUSER) + return -EINVAL; +#endif +#ifndef CONFIG_PID_NS + if (flags & CLONE_NEWPID) + return -EINVAL; +#endif +#ifndef CONFIG_UTS_NS + if (flags & CLONE_NEWUTS) + return -EINVAL; +#endif +#ifndef CONFIG_IPC_NS + if (flags & CLONE_NEWIPC) + return -EINVAL; +#endif +#ifndef CONFIG_CGROUPS + if (flags & CLONE_NEWCGROUP) + return -EINVAL; +#endif +#ifndef CONFIG_NET_NS + if (flags & CLONE_NEWNET) + return -EINVAL; +#endif +#ifndef CONFIG_TIME_NS + if (flags & CLONE_NEWTIME) + return -EINVAL; +#endif + + return 0; +} + +static void put_nsset(struct nsset *nsset) +{ + unsigned flags = nsset->flags; + + if (flags & CLONE_NEWUSER) + put_cred(nsset_cred(nsset)); + /* + * We only created a temporary copy if we attached to more than just + * the mount namespace. + */ + if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) + free_fs_struct(nsset->fs); + if (nsset->nsproxy) + nsproxy_free(nsset->nsproxy); +} + +static int prepare_nsset(unsigned flags, struct nsset *nsset) +{ + struct task_struct *me = current; + + nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs); + if (IS_ERR(nsset->nsproxy)) + return PTR_ERR(nsset->nsproxy); + + if (flags & CLONE_NEWUSER) + nsset->cred = prepare_creds(); + else + nsset->cred = current_cred(); + if (!nsset->cred) goto out; + + /* Only create a temporary copy of fs_struct if we really need to. */ + if (flags == CLONE_NEWNS) { + nsset->fs = me->fs; + } else if (flags & CLONE_NEWNS) { + nsset->fs = copy_fs_struct(me->fs); + if (!nsset->fs) + goto out; } - err = ns->ops->install(new_nsproxy, ns); - if (err) { - free_nsproxy(new_nsproxy); - goto out; + nsset->flags = flags; + return 0; + +out: + put_nsset(nsset); + return -ENOMEM; +} + +static inline int validate_ns(struct nsset *nsset, struct ns_common *ns) +{ + return ns->ops->install(nsset, ns); +} + +/* + * This is the inverse operation to unshare(). + * Ordering is equivalent to the standard ordering used everywhere else + * during unshare and process creation. The switch to the new set of + * namespaces occurs at the point of no return after installation of + * all requested namespaces was successful in commit_nsset(). + */ +static int validate_nsset(struct nsset *nsset, struct pid *pid) +{ + int ret = 0; + unsigned flags = nsset->flags; + struct user_namespace *user_ns = NULL; + struct pid_namespace *pid_ns = NULL; + struct nsproxy *nsp; + struct task_struct *tsk; + + /* Take a "snapshot" of the target task's namespaces. */ + rcu_read_lock(); + tsk = pid_task(pid, PIDTYPE_PID); + if (!tsk) { + rcu_read_unlock(); + return -ESRCH; } - switch_task_namespaces(tsk, new_nsproxy); - perf_event_namespaces(tsk); + if (!ptrace_may_access(tsk, PTRACE_MODE_READ_REALCREDS)) { + rcu_read_unlock(); + return -EPERM; + } + + task_lock(tsk); + nsp = tsk->nsproxy; + if (nsp) + get_nsproxy(nsp); + task_unlock(tsk); + if (!nsp) { + rcu_read_unlock(); + return -ESRCH; + } + +#ifdef CONFIG_PID_NS + if (flags & CLONE_NEWPID) { + pid_ns = task_active_pid_ns(tsk); + if (unlikely(!pid_ns)) { + rcu_read_unlock(); + ret = -ESRCH; + goto out; + } + get_pid_ns(pid_ns); + } +#endif + +#ifdef CONFIG_USER_NS + if (flags & CLONE_NEWUSER) + user_ns = get_user_ns(__task_cred(tsk)->user_ns); +#endif + rcu_read_unlock(); + + /* + * Install requested namespaces. The caller will have + * verified earlier that the requested namespaces are + * supported on this kernel. We don't report errors here + * if a namespace is requested that isn't supported. + */ +#ifdef CONFIG_USER_NS + if (flags & CLONE_NEWUSER) { + ret = validate_ns(nsset, &user_ns->ns); + if (ret) + goto out; + } +#endif + + if (flags & CLONE_NEWNS) { + ret = validate_ns(nsset, from_mnt_ns(nsp->mnt_ns)); + if (ret) + goto out; + } + +#ifdef CONFIG_UTS_NS + if (flags & CLONE_NEWUTS) { + ret = validate_ns(nsset, &nsp->uts_ns->ns); + if (ret) + goto out; + } +#endif + +#ifdef CONFIG_IPC_NS + if (flags & CLONE_NEWIPC) { + ret = validate_ns(nsset, &nsp->ipc_ns->ns); + if (ret) + goto out; + } +#endif + +#ifdef CONFIG_PID_NS + if (flags & CLONE_NEWPID) { + ret = validate_ns(nsset, &pid_ns->ns); + if (ret) + goto out; + } +#endif + +#ifdef CONFIG_CGROUPS + if (flags & CLONE_NEWCGROUP) { + ret = validate_ns(nsset, &nsp->cgroup_ns->ns); + if (ret) + goto out; + } +#endif + +#ifdef CONFIG_NET_NS + if (flags & CLONE_NEWNET) { + ret = validate_ns(nsset, &nsp->net_ns->ns); + if (ret) + goto out; + } +#endif + +#ifdef CONFIG_TIME_NS + if (flags & CLONE_NEWTIME) { + ret = validate_ns(nsset, &nsp->time_ns->ns); + if (ret) + goto out; + } +#endif + +out: + if (pid_ns) + put_pid_ns(pid_ns); + if (nsp) + put_nsproxy(nsp); + put_user_ns(user_ns); + + return ret; +} + +/* + * This is the point of no return. There are just a few namespaces + * that do some actual work here and it's sufficiently minimal that + * a separate ns_common operation seems unnecessary for now. + * Unshare is doing the same thing. If we'll end up needing to do + * more in a given namespace or a helper here is ultimately not + * exported anymore a simple commit handler for each namespace + * should be added to ns_common. + */ +static void commit_nsset(struct nsset *nsset) +{ + unsigned flags = nsset->flags; + struct task_struct *me = current; + +#ifdef CONFIG_USER_NS + if (flags & CLONE_NEWUSER) { + /* transfer ownership */ + commit_creds(nsset_cred(nsset)); + nsset->cred = NULL; + } +#endif + + /* We only need to commit if we have used a temporary fs_struct. */ + if ((flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) { + set_fs_root(me->fs, &nsset->fs->root); + set_fs_pwd(me->fs, &nsset->fs->pwd); + } + +#ifdef CONFIG_IPC_NS + if (flags & CLONE_NEWIPC) + exit_sem(me); +#endif + +#ifdef CONFIG_TIME_NS + if (flags & CLONE_NEWTIME) + timens_commit(me, nsset->nsproxy->time_ns); +#endif + + /* transfer ownership */ + switch_task_namespaces(me, nsset->nsproxy); + nsset->nsproxy = NULL; +} + +SYSCALL_DEFINE2(setns, int, fd, int, flags) +{ + CLASS(fd, f)(fd); + struct ns_common *ns = NULL; + struct nsset nsset = {}; + int err = 0; + + if (fd_empty(f)) + return -EBADF; + + if (proc_ns_file(fd_file(f))) { + ns = get_proc_ns(file_inode(fd_file(f))); + if (flags && (ns->ns_type != flags)) + err = -EINVAL; + flags = ns->ns_type; + } else if (!IS_ERR(pidfd_pid(fd_file(f)))) { + err = check_setns_flags(flags); + } else { + err = -EINVAL; + } + if (err) + goto out; + + err = prepare_nsset(flags, &nsset); + if (err) + goto out; + + if (proc_ns_file(fd_file(f))) + err = validate_ns(&nsset, ns); + else + err = validate_nsset(&nsset, pidfd_pid(fd_file(f))); + if (!err) { + commit_nsset(&nsset); + perf_event_namespaces(current); + } + put_nsset(&nsset); out: - fput(file); return err; } int __init nsproxy_cache_init(void) { - nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); + nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC|SLAB_ACCOUNT); return 0; } |
