summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_request.c2
-rw-r--r--fs/9p/acl.c2
-rw-r--r--fs/attr.c19
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/devpts/inode.c3
-rw-r--r--fs/exec.c2
-rw-r--r--fs/inode.c7
-rw-r--r--fs/kernfs/mount.c5
-rw-r--r--fs/namei.c55
-rw-r--r--fs/namespace.c99
-rw-r--r--fs/nfsd/nfsctl.c13
-rw-r--r--fs/posix_acl.c8
-rw-r--r--fs/proc/inode.c15
-rw-r--r--fs/proc/internal.h3
-rw-r--r--fs/proc/root.c61
-rw-r--r--fs/quota/dquot.c8
-rw-r--r--fs/quota/quota.c14
-rw-r--r--fs/super.c69
-rw-r--r--fs/sysfs/mount.c5
-rw-r--r--fs/xattr.c7
-rw-r--r--include/linux/fs.h79
-rw-r--r--include/linux/mount.h1
-rw-r--r--include/linux/posix_acl.h2
-rw-r--r--include/linux/quota.h10
-rw-r--r--include/linux/uidgid.h4
-rw-r--r--include/linux/user_namespace.h6
-rw-r--r--ipc/mqueue.c20
-rw-r--r--ipc/namespace.c5
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/user_namespace.c14
-rw-r--r--net/sunrpc/rpc_pipe.c8
-rw-r--r--security/commoncap.c10
-rw-r--r--security/integrity/evm/evm_crypto.c4
-rw-r--r--security/selinux/hooks.c25
-rw-r--r--security/smack/smack.h8
-rw-r--r--security/smack/smack_lsm.c34
36 files changed, 418 insertions, 213 deletions
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index d4cc73bb6e1e..542801f04b0d 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -415,7 +415,7 @@ static int mdc_unpack_acl(struct ptlrpc_request *req, struct lustre_md *md)
return rc;
}
- rc = posix_acl_valid(acl);
+ rc = posix_acl_valid(&init_user_ns, acl);
if (rc) {
CERROR("validate acl: %d\n", rc);
posix_acl_release(acl);
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 0576eaeb60b9..5b6a1743ea17 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -266,7 +266,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
if (IS_ERR(acl))
return PTR_ERR(acl);
else if (acl) {
- retval = posix_acl_valid(acl);
+ retval = posix_acl_valid(inode->i_sb->s_user_ns, acl);
if (retval)
goto err_out;
}
diff --git a/fs/attr.c b/fs/attr.c
index 25b24d0f6c88..42bb42bb3c72 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -255,6 +255,25 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID)))
return 0;
+ /*
+ * Verify that uid/gid changes are valid in the target
+ * namespace of the superblock.
+ */
+ if (ia_valid & ATTR_UID &&
+ !kuid_has_mapping(inode->i_sb->s_user_ns, attr->ia_uid))
+ return -EOVERFLOW;
+ if (ia_valid & ATTR_GID &&
+ !kgid_has_mapping(inode->i_sb->s_user_ns, attr->ia_gid))
+ return -EOVERFLOW;
+
+ /* Don't allow modifications of files with invalid uids or
+ * gids unless those uids & gids are being made valid.
+ */
+ if (!(ia_valid & ATTR_UID) && !uid_valid(inode->i_uid))
+ return -EOVERFLOW;
+ if (!(ia_valid & ATTR_GID) && !gid_valid(inode->i_gid))
+ return -EOVERFLOW;
+
error = security_inode_setattr(dentry, attr);
if (error)
return error;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5cbd5391667e..ada42cf42d06 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1846,7 +1846,7 @@ struct block_device *lookup_bdev(const char *pathname)
if (!S_ISBLK(inode->i_mode))
goto fail;
error = -EACCES;
- if (path.mnt->mnt_flags & MNT_NODEV)
+ if (!may_open_dev(&path))
goto fail;
error = -ENOMEM;
bdev = bd_acquire(inode);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 37c134a132c7..d116453b0276 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -396,6 +396,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
{
struct inode *inode;
+ s->s_iflags &= ~SB_I_NODEV;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = DEVPTS_SUPER_MAGIC;
@@ -480,7 +481,7 @@ static struct file_system_type devpts_fs_type = {
.name = "devpts",
.mount = devpts_mount,
.kill_sb = devpts_kill_sb,
- .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT,
};
/*
diff --git a/fs/exec.c b/fs/exec.c
index 887c1c955df8..ca239fc86d8d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1411,7 +1411,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
bprm->cred->euid = current_euid();
bprm->cred->egid = current_egid();
- if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+ if (!mnt_may_suid(bprm->file->f_path.mnt))
return;
if (task_no_new_privs(current))
diff --git a/fs/inode.c b/fs/inode.c
index e171f7b5f9e4..9cef4e16aeda 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1619,6 +1619,13 @@ bool atime_needs_update(const struct path *path, struct inode *inode)
if (inode->i_flags & S_NOATIME)
return false;
+
+ /* Atime updates will likely cause i_uid and i_gid to be written
+ * back improprely if their true value is unknown to the vfs.
+ */
+ if (HAS_UNMAPPED_ID(inode))
+ return false;
+
if (IS_NOATIME(inode))
return false;
if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 63534f5f9073..b3d73ad52b22 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -152,6 +152,8 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
struct dentry *root;
info->sb = sb;
+ /* Userspace would break if executables or devices appear on sysfs */
+ sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = magic;
@@ -241,7 +243,8 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
info->root = root;
info->ns = ns;
- sb = sget(fs_type, kernfs_test_super, kernfs_set_super, flags, info);
+ sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
+ &init_user_ns, info);
if (IS_ERR(sb) || sb->s_fs_info != info)
kfree(info);
if (IS_ERR(sb))
diff --git a/fs/namei.c b/fs/namei.c
index 68a896c804b7..c386a329ab20 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -36,6 +36,7 @@
#include <linux/posix_acl.h>
#include <linux/hash.h>
#include <linux/bitops.h>
+#include <linux/init_task.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -410,6 +411,14 @@ int __inode_permission(struct inode *inode, int mask)
*/
if (IS_IMMUTABLE(inode))
return -EACCES;
+
+ /*
+ * Updating mtime will likely cause i_uid and i_gid to be
+ * written back improperly if their true value is unknown
+ * to the vfs.
+ */
+ if (HAS_UNMAPPED_ID(inode))
+ return -EACCES;
}
retval = do_inode_permission(inode, mask);
@@ -901,6 +910,7 @@ static inline int may_follow_link(struct nameidata *nd)
{
const struct inode *inode;
const struct inode *parent;
+ kuid_t puid;
if (!sysctl_protected_symlinks)
return 0;
@@ -916,7 +926,8 @@ static inline int may_follow_link(struct nameidata *nd)
return 0;
/* Allowed if parent directory and link owner match. */
- if (uid_eq(parent->i_uid, inode->i_uid))
+ puid = parent->i_uid;
+ if (uid_valid(puid) && uid_eq(puid, inode->i_uid))
return 0;
if (nd->flags & LOOKUP_RCU)
@@ -1089,6 +1100,7 @@ static int follow_automount(struct path *path, struct nameidata *nd,
bool *need_mntput)
{
struct vfsmount *mnt;
+ const struct cred *old_cred;
int err;
if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
@@ -1110,11 +1122,16 @@ static int follow_automount(struct path *path, struct nameidata *nd,
path->dentry->d_inode)
return -EISDIR;
+ if (path->dentry->d_sb->s_user_ns != &init_user_ns)
+ return -EACCES;
+
nd->total_link_count++;
if (nd->total_link_count >= 40)
return -ELOOP;
+ old_cred = override_creds(&init_cred);
mnt = path->dentry->d_op->d_automount(path);
+ revert_creds(old_cred);
if (IS_ERR(mnt)) {
/*
* The filesystem is allowed to return -EISDIR here to indicate
@@ -2741,10 +2758,11 @@ EXPORT_SYMBOL(__check_sticky);
* c. have CAP_FOWNER capability
* 6. If the victim is append-only or immutable we can't do antyhing with
* links pointing to it.
- * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
- * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
- * 9. We can't remove a root or mountpoint.
- * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
+ * 7. If the victim has an unknown uid or gid we can't change the inode.
+ * 8. If we were asked to remove a directory and victim isn't one - ENOTDIR.
+ * 9. If we were asked to remove a non-directory and victim isn't one - EISDIR.
+ * 10. We can't remove a root or mountpoint.
+ * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
* nfs_async_unlink().
*/
static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
@@ -2766,7 +2784,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
return -EPERM;
if (check_sticky(dir, inode) || IS_APPEND(inode) ||
- IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
+ IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(inode))
return -EPERM;
if (isdir) {
if (!d_is_dir(victim))
@@ -2787,16 +2805,22 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
* 1. We can't do it if child already exists (open has special treatment for
* this case, but since we are inlined it's OK)
* 2. We can't do it if dir is read-only (done in permission())
- * 3. We should have write and exec permissions on dir
- * 4. We can't do it if dir is immutable (done in permission())
+ * 3. We can't do it if the fs can't represent the fsuid or fsgid.
+ * 4. We should have write and exec permissions on dir
+ * 5. We can't do it if dir is immutable (done in permission())
*/
static inline int may_create(struct inode *dir, struct dentry *child)
{
+ struct user_namespace *s_user_ns;
audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
if (child->d_inode)
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
+ s_user_ns = dir->i_sb->s_user_ns;
+ if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
+ !kgid_has_mapping(s_user_ns, current_fsgid()))
+ return -EOVERFLOW;
return inode_permission(dir, MAY_WRITE | MAY_EXEC);
}
@@ -2865,6 +2889,12 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
}
EXPORT_SYMBOL(vfs_create);
+bool may_open_dev(const struct path *path)
+{
+ return !(path->mnt->mnt_flags & MNT_NODEV) &&
+ !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
+}
+
static int may_open(struct path *path, int acc_mode, int flag)
{
struct dentry *dentry = path->dentry;
@@ -2883,7 +2913,7 @@ static int may_open(struct path *path, int acc_mode, int flag)
break;
case S_IFBLK:
case S_IFCHR:
- if (path->mnt->mnt_flags & MNT_NODEV)
+ if (!may_open_dev(path))
return -EACCES;
/*FALLTHRU*/
case S_IFIFO:
@@ -4135,6 +4165,13 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
*/
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return -EPERM;
+ /*
+ * Updating the link count will likely cause i_uid and i_gid to
+ * be writen back improperly if their true value is unknown to
+ * the vfs.
+ */
+ if (HAS_UNMAPPED_ID(inode))
+ return -EPERM;
if (!dir->i_op->link)
return -EPERM;
if (S_ISDIR(inode->i_mode))
diff --git a/fs/namespace.c b/fs/namespace.c
index 419f746d851d..7bb2cda3bfef 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2186,13 +2186,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
!(mnt_flags & MNT_NODEV)) {
- /* Was the nodev implicitly added in mount? */
- if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
- !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
- mnt_flags |= MNT_NODEV;
- } else {
- return -EPERM;
- }
+ return -EPERM;
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
!(mnt_flags & MNT_NOSUID)) {
@@ -2376,7 +2370,7 @@ unlock:
return err;
}
-static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags);
+static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags);
/*
* create a new mount for userspace and request it to be added into the
@@ -2386,7 +2380,6 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
int mnt_flags, const char *name, void *data)
{
struct file_system_type *type;
- struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
struct vfsmount *mnt;
int err;
@@ -2397,26 +2390,6 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
if (!type)
return -ENODEV;
- if (user_ns != &init_user_ns) {
- if (!(type->fs_flags & FS_USERNS_MOUNT)) {
- put_filesystem(type);
- return -EPERM;
- }
- /* Only in special cases allow devices from mounts
- * created outside the initial user namespace.
- */
- if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
- flags |= MS_NODEV;
- mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
- }
- if (type->fs_flags & FS_USERNS_VISIBLE) {
- if (!fs_fully_visible(type, &mnt_flags)) {
- put_filesystem(type);
- return -EPERM;
- }
- }
- }
-
mnt = vfs_kern_mount(type, flags, name, data);
if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
!mnt->mnt_sb->s_subtype)
@@ -2426,6 +2399,11 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
if (IS_ERR(mnt))
return PTR_ERR(mnt);
+ if (mount_too_revealing(mnt, &mnt_flags)) {
+ mntput(mnt);
+ return -EPERM;
+ }
+
err = do_add_mount(real_mount(mnt), path, mnt_flags);
if (err)
mntput(mnt);
@@ -3217,22 +3195,19 @@ bool current_chrooted(void)
return chrooted;
}
-static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
+static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
+ int *new_mnt_flags)
{
- struct mnt_namespace *ns = current->nsproxy->mnt_ns;
int new_flags = *new_mnt_flags;
struct mount *mnt;
bool visible = false;
- if (unlikely(!ns))
- return false;
-
down_read(&namespace_sem);
list_for_each_entry(mnt, &ns->list, mnt_list) {
struct mount *child;
int mnt_flags;
- if (mnt->mnt.mnt_sb->s_type != type)
+ if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
continue;
/* This mount is not fully visible if it's root directory
@@ -3241,12 +3216,8 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
continue;
- /* Read the mount flags and filter out flags that
- * may safely be ignored.
- */
+ /* A local view of the mount flags */
mnt_flags = mnt->mnt.mnt_flags;
- if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
- mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
/* Don't miss readonly hidden in the superblock flags */
if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
@@ -3258,15 +3229,6 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
if ((mnt_flags & MNT_LOCK_READONLY) &&
!(new_flags & MNT_READONLY))
continue;
- if ((mnt_flags & MNT_LOCK_NODEV) &&
- !(new_flags & MNT_NODEV))
- continue;
- if ((mnt_flags & MNT_LOCK_NOSUID) &&
- !(new_flags & MNT_NOSUID))
- continue;
- if ((mnt_flags & MNT_LOCK_NOEXEC) &&
- !(new_flags & MNT_NOEXEC))
- continue;
if ((mnt_flags & MNT_LOCK_ATIME) &&
((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
continue;
@@ -3286,9 +3248,6 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
}
/* Preserve the locked attributes */
*new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
- MNT_LOCK_NODEV | \
- MNT_LOCK_NOSUID | \
- MNT_LOCK_NOEXEC | \
MNT_LOCK_ATIME);
visible = true;
goto found;
@@ -3299,6 +3258,42 @@ found:
return visible;
}
+static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
+{
+ const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
+ struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+ unsigned long s_iflags;
+
+ if (ns->user_ns == &init_user_ns)
+ return false;
+
+ /* Can this filesystem be too revealing? */
+ s_iflags = mnt->mnt_sb->s_iflags;
+ if (!(s_iflags & SB_I_USERNS_VISIBLE))
+ return false;
+
+ if ((s_iflags & required_iflags) != required_iflags) {
+ WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
+ required_iflags);
+ return true;
+ }
+
+ return !mnt_already_visible(ns, mnt, new_mnt_flags);
+}
+
+bool mnt_may_suid(struct vfsmount *mnt)
+{
+ /*
+ * Foreign mounts (accessed via fchdir or through /proc
+ * symlinks) are always treated as if they are nosuid. This
+ * prevents namespaces from trusting potentially unsafe
+ * suid/sgid bits, file caps, or security labels that originate
+ * in other namespaces.
+ */
+ return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
+ current_in_userns(mnt->mnt_sb->s_user_ns);
+}
+
static struct ns_common *mntns_get(struct task_struct *task)
{
struct ns_common *ns = NULL;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e7787777620e..65ad0165a94f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1151,20 +1151,15 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
#endif
/* last one */ {""}
};
- struct net *net = data;
- int ret;
-
- ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
- if (ret)
- return ret;
- sb->s_fs_info = get_net(net);
- return 0;
+ get_net(sb->s_fs_info);
+ return simple_fill_super(sb, 0x6e667364, nfsd_files);
}
static struct dentry *nfsd_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super);
+ struct net *net = current->nsproxy->net_ns;
+ return mount_ns(fs_type, flags, data, net, net->user_ns, nfsd_fill_super);
}
static void nfsd_umount(struct super_block *sb)
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index edc452c2a563..59d47ab0791a 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -205,7 +205,7 @@ posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
* Check if an acl is valid. Returns 0 if it is, or -E... otherwise.
*/
int
-posix_acl_valid(const struct posix_acl *acl)
+posix_acl_valid(struct user_namespace *user_ns, const struct posix_acl *acl)
{
const struct posix_acl_entry *pa, *pe;
int state = ACL_USER_OBJ;
@@ -225,7 +225,7 @@ posix_acl_valid(const struct posix_acl *acl)
case ACL_USER:
if (state != ACL_USER)
return -EINVAL;
- if (!uid_valid(pa->e_uid))
+ if (!kuid_has_mapping(user_ns, pa->e_uid))
return -EINVAL;
needs_mask = 1;
break;
@@ -240,7 +240,7 @@ posix_acl_valid(const struct posix_acl *acl)
case ACL_GROUP:
if (state != ACL_GROUP)
return -EINVAL;
- if (!gid_valid(pa->e_gid))
+ if (!kgid_has_mapping(user_ns, pa->e_gid))
return -EINVAL;
needs_mask = 1;
break;
@@ -834,7 +834,7 @@ set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
return -EPERM;
if (acl) {
- int ret = posix_acl_valid(acl);
+ int ret = posix_acl_valid(inode->i_sb->s_user_ns, acl);
if (ret)
return ret;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 42305ddcbaa0..c1b72388e571 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -457,17 +457,30 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
return inode;
}
-int proc_fill_super(struct super_block *s)
+int proc_fill_super(struct super_block *s, void *data, int silent)
{
+ struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
struct inode *root_inode;
int ret;
+ if (!proc_parse_options(data, ns))
+ return -EINVAL;
+
+ /* User space would break if executables or devices appear on proc */
+ s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = PROC_SUPER_MAGIC;
s->s_op = &proc_sops;
s->s_time_gran = 1;
+
+ /*
+ * procfs isn't actually a stacking filesystem; however, there is
+ * too much magic going on inside it to permit stacking things on
+ * top of it
+ */
+ s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
pde_get(&proc_root);
root_inode = proc_get_inode(s, &proc_root);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index aa2781095bd1..7931c558c192 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -212,7 +212,7 @@ extern const struct inode_operations proc_pid_link_inode_operations;
extern void proc_init_inodecache(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
-extern int proc_fill_super(struct super_block *);
+extern int proc_fill_super(struct super_block *, void *data, int flags);
extern void proc_entry_rundown(struct proc_dir_entry *);
/*
@@ -268,6 +268,7 @@ static inline void proc_tty_init(void) {}
* root.c
*/
extern struct proc_dir_entry proc_root;
+extern int proc_parse_options(char *options, struct pid_namespace *pid);
extern void proc_self_init(void);
extern int proc_remount(struct super_block *, int *, char *);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 06702783bf40..8d3e484055a6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -23,21 +23,6 @@
#include "internal.h"
-static int proc_test_super(struct super_block *sb, void *data)
-{
- return sb->s_fs_info == data;
-}
-
-static int proc_set_super(struct super_block *sb, void *data)
-{
- int err = set_anon_super(sb, NULL);
- if (!err) {
- struct pid_namespace *ns = (struct pid_namespace *)data;
- sb->s_fs_info = get_pid_ns(ns);
- }
- return err;
-}
-
enum {
Opt_gid, Opt_hidepid, Opt_err,
};
@@ -48,7 +33,7 @@ static const match_table_t tokens = {
{Opt_err, NULL},
};
-static int proc_parse_options(char *options, struct pid_namespace *pid)
+int proc_parse_options(char *options, struct pid_namespace *pid)
{
char *p;
substring_t args[MAX_OPT_ARGS];
@@ -100,52 +85,16 @@ int proc_remount(struct super_block *sb, int *flags, char *data)
static struct dentry *proc_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- int err;
- struct super_block *sb;
struct pid_namespace *ns;
- char *options;
if (flags & MS_KERNMOUNT) {
- ns = (struct pid_namespace *)data;
- options = NULL;
+ ns = data;
+ data = NULL;
} else {
ns = task_active_pid_ns(current);
- options = data;
-
- /* Does the mounter have privilege over the pid namespace? */
- if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
- return ERR_PTR(-EPERM);
- }
-
- sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
- if (IS_ERR(sb))
- return ERR_CAST(sb);
-
- /*
- * procfs isn't actually a stacking filesystem; however, there is
- * too much magic going on inside it to permit stacking things on
- * top of it
- */
- sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
-
- if (!proc_parse_options(options, ns)) {
- deactivate_locked_super(sb);
- return ERR_PTR(-EINVAL);
- }
-
- if (!sb->s_root) {
- err = proc_fill_super(sb);
- if (err) {
- deactivate_locked_super(sb);
- return ERR_PTR(err);
- }
-
- sb->s_flags |= MS_ACTIVE;
- /* User space would break if executables appear on proc */
- sb->s_iflags |= SB_I_NOEXEC;
}
- return dget(sb->s_root);
+ return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super);
}
static void proc_kill_sb(struct super_block *sb)
@@ -165,7 +114,7 @@ static struct file_system_type proc_fs_type = {
.name = "proc",
.mount = proc_mount,
.kill_sb = proc_kill_sb,
- .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT,
};
void __init proc_root_init(void)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b1322dd9d136..1bfac28b7e7d 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -841,6 +841,9 @@ struct dquot *dqget(struct super_block *sb, struct kqid qid)
unsigned int hashent = hashfn(sb, qid);
struct dquot *dquot, *empty = NULL;
+ if (!qid_has_mapping(sb->s_user_ns, qid))
+ return ERR_PTR(-EINVAL);
+
if (!sb_has_quota_active(sb, qid.type))
return ERR_PTR(-ESRCH);
we_slept:
@@ -2268,6 +2271,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
error = -EINVAL;
goto out_fmt;
}
+ /* Filesystems outside of init_user_ns not yet supported */
+ if (sb->s_user_ns != &init_user_ns) {
+ error = -EINVAL;
+ goto out_fmt;
+ }
/* Usage always has to be set... */
if (!(flags & DQUOT_USAGE_ENABLED)) {
error = -EINVAL;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 0f10ee9892ce..35df08ee9c97 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -211,7 +211,7 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->get_dqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
ret = sb->s_qcop->get_dqblk(sb, qid, &fdq);
if (ret)
@@ -237,7 +237,7 @@ static int quota_getnextquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->get_nextdqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq);
if (ret)
@@ -288,7 +288,7 @@ static int quota_setquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->set_dqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
copy_from_if_dqblk(&fdq, &idq);
return sb->s_qcop->set_dqblk(sb, qid, &fdq);
@@ -581,10 +581,10 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->set_dqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
/* Are we actually setting timer / warning limits for all users? */
- if (from_kqid(&init_user_ns, qid) == 0 &&
+ if (from_kqid(sb->s_user_ns, qid) == 0 &&
fdq.d_fieldmask & (FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK)) {
struct qc_info qinfo;
int ret;
@@ -642,7 +642,7 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->get_dqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
ret = sb->s_qcop->get_dqblk(sb, qid, &qdq);
if (ret)
@@ -669,7 +669,7 @@ static int quota_getnextxquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->get_nextdqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq);
if (ret)
diff --git a/fs/super.c b/fs/super.c
index 5806ffd45563..c2ff475c1711 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -33,6 +33,7 @@
#include <linux/cleancache.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
+#include <linux/user_namespace.h>
#include "internal.h"
@@ -165,6 +166,7 @@ static void destroy_super(struct super_block *s)
list_lru_destroy(&s->s_inode_lru);
security_sb_free(s);
WARN_ON(!list_empty(&s->s_mounts));
+ put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
kfree(s->s_options);
call_rcu(&s->rcu, destroy_super_rcu);
@@ -174,11 +176,13 @@ static void destroy_super(struct super_block *s)
* alloc_super - create new superblock
* @type: filesystem type superblock should belong to
* @flags: the mount flags
+ * @user_ns: User namespace for the super_block
*
* Allocates and initializes a new &struct super_block. alloc_super()
* returns a pointer new superblock or %NULL if allocation had failed.
*/
-static struct super_block *alloc_super(struct file_system_type *type, int flags)
+static struct super_block *alloc_super(struct file_system_type *type, int flags,
+ struct user_namespace *user_ns)
{
struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
static const struct super_operations default_op;
@@ -188,6 +192,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
return NULL;
INIT_LIST_HEAD(&s->s_mounts);
+ s->s_user_ns = get_user_ns(user_ns);
if (security_sb_alloc(s))
goto fail;
@@ -201,6 +206,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
init_waitqueue_head(&s->s_writers.wait_unfrozen);
s->s_bdi = &noop_backing_dev_info;
s->s_flags = flags;
+ if (s->s_user_ns != &init_user_ns)
+ s->s_iflags |= SB_I_NODEV;
INIT_HLIST_NODE(&s->s_instances);
INIT_HLIST_BL_HEAD(&s->s_anon);
mutex_init(&s->s_sync_lock);
@@ -445,29 +452,42 @@ void generic_shutdown_super(struct super_block *sb)
EXPORT_SYMBOL(generic_shutdown_super);
/**
- * sget - find or create a superblock
+ * sget_userns - find or create a superblock
* @type: filesystem type superblock should belong to
* @test: comparison callback
* @set: setup callback
* @flags: mount flags
+ * @user_ns: User namespace for the super_block
* @data: argument to each of them
*/
-struct super_block *sget(struct file_system_type *type,
+struct super_block *sget_userns(struct file_system_type *type,
int (*test)(struct super_block *,void *),
int (*set)(struct super_block *,void *),
- int flags,
+ int flags, struct user_namespace *user_ns,
void *data)
{
struct super_block *s = NULL;
struct super_block *old;
int err;
+ if (!(flags & MS_KERNMOUNT) &&
+ !(type->fs_flags & FS_USERNS_MOUNT) &&
+ !capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
retry:
spin_lock(&sb_lock);
if (test) {
hlist_for_each_entry(old, &type->fs_supers, s_instances) {
if (!test(old, data))
continue;
+ if (user_ns != old->s_user_ns) {
+ spin_unlock(&sb_lock);
+ if (s) {
+ up_write(&s->s_umount);
+ destroy_super(s);
+ }
+ return ERR_PTR(-EBUSY);
+ }
if (!grab_super(old))
goto retry;
if (s) {
@@ -480,7 +500,7 @@ retry:
}
if (!s) {
spin_unlock(&sb_lock);
- s = alloc_super(type, flags);
+ s = alloc_super(type, flags, user_ns);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
@@ -503,6 +523,31 @@ retry:
return s;
}
+EXPORT_SYMBOL(sget_userns);
+
+/**
+ * sget - find or create a superblock
+ * @type: filesystem type superblock should belong to
+ * @test: comparison callback
+ * @set: setup callback
+ * @flags: mount flags
+ * @data: argument to each of them
+ */
+struct super_block *sget(struct file_system_type *type,
+ int (*test)(struct super_block *,void *),
+ int (*set)(struct super_block *,void *),
+ int flags,
+ void *data)
+{
+ struct user_namespace *user_ns = current_user_ns();
+
+ /* Ensure the requestor has permissions over the target filesystem */
+ if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ return sget_userns(type, test, set, flags, user_ns, data);
+}
+
EXPORT_SYMBOL(sget);
void drop_super(struct super_block *sb)
@@ -920,12 +965,20 @@ static int ns_set_super(struct super_block *sb, void *data)
return set_anon_super(sb, NULL);
}
-struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
- void *data, int (*fill_super)(struct super_block *, void *, int))
+struct dentry *mount_ns(struct file_system_type *fs_type,
+ int flags, void *data, void *ns, struct user_namespace *user_ns,
+ int (*fill_super)(struct super_block *, void *, int))
{
struct super_block *sb;
- sb = sget(fs_type, ns_test_super, ns_set_super, flags, data);
+ /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
+ * over the namespace.
+ */
+ if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags,
+ user_ns, ns);
if (IS_ERR(sb))
return ERR_CAST(sb);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f3db82071cfb..20b8f82e115b 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -41,8 +41,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
if (IS_ERR(root) || !new_sb)
kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
else if (new_sb)
- /* Userspace would break if executables appear on sysfs */
- root->d_sb->s_iflags |= SB_I_NOEXEC;
+ root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
return root;
}
@@ -59,7 +58,7 @@ static struct file_system_type sysfs_fs_type = {
.name = "sysfs",
.mount = sysfs_mount,
.kill_sb = sysfs_kill_sb,
- .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT,
};
int __init sysfs_init(void)
diff --git a/fs/xattr.c b/fs/xattr.c
index 4beafc43daa5..c243905835ab 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -38,6 +38,13 @@ xattr_permission(struct inode *inode, const char *name, int mask)
if (mask & MAY_WRITE) {
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
return -EPERM;
+ /*
+ * Updating an xattr will likely cause i_uid and i_gid
+ * to be writen back improperly if their true value is
+ * unknown to the vfs.
+ */
+ if (HAS_UNMAPPED_ID(inode))
+ return -EPERM;
}
/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f65a6801f609..577365a77b47 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -829,31 +829,6 @@ static inline void i_size_write(struct inode *inode, loff_t i_size)
#endif
}
-/* Helper functions so that in most cases filesystems will
- * not need to deal directly with kuid_t and kgid_t and can
- * instead deal with the raw numeric values that are stored
- * in the filesystem.
- */
-static inline uid_t i_uid_read(const struct inode *inode)
-{
- return from_kuid(&init_user_ns, inode->i_uid);
-}
-
-static inline gid_t i_gid_read(const struct inode *inode)
-{
- return from_kgid(&init_user_ns, inode->i_gid);
-}
-
-static inline void i_uid_write(struct inode *inode, uid_t uid)
-{
- inode->i_uid = make_kuid(&init_user_ns, uid);
-}
-
-static inline void i_gid_write(struct inode *inode, gid_t gid)
-{
- inode->i_gid = make_kgid(&init_user_ns, gid);
-}
-
static inline unsigned iminor(const struct inode *inode)
{
return MINOR(inode->i_rdev);
@@ -1320,6 +1295,10 @@ struct mm_struct;
/* sb->s_iflags */
#define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */
#define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */
+#define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */
+
+/* sb->s_iflags to limit user namespace mounts */
+#define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */
/* Possible states of 'frozen' field */
enum {
@@ -1423,6 +1402,13 @@ struct super_block {
struct hlist_head s_pins;
/*
+ * Owning user namespace and default context in which to
+ * interpret filesystem uids, gids, quotas, device nodes,
+ * xattrs and security labels.
+ */
+ struct user_namespace *s_user_ns;
+
+ /*
* Keep the lru lists last in the structure so they always sit on their
* own individual cachelines.
*/
@@ -1446,6 +1432,31 @@ struct super_block {
struct list_head s_inodes_wb; /* writeback inodes */
};
+/* Helper functions so that in most cases filesystems will
+ * not need to deal directly with kuid_t and kgid_t and can
+ * instead deal with the raw numeric values that are stored
+ * in the filesystem.
+ */
+static inline uid_t i_uid_read(const struct inode *inode)
+{
+ return from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
+}
+
+static inline gid_t i_gid_read(const struct inode *inode)
+{
+ return from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
+}
+
+static inline void i_uid_write(struct inode *inode, uid_t uid)
+{
+ inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid);
+}
+
+static inline void i_gid_write(struct inode *inode, gid_t gid)
+{
+ inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
+}
+
extern struct timespec current_fs_time(struct super_block *sb);
/*
@@ -1588,6 +1599,7 @@ extern int vfs_whiteout(struct inode *, struct dentry *);
*/
extern void inode_init_owner(struct inode *inode, const struct inode *dir,
umode_t mode);
+extern bool may_open_dev(const struct path *path);
/*
* VFS FS_IOC_FIEMAP helper definitions.
*/
@@ -1858,6 +1870,11 @@ struct super_operations {
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
(inode)->i_rdev == WHITEOUT_DEV)
+static inline bool HAS_UNMAPPED_ID(struct inode *inode)
+{
+ return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
+}
+
/*
* Inode state bits. Protected by inode->i_lock
*
@@ -2006,8 +2023,6 @@ struct file_system_type {
#define FS_BINARY_MOUNTDATA 2
#define FS_HAS_SUBTYPE 4
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
-#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
-#define FS_USERNS_VISIBLE 32 /* FS must already be visible */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *);
@@ -2028,8 +2043,9 @@ struct file_system_type {
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
-extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
- void *data, int (*fill_super)(struct super_block *, void *, int));
+extern struct dentry *mount_ns(struct file_system_type *fs_type,
+ int flags, void *data, void *ns, struct user_namespace *user_ns,
+ int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
@@ -2049,6 +2065,11 @@ void deactivate_locked_super(struct super_block *sb);
int set_anon_super(struct super_block *s, void *data);
int get_anon_bdev(dev_t *);
void free_anon_bdev(dev_t);
+struct super_block *sget_userns(struct file_system_type *type,
+ int (*test)(struct super_block *,void *),
+ int (*set)(struct super_block *,void *),
+ int flags, struct user_namespace *user_ns,
+ void *data);
struct super_block *sget(struct file_system_type *type,
int (*test)(struct super_block *,void *),
int (*set)(struct super_block *,void *),
diff --git a/include/linux/mount.h b/include/linux/mount.h
index f822c3c11377..54a594d49733 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -81,6 +81,7 @@ extern void mntput(struct vfsmount *mnt);
extern struct vfsmount *mntget(struct vfsmount *mnt);
extern struct vfsmount *mnt_clone_internal(struct path *path);
extern int __mnt_is_readonly(struct vfsmount *mnt);
+extern bool mnt_may_suid(struct vfsmount *mnt);
struct path;
extern struct vfsmount *clone_private_mount(struct path *path);
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index c818772d9f9d..d5d3d741f028 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -79,7 +79,7 @@ posix_acl_release(struct posix_acl *acl)
extern void posix_acl_init(struct posix_acl *, int);
extern struct posix_acl *posix_acl_alloc(int, gfp_t);
-extern int posix_acl_valid(const struct posix_acl *);
+extern int posix_acl_valid(struct user_namespace *, const struct posix_acl *);
extern int posix_acl_permission(struct inode *, const struct posix_acl *, int);
extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t);
extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 8486d27cf360..55107a8ff887 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -179,6 +179,16 @@ static inline struct kqid make_kqid_projid(kprojid_t projid)
return kqid;
}
+/**
+ * qid_has_mapping - Report if a qid maps into a user namespace.
+ * @ns: The user namespace to see if a value maps into.
+ * @qid: The kernel internal quota identifier to test.
+ */
+static inline bool qid_has_mapping(struct user_namespace *ns, struct kqid qid)
+{
+ return from_kqid(ns, qid) != (qid_t) -1;
+}
+
extern spinlock_t dq_data_lock;
diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
index 03835522dfcb..25e9d9216340 100644
--- a/include/linux/uidgid.h
+++ b/include/linux/uidgid.h
@@ -177,12 +177,12 @@ static inline gid_t from_kgid_munged(struct user_namespace *to, kgid_t kgid)
static inline bool kuid_has_mapping(struct user_namespace *ns, kuid_t uid)
{
- return true;
+ return uid_valid(uid);
}
static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
{
- return true;
+ return gid_valid(gid);
}
#endif /* CONFIG_USER_NS */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 8297e5b341d8..9217169c64cb 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -72,6 +72,7 @@ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t,
extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
extern int proc_setgroups_show(struct seq_file *m, void *v);
extern bool userns_may_setgroups(const struct user_namespace *ns);
+extern bool current_in_userns(const struct user_namespace *target_ns);
#else
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -100,6 +101,11 @@ static inline bool userns_may_setgroups(const struct user_namespace *ns)
{
return true;
}
+
+static inline bool current_in_userns(const struct user_namespace *target_ns)
+{
+ return true;
+}
#endif
#endif /* _LINUX_USER_H */
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index ade739f67f1d..0b13ace266f2 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -305,8 +305,9 @@ err:
static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
- struct ipc_namespace *ns = data;
+ struct ipc_namespace *ns = sb->s_fs_info;
+ sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = MQUEUE_MAGIC;
@@ -326,17 +327,14 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data)
{
- if (!(flags & MS_KERNMOUNT)) {
- struct ipc_namespace *ns = current->nsproxy->ipc_ns;
- /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
- * over the ipc namespace.
- */
- if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
- return ERR_PTR(-EPERM);
-
- data = ns;
+ struct ipc_namespace *ns;
+ if (flags & MS_KERNMOUNT) {
+ ns = data;
+ data = NULL;
+ } else {
+ ns = current->nsproxy->ipc_ns;
}
- return mount_ns(fs_type, flags, data, mqueue_fill_super);
+ return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super);
}
static void init_once(void *foo)
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 068caf18d565..04cb07eb81f1 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -34,8 +34,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
ns->ns.ops = &ipcns_operations;
atomic_set(&ns->count, 1);
+ ns->user_ns = get_user_ns(user_ns);
+
err = mq_init_ns(ns);
if (err) {
+ put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);
kfree(ns);
return ERR_PTR(err);
@@ -46,8 +49,6 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
msg_init_ns(ns);
shm_init_ns(ns);
- ns->user_ns = get_user_ns(user_ns);
-
return ns;
}
diff --git a/kernel/cred.c b/kernel/cred.c
index 0c0cd8a62285..5f264fb5737d 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -689,6 +689,8 @@ EXPORT_SYMBOL(set_security_override_from_ctx);
*/
int set_create_files_as(struct cred *new, struct inode *inode)
{
+ if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+ return -EINVAL;
new->fsuid = inode->i_uid;
new->fsgid = inode->i_gid;
return security_kernel_create_files_as(new, inode);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 9bafc211930c..68f594212759 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -938,6 +938,20 @@ bool userns_may_setgroups(const struct user_namespace *ns)
return allowed;
}
+/*
+ * Returns true if @ns is the same namespace as or a descendant of
+ * @target_ns.
+ */
+bool current_in_userns(const struct user_namespace *target_ns)
+{
+ struct user_namespace *ns;
+ for (ns = current_user_ns(); ns; ns = ns->parent) {
+ if (ns == target_ns)
+ return true;
+ }
+ return false;
+}
+
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
return container_of(ns, struct user_namespace, ns);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index fc48eca21fd2..84f98cbe31c3 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1386,7 +1386,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
struct dentry *root, *gssd_dentry;
- struct net *net = data;
+ struct net *net = get_net(sb->s_fs_info);
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int err;
@@ -1419,7 +1419,6 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
sb);
if (err)
goto err_depopulate;
- sb->s_fs_info = get_net(net);
mutex_unlock(&sn->pipefs_sb_lock);
return 0;
@@ -1448,7 +1447,8 @@ static struct dentry *
rpc_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_ns(fs_type, flags, current->nsproxy->net_ns, rpc_fill_super);
+ struct net *net = current->nsproxy->net_ns;
+ return mount_ns(fs_type, flags, data, net, net->user_ns, rpc_fill_super);
}
static void rpc_kill_sb(struct super_block *sb)
@@ -1468,9 +1468,9 @@ static void rpc_kill_sb(struct super_block *sb)
RPC_PIPEFS_UMOUNT,
sb);
mutex_unlock(&sn->pipefs_sb_lock);
- put_net(net);
out:
kill_litter_super(sb);
+ put_net(net);
}
static struct file_system_type rpc_pipe_fs_type = {
diff --git a/security/commoncap.c b/security/commoncap.c
index e7fadde737f4..14540bd78561 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -453,7 +453,15 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c
if (!file_caps_enabled)
return 0;
- if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+ if (!mnt_may_suid(bprm->file->f_path.mnt))
+ return 0;
+
+ /*
+ * This check is redundant with mnt_may_suid() but is kept to make
+ * explicit that capability bits are limited to s_user_ns and its
+ * descendants.
+ */
+ if (!current_in_userns(bprm->file->f_path.mnt->mnt_sb->s_user_ns))
return 0;
rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index 30b6b7d0429f..11c1d30bd705 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -151,8 +151,8 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
memset(&hmac_misc, 0, sizeof(hmac_misc));
hmac_misc.ino = inode->i_ino;
hmac_misc.generation = inode->i_generation;
- hmac_misc.uid = from_kuid(&init_user_ns, inode->i_uid);
- hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid);
+ hmac_misc.uid = from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
+ hmac_misc.gid = from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
hmac_misc.mode = inode->i_mode;
crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
if (evm_hmac_attrs & EVM_ATTR_FSUUID)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a86d537eb79b..19be9d39c742 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -830,6 +830,28 @@ static int selinux_set_mnt_opts(struct super_block *sb,
goto out;
}
}
+
+ /*
+ * If this is a user namespace mount, no contexts are allowed
+ * on the command line and security labels must be ignored.
+ */
+ if (sb->s_user_ns != &init_user_ns) {
+ if (context_sid || fscontext_sid || rootcontext_sid ||
+ defcontext_sid) {
+ rc = -EACCES;
+ goto out;
+ }
+ if (sbsec->behavior == SECURITY_FS_USE_XATTR) {
+ sbsec->behavior = SECURITY_FS_USE_MNTPOINT;
+ rc = security_transition_sid(current_sid(), current_sid(),
+ SECCLASS_FILE, NULL,
+ &sbsec->mntpoint_sid);
+ if (rc)
+ goto out;
+ }
+ goto out_set_opts;
+ }
+
/* sets the context of the superblock for the fs being mounted. */
if (fscontext_sid) {
rc = may_context_mount_sb_relabel(fscontext_sid, sbsec, cred);
@@ -898,6 +920,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
sbsec->def_sid = defcontext_sid;
}
+out_set_opts:
rc = sb_finish_set_opts(sb);
out:
mutex_unlock(&sbsec->lock);
@@ -2259,7 +2282,7 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm,
const struct task_security_struct *new_tsec)
{
int nnp = (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS);
- int nosuid = (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID);
+ int nosuid = !mnt_may_suid(bprm->file->f_path.mnt);
int rc;
if (!nnp && !nosuid)
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 6c91156ae225..26e58f1804b1 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -90,9 +90,15 @@ struct superblock_smack {
struct smack_known *smk_floor;
struct smack_known *smk_hat;
struct smack_known *smk_default;
- int smk_initialized;
+ int smk_flags;
};
+/*
+ * Superblock flags
+ */
+#define SMK_SB_INITIALIZED 0x01
+#define SMK_SB_UNTRUSTED 0x02
+
struct socket_smack {
struct smack_known *smk_out; /* outbound label */
struct smack_known *smk_in; /* inbound label */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 6777295f4b2b..b75634dbf53b 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -549,7 +549,7 @@ static int smack_sb_alloc_security(struct super_block *sb)
sbsp->smk_floor = &smack_known_floor;
sbsp->smk_hat = &smack_known_hat;
/*
- * smk_initialized will be zero from kzalloc.
+ * SMK_SB_INITIALIZED will be zero from kzalloc.
*/
sb->s_security = sbsp;
@@ -766,10 +766,10 @@ static int smack_set_mnt_opts(struct super_block *sb,
int num_opts = opts->num_mnt_opts;
int transmute = 0;
- if (sp->smk_initialized)
+ if (sp->smk_flags & SMK_SB_INITIALIZED)
return 0;
- sp->smk_initialized = 1;
+ sp->smk_flags |= SMK_SB_INITIALIZED;
for (i = 0; i < num_opts; i++) {
switch (opts->mnt_opts_flags[i]) {
@@ -821,6 +821,17 @@ static int smack_set_mnt_opts(struct super_block *sb,
skp = smk_of_current();
sp->smk_root = skp;
sp->smk_default = skp;
+ /*
+ * For a handful of fs types with no user-controlled
+ * backing store it's okay to trust security labels
+ * in the filesystem. The rest are untrusted.
+ */
+ if (sb->s_user_ns != &init_user_ns &&
+ sb->s_magic != SYSFS_MAGIC && sb->s_magic != TMPFS_MAGIC &&
+ sb->s_magic != RAMFS_MAGIC) {
+ transmute = 1;
+ sp->smk_flags |= SMK_SB_UNTRUSTED;
+ }
}
/*
@@ -908,6 +919,7 @@ static int smack_bprm_set_creds(struct linux_binprm *bprm)
struct inode *inode = file_inode(bprm->file);
struct task_smack *bsp = bprm->cred->security;
struct inode_smack *isp;
+ struct superblock_smack *sbsp;
int rc;
if (bprm->cred_prepared)
@@ -917,6 +929,11 @@ static int smack_bprm_set_creds(struct linux_binprm *bprm)
if (isp->smk_task == NULL || isp->smk_task == bsp->smk_task)
return 0;
+ sbsp = inode->i_sb->s_security;
+ if ((sbsp->smk_flags & SMK_SB_UNTRUSTED) &&
+ isp->smk_task != sbsp->smk_root)
+ return 0;
+
if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
struct task_struct *tracer;
rc = 0;
@@ -1203,6 +1220,7 @@ static int smack_inode_rename(struct inode *old_inode,
*/
static int smack_inode_permission(struct inode *inode, int mask)
{
+ struct superblock_smack *sbsp = inode->i_sb->s_security;
struct smk_audit_info ad;
int no_block = mask & MAY_NOT_BLOCK;
int rc;
@@ -1214,6 +1232,11 @@ static int smack_inode_permission(struct inode *inode, int mask)
if (mask == 0)
return 0;
+ if (sbsp->smk_flags & SMK_SB_UNTRUSTED) {
+ if (smk_of_inode(inode) != sbsp->smk_root)
+ return -EACCES;
+ }
+
/* May be droppable after audit */
if (no_block)
return -ECHILD;
@@ -1708,6 +1731,7 @@ static int smack_mmap_file(struct file *file,
struct task_smack *tsp;
struct smack_known *okp;
struct inode_smack *isp;
+ struct superblock_smack *sbsp;
int may;
int mmay;
int tmay;
@@ -1719,6 +1743,10 @@ static int smack_mmap_file(struct file *file,
isp = file_inode(file)->i_security;
if (isp->smk_mmap == NULL)
return 0;
+ sbsp = file_inode(file)->i_sb->s_security;
+ if (sbsp->smk_flags & SMK_SB_UNTRUSTED &&
+ isp->smk_mmap != sbsp->smk_root)
+ return -EACCES;
mkp = isp->smk_mmap;
tsp = current_security();