summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c2
-rw-r--r--fs/attr.c19
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/devpts/inode.c3
-rw-r--r--fs/exec.c2
-rw-r--r--fs/inode.c7
-rw-r--r--fs/kernfs/mount.c5
-rw-r--r--fs/namei.c55
-rw-r--r--fs/namespace.c99
-rw-r--r--fs/nfsd/nfsctl.c13
-rw-r--r--fs/posix_acl.c8
-rw-r--r--fs/proc/inode.c15
-rw-r--r--fs/proc/internal.h3
-rw-r--r--fs/proc/root.c61
-rw-r--r--fs/quota/dquot.c8
-rw-r--r--fs/quota/quota.c14
-rw-r--r--fs/super.c69
-rw-r--r--fs/sysfs/mount.c5
-rw-r--r--fs/xattr.c7
19 files changed, 242 insertions, 155 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 0576eaeb60b9..5b6a1743ea17 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -266,7 +266,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
if (IS_ERR(acl))
return PTR_ERR(acl);
else if (acl) {
- retval = posix_acl_valid(acl);
+ retval = posix_acl_valid(inode->i_sb->s_user_ns, acl);
if (retval)
goto err_out;
}
diff --git a/fs/attr.c b/fs/attr.c
index 25b24d0f6c88..42bb42bb3c72 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -255,6 +255,25 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID)))
return 0;
+ /*
+ * Verify that uid/gid changes are valid in the target
+ * namespace of the superblock.
+ */
+ if (ia_valid & ATTR_UID &&
+ !kuid_has_mapping(inode->i_sb->s_user_ns, attr->ia_uid))
+ return -EOVERFLOW;
+ if (ia_valid & ATTR_GID &&
+ !kgid_has_mapping(inode->i_sb->s_user_ns, attr->ia_gid))
+ return -EOVERFLOW;
+
+ /* Don't allow modifications of files with invalid uids or
+ * gids unless those uids & gids are being made valid.
+ */
+ if (!(ia_valid & ATTR_UID) && !uid_valid(inode->i_uid))
+ return -EOVERFLOW;
+ if (!(ia_valid & ATTR_GID) && !gid_valid(inode->i_gid))
+ return -EOVERFLOW;
+
error = security_inode_setattr(dentry, attr);
if (error)
return error;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5cbd5391667e..ada42cf42d06 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1846,7 +1846,7 @@ struct block_device *lookup_bdev(const char *pathname)
if (!S_ISBLK(inode->i_mode))
goto fail;
error = -EACCES;
- if (path.mnt->mnt_flags & MNT_NODEV)
+ if (!may_open_dev(&path))
goto fail;
error = -ENOMEM;
bdev = bd_acquire(inode);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 37c134a132c7..d116453b0276 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -396,6 +396,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
{
struct inode *inode;
+ s->s_iflags &= ~SB_I_NODEV;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = DEVPTS_SUPER_MAGIC;
@@ -480,7 +481,7 @@ static struct file_system_type devpts_fs_type = {
.name = "devpts",
.mount = devpts_mount,
.kill_sb = devpts_kill_sb,
- .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT,
};
/*
diff --git a/fs/exec.c b/fs/exec.c
index 887c1c955df8..ca239fc86d8d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1411,7 +1411,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
bprm->cred->euid = current_euid();
bprm->cred->egid = current_egid();
- if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+ if (!mnt_may_suid(bprm->file->f_path.mnt))
return;
if (task_no_new_privs(current))
diff --git a/fs/inode.c b/fs/inode.c
index e171f7b5f9e4..9cef4e16aeda 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1619,6 +1619,13 @@ bool atime_needs_update(const struct path *path, struct inode *inode)
if (inode->i_flags & S_NOATIME)
return false;
+
+ /* Atime updates will likely cause i_uid and i_gid to be written
+ * back improprely if their true value is unknown to the vfs.
+ */
+ if (HAS_UNMAPPED_ID(inode))
+ return false;
+
if (IS_NOATIME(inode))
return false;
if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 63534f5f9073..b3d73ad52b22 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -152,6 +152,8 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
struct dentry *root;
info->sb = sb;
+ /* Userspace would break if executables or devices appear on sysfs */
+ sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = magic;
@@ -241,7 +243,8 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
info->root = root;
info->ns = ns;
- sb = sget(fs_type, kernfs_test_super, kernfs_set_super, flags, info);
+ sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
+ &init_user_ns, info);
if (IS_ERR(sb) || sb->s_fs_info != info)
kfree(info);
if (IS_ERR(sb))
diff --git a/fs/namei.c b/fs/namei.c
index 68a896c804b7..c386a329ab20 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -36,6 +36,7 @@
#include <linux/posix_acl.h>
#include <linux/hash.h>
#include <linux/bitops.h>
+#include <linux/init_task.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -410,6 +411,14 @@ int __inode_permission(struct inode *inode, int mask)
*/
if (IS_IMMUTABLE(inode))
return -EACCES;
+
+ /*
+ * Updating mtime will likely cause i_uid and i_gid to be
+ * written back improperly if their true value is unknown
+ * to the vfs.
+ */
+ if (HAS_UNMAPPED_ID(inode))
+ return -EACCES;
}
retval = do_inode_permission(inode, mask);
@@ -901,6 +910,7 @@ static inline int may_follow_link(struct nameidata *nd)
{
const struct inode *inode;
const struct inode *parent;
+ kuid_t puid;
if (!sysctl_protected_symlinks)
return 0;
@@ -916,7 +926,8 @@ static inline int may_follow_link(struct nameidata *nd)
return 0;
/* Allowed if parent directory and link owner match. */
- if (uid_eq(parent->i_uid, inode->i_uid))
+ puid = parent->i_uid;
+ if (uid_valid(puid) && uid_eq(puid, inode->i_uid))
return 0;
if (nd->flags & LOOKUP_RCU)
@@ -1089,6 +1100,7 @@ static int follow_automount(struct path *path, struct nameidata *nd,
bool *need_mntput)
{
struct vfsmount *mnt;
+ const struct cred *old_cred;
int err;
if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
@@ -1110,11 +1122,16 @@ static int follow_automount(struct path *path, struct nameidata *nd,
path->dentry->d_inode)
return -EISDIR;
+ if (path->dentry->d_sb->s_user_ns != &init_user_ns)
+ return -EACCES;
+
nd->total_link_count++;
if (nd->total_link_count >= 40)
return -ELOOP;
+ old_cred = override_creds(&init_cred);
mnt = path->dentry->d_op->d_automount(path);
+ revert_creds(old_cred);
if (IS_ERR(mnt)) {
/*
* The filesystem is allowed to return -EISDIR here to indicate
@@ -2741,10 +2758,11 @@ EXPORT_SYMBOL(__check_sticky);
* c. have CAP_FOWNER capability
* 6. If the victim is append-only or immutable we can't do antyhing with
* links pointing to it.
- * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
- * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
- * 9. We can't remove a root or mountpoint.
- * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
+ * 7. If the victim has an unknown uid or gid we can't change the inode.
+ * 8. If we were asked to remove a directory and victim isn't one - ENOTDIR.
+ * 9. If we were asked to remove a non-directory and victim isn't one - EISDIR.
+ * 10. We can't remove a root or mountpoint.
+ * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
* nfs_async_unlink().
*/
static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
@@ -2766,7 +2784,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
return -EPERM;
if (check_sticky(dir, inode) || IS_APPEND(inode) ||
- IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
+ IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(inode))
return -EPERM;
if (isdir) {
if (!d_is_dir(victim))
@@ -2787,16 +2805,22 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
* 1. We can't do it if child already exists (open has special treatment for
* this case, but since we are inlined it's OK)
* 2. We can't do it if dir is read-only (done in permission())
- * 3. We should have write and exec permissions on dir
- * 4. We can't do it if dir is immutable (done in permission())
+ * 3. We can't do it if the fs can't represent the fsuid or fsgid.
+ * 4. We should have write and exec permissions on dir
+ * 5. We can't do it if dir is immutable (done in permission())
*/
static inline int may_create(struct inode *dir, struct dentry *child)
{
+ struct user_namespace *s_user_ns;
audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
if (child->d_inode)
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
+ s_user_ns = dir->i_sb->s_user_ns;
+ if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
+ !kgid_has_mapping(s_user_ns, current_fsgid()))
+ return -EOVERFLOW;
return inode_permission(dir, MAY_WRITE | MAY_EXEC);
}
@@ -2865,6 +2889,12 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
}
EXPORT_SYMBOL(vfs_create);
+bool may_open_dev(const struct path *path)
+{
+ return !(path->mnt->mnt_flags & MNT_NODEV) &&
+ !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
+}
+
static int may_open(struct path *path, int acc_mode, int flag)
{
struct dentry *dentry = path->dentry;
@@ -2883,7 +2913,7 @@ static int may_open(struct path *path, int acc_mode, int flag)
break;
case S_IFBLK:
case S_IFCHR:
- if (path->mnt->mnt_flags & MNT_NODEV)
+ if (!may_open_dev(path))
return -EACCES;
/*FALLTHRU*/
case S_IFIFO:
@@ -4135,6 +4165,13 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
*/
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return -EPERM;
+ /*
+ * Updating the link count will likely cause i_uid and i_gid to
+ * be writen back improperly if their true value is unknown to
+ * the vfs.
+ */
+ if (HAS_UNMAPPED_ID(inode))
+ return -EPERM;
if (!dir->i_op->link)
return -EPERM;
if (S_ISDIR(inode->i_mode))
diff --git a/fs/namespace.c b/fs/namespace.c
index 419f746d851d..7bb2cda3bfef 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2186,13 +2186,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
!(mnt_flags & MNT_NODEV)) {
- /* Was the nodev implicitly added in mount? */
- if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
- !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
- mnt_flags |= MNT_NODEV;
- } else {
- return -EPERM;
- }
+ return -EPERM;
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
!(mnt_flags & MNT_NOSUID)) {
@@ -2376,7 +2370,7 @@ unlock:
return err;
}
-static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags);
+static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags);
/*
* create a new mount for userspace and request it to be added into the
@@ -2386,7 +2380,6 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
int mnt_flags, const char *name, void *data)
{
struct file_system_type *type;
- struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
struct vfsmount *mnt;
int err;
@@ -2397,26 +2390,6 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
if (!type)
return -ENODEV;
- if (user_ns != &init_user_ns) {
- if (!(type->fs_flags & FS_USERNS_MOUNT)) {
- put_filesystem(type);
- return -EPERM;
- }
- /* Only in special cases allow devices from mounts
- * created outside the initial user namespace.
- */
- if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
- flags |= MS_NODEV;
- mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
- }
- if (type->fs_flags & FS_USERNS_VISIBLE) {
- if (!fs_fully_visible(type, &mnt_flags)) {
- put_filesystem(type);
- return -EPERM;
- }
- }
- }
-
mnt = vfs_kern_mount(type, flags, name, data);
if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
!mnt->mnt_sb->s_subtype)
@@ -2426,6 +2399,11 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
if (IS_ERR(mnt))
return PTR_ERR(mnt);
+ if (mount_too_revealing(mnt, &mnt_flags)) {
+ mntput(mnt);
+ return -EPERM;
+ }
+
err = do_add_mount(real_mount(mnt), path, mnt_flags);
if (err)
mntput(mnt);
@@ -3217,22 +3195,19 @@ bool current_chrooted(void)
return chrooted;
}
-static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
+static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
+ int *new_mnt_flags)
{
- struct mnt_namespace *ns = current->nsproxy->mnt_ns;
int new_flags = *new_mnt_flags;
struct mount *mnt;
bool visible = false;
- if (unlikely(!ns))
- return false;
-
down_read(&namespace_sem);
list_for_each_entry(mnt, &ns->list, mnt_list) {
struct mount *child;
int mnt_flags;
- if (mnt->mnt.mnt_sb->s_type != type)
+ if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
continue;
/* This mount is not fully visible if it's root directory
@@ -3241,12 +3216,8 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
continue;
- /* Read the mount flags and filter out flags that
- * may safely be ignored.
- */
+ /* A local view of the mount flags */
mnt_flags = mnt->mnt.mnt_flags;
- if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
- mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
/* Don't miss readonly hidden in the superblock flags */
if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
@@ -3258,15 +3229,6 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
if ((mnt_flags & MNT_LOCK_READONLY) &&
!(new_flags & MNT_READONLY))
continue;
- if ((mnt_flags & MNT_LOCK_NODEV) &&
- !(new_flags & MNT_NODEV))
- continue;
- if ((mnt_flags & MNT_LOCK_NOSUID) &&
- !(new_flags & MNT_NOSUID))
- continue;
- if ((mnt_flags & MNT_LOCK_NOEXEC) &&
- !(new_flags & MNT_NOEXEC))
- continue;
if ((mnt_flags & MNT_LOCK_ATIME) &&
((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
continue;
@@ -3286,9 +3248,6 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
}
/* Preserve the locked attributes */
*new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
- MNT_LOCK_NODEV | \
- MNT_LOCK_NOSUID | \
- MNT_LOCK_NOEXEC | \
MNT_LOCK_ATIME);
visible = true;
goto found;
@@ -3299,6 +3258,42 @@ found:
return visible;
}
+static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
+{
+ const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
+ struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+ unsigned long s_iflags;
+
+ if (ns->user_ns == &init_user_ns)
+ return false;
+
+ /* Can this filesystem be too revealing? */
+ s_iflags = mnt->mnt_sb->s_iflags;
+ if (!(s_iflags & SB_I_USERNS_VISIBLE))
+ return false;
+
+ if ((s_iflags & required_iflags) != required_iflags) {
+ WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
+ required_iflags);
+ return true;
+ }
+
+ return !mnt_already_visible(ns, mnt, new_mnt_flags);
+}
+
+bool mnt_may_suid(struct vfsmount *mnt)
+{
+ /*
+ * Foreign mounts (accessed via fchdir or through /proc
+ * symlinks) are always treated as if they are nosuid. This
+ * prevents namespaces from trusting potentially unsafe
+ * suid/sgid bits, file caps, or security labels that originate
+ * in other namespaces.
+ */
+ return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
+ current_in_userns(mnt->mnt_sb->s_user_ns);
+}
+
static struct ns_common *mntns_get(struct task_struct *task)
{
struct ns_common *ns = NULL;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e7787777620e..65ad0165a94f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1151,20 +1151,15 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
#endif
/* last one */ {""}
};
- struct net *net = data;
- int ret;
-
- ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
- if (ret)
- return ret;
- sb->s_fs_info = get_net(net);
- return 0;
+ get_net(sb->s_fs_info);
+ return simple_fill_super(sb, 0x6e667364, nfsd_files);
}
static struct dentry *nfsd_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super);
+ struct net *net = current->nsproxy->net_ns;
+ return mount_ns(fs_type, flags, data, net, net->user_ns, nfsd_fill_super);
}
static void nfsd_umount(struct super_block *sb)
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index edc452c2a563..59d47ab0791a 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -205,7 +205,7 @@ posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
* Check if an acl is valid. Returns 0 if it is, or -E... otherwise.
*/
int
-posix_acl_valid(const struct posix_acl *acl)
+posix_acl_valid(struct user_namespace *user_ns, const struct posix_acl *acl)
{
const struct posix_acl_entry *pa, *pe;
int state = ACL_USER_OBJ;
@@ -225,7 +225,7 @@ posix_acl_valid(const struct posix_acl *acl)
case ACL_USER:
if (state != ACL_USER)
return -EINVAL;
- if (!uid_valid(pa->e_uid))
+ if (!kuid_has_mapping(user_ns, pa->e_uid))
return -EINVAL;
needs_mask = 1;
break;
@@ -240,7 +240,7 @@ posix_acl_valid(const struct posix_acl *acl)
case ACL_GROUP:
if (state != ACL_GROUP)
return -EINVAL;
- if (!gid_valid(pa->e_gid))
+ if (!kgid_has_mapping(user_ns, pa->e_gid))
return -EINVAL;
needs_mask = 1;
break;
@@ -834,7 +834,7 @@ set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
return -EPERM;
if (acl) {
- int ret = posix_acl_valid(acl);
+ int ret = posix_acl_valid(inode->i_sb->s_user_ns, acl);
if (ret)
return ret;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 42305ddcbaa0..c1b72388e571 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -457,17 +457,30 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
return inode;
}
-int proc_fill_super(struct super_block *s)
+int proc_fill_super(struct super_block *s, void *data, int silent)
{
+ struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
struct inode *root_inode;
int ret;
+ if (!proc_parse_options(data, ns))
+ return -EINVAL;
+
+ /* User space would break if executables or devices appear on proc */
+ s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = PROC_SUPER_MAGIC;
s->s_op = &proc_sops;
s->s_time_gran = 1;
+
+ /*
+ * procfs isn't actually a stacking filesystem; however, there is
+ * too much magic going on inside it to permit stacking things on
+ * top of it
+ */
+ s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
pde_get(&proc_root);
root_inode = proc_get_inode(s, &proc_root);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index aa2781095bd1..7931c558c192 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -212,7 +212,7 @@ extern const struct inode_operations proc_pid_link_inode_operations;
extern void proc_init_inodecache(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
-extern int proc_fill_super(struct super_block *);
+extern int proc_fill_super(struct super_block *, void *data, int flags);
extern void proc_entry_rundown(struct proc_dir_entry *);
/*
@@ -268,6 +268,7 @@ static inline void proc_tty_init(void) {}
* root.c
*/
extern struct proc_dir_entry proc_root;
+extern int proc_parse_options(char *options, struct pid_namespace *pid);
extern void proc_self_init(void);
extern int proc_remount(struct super_block *, int *, char *);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 06702783bf40..8d3e484055a6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -23,21 +23,6 @@
#include "internal.h"
-static int proc_test_super(struct super_block *sb, void *data)
-{
- return sb->s_fs_info == data;
-}
-
-static int proc_set_super(struct super_block *sb, void *data)
-{
- int err = set_anon_super(sb, NULL);
- if (!err) {
- struct pid_namespace *ns = (struct pid_namespace *)data;
- sb->s_fs_info = get_pid_ns(ns);
- }
- return err;
-}
-
enum {
Opt_gid, Opt_hidepid, Opt_err,
};
@@ -48,7 +33,7 @@ static const match_table_t tokens = {
{Opt_err, NULL},
};
-static int proc_parse_options(char *options, struct pid_namespace *pid)
+int proc_parse_options(char *options, struct pid_namespace *pid)
{
char *p;
substring_t args[MAX_OPT_ARGS];
@@ -100,52 +85,16 @@ int proc_remount(struct super_block *sb, int *flags, char *data)
static struct dentry *proc_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- int err;
- struct super_block *sb;
struct pid_namespace *ns;
- char *options;
if (flags & MS_KERNMOUNT) {
- ns = (struct pid_namespace *)data;
- options = NULL;
+ ns = data;
+ data = NULL;
} else {
ns = task_active_pid_ns(current);
- options = data;
-
- /* Does the mounter have privilege over the pid namespace? */
- if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
- return ERR_PTR(-EPERM);
- }
-
- sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
- if (IS_ERR(sb))
- return ERR_CAST(sb);
-
- /*
- * procfs isn't actually a stacking filesystem; however, there is
- * too much magic going on inside it to permit stacking things on
- * top of it
- */
- sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
-
- if (!proc_parse_options(options, ns)) {
- deactivate_locked_super(sb);
- return ERR_PTR(-EINVAL);
- }
-
- if (!sb->s_root) {
- err = proc_fill_super(sb);
- if (err) {
- deactivate_locked_super(sb);
- return ERR_PTR(err);
- }
-
- sb->s_flags |= MS_ACTIVE;
- /* User space would break if executables appear on proc */
- sb->s_iflags |= SB_I_NOEXEC;
}
- return dget(sb->s_root);
+ return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super);
}
static void proc_kill_sb(struct super_block *sb)
@@ -165,7 +114,7 @@ static struct file_system_type proc_fs_type = {
.name = "proc",
.mount = proc_mount,
.kill_sb = proc_kill_sb,
- .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT,
};
void __init proc_root_init(void)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b1322dd9d136..1bfac28b7e7d 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -841,6 +841,9 @@ struct dquot *dqget(struct super_block *sb, struct kqid qid)
unsigned int hashent = hashfn(sb, qid);
struct dquot *dquot, *empty = NULL;
+ if (!qid_has_mapping(sb->s_user_ns, qid))
+ return ERR_PTR(-EINVAL);
+
if (!sb_has_quota_active(sb, qid.type))
return ERR_PTR(-ESRCH);
we_slept:
@@ -2268,6 +2271,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
error = -EINVAL;
goto out_fmt;
}
+ /* Filesystems outside of init_user_ns not yet supported */
+ if (sb->s_user_ns != &init_user_ns) {
+ error = -EINVAL;
+ goto out_fmt;
+ }
/* Usage always has to be set... */
if (!(flags & DQUOT_USAGE_ENABLED)) {
error = -EINVAL;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 0f10ee9892ce..35df08ee9c97 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -211,7 +211,7 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->get_dqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
ret = sb->s_qcop->get_dqblk(sb, qid, &fdq);
if (ret)
@@ -237,7 +237,7 @@ static int quota_getnextquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->get_nextdqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq);
if (ret)
@@ -288,7 +288,7 @@ static int quota_setquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->set_dqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
copy_from_if_dqblk(&fdq, &idq);
return sb->s_qcop->set_dqblk(sb, qid, &fdq);
@@ -581,10 +581,10 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->set_dqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
/* Are we actually setting timer / warning limits for all users? */
- if (from_kqid(&init_user_ns, qid) == 0 &&
+ if (from_kqid(sb->s_user_ns, qid) == 0 &&
fdq.d_fieldmask & (FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK)) {
struct qc_info qinfo;
int ret;
@@ -642,7 +642,7 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->get_dqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
ret = sb->s_qcop->get_dqblk(sb, qid, &qdq);
if (ret)
@@ -669,7 +669,7 @@ static int quota_getnextxquota(struct super_block *sb, int type, qid_t id,
if (!sb->s_qcop->get_nextdqblk)
return -ENOSYS;
qid = make_kqid(current_user_ns(), type, id);
- if (!qid_valid(qid))
+ if (!qid_has_mapping(sb->s_user_ns, qid))
return -EINVAL;
ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq);
if (ret)
diff --git a/fs/super.c b/fs/super.c
index 5806ffd45563..c2ff475c1711 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -33,6 +33,7 @@
#include <linux/cleancache.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
+#include <linux/user_namespace.h>
#include "internal.h"
@@ -165,6 +166,7 @@ static void destroy_super(struct super_block *s)
list_lru_destroy(&s->s_inode_lru);
security_sb_free(s);
WARN_ON(!list_empty(&s->s_mounts));
+ put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
kfree(s->s_options);
call_rcu(&s->rcu, destroy_super_rcu);
@@ -174,11 +176,13 @@ static void destroy_super(struct super_block *s)
* alloc_super - create new superblock
* @type: filesystem type superblock should belong to
* @flags: the mount flags
+ * @user_ns: User namespace for the super_block
*
* Allocates and initializes a new &struct super_block. alloc_super()
* returns a pointer new superblock or %NULL if allocation had failed.
*/
-static struct super_block *alloc_super(struct file_system_type *type, int flags)
+static struct super_block *alloc_super(struct file_system_type *type, int flags,
+ struct user_namespace *user_ns)
{
struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
static const struct super_operations default_op;
@@ -188,6 +192,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
return NULL;
INIT_LIST_HEAD(&s->s_mounts);
+ s->s_user_ns = get_user_ns(user_ns);
if (security_sb_alloc(s))
goto fail;
@@ -201,6 +206,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
init_waitqueue_head(&s->s_writers.wait_unfrozen);
s->s_bdi = &noop_backing_dev_info;
s->s_flags = flags;
+ if (s->s_user_ns != &init_user_ns)
+ s->s_iflags |= SB_I_NODEV;
INIT_HLIST_NODE(&s->s_instances);
INIT_HLIST_BL_HEAD(&s->s_anon);
mutex_init(&s->s_sync_lock);
@@ -445,29 +452,42 @@ void generic_shutdown_super(struct super_block *sb)
EXPORT_SYMBOL(generic_shutdown_super);
/**
- * sget - find or create a superblock
+ * sget_userns - find or create a superblock
* @type: filesystem type superblock should belong to
* @test: comparison callback
* @set: setup callback
* @flags: mount flags
+ * @user_ns: User namespace for the super_block
* @data: argument to each of them
*/
-struct super_block *sget(struct file_system_type *type,
+struct super_block *sget_userns(struct file_system_type *type,
int (*test)(struct super_block *,void *),
int (*set)(struct super_block *,void *),
- int flags,
+ int flags, struct user_namespace *user_ns,
void *data)
{
struct super_block *s = NULL;
struct super_block *old;
int err;
+ if (!(flags & MS_KERNMOUNT) &&
+ !(type->fs_flags & FS_USERNS_MOUNT) &&
+ !capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
retry:
spin_lock(&sb_lock);
if (test) {
hlist_for_each_entry(old, &type->fs_supers, s_instances) {
if (!test(old, data))
continue;
+ if (user_ns != old->s_user_ns) {
+ spin_unlock(&sb_lock);
+ if (s) {
+ up_write(&s->s_umount);
+ destroy_super(s);
+ }
+ return ERR_PTR(-EBUSY);
+ }
if (!grab_super(old))
goto retry;
if (s) {
@@ -480,7 +500,7 @@ retry:
}
if (!s) {
spin_unlock(&sb_lock);
- s = alloc_super(type, flags);
+ s = alloc_super(type, flags, user_ns);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
@@ -503,6 +523,31 @@ retry:
return s;
}
+EXPORT_SYMBOL(sget_userns);
+
+/**
+ * sget - find or create a superblock
+ * @type: filesystem type superblock should belong to
+ * @test: comparison callback
+ * @set: setup callback
+ * @flags: mount flags
+ * @data: argument to each of them
+ */
+struct super_block *sget(struct file_system_type *type,
+ int (*test)(struct super_block *,void *),
+ int (*set)(struct super_block *,void *),
+ int flags,
+ void *data)
+{
+ struct user_namespace *user_ns = current_user_ns();
+
+ /* Ensure the requestor has permissions over the target filesystem */
+ if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ return sget_userns(type, test, set, flags, user_ns, data);
+}
+
EXPORT_SYMBOL(sget);
void drop_super(struct super_block *sb)
@@ -920,12 +965,20 @@ static int ns_set_super(struct super_block *sb, void *data)
return set_anon_super(sb, NULL);
}
-struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
- void *data, int (*fill_super)(struct super_block *, void *, int))
+struct dentry *mount_ns(struct file_system_type *fs_type,
+ int flags, void *data, void *ns, struct user_namespace *user_ns,
+ int (*fill_super)(struct super_block *, void *, int))
{
struct super_block *sb;
- sb = sget(fs_type, ns_test_super, ns_set_super, flags, data);
+ /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
+ * over the namespace.
+ */
+ if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags,
+ user_ns, ns);
if (IS_ERR(sb))
return ERR_CAST(sb);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f3db82071cfb..20b8f82e115b 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -41,8 +41,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
if (IS_ERR(root) || !new_sb)
kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
else if (new_sb)
- /* Userspace would break if executables appear on sysfs */
- root->d_sb->s_iflags |= SB_I_NOEXEC;
+ root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
return root;
}
@@ -59,7 +58,7 @@ static struct file_system_type sysfs_fs_type = {
.name = "sysfs",
.mount = sysfs_mount,
.kill_sb = sysfs_kill_sb,
- .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT,
};
int __init sysfs_init(void)
diff --git a/fs/xattr.c b/fs/xattr.c
index 4beafc43daa5..c243905835ab 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -38,6 +38,13 @@ xattr_permission(struct inode *inode, const char *name, int mask)
if (mask & MAY_WRITE) {
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
return -EPERM;
+ /*
+ * Updating an xattr will likely cause i_uid and i_gid
+ * to be writen back improperly if their true value is
+ * unknown to the vfs.
+ */
+ if (HAS_UNMAPPED_ID(inode))
+ return -EPERM;
}
/*