summaryrefslogtreecommitdiff
path: root/fs/namespace.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c185
1 files changed, 104 insertions, 81 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 1b466c54a357..e13d9ab4f564 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -355,12 +355,13 @@ static struct mount *alloc_vfsmnt(const char *name)
if (err)
goto out_free_cache;
- if (name) {
+ if (name)
mnt->mnt_devname = kstrdup_const(name,
GFP_KERNEL_ACCOUNT);
- if (!mnt->mnt_devname)
- goto out_free_id;
- }
+ else
+ mnt->mnt_devname = "none";
+ if (!mnt->mnt_devname)
+ goto out_free_id;
#ifdef CONFIG_SMP
mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
@@ -1264,7 +1265,7 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc)
if (!fc->root)
return ERR_PTR(-EINVAL);
- mnt = alloc_vfsmnt(fc->source ?: "none");
+ mnt = alloc_vfsmnt(fc->source);
if (!mnt)
return ERR_PTR(-ENOMEM);
@@ -1325,21 +1326,6 @@ struct vfsmount *vfs_kern_mount(struct file_system_type *type,
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
-struct vfsmount *
-vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
- const char *name, void *data)
-{
- /* Until it is worked out how to pass the user namespace
- * through from the parent mount to the submount don't support
- * unprivileged mounts with submounts.
- */
- if (mountpoint->d_sb->s_user_ns != &init_user_ns)
- return ERR_PTR(-EPERM);
-
- return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
-}
-EXPORT_SYMBOL_GPL(vfs_submount);
-
static struct mount *clone_mnt(struct mount *old, struct dentry *root,
int flag)
{
@@ -2424,7 +2410,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
namespace_unlock();
}
-bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+static bool __has_locked_children(struct mount *mnt, struct dentry *dentry)
{
struct mount *child;
@@ -2438,6 +2424,16 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
return false;
}
+bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+{
+ bool res;
+
+ read_seqlock_excl(&mount_lock);
+ res = __has_locked_children(mnt, dentry);
+ read_sequnlock_excl(&mount_lock);
+ return res;
+}
+
/*
* Check that there aren't references to earlier/same mount namespaces in the
* specified subtree. Such references can act as pins for mount namespaces
@@ -2482,23 +2478,27 @@ struct vfsmount *clone_private_mount(const struct path *path)
if (IS_MNT_UNBINDABLE(old_mnt))
return ERR_PTR(-EINVAL);
- if (mnt_has_parent(old_mnt)) {
- if (!check_mnt(old_mnt))
- return ERR_PTR(-EINVAL);
- } else {
- if (!is_mounted(&old_mnt->mnt))
- return ERR_PTR(-EINVAL);
-
- /* Make sure this isn't something purely kernel internal. */
- if (!is_anon_ns(old_mnt->mnt_ns))
+ /*
+ * Make sure the source mount is acceptable.
+ * Anything mounted in our mount namespace is allowed.
+ * Otherwise, it must be the root of an anonymous mount
+ * namespace, and we need to make sure no namespace
+ * loops get created.
+ */
+ if (!check_mnt(old_mnt)) {
+ if (!is_mounted(&old_mnt->mnt) ||
+ !is_anon_ns(old_mnt->mnt_ns) ||
+ mnt_has_parent(old_mnt))
return ERR_PTR(-EINVAL);
- /* Make sure we don't create mount namespace loops. */
if (!check_for_nsfs_mounts(old_mnt))
return ERR_PTR(-EINVAL);
}
- if (has_locked_children(old_mnt, path->dentry))
+ if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ if (__has_locked_children(old_mnt, path->dentry))
return ERR_PTR(-EINVAL);
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
@@ -2944,6 +2944,10 @@ static int do_change_type(struct path *path, int ms_flags)
return -EINVAL;
namespace_lock();
+ if (!check_mnt(mnt)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
if (err)
@@ -3035,7 +3039,7 @@ static struct mount *__do_loopback(struct path *old_path, int recurse)
if (!may_copy_tree(old_path))
return mnt;
- if (!recurse && has_locked_children(old, old_path->dentry))
+ if (!recurse && __has_locked_children(old, old_path->dentry))
return mnt;
if (recurse)
@@ -3428,7 +3432,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
goto out;
/* From mount should not have locked children in place of To's root */
- if (has_locked_children(from, to->mnt.mnt_root))
+ if (__has_locked_children(from, to->mnt.mnt_root))
goto out;
/* Setting sharing groups is only allowed on private mounts */
@@ -3442,7 +3446,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
if (IS_MNT_SLAVE(from)) {
struct mount *m = from->mnt_master;
- list_add(&to->mnt_slave, &m->mnt_slave_list);
+ list_add(&to->mnt_slave, &from->mnt_slave);
to->mnt_master = m;
}
@@ -3467,18 +3471,25 @@ out:
* Check if path is overmounted, i.e., if there's a mount on top of
* @path->mnt with @path->dentry as mountpoint.
*
- * Context: This function expects namespace_lock() to be held.
+ * Context: namespace_sem must be held at least shared.
+ * MUST NOT be called under lock_mount_hash() (there one should just
+ * call __lookup_mnt() and check if it returns NULL).
* Return: If path is overmounted true is returned, false if not.
*/
static inline bool path_overmounted(const struct path *path)
{
+ unsigned seq = read_seqbegin(&mount_lock);
+ bool no_child;
+
rcu_read_lock();
- if (unlikely(__lookup_mnt(path->mnt, path->dentry))) {
- rcu_read_unlock();
- return true;
- }
+ no_child = !__lookup_mnt(path->mnt, path->dentry);
rcu_read_unlock();
- return false;
+ if (need_seqretry(&mount_lock, seq)) {
+ read_seqlock_excl(&mount_lock);
+ no_child = !__lookup_mnt(path->mnt, path->dentry);
+ read_sequnlock_excl(&mount_lock);
+ }
+ return unlikely(!no_child);
}
/**
@@ -3637,46 +3648,41 @@ static int do_move_mount(struct path *old_path,
ns = old->mnt_ns;
err = -EINVAL;
- if (!may_use_mount(p))
- goto out;
-
/* The thing moved must be mounted... */
if (!is_mounted(&old->mnt))
goto out;
- /* ... and either ours or the root of anon namespace */
- if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
- goto out;
-
- if (is_anon_ns(ns)) {
+ if (check_mnt(old)) {
+ /* if the source is in our namespace... */
+ /* ... it should be detachable from parent */
+ if (!mnt_has_parent(old) || IS_MNT_LOCKED(old))
+ goto out;
+ /* ... and the target should be in our namespace */
+ if (!check_mnt(p))
+ goto out;
+ } else {
/*
- * Ending up with two files referring to the root of the
- * same anonymous mount namespace would cause an error
- * as this would mean trying to move the same mount
- * twice into the mount tree which would be rejected
- * later. But be explicit about it right here.
+ * otherwise the source must be the root of some anon namespace.
+ * AV: check for mount being root of an anon namespace is worth
+ * an inlined predicate...
*/
- if ((is_anon_ns(p->mnt_ns) && ns == p->mnt_ns))
+ if (!is_anon_ns(ns) || mnt_has_parent(old))
goto out;
-
/*
- * If this is an anonymous mount tree ensure that mount
- * propagation can detect mounts that were just
- * propagated to the target mount tree so we don't
- * propagate onto them.
+ * Bail out early if the target is within the same namespace -
+ * subsequent checks would've rejected that, but they lose
+ * some corner cases if we check it early.
*/
- ns->mntns_flags |= MNTNS_PROPAGATING;
- } else if (is_anon_ns(p->mnt_ns)) {
+ if (ns == p->mnt_ns)
+ goto out;
/*
- * Don't allow moving an attached mount tree to an
- * anonymous mount tree.
+ * Target should be either in our namespace or in an acceptable
+ * anon namespace, sensu check_anonymous_mnt().
*/
- goto out;
+ if (!may_use_mount(p))
+ goto out;
}
- if (old->mnt.mnt_flags & MNT_LOCKED)
- goto out;
-
if (!path_mounted(old_path))
goto out;
@@ -3722,8 +3728,6 @@ static int do_move_mount(struct path *old_path,
if (attached)
put_mountpoint(old_mp);
out:
- if (is_anon_ns(ns))
- ns->mntns_flags &= ~MNTNS_PROPAGATING;
unlock_mount(mp);
if (!err) {
if (attached) {
@@ -3899,10 +3903,6 @@ int finish_automount(struct vfsmount *m, const struct path *path)
return PTR_ERR(m);
mnt = real_mount(m);
- /* The new mount record should have at least 2 refs to prevent it being
- * expired before we get a chance to add it
- */
- BUG_ON(mnt_get_count(mnt) < 2);
if (m->mnt_sb == path->mnt->mnt_sb &&
m->mnt_root == dentry) {
@@ -3935,7 +3935,6 @@ int finish_automount(struct vfsmount *m, const struct path *path)
unlock_mount(mp);
if (unlikely(err))
goto discard;
- mntput(m);
return 0;
discard_locked:
@@ -3949,7 +3948,6 @@ discard:
namespace_unlock();
}
mntput(m);
- mntput(m);
return err;
}
@@ -3986,11 +3984,14 @@ void mark_mounts_for_expiry(struct list_head *mounts)
/* extract from the expiration list every vfsmount that matches the
* following criteria:
+ * - already mounted
* - only referenced by its parent vfsmount
* - still marked for expiry (marked on the last call here; marks are
* cleared by mntput())
*/
list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
+ if (!is_mounted(&mnt->mnt))
+ continue;
if (!xchg(&mnt->mnt_expiry_mark, 1) ||
propagate_mount_busy(mnt, 1))
continue;
@@ -5491,7 +5492,7 @@ static int statmount_sb_source(struct kstatmount *s, struct seq_file *seq)
seq->buf[seq->count] = '\0';
seq->count = start;
seq_commit(seq, string_unescape_inplace(seq->buf + start, UNESCAPE_OCTAL));
- } else if (r->mnt_devname) {
+ } else {
seq_puts(seq, r->mnt_devname);
}
return 0;
@@ -5804,7 +5805,9 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
STATMOUNT_SB_SOURCE | \
STATMOUNT_OPT_ARRAY | \
STATMOUNT_OPT_SEC_ARRAY | \
- STATMOUNT_SUPPORTED_MASK)
+ STATMOUNT_SUPPORTED_MASK | \
+ STATMOUNT_MNT_UIDMAP | \
+ STATMOUNT_MNT_GIDMAP)
static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
struct mnt_namespace *ns)
@@ -5839,13 +5842,29 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
return err;
s->root = root;
- s->idmap = mnt_idmap(s->mnt);
- if (s->mask & STATMOUNT_SB_BASIC)
- statmount_sb_basic(s);
+ /*
+ * Note that mount properties in mnt->mnt_flags, mnt->mnt_idmap
+ * can change concurrently as we only hold the read-side of the
+ * namespace semaphore and mount properties may change with only
+ * the mount lock held.
+ *
+ * We could sample the mount lock sequence counter to detect
+ * those changes and retry. But it's not worth it. Worst that
+ * happens is that the mnt->mnt_idmap pointer is already changed
+ * while mnt->mnt_flags isn't or vica versa. So what.
+ *
+ * Both mnt->mnt_flags and mnt->mnt_idmap are set and retrieved
+ * via READ_ONCE()/WRITE_ONCE() and guard against theoretical
+ * torn read/write. That's all we care about right now.
+ */
+ s->idmap = mnt_idmap(s->mnt);
if (s->mask & STATMOUNT_MNT_BASIC)
statmount_mnt_basic(s);
+ if (s->mask & STATMOUNT_SB_BASIC)
+ statmount_sb_basic(s);
+
if (s->mask & STATMOUNT_PROPAGATE_FROM)
statmount_propagate_from(s);
@@ -6157,6 +6176,10 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -ENOENT;
+ /*
+ * We only need to guard against mount topology changes as
+ * listmount() doesn't care about any mount properties.
+ */
scoped_guard(rwsem_read, &namespace_sem)
ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids,
nr_mnt_ids, (flags & LISTMOUNT_REVERSE));