summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/mount.h13
-rw-r--r--fs/namespace.c42
-rw-r--r--fs/nsfs.c5
3 files changed, 34 insertions, 26 deletions
diff --git a/fs/mount.h b/fs/mount.h
index bd8d6c36b421..e9f48e563c0f 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -20,6 +20,7 @@ struct mnt_namespace {
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
+ struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */
refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout;
@@ -160,15 +161,9 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
}
bool has_locked_children(struct mount *mnt, struct dentry *dentry);
-struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mnt_ns, bool previous);
-static inline struct mnt_namespace *lookup_next_mnt_ns(struct mnt_namespace *mntns)
-{
- return __lookup_next_mnt_ns(mntns, false);
-}
-static inline struct mnt_namespace *lookup_prev_mnt_ns(struct mnt_namespace *mntns)
-{
- return __lookup_next_mnt_ns(mntns, true);
-}
+struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mnt_ns,
+ bool previous);
+
static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
return container_of(ns, struct mnt_namespace, ns);
diff --git a/fs/namespace.c b/fs/namespace.c
index d2dbbbc91cc0..8b50b26024fa 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -82,6 +82,7 @@ static DEFINE_RWLOCK(mnt_ns_tree_lock);
static seqcount_rwlock_t mnt_ns_tree_seqcount = SEQCNT_RWLOCK_ZERO(mnt_ns_tree_seqcount, &mnt_ns_tree_lock);
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
+static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
struct mount_kattr {
unsigned int attr_set;
@@ -142,10 +143,19 @@ static inline void mnt_ns_tree_write_unlock(void)
static void mnt_ns_tree_add(struct mnt_namespace *ns)
{
- struct rb_node *node;
+ struct rb_node *node, *prev;
mnt_ns_tree_write_lock();
node = rb_find_add_rcu(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_cmp);
+ /*
+ * If there's no previous entry simply add it after the
+ * head and if there is add it after the previous entry.
+ */
+ prev = rb_prev(&ns->mnt_ns_tree_node);
+ if (!prev)
+ list_add_rcu(&ns->mnt_ns_list, &mnt_ns_list);
+ else
+ list_add_rcu(&ns->mnt_ns_list, &node_to_mnt_ns(prev)->mnt_ns_list);
mnt_ns_tree_write_unlock();
WARN_ON_ONCE(node);
@@ -174,6 +184,7 @@ static void mnt_ns_tree_remove(struct mnt_namespace *ns)
if (!is_anon_ns(ns)) {
mnt_ns_tree_write_lock();
rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
+ list_bidir_del_rcu(&ns->mnt_ns_list);
mnt_ns_tree_write_unlock();
}
@@ -2086,30 +2097,34 @@ struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
return &mnt->ns;
}
-struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mntns, bool previous)
+struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool previous)
{
- guard(read_lock)(&mnt_ns_tree_lock);
+ guard(rcu)();
+
for (;;) {
- struct rb_node *node;
+ struct list_head *list;
if (previous)
- node = rb_prev(&mntns->mnt_ns_tree_node);
+ list = rcu_dereference(list_bidir_prev_rcu(&mntns->mnt_ns_list));
else
- node = rb_next(&mntns->mnt_ns_tree_node);
- if (!node)
+ list = rcu_dereference(list_next_rcu(&mntns->mnt_ns_list));
+ if (list_is_head(list, &mnt_ns_list))
return ERR_PTR(-ENOENT);
- mntns = node_to_mnt_ns(node);
- node = &mntns->mnt_ns_tree_node;
+ mntns = list_entry_rcu(list, struct mnt_namespace, mnt_ns_list);
+ /*
+ * The last passive reference count is put with RCU
+ * delay so accessing the mount namespace is not just
+ * safe but all relevant members are still valid.
+ */
if (!ns_capable_noaudit(mntns->user_ns, CAP_SYS_ADMIN))
continue;
/*
- * Holding mnt_ns_tree_lock prevents the mount namespace from
- * being freed but it may well be on it's deathbed. We want an
- * active reference, not just a passive one here as we're
- * persisting the mount namespace.
+ * We need an active reference count as we're persisting
+ * the mount namespace and it might already be on its
+ * deathbed.
*/
if (!refcount_inc_not_zero(&mntns->ns.count))
continue;
@@ -3926,6 +3941,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
refcount_set(&new_ns->ns.count, 1);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
+ INIT_LIST_HEAD(&new_ns->mnt_ns_list);
RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns);
diff --git a/fs/nsfs.c b/fs/nsfs.c
index c675fc40ce2d..663f8656158d 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -274,10 +274,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
if (usize < MNT_NS_INFO_SIZE_VER0)
return -EINVAL;
- if (previous)
- mnt_ns = lookup_prev_mnt_ns(to_mnt_ns(ns));
- else
- mnt_ns = lookup_next_mnt_ns(to_mnt_ns(ns));
+ mnt_ns = get_sequential_mnt_ns(to_mnt_ns(ns), previous);
if (IS_ERR(mnt_ns))
return PTR_ERR(mnt_ns);