summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2025-04-16 10:19:05 +0200
committerChristian Brauner <brauner@kernel.org>2025-05-23 14:20:44 +0200
commita68cb18624503a09a1b10e72c3bcc90f2eeb3ded (patch)
treed93812afe37570c30321f8e864b3d8962d9cbc35
parent0af2f6be1b4281385b618cb86ad946eded089ac8 (diff)
mount: add a comment about concurrent changes with statmount()/listmount()
Add some comments in there highlighting a few non-obvious assumptions. Link: https://lore.kernel.org/20250416-zerknirschen-aluminium-14a55639076f@brauner Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--fs/namespace.c26
1 files changed, 23 insertions, 3 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 14935a0500a2..ddb0a688633c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5831,13 +5831,29 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
return err;
s->root = root;
- s->idmap = mnt_idmap(s->mnt);
- if (s->mask & STATMOUNT_SB_BASIC)
- statmount_sb_basic(s);
+ /*
+ * Note that mount properties in mnt->mnt_flags, mnt->mnt_idmap
+ * can change concurrently as we only hold the read-side of the
+ * namespace semaphore and mount properties may change with only
+ * the mount lock held.
+ *
+ * We could sample the mount lock sequence counter to detect
+ * those changes and retry. But it's not worth it. Worst that
+ * happens is that the mnt->mnt_idmap pointer is already changed
+ * while mnt->mnt_flags isn't or vica versa. So what.
+ *
+ * Both mnt->mnt_flags and mnt->mnt_idmap are set and retrieved
+ * via READ_ONCE()/WRITE_ONCE() and guard against theoretical
+ * torn read/write. That's all we care about right now.
+ */
+ s->idmap = mnt_idmap(s->mnt);
if (s->mask & STATMOUNT_MNT_BASIC)
statmount_mnt_basic(s);
+ if (s->mask & STATMOUNT_SB_BASIC)
+ statmount_sb_basic(s);
+
if (s->mask & STATMOUNT_PROPAGATE_FROM)
statmount_propagate_from(s);
@@ -6149,6 +6165,10 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -ENOENT;
+ /*
+ * We only need to guard against mount topology changes as
+ * listmount() doesn't care about any mount properties.
+ */
scoped_guard(rwsem_read, &namespace_sem)
ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids,
nr_mnt_ids, (flags & LISTMOUNT_REVERSE));