From 853b8d7597eea4ccaaefbcf0942cd42fc86d542a Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 2 Feb 2024 12:22:58 +0200
Subject: remap_range: merge do_clone_file_range() into vfs_clone_file_range()

commit dfad37051ade ("remap_range: move permission hooks out of
do_clone_file_range()") moved the permission hooks from
do_clone_file_range() out to its caller vfs_clone_file_range(),
but left all the fast sanity checks in do_clone_file_range().

This makes the expensive security hooks be called in situations
that they would not have been called before (e.g. fs does not support
clone).

The only reason for the do_clone_file_range() helper was that overlayfs
did not use to be able to call vfs_clone_file_range() from copy up
context with sb_writers lock held.  However, since commit c63e56a4a652
("ovl: do not open/llseek lower file with upper sb_writers held"),
overlayfs just uses an open coded version of vfs_clone_file_range().

Merge_clone_file_range() into vfs_clone_file_range(), restoring the
original order of checks as it was before the regressing commit and adapt
the overlayfs code to call vfs_clone_file_range() before the permission
hooks that were added by commit ca7ab482401c ("ovl: add permission hooks
outside of do_splice_direct()").

Note that in the merge of do_clone_file_range(), the file_start_write()
context was reduced to cover ->remap_file_range() without holding it
over the permission hooks, which was the reason for doing the regressing
commit in the first place.

Reported-and-tested-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202401312229.eddeb9a6-oliver.sang@intel.com
Fixes: dfad37051ade ("remap_range: move permission hooks out of do_clone_file_range()")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/r/20240202102258.1582671-1-amir73il@gmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/overlayfs/copy_up.c | 14 ++++++--------
 fs/remap_range.c       | 31 +++++++++----------------------
 2 files changed, 15 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index b8e25ca51016..8586e2f5d243 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -265,20 +265,18 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
 	if (IS_ERR(old_file))
 		return PTR_ERR(old_file);
 
+	/* Try to use clone_file_range to clone up within the same fs */
+	cloned = vfs_clone_file_range(old_file, 0, new_file, 0, len, 0);
+	if (cloned == len)
+		goto out_fput;
+
+	/* Couldn't clone, so now we try to copy the data */
 	error = rw_verify_area(READ, old_file, &old_pos, len);
 	if (!error)
 		error = rw_verify_area(WRITE, new_file, &new_pos, len);
 	if (error)
 		goto out_fput;
 
-	/* Try to use clone_file_range to clone up within the same fs */
-	ovl_start_write(dentry);
-	cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
-	ovl_end_write(dentry);
-	if (cloned == len)
-		goto out_fput;
-	/* Couldn't clone, so now we try to copy the data */
-
 	/* Check if lower fs supports seek operation */
 	if (old_file->f_mode & FMODE_LSEEK)
 		skip_hole = true;
diff --git a/fs/remap_range.c b/fs/remap_range.c
index f8c1120b8311..de07f978ce3e 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -373,9 +373,9 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 }
 EXPORT_SYMBOL(generic_remap_file_range_prep);
 
-loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
-			   struct file *file_out, loff_t pos_out,
-			   loff_t len, unsigned int remap_flags)
+loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+			    struct file *file_out, loff_t pos_out,
+			    loff_t len, unsigned int remap_flags)
 {
 	loff_t ret;
 
@@ -391,23 +391,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 	if (!file_in->f_op->remap_file_range)
 		return -EOPNOTSUPP;
 
-	ret = file_in->f_op->remap_file_range(file_in, pos_in,
-			file_out, pos_out, len, remap_flags);
-	if (ret < 0)
-		return ret;
-
-	fsnotify_access(file_in);
-	fsnotify_modify(file_out);
-	return ret;
-}
-EXPORT_SYMBOL(do_clone_file_range);
-
-loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-			    struct file *file_out, loff_t pos_out,
-			    loff_t len, unsigned int remap_flags)
-{
-	loff_t ret;
-
 	ret = remap_verify_area(file_in, pos_in, len, false);
 	if (ret)
 		return ret;
@@ -417,10 +400,14 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 		return ret;
 
 	file_start_write(file_out);
-	ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
-				  remap_flags);
+	ret = file_in->f_op->remap_file_range(file_in, pos_in,
+			file_out, pos_out, len, remap_flags);
 	file_end_write(file_out);
+	if (ret < 0)
+		return ret;
 
+	fsnotify_access(file_in);
+	fsnotify_modify(file_out);
 	return ret;
 }
 EXPORT_SYMBOL(vfs_clone_file_range);
-- 
cgit 


From 46f5ab762d048dad224436978315cbc2fa79c630 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 6 Feb 2024 11:22:09 +0100
Subject: fs: relax mount_setattr() permission checks

When we added mount_setattr() I added additional checks compared to the
legacy do_reconfigure_mnt() and do_change_type() helpers used by regular
mount(2). If that mount had a parent then verify that the caller and the
mount namespace the mount is attached to match and if not make sure that
it's an anonymous mount.

The real rootfs falls into neither category. It is neither an anoymous
mount because it is obviously attached to the initial mount namespace
but it also obviously doesn't have a parent mount. So that means legacy
mount(2) allows changing mount properties on the real rootfs but
mount_setattr(2) blocks this. I never thought much about this but of
course someone on this planet of earth changes properties on the real
rootfs as can be seen in [1].

Since util-linux finally switched to the new mount api in 2.39 not so
long ago it also relies on mount_setattr() and that surfaced this issue
when Fedora 39 finally switched to it. Fix this.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=2256843
Link: https://lore.kernel.org/r/20240206-vfs-mount-rootfs-v1-1-19b335eee133@kernel.org
Reviewed-by: Jan Kara <jack@suse.cz>
Reported-by: Karel Zak <kzak@redhat.com>
Cc: stable@vger.kernel.org # v5.12+
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/namespace.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 437f60e96d40..5a51315c6678 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4472,10 +4472,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
 	/*
 	 * If this is an attached mount make sure it's located in the callers
 	 * mount namespace. If it's not don't let the caller interact with it.
-	 * If this is a detached mount make sure it has an anonymous mount
-	 * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE.
+	 *
+	 * If this mount doesn't have a parent it's most often simply a
+	 * detached mount with an anonymous mount namespace. IOW, something
+	 * that's simply not attached yet. But there are apparently also users
+	 * that do change mount properties on the rootfs itself. That obviously
+	 * neither has a parent nor is it a detached mount so we cannot
+	 * unconditionally check for detached mounts.
 	 */
-	if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns)))
+	if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt))
 		goto out;
 
 	/*
-- 
cgit