From 2b99e410b28f5a75ae417e6389e767c7745d6fce Mon Sep 17 00:00:00 2001 From: Cheng Lin Date: Tue, 10 Oct 2023 10:09:01 +0800 Subject: xfs: introduce protection for drop nlink When abnormal drop_nlink are detected on the inode, return error, to avoid corruption propagation. Signed-off-by: Cheng Lin Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R --- fs/xfs/xfs_inode.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4d55f58d99b7..fb85c5c81745 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -918,6 +918,13 @@ xfs_droplink( xfs_trans_t *tp, xfs_inode_t *ip) { + if (VFS_I(ip)->i_nlink == 0) { + xfs_alert(ip->i_mount, + "%s: Attempt to drop inode (%llu) with nlink zero.", + __func__, ip->i_ino); + return -EFSCORRUPTED; + } + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); drop_nlink(VFS_I(ip)); -- cgit From 14a537983b228cb050ceca3a5b743d01315dc4aa Mon Sep 17 00:00:00 2001 From: Catherine Hoang Date: Tue, 17 Oct 2023 13:12:08 -0700 Subject: xfs: allow read IO and FICLONE to run concurrently One of our VM cluster management products needs to snapshot KVM image files so that they can be restored in case of failure. Snapshotting is done by redirecting VM disk writes to a sidecar file and using reflink on the disk image, specifically the FICLONE ioctl as used by "cp --reflink". Reflink locks the source and destination files while it operates, which means that reads from the main vm disk image are blocked, causing the vm to stall. When an image file is heavily fragmented, the copy process could take several minutes. Some of the vm image files have 50-100 million extent records, and duplicating that much metadata locks the file for 30 minutes or more. Having activities suspended for such a long time in a cluster node could result in node eviction. Clone operations and read IO do not change any data in the source file, so they should be able to run concurrently. Demote the exclusive locks taken by FICLONE to shared locks to allow reads while cloning. While a clone is in progress, writes will take the IOLOCK_EXCL, so they block until the clone completes. Link: https://lore.kernel.org/linux-xfs/8911B94D-DD29-4D6E-B5BC-32EAF1866245@oracle.com/ Signed-off-by: Catherine Hoang Reviewed-by: "Darrick J. Wong" Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_inode.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index fb85c5c81745..f9d29acd72b9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3628,6 +3628,23 @@ xfs_iunlock2_io_mmap( inode_unlock(VFS_I(ip1)); } +/* Drop the MMAPLOCK and the IOLOCK after a remap completes. */ +void +xfs_iunlock2_remapping( + struct xfs_inode *ip1, + struct xfs_inode *ip2) +{ + xfs_iflags_clear(ip1, XFS_IREMAPPING); + + if (ip1 != ip2) + xfs_iunlock(ip1, XFS_MMAPLOCK_SHARED); + xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); + + if (ip1 != ip2) + inode_unlock_shared(VFS_I(ip1)); + inode_unlock(VFS_I(ip2)); +} + /* * Reload the incore inode list for this inode. Caller should ensure that * the link count cannot change, either by taking ILOCK_SHARED or otherwise -- cgit