summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_exchrange.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_exchrange.c')
-rw-r--r--fs/xfs/xfs_exchrange.c227
1 files changed, 173 insertions, 54 deletions
diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c
index c8a655c92c92..0b41bdfecdfb 100644
--- a/fs/xfs/xfs_exchrange.c
+++ b/fs/xfs/xfs_exchrange.c
@@ -72,6 +72,34 @@ xfs_exchrange_estimate(
return error;
}
+/*
+ * Check that file2's metadata agree with the snapshot that we took for the
+ * range commit request.
+ *
+ * This should be called after the filesystem has locked /all/ inode metadata
+ * against modification.
+ */
+STATIC int
+xfs_exchrange_check_freshness(
+ const struct xfs_exchrange *fxr,
+ struct xfs_inode *ip2)
+{
+ struct inode *inode2 = VFS_I(ip2);
+ struct timespec64 ctime = inode_get_ctime(inode2);
+ struct timespec64 mtime = inode_get_mtime(inode2);
+
+ trace_xfs_exchrange_freshness(fxr, ip2);
+
+ /* Check that file2 hasn't otherwise been modified. */
+ if (fxr->file2_ino != ip2->i_ino ||
+ fxr->file2_gen != inode2->i_generation ||
+ !timespec64_equal(&fxr->file2_ctime, &ctime) ||
+ !timespec64_equal(&fxr->file2_mtime, &mtime))
+ return -EBUSY;
+
+ return 0;
+}
+
#define QRETRY_IP1 (0x1)
#define QRETRY_IP2 (0x2)
@@ -91,6 +119,9 @@ xfs_exchrange_reserve_quota(
int ip1_error = 0;
int error;
+ ASSERT(!xfs_is_metadir_inode(req->ip1));
+ ASSERT(!xfs_is_metadir_inode(req->ip2));
+
/*
* Don't bother with a quota reservation if we're not enforcing them
* or the two inodes have the same dquots.
@@ -189,7 +220,7 @@ xfs_exchrange_mappings(
* length in @fxr are safe to round up.
*/
if (xfs_inode_has_bigrtalloc(ip2))
- req.blockcount = xfs_rtb_roundup_rtx(mp, req.blockcount);
+ req.blockcount = xfs_blen_roundup_rtx(mp, req.blockcount);
error = xfs_exchrange_estimate(&req);
if (error)
@@ -298,22 +329,6 @@ out_trans_cancel:
* successfully but before locks are dropped.
*/
-/* Verify that we have security clearance to perform this operation. */
-static int
-xfs_exchange_range_verify_area(
- struct xfs_exchrange *fxr)
-{
- int ret;
-
- ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length,
- true);
- if (ret)
- return ret;
-
- return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length,
- true);
-}
-
/*
* Performs necessary checks before doing a range exchange, having stabilized
* mutable inode attributes via i_rwsem.
@@ -324,11 +339,13 @@ xfs_exchange_range_checks(
unsigned int alloc_unit)
{
struct inode *inode1 = file_inode(fxr->file1);
+ loff_t size1 = i_size_read(inode1);
struct inode *inode2 = file_inode(fxr->file2);
+ loff_t size2 = i_size_read(inode2);
uint64_t allocmask = alloc_unit - 1;
int64_t test_len;
uint64_t blen;
- loff_t size1, size2, tmp;
+ loff_t tmp;
int error;
/* Don't touch certain kinds of inodes */
@@ -337,24 +354,25 @@ xfs_exchange_range_checks(
if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
return -ETXTBSY;
- size1 = i_size_read(inode1);
- size2 = i_size_read(inode2);
-
/* Ranges cannot start after EOF. */
if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
return -EINVAL;
- /*
- * If the caller said to exchange to EOF, we set the length of the
- * request large enough to cover everything to the end of both files.
- */
if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
+ /*
+ * If the caller said to exchange to EOF, we set the length of
+ * the request large enough to cover everything to the end of
+ * both files.
+ */
fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
size2 - fxr->file2_offset);
-
- error = xfs_exchange_range_verify_area(fxr);
- if (error)
- return error;
+ } else {
+ /*
+ * Otherwise we require both ranges to end within EOF.
+ */
+ if (fxr->file1_offset + fxr->length > size1 ||
+ fxr->file2_offset + fxr->length > size2)
+ return -EINVAL;
}
/*
@@ -371,15 +389,6 @@ xfs_exchange_range_checks(
return -EINVAL;
/*
- * We require both ranges to end within EOF, unless we're exchanging
- * to EOF.
- */
- if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) &&
- (fxr->file1_offset + fxr->length > size1 ||
- fxr->file2_offset + fxr->length > size2))
- return -EINVAL;
-
- /*
* Make sure we don't hit any file size limits. If we hit any size
* limits such that test_length was adjusted, we abort the whole
* operation.
@@ -607,6 +616,12 @@ xfs_exchrange_prep(
if (error || fxr->length == 0)
return error;
+ if (fxr->flags & __XFS_EXCHANGE_RANGE_CHECK_FRESH2) {
+ error = xfs_exchrange_check_freshness(fxr, ip2);
+ if (error)
+ return error;
+ }
+
/* Attach dquots to both inodes before changing block maps. */
error = xfs_qm_dqattach(ip2);
if (error)
@@ -710,6 +725,7 @@ xfs_exchange_range(
{
struct inode *inode1 = file_inode(fxr->file1);
struct inode *inode2 = file_inode(fxr->file2);
+ loff_t check_len = fxr->length;
int ret;
BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
@@ -719,7 +735,8 @@ xfs_exchange_range(
if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt)
return -EXDEV;
- if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
+ if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS |
+ __XFS_EXCHANGE_RANGE_CHECK_FRESH2))
return -EINVAL;
/* Userspace requests only honored for regular files. */
@@ -741,14 +758,18 @@ xfs_exchange_range(
return -EBADF;
/*
- * If we're not exchanging to EOF, we can check the areas before
- * stabilizing both files' i_size.
+ * If we're exchanging to EOF we can't calculate the length until taking
+ * the iolock. Pass a 0 length to remap_verify_area similar to the
+ * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well.
*/
- if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) {
- ret = xfs_exchange_range_verify_area(fxr);
- if (ret)
- return ret;
- }
+ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
+ check_len = 0;
+ ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true);
+ if (ret)
+ return ret;
+ ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true);
+ if (ret)
+ return ret;
/* Update cmtime if the fd/inode don't forbid it. */
if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))
@@ -778,8 +799,6 @@ xfs_ioc_exchange_range(
.file2 = file,
};
struct xfs_exchange_range args;
- struct fd file1;
- int error;
if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
@@ -793,12 +812,112 @@ xfs_ioc_exchange_range(
fxr.length = args.length;
fxr.flags = args.flags;
- file1 = fdget(args.file1_fd);
- if (!file1.file)
+ CLASS(fd, file1)(args.file1_fd);
+ if (fd_empty(file1))
return -EBADF;
- fxr.file1 = file1.file;
+ fxr.file1 = fd_file(file1);
- error = xfs_exchange_range(&fxr);
- fdput(file1);
- return error;
+ return xfs_exchange_range(&fxr);
+}
+
+/* Opaque freshness blob for XFS_IOC_COMMIT_RANGE */
+struct xfs_commit_range_fresh {
+ xfs_fsid_t fsid; /* m_fixedfsid */
+ __u64 file2_ino; /* inode number */
+ __s64 file2_mtime; /* modification time */
+ __s64 file2_ctime; /* change time */
+ __s32 file2_mtime_nsec; /* mod time, nsec */
+ __s32 file2_ctime_nsec; /* change time, nsec */
+ __u32 file2_gen; /* inode generation */
+ __u32 magic; /* zero */
+};
+#define XCR_FRESH_MAGIC 0x444F524B /* DORK */
+
+/* Set up a commitrange operation by sampling file2's write-related attrs */
+long
+xfs_ioc_start_commit(
+ struct file *file,
+ struct xfs_commit_range __user *argp)
+{
+ struct xfs_commit_range args = { };
+ struct kstat kstat = { };
+ struct xfs_commit_range_fresh *kern_f;
+ struct xfs_commit_range_fresh __user *user_f;
+ struct inode *inode2 = file_inode(file);
+ struct xfs_inode *ip2 = XFS_I(inode2);
+ const unsigned int lockflags = XFS_IOLOCK_SHARED |
+ XFS_MMAPLOCK_SHARED |
+ XFS_ILOCK_SHARED;
+
+ BUILD_BUG_ON(sizeof(struct xfs_commit_range_fresh) !=
+ sizeof(args.file2_freshness));
+
+ kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness;
+
+ memcpy(&kern_f->fsid, ip2->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
+
+ xfs_ilock(ip2, lockflags);
+ /* Force writing of a distinct ctime if any writes happen. */
+ fill_mg_cmtime(&kstat, STATX_CTIME | STATX_MTIME, inode2);
+ kern_f->file2_ctime = kstat.ctime.tv_sec;
+ kern_f->file2_ctime_nsec = kstat.ctime.tv_nsec;
+ kern_f->file2_mtime = kstat.mtime.tv_sec;
+ kern_f->file2_mtime_nsec = kstat.mtime.tv_nsec;
+ kern_f->file2_ino = ip2->i_ino;
+ kern_f->file2_gen = inode2->i_generation;
+ kern_f->magic = XCR_FRESH_MAGIC;
+ xfs_iunlock(ip2, lockflags);
+
+ user_f = (struct xfs_commit_range_fresh __user *)&argp->file2_freshness;
+ if (copy_to_user(user_f, kern_f, sizeof(*kern_f)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * Exchange file1 and file2 contents if file2 has not been written since the
+ * start commit operation.
+ */
+long
+xfs_ioc_commit_range(
+ struct file *file,
+ struct xfs_commit_range __user *argp)
+{
+ struct xfs_exchrange fxr = {
+ .file2 = file,
+ };
+ struct xfs_commit_range args;
+ struct xfs_commit_range_fresh *kern_f;
+ struct xfs_inode *ip2 = XFS_I(file_inode(file));
+ struct xfs_mount *mp = ip2->i_mount;
+
+ kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness;
+
+ if (copy_from_user(&args, argp, sizeof(args)))
+ return -EFAULT;
+ if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
+ return -EINVAL;
+ if (kern_f->magic != XCR_FRESH_MAGIC)
+ return -EBUSY;
+ if (memcmp(&kern_f->fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t)))
+ return -EBUSY;
+
+ fxr.file1_offset = args.file1_offset;
+ fxr.file2_offset = args.file2_offset;
+ fxr.length = args.length;
+ fxr.flags = args.flags | __XFS_EXCHANGE_RANGE_CHECK_FRESH2;
+ fxr.file2_ino = kern_f->file2_ino;
+ fxr.file2_gen = kern_f->file2_gen;
+ fxr.file2_mtime.tv_sec = kern_f->file2_mtime;
+ fxr.file2_mtime.tv_nsec = kern_f->file2_mtime_nsec;
+ fxr.file2_ctime.tv_sec = kern_f->file2_ctime;
+ fxr.file2_ctime.tv_nsec = kern_f->file2_ctime_nsec;
+
+ CLASS(fd, file1)(args.file1_fd);
+ if (fd_empty(file1))
+ return -EBADF;
+ fxr.file1 = fd_file(file1);
+
+ return xfs_exchange_range(&fxr);
}