diff options
Diffstat (limited to 'fs/xfs/xfs_exchrange.c')
| -rw-r--r-- | fs/xfs/xfs_exchrange.c | 227 |
1 files changed, 173 insertions, 54 deletions
diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index c8a655c92c92..0b41bdfecdfb 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -72,6 +72,34 @@ xfs_exchrange_estimate( return error; } +/* + * Check that file2's metadata agree with the snapshot that we took for the + * range commit request. + * + * This should be called after the filesystem has locked /all/ inode metadata + * against modification. + */ +STATIC int +xfs_exchrange_check_freshness( + const struct xfs_exchrange *fxr, + struct xfs_inode *ip2) +{ + struct inode *inode2 = VFS_I(ip2); + struct timespec64 ctime = inode_get_ctime(inode2); + struct timespec64 mtime = inode_get_mtime(inode2); + + trace_xfs_exchrange_freshness(fxr, ip2); + + /* Check that file2 hasn't otherwise been modified. */ + if (fxr->file2_ino != ip2->i_ino || + fxr->file2_gen != inode2->i_generation || + !timespec64_equal(&fxr->file2_ctime, &ctime) || + !timespec64_equal(&fxr->file2_mtime, &mtime)) + return -EBUSY; + + return 0; +} + #define QRETRY_IP1 (0x1) #define QRETRY_IP2 (0x2) @@ -91,6 +119,9 @@ xfs_exchrange_reserve_quota( int ip1_error = 0; int error; + ASSERT(!xfs_is_metadir_inode(req->ip1)); + ASSERT(!xfs_is_metadir_inode(req->ip2)); + /* * Don't bother with a quota reservation if we're not enforcing them * or the two inodes have the same dquots. @@ -189,7 +220,7 @@ xfs_exchrange_mappings( * length in @fxr are safe to round up. */ if (xfs_inode_has_bigrtalloc(ip2)) - req.blockcount = xfs_rtb_roundup_rtx(mp, req.blockcount); + req.blockcount = xfs_blen_roundup_rtx(mp, req.blockcount); error = xfs_exchrange_estimate(&req); if (error) @@ -298,22 +329,6 @@ out_trans_cancel: * successfully but before locks are dropped. */ -/* Verify that we have security clearance to perform this operation. */ -static int -xfs_exchange_range_verify_area( - struct xfs_exchrange *fxr) -{ - int ret; - - ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length, - true); - if (ret) - return ret; - - return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length, - true); -} - /* * Performs necessary checks before doing a range exchange, having stabilized * mutable inode attributes via i_rwsem. @@ -324,11 +339,13 @@ xfs_exchange_range_checks( unsigned int alloc_unit) { struct inode *inode1 = file_inode(fxr->file1); + loff_t size1 = i_size_read(inode1); struct inode *inode2 = file_inode(fxr->file2); + loff_t size2 = i_size_read(inode2); uint64_t allocmask = alloc_unit - 1; int64_t test_len; uint64_t blen; - loff_t size1, size2, tmp; + loff_t tmp; int error; /* Don't touch certain kinds of inodes */ @@ -337,24 +354,25 @@ xfs_exchange_range_checks( if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2)) return -ETXTBSY; - size1 = i_size_read(inode1); - size2 = i_size_read(inode2); - /* Ranges cannot start after EOF. */ if (fxr->file1_offset > size1 || fxr->file2_offset > size2) return -EINVAL; - /* - * If the caller said to exchange to EOF, we set the length of the - * request large enough to cover everything to the end of both files. - */ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { + /* + * If the caller said to exchange to EOF, we set the length of + * the request large enough to cover everything to the end of + * both files. + */ fxr->length = max_t(int64_t, size1 - fxr->file1_offset, size2 - fxr->file2_offset); - - error = xfs_exchange_range_verify_area(fxr); - if (error) - return error; + } else { + /* + * Otherwise we require both ranges to end within EOF. + */ + if (fxr->file1_offset + fxr->length > size1 || + fxr->file2_offset + fxr->length > size2) + return -EINVAL; } /* @@ -371,15 +389,6 @@ xfs_exchange_range_checks( return -EINVAL; /* - * We require both ranges to end within EOF, unless we're exchanging - * to EOF. - */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) && - (fxr->file1_offset + fxr->length > size1 || - fxr->file2_offset + fxr->length > size2)) - return -EINVAL; - - /* * Make sure we don't hit any file size limits. If we hit any size * limits such that test_length was adjusted, we abort the whole * operation. @@ -607,6 +616,12 @@ xfs_exchrange_prep( if (error || fxr->length == 0) return error; + if (fxr->flags & __XFS_EXCHANGE_RANGE_CHECK_FRESH2) { + error = xfs_exchrange_check_freshness(fxr, ip2); + if (error) + return error; + } + /* Attach dquots to both inodes before changing block maps. */ error = xfs_qm_dqattach(ip2); if (error) @@ -710,6 +725,7 @@ xfs_exchange_range( { struct inode *inode1 = file_inode(fxr->file1); struct inode *inode2 = file_inode(fxr->file2); + loff_t check_len = fxr->length; int ret; BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS & @@ -719,7 +735,8 @@ xfs_exchange_range( if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt) return -EXDEV; - if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) + if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS | + __XFS_EXCHANGE_RANGE_CHECK_FRESH2)) return -EINVAL; /* Userspace requests only honored for regular files. */ @@ -741,14 +758,18 @@ xfs_exchange_range( return -EBADF; /* - * If we're not exchanging to EOF, we can check the areas before - * stabilizing both files' i_size. + * If we're exchanging to EOF we can't calculate the length until taking + * the iolock. Pass a 0 length to remap_verify_area similar to the + * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well. */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) { - ret = xfs_exchange_range_verify_area(fxr); - if (ret) - return ret; - } + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) + check_len = 0; + ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true); + if (ret) + return ret; + ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true); + if (ret) + return ret; /* Update cmtime if the fd/inode don't forbid it. */ if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)) @@ -778,8 +799,6 @@ xfs_ioc_exchange_range( .file2 = file, }; struct xfs_exchange_range args; - struct fd file1; - int error; if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; @@ -793,12 +812,112 @@ xfs_ioc_exchange_range( fxr.length = args.length; fxr.flags = args.flags; - file1 = fdget(args.file1_fd); - if (!file1.file) + CLASS(fd, file1)(args.file1_fd); + if (fd_empty(file1)) return -EBADF; - fxr.file1 = file1.file; + fxr.file1 = fd_file(file1); - error = xfs_exchange_range(&fxr); - fdput(file1); - return error; + return xfs_exchange_range(&fxr); +} + +/* Opaque freshness blob for XFS_IOC_COMMIT_RANGE */ +struct xfs_commit_range_fresh { + xfs_fsid_t fsid; /* m_fixedfsid */ + __u64 file2_ino; /* inode number */ + __s64 file2_mtime; /* modification time */ + __s64 file2_ctime; /* change time */ + __s32 file2_mtime_nsec; /* mod time, nsec */ + __s32 file2_ctime_nsec; /* change time, nsec */ + __u32 file2_gen; /* inode generation */ + __u32 magic; /* zero */ +}; +#define XCR_FRESH_MAGIC 0x444F524B /* DORK */ + +/* Set up a commitrange operation by sampling file2's write-related attrs */ +long +xfs_ioc_start_commit( + struct file *file, + struct xfs_commit_range __user *argp) +{ + struct xfs_commit_range args = { }; + struct kstat kstat = { }; + struct xfs_commit_range_fresh *kern_f; + struct xfs_commit_range_fresh __user *user_f; + struct inode *inode2 = file_inode(file); + struct xfs_inode *ip2 = XFS_I(inode2); + const unsigned int lockflags = XFS_IOLOCK_SHARED | + XFS_MMAPLOCK_SHARED | + XFS_ILOCK_SHARED; + + BUILD_BUG_ON(sizeof(struct xfs_commit_range_fresh) != + sizeof(args.file2_freshness)); + + kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness; + + memcpy(&kern_f->fsid, ip2->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); + + xfs_ilock(ip2, lockflags); + /* Force writing of a distinct ctime if any writes happen. */ + fill_mg_cmtime(&kstat, STATX_CTIME | STATX_MTIME, inode2); + kern_f->file2_ctime = kstat.ctime.tv_sec; + kern_f->file2_ctime_nsec = kstat.ctime.tv_nsec; + kern_f->file2_mtime = kstat.mtime.tv_sec; + kern_f->file2_mtime_nsec = kstat.mtime.tv_nsec; + kern_f->file2_ino = ip2->i_ino; + kern_f->file2_gen = inode2->i_generation; + kern_f->magic = XCR_FRESH_MAGIC; + xfs_iunlock(ip2, lockflags); + + user_f = (struct xfs_commit_range_fresh __user *)&argp->file2_freshness; + if (copy_to_user(user_f, kern_f, sizeof(*kern_f))) + return -EFAULT; + + return 0; +} + +/* + * Exchange file1 and file2 contents if file2 has not been written since the + * start commit operation. + */ +long +xfs_ioc_commit_range( + struct file *file, + struct xfs_commit_range __user *argp) +{ + struct xfs_exchrange fxr = { + .file2 = file, + }; + struct xfs_commit_range args; + struct xfs_commit_range_fresh *kern_f; + struct xfs_inode *ip2 = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip2->i_mount; + + kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) + return -EINVAL; + if (kern_f->magic != XCR_FRESH_MAGIC) + return -EBUSY; + if (memcmp(&kern_f->fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t))) + return -EBUSY; + + fxr.file1_offset = args.file1_offset; + fxr.file2_offset = args.file2_offset; + fxr.length = args.length; + fxr.flags = args.flags | __XFS_EXCHANGE_RANGE_CHECK_FRESH2; + fxr.file2_ino = kern_f->file2_ino; + fxr.file2_gen = kern_f->file2_gen; + fxr.file2_mtime.tv_sec = kern_f->file2_mtime; + fxr.file2_mtime.tv_nsec = kern_f->file2_mtime_nsec; + fxr.file2_ctime.tv_sec = kern_f->file2_ctime; + fxr.file2_ctime.tv_nsec = kern_f->file2_ctime_nsec; + + CLASS(fd, file1)(args.file1_fd); + if (fd_empty(file1)) + return -EBADF; + fxr.file1 = fd_file(file1); + + return xfs_exchange_range(&fxr); } |
