diff options
Diffstat (limited to 'fs/remap_range.c')
| -rw-r--r-- | fs/remap_range.c | 141 |
1 files changed, 72 insertions, 69 deletions
diff --git a/fs/remap_range.c b/fs/remap_range.c index 231159682907..26afbbbfb10c 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -14,6 +14,8 @@ #include <linux/compat.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/dax.h> +#include <linux/overflow.h> #include "internal.h" #include <linux/uaccess.h> @@ -71,7 +73,8 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, * Otherwise, make sure the count is also block-aligned, having * already confirmed the starting offsets' block alignment. */ - if (pos_in + count == size_in) { + if (pos_in + count == size_in && + (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) { bcount = ALIGN(size_in, bs) - pos_in; } else { if (!IS_ALIGNED(count, bs)) @@ -96,17 +99,25 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, return 0; } -static int remap_verify_area(struct file *file, loff_t pos, loff_t len, - bool write) +int remap_verify_area(struct file *file, loff_t pos, loff_t len, bool write) { + int mask = write ? MAY_WRITE : MAY_READ; + loff_t tmp; + int ret; + if (unlikely(pos < 0 || len < 0)) return -EINVAL; - if (unlikely((loff_t) (pos + len) < 0)) + if (unlikely(check_add_overflow(pos, len, &tmp))) return -EINVAL; - return security_file_permission(file, write ? MAY_WRITE : MAY_READ); + ret = security_file_permission(file, mask); + if (ret) + return ret; + + return fsnotify_file_area_perm(file, mask, &pos, len); } +EXPORT_SYMBOL_GPL(remap_verify_area); /* * Ensure that we don't remap a partial EOF block in the middle of something @@ -146,18 +157,9 @@ static int generic_remap_check_len(struct inode *inode_in, } /* Read a page's worth of file data into the page cache. */ -static struct folio *vfs_dedupe_get_folio(struct inode *inode, loff_t pos) +static struct folio *vfs_dedupe_get_folio(struct file *file, loff_t pos) { - struct folio *folio; - - folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT, NULL); - if (IS_ERR(folio)) - return folio; - if (!folio_test_uptodate(folio)) { - folio_put(folio); - return ERR_PTR(-EIO); - } - return folio; + return read_mapping_folio(file->f_mapping, pos >> PAGE_SHIFT, file); } /* @@ -187,8 +189,8 @@ static void vfs_unlock_two_folios(struct folio *folio1, struct folio *folio2) * Compare extents of two files to see if they are the same. * Caller must have locked both inodes to prevent write races. */ -static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, - struct inode *dest, loff_t dstoff, +static int vfs_dedupe_file_range_compare(struct file *src, loff_t srcoff, + struct file *dest, loff_t dstoff, loff_t len, bool *is_same) { bool same = true; @@ -224,8 +226,8 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, * someone is invalidating pages on us and we lose. */ if (!folio_test_uptodate(src_folio) || !folio_test_uptodate(dst_folio) || - src_folio->mapping != src->i_mapping || - dst_folio->mapping != dest->i_mapping) { + src_folio->mapping != src->f_mapping || + dst_folio->mapping != dest->f_mapping) { same = false; goto unlock; } @@ -271,9 +273,11 @@ out_error: * If there's an error, then the usual negative error code is returned. * Otherwise returns 0 with *len set to the request length. */ -int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t *len, unsigned int remap_flags) +int +__generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags, + const struct iomap_ops *dax_read_ops) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -309,7 +313,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, /* Check that we don't violate system file offset limits. */ ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len, remap_flags); - if (ret) + if (ret || *len == 0) return ret; /* Wait for the completion of any pending IOs on both files */ @@ -333,8 +337,15 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, if (remap_flags & REMAP_FILE_DEDUP) { bool is_same = false; - ret = vfs_dedupe_file_range_compare(inode_in, pos_in, - inode_out, pos_out, *len, &is_same); + if (!IS_DAX(inode_in)) + ret = vfs_dedupe_file_range_compare(file_in, pos_in, + file_out, pos_out, *len, &is_same); + else if (dax_read_ops) + ret = dax_dedupe_file_range_compare(inode_in, pos_in, + inode_out, pos_out, *len, &is_same, + dax_read_ops); + else + return -EINVAL; if (ret) return ret; if (!is_same) @@ -343,7 +354,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, ret = generic_remap_check_len(inode_in, inode_out, pos_out, len, remap_flags); - if (ret) + if (ret || *len == 0) return ret; /* If can't alter the file contents, we're done. */ @@ -352,21 +363,24 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, return ret; } + +int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags) +{ + return __generic_remap_file_range_prep(file_in, pos_in, file_out, + pos_out, len, remap_flags, NULL); +} EXPORT_SYMBOL(generic_remap_file_range_prep); -loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags) +loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { loff_t ret; WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP); - /* - * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on - * the same mount. Practically, they only need to be on the same file - * system. - */ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; @@ -385,8 +399,10 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, if (ret) return ret; + file_start_write(file_out); ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, len, remap_flags); + file_end_write(file_out); if (ret < 0) return ret; @@ -394,36 +410,21 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, fsnotify_modify(file_out); return ret; } -EXPORT_SYMBOL(do_clone_file_range); - -loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags) -{ - loff_t ret; - - file_start_write(file_out); - ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len, - remap_flags); - file_end_write(file_out); - - return ret; -} EXPORT_SYMBOL(vfs_clone_file_range); /* Check whether we are allowed to dedupe the destination file */ -static bool allow_file_dedupe(struct file *file) +static bool may_dedupe_file(struct file *file) { - struct user_namespace *mnt_userns = file_mnt_user_ns(file); + struct mnt_idmap *idmap = file_mnt_idmap(file); struct inode *inode = file_inode(file); if (capable(CAP_SYS_ADMIN)) return true; if (file->f_mode & FMODE_WRITE) return true; - if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) + if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) return true; - if (!inode_permission(mnt_userns, inode, MAY_WRITE)) + if (!inode_permission(idmap, inode, MAY_WRITE)) return true; return false; } @@ -437,28 +438,33 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN)); - ret = mnt_want_write_file(dst_file); - if (ret) - return ret; - /* * This is redundant if called from vfs_dedupe_file_range(), but other * callers need it and it's not performance sesitive... */ ret = remap_verify_area(src_file, src_pos, len, false); if (ret) - goto out_drop_write; + return ret; ret = remap_verify_area(dst_file, dst_pos, len, true); if (ret) - goto out_drop_write; + return ret; + + /* + * This needs to be called after remap_verify_area() because of + * sb_start_write() and before may_dedupe_file() because the mount's + * MAY_WRITE need to be checked with mnt_get_write_access_file() held. + */ + ret = mnt_want_write_file(dst_file); + if (ret) + return ret; ret = -EPERM; - if (!allow_file_dedupe(dst_file)) + if (!may_dedupe_file(dst_file)) goto out_drop_write; ret = -EXDEV; - if (src_file->f_path.mnt != dst_file->f_path.mnt) + if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb) goto out_drop_write; ret = -EISDIR; @@ -530,20 +536,19 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) } for (i = 0, info = same->info; i < count; i++, info++) { - struct fd dst_fd = fdget(info->dest_fd); - struct file *dst_file = dst_fd.file; + CLASS(fd, dst_fd)(info->dest_fd); - if (!dst_file) { + if (fd_empty(dst_fd)) { info->status = -EBADF; goto next_loop; } if (info->reserved) { info->status = -EINVAL; - goto next_fdput; + goto next_loop; } - deduped = vfs_dedupe_file_range_one(file, off, dst_file, + deduped = vfs_dedupe_file_range_one(file, off, fd_file(dst_fd), info->dest_offset, len, REMAP_FILE_CAN_SHORTEN); if (deduped == -EBADE) @@ -553,8 +558,6 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) else info->bytes_deduped = len; -next_fdput: - fdput(dst_fd); next_loop: if (fatal_signal_pending(current)) break; |
