summaryrefslogtreecommitdiff
path: root/fs/remap_range.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/remap_range.c')
-rw-r--r--fs/remap_range.c141
1 files changed, 72 insertions, 69 deletions
diff --git a/fs/remap_range.c b/fs/remap_range.c
index 231159682907..26afbbbfb10c 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -14,6 +14,8 @@
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/fs.h>
+#include <linux/dax.h>
+#include <linux/overflow.h>
#include "internal.h"
#include <linux/uaccess.h>
@@ -71,7 +73,8 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
* Otherwise, make sure the count is also block-aligned, having
* already confirmed the starting offsets' block alignment.
*/
- if (pos_in + count == size_in) {
+ if (pos_in + count == size_in &&
+ (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) {
bcount = ALIGN(size_in, bs) - pos_in;
} else {
if (!IS_ALIGNED(count, bs))
@@ -96,17 +99,25 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
return 0;
}
-static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
- bool write)
+int remap_verify_area(struct file *file, loff_t pos, loff_t len, bool write)
{
+ int mask = write ? MAY_WRITE : MAY_READ;
+ loff_t tmp;
+ int ret;
+
if (unlikely(pos < 0 || len < 0))
return -EINVAL;
- if (unlikely((loff_t) (pos + len) < 0))
+ if (unlikely(check_add_overflow(pos, len, &tmp)))
return -EINVAL;
- return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
+ ret = security_file_permission(file, mask);
+ if (ret)
+ return ret;
+
+ return fsnotify_file_area_perm(file, mask, &pos, len);
}
+EXPORT_SYMBOL_GPL(remap_verify_area);
/*
* Ensure that we don't remap a partial EOF block in the middle of something
@@ -146,18 +157,9 @@ static int generic_remap_check_len(struct inode *inode_in,
}
/* Read a page's worth of file data into the page cache. */
-static struct folio *vfs_dedupe_get_folio(struct inode *inode, loff_t pos)
+static struct folio *vfs_dedupe_get_folio(struct file *file, loff_t pos)
{
- struct folio *folio;
-
- folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT, NULL);
- if (IS_ERR(folio))
- return folio;
- if (!folio_test_uptodate(folio)) {
- folio_put(folio);
- return ERR_PTR(-EIO);
- }
- return folio;
+ return read_mapping_folio(file->f_mapping, pos >> PAGE_SHIFT, file);
}
/*
@@ -187,8 +189,8 @@ static void vfs_unlock_two_folios(struct folio *folio1, struct folio *folio2)
* Compare extents of two files to see if they are the same.
* Caller must have locked both inodes to prevent write races.
*/
-static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
- struct inode *dest, loff_t dstoff,
+static int vfs_dedupe_file_range_compare(struct file *src, loff_t srcoff,
+ struct file *dest, loff_t dstoff,
loff_t len, bool *is_same)
{
bool same = true;
@@ -224,8 +226,8 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
* someone is invalidating pages on us and we lose.
*/
if (!folio_test_uptodate(src_folio) || !folio_test_uptodate(dst_folio) ||
- src_folio->mapping != src->i_mapping ||
- dst_folio->mapping != dest->i_mapping) {
+ src_folio->mapping != src->f_mapping ||
+ dst_folio->mapping != dest->f_mapping) {
same = false;
goto unlock;
}
@@ -271,9 +273,11 @@ out_error:
* If there's an error, then the usual negative error code is returned.
* Otherwise returns 0 with *len set to the request length.
*/
-int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t *len, unsigned int remap_flags)
+int
+__generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *len, unsigned int remap_flags,
+ const struct iomap_ops *dax_read_ops)
{
struct inode *inode_in = file_inode(file_in);
struct inode *inode_out = file_inode(file_out);
@@ -309,7 +313,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
/* Check that we don't violate system file offset limits. */
ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
remap_flags);
- if (ret)
+ if (ret || *len == 0)
return ret;
/* Wait for the completion of any pending IOs on both files */
@@ -333,8 +337,15 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
if (remap_flags & REMAP_FILE_DEDUP) {
bool is_same = false;
- ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
- inode_out, pos_out, *len, &is_same);
+ if (!IS_DAX(inode_in))
+ ret = vfs_dedupe_file_range_compare(file_in, pos_in,
+ file_out, pos_out, *len, &is_same);
+ else if (dax_read_ops)
+ ret = dax_dedupe_file_range_compare(inode_in, pos_in,
+ inode_out, pos_out, *len, &is_same,
+ dax_read_ops);
+ else
+ return -EINVAL;
if (ret)
return ret;
if (!is_same)
@@ -343,7 +354,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
remap_flags);
- if (ret)
+ if (ret || *len == 0)
return ret;
/* If can't alter the file contents, we're done. */
@@ -352,21 +363,24 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
return ret;
}
+
+int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *len, unsigned int remap_flags)
+{
+ return __generic_remap_file_range_prep(file_in, pos_in, file_out,
+ pos_out, len, remap_flags, NULL);
+}
EXPORT_SYMBOL(generic_remap_file_range_prep);
-loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t len, unsigned int remap_flags)
+loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags)
{
loff_t ret;
WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
- /*
- * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
- * the same mount. Practically, they only need to be on the same file
- * system.
- */
if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
return -EXDEV;
@@ -385,8 +399,10 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
if (ret)
return ret;
+ file_start_write(file_out);
ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out, len, remap_flags);
+ file_end_write(file_out);
if (ret < 0)
return ret;
@@ -394,36 +410,21 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
fsnotify_modify(file_out);
return ret;
}
-EXPORT_SYMBOL(do_clone_file_range);
-
-loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t len, unsigned int remap_flags)
-{
- loff_t ret;
-
- file_start_write(file_out);
- ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
- remap_flags);
- file_end_write(file_out);
-
- return ret;
-}
EXPORT_SYMBOL(vfs_clone_file_range);
/* Check whether we are allowed to dedupe the destination file */
-static bool allow_file_dedupe(struct file *file)
+static bool may_dedupe_file(struct file *file)
{
- struct user_namespace *mnt_userns = file_mnt_user_ns(file);
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
struct inode *inode = file_inode(file);
if (capable(CAP_SYS_ADMIN))
return true;
if (file->f_mode & FMODE_WRITE)
return true;
- if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)))
+ if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid()))
return true;
- if (!inode_permission(mnt_userns, inode, MAY_WRITE))
+ if (!inode_permission(idmap, inode, MAY_WRITE))
return true;
return false;
}
@@ -437,28 +438,33 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
REMAP_FILE_CAN_SHORTEN));
- ret = mnt_want_write_file(dst_file);
- if (ret)
- return ret;
-
/*
* This is redundant if called from vfs_dedupe_file_range(), but other
* callers need it and it's not performance sesitive...
*/
ret = remap_verify_area(src_file, src_pos, len, false);
if (ret)
- goto out_drop_write;
+ return ret;
ret = remap_verify_area(dst_file, dst_pos, len, true);
if (ret)
- goto out_drop_write;
+ return ret;
+
+ /*
+ * This needs to be called after remap_verify_area() because of
+ * sb_start_write() and before may_dedupe_file() because the mount's
+ * MAY_WRITE need to be checked with mnt_get_write_access_file() held.
+ */
+ ret = mnt_want_write_file(dst_file);
+ if (ret)
+ return ret;
ret = -EPERM;
- if (!allow_file_dedupe(dst_file))
+ if (!may_dedupe_file(dst_file))
goto out_drop_write;
ret = -EXDEV;
- if (src_file->f_path.mnt != dst_file->f_path.mnt)
+ if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb)
goto out_drop_write;
ret = -EISDIR;
@@ -530,20 +536,19 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
}
for (i = 0, info = same->info; i < count; i++, info++) {
- struct fd dst_fd = fdget(info->dest_fd);
- struct file *dst_file = dst_fd.file;
+ CLASS(fd, dst_fd)(info->dest_fd);
- if (!dst_file) {
+ if (fd_empty(dst_fd)) {
info->status = -EBADF;
goto next_loop;
}
if (info->reserved) {
info->status = -EINVAL;
- goto next_fdput;
+ goto next_loop;
}
- deduped = vfs_dedupe_file_range_one(file, off, dst_file,
+ deduped = vfs_dedupe_file_range_one(file, off, fd_file(dst_fd),
info->dest_offset, len,
REMAP_FILE_CAN_SHORTEN);
if (deduped == -EBADE)
@@ -553,8 +558,6 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
else
info->bytes_deduped = len;
-next_fdput:
- fdput(dst_fd);
next_loop:
if (fatal_signal_pending(current))
break;