From f2cb2f39ccc30fa13d3ac078d461031a63960e5b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Jun 2020 18:46:01 +0100 Subject: btrfs: fix hang on snapshot creation after RWF_NOWAIT write If we do a successful RWF_NOWAIT write we end up locking the snapshot lock of the inode, through a call to check_can_nocow(), but we never unlock it. This means the next attempt to create a snapshot on the subvolume will hang forever. Trivial reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ touch /mnt/foobar $ chattr +C /mnt/foobar $ xfs_io -d -c "pwrite -S 0xab 0 64K" /mnt/foobar $ xfs_io -d -c "pwrite -N -V 1 -S 0xfe 0 64K" /mnt/foobar $ btrfs subvolume snapshot -r /mnt /mnt/snap --> hangs Fix this by unlocking the snapshot lock if check_can_nocow() returned success. Fixes: edf064e7c6fec3 ("btrfs: nowait aio support") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2c14312b05e8..04faa04fccd1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1914,6 +1914,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, inode_unlock(inode); return -EAGAIN; } + /* check_can_nocow() locks the snapshot lock on success */ + btrfs_drew_write_unlock(&root->snapshot_lock); } current->backing_dev_info = inode_to_bdi(inode); -- cgit From 260a63395f90f67d6ab89e4266af9e3dc34a77e9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Jun 2020 18:49:13 +0100 Subject: btrfs: fix RWF_NOWAIT write not failling when we need to cow If we attempt to do a RWF_NOWAIT write against a file range for which we can only do NOCOW for a part of it, due to the existence of holes or shared extents for example, we proceed with the write as if it were possible to NOCOW the whole range. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ touch /mnt/sdj/bar $ chattr +C /mnt/sdj/bar $ xfs_io -d -c "pwrite -S 0xab -b 256K 0 256K" /mnt/bar wrote 262144/262144 bytes at offset 0 256 KiB, 1 ops; 0.0003 sec (694.444 MiB/sec and 2777.7778 ops/sec) $ xfs_io -c "fpunch 64K 64K" /mnt/bar $ sync $ xfs_io -d -c "pwrite -N -V 1 -b 128K -S 0xfe 0 128K" /mnt/bar wrote 131072/131072 bytes at offset 0 128 KiB, 1 ops; 0.0007 sec (160.051 MiB/sec and 1280.4097 ops/sec) This last write should fail with -EAGAIN since the file range from 64K to 128K is a hole. On xfs it fails, as expected, but on ext4 it currently succeeds because apparently it is expensive to check if there are extents allocated for the whole range, but I'll check with the ext4 people. Fix the issue by checking if check_can_nocow() returns a number of NOCOW'able bytes smaller then the requested number of bytes, and if it does return -EAGAIN. Fixes: edf064e7c6fec3 ("btrfs: nowait aio support") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 04faa04fccd1..6d5d905281c6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1904,18 +1904,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, pos = iocb->ki_pos; count = iov_iter_count(from); if (iocb->ki_flags & IOCB_NOWAIT) { + size_t nocow_bytes = count; + /* * We will allocate space in case nodatacow is not set, * so bail */ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) || - check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) { + check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes) <= 0) { inode_unlock(inode); return -EAGAIN; } /* check_can_nocow() locks the snapshot lock on success */ btrfs_drew_write_unlock(&root->snapshot_lock); + /* + * There are holes in the range or parts of the range that must + * be COWed (shared extents, RO block groups, etc), so just bail + * out. + */ + if (nocow_bytes < count) { + inode_unlock(inode); + return -EAGAIN; + } } current->backing_dev_info = inode_to_bdi(inode); -- cgit From 5dbb75ed6900048e146247b6325742d92c892548 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Jun 2020 18:49:39 +0100 Subject: btrfs: fix RWF_NOWAIT writes blocking on extent locks and waiting for IO A RWF_NOWAIT write is not supposed to wait on filesystem locks that can be held for a long time or for ongoing IO to complete. However when calling check_can_nocow(), if the inode has prealloc extents or has the NOCOW flag set, we can block on extent (file range) locks through the call to btrfs_lock_and_flush_ordered_range(). Such lock can take a significant amount of time to be available. For example, a fiemap task may be running, and iterating through the entire file range checking all extents and doing backref walking to determine if they are shared, or a readpage operation may be in progress. Also at btrfs_lock_and_flush_ordered_range(), called by check_can_nocow(), after locking the file range we wait for any existing ordered extent that is in progress to complete. Another operation that can take a significant amount of time and defeat the purpose of RWF_NOWAIT. So fix this by trying to lock the file range and if it's currently locked return -EAGAIN to user space. If we are able to lock the file range without waiting and there is an ordered extent in the range, return -EAGAIN as well, instead of waiting for it to complete. Finally, don't bother trying to lock the snapshot lock of the root when attempting a RWF_NOWAIT write, as that is only important for buffered writes. Fixes: edf064e7c6fec3 ("btrfs: nowait aio support") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6d5d905281c6..2520605afc25 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1533,7 +1533,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, } static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, - size_t *write_bytes) + size_t *write_bytes, bool nowait) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; @@ -1541,27 +1541,43 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, u64 num_bytes; int ret; - if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) + if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock)) return -EAGAIN; lockstart = round_down(pos, fs_info->sectorsize); lockend = round_up(pos + *write_bytes, fs_info->sectorsize) - 1; + num_bytes = lockend - lockstart + 1; - btrfs_lock_and_flush_ordered_range(inode, lockstart, - lockend, NULL); + if (nowait) { + struct btrfs_ordered_extent *ordered; + + if (!try_lock_extent(&inode->io_tree, lockstart, lockend)) + return -EAGAIN; + + ordered = btrfs_lookup_ordered_range(inode, lockstart, + num_bytes); + if (ordered) { + btrfs_put_ordered_extent(ordered); + ret = -EAGAIN; + goto out_unlock; + } + } else { + btrfs_lock_and_flush_ordered_range(inode, lockstart, + lockend, NULL); + } - num_bytes = lockend - lockstart + 1; ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, NULL, NULL, NULL); if (ret <= 0) { ret = 0; - btrfs_drew_write_unlock(&root->snapshot_lock); + if (!nowait) + btrfs_drew_write_unlock(&root->snapshot_lock); } else { *write_bytes = min_t(size_t, *write_bytes , num_bytes - pos + lockstart); } - +out_unlock: unlock_extent(&inode->io_tree, lockstart, lockend); return ret; @@ -1633,7 +1649,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) && check_can_nocow(BTRFS_I(inode), pos, - &write_bytes) > 0) { + &write_bytes, false) > 0) { /* * For nodata cow case, no need to reserve * data space. @@ -1912,12 +1928,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, */ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) || - check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes) <= 0) { + check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes, + true) <= 0) { inode_unlock(inode); return -EAGAIN; } - /* check_can_nocow() locks the snapshot lock on success */ - btrfs_drew_write_unlock(&root->snapshot_lock); /* * There are holes in the range or parts of the range that must * be COWed (shared extents, RO block groups, etc), so just bail -- cgit From 8730f12b7962b21ea9ad2756abce1e205d22db84 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 22 May 2020 10:19:22 -0600 Subject: btrfs: flag files as supporting buffered async reads btrfs uses generic_file_read_iter(), which already supports this. Acked-by: Chris Mason Signed-off-by: Jens Axboe --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2c14312b05e8..234a418eb6da 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3472,7 +3472,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) static int btrfs_file_open(struct inode *inode, struct file *filp) { - filp->f_mode |= FMODE_NOWAIT; + filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; return generic_file_open(inode, filp); } -- cgit From d77765911385b65fc82d74ab71b8983cddfe0b58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Jul 2020 18:22:06 +0200 Subject: btrfs: wire up iter_file_splice_write btrfs implements the iter_write op and thus can use the more efficient iov_iter based splice implementation. For now falling back to the less efficient default is pretty harmless, but I have a pending series that removes the default, and thus would cause btrfs to not support splice at all. Reported-by: Andy Lavr Tested-by: Andy Lavr Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2520605afc25..b0d2c976587e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3509,6 +3509,7 @@ const struct file_operations btrfs_file_operations = { .read_iter = generic_file_read_iter, .splice_read = generic_file_splice_read, .write_iter = btrfs_file_write_iter, + .splice_write = iter_file_splice_write, .mmap = btrfs_file_mmap, .open = btrfs_file_open, .release = btrfs_release_file, -- cgit From 6a3c7f5c87854e948c3c234e5f5e745c7c553722 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 28 May 2020 11:05:13 +0300 Subject: btrfs: don't balance btree inode pages from buffered write path The call to btrfs_btree_balance_dirty has been there since the early days of BTRFS, when the btree was directly modified from the write path, hence dirtied btree inode pages. With the implementation of b888db2bd7b6 ("Btrfs: Add delayed allocation to the extent based page tree code") 13 years ago the btree is no longer modified from the write path, hence there is no point in calling this function. Just remove it. Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b0d2c976587e..52422e2b3344 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1800,8 +1800,6 @@ again: cond_resched(); balance_dirty_pages_ratelimited(inode->i_mapping); - if (dirty_pages < (fs_info->nodesize >> PAGE_SHIFT) + 1) - btrfs_btree_balance_dirty(fs_info); pos += copied; num_written += copied; -- cgit From a7f8b1c2ac21bf081b41264c9cfd6260dffa6246 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 10 Jun 2020 09:04:42 +0800 Subject: btrfs: file: reserve qgroup space after the hole punch range is locked The incoming qgroup reserved space timing will move the data reservation to ordered extent completely. However in btrfs_punch_hole_lock_range() will call btrfs_invalidate_page(), which will clear QGROUP_RESERVED bit for the range. In current stage it's OK, but if we're making ordered extents handle the reserved space, then btrfs_punch_hole_lock_range() can clear the QGROUP_RESERVED bit before we submit ordered extent, leading to qgroup reserved space leakage. So here change the timing to make reserve data space after btrfs_punch_hole_lock_range(). The new timing is fine for either current code or the new code. Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 52422e2b3344..b15858e1d753 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3174,14 +3174,14 @@ reserve_space: if (ret < 0) goto out; space_reserved = true; - ret = btrfs_qgroup_reserve_data(inode, &data_reserved, - alloc_start, bytes_to_reserve); - if (ret) - goto out; ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend, &cached_state); if (ret) goto out; + ret = btrfs_qgroup_reserve_data(inode, &data_reserved, + alloc_start, bytes_to_reserve); + if (ret) + goto out; ret = btrfs_prealloc_file_range(inode, mode, alloc_start, alloc_end - alloc_start, i_blocksize(inode), -- cgit From 906c448c3dc3189d83bf644ec453d49737371b00 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Jun 2020 08:55:08 +0300 Subject: btrfs: make __btrfs_drop_extents take btrfs_inode It has only 4 uses of a vfs_inode for inode_sub_bytes but unifies the interface with the non __ prefixed version. Will also makes converting its callers to btrfs_inode easier. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b15858e1d753..4e530c9ac3ef 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -731,7 +731,7 @@ next: * is deleted from the tree. */ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, struct btrfs_inode *inode, struct btrfs_path *path, u64 start, u64 end, u64 *drop_end, int drop_cache, int replace_extent, @@ -744,7 +744,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_ref ref = { 0 }; struct btrfs_key key; struct btrfs_key new_key; - u64 ino = btrfs_ino(BTRFS_I(inode)); + struct inode *vfs_inode = &inode->vfs_inode; + u64 ino = btrfs_ino(inode); u64 search_start = start; u64 disk_bytenr = 0; u64 num_bytes = 0; @@ -762,9 +763,9 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, int leafs_visited = 0; if (drop_cache) - btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0); + btrfs_drop_extent_cache(inode, start, end - 1, 0); - if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) + if (start >= inode->disk_i_size && !replace_extent) modify_tree = 0; update_refs = (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) || @@ -935,7 +936,7 @@ next_slot: extent_end - end); btrfs_mark_buffer_dirty(leaf); if (update_refs && disk_bytenr > 0) - inode_sub_bytes(inode, end - key.offset); + inode_sub_bytes(vfs_inode, end - key.offset); break; } @@ -955,7 +956,7 @@ next_slot: start - key.offset); btrfs_mark_buffer_dirty(leaf); if (update_refs && disk_bytenr > 0) - inode_sub_bytes(inode, extent_end - start); + inode_sub_bytes(vfs_inode, extent_end - start); if (end == extent_end) break; @@ -979,7 +980,7 @@ delete_extent_item: if (update_refs && extent_type == BTRFS_FILE_EXTENT_INLINE) { - inode_sub_bytes(inode, + inode_sub_bytes(vfs_inode, extent_end - key.offset); extent_end = ALIGN(extent_end, fs_info->sectorsize); @@ -993,7 +994,7 @@ delete_extent_item: key.offset - extent_offset); ret = btrfs_free_extent(trans, &ref); BUG_ON(ret); /* -ENOMEM */ - inode_sub_bytes(inode, + inode_sub_bytes(vfs_inode, extent_end - key.offset); } @@ -1082,8 +1083,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, - drop_cache, 0, 0, NULL); + ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, start, + end, NULL, drop_cache, 0, 0, NULL); btrfs_free_path(path); return ret; } @@ -2596,7 +2597,7 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, cur_offset = start; while (cur_offset < end) { - ret = __btrfs_drop_extents(trans, root, inode, path, + ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, cur_offset, end + 1, &drop_end, 1, 0, 0, NULL); if (ret != -ENOSPC) { -- cgit From 6d4572a9d71d5fc2affee0258d8582d39859188c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 24 Jun 2020 07:23:50 +0800 Subject: btrfs: allow btrfs_truncate_block() to fallback to nocow for data space reservation [BUG] When the data space is exhausted, even if the inode has NOCOW attribute, we will still refuse to truncate unaligned range due to ENOSPC. The following script can reproduce it pretty easily: #!/bin/bash dev=/dev/test/test mnt=/mnt/btrfs umount $dev &> /dev/null umount $mnt &> /dev/null mkfs.btrfs -f $dev -b 1G mount -o nospace_cache $dev $mnt touch $mnt/foobar chattr +C $mnt/foobar xfs_io -f -c "pwrite -b 4k 0 4k" $mnt/foobar > /dev/null xfs_io -f -c "pwrite -b 4k 0 1G" $mnt/padding &> /dev/null sync xfs_io -c "fpunch 0 2k" $mnt/foobar umount $mnt Currently this will fail at the fpunch part. [CAUSE] Because btrfs_truncate_block() always reserves space without checking the NOCOW attribute. Since the writeback path follows NOCOW bit, we only need to bother the space reservation code in btrfs_truncate_block(). [FIX] Make btrfs_truncate_block() follow btrfs_buffered_write() to try to reserve data space first, and fall back to NOCOW check only when we don't have enough space. Such always-try-reserve is an optimization introduced in btrfs_buffered_write(), to avoid expensive btrfs_check_can_nocow() call. This patch will export check_can_nocow() as btrfs_check_can_nocow(), and use it in btrfs_truncate_block() to fix the problem. Reported-by: Martin Doucha Reviewed-by: Filipe Manana Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4e530c9ac3ef..7aa184493aea 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1533,8 +1533,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, return ret; } -static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, - size_t *write_bytes, bool nowait) +int btrfs_check_can_nocow(struct btrfs_inode *inode, loff_t pos, + size_t *write_bytes, bool nowait) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; @@ -1649,8 +1649,8 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, if (ret < 0) { if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) && - check_can_nocow(BTRFS_I(inode), pos, - &write_bytes, false) > 0) { + btrfs_check_can_nocow(BTRFS_I(inode), pos, + &write_bytes, false) > 0) { /* * For nodata cow case, no need to reserve * data space. @@ -1927,8 +1927,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, */ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) || - check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes, - true) <= 0) { + btrfs_check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes, + true) <= 0) { inode_unlock(inode); return -EAGAIN; } -- cgit From e4ecaf90bc13e2a9c351d5cd86d4094844d7d7bd Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 24 Jun 2020 07:23:51 +0800 Subject: btrfs: add comments for btrfs_check_can_nocow() and can_nocow_extent() These two functions have extra conditions that their callers need to meet, and some not-that-common parameters used for return value. So adding some comments may save reviewers some time. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7aa184493aea..b750000b438a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1533,6 +1533,27 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, return ret; } +/* + * Check if we can do nocow write into the range [@pos, @pos + @write_bytes) + * + * @pos: File offset + * @write_bytes: The length to write, will be updated to the nocow writeable + * range + * @nowait: Whether this function could sleep + * + * This function will flush ordered extents in the range to ensure proper + * nocow checks for (nowait == false) case. + * + * Return: + * >0 and update @write_bytes if we can do nocow write + * 0 if we can't do nocow write + * -EAGAIN if we can't get the needed lock or there are ordered extents + * for * (nowait == true) case + * <0 if other error happened + * + * NOTE: For wait (nowait == false) calls, callers need to release the drew + * write lock of inode->root->snapshot_lock when return value > 0. + */ int btrfs_check_can_nocow(struct btrfs_inode *inode, loff_t pos, size_t *write_bytes, bool nowait) { -- cgit From 38d37aa9c32938214ca071fe02762f55b89937fd Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 24 Jun 2020 07:23:52 +0800 Subject: btrfs: refactor btrfs_check_can_nocow() into two variants The function btrfs_check_can_nocow() now has two completely different call patterns. For nowait variant, callers don't need to do any cleanup. While for wait variant, callers need to release the lock if they can do nocow write. This is somehow confusing, and is already a problem for the exported btrfs_check_can_nocow(). So this patch will separate the different patterns into different functions. For nowait variant, the function will be called check_nocow_nolock(). For wait variant, the function pair will be btrfs_check_nocow_lock() btrfs_check_nocow_unlock(). Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 83 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 35 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b750000b438a..760ddc11aa3f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1533,29 +1533,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, return ret; } -/* - * Check if we can do nocow write into the range [@pos, @pos + @write_bytes) - * - * @pos: File offset - * @write_bytes: The length to write, will be updated to the nocow writeable - * range - * @nowait: Whether this function could sleep - * - * This function will flush ordered extents in the range to ensure proper - * nocow checks for (nowait == false) case. - * - * Return: - * >0 and update @write_bytes if we can do nocow write - * 0 if we can't do nocow write - * -EAGAIN if we can't get the needed lock or there are ordered extents - * for * (nowait == true) case - * <0 if other error happened - * - * NOTE: For wait (nowait == false) calls, callers need to release the drew - * write lock of inode->root->snapshot_lock when return value > 0. - */ -int btrfs_check_can_nocow(struct btrfs_inode *inode, loff_t pos, - size_t *write_bytes, bool nowait) +static int check_can_nocow(struct btrfs_inode *inode, loff_t pos, + size_t *write_bytes, bool nowait) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; @@ -1563,6 +1542,9 @@ int btrfs_check_can_nocow(struct btrfs_inode *inode, loff_t pos, u64 num_bytes; int ret; + if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC))) + return 0; + if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock)) return -EAGAIN; @@ -1605,6 +1587,42 @@ out_unlock: return ret; } +static int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos, + size_t *write_bytes) +{ + return check_can_nocow(inode, pos, write_bytes, true); +} + +/* + * Check if we can do nocow write into the range [@pos, @pos + @write_bytes) + * + * @pos: File offset + * @write_bytes: The length to write, will be updated to the nocow writeable + * range + * + * This function will flush ordered extents in the range to ensure proper + * nocow checks. + * + * Return: + * >0 and update @write_bytes if we can do nocow write + * 0 if we can't do nocow write + * -EAGAIN if we can't get the needed lock or there are ordered extents + * for * (nowait == true) case + * <0 if other error happened + * + * NOTE: Callers need to release the lock by btrfs_check_nocow_unlock(). + */ +int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, + size_t *write_bytes) +{ + return check_can_nocow(inode, pos, write_bytes, false); +} + +void btrfs_check_nocow_unlock(struct btrfs_inode *inode) +{ + btrfs_drew_write_unlock(&inode->root->snapshot_lock); +} + static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i) { @@ -1612,7 +1630,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, loff_t pos = iocb->ki_pos; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; struct extent_changeset *data_reserved = NULL; u64 release_bytes = 0; @@ -1668,10 +1685,8 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, ret = btrfs_check_data_free_space(inode, &data_reserved, pos, write_bytes); if (ret < 0) { - if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | - BTRFS_INODE_PREALLOC)) && - btrfs_check_can_nocow(BTRFS_I(inode), pos, - &write_bytes, false) > 0) { + if (btrfs_check_nocow_lock(BTRFS_I(inode), pos, + &write_bytes) > 0) { /* * For nodata cow case, no need to reserve * data space. @@ -1700,7 +1715,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, data_reserved, pos, write_bytes); else - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_check_nocow_unlock(BTRFS_I(inode)); break; } @@ -1804,7 +1819,7 @@ again: release_bytes = 0; if (only_release_metadata) - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_check_nocow_unlock(BTRFS_I(inode)); if (only_release_metadata && copied > 0) { lockstart = round_down(pos, @@ -1831,7 +1846,7 @@ again: if (release_bytes) { if (only_release_metadata) { - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_check_nocow_unlock(BTRFS_I(inode)); btrfs_delalloc_release_metadata(BTRFS_I(inode), release_bytes, true); } else { @@ -1946,10 +1961,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, * We will allocate space in case nodatacow is not set, * so bail */ - if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | - BTRFS_INODE_PREALLOC)) || - btrfs_check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes, - true) <= 0) { + if (check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes) + <= 0) { inode_unlock(inode); return -EAGAIN; } -- cgit From c2566f22893c8b8cdf443505c043b2ca9f5023f6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Jun 2020 08:55:35 +0300 Subject: btrfs: make btrfs_set_extent_delalloc take btrfs_inode Preparation to make btrfs_dirty_pages take btrfs_inode as parameter. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 760ddc11aa3f..b6c9921340f3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -546,7 +546,7 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, } } - err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, + err = btrfs_set_extent_delalloc(BTRFS_I(inode), start_pos, end_of_last_block, extra_bits, cached); if (err) return err; -- cgit From 088545f6e442605d4b897456ef0b34eae06bdc07 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Jun 2020 08:55:36 +0300 Subject: btrfs: make btrfs_dirty_pages take btrfs_inode There is a single use of the generic vfs_inode so let's take btrfs_inode as a parameter and remove couple of redundant BTRFS_I() calls. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b6c9921340f3..49013c6f2c14 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -500,18 +500,18 @@ next: * this also makes the decision about creating an inline extent vs * doing real data extents, marking pages dirty and delalloc as required. */ -int btrfs_dirty_pages(struct inode *inode, struct page **pages, +int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, struct extent_state **cached) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; int err = 0; int i; u64 num_bytes; u64 start_pos; u64 end_of_last_block; u64 end_pos = pos + write_bytes; - loff_t isize = i_size_read(inode); + loff_t isize = i_size_read(&inode->vfs_inode); unsigned int extra_bits = 0; start_pos = pos & ~((u64) fs_info->sectorsize - 1); @@ -524,13 +524,13 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, * The pages may have already been dirty, clear out old accounting so * we can set things up properly */ - clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, end_of_last_block, + clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, cached); - if (!btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (!btrfs_is_free_space_inode(inode)) { if (start_pos >= isize && - !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) { + !(inode->flags & BTRFS_INODE_PREALLOC)) { /* * There can't be any extents following eof in this case * so just set the delalloc new bit for the range @@ -538,15 +538,14 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, */ extra_bits |= EXTENT_DELALLOC_NEW; } else { - err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode), - start_pos, + err = btrfs_find_new_delalloc_bytes(inode, start_pos, num_bytes, cached); if (err) return err; } } - err = btrfs_set_extent_delalloc(BTRFS_I(inode), start_pos, end_of_last_block, + err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, extra_bits, cached); if (err) return err; @@ -564,7 +563,7 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, * at this time. */ if (end_pos > isize) - i_size_write(inode, end_pos); + i_size_write(&inode->vfs_inode, end_pos); return 0; } @@ -1795,8 +1794,9 @@ again: fs_info->sectorsize); if (copied > 0) - ret = btrfs_dirty_pages(inode, pages, dirty_pages, - pos, copied, &cached_state); + ret = btrfs_dirty_pages(BTRFS_I(inode), pages, + dirty_pages, pos, copied, + &cached_state); /* * If we have not locked the extent range, because the range's -- cgit From 7661a3e033ab782366e0e1f4b6aad0df3555fcbd Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Jun 2020 08:55:37 +0300 Subject: btrfs: make btrfs_qgroup_reserve_data take btrfs_inode There's only a single use of vfs_inode in a tracepoint so let's take btrfs_inode directly. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 49013c6f2c14..8d3c62b81088 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3213,7 +3213,7 @@ reserve_space: &cached_state); if (ret) goto out; - ret = btrfs_qgroup_reserve_data(inode, &data_reserved, + ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved, alloc_start, bytes_to_reserve); if (ret) goto out; @@ -3383,8 +3383,9 @@ static long btrfs_fallocate(struct file *file, int mode, free_extent_map(em); break; } - ret = btrfs_qgroup_reserve_data(inode, &data_reserved, - cur_offset, last_byte - cur_offset); + ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), + &data_reserved, cur_offset, + last_byte - cur_offset); if (ret < 0) { cur_offset = last_byte; free_extent_map(em); -- cgit From 25ce28caaa1ddc2ef8848c5a09e63a9bc0a5d455 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Jun 2020 08:55:39 +0300 Subject: btrfs: make btrfs_free_reserved_data_space take btrfs_inode It only uses btrfs_inode internally so take it as a parameter. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8d3c62b81088..96f2238a361b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1710,7 +1710,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, reserve_bytes); if (ret) { if (!only_release_metadata) - btrfs_free_reserved_data_space(inode, + btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, pos, write_bytes); else @@ -3232,7 +3232,7 @@ reserve_space: ret = btrfs_fallocate_update_isize(inode, offset + len, mode); out: if (ret && space_reserved) - btrfs_free_reserved_data_space(inode, data_reserved, + btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, alloc_start, bytes_to_reserve); extent_changeset_free(data_reserved); @@ -3397,8 +3397,9 @@ static long btrfs_fallocate(struct file *file, int mode, * range, free reserved data space first, otherwise * it'll result in false ENOSPC error. */ - btrfs_free_reserved_data_space(inode, data_reserved, - cur_offset, last_byte - cur_offset); + btrfs_free_reserved_data_space(BTRFS_I(inode), + data_reserved, cur_offset, + last_byte - cur_offset); } free_extent_map(em); cur_offset = last_byte; @@ -3415,7 +3416,7 @@ static long btrfs_fallocate(struct file *file, int mode, range->len, i_blocksize(inode), offset + len, &alloc_hint); else - btrfs_free_reserved_data_space(inode, + btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, range->start, range->len); list_del(&range->list); @@ -3436,7 +3437,7 @@ out: inode_unlock(inode); /* Let go of our reservation. */ if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE)) - btrfs_free_reserved_data_space(inode, data_reserved, + btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, cur_offset, alloc_end - cur_offset); extent_changeset_free(data_reserved); return ret; -- cgit From 86d52921a2ba51a78e5bfb71c75aedcbd9e61a5c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Jun 2020 08:55:40 +0300 Subject: btrfs: make btrfs_delalloc_release_space take btrfs_inode It needs btrfs_inode so take it as a parameter directly. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 96f2238a361b..f6c5d94f30f9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1784,7 +1784,7 @@ again: __pos = round_down(pos, fs_info->sectorsize) + (dirty_pages << PAGE_SHIFT); - btrfs_delalloc_release_space(inode, + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, __pos, release_bytes, true); } @@ -1850,7 +1850,8 @@ again: btrfs_delalloc_release_metadata(BTRFS_I(inode), release_bytes, true); } else { - btrfs_delalloc_release_space(inode, data_reserved, + btrfs_delalloc_release_space(BTRFS_I(inode), + data_reserved, round_down(pos, fs_info->sectorsize), release_bytes, true); } -- cgit From 36ea6f3e931391c2adbb38af8c5dd4a043d26ac5 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Jun 2020 08:55:41 +0300 Subject: btrfs: make btrfs_check_data_free_space take btrfs_inode Instead of calling BTRFS_I on the passed vfs_inode take btrfs_inode directly. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f6c5d94f30f9..841c516079a9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1681,7 +1681,8 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, fs_info->sectorsize); extent_changeset_release(data_reserved); - ret = btrfs_check_data_free_space(inode, &data_reserved, pos, + ret = btrfs_check_data_free_space(BTRFS_I(inode), + &data_reserved, pos, write_bytes); if (ret < 0) { if (btrfs_check_nocow_lock(BTRFS_I(inode), pos, -- cgit