diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-31 13:00:16 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-31 13:00:16 -0700 |
| commit | 15c981d16d70e8a5be297fa4af07a64ab7e080ed (patch) | |
| tree | 9487ba1525d75501cd1a3896dac4e6321efd3a55 /fs/btrfs/ordered-data.c | |
| parent | 1455c69900c8c6442b182a74087931f4ffb1cac4 (diff) | |
| parent | 6ff06729c22ec0b7498d900d79cc88cfb8aceaeb (diff) | |
Merge tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"A number of core changes that make things work better in general, code
is simpler and cleaner.
Core changes:
- per-inode file extent tree, for in memory tracking of contiguous
extent ranges to make sure i_size adjustments are accurate
- tree root structures are protected by reference counts, replacing
SRCU that did not cover some cases
- leak detector for tree root structures
- per-transaction pinned extent tracking
- buffer heads are replaced by bios for super block access
- speedup of extent back reference resolution, on an example test
scenario the runtime of send went down from a hour to minutes
- factor out locking scheme used for subvolume writer and NOCOW
exclusion, abstracted as DREW lock, double reader-writer exclusion
(allow either readers or writers)
- cleanup and abstract extent allocation policies, preparation for
zoned device support
- make reflink/clone_range work on inline extents
- add more cancellation point for relocation, improves long response
from 'balance cancel'
- add page migration callback for data pages
- switch to guid for uuids, with additional cleanups of the interface
- make ranged full fsyncs more efficient
- removal of obsolete ioctl flag BTRFS_SUBVOL_CREATE_ASYNC
- remove b-tree readahead from delayed refs paths, avoiding seek and
read unnecessary blocks
Features:
- v2 of ioctl to delete subvolumes, allowing to delete by id and more
future extensions
Fixes:
- fix qgroup rescan worker that could block umount
- fix crash during unmount due to race with delayed inode workers
- fix dellaloc flushing logic that could create unnecessary chunks
under heavy load
- fix missing file extent item for hole after ranged fsync
- several fixes in relocation error handling
Other:
- more documentation of relocation, device replace, space
reservations
- many random cleanups"
* tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (210 commits)
btrfs: fix missing semaphore unlock in btrfs_sync_file
btrfs: use nofs allocations for running delayed items
btrfs: sysfs: Use scnprintf() instead of snprintf()
btrfs: do not resolve backrefs for roots that are being deleted
btrfs: track reloc roots based on their commit root bytenr
btrfs: restart relocate_tree_blocks properly
btrfs: reloc: reorder reservation before root selection
btrfs: do not readahead in build_backref_tree
btrfs: do not use readahead for running delayed refs
btrfs: Remove async_transid from btrfs_mksubvol/create_subvol/create_snapshot
btrfs: Remove transid argument from btrfs_ioctl_snap_create_transid
btrfs: Remove BTRFS_SUBVOL_CREATE_ASYNC support
btrfs: kill the subvol_srcu
btrfs: make btrfs_cleanup_fs_roots use the radix tree lock
btrfs: don't take an extra root ref at allocation time
btrfs: hold a ref on the root on the dead roots list
btrfs: make inodes hold a ref on their roots
btrfs: move the root freeing stuff into btrfs_put_root
btrfs: move ino_cache_inode dropping out of btrfs_free_fs_root
btrfs: make the extent buffer leak check per fs info
...
Diffstat (limited to 'fs/btrfs/ordered-data.c')
| -rw-r--r-- | fs/btrfs/ordered-data.c | 140 |
1 files changed, 5 insertions, 135 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a65f189a5b94..e13b3d28c063 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -580,7 +580,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, while (!list_empty(&splice) && nr) { root = list_first_entry(&splice, struct btrfs_root, ordered_root); - root = btrfs_grab_fs_root(root); + root = btrfs_grab_root(root); BUG_ON(!root); list_move_tail(&root->ordered_root, &fs_info->ordered_roots); @@ -588,7 +588,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, done = btrfs_wait_ordered_extents(root, nr, range_start, range_len); - btrfs_put_fs_root(root); + btrfs_put_root(root); spin_lock(&fs_info->ordered_root_lock); if (nr != U64_MAX) { @@ -786,134 +786,6 @@ out: } /* - * After an extent is done, call this to conditionally update the on disk - * i_size. i_size is updated to cover any fully written part of the file. - */ -int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, - struct btrfs_ordered_extent *ordered) -{ - struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; - u64 disk_i_size; - u64 new_i_size; - u64 i_size = i_size_read(inode); - struct rb_node *node; - struct rb_node *prev = NULL; - struct btrfs_ordered_extent *test; - int ret = 1; - u64 orig_offset = offset; - - spin_lock_irq(&tree->lock); - if (ordered) { - offset = entry_end(ordered); - if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) - offset = min(offset, - ordered->file_offset + - ordered->truncated_len); - } else { - offset = ALIGN(offset, btrfs_inode_sectorsize(inode)); - } - disk_i_size = BTRFS_I(inode)->disk_i_size; - - /* - * truncate file. - * If ordered is not NULL, then this is called from endio and - * disk_i_size will be updated by either truncate itself or any - * in-flight IOs which are inside the disk_i_size. - * - * Because btrfs_setsize() may set i_size with disk_i_size if truncate - * fails somehow, we need to make sure we have a precise disk_i_size by - * updating it as usual. - * - */ - if (!ordered && disk_i_size > i_size) { - BTRFS_I(inode)->disk_i_size = orig_offset; - ret = 0; - goto out; - } - - /* - * if the disk i_size is already at the inode->i_size, or - * this ordered extent is inside the disk i_size, we're done - */ - if (disk_i_size == i_size) - goto out; - - /* - * We still need to update disk_i_size if outstanding_isize is greater - * than disk_i_size. - */ - if (offset <= disk_i_size && - (!ordered || ordered->outstanding_isize <= disk_i_size)) - goto out; - - /* - * walk backward from this ordered extent to disk_i_size. - * if we find an ordered extent then we can't update disk i_size - * yet - */ - if (ordered) { - node = rb_prev(&ordered->rb_node); - } else { - prev = tree_search(tree, offset); - /* - * we insert file extents without involving ordered struct, - * so there should be no ordered struct cover this offset - */ - if (prev) { - test = rb_entry(prev, struct btrfs_ordered_extent, - rb_node); - BUG_ON(offset_in_entry(test, offset)); - } - node = prev; - } - for (; node; node = rb_prev(node)) { - test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - - /* We treat this entry as if it doesn't exist */ - if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) - continue; - - if (entry_end(test) <= disk_i_size) - break; - if (test->file_offset >= i_size) - break; - - /* - * We don't update disk_i_size now, so record this undealt - * i_size. Or we will not know the real i_size. - */ - if (test->outstanding_isize < offset) - test->outstanding_isize = offset; - if (ordered && - ordered->outstanding_isize > test->outstanding_isize) - test->outstanding_isize = ordered->outstanding_isize; - goto out; - } - new_i_size = min_t(u64, offset, i_size); - - /* - * Some ordered extents may completed before the current one, and - * we hold the real i_size in ->outstanding_isize. - */ - if (ordered && ordered->outstanding_isize > new_i_size) - new_i_size = min_t(u64, ordered->outstanding_isize, i_size); - BTRFS_I(inode)->disk_i_size = new_i_size; - ret = 0; -out: - /* - * We need to do this because we can't remove ordered extents until - * after the i_disk_size has been updated and then the inode has been - * updated to reflect the change, so we need to tell anybody who finds - * this ordered extent that we've already done all the real work, we - * just haven't completed all the other work. - */ - if (ordered) - set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags); - spin_unlock_irq(&tree->lock); - return ret; -} - -/* * search the ordered extents for one corresponding to 'offset' and * try to find a checksum. This is used because we allow pages to * be reclaimed before their checksum is actually put into the btree @@ -963,7 +835,6 @@ out: * btrfs_flush_ordered_range - Lock the passed range and ensures all pending * ordered extents in it are run to completion. * - * @tree: IO tree used for locking out other users of the range * @inode: Inode whose ordered tree is to be searched * @start: Beginning of range to flush * @end: Last byte of range to lock @@ -973,8 +844,7 @@ out: * This function always returns with the given range locked, ensuring after it's * called no order extent can be pending. */ -void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree, - struct btrfs_inode *inode, u64 start, +void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start, u64 end, struct extent_state **cached_state) { @@ -986,7 +856,7 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree, cachedp = cached_state; while (1) { - lock_extent_bits(tree, start, end, cachedp); + lock_extent_bits(&inode->io_tree, start, end, cachedp); ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1); if (!ordered) { @@ -999,7 +869,7 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree, refcount_dec(&cache->refs); break; } - unlock_extent_cached(tree, start, end, cachedp); + unlock_extent_cached(&inode->io_tree, start, end, cachedp); btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1); btrfs_put_ordered_extent(ordered); } |
