diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:47:51 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:47:51 -0800 |
commit | 149c51f876322d9bfbd5e2d6ffae7aff3d794384 (patch) | |
tree | a61c7dd828356e307fca06fc66dbdbf9b109c18f /fs/btrfs/transaction.c | |
parent | 97971df811b8854882c0f6c6631e23ab8cdcc44f (diff) | |
parent | b7af0635c87ff78d6bd523298ab7471f9ffd3ce5 (diff) |
Merge tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"This round there are a lot of cleanups and moved code so the diffstat
looks huge, otherwise there are some nice performance improvements and
an update to raid56 reliability.
User visible features:
- raid56 reliability vs performance trade off:
- fix destructive RMW for raid5 data (raid6 still needs work): do
full checksum verification for all data during RMW cycle, this
should prevent rewriting potentially corrupted data without
notice
- stripes are cached in memory which should reduce the performance
impact but still can hurt some workloads
- checksums are verified after repair again
- this is the last option without introducing additional features
(write intent bitmap, journal, another tree), the extra checksum
read/verification was supposed to be avoided by the original
implementation exactly for performance reasons but that caused
all the reliability problems
- discard=async by default for devices that support it
- implement emergency flush reserve to avoid almost all unnecessary
transaction aborts due to ENOSPC in cases where there are too many
delayed refs or delayed allocation
- skip block group synchronization if there's no change in used
bytes, can reduce transaction commit count for some workloads
Performance improvements:
- fiemap and lseek:
- overall speedup due to skipping unnecessary or duplicate
searches (-40% run time)
- cache some data structures and sharedness of extents (-30% run
time)
- send:
- faster backref resolution when finding clones
- cached leaf to root mapping for faster backref walking
- improved clone/sharing detection
- overall run time improvements (-70%)
Core:
- module initialization converted to a table of function pointers run
in a sequence
- preparation for fscrypt, extend passing file names across calls,
dir item can store encryption status
- raid56 updates:
- more accurate error tracking of sectors within stripe
- simplify recovery path and remove dedicated endio worker kthread
- simplify scrub call paths
- refactoring to support the extra data checksum verification
during RMW cycle
- tree block parentness checks consolidated and done at metadata read
time
- improved error handling
- cleanups:
- move a lot of code for better synchronization between kernel and
user space sources, split big files
- enum cleanups
- GFP flag cleanups
- header file cleanups, prototypes, dependencies
- redundant parameter cleanups
- inline extent handling simplifications
- inode parameter conversion
- data structure cleanups, reductions, renames, merges"
* tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (249 commits)
btrfs: print transaction aborted messages with an error level
btrfs: sync some cleanups from progs into uapi/btrfs.h
btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range
btrfs: fix extent map use-after-free when handling missing device in read_one_chunk
btrfs: remove outdated logic from overwrite_item() and add assertion
btrfs: unify overwrite_item() and do_overwrite_item()
btrfs: replace strncpy() with strscpy()
btrfs: fix uninitialized variable in find_first_clear_extent_bit
btrfs: fix uninitialized parent in insert_state
btrfs: add might_sleep() annotations
btrfs: add stack helpers for a few btrfs items
btrfs: add nr_global_roots to the super block definition
btrfs: remove BTRFS_LEAF_DATA_OFFSET
btrfs: add helpers for manipulating leaf items and data
btrfs: add eb to btrfs_node_key_ptr_offset
btrfs: pass the extent buffer for the btrfs_item_nr helpers
btrfs: move the csum helpers into ctree.h
btrfs: move eb offset helpers into extent_io.h
btrfs: move file_extent_item helpers into file-item.h
btrfs: move leaf_data_end into ctree.c
...
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 92 |
1 files changed, 55 insertions, 37 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d1f1da6820fb..b8c52e89688c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -6,6 +6,7 @@ #include <linux/fs.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/writeback.h> #include <linux/pagemap.h> #include <linux/blkdev.h> @@ -23,6 +24,18 @@ #include "block-group.h" #include "space-info.h" #include "zoned.h" +#include "fs.h" +#include "accessors.h" +#include "extent-tree.h" +#include "root-tree.h" +#include "defrag.h" +#include "dir-item.h" +#include "uuid-tree.h" +#include "ioctl.h" +#include "relocation.h" +#include "scrub.h" + +static struct kmem_cache *btrfs_trans_handle_cachep; #define BTRFS_ROOT_TRANS_TAG 0 @@ -365,9 +378,9 @@ loop: spin_lock_init(&cur_trans->releasing_ebs_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(fs_info, &cur_trans->dirty_pages, - IO_TREE_TRANS_DIRTY_PAGES, NULL); + IO_TREE_TRANS_DIRTY_PAGES); extent_io_tree_init(fs_info, &cur_trans->pinned_extents, - IO_TREE_FS_PINNED_EXTENTS, NULL); + IO_TREE_FS_PINNED_EXTENTS); fs_info->generation++; cur_trans->transid = fs_info->generation; fs_info->running_transaction = cur_trans; @@ -936,7 +949,7 @@ static bool should_end_transaction(struct btrfs_trans_handle *trans) if (btrfs_check_space_for_delayed_refs(fs_info)) return true; - return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5); + return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 50); } bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans) @@ -1607,10 +1620,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root = pending->root; struct btrfs_root *parent_root; struct btrfs_block_rsv *rsv; - struct inode *parent_inode; + struct inode *parent_inode = pending->dir; struct btrfs_path *path; struct btrfs_dir_item *dir_item; - struct dentry *dentry; struct extent_buffer *tmp; struct extent_buffer *old; struct timespec64 cur_time; @@ -1619,6 +1631,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, u64 index = 0; u64 objectid; u64 root_flags; + unsigned int nofs_flags; + struct fscrypt_name fname; ASSERT(pending->path); path = pending->path; @@ -1626,9 +1640,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ASSERT(pending->root_item); new_root_item = pending->root_item; + /* + * We're inside a transaction and must make sure that any potential + * allocations with GFP_KERNEL in fscrypt won't recurse back to + * filesystem. + */ + nofs_flags = memalloc_nofs_save(); + pending->error = fscrypt_setup_filename(parent_inode, + &pending->dentry->d_name, 0, + &fname); + memalloc_nofs_restore(nofs_flags); + if (pending->error) + goto free_pending; + pending->error = btrfs_get_free_objectid(tree_root, &objectid); if (pending->error) - goto no_free_objectid; + goto free_fname; /* * Make qgroup to skip current new snapshot's qgroupid, as it is @@ -1657,8 +1684,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, trace_btrfs_space_reservation(fs_info, "transaction", trans->transid, trans->bytes_reserved, 1); - dentry = pending->dentry; - parent_inode = pending->dir; parent_root = BTRFS_I(parent_inode)->root; ret = record_root_in_trans(trans, parent_root, 0); if (ret) @@ -1674,8 +1699,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, /* check if there is a file/dir which has the same name. */ dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, btrfs_ino(BTRFS_I(parent_inode)), - dentry->d_name.name, - dentry->d_name.len, 0); + &fname.disk_name, 0); if (dir_item != NULL && !IS_ERR(dir_item)) { pending->error = -EEXIST; goto dir_item_existed; @@ -1770,7 +1794,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_add_root_ref(trans, objectid, parent_root->root_key.objectid, btrfs_ino(BTRFS_I(parent_inode)), index, - dentry->d_name.name, dentry->d_name.len); + &fname.disk_name); if (ret) { btrfs_abort_transaction(trans, ret); goto fail; @@ -1802,9 +1826,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (ret < 0) goto fail; - ret = btrfs_insert_dir_item(trans, dentry->d_name.name, - dentry->d_name.len, BTRFS_I(parent_inode), - &key, BTRFS_FT_DIR, index); + ret = btrfs_insert_dir_item(trans, &fname.disk_name, + BTRFS_I(parent_inode), &key, BTRFS_FT_DIR, + index); /* We have check then name at the beginning, so it is impossible. */ BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); if (ret) { @@ -1813,7 +1837,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + - dentry->d_name.len * 2); + fname.disk_name.len * 2); parent_inode->i_mtime = current_time(parent_inode); parent_inode->i_ctime = parent_inode->i_mtime; ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode)); @@ -1845,7 +1869,9 @@ dir_item_existed: trans->bytes_reserved = 0; clear_skip_qgroup: btrfs_clear_skip_qgroup(trans); -no_free_objectid: +free_fname: + fscrypt_free_filename(&fname); +free_pending: kfree(new_root_item); pending->root_item = NULL; btrfs_free_path(path); @@ -2101,6 +2127,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ASSERT(refcount_read(&trans->use_count) == 1); btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); + clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags); + /* Stop the commit early if ->aborted is set */ if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; @@ -2354,12 +2382,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (ret) goto unlock_reloc; - /* - * Since the transaction is done, we can apply the pending changes - * before the next transaction. - */ - btrfs_apply_pending_changes(fs_info); - /* commit_fs_roots gets rid of all the tree log roots, it is now * safe to free the root of tree log roots */ @@ -2582,21 +2604,17 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info) return (ret < 0) ? 0 : 1; } -void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info) +int __init btrfs_transaction_init(void) { - unsigned long prev; - unsigned long bit; - - prev = xchg(&fs_info->pending_changes, 0); - if (!prev) - return; - - bit = 1 << BTRFS_PENDING_COMMIT; - if (prev & bit) - btrfs_debug(fs_info, "pending commit done"); - prev &= ~bit; + btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle", + sizeof(struct btrfs_trans_handle), 0, + SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL); + if (!btrfs_trans_handle_cachep) + return -ENOMEM; + return 0; +} - if (prev) - btrfs_warn(fs_info, - "unknown pending changes left 0x%lx, ignoring", prev); +void __cold btrfs_transaction_exit(void) +{ + kmem_cache_destroy(btrfs_trans_handle_cachep); } |