summaryrefslogtreecommitdiff
path: root/fs/btrfs/transaction.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-12 20:47:51 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-12 20:47:51 -0800
commit149c51f876322d9bfbd5e2d6ffae7aff3d794384 (patch)
treea61c7dd828356e307fca06fc66dbdbf9b109c18f /fs/btrfs/transaction.c
parent97971df811b8854882c0f6c6631e23ab8cdcc44f (diff)
parentb7af0635c87ff78d6bd523298ab7471f9ffd3ce5 (diff)
Merge tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "This round there are a lot of cleanups and moved code so the diffstat looks huge, otherwise there are some nice performance improvements and an update to raid56 reliability. User visible features: - raid56 reliability vs performance trade off: - fix destructive RMW for raid5 data (raid6 still needs work): do full checksum verification for all data during RMW cycle, this should prevent rewriting potentially corrupted data without notice - stripes are cached in memory which should reduce the performance impact but still can hurt some workloads - checksums are verified after repair again - this is the last option without introducing additional features (write intent bitmap, journal, another tree), the extra checksum read/verification was supposed to be avoided by the original implementation exactly for performance reasons but that caused all the reliability problems - discard=async by default for devices that support it - implement emergency flush reserve to avoid almost all unnecessary transaction aborts due to ENOSPC in cases where there are too many delayed refs or delayed allocation - skip block group synchronization if there's no change in used bytes, can reduce transaction commit count for some workloads Performance improvements: - fiemap and lseek: - overall speedup due to skipping unnecessary or duplicate searches (-40% run time) - cache some data structures and sharedness of extents (-30% run time) - send: - faster backref resolution when finding clones - cached leaf to root mapping for faster backref walking - improved clone/sharing detection - overall run time improvements (-70%) Core: - module initialization converted to a table of function pointers run in a sequence - preparation for fscrypt, extend passing file names across calls, dir item can store encryption status - raid56 updates: - more accurate error tracking of sectors within stripe - simplify recovery path and remove dedicated endio worker kthread - simplify scrub call paths - refactoring to support the extra data checksum verification during RMW cycle - tree block parentness checks consolidated and done at metadata read time - improved error handling - cleanups: - move a lot of code for better synchronization between kernel and user space sources, split big files - enum cleanups - GFP flag cleanups - header file cleanups, prototypes, dependencies - redundant parameter cleanups - inline extent handling simplifications - inode parameter conversion - data structure cleanups, reductions, renames, merges" * tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (249 commits) btrfs: print transaction aborted messages with an error level btrfs: sync some cleanups from progs into uapi/btrfs.h btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range btrfs: fix extent map use-after-free when handling missing device in read_one_chunk btrfs: remove outdated logic from overwrite_item() and add assertion btrfs: unify overwrite_item() and do_overwrite_item() btrfs: replace strncpy() with strscpy() btrfs: fix uninitialized variable in find_first_clear_extent_bit btrfs: fix uninitialized parent in insert_state btrfs: add might_sleep() annotations btrfs: add stack helpers for a few btrfs items btrfs: add nr_global_roots to the super block definition btrfs: remove BTRFS_LEAF_DATA_OFFSET btrfs: add helpers for manipulating leaf items and data btrfs: add eb to btrfs_node_key_ptr_offset btrfs: pass the extent buffer for the btrfs_item_nr helpers btrfs: move the csum helpers into ctree.h btrfs: move eb offset helpers into extent_io.h btrfs: move file_extent_item helpers into file-item.h btrfs: move leaf_data_end into ctree.c ...
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r--fs/btrfs/transaction.c92
1 files changed, 55 insertions, 37 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index d1f1da6820fb..b8c52e89688c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -6,6 +6,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
@@ -23,6 +24,18 @@
#include "block-group.h"
#include "space-info.h"
#include "zoned.h"
+#include "fs.h"
+#include "accessors.h"
+#include "extent-tree.h"
+#include "root-tree.h"
+#include "defrag.h"
+#include "dir-item.h"
+#include "uuid-tree.h"
+#include "ioctl.h"
+#include "relocation.h"
+#include "scrub.h"
+
+static struct kmem_cache *btrfs_trans_handle_cachep;
#define BTRFS_ROOT_TRANS_TAG 0
@@ -365,9 +378,9 @@ loop:
spin_lock_init(&cur_trans->releasing_ebs_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
- IO_TREE_TRANS_DIRTY_PAGES, NULL);
+ IO_TREE_TRANS_DIRTY_PAGES);
extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
- IO_TREE_FS_PINNED_EXTENTS, NULL);
+ IO_TREE_FS_PINNED_EXTENTS);
fs_info->generation++;
cur_trans->transid = fs_info->generation;
fs_info->running_transaction = cur_trans;
@@ -936,7 +949,7 @@ static bool should_end_transaction(struct btrfs_trans_handle *trans)
if (btrfs_check_space_for_delayed_refs(fs_info))
return true;
- return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5);
+ return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 50);
}
bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
@@ -1607,10 +1620,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_root *root = pending->root;
struct btrfs_root *parent_root;
struct btrfs_block_rsv *rsv;
- struct inode *parent_inode;
+ struct inode *parent_inode = pending->dir;
struct btrfs_path *path;
struct btrfs_dir_item *dir_item;
- struct dentry *dentry;
struct extent_buffer *tmp;
struct extent_buffer *old;
struct timespec64 cur_time;
@@ -1619,6 +1631,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
u64 index = 0;
u64 objectid;
u64 root_flags;
+ unsigned int nofs_flags;
+ struct fscrypt_name fname;
ASSERT(pending->path);
path = pending->path;
@@ -1626,9 +1640,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ASSERT(pending->root_item);
new_root_item = pending->root_item;
+ /*
+ * We're inside a transaction and must make sure that any potential
+ * allocations with GFP_KERNEL in fscrypt won't recurse back to
+ * filesystem.
+ */
+ nofs_flags = memalloc_nofs_save();
+ pending->error = fscrypt_setup_filename(parent_inode,
+ &pending->dentry->d_name, 0,
+ &fname);
+ memalloc_nofs_restore(nofs_flags);
+ if (pending->error)
+ goto free_pending;
+
pending->error = btrfs_get_free_objectid(tree_root, &objectid);
if (pending->error)
- goto no_free_objectid;
+ goto free_fname;
/*
* Make qgroup to skip current new snapshot's qgroupid, as it is
@@ -1657,8 +1684,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
trace_btrfs_space_reservation(fs_info, "transaction",
trans->transid,
trans->bytes_reserved, 1);
- dentry = pending->dentry;
- parent_inode = pending->dir;
parent_root = BTRFS_I(parent_inode)->root;
ret = record_root_in_trans(trans, parent_root, 0);
if (ret)
@@ -1674,8 +1699,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
/* check if there is a file/dir which has the same name. */
dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
btrfs_ino(BTRFS_I(parent_inode)),
- dentry->d_name.name,
- dentry->d_name.len, 0);
+ &fname.disk_name, 0);
if (dir_item != NULL && !IS_ERR(dir_item)) {
pending->error = -EEXIST;
goto dir_item_existed;
@@ -1770,7 +1794,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_add_root_ref(trans, objectid,
parent_root->root_key.objectid,
btrfs_ino(BTRFS_I(parent_inode)), index,
- dentry->d_name.name, dentry->d_name.len);
+ &fname.disk_name);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
@@ -1802,9 +1826,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
if (ret < 0)
goto fail;
- ret = btrfs_insert_dir_item(trans, dentry->d_name.name,
- dentry->d_name.len, BTRFS_I(parent_inode),
- &key, BTRFS_FT_DIR, index);
+ ret = btrfs_insert_dir_item(trans, &fname.disk_name,
+ BTRFS_I(parent_inode), &key, BTRFS_FT_DIR,
+ index);
/* We have check then name at the beginning, so it is impossible. */
BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
if (ret) {
@@ -1813,7 +1837,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
- dentry->d_name.len * 2);
+ fname.disk_name.len * 2);
parent_inode->i_mtime = current_time(parent_inode);
parent_inode->i_ctime = parent_inode->i_mtime;
ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
@@ -1845,7 +1869,9 @@ dir_item_existed:
trans->bytes_reserved = 0;
clear_skip_qgroup:
btrfs_clear_skip_qgroup(trans);
-no_free_objectid:
+free_fname:
+ fscrypt_free_filename(&fname);
+free_pending:
kfree(new_root_item);
pending->root_item = NULL;
btrfs_free_path(path);
@@ -2101,6 +2127,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
ASSERT(refcount_read(&trans->use_count) == 1);
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
+ clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
+
/* Stop the commit early if ->aborted is set */
if (TRANS_ABORTED(cur_trans)) {
ret = cur_trans->aborted;
@@ -2354,12 +2382,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (ret)
goto unlock_reloc;
- /*
- * Since the transaction is done, we can apply the pending changes
- * before the next transaction.
- */
- btrfs_apply_pending_changes(fs_info);
-
/* commit_fs_roots gets rid of all the tree log roots, it is now
* safe to free the root of tree log roots
*/
@@ -2582,21 +2604,17 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
return (ret < 0) ? 0 : 1;
}
-void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info)
+int __init btrfs_transaction_init(void)
{
- unsigned long prev;
- unsigned long bit;
-
- prev = xchg(&fs_info->pending_changes, 0);
- if (!prev)
- return;
-
- bit = 1 << BTRFS_PENDING_COMMIT;
- if (prev & bit)
- btrfs_debug(fs_info, "pending commit done");
- prev &= ~bit;
+ btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
+ sizeof(struct btrfs_trans_handle), 0,
+ SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
+ if (!btrfs_trans_handle_cachep)
+ return -ENOMEM;
+ return 0;
+}
- if (prev)
- btrfs_warn(fs_info,
- "unknown pending changes left 0x%lx, ignoring", prev);
+void __cold btrfs_transaction_exit(void)
+{
+ kmem_cache_destroy(btrfs_trans_handle_cachep);
}