summaryrefslogtreecommitdiff
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
author: Filipe Manana <fdmanana@suse.com> 2022-01-20 11:00:09 +0000
committer: David Sterba <dsterba@suse.com> 2022-03-14 13:13:47 +0100
commit: 259c4b96d78dda8477a3ac21d6b3cf0eb9f75c8b (patch)
tree: cd64ad208e1cf630d16443f8f3f9706224dde4ba /fs/btrfs/tree-log.c
parent: 88d2beec7e53fc500a5ac99beb254e6079d03543 (diff)
btrfs: stop doing unnecessary log updates during a rename
During a rename, we call __btrfs_unlink_inode(), which will call btrfs_del_inode_ref_in_log() and btrfs_del_dir_entries_in_log(), in order to remove an inode reference and a directory entry from the log. These are necessary when __btrfs_unlink_inode() is called from the unlink path, but not necessary when it's called from a rename context, because:

1) For the btrfs_del_inode_ref_in_log() call, it's pointless to delete the inode reference related to the old name, because later in the rename path we call btrfs_log_new_name(), which will drop all inode references from the log and copy all inode references from the subvolume tree to the log tree. So we are doing one unnecessary btree operation which adds additional latency and lock contention in case there are other tasks accessing the log tree;

2) For the btrfs_del_dir_entries_in_log() call, we are now doing the equivalent at btrfs_log_new_name() since the previous patch in the series, that has the subject "btrfs: avoid logging all directory changes during renames". In fact, having __btrfs_unlink_inode() call this function not only adds additional latency and lock contention due to the extra btree operation, but also can make btrfs_log_new_name() unnecessarily log a range item to track the deletion of the old name, since it has no way to know that the directory entry related to the old name was previously logged and already deleted by __btrfs_unlink_inode() through its call to btrfs_del_dir_entries_in_log().

So skip those calls at __btrfs_unlink_inode() when we are doing a rename. Skipping them also allows us to reduce the time during which we keep a log transaction pinned during renames, which is always beneficial because it delays other tasks trying to sync the log tree less; in particular, we no longer hold the log transaction pinned while adding the new name (adding inode ref, directory entry, etc).
This change is part of a patchset comprised of the following patches:

  1/5 btrfs: add helper to delete a dir entry from a log tree
  2/5 btrfs: pass the dentry to btrfs_log_new_name() instead of the inode
  3/5 btrfs: avoid logging all directory changes during renames
  4/5 btrfs: stop doing unnecessary log updates during a rename
  5/5 btrfs: avoid inode logging during rename and link when possible

Just like the previous patch in the series, "btrfs: avoid logging all directory changes during renames", the following script mimics part of what a package installation/upgrade with zypper does, which is basically renaming a lot of files, in some directory under /usr, to a name with a suffix of "-RPMDELETE":

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/nvme0n1
  MNT=/mnt/nvme0n1

  NUM_FILES=10000

  mkfs.btrfs -f $DEV
  mount $DEV $MNT

  mkdir $MNT/testdir

  for ((i = 1; i <= $NUM_FILES; i++)); do
      echo -n > $MNT/testdir/file_$i
  done

  sync

  # Do some change to testdir and fsync it.
  echo -n > $MNT/testdir/file_$((NUM_FILES + 1))
  xfs_io -c "fsync" $MNT/testdir

  echo "Renaming $NUM_FILES files..."
  start=$(date +%s%N)
  for ((i = 1; i <= $NUM_FILES; i++)); do
      mv $MNT/testdir/file_$i $MNT/testdir/file_$i-RPMDELETE
  done
  end=$(date +%s%N)

  dur=$(( (end - start) / 1000000 ))
  echo "Renames took $dur milliseconds"

  umount $MNT

Testing this change on a box using a non-debug kernel (Debian's default kernel config) gave the following results:

  NUM_FILES=10000, before patchset:                   27399 ms
  NUM_FILES=10000, after patches 1/5 to 3/5 applied:   9093 ms (-66.8%)
  NUM_FILES=10000, after patches 1/5 to 4/5 applied:   9016 ms (-67.1%)

  NUM_FILES=5000, before patchset:                     9241 ms
  NUM_FILES=5000, after patches 1/5 to 3/5 applied:    4642 ms (-49.8%)
  NUM_FILES=5000, after patches 1/5 to 4/5 applied:    4553 ms (-50.7%)

  NUM_FILES=2000, before patchset:                     2550 ms
  NUM_FILES=2000, after patches 1/5 to 3/5 applied:    1788 ms (-29.9%)
  NUM_FILES=2000, after patches 1/5 to 4/5 applied:    1767 ms (-30.7%)

  NUM_FILES=1000, before patchset:                     1088 ms
  NUM_FILES=1000, after patches 1/5 to 3/5 applied:     905 ms (-16.9%)
  NUM_FILES=1000, after patches 1/5 to 4/5 applied:     883 ms (-18.8%)

The next patch in the series (5/5) also contains dbench results after applying the whole patchset.

Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1193549
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- fs/btrfs/tree-log.c 34
1 files changed, 27 insertions, 7 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 44719e9c48f3..d697f8a33a5a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6810,7 +6810,10 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
u64 old_dir_index, struct dentry *parent)
{
struct btrfs_inode *inode = BTRFS_I(d_inode(old_dentry));
+ struct btrfs_root *root = inode->root;
struct btrfs_log_ctx ctx;
+ bool log_pinned = false;
+ int ret = 0;
/*
* this will force the logging code to walk the dentry chain
@@ -6837,14 +6840,22 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
if (old_dir && old_dir->logged_trans == trans->transid) {
struct btrfs_root *log = old_dir->root->log_root;
struct btrfs_path *path;
- int ret;
ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX);
+ /*
+ * We have two inodes to update in the log, the old directory and
+ * the inode that got renamed, so we must pin the log to prevent
+ * anyone from syncing the log until we have updated both inodes
+ * in the log.
+ */
+ log_pinned = true;
+ btrfs_pin_log_trans(root);
+
path = btrfs_alloc_path();
if (!path) {
- btrfs_set_log_full_commit(trans);
- return;
+ ret = -ENOMEM;
+ goto out;
}
/*
@@ -6874,10 +6885,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
mutex_unlock(&old_dir->log_mutex);
btrfs_free_path(path);
- if (ret < 0) {
- btrfs_set_log_full_commit(trans);
- return;
- }
+ if (ret < 0)
+ goto out;
}
btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
@@ -6890,5 +6899,16 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
* inconsistent state after a rename operation.
*/
btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx);
+out:
+ if (log_pinned) {
+ /*
+ * If an error happened mark the log for a full commit because
+ * it's not consistent and up to date. Do it before unpinning the
+ * log, to avoid any races with someone else trying to commit it.
+ */
+ if (ret < 0)
+ btrfs_set_log_full_commit(trans);
+ btrfs_end_log_trans(root);
+ }
}