summaryrefslogtreecommitdiff
path: root/fs/btrfs/delayed-inode.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-08-31 09:41:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-08-31 09:41:22 -0700
commit87045e6546078dae215d1bd3b2bc82b3ada3ca77 (patch)
treef3d816b9834ca959514f6e399fb9f505871d2729 /fs/btrfs/delayed-inode.c
parent9c849ce86e0fa93a218614eac562ace44053d7ce (diff)
parent0d977e0eba234e01a60bdde27314dc21374201b3 (diff)
Merge tag 'for-5.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "The highlights of this round are integrations with fs-verity and idmapped mounts, the rest is usual mix of minor improvements, speedups and cleanups. There are some patches outside of btrfs, namely updating some VFS interfaces, all straightforward and acked. Features: - fs-verity support, using standard ioctls, backward compatible with read-only limitation on inodes with previously enabled fs-verity - idmapped mount support - make mount with rescue=ibadroots more tolerant to partially damaged trees - allow raid0 on a single device and raid10 on two devices, degenerate cases but might be useful as an intermediate step during conversion to other profiles - zoned mode block group auto reclaim can be disabled via sysfs knob Performance improvements: - continue readahead of node siblings even if target node is in memory, could speed up full send (on sample test +11%) - batching of delayed items can speed up creating many files - fsync/tree-log speedups - avoid unnecessary work (gains +2% throughput, -2% run time on sample load) - reduced lock contention on renames (on dbench +4% throughput, up to -30% latency) Fixes: - various zoned mode fixes - preemptive flushing threshold tuning, avoid excessive work on almost full filesystems Core: - continued subpage support, preparation for implementing remaining features like compression and defragmentation; with some limitations, write is now enabled on 64K page systems with 4K sectors, still considered experimental - no readahead on compressed reads - inline extents disabled - disabled raid56 profile conversion and mount - improved flushing logic, fixing early ENOSPC on some workloads - inode flags have been internally split to read-only and read-write incompat bit parts, used by fs-verity - new tree items for fs-verity - descriptor item - Merkle tree item - inode operations extended to be namespace-aware - cleanups and refactoring Generic code changes: - fs: new export filemap_fdatawrite_wbc - fs: removed sync_inode - block: bio_trim argument type fixups - vfs: add namespace-aware lookup" * tag 'for-5.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (114 commits) btrfs: reset replace target device to allocation state on close btrfs: zoned: fix ordered extent boundary calculation btrfs: do not do preemptive flushing if the majority is global rsv btrfs: reduce the preemptive flushing threshold to 90% btrfs: tree-log: check btrfs_lookup_data_extent return value btrfs: avoid unnecessarily logging directories that had no changes btrfs: allow idmapped mount btrfs: handle ACLs on idmapped mounts btrfs: allow idmapped INO_LOOKUP_USER ioctl btrfs: allow idmapped SUBVOL_SETFLAGS ioctl btrfs: allow idmapped SET_RECEIVED_SUBVOL ioctls btrfs: relax restrictions for SNAP_DESTROY_V2 with subvolids btrfs: allow idmapped SNAP_DESTROY ioctls btrfs: allow idmapped SNAP_CREATE/SUBVOL_CREATE ioctls btrfs: check whether fsgid/fsuid are mapped during subvolume creation btrfs: allow idmapped permission inode op btrfs: allow idmapped setattr inode op btrfs: allow idmapped tmpfile inode op btrfs: allow idmapped symlink inode op btrfs: allow idmapped mkdir inode op ...
Diffstat (limited to 'fs/btrfs/delayed-inode.c')
-rw-r--r--fs/btrfs/delayed-inode.c227
1 files changed, 83 insertions, 144 deletions
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 257c1e18abd4..1e08eb2b27f0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -6,7 +6,6 @@
#include <linux/slab.h>
#include <linux/iversion.h>
-#include <linux/sched/mm.h>
#include "misc.h"
#include "delayed-inode.h"
#include "disk-io.h"
@@ -672,176 +671,119 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
}
/*
- * This helper will insert some continuous items into the same leaf according
- * to the free space of the leaf.
+ * Insert a single delayed item or a batch of delayed items that have consecutive
+ * keys if they exist.
*/
-static int btrfs_batch_insert_items(struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_delayed_item *item)
+static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_delayed_item *first_item)
{
- struct btrfs_delayed_item *curr, *next;
- int free_space;
- int total_size = 0;
- struct extent_buffer *leaf;
- char *data_ptr;
- struct btrfs_key *keys;
- u32 *data_size;
- struct list_head head;
- int slot;
+ LIST_HEAD(batch);
+ struct btrfs_delayed_item *curr;
+ struct btrfs_delayed_item *next;
+ const int max_size = BTRFS_LEAF_DATA_SIZE(root->fs_info);
+ int total_size;
int nitems;
- int i;
- int ret = 0;
-
- BUG_ON(!path->nodes[0]);
+ char *ins_data = NULL;
+ struct btrfs_key *ins_keys;
+ u32 *ins_sizes;
+ int ret;
- leaf = path->nodes[0];
- free_space = btrfs_leaf_free_space(leaf);
- INIT_LIST_HEAD(&head);
+ list_add_tail(&first_item->tree_list, &batch);
+ nitems = 1;
+ total_size = first_item->data_len + sizeof(struct btrfs_item);
+ curr = first_item;
- next = item;
- nitems = 0;
+ while (true) {
+ int next_size;
- /*
- * count the number of the continuous items that we can insert in batch
- */
- while (total_size + next->data_len + sizeof(struct btrfs_item) <=
- free_space) {
- total_size += next->data_len + sizeof(struct btrfs_item);
- list_add_tail(&next->tree_list, &head);
- nitems++;
-
- curr = next;
next = __btrfs_next_delayed_item(curr);
- if (!next)
+ if (!next || !btrfs_is_continuous_delayed_item(curr, next))
break;
- if (!btrfs_is_continuous_delayed_item(curr, next))
+ next_size = next->data_len + sizeof(struct btrfs_item);
+ if (total_size + next_size > max_size)
break;
- }
- if (!nitems) {
- ret = 0;
- goto out;
+ list_add_tail(&next->tree_list, &batch);
+ nitems++;
+ total_size += next_size;
+ curr = next;
}
- keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
- if (!keys) {
- ret = -ENOMEM;
- goto out;
- }
+ if (nitems == 1) {
+ ins_keys = &first_item->key;
+ ins_sizes = &first_item->data_len;
+ } else {
+ int i = 0;
- data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
- if (!data_size) {
- ret = -ENOMEM;
- goto error;
+ ins_data = kmalloc(nitems * sizeof(u32) +
+ nitems * sizeof(struct btrfs_key), GFP_NOFS);
+ if (!ins_data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ins_sizes = (u32 *)ins_data;
+ ins_keys = (struct btrfs_key *)(ins_data + nitems * sizeof(u32));
+ list_for_each_entry(curr, &batch, tree_list) {
+ ins_keys[i] = curr->key;
+ ins_sizes[i] = curr->data_len;
+ i++;
+ }
}
- /* get keys of all the delayed items */
- i = 0;
- list_for_each_entry(next, &head, tree_list) {
- keys[i] = next->key;
- data_size[i] = next->data_len;
- i++;
- }
+ ret = btrfs_insert_empty_items(trans, root, path, ins_keys, ins_sizes,
+ nitems);
+ if (ret)
+ goto out;
- /* insert the keys of the items */
- setup_items_for_insert(root, path, keys, data_size, nitems);
+ list_for_each_entry(curr, &batch, tree_list) {
+ char *data_ptr;
- /* insert the dir index items */
- slot = path->slots[0];
- list_for_each_entry_safe(curr, next, &head, tree_list) {
- data_ptr = btrfs_item_ptr(leaf, slot, char);
- write_extent_buffer(leaf, &curr->data,
- (unsigned long)data_ptr,
- curr->data_len);
- slot++;
+ data_ptr = btrfs_item_ptr(path->nodes[0], path->slots[0], char);
+ write_extent_buffer(path->nodes[0], &curr->data,
+ (unsigned long)data_ptr, curr->data_len);
+ path->slots[0]++;
+ }
- btrfs_delayed_item_release_metadata(root, curr);
+ /*
+ * Now release our path before releasing the delayed items and their
+ * metadata reservations, so that we don't block other tasks for more
+ * time than needed.
+ */
+ btrfs_release_path(path);
+ list_for_each_entry_safe(curr, next, &batch, tree_list) {
list_del(&curr->tree_list);
+ btrfs_delayed_item_release_metadata(root, curr);
btrfs_release_delayed_item(curr);
}
-
-error:
- kfree(data_size);
- kfree(keys);
out:
+ kfree(ins_data);
return ret;
}
-/*
- * This helper can just do simple insertion that needn't extend item for new
- * data, such as directory name index insertion, inode insertion.
- */
-static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_delayed_item *delayed_item)
-{
- struct extent_buffer *leaf;
- unsigned int nofs_flag;
- char *ptr;
- int ret;
-
- nofs_flag = memalloc_nofs_save();
- ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
- delayed_item->data_len);
- memalloc_nofs_restore(nofs_flag);
- if (ret < 0 && ret != -EEXIST)
- return ret;
-
- leaf = path->nodes[0];
-
- ptr = btrfs_item_ptr(leaf, path->slots[0], char);
-
- write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
- delayed_item->data_len);
- btrfs_mark_buffer_dirty(leaf);
-
- btrfs_delayed_item_release_metadata(root, delayed_item);
- return 0;
-}
-
-/*
- * we insert an item first, then if there are some continuous items, we try
- * to insert those items into the same leaf.
- */
static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_root *root,
struct btrfs_delayed_node *node)
{
- struct btrfs_delayed_item *curr, *prev;
int ret = 0;
-do_again:
- mutex_lock(&node->mutex);
- curr = __btrfs_first_delayed_insertion_item(node);
- if (!curr)
- goto insert_end;
-
- ret = btrfs_insert_delayed_item(trans, root, path, curr);
- if (ret < 0) {
- btrfs_release_path(path);
- goto insert_end;
- }
+ while (ret == 0) {
+ struct btrfs_delayed_item *curr;
- prev = curr;
- curr = __btrfs_next_delayed_item(prev);
- if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
- /* insert the continuous items into the same leaf */
- path->slots[0]++;
- btrfs_batch_insert_items(root, path, curr);
+ mutex_lock(&node->mutex);
+ curr = __btrfs_first_delayed_insertion_item(node);
+ if (!curr) {
+ mutex_unlock(&node->mutex);
+ break;
+ }
+ ret = btrfs_insert_delayed_item(trans, root, path, curr);
+ mutex_unlock(&node->mutex);
}
- btrfs_release_delayed_item(prev);
- btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_release_path(path);
- mutex_unlock(&node->mutex);
- goto do_again;
-
-insert_end:
- mutex_unlock(&node->mutex);
return ret;
}
@@ -914,7 +856,6 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
struct btrfs_delayed_item *curr, *prev;
- unsigned int nofs_flag;
int ret = 0;
do_again:
@@ -923,9 +864,7 @@ do_again:
if (!curr)
goto delete_fail;
- nofs_flag = memalloc_nofs_save();
ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
- memalloc_nofs_restore(nofs_flag);
if (ret < 0)
goto delete_fail;
else if (ret > 0) {
@@ -994,7 +933,6 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
- unsigned int nofs_flag;
int mod;
int ret;
@@ -1007,9 +945,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
else
mod = 1;
- nofs_flag = memalloc_nofs_save();
ret = btrfs_lookup_inode(trans, root, path, &key, mod);
- memalloc_nofs_restore(nofs_flag);
if (ret > 0)
ret = -ENOENT;
if (ret < 0)
@@ -1066,9 +1002,7 @@ search:
key.type = BTRFS_INODE_EXTREF_KEY;
key.offset = -1;
- nofs_flag = memalloc_nofs_save();
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- memalloc_nofs_restore(nofs_flag);
if (ret < 0)
goto err_out;
ASSERT(ret);
@@ -1711,6 +1645,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *inode_item,
struct inode *inode)
{
+ u64 flags;
+
btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
@@ -1723,7 +1659,9 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
inode_peek_iversion(inode));
btrfs_set_stack_inode_transid(inode_item, trans->transid);
btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
- btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
+ flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
+ BTRFS_I(inode)->ro_flags);
+ btrfs_set_stack_inode_flags(inode_item, flags);
btrfs_set_stack_inode_block_group(inode_item, 0);
btrfs_set_stack_timespec_sec(&inode_item->atime,
@@ -1781,7 +1719,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
btrfs_stack_inode_sequence(inode_item));
inode->i_rdev = 0;
*rdev = btrfs_stack_inode_rdev(inode_item);
- BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
+ btrfs_inode_split_flags(btrfs_stack_inode_flags(inode_item),
+ &BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);