summaryrefslogtreecommitdiff
path: root/fs/btrfs/tree-checker.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 11:41:38 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 11:41:38 -0700
commitcc423f6337d0a5ff1906f3b3d465d28c0d1705f6 (patch)
treefafc40aa7dc3ecd9800239f647d4fe21ee5db6af /fs/btrfs/tree-checker.c
parente940efa936be65866db9ce20798b13fdc6b3891a (diff)
parent8a4a0b2a3eaf75ca8854f856ef29690c12b2f531 (diff)
Merge tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "Mainly core changes, refactoring and optimizations. Performance is improved in some areas, overall there may be a cumulative improvement due to refactoring that removed lookups in the IO path or simplified IO submission tracking. Core: - submit IO synchronously for fast checksums (crc32c and xxhash), remove high priority worker kthread - read extent buffer in one go, simplify IO tracking, bio submission and locking - remove additional tracking of redirtied extent buffers, originally added for zoned mode but actually not needed - track ordered extent pointer in bio to avoid rbtree lookups during IO - scrub, use recovered data stripes as cache to avoid unnecessary read - in zoned mode, optimize logical to physical mappings of extents - remove PageError handling, not set by VFS nor writeback - cleanups, refactoring, better structure packing - lots of error handling improvements - more assertions, lockdep annotations - print assertion failure with the exact line where it happens - tracepoint updates - more debugging prints Performance: - speedup in fsync(), better tracking of inode logged status can avoid transaction commit - IO path structures track logical offsets in data structures and does not need to look it up User visible changes: - don't commit transaction for every created subvolume, this can reduce time when many subvolumes are created in a batch - print affected files when relocation fails - trigger orphan file cleanup during START_SYNC ioctl Notable fixes: - fix crash when disabling quota and relocation - fix crashes when removing roots from dirty list - fix transaction abort during relocation when converting from newer profiles not covered by fallback - in zoned mode, stop reclaiming block groups if filesystem becomes read-only - fix rare race condition in tree mod log rewind that can miss some btree node slots - with enabled fsverity, drop up-to-date page bit in case the verification fails" * tag 'for-6.5-tag' of 
git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (194 commits) btrfs: fix race between quota disable and relocation btrfs: add comment to struct btrfs_fs_info::dirty_cowonly_roots btrfs: fix race when deleting free space root from the dirty cow roots list btrfs: fix race when deleting quota root from the dirty cow roots list btrfs: tracepoints: also show actual number of the outstanding extents btrfs: update i_version in update_dev_time btrfs: make btrfs_compressed_bioset static btrfs: add handling for RAID1C23/DUP to btrfs_reduce_alloc_profile btrfs: scrub: remove btrfs_fs_info::scrub_wr_completion_workers btrfs: scrub: remove scrub_ctx::csum_list member btrfs: do not BUG_ON after failure to migrate space during truncation btrfs: do not BUG_ON on failure to get dir index for new snapshot btrfs: send: do not BUG_ON() on unexpected symlink data extent btrfs: do not BUG_ON() when dropping inode items from log root btrfs: replace BUG_ON() at split_item() with proper error handling btrfs: do not BUG_ON() on tree mod log failures at btrfs_del_ptr() btrfs: do not BUG_ON() on tree mod log failures at insert_ptr() btrfs: do not BUG_ON() on tree mod log failure at insert_new_root() btrfs: do not BUG_ON() on tree mod log failures at push_nodes_for_insert() btrfs: abort transaction at update_ref_for_cow() when ref count is zero ...
Diffstat (limited to 'fs/btrfs/tree-checker.c')
-rw-r--r--fs/btrfs/tree-checker.c152
1 files changed, 112 insertions, 40 deletions
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 2138e9fc0564..038dfa8f1788 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -25,10 +25,10 @@
#include "compression.h"
#include "volumes.h"
#include "misc.h"
-#include "btrfs_inode.h"
#include "fs.h"
#include "accessors.h"
#include "file-item.h"
+#include "inode-item.h"
/*
* Error message should follow the following format:
@@ -1620,9 +1620,10 @@ static int check_inode_ref(struct extent_buffer *leaf,
/*
* Common point to switch the item-specific validation.
*/
-static int check_leaf_item(struct extent_buffer *leaf,
- struct btrfs_key *key, int slot,
- struct btrfs_key *prev_key)
+static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf,
+ struct btrfs_key *key,
+ int slot,
+ struct btrfs_key *prev_key)
{
int ret = 0;
struct btrfs_chunk *chunk;
@@ -1671,10 +1672,13 @@ static int check_leaf_item(struct extent_buffer *leaf,
ret = check_extent_data_ref(leaf, key, slot);
break;
}
- return ret;
+
+ if (ret)
+ return BTRFS_TREE_BLOCK_INVALID_ITEM;
+ return BTRFS_TREE_BLOCK_CLEAN;
}
-static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
+enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
/* No valid key type is 0, so all keys should be larger than this key */
@@ -1687,7 +1691,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, 0,
"invalid level for leaf, have %d expect 0",
btrfs_header_level(leaf));
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_LEVEL;
}
/*
@@ -1710,32 +1714,32 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, 0,
"invalid root, root %llu must never be empty",
owner);
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
}
/* Unknown tree */
if (unlikely(owner == 0)) {
generic_err(leaf, 0,
"invalid owner, root 0 is not defined");
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_OWNER;
}
/* EXTENT_TREE_V2 can have empty extent trees. */
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
- return 0;
+ return BTRFS_TREE_BLOCK_CLEAN;
if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
generic_err(leaf, 0,
"invalid root, root %llu must never be empty",
owner);
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
}
- return 0;
+ return BTRFS_TREE_BLOCK_CLEAN;
}
if (unlikely(nritems == 0))
- return 0;
+ return BTRFS_TREE_BLOCK_CLEAN;
/*
* Check the following things to make sure this is a good leaf, and
@@ -1751,7 +1755,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
for (slot = 0; slot < nritems; slot++) {
u32 item_end_expected;
u64 item_data_end;
- int ret;
btrfs_item_key_to_cpu(leaf, &key, slot);
@@ -1762,7 +1765,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
prev_key.objectid, prev_key.type,
prev_key.offset, key.objectid, key.type,
key.offset);
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
}
item_data_end = (u64)btrfs_item_offset(leaf, slot) +
@@ -1781,7 +1784,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, slot,
"unexpected item end, have %llu expect %u",
item_data_end, item_end_expected);
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
}
/*
@@ -1793,7 +1796,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, slot,
"slot end outside of leaf, have %llu expect range [0, %u]",
item_data_end, BTRFS_LEAF_DATA_SIZE(fs_info));
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
}
/* Also check if the item pointer overlaps with btrfs item. */
@@ -1804,16 +1807,22 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
btrfs_item_nr_offset(leaf, slot) +
sizeof(struct btrfs_item),
btrfs_item_ptr_offset(leaf, slot));
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
}
- if (check_item_data) {
+ /*
+ * We only want to do this if WRITTEN is set, otherwise the leaf
+ * may be in some intermediate state and won't appear valid.
+ */
+ if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) {
+ enum btrfs_tree_block_status ret;
+
/*
* Check if the item size and content meet other
* criteria
*/
ret = check_leaf_item(leaf, &key, slot, &prev_key);
- if (unlikely(ret < 0))
+ if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
return ret;
}
@@ -1822,21 +1831,21 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
prev_key.offset = key.offset;
}
- return 0;
+ return BTRFS_TREE_BLOCK_CLEAN;
}
-int btrfs_check_leaf_full(struct extent_buffer *leaf)
+int btrfs_check_leaf(struct extent_buffer *leaf)
{
- return check_leaf(leaf, true);
-}
-ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
+ enum btrfs_tree_block_status ret;
-int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
-{
- return check_leaf(leaf, false);
+ ret = __btrfs_check_leaf(leaf);
+ if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
+ return -EUCLEAN;
+ return 0;
}
+ALLOW_ERROR_INJECTION(btrfs_check_leaf, ERRNO);
-int btrfs_check_node(struct extent_buffer *node)
+enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node)
{
struct btrfs_fs_info *fs_info = node->fs_info;
unsigned long nr = btrfs_header_nritems(node);
@@ -1844,13 +1853,12 @@ int btrfs_check_node(struct extent_buffer *node)
int slot;
int level = btrfs_header_level(node);
u64 bytenr;
- int ret = 0;
if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
generic_err(node, 0,
"invalid level for node, have %d expect [1, %d]",
level, BTRFS_MAX_LEVEL - 1);
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_LEVEL;
}
if (unlikely(nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info))) {
btrfs_crit(fs_info,
@@ -1858,7 +1866,7 @@ int btrfs_check_node(struct extent_buffer *node)
btrfs_header_owner(node), node->start,
nr == 0 ? "small" : "large", nr,
BTRFS_NODEPTRS_PER_BLOCK(fs_info));
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
}
for (slot = 0; slot < nr - 1; slot++) {
@@ -1869,15 +1877,13 @@ int btrfs_check_node(struct extent_buffer *node)
if (unlikely(!bytenr)) {
generic_err(node, slot,
"invalid NULL node pointer");
- ret = -EUCLEAN;
- goto out;
+ return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR;
}
if (unlikely(!IS_ALIGNED(bytenr, fs_info->sectorsize))) {
generic_err(node, slot,
"unaligned pointer, have %llu should be aligned to %u",
bytenr, fs_info->sectorsize);
- ret = -EUCLEAN;
- goto out;
+ return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR;
}
if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) {
@@ -1886,12 +1892,20 @@ int btrfs_check_node(struct extent_buffer *node)
key.objectid, key.type, key.offset,
next_key.objectid, next_key.type,
next_key.offset);
- ret = -EUCLEAN;
- goto out;
+ return BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
}
}
-out:
- return ret;
+ return BTRFS_TREE_BLOCK_CLEAN;
+}
+
+int btrfs_check_node(struct extent_buffer *node)
+{
+ enum btrfs_tree_block_status ret;
+
+ ret = __btrfs_check_node(node);
+ if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
+ return -EUCLEAN; /* every non-clean status is reported as -EUCLEAN */
+ return 0;
}
ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
@@ -1949,3 +1963,61 @@ int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner)
}
return 0;
}
+
+int btrfs_verify_level_key(struct extent_buffer *eb, int level,
+ struct btrfs_key *first_key, u64 parent_transid)
+{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ int found_level;
+ struct btrfs_key found_key;
+ int ret;
+
+ found_level = btrfs_header_level(eb);
+ if (found_level != level) {
+ WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+ KERN_ERR "BTRFS: tree level check failed\n");
+ btrfs_err(fs_info,
+"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
+ eb->start, level, found_level);
+ return -EIO;
+ }
+
+ if (!first_key)
+ return 0; /* no key supplied: only the level is checked */
+
+ /*
+ * For live tree blocks (new tree blocks in the current transaction),
+ * we need proper lock context to avoid races, which is impossible here.
+ * So we only check tree blocks which are read from disk, i.e. those
+ * whose generation <= fs_info->last_trans_committed.
+ */
+ if (btrfs_header_generation(eb) > fs_info->last_trans_committed)
+ return 0;
+
+ /* We have @first_key, so this @eb must have at least one item */
+ if (btrfs_header_nritems(eb) == 0) {
+ btrfs_err(fs_info,
+ "invalid tree nritems, bytenr=%llu nritems=0 expect >0",
+ eb->start);
+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ return -EUCLEAN;
+ }
+
+ if (found_level)
+ btrfs_node_key_to_cpu(eb, &found_key, 0);
+ else
+ btrfs_item_key_to_cpu(eb, &found_key, 0);
+ ret = btrfs_comp_cpu_keys(first_key, &found_key);
+
+ if (ret) {
+ WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+ KERN_ERR "BTRFS: tree first key check failed\n");
+ btrfs_err(fs_info,
+"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
+ eb->start, parent_transid, first_key->objectid,
+ first_key->type, first_key->offset,
+ found_key.objectid, found_key.type,
+ found_key.offset);
+ }
+ return ret; /* 0 on match, else the non-zero btrfs_comp_cpu_keys() result */
+}