summaryrefslogtreecommitdiff
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c1774
1 files changed, 911 insertions, 863 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 4754c9101a4c..a48b4befbee7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -30,28 +30,13 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *ins_key, struct btrfs_path *path,
- int data_size, int extend);
+ int data_size, bool extend);
static int push_node_left(struct btrfs_trans_handle *trans,
struct extent_buffer *dst,
- struct extent_buffer *src, int empty);
+ struct extent_buffer *src, bool empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *dst_buf,
struct extent_buffer *src_buf);
-static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
- int level, int slot);
-
-static const struct btrfs_csums {
- u16 size;
- const char name[10];
- const char driver[12];
-} btrfs_csums[] = {
- [BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
- [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
- [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
- [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
- .driver = "blake2b-256" },
-};
-
/*
* The leaf data grows from end-to-front in the node. this returns the address
* of the start of the last item, which is the stop of the leaf data stack.
@@ -150,38 +135,6 @@ static inline void copy_leaf_items(const struct extent_buffer *dst,
nr_items * sizeof(struct btrfs_item));
}
-int btrfs_super_csum_size(const struct btrfs_super_block *s)
-{
- u16 t = btrfs_super_csum_type(s);
- /*
- * csum type is validated at mount time
- */
- return btrfs_csums[t].size;
-}
-
-const char *btrfs_super_csum_name(u16 csum_type)
-{
- /* csum type is validated at mount time */
- return btrfs_csums[csum_type].name;
-}
-
-/*
- * Return driver name if defined, otherwise the name that's also a valid driver
- * name
- */
-const char *btrfs_super_csum_driver(u16 csum_type)
-{
- /* csum type is validated at mount time */
- return btrfs_csums[csum_type].driver[0] ?
- btrfs_csums[csum_type].driver :
- btrfs_csums[csum_type].name;
-}
-
-size_t __attribute_const__ btrfs_get_num_csums(void)
-{
- return ARRAY_SIZE(btrfs_csums);
-}
-
struct btrfs_path *btrfs_alloc_path(void)
{
might_sleep();
@@ -222,22 +175,6 @@ noinline void btrfs_release_path(struct btrfs_path *p)
}
/*
- * We want the transaction abort to print stack trace only for errors where the
- * cause could be a bug, eg. due to ENOSPC, and not for common errors that are
- * caused by external factors.
- */
-bool __cold abort_should_print_stack(int errno)
-{
- switch (errno) {
- case -EIO:
- case -EROFS:
- case -ENOMEM:
- return false;
- }
- return true;
-}
-
-/*
* safely gets a reference on the root node of a tree. A lock
* is not taken, so a concurrent writer may put a different node
* at the root of the tree. See btrfs_lock_root_node for the
@@ -261,7 +198,7 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
* the inc_not_zero dance and if it doesn't work then
* synchronize_rcu and try again.
*/
- if (atomic_inc_not_zero(&eb->refs)) {
+ if (refcount_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
break;
}
@@ -287,7 +224,7 @@ static void add_root_to_dirty_list(struct btrfs_root *root)
spin_lock(&fs_info->trans_lock);
if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
/* Want the extent tree to be the last on the list */
- if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
+ if (btrfs_root_id(root) == BTRFS_EXTENT_TREE_OBJECTID)
list_move_tail(&root->dirty_list,
&fs_info->dirty_cowonly_roots);
else
@@ -312,11 +249,12 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
int ret = 0;
int level;
struct btrfs_disk_key disk_key;
+ u64 reloc_src_root = 0;
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != fs_info->running_transaction->transid);
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
- trans->transid != root->last_trans);
+ trans->transid != btrfs_get_root_last_trans(root));
level = btrfs_header_level(buf);
if (level == 0)
@@ -324,9 +262,11 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
else
btrfs_node_key(buf, &disk_key, 0);
+ if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+ reloc_src_root = btrfs_header_owner(buf);
cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
&disk_key, level, buf->start, 0,
- BTRFS_NESTING_NEW_ROOT);
+ reloc_src_root, BTRFS_NESTING_NEW_ROOT);
if (IS_ERR(cow))
return PTR_ERR(cow);
@@ -343,19 +283,30 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
- WARN_ON(btrfs_header_generation(buf) > trans->transid);
- if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+ if (unlikely(btrfs_header_generation(buf) > trans->transid)) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_inc_ref(trans, root, cow, 1);
- else
+ if (unlikely(ret))
+ btrfs_abort_transaction(trans, ret);
+ } else {
ret = btrfs_inc_ref(trans, root, cow, 0);
+ if (unlikely(ret))
+ btrfs_abort_transaction(trans, ret);
+ }
if (ret) {
btrfs_tree_unlock(cow);
free_extent_buffer(cow);
- btrfs_abort_transaction(trans, ret);
return ret;
}
- btrfs_mark_buffer_dirty(cow);
+ btrfs_mark_buffer_dirty(trans, cow);
*cow_ret = cow;
return 0;
}
@@ -363,22 +314,41 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
/*
* check if the tree block can be shared by multiple trees
*/
-int btrfs_block_can_be_shared(struct btrfs_root *root,
- struct extent_buffer *buf)
+bool btrfs_block_can_be_shared(const struct btrfs_trans_handle *trans,
+ const struct btrfs_root *root,
+ const struct extent_buffer *buf)
{
+ const u64 buf_gen = btrfs_header_generation(buf);
+
/*
* Tree blocks not in shareable trees and tree roots are never shared.
* If a block was allocated after the last snapshot and the block was
* not allocated by tree relocation, we know the block is not shared.
*/
- if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
- buf != root->node && buf != root->commit_root &&
- (btrfs_header_generation(buf) <=
- btrfs_root_last_snapshot(&root->root_item) ||
- btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
- return 1;
- return 0;
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
+ return false;
+
+ if (buf == root->node)
+ return false;
+
+ if (buf_gen > btrfs_root_last_snapshot(&root->root_item) &&
+ !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
+ return false;
+
+ if (buf != root->commit_root)
+ return true;
+
+ /*
+ * An extent buffer that used to be the commit root may still be shared
+ * because the tree height may have increased and it became a child of a
+ * higher level root. This can happen when snapshotting a subvolume
+ * created in the current transaction.
+ */
+ if (buf_gen == trans->transid)
+ return true;
+
+ return false;
}
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
@@ -391,7 +361,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
u64 refs;
u64 owner;
u64 flags;
- u64 new_flags = 0;
int ret;
/*
@@ -411,20 +380,24 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
* are only allowed for blocks use full backrefs.
*/
- if (btrfs_block_can_be_shared(root, buf)) {
+ if (btrfs_block_can_be_shared(trans, root, buf)) {
ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
btrfs_header_level(buf), 1,
- &refs, &flags);
+ &refs, &flags, NULL);
if (ret)
return ret;
- if (refs == 0) {
- ret = -EROFS;
- btrfs_handle_fs_error(fs_info, ret, NULL);
+ if (unlikely(refs == 0)) {
+ btrfs_crit(fs_info,
+ "found 0 references for tree block at bytenr %llu level %d root %llu",
+ buf->start, btrfs_header_level(buf),
+ btrfs_root_id(root));
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
return ret;
}
} else {
refs = 1;
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+ if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID ||
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
else
@@ -432,19 +405,26 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
}
owner = btrfs_header_owner(buf);
- BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
- !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+ if (unlikely(owner == BTRFS_TREE_RELOC_OBJECTID &&
+ !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))) {
+ btrfs_crit(fs_info,
+"found tree block at bytenr %llu level %d root %llu refs %llu flags %llx without full backref flag set",
+ buf->start, btrfs_header_level(buf),
+ btrfs_root_id(root), refs, flags);
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
if (refs > 1) {
- if ((owner == root->root_key.objectid ||
- root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
+ if ((owner == btrfs_root_id(root) ||
+ btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID) &&
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
ret = btrfs_inc_ref(trans, root, buf, 1);
if (ret)
return ret;
- if (root->root_key.objectid ==
- BTRFS_TREE_RELOC_OBJECTID) {
+ if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_dec_ref(trans, root, buf, 0);
if (ret)
return ret;
@@ -452,29 +432,22 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (ret)
return ret;
}
- new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+ ret = btrfs_set_disk_extent_flags(trans, buf,
+ BTRFS_BLOCK_FLAG_FULL_BACKREF);
+ if (ret)
+ return ret;
} else {
- if (root->root_key.objectid ==
- BTRFS_TREE_RELOC_OBJECTID)
+ if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
ret = btrfs_inc_ref(trans, root, cow, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
if (ret)
return ret;
}
- if (new_flags != 0) {
- int level = btrfs_header_level(buf);
-
- ret = btrfs_set_disk_extent_flags(trans, buf,
- new_flags, level);
- if (ret)
- return ret;
- }
} else {
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
- if (root->root_key.objectid ==
- BTRFS_TREE_RELOC_OBJECTID)
+ if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
ret = btrfs_inc_ref(trans, root, cow, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
@@ -484,7 +457,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (ret)
return ret;
}
- btrfs_clean_tree_block(buf);
+ btrfs_clear_buffer_dirty(trans, buf);
*last_ref = 1;
}
return 0;
@@ -502,13 +475,13 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
* bytes the allocator should try to find free next to the block it returns.
* This is just a hint and may be ignored by the allocator.
*/
-static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf,
- struct extent_buffer *parent, int parent_slot,
- struct extent_buffer **cow_ret,
- u64 search_start, u64 empty_size,
- enum btrfs_lock_nesting nest)
+int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *buf,
+ struct extent_buffer *parent, int parent_slot,
+ struct extent_buffer **cow_ret,
+ u64 search_start, u64 empty_size,
+ enum btrfs_lock_nesting nest)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_disk_key disk_key;
@@ -517,6 +490,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
int last_ref = 0;
int unlock_orig = 0;
u64 parent_start = 0;
+ u64 reloc_src_root = 0;
if (*cow_ret == buf)
unlock_orig = 1;
@@ -526,7 +500,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != fs_info->running_transaction->transid);
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
- trans->transid != root->last_trans);
+ trans->transid != btrfs_get_root_last_trans(root));
level = btrfs_header_level(buf);
@@ -535,12 +509,14 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
else
btrfs_node_key(buf, &disk_key, 0);
- if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
- parent_start = parent->start;
-
+ if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID) {
+ if (parent)
+ parent_start = parent->start;
+ reloc_src_root = btrfs_header_owner(buf);
+ }
cow = btrfs_alloc_tree_block(trans, root, parent_start,
- root->root_key.objectid, &disk_key, level,
- search_start, empty_size, nest);
+ btrfs_root_id(root), &disk_key, level,
+ search_start, empty_size, reloc_src_root, nest);
if (IS_ERR(cow))
return PTR_ERR(cow);
@@ -552,84 +528,97 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
BTRFS_HEADER_FLAG_RELOC);
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+ if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
else
- btrfs_set_header_owner(cow, root->root_key.objectid);
+ btrfs_set_header_owner(cow, btrfs_root_id(root));
write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
- if (ret) {
- btrfs_tree_unlock(cow);
- free_extent_buffer(cow);
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
- return ret;
+ goto error_unlock_cow;
}
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
- if (ret) {
- btrfs_tree_unlock(cow);
- free_extent_buffer(cow);
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
- return ret;
+ goto error_unlock_cow;
}
}
if (buf == root->node) {
WARN_ON(parent && parent != buf);
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+ if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID ||
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
parent_start = buf->start;
- atomic_inc(&cow->refs);
ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
- BUG_ON(ret < 0);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ goto error_unlock_cow;
+ }
+ refcount_inc(&cow->refs);
rcu_assign_pointer(root->node, cow);
- btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
- parent_start, last_ref);
+ ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
+ parent_start, last_ref);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ goto error_unlock_cow;
+ }
} else {
WARN_ON(trans->transid != btrfs_header_generation(parent));
- btrfs_tree_mod_log_insert_key(parent, parent_slot,
- BTRFS_MOD_LOG_KEY_REPLACE);
+ ret = btrfs_tree_mod_log_insert_key(parent, parent_slot,
+ BTRFS_MOD_LOG_KEY_REPLACE);
+ if (unlikely(ret)) {
+ btrfs_abort_transaction(trans, ret);
+ goto error_unlock_cow;
+ }
btrfs_set_node_blockptr(parent, parent_slot,
cow->start);
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
- btrfs_mark_buffer_dirty(parent);
+ btrfs_mark_buffer_dirty(trans, parent);
if (last_ref) {
ret = btrfs_tree_mod_log_free_eb(buf);
- if (ret) {
- btrfs_tree_unlock(cow);
- free_extent_buffer(cow);
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
- return ret;
+ goto error_unlock_cow;
}
}
- btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
- parent_start, last_ref);
+ ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
+ parent_start, last_ref);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ goto error_unlock_cow;
+ }
}
+
+ trace_btrfs_cow_block(root, buf, cow);
if (unlock_orig)
btrfs_tree_unlock(buf);
free_extent_buffer_stale(buf);
- btrfs_mark_buffer_dirty(cow);
+ btrfs_mark_buffer_dirty(trans, cow);
*cow_ret = cow;
return 0;
+
+error_unlock_cow:
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
+ return ret;
}
-static inline int should_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf)
+static inline bool should_cow_block(const struct btrfs_trans_handle *trans,
+ const struct btrfs_root *root,
+ const struct extent_buffer *buf)
{
if (btrfs_is_testing(root->fs_info))
- return 0;
-
- /* Ensure we can see the FORCE_COW bit */
- smp_mb__before_atomic();
+ return false;
/*
* We do not need to cow a block if
@@ -642,21 +631,33 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
* after we've finished copying src root, we must COW the shared
* block to ensure the metadata consistency.
*/
- if (btrfs_header_generation(buf) == trans->transid &&
- !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
- !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
- btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
- !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
- return 0;
- return 1;
+
+ if (btrfs_header_generation(buf) != trans->transid)
+ return true;
+
+ if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN))
+ return true;
+
+ /* Ensure we can see the FORCE_COW bit. */
+ smp_mb__before_atomic();
+ if (test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
+ return true;
+
+ if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
+ return false;
+
+ if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
+ return true;
+
+ return false;
}
/*
- * cows a single block, see __btrfs_cow_block for the real work.
+ * COWs a single block, see btrfs_force_cow_block() for the real work.
* This version of it has extra checks so that a block isn't COWed more than
* once per transaction, as long as it hasn't been written yet
*/
-noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
+int btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *parent, int parent_slot,
struct extent_buffer **cow_ret,
@@ -664,27 +665,38 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 search_start;
- int ret;
-
- if (test_bit(BTRFS_ROOT_DELETING, &root->state))
- btrfs_err(fs_info,
- "COW'ing blocks on a fs root that's being dropped");
- if (trans->transaction != fs_info->running_transaction)
- WARN(1, KERN_CRIT "trans %llu running %llu\n",
- trans->transid,
- fs_info->running_transaction->transid);
+ if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) {
+ btrfs_abort_transaction(trans, -EUCLEAN);
+ btrfs_crit(fs_info,
+ "attempt to COW block %llu on root %llu that is being deleted",
+ buf->start, btrfs_root_id(root));
+ return -EUCLEAN;
+ }
- if (trans->transid != fs_info->generation)
- WARN(1, KERN_CRIT "trans %llu running %llu\n",
- trans->transid, fs_info->generation);
+ /*
+ * COWing must happen through a running transaction, which always
+ * matches the current fs generation (it's a transaction with a state
+ * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs
+ * into error state to prevent the commit of any transaction.
+ */
+ if (unlikely(trans->transaction != fs_info->running_transaction ||
+ trans->transid != fs_info->generation)) {
+ btrfs_abort_transaction(trans, -EUCLEAN);
+ btrfs_crit(fs_info,
+"unexpected transaction when attempting to COW block %llu on root %llu, transaction %llu running transaction %llu fs generation %llu",
+ buf->start, btrfs_root_id(root), trans->transid,
+ fs_info->running_transaction->transid,
+ fs_info->generation);
+ return -EUCLEAN;
+ }
if (!should_cow_block(trans, root, buf)) {
*cow_ret = buf;
return 0;
}
- search_start = buf->start & ~((u64)SZ_1G - 1);
+ search_start = round_down(buf->start, SZ_1G);
/*
* Before CoWing this block for later modification, check if it's
@@ -693,59 +705,12 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
* Also We don't care about the error, as it's handled internally.
*/
btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
- ret = __btrfs_cow_block(trans, root, buf, parent,
- parent_slot, cow_ret, search_start, 0, nest);
-
- trace_btrfs_cow_block(root, buf, *cow_ret);
-
- return ret;
+ return btrfs_force_cow_block(trans, root, buf, parent, parent_slot,
+ cow_ret, search_start, 0, nest);
}
ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);
/*
- * helper function for defrag to decide if two blocks pointed to by a
- * node are actually close by
- */
-static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
-{
- if (blocknr < other && other - (blocknr + blocksize) < 32768)
- return 1;
- if (blocknr > other && blocknr - (other + blocksize) < 32768)
- return 1;
- return 0;
-}
-
-#ifdef __LITTLE_ENDIAN
-
-/*
- * Compare two keys, on little-endian the disk order is same as CPU order and
- * we can avoid the conversion.
- */
-static int comp_keys(const struct btrfs_disk_key *disk_key,
- const struct btrfs_key *k2)
-{
- const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
-
- return btrfs_comp_cpu_keys(k1, k2);
-}
-
-#else
-
-/*
- * compare two keys in a memcmp fashion
- */
-static int comp_keys(const struct btrfs_disk_key *disk,
- const struct btrfs_key *k2)
-{
- struct btrfs_key k1;
-
- btrfs_disk_key_to_cpu(&k1, disk);
-
- return btrfs_comp_cpu_keys(&k1, k2);
-}
-#endif
-
-/*
* same as comp_keys only with two btrfs_key's
*/
int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
@@ -766,95 +731,11 @@ int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_ke
}
/*
- * this is used by the defrag code to go through all the
- * leaves pointed to by a node and reallocate them so that
- * disk order is close to key order
- */
-int btrfs_realloc_node(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *parent,
- int start_slot, u64 *last_ret,
- struct btrfs_key *progress)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct extent_buffer *cur;
- u64 blocknr;
- u64 search_start = *last_ret;
- u64 last_block = 0;
- u64 other;
- u32 parent_nritems;
- int end_slot;
- int i;
- int err = 0;
- u32 blocksize;
- int progress_passed = 0;
- struct btrfs_disk_key disk_key;
-
- WARN_ON(trans->transaction != fs_info->running_transaction);
- WARN_ON(trans->transid != fs_info->generation);
-
- parent_nritems = btrfs_header_nritems(parent);
- blocksize = fs_info->nodesize;
- end_slot = parent_nritems - 1;
-
- if (parent_nritems <= 1)
- return 0;
-
- for (i = start_slot; i <= end_slot; i++) {
- int close = 1;
-
- btrfs_node_key(parent, &disk_key, i);
- if (!progress_passed && comp_keys(&disk_key, progress) < 0)
- continue;
-
- progress_passed = 1;
- blocknr = btrfs_node_blockptr(parent, i);
- if (last_block == 0)
- last_block = blocknr;
-
- if (i > 0) {
- other = btrfs_node_blockptr(parent, i - 1);
- close = close_blocks(blocknr, other, blocksize);
- }
- if (!close && i < end_slot) {
- other = btrfs_node_blockptr(parent, i + 1);
- close = close_blocks(blocknr, other, blocksize);
- }
- if (close) {
- last_block = blocknr;
- continue;
- }
-
- cur = btrfs_read_node_slot(parent, i);
- if (IS_ERR(cur))
- return PTR_ERR(cur);
- if (search_start == 0)
- search_start = last_block;
-
- btrfs_tree_lock(cur);
- err = __btrfs_cow_block(trans, root, cur, parent, i,
- &cur, search_start,
- min(16 * blocksize,
- (end_slot - i) * blocksize),
- BTRFS_NESTING_COW);
- if (err) {
- btrfs_tree_unlock(cur);
- free_extent_buffer(cur);
- break;
- }
- search_start = cur->start;
- last_block = cur->start;
- *last_ret = search_start;
- btrfs_tree_unlock(cur);
- free_extent_buffer(cur);
- }
- return err;
-}
-
-/*
* Search for a key in the given extent_buffer.
*
- * The lower boundary for the search is specified by the slot number @low. Use a
- * value of 0 to search over the whole extent buffer.
+ * The lower boundary for the search is specified by the slot number @first_slot.
+ * Use a value of 0 to search over the whole extent buffer. Works for both
+ * leaves and nodes.
*
* The slot in the extent buffer is returned via @slot. If the key exists in the
* extent buffer, then @slot will point to the slot where the key is, otherwise
@@ -863,18 +744,23 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
* Slot may point to the total number of items (i.e. one position beyond the last
* key) if the key is bigger than the last key in the extent buffer.
*/
-static noinline int generic_bin_search(struct extent_buffer *eb, int low,
- const struct btrfs_key *key, int *slot)
+int btrfs_bin_search(const struct extent_buffer *eb, int first_slot,
+ const struct btrfs_key *key, int *slot)
{
unsigned long p;
int item_size;
- int high = btrfs_header_nritems(eb);
+ /*
+ * Use unsigned types for the low and high slots, so that we get a more
+ * efficient division in the search loop below.
+ */
+ u32 low = first_slot;
+ u32 high = btrfs_header_nritems(eb);
int ret;
const int key_size = sizeof(struct btrfs_disk_key);
- if (low > high) {
+ if (unlikely(low > high)) {
btrfs_err(eb->fs_info,
- "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
+ "%s: low (%u) > high (%u) eb %llu owner %llu level %d",
__func__, low, high, eb->start,
btrfs_header_owner(eb), btrfs_header_level(eb));
return -EINVAL;
@@ -889,7 +775,8 @@ static noinline int generic_bin_search(struct extent_buffer *eb, int low,
}
while (low < high) {
- unsigned long oip;
+ const int unit_size = eb->folio_size;
+ unsigned long oil;
unsigned long offset;
struct btrfs_disk_key *tmp;
struct btrfs_disk_key unaligned;
@@ -897,20 +784,20 @@ static noinline int generic_bin_search(struct extent_buffer *eb, int low,
mid = (low + high) / 2;
offset = p + mid * item_size;
- oip = offset_in_page(offset);
+ oil = get_eb_offset_in_folio(eb, offset);
- if (oip + key_size <= PAGE_SIZE) {
- const unsigned long idx = get_eb_page_index(offset);
- char *kaddr = page_address(eb->pages[idx]);
+ if (oil + key_size <= unit_size) {
+ const unsigned long idx = get_eb_folio_index(eb, offset);
+ char *kaddr = folio_address(eb->folios[idx]);
- oip = get_eb_offset_in_page(eb, offset);
- tmp = (struct btrfs_disk_key *)(kaddr + oip);
+ oil = get_eb_offset_in_folio(eb, offset);
+ tmp = (struct btrfs_disk_key *)(kaddr + oil);
} else {
read_extent_buffer(eb, &unaligned, offset, key_size);
tmp = &unaligned;
}
- ret = comp_keys(tmp, key);
+ ret = btrfs_comp_keys(tmp, key);
if (ret < 0)
low = mid + 1;
@@ -925,29 +812,19 @@ static noinline int generic_bin_search(struct extent_buffer *eb, int low,
return 1;
}
-/*
- * Simple binary search on an extent buffer. Works for both leaves and nodes, and
- * always searches over the whole range of keys (slot 0 to slot 'nritems - 1').
- */
-int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
- int *slot)
-{
- return generic_bin_search(eb, 0, key, slot);
-}
-
-static void root_add_used(struct btrfs_root *root, u32 size)
+static void root_add_used_bytes(struct btrfs_root *root)
{
spin_lock(&root->accounting_lock);
btrfs_set_root_used(&root->root_item,
- btrfs_root_used(&root->root_item) + size);
+ btrfs_root_used(&root->root_item) + root->fs_info->nodesize);
spin_unlock(&root->accounting_lock);
}
-static void root_sub_used(struct btrfs_root *root, u32 size)
+static void root_sub_used_bytes(struct btrfs_root *root)
{
spin_lock(&root->accounting_lock);
btrfs_set_root_used(&root->root_item,
- btrfs_root_used(&root->root_item) - size);
+ btrfs_root_used(&root->root_item) - root->fs_info->nodesize);
spin_unlock(&root->accounting_lock);
}
@@ -964,7 +841,7 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
if (slot < 0 || slot >= btrfs_header_nritems(parent))
return ERR_PTR(-ENOENT);
- BUG_ON(level == 0);
+ ASSERT(level);
check.level = level - 1;
check.transid = btrfs_node_ptr_generation(parent, slot);
@@ -976,7 +853,7 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
&check);
if (IS_ERR(eb))
return eb;
- if (!extent_buffer_uptodate(eb)) {
+ if (unlikely(!extent_buffer_uptodate(eb))) {
free_extent_buffer(eb);
return ERR_PTR(-EIO);
}
@@ -985,6 +862,75 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
}
/*
+ * Promote a child node to become the new tree root.
+ *
+ * @trans: Transaction handle
+ * @root: Tree root structure to update
+ * @path: Path holding nodes and locks
+ * @level: Level of the parent (old root)
+ * @parent: The parent (old root) with exactly one item
+ *
+ * This helper is called during rebalancing when the root node contains only
+ * a single item (nritems == 1). We can reduce the tree height by promoting
+ * that child to become the new root and freeing the old root node. The path
+ * locks and references are updated accordingly.
+ *
+ * Return: 0 on success, negative errno on failure. The transaction is aborted
+ * on critical errors.
+ */
+static int promote_child_to_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ int level, struct extent_buffer *parent)
+{
+ struct extent_buffer *child;
+ int ret;
+
+ ASSERT(btrfs_header_nritems(parent) == 1);
+
+ child = btrfs_read_node_slot(parent, 0);
+ if (IS_ERR(child))
+ return PTR_ERR(child);
+
+ btrfs_tree_lock(child);
+ ret = btrfs_cow_block(trans, root, child, parent, 0, &child, BTRFS_NESTING_COW);
+ if (ret) {
+ btrfs_tree_unlock(child);
+ free_extent_buffer(child);
+ return ret;
+ }
+
+ ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
+ if (unlikely(ret < 0)) {
+ btrfs_tree_unlock(child);
+ free_extent_buffer(child);
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+ rcu_assign_pointer(root->node, child);
+
+ add_root_to_dirty_list(root);
+ btrfs_tree_unlock(child);
+
+ path->locks[level] = 0;
+ path->nodes[level] = NULL;
+ btrfs_clear_buffer_dirty(trans, parent);
+ btrfs_tree_unlock(parent);
+ /* Once for the path. */
+ free_extent_buffer(parent);
+
+ root_sub_used_bytes(root);
+ ret = btrfs_free_tree_block(trans, btrfs_root_id(root), parent, 0, 1);
+ /* Once for the root ptr. */
+ free_extent_buffer_stale(parent);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
* node level balancing, used to make sure nodes are in proper order for
* item deletion. We balance from the top down, so we have to make sure
* that a deletion won't leave an node completely empty later on.
@@ -1023,79 +969,48 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
* by promoting the node below to a root
*/
if (!parent) {
- struct extent_buffer *child;
-
if (btrfs_header_nritems(mid) != 1)
return 0;
- /* promote the child to a root */
- child = btrfs_read_node_slot(mid, 0);
- if (IS_ERR(child)) {
- ret = PTR_ERR(child);
- btrfs_handle_fs_error(fs_info, ret, NULL);
- goto enospc;
- }
-
- btrfs_tree_lock(child);
- ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
- BTRFS_NESTING_COW);
- if (ret) {
- btrfs_tree_unlock(child);
- free_extent_buffer(child);
- goto enospc;
- }
-
- ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
- BUG_ON(ret < 0);
- rcu_assign_pointer(root->node, child);
-
- add_root_to_dirty_list(root);
- btrfs_tree_unlock(child);
-
- path->locks[level] = 0;
- path->nodes[level] = NULL;
- btrfs_clean_tree_block(mid);
- btrfs_tree_unlock(mid);
- /* once for the path */
- free_extent_buffer(mid);
-
- root_sub_used(root, mid->len);
- btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
- /* once for the root ptr */
- free_extent_buffer_stale(mid);
- return 0;
+ return promote_child_to_root(trans, root, path, level, mid);
}
if (btrfs_header_nritems(mid) >
BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
return 0;
- left = btrfs_read_node_slot(parent, pslot - 1);
- if (IS_ERR(left))
- left = NULL;
+ if (pslot) {
+ left = btrfs_read_node_slot(parent, pslot - 1);
+ if (IS_ERR(left)) {
+ ret = PTR_ERR(left);
+ left = NULL;
+ goto out;
+ }
- if (left) {
- __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
+ btrfs_tree_lock_nested(left, BTRFS_NESTING_LEFT);
wret = btrfs_cow_block(trans, root, left,
parent, pslot - 1, &left,
BTRFS_NESTING_LEFT_COW);
if (wret) {
ret = wret;
- goto enospc;
+ goto out;
}
}
- right = btrfs_read_node_slot(parent, pslot + 1);
- if (IS_ERR(right))
- right = NULL;
+ if (pslot + 1 < btrfs_header_nritems(parent)) {
+ right = btrfs_read_node_slot(parent, pslot + 1);
+ if (IS_ERR(right)) {
+ ret = PTR_ERR(right);
+ right = NULL;
+ goto out;
+ }
- if (right) {
- __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
+ btrfs_tree_lock_nested(right, BTRFS_NESTING_RIGHT);
wret = btrfs_cow_block(trans, root, right,
parent, pslot + 1, &right,
BTRFS_NESTING_RIGHT_COW);
if (wret) {
ret = wret;
- goto enospc;
+ goto out;
}
}
@@ -1115,22 +1030,34 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
- btrfs_clean_tree_block(right);
+ btrfs_clear_buffer_dirty(trans, right);
btrfs_tree_unlock(right);
- del_ptr(root, path, level + 1, pslot + 1);
- root_sub_used(root, right->len);
- btrfs_free_tree_block(trans, btrfs_root_id(root), right,
- 0, 1);
+ ret = btrfs_del_ptr(trans, root, path, level + 1, pslot + 1);
+ if (ret < 0) {
+ free_extent_buffer_stale(right);
+ right = NULL;
+ goto out;
+ }
+ root_sub_used_bytes(root);
+ ret = btrfs_free_tree_block(trans, btrfs_root_id(root),
+ right, 0, 1);
free_extent_buffer_stale(right);
right = NULL;
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
} else {
struct btrfs_disk_key right_key;
btrfs_node_key(right, &right_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
BTRFS_MOD_LOG_KEY_REPLACE);
- BUG_ON(ret < 0);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
btrfs_set_node_key(parent, &right_key, pslot + 1);
- btrfs_mark_buffer_dirty(parent);
+ btrfs_mark_buffer_dirty(trans, parent);
}
}
if (btrfs_header_nritems(mid) == 1) {
@@ -1143,15 +1070,19 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
* otherwise we would have pulled some pointers from the
* right
*/
- if (!left) {
- ret = -EROFS;
- btrfs_handle_fs_error(fs_info, ret, NULL);
- goto enospc;
+ if (unlikely(!left)) {
+ btrfs_crit(fs_info,
+"missing left child when middle child only has 1 item, parent bytenr %llu level %d mid bytenr %llu root %llu",
+ parent->start, btrfs_header_level(parent),
+ mid->start, btrfs_root_id(root));
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ goto out;
}
wret = balance_node_right(trans, mid, left);
if (wret < 0) {
ret = wret;
- goto enospc;
+ goto out;
}
if (wret == 1) {
wret = push_node_left(trans, left, mid, 1);
@@ -1161,32 +1092,45 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(wret == 1);
}
if (btrfs_header_nritems(mid) == 0) {
- btrfs_clean_tree_block(mid);
+ btrfs_clear_buffer_dirty(trans, mid);
btrfs_tree_unlock(mid);
- del_ptr(root, path, level + 1, pslot);
- root_sub_used(root, mid->len);
- btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
+ ret = btrfs_del_ptr(trans, root, path, level + 1, pslot);
+ if (ret < 0) {
+ free_extent_buffer_stale(mid);
+ mid = NULL;
+ goto out;
+ }
+ root_sub_used_bytes(root);
+ ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
free_extent_buffer_stale(mid);
mid = NULL;
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
} else {
/* update the parent key to reflect our changes */
struct btrfs_disk_key mid_key;
btrfs_node_key(mid, &mid_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
BTRFS_MOD_LOG_KEY_REPLACE);
- BUG_ON(ret < 0);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
btrfs_set_node_key(parent, &mid_key, pslot);
- btrfs_mark_buffer_dirty(parent);
+ btrfs_mark_buffer_dirty(trans, parent);
}
/* update the path */
if (left) {
if (btrfs_header_nritems(left) > orig_slot) {
- atomic_inc(&left->refs);
/* left was locked after cow */
path->nodes[level] = left;
path->slots[level + 1] -= 1;
path->slots[level] = orig_slot;
+ /* Left is now owned by path. */
+ left = NULL;
if (mid) {
btrfs_tree_unlock(mid);
free_extent_buffer(mid);
@@ -1200,14 +1144,13 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (orig_ptr !=
btrfs_node_blockptr(path->nodes[level], path->slots[level]))
BUG();
-enospc:
+out:
if (right) {
btrfs_tree_unlock(right);
free_extent_buffer(right);
}
if (left) {
- if (path->nodes[level] != left)
- btrfs_tree_unlock(left);
+ btrfs_tree_unlock(left);
free_extent_buffer(left);
}
return ret;
@@ -1245,15 +1188,15 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
if (!parent)
return 1;
- left = btrfs_read_node_slot(parent, pslot - 1);
- if (IS_ERR(left))
- left = NULL;
-
/* first, try to make some room in the middle buffer */
- if (left) {
+ if (pslot) {
u32 left_nr;
- __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
+ left = btrfs_read_node_slot(parent, pslot - 1);
+ if (IS_ERR(left))
+ return PTR_ERR(left);
+
+ btrfs_tree_lock_nested(left, BTRFS_NESTING_LEFT);
left_nr = btrfs_header_nritems(left);
if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
@@ -1276,9 +1219,14 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_node_key(mid, &disk_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
BTRFS_MOD_LOG_KEY_REPLACE);
- BUG_ON(ret < 0);
+ if (unlikely(ret < 0)) {
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
btrfs_set_node_key(parent, &disk_key, pslot);
- btrfs_mark_buffer_dirty(parent);
+ btrfs_mark_buffer_dirty(trans, parent);
if (btrfs_header_nritems(left) > orig_slot) {
path->nodes[level] = left;
path->slots[level + 1] -= 1;
@@ -1297,17 +1245,18 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(left);
free_extent_buffer(left);
}
- right = btrfs_read_node_slot(parent, pslot + 1);
- if (IS_ERR(right))
- right = NULL;
/*
* then try to empty the right most buffer into the middle
*/
- if (right) {
+ if (pslot + 1 < btrfs_header_nritems(parent)) {
u32 right_nr;
- __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
+ right = btrfs_read_node_slot(parent, pslot + 1);
+ if (IS_ERR(right))
+ return PTR_ERR(right);
+
+ btrfs_tree_lock_nested(right, BTRFS_NESTING_RIGHT);
right_nr = btrfs_header_nritems(right);
if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
@@ -1330,9 +1279,14 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_node_key(right, &disk_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
BTRFS_MOD_LOG_KEY_REPLACE);
- BUG_ON(ret < 0);
+ if (unlikely(ret < 0)) {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
btrfs_set_node_key(parent, &disk_key, pslot + 1);
- btrfs_mark_buffer_dirty(parent);
+ btrfs_mark_buffer_dirty(trans, parent);
if (btrfs_header_nritems(mid) <= orig_slot) {
path->nodes[level] = right;
@@ -1358,7 +1312,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
* to the block in 'slot', and triggering ra on them.
*/
static void reada_for_search(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+ const struct btrfs_path *path,
int level, int slot, u64 objectid)
{
struct extent_buffer *node;
@@ -1440,7 +1394,7 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
}
}
-static noinline void reada_for_balance(struct btrfs_path *path, int level)
+static noinline void reada_for_balance(const struct btrfs_path *path, int level)
{
struct extent_buffer *parent;
int slot;
@@ -1505,8 +1459,8 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
}
if (i >= lowest_unlock && i > skip_level) {
- check_skip = false;
btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
+ check_skip = false;
path->locks[i] = 0;
if (write_lock_level &&
i > min_write_lock_level &&
@@ -1528,27 +1482,27 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
*/
static int
read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
- struct extent_buffer **eb_ret, int level, int slot,
+ struct extent_buffer **eb_ret, int slot,
const struct btrfs_key *key)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_tree_parent_check check = { 0 };
u64 blocknr;
- u64 gen;
- struct extent_buffer *tmp;
- int ret;
+ struct extent_buffer *tmp = NULL;
+ int ret = 0;
+ int ret2;
int parent_level;
- bool unlock_up;
+ bool read_tmp = false;
+ bool tmp_locked = false;
+ bool path_released = false;
- unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]);
blocknr = btrfs_node_blockptr(*eb_ret, slot);
- gen = btrfs_node_ptr_generation(*eb_ret, slot);
parent_level = btrfs_header_level(*eb_ret);
btrfs_node_key_to_cpu(*eb_ret, &check.first_key, slot);
check.has_first_key = true;
check.level = parent_level - 1;
- check.transid = gen;
- check.owner_root = root->root_key.objectid;
+ check.transid = btrfs_node_ptr_generation(*eb_ret, slot);
+ check.owner_root = btrfs_root_id(root);
/*
* If we need to read an extent buffer from disk and we are holding locks
@@ -1560,84 +1514,117 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
tmp = find_extent_buffer(fs_info, blocknr);
if (tmp) {
if (p->reada == READA_FORWARD_ALWAYS)
- reada_for_search(fs_info, p, level, slot, key->objectid);
+ reada_for_search(fs_info, p, parent_level, slot, key->objectid);
/* first we do an atomic uptodate check */
- if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
+ if (btrfs_buffer_uptodate(tmp, check.transid, true) > 0) {
/*
* Do extra check for first_key, eb can be stale due to
* being cached, read from scrub, or have multiple
* parents (shared tree blocks).
*/
- if (btrfs_verify_level_key(tmp,
- parent_level - 1, &check.first_key, gen)) {
- free_extent_buffer(tmp);
- return -EUCLEAN;
+ if (unlikely(btrfs_verify_level_key(tmp, &check))) {
+ ret = -EUCLEAN;
+ goto out;
}
*eb_ret = tmp;
- return 0;
+ tmp = NULL;
+ ret = 0;
+ goto out;
}
if (p->nowait) {
- free_extent_buffer(tmp);
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto out;
}
- if (unlock_up)
- btrfs_unlock_up_safe(p, level + 1);
-
- /* now we're allowed to do a blocking uptodate check */
- ret = btrfs_read_extent_buffer(tmp, &check);
- if (ret) {
- free_extent_buffer(tmp);
- btrfs_release_path(p);
- return -EIO;
- }
- if (btrfs_check_eb_owner(tmp, root->root_key.objectid)) {
- free_extent_buffer(tmp);
+ if (!p->skip_locking) {
+ btrfs_unlock_up_safe(p, parent_level + 1);
+ btrfs_maybe_reset_lockdep_class(root, tmp);
+ tmp_locked = true;
+ btrfs_tree_read_lock(tmp);
btrfs_release_path(p);
- return -EUCLEAN;
+ ret = -EAGAIN;
+ path_released = true;
}
- if (unlock_up)
- ret = -EAGAIN;
+ /* Now we're allowed to do a blocking uptodate check. */
+ ret2 = btrfs_read_extent_buffer(tmp, &check);
+ if (ret2) {
+ ret = ret2;
+ goto out;
+ }
+ if (ret == 0) {
+ ASSERT(!tmp_locked);
+ *eb_ret = tmp;
+ tmp = NULL;
+ }
goto out;
} else if (p->nowait) {
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto out;
}
- if (unlock_up) {
- btrfs_unlock_up_safe(p, level + 1);
+ if (!p->skip_locking) {
+ btrfs_unlock_up_safe(p, parent_level + 1);
ret = -EAGAIN;
- } else {
- ret = 0;
}
if (p->reada != READA_NONE)
- reada_for_search(fs_info, p, level, slot, key->objectid);
+ reada_for_search(fs_info, p, parent_level, slot, key->objectid);
- tmp = read_tree_block(fs_info, blocknr, &check);
+ tmp = btrfs_find_create_tree_block(fs_info, blocknr, check.owner_root, check.level);
if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
+ tmp = NULL;
+ goto out;
+ }
+ read_tmp = true;
+
+ if (!p->skip_locking) {
+ ASSERT(ret == -EAGAIN);
+ btrfs_maybe_reset_lockdep_class(root, tmp);
+ tmp_locked = true;
+ btrfs_tree_read_lock(tmp);
btrfs_release_path(p);
- return PTR_ERR(tmp);
+ path_released = true;
}
+
+ /* Now we're allowed to do a blocking uptodate check. */
+ ret2 = btrfs_read_extent_buffer(tmp, &check);
+ if (ret2) {
+ ret = ret2;
+ goto out;
+ }
+
/*
* If the read above didn't mark this buffer up to date,
* it will never end up being up to date. Set ret to EIO now
* and give up so that our caller doesn't loop forever
* on our EAGAINs.
*/
- if (!extent_buffer_uptodate(tmp))
+ if (unlikely(!extent_buffer_uptodate(tmp))) {
ret = -EIO;
+ goto out;
+ }
-out:
if (ret == 0) {
+ ASSERT(!tmp_locked);
*eb_ret = tmp;
- } else {
- free_extent_buffer(tmp);
- btrfs_release_path(p);
+ tmp = NULL;
+ }
+out:
+ if (tmp) {
+ if (tmp_locked)
+ btrfs_tree_read_unlock(tmp);
+ if (read_tmp && ret && ret != -EAGAIN)
+ free_extent_buffer_stale(tmp);
+ else
+ free_extent_buffer(tmp);
}
+ if (ret && !path_released)
+ btrfs_release_path(p);
return ret;
}
@@ -1742,13 +1729,13 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
if (p->search_commit_root) {
b = root->commit_root;
- atomic_inc(&b->refs);
+ refcount_inc(&b->refs);
level = btrfs_header_level(b);
/*
* Ensure that all callers have set skip_locking when
- * p->search_commit_root = 1.
+ * p->search_commit_root is true.
*/
- ASSERT(p->skip_locking == 1);
+ ASSERT(p->skip_locking);
goto out;
}
@@ -1798,7 +1785,7 @@ out:
* The root may have failed to write out at some point, and thus is no
* longer valid, return an error in this case.
*/
- if (!extent_buffer_uptodate(b)) {
+ if (unlikely(!extent_buffer_uptodate(b))) {
if (root_lock)
btrfs_tree_unlock_rw(b, root_lock);
free_extent_buffer(b);
@@ -1851,7 +1838,7 @@ static int finish_need_commit_sem_search(struct btrfs_path *path)
return 0;
}
-static inline int search_for_key_slot(struct extent_buffer *eb,
+static inline int search_for_key_slot(const struct extent_buffer *eb,
int search_low_slot,
const struct btrfs_key *key,
int prev_cmp,
@@ -1869,7 +1856,7 @@ static inline int search_for_key_slot(struct extent_buffer *eb,
return 0;
}
- return generic_bin_search(eb, search_low_slot, key, slot);
+ return btrfs_bin_search(eb, search_low_slot, key, slot);
}
static int search_leaf(struct btrfs_trans_handle *trans,
@@ -1916,7 +1903,7 @@ static int search_leaf(struct btrfs_trans_handle *trans,
* the extent buffer's header and we have recently accessed
* the header's level field.
*/
- ret = comp_keys(&first_key, key);
+ ret = btrfs_comp_keys(&first_key, key);
if (ret < 0) {
/*
* The first key is smaller than the key we want
@@ -1985,15 +1972,14 @@ static int search_leaf(struct btrfs_trans_handle *trans,
ASSERT(leaf_free_space >= 0);
if (leaf_free_space < ins_len) {
- int err;
-
- err = split_leaf(trans, root, key, path, ins_len,
- (ret == 0));
- ASSERT(err <= 0);
- if (WARN_ON(err > 0))
- err = -EUCLEAN;
- if (err)
- ret = err;
+ int ret2;
+
+ ret2 = split_leaf(trans, root, key, path, ins_len, (ret == 0));
+ ASSERT(ret2 <= 0);
+ if (WARN_ON(ret2 > 0))
+ ret2 = -EUCLEAN;
+ if (ret2)
+ ret = ret2;
}
}
@@ -2001,8 +1987,8 @@ static int search_leaf(struct btrfs_trans_handle *trans,
}
/*
- * btrfs_search_slot - look for a key in a tree and perform necessary
- * modifications to preserve tree invariants.
+ * Look for a key in a tree and perform necessary modifications to preserve
+ * tree invariants.
*
* @trans: Handle of transaction, used when modifying the tree
* @p: Holds all btree nodes along the search path
@@ -2035,11 +2021,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *key, struct btrfs_path *p,
int ins_len, int cow)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_fs_info *fs_info;
struct extent_buffer *b;
int slot;
int ret;
- int err;
int level;
int lowest_unlock = 1;
/* everything at write_lock_level or lower must be write locked */
@@ -2048,6 +2033,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int min_write_lock_level;
int prev_cmp;
+ if (!root)
+ return -EINVAL;
+
+ fs_info = root->fs_info;
might_sleep();
lowest_level = p->lowest_level;
@@ -2106,6 +2095,7 @@ again:
while (b) {
int dec = 0;
+ int ret2;
level = btrfs_header_level(b);
@@ -2134,16 +2124,15 @@ again:
}
if (last_level)
- err = btrfs_cow_block(trans, root, b, NULL, 0,
- &b,
- BTRFS_NESTING_COW);
+ ret2 = btrfs_cow_block(trans, root, b, NULL, 0,
+ &b, BTRFS_NESTING_COW);
else
- err = btrfs_cow_block(trans, root, b,
- p->nodes[level + 1],
- p->slots[level + 1], &b,
- BTRFS_NESTING_COW);
- if (err) {
- ret = err;
+ ret2 = btrfs_cow_block(trans, root, b,
+ p->nodes[level + 1],
+ p->slots[level + 1], &b,
+ BTRFS_NESTING_COW);
+ if (ret2) {
+ ret = ret2;
goto done;
}
}
@@ -2191,12 +2180,12 @@ cow_done:
slot--;
}
p->slots[level] = slot;
- err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
- &write_lock_level);
- if (err == -EAGAIN)
+ ret2 = setup_nodes_for_search(trans, root, p, b, level, ins_len,
+ &write_lock_level);
+ if (ret2 == -EAGAIN)
goto again;
- if (err) {
- ret = err;
+ if (ret2) {
+ ret = ret2;
goto done;
}
b = p->nodes[level];
@@ -2222,11 +2211,11 @@ cow_done:
goto done;
}
- err = read_block_for_search(root, p, &b, level, slot, key);
- if (err == -EAGAIN)
+ ret2 = read_block_for_search(root, p, &b, slot, key);
+ if (ret2 == -EAGAIN && !p->nowait)
goto again;
- if (err) {
- ret = err;
+ if (ret2) {
+ ret = ret2;
goto done;
}
@@ -2289,7 +2278,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
struct extent_buffer *b;
int slot;
int ret;
- int err;
int level;
int lowest_unlock = 1;
u8 lowest_level = 0;
@@ -2305,7 +2293,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
again:
b = btrfs_get_old_root(root, time_seq);
- if (!b) {
+ if (unlikely(!b)) {
ret = -EIO;
goto done;
}
@@ -2314,6 +2302,7 @@ again:
while (b) {
int dec = 0;
+ int ret2;
level = btrfs_header_level(b);
p->nodes[level] = b;
@@ -2326,7 +2315,7 @@ again:
*/
btrfs_unlock_up_safe(p, level + 1);
- ret = btrfs_bin_search(b, key, &slot);
+ ret = btrfs_bin_search(b, 0, key, &slot);
if (ret < 0)
goto done;
@@ -2349,17 +2338,17 @@ again:
goto done;
}
- err = read_block_for_search(root, p, &b, level, slot, key);
- if (err == -EAGAIN)
+ ret2 = read_block_for_search(root, p, &b, slot, key);
+ if (ret2 == -EAGAIN && !p->nowait)
goto again;
- if (err) {
- ret = err;
+ if (ret2) {
+ ret = ret2;
goto done;
}
level = btrfs_header_level(b);
btrfs_tree_read_lock(b);
- b = btrfs_tree_mod_log_rewind(fs_info, p, b, time_seq);
+ b = btrfs_tree_mod_log_rewind(fs_info, b, time_seq);
if (!b) {
ret = -ENOMEM;
goto done;
@@ -2376,6 +2365,87 @@ done:
}
/*
+ * Search the tree again to find a leaf with smaller keys.
+ * Returns 0 if it found something.
+ * Returns 1 if there are no smaller keys.
+ * Returns < 0 on error.
+ *
+ * This may release the path, and so you may lose any locks held at the
+ * time you call it.
+ */
+static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+ struct btrfs_key key;
+ struct btrfs_key orig_key;
+ struct btrfs_disk_key found_key;
+ int ret;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+ orig_key = key;
+
+ if (key.offset > 0) {
+ key.offset--;
+ } else if (key.type > 0) {
+ key.type--;
+ key.offset = (u64)-1;
+ } else if (key.objectid > 0) {
+ key.objectid--;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+ } else {
+ return 1;
+ }
+
+ btrfs_release_path(path);
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret <= 0)
+ return ret;
+
+ /*
+ * Previous key not found. Even if we were at slot 0 of the leaf we had
+ * before releasing the path and calling btrfs_search_slot(), we now may
+ * be in a slot pointing to the same original key - this can happen if
+ * after we released the path, one of more items were moved from a
+ * sibling leaf into the front of the leaf we had due to an insertion
+ * (see push_leaf_right()).
+ * If we hit this case and our slot is > 0 and just decrement the slot
+ * so that the caller does not process the same key again, which may or
+ * may not break the caller, depending on its logic.
+ */
+ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+ btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
+ ret = btrfs_comp_keys(&found_key, &orig_key);
+ if (ret == 0) {
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ return 0;
+ }
+ /*
+ * At slot 0, same key as before, it means orig_key is
+ * the lowest, leftmost, key in the tree. We're done.
+ */
+ return 1;
+ }
+ }
+
+ btrfs_item_key(path->nodes[0], &found_key, 0);
+ ret = btrfs_comp_keys(&found_key, &key);
+ /*
+ * We might have had an item with the previous key in the tree right
+ * before we released our path. And after we released our path, that
+ * item might have been pushed to the first slot (0) of the leaf we
+ * were holding due to a tree balance. Alternatively, an item with the
+ * previous key can exist as the only element of a leaf (big fat item).
+ * Therefore account for these 2 cases, so that our callers (like
+ * btrfs_previous_item) don't miss an existing item with a key matching
+ * the previous key we computed above.
+ */
+ if (ret <= 0)
+ return 0;
+ return 1;
+}
+
+/*
* helper to use instead of search slot if no exact match is needed but
* instead the next or previous item should be returned.
* When find_higher is true, the next higher item is returned, the next lower
@@ -2487,26 +2557,15 @@ int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key,
struct btrfs_path *path)
{
- while (1) {
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
int ret;
- const int slot = path->slots[0];
- const struct extent_buffer *leaf = path->nodes[0];
- /* This is where we start walking the path. */
- if (slot >= btrfs_header_nritems(leaf)) {
- /*
- * If we've reached the last slot in this leaf we need
- * to go to the next leaf and reset the path.
- */
- ret = btrfs_next_leaf(root, path);
- if (ret)
- return ret;
- continue;
- }
- /* Store the found, valid item in @key. */
- btrfs_item_key_to_cpu(leaf, key, slot);
- break;
+ ret = btrfs_next_leaf(root, path);
+ if (ret)
+ return ret;
}
+
+ btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]);
return 0;
}
@@ -2518,8 +2577,9 @@ int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key,
* higher levels
*
*/
-static void fixup_low_keys(struct btrfs_path *path,
- struct btrfs_disk_key *key, int level)
+static void fixup_low_keys(struct btrfs_trans_handle *trans,
+ const struct btrfs_path *path,
+ const struct btrfs_disk_key *key, int level)
{
int i;
struct extent_buffer *t;
@@ -2535,7 +2595,7 @@ static void fixup_low_keys(struct btrfs_path *path,
BTRFS_MOD_LOG_KEY_REPLACE);
BUG_ON(ret < 0);
btrfs_set_node_key(t, key, tslot);
- btrfs_mark_buffer_dirty(path->nodes[i]);
+ btrfs_mark_buffer_dirty(trans, path->nodes[i]);
if (tslot != 0)
break;
}
@@ -2547,10 +2607,11 @@ static void fixup_low_keys(struct btrfs_path *path,
* This function isn't completely safe. It's the caller's responsibility
* that the new key won't break the order
*/
-void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+ const struct btrfs_path *path,
const struct btrfs_key *new_key)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_disk_key disk_key;
struct extent_buffer *eb;
int slot;
@@ -2559,38 +2620,36 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
slot = path->slots[0];
if (slot > 0) {
btrfs_item_key(eb, &disk_key, slot - 1);
- if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
+ if (unlikely(btrfs_comp_keys(&disk_key, new_key) >= 0)) {
+ btrfs_print_leaf(eb);
btrfs_crit(fs_info,
- "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
+ "slot %u key " BTRFS_KEY_FMT " new key " BTRFS_KEY_FMT,
slot, btrfs_disk_key_objectid(&disk_key),
btrfs_disk_key_type(&disk_key),
btrfs_disk_key_offset(&disk_key),
- new_key->objectid, new_key->type,
- new_key->offset);
- btrfs_print_leaf(eb);
+ BTRFS_KEY_FMT_VALUE(new_key));
BUG();
}
}
if (slot < btrfs_header_nritems(eb) - 1) {
btrfs_item_key(eb, &disk_key, slot + 1);
- if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
+ if (unlikely(btrfs_comp_keys(&disk_key, new_key) <= 0)) {
+ btrfs_print_leaf(eb);
btrfs_crit(fs_info,
- "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
+ "slot %u key " BTRFS_KEY_FMT " new key " BTRFS_KEY_FMT,
slot, btrfs_disk_key_objectid(&disk_key),
btrfs_disk_key_type(&disk_key),
btrfs_disk_key_offset(&disk_key),
- new_key->objectid, new_key->type,
- new_key->offset);
- btrfs_print_leaf(eb);
+ BTRFS_KEY_FMT_VALUE(new_key));
BUG();
}
}
btrfs_cpu_key_to_disk(&disk_key, new_key);
btrfs_set_item_key(eb, &disk_key, slot);
- btrfs_mark_buffer_dirty(eb);
+ btrfs_mark_buffer_dirty(trans, eb);
if (slot == 0)
- fixup_low_keys(path, &disk_key, 1);
+ fixup_low_keys(trans, path, &disk_key, 1);
}
/*
@@ -2613,8 +2672,8 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
* is correct, we only need to bother the last key of @left and the first
* key of @right.
*/
-static bool check_sibling_keys(struct extent_buffer *left,
- struct extent_buffer *right)
+static bool check_sibling_keys(const struct extent_buffer *left,
+ const struct extent_buffer *right)
{
struct btrfs_key left_last;
struct btrfs_key right_first;
@@ -2634,12 +2693,15 @@ static bool check_sibling_keys(struct extent_buffer *left,
btrfs_item_key_to_cpu(right, &right_first, 0);
}
- if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
+ if (unlikely(btrfs_comp_cpu_keys(&left_last, &right_first) >= 0)) {
+ btrfs_crit(left->fs_info, "left extent buffer:");
+ btrfs_print_tree(left, false);
+ btrfs_crit(left->fs_info, "right extent buffer:");
+ btrfs_print_tree(right, false);
btrfs_crit(left->fs_info,
-"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
- left_last.objectid, left_last.type,
- left_last.offset, right_first.objectid,
- right_first.type, right_first.offset);
+"bad key order, sibling blocks, left last " BTRFS_KEY_FMT " right first " BTRFS_KEY_FMT,
+ BTRFS_KEY_FMT_VALUE(&left_last),
+ BTRFS_KEY_FMT_VALUE(&right_first));
return true;
}
return false;
@@ -2654,7 +2716,7 @@ static bool check_sibling_keys(struct extent_buffer *left,
*/
static int push_node_left(struct btrfs_trans_handle *trans,
struct extent_buffer *dst,
- struct extent_buffer *src, int empty)
+ struct extent_buffer *src, bool empty)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int push_items = 0;
@@ -2690,13 +2752,13 @@ static int push_node_left(struct btrfs_trans_handle *trans,
push_items = min(src_nritems - 8, push_items);
/* dst is the left eb, src is the middle eb */
- if (check_sibling_keys(dst, src)) {
+ if (unlikely(check_sibling_keys(dst, src))) {
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -2707,8 +2769,8 @@ static int push_node_left(struct btrfs_trans_handle *trans,
if (push_items < src_nritems) {
/*
- * Don't call btrfs_tree_mod_log_insert_move() here, key removal
- * was already fully logged by btrfs_tree_mod_log_eb_copy() above.
+ * btrfs_tree_mod_log_eb_copy handles logging the move, so we
+ * don't need to do an explicit tree mod log operation for it.
*/
memmove_extent_buffer(src, btrfs_node_key_ptr_offset(src, 0),
btrfs_node_key_ptr_offset(src, push_items),
@@ -2717,8 +2779,8 @@ static int push_node_left(struct btrfs_trans_handle *trans,
}
btrfs_set_header_nritems(src, src_nritems - push_items);
btrfs_set_header_nritems(dst, dst_nritems + push_items);
- btrfs_mark_buffer_dirty(src);
- btrfs_mark_buffer_dirty(dst);
+ btrfs_mark_buffer_dirty(trans, src);
+ btrfs_mark_buffer_dirty(trans, dst);
return ret;
}
@@ -2764,13 +2826,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
push_items = max_push;
/* dst is the right eb, src is the middle eb */
- if (check_sibling_keys(src, dst)) {
+ if (unlikely(check_sibling_keys(src, dst))) {
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
return ret;
}
- ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
- BUG_ON(ret < 0);
+
+ /*
+ * btrfs_tree_mod_log_eb_copy handles logging the move, so we don't
+ * need to do an explicit tree mod log operation for it.
+ */
memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(dst, push_items),
btrfs_node_key_ptr_offset(dst, 0),
(dst_nritems) *
@@ -2778,7 +2843,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
push_items);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -2790,8 +2855,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(src, src_nritems - push_items);
btrfs_set_header_nritems(dst, dst_nritems + push_items);
- btrfs_mark_buffer_dirty(src);
- btrfs_mark_buffer_dirty(dst);
+ btrfs_mark_buffer_dirty(trans, src);
+ btrfs_mark_buffer_dirty(trans, dst);
return ret;
}
@@ -2807,7 +2872,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int level)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
u64 lower_gen;
struct extent_buffer *lower;
struct extent_buffer *c;
@@ -2824,13 +2888,13 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
else
btrfs_node_key(lower, &lower_key, 0);
- c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
+ c = btrfs_alloc_tree_block(trans, root, 0, btrfs_root_id(root),
&lower_key, level, root->node->start, 0,
- BTRFS_NESTING_NEW_ROOT);
+ 0, BTRFS_NESTING_NEW_ROOT);
if (IS_ERR(c))
return PTR_ERR(c);
- root_add_used(root, fs_info->nodesize);
+ root_add_used_bytes(root);
btrfs_set_header_nritems(c, 1);
btrfs_set_node_key(c, &lower_key, 0);
@@ -2840,18 +2904,28 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(c, 0, lower_gen);
- btrfs_mark_buffer_dirty(c);
+ btrfs_mark_buffer_dirty(trans, c);
old = root->node;
ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ int ret2;
+
+ btrfs_clear_buffer_dirty(trans, c);
+ ret2 = btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1);
+ if (unlikely(ret2 < 0))
+ btrfs_abort_transaction(trans, ret2);
+ btrfs_tree_unlock(c);
+ free_extent_buffer(c);
+ return ret;
+ }
rcu_assign_pointer(root->node, c);
/* the super has an extra ref to root->node */
free_extent_buffer(old);
add_root_to_dirty_list(root);
- atomic_inc(&c->refs);
+ refcount_inc(&c->refs);
path->nodes[level] = c;
path->locks[level] = BTRFS_WRITE_LOCK;
path->slots[level] = 0;
@@ -2865,10 +2939,10 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
* slot and level indicate where you want the key to go, and
* blocknr is the block the key points to.
*/
-static void insert_ptr(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct btrfs_disk_key *key, u64 bytenr,
- int slot, int level)
+static int insert_ptr(struct btrfs_trans_handle *trans,
+ const struct btrfs_path *path,
+ const struct btrfs_disk_key *key, u64 bytenr,
+ int slot, int level)
{
struct extent_buffer *lower;
int nritems;
@@ -2884,7 +2958,10 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
if (level) {
ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
slot, nritems - slot);
- BUG_ON(ret < 0);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
}
memmove_extent_buffer(lower,
btrfs_node_key_ptr_offset(lower, slot + 1),
@@ -2894,14 +2971,19 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
if (level) {
ret = btrfs_tree_mod_log_insert_key(lower, slot,
BTRFS_MOD_LOG_KEY_ADD);
- BUG_ON(ret < 0);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
}
btrfs_set_node_key(lower, key, slot);
btrfs_set_node_blockptr(lower, slot, bytenr);
WARN_ON(trans->transid == 0);
btrfs_set_node_ptr_generation(lower, slot, trans->transid);
btrfs_set_header_nritems(lower, nritems + 1);
- btrfs_mark_buffer_dirty(lower);
+ btrfs_mark_buffer_dirty(trans, lower);
+
+ return 0;
}
/*
@@ -2955,17 +3037,19 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
mid = (c_nritems + 1) / 2;
btrfs_node_key(c, &disk_key, mid);
- split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
+ split = btrfs_alloc_tree_block(trans, root, 0, btrfs_root_id(root),
&disk_key, level, c->start, 0,
- BTRFS_NESTING_SPLIT);
+ 0, BTRFS_NESTING_SPLIT);
if (IS_ERR(split))
return PTR_ERR(split);
- root_add_used(root, fs_info->nodesize);
+ root_add_used_bytes(root);
ASSERT(btrfs_header_level(c) == level);
ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
- if (ret) {
+ if (unlikely(ret)) {
+ btrfs_tree_unlock(split);
+ free_extent_buffer(split);
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -2976,11 +3060,16 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(split, c_nritems - mid);
btrfs_set_header_nritems(c, mid);
- btrfs_mark_buffer_dirty(c);
- btrfs_mark_buffer_dirty(split);
+ btrfs_mark_buffer_dirty(trans, c);
+ btrfs_mark_buffer_dirty(trans, split);
- insert_ptr(trans, path, &disk_key, split->start,
- path->slots[level + 1] + 1, level + 1);
+ ret = insert_ptr(trans, path, &disk_key, split->start,
+ path->slots[level + 1] + 1, level + 1);
+ if (ret < 0) {
+ btrfs_tree_unlock(split);
+ free_extent_buffer(split);
+ return ret;
+ }
if (path->slots[level] >= mid) {
path->slots[level] -= mid;
@@ -3000,7 +3089,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
* and nr indicate which items in the leaf to check. This totals up the
* space used both by the item structs and the item data
*/
-static int leaf_space_used(struct extent_buffer *l, int start, int nr)
+static int leaf_space_used(const struct extent_buffer *l, int start, int nr)
{
int data_len;
int nritems = btrfs_header_nritems(l);
@@ -3020,14 +3109,14 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
* the start of the leaf data. IOW, how much room
* the leaf has left for both items and data
*/
-noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
+int btrfs_leaf_free_space(const struct extent_buffer *leaf)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
int nritems = btrfs_header_nritems(leaf);
int ret;
ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
btrfs_crit(fs_info,
"leaf free space ret %d, leaf data size %lu, used %d nritems %d",
ret,
@@ -3041,8 +3130,9 @@ noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
* min slot controls the lowest index we're willing to push to the
* right. We'll push up to and including min_slot, but no lower
*/
-static noinline int __push_leaf_right(struct btrfs_path *path,
- int data_size, int empty,
+static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ int data_size, bool empty,
struct extent_buffer *right,
int free_space, u32 left_nritems,
u32 min_slot)
@@ -3050,7 +3140,6 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
struct btrfs_fs_info *fs_info = right->fs_info;
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
- struct btrfs_map_token token;
struct btrfs_disk_key disk_key;
int slot;
u32 i;
@@ -3124,36 +3213,33 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
copy_leaf_items(right, left, 0, left_nritems - push_items, push_items);
/* update the item pointers */
- btrfs_init_map_token(&token, right);
right_nritems += push_items;
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
for (i = 0; i < right_nritems; i++) {
- push_space -= btrfs_token_item_size(&token, i);
- btrfs_set_token_item_offset(&token, i, push_space);
+ push_space -= btrfs_item_size(right, i);
+ btrfs_set_item_offset(right, i, push_space);
}
left_nritems -= push_items;
btrfs_set_header_nritems(left, left_nritems);
if (left_nritems)
- btrfs_mark_buffer_dirty(left);
+ btrfs_mark_buffer_dirty(trans, left);
else
- btrfs_clean_tree_block(left);
+ btrfs_clear_buffer_dirty(trans, left);
- btrfs_mark_buffer_dirty(right);
+ btrfs_mark_buffer_dirty(trans, right);
btrfs_item_key(right, &disk_key, 0);
btrfs_set_node_key(upper, &disk_key, slot + 1);
- btrfs_mark_buffer_dirty(upper);
+ btrfs_mark_buffer_dirty(trans, upper);
/* then fixup the leaf pointer in the path */
if (path->slots[0] >= left_nritems) {
path->slots[0] -= left_nritems;
- if (btrfs_header_nritems(path->nodes[0]) == 0)
- btrfs_clean_tree_block(path->nodes[0]);
- btrfs_tree_unlock(path->nodes[0]);
- free_extent_buffer(path->nodes[0]);
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
path->nodes[0] = right;
path->slots[1] += 1;
} else {
@@ -3181,7 +3267,7 @@ out_unlock:
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path,
int min_data_size, int data_size,
- int empty, u32 min_slot)
+ bool empty, u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *right;
@@ -3202,14 +3288,10 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_write_locked(path->nodes[1]);
right = btrfs_read_node_slot(upper, slot + 1);
- /*
- * slot + 1 is not valid or we fail to read the right node,
- * no big deal, just return.
- */
if (IS_ERR(right))
- return 1;
+ return PTR_ERR(right);
- __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
+ btrfs_tree_lock_nested(right, BTRFS_NESTING_RIGHT);
free_space = btrfs_leaf_free_space(right);
if (free_space < data_size)
@@ -3224,8 +3306,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
- if (check_sibling_keys(left, right)) {
+ if (unlikely(check_sibling_keys(left, right))) {
ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
btrfs_tree_unlock(right);
free_extent_buffer(right);
return ret;
@@ -3243,8 +3326,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
return 0;
}
- return __push_leaf_right(path, min_data_size, empty,
- right, free_space, left_nritems, min_slot);
+ return __push_leaf_right(trans, path, min_data_size, empty, right,
+ free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
free_extent_buffer(right);
@@ -3259,8 +3342,9 @@ out_unlock:
* item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
* items
*/
-static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
- int empty, struct extent_buffer *left,
+static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path, int data_size,
+ bool empty, struct extent_buffer *left,
int free_space, u32 right_nritems,
u32 max_slot)
{
@@ -3275,7 +3359,6 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
int ret = 0;
u32 this_item_size;
u32 old_left_item_size;
- struct btrfs_map_token token;
if (empty)
nr = min(right_nritems, max_slot);
@@ -3323,21 +3406,24 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
old_left_nritems = btrfs_header_nritems(left);
BUG_ON(old_left_nritems <= 0);
- btrfs_init_map_token(&token, left);
old_left_item_size = btrfs_item_offset(left, old_left_nritems - 1);
for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i,
+ ioff = btrfs_item_offset(left, i);
+ btrfs_set_item_offset(left, i,
ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
}
btrfs_set_header_nritems(left, old_left_nritems + push_items);
/* fixup right node */
- if (push_items > right_nritems)
- WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
- right_nritems);
+ if (unlikely(push_items > right_nritems)) {
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ btrfs_crit(fs_info, "push items (%d) > right leaf items (%u)",
+ push_items, right_nritems);
+ goto out;
+ }
if (push_items < right_nritems) {
push_space = btrfs_item_offset(right, push_items - 1) -
@@ -3350,29 +3436,28 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
btrfs_header_nritems(right) - push_items);
}
- btrfs_init_map_token(&token, right);
right_nritems -= push_items;
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
for (i = 0; i < right_nritems; i++) {
- push_space = push_space - btrfs_token_item_size(&token, i);
- btrfs_set_token_item_offset(&token, i, push_space);
+ push_space = push_space - btrfs_item_size(right, i);
+ btrfs_set_item_offset(right, i, push_space);
}
- btrfs_mark_buffer_dirty(left);
+ btrfs_mark_buffer_dirty(trans, left);
if (right_nritems)
- btrfs_mark_buffer_dirty(right);
+ btrfs_mark_buffer_dirty(trans, right);
else
- btrfs_clean_tree_block(right);
+ btrfs_clear_buffer_dirty(trans, right);
btrfs_item_key(right, &disk_key, 0);
- fixup_low_keys(path, &disk_key, 1);
+ fixup_low_keys(trans, path, &disk_key, 1);
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
path->slots[0] += old_left_nritems;
- btrfs_tree_unlock(path->nodes[0]);
- free_extent_buffer(path->nodes[0]);
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
path->nodes[0] = left;
path->slots[1] -= 1;
} else {
@@ -3420,14 +3505,10 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_write_locked(path->nodes[1]);
left = btrfs_read_node_slot(path->nodes[1], slot - 1);
- /*
- * slot - 1 is not valid or we fail to read the left node,
- * no big deal, just return.
- */
if (IS_ERR(left))
- return 1;
+ return PTR_ERR(left);
- __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
+ btrfs_tree_lock_nested(left, BTRFS_NESTING_LEFT);
free_space = btrfs_leaf_free_space(left);
if (free_space < data_size) {
@@ -3445,13 +3526,13 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- if (check_sibling_keys(left, right)) {
+ if (unlikely(check_sibling_keys(left, right))) {
ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
- return __push_leaf_left(path, min_data_size,
- empty, left, free_space, right_nritems,
- max_slot);
+ return __push_leaf_left(trans, path, min_data_size, empty, left,
+ free_space, right_nritems, max_slot);
out:
btrfs_tree_unlock(left);
free_extent_buffer(left);
@@ -3462,18 +3543,18 @@ out:
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
*/
-static noinline void copy_for_split(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct extent_buffer *l,
- struct extent_buffer *right,
- int slot, int mid, int nritems)
+static noinline int copy_for_split(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct extent_buffer *l,
+ struct extent_buffer *right,
+ int slot, int mid, int nritems)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int data_copy_size;
int rt_data_off;
int i;
+ int ret;
struct btrfs_disk_key disk_key;
- struct btrfs_map_token token;
nritems = nritems - mid;
btrfs_set_header_nritems(right, nritems);
@@ -3486,20 +3567,21 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_data_end(l, mid);
- btrfs_init_map_token(&token, right);
for (i = 0; i < nritems; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i, ioff + rt_data_off);
+ ioff = btrfs_item_offset(right, i);
+ btrfs_set_item_offset(right, i, ioff + rt_data_off);
}
btrfs_set_header_nritems(l, mid);
btrfs_item_key(right, &disk_key, 0);
- insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
+ ret = insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
+ if (ret < 0)
+ return ret;
- btrfs_mark_buffer_dirty(right);
- btrfs_mark_buffer_dirty(l);
+ btrfs_mark_buffer_dirty(trans, right);
+ btrfs_mark_buffer_dirty(trans, l);
BUG_ON(path->slots[0] != slot);
if (mid <= slot) {
@@ -3514,6 +3596,8 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
}
BUG_ON(path->slots[0] < 0);
+
+ return 0;
}
/*
@@ -3590,7 +3674,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const struct btrfs_key *ins_key,
struct btrfs_path *path, int data_size,
- int extend)
+ bool extend)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *l;
@@ -3700,20 +3784,25 @@ again:
* BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just
* use BTRFS_NESTING_NEW_ROOT.
*/
- right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
- &disk_key, 0, l->start, 0,
+ right = btrfs_alloc_tree_block(trans, root, 0, btrfs_root_id(root),
+ &disk_key, 0, l->start, 0, 0,
num_doubles ? BTRFS_NESTING_NEW_ROOT :
BTRFS_NESTING_SPLIT);
if (IS_ERR(right))
return PTR_ERR(right);
- root_add_used(root, fs_info->nodesize);
+ root_add_used_bytes(root);
if (split == 0) {
if (mid <= slot) {
btrfs_set_header_nritems(right, 0);
- insert_ptr(trans, path, &disk_key,
- right->start, path->slots[1] + 1, 1);
+ ret = insert_ptr(trans, path, &disk_key,
+ right->start, path->slots[1] + 1, 1);
+ if (ret < 0) {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ return ret;
+ }
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3721,14 +3810,19 @@ again:
path->slots[1] += 1;
} else {
btrfs_set_header_nritems(right, 0);
- insert_ptr(trans, path, &disk_key,
- right->start, path->slots[1], 1);
+ ret = insert_ptr(trans, path, &disk_key,
+ right->start, path->slots[1], 1);
+ if (ret < 0) {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ return ret;
+ }
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
path->slots[0] = 0;
if (path->slots[1] == 0)
- fixup_low_keys(path, &disk_key, 1);
+ fixup_low_keys(trans, path, &disk_key, 1);
}
/*
* We create a new leaf 'right' for the required ins_len and
@@ -3738,7 +3832,12 @@ again:
return ret;
}
- copy_for_split(trans, path, l, right, slot, mid, nritems);
+ ret = copy_for_split(trans, path, l, right, slot, mid, nritems);
+ if (ret < 0) {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ return ret;
+ }
if (split == 2) {
BUG_ON(num_doubles != 0);
@@ -3771,6 +3870,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
+ key.type != BTRFS_RAID_STRIPE_KEY &&
key.type != BTRFS_EXTENT_CSUM_KEY);
if (btrfs_leaf_free_space(leaf) >= ins_len)
@@ -3784,10 +3884,10 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- path->keep_locks = 1;
- path->search_for_split = 1;
+ path->keep_locks = true;
+ path->search_for_split = true;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
- path->search_for_split = 0;
+ path->search_for_split = false;
if (ret > 0)
ret = -EAGAIN;
if (ret < 0)
@@ -3814,15 +3914,16 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
if (ret)
goto err;
- path->keep_locks = 0;
+ path->keep_locks = false;
btrfs_unlock_up_safe(path, 1);
return 0;
err:
- path->keep_locks = 0;
+ path->keep_locks = false;
return ret;
}
-static noinline int split_item(struct btrfs_path *path,
+static noinline int split_item(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
const struct btrfs_key *new_key,
unsigned long split_offset)
{
@@ -3835,7 +3936,12 @@ static noinline int split_item(struct btrfs_path *path,
struct btrfs_disk_key disk_key;
leaf = path->nodes[0];
- BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
+ /*
+ * Shouldn't happen because the caller must have previously called
+ * setup_leaf_for_split() to make room for the new item in the leaf.
+ */
+ if (WARN_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item)))
+ return -ENOSPC;
orig_slot = path->slots[0];
orig_offset = btrfs_item_offset(leaf, path->slots[0]);
@@ -3876,7 +3982,7 @@ static noinline int split_item(struct btrfs_path *path,
write_extent_buffer(leaf, buf + split_offset,
btrfs_item_ptr_offset(leaf, slot),
item_size - split_offset);
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
BUG_ON(btrfs_leaf_free_space(leaf) < 0);
kfree(buf);
@@ -3910,7 +4016,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- ret = split_item(path, new_key, split_offset);
+ ret = split_item(trans, path, new_key, split_offset);
return ret;
}
@@ -3920,7 +4026,8 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
* off the end of the item or if we shift the item to chop bytes off
* the front.
*/
-void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
+void btrfs_truncate_item(struct btrfs_trans_handle *trans,
+ const struct btrfs_path *path, u32 new_size, int from_end)
{
int slot;
struct extent_buffer *leaf;
@@ -3930,7 +4037,6 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
unsigned int old_size;
unsigned int size_diff;
int i;
- struct btrfs_map_token token;
leaf = path->nodes[0];
slot = path->slots[0];
@@ -3953,12 +4059,11 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
* item0..itemN ... dataN.offset..dataN.size .. data0.size
*/
/* first correct the data pointers */
- btrfs_init_map_token(&token, leaf);
for (i = slot; i < nritems; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i, ioff + size_diff);
+ ioff = btrfs_item_offset(leaf, i);
+ btrfs_set_item_offset(leaf, i, ioff + size_diff);
}
/* shift the data */
@@ -3996,13 +4101,13 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
btrfs_set_item_key(leaf, &disk_key, slot);
if (slot == 0)
- fixup_low_keys(path, &disk_key, 1);
+ fixup_low_keys(trans, path, &disk_key, 1);
}
btrfs_set_item_size(leaf, slot, new_size);
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
- if (btrfs_leaf_free_space(leaf) < 0) {
+ if (unlikely(btrfs_leaf_free_space(leaf) < 0)) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4011,7 +4116,8 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
/*
* make the item pointed to by the path bigger, data_size is the added size.
*/
-void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
+void btrfs_extend_item(struct btrfs_trans_handle *trans,
+ const struct btrfs_path *path, u32 data_size)
{
int slot;
struct extent_buffer *leaf;
@@ -4020,14 +4126,13 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
unsigned int old_data;
unsigned int old_size;
int i;
- struct btrfs_map_token token;
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
data_end = leaf_data_end(leaf);
- if (btrfs_leaf_free_space(leaf) < data_size) {
+ if (unlikely(btrfs_leaf_free_space(leaf) < data_size)) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4035,7 +4140,7 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
old_data = btrfs_item_data_end(leaf, slot);
BUG_ON(slot < 0);
- if (slot >= nritems) {
+ if (unlikely(slot >= nritems)) {
btrfs_print_leaf(leaf);
btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
slot, nritems);
@@ -4046,24 +4151,22 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
* item0..itemN ... dataN.offset..dataN.size .. data0.size
*/
/* first correct the data pointers */
- btrfs_init_map_token(&token, leaf);
for (i = slot; i < nritems; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i, ioff - data_size);
+ ioff = btrfs_item_offset(leaf, i);
+ btrfs_set_item_offset(leaf, i, ioff - data_size);
}
/* shift the data */
memmove_leaf_data(leaf, data_end - data_size, data_end,
old_data - data_end);
- data_end = old_data;
old_size = btrfs_item_size(leaf, slot);
btrfs_set_item_size(leaf, slot, old_size + data_size);
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
- if (btrfs_leaf_free_space(leaf) < 0) {
+ if (unlikely(btrfs_leaf_free_space(leaf) < 0)) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4072,6 +4175,7 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
/*
* Make space in the node before inserting one or more items.
*
+ * @trans: transaction handle
* @root: root we are inserting items to
* @path: points to the leaf/slot where we are going to insert new items
* @batch: information about the batch of items to insert
@@ -4079,7 +4183,8 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
* Main purpose is to save stack depth by doing the bulk of the work in a
* function that doesn't call btrfs_search_slot
*/
-static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
+static void setup_items_for_insert(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
const struct btrfs_item_batch *batch)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4089,7 +4194,6 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p
struct btrfs_disk_key disk_key;
struct extent_buffer *leaf;
int slot;
- struct btrfs_map_token token;
u32 total_size;
/*
@@ -4099,7 +4203,7 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p
*/
if (path->slots[0] == 0) {
btrfs_cpu_key_to_disk(&disk_key, &batch->keys[0]);
- fixup_low_keys(path, &disk_key, 1);
+ fixup_low_keys(trans, path, &disk_key, 1);
}
btrfs_unlock_up_safe(path, 1);
@@ -4110,18 +4214,17 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p
data_end = leaf_data_end(leaf);
total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
- if (btrfs_leaf_free_space(leaf) < total_size) {
+ if (unlikely(btrfs_leaf_free_space(leaf) < total_size)) {
btrfs_print_leaf(leaf);
btrfs_crit(fs_info, "not enough freespace need %u have %d",
total_size, btrfs_leaf_free_space(leaf));
BUG();
}
- btrfs_init_map_token(&token, leaf);
if (slot != nritems) {
unsigned int old_data = btrfs_item_data_end(leaf, slot);
- if (old_data < data_end) {
+ if (unlikely(old_data < data_end)) {
btrfs_print_leaf(leaf);
btrfs_crit(fs_info,
"item at slot %d with data offset %u beyond data end of leaf %u",
@@ -4135,8 +4238,8 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p
for (i = slot; i < nritems; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i,
+ ioff = btrfs_item_offset(leaf, i);
+ btrfs_set_item_offset(leaf, i,
ioff - batch->total_data_size);
}
/* shift the items */
@@ -4153,14 +4256,14 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p
btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]);
btrfs_set_item_key(leaf, &disk_key, slot + i);
data_end -= batch->data_sizes[i];
- btrfs_set_token_item_offset(&token, slot + i, data_end);
- btrfs_set_token_item_size(&token, slot + i, batch->data_sizes[i]);
+ btrfs_set_item_offset(leaf, slot + i, data_end);
+ btrfs_set_item_size(leaf, slot + i, batch->data_sizes[i]);
}
btrfs_set_header_nritems(leaf, nritems + batch->nr);
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
- if (btrfs_leaf_free_space(leaf) < 0) {
+ if (unlikely(btrfs_leaf_free_space(leaf) < 0)) {
btrfs_print_leaf(leaf);
BUG();
}
@@ -4169,12 +4272,14 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p
/*
* Insert a new item into a leaf.
*
+ * @trans: Transaction handle.
* @root: The root of the btree.
* @path: A path pointing to the target leaf and slot.
* @key: The key of the new item.
* @data_size: The size of the data associated with the new key.
*/
-void btrfs_setup_item_for_insert(struct btrfs_root *root,
+void btrfs_setup_item_for_insert(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
struct btrfs_path *path,
const struct btrfs_key *key,
u32 data_size)
@@ -4186,12 +4291,16 @@ void btrfs_setup_item_for_insert(struct btrfs_root *root,
batch.total_data_size = data_size;
batch.nr = 1;
- setup_items_for_insert(root, path, &batch);
+ setup_items_for_insert(trans, root, path, &batch);
}
/*
* Given a key and some data, insert items into the tree.
* This does all the path init required, making room in the tree if needed.
+ *
+ * Returns: 0 on success
+ * -EEXIST if the first key already exists
+ * < 0 on other errors
*/
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -4212,7 +4321,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
slot = path->slots[0];
BUG_ON(slot < 0);
- setup_items_for_insert(root, path, batch);
+ setup_items_for_insert(trans, root, path, batch);
return 0;
}
@@ -4225,7 +4334,7 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u32 data_size)
{
int ret = 0;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
unsigned long ptr;
@@ -4237,9 +4346,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
leaf = path->nodes[0];
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
write_extent_buffer(leaf, data, ptr, data_size);
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
}
- btrfs_free_path(path);
return ret;
}
@@ -4268,7 +4376,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
return ret;
path->slots[0]++;
- btrfs_setup_item_for_insert(root, path, new_key, item_size);
+ btrfs_setup_item_for_insert(trans, root, path, new_key, item_size);
leaf = path->nodes[0];
memcpy_extent_buffer(leaf,
btrfs_item_ptr_offset(leaf, path->slots[0]),
@@ -4282,9 +4390,11 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
*
* the tree should have been previously balanced so the deletion does not
* empty a node.
+ *
+ * This is exported for use inside btrfs-progs, don't un-export it.
*/
-static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
- int level, int slot)
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot)
{
struct extent_buffer *parent = path->nodes[level];
u32 nritems;
@@ -4295,7 +4405,10 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
if (level) {
ret = btrfs_tree_mod_log_insert_move(parent, slot,
slot + 1, nritems - slot - 1);
- BUG_ON(ret < 0);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
}
memmove_extent_buffer(parent,
btrfs_node_key_ptr_offset(parent, slot),
@@ -4305,7 +4418,10 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
} else if (level) {
ret = btrfs_tree_mod_log_insert_key(parent, slot,
BTRFS_MOD_LOG_KEY_REMOVE);
- BUG_ON(ret < 0);
+ if (unlikely(ret < 0)) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
}
nritems--;
@@ -4318,9 +4434,10 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_disk_key disk_key;
btrfs_node_key(parent, &disk_key, 0);
- fixup_low_keys(path, &disk_key, level + 1);
+ fixup_low_keys(trans, path, &disk_key, level + 1);
}
- btrfs_mark_buffer_dirty(parent);
+ btrfs_mark_buffer_dirty(trans, parent);
+ return 0;
}
/*
@@ -4333,13 +4450,17 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
* The path must have already been setup for deleting the leaf, including
* all the proper balancing. path->nodes[1] must be locked.
*/
-static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *leaf)
+static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct extent_buffer *leaf)
{
+ int ret;
+
WARN_ON(btrfs_header_generation(leaf) != trans->transid);
- del_ptr(root, path, 1, path->slots[1]);
+ ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
+ if (ret < 0)
+ return ret;
/*
* btrfs_free_extent is expensive, we want to make sure we
@@ -4347,11 +4468,15 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
*/
btrfs_unlock_up_safe(path, 0);
- root_sub_used(root, leaf->len);
+ root_sub_used_bytes(root);
- atomic_inc(&leaf->refs);
- btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
+ refcount_inc(&leaf->refs);
+ ret = btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
free_extent_buffer_stale(leaf);
+ if (ret < 0)
+ btrfs_abort_transaction(trans, ret);
+
+ return ret;
}
/*
* delete the item at the leaf level in path. If that empties
@@ -4372,7 +4497,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (slot + nr != nritems) {
const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1);
const int data_end = leaf_data_end(leaf);
- struct btrfs_map_token token;
u32 dsize = 0;
int i;
@@ -4382,12 +4506,11 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
memmove_leaf_data(leaf, data_end + dsize, data_end,
last_off - data_end);
- btrfs_init_map_token(&token, leaf);
for (i = slot + nr; i < nritems; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i, ioff + dsize);
+ ioff = btrfs_item_offset(leaf, i);
+ btrfs_set_item_offset(leaf, i, ioff + dsize);
}
memmove_leaf_items(leaf, slot, slot + nr, nritems - slot - nr);
@@ -4397,11 +4520,11 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
/* delete the leaf if we've emptied it */
if (nritems == 0) {
- if (leaf == root->node) {
- btrfs_set_header_level(leaf, 0);
- } else {
- btrfs_clean_tree_block(leaf);
- btrfs_del_leaf(trans, root, path, leaf);
+ if (leaf != root->node) {
+ btrfs_clear_buffer_dirty(trans, leaf);
+ ret = btrfs_del_leaf(trans, root, path, leaf);
+ if (ret < 0)
+ return ret;
}
} else {
int used = leaf_space_used(leaf, 0, nritems);
@@ -4409,7 +4532,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_item_key(leaf, &disk_key, 0);
- fixup_low_keys(path, &disk_key, 1);
+ fixup_low_keys(trans, path, &disk_key, 1);
}
/*
@@ -4425,10 +4548,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
/* push_leaf_left fixes the path.
* make sure the path still points to our leaf
- * for possible call to del_ptr below
+ * for possible call to btrfs_del_ptr below
*/
slot = path->slots[1];
- atomic_inc(&leaf->refs);
+ refcount_inc(&leaf->refs);
/*
* We want to be able to at least push one item to the
* left neighbour leaf, and that's the first item.
@@ -4462,9 +4585,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (btrfs_header_nritems(leaf) == 0) {
path->slots[1] = slot;
- btrfs_del_leaf(trans, root, path, leaf);
+ ret = btrfs_del_leaf(trans, root, path, leaf);
free_extent_buffer(leaf);
- ret = 0;
+ if (ret < 0)
+ return ret;
} else {
/* if we're still in the path, make sure
* we're dirty. Otherwise, one of the
@@ -4472,78 +4596,25 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* dirtied this buffer
*/
if (path->nodes[0] == leaf)
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
free_extent_buffer(leaf);
}
} else {
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
}
}
return ret;
}
/*
- * search the tree again to find a leaf with lesser keys
- * returns 0 if it found something or 1 if there are no lesser leaves.
- * returns < 0 on io errors.
- *
- * This may release the path, and so you may lose any locks held at the
- * time you call it.
- */
-int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
-{
- struct btrfs_key key;
- struct btrfs_disk_key found_key;
- int ret;
-
- btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
-
- if (key.offset > 0) {
- key.offset--;
- } else if (key.type > 0) {
- key.type--;
- key.offset = (u64)-1;
- } else if (key.objectid > 0) {
- key.objectid--;
- key.type = (u8)-1;
- key.offset = (u64)-1;
- } else {
- return 1;
- }
-
- btrfs_release_path(path);
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- return ret;
- btrfs_item_key(path->nodes[0], &found_key, 0);
- ret = comp_keys(&found_key, &key);
- /*
- * We might have had an item with the previous key in the tree right
- * before we released our path. And after we released our path, that
- * item might have been pushed to the first slot (0) of the leaf we
- * were holding due to a tree balance. Alternatively, an item with the
- * previous key can exist as the only element of a leaf (big fat item).
- * Therefore account for these 2 cases, so that our callers (like
- * btrfs_previous_item) don't miss an existing item with a key matching
- * the previous key we computed above.
- */
- if (ret <= 0)
- return 0;
- return 1;
-}
-
-/*
* A helper function to walk down the tree starting at min_key, and looking
- * for nodes or leaves that are have a minimum transaction id.
+ * for leaves that have a minimum transaction id.
* This is used by the btree defrag code, and tree logging
*
* This does not cow, but it does stuff the starting key it finds back
* into min_key, so you can call btrfs_search_slot with cow=1 on the
* key and get a writable path.
*
- * This honors path->lowest_level to prevent descent past a given level
- * of the tree.
- *
* min_trans indicates the oldest transaction that you are interested
* in walking through. Any nodes or leaves older than min_trans are
* skipped over (without reading them).
@@ -4556,16 +4627,16 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
u64 min_trans)
{
struct extent_buffer *cur;
- struct btrfs_key found_key;
int slot;
int sret;
u32 nritems;
int level;
int ret = 1;
- int keep_locks = path->keep_locks;
+ const bool keep_locks = path->keep_locks;
ASSERT(!path->nowait);
- path->keep_locks = 1;
+ ASSERT(path->lowest_level == 0);
+ path->keep_locks = true;
again:
cur = btrfs_read_lock_root_node(root);
level = btrfs_header_level(cur);
@@ -4580,19 +4651,20 @@ again:
while (1) {
nritems = btrfs_header_nritems(cur);
level = btrfs_header_level(cur);
- sret = btrfs_bin_search(cur, min_key, &slot);
+ sret = btrfs_bin_search(cur, 0, min_key, &slot);
if (sret < 0) {
ret = sret;
goto out;
}
- /* at the lowest level, we're done, setup the path and exit */
- if (level == path->lowest_level) {
+ /* At level 0 we're done, setup the path and exit. */
+ if (level == 0) {
if (slot >= nritems)
goto find_next_key;
ret = 0;
path->slots[level] = slot;
- btrfs_item_key_to_cpu(cur, &found_key, slot);
+ /* Save our key for returning back. */
+ btrfs_item_key_to_cpu(cur, min_key, slot);
goto out;
}
if (sret && slot > 0)
@@ -4616,8 +4688,8 @@ find_next_key:
* we didn't find a candidate key in this node, walk forward
* and find another one
*/
+ path->slots[level] = slot;
if (slot >= nritems) {
- path->slots[level] = slot;
sret = btrfs_find_next_key(root, path, min_key, level,
min_trans);
if (sret == 0) {
@@ -4627,13 +4699,6 @@ find_next_key:
goto out;
}
}
- /* save our key for returning back */
- btrfs_node_key_to_cpu(cur, &found_key, slot);
- path->slots[level] = slot;
- if (level == path->lowest_level) {
- ret = 0;
- goto out;
- }
cur = btrfs_read_node_slot(cur, slot);
if (IS_ERR(cur)) {
ret = PTR_ERR(cur);
@@ -4648,10 +4713,8 @@ find_next_key:
}
out:
path->keep_locks = keep_locks;
- if (ret == 0) {
- btrfs_unlock_up_safe(path, path->lowest_level + 1);
- memcpy(min_key, &found_key, sizeof(found_key));
- }
+ if (ret == 0)
+ btrfs_unlock_up_safe(path, 1);
return ret;
}
@@ -4663,7 +4726,7 @@ out:
* 0 is returned if another key is found, < 0 if there are any errors
* and 1 is returned if there are no higher keys in the tree
*
- * path->keep_locks should be set to 1 on the search made before
+ * path->keep_locks should be set to true on the search made before
* calling this function.
*/
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
@@ -4762,13 +4825,13 @@ again:
next = NULL;
btrfs_release_path(path);
- path->keep_locks = 1;
+ path->keep_locks = true;
if (time_seq) {
ret = btrfs_search_old_slot(root, &key, path, time_seq);
} else {
if (path->need_commit_sem) {
- path->need_commit_sem = 0;
+ path->need_commit_sem = false;
need_commit_sem = true;
if (path->nowait) {
if (!down_read_trylock(&fs_info->commit_root_sem)) {
@@ -4781,41 +4844,30 @@ again:
}
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
}
- path->keep_locks = 0;
+ path->keep_locks = false;
if (ret < 0)
goto done;
nritems = btrfs_header_nritems(path->nodes[0]);
/*
- * by releasing the path above we dropped all our locks. A balance
- * could have added more items next to the key that used to be
- * at the very end of the block. So, check again here and
- * advance the path if there are now more items available.
- */
- if (nritems > 0 && path->slots[0] < nritems - 1) {
- if (ret == 0)
- path->slots[0]++;
- ret = 0;
- goto done;
- }
- /*
- * So the above check misses one case:
- * - after releasing the path above, someone has removed the item that
- * used to be at the very end of the block, and balance between leafs
- * gets another one with bigger key.offset to replace it.
+ * By releasing the path above we dropped all our locks. A balance
+ * could have happened and
*
- * This one should be returned as well, or we can get leaf corruption
- * later(esp. in __btrfs_drop_extents()).
+ * 1. added more items after the previous last item
+ * 2. deleted the previous last item
*
- * And a bit more explanation about this check,
- * with ret > 0, the key isn't found, the path points to the slot
- * where it should be inserted, so the path->slots[0] item must be the
- * bigger one.
+ * So, check again here and advance the path if there are now more
+ * items available.
*/
- if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
- ret = 0;
- goto done;
+ if (nritems > 0 && path->slots[0] <= nritems - 1) {
+ if (ret == 0 && path->slots[0] != nritems - 1) {
+ path->slots[0]++;
+ goto done;
+ } else if (ret > 0) {
+ ret = 0;
+ goto done;
+ }
}
while (level < BTRFS_MAX_LEVEL) {
@@ -4851,8 +4903,7 @@ again:
}
next = c;
- ret = read_block_for_search(root, path, &next, level,
- slot, &key);
+ ret = read_block_for_search(root, path, &next, slot, &key);
if (ret == -EAGAIN && !path->nowait)
goto again;
@@ -4895,8 +4946,7 @@ again:
if (!level)
break;
- ret = read_block_for_search(root, path, &next, level,
- 0, &key);
+ ret = read_block_for_search(root, path, &next, 0, &key);
if (ret == -EAGAIN && !path->nowait)
goto again;
@@ -4922,7 +4972,7 @@ done:
if (need_commit_sem) {
int ret2;
- path->need_commit_sem = 1;
+ path->need_commit_sem = true;
ret2 = finish_need_commit_sem_search(path);
up_read(&fs_info->commit_root_sem);
if (ret2)
@@ -5026,9 +5076,7 @@ int btrfs_previous_extent_item(struct btrfs_root *root,
int __init btrfs_ctree_init(void)
{
- btrfs_path_cachep = kmem_cache_create("btrfs_path",
- sizeof(struct btrfs_path), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_path_cachep = KMEM_CACHE(btrfs_path, 0);
if (!btrfs_path_cachep)
return -ENOMEM;
return 0;