summary | refs | log | tree | commit | diff
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- fs/btrfs/disk-io.c 529
1 files changed, 265 insertions, 264 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c843563914ca..f09db62e61a1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -17,7 +17,7 @@
#include <linux/error-injection.h>
#include <linux/crc32c.h>
#include <linux/sched/mm.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
@@ -29,7 +29,6 @@
#include "tree-log.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
-#include "rcu-string.h"
#include "dev-replace.h"
#include "raid56.h"
#include "sysfs.h"
@@ -193,7 +192,7 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
struct folio *folio = eb->folios[i];
u64 start = max_t(u64, eb->start, folio_pos(folio));
u64 end = min_t(u64, eb->start + eb->len,
- folio_pos(folio) + folio_size(folio));
+ folio_pos(folio) + eb->folio_size);
u32 len = end - start;
ret = btrfs_repair_io_failure(fs_info, 0, start, len,
@@ -214,7 +213,7 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
* structure for details.
*/
int btrfs_read_extent_buffer(struct extent_buffer *eb,
- struct btrfs_tree_parent_check *check)
+ const struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
int failed = 0;
@@ -227,7 +226,7 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
while (1) {
clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
- ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num, check);
+ ret = read_extent_buffer_pages(eb, mirror_num, check);
if (!ret)
break;
@@ -359,7 +358,7 @@ static bool check_tree_block_fsid(struct extent_buffer *eb)
/* Do basic extent buffer checks at read time */
int btrfs_validate_extent_buffer(struct extent_buffer *eb,
- struct btrfs_tree_parent_check *check)
+ const struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
u64 found_start;
@@ -368,6 +367,7 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
u8 result[BTRFS_CSUM_SIZE];
const u8 *header_csum;
int ret = 0;
+ const bool ignore_csum = btrfs_test_opt(fs_info, IGNOREMETACSUMS);
ASSERT(check);
@@ -400,13 +400,16 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
if (memcmp(result, header_csum, csum_size) != 0) {
btrfs_warn_rl(fs_info,
-"checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d",
+"checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d%s",
eb->start, eb->read_mirror,
CSUM_FMT_VALUE(csum_size, header_csum),
CSUM_FMT_VALUE(csum_size, result),
- btrfs_header_level(eb));
- ret = -EUCLEAN;
- goto out;
+ btrfs_header_level(eb),
+ ignore_csum ? ", ignored" : "");
+ if (!ignore_csum) {
+ ret = -EUCLEAN;
+ goto out;
+ }
}
if (found_level != check->level) {
@@ -426,7 +429,7 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
goto out;
}
if (check->has_first_key) {
- struct btrfs_key *expect_key = &check->first_key;
+ const struct btrfs_key *expect_key = &check->first_key;
struct btrfs_key found_key;
if (found_level)
@@ -498,15 +501,15 @@ static int btree_migrate_folio(struct address_space *mapping,
static int btree_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct btrfs_fs_info *fs_info;
int ret;
if (wbc->sync_mode == WB_SYNC_NONE) {
+ struct btrfs_fs_info *fs_info;
if (wbc->for_kupdate)
return 0;
- fs_info = BTRFS_I(mapping->host)->root->fs_info;
+ fs_info = inode_to_fs_info(mapping->host);
/* this is a bit racy, but that's ok */
ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
BTRFS_DIRTY_METADATA_THRESH,
@@ -522,18 +525,19 @@ static bool btree_release_folio(struct folio *folio, gfp_t gfp_flags)
if (folio_test_writeback(folio) || folio_test_dirty(folio))
return false;
- return try_release_extent_buffer(&folio->page);
+ return try_release_extent_buffer(folio);
}
static void btree_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
struct extent_io_tree *tree;
- tree = &BTRFS_I(folio->mapping->host)->io_tree;
+
+ tree = &folio_to_inode(folio)->io_tree;
extent_invalidate_folio(tree, folio, offset);
btree_release_folio(folio, GFP_NOFS);
if (folio_get_private(folio)) {
- btrfs_warn(BTRFS_I(folio->mapping->host)->root->fs_info,
+ btrfs_warn(folio_to_fs_info(folio),
"folio private not zero on folio %llu",
(unsigned long long)folio_pos(folio));
folio_detach_private(folio);
@@ -544,7 +548,7 @@ static void btree_invalidate_folio(struct folio *folio, size_t offset,
static bool btree_dirty_folio(struct address_space *mapping,
struct folio *folio)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(mapping->host);
struct btrfs_subpage_info *spi = fs_info->subpage_info;
struct btrfs_subpage *subpage;
struct extent_buffer *eb;
@@ -635,10 +639,6 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
free_extent_buffer_stale(buf);
return ERR_PTR(ret);
}
- if (btrfs_check_eb_owner(buf, check->owner_root)) {
- free_extent_buffer_stale(buf);
- return ERR_PTR(-EUCLEAN);
- }
return buf;
}
@@ -646,7 +646,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
u64 objectid)
{
- bool dummy = test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
+ bool dummy = btrfs_is_testing(fs_info);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
@@ -658,13 +658,12 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->state = 0;
RB_CLEAR_NODE(&root->rb_node);
- root->last_trans = 0;
+ btrfs_set_root_last_trans(root, 0);
root->free_objectid = 0;
root->nr_delalloc_inodes = 0;
root->nr_ordered_extents = 0;
- root->inode_tree = RB_ROOT;
- /* GFP flags are compatible with XA_FLAGS_*. */
- xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
+ xa_init(&root->inodes);
+ xa_init(&root->delayed_nodes);
btrfs_init_root_block_rsv(root);
@@ -675,7 +674,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&root->ordered_extents);
INIT_LIST_HEAD(&root->ordered_root);
INIT_LIST_HEAD(&root->reloc_dirty_list);
- spin_lock_init(&root->inode_lock);
spin_lock_init(&root->delalloc_lock);
spin_lock_init(&root->ordered_extent_lock);
spin_lock_init(&root->accounting_lock);
@@ -776,7 +774,7 @@ int btrfs_global_root_insert(struct btrfs_root *root)
if (tmp) {
ret = -EEXIST;
btrfs_warn(fs_info, "global root %llu %llu already exists",
- root->root_key.objectid, root->root_key.offset);
+ btrfs_root_id(root), root->root_key.offset);
}
return ret;
}
@@ -848,13 +846,6 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
return btrfs_global_root(fs_info, &key);
}
-struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
-{
- if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
- return fs_info->block_group_root;
- return btrfs_extent_root(fs_info, 0);
-}
-
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
u64 objectid)
{
@@ -926,8 +917,7 @@ fail:
return ERR_PTR(ret);
}
-static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+static struct btrfs_root *alloc_log_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
@@ -975,7 +965,7 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
{
struct btrfs_root *log_root;
- log_root = alloc_log_tree(trans, fs_info);
+ log_root = alloc_log_tree(fs_info);
if (IS_ERR(log_root))
return PTR_ERR(log_root);
@@ -1001,7 +991,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *inode_item;
int ret;
- log_root = alloc_log_tree(trans, fs_info);
+ log_root = alloc_log_tree(fs_info);
if (IS_ERR(log_root))
return PTR_ERR(log_root);
@@ -1011,8 +1001,8 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
return ret;
}
- log_root->last_trans = trans->transid;
- log_root->root_key.offset = root->root_key.objectid;
+ btrfs_set_root_last_trans(log_root, trans->transid);
+ log_root->root_key.offset = btrfs_root_id(root);
inode_item = &log_root->root_item.inode;
btrfs_set_stack_inode_generation(inode_item, 1);
@@ -1034,7 +1024,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
struct btrfs_path *path,
- struct btrfs_key *key)
+ const struct btrfs_key *key)
{
struct btrfs_root *root;
struct btrfs_tree_parent_check check = { 0 };
@@ -1076,15 +1066,15 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
* For real fs, and not log/reloc trees, root owner must
* match its root node owner
*/
- if (!test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state) &&
- root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
- root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
- root->root_key.objectid != btrfs_header_owner(root->node)) {
+ if (!btrfs_is_testing(fs_info) &&
+ btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
+ btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID &&
+ btrfs_root_id(root) != btrfs_header_owner(root->node)) {
btrfs_crit(fs_info,
"root=%llu block=%llu, tree root owner mismatch, have %llu expect %llu",
- root->root_key.objectid, root->node->start,
+ btrfs_root_id(root), root->node->start,
btrfs_header_owner(root->node),
- root->root_key.objectid);
+ btrfs_root_id(root));
ret = -EUCLEAN;
goto fail;
}
@@ -1096,7 +1086,7 @@ fail:
}
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
- struct btrfs_key *key)
+ const struct btrfs_key *key)
{
struct btrfs_root *root;
struct btrfs_path *path;
@@ -1121,9 +1111,9 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
btrfs_drew_lock_init(&root->snapshot_lock);
- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
+ if (btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
!btrfs_is_data_reloc_root(root) &&
- is_fstree(root->root_key.objectid)) {
+ is_fstree(btrfs_root_id(root))) {
set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
@@ -1132,7 +1122,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
* Don't assign anonymous block device to roots that are not exposed to
* userspace, the id pool is limited to 1M
*/
- if (is_fstree(root->root_key.objectid) &&
+ if (is_fstree(btrfs_root_id(root)) &&
btrfs_root_refs(&root->root_item) > 0) {
if (!anon_dev) {
ret = get_anon_bdev(&root->anon_dev);
@@ -1219,7 +1209,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->fs_roots_radix_lock);
ret = radix_tree_insert(&fs_info->fs_roots_radix,
- (unsigned long)root->root_key.objectid,
+ (unsigned long)btrfs_root_id(root),
root);
if (ret == 0) {
btrfs_grab_root(root);
@@ -1231,7 +1221,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
return ret;
}
-void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info)
+void btrfs_check_leaked_roots(const struct btrfs_fs_info *fs_info)
{
#ifdef CONFIG_BTRFS_DEBUG
struct btrfs_root *root;
@@ -1244,6 +1234,7 @@ void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info, "leaked root %s refcount %d",
btrfs_root_name(&root->root_key, buf),
refcount_read(&root->refs));
+ WARN_ON_ONCE(1);
while (refcount_read(&root->refs) > 1)
btrfs_put_root(root);
btrfs_put_root(root);
@@ -1265,9 +1256,15 @@ static void free_global_roots(struct btrfs_fs_info *fs_info)
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
{
+ struct percpu_counter *em_counter = &fs_info->evictable_extent_maps;
+
+ percpu_counter_destroy(&fs_info->stats_read_blocks);
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
percpu_counter_destroy(&fs_info->ordered_bytes);
+ if (percpu_counter_initialized(em_counter))
+ ASSERT(percpu_counter_sum_positive(em_counter) == 0);
+ percpu_counter_destroy(em_counter);
percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
btrfs_free_csum_hash(fs_info);
btrfs_free_stripe_hash_table(fs_info);
@@ -1288,7 +1285,6 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
btrfs_extent_buffer_leak_debug_check(fs_info);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
- kfree(fs_info->subpage_info);
kvfree(fs_info);
}
@@ -1849,7 +1845,8 @@ void btrfs_put_root(struct btrfs_root *root)
return;
if (refcount_dec_and_test(&root->refs)) {
- WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+ if (WARN_ON(!xa_empty(&root->inodes)))
+ xa_destroy(&root->inodes);
WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
@@ -1923,7 +1920,7 @@ static int btrfs_init_btree_inode(struct super_block *sb)
if (!inode)
return -ENOMEM;
- inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
+ btrfs_set_inode_number(BTRFS_I(inode), BTRFS_BTREE_INODE_OBJECTID);
set_nlink(inode, 1);
/*
* we set the i_size on the btree inode to the max possible int.
@@ -1934,15 +1931,11 @@ static int btrfs_init_btree_inode(struct super_block *sb)
inode->i_mapping->a_ops = &btree_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
- RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
IO_TREE_BTREE_INODE_IO);
extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
- BTRFS_I(inode)->location.objectid = BTRFS_BTREE_INODE_OBJECTID;
- BTRFS_I(inode)->location.type = 0;
- BTRFS_I(inode)->location.offset = 0;
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
__insert_inode_hash(inode, hash);
fs_info->btree_inode = inode;
@@ -1966,7 +1959,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
fs_info->qgroup_seq = 1;
fs_info->qgroup_ulist = NULL;
fs_info->qgroup_rescan_running = false;
- fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
+ fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT;
mutex_init(&fs_info->qgroup_rescan_lock);
}
@@ -2141,7 +2134,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
/* If we have IGNOREDATACSUMS skip loading these roots. */
if (objectid == BTRFS_CSUM_TREE_OBJECTID &&
btrfs_test_opt(fs_info, IGNOREDATACSUMS)) {
- set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
+ set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state);
return 0;
}
@@ -2194,7 +2187,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
if (!found || ret) {
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
- set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
+ set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state);
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
ret = ret ? ret : -ENOENT;
@@ -2239,7 +2232,7 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
struct btrfs_key location;
int ret;
- BUG_ON(!fs_info->tree_root);
+ ASSERT(fs_info->tree_root);
ret = load_global_roots(tree_root);
if (ret)
@@ -2335,6 +2328,71 @@ out:
return ret;
}
+static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_super_block *sb)
+{
+ unsigned int cur = 0; /* Offset inside the sys chunk array */
+ /*
+ * At sb read time, fs_info is not fully initialized. Thus we have
+ * to use super block sectorsize, which should have been validated.
+ */
+ const u32 sectorsize = btrfs_super_sectorsize(sb);
+ u32 sys_array_size = btrfs_super_sys_array_size(sb);
+
+ if (sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+ btrfs_err(fs_info, "system chunk array too big %u > %u",
+ sys_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+ return -EUCLEAN;
+ }
+
+ while (cur < sys_array_size) {
+ struct btrfs_disk_key *disk_key;
+ struct btrfs_chunk *chunk;
+ struct btrfs_key key;
+ u64 type;
+ u16 num_stripes;
+ u32 len;
+ int ret;
+
+ disk_key = (struct btrfs_disk_key *)(sb->sys_chunk_array + cur);
+ len = sizeof(*disk_key);
+
+ if (cur + len > sys_array_size)
+ goto short_read;
+ cur += len;
+
+ btrfs_disk_key_to_cpu(&key, disk_key);
+ if (key.type != BTRFS_CHUNK_ITEM_KEY) {
+ btrfs_err(fs_info,
+ "unexpected item type %u in sys_array at offset %u",
+ key.type, cur);
+ return -EUCLEAN;
+ }
+ chunk = (struct btrfs_chunk *)(sb->sys_chunk_array + cur);
+ num_stripes = btrfs_stack_chunk_num_stripes(chunk);
+ if (cur + btrfs_chunk_item_size(num_stripes) > sys_array_size)
+ goto short_read;
+ type = btrfs_stack_chunk_type(chunk);
+ if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) {
+ btrfs_err(fs_info,
+ "invalid chunk type %llu in sys_array at offset %u",
+ type, cur);
+ return -EUCLEAN;
+ }
+ ret = btrfs_check_chunk_valid(fs_info, NULL, chunk, key.offset,
+ sectorsize);
+ if (ret < 0)
+ return ret;
+ cur += btrfs_chunk_item_size(num_stripes);
+ }
+ return 0;
+short_read:
+ btrfs_err(fs_info,
+ "super block sys chunk array short read, cur=%u sys_array_size=%u",
+ cur, sys_array_size);
+ return -EUCLEAN;
+}
+
/*
* Real super block validation
* NOTE: super csum type and incompat features will not be checked here.
@@ -2345,21 +2403,29 @@ out:
* 1, 2 2nd and 3rd backup copy
* -1 skip bytenr check
*/
-int btrfs_validate_super(struct btrfs_fs_info *fs_info,
- struct btrfs_super_block *sb, int mirror_num)
+int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_super_block *sb, int mirror_num)
{
u64 nodesize = btrfs_super_nodesize(sb);
u64 sectorsize = btrfs_super_sectorsize(sb);
int ret = 0;
+ const bool ignore_flags = btrfs_test_opt(fs_info, IGNORESUPERFLAGS);
if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
btrfs_err(fs_info, "no valid FS found");
ret = -EINVAL;
}
- if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
- btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
- btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
- ret = -EINVAL;
+ if ((btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)) {
+ if (!ignore_flags) {
+ btrfs_err(fs_info,
+ "unrecognized or unsupported super flag 0x%llx",
+ btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
+ ret = -EINVAL;
+ } else {
+ btrfs_info(fs_info,
+ "unrecognized or unsupported super flags: 0x%llx, ignored",
+ btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
+ }
}
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
btrfs_err(fs_info, "tree_root level too big: %d >= %d",
@@ -2462,7 +2528,7 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
(!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) ||
!btrfs_fs_incompat(fs_info, NO_HOLES))) {
btrfs_err(fs_info,
- "block-group-tree feature requires fres-space-tree and no-holes");
+ "block-group-tree feature requires free-space-tree and no-holes");
ret = -EINVAL;
}
@@ -2495,6 +2561,8 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
+ ret = validate_sys_chunk_array(fs_info, sb);
+
/*
* Obvious sys_chunk_array corruptions, it must hold at least one key
* and one chunk
@@ -2583,7 +2651,7 @@ static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int lev
struct btrfs_tree_parent_check check = {
.level = level,
.transid = gen,
- .owner_root = root->root_key.objectid
+ .owner_root = btrfs_root_id(root)
};
int ret = 0;
@@ -2785,6 +2853,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
btrfs_init_scrub(fs_info);
btrfs_init_balance(fs_info);
btrfs_init_async_reclaim_work(fs_info);
+ btrfs_init_extent_map_shrinker_work(fs_info);
rwlock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT_CACHED;
@@ -2839,6 +2908,7 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
int ret;
fs_info->sb = sb;
+ /* Temporary fixed values for block size until we read the superblock. */
sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE;
sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE);
@@ -2846,10 +2916,18 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
if (ret)
return ret;
+ ret = percpu_counter_init(&fs_info->evictable_extent_maps, 0, GFP_KERNEL);
+ if (ret)
+ return ret;
+
ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
if (ret)
return ret;
+ ret = percpu_counter_init(&fs_info->stats_read_blocks, 0, GFP_KERNEL);
+ if (ret)
+ return ret;
+
fs_info->dirty_metadata_batch = PAGE_SIZE *
(1 + ilog2(nr_cpu_ids));
@@ -2870,6 +2948,8 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
if (sb_rdonly(sb))
set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
+ if (btrfs_test_opt(fs_info, IGNOREMETACSUMS))
+ set_bit(BTRFS_FS_STATE_SKIP_META_CSUMS, &fs_info->fs_state);
return btrfs_alloc_stripe_hash_table(fs_info);
}
@@ -2915,22 +2995,22 @@ static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
u64 root_objectid = 0;
struct btrfs_root *gang[8];
- int i = 0;
- int err = 0;
- unsigned int ret = 0;
+ int ret = 0;
while (1) {
+ unsigned int found;
+
spin_lock(&fs_info->fs_roots_radix_lock);
- ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ found = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)gang, root_objectid,
ARRAY_SIZE(gang));
- if (!ret) {
+ if (!found) {
spin_unlock(&fs_info->fs_roots_radix_lock);
break;
}
- root_objectid = gang[ret - 1]->root_key.objectid + 1;
+ root_objectid = btrfs_root_id(gang[found - 1]) + 1;
- for (i = 0; i < ret; i++) {
+ for (int i = 0; i < found; i++) {
/* Avoid to grab roots in dead_roots. */
if (btrfs_root_refs(&gang[i]->root_item) == 0) {
gang[i] = NULL;
@@ -2941,24 +3021,25 @@ static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
}
spin_unlock(&fs_info->fs_roots_radix_lock);
- for (i = 0; i < ret; i++) {
+ for (int i = 0; i < found; i++) {
if (!gang[i])
continue;
- root_objectid = gang[i]->root_key.objectid;
- err = btrfs_orphan_cleanup(gang[i]);
- if (err)
- goto out;
+ root_objectid = btrfs_root_id(gang[i]);
+ /*
+ * Continue to release the remaining roots after the first
+ * error without cleanup and preserve the first error
+ * for the return.
+ */
+ if (!ret)
+ ret = btrfs_orphan_cleanup(gang[i]);
btrfs_put_root(gang[i]);
}
+ if (ret)
+ break;
+
root_objectid++;
}
-out:
- /* Release the uncleaned roots due to error. */
- for (; i < ret; i++) {
- if (gang[i])
- btrfs_put_root(gang[i]);
- }
- return err;
+ return ret;
}
/*
@@ -3191,8 +3272,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
return 0;
}
-int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
- char *options)
+int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices)
{
u32 sectorsize;
u32 nodesize;
@@ -3310,8 +3390,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
fs_info->nodesize = nodesize;
fs_info->sectorsize = sectorsize;
fs_info->sectorsize_bits = ilog2(sectorsize);
+ fs_info->sectors_per_page = (PAGE_SIZE >> fs_info->sectorsize_bits);
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
fs_info->stripesize = stripesize;
+ fs_info->fs_devices->fs_info = fs_info;
/*
* Handle the space caching options appropriately now that we have the
@@ -3334,20 +3416,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
*/
fs_info->max_inline = min_t(u64, fs_info->max_inline, fs_info->sectorsize);
- if (sectorsize < PAGE_SIZE) {
- struct btrfs_subpage_info *subpage_info;
-
+ if (sectorsize < PAGE_SIZE)
btrfs_warn(fs_info,
"read-write for sector size %u with page size %lu is experimental",
sectorsize, PAGE_SIZE);
- subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL);
- if (!subpage_info) {
- ret = -ENOMEM;
- goto fail_alloc;
- }
- btrfs_init_subpage_info(subpage_info, sectorsize);
- fs_info->subpage_info = subpage_info;
- }
ret = btrfs_init_workqueues(fs_info);
if (ret)
@@ -3356,6 +3428,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
+ /* Update the values for the current filesystem. */
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
memcpy(&sb->s_uuid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE);
@@ -3615,28 +3688,25 @@ ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
static void btrfs_end_super_write(struct bio *bio)
{
struct btrfs_device *device = bio->bi_private;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
- struct page *page;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- page = bvec->bv_page;
+ struct folio_iter fi;
+ bio_for_each_folio_all(fi, bio) {
if (bio->bi_status) {
btrfs_warn_rl_in_rcu(device->fs_info,
- "lost page write due to IO error on %s (%d)",
+ "lost super block write due to IO error on %s (%d)",
btrfs_dev_name(device),
blk_status_to_errno(bio->bi_status));
- ClearPageUptodate(page);
- SetPageError(page);
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_WRITE_ERRS);
- } else {
- SetPageUptodate(page);
+ /* Ensure failure if the primary sb fails. */
+ if (bio->bi_opf & REQ_FUA)
+ atomic_add(BTRFS_SUPER_PRIMARY_WRITE_ERROR,
+ &device->sb_write_errors);
+ else
+ atomic_inc(&device->sb_write_errors);
}
-
- put_page(page);
- unlock_page(page);
+ folio_unlock(fi.folio);
+ folio_put(fi.folio);
}
bio_put(bio);
@@ -3648,7 +3718,7 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
struct btrfs_super_block *super;
struct page *page;
u64 bytenr, bytenr_orig;
- struct address_space *mapping = bdev->bd_inode->i_mapping;
+ struct address_space *mapping = bdev->bd_mapping;
int ret;
bytenr_orig = btrfs_sb_offset(copy_num);
@@ -3723,34 +3793,36 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
/*
* Write superblock @sb to the @device. Do not wait for completion, all the
- * pages we use for writing are locked.
+ * folios we use for writing are locked.
*
* Write @max_mirrors copies of the superblock, where 0 means default that fit
* the expected device size at commit time. Note that max_mirrors must be
* same for write and wait phases.
*
- * Return number of errors when page is not found or submission fails.
+ * Return number of errors when folio is not found or submission fails.
*/
static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
{
struct btrfs_fs_info *fs_info = device->fs_info;
- struct address_space *mapping = device->bdev->bd_inode->i_mapping;
+ struct address_space *mapping = device->bdev->bd_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
- int errors = 0;
int ret;
u64 bytenr, bytenr_orig;
+ atomic_set(&device->sb_write_errors, 0);
+
if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
shash->tfm = fs_info->csum_shash;
for (i = 0; i < max_mirrors; i++) {
- struct page *page;
+ struct folio *folio;
struct bio *bio;
struct btrfs_super_block *disk_super;
+ size_t offset;
bytenr_orig = btrfs_sb_offset(i);
ret = btrfs_sb_log_location(device, i, WRITE, &bytenr);
@@ -3760,7 +3832,7 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_err(device->fs_info,
"couldn't get super block location for mirror %d",
i);
- errors++;
+ atomic_inc(&device->sb_write_errors);
continue;
}
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
@@ -3773,20 +3845,20 @@ static int write_dev_supers(struct btrfs_device *device,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
sb->csum);
- page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
- GFP_NOFS);
- if (!page) {
+ folio = __filemap_get_folio(mapping, bytenr >> PAGE_SHIFT,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ GFP_NOFS);
+ if (IS_ERR(folio)) {
btrfs_err(device->fs_info,
"couldn't get super block page for bytenr %llu",
bytenr);
- errors++;
+ atomic_inc(&device->sb_write_errors);
continue;
}
+ ASSERT(folio_order(folio) == 0);
- /* Bump the refcount for wait_dev_supers() */
- get_page(page);
-
- disk_super = page_address(page);
+ offset = offset_in_folio(folio, bytenr);
+ disk_super = folio_address(folio) + offset;
memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
/*
@@ -3800,8 +3872,7 @@ static int write_dev_supers(struct btrfs_device *device,
bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
bio->bi_private = device;
bio->bi_end_io = btrfs_end_super_write;
- __bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
- offset_in_page(bytenr));
+ bio_add_folio_nofail(bio, folio, BTRFS_SUPER_INFO_SIZE, offset);
/*
* We FUA only the first super block. The others we allow to
@@ -3813,17 +3884,17 @@ static int write_dev_supers(struct btrfs_device *device,
submit_bio(bio);
if (btrfs_advance_sb_log(device, i))
- errors++;
+ atomic_inc(&device->sb_write_errors);
}
- return errors < i ? 0 : -1;
+ return atomic_read(&device->sb_write_errors) < i ? 0 : -1;
}
/*
* Wait for write completion of superblocks done by write_dev_supers,
* @max_mirrors same for write and wait phases.
*
- * Return number of errors when page is not found or not marked up to
- * date.
+ * Return -1 if primary super block write failed or when there were no super block
+ * copies written. Otherwise 0.
*/
static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
{
@@ -3837,7 +3908,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
for (i = 0; i < max_mirrors; i++) {
- struct page *page;
+ struct folio *folio;
ret = btrfs_sb_log_location(device, i, READ, &bytenr);
if (ret == -ENOENT) {
@@ -3852,30 +3923,21 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
device->commit_total_bytes)
break;
- page = find_get_page(device->bdev->bd_inode->i_mapping,
- bytenr >> PAGE_SHIFT);
- if (!page) {
- errors++;
- if (i == 0)
- primary_failed = true;
+ folio = filemap_get_folio(device->bdev->bd_mapping,
+ bytenr >> PAGE_SHIFT);
+ /* If the folio has been removed, then we know it completed. */
+ if (IS_ERR(folio))
continue;
- }
- /* Page is submitted locked and unlocked once the IO completes */
- wait_on_page_locked(page);
- if (PageError(page)) {
- errors++;
- if (i == 0)
- primary_failed = true;
- }
-
- /* Drop our reference */
- put_page(page);
+ ASSERT(folio_order(folio) == 0);
- /* Drop the reference from the writing run */
- put_page(page);
+ /* Folio will be unlocked once the write completes. */
+ folio_wait_locked(folio);
+ folio_put(folio);
}
- /* log error, force error return */
+ errors += atomic_read(&device->sb_write_errors);
+ if (errors >= BTRFS_SUPER_PRIMARY_WRITE_ERROR)
+ primary_failed = true;
if (primary_failed) {
btrfs_err(device->fs_info, "error writing primary super block to device %llu",
device->devid);
@@ -4136,7 +4198,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->fs_roots_radix_lock);
radix_tree_delete(&fs_info->fs_roots_radix,
- (unsigned long)root->root_key.objectid);
+ (unsigned long)btrfs_root_id(root));
if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
drop_ref = true;
spin_unlock(&fs_info->fs_roots_radix_lock);
@@ -4155,9 +4217,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
int btrfs_commit_super(struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *root = fs_info->tree_root;
- struct btrfs_trans_handle *trans;
-
mutex_lock(&fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(fs_info);
mutex_unlock(&fs_info->cleaner_mutex);
@@ -4167,10 +4226,7 @@ int btrfs_commit_super(struct btrfs_fs_info *fs_info)
down_write(&fs_info->cleanup_work_sem);
up_write(&fs_info->cleanup_work_sem);
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- return btrfs_commit_transaction(trans);
+ return btrfs_commit_current_transaction(fs_info->tree_root);
}
static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
@@ -4179,9 +4235,6 @@ static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
struct btrfs_transaction *tmp;
bool found = false;
- if (list_empty(&fs_info->trans_list))
- return;
-
/*
* This function is only called at the very end of close_ctree(),
* thus no other running transaction, no need to take trans_lock.
@@ -4203,7 +4256,7 @@ static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
btrfs_warn(fs_info,
"transaction %llu (with %llu dirty metadata bytes) is not committed",
trans->transid, dirty_bytes);
- btrfs_cleanup_one_transaction(trans, fs_info);
+ btrfs_cleanup_one_transaction(trans);
if (trans == fs_info->running_transaction)
fs_info->running_transaction = NULL;
@@ -4273,6 +4326,26 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_cleanup_defrag_inodes(fs_info);
/*
+ * Wait for any fixup workers to complete.
+ * If we don't wait for them here and they are still running by the time
+ * we call kthread_stop() against the cleaner kthread further below, we
+ * get a use-after-free on the cleaner because the fixup worker adds an
+ * inode to the list of delayed iputs and then attempts to wakeup the
+ * cleaner kthread, which was already stopped and destroyed. We already
+ * parked the cleaner, but below we run all pending delayed iputs.
+ */
+ btrfs_flush_workqueue(fs_info->fixup_workers);
+ /*
+ * Similar case here, we have to wait for delalloc workers before we
+ * proceed below and stop the cleaner kthread, otherwise we trigger a
+ * use-after-free on the cleaner kthread task_struct when a delalloc
+ * worker running submit_compressed_extents() adds a delayed iput, which
+ * does a wake up on the cleaner kthread, which was already freed below
+ * when we call kthread_stop().
+ */
+ btrfs_flush_workqueue(fs_info->delalloc_workers);
+
+ /*
* After we parked the cleaner kthread, ordered extents may have
* completed and created new delayed iputs. If one of the async reclaim
* tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
@@ -4300,6 +4373,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
cancel_work_sync(&fs_info->async_reclaim_work);
cancel_work_sync(&fs_info->async_data_reclaim_work);
cancel_work_sync(&fs_info->preempt_reclaim_work);
+ cancel_work_sync(&fs_info->em_shrinker_work);
/* Cancel or finish ongoing discard work */
btrfs_discard_cleanup(fs_info);
@@ -4481,7 +4555,7 @@ static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
for (i = 0; i < ret; i++) {
if (!gang[i])
continue;
- root_objectid = gang[i]->root_key.objectid;
+ root_objectid = btrfs_root_id(gang[i]);
btrfs_free_log(NULL, gang[i]);
btrfs_put_root(gang[i]);
}
@@ -4534,84 +4608,7 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
* extents that haven't had their dirty pages IO start writeout yet
* actually get run and error out properly.
*/
- btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
-}
-
-static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
- struct btrfs_fs_info *fs_info)
-{
- struct rb_node *node;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_delayed_ref_node *ref;
-
- delayed_refs = &trans->delayed_refs;
-
- spin_lock(&delayed_refs->lock);
- if (atomic_read(&delayed_refs->num_entries) == 0) {
- spin_unlock(&delayed_refs->lock);
- btrfs_debug(fs_info, "delayed_refs has NO entry");
- return;
- }
-
- while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
- struct btrfs_delayed_ref_head *head;
- struct rb_node *n;
- bool pin_bytes = false;
-
- head = rb_entry(node, struct btrfs_delayed_ref_head,
- href_node);
- if (btrfs_delayed_ref_lock(delayed_refs, head))
- continue;
-
- spin_lock(&head->lock);
- while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
- ref = rb_entry(n, struct btrfs_delayed_ref_node,
- ref_node);
- rb_erase_cached(&ref->ref_node, &head->ref_tree);
- RB_CLEAR_NODE(&ref->ref_node);
- if (!list_empty(&ref->add_list))
- list_del(&ref->add_list);
- atomic_dec(&delayed_refs->num_entries);
- btrfs_put_delayed_ref(ref);
- btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
- }
- if (head->must_insert_reserved)
- pin_bytes = true;
- btrfs_free_delayed_extent_op(head->extent_op);
- btrfs_delete_ref_head(delayed_refs, head);
- spin_unlock(&head->lock);
- spin_unlock(&delayed_refs->lock);
- mutex_unlock(&head->mutex);
-
- if (pin_bytes) {
- struct btrfs_block_group *cache;
-
- cache = btrfs_lookup_block_group(fs_info, head->bytenr);
- BUG_ON(!cache);
-
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- cache->pinned += head->num_bytes;
- btrfs_space_info_update_bytes_pinned(fs_info,
- cache->space_info, head->num_bytes);
- cache->reserved -= head->num_bytes;
- cache->space_info->bytes_reserved -= head->num_bytes;
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
-
- btrfs_put_block_group(cache);
-
- btrfs_error_unpin_extent_range(fs_info, head->bytenr,
- head->bytenr + head->num_bytes - 1);
- }
- btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
- btrfs_put_delayed_ref_head(head);
- cond_resched();
- spin_lock(&delayed_refs->lock);
- }
- btrfs_qgroup_destroy_extent_records(trans);
-
- spin_unlock(&delayed_refs->lock);
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
}
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
@@ -4626,7 +4623,7 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
struct inode *inode = NULL;
btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
delalloc_inodes);
- __btrfs_del_delalloc_inode(root, btrfs_inode);
+ btrfs_del_delalloc_inode(btrfs_inode);
spin_unlock(&root->delalloc_lock);
/*
@@ -4812,16 +4809,16 @@ static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info)
btrfs_qgroup_free_meta_all_pertrans(root);
radix_tree_tag_clear(&fs_info->fs_roots_radix,
- (unsigned long)root->root_key.objectid,
+ (unsigned long)btrfs_root_id(root),
BTRFS_ROOT_TRANS_TAG);
}
}
spin_unlock(&fs_info->fs_roots_radix_lock);
}
-void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
- struct btrfs_fs_info *fs_info)
+void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans)
{
+ struct btrfs_fs_info *fs_info = cur_trans->fs_info;
struct btrfs_device *dev, *tmp;
btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
@@ -4833,7 +4830,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
list_del_init(&dev->post_commit_list);
}
- btrfs_destroy_delayed_refs(cur_trans, fs_info);
+ btrfs_destroy_delayed_refs(cur_trans);
cur_trans->state = TRANS_STATE_COMMIT_START;
wake_up(&fs_info->transaction_blocked_wait);
@@ -4841,14 +4838,10 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
cur_trans->state = TRANS_STATE_UNBLOCKED;
wake_up(&fs_info->transaction_wait);
- btrfs_destroy_delayed_inodes(fs_info);
-
btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages,
EXTENT_DIRTY);
btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
- btrfs_free_all_qgroup_pertrans(fs_info);
-
cur_trans->state =TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
}
@@ -4883,7 +4876,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
} else {
spin_unlock(&fs_info->trans_lock);
}
- btrfs_cleanup_one_transaction(t, fs_info);
+ btrfs_cleanup_one_transaction(t);
spin_lock(&fs_info->trans_lock);
if (t == fs_info->running_transaction)
@@ -4901,6 +4894,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
btrfs_assert_delayed_root_empty(fs_info);
btrfs_destroy_all_delalloc_inodes(fs_info);
btrfs_drop_all_logs(fs_info);
+ btrfs_free_all_qgroup_pertrans(fs_info);
mutex_unlock(&fs_info->transaction_kthread_mutex);
return 0;
@@ -4925,7 +4919,14 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root)
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret < 0)
goto error;
- BUG_ON(ret == 0); /* Corruption */
+ if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist a root
+ * with such id, but this is out of valid range.
+ */
+ ret = -EUCLEAN;
+ goto error;
+ }
if (path->slots[0] > 0) {
slot = path->slots[0] - 1;
l = path->nodes[0];
@@ -4949,7 +4950,7 @@ int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid)
if (unlikely(root->free_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
btrfs_warn(root->fs_info,
"the objectid of root %llu reaches its highest value",
- root->root_key.objectid);
+ btrfs_root_id(root));
ret = -ENOSPC;
goto out;
}