summaryrefslogtreecommitdiff
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c804
1 files changed, 377 insertions, 427 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index cabb558dbdaa..89149fac804c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -17,7 +17,7 @@
#include <linux/error-injection.h>
#include <linux/crc32c.h>
#include <linux/sched/mm.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
@@ -50,6 +50,7 @@
#include "relocation.h"
#include "scrub.h"
#include "super.h"
+#include "delayed-inode.h"
#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
BTRFS_HEADER_FLAG_RELOC |\
@@ -116,7 +117,7 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
* detect blocks that either didn't get written at all or got written
* in the wrong place.
*/
-int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic)
+int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, bool atomic)
{
if (!extent_buffer_uptodate(eb))
return 0;
@@ -182,26 +183,33 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
int mirror_num)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
- int num_folios = num_extent_folios(eb);
+ const u32 step = min(fs_info->nodesize, PAGE_SIZE);
+ const u32 nr_steps = eb->len / step;
+ phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
int ret = 0;
if (sb_rdonly(fs_info->sb))
return -EROFS;
- for (int i = 0; i < num_folios; i++) {
+ for (int i = 0; i < num_extent_pages(eb); i++) {
struct folio *folio = eb->folios[i];
- u64 start = max_t(u64, eb->start, folio_pos(folio));
- u64 end = min_t(u64, eb->start + eb->len,
- folio_pos(folio) + eb->folio_size);
- u32 len = end - start;
-
- ret = btrfs_repair_io_failure(fs_info, 0, start, len,
- start, folio, offset_in_folio(folio, start),
- mirror_num);
- if (ret)
- break;
+
+ /* No large folio support yet. */
+ ASSERT(folio_order(folio) == 0);
+ ASSERT(i < nr_steps);
+
+ /*
+ * For nodesize < page size, there is just one paddr, with some
+ * offset inside the page.
+ *
+ * For nodesize >= page size, it's one or more paddrs, and eb->start
+ * must be aligned to page boundary.
+ */
+ paddrs[i] = page_to_phys(&folio->page) + offset_in_page(eb->start);
}
+ ret = btrfs_repair_io_failure(fs_info, 0, eb->start, eb->len, eb->start,
+ paddrs, step, mirror_num);
return ret;
}
@@ -213,7 +221,7 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
* structure for details.
*/
int btrfs_read_extent_buffer(struct extent_buffer *eb,
- struct btrfs_tree_parent_check *check)
+ const struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
int failed = 0;
@@ -225,8 +233,7 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
ASSERT(check);
while (1) {
- clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
- ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num, check);
+ ret = read_extent_buffer_pages(eb, mirror_num, check);
if (!ret)
break;
@@ -257,7 +264,7 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
/*
* Checksum a dirty tree block before IO.
*/
-blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
+int btree_csum_one_bio(struct btrfs_bio *bbio)
{
struct extent_buffer *eb = bbio->private;
struct btrfs_fs_info *fs_info = eb->fs_info;
@@ -268,9 +275,9 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
/* Btree blocks are always contiguous on disk. */
if (WARN_ON_ONCE(bbio->file_offset != eb->start))
- return BLK_STS_IOERR;
+ return -EIO;
if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len))
- return BLK_STS_IOERR;
+ return -EIO;
/*
* If an extent_buffer is marked as EXTENT_BUFFER_ZONED_ZEROOUT, don't
@@ -279,14 +286,13 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
*/
if (test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags)) {
memzero_extent_buffer(eb, 0, eb->len);
- return BLK_STS_OK;
+ return 0;
}
if (WARN_ON_ONCE(found_start != eb->start))
- return BLK_STS_IOERR;
- if (WARN_ON(!btrfs_folio_test_uptodate(fs_info, eb->folios[0],
- eb->start, eb->len)))
- return BLK_STS_IOERR;
+ return -EIO;
+ if (WARN_ON(!btrfs_meta_folio_test_uptodate(eb->folios[0], eb)))
+ return -EIO;
ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
offsetof(struct btrfs_header, fsid),
@@ -314,7 +320,7 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
goto error;
}
write_extent_buffer(eb, result, 0, fs_info->csum_size);
- return BLK_STS_OK;
+ return 0;
error:
btrfs_print_tree(eb, 0);
@@ -328,7 +334,7 @@ error:
*/
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
- return errno_to_blk_status(ret);
+ return ret;
}
static bool check_tree_block_fsid(struct extent_buffer *eb)
@@ -358,7 +364,7 @@ static bool check_tree_block_fsid(struct extent_buffer *eb)
/* Do basic extent buffer checks at read time */
int btrfs_validate_extent_buffer(struct extent_buffer *eb,
- struct btrfs_tree_parent_check *check)
+ const struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
u64 found_start;
@@ -367,25 +373,26 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
u8 result[BTRFS_CSUM_SIZE];
const u8 *header_csum;
int ret = 0;
+ const bool ignore_csum = btrfs_test_opt(fs_info, IGNOREMETACSUMS);
ASSERT(check);
found_start = btrfs_header_bytenr(eb);
- if (found_start != eb->start) {
+ if (unlikely(found_start != eb->start)) {
btrfs_err_rl(fs_info,
"bad tree block start, mirror %u want %llu have %llu",
eb->read_mirror, eb->start, found_start);
ret = -EIO;
goto out;
}
- if (check_tree_block_fsid(eb)) {
+ if (unlikely(check_tree_block_fsid(eb))) {
btrfs_err_rl(fs_info, "bad fsid on logical %llu mirror %u",
eb->start, eb->read_mirror);
ret = -EIO;
goto out;
}
found_level = btrfs_header_level(eb);
- if (found_level >= BTRFS_MAX_LEVEL) {
+ if (unlikely(found_level >= BTRFS_MAX_LEVEL)) {
btrfs_err(fs_info,
"bad tree block level, mirror %u level %d on logical %llu",
eb->read_mirror, btrfs_header_level(eb), eb->start);
@@ -399,16 +406,19 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
if (memcmp(result, header_csum, csum_size) != 0) {
btrfs_warn_rl(fs_info,
-"checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d",
+"checksum verify failed on logical %llu mirror %u wanted " BTRFS_CSUM_FMT " found " BTRFS_CSUM_FMT " level %d%s",
eb->start, eb->read_mirror,
- CSUM_FMT_VALUE(csum_size, header_csum),
- CSUM_FMT_VALUE(csum_size, result),
- btrfs_header_level(eb));
- ret = -EUCLEAN;
- goto out;
+ BTRFS_CSUM_FMT_VALUE(csum_size, header_csum),
+ BTRFS_CSUM_FMT_VALUE(csum_size, result),
+ btrfs_header_level(eb),
+ ignore_csum ? ", ignored" : "");
+ if (unlikely(!ignore_csum)) {
+ ret = -EUCLEAN;
+ goto out;
+ }
}
- if (found_level != check->level) {
+ if (unlikely(found_level != check->level)) {
btrfs_err(fs_info,
"level verify failed on logical %llu mirror %u wanted %u found %u",
eb->start, eb->read_mirror, check->level, found_level);
@@ -425,7 +435,7 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
goto out;
}
if (check->has_first_key) {
- struct btrfs_key *expect_key = &check->first_key;
+ const struct btrfs_key *expect_key = &check->first_key;
struct btrfs_key found_key;
if (found_level)
@@ -450,15 +460,9 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
goto out;
}
- /*
- * If this is a leaf block and it is corrupt, set the corrupt bit so
- * that we don't try and read the other copies of this block, just
- * return -EIO.
- */
- if (found_level == 0 && btrfs_check_leaf(eb)) {
- set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
+ /* If this is a leaf block and it is corrupt, just return -EIO. */
+ if (found_level == 0 && btrfs_check_leaf(eb))
ret = -EIO;
- }
if (found_level > 0 && btrfs_check_node(eb))
ret = -EIO;
@@ -521,7 +525,7 @@ static bool btree_release_folio(struct folio *folio, gfp_t gfp_flags)
if (folio_test_writeback(folio) || folio_test_dirty(folio))
return false;
- return try_release_extent_buffer(&folio->page);
+ return try_release_extent_buffer(folio);
}
static void btree_invalidate_folio(struct folio *folio, size_t offset,
@@ -635,34 +639,24 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
free_extent_buffer_stale(buf);
return ERR_PTR(ret);
}
- if (btrfs_check_eb_owner(buf, check->owner_root)) {
- free_extent_buffer_stale(buf);
- return ERR_PTR(-EUCLEAN);
- }
return buf;
}
-static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
- u64 objectid)
+static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
+ u64 objectid, gfp_t flags)
{
- bool dummy = btrfs_is_testing(fs_info);
+ struct btrfs_root *root;
+
+ root = kzalloc(sizeof(*root), flags);
+ if (!root)
+ return NULL;
- memset(&root->root_key, 0, sizeof(root->root_key));
- memset(&root->root_item, 0, sizeof(root->root_item));
- memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
root->fs_info = fs_info;
root->root_key.objectid = objectid;
- root->node = NULL;
- root->commit_root = NULL;
- root->state = 0;
RB_CLEAR_NODE(&root->rb_node);
- root->last_trans = 0;
- root->free_objectid = 0;
- root->nr_delalloc_inodes = 0;
- root->nr_ordered_extents = 0;
- root->inode_tree = RB_ROOT;
+ xa_init(&root->inodes);
xa_init(&root->delayed_nodes);
btrfs_init_root_block_rsv(root);
@@ -674,7 +668,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&root->ordered_extents);
INIT_LIST_HEAD(&root->ordered_root);
INIT_LIST_HEAD(&root->reloc_dirty_list);
- spin_lock_init(&root->inode_lock);
spin_lock_init(&root->delalloc_lock);
spin_lock_init(&root->ordered_extent_lock);
spin_lock_init(&root->accounting_lock);
@@ -696,15 +689,12 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
refcount_set(&root->refs, 1);
atomic_set(&root->snapshot_force_cow, 0);
atomic_set(&root->nr_swapfiles, 0);
- btrfs_set_root_log_transid(root, 0);
root->log_transid_committed = -1;
- btrfs_set_root_last_log_commit(root, 0);
- root->anon_dev = 0;
- if (!dummy) {
- extent_io_tree_init(fs_info, &root->dirty_log_pages,
- IO_TREE_ROOT_DIRTY_LOG_PAGES);
- extent_io_tree_init(fs_info, &root->log_csum_range,
- IO_TREE_LOG_CSUM_RANGE);
+ if (!btrfs_is_testing(fs_info)) {
+ btrfs_extent_io_tree_init(fs_info, &root->dirty_log_pages,
+ IO_TREE_ROOT_DIRTY_LOG_PAGES);
+ btrfs_extent_io_tree_init(fs_info, &root->log_csum_range,
+ IO_TREE_LOG_CSUM_RANGE);
}
spin_lock_init(&root->root_item_lock);
@@ -715,14 +705,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
list_add_tail(&root->leak_list, &fs_info->allocated_roots);
spin_unlock(&fs_info->fs_roots_radix_lock);
#endif
-}
-static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
- u64 objectid, gfp_t flags)
-{
- struct btrfs_root *root = kzalloc(sizeof(*root), flags);
- if (root)
- __setup_root(root, fs_info, objectid);
return root;
}
@@ -847,13 +830,6 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
return btrfs_global_root(fs_info, &key);
}
-struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
-{
- if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
- return fs_info->block_group_root;
- return btrfs_extent_root(fs_info, 0);
-}
-
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
u64 objectid)
{
@@ -902,7 +878,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_set_root_used(&root->root_item, leaf->len);
btrfs_set_root_last_snapshot(&root->root_item, 0);
btrfs_set_root_dirid(&root->root_item, 0);
- if (is_fstree(objectid))
+ if (btrfs_is_fstree(objectid))
generate_random_guid(root->root_item.uuid);
else
export_guid(root->root_item.uuid, &guid_null);
@@ -925,8 +901,7 @@ fail:
return ERR_PTR(ret);
}
-static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+static struct btrfs_root *alloc_log_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
@@ -974,7 +949,7 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
{
struct btrfs_root *log_root;
- log_root = alloc_log_tree(trans, fs_info);
+ log_root = alloc_log_tree(fs_info);
if (IS_ERR(log_root))
return PTR_ERR(log_root);
@@ -1000,7 +975,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *inode_item;
int ret;
- log_root = alloc_log_tree(trans, fs_info);
+ log_root = alloc_log_tree(fs_info);
if (IS_ERR(log_root))
return PTR_ERR(log_root);
@@ -1010,7 +985,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
return ret;
}
- log_root->last_trans = trans->transid;
+ btrfs_set_root_last_trans(log_root, trans->transid);
log_root->root_key.offset = btrfs_root_id(root);
inode_item = &log_root->root_item.inode;
@@ -1033,7 +1008,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
struct btrfs_path *path,
- struct btrfs_key *key)
+ const struct btrfs_key *key)
{
struct btrfs_root *root;
struct btrfs_tree_parent_check check = { 0 };
@@ -1066,7 +1041,7 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
root->node = NULL;
goto fail;
}
- if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
+ if (unlikely(!btrfs_buffer_uptodate(root->node, generation, false))) {
ret = -EIO;
goto fail;
}
@@ -1075,10 +1050,10 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
* For real fs, and not log/reloc trees, root owner must
* match its root node owner
*/
- if (!btrfs_is_testing(fs_info) &&
- btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
- btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID &&
- btrfs_root_id(root) != btrfs_header_owner(root->node)) {
+ if (unlikely(!btrfs_is_testing(fs_info) &&
+ btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
+ btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID &&
+ btrfs_root_id(root) != btrfs_header_owner(root->node))) {
btrfs_crit(fs_info,
"root=%llu block=%llu, tree root owner mismatch, have %llu expect %llu",
btrfs_root_id(root), root->node->start,
@@ -1095,24 +1070,25 @@ fail:
}
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
- struct btrfs_key *key)
+ const struct btrfs_key *key)
{
struct btrfs_root *root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
path = btrfs_alloc_path();
if (!path)
return ERR_PTR(-ENOMEM);
root = read_tree_root_path(tree_root, path, key);
- btrfs_free_path(path);
return root;
}
/*
- * Initialize subvolume root in-memory structure
+ * Initialize subvolume root in-memory structure.
*
* @anon_dev: anonymous device to attach to the root, if zero, allocate new
+ *
+ * In case of failure the caller is responsible to call btrfs_free_fs_root()
*/
static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
{
@@ -1122,7 +1098,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
if (btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
!btrfs_is_data_reloc_root(root) &&
- is_fstree(btrfs_root_id(root))) {
+ btrfs_is_fstree(btrfs_root_id(root))) {
set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
@@ -1131,12 +1107,12 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
* Don't assign anonymous block device to roots that are not exposed to
* userspace, the id pool is limited to 1M
*/
- if (is_fstree(btrfs_root_id(root)) &&
+ if (btrfs_is_fstree(btrfs_root_id(root)) &&
btrfs_root_refs(&root->root_item) > 0) {
if (!anon_dev) {
ret = get_anon_bdev(&root->anon_dev);
if (ret)
- goto fail;
+ return ret;
} else {
root->anon_dev = anon_dev;
}
@@ -1146,7 +1122,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
ret = btrfs_init_root_free_objectid(root);
if (ret) {
mutex_unlock(&root->objectid_mutex);
- goto fail;
+ return ret;
}
ASSERT(root->free_objectid <= BTRFS_LAST_FREE_OBJECTID);
@@ -1154,9 +1130,6 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
mutex_unlock(&root->objectid_mutex);
return 0;
-fail:
- /* The caller is responsible to call btrfs_free_fs_root */
- return ret;
}
static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
@@ -1230,7 +1203,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
return ret;
}
-void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info)
+void btrfs_check_leaked_roots(const struct btrfs_fs_info *fs_info)
{
#ifdef CONFIG_BTRFS_DEBUG
struct btrfs_root *root;
@@ -1267,6 +1240,10 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
{
struct percpu_counter *em_counter = &fs_info->evictable_extent_maps;
+ if (fs_info->fs_devices)
+ btrfs_close_devices(fs_info->fs_devices);
+ btrfs_free_compress_wsm(fs_info);
+ percpu_counter_destroy(&fs_info->stats_read_blocks);
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
percpu_counter_destroy(&fs_info->ordered_bytes);
@@ -1293,7 +1270,6 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
btrfs_extent_buffer_leak_debug_check(fs_info);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
- kfree(fs_info->subpage_info);
kvfree(fs_info);
}
@@ -1336,7 +1312,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* This is namely for free-space-tree and quota tree, which can change
* at runtime and should only be grabbed from fs_info.
*/
- if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+ if (!btrfs_is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
return ERR_PTR(-ENOENT);
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
@@ -1577,7 +1553,7 @@ static int transaction_kthread(void *arg)
do {
cannot_commit = false;
- delay = msecs_to_jiffies(fs_info->commit_interval * 1000);
+ delay = secs_to_jiffies(fs_info->commit_interval);
mutex_lock(&fs_info->transaction_kthread_mutex);
spin_lock(&fs_info->trans_lock);
@@ -1592,9 +1568,9 @@ static int transaction_kthread(void *arg)
cur->state < TRANS_STATE_COMMIT_PREP &&
delta < fs_info->commit_interval) {
spin_unlock(&fs_info->trans_lock);
- delay -= msecs_to_jiffies((delta - 1) * 1000);
+ delay -= secs_to_jiffies(delta - 1);
delay = min(delay,
- msecs_to_jiffies(fs_info->commit_interval * 1000));
+ secs_to_jiffies(fs_info->commit_interval));
goto sleep;
}
transid = cur->transid;
@@ -1792,8 +1768,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
destroy_workqueue(fs_info->endio_workers);
if (fs_info->rmw_workers)
destroy_workqueue(fs_info->rmw_workers);
- if (fs_info->compressed_write_workers)
- destroy_workqueue(fs_info->compressed_write_workers);
btrfs_destroy_workqueue(fs_info->endio_write_workers);
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
btrfs_destroy_workqueue(fs_info->delayed_workers);
@@ -1854,7 +1828,10 @@ void btrfs_put_root(struct btrfs_root *root)
return;
if (refcount_dec_and_test(&root->refs)) {
- WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+ if (WARN_ON(!xa_empty(&root->inodes)))
+ xa_destroy(&root->inodes);
+ if (WARN_ON(!xa_empty(&root->delayed_nodes)))
+ xa_destroy(&root->delayed_nodes);
WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
@@ -1875,8 +1852,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
int i;
while (!list_empty(&fs_info->dead_roots)) {
- gang[0] = list_entry(fs_info->dead_roots.next,
- struct btrfs_root, root_list);
+ gang[0] = list_first_entry(&fs_info->dead_roots,
+ struct btrfs_root, root_list);
list_del(&gang[0]->root_list);
if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
@@ -1928,7 +1905,7 @@ static int btrfs_init_btree_inode(struct super_block *sb)
if (!inode)
return -ENOMEM;
- inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
+ btrfs_set_inode_number(BTRFS_I(inode), BTRFS_BTREE_INODE_OBJECTID);
set_nlink(inode, 1);
/*
* we set the i_size on the btree inode to the max possible int.
@@ -1939,17 +1916,14 @@ static int btrfs_init_btree_inode(struct super_block *sb)
inode->i_mapping->a_ops = &btree_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
- RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
- extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
- IO_TREE_BTREE_INODE_IO);
- extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
+ btrfs_extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
+ IO_TREE_BTREE_INODE_IO);
+ btrfs_extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
- BTRFS_I(inode)->location.objectid = BTRFS_BTREE_INODE_OBJECTID;
- BTRFS_I(inode)->location.type = 0;
- BTRFS_I(inode)->location.offset = 0;
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
__insert_inode_hash(inode, hash);
+ set_bit(AS_KERNEL_FILE, &inode->i_mapping->flags);
fs_info->btree_inode = inode;
return 0;
@@ -1969,9 +1943,8 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
fs_info->qgroup_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1;
- fs_info->qgroup_ulist = NULL;
fs_info->qgroup_rescan_running = false;
- fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
+ fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT;
mutex_init(&fs_info->qgroup_rescan_lock);
}
@@ -1979,7 +1952,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
{
u32 max_active = fs_info->thread_pool_size;
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
- unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE;
+ unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU;
fs_info->workers =
btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16);
@@ -2006,8 +1979,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
fs_info->endio_write_workers =
btrfs_alloc_workqueue(fs_info, "endio-write", flags,
max_active, 2);
- fs_info->compressed_write_workers =
- alloc_workqueue("btrfs-compressed-write", flags, max_active);
fs_info->endio_freespace_worker =
btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
max_active, 0);
@@ -2018,12 +1989,11 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
btrfs_alloc_ordered_workqueue(fs_info, "qgroup-rescan",
ordered_flags);
fs_info->discard_ctl.discard_workers =
- alloc_ordered_workqueue("btrfs_discard", WQ_FREEZABLE);
+ alloc_ordered_workqueue("btrfs-discard", WQ_FREEZABLE);
if (!(fs_info->workers &&
fs_info->delalloc_workers && fs_info->flush_workers &&
fs_info->endio_workers && fs_info->endio_meta_workers &&
- fs_info->compressed_write_workers &&
fs_info->endio_write_workers &&
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->fixup_workers &&
@@ -2050,14 +2020,10 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
fs_info->csum_shash = csum_shash;
- /*
- * Check if the checksum implementation is a fast accelerated one.
- * As-is this is a bit of a hack and should be replaced once the csum
- * implementations provide that information themselves.
- */
+ /* Check if the checksum implementation is a fast accelerated one. */
switch (csum_type) {
case BTRFS_CSUM_TYPE_CRC32:
- if (!strstr(crypto_shash_driver_name(csum_shash), "generic"))
+ if (crc32_optimizations() & CRC32C_OPTIMIZATION)
set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
break;
case BTRFS_CSUM_TYPE_XXHASH:
@@ -2083,7 +2049,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
u64 bytenr = btrfs_super_log_root(disk_super);
int level = btrfs_super_log_root_level(disk_super);
- if (fs_devices->rw_devices == 0) {
+ if (unlikely(fs_devices->rw_devices == 0)) {
btrfs_warn(fs_info, "log replay required on RO media");
return -EIO;
}
@@ -2104,7 +2070,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
btrfs_put_root(log_tree_root);
return ret;
}
- if (!extent_buffer_uptodate(log_tree_root->node)) {
+ if (unlikely(!extent_buffer_uptodate(log_tree_root->node))) {
btrfs_err(fs_info, "failed to read log tree");
btrfs_put_root(log_tree_root);
return -EIO;
@@ -2112,10 +2078,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
/* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
+ btrfs_put_root(log_tree_root);
if (ret) {
btrfs_handle_fs_error(fs_info, ret,
"Failed to recover log tree");
- btrfs_put_root(log_tree_root);
return ret;
}
@@ -2146,7 +2112,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
/* If we have IGNOREDATACSUMS skip loading these roots. */
if (objectid == BTRFS_CSUM_TREE_OBJECTID &&
btrfs_test_opt(fs_info, IGNOREDATACSUMS)) {
- set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
+ set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state);
return 0;
}
@@ -2180,8 +2146,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
found = true;
root = read_tree_root_path(tree_root, path, &key);
if (IS_ERR(root)) {
- if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
- ret = PTR_ERR(root);
+ ret = PTR_ERR(root);
break;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
@@ -2199,7 +2164,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
if (!found || ret) {
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
- set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
+ set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state);
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
ret = ret ? ret : -ENOENT;
@@ -2212,8 +2177,8 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
static int load_global_roots(struct btrfs_root *tree_root)
{
- struct btrfs_path *path;
- int ret = 0;
+ BTRFS_PATH_AUTO_FREE(path);
+ int ret;
path = btrfs_alloc_path();
if (!path)
@@ -2222,18 +2187,17 @@ static int load_global_roots(struct btrfs_root *tree_root)
ret = load_global_roots_objectid(tree_root, path,
BTRFS_EXTENT_TREE_OBJECTID, "extent");
if (ret)
- goto out;
+ return ret;
ret = load_global_roots_objectid(tree_root, path,
BTRFS_CSUM_TREE_OBJECTID, "csum");
if (ret)
- goto out;
+ return ret;
if (!btrfs_fs_compat_ro(tree_root->fs_info, FREE_SPACE_TREE))
- goto out;
+ return ret;
ret = load_global_roots_objectid(tree_root, path,
BTRFS_FREE_SPACE_TREE_OBJECTID,
"free space");
-out:
- btrfs_free_path(path);
+
return ret;
}
@@ -2340,6 +2304,71 @@ out:
return ret;
}
+static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_super_block *sb)
+{
+ unsigned int cur = 0; /* Offset inside the sys chunk array */
+ /*
+ * At sb read time, fs_info is not fully initialized. Thus we have
+ * to use super block sectorsize, which should have been validated.
+ */
+ const u32 sectorsize = btrfs_super_sectorsize(sb);
+ u32 sys_array_size = btrfs_super_sys_array_size(sb);
+
+ if (unlikely(sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)) {
+ btrfs_err(fs_info, "system chunk array too big %u > %u",
+ sys_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+ return -EUCLEAN;
+ }
+
+ while (cur < sys_array_size) {
+ struct btrfs_disk_key *disk_key;
+ struct btrfs_chunk *chunk;
+ struct btrfs_key key;
+ u64 type;
+ u16 num_stripes;
+ u32 len;
+ int ret;
+
+ disk_key = (struct btrfs_disk_key *)(sb->sys_chunk_array + cur);
+ len = sizeof(*disk_key);
+
+ if (unlikely(cur + len > sys_array_size))
+ goto short_read;
+ cur += len;
+
+ btrfs_disk_key_to_cpu(&key, disk_key);
+ if (unlikely(key.type != BTRFS_CHUNK_ITEM_KEY)) {
+ btrfs_err(fs_info,
+ "unexpected item type %u in sys_array at offset %u",
+ key.type, cur);
+ return -EUCLEAN;
+ }
+ chunk = (struct btrfs_chunk *)(sb->sys_chunk_array + cur);
+ num_stripes = btrfs_stack_chunk_num_stripes(chunk);
+ if (unlikely(cur + btrfs_chunk_item_size(num_stripes) > sys_array_size))
+ goto short_read;
+ type = btrfs_stack_chunk_type(chunk);
+ if (unlikely(!(type & BTRFS_BLOCK_GROUP_SYSTEM))) {
+ btrfs_err(fs_info,
+ "invalid chunk type %llu in sys_array at offset %u",
+ type, cur);
+ return -EUCLEAN;
+ }
+ ret = btrfs_check_chunk_valid(fs_info, NULL, chunk, key.offset,
+ sectorsize);
+ if (ret < 0)
+ return ret;
+ cur += btrfs_chunk_item_size(num_stripes);
+ }
+ return 0;
+short_read:
+ btrfs_err(fs_info,
+ "super block sys chunk array short read, cur=%u sys_array_size=%u",
+ cur, sys_array_size);
+ return -EUCLEAN;
+}
+
/*
* Real super block validation
* NOTE: super csum type and incompat features will not be checked here.
@@ -2350,21 +2379,29 @@ out:
* 1, 2 2nd and 3rd backup copy
* -1 skip bytenr check
*/
-int btrfs_validate_super(struct btrfs_fs_info *fs_info,
- struct btrfs_super_block *sb, int mirror_num)
+int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_super_block *sb, int mirror_num)
{
u64 nodesize = btrfs_super_nodesize(sb);
u64 sectorsize = btrfs_super_sectorsize(sb);
int ret = 0;
+ const bool ignore_flags = btrfs_test_opt(fs_info, IGNORESUPERFLAGS);
if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
btrfs_err(fs_info, "no valid FS found");
ret = -EINVAL;
}
- if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
- btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
- btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
- ret = -EINVAL;
+ if ((btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)) {
+ if (!ignore_flags) {
+ btrfs_err(fs_info,
+ "unrecognized or unsupported super flag 0x%llx",
+ btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
+ ret = -EINVAL;
+ } else {
+ btrfs_info(fs_info,
+ "unrecognized or unsupported super flags: 0x%llx, ignored",
+ btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
+ }
}
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
btrfs_err(fs_info, "tree_root level too big: %d >= %d",
@@ -2386,21 +2423,13 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
* Check sectorsize and nodesize first, other check will need it.
* Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
*/
- if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+ if (!is_power_of_2(sectorsize) || sectorsize < BTRFS_MIN_BLOCKSIZE ||
sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
ret = -EINVAL;
}
- /*
- * We only support at most two sectorsizes: 4K and PAGE_SIZE.
- *
- * We can support 16K sectorsize with 64K page size without problem,
- * but such sectorsize/pagesize combination doesn't make much sense.
- * 4K will be our future standard, PAGE_SIZE is supported from the very
- * beginning.
- */
- if (sectorsize > PAGE_SIZE || (sectorsize != SZ_4K && sectorsize != PAGE_SIZE)) {
+ if (!btrfs_supported_blocksize(sectorsize)) {
btrfs_err(fs_info,
"sectorsize %llu not yet supported for page size %lu",
sectorsize, PAGE_SIZE);
@@ -2467,7 +2496,7 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
(!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) ||
!btrfs_fs_incompat(fs_info, NO_HOLES))) {
btrfs_err(fs_info,
- "block-group-tree feature requires fres-space-tree and no-holes");
+ "block-group-tree feature requires free-space-tree and no-holes");
ret = -EINVAL;
}
@@ -2500,6 +2529,11 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
+ if (ret)
+ return ret;
+
+ ret = validate_sys_chunk_array(fs_info, sb);
+
/*
* Obvious sys_chunk_array corruptions, it must hold at least one key
* and one chunk
@@ -2562,13 +2596,13 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
ret = btrfs_validate_super(fs_info, sb, -1);
if (ret < 0)
goto out;
- if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
+ if (unlikely(!btrfs_supported_super_csum(btrfs_super_csum_type(sb)))) {
ret = -EUCLEAN;
btrfs_err(fs_info, "invalid csum type, has %u want %u",
btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32);
goto out;
}
- if (btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
+ if (unlikely(btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP)) {
ret = -EUCLEAN;
btrfs_err(fs_info,
"invalid incompat flags, has 0x%llx valid mask 0x%llx",
@@ -2598,7 +2632,7 @@ static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int lev
root->node = NULL;
return ret;
}
- if (!extent_buffer_uptodate(root->node)) {
+ if (unlikely(!extent_buffer_uptodate(root->node))) {
free_extent_buffer(root->node);
root->node = NULL;
return -EIO;
@@ -2702,10 +2736,21 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
return ret;
}
+/*
+ * Lockdep gets confused between our buffer_tree which requires IRQ locking because
+ * we modify marks in the IRQ context, and our delayed inode xarray which doesn't
+ * have these requirements. Use a class key so lockdep doesn't get them mixed up.
+ */
+static struct lock_class_key buffer_xa_class;
+
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
{
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
- INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
+
+ /* Use the same flags as mapping->i_pages. */
+ xa_init_flags(&fs_info->buffer_tree, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
+ lockdep_set_class(&fs_info->buffer_tree.xa_lock, &buffer_xa_class);
+
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -2717,7 +2762,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->super_lock);
- spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
spin_lock_init(&fs_info->treelog_bg_lock);
spin_lock_init(&fs_info->zone_active_bgs_lock);
@@ -2762,6 +2806,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
BTRFS_BLOCK_RSV_GLOBAL);
btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
+ btrfs_init_block_rsv(&fs_info->treelog_rsv, BTRFS_BLOCK_RSV_TREELOG);
btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
BTRFS_BLOCK_RSV_DELOPS);
@@ -2790,12 +2835,13 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
btrfs_init_scrub(fs_info);
btrfs_init_balance(fs_info);
btrfs_init_async_reclaim_work(fs_info);
+ btrfs_init_extent_map_shrinker_work(fs_info);
rwlock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT_CACHED;
- extent_io_tree_init(fs_info, &fs_info->excluded_extents,
- IO_TREE_FS_EXCLUDED_EXTENTS);
+ btrfs_extent_io_tree_init(fs_info, &fs_info->excluded_extents,
+ IO_TREE_FS_EXCLUDED_EXTENTS);
mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->tree_log_mutex);
@@ -2856,12 +2902,14 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
if (ret)
return ret;
- spin_lock_init(&fs_info->extent_map_shrinker_lock);
-
ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
if (ret)
return ret;
+ ret = percpu_counter_init(&fs_info->stats_read_blocks, 0, GFP_KERNEL);
+ if (ret)
+ return ret;
+
fs_info->dirty_metadata_batch = PAGE_SIZE *
(1 + ilog2(nr_cpu_ids));
@@ -2882,6 +2930,8 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
if (sb_rdonly(sb))
set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
+ if (btrfs_test_opt(fs_info, IGNOREMETACSUMS))
+ set_bit(BTRFS_FS_STATE_SKIP_META_CSUMS, &fs_info->fs_state);
return btrfs_alloc_stripe_hash_table(fs_info);
}
@@ -2927,22 +2977,22 @@ static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
u64 root_objectid = 0;
struct btrfs_root *gang[8];
- int i = 0;
- int err = 0;
- unsigned int ret = 0;
+ int ret = 0;
while (1) {
+ unsigned int found;
+
spin_lock(&fs_info->fs_roots_radix_lock);
- ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ found = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)gang, root_objectid,
ARRAY_SIZE(gang));
- if (!ret) {
+ if (!found) {
spin_unlock(&fs_info->fs_roots_radix_lock);
break;
}
- root_objectid = btrfs_root_id(gang[ret - 1]) + 1;
+ root_objectid = btrfs_root_id(gang[found - 1]) + 1;
- for (i = 0; i < ret; i++) {
+ for (int i = 0; i < found; i++) {
/* Avoid to grab roots in dead_roots. */
if (btrfs_root_refs(&gang[i]->root_item) == 0) {
gang[i] = NULL;
@@ -2953,24 +3003,25 @@ static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
}
spin_unlock(&fs_info->fs_roots_radix_lock);
- for (i = 0; i < ret; i++) {
+ for (int i = 0; i < found; i++) {
if (!gang[i])
continue;
root_objectid = btrfs_root_id(gang[i]);
- err = btrfs_orphan_cleanup(gang[i]);
- if (err)
- goto out;
+ /*
+ * Continue to release the remaining roots after the first
+ * error without cleanup and preserve the first error
+ * for the return.
+ */
+ if (!ret)
+ ret = btrfs_orphan_cleanup(gang[i]);
btrfs_put_root(gang[i]);
}
+ if (ret)
+ break;
+
root_objectid++;
}
-out:
- /* Release the uncleaned roots due to error. */
- for (; i < ret; i++) {
- if (gang[i])
- btrfs_put_root(gang[i]);
- }
- return err;
+ return ret;
}
/*
@@ -3182,13 +3233,13 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
}
/*
- * Subpage runtime limitation on v1 cache.
+ * Subpage/bs > ps runtime limitation on v1 cache.
*
- * V1 space cache still has some hard codeed PAGE_SIZE usage, while
+ * V1 space cache still has some hard coded PAGE_SIZE usage, while
* we're already defaulting to v2 cache, no need to bother v1 as it's
* going to be deprecated anyway.
*/
- if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
+ if (fs_info->sectorsize != PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
btrfs_warn(fs_info,
"v1 space cache is not supported for page size %lu with sectorsize %u",
PAGE_SIZE, fs_info->sectorsize);
@@ -3203,8 +3254,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
return 0;
}
-int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
- char *options)
+int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices)
{
u32 sectorsize;
u32 nodesize;
@@ -3243,7 +3293,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
/*
* Read super block and check the signature bytes only
*/
- disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev);
+ disk_super = btrfs_read_disk_super(fs_devices->latest_dev->bdev, 0, false);
if (IS_ERR(disk_super)) {
ret = PTR_ERR(disk_super);
goto fail_alloc;
@@ -3320,11 +3370,19 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
fs_info->nodesize = nodesize;
+ fs_info->nodesize_bits = ilog2(nodesize);
fs_info->sectorsize = sectorsize;
fs_info->sectorsize_bits = ilog2(sectorsize);
+ fs_info->block_min_order = ilog2(round_up(sectorsize, PAGE_SIZE) >> PAGE_SHIFT);
+ fs_info->block_max_order = ilog2((BITS_PER_LONG << fs_info->sectorsize_bits) >> PAGE_SHIFT);
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
fs_info->stripesize = stripesize;
+ fs_info->fs_devices->fs_info = fs_info;
+ if (fs_info->sectorsize > PAGE_SIZE)
+ btrfs_warn(fs_info,
+ "support for block size %u with page size %lu is experimental, some features may be missing",
+ fs_info->sectorsize, PAGE_SIZE);
/*
* Handle the space caching options appropriately now that we have the
* super block loaded and validated.
@@ -3346,21 +3404,9 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
*/
fs_info->max_inline = min_t(u64, fs_info->max_inline, fs_info->sectorsize);
- if (sectorsize < PAGE_SIZE) {
- struct btrfs_subpage_info *subpage_info;
-
- btrfs_warn(fs_info,
- "read-write for sector size %u with page size %lu is experimental",
- sectorsize, PAGE_SIZE);
- subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL);
- if (!subpage_info) {
- ret = -ENOMEM;
- goto fail_alloc;
- }
- btrfs_init_subpage_info(subpage_info, sectorsize);
- fs_info->subpage_info = subpage_info;
- }
-
+ ret = btrfs_alloc_compress_wsm(fs_info);
+ if (ret)
+ goto fail_sb_buffer;
ret = btrfs_init_workqueues(fs_info);
if (ret)
goto fail_sb_buffer;
@@ -3408,7 +3454,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
* below in btrfs_init_dev_replace().
*/
btrfs_free_extra_devids(fs_devices);
- if (!fs_devices->latest_dev->bdev) {
+ if (unlikely(!fs_devices->latest_dev->bdev)) {
btrfs_err(fs_info, "failed to read devices");
ret = -EIO;
goto fail_tree_roots;
@@ -3499,6 +3545,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_sysfs;
}
+ btrfs_zoned_reserve_data_reloc_bg(fs_info);
btrfs_free_zone_cache(fs_info);
btrfs_check_active_zone_reservation(fs_info);
@@ -3619,7 +3666,6 @@ fail_alloc:
iput(fs_info->btree_inode);
fail:
- btrfs_close_devices(fs_info->fs_devices);
ASSERT(ret < 0);
return ret;
}
@@ -3632,7 +3678,7 @@ static void btrfs_end_super_write(struct bio *bio)
bio_for_each_folio_all(fi, bio) {
if (bio->bi_status) {
- btrfs_warn_rl_in_rcu(device->fs_info,
+ btrfs_warn_rl(device->fs_info,
"lost super block write due to IO error on %s (%d)",
btrfs_dev_name(device),
blk_status_to_errno(bio->bi_status));
@@ -3652,85 +3698,6 @@ static void btrfs_end_super_write(struct bio *bio)
bio_put(bio);
}
-struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
- int copy_num, bool drop_cache)
-{
- struct btrfs_super_block *super;
- struct page *page;
- u64 bytenr, bytenr_orig;
- struct address_space *mapping = bdev->bd_mapping;
- int ret;
-
- bytenr_orig = btrfs_sb_offset(copy_num);
- ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr);
- if (ret == -ENOENT)
- return ERR_PTR(-EINVAL);
- else if (ret)
- return ERR_PTR(ret);
-
- if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
- return ERR_PTR(-EINVAL);
-
- if (drop_cache) {
- /* This should only be called with the primary sb. */
- ASSERT(copy_num == 0);
-
- /*
- * Drop the page of the primary superblock, so later read will
- * always read from the device.
- */
- invalidate_inode_pages2_range(mapping,
- bytenr >> PAGE_SHIFT,
- (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
- }
-
- page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
- if (IS_ERR(page))
- return ERR_CAST(page);
-
- super = page_address(page);
- if (btrfs_super_magic(super) != BTRFS_MAGIC) {
- btrfs_release_disk_super(super);
- return ERR_PTR(-ENODATA);
- }
-
- if (btrfs_super_bytenr(super) != bytenr_orig) {
- btrfs_release_disk_super(super);
- return ERR_PTR(-EINVAL);
- }
-
- return super;
-}
-
-
-struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
-{
- struct btrfs_super_block *super, *latest = NULL;
- int i;
- u64 transid = 0;
-
- /* we would like to check all the supers, but that would make
- * a btrfs mount succeed after a mkfs from a different FS.
- * So, we need to add a special mount option to scan for
- * later supers, using BTRFS_SUPER_MIRROR_MAX instead
- */
- for (i = 0; i < 1; i++) {
- super = btrfs_read_dev_one_super(bdev, i, false);
- if (IS_ERR(super))
- continue;
-
- if (!latest || btrfs_super_generation(super) > transid) {
- if (latest)
- btrfs_release_disk_super(super);
-
- latest = super;
- transid = btrfs_super_generation(super);
- }
- }
-
- return super;
-}
-
/*
* Write superblock @sb to the @device. Do not wait for completion, all the
* folios we use for writing are locked.
@@ -3770,8 +3737,8 @@ static int write_dev_supers(struct btrfs_device *device,
continue;
} else if (ret < 0) {
btrfs_err(device->fs_info,
- "couldn't get super block location for mirror %d",
- i);
+ "couldn't get super block location for mirror %d error %d",
+ i, ret);
atomic_inc(&device->sb_write_errors);
continue;
}
@@ -3790,12 +3757,11 @@ static int write_dev_supers(struct btrfs_device *device,
GFP_NOFS);
if (IS_ERR(folio)) {
btrfs_err(device->fs_info,
- "couldn't get super block page for bytenr %llu",
- bytenr);
+ "couldn't get super block page for bytenr %llu error %ld",
+ bytenr, PTR_ERR(folio));
atomic_inc(&device->sb_write_errors);
continue;
}
- ASSERT(folio_order(folio) == 0);
offset = offset_in_folio(folio, bytenr);
disk_super = folio_address(folio) + offset;
@@ -3868,7 +3834,6 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
/* If the folio has been removed, then we know it completed. */
if (IS_ERR(folio))
continue;
- ASSERT(folio_order(folio) == 0);
/* Folio will be unlocked once the write completes. */
folio_wait_locked(folio);
@@ -3983,7 +3948,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
* Checks last_flush_error of disks in order to determine the device
* state.
*/
- if (errors_wait && !btrfs_check_rw_degradable(info, NULL))
+ if (unlikely(errors_wait && !btrfs_check_rw_degradable(info, NULL)))
return -EIO;
return 0;
@@ -4011,7 +3976,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
}
if (min_tolerated == INT_MAX) {
- pr_warn("BTRFS: unknown raid flag: %llu", flags);
+ btrfs_warn(NULL, "unknown raid flag: %llu", flags);
min_tolerated = 0;
}
@@ -4085,7 +4050,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
ret = btrfs_validate_write_super(fs_info, sb);
- if (ret < 0) {
+ if (unlikely(ret < 0)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
btrfs_handle_fs_error(fs_info, -EUCLEAN,
"unexpected superblock corruption detected");
@@ -4096,7 +4061,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
if (ret)
total_errors++;
}
- if (total_errors > max_errors) {
+ if (unlikely(total_errors > max_errors)) {
btrfs_err(fs_info, "%d errors while writing supers",
total_errors);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@@ -4121,7 +4086,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
total_errors++;
}
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- if (total_errors > max_errors) {
+ if (unlikely(total_errors > max_errors)) {
btrfs_handle_fs_error(fs_info, -EIO,
"%d errors while writing supers",
total_errors);
@@ -4157,9 +4122,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
int btrfs_commit_super(struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *root = fs_info->tree_root;
- struct btrfs_trans_handle *trans;
-
mutex_lock(&fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(fs_info);
mutex_unlock(&fs_info->cleaner_mutex);
@@ -4169,10 +4131,7 @@ int btrfs_commit_super(struct btrfs_fs_info *fs_info)
down_write(&fs_info->cleanup_work_sem);
up_write(&fs_info->cleanup_work_sem);
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- return btrfs_commit_transaction(trans);
+ return btrfs_commit_current_transaction(fs_info->tree_root);
}
static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
@@ -4194,15 +4153,16 @@ static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
u64 found_end;
found = true;
- while (find_first_extent_bit(&trans->dirty_pages, cur,
- &found_start, &found_end, EXTENT_DIRTY, &cached)) {
+ while (btrfs_find_first_extent_bit(&trans->dirty_pages, cur,
+ &found_start, &found_end,
+ EXTENT_DIRTY, &cached)) {
dirty_bytes += found_end + 1 - found_start;
cur = found_end + 1;
}
btrfs_warn(fs_info,
"transaction %llu (with %llu dirty metadata bytes) is not committed",
trans->transid, dirty_bytes);
- btrfs_cleanup_one_transaction(trans, fs_info);
+ btrfs_cleanup_one_transaction(trans);
if (trans == fs_info->running_transaction)
fs_info->running_transaction = NULL;
@@ -4272,6 +4232,59 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_cleanup_defrag_inodes(fs_info);
/*
+ * Handle the error fs first, as it will flush and wait for all ordered
+ * extents. This will generate delayed iputs, thus we want to handle
+ * it first.
+ */
+ if (unlikely(BTRFS_FS_ERROR(fs_info)))
+ btrfs_error_commit_super(fs_info);
+
+ /*
+ * Wait for any fixup workers to complete.
+ * If we don't wait for them here and they are still running by the time
+ * we call kthread_stop() against the cleaner kthread further below, we
+ * get an use-after-free on the cleaner because the fixup worker adds an
+ * inode to the list of delayed iputs and then attempts to wakeup the
+ * cleaner kthread, which was already stopped and destroyed. We parked
+ * already the cleaner, but below we run all pending delayed iputs.
+ */
+ btrfs_flush_workqueue(fs_info->fixup_workers);
+ /*
+ * Similar case here, we have to wait for delalloc workers before we
+ * proceed below and stop the cleaner kthread, otherwise we trigger a
+ * use-after-tree on the cleaner kthread task_struct when a delalloc
+ * worker running submit_compressed_extents() adds a delayed iput, which
+ * does a wake up on the cleaner kthread, which was already freed below
+ * when we call kthread_stop().
+ */
+ btrfs_flush_workqueue(fs_info->delalloc_workers);
+
+ /*
+ * We can have ordered extents getting their last reference dropped from
+ * the fs_info->workers queue because for async writes for data bios we
+ * queue a work for that queue, at btrfs_wq_submit_bio(), that runs
+ * run_one_async_done() which calls btrfs_bio_end_io() in case the bio
+ * has an error, and that later function can do the final
+ * btrfs_put_ordered_extent() on the ordered extent attached to the bio,
+ * which adds a delayed iput for the inode. So we must flush the queue
+ * so that we don't have delayed iputs after committing the current
+ * transaction below and stopping the cleaner and transaction kthreads.
+ */
+ btrfs_flush_workqueue(fs_info->workers);
+
+ /*
+ * When finishing a compressed write bio we schedule a work queue item
+ * to finish an ordered extent - end_bbio_compressed_write()
+ * calls btrfs_finish_ordered_extent() which in turns does a call to
+ * btrfs_queue_ordered_fn(), and that queues the ordered extent
+ * completion either in the endio_write_workers work queue or in the
+ * fs_info->endio_freespace_worker work queue. We flush those queues
+ * below, so before we flush them we must flush this queue for the
+ * workers of compressed writes.
+ */
+ flush_workqueue(fs_info->endio_workers);
+
+ /*
* After we parked the cleaner kthread, ordered extents may have
* completed and created new delayed iputs. If one of the async reclaim
* tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
@@ -4282,8 +4295,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
*
* So wait for all ongoing ordered extents to complete and then run
* delayed iputs. This works because once we reach this point no one
- * can either create new ordered extents nor create delayed iputs
- * through some other means.
+ * can create new ordered extents, but delayed iputs can still be added
+ * by a reclaim worker (see comments further below).
*
* Also note that btrfs_wait_ordered_roots() is not safe here, because
* it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
@@ -4294,11 +4307,28 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_flush_workqueue(fs_info->endio_write_workers);
/* Ordered extents for free space inodes. */
btrfs_flush_workqueue(fs_info->endio_freespace_worker);
+ /*
+ * Run delayed iputs in case an async reclaim worker is waiting for them
+ * to be run as mentioned above.
+ */
btrfs_run_delayed_iputs(fs_info);
cancel_work_sync(&fs_info->async_reclaim_work);
cancel_work_sync(&fs_info->async_data_reclaim_work);
cancel_work_sync(&fs_info->preempt_reclaim_work);
+ cancel_work_sync(&fs_info->em_shrinker_work);
+
+ /*
+ * Run delayed iputs again because an async reclaim worker may have
+ * added new ones if it was flushing delalloc:
+ *
+ * shrink_delalloc() -> btrfs_start_delalloc_roots() ->
+ * start_delalloc_inodes() -> btrfs_add_delayed_iput()
+ */
+ btrfs_run_delayed_iputs(fs_info);
+
+ /* There should be no more workload to generate new delayed iputs. */
+ set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state);
/* Cancel or finish ongoing discard work */
btrfs_discard_cleanup(fs_info);
@@ -4328,9 +4358,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info, "commit super ret %d", ret);
}
- if (BTRFS_FS_ERROR(fs_info))
- btrfs_error_commit_super(fs_info);
-
kthread_stop(fs_info->transaction_kthread);
kthread_stop(fs_info->cleaner_kthread);
@@ -4338,7 +4365,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
if (btrfs_check_quota_leak(fs_info)) {
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ DEBUG_WARN("qgroup reserved space leaked");
btrfs_err(fs_info, "qgroup reserved space leaked");
}
@@ -4385,7 +4412,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
iput(fs_info->btree_inode);
btrfs_mapping_tree_free(fs_info);
- btrfs_close_devices(fs_info->fs_devices);
}
void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans,
@@ -4453,10 +4479,6 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
/* cleanup FS via transaction */
btrfs_cleanup_transaction(fs_info);
- mutex_lock(&fs_info->cleaner_mutex);
- btrfs_run_delayed_iputs(fs_info);
- mutex_unlock(&fs_info->cleaner_mutex);
-
down_write(&fs_info->cleanup_work_sem);
up_write(&fs_info->cleanup_work_sem);
}
@@ -4533,76 +4555,7 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
* extents that haven't had their dirty pages IO start writeout yet
* actually get run and error out properly.
*/
- btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
-}
-
-static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
- struct btrfs_fs_info *fs_info)
-{
- struct rb_node *node;
- struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
- struct btrfs_delayed_ref_node *ref;
-
- spin_lock(&delayed_refs->lock);
- while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
- struct btrfs_delayed_ref_head *head;
- struct rb_node *n;
- bool pin_bytes = false;
-
- head = rb_entry(node, struct btrfs_delayed_ref_head,
- href_node);
- if (btrfs_delayed_ref_lock(delayed_refs, head))
- continue;
-
- spin_lock(&head->lock);
- while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
- ref = rb_entry(n, struct btrfs_delayed_ref_node,
- ref_node);
- rb_erase_cached(&ref->ref_node, &head->ref_tree);
- RB_CLEAR_NODE(&ref->ref_node);
- if (!list_empty(&ref->add_list))
- list_del(&ref->add_list);
- atomic_dec(&delayed_refs->num_entries);
- btrfs_put_delayed_ref(ref);
- btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
- }
- if (head->must_insert_reserved)
- pin_bytes = true;
- btrfs_free_delayed_extent_op(head->extent_op);
- btrfs_delete_ref_head(delayed_refs, head);
- spin_unlock(&head->lock);
- spin_unlock(&delayed_refs->lock);
- mutex_unlock(&head->mutex);
-
- if (pin_bytes) {
- struct btrfs_block_group *cache;
-
- cache = btrfs_lookup_block_group(fs_info, head->bytenr);
- BUG_ON(!cache);
-
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- cache->pinned += head->num_bytes;
- btrfs_space_info_update_bytes_pinned(fs_info,
- cache->space_info, head->num_bytes);
- cache->reserved -= head->num_bytes;
- cache->space_info->bytes_reserved -= head->num_bytes;
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
-
- btrfs_put_block_group(cache);
-
- btrfs_error_unpin_extent_range(fs_info, head->bytenr,
- head->bytenr + head->num_bytes - 1);
- }
- btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
- btrfs_put_delayed_ref_head(head);
- cond_resched();
- spin_lock(&delayed_refs->lock);
- }
- btrfs_qgroup_destroy_extent_records(trans);
-
- spin_unlock(&delayed_refs->lock);
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
}
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
@@ -4668,9 +4621,9 @@ static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
u64 start = 0;
u64 end;
- while (find_first_extent_bit(dirty_pages, start, &start, &end,
- mark, NULL)) {
- clear_extent_bits(dirty_pages, start, end, mark);
+ while (btrfs_find_first_extent_bit(dirty_pages, start, &start, &end,
+ mark, NULL)) {
+ btrfs_clear_extent_bit(dirty_pages, start, end, mark, NULL);
while (start <= end) {
eb = find_extent_buffer(fs_info, start);
start += fs_info->nodesize;
@@ -4703,14 +4656,14 @@ static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
* the same extent range.
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
- if (!find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY, &cached_state)) {
+ if (!btrfs_find_first_extent_bit(unpin, 0, &start, &end,
+ EXTENT_DIRTY, &cached_state)) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
}
- clear_extent_dirty(unpin, start, end, &cached_state);
- free_extent_state(cached_state);
+ btrfs_clear_extent_dirty(unpin, start, end, &cached_state);
+ btrfs_free_extent_state(cached_state);
btrfs_error_unpin_extent_range(fs_info, start, end);
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
cond_resched();
@@ -4810,9 +4763,9 @@ static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info)
spin_unlock(&fs_info->fs_roots_radix_lock);
}
-void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
- struct btrfs_fs_info *fs_info)
+void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans)
{
+ struct btrfs_fs_info *fs_info = cur_trans->fs_info;
struct btrfs_device *dev, *tmp;
btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
@@ -4824,7 +4777,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
list_del_init(&dev->post_commit_list);
}
- btrfs_destroy_delayed_refs(cur_trans, fs_info);
+ btrfs_destroy_delayed_refs(cur_trans);
cur_trans->state = TRANS_STATE_COMMIT_START;
wake_up(&fs_info->transaction_blocked_wait);
@@ -4870,7 +4823,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
} else {
spin_unlock(&fs_info->trans_lock);
}
- btrfs_cleanup_one_transaction(t, fs_info);
+ btrfs_cleanup_one_transaction(t);
spin_lock(&fs_info->trans_lock);
if (t == fs_info->running_transaction)
@@ -4896,7 +4849,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
int btrfs_init_root_free_objectid(struct btrfs_root *root)
{
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
int ret;
struct extent_buffer *l;
struct btrfs_key search_key;
@@ -4912,14 +4865,13 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root)
search_key.offset = (u64)-1;
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret < 0)
- goto error;
- if (ret == 0) {
+ return ret;
+ if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist a root
* with such id, but this is out of valid range.
*/
- ret = -EUCLEAN;
- goto error;
+ return -EUCLEAN;
}
if (path->slots[0] > 0) {
slot = path->slots[0] - 1;
@@ -4930,10 +4882,8 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root)
} else {
root->free_objectid = BTRFS_FIRST_FREE_OBJECTID;
}
- ret = 0;
-error:
- btrfs_free_path(path);
- return ret;
+
+ return 0;
}
int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid)