summaryrefslogtreecommitdiff
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--fs/btrfs/volumes.c974
1 files changed, 441 insertions, 533 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f15591f3e54f..89835071cfea 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -13,8 +13,8 @@
#include <linux/list_sort.h>
#include <linux/namei.h>
#include "misc.h"
-#include "ctree.h"
#include "disk-io.h"
+#include "extent-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "raid56.h"
@@ -48,6 +48,7 @@ struct btrfs_io_geometry {
u64 raid56_full_stripe_start;
int max_errors;
enum btrfs_map_op op;
+ bool use_rst;
};
const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
@@ -403,7 +404,7 @@ static void btrfs_free_device(struct btrfs_device *device)
{
WARN_ON(!list_empty(&device->post_commit_list));
rcu_string_free(device->name);
- extent_io_tree_release(&device->alloc_state);
+ btrfs_extent_io_tree_release(&device->alloc_state);
btrfs_destroy_dev_zone_info(device);
kfree(device);
}
@@ -414,8 +415,8 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
WARN_ON(fs_devices->opened);
while (!list_empty(&fs_devices->devices)) {
- device = list_entry(fs_devices->devices.next,
- struct btrfs_device, dev_list);
+ device = list_first_entry(&fs_devices->devices,
+ struct btrfs_device, dev_list);
list_del(&device->dev_list);
btrfs_free_device(device);
}
@@ -427,8 +428,8 @@ void __exit btrfs_cleanup_fs_uuids(void)
struct btrfs_fs_devices *fs_devices;
while (!list_empty(&fs_uuids)) {
- fs_devices = list_entry(fs_uuids.next,
- struct btrfs_fs_devices, fs_list);
+ fs_devices = list_first_entry(&fs_uuids, struct btrfs_fs_devices,
+ fs_list);
list_del(&fs_devices->fs_list);
free_fs_devices(fs_devices);
}
@@ -476,19 +477,23 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
if (IS_ERR(*bdev_file)) {
ret = PTR_ERR(*bdev_file);
+ btrfs_err(NULL, "failed to open device for path %s with flags 0x%x: %d",
+ device_path, flags, ret);
goto error;
}
bdev = file_bdev(*bdev_file);
if (flush)
sync_blockdev(bdev);
- ret = set_blocksize(bdev, BTRFS_BDEV_BLOCKSIZE);
- if (ret) {
- fput(*bdev_file);
- goto error;
+ if (holder) {
+ ret = set_blocksize(*bdev_file, BTRFS_BDEV_BLOCKSIZE);
+ if (ret) {
+ fput(*bdev_file);
+ goto error;
+ }
}
invalidate_bdev(bdev);
- *disk_super = btrfs_read_dev_super(bdev);
+ *disk_super = btrfs_read_disk_super(bdev, 0, false);
if (IS_ERR(*disk_super)) {
ret = PTR_ERR(*disk_super);
fput(*bdev_file);
@@ -498,6 +503,7 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
return 0;
error:
+ *disk_super = NULL;
*bdev_file = NULL;
return ret;
}
@@ -719,7 +725,7 @@ error_free_page:
return -EINVAL;
}
-u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb)
+const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb)
{
bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) &
BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
@@ -727,6 +733,42 @@ u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb)
return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
}
+static bool is_same_device(struct btrfs_device *device, const char *new_path)
+{
+ struct path old = { .mnt = NULL, .dentry = NULL };
+ struct path new = { .mnt = NULL, .dentry = NULL };
+ char *old_path = NULL;
+ bool is_same = false;
+ int ret;
+
+ if (!device->name)
+ goto out;
+
+ old_path = kzalloc(PATH_MAX, GFP_NOFS);
+ if (!old_path)
+ goto out;
+
+ rcu_read_lock();
+ ret = strscpy(old_path, rcu_str_deref(device->name), PATH_MAX);
+ rcu_read_unlock();
+ if (ret < 0)
+ goto out;
+
+ ret = kern_path(old_path, LOOKUP_FOLLOW, &old);
+ if (ret)
+ goto out;
+ ret = kern_path(new_path, LOOKUP_FOLLOW, &new);
+ if (ret)
+ goto out;
+ if (path_equal(&old, &new))
+ is_same = true;
+out:
+ kfree(old_path);
+ path_put(&old);
+ path_put(&new);
+ return is_same;
+}
+
/*
* Add new device to list of registered devices
*
@@ -847,7 +889,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
MAJOR(path_devt), MINOR(path_devt),
current->comm, task_pid_nr(current));
- } else if (!device->name || strcmp(device->name->str, path)) {
+ } else if (!device->name || !is_same_device(device, path)) {
/*
* When FS is already mounted.
* 1. If you are here and if the device->name is NULL that
@@ -1100,13 +1142,14 @@ static void btrfs_close_one_device(struct btrfs_device *device)
if (device->bdev) {
fs_devices->open_devices--;
device->bdev = NULL;
+ device->bdev_file = NULL;
}
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
btrfs_destroy_dev_zone_info(device);
device->fs_info = NULL;
atomic_set(&device->dev_stats_ccnt, 0);
- extent_io_tree_release(&device->alloc_state);
+ btrfs_extent_io_tree_release(&device->alloc_state);
/*
* Reset the flush error record. We might have a transient flush error
@@ -1184,6 +1227,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *device;
struct btrfs_device *latest_dev = NULL;
struct btrfs_device *tmp_device;
+ s64 __maybe_unused value = 0;
int ret = 0;
list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
@@ -1213,7 +1257,23 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
fs_devices->latest_dev = latest_dev;
fs_devices->total_rw_bytes = 0;
fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ fs_devices->rr_min_contig_read = BTRFS_DEFAULT_RR_MIN_CONTIG_READ;
+ fs_devices->read_devid = latest_dev->devid;
+ fs_devices->read_policy = btrfs_read_policy_to_enum(btrfs_get_mod_read_policy(),
+ &value);
+ if (fs_devices->read_policy == BTRFS_READ_POLICY_RR)
+ fs_devices->collect_fs_stats = true;
+
+ if (value) {
+ if (fs_devices->read_policy == BTRFS_READ_POLICY_RR)
+ fs_devices->rr_min_contig_read = value;
+ if (fs_devices->read_policy == BTRFS_READ_POLICY_DEVID)
+ fs_devices->read_devid = value;
+ }
+#else
fs_devices->read_policy = BTRFS_READ_POLICY_PID;
+#endif
return 0;
}
@@ -1265,48 +1325,58 @@ void btrfs_release_disk_super(struct btrfs_super_block *super)
put_page(page);
}
-static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
- u64 bytenr, u64 bytenr_orig)
+struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
+ int copy_num, bool drop_cache)
{
- struct btrfs_super_block *disk_super;
+ struct btrfs_super_block *super;
struct page *page;
- void *p;
- pgoff_t index;
+ u64 bytenr, bytenr_orig;
+ struct address_space *mapping = bdev->bd_mapping;
+ int ret;
- /* make sure our super fits in the device */
- if (bytenr + PAGE_SIZE >= bdev_nr_bytes(bdev))
- return ERR_PTR(-EINVAL);
+ bytenr_orig = btrfs_sb_offset(copy_num);
+ ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ ret = -EINVAL;
+ return ERR_PTR(ret);
+ }
- /* make sure our super fits in the page */
- if (sizeof(*disk_super) > PAGE_SIZE)
+ if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
return ERR_PTR(-EINVAL);
- /* make sure our super doesn't straddle pages on disk */
- index = bytenr >> PAGE_SHIFT;
- if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
- return ERR_PTR(-EINVAL);
+ if (drop_cache) {
+ /* This should only be called with the primary sb. */
+ ASSERT(copy_num == 0);
- /* pull in the page with our super */
- page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, GFP_KERNEL);
+ /*
+ * Drop the page of the primary superblock, so later read will
+ * always read from the device.
+ */
+ invalidate_inode_pages2_range(mapping, bytenr >> PAGE_SHIFT,
+ (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
+ }
+ page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
if (IS_ERR(page))
return ERR_CAST(page);
- p = page_address(page);
-
- /* align our pointer to the offset of the super block */
- disk_super = p + offset_in_page(bytenr);
-
- if (btrfs_super_bytenr(disk_super) != bytenr_orig ||
- btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
- btrfs_release_disk_super(p);
+ super = page_address(page);
+ if (btrfs_super_magic(super) != BTRFS_MAGIC ||
+ btrfs_super_bytenr(super) != bytenr_orig) {
+ btrfs_release_disk_super(super);
return ERR_PTR(-EINVAL);
}
- if (disk_super->label[0] && disk_super->label[BTRFS_LABEL_SIZE - 1])
- disk_super->label[BTRFS_LABEL_SIZE - 1] = 0;
+ /*
+ * Make sure the last byte of label is properly NUL termiated. We use
+ * '%s' to print the label, if not properly NUL termiated we can access
+ * beyond the label.
+ */
+ if (super->label[0] && super->label[BTRFS_LABEL_SIZE - 1])
+ super->label[BTRFS_LABEL_SIZE - 1] = 0;
- return disk_super;
+ return super;
}
int btrfs_forget_devices(dev_t devt)
@@ -1377,20 +1447,11 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
bool new_device_added = false;
struct btrfs_device *device = NULL;
struct file *bdev_file;
- u64 bytenr, bytenr_orig;
dev_t devt;
- int ret;
lockdep_assert_held(&uuid_mutex);
/*
- * we would like to check all the supers, but that would make
- * a btrfs mount succeed after a mkfs from a different FS.
- * So, we need to add a special mount option to scan for
- * later supers, using BTRFS_SUPER_MIRROR_MAX instead
- */
-
- /*
* Avoid an exclusive open here, as the systemd-udev may initiate the
* device scan which may race with the user's mount or mkfs command,
* resulting in failure.
@@ -1404,15 +1465,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
if (IS_ERR(bdev_file))
return ERR_CAST(bdev_file);
- bytenr_orig = btrfs_sb_offset(0);
- ret = btrfs_sb_log_location_bdev(file_bdev(bdev_file), 0, READ, &bytenr);
- if (ret) {
- device = ERR_PTR(ret);
- goto error_bdev_put;
- }
-
- disk_super = btrfs_read_disk_super(file_bdev(bdev_file), bytenr,
- bytenr_orig);
+ disk_super = btrfs_read_disk_super(file_bdev(bdev_file), 0, false);
if (IS_ERR(disk_super)) {
device = ERR_CAST(disk_super);
goto error_bdev_put;
@@ -1453,9 +1506,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
lockdep_assert_held(&device->fs_info->chunk_mutex);
- if (find_first_extent_bit(&device->alloc_state, *start,
- &physical_start, &physical_end,
- CHUNK_ALLOCATED, NULL)) {
+ if (btrfs_find_first_extent_bit(&device->alloc_state, *start,
+ &physical_start, &physical_end,
+ CHUNK_ALLOCATED, NULL)) {
if (in_range(physical_start, *start, len) ||
in_range(*start, physical_start,
@@ -1470,6 +1523,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
static u64 dev_extent_search_start(struct btrfs_device *device)
{
switch (device->fs_devices->chunk_alloc_policy) {
+ default:
+ btrfs_warn_unknown_chunk_allocation(device->fs_devices->chunk_alloc_policy);
+ fallthrough;
case BTRFS_CHUNK_ALLOC_REGULAR:
return BTRFS_DEVICE_RANGE_RESERVED;
case BTRFS_CHUNK_ALLOC_ZONED:
@@ -1479,8 +1535,6 @@ static u64 dev_extent_search_start(struct btrfs_device *device)
* for superblock logging.
*/
return 0;
- default:
- BUG();
}
}
@@ -1493,7 +1547,8 @@ static bool dev_extent_hole_check_zoned(struct btrfs_device *device,
int ret;
bool changed = false;
- ASSERT(IS_ALIGNED(*hole_start, zone_size));
+ ASSERT(IS_ALIGNED(*hole_start, zone_size),
+ "hole_start=%llu zone_size=%llu", *hole_start, zone_size);
while (*hole_size > 0) {
pos = btrfs_find_allocatable_zones(device, *hole_start,
@@ -1559,6 +1614,9 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
}
switch (device->fs_devices->chunk_alloc_policy) {
+ default:
+ btrfs_warn_unknown_chunk_allocation(device->fs_devices->chunk_alloc_policy);
+ fallthrough;
case BTRFS_CHUNK_ALLOC_REGULAR:
/* No extra check */
break;
@@ -1573,8 +1631,6 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
continue;
}
break;
- default:
- BUG();
}
break;
@@ -1651,8 +1707,8 @@ again:
path->skip_locking = 1;
key.objectid = device->devid;
- key.offset = search_start;
key.type = BTRFS_DEV_EXTENT_KEY;
+ key.offset = search_start;
ret = btrfs_search_backwards(root, &key, path);
if (ret < 0)
@@ -1744,7 +1800,9 @@ next:
else
ret = 0;
- ASSERT(max_hole_start + max_hole_size <= search_end);
+ ASSERT(max_hole_start + max_hole_size <= search_end,
+ "max_hole_start=%llu max_hole_size=%llu search_end=%llu",
+ max_hole_start, max_hole_size, search_end);
out:
btrfs_free_path(path);
*start = max_hole_start;
@@ -1771,8 +1829,8 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
return -ENOMEM;
key.objectid = device->devid;
- key.offset = start;
key.type = BTRFS_DEV_EXTENT_KEY;
+ key.offset = start;
again:
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0) {
@@ -1920,7 +1978,6 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
ptr = btrfs_device_fsid(dev_item);
write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid,
ptr, BTRFS_FSID_SIZE);
- btrfs_mark_buffer_dirty(trans, leaf);
ret = 0;
out:
@@ -2058,7 +2115,7 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
down_read(&fs_info->dev_replace.rwsem);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
- ASSERT(num_devices > 1);
+ ASSERT(num_devices > 1, "num_devices=%llu", num_devices);
num_devices--;
}
up_read(&fs_info->dev_replace.rwsem);
@@ -2074,7 +2131,7 @@ static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info,
const u64 bytenr = btrfs_sb_offset(copy_num);
int ret;
- disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr);
+ disk_super = btrfs_read_disk_super(bdev, copy_num, false);
if (IS_ERR(disk_super))
return;
@@ -2262,7 +2319,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
*/
if (cur_devices->num_devices == 0) {
list_del_init(&cur_devices->seed_list);
- ASSERT(cur_devices->opened == 1);
+ ASSERT(cur_devices->opened == 1, "opened=%d", cur_devices->opened);
cur_devices->opened--;
free_fs_devices(cur_devices);
}
@@ -2575,8 +2632,8 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
return -ENOMEM;
key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
- key.offset = 0;
key.type = BTRFS_DEV_ITEM_KEY;
+ key.offset = 0;
while (1) {
btrfs_reserve_chunk_metadata(trans, false);
@@ -2616,11 +2673,9 @@ next_slot:
device = btrfs_find_device(fs_info->fs_devices, &args);
BUG_ON(!device); /* Logic error */
- if (device->fs_devices->seeding) {
+ if (device->fs_devices->seeding)
btrfs_set_device_generation(leaf, dev_item,
device->generation);
- btrfs_mark_buffer_dirty(trans, leaf);
- }
path->slots[0]++;
goto next_slot;
@@ -2714,11 +2769,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
device->dev_stats_valid = 1;
- set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
+ set_blocksize(device->bdev_file, BTRFS_BDEV_BLOCKSIZE);
if (seeding_dev) {
- btrfs_clear_sb_rdonly(sb);
-
/* GFP_KERNEL allocation must not be under device_list_mutex */
seed_devices = btrfs_init_sprout(fs_info);
if (IS_ERR(seed_devices)) {
@@ -2861,8 +2914,6 @@ error_sysfs:
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
error_trans:
- if (seeding_dev)
- btrfs_set_sb_rdonly(sb);
if (trans)
btrfs_end_transaction(trans);
error_free_zone:
@@ -2917,8 +2968,6 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
btrfs_device_get_disk_total_bytes(device));
btrfs_set_device_bytes_used(leaf, dev_item,
btrfs_device_get_bytes_used(device));
- btrfs_mark_buffer_dirty(trans, leaf);
-
out:
btrfs_free_path(path);
return ret;
@@ -2981,23 +3030,26 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
return -ENOMEM;
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
- key.offset = chunk_offset;
key.type = BTRFS_CHUNK_ITEM_KEY;
+ key.offset = chunk_offset;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
goto out;
else if (ret > 0) { /* Logic error or corruption */
- btrfs_handle_fs_error(fs_info, -ENOENT,
- "Failed lookup while freeing chunk.");
- ret = -ENOENT;
+ btrfs_err(fs_info, "failed to lookup chunk %llu when freeing",
+ chunk_offset);
+ btrfs_abort_transaction(trans, -ENOENT);
+ ret = -EUCLEAN;
goto out;
}
ret = btrfs_del_item(trans, root, path);
- if (ret < 0)
- btrfs_handle_fs_error(fs_info, ret,
- "Failed to delete chunk item.");
+ if (ret < 0) {
+ btrfs_err(fs_info, "failed to delete chunk %llu item", chunk_offset);
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
out:
btrfs_free_path(path);
return ret;
@@ -3197,7 +3249,8 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
* user having built with ASSERT enabled, so if ASSERT doesn't
* do anything we still error out.
*/
- ASSERT(0);
+ DEBUG_WARN("errr %ld reading chunk map at offset %llu",
+ PTR_ERR(map), chunk_offset);
return PTR_ERR(map);
}
@@ -3278,8 +3331,16 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
if (ret == -ENOSPC) {
const u64 sys_flags = btrfs_system_alloc_profile(fs_info);
struct btrfs_block_group *sys_bg;
+ struct btrfs_space_info *space_info;
- sys_bg = btrfs_create_chunk(trans, sys_flags);
+ space_info = btrfs_find_space_info(fs_info, sys_flags);
+ if (!space_info) {
+ ret = -EINVAL;
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ sys_bg = btrfs_create_chunk(trans, space_info, sys_flags);
if (IS_ERR(sys_bg)) {
ret = PTR_ERR(sys_bg);
btrfs_abort_transaction(trans, ret);
@@ -3436,8 +3497,8 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
again:
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
- key.offset = (u64)-1;
key.type = BTRFS_CHUNK_ITEM_KEY;
+ key.offset = (u64)-1;
while (1) {
mutex_lock(&fs_info->reclaim_bgs_lock);
@@ -3455,6 +3516,7 @@ again:
* alignment and size).
*/
ret = -EUCLEAN;
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
goto error;
}
@@ -3623,10 +3685,7 @@ static int insert_balance_item(struct btrfs_fs_info *fs_info,
btrfs_set_balance_meta(leaf, item, &disk_bargs);
btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
btrfs_set_balance_sys(leaf, item, &disk_bargs);
-
btrfs_set_balance_flags(leaf, item, bctl->flags);
-
- btrfs_mark_buffer_dirty(trans, leaf);
out:
btrfs_free_path(path);
err = btrfs_commit_transaction(trans);
@@ -3741,26 +3800,25 @@ static void reset_balance_state(struct btrfs_fs_info *fs_info)
* Balance filters. Return 1 if chunk should be filtered out
* (should not be balanced).
*/
-static int chunk_profiles_filter(u64 chunk_type,
- struct btrfs_balance_args *bargs)
+static bool chunk_profiles_filter(u64 chunk_type, struct btrfs_balance_args *bargs)
{
chunk_type = chunk_to_extended(chunk_type) &
BTRFS_EXTENDED_PROFILE_MASK;
if (bargs->profiles & chunk_type)
- return 0;
+ return false;
- return 1;
+ return true;
}
-static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
- struct btrfs_balance_args *bargs)
+static bool chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+ struct btrfs_balance_args *bargs)
{
struct btrfs_block_group *cache;
u64 chunk_used;
u64 user_thresh_min;
u64 user_thresh_max;
- int ret = 1;
+ bool ret = true;
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
chunk_used = cache->used;
@@ -3778,18 +3836,18 @@ static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_off
user_thresh_max = mult_perc(cache->length, bargs->usage_max);
if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
- ret = 0;
+ ret = false;
btrfs_put_block_group(cache);
return ret;
}
-static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
- u64 chunk_offset, struct btrfs_balance_args *bargs)
+static bool chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+ struct btrfs_balance_args *bargs)
{
struct btrfs_block_group *cache;
u64 chunk_used, user_thresh;
- int ret = 1;
+ bool ret = true;
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
chunk_used = cache->used;
@@ -3802,15 +3860,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
user_thresh = mult_perc(cache->length, bargs->usage);
if (chunk_used < user_thresh)
- ret = 0;
+ ret = false;
btrfs_put_block_group(cache);
return ret;
}
-static int chunk_devid_filter(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk,
- struct btrfs_balance_args *bargs)
+static bool chunk_devid_filter(struct extent_buffer *leaf, struct btrfs_chunk *chunk,
+ struct btrfs_balance_args *bargs)
{
struct btrfs_stripe *stripe;
int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
@@ -3819,10 +3876,10 @@ static int chunk_devid_filter(struct extent_buffer *leaf,
for (i = 0; i < num_stripes; i++) {
stripe = btrfs_stripe_nr(chunk, i);
if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
- return 0;
+ return false;
}
- return 1;
+ return true;
}
static u64 calc_data_stripes(u64 type, int num_stripes)
@@ -3835,9 +3892,8 @@ static u64 calc_data_stripes(u64 type, int num_stripes)
}
/* [pstart, pend) */
-static int chunk_drange_filter(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk,
- struct btrfs_balance_args *bargs)
+static bool chunk_drange_filter(struct extent_buffer *leaf, struct btrfs_chunk *chunk,
+ struct btrfs_balance_args *bargs)
{
struct btrfs_stripe *stripe;
int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
@@ -3848,7 +3904,7 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
int i;
if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
- return 0;
+ return false;
type = btrfs_chunk_type(leaf, chunk);
factor = calc_data_stripes(type, num_stripes);
@@ -3864,56 +3920,53 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
if (stripe_offset < bargs->pend &&
stripe_offset + stripe_length > bargs->pstart)
- return 0;
+ return false;
}
- return 1;
+ return true;
}
/* [vstart, vend) */
-static int chunk_vrange_filter(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk,
- u64 chunk_offset,
- struct btrfs_balance_args *bargs)
+static bool chunk_vrange_filter(struct extent_buffer *leaf, struct btrfs_chunk *chunk,
+ u64 chunk_offset, struct btrfs_balance_args *bargs)
{
if (chunk_offset < bargs->vend &&
chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
/* at least part of the chunk is inside this vrange */
- return 0;
+ return false;
- return 1;
+ return true;
}
-static int chunk_stripes_range_filter(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk,
- struct btrfs_balance_args *bargs)
+static bool chunk_stripes_range_filter(struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk,
+ struct btrfs_balance_args *bargs)
{
int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
if (bargs->stripes_min <= num_stripes
&& num_stripes <= bargs->stripes_max)
- return 0;
+ return false;
- return 1;
+ return true;
}
-static int chunk_soft_convert_filter(u64 chunk_type,
- struct btrfs_balance_args *bargs)
+static bool chunk_soft_convert_filter(u64 chunk_type, struct btrfs_balance_args *bargs)
{
if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
- return 0;
+ return false;
chunk_type = chunk_to_extended(chunk_type) &
BTRFS_EXTENDED_PROFILE_MASK;
if (bargs->target == chunk_type)
- return 1;
+ return true;
- return 0;
+ return false;
}
-static int should_balance_chunk(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk, u64 chunk_offset)
+static bool should_balance_chunk(struct extent_buffer *leaf, struct btrfs_chunk *chunk,
+ u64 chunk_offset)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
@@ -3923,7 +3976,7 @@ static int should_balance_chunk(struct extent_buffer *leaf,
/* type filter */
if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
(bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
- return 0;
+ return false;
}
if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
@@ -3936,46 +3989,46 @@ static int should_balance_chunk(struct extent_buffer *leaf,
/* profiles filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
chunk_profiles_filter(chunk_type, bargs)) {
- return 0;
+ return false;
}
/* usage filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
chunk_usage_filter(fs_info, chunk_offset, bargs)) {
- return 0;
+ return false;
} else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
- return 0;
+ return false;
}
/* devid filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
chunk_devid_filter(leaf, chunk, bargs)) {
- return 0;
+ return false;
}
/* drange filter, makes sense only with devid filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
chunk_drange_filter(leaf, chunk, bargs)) {
- return 0;
+ return false;
}
/* vrange filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
- return 0;
+ return false;
}
/* stripes filter */
if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
chunk_stripes_range_filter(leaf, chunk, bargs)) {
- return 0;
+ return false;
}
/* soft profile changing mode */
if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
chunk_soft_convert_filter(chunk_type, bargs)) {
- return 0;
+ return false;
}
/*
@@ -3983,7 +4036,7 @@ static int should_balance_chunk(struct extent_buffer *leaf,
*/
if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
if (bargs->limit == 0)
- return 0;
+ return false;
else
bargs->limit--;
} else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
@@ -3993,12 +4046,12 @@ static int should_balance_chunk(struct extent_buffer *leaf,
* about the count of all chunks that satisfy the filters.
*/
if (bargs->limit_max == 0)
- return 0;
+ return false;
else
bargs->limit_max--;
}
- return 1;
+ return true;
}
static int __btrfs_balance(struct btrfs_fs_info *fs_info)
@@ -4045,8 +4098,8 @@ again:
bctl->sys.limit = limit_sys;
}
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
- key.offset = (u64)-1;
key.type = BTRFS_CHUNK_ITEM_KEY;
+ key.offset = (u64)-1;
while (1) {
if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
@@ -4613,7 +4666,8 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
}
spin_lock(&fs_info->super_lock);
- ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
+ ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED,
+ "exclusive_operation=%d", fs_info->exclusive_operation);
fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
spin_unlock(&fs_info->super_lock);
/*
@@ -4779,183 +4833,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
return 0;
}
-int btrfs_uuid_scan_kthread(void *data)
-{
- struct btrfs_fs_info *fs_info = data;
- struct btrfs_root *root = fs_info->tree_root;
- struct btrfs_key key;
- struct btrfs_path *path = NULL;
- int ret = 0;
- struct extent_buffer *eb;
- int slot;
- struct btrfs_root_item root_item;
- u32 item_size;
- struct btrfs_trans_handle *trans = NULL;
- bool closing = false;
-
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
- key.objectid = 0;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = 0;
-
- while (1) {
- if (btrfs_fs_closing(fs_info)) {
- closing = true;
- break;
- }
- ret = btrfs_search_forward(root, &key, path,
- BTRFS_OLDEST_GENERATION);
- if (ret) {
- if (ret > 0)
- ret = 0;
- break;
- }
-
- if (key.type != BTRFS_ROOT_ITEM_KEY ||
- (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
- key.objectid != BTRFS_FS_TREE_OBJECTID) ||
- key.objectid > BTRFS_LAST_FREE_OBJECTID)
- goto skip;
-
- eb = path->nodes[0];
- slot = path->slots[0];
- item_size = btrfs_item_size(eb, slot);
- if (item_size < sizeof(root_item))
- goto skip;
-
- read_extent_buffer(eb, &root_item,
- btrfs_item_ptr_offset(eb, slot),
- (int)sizeof(root_item));
- if (btrfs_root_refs(&root_item) == 0)
- goto skip;
-
- if (!btrfs_is_empty_uuid(root_item.uuid) ||
- !btrfs_is_empty_uuid(root_item.received_uuid)) {
- if (trans)
- goto update_tree;
-
- btrfs_release_path(path);
- /*
- * 1 - subvol uuid item
- * 1 - received_subvol uuid item
- */
- trans = btrfs_start_transaction(fs_info->uuid_root, 2);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
- }
- continue;
- } else {
- goto skip;
- }
-update_tree:
- btrfs_release_path(path);
- if (!btrfs_is_empty_uuid(root_item.uuid)) {
- ret = btrfs_uuid_tree_add(trans, root_item.uuid,
- BTRFS_UUID_KEY_SUBVOL,
- key.objectid);
- if (ret < 0) {
- btrfs_warn(fs_info, "uuid_tree_add failed %d",
- ret);
- break;
- }
- }
-
- if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
- ret = btrfs_uuid_tree_add(trans,
- root_item.received_uuid,
- BTRFS_UUID_KEY_RECEIVED_SUBVOL,
- key.objectid);
- if (ret < 0) {
- btrfs_warn(fs_info, "uuid_tree_add failed %d",
- ret);
- break;
- }
- }
-
-skip:
- btrfs_release_path(path);
- if (trans) {
- ret = btrfs_end_transaction(trans);
- trans = NULL;
- if (ret)
- break;
- }
-
- if (key.offset < (u64)-1) {
- key.offset++;
- } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
- key.offset = 0;
- key.type = BTRFS_ROOT_ITEM_KEY;
- } else if (key.objectid < (u64)-1) {
- key.offset = 0;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.objectid++;
- } else {
- break;
- }
- cond_resched();
- }
-
-out:
- btrfs_free_path(path);
- if (trans && !IS_ERR(trans))
- btrfs_end_transaction(trans);
- if (ret)
- btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
- else if (!closing)
- set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
- up(&fs_info->uuid_tree_rescan_sem);
- return 0;
-}
-
-int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *tree_root = fs_info->tree_root;
- struct btrfs_root *uuid_root;
- struct task_struct *task;
- int ret;
-
- /*
- * 1 - root node
- * 1 - root item
- */
- trans = btrfs_start_transaction(tree_root, 2);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
- if (IS_ERR(uuid_root)) {
- ret = PTR_ERR(uuid_root);
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
- return ret;
- }
-
- fs_info->uuid_root = uuid_root;
-
- ret = btrfs_commit_transaction(trans);
- if (ret)
- return ret;
-
- down(&fs_info->uuid_tree_rescan_sem);
- task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
- if (IS_ERR(task)) {
- /* fs_info->update_uuid_tree_gen remains 0 in all error case */
- btrfs_warn(fs_info, "failed to start uuid_scan task");
- up(&fs_info->uuid_tree_rescan_sem);
- return PTR_ERR(task);
- }
-
- return 0;
-}
-
/*
* shrinking a device means finding all of the device extents past
* the new size, and then following the back refs to the chunks.
@@ -5039,8 +4916,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
again:
key.objectid = device->devid;
- key.offset = (u64)-1;
key.type = BTRFS_DEV_EXTENT_KEY;
+ key.offset = (u64)-1;
do {
mutex_lock(&fs_info->reclaim_bgs_lock);
@@ -5126,8 +5003,8 @@ again:
mutex_lock(&fs_info->chunk_mutex);
/* Clear all state bits beyond the shrunk device size */
- clear_extent_bits(&device->alloc_state, new_size, (u64)-1,
- CHUNK_STATE_MASK);
+ btrfs_clear_extent_bits(&device->alloc_state, new_size, (u64)-1,
+ CHUNK_STATE_MASK);
btrfs_device_set_disk_total_bytes(device, new_size);
if (list_empty(&device->post_commit_list))
@@ -5254,6 +5131,8 @@ struct alloc_chunk_ctl {
u64 stripe_size;
u64 chunk_size;
int ndevs;
+ /* Space_info the block group is going to belong. */
+ struct btrfs_space_info *space_info;
};
static void init_alloc_chunk_ctl_policy_regular(
@@ -5327,14 +5206,15 @@ static void init_alloc_chunk_ctl(struct btrfs_fs_devices *fs_devices,
ctl->ndevs = 0;
switch (fs_devices->chunk_alloc_policy) {
+ default:
+ btrfs_warn_unknown_chunk_allocation(fs_devices->chunk_alloc_policy);
+ fallthrough;
case BTRFS_CHUNK_ALLOC_REGULAR:
init_alloc_chunk_ctl_policy_regular(fs_devices, ctl);
break;
case BTRFS_CHUNK_ALLOC_ZONED:
init_alloc_chunk_ctl_policy_zoned(fs_devices, ctl);
break;
- default:
- BUG();
}
}
@@ -5473,20 +5353,24 @@ static int decide_stripe_size_zoned(struct alloc_chunk_ctl *ctl,
* It should hold because:
* dev_extent_min == dev_extent_want == zone_size * dev_stripes
*/
- ASSERT(devices_info[ctl->ndevs - 1].max_avail == ctl->dev_extent_min);
+ ASSERT(devices_info[ctl->ndevs - 1].max_avail == ctl->dev_extent_min,
+ "ndevs=%d max_avail=%llu dev_extent_min=%llu", ctl->ndevs,
+ devices_info[ctl->ndevs - 1].max_avail, ctl->dev_extent_min);
ctl->stripe_size = zone_size;
ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
- /* stripe_size is fixed in zoned filesysmte. Reduce ndevs instead. */
+ /* stripe_size is fixed in zoned filesystem. Reduce ndevs instead. */
if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
ctl->ndevs = div_u64(div_u64(ctl->max_chunk_size * ctl->ncopies,
ctl->stripe_size) + ctl->nparity,
ctl->dev_stripes);
ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
- ASSERT(ctl->stripe_size * data_stripes <= ctl->max_chunk_size);
+ ASSERT(ctl->stripe_size * data_stripes <= ctl->max_chunk_size,
+ "stripe_size=%llu data_stripes=%d max_chunk_size=%llu",
+ ctl->stripe_size, data_stripes, ctl->max_chunk_size);
}
ctl->chunk_size = ctl->stripe_size * data_stripes;
@@ -5519,12 +5403,13 @@ static int decide_stripe_size(struct btrfs_fs_devices *fs_devices,
ctl->ndevs = min(ctl->ndevs, ctl->devs_max);
switch (fs_devices->chunk_alloc_policy) {
+ default:
+ btrfs_warn_unknown_chunk_allocation(fs_devices->chunk_alloc_policy);
+ fallthrough;
case BTRFS_CHUNK_ALLOC_REGULAR:
return decide_stripe_size_regular(ctl, devices_info);
case BTRFS_CHUNK_ALLOC_ZONED:
return decide_stripe_size_zoned(ctl, devices_info);
- default:
- BUG();
}
}
@@ -5534,9 +5419,9 @@ static void chunk_map_device_set_bits(struct btrfs_chunk_map *map, unsigned int
struct btrfs_io_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
- set_extent_bit(&device->alloc_state, stripe->physical,
- stripe->physical + map->stripe_size - 1,
- bits | EXTENT_NOWAIT, NULL);
+ btrfs_set_extent_bit(&device->alloc_state, stripe->physical,
+ stripe->physical + map->stripe_size - 1,
+ bits | EXTENT_NOWAIT, NULL);
}
}
@@ -5546,10 +5431,9 @@ static void chunk_map_device_clear_bits(struct btrfs_chunk_map *map, unsigned in
struct btrfs_io_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
- __clear_extent_bit(&device->alloc_state, stripe->physical,
- stripe->physical + map->stripe_size - 1,
- bits | EXTENT_NOWAIT,
- NULL, NULL);
+ btrfs_clear_extent_bits(&device->alloc_state, stripe->physical,
+ stripe->physical + map->stripe_size - 1,
+ bits | EXTENT_NOWAIT);
}
}
@@ -5565,33 +5449,34 @@ void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_ma
btrfs_free_chunk_map(map);
}
+static int btrfs_chunk_map_cmp(const struct rb_node *new,
+ const struct rb_node *exist)
+{
+ const struct btrfs_chunk_map *new_map =
+ rb_entry(new, struct btrfs_chunk_map, rb_node);
+ const struct btrfs_chunk_map *exist_map =
+ rb_entry(exist, struct btrfs_chunk_map, rb_node);
+
+ if (new_map->start == exist_map->start)
+ return 0;
+ if (new_map->start < exist_map->start)
+ return -1;
+ return 1;
+}
+
EXPORT_FOR_TESTS
int btrfs_add_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map)
{
- struct rb_node **p;
- struct rb_node *parent = NULL;
- bool leftmost = true;
+ struct rb_node *exist;
write_lock(&fs_info->mapping_tree_lock);
- p = &fs_info->mapping_tree.rb_root.rb_node;
- while (*p) {
- struct btrfs_chunk_map *entry;
-
- parent = *p;
- entry = rb_entry(parent, struct btrfs_chunk_map, rb_node);
-
- if (map->start < entry->start) {
- p = &(*p)->rb_left;
- } else if (map->start > entry->start) {
- p = &(*p)->rb_right;
- leftmost = false;
- } else {
- write_unlock(&fs_info->mapping_tree_lock);
- return -EEXIST;
- }
+ exist = rb_find_add_cached(&map->rb_node, &fs_info->mapping_tree,
+ btrfs_chunk_map_cmp);
+
+ if (exist) {
+ write_unlock(&fs_info->mapping_tree_lock);
+ return -EEXIST;
}
- rb_link_node(&map->rb_node, parent, p);
- rb_insert_color_cached(&map->rb_node, &fs_info->mapping_tree, leftmost);
chunk_map_device_set_bits(map, CHUNK_ALLOCATED);
chunk_map_device_clear_bits(map, CHUNK_TRIMMED);
write_unlock(&fs_info->mapping_tree_lock);
@@ -5614,21 +5499,6 @@ struct btrfs_chunk_map *btrfs_alloc_chunk_map(int num_stripes, gfp_t gfp)
return map;
}
-struct btrfs_chunk_map *btrfs_clone_chunk_map(struct btrfs_chunk_map *map, gfp_t gfp)
-{
- const int size = btrfs_chunk_map_size(map->num_stripes);
- struct btrfs_chunk_map *clone;
-
- clone = kmemdup(map, size, gfp);
- if (!clone)
- return NULL;
-
- refcount_set(&clone->refs, 1);
- RB_CLEAR_NODE(&clone->rb_node);
-
- return clone;
-}
-
static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
struct alloc_chunk_ctl *ctl,
struct btrfs_device_info *devices_info)
@@ -5639,8 +5509,6 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
u64 start = ctl->start;
u64 type = ctl->type;
int ret;
- int i;
- int j;
map = btrfs_alloc_chunk_map(ctl->num_stripes, GFP_NOFS);
if (!map)
@@ -5655,8 +5523,8 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
map->sub_stripes = ctl->sub_stripes;
map->num_stripes = ctl->num_stripes;
- for (i = 0; i < ctl->ndevs; ++i) {
- for (j = 0; j < ctl->dev_stripes; ++j) {
+ for (int i = 0; i < ctl->ndevs; i++) {
+ for (int j = 0; j < ctl->dev_stripes; j++) {
int s = i * ctl->dev_stripes + j;
map->stripes[s].dev = devices_info[i].dev;
map->stripes[s].physical = devices_info[i].dev_offset +
@@ -5672,7 +5540,8 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
return ERR_PTR(ret);
}
- block_group = btrfs_make_block_group(trans, type, start, ctl->chunk_size);
+ block_group = btrfs_make_block_group(trans, ctl->space_info, type, start,
+ ctl->chunk_size);
if (IS_ERR(block_group)) {
btrfs_remove_chunk_map(info, map);
return block_group;
@@ -5698,7 +5567,8 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
}
struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
- u64 type)
+ struct btrfs_space_info *space_info,
+ u64 type)
{
struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_fs_devices *fs_devices = info->fs_devices;
@@ -5710,7 +5580,7 @@ struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
lockdep_assert_held(&info->chunk_mutex);
if (!alloc_profile_is_valid(type, 0)) {
- ASSERT(0);
+ DEBUG_WARN("invalid alloc profile for type %llu", type);
return ERR_PTR(-EINVAL);
}
@@ -5722,12 +5592,13 @@ struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
btrfs_err(info, "invalid chunk type 0x%llx requested", type);
- ASSERT(0);
+ DEBUG_WARN();
return ERR_PTR(-EINVAL);
}
ctl.start = find_next_chunk(info);
ctl.type = type;
+ ctl.space_info = space_info;
init_alloc_chunk_ctl(fs_devices, &ctl);
devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
@@ -5871,7 +5742,9 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
struct btrfs_fs_info *fs_info = trans->fs_info;
u64 alloc_profile;
struct btrfs_block_group *meta_bg;
+ struct btrfs_space_info *meta_space_info;
struct btrfs_block_group *sys_bg;
+ struct btrfs_space_info *sys_space_info;
/*
* When adding a new device for sprouting, the seed device is read-only
@@ -5895,12 +5768,22 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
*/
alloc_profile = btrfs_metadata_alloc_profile(fs_info);
- meta_bg = btrfs_create_chunk(trans, alloc_profile);
+ meta_space_info = btrfs_find_space_info(fs_info, alloc_profile);
+ if (!meta_space_info) {
+ DEBUG_WARN();
+ return -EINVAL;
+ }
+ meta_bg = btrfs_create_chunk(trans, meta_space_info, alloc_profile);
if (IS_ERR(meta_bg))
return PTR_ERR(meta_bg);
alloc_profile = btrfs_system_alloc_profile(fs_info);
- sys_bg = btrfs_create_chunk(trans, alloc_profile);
+ sys_space_info = btrfs_find_space_info(fs_info, alloc_profile);
+ if (!sys_space_info) {
+ DEBUG_WARN();
+ return -EINVAL;
+ }
+ sys_bg = btrfs_create_chunk(trans, sys_space_info, alloc_profile);
if (IS_ERR(sys_bg))
return PTR_ERR(sys_bg);
@@ -5968,11 +5851,31 @@ void btrfs_mapping_tree_free(struct btrfs_fs_info *fs_info)
write_unlock(&fs_info->mapping_tree_lock);
}
+static int btrfs_chunk_map_num_copies(const struct btrfs_chunk_map *map)
+{
+ enum btrfs_raid_types index = btrfs_bg_flags_to_raid_index(map->type);
+
+ if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+ return 2;
+
+ /*
+ * There could be two corrupted data stripes, we need to loop retry in
+ * order to rebuild the correct data.
+ *
+ * Fail a stripe at a time on every retry except the stripe under
+ * reconstruction.
+ */
+ if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+ return map->num_stripes;
+
+ /* Non-RAID56, use their ncopies from btrfs_raid_array. */
+ return btrfs_raid_array[index].ncopies;
+}
+
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
{
struct btrfs_chunk_map *map;
- enum btrfs_raid_types index;
- int ret = 1;
+ int ret;
map = btrfs_get_chunk_map(fs_info, logical, len);
if (IS_ERR(map))
@@ -5984,22 +5887,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
*/
return 1;
- index = btrfs_bg_flags_to_raid_index(map->type);
-
- /* Non-RAID56, use their ncopies from btrfs_raid_array. */
- if (!(map->type & BTRFS_BLOCK_GROUP_RAID56_MASK))
- ret = btrfs_raid_array[index].ncopies;
- else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
- ret = 2;
- else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
- /*
- * There could be two corrupted data stripes, we need
- * to loop retry in order to rebuild the correct data.
- *
- * Fail a stripe at a time on every retry except the
- * stripe under reconstruction.
- */
- ret = map->num_stripes;
+ ret = btrfs_chunk_map_num_copies(map);
btrfs_free_chunk_map(map);
return ret;
}
@@ -6023,27 +5911,79 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
return len;
}
-int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+static int btrfs_read_preferred(struct btrfs_chunk_map *map, int first, int num_stripes)
{
- struct btrfs_chunk_map *map;
- int ret = 0;
+ for (int index = first; index < first + num_stripes; index++) {
+ const struct btrfs_device *device = map->stripes[index].dev;
- if (!btrfs_fs_incompat(fs_info, RAID56))
- return 0;
+ if (device->devid == READ_ONCE(device->fs_devices->read_devid))
+ return index;
+ }
- map = btrfs_get_chunk_map(fs_info, logical, len);
+ /* If no read-preferred device is set use the first stripe. */
+ return first;
+}
- if (!WARN_ON(IS_ERR(map))) {
- if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
- ret = 1;
- btrfs_free_chunk_map(map);
+struct stripe_mirror {
+ u64 devid;
+ int num;
+};
+
+static int btrfs_cmp_devid(const void *a, const void *b)
+{
+ const struct stripe_mirror *s1 = (const struct stripe_mirror *)a;
+ const struct stripe_mirror *s2 = (const struct stripe_mirror *)b;
+
+ if (s1->devid < s2->devid)
+ return -1;
+ if (s1->devid > s2->devid)
+ return 1;
+ return 0;
+}
+
+/*
+ * Select a stripe for reading using the round-robin algorithm.
+ *
+ * 1. Compute the read cycle as the total sectors read divided by the minimum
+ * sectors per device.
+ * 2. Determine the stripe number for the current read by taking the modulus
+ * of the read cycle with the total number of stripes:
+ *
+ * stripe index = (total sectors / min sectors per dev) % num stripes
+ *
+ * The calculated stripe index is then used to select the corresponding device
+ * from the list of devices, which is ordered by devid.
+ */
+static int btrfs_read_rr(const struct btrfs_chunk_map *map, int first, int num_stripes)
+{
+ struct stripe_mirror stripes[BTRFS_RAID1_MAX_MIRRORS] = { 0 };
+ struct btrfs_device *device = map->stripes[first].dev;
+ struct btrfs_fs_info *fs_info = device->fs_devices->fs_info;
+ unsigned int read_cycle;
+ unsigned int total_reads;
+ unsigned int min_reads_per_dev;
+
+ total_reads = percpu_counter_sum(&fs_info->stats_read_blocks);
+ min_reads_per_dev = READ_ONCE(fs_info->fs_devices->rr_min_contig_read) >>
+ fs_info->sectorsize_bits;
+
+ for (int index = 0, i = first; i < first + num_stripes; i++) {
+ stripes[index].devid = map->stripes[i].dev->devid;
+ stripes[index].num = i;
+ index++;
}
- return ret;
+ sort(stripes, num_stripes, sizeof(struct stripe_mirror),
+ btrfs_cmp_devid, NULL);
+
+ read_cycle = total_reads / min_reads_per_dev;
+ return stripes[read_cycle % num_stripes].num;
}
+#endif
static int find_live_mirror(struct btrfs_fs_info *fs_info,
struct btrfs_chunk_map *map, int first,
- int dev_replace_is_ongoing)
+ bool dev_replace_is_ongoing)
{
const enum btrfs_read_policy policy = READ_ONCE(fs_info->fs_devices->read_policy);
int i;
@@ -6052,8 +5992,8 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
int tolerance;
struct btrfs_device *srcdev;
- ASSERT((map->type &
- (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));
+ ASSERT((map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)),
+ "type=%llu", map->type);
if (map->type & BTRFS_BLOCK_GROUP_RAID10)
num_stripes = map->sub_stripes;
@@ -6070,6 +6010,14 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
case BTRFS_READ_POLICY_PID:
preferred_mirror = first + (current->pid % num_stripes);
break;
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ case BTRFS_READ_POLICY_RR:
+ preferred_mirror = btrfs_read_rr(map, first, num_stripes);
+ break;
+ case BTRFS_READ_POLICY_DEVID:
+ preferred_mirror = btrfs_read_preferred(map, first, num_stripes);
+ break;
+#endif
}
if (dev_replace_is_ongoing &&
@@ -6101,9 +6049,9 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
return preferred_mirror;
}
-static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
- u64 logical,
- u16 total_stripes)
+EXPORT_FOR_TESTS
+struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
+ u64 logical, u16 total_stripes)
{
struct btrfs_io_context *bioc;
@@ -6299,20 +6247,19 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical)
return ret;
}
-static void handle_ops_on_dev_replace(enum btrfs_map_op op,
- struct btrfs_io_context *bioc,
+static void handle_ops_on_dev_replace(struct btrfs_io_context *bioc,
struct btrfs_dev_replace *dev_replace,
u64 logical,
- int *num_stripes_ret, int *max_errors_ret)
+ struct btrfs_io_geometry *io_geom)
{
u64 srcdev_devid = dev_replace->srcdev->devid;
/*
* At this stage, num_stripes is still the real number of stripes,
* excluding the duplicated stripes.
*/
- int num_stripes = *num_stripes_ret;
+ int num_stripes = io_geom->num_stripes;
+ int max_errors = io_geom->max_errors;
int nr_extra_stripes = 0;
- int max_errors = *max_errors_ret;
int i;
/*
@@ -6347,18 +6294,19 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
}
/* We can only have at most 2 extra nr_stripes (for DUP). */
- ASSERT(nr_extra_stripes <= 2);
+ ASSERT(nr_extra_stripes <= 2, "nr_extra_stripes=%d", nr_extra_stripes);
/*
* For GET_READ_MIRRORS, we can only return at most 1 extra stripe for
* replace.
* If we have 2 extra stripes, only choose the one with smaller physical.
*/
- if (op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) {
+ if (io_geom->op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) {
struct btrfs_io_stripe *first = &bioc->stripes[num_stripes];
struct btrfs_io_stripe *second = &bioc->stripes[num_stripes + 1];
/* Only DUP can have two extra stripes. */
- ASSERT(bioc->map_type & BTRFS_BLOCK_GROUP_DUP);
+ ASSERT(bioc->map_type & BTRFS_BLOCK_GROUP_DUP,
+ "map_type=%llu", bioc->map_type);
/*
* Swap the last stripe stripes and reduce @nr_extra_stripes.
@@ -6371,8 +6319,8 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
}
}
- *num_stripes_ret = num_stripes + nr_extra_stripes;
- *max_errors_ret = max_errors + nr_extra_stripes;
+ io_geom->num_stripes = num_stripes + nr_extra_stripes;
+ io_geom->max_errors = max_errors + nr_extra_stripes;
bioc->replace_nr_stripes = nr_extra_stripes;
}
@@ -6385,7 +6333,8 @@ static u64 btrfs_max_io_len(struct btrfs_chunk_map *map, u64 offset,
*/
io_geom->stripe_offset = offset & BTRFS_STRIPE_LEN_MASK;
io_geom->stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT;
- ASSERT(io_geom->stripe_offset < U32_MAX);
+ ASSERT(io_geom->stripe_offset < U32_MAX,
+ "stripe_offset=%llu", io_geom->stripe_offset);
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
unsigned long full_stripe_len =
@@ -6403,8 +6352,12 @@ static u64 btrfs_max_io_len(struct btrfs_chunk_map *map, u64 offset,
io_geom->raid56_full_stripe_start = btrfs_stripe_nr_to_offset(
rounddown(io_geom->stripe_nr, nr_data_stripes(map)));
- ASSERT(io_geom->raid56_full_stripe_start + full_stripe_len > offset);
- ASSERT(io_geom->raid56_full_stripe_start <= offset);
+ ASSERT(io_geom->raid56_full_stripe_start + full_stripe_len > offset,
+ "raid56_full_stripe_start=%llu full_stripe_len=%lu offset=%llu",
+ io_geom->raid56_full_stripe_start, full_stripe_len, offset);
+ ASSERT(io_geom->raid56_full_stripe_start <= offset,
+ "raid56_full_stripe_start=%llu offset=%llu",
+ io_geom->raid56_full_stripe_start, offset);
/*
* For writes to RAID56, allow to write a full stripe set, but
* no straddling of stripe sets.
@@ -6429,8 +6382,7 @@ static int set_io_stripe(struct btrfs_fs_info *fs_info, u64 logical,
{
dst->dev = map->stripes[io_geom->stripe_index].dev;
- if (io_geom->op == BTRFS_MAP_READ &&
- btrfs_need_stripe_tree_update(fs_info, map->type))
+ if (io_geom->op == BTRFS_MAP_READ && io_geom->use_rst)
return btrfs_get_raid_extent_offset(fs_info, logical, length,
map->type,
io_geom->stripe_index, dst);
@@ -6445,7 +6397,7 @@ static bool is_single_device_io(struct btrfs_fs_info *fs_info,
const struct btrfs_io_stripe *smap,
const struct btrfs_chunk_map *map,
int num_alloc_stripes,
- enum btrfs_map_op op, int mirror_num)
+ struct btrfs_io_geometry *io_geom)
{
if (!smap)
return false;
@@ -6453,10 +6405,10 @@ static bool is_single_device_io(struct btrfs_fs_info *fs_info,
if (num_alloc_stripes != 1)
return false;
- if (btrfs_need_stripe_tree_update(fs_info, map->type) && op != BTRFS_MAP_READ)
+ if (io_geom->use_rst && io_geom->op != BTRFS_MAP_READ)
return false;
- if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)
+ if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && io_geom->mirror_num > 1)
return false;
return true;
@@ -6571,7 +6523,7 @@ static void map_blocks_raid56_read(struct btrfs_chunk_map *map,
{
int data_stripes = nr_data_stripes(map);
- ASSERT(io_geom->mirror_num <= 1);
+ ASSERT(io_geom->mirror_num <= 1, "mirror_num=%d", io_geom->mirror_num);
/* Just grab the data stripe directly. */
io_geom->stripe_index = io_geom->stripe_nr % data_stripes;
io_geom->stripe_nr /= data_stripes;
@@ -6635,12 +6587,11 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
struct btrfs_chunk_map *map;
struct btrfs_io_geometry io_geom = { 0 };
u64 map_offset;
- int i;
int ret = 0;
int num_copies;
struct btrfs_io_context *bioc = NULL;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
- int dev_replace_is_ongoing = 0;
+ bool dev_replace_is_ongoing = false;
u16 num_alloc_stripes;
u64 max_len;
@@ -6651,26 +6602,29 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
io_geom.stripe_index = 0;
io_geom.op = op;
- num_copies = btrfs_num_copies(fs_info, logical, fs_info->sectorsize);
- if (io_geom.mirror_num > num_copies)
- return -EINVAL;
-
map = btrfs_get_chunk_map(fs_info, logical, *length);
if (IS_ERR(map))
return PTR_ERR(map);
+ num_copies = btrfs_chunk_map_num_copies(map);
+ if (io_geom.mirror_num > num_copies)
+ return -EINVAL;
+
map_offset = logical - map->start;
io_geom.raid56_full_stripe_start = (u64)-1;
max_len = btrfs_max_io_len(map, map_offset, &io_geom);
*length = min_t(u64, map->chunk_len - map_offset, max_len);
+ io_geom.use_rst = btrfs_need_stripe_tree_update(fs_info, map->type);
+
+ if (dev_replace->replace_task != current)
+ down_read(&dev_replace->rwsem);
- down_read(&dev_replace->rwsem);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
/*
* Hold the semaphore for read during the whole operation, write is
* requested at commit time but must wait.
*/
- if (!dev_replace_is_ongoing)
+ if (!dev_replace_is_ongoing && dev_replace->replace_task != current)
up_read(&dev_replace->rwsem);
switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
@@ -6729,8 +6683,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* physical block information on the stack instead of allocating an
* I/O context structure.
*/
- if (is_single_device_io(fs_info, smap, map, num_alloc_stripes, op,
- io_geom.mirror_num)) {
+ if (is_single_device_io(fs_info, smap, map, num_alloc_stripes, &io_geom)) {
ret = set_io_stripe(fs_info, logical, length, smap, map, &io_geom);
if (mirror_num_ret)
*mirror_num_ret = io_geom.mirror_num;
@@ -6744,6 +6697,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
goto out;
}
bioc->map_type = map->type;
+ bioc->use_rst = io_geom.use_rst;
/*
* For RAID56 full map, we need to make sure the stripes[] follows the
@@ -6781,7 +6735,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* For all other non-RAID56 profiles, just copy the target
* stripe into the bioc.
*/
- for (i = 0; i < io_geom.num_stripes; i++) {
+ for (int i = 0; i < io_geom.num_stripes; i++) {
ret = set_io_stripe(fs_info, logical, length,
&bioc->stripes[i], map, &io_geom);
if (ret < 0)
@@ -6801,8 +6755,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
op != BTRFS_MAP_READ) {
- handle_ops_on_dev_replace(op, bioc, dev_replace, logical,
- &io_geom.num_stripes, &io_geom.max_errors);
+ handle_ops_on_dev_replace(bioc, dev_replace, logical, &io_geom);
}
*bioc_ret = bioc;
@@ -6811,7 +6764,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
bioc->mirror_num = io_geom.mirror_num;
out:
- if (dev_replace_is_ongoing) {
+ if (dev_replace_is_ongoing && dev_replace->replace_task != current) {
lockdep_assert_held(&dev_replace->rwsem);
/* Unlock and let waiting writers proceed */
up_read(&dev_replace->rwsem);
@@ -6943,7 +6896,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
atomic_set(&dev->dev_stats_ccnt, 0);
btrfs_device_data_ordered_init(dev);
- extent_io_tree_init(fs_info, &dev->alloc_state, IO_TREE_DEVICE_ALLOC_STATE);
+ btrfs_extent_io_tree_init(fs_info, &dev->alloc_state, IO_TREE_DEVICE_ALLOC_STATE);
if (devid)
tmp = *devid;
@@ -7085,16 +7038,6 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
warn_32bit_meta_chunk(fs_info, logical, length, type);
#endif
- /*
- * Only need to verify chunk item if we're reading from sys chunk array,
- * as chunk item in tree block is already verified by tree-checker.
- */
- if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
- ret = btrfs_check_chunk_valid(leaf, chunk, logical);
- if (ret)
- return ret;
- }
-
map = btrfs_find_chunk_map(fs_info, logical, 1);
/* already mapped? */
@@ -7155,6 +7098,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
btrfs_err(fs_info,
"failed to add chunk map, start=%llu len=%llu: %d",
map->start, map->chunk_len, ret);
+ btrfs_free_chunk_map(map);
}
return ret;
@@ -7200,8 +7144,12 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
fs_devices = find_fsid(fsid, NULL);
if (!fs_devices) {
- if (!btrfs_test_opt(fs_info, DEGRADED))
+ if (!btrfs_test_opt(fs_info, DEGRADED)) {
+ btrfs_err(fs_info,
+ "failed to find fsid %pU when attempting to open seed devices",
+ fsid);
return ERR_PTR(-ENOENT);
+ }
fs_devices = alloc_fs_devices(fsid);
if (IS_ERR(fs_devices))
@@ -7352,16 +7300,11 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
{
struct btrfs_super_block *super_copy = fs_info->super_copy;
struct extent_buffer *sb;
- struct btrfs_disk_key *disk_key;
- struct btrfs_chunk *chunk;
u8 *array_ptr;
unsigned long sb_array_offset;
int ret = 0;
- u32 num_stripes;
u32 array_size;
- u32 len = 0;
u32 cur_offset;
- u64 type;
struct btrfs_key key;
ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);
@@ -7384,10 +7327,15 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
cur_offset = 0;
while (cur_offset < array_size) {
- disk_key = (struct btrfs_disk_key *)array_ptr;
- len = sizeof(*disk_key);
- if (cur_offset + len > array_size)
- goto out_short_read;
+ struct btrfs_chunk *chunk;
+ struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)array_ptr;
+ u32 len = sizeof(*disk_key);
+
+ /*
+ * The sys_chunk_array has been already verified at super block
+ * read time. Only do ASSERT()s for basic checks.
+ */
+ ASSERT(cur_offset + len <= array_size);
btrfs_disk_key_to_cpu(&key, disk_key);
@@ -7395,44 +7343,14 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
sb_array_offset += len;
cur_offset += len;
- if (key.type != BTRFS_CHUNK_ITEM_KEY) {
- btrfs_err(fs_info,
- "unexpected item type %u in sys_array at offset %u",
- (u32)key.type, cur_offset);
- ret = -EIO;
- break;
- }
+ ASSERT(key.type == BTRFS_CHUNK_ITEM_KEY);
chunk = (struct btrfs_chunk *)sb_array_offset;
- /*
- * At least one btrfs_chunk with one stripe must be present,
- * exact stripe count check comes afterwards
- */
- len = btrfs_chunk_item_size(1);
- if (cur_offset + len > array_size)
- goto out_short_read;
+ ASSERT(btrfs_chunk_type(sb, chunk) & BTRFS_BLOCK_GROUP_SYSTEM);
- num_stripes = btrfs_chunk_num_stripes(sb, chunk);
- if (!num_stripes) {
- btrfs_err(fs_info,
- "invalid number of stripes %u in sys_array at offset %u",
- num_stripes, cur_offset);
- ret = -EIO;
- break;
- }
+ len = btrfs_chunk_item_size(btrfs_chunk_num_stripes(sb, chunk));
- type = btrfs_chunk_type(sb, chunk);
- if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
- btrfs_err(fs_info,
- "invalid chunk type %llu in sys_array at offset %u",
- type, cur_offset);
- ret = -EIO;
- break;
- }
-
- len = btrfs_chunk_item_size(num_stripes);
- if (cur_offset + len > array_size)
- goto out_short_read;
+ ASSERT(cur_offset + len <= array_size);
ret = read_one_chunk(&key, sb, chunk);
if (ret)
@@ -7445,13 +7363,6 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
clear_extent_buffer_uptodate(sb);
free_extent_buffer_stale(sb);
return ret;
-
-out_short_read:
- btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
- len, cur_offset);
- clear_extent_buffer_uptodate(sb);
- free_extent_buffer_stale(sb);
- return -EIO;
}
/*
@@ -7571,8 +7482,8 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
* item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
*/
key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
- key.offset = 0;
key.type = 0;
+ key.offset = 0;
btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
struct extent_buffer *node = path->nodes[1];
@@ -7651,8 +7562,6 @@ int btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
struct btrfs_device *device;
int ret = 0;
- fs_devices->fs_info = fs_info;
-
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry(device, &fs_devices->devices, dev_list)
device->fs_info = fs_info;
@@ -7828,8 +7737,6 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
btrfs_set_dev_stats_value(eb, ptr, i,
btrfs_dev_stat_read(device, i));
- btrfs_mark_buffer_dirty(trans, eb);
-
out:
btrfs_free_path(path);
return ret;
@@ -7961,7 +7868,7 @@ void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
{
struct btrfs_device *curr, *next;
- ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);
+ ASSERT(trans->state == TRANS_STATE_COMMIT_DOING, "state=%d" , trans->state);
if (list_empty(&trans->dev_update_list))
return;
@@ -8330,7 +8237,7 @@ static void map_raid56_repair_block(struct btrfs_io_context *bioc,
logical < stripe_start + BTRFS_STRIPE_LEN)
break;
}
- ASSERT(i < data_stripes);
+ ASSERT(i < data_stripes, "i=%d data_stripes=%d", i, data_stripes);
smap->dev = bioc->stripes[i].dev;
smap->physical = bioc->stripes[i].physical +
((logical - bioc->full_stripe_logical) &
@@ -8359,7 +8266,7 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
int mirror_ret = mirror_num;
int ret;
- ASSERT(mirror_num > 0);
+ ASSERT(mirror_num > 0, "mirror_num=%d", mirror_num);
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length,
&bioc, smap, &mirror_ret);
@@ -8367,7 +8274,7 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
return ret;
/* The map range should not cross stripe boundary. */
- ASSERT(map_length >= length);
+ ASSERT(map_length >= length, "map_length=%llu length=%u", map_length, length);
/* Already mapped to single stripe. */
if (!bioc)
@@ -8379,7 +8286,8 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
goto out;
}
- ASSERT(mirror_num <= bioc->num_stripes);
+ ASSERT(mirror_num <= bioc->num_stripes,
+ "mirror_num=%d num_stripes=%d", mirror_num, bioc->num_stripes);
smap->dev = bioc->stripes[mirror_num - 1].dev;
smap->physical = bioc->stripes[mirror_num - 1].physical;
out: