diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
| -rw-r--r-- | fs/btrfs/volumes.c | 1107 |
1 files changed, 527 insertions, 580 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1cccaf9c2b0d..ae1742a35e76 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -13,12 +13,11 @@ #include <linux/list_sort.h> #include <linux/namei.h> #include "misc.h" -#include "ctree.h" #include "disk-io.h" +#include "extent-tree.h" #include "transaction.h" #include "volumes.h" #include "raid56.h" -#include "rcu-string.h" #include "dev-replace.h" #include "sysfs.h" #include "tree-checker.h" @@ -48,6 +47,7 @@ struct btrfs_io_geometry { u64 raid56_full_stripe_start; int max_errors; enum btrfs_map_op op; + bool use_rst; }; const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { @@ -213,10 +213,8 @@ void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf) u64 flags = bg_flags; u32 size_bp = size_buf; - if (!flags) { - strcpy(bp, "NONE"); + if (!flags) return; - } #define DESCRIBE_FLAG(flag, desc) \ do { \ @@ -402,8 +400,12 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) static void btrfs_free_device(struct btrfs_device *device) { WARN_ON(!list_empty(&device->post_commit_list)); - rcu_string_free(device->name); - extent_io_tree_release(&device->alloc_state); + /* + * No need to call kfree_rcu() nor do RCU lock/unlock, nothing is + * reading the device name. + */ + kfree(rcu_dereference_raw(device->name)); + btrfs_extent_io_tree_release(&device->alloc_state); btrfs_destroy_dev_zone_info(device); kfree(device); } @@ -413,9 +415,10 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) struct btrfs_device *device; WARN_ON(fs_devices->opened); + WARN_ON(fs_devices->holding); while (!list_empty(&fs_devices->devices)) { - device = list_entry(fs_devices->devices.next, - struct btrfs_device, dev_list); + device = list_first_entry(&fs_devices->devices, + struct btrfs_device, dev_list); list_del(&device->dev_list); btrfs_free_device(device); } @@ -427,8 +430,8 @@ void __exit btrfs_cleanup_fs_uuids(void) struct btrfs_fs_devices *fs_devices; while (!list_empty(&fs_uuids)) { - fs_devices = list_entry(fs_uuids.next, - struct btrfs_fs_devices, fs_list); + fs_devices = list_first_entry(&fs_uuids, struct btrfs_fs_devices, + fs_list); list_del(&fs_devices->fs_list); free_fs_devices(fs_devices); } @@ -472,7 +475,7 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder, struct block_device *bdev; int ret; - *bdev_file = bdev_file_open_by_path(device_path, flags, holder, NULL); + *bdev_file = bdev_file_open_by_path(device_path, flags, holder, &fs_holder_ops); if (IS_ERR(*bdev_file)) { ret = PTR_ERR(*bdev_file); @@ -487,15 +490,15 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder, if (holder) { ret = set_blocksize(*bdev_file, BTRFS_BDEV_BLOCKSIZE); if (ret) { - fput(*bdev_file); + bdev_fput(*bdev_file); goto error; } } invalidate_bdev(bdev); - *disk_super = btrfs_read_dev_super(bdev); + *disk_super = btrfs_read_disk_super(bdev, 0, false); if (IS_ERR(*disk_super)) { ret = PTR_ERR(*disk_super); - fput(*bdev_file); + bdev_fput(*bdev_file); goto error; } @@ -540,7 +543,7 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device continue; if (devt && devt != device->devt) continue; - if (fs_devices->opened) { + if (fs_devices->opened || fs_devices->holding) { if (devt) ret = -EBUSY; break; @@ -656,7 +659,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, if (!device->name) return -EINVAL; - ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1, + ret = btrfs_get_bdev_and_sb(rcu_dereference_raw(device->name), flags, holder, 1, &bdev_file, &disk_super); if (ret) return ret; @@ -673,8 +676,8 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { if (btrfs_super_incompat_flags(disk_super) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID) { - pr_err( - "BTRFS: Invalid seeding and uuid-changed device detected\n"); + btrfs_err(NULL, + "invalid seeding and uuid-changed device detected"); goto error_free_page; } @@ -700,7 +703,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, if (device->devt != device->bdev->bd_dev) { btrfs_warn(NULL, "device %s maj:min changed from %d:%d to %d:%d", - device->name->str, MAJOR(device->devt), + rcu_dereference_raw(device->name), MAJOR(device->devt), MINOR(device->devt), MAJOR(device->bdev->bd_dev), MINOR(device->bdev->bd_dev)); @@ -719,7 +722,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, error_free_page: btrfs_release_disk_super(disk_super); - fput(bdev_file); + bdev_fput(bdev_file); return -EINVAL; } @@ -732,83 +735,11 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb) return has_metadata_uuid ? sb->metadata_uuid : sb->fsid; } -/* - * We can have very weird soft links passed in. - * One example is "/proc/self/fd/<fd>", which can be a soft link to - * a block device. - * - * But it's never a good idea to use those weird names. - * Here we check if the path (not following symlinks) is a good one inside - * "/dev/". - */ -static bool is_good_dev_path(const char *dev_path) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - bool is_good = false; - int ret; - - if (!dev_path) - goto out; - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) - goto out; - - /* - * Do not follow soft link, just check if the original path is inside - * "/dev/". - */ - ret = kern_path(dev_path, 0, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) - goto out; - if (strncmp(resolved_path, "/dev/", strlen("/dev/"))) - goto out; - is_good = true; -out: - kfree(path_buf); - path_put(&path); - return is_good; -} - -static int get_canonical_dev_path(const char *dev_path, char *canonical) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - int ret; - - if (!dev_path) { - ret = -EINVAL; - goto out; - } - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) { - ret = -ENOMEM; - goto out; - } - - ret = kern_path(dev_path, LOOKUP_FOLLOW, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - ret = strscpy(canonical, resolved_path, PATH_MAX); -out: - kfree(path_buf); - path_put(&path); - return ret; -} - static bool is_same_device(struct btrfs_device *device, const char *new_path) { struct path old = { .mnt = NULL, .dentry = NULL }; struct path new = { .mnt = NULL, .dentry = NULL }; - char *old_path = NULL; + char AUTO_KFREE(old_path); bool is_same = false; int ret; @@ -820,7 +751,7 @@ static bool is_same_device(struct btrfs_device *device, const char *new_path) goto out; rcu_read_lock(); - ret = strscpy(old_path, rcu_str_deref(device->name), PATH_MAX); + ret = strscpy(old_path, rcu_dereference(device->name), PATH_MAX); rcu_read_unlock(); if (ret < 0) goto out; @@ -834,7 +765,6 @@ static bool is_same_device(struct btrfs_device *device, const char *new_path) if (path_equal(&old, &new)) is_same = true; out: - kfree(old_path); path_put(&old); path_put(&new); return is_same; @@ -853,11 +783,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, { struct btrfs_device *device; struct btrfs_fs_devices *fs_devices = NULL; - struct rcu_string *name; + const char *name; u64 found_transid = btrfs_super_generation(disk_super); u64 devid = btrfs_stack_device_id(&disk_super->dev_item); dev_t path_devt; - int error; + int ret; bool same_fsid_diff_dev = false; bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID); @@ -869,11 +799,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, return ERR_PTR(-EAGAIN); } - error = lookup_bdev(path, &path_devt); - if (error) { + ret = lookup_bdev(path, &path_devt); + if (ret) { btrfs_err(NULL, "failed to lookup block device for path %s: %d", - path, error); - return ERR_PTR(error); + path, ret); + return ERR_PTR(ret); } fs_devices = find_fsid_by_device(disk_super, path_devt, &same_fsid_diff_dev); @@ -890,7 +820,7 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (same_fsid_diff_dev) { generate_random_uuid(fs_devices->fsid); fs_devices->temp_fsid = true; - pr_info("BTRFS: device %s (%d:%d) using temp-fsid %pU\n", + btrfs_info(NULL, "device %s (%d:%d) using temp-fsid %pU", path, MAJOR(path_devt), MINOR(path_devt), fs_devices->fsid); } @@ -961,6 +891,8 @@ static noinline struct btrfs_device *device_list_add(const char *path, current->comm, task_pid_nr(current)); } else if (!device->name || !is_same_device(device, path)) { + const char *old_name; + /* * When FS is already mounted. * 1. If you are here and if the device->name is NULL that @@ -1014,27 +946,31 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (device->bdev) { if (device->devt != path_devt) { mutex_unlock(&fs_devices->device_list_mutex); - btrfs_warn_in_rcu(NULL, + btrfs_warn(NULL, "duplicate device %s devid %llu generation %llu scanned by %s (%d)", path, devid, found_transid, current->comm, task_pid_nr(current)); return ERR_PTR(-EEXIST); } - btrfs_info_in_rcu(NULL, + btrfs_info(NULL, "devid %llu device path %s changed to %s scanned by %s (%d)", devid, btrfs_dev_name(device), path, current->comm, task_pid_nr(current)); } - name = rcu_string_strdup(path, GFP_NOFS); + name = kstrdup(path, GFP_NOFS); if (!name) { mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-ENOMEM); } - rcu_string_free(device->name); + rcu_read_lock(); + old_name = rcu_dereference(device->name); + rcu_read_unlock(); rcu_assign_pointer(device->name, name); + kfree_rcu_mightsleep(old_name); + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) { fs_devices->missing_devices--; clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state); @@ -1083,7 +1019,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) * uuid mutex so nothing we touch in here is going to disappear. */ if (orig_dev->name) - dev_path = orig_dev->name->str; + dev_path = rcu_dereference_raw(orig_dev->name); device = btrfs_alloc_device(NULL, &orig_dev->devid, orig_dev->uuid, dev_path); @@ -1141,7 +1077,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, continue; if (device->bdev_file) { - fput(device->bdev_file); + bdev_fput(device->bdev_file); device->bdev = NULL; device->bdev_file = NULL; fs_devices->open_devices--; @@ -1188,7 +1124,7 @@ static void btrfs_close_bdev(struct btrfs_device *device) invalidate_bdev(device->bdev); } - fput(device->bdev_file); + bdev_fput(device->bdev_file); } static void btrfs_close_one_device(struct btrfs_device *device) @@ -1220,7 +1156,7 @@ static void btrfs_close_one_device(struct btrfs_device *device) device->fs_info = NULL; atomic_set(&device->dev_stats_ccnt, 0); - extent_io_tree_release(&device->alloc_state); + btrfs_extent_io_tree_release(&device->alloc_state); /* * Reset the flush error record. We might have a transient flush error @@ -1268,7 +1204,7 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices) mutex_lock(&uuid_mutex); close_fs_devices(fs_devices); - if (!fs_devices->opened) { + if (!fs_devices->opened && !fs_devices->holding) { list_splice_init(&fs_devices->seed_list, &list); /* @@ -1298,6 +1234,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, struct btrfs_device *device; struct btrfs_device *latest_dev = NULL; struct btrfs_device *tmp_device; + s64 __maybe_unused value = 0; int ret = 0; list_for_each_entry_safe(device, tmp_device, &fs_devices->devices, @@ -1327,7 +1264,23 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, fs_devices->latest_dev = latest_dev; fs_devices->total_rw_bytes = 0; fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR; +#ifdef CONFIG_BTRFS_EXPERIMENTAL + fs_devices->rr_min_contig_read = BTRFS_DEFAULT_RR_MIN_CONTIG_READ; + fs_devices->read_devid = latest_dev->devid; + fs_devices->read_policy = btrfs_read_policy_to_enum(btrfs_get_mod_read_policy(), + &value); + if (fs_devices->read_policy == BTRFS_READ_POLICY_RR) + fs_devices->collect_fs_stats = true; + + if (value) { + if (fs_devices->read_policy == BTRFS_READ_POLICY_RR) + fs_devices->rr_min_contig_read = value; + if (fs_devices->read_policy == BTRFS_READ_POLICY_DEVID) + fs_devices->read_devid = value; + } +#else fs_devices->read_policy = BTRFS_READ_POLICY_PID; +#endif return 0; } @@ -1379,48 +1332,58 @@ void btrfs_release_disk_super(struct btrfs_super_block *super) put_page(page); } -static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev, - u64 bytenr, u64 bytenr_orig) +struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev, + int copy_num, bool drop_cache) { - struct btrfs_super_block *disk_super; + struct btrfs_super_block *super; struct page *page; - void *p; - pgoff_t index; + u64 bytenr, bytenr_orig; + struct address_space *mapping = bdev->bd_mapping; + int ret; - /* make sure our super fits in the device */ - if (bytenr + PAGE_SIZE >= bdev_nr_bytes(bdev)) - return ERR_PTR(-EINVAL); + bytenr_orig = btrfs_sb_offset(copy_num); + ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr); + if (ret < 0) { + if (ret == -ENOENT) + ret = -EINVAL; + return ERR_PTR(ret); + } - /* make sure our super fits in the page */ - if (sizeof(*disk_super) > PAGE_SIZE) + if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev)) return ERR_PTR(-EINVAL); - /* make sure our super doesn't straddle pages on disk */ - index = bytenr >> PAGE_SHIFT; - if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index) - return ERR_PTR(-EINVAL); + if (drop_cache) { + /* This should only be called with the primary sb. */ + ASSERT(copy_num == 0); - /* pull in the page with our super */ - page = read_cache_page_gfp(bdev->bd_mapping, index, GFP_KERNEL); + /* + * Drop the page of the primary superblock, so later read will + * always read from the device. + */ + invalidate_inode_pages2_range(mapping, bytenr >> PAGE_SHIFT, + (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT); + } + page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS); if (IS_ERR(page)) return ERR_CAST(page); - p = page_address(page); - - /* align our pointer to the offset of the super block */ - disk_super = p + offset_in_page(bytenr); - - if (btrfs_super_bytenr(disk_super) != bytenr_orig || - btrfs_super_magic(disk_super) != BTRFS_MAGIC) { - btrfs_release_disk_super(p); + super = page_address(page); + if (btrfs_super_magic(super) != BTRFS_MAGIC || + btrfs_super_bytenr(super) != bytenr_orig) { + btrfs_release_disk_super(super); return ERR_PTR(-EINVAL); } - if (disk_super->label[0] && disk_super->label[BTRFS_LABEL_SIZE - 1]) - disk_super->label[BTRFS_LABEL_SIZE - 1] = 0; + /* + * Make sure the last byte of label is properly NUL terminated. We use + * '%s' to print the label, if not properly NUL terminated we can access + * beyond the label. + */ + if (super->label[0] && super->label[BTRFS_LABEL_SIZE - 1]) + super->label[BTRFS_LABEL_SIZE - 1] = 0; - return disk_super; + return super; } int btrfs_forget_devices(dev_t devt) @@ -1458,7 +1421,7 @@ static bool btrfs_skip_registration(struct btrfs_super_block *disk_super, list_for_each_entry(device, &fs_devices->devices, dev_list) { if (device->bdev && (device->bdev->bd_dev == devt) && - strcmp(device->name->str, path) != 0) { + strcmp(rcu_dereference_raw(device->name), path) != 0) { mutex_unlock(&fs_devices->device_list_mutex); /* Do not skip registration. */ @@ -1484,30 +1447,17 @@ static bool btrfs_skip_registration(struct btrfs_super_block *disk_super, * the device or return an error. Multi-device and seeding devices are registered * in both cases. */ -struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, +struct btrfs_device *btrfs_scan_one_device(const char *path, bool mount_arg_dev) { struct btrfs_super_block *disk_super; bool new_device_added = false; struct btrfs_device *device = NULL; struct file *bdev_file; - char *canonical_path = NULL; - u64 bytenr; dev_t devt; - int ret; lockdep_assert_held(&uuid_mutex); - if (!is_good_dev_path(path)) { - canonical_path = kmalloc(PATH_MAX, GFP_KERNEL); - if (canonical_path) { - ret = get_canonical_dev_path(path, canonical_path); - if (ret < 0) { - kfree(canonical_path); - canonical_path = NULL; - } - } - } /* * Avoid an exclusive open here, as the systemd-udev may initiate the * device scan which may race with the user's mount or mkfs command, @@ -1518,24 +1468,11 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, * values temporarily, as the device paths of the fsid are the only * required information for assembling the volume. */ - bdev_file = bdev_file_open_by_path(path, flags, NULL, NULL); + bdev_file = bdev_file_open_by_path(path, BLK_OPEN_READ, NULL, NULL); if (IS_ERR(bdev_file)) return ERR_CAST(bdev_file); - /* - * We would like to check all the super blocks, but doing so would - * allow a mount to succeed after a mkfs from a different filesystem. - * Currently, recovery from a bad primary btrfs superblock is done - * using the userspace command 'btrfs check --super'. - */ - ret = btrfs_sb_log_location_bdev(file_bdev(bdev_file), 0, READ, &bytenr); - if (ret) { - device = ERR_PTR(ret); - goto error_bdev_put; - } - - disk_super = btrfs_read_disk_super(file_bdev(bdev_file), bytenr, - btrfs_sb_offset(0)); + disk_super = btrfs_read_disk_super(file_bdev(bdev_file), 0, false); if (IS_ERR(disk_super)) { device = ERR_CAST(disk_super); goto error_bdev_put; @@ -1543,7 +1480,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, devt = file_bdev(bdev_file)->bd_dev; if (btrfs_skip_registration(disk_super, path, devt, mount_arg_dev)) { - pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n", + btrfs_debug(NULL, "skip registering single non-seed device %s (%d:%d)", path, MAJOR(devt), MINOR(devt)); btrfs_free_stale_devices(devt, NULL); @@ -1552,8 +1489,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, goto free_disk_super; } - device = device_list_add(canonical_path ? : path, disk_super, - &new_device_added); + device = device_list_add(path, disk_super, &new_device_added); if (!IS_ERR(device) && new_device_added) btrfs_free_stale_devices(device->devt, device); @@ -1561,8 +1497,7 @@ free_disk_super: btrfs_release_disk_super(disk_super); error_bdev_put: - fput(bdev_file); - kfree(canonical_path); + bdev_fput(bdev_file); return device; } @@ -1578,9 +1513,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start, lockdep_assert_held(&device->fs_info->chunk_mutex); - if (find_first_extent_bit(&device->alloc_state, *start, - &physical_start, &physical_end, - CHUNK_ALLOCATED, NULL)) { + if (btrfs_find_first_extent_bit(&device->alloc_state, *start, + &physical_start, &physical_end, + CHUNK_ALLOCATED, NULL)) { if (in_range(physical_start, *start, len) || in_range(*start, physical_start, @@ -1595,6 +1530,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start, static u64 dev_extent_search_start(struct btrfs_device *device) { switch (device->fs_devices->chunk_alloc_policy) { + default: + btrfs_warn_unknown_chunk_allocation(device->fs_devices->chunk_alloc_policy); + fallthrough; case BTRFS_CHUNK_ALLOC_REGULAR: return BTRFS_DEVICE_RANGE_RESERVED; case BTRFS_CHUNK_ALLOC_ZONED: @@ -1604,8 +1542,6 @@ static u64 dev_extent_search_start(struct btrfs_device *device) * for superblock logging. */ return 0; - default: - BUG(); } } @@ -1618,7 +1554,8 @@ static bool dev_extent_hole_check_zoned(struct btrfs_device *device, int ret; bool changed = false; - ASSERT(IS_ALIGNED(*hole_start, zone_size)); + ASSERT(IS_ALIGNED(*hole_start, zone_size), + "hole_start=%llu zone_size=%llu", *hole_start, zone_size); while (*hole_size > 0) { pos = btrfs_find_allocatable_zones(device, *hole_start, @@ -1684,6 +1621,9 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start, } switch (device->fs_devices->chunk_alloc_policy) { + default: + btrfs_warn_unknown_chunk_allocation(device->fs_devices->chunk_alloc_policy); + fallthrough; case BTRFS_CHUNK_ALLOC_REGULAR: /* No extra check */ break; @@ -1698,8 +1638,6 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start, continue; } break; - default: - BUG(); } break; @@ -1742,7 +1680,7 @@ static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, struct btrfs_root *root = fs_info->dev_root; struct btrfs_key key; struct btrfs_dev_extent *dev_extent; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); u64 search_start; u64 hole_size; u64 max_hole_start; @@ -1772,12 +1710,12 @@ again: } path->reada = READA_FORWARD; - path->search_commit_root = 1; - path->skip_locking = 1; + path->search_commit_root = true; + path->skip_locking = true; key.objectid = device->devid; - key.offset = search_start; key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = search_start; ret = btrfs_search_backwards(root, &key, path); if (ret < 0) @@ -1869,9 +1807,10 @@ next: else ret = 0; - ASSERT(max_hole_start + max_hole_size <= search_end); + ASSERT(max_hole_start + max_hole_size <= search_end, + "max_hole_start=%llu max_hole_size=%llu search_end=%llu", + max_hole_start, max_hole_size, search_end); out: - btrfs_free_path(path); *start = max_hole_start; if (len) *len = max_hole_size; @@ -1885,7 +1824,7 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = device->fs_info; struct btrfs_root *root = fs_info->dev_root; int ret; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf = NULL; @@ -1896,15 +1835,15 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, return -ENOMEM; key.objectid = device->devid; - key.offset = start; key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = start; again: ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) { ret = btrfs_previous_item(root, path, key.objectid, BTRFS_DEV_EXTENT_KEY); if (ret) - goto out; + return ret; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); extent = btrfs_item_ptr(leaf, path->slots[0], @@ -1919,7 +1858,7 @@ again: extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); } else { - goto out; + return ret; } *dev_extent_len = btrfs_dev_extent_length(leaf, extent); @@ -1927,8 +1866,6 @@ again: ret = btrfs_del_item(trans, root, path); if (ret == 0) set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags); -out: - btrfs_free_path(path); return ret; } @@ -1956,7 +1893,7 @@ static noinline int find_next_devid(struct btrfs_fs_info *fs_info, int ret; struct btrfs_key key; struct btrfs_key found_key; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); path = btrfs_alloc_path(); if (!path) @@ -1968,13 +1905,12 @@ static noinline int find_next_devid(struct btrfs_fs_info *fs_info, ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0); if (ret < 0) - goto error; + return ret; - if (ret == 0) { + if (unlikely(ret == 0)) { /* Corruption */ btrfs_err(fs_info, "corrupted chunk tree devid -1 matched"); - ret = -EUCLEAN; - goto error; + return -EUCLEAN; } ret = btrfs_previous_item(fs_info->chunk_root, path, @@ -1987,10 +1923,7 @@ static noinline int find_next_devid(struct btrfs_fs_info *fs_info, path->slots[0]); *devid_ret = found_key.offset + 1; } - ret = 0; -error: - btrfs_free_path(path); - return ret; + return 0; } /* @@ -2001,7 +1934,7 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, struct btrfs_device *device) { int ret; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_dev_item *dev_item; struct extent_buffer *leaf; struct btrfs_key key; @@ -2020,7 +1953,7 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, &key, sizeof(*dev_item)); btrfs_trans_release_chunk_metadata(trans); if (ret) - goto out; + return ret; leaf = path->nodes[0]; dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); @@ -2045,12 +1978,8 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, ptr = btrfs_device_fsid(dev_item); write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid, ptr, BTRFS_FSID_SIZE); - btrfs_mark_buffer_dirty(trans, leaf); - ret = 0; -out: - btrfs_free_path(path); - return ret; + return 0; } /* @@ -2062,14 +1991,11 @@ out: static void update_dev_time(const char *device_path) { struct path path; - int ret; - - ret = kern_path(device_path, LOOKUP_FOLLOW, &path); - if (ret) - return; - inode_update_time(d_inode(path.dentry), S_MTIME | S_CTIME | S_VERSION); - path_put(&path); + if (!kern_path(device_path, LOOKUP_FOLLOW, &path)) { + vfs_utimes(&path, NULL); + path_put(&path); + } } static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans, @@ -2077,7 +2003,7 @@ static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans, { struct btrfs_root *root = device->fs_info->chunk_root; int ret; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; path = btrfs_alloc_path(); @@ -2091,16 +2017,12 @@ static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans, btrfs_reserve_chunk_metadata(trans, false); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); btrfs_trans_release_chunk_metadata(trans); - if (ret) { - if (ret > 0) - ret = -ENOENT; - goto out; - } + if (ret > 0) + return -ENOENT; + if (ret < 0) + return ret; - ret = btrfs_del_item(trans, root, path); -out: - btrfs_free_path(path); - return ret; + return btrfs_del_item(trans, root, path); } /* @@ -2183,7 +2105,7 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info) down_read(&fs_info->dev_replace.rwsem); if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { - ASSERT(num_devices > 1); + ASSERT(num_devices > 1, "num_devices=%llu", num_devices); num_devices--; } up_read(&fs_info->dev_replace.rwsem); @@ -2199,7 +2121,7 @@ static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info, const u64 bytenr = btrfs_sb_offset(copy_num); int ret; - disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr); + disk_super = btrfs_read_disk_super(bdev, copy_num, false); if (IS_ERR(disk_super)) return; @@ -2232,7 +2154,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_devic btrfs_kobject_uevent(bdev, KOBJ_CHANGE); /* Update ctime/mtime for device path for libblkid */ - update_dev_time(device->name->str); + update_dev_time(rcu_dereference_raw(device->name)); } int btrfs_rm_device(struct btrfs_fs_info *fs_info, @@ -2272,7 +2194,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, } if (btrfs_pinned_by_swapfile(fs_info, device)) { - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "cannot remove device %s (devid %llu) due to active swapfile", btrfs_dev_name(device), device->devid); return -ETXTBSY; @@ -2303,7 +2225,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, } ret = btrfs_rm_dev_item(trans, device); - if (ret) { + if (unlikely(ret)) { /* Any error in dev item removal is critical */ btrfs_crit(fs_info, "failed to remove device item for devid %llu: %d", @@ -2362,7 +2284,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, * free the device. * * We cannot call btrfs_close_bdev() here because we're holding the sb - * write lock, and fput() on the block device will pull in the + * write lock, and bdev_fput() on the block device will pull in the * ->open_mutex on the block device and it's dependencies. Instead * just flush the device and let the caller do the final bdev_release. */ @@ -2387,7 +2309,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, */ if (cur_devices->num_devices == 0) { list_del_init(&cur_devices->seed_list); - ASSERT(cur_devices->opened == 1); + ASSERT(cur_devices->opened == 1, "opened=%d", cur_devices->opened); cur_devices->opened--; free_fs_devices(cur_devices); } @@ -2541,7 +2463,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, else memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE); btrfs_release_disk_super(disk_super); - fput(bdev_file); + bdev_fput(bdev_file); return 0; } @@ -2686,7 +2608,7 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = fs_info->chunk_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct extent_buffer *leaf; struct btrfs_dev_item *dev_item; struct btrfs_device *device; @@ -2700,15 +2622,15 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) return -ENOMEM; key.objectid = BTRFS_DEV_ITEMS_OBJECTID; - key.offset = 0; key.type = BTRFS_DEV_ITEM_KEY; + key.offset = 0; while (1) { btrfs_reserve_chunk_metadata(trans, false); ret = btrfs_search_slot(trans, root, &key, path, 0, 1); btrfs_trans_release_chunk_metadata(trans); if (ret < 0) - goto error; + return ret; leaf = path->nodes[0]; next_slot: @@ -2717,7 +2639,7 @@ next_slot: if (ret > 0) break; if (ret < 0) - goto error; + return ret; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); btrfs_release_path(path); @@ -2741,19 +2663,14 @@ next_slot: device = btrfs_find_device(fs_info->fs_devices, &args); BUG_ON(!device); /* Logic error */ - if (device->fs_devices->seeding) { + if (device->fs_devices->seeding) btrfs_set_device_generation(leaf, dev_item, device->generation); - btrfs_mark_buffer_dirty(trans, leaf); - } path->slots[0]++; goto next_slot; } - ret = 0; -error: - btrfs_free_path(path); - return ret; + return 0; } int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path) @@ -2775,7 +2692,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path return -EROFS; bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE, - fs_info->bdev_holder, NULL); + fs_info->sb, &fs_holder_ops); if (IS_ERR(bdev_file)) return PTR_ERR(bdev_file); @@ -2784,6 +2701,11 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path goto error; } + if (bdev_nr_bytes(file_bdev(bdev_file)) <= BTRFS_DEVICE_RANGE_RESERVED) { + ret = -EINVAL; + goto error; + } + if (fs_devices->seeding) { seeding_dev = true; down_write(&sb->s_umount); @@ -2900,21 +2822,21 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path mutex_lock(&fs_info->chunk_mutex); ret = init_first_rw_device(trans); mutex_unlock(&fs_info->chunk_mutex); - if (ret) { + if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); goto error_sysfs; } } ret = btrfs_add_dev_item(trans, device); - if (ret) { + if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); goto error_sysfs; } if (seeding_dev) { ret = btrfs_finish_sprout(trans); - if (ret) { + if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); goto error_sysfs; } @@ -2991,7 +2913,7 @@ error_free_zone: error_free_device: btrfs_free_device(device); error: - fput(bdev_file); + bdev_fput(bdev_file); if (locked) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); @@ -3003,7 +2925,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, struct btrfs_device *device) { int ret; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_root *root = device->fs_info->chunk_root; struct btrfs_dev_item *dev_item; struct extent_buffer *leaf; @@ -3019,12 +2941,10 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) - goto out; + return ret; - if (ret > 0) { - ret = -ENOENT; - goto out; - } + if (ret > 0) + return -ENOENT; leaf = path->nodes[0]; dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); @@ -3038,10 +2958,6 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, btrfs_device_get_disk_total_bytes(device)); btrfs_set_device_bytes_used(leaf, dev_item, btrfs_device_get_bytes_used(device)); - btrfs_mark_buffer_dirty(trans, leaf); - -out: - btrfs_free_path(path); return ret; } @@ -3094,7 +3010,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = fs_info->chunk_root; int ret; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; path = btrfs_alloc_path(); @@ -3102,28 +3018,26 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) return -ENOMEM; key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; - key.offset = chunk_offset; key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = chunk_offset; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) - goto out; - else if (ret > 0) { /* Logic error or corruption */ + return ret; + if (unlikely(ret > 0)) { + /* Logic error or corruption */ btrfs_err(fs_info, "failed to lookup chunk %llu when freeing", chunk_offset); btrfs_abort_transaction(trans, -ENOENT); - ret = -EUCLEAN; - goto out; + return -EUCLEAN; } ret = btrfs_del_item(trans, root, path); - if (ret < 0) { + if (unlikely(ret < 0)) { btrfs_err(fs_info, "failed to delete chunk %llu item", chunk_offset); btrfs_abort_transaction(trans, ret); - goto out; + return ret; } -out: - btrfs_free_path(path); return ret; } @@ -3321,7 +3235,8 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) * user having built with ASSERT enabled, so if ASSERT doesn't * do anything we still error out. */ - ASSERT(0); + DEBUG_WARN("errr %ld reading chunk map at offset %llu", + PTR_ERR(map), chunk_offset); return PTR_ERR(map); } @@ -3341,7 +3256,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) ret = btrfs_free_dev_extent(trans, device, map->stripes[i].physical, &dev_extent_len); - if (ret) { + if (unlikely(ret)) { mutex_unlock(&fs_devices->device_list_mutex); btrfs_abort_transaction(trans, ret); goto out; @@ -3353,6 +3268,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) device->bytes_used - dev_extent_len); atomic64_add(dev_extent_len, &fs_info->free_chunk_space); btrfs_clear_space_info_full(fs_info); + + if (list_empty(&device->post_commit_list)) { + list_add_tail(&device->post_commit_list, + &trans->transaction->dev_update_list); + } + mutex_unlock(&fs_info->chunk_mutex); } } @@ -3402,8 +3323,16 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) if (ret == -ENOSPC) { const u64 sys_flags = btrfs_system_alloc_profile(fs_info); struct btrfs_block_group *sys_bg; + struct btrfs_space_info *space_info; - sys_bg = btrfs_create_chunk(trans, sys_flags); + space_info = btrfs_find_space_info(fs_info, sys_flags); + if (unlikely(!space_info)) { + ret = -EINVAL; + btrfs_abort_transaction(trans, ret); + goto out; + } + + sys_bg = btrfs_create_chunk(trans, space_info, sys_flags); if (IS_ERR(sys_bg)) { ret = PTR_ERR(sys_bg); btrfs_abort_transaction(trans, ret); @@ -3411,17 +3340,17 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) } ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg); - if (ret) { + if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); goto out; } ret = remove_chunk_item(trans, map, chunk_offset); - if (ret) { + if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); goto out; } - } else if (ret) { + } else if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); goto out; } @@ -3430,7 +3359,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_del_sys_chunk(fs_info, chunk_offset); - if (ret) { + if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); goto out; } @@ -3446,7 +3375,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) btrfs_trans_release_chunk_metadata(trans); ret = btrfs_remove_block_group(trans, map); - if (ret) { + if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); goto out; } @@ -3461,7 +3390,8 @@ out: return ret; } -int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) +int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset, + bool verbose) { struct btrfs_root *root = fs_info->chunk_root; struct btrfs_trans_handle *trans; @@ -3491,7 +3421,7 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) /* step one, relocate all the extents inside this chunk */ btrfs_scrub_pause(fs_info); - ret = btrfs_relocate_block_group(fs_info, chunk_offset); + ret = btrfs_relocate_block_group(fs_info, chunk_offset, true); btrfs_scrub_continue(fs_info); if (ret) { /* @@ -3544,7 +3474,7 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info) { struct btrfs_root *chunk_root = fs_info->chunk_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct extent_buffer *leaf; struct btrfs_chunk *chunk; struct btrfs_key key; @@ -3560,17 +3490,17 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info) again: key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; - key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = (u64)-1; while (1) { mutex_lock(&fs_info->reclaim_bgs_lock); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); if (ret < 0) { mutex_unlock(&fs_info->reclaim_bgs_lock); - goto error; + return ret; } - if (ret == 0) { + if (unlikely(ret == 0)) { /* * On the first search we would find chunk tree with * offset -1, which is not possible. On subsequent @@ -3578,9 +3508,8 @@ again: * offset (one less than the previous one, wrong * alignment and size). */ - ret = -EUCLEAN; mutex_unlock(&fs_info->reclaim_bgs_lock); - goto error; + return -EUCLEAN; } ret = btrfs_previous_item(chunk_root, path, key.objectid, @@ -3588,7 +3517,7 @@ again: if (ret) mutex_unlock(&fs_info->reclaim_bgs_lock); if (ret < 0) - goto error; + return ret; if (ret > 0) break; @@ -3601,7 +3530,8 @@ again: btrfs_release_path(path); if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { - ret = btrfs_relocate_chunk(fs_info, found_key.offset); + ret = btrfs_relocate_chunk(fs_info, found_key.offset, + true); if (ret == -ENOSPC) failed++; else @@ -3621,8 +3551,6 @@ again: } else if (WARN_ON(failed && retried)) { ret = -ENOSPC; } -error: - btrfs_free_path(path); return ret; } @@ -3748,10 +3676,7 @@ static int insert_balance_item(struct btrfs_fs_info *fs_info, btrfs_set_balance_meta(leaf, item, &disk_bargs); btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys); btrfs_set_balance_sys(leaf, item, &disk_bargs); - btrfs_set_balance_flags(leaf, item, bctl->flags); - - btrfs_mark_buffer_dirty(trans, leaf); out: btrfs_free_path(path); err = btrfs_commit_transaction(trans); @@ -3866,26 +3791,25 @@ static void reset_balance_state(struct btrfs_fs_info *fs_info) * Balance filters. Return 1 if chunk should be filtered out * (should not be balanced). */ -static int chunk_profiles_filter(u64 chunk_type, - struct btrfs_balance_args *bargs) +static bool chunk_profiles_filter(u64 chunk_type, struct btrfs_balance_args *bargs) { chunk_type = chunk_to_extended(chunk_type) & BTRFS_EXTENDED_PROFILE_MASK; if (bargs->profiles & chunk_type) - return 0; + return false; - return 1; + return true; } -static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, - struct btrfs_balance_args *bargs) +static bool chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, + struct btrfs_balance_args *bargs) { struct btrfs_block_group *cache; u64 chunk_used; u64 user_thresh_min; u64 user_thresh_max; - int ret = 1; + bool ret = true; cache = btrfs_lookup_block_group(fs_info, chunk_offset); chunk_used = cache->used; @@ -3903,18 +3827,18 @@ static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_off user_thresh_max = mult_perc(cache->length, bargs->usage_max); if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max) - ret = 0; + ret = false; btrfs_put_block_group(cache); return ret; } -static int chunk_usage_filter(struct btrfs_fs_info *fs_info, - u64 chunk_offset, struct btrfs_balance_args *bargs) +static bool chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, + struct btrfs_balance_args *bargs) { struct btrfs_block_group *cache; u64 chunk_used, user_thresh; - int ret = 1; + bool ret = true; cache = btrfs_lookup_block_group(fs_info, chunk_offset); chunk_used = cache->used; @@ -3927,15 +3851,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, user_thresh = mult_perc(cache->length, bargs->usage); if (chunk_used < user_thresh) - ret = 0; + ret = false; btrfs_put_block_group(cache); return ret; } -static int chunk_devid_filter(struct extent_buffer *leaf, - struct btrfs_chunk *chunk, - struct btrfs_balance_args *bargs) +static bool chunk_devid_filter(struct extent_buffer *leaf, struct btrfs_chunk *chunk, + struct btrfs_balance_args *bargs) { struct btrfs_stripe *stripe; int num_stripes = btrfs_chunk_num_stripes(leaf, chunk); @@ -3944,10 +3867,10 @@ static int chunk_devid_filter(struct extent_buffer *leaf, for (i = 0; i < num_stripes; i++) { stripe = btrfs_stripe_nr(chunk, i); if (btrfs_stripe_devid(leaf, stripe) == bargs->devid) - return 0; + return false; } - return 1; + return true; } static u64 calc_data_stripes(u64 type, int num_stripes) @@ -3960,9 +3883,8 @@ static u64 calc_data_stripes(u64 type, int num_stripes) } /* [pstart, pend) */ -static int chunk_drange_filter(struct extent_buffer *leaf, - struct btrfs_chunk *chunk, - struct btrfs_balance_args *bargs) +static bool chunk_drange_filter(struct extent_buffer *leaf, struct btrfs_chunk *chunk, + struct btrfs_balance_args *bargs) { struct btrfs_stripe *stripe; int num_stripes = btrfs_chunk_num_stripes(leaf, chunk); @@ -3973,7 +3895,7 @@ static int chunk_drange_filter(struct extent_buffer *leaf, int i; if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID)) - return 0; + return false; type = btrfs_chunk_type(leaf, chunk); factor = calc_data_stripes(type, num_stripes); @@ -3989,56 +3911,53 @@ static int chunk_drange_filter(struct extent_buffer *leaf, if (stripe_offset < bargs->pend && stripe_offset + stripe_length > bargs->pstart) - return 0; + return false; } - return 1; + return true; } /* [vstart, vend) */ -static int chunk_vrange_filter(struct extent_buffer *leaf, - struct btrfs_chunk *chunk, - u64 chunk_offset, - struct btrfs_balance_args *bargs) +static bool chunk_vrange_filter(struct extent_buffer *leaf, struct btrfs_chunk *chunk, + u64 chunk_offset, struct btrfs_balance_args *bargs) { if (chunk_offset < bargs->vend && chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart) /* at least part of the chunk is inside this vrange */ - return 0; + return false; - return 1; + return true; } -static int chunk_stripes_range_filter(struct extent_buffer *leaf, - struct btrfs_chunk *chunk, - struct btrfs_balance_args *bargs) +static bool chunk_stripes_range_filter(struct extent_buffer *leaf, + struct btrfs_chunk *chunk, + struct btrfs_balance_args *bargs) { int num_stripes = btrfs_chunk_num_stripes(leaf, chunk); if (bargs->stripes_min <= num_stripes && num_stripes <= bargs->stripes_max) - return 0; + return false; - return 1; + return true; } -static int chunk_soft_convert_filter(u64 chunk_type, - struct btrfs_balance_args *bargs) +static bool chunk_soft_convert_filter(u64 chunk_type, struct btrfs_balance_args *bargs) { if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT)) - return 0; + return false; chunk_type = chunk_to_extended(chunk_type) & BTRFS_EXTENDED_PROFILE_MASK; if (bargs->target == chunk_type) - return 1; + return true; - return 0; + return false; } -static int should_balance_chunk(struct extent_buffer *leaf, - struct btrfs_chunk *chunk, u64 chunk_offset) +static bool should_balance_chunk(struct extent_buffer *leaf, struct btrfs_chunk *chunk, + u64 chunk_offset) { struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_balance_control *bctl = fs_info->balance_ctl; @@ -4048,7 +3967,7 @@ static int should_balance_chunk(struct extent_buffer *leaf, /* type filter */ if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) & (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) { - return 0; + return false; } if (chunk_type & BTRFS_BLOCK_GROUP_DATA) @@ -4061,46 +3980,46 @@ static int should_balance_chunk(struct extent_buffer *leaf, /* profiles filter */ if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) && chunk_profiles_filter(chunk_type, bargs)) { - return 0; + return false; } /* usage filter */ if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) && chunk_usage_filter(fs_info, chunk_offset, bargs)) { - return 0; + return false; } else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) && chunk_usage_range_filter(fs_info, chunk_offset, bargs)) { - return 0; + return false; } /* devid filter */ if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) && chunk_devid_filter(leaf, chunk, bargs)) { - return 0; + return false; } /* drange filter, makes sense only with devid filter */ if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) && chunk_drange_filter(leaf, chunk, bargs)) { - return 0; + return false; } /* vrange filter */ if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) && chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) { - return 0; + return false; } /* stripes filter */ if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) && chunk_stripes_range_filter(leaf, chunk, bargs)) { - return 0; + return false; } /* soft profile changing mode */ if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) && chunk_soft_convert_filter(chunk_type, bargs)) { - return 0; + return false; } /* @@ -4108,7 +4027,7 @@ static int should_balance_chunk(struct extent_buffer *leaf, */ if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) { if (bargs->limit == 0) - return 0; + return false; else bargs->limit--; } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) { @@ -4118,12 +4037,12 @@ static int should_balance_chunk(struct extent_buffer *leaf, * about the count of all chunks that satisfy the filters. */ if (bargs->limit_max == 0) - return 0; + return false; else bargs->limit_max--; } - return 1; + return true; } static int __btrfs_balance(struct btrfs_fs_info *fs_info) @@ -4132,7 +4051,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) struct btrfs_root *chunk_root = fs_info->chunk_root; u64 chunk_type; struct btrfs_chunk *chunk; - struct btrfs_path *path = NULL; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; @@ -4170,8 +4089,8 @@ again: bctl->sys.limit = limit_sys; } key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; - key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = (u64)-1; while (1) { if ((!counting && atomic_read(&fs_info->balance_pause_req)) || @@ -4275,7 +4194,7 @@ again: } } - ret = btrfs_relocate_chunk(fs_info, found_key.offset); + ret = btrfs_relocate_chunk(fs_info, found_key.offset, true); mutex_unlock(&fs_info->reclaim_bgs_lock); if (ret == -ENOSPC) { enospc_errors++; @@ -4303,7 +4222,6 @@ loop: goto again; } error: - btrfs_free_path(path); if (enospc_errors) { btrfs_info(fs_info, "%d enospc errors during balance", enospc_errors); @@ -4320,7 +4238,7 @@ error: * @flags: profile to validate * @extended: if true @flags is treated as an extended profile */ -static int alloc_profile_is_valid(u64 flags, int extended) +static int alloc_profile_is_valid(u64 flags, bool extended) { u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK : BTRFS_BLOCK_GROUP_PROFILE_MASK); @@ -4461,7 +4379,7 @@ static void describe_balance_start_or_resume(struct btrfs_fs_info *fs_info) { u32 size_buf = 1024; char tmp_buf[192] = {'\0'}; - char *buf; + char AUTO_KFREE(buf); char *bp; u32 size_bp = size_buf; int ret; @@ -4509,12 +4427,10 @@ out_overflow: btrfs_info(fs_info, "balance: %s %s", (bctl->flags & BTRFS_BALANCE_RESUME) ? "resume" : "start", buf); - - kfree(buf); } /* - * Should be called with balance mutexe held + * Should be called with balance mutex held */ int btrfs_balance(struct btrfs_fs_info *fs_info, struct btrfs_balance_control *bctl, @@ -4711,12 +4627,12 @@ static int balance_kthread(void *data) struct btrfs_fs_info *fs_info = data; int ret = 0; - sb_start_write(fs_info->sb); + guard(super_write)(fs_info->sb); + mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl) ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL); mutex_unlock(&fs_info->balance_mutex); - sb_end_write(fs_info->sb); return ret; } @@ -4738,7 +4654,8 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) } spin_lock(&fs_info->super_lock); - ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED); + ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED, + "exclusive_operation=%d", fs_info->exclusive_operation); fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE; spin_unlock(&fs_info->super_lock); /* @@ -4759,7 +4676,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) struct btrfs_balance_control *bctl; struct btrfs_balance_item *item; struct btrfs_disk_balance_args disk_bargs; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct extent_buffer *leaf; struct btrfs_key key; int ret; @@ -4774,17 +4691,14 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); if (ret < 0) - goto out; + return ret; if (ret > 0) { /* ret = -ENOENT; */ - ret = 0; - goto out; + return 0; } bctl = kzalloc(sizeof(*bctl), GFP_NOFS); - if (!bctl) { - ret = -ENOMEM; - goto out; - } + if (!bctl) + return -ENOMEM; leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); @@ -4821,8 +4735,6 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) fs_info->balance_ctl = bctl; spin_unlock(&fs_info->balance_lock); mutex_unlock(&fs_info->balance_mutex); -out: - btrfs_free_path(path); return ret; } @@ -4987,8 +4899,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) again: key.objectid = device->devid; - key.offset = (u64)-1; key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = (u64)-1; do { mutex_lock(&fs_info->reclaim_bgs_lock); @@ -5042,7 +4954,7 @@ again: goto done; } - ret = btrfs_relocate_chunk(fs_info, chunk_offset); + ret = btrfs_relocate_chunk(fs_info, chunk_offset, true); mutex_unlock(&fs_info->reclaim_bgs_lock); if (ret == -ENOSPC) { failed++; @@ -5074,8 +4986,8 @@ again: mutex_lock(&fs_info->chunk_mutex); /* Clear all state bits beyond the shrunk device size */ - clear_extent_bits(&device->alloc_state, new_size, (u64)-1, - CHUNK_STATE_MASK); + btrfs_clear_extent_bit(&device->alloc_state, new_size, (u64)-1, + CHUNK_STATE_MASK, NULL); btrfs_device_set_disk_total_bytes(device, new_size); if (list_empty(&device->post_commit_list)) @@ -5091,7 +5003,7 @@ again: /* Now btrfs_update_device() will change the on-disk size. */ ret = btrfs_update_device(trans, device); btrfs_trans_release_chunk_metadata(trans); - if (ret < 0) { + if (unlikely(ret < 0)) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); } else { @@ -5202,6 +5114,8 @@ struct alloc_chunk_ctl { u64 stripe_size; u64 chunk_size; int ndevs; + /* Space_info the block group is going to belong. */ + struct btrfs_space_info *space_info; }; static void init_alloc_chunk_ctl_policy_regular( @@ -5275,14 +5189,15 @@ static void init_alloc_chunk_ctl(struct btrfs_fs_devices *fs_devices, ctl->ndevs = 0; switch (fs_devices->chunk_alloc_policy) { + default: + btrfs_warn_unknown_chunk_allocation(fs_devices->chunk_alloc_policy); + fallthrough; case BTRFS_CHUNK_ALLOC_REGULAR: init_alloc_chunk_ctl_policy_regular(fs_devices, ctl); break; case BTRFS_CHUNK_ALLOC_ZONED: init_alloc_chunk_ctl_policy_zoned(fs_devices, ctl); break; - default: - BUG(); } } @@ -5421,7 +5336,9 @@ static int decide_stripe_size_zoned(struct alloc_chunk_ctl *ctl, * It should hold because: * dev_extent_min == dev_extent_want == zone_size * dev_stripes */ - ASSERT(devices_info[ctl->ndevs - 1].max_avail == ctl->dev_extent_min); + ASSERT(devices_info[ctl->ndevs - 1].max_avail == ctl->dev_extent_min, + "ndevs=%d max_avail=%llu dev_extent_min=%llu", ctl->ndevs, + devices_info[ctl->ndevs - 1].max_avail, ctl->dev_extent_min); ctl->stripe_size = zone_size; ctl->num_stripes = ctl->ndevs * ctl->dev_stripes; @@ -5434,7 +5351,9 @@ static int decide_stripe_size_zoned(struct alloc_chunk_ctl *ctl, ctl->dev_stripes); ctl->num_stripes = ctl->ndevs * ctl->dev_stripes; data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies; - ASSERT(ctl->stripe_size * data_stripes <= ctl->max_chunk_size); + ASSERT(ctl->stripe_size * data_stripes <= ctl->max_chunk_size, + "stripe_size=%llu data_stripes=%d max_chunk_size=%llu", + ctl->stripe_size, data_stripes, ctl->max_chunk_size); } ctl->chunk_size = ctl->stripe_size * data_stripes; @@ -5467,12 +5386,13 @@ static int decide_stripe_size(struct btrfs_fs_devices *fs_devices, ctl->ndevs = min(ctl->ndevs, ctl->devs_max); switch (fs_devices->chunk_alloc_policy) { + default: + btrfs_warn_unknown_chunk_allocation(fs_devices->chunk_alloc_policy); + fallthrough; case BTRFS_CHUNK_ALLOC_REGULAR: return decide_stripe_size_regular(ctl, devices_info); case BTRFS_CHUNK_ALLOC_ZONED: return decide_stripe_size_zoned(ctl, devices_info); - default: - BUG(); } } @@ -5482,9 +5402,9 @@ static void chunk_map_device_set_bits(struct btrfs_chunk_map *map, unsigned int struct btrfs_io_stripe *stripe = &map->stripes[i]; struct btrfs_device *device = stripe->dev; - set_extent_bit(&device->alloc_state, stripe->physical, - stripe->physical + map->stripe_size - 1, - bits | EXTENT_NOWAIT, NULL); + btrfs_set_extent_bit(&device->alloc_state, stripe->physical, + stripe->physical + map->stripe_size - 1, + bits | EXTENT_NOWAIT, NULL); } } @@ -5494,10 +5414,9 @@ static void chunk_map_device_clear_bits(struct btrfs_chunk_map *map, unsigned in struct btrfs_io_stripe *stripe = &map->stripes[i]; struct btrfs_device *device = stripe->dev; - __clear_extent_bit(&device->alloc_state, stripe->physical, - stripe->physical + map->stripe_size - 1, - bits | EXTENT_NOWAIT, - NULL, NULL); + btrfs_clear_extent_bit(&device->alloc_state, stripe->physical, + stripe->physical + map->stripe_size - 1, + bits | EXTENT_NOWAIT, NULL); } } @@ -5513,33 +5432,34 @@ void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_ma btrfs_free_chunk_map(map); } +static int btrfs_chunk_map_cmp(const struct rb_node *new, + const struct rb_node *exist) +{ + const struct btrfs_chunk_map *new_map = + rb_entry(new, struct btrfs_chunk_map, rb_node); + const struct btrfs_chunk_map *exist_map = + rb_entry(exist, struct btrfs_chunk_map, rb_node); + + if (new_map->start == exist_map->start) + return 0; + if (new_map->start < exist_map->start) + return -1; + return 1; +} + EXPORT_FOR_TESTS int btrfs_add_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map) { - struct rb_node **p; - struct rb_node *parent = NULL; - bool leftmost = true; + struct rb_node *exist; write_lock(&fs_info->mapping_tree_lock); - p = &fs_info->mapping_tree.rb_root.rb_node; - while (*p) { - struct btrfs_chunk_map *entry; - - parent = *p; - entry = rb_entry(parent, struct btrfs_chunk_map, rb_node); - - if (map->start < entry->start) { - p = &(*p)->rb_left; - } else if (map->start > entry->start) { - p = &(*p)->rb_right; - leftmost = false; - } else { - write_unlock(&fs_info->mapping_tree_lock); - return -EEXIST; - } + exist = rb_find_add_cached(&map->rb_node, &fs_info->mapping_tree, + btrfs_chunk_map_cmp); + + if (exist) { + write_unlock(&fs_info->mapping_tree_lock); + return -EEXIST; } - rb_link_node(&map->rb_node, parent, p); - rb_insert_color_cached(&map->rb_node, &fs_info->mapping_tree, leftmost); chunk_map_device_set_bits(map, CHUNK_ALLOCATED); chunk_map_device_clear_bits(map, CHUNK_TRIMMED); write_unlock(&fs_info->mapping_tree_lock); @@ -5603,7 +5523,8 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans, return ERR_PTR(ret); } - block_group = btrfs_make_block_group(trans, type, start, ctl->chunk_size); + block_group = btrfs_make_block_group(trans, ctl->space_info, type, start, + ctl->chunk_size); if (IS_ERR(block_group)) { btrfs_remove_chunk_map(info, map); return block_group; @@ -5629,19 +5550,19 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans, } struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, - u64 type) + struct btrfs_space_info *space_info, + u64 type) { struct btrfs_fs_info *info = trans->fs_info; struct btrfs_fs_devices *fs_devices = info->fs_devices; - struct btrfs_device_info *devices_info = NULL; + struct btrfs_device_info AUTO_KFREE(devices_info); struct alloc_chunk_ctl ctl; - struct btrfs_block_group *block_group; int ret; lockdep_assert_held(&info->chunk_mutex); if (!alloc_profile_is_valid(type, 0)) { - ASSERT(0); + DEBUG_WARN("invalid alloc profile for type %llu", type); return ERR_PTR(-EINVAL); } @@ -5653,12 +5574,13 @@ struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { btrfs_err(info, "invalid chunk type 0x%llx requested", type); - ASSERT(0); + DEBUG_WARN(); return ERR_PTR(-EINVAL); } ctl.start = find_next_chunk(info); ctl.type = type; + ctl.space_info = space_info; init_alloc_chunk_ctl(fs_devices, &ctl); devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info), @@ -5667,22 +5589,14 @@ struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); ret = gather_device_info(fs_devices, &ctl, devices_info); - if (ret < 0) { - block_group = ERR_PTR(ret); - goto out; - } + if (ret < 0) + return ERR_PTR(ret); ret = decide_stripe_size(fs_devices, &ctl, devices_info); - if (ret < 0) { - block_group = ERR_PTR(ret); - goto out; - } - - block_group = create_chunk(trans, &ctl, devices_info); + if (ret < 0) + return ERR_PTR(ret); -out: - kfree(devices_info); - return block_group; + return create_chunk(trans, &ctl, devices_info); } /* @@ -5740,7 +5654,7 @@ int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans, item_size = btrfs_chunk_item_size(map->num_stripes); chunk = kzalloc(item_size, GFP_NOFS); - if (!chunk) { + if (unlikely(!chunk)) { ret = -ENOMEM; btrfs_abort_transaction(trans, ret); goto out; @@ -5802,7 +5716,9 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans) struct btrfs_fs_info *fs_info = trans->fs_info; u64 alloc_profile; struct btrfs_block_group *meta_bg; + struct btrfs_space_info *meta_space_info; struct btrfs_block_group *sys_bg; + struct btrfs_space_info *sys_space_info; /* * When adding a new device for sprouting, the seed device is read-only @@ -5826,12 +5742,22 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans) */ alloc_profile = btrfs_metadata_alloc_profile(fs_info); - meta_bg = btrfs_create_chunk(trans, alloc_profile); + meta_space_info = btrfs_find_space_info(fs_info, alloc_profile); + if (!meta_space_info) { + DEBUG_WARN(); + return -EINVAL; + } + meta_bg = btrfs_create_chunk(trans, meta_space_info, alloc_profile); if (IS_ERR(meta_bg)) return PTR_ERR(meta_bg); alloc_profile = btrfs_system_alloc_profile(fs_info); - sys_bg = btrfs_create_chunk(trans, alloc_profile); + sys_space_info = btrfs_find_space_info(fs_info, alloc_profile); + if (!sys_space_info) { + DEBUG_WARN(); + return -EINVAL; + } + sys_bg = btrfs_create_chunk(trans, sys_space_info, alloc_profile); if (IS_ERR(sys_bg)) return PTR_ERR(sys_bg); @@ -5959,9 +5885,79 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, return len; } +#ifdef CONFIG_BTRFS_EXPERIMENTAL +static int btrfs_read_preferred(struct btrfs_chunk_map *map, int first, int num_stripes) +{ + for (int index = first; index < first + num_stripes; index++) { + const struct btrfs_device *device = map->stripes[index].dev; + + if (device->devid == READ_ONCE(device->fs_devices->read_devid)) + return index; + } + + /* If no read-preferred device is set use the first stripe. */ + return first; +} + +struct stripe_mirror { + u64 devid; + int num; +}; + +static int btrfs_cmp_devid(const void *a, const void *b) +{ + const struct stripe_mirror *s1 = (const struct stripe_mirror *)a; + const struct stripe_mirror *s2 = (const struct stripe_mirror *)b; + + if (s1->devid < s2->devid) + return -1; + if (s1->devid > s2->devid) + return 1; + return 0; +} + +/* + * Select a stripe for reading using the round-robin algorithm. + * + * 1. Compute the read cycle as the total sectors read divided by the minimum + * sectors per device. + * 2. Determine the stripe number for the current read by taking the modulus + * of the read cycle with the total number of stripes: + * + * stripe index = (total sectors / min sectors per dev) % num stripes + * + * The calculated stripe index is then used to select the corresponding device + * from the list of devices, which is ordered by devid. + */ +static int btrfs_read_rr(const struct btrfs_chunk_map *map, int first, int num_stripes) +{ + struct stripe_mirror stripes[BTRFS_RAID1_MAX_MIRRORS] = { 0 }; + struct btrfs_device *device = map->stripes[first].dev; + struct btrfs_fs_info *fs_info = device->fs_devices->fs_info; + unsigned int read_cycle; + unsigned int total_reads; + unsigned int min_reads_per_dev; + + total_reads = percpu_counter_sum(&fs_info->stats_read_blocks); + min_reads_per_dev = READ_ONCE(fs_info->fs_devices->rr_min_contig_read) >> + fs_info->sectorsize_bits; + + for (int index = 0, i = first; i < first + num_stripes; i++) { + stripes[index].devid = map->stripes[i].dev->devid; + stripes[index].num = i; + index++; + } + sort(stripes, num_stripes, sizeof(struct stripe_mirror), + btrfs_cmp_devid, NULL); + + read_cycle = total_reads / min_reads_per_dev; + return stripes[read_cycle % num_stripes].num; +} +#endif + static int find_live_mirror(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map, int first, - int dev_replace_is_ongoing) + bool dev_replace_is_ongoing) { const enum btrfs_read_policy policy = READ_ONCE(fs_info->fs_devices->read_policy); int i; @@ -5970,8 +5966,8 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, int tolerance; struct btrfs_device *srcdev; - ASSERT((map->type & - (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10))); + ASSERT((map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)), + "type=%llu", map->type); if (map->type & BTRFS_BLOCK_GROUP_RAID10) num_stripes = map->sub_stripes; @@ -5988,6 +5984,14 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, case BTRFS_READ_POLICY_PID: preferred_mirror = first + (current->pid % num_stripes); break; +#ifdef CONFIG_BTRFS_EXPERIMENTAL + case BTRFS_READ_POLICY_RR: + preferred_mirror = btrfs_read_rr(map, first, num_stripes); + break; + case BTRFS_READ_POLICY_DEVID: + preferred_mirror = btrfs_read_preferred(map, first, num_stripes); + break; +#endif } if (dev_replace_is_ongoing && @@ -6025,12 +6029,7 @@ struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info, { struct btrfs_io_context *bioc; - bioc = kzalloc( - /* The size of btrfs_io_context */ - sizeof(struct btrfs_io_context) + - /* Plus the variable array for the stripes */ - sizeof(struct btrfs_io_stripe) * (total_stripes), - GFP_NOFS); + bioc = kzalloc(struct_size(bioc, stripes, total_stripes), GFP_NOFS); if (!bioc) return NULL; @@ -6264,7 +6263,7 @@ static void handle_ops_on_dev_replace(struct btrfs_io_context *bioc, } /* We can only have at most 2 extra nr_stripes (for DUP). */ - ASSERT(nr_extra_stripes <= 2); + ASSERT(nr_extra_stripes <= 2, "nr_extra_stripes=%d", nr_extra_stripes); /* * For GET_READ_MIRRORS, we can only return at most 1 extra stripe for * replace. @@ -6275,7 +6274,8 @@ static void handle_ops_on_dev_replace(struct btrfs_io_context *bioc, struct btrfs_io_stripe *second = &bioc->stripes[num_stripes + 1]; /* Only DUP can have two extra stripes. */ - ASSERT(bioc->map_type & BTRFS_BLOCK_GROUP_DUP); + ASSERT(bioc->map_type & BTRFS_BLOCK_GROUP_DUP, + "map_type=%llu", bioc->map_type); /* * Swap the last stripe stripes and reduce @nr_extra_stripes. @@ -6302,7 +6302,8 @@ static u64 btrfs_max_io_len(struct btrfs_chunk_map *map, u64 offset, */ io_geom->stripe_offset = offset & BTRFS_STRIPE_LEN_MASK; io_geom->stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT; - ASSERT(io_geom->stripe_offset < U32_MAX); + ASSERT(io_geom->stripe_offset < U32_MAX, + "stripe_offset=%llu", io_geom->stripe_offset); if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { unsigned long full_stripe_len = @@ -6320,8 +6321,12 @@ static u64 btrfs_max_io_len(struct btrfs_chunk_map *map, u64 offset, io_geom->raid56_full_stripe_start = btrfs_stripe_nr_to_offset( rounddown(io_geom->stripe_nr, nr_data_stripes(map))); - ASSERT(io_geom->raid56_full_stripe_start + full_stripe_len > offset); - ASSERT(io_geom->raid56_full_stripe_start <= offset); + ASSERT(io_geom->raid56_full_stripe_start + full_stripe_len > offset, + "raid56_full_stripe_start=%llu full_stripe_len=%lu offset=%llu", + io_geom->raid56_full_stripe_start, full_stripe_len, offset); + ASSERT(io_geom->raid56_full_stripe_start <= offset, + "raid56_full_stripe_start=%llu offset=%llu", + io_geom->raid56_full_stripe_start, offset); /* * For writes to RAID56, allow to write a full stripe set, but * no straddling of stripe sets. @@ -6346,8 +6351,7 @@ static int set_io_stripe(struct btrfs_fs_info *fs_info, u64 logical, { dst->dev = map->stripes[io_geom->stripe_index].dev; - if (io_geom->op == BTRFS_MAP_READ && - btrfs_need_stripe_tree_update(fs_info, map->type)) + if (io_geom->op == BTRFS_MAP_READ && io_geom->use_rst) return btrfs_get_raid_extent_offset(fs_info, logical, length, map->type, io_geom->stripe_index, dst); @@ -6362,7 +6366,7 @@ static bool is_single_device_io(struct btrfs_fs_info *fs_info, const struct btrfs_io_stripe *smap, const struct btrfs_chunk_map *map, int num_alloc_stripes, - enum btrfs_map_op op, int mirror_num) + struct btrfs_io_geometry *io_geom) { if (!smap) return false; @@ -6370,10 +6374,10 @@ static bool is_single_device_io(struct btrfs_fs_info *fs_info, if (num_alloc_stripes != 1) return false; - if (btrfs_need_stripe_tree_update(fs_info, map->type) && op != BTRFS_MAP_READ) + if (io_geom->use_rst && io_geom->op != BTRFS_MAP_READ) return false; - if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) + if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && io_geom->mirror_num > 1) return false; return true; @@ -6488,7 +6492,7 @@ static void map_blocks_raid56_read(struct btrfs_chunk_map *map, { int data_stripes = nr_data_stripes(map); - ASSERT(io_geom->mirror_num <= 1); + ASSERT(io_geom->mirror_num <= 1, "mirror_num=%d", io_geom->mirror_num); /* Just grab the data stripe directly. */ io_geom->stripe_index = io_geom->stripe_nr % data_stripes; io_geom->stripe_nr /= data_stripes; @@ -6556,7 +6560,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, int num_copies; struct btrfs_io_context *bioc = NULL; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; - int dev_replace_is_ongoing = 0; + bool dev_replace_is_ongoing = false; u16 num_alloc_stripes; u64 max_len; @@ -6579,6 +6583,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, io_geom.raid56_full_stripe_start = (u64)-1; max_len = btrfs_max_io_len(map, map_offset, &io_geom); *length = min_t(u64, map->chunk_len - map_offset, max_len); + io_geom.use_rst = btrfs_need_stripe_tree_update(fs_info, map->type); if (dev_replace->replace_task != current) down_read(&dev_replace->rwsem); @@ -6647,8 +6652,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, * physical block information on the stack instead of allocating an * I/O context structure. */ - if (is_single_device_io(fs_info, smap, map, num_alloc_stripes, op, - io_geom.mirror_num)) { + if (is_single_device_io(fs_info, smap, map, num_alloc_stripes, &io_geom)) { ret = set_io_stripe(fs_info, logical, length, smap, map, &io_geom); if (mirror_num_ret) *mirror_num_ret = io_geom.mirror_num; @@ -6662,6 +6666,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, goto out; } bioc->map_type = map->type; + bioc->use_rst = io_geom.use_rst; /* * For RAID56 full map, we need to make sure the stripes[] follows the @@ -6750,6 +6755,8 @@ static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args, static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args, const struct btrfs_device *device) { + if (args->devt) + return device->devt == args->devt; if (args->missing) { if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) && !device->bdev) @@ -6860,7 +6867,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, atomic_set(&dev->dev_stats_ccnt, 0); btrfs_device_data_ordered_init(dev); - extent_io_tree_init(fs_info, &dev->alloc_state, IO_TREE_DEVICE_ALLOC_STATE); + btrfs_extent_io_tree_init(fs_info, &dev->alloc_state, IO_TREE_DEVICE_ALLOC_STATE); if (devid) tmp = *devid; @@ -6881,9 +6888,9 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, generate_random_uuid(dev->uuid); if (path) { - struct rcu_string *name; + const char *name; - name = rcu_string_strdup(path, GFP_KERNEL); + name = kstrdup(path, GFP_KERNEL); if (!name) { btrfs_free_device(dev); return ERR_PTR(-ENOMEM); @@ -7002,16 +7009,6 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, warn_32bit_meta_chunk(fs_info, logical, length, type); #endif - /* - * Only need to verify chunk item if we're reading from sys chunk array, - * as chunk item in tree block is already verified by tree-checker. - */ - if (leaf->start == BTRFS_SUPER_INFO_OFFSET) { - ret = btrfs_check_chunk_valid(leaf, chunk, logical); - if (ret) - return ret; - } - map = btrfs_find_chunk_map(fs_info, logical, 1); /* already mapped? */ @@ -7072,6 +7069,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, btrfs_err(fs_info, "failed to add chunk map, start=%llu len=%llu: %d", map->start, map->chunk_len, ret); + btrfs_free_chunk_map(map); } return ret; @@ -7117,8 +7115,12 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, fs_devices = find_fsid(fsid, NULL); if (!fs_devices) { - if (!btrfs_test_opt(fs_info, DEGRADED)) + if (!btrfs_test_opt(fs_info, DEGRADED)) { + btrfs_err(fs_info, + "failed to find fsid %pU when attempting to open seed devices", + fsid); return ERR_PTR(-ENOENT); + } fs_devices = alloc_fs_devices(fsid); if (IS_ERR(fs_devices)) @@ -7137,7 +7139,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, if (IS_ERR(fs_devices)) return fs_devices; - ret = open_fs_devices(fs_devices, BLK_OPEN_READ, fs_info->bdev_holder); + ret = open_fs_devices(fs_devices, BLK_OPEN_READ, fs_info->sb); if (ret) { free_fs_devices(fs_devices); return ERR_PTR(ret); @@ -7269,16 +7271,11 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info) { struct btrfs_super_block *super_copy = fs_info->super_copy; struct extent_buffer *sb; - struct btrfs_disk_key *disk_key; - struct btrfs_chunk *chunk; u8 *array_ptr; unsigned long sb_array_offset; int ret = 0; - u32 num_stripes; u32 array_size; - u32 len = 0; u32 cur_offset; - u64 type; struct btrfs_key key; ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize); @@ -7301,10 +7298,15 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info) cur_offset = 0; while (cur_offset < array_size) { - disk_key = (struct btrfs_disk_key *)array_ptr; - len = sizeof(*disk_key); - if (cur_offset + len > array_size) - goto out_short_read; + struct btrfs_chunk *chunk; + struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)array_ptr; + u32 len = sizeof(*disk_key); + + /* + * The sys_chunk_array has been already verified at super block + * read time. Only do ASSERT()s for basic checks. + */ + ASSERT(cur_offset + len <= array_size); btrfs_disk_key_to_cpu(&key, disk_key); @@ -7312,44 +7314,14 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info) sb_array_offset += len; cur_offset += len; - if (key.type != BTRFS_CHUNK_ITEM_KEY) { - btrfs_err(fs_info, - "unexpected item type %u in sys_array at offset %u", - (u32)key.type, cur_offset); - ret = -EIO; - break; - } + ASSERT(key.type == BTRFS_CHUNK_ITEM_KEY); chunk = (struct btrfs_chunk *)sb_array_offset; - /* - * At least one btrfs_chunk with one stripe must be present, - * exact stripe count check comes afterwards - */ - len = btrfs_chunk_item_size(1); - if (cur_offset + len > array_size) - goto out_short_read; - - num_stripes = btrfs_chunk_num_stripes(sb, chunk); - if (!num_stripes) { - btrfs_err(fs_info, - "invalid number of stripes %u in sys_array at offset %u", - num_stripes, cur_offset); - ret = -EIO; - break; - } + ASSERT(btrfs_chunk_type(sb, chunk) & BTRFS_BLOCK_GROUP_SYSTEM); - type = btrfs_chunk_type(sb, chunk); - if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { - btrfs_err(fs_info, - "invalid chunk type %llu in sys_array at offset %u", - type, cur_offset); - ret = -EIO; - break; - } + len = btrfs_chunk_item_size(btrfs_chunk_num_stripes(sb, chunk)); - len = btrfs_chunk_item_size(num_stripes); - if (cur_offset + len > array_size) - goto out_short_read; + ASSERT(cur_offset + len <= array_size); ret = read_one_chunk(&key, sb, chunk); if (ret) @@ -7362,13 +7334,6 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info) clear_extent_buffer_uptodate(sb); free_extent_buffer_stale(sb); return ret; - -out_short_read: - btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u", - len, cur_offset); - clear_extent_buffer_uptodate(sb); - free_extent_buffer_stale(sb); - return -EIO; } /* @@ -7440,7 +7405,7 @@ static void readahead_tree_node_children(struct extent_buffer *node) int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) { struct btrfs_root *root = fs_info->chunk_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct extent_buffer *leaf; struct btrfs_key key; struct btrfs_key found_key; @@ -7471,7 +7436,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) /* * Lockdep complains about possible circular locking dependency between * a disk's open_mutex (struct gendisk.open_mutex), the rw semaphores - * used for freeze procection of a fs (struct super_block.s_writers), + * used for freeze protection of a fs (struct super_block.s_writers), * which we take when starting a transaction, and extent buffers of the * chunk tree if we call read_one_dev() while holding a lock on an * extent buffer of the chunk tree. Since we are mounting the filesystem @@ -7479,7 +7444,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) * chunk tree, to keep it simple, just skip locking on the chunk tree. */ ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags)); - path->skip_locking = 1; + path->skip_locking = true; /* * Read all device items, and then all the chunk items. All @@ -7488,8 +7453,8 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID). */ key.objectid = BTRFS_DEV_ITEMS_OBJECTID; - key.offset = 0; key.type = 0; + key.offset = 0; btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) { struct extent_buffer *node = path->nodes[1]; @@ -7557,8 +7522,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) ret = 0; error: mutex_unlock(&uuid_mutex); - - btrfs_free_path(path); return ret; } @@ -7568,8 +7531,6 @@ int btrfs_init_devices_late(struct btrfs_fs_info *fs_info) struct btrfs_device *device; int ret = 0; - fs_devices->fs_info = fs_info; - mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry(device, &fs_devices->devices, dev_list) device->fs_info = fs_info; @@ -7660,7 +7621,7 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs; struct btrfs_device *device; - struct btrfs_path *path = NULL; + BTRFS_PATH_AUTO_FREE(path); int ret = 0; path = btrfs_alloc_path(); @@ -7682,8 +7643,6 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) } out: mutex_unlock(&fs_devices->device_list_mutex); - - btrfs_free_path(path); return ret; } @@ -7692,7 +7651,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *dev_root = fs_info->dev_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; struct extent_buffer *eb; struct btrfs_dev_stats_item *ptr; @@ -7708,10 +7667,10 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, return -ENOMEM; ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "error %d while searching for dev_stats item for device %s", ret, btrfs_dev_name(device)); - goto out; + return ret; } if (ret == 0 && @@ -7719,10 +7678,10 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, /* need to delete old one and insert a new one */ ret = btrfs_del_item(trans, dev_root, path); if (ret != 0) { - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "delete too small dev_stats item for device %s failed %d", btrfs_dev_name(device), ret); - goto out; + return ret; } ret = 1; } @@ -7733,10 +7692,10 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, dev_root, path, &key, sizeof(*ptr)); if (ret < 0) { - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "insert dev_stats item for device %s failed %d", btrfs_dev_name(device), ret); - goto out; + return ret; } } @@ -7745,10 +7704,6 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) btrfs_set_dev_stats_value(eb, ptr, i, btrfs_dev_stat_read(device, i)); - btrfs_mark_buffer_dirty(trans, eb); - -out: - btrfs_free_path(path); return ret; } @@ -7798,7 +7753,7 @@ void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index) if (!dev->dev_stats_valid) return; - btrfs_err_rl_in_rcu(dev->fs_info, + btrfs_err_rl(dev->fs_info, "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u", btrfs_dev_name(dev), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), @@ -7818,7 +7773,7 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) if (i == BTRFS_DEV_STAT_VALUES_MAX) return; /* all values == 0, suppress message */ - btrfs_info_in_rcu(dev->fs_info, + btrfs_info(dev->fs_info, "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u", btrfs_dev_name(dev), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), @@ -7878,7 +7833,7 @@ void btrfs_commit_device_sizes(struct btrfs_transaction *trans) { struct btrfs_device *curr, *next; - ASSERT(trans->state == TRANS_STATE_COMMIT_DOING); + ASSERT(trans->state == TRANS_STATE_COMMIT_DOING, "state=%d" , trans->state); if (list_empty(&trans->dev_update_list)) return; @@ -7908,8 +7863,6 @@ int btrfs_bg_type_to_factor(u64 flags) return btrfs_raid_array[index].ncopies; } - - static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, u64 chunk_offset, u64 devid, u64 physical_offset, u64 physical_len) @@ -7923,7 +7876,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, int i; map = btrfs_find_chunk_map(fs_info, chunk_offset, 1); - if (!map) { + if (unlikely(!map)) { btrfs_err(fs_info, "dev extent physical offset %llu on devid %llu doesn't have corresponding chunk", physical_offset, devid); @@ -7932,7 +7885,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, } stripe_len = btrfs_calc_stripe_length(map); - if (physical_len != stripe_len) { + if (unlikely(physical_len != stripe_len)) { btrfs_err(fs_info, "dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu", physical_offset, devid, map->start, physical_len, @@ -7942,7 +7895,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, } /* - * Very old mkfs.btrfs (before v4.1) will not respect the reserved + * Very old mkfs.btrfs (before v4.15) will not respect the reserved * space. Although kernel can handle it without problem, better to warn * the users. */ @@ -7952,8 +7905,8 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, devid, physical_offset, physical_len); for (i = 0; i < map->num_stripes; i++) { - if (map->stripes[i].dev->devid == devid && - map->stripes[i].physical == physical_offset) { + if (unlikely(map->stripes[i].dev->devid == devid && + map->stripes[i].physical == physical_offset)) { found = true; if (map->verified_stripes >= map->num_stripes) { btrfs_err(fs_info, @@ -7966,7 +7919,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, break; } } - if (!found) { + if (unlikely(!found)) { btrfs_err(fs_info, "dev extent physical offset %llu devid %llu has no corresponding chunk", physical_offset, devid); @@ -7975,13 +7928,13 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, /* Make sure no dev extent is beyond device boundary */ dev = btrfs_find_device(fs_info->fs_devices, &args); - if (!dev) { + if (unlikely(!dev)) { btrfs_err(fs_info, "failed to find devid %llu", devid); ret = -EUCLEAN; goto out; } - if (physical_offset + physical_len > dev->disk_total_bytes) { + if (unlikely(physical_offset + physical_len > dev->disk_total_bytes)) { btrfs_err(fs_info, "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu", devid, physical_offset, physical_len, @@ -7993,8 +7946,8 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, if (dev->zone_info) { u64 zone_size = dev->zone_info->zone_size; - if (!IS_ALIGNED(physical_offset, zone_size) || - !IS_ALIGNED(physical_len, zone_size)) { + if (unlikely(!IS_ALIGNED(physical_offset, zone_size) || + !IS_ALIGNED(physical_len, zone_size))) { btrfs_err(fs_info, "zoned: dev extent devid %llu physical offset %llu len %llu is not aligned to device zone", devid, physical_offset, physical_len); @@ -8018,7 +7971,7 @@ static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) struct btrfs_chunk_map *map; map = rb_entry(node, struct btrfs_chunk_map, rb_node); - if (map->num_stripes != map->verified_stripes) { + if (unlikely(map->num_stripes != map->verified_stripes)) { btrfs_err(fs_info, "chunk %llu has missing dev extent, have %d expect %d", map->start, map->verified_stripes, map->num_stripes); @@ -8040,7 +7993,7 @@ out: */ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) { - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_root *root = fs_info->dev_root; struct btrfs_key key; u64 prev_devid = 0; @@ -8071,17 +8024,15 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) path->reada = READA_FORWARD; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) - goto out; + return ret; if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(root, path); if (ret < 0) - goto out; + return ret; /* No dev extents at all? Not good */ - if (ret > 0) { - ret = -EUCLEAN; - goto out; - } + if (unlikely(ret > 0)) + return -EUCLEAN; } while (1) { struct extent_buffer *leaf = path->nodes[0]; @@ -8103,24 +8054,23 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) physical_len = btrfs_dev_extent_length(leaf, dext); /* Check if this dev extent overlaps with the previous one */ - if (devid == prev_devid && physical_offset < prev_dev_ext_end) { + if (unlikely(devid == prev_devid && physical_offset < prev_dev_ext_end)) { btrfs_err(fs_info, "dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu", devid, physical_offset, prev_dev_ext_end); - ret = -EUCLEAN; - goto out; + return -EUCLEAN; } ret = verify_one_dev_extent(fs_info, chunk_offset, devid, physical_offset, physical_len); if (ret < 0) - goto out; + return ret; prev_devid = devid; prev_dev_ext_end = physical_offset + physical_len; ret = btrfs_next_item(root, path); if (ret < 0) - goto out; + return ret; if (ret > 0) { ret = 0; break; @@ -8128,10 +8078,7 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) } /* Ensure all chunks have corresponding dev extents */ - ret = verify_chunk_dev_extent_mapping(fs_info); -out: - btrfs_free_path(path); - return ret; + return verify_chunk_dev_extent_mapping(fs_info); } /* @@ -8168,12 +8115,12 @@ static int relocating_repair_kthread(void *data) target = cache->start; btrfs_put_block_group(cache); - sb_start_write(fs_info->sb); + guard(super_write)(fs_info->sb); + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) { btrfs_info(fs_info, "zoned: skip relocating block group %llu to repair: EBUSY", target); - sb_end_write(fs_info->sb); return -EBUSY; } @@ -8194,14 +8141,13 @@ static int relocating_repair_kthread(void *data) btrfs_info(fs_info, "zoned: relocating block group %llu to repair IO failure", target); - ret = btrfs_relocate_chunk(fs_info, target); + ret = btrfs_relocate_chunk(fs_info, target, true); out: if (cache) btrfs_put_block_group(cache); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_exclop_finish(fs_info); - sb_end_write(fs_info->sb); return ret; } @@ -8247,7 +8193,7 @@ static void map_raid56_repair_block(struct btrfs_io_context *bioc, logical < stripe_start + BTRFS_STRIPE_LEN) break; } - ASSERT(i < data_stripes); + ASSERT(i < data_stripes, "i=%d data_stripes=%d", i, data_stripes); smap->dev = bioc->stripes[i].dev; smap->physical = bioc->stripes[i].physical + ((logical - bioc->full_stripe_logical) & @@ -8276,7 +8222,7 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info, int mirror_ret = mirror_num; int ret; - ASSERT(mirror_num > 0); + ASSERT(mirror_num > 0, "mirror_num=%d", mirror_num); ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length, &bioc, smap, &mirror_ret); @@ -8284,7 +8230,7 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info, return ret; /* The map range should not cross stripe boundary. */ - ASSERT(map_length >= length); + ASSERT(map_length >= length, "map_length=%llu length=%u", map_length, length); /* Already mapped to single stripe. */ if (!bioc) @@ -8296,7 +8242,8 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info, goto out; } - ASSERT(mirror_num <= bioc->num_stripes); + ASSERT(mirror_num <= bioc->num_stripes, + "mirror_num=%d num_stripes=%d", mirror_num, bioc->num_stripes); smap->dev = bioc->stripes[mirror_num - 1].dev; smap->physical = bioc->stripes[mirror_num - 1].physical; out: |
