summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/zoned.c93
-rw-r--r--fs/btrfs/zoned.h8
3 files changed, 98 insertions, 5 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a38d3f9a1a03..5022a3fd778d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3467,6 +3467,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
btrfs_free_zone_cache(fs_info);
+ btrfs_check_active_zone_reservation(fs_info);
+
if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
!btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info,
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index d7188680c8c6..fc69041bb6b4 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1889,6 +1889,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
struct map_lookup *map;
struct btrfs_device *device;
u64 physical;
+ const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA);
bool ret;
int i;
@@ -1910,19 +1911,40 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
goto out_unlock;
}
+ spin_lock(&fs_info->zone_active_bgs_lock);
for (i = 0; i < map->num_stripes; i++) {
+ struct btrfs_zoned_device_info *zinfo;
+ int reserved = 0;
+
device = map->stripes[i].dev;
physical = map->stripes[i].physical;
+ zinfo = device->zone_info;
- if (device->zone_info->max_active_zones == 0)
+ if (zinfo->max_active_zones == 0)
continue;
+ if (is_data)
+ reserved = zinfo->reserved_active_zones;
+ /*
+ * For the data block group, leave active zones for one
+ * metadata block group and one system block group.
+ */
+ if (atomic_read(&zinfo->active_zones_left) <= reserved) {
+ ret = false;
+ spin_unlock(&fs_info->zone_active_bgs_lock);
+ goto out_unlock;
+ }
+
if (!btrfs_dev_set_active_zone(device, physical)) {
/* Cannot activate the zone */
ret = false;
+ spin_unlock(&fs_info->zone_active_bgs_lock);
goto out_unlock;
}
+ if (!is_data)
+ zinfo->reserved_active_zones--;
}
+ spin_unlock(&fs_info->zone_active_bgs_lock);
/* Successfully activated all the zones */
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
@@ -2061,18 +2083,21 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *device = map->stripes[i].dev;
const u64 physical = map->stripes[i].physical;
+ struct btrfs_zoned_device_info *zinfo = device->zone_info;
- if (device->zone_info->max_active_zones == 0)
+ if (zinfo->max_active_zones == 0)
continue;
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
physical >> SECTOR_SHIFT,
- device->zone_info->zone_size >> SECTOR_SHIFT,
+ zinfo->zone_size >> SECTOR_SHIFT,
GFP_NOFS);
if (ret)
return ret;
+ if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
+ zinfo->reserved_active_zones++;
btrfs_dev_clear_active_zone(device, physical);
}
@@ -2111,8 +2136,10 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
/* Check if there is a device with active zones left */
mutex_lock(&fs_info->chunk_mutex);
+ spin_lock(&fs_info->zone_active_bgs_lock);
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
struct btrfs_zoned_device_info *zinfo = device->zone_info;
+ int reserved = 0;
if (!device->bdev)
continue;
@@ -2122,17 +2149,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
break;
}
+ if (flags & BTRFS_BLOCK_GROUP_DATA)
+ reserved = zinfo->reserved_active_zones;
+
switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
case 0: /* single */
- ret = (atomic_read(&zinfo->active_zones_left) >= 1);
+ ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved));
break;
case BTRFS_BLOCK_GROUP_DUP:
- ret = (atomic_read(&zinfo->active_zones_left) >= 2);
+ ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved));
break;
}
if (ret)
break;
}
+ spin_unlock(&fs_info->zone_active_bgs_lock);
mutex_unlock(&fs_info->chunk_mutex);
if (!ret)
@@ -2374,3 +2405,55 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
return 0;
}
+
+/*
+ * Reserve zones for one metadata block group, one tree-log block group, and one
+ * system block group.
+ */
+void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_block_group *block_group;
+ struct btrfs_device *device;
+ /* Reserve zones for normal SINGLE metadata and tree-log block group. */
+ unsigned int metadata_reserve = 2;
+ /* Reserve a zone for SINGLE system block group. */
+ unsigned int system_reserve = 1;
+
+ if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
+ return;
+
+ /*
+ * This function is called from the mount context. So, there is no
+ * parallel process touching the bits. No need for read_seqretry().
+ */
+ if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
+ metadata_reserve = 4;
+ if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
+ system_reserve = 2;
+
+ /* Apply the reservation on all the devices. */
+ mutex_lock(&fs_devices->device_list_mutex);
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ if (!device->bdev)
+ continue;
+
+ device->zone_info->reserved_active_zones =
+ metadata_reserve + system_reserve;
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+
+ /* Release reservation for currently active block groups. */
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
+ struct map_lookup *map = block_group->physical_map;
+
+ if (!(block_group->flags &
+ (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
+ continue;
+
+ for (int i = 0; i < map->num_stripes; i++)
+ map->stripes[i].dev->zone_info->reserved_active_zones--;
+ }
+ spin_unlock(&fs_info->zone_active_bgs_lock);
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 74ec37a25808..b9cec523b778 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -22,6 +22,11 @@ struct btrfs_zoned_device_info {
u8 zone_size_shift;
u32 nr_zones;
unsigned int max_active_zones;
+ /*
+ * Reserved active zones for one metadata and one system block group.
+ * It can vary per-device depending on the allocation status.
+ */
+ int reserved_active_zones;
atomic_t active_zones_left;
unsigned long *seq_zones;
unsigned long *empty_zones;
@@ -78,6 +83,7 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, bool do_finish);
+void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
#else /* CONFIG_BLK_DEV_ZONED */
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
@@ -252,6 +258,8 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
return 0;
}
+static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
+
#endif
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)