summaryrefslogtreecommitdiff
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
author: Naohiro Aota <naohiro.aota@wdc.com> 2020-11-10 20:26:14 +0900
committer: David Sterba <dsterba@suse.com> 2020-12-09 19:16:04 +0100
commit: 12659251ca5df05a484eb122c2c34c18d84e797c (patch)
tree: 583253bd3dec96e8e0fe71229dceecf72fda1b02 /fs/btrfs/disk-io.c
parent: a589dde0bc0bf5616e92131d803b6046573449e6 (diff)
btrfs: implement log-structured superblock for ZONED mode
The superblock (and its copies) is the only data structure in btrfs which has a fixed location on a device. Since we cannot overwrite data in a sequential write required zone, we cannot place the superblock in such a zone. One easy solution is to limit the superblock and its copies to conventional zones only. However, this method has two downsides. The first is a reduced number of superblock copies: the location of the second copy of the superblock is 256GB, which is in a sequential write required zone on typical devices on the market today, so the total number of superblocks (the primary plus its copies) is limited to two. The second downside is that we cannot support devices which have no conventional zones at all.

To solve these two problems, we employ superblock log writing. It uses two adjacent zones as a circular buffer to write updated superblocks. Once the first zone is filled up, we start writing into the second one. Then, when both zones are filled up and before starting to write to the first zone again, it resets the first zone.

We can determine the position of the latest superblock by reading the write pointer information from a device. One corner case is when both zones are full. For this situation, we read out the last superblock of each zone and compare them to determine which zone is older.

The following zones are reserved as the circular buffer on ZONED btrfs:

- The primary superblock: zones 0 and 1
- The first copy: zones 16 and 17
- The second copy: zone 1024 or the zone at 256GB, whichever is lower, and the zone next to it

If these reserved zones are conventional, the superblock is written at a fixed location at the start of the zone, without logging.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c42
1 files changed, 35 insertions, 7 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 32e29e2bc99a..aa92c0de0cd6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3488,10 +3488,17 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
{
struct btrfs_super_block *super;
struct page *page;
- u64 bytenr;
+ u64 bytenr, bytenr_orig;
struct address_space *mapping = bdev->bd_inode->i_mapping;
+ int ret;
+
+ bytenr_orig = btrfs_sb_offset(copy_num);
+ ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr);
+ if (ret == -ENOENT)
+ return ERR_PTR(-EINVAL);
+ else if (ret)
+ return ERR_PTR(ret);
- bytenr = btrfs_sb_offset(copy_num);
if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
return ERR_PTR(-EINVAL);
@@ -3505,7 +3512,7 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
return ERR_PTR(-ENODATA);
}
- if (btrfs_super_bytenr(super) != bytenr) {
+ if (btrfs_super_bytenr(super) != bytenr_orig) {
btrfs_release_disk_super(super);
return ERR_PTR(-EINVAL);
}
@@ -3560,7 +3567,8 @@ static int write_dev_supers(struct btrfs_device *device,
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int errors = 0;
- u64 bytenr;
+ int ret;
+ u64 bytenr, bytenr_orig;
if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
@@ -3572,12 +3580,22 @@ static int write_dev_supers(struct btrfs_device *device,
struct bio *bio;
struct btrfs_super_block *disk_super;
- bytenr = btrfs_sb_offset(i);
+ bytenr_orig = btrfs_sb_offset(i);
+ ret = btrfs_sb_log_location(device, i, WRITE, &bytenr);
+ if (ret == -ENOENT) {
+ continue;
+ } else if (ret < 0) {
+ btrfs_err(device->fs_info,
+ "couldn't get super block location for mirror %d",
+ i);
+ errors++;
+ continue;
+ }
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->commit_total_bytes)
break;
- btrfs_set_super_bytenr(sb, bytenr);
+ btrfs_set_super_bytenr(sb, bytenr_orig);
crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
@@ -3622,6 +3640,7 @@ static int write_dev_supers(struct btrfs_device *device,
bio->bi_opf |= REQ_FUA;
btrfsic_submit_bio(bio);
+ btrfs_advance_sb_log(device, i);
}
return errors < i ? 0 : -1;
}
@@ -3638,6 +3657,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
int i;
int errors = 0;
bool primary_failed = false;
+ int ret;
u64 bytenr;
if (max_mirrors == 0)
@@ -3646,7 +3666,15 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
for (i = 0; i < max_mirrors; i++) {
struct page *page;
- bytenr = btrfs_sb_offset(i);
+ ret = btrfs_sb_log_location(device, i, READ, &bytenr);
+ if (ret == -ENOENT) {
+ break;
+ } else if (ret < 0) {
+ errors++;
+ if (i == 0)
+ primary_failed = true;
+ continue;
+ }
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->commit_total_bytes)
break;