 Documentation/admin-guide/device-mapper/dm-zoned.rst |  34 ++-
 drivers/md/dm-zoned-metadata.c                       | 310 +++++++++++-------
 drivers/md/dm-zoned-target.c                         | 185 ++++++++-----
 drivers/md/dm-zoned.h                                |   7 +-
 4 files changed, 427 insertions(+), 109 deletions(-)
diff --git a/Documentation/admin-guide/device-mapper/dm-zoned.rst b/Documentation/admin-guide/device-mapper/dm-zoned.rst
index 7547ce635161..553752ea2521 100644
--- a/Documentation/admin-guide/device-mapper/dm-zoned.rst
+++ b/Documentation/admin-guide/device-mapper/dm-zoned.rst
@@ -37,9 +37,13 @@ Algorithm
dm-zoned implements an on-disk buffering scheme to handle non-sequential
write accesses to the sequential zones of a zoned block device.
Conventional zones are used for caching as well as for storing internal
-metadata.
+metadata. It can also use a regular block device together with the zoned
+block device; in that case the regular block device is split logically into
+zones of the same size as the zones of the zoned block device. These
+emulated zones are placed in front of the zones of the zoned block device
+and are handled just like conventional zones.
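+
+For example, pairing a 512 GiB regular block device with a zoned block
+device that has 256 MiB zones creates 2048 emulated zones (512 GiB / 256
+MiB): zones 0 to 2047 map to the regular device, and the first zone of the
+zoned block device becomes zone 2048.
+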
-The zones of the device are separated into 2 types:
+The zones of the device(s) are separated into 2 types:
1) Metadata zones: these are conventional zones used to store metadata.
Metadata zones are not reported as useable capacity to the user.
@@ -127,6 +131,13 @@ resumed. Flushing metadata thus only temporarily delays write and
discard requests. Read requests can be processed concurrently while
metadata flush is being executed.
+If a regular device is used in conjunction with the zoned block device,
+a third set of metadata (without the zone bitmaps) is written to the
+start of the zoned block device. This metadata has a generation counter of
+'0' and is never updated during normal operation; it serves only for
+identification purposes. The first and second copies of the metadata
+are located at the start of the regular block device.
+
Usage
=====
@@ -138,12 +149,21 @@ Ex::
dmzadm --format /dev/sdxx
-For a formatted device, the target can be created normally with the
-dmsetup utility. The only parameter that dm-zoned requires is the
-underlying zoned block device name. Ex::
- echo "0 `blockdev --getsize ${dev}` zoned ${dev}" | \
- dmsetup create dmz-`basename ${dev}`
+If two drives are to be used, both devices must be specified, with the
+regular block device as the first device.
+
+Ex::
+
+ dmzadm --format /dev/sdxx /dev/sdyy
+
+
+Formatted device(s) can also be started with the dmzadm utility.
+
+Ex::
+
+ dmzadm --start /dev/sdxx /dev/sdyy
+
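+Under the hood, starting a two-device setup is equivalent to passing both
+devices, regular block device first, in the dmsetup table. A sketch, where
+${size}, ${regular_dev} and ${zoned_dev} are placeholders and dmzadm
+--start computes the exact table::
+
+  echo "0 ${size} zoned ${regular_dev} ${zoned_dev}" | \
+  dmsetup create dmz-`basename ${zoned_dev}`
+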
Information about the internal layout and current usage of the zones can
be obtained with the 'status' callback from dmsetup:
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 939deed1606a..fba690dd37f5 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -16,7 +16,7 @@
/*
* Metadata version.
*/
-#define DMZ_META_VER 1
+#define DMZ_META_VER 2
/*
* On-disk super block magic.
@@ -69,8 +69,17 @@ struct dmz_super {
/* Checksum */
__le32 crc; /* 48 */
+ /* DM-Zoned label */
+ u8 dmz_label[32]; /* 80 */
+
+ /* DM-Zoned UUID */
+ u8 dmz_uuid[16]; /* 96 */
+
+ /* Device UUID */
+ u8 dev_uuid[16]; /* 112 */
+
/* Padding to full 512B sector */
- u8 reserved[464]; /* 512 */
+ u8 reserved[400]; /* 512 */
};
/*
@@ -133,8 +142,11 @@ struct dmz_sb {
*/
struct dmz_metadata {
struct dmz_dev *dev;
+ unsigned int nr_devs;
char devname[BDEVNAME_SIZE];
+ char label[BDEVNAME_SIZE];
+ uuid_t uuid;
sector_t zone_bitmap_size;
unsigned int zone_nr_bitmap_blocks;
@@ -161,8 +173,9 @@ struct dmz_metadata {
/* Zone information array */
struct dm_zone *zones;
- struct dmz_sb sb[2];
+ struct dmz_sb sb[3];
unsigned int mblk_primary;
+ unsigned int sb_version;
u64 sb_gen;
unsigned int min_nr_mblks;
unsigned int max_nr_mblks;
@@ -195,31 +208,56 @@ struct dmz_metadata {
};
#define dmz_zmd_info(zmd, format, args...) \
- DMINFO("(%s): " format, (zmd)->devname, ## args)
+ DMINFO("(%s): " format, (zmd)->label, ## args)
#define dmz_zmd_err(zmd, format, args...) \
- DMERR("(%s): " format, (zmd)->devname, ## args)
+ DMERR("(%s): " format, (zmd)->label, ## args)
#define dmz_zmd_warn(zmd, format, args...) \
- DMWARN("(%s): " format, (zmd)->devname, ## args)
+ DMWARN("(%s): " format, (zmd)->label, ## args)
#define dmz_zmd_debug(zmd, format, args...) \
- DMDEBUG("(%s): " format, (zmd)->devname, ## args)
+ DMDEBUG("(%s): " format, (zmd)->label, ## args)
/*
* Various accessors
*/
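+/*
+ * Return a zone's id relative to the device holding it: e.g. with
+ * dev[1].zone_offset == 2048, global zone 2050 maps to zone 2 of the
+ * zoned device.
+ */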
+static unsigned int dmz_dev_zone_id(struct dmz_metadata *zmd, struct dm_zone *zone)
+{
+ unsigned int zone_id;
+
+ if (WARN_ON(!zone))
+ return 0;
+
+ zone_id = zone->id;
+ if (zmd->nr_devs > 1 &&
+ (zone_id >= zmd->dev[1].zone_offset))
+ zone_id -= zmd->dev[1].zone_offset;
+ return zone_id;
+}
+
sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone)
{
- return (sector_t)zone->id << zmd->zone_nr_sectors_shift;
+ unsigned int zone_id = dmz_dev_zone_id(zmd, zone);
+
+ return (sector_t)zone_id << zmd->zone_nr_sectors_shift;
}
sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone)
{
- return (sector_t)zone->id << zmd->zone_nr_blocks_shift;
+ unsigned int zone_id = dmz_dev_zone_id(zmd, zone);
+
+ return (sector_t)zone_id << zmd->zone_nr_blocks_shift;
}
struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone)
{
+ if (WARN_ON(!zone))
+ return &zmd->dev[0];
+
+ if (zmd->nr_devs > 1 &&
+ zone->id >= zmd->dev[1].zone_offset)
+ return &zmd->dev[1];
+
return &zmd->dev[0];
}
@@ -275,17 +313,29 @@ unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd)
const char *dmz_metadata_label(struct dmz_metadata *zmd)
{
- return (const char *)zmd->devname;
+ return (const char *)zmd->label;
}
bool dmz_check_dev(struct dmz_metadata *zmd)
{
- return dmz_check_bdev(&zmd->dev[0]);
+ unsigned int i;
+
+ for (i = 0; i < zmd->nr_devs; i++) {
+ if (!dmz_check_bdev(&zmd->dev[i]))
+ return false;
+ }
+ return true;
}
bool dmz_dev_is_dying(struct dmz_metadata *zmd)
{
- return dmz_bdev_is_dying(&zmd->dev[0]);
+ unsigned int i;
+
+ for (i = 0; i < zmd->nr_devs; i++) {
+ if (dmz_bdev_is_dying(&zmd->dev[i]))
+ return true;
+ }
+ return false;
}
/*
@@ -687,6 +737,9 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op,
struct bio *bio;
int ret;
+ if (WARN_ON(!dev))
+ return -EIO;
+
if (dmz_bdev_is_dying(dev))
return -EIO;
@@ -711,19 +764,32 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op,
*/
static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
{
- sector_t block = zmd->sb[set].block;
struct dmz_mblock *mblk = zmd->sb[set].mblk;
struct dmz_super *sb = zmd->sb[set].sb;
struct dmz_dev *dev = zmd->sb[set].dev;
+ sector_t sb_block;
u64 sb_gen = zmd->sb_gen + 1;
int ret;
sb->magic = cpu_to_le32(DMZ_MAGIC);
- sb->version = cpu_to_le32(DMZ_META_VER);
+
+ sb->version = cpu_to_le32(zmd->sb_version);
+ if (zmd->sb_version > 1) {
+ BUILD_BUG_ON(UUID_SIZE != 16);
+ export_uuid(sb->dmz_uuid, &zmd->uuid);
+ memcpy(sb->dmz_label, zmd->label, BDEVNAME_SIZE);
+ export_uuid(sb->dev_uuid, &dev->uuid);
+ }
sb->gen = cpu_to_le64(sb_gen);
- sb->sb_block = cpu_to_le64(block);
+ /*
+ * The metadata always references the absolute block address,
+	 * i.e. relative to the entire block range, not the per-device
+ * block address.
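+	 * For example, the tertiary super block is written at the
+	 * device-relative block returned by dmz_start_block(), but its
+	 * sb_block field records the absolute position
+	 * zone->id << zone_nr_blocks_shift.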
+ */
+	sb_block = (sector_t)zmd->sb[set].zone->id << zmd->zone_nr_blocks_shift;
+ sb->sb_block = cpu_to_le64(sb_block);
sb->nr_meta_blocks = cpu_to_le32(zmd->nr_meta_blocks);
sb->nr_reserved_seq = cpu_to_le32(zmd->nr_reserved_seq);
sb->nr_chunks = cpu_to_le32(zmd->nr_chunks);
@@ -734,7 +800,8 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
sb->crc = 0;
sb->crc = cpu_to_le32(crc32_le(sb_gen, (unsigned char *)sb, DMZ_BLOCK_SIZE));
- ret = dmz_rdwr_block(dev, REQ_OP_WRITE, block, mblk->page);
+ ret = dmz_rdwr_block(dev, REQ_OP_WRITE, zmd->sb[set].block,
+ mblk->page);
if (ret == 0)
ret = blkdev_issue_flush(dev->bdev, GFP_NOIO, NULL);
@@ -915,6 +982,23 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set)
u32 crc, stored_crc;
u64 gen;
+ if (le32_to_cpu(sb->magic) != DMZ_MAGIC) {
+ dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)",
+ DMZ_MAGIC, le32_to_cpu(sb->magic));
+ return -ENXIO;
+ }
+
+ zmd->sb_version = le32_to_cpu(sb->version);
+ if (zmd->sb_version > DMZ_META_VER) {
+ dmz_dev_err(dev, "Invalid meta version (needed %d, got %d)",
+ DMZ_META_VER, zmd->sb_version);
+ return -EINVAL;
+ }
+	if ((zmd->sb_version < 2) && (set == 2)) {
+ dmz_dev_err(dev, "Tertiary superblocks are not supported");
+ return -EINVAL;
+ }
+
gen = le64_to_cpu(sb->gen);
stored_crc = le32_to_cpu(sb->crc);
sb->crc = 0;
@@ -925,16 +1009,45 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set)
return -ENXIO;
}
- if (le32_to_cpu(sb->magic) != DMZ_MAGIC) {
- dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)",
- DMZ_MAGIC, le32_to_cpu(sb->magic));
- return -ENXIO;
- }
+ if (zmd->sb_version > 1) {
+ uuid_t sb_uuid;
+
+ import_uuid(&sb_uuid, sb->dmz_uuid);
+ if (uuid_is_null(&sb_uuid)) {
+ dmz_dev_err(dev, "NULL DM-Zoned uuid");
+ return -ENXIO;
+ } else if (uuid_is_null(&zmd->uuid)) {
+ uuid_copy(&zmd->uuid, &sb_uuid);
+ } else if (!uuid_equal(&zmd->uuid, &sb_uuid)) {
+ dmz_dev_err(dev, "mismatching DM-Zoned uuid, "
+ "is %pUl expected %pUl",
+ &sb_uuid, &zmd->uuid);
+ return -ENXIO;
+ }
+ if (!strlen(zmd->label))
+ memcpy(zmd->label, sb->dmz_label, BDEVNAME_SIZE);
+ else if (memcmp(zmd->label, sb->dmz_label, BDEVNAME_SIZE)) {
+ dmz_dev_err(dev, "mismatching DM-Zoned label, "
+ "is %s expected %s",
+ sb->dmz_label, zmd->label);
+ return -ENXIO;
+ }
+ import_uuid(&dev->uuid, sb->dev_uuid);
+ if (uuid_is_null(&dev->uuid)) {
+ dmz_dev_err(dev, "NULL device uuid");
+ return -ENXIO;
+ }
- if (le32_to_cpu(sb->version) != DMZ_META_VER) {
- dmz_dev_err(dev, "Invalid meta version (needed %d, got %d)",
- DMZ_META_VER, le32_to_cpu(sb->version));
- return -ENXIO;
+ if (set == 2) {
+ /*
+ * Generation number should be 0, but it doesn't
+ * really matter if it isn't.
+ */
+ if (gen != 0)
+ dmz_dev_warn(dev, "Invalid generation %llu",
+ gen);
+ return 0;
+ }
}
nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + zmd->zone_nr_blocks - 1)
@@ -1185,21 +1298,38 @@ static int dmz_load_sb(struct dmz_metadata *zmd)
"Using super block %u (gen %llu)",
zmd->mblk_primary, zmd->sb_gen);
+ if ((zmd->sb_version > 1) && zmd->sb[2].zone) {
+ zmd->sb[2].block = dmz_start_block(zmd, zmd->sb[2].zone);
+ zmd->sb[2].dev = dmz_zone_to_dev(zmd, zmd->sb[2].zone);
+ ret = dmz_get_sb(zmd, 2);
+ if (ret) {
+ dmz_dev_err(zmd->sb[2].dev,
+ "Read tertiary super block failed");
+ return ret;
+ }
+ ret = dmz_check_sb(zmd, 2);
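+		/*
+		 * A bad tertiary super block is not fatal; only a
+		 * metadata version mismatch (-EINVAL) aborts the load.
+		 */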
+ if (ret == -EINVAL)
+ return ret;
+ }
return 0;
}
/*
* Initialize a zone descriptor.
*/
-static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data)
+static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data)
{
struct dmz_metadata *zmd = data;
+ struct dmz_dev *dev = zmd->nr_devs > 1 ? &zmd->dev[1] : &zmd->dev[0];
+ int idx = num + dev->zone_offset;
struct dm_zone *zone = &zmd->zones[idx];
- struct dmz_dev *dev = zmd->dev;
- /* Ignore the eventual last runt (smaller) zone */
if (blkz->len != zmd->zone_nr_sectors) {
- if (blkz->start + blkz->len == dev->capacity)
+ if (zmd->sb_version > 1) {
+			/* Mark a possible runt (smaller) zone offline */
+ set_bit(DMZ_OFFLINE, &zone->flags);
+ return 0;
+ } else if (blkz->start + blkz->len == dev->capacity)
return 0;
return -ENXIO;
}
@@ -1234,16 +1364,45 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data)
zmd->nr_useable_zones++;
if (dmz_is_rnd(zone)) {
zmd->nr_rnd_zones++;
- if (!zmd->sb[0].zone) {
- /* Super block zone */
+ if (zmd->nr_devs == 1 && !zmd->sb[0].zone) {
+ /* Primary super block zone */
zmd->sb[0].zone = zone;
}
}
+ if (zmd->nr_devs > 1 && !zmd->sb[2].zone) {
+ /* Tertiary superblock zone */
+ zmd->sb[2].zone = zone;
+ }
}
return 0;
}
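+
+/*
+ * Set up emulated conventional zones covering a regular block device;
+ * a trailing runt zone, if any, is marked offline.
+ */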
+static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev)
+{
+ int idx;
+ sector_t zone_offset = 0;
+
+	for (idx = 0; idx < dev->nr_zones; idx++) {
+ struct dm_zone *zone = &zmd->zones[idx];
+
+ INIT_LIST_HEAD(&zone->link);
+ atomic_set(&zone->refcount, 0);
+ zone->id = idx;
+ zone->chunk = DMZ_MAP_UNMAPPED;
+ set_bit(DMZ_RND, &zone->flags);
+ zone->wp_block = 0;
+ zmd->nr_rnd_zones++;
+ zmd->nr_useable_zones++;
+ if (dev->capacity - zone_offset < zmd->zone_nr_sectors) {
+ /* Disable runt zone */
+ set_bit(DMZ_OFFLINE, &zone->flags);
+ break;
+ }
+ zone_offset += zmd->zone_nr_sectors;
+ }
+}
+
/*
* Free zones descriptors.
*/
@@ -1259,11 +1418,11 @@ static void dmz_drop_zones(struct dmz_metadata *zmd)
*/
static int dmz_init_zones(struct dmz_metadata *zmd)
{
- struct dmz_dev *dev = &zmd->dev[0];
- int ret;
+ int i, ret;
+ struct dmz_dev *zoned_dev = &zmd->dev[0];
/* Init */
- zmd->zone_nr_sectors = dev->zone_nr_sectors;
+ zmd->zone_nr_sectors = zmd->dev[0].zone_nr_sectors;
zmd->zone_nr_sectors_shift = ilog2(zmd->zone_nr_sectors);
zmd->zone_nr_blocks = dmz_sect2blk(zmd->zone_nr_sectors);
zmd->zone_nr_blocks_shift = ilog2(zmd->zone_nr_blocks);
@@ -1274,7 +1433,14 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
DMZ_BLOCK_SIZE_BITS);
/* Allocate zone array */
- zmd->nr_zones = dev->nr_zones;
+ zmd->nr_zones = 0;
+ for (i = 0; i < zmd->nr_devs; i++)
+ zmd->nr_zones += zmd->dev[i].nr_zones;
+
+ if (!zmd->nr_zones) {
+ DMERR("(%s): No zones found", zmd->devname);
+ return -ENXIO;
+ }
zmd->zones = kcalloc(zmd->nr_zones, sizeof(struct dm_zone), GFP_KERNEL);
if (!zmd->zones)
return -ENOMEM;
@@ -1282,14 +1448,27 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
DMDEBUG("(%s): Using %zu B for zone information",
zmd->devname, sizeof(struct dm_zone) * zmd->nr_zones);
+ if (zmd->nr_devs > 1) {
+ dmz_emulate_zones(zmd, &zmd->dev[0]);
+ /*
+ * Primary superblock zone is always at zone 0 when multiple
+ * drives are present.
+ */
+ zmd->sb[0].zone = &zmd->zones[0];
+
+ zoned_dev = &zmd->dev[1];
+ }
+
/*
* Get zone information and initialize zone descriptors. At the same
* time, determine where the super block should be: first block of the
* first randomly writable zone.
*/
- ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES, dmz_init_zone,
- zmd);
+ ret = blkdev_report_zones(zoned_dev->bdev, 0, BLK_ALL_ZONES,
+ dmz_init_zone, zmd);
if (ret < 0) {
+ DMDEBUG("(%s): Failed to report zones, error %d",
+ zmd->devname, ret);
dmz_drop_zones(zmd);
return ret;
}
@@ -1325,6 +1504,9 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
unsigned int noio_flag;
int ret;
+ if (dev->flags & DMZ_BDEV_REGULAR)
+ return 0;
+
/*
* Get zone information from disk. Since blkdev_report_zones() uses
* GFP_KERNEL by default for memory allocations, set the per-task
@@ -2475,19 +2657,34 @@ static void dmz_print_dev(struct dmz_metadata *zmd, int num)
{
struct dmz_dev *dev = &zmd->dev[num];
- dmz_dev_info(dev, "Host-%s zoned block device",
- bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ?
- "aware" : "managed");
- dmz_dev_info(dev, " %llu 512-byte logical sectors",
- (u64)dev->capacity);
- dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors",
- dev->nr_zones, (u64)zmd->zone_nr_sectors);
+ if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE)
+ dmz_dev_info(dev, "Regular block device");
+ else
+ dmz_dev_info(dev, "Host-%s zoned block device",
+ bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ?
+ "aware" : "managed");
+ if (zmd->sb_version > 1) {
+ sector_t sector_offset =
+ dev->zone_offset << zmd->zone_nr_sectors_shift;
+
+ dmz_dev_info(dev, " %llu 512-byte logical sectors (offset %llu)",
+ (u64)dev->capacity, (u64)sector_offset);
+ dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors (offset %llu)",
+ dev->nr_zones, (u64)zmd->zone_nr_sectors,
+ (u64)dev->zone_offset);
+ } else {
+ dmz_dev_info(dev, " %llu 512-byte logical sectors",
+ (u64)dev->capacity);
+ dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors",
+ dev->nr_zones, (u64)zmd->zone_nr_sectors);
+ }
}
/*
* Initialize the zoned metadata.
*/
-int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata,
+int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
+ struct dmz_metadata **metadata,
const char *devname)
{
struct dmz_metadata *zmd;
@@ -2501,6 +2698,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata,
strcpy(zmd->devname, devname);
zmd->dev = dev;
+ zmd->nr_devs = num_dev;
zmd->mblk_rbtree = RB_ROOT;
init_rwsem(&zmd->mblk_sem);
mutex_init(&zmd->mblk_flush_lock);
@@ -2535,11 +2733,24 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata,
/* Set metadata zones starting from sb_zone */
for (i = 0; i < zmd->nr_meta_zones << 1; i++) {
zone = dmz_get(zmd, zmd->sb[0].zone->id + i);
- if (!dmz_is_rnd(zone))
+ if (!dmz_is_rnd(zone)) {
+ dmz_zmd_err(zmd,
+ "metadata zone %d is not random", i);
+ ret = -ENXIO;
goto err;
+ }
+ set_bit(DMZ_META, &zone->flags);
+ }
+ if (zmd->sb[2].zone) {
+ zone = dmz_get(zmd, zmd->sb[2].zone->id);
+ if (!zone) {
+ dmz_zmd_err(zmd,
+ "Tertiary metadata zone not present");
+ ret = -ENXIO;
+ goto err;
+ }
set_bit(DMZ_META, &zone->flags);
}
-
/* Load mapping table */
ret = dmz_load_mapping(zmd);
if (ret)
@@ -2564,8 +2775,9 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata,
goto err;
}
- dmz_zmd_info(zmd, "DM-Zoned metadata version %d", DMZ_META_VER);
- dmz_print_dev(zmd, 0);
+ dmz_zmd_info(zmd, "DM-Zoned metadata version %d", zmd->sb_version);
+ for (i = 0; i < zmd->nr_devs; i++)
+ dmz_print_dev(zmd, i);
dmz_zmd_info(zmd, " %u zones of %llu 512-byte logical sectors",
zmd->nr_zones, (u64)zmd->zone_nr_sectors);
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index a09fb78ffe88..ea43f6892ced 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -13,6 +13,8 @@
#define DMZ_MIN_BIOS 8192
+#define DMZ_MAX_DEVS 2
+
/*
* Zone BIO context.
*/
@@ -38,7 +40,7 @@ struct dm_chunk_work {
* Target descriptor.
*/
struct dmz_target {
- struct dm_dev *ddev;
+ struct dm_dev *ddev[DMZ_MAX_DEVS];
unsigned long flags;
@@ -81,7 +83,7 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
bio->bi_status = status;
- if (bio->bi_status != BLK_STS_OK)
+ if (bioctx->dev && bio->bi_status != BLK_STS_OK)
bioctx->dev->flags |= DMZ_CHECK_BDEV;
if (refcount_dec_and_test(&bioctx->ref)) {
@@ -690,60 +692,64 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
/*
* Get zoned device information.
*/
-static int dmz_get_zoned_device(struct dm_target *ti, char *path)
+static int dmz_get_zoned_device(struct dm_target *ti, char *path,
+ int idx, int nr_devs)
{
struct dmz_target *dmz = ti->private;
- struct request_queue *q;
+ struct dm_dev *ddev;
struct dmz_dev *dev;
- sector_t aligned_capacity;
int ret;
+ struct block_device *bdev;
/* Get the target device */
- ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &dmz->ddev);
+ ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &ddev);
if (ret) {
ti->error = "Get target device failed";
- dmz->ddev = NULL;
return ret;
}
- dev = kzalloc(sizeof(struct dmz_dev), GFP_KERNEL);
- if (!dev) {
- ret = -ENOMEM;
- goto err;
+ bdev = ddev->bdev;
+ if (bdev_zoned_model(bdev) == BLK_ZONED_NONE) {
+ if (nr_devs == 1) {
+ ti->error = "Invalid regular device";
+ goto err;
+ }
+ if (idx != 0) {
+ ti->error = "First device must be a regular device";
+ goto err;
+ }
+ if (dmz->ddev[0]) {
+ ti->error = "Too many regular devices";
+ goto err;
+ }
+ dev = &dmz->dev[idx];
+ dev->flags = DMZ_BDEV_REGULAR;
+ } else {
+ if (dmz->ddev[idx]) {
+ ti->error = "Too many zoned devices";
+ goto err;
+ }
+ if (nr_devs > 1 && idx == 0) {
+ ti->error = "First device must be a regular device";
+ goto err;
+ }
+ dev = &dmz->dev[idx];
}
-
- dev->bdev = dmz->ddev->bdev;
+ dev->bdev = bdev;
(void)bdevname(dev->bdev, dev->name);
- if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) {
- ti->error = "Not a zoned block device";
- ret = -EINVAL;
- goto err;
- }
-
- q = bdev_get_queue(dev->bdev);
- dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
- aligned_capacity = dev->capacity &
- ~((sector_t)blk_queue_zone_sectors(q) - 1);
- if (ti->begin ||
- ((ti->len != dev->capacity) && (ti->len != aligned_capacity))) {
- ti->error = "Partial mapping not supported";
- ret = -EINVAL;
+ dev->capacity = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
+ if (ti->begin) {
+ ti->error = "Partial mapping is not supported";
goto err;
}
- dev->zone_nr_sectors = blk_queue_zone_sectors(q);
-
- dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk);
-
- dmz->dev = dev;
+ dmz->ddev[idx] = ddev;
return 0;
err:
- dm_put_device(ti, dmz->ddev);
- kfree(dev);
-
- return ret;
+ dm_put_device(ti, ddev);
+ return -EINVAL;
}
/*
@@ -752,10 +758,56 @@ err:
static void dmz_put_zoned_device(struct dm_target *ti)
{
struct dmz_target *dmz = ti->private;
+ int i;
- dm_put_device(ti, dmz->ddev);
- kfree(dmz->dev);
- dmz->dev = NULL;
+ for (i = 0; i < DMZ_MAX_DEVS; i++) {
+ if (dmz->ddev[i]) {
+ dm_put_device(ti, dmz->ddev[i]);
+ dmz->ddev[i] = NULL;
+ }
+ }
+}
+
+static int dmz_fixup_devices(struct dm_target *ti)
+{
+ struct dmz_target *dmz = ti->private;
+ struct dmz_dev *reg_dev, *zoned_dev;
+ struct request_queue *q;
+
+ /*
+ * When we have two devices, the first one must be a regular block
+ * device and the second a zoned block device.
+ */
+ if (dmz->ddev[0] && dmz->ddev[1]) {
+ reg_dev = &dmz->dev[0];
+ if (!(reg_dev->flags & DMZ_BDEV_REGULAR)) {
+ ti->error = "Primary disk is not a regular device";
+ return -EINVAL;
+ }
+ zoned_dev = &dmz->dev[1];
+ if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
+ ti->error = "Secondary disk is not a zoned device";
+ return -EINVAL;
+ }
+ } else {
+ reg_dev = NULL;
+ zoned_dev = &dmz->dev[0];
+ if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
+ ti->error = "Disk is not a zoned device";
+ return -EINVAL;
+ }
+ }
+ q = bdev_get_queue(zoned_dev->bdev);
+ zoned_dev->zone_nr_sectors = blk_queue_zone_sectors(q);
+ zoned_dev->nr_zones = blkdev_nr_zones(zoned_dev->bdev->bd_disk);
+
+ if (reg_dev) {
+ reg_dev->zone_nr_sectors = zoned_dev->zone_nr_sectors;
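+		/*
+		 * Round up so that a trailing runt zone is counted;
+		 * dmz_emulate_zones() will mark it offline.
+		 */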
+ reg_dev->nr_zones = DIV_ROUND_UP(reg_dev->capacity,
+ reg_dev->zone_nr_sectors);
+ zoned_dev->zone_offset = reg_dev->nr_zones;
+ }
+ return 0;
}
/*
@@ -764,11 +816,10 @@ static void dmz_put_zoned_device(struct dm_target *ti)
static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct dmz_target *dmz;
- struct dmz_dev *dev;
int ret;
/* Check arguments */
- if (argc != 1) {
+ if (argc < 1 || argc > 2) {
ti->error = "Invalid argument count";
return -EINVAL;
}
@@ -779,18 +830,34 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->error = "Unable to allocate the zoned target descriptor";
return -ENOMEM;
}
+	dmz->dev = kcalloc(DMZ_MAX_DEVS, sizeof(struct dmz_dev), GFP_KERNEL);
+ if (!dmz->dev) {
+ ti->error = "Unable to allocate the zoned device descriptors";
+ kfree(dmz);
+ return -ENOMEM;
+ }
ti->private = dmz;
/* Get the target zoned block device */
- ret = dmz_get_zoned_device(ti, argv[0]);
+ ret = dmz_get_zoned_device(ti, argv[0], 0, argc);
+ if (ret)
+ goto err;
+
+ if (argc == 2) {
+ ret = dmz_get_zoned_device(ti, argv[1], 1, argc);
+ if (ret) {
+ dmz_put_zoned_device(ti);
+ goto err;
+ }
+ }
+ ret = dmz_fixup_devices(ti);
if (ret) {
- dmz->ddev = NULL;
+ dmz_put_zoned_device(ti);
goto err;
}
/* Initialize metadata */
- dev = dmz->dev;
- ret = dmz_ctr_metadata(dev, &dmz->metadata,
+ ret = dmz_ctr_metadata(dmz->dev, argc, &dmz->metadata,
dm_table_device_name(ti->table));
if (ret) {
ti->error = "Metadata initialization failed";
@@ -867,6 +934,7 @@ err_meta:
err_dev:
dmz_put_zoned_device(ti);
err:
+ kfree(dmz->dev);
kfree(dmz);
return ret;
@@ -897,6 +965,7 @@ static void dmz_dtr(struct dm_target *ti)
mutex_destroy(&dmz->chunk_lock);
+ kfree(dmz->dev);
kfree(dmz);
}
@@ -971,10 +1040,17 @@ static int dmz_iterate_devices(struct dm_target *ti,
iterate_devices_callout_fn fn, void *data)
{
struct dmz_target *dmz = ti->private;
- struct dmz_dev *dev = dmz->dev;
- sector_t capacity = dev->capacity & ~(dmz_zone_nr_sectors(dmz->metadata) - 1);
-
- return fn(ti, dmz->ddev, 0, capacity, data);
+ unsigned int zone_nr_sectors = dmz_zone_nr_sectors(dmz->metadata);
+ sector_t capacity;
+ int r;
+
+ capacity = dmz->dev[0].capacity & ~(zone_nr_sectors - 1);
+ r = fn(ti, dmz->ddev[0], 0, capacity, data);
+ if (!r && dmz->ddev[1]) {
+ capacity = dmz->dev[1].capacity & ~(zone_nr_sectors - 1);
+ r = fn(ti, dmz->ddev[1], 0, capacity, data);
+ }
+ return r;
}
static void dmz_status(struct dm_target *ti, status_type_t type,
@@ -984,6 +1060,7 @@ static void dmz_status(struct dm_target *ti, status_type_t type,
struct dmz_target *dmz = ti->private;
ssize_t sz = 0;
char buf[BDEVNAME_SIZE];
+ struct dmz_dev *dev;
switch (type) {
case STATUSTYPE_INFO:
@@ -995,8 +1072,14 @@ static void dmz_status(struct dm_target *ti, status_type_t type,
dmz_nr_seq_zones(dmz->metadata));
break;
case STATUSTYPE_TABLE:
- format_dev_t(buf, dmz->dev->bdev->bd_dev);
+ dev = &dmz->dev[0];
+ format_dev_t(buf, dev->bdev->bd_dev);
DMEMIT("%s", buf);
+ if (dmz->dev[1].bdev) {
+ dev = &dmz->dev[1];
+ format_dev_t(buf, dev->bdev->bd_dev);
+ DMEMIT(" %s", buf);
+ }
break;
}
return;
@@ -1018,7 +1101,7 @@ static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv,
static struct target_type dmz_type = {
.name = "zoned",
- .version = {1, 1, 0},
+ .version = {2, 0, 0},
.features = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM,
.module = THIS_MODULE,
.ctr = dmz_ctr,
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index 2629bd51fa26..4971a765be55 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -52,10 +52,12 @@ struct dmz_dev {
struct block_device *bdev;
char name[BDEVNAME_SIZE];
+ uuid_t uuid;
sector_t capacity;
unsigned int nr_zones;
+ unsigned int zone_offset;
unsigned int flags;
@@ -69,6 +71,7 @@ struct dmz_dev {
/* Device flags. */
#define DMZ_BDEV_DYING (1 << 0)
#define DMZ_CHECK_BDEV (2 << 0)
+#define DMZ_BDEV_REGULAR (4 << 0)
/*
* Zone descriptor.
@@ -163,8 +166,8 @@ struct dmz_reclaim;
/*
* Functions defined in dm-zoned-metadata.c
*/
-int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **zmd,
- const char *devname);
+int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
+ struct dmz_metadata **zmd, const char *devname);
void dmz_dtr_metadata(struct dmz_metadata *zmd);
int dmz_resume_metadata(struct dmz_metadata *zmd);