Diffstat (limited to 'drivers/md/dm-zoned-metadata.c')
-rw-r--r--  drivers/md/dm-zoned-metadata.c | 219
1 file changed, 97 insertions(+), 122 deletions(-)
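This diff folds several tree-wide API migrations into dm-zoned's metadata code: bio_alloc() now takes the target block device and the request flags directly (so bio_set_dev(), bio_set_op_attrs() and the allocation failure checks go away), __bio_add_page() is used for the single page that is guaranteed to fit, blkdev_issue_flush() and blkdev_zone_mgmt() no longer take a GFP mask (the latter now wrapped in memalloc_noio_save()/restore()), and the embedded struct shrinker is replaced by a dynamically allocated one. As a rough sketch of the resulting metadata-read submission path (restating the helpers visible in the hunks below, not new code):

	bio = bio_alloc(dev->bdev, 1, REQ_OP_READ | REQ_META | REQ_PRIO,
			GFP_NOIO);
	bio->bi_iter.bi_sector = dmz_blk2sect(block);
	bio->bi_private = mblk;
	bio->bi_end_io = dmz_mblock_bio_end_io;
	__bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);	/* one page always fits */
	submit_bio(bio);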
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 130b5a6d9f12..deff22ecccbb 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -187,7 +187,7 @@ struct dmz_metadata {
struct rb_root mblk_rbtree;
struct list_head mblk_lru_list;
struct list_head mblk_dirty_list;
- struct shrinker mblk_shrinker;
+ struct shrinker *mblk_shrinker;
/* Zone allocation management */
struct mutex map_lock;
@@ -245,11 +245,6 @@ unsigned int dmz_zone_nr_blocks(struct dmz_metadata *zmd)
return zmd->zone_nr_blocks;
}
-unsigned int dmz_zone_nr_blocks_shift(struct dmz_metadata *zmd)
-{
- return zmd->zone_nr_blocks_shift;
-}
-
unsigned int dmz_zone_nr_sectors(struct dmz_metadata *zmd)
{
return zmd->zone_nr_sectors;
@@ -550,11 +545,8 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
if (!mblk)
return ERR_PTR(-ENOMEM);
- bio = bio_alloc(GFP_NOIO, 1);
- if (!bio) {
- dmz_free_mblock(zmd, mblk);
- return ERR_PTR(-ENOMEM);
- }
+ bio = bio_alloc(dev->bdev, 1, REQ_OP_READ | REQ_META | REQ_PRIO,
+ GFP_NOIO);
spin_lock(&zmd->mblk_lock);
@@ -578,11 +570,9 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
/* Submit read BIO */
bio->bi_iter.bi_sector = dmz_blk2sect(block);
- bio_set_dev(bio, dev->bdev);
bio->bi_private = mblk;
bio->bi_end_io = dmz_mblock_bio_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO);
- bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
+ __bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
submit_bio(bio);
return mblk;
@@ -620,7 +610,7 @@ static unsigned long dmz_shrink_mblock_cache(struct dmz_metadata *zmd,
static unsigned long dmz_mblock_shrinker_count(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct dmz_metadata *zmd = container_of(shrink, struct dmz_metadata, mblk_shrinker);
+ struct dmz_metadata *zmd = shrink->private_data;
return atomic_read(&zmd->nr_mblks);
}
@@ -631,7 +621,7 @@ static unsigned long dmz_mblock_shrinker_count(struct shrinker *shrink,
static unsigned long dmz_mblock_shrinker_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct dmz_metadata *zmd = container_of(shrink, struct dmz_metadata, mblk_shrinker);
+ struct dmz_metadata *zmd = shrink->private_data;
unsigned long count;
spin_lock(&zmd->mblk_lock);
@@ -725,20 +715,15 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
if (dmz_bdev_is_dying(dev))
return -EIO;
- bio = bio_alloc(GFP_NOIO, 1);
- if (!bio) {
- set_bit(DMZ_META_ERROR, &mblk->state);
- return -ENOMEM;
- }
+ bio = bio_alloc(dev->bdev, 1, REQ_OP_WRITE | REQ_META | REQ_PRIO,
+ GFP_NOIO);
set_bit(DMZ_META_WRITING, &mblk->state);
bio->bi_iter.bi_sector = dmz_blk2sect(block);
- bio_set_dev(bio, dev->bdev);
bio->bi_private = mblk;
bio->bi_end_io = dmz_mblock_bio_end_io;
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO);
- bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
+ __bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
submit_bio(bio);
return 0;
@@ -747,7 +732,7 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
/*
* Read/write a metadata block.
*/
-static int dmz_rdwr_block(struct dmz_dev *dev, int op,
+static int dmz_rdwr_block(struct dmz_dev *dev, enum req_op op,
sector_t block, struct page *page)
{
struct bio *bio;
@@ -759,14 +744,10 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op,
if (dmz_bdev_is_dying(dev))
return -EIO;
- bio = bio_alloc(GFP_NOIO, 1);
- if (!bio)
- return -ENOMEM;
-
+ bio = bio_alloc(dev->bdev, 1, op | REQ_SYNC | REQ_META | REQ_PRIO,
+ GFP_NOIO);
bio->bi_iter.bi_sector = dmz_blk2sect(block);
- bio_set_dev(bio, dev->bdev);
- bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO);
- bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0);
+ __bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0);
ret = submit_bio_wait(bio);
bio_put(bio);
@@ -819,7 +800,7 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
ret = dmz_rdwr_block(dev, REQ_OP_WRITE, zmd->sb[set].block,
mblk->page);
if (ret == 0)
- ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
+ ret = blkdev_issue_flush(dev->bdev);
return ret;
}
@@ -862,7 +843,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
/* Flush drive cache (this will also sync data) */
if (ret == 0)
- ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
+ ret = blkdev_issue_flush(dev->bdev);
return ret;
}
@@ -933,7 +914,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
/* If there are no dirty metadata blocks, just flush the device cache */
if (list_empty(&write_list)) {
- ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
+ ret = blkdev_issue_flush(dev->bdev);
goto err;
}
@@ -1027,11 +1008,9 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb,
}
sb_block = le64_to_cpu(sb->sb_block);
- if (sb_block != (u64)dsb->zone->id << zmd->zone_nr_blocks_shift ) {
- dmz_dev_err(dev, "Invalid superblock position "
- "(is %llu expected %llu)",
- sb_block,
- (u64)dsb->zone->id << zmd->zone_nr_blocks_shift);
+ if (sb_block != (u64)dsb->zone->id << zmd->zone_nr_blocks_shift) {
+ dmz_dev_err(dev, "Invalid superblock position (is %llu expected %llu)",
+ sb_block, (u64)dsb->zone->id << zmd->zone_nr_blocks_shift);
return -EINVAL;
}
if (zmd->sb_version > 1) {
@@ -1044,16 +1023,14 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb,
} else if (uuid_is_null(&zmd->uuid)) {
uuid_copy(&zmd->uuid, &sb_uuid);
} else if (!uuid_equal(&zmd->uuid, &sb_uuid)) {
- dmz_dev_err(dev, "mismatching DM-Zoned uuid, "
- "is %pUl expected %pUl",
+ dmz_dev_err(dev, "mismatching DM-Zoned uuid, is %pUl expected %pUl",
&sb_uuid, &zmd->uuid);
return -ENXIO;
}
if (!strlen(zmd->label))
memcpy(zmd->label, sb->dmz_label, BDEVNAME_SIZE);
else if (memcmp(zmd->label, sb->dmz_label, BDEVNAME_SIZE)) {
- dmz_dev_err(dev, "mismatching DM-Zoned label, "
- "is %s expected %s",
+ dmz_dev_err(dev, "mismatching DM-Zoned label, is %s expected %s",
sb->dmz_label, zmd->label);
return -ENXIO;
}
@@ -1078,7 +1055,8 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb,
nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + zmd->zone_nr_blocks - 1)
>> zmd->zone_nr_blocks_shift;
if (!nr_meta_zones ||
- nr_meta_zones >= zmd->nr_rnd_zones) {
+ (zmd->nr_devs <= 1 && nr_meta_zones >= zmd->nr_rnd_zones) ||
+ (zmd->nr_devs > 1 && nr_meta_zones >= zmd->nr_cache_zones)) {
dmz_dev_err(dev, "Invalid number of metadata blocks");
return -ENXIO;
}
@@ -1114,8 +1092,8 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb,
*/
static int dmz_read_sb(struct dmz_metadata *zmd, struct dmz_sb *sb, int set)
{
- dmz_zmd_debug(zmd, "read superblock set %d dev %s block %llu",
- set, sb->dev->name, sb->block);
+ dmz_zmd_debug(zmd, "read superblock set %d dev %pg block %llu",
+ set, sb->dev->bdev, sb->block);
return dmz_rdwr_block(sb->dev, REQ_OP_READ,
sb->block, sb->mblk->page);
@@ -1359,7 +1337,7 @@ static int dmz_load_sb(struct dmz_metadata *zmd)
if (ret == -EINVAL)
goto out_kfree;
}
- out_kfree:
+out_kfree:
kfree(sb);
}
return ret;
@@ -1389,6 +1367,13 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data)
return -ENXIO;
}
+ /*
+ * Devices that have zones with a capacity smaller than the zone size
+ * (e.g. NVMe zoned namespaces) are not supported.
+ */
+ if (blkz->capacity != blkz->len)
+ return -ENXIO;
+
switch (blkz->type) {
case BLK_ZONE_TYPE_CONVENTIONAL:
set_bit(DMZ_RND, &zone->flags);
@@ -1436,7 +1421,7 @@ static int dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev)
int idx;
sector_t zone_offset = 0;
- for(idx = 0; idx < dev->nr_zones; idx++) {
+ for (idx = 0; idx < dev->nr_zones; idx++) {
struct dm_zone *zone;
zone = dmz_insert(zmd, idx, dev);
@@ -1463,7 +1448,7 @@ static void dmz_drop_zones(struct dmz_metadata *zmd)
{
int idx;
- for(idx = 0; idx < zmd->nr_zones; idx++) {
+ for (idx = 0; idx < zmd->nr_zones; idx++) {
struct dm_zone *zone = xa_load(&zmd->zones, idx);
kfree(zone);
@@ -1665,10 +1650,13 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
struct dmz_dev *dev = zone->dev;
+ unsigned int noio_flag;
+ noio_flag = memalloc_noio_save();
ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET,
dmz_start_sect(zmd, zone),
- zmd->zone_nr_sectors, GFP_NOIO);
+ zmd->zone_nr_sectors);
+ memalloc_noio_restore(noio_flag);
if (ret) {
dmz_dev_err(dev, "Reset zone %u failed %d",
zone->id, ret);
@@ -1711,7 +1699,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
if (IS_ERR(dmap_mblk))
return PTR_ERR(dmap_mblk);
zmd->map_mblk[i] = dmap_mblk;
- dmap = (struct dmz_map *) dmap_mblk->data;
+ dmap = dmap_mblk->data;
i++;
e = 0;
}
@@ -1842,7 +1830,7 @@ static void dmz_set_chunk_mapping(struct dmz_metadata *zmd, unsigned int chunk,
unsigned int dzone_id, unsigned int bzone_id)
{
struct dmz_mblock *dmap_mblk = zmd->map_mblk[chunk >> DMZ_MAP_ENTRIES_SHIFT];
- struct dmz_map *dmap = (struct dmz_map *) dmap_mblk->data;
+ struct dmz_map *dmap = dmap_mblk->data;
int map_idx = chunk & DMZ_MAP_ENTRIES_MASK;
dmap[map_idx].dzone_id = cpu_to_le32(dzone_id);
@@ -1949,7 +1937,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd,
unsigned int idx, bool idle)
{
struct dm_zone *dzone = NULL;
- struct dm_zone *zone, *last = NULL;
+ struct dm_zone *zone, *maxw_z = NULL;
struct list_head *zone_list;
/* If we have cache zones select from the cache zone list */
@@ -1961,18 +1949,37 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd,
} else
zone_list = &zmd->dev[idx].map_rnd_list;
+ /*
+ * Find the buffer zone with the heaviest weight or the first (oldest)
+ * data zone that can be reclaimed.
+ */
list_for_each_entry(zone, zone_list, link) {
if (dmz_is_buf(zone)) {
dzone = zone->bzone;
- if (dzone->dev->dev_idx != idx)
+ if (dmz_is_rnd(dzone) && dzone->dev->dev_idx != idx)
continue;
- if (!last) {
- last = dzone;
- continue;
- }
- if (last->weight < dzone->weight)
+ if (!maxw_z || maxw_z->weight < dzone->weight)
+ maxw_z = dzone;
+ } else {
+ dzone = zone;
+ if (dmz_lock_zone_reclaim(dzone))
+ return dzone;
+ }
+ }
+
+ if (maxw_z && dmz_lock_zone_reclaim(maxw_z))
+ return maxw_z;
+
+ /*
+ * If we come here, none of the zones inspected could be locked for
+ * reclaim. Try again, being more aggressive, that is, find the
+ * first zone that can be reclaimed regardless of its weight.
+ */
+ list_for_each_entry(zone, zone_list, link) {
+ if (dmz_is_buf(zone)) {
+ dzone = zone->bzone;
+ if (dmz_is_rnd(dzone) && dzone->dev->dev_idx != idx)
continue;
- dzone = last;
} else
dzone = zone;
if (dmz_lock_zone_reclaim(dzone))
@@ -2006,7 +2013,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd,
struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd,
unsigned int dev_idx, bool idle)
{
- struct dm_zone *zone;
+ struct dm_zone *zone = NULL;
/*
* Search for a zone candidate to reclaim: 2 cases are possible.
@@ -2019,7 +2026,7 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd,
dmz_lock_map(zmd);
if (list_empty(&zmd->reserved_seq_zones_list))
zone = dmz_get_seq_zone_for_reclaim(zmd, dev_idx);
- else
+ if (!zone)
zone = dmz_get_rnd_zone_for_reclaim(zmd, dev_idx, idle);
dmz_unlock_map(zmd);
@@ -2032,10 +2039,11 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd,
* allocated and used to map the chunk.
* The zone returned will be set to the active state.
*/
-struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chunk, int op)
+struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd,
+ unsigned int chunk, enum req_op op)
{
struct dmz_mblock *dmap_mblk = zmd->map_mblk[chunk >> DMZ_MAP_ENTRIES_SHIFT];
- struct dmz_map *dmap = (struct dmz_map *) dmap_mblk->data;
+ struct dmz_map *dmap = dmap_mblk->data;
int dmap_idx = chunk & DMZ_MAP_ENTRIES_MASK;
unsigned int dzone_id;
struct dm_zone *dzone = NULL;
@@ -2197,8 +2205,15 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned int dev_idx,
{
struct list_head *list;
struct dm_zone *zone;
- int i = 0;
+ int i;
+
+ /* Schedule reclaim to ensure free zones are available */
+ if (!(flags & DMZ_ALLOC_RECLAIM)) {
+ for (i = 0; i < zmd->nr_devs; i++)
+ dmz_schedule_reclaim(zmd->dev[i].reclaim);
+ }
+ i = 0;
again:
if (flags & DMZ_ALLOC_CACHE)
list = &zmd->unmap_cache_list;
@@ -2819,12 +2834,11 @@ static void dmz_print_dev(struct dmz_metadata *zmd, int num)
{
struct dmz_dev *dev = &zmd->dev[num];
- if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE)
+ if (!bdev_is_zoned(dev->bdev))
dmz_dev_info(dev, "Regular block device");
else
- dmz_dev_info(dev, "Host-%s zoned block device",
- bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ?
- "aware" : "managed");
+ dmz_dev_info(dev, "Host-managed zoned block device");
+
if (zmd->sb_version > 1) {
sector_t sector_offset =
dev->zone_offset << zmd->zone_nr_sectors_shift;
@@ -2919,17 +2933,23 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
*/
zmd->min_nr_mblks = 2 + zmd->nr_map_blocks + zmd->zone_nr_bitmap_blocks * 16;
zmd->max_nr_mblks = zmd->min_nr_mblks + 512;
- zmd->mblk_shrinker.count_objects = dmz_mblock_shrinker_count;
- zmd->mblk_shrinker.scan_objects = dmz_mblock_shrinker_scan;
- zmd->mblk_shrinker.seeks = DEFAULT_SEEKS;
/* Metadata cache shrinker */
- ret = register_shrinker(&zmd->mblk_shrinker);
- if (ret) {
- dmz_zmd_err(zmd, "Register metadata cache shrinker failed");
+ zmd->mblk_shrinker = shrinker_alloc(0, "dm-zoned-meta:(%u:%u)",
+ MAJOR(dev->bdev->bd_dev),
+ MINOR(dev->bdev->bd_dev));
+ if (!zmd->mblk_shrinker) {
+ ret = -ENOMEM;
+ dmz_zmd_err(zmd, "Allocate metadata cache shrinker failed");
goto err;
}
+ zmd->mblk_shrinker->count_objects = dmz_mblock_shrinker_count;
+ zmd->mblk_shrinker->scan_objects = dmz_mblock_shrinker_scan;
+ zmd->mblk_shrinker->private_data = zmd;
+
+ shrinker_register(zmd->mblk_shrinker);
+
dmz_zmd_info(zmd, "DM-Zoned metadata version %d", zmd->sb_version);
for (i = 0; i < zmd->nr_devs; i++)
dmz_print_dev(zmd, i);
@@ -2976,52 +2996,7 @@ err:
*/
void dmz_dtr_metadata(struct dmz_metadata *zmd)
{
- unregister_shrinker(&zmd->mblk_shrinker);
+ shrinker_free(zmd->mblk_shrinker);
dmz_cleanup_metadata(zmd);
kfree(zmd);
}
-
-/*
- * Check zone information on resume.
- */
-int dmz_resume_metadata(struct dmz_metadata *zmd)
-{
- struct dm_zone *zone;
- sector_t wp_block;
- unsigned int i;
- int ret;
-
- /* Check zones */
- for (i = 0; i < zmd->nr_zones; i++) {
- zone = dmz_get(zmd, i);
- if (!zone) {
- dmz_zmd_err(zmd, "Unable to get zone %u", i);
- return -EIO;
- }
- wp_block = zone->wp_block;
-
- ret = dmz_update_zone(zmd, zone);
- if (ret) {
- dmz_zmd_err(zmd, "Broken zone %u", i);
- return ret;
- }
-
- if (dmz_is_offline(zone)) {
- dmz_zmd_warn(zmd, "Zone %u is offline", i);
- continue;
- }
-
- /* Check write pointer */
- if (!dmz_is_seq(zone))
- zone->wp_block = 0;
- else if (zone->wp_block != wp_block) {
- dmz_zmd_err(zmd, "Zone %u: Invalid wp (%llu / %llu)",
- i, (u64)zone->wp_block, (u64)wp_block);
- zone->wp_block = wp_block;
- dmz_invalidate_blocks(zmd, zone, zone->wp_block,
- zmd->zone_nr_blocks - zone->wp_block);
- }
- }
-
- return 0;
-}
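
The shrinker rework in this patch is an instance of the generic migration from an embedded struct shrinker found via container_of() to a heap-allocated shrinker carrying a private_data pointer. A minimal, self-contained sketch of that pattern follows; my_cache, my_count and my_scan are made-up illustration names, not dm-zoned code:

	#include <linux/atomic.h>
	#include <linux/shrinker.h>

	struct my_cache {
		atomic_t nr_objects;
		struct shrinker *shrinker;	/* was: struct shrinker shrinker; */
	};

	static unsigned long my_count(struct shrinker *shrink,
				      struct shrink_control *sc)
	{
		/* was: container_of(shrink, struct my_cache, shrinker) */
		struct my_cache *c = shrink->private_data;

		return atomic_read(&c->nr_objects);
	}

	static unsigned long my_scan(struct shrinker *shrink,
				     struct shrink_control *sc)
	{
		/* A real shrinker would free up to sc->nr_to_scan objects here. */
		return SHRINK_STOP;
	}

	static int my_cache_register(struct my_cache *c)
	{
		c->shrinker = shrinker_alloc(0, "my-cache");
		if (!c->shrinker)
			return -ENOMEM;
		c->shrinker->count_objects = my_count;
		c->shrinker->scan_objects = my_scan;
		c->shrinker->private_data = c;
		shrinker_register(c->shrinker);	/* was: register_shrinker(&c->shrinker) */
		return 0;
	}

	static void my_cache_unregister(struct my_cache *c)
	{
		shrinker_free(c->shrinker);	/* was: unregister_shrinker(&c->shrinker) */
	}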