Diffstat (limited to 'drivers/md/dm-zoned-metadata.c')
-rw-r--r--   drivers/md/dm-zoned-metadata.c   219
1 file changed, 97 insertions, 122 deletions
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 130b5a6d9f12..deff22ecccbb 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -187,7 +187,7 @@ struct dmz_metadata {
 	struct rb_root		mblk_rbtree;
 	struct list_head	mblk_lru_list;
 	struct list_head	mblk_dirty_list;
-	struct shrinker		mblk_shrinker;
+	struct shrinker		*mblk_shrinker;
 
 	/* Zone allocation management */
 	struct mutex		map_lock;
@@ -245,11 +245,6 @@ unsigned int dmz_zone_nr_blocks(struct dmz_metadata *zmd)
 	return zmd->zone_nr_blocks;
 }
 
-unsigned int dmz_zone_nr_blocks_shift(struct dmz_metadata *zmd)
-{
-	return zmd->zone_nr_blocks_shift;
-}
-
 unsigned int dmz_zone_nr_sectors(struct dmz_metadata *zmd)
 {
 	return zmd->zone_nr_sectors;
@@ -550,11 +545,8 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
 	if (!mblk)
 		return ERR_PTR(-ENOMEM);
 
-	bio = bio_alloc(GFP_NOIO, 1);
-	if (!bio) {
-		dmz_free_mblock(zmd, mblk);
-		return ERR_PTR(-ENOMEM);
-	}
+	bio = bio_alloc(dev->bdev, 1, REQ_OP_READ | REQ_META | REQ_PRIO,
+			GFP_NOIO);
 
 	spin_lock(&zmd->mblk_lock);
 
@@ -578,11 +570,9 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
 
 	/* Submit read BIO */
 	bio->bi_iter.bi_sector = dmz_blk2sect(block);
-	bio_set_dev(bio, dev->bdev);
 	bio->bi_private = mblk;
 	bio->bi_end_io = dmz_mblock_bio_end_io;
-	bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO);
-	bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
+	__bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
 	submit_bio(bio);
 
 	return mblk;
@@ -620,7 +610,7 @@ static unsigned long dmz_shrink_mblock_cache(struct dmz_metadata *zmd,
 static unsigned long dmz_mblock_shrinker_count(struct shrinker *shrink,
 					       struct shrink_control *sc)
 {
-	struct dmz_metadata *zmd = container_of(shrink, struct dmz_metadata, mblk_shrinker);
+	struct dmz_metadata *zmd = shrink->private_data;
 
 	return atomic_read(&zmd->nr_mblks);
 }
@@ -631,7 +621,7 @@ static unsigned long dmz_mblock_shrinker_count(struct shrinker *shrink,
 static unsigned long dmz_mblock_shrinker_scan(struct shrinker *shrink,
 					      struct shrink_control *sc)
 {
-	struct dmz_metadata *zmd = container_of(shrink, struct dmz_metadata, mblk_shrinker);
+	struct dmz_metadata *zmd = shrink->private_data;
 	unsigned long count;
 
 	spin_lock(&zmd->mblk_lock);
@@ -725,20 +715,15 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
 	if (dmz_bdev_is_dying(dev))
 		return -EIO;
 
-	bio = bio_alloc(GFP_NOIO, 1);
-	if (!bio) {
-		set_bit(DMZ_META_ERROR, &mblk->state);
-		return -ENOMEM;
-	}
+	bio = bio_alloc(dev->bdev, 1, REQ_OP_WRITE | REQ_META | REQ_PRIO,
+			GFP_NOIO);
 
 	set_bit(DMZ_META_WRITING, &mblk->state);
 
 	bio->bi_iter.bi_sector = dmz_blk2sect(block);
-	bio_set_dev(bio, dev->bdev);
 	bio->bi_private = mblk;
 	bio->bi_end_io = dmz_mblock_bio_end_io;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO);
-	bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
+	__bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
 	submit_bio(bio);
 
 	return 0;
@@ -747,7 +732,7 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
 /*
  * Read/write a metadata block.
  */
-static int dmz_rdwr_block(struct dmz_dev *dev, int op,
+static int dmz_rdwr_block(struct dmz_dev *dev, enum req_op op,
 			  sector_t block, struct page *page)
 {
 	struct bio *bio;
@@ -759,14 +744,10 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op,
 	if (dmz_bdev_is_dying(dev))
 		return -EIO;
 
-	bio = bio_alloc(GFP_NOIO, 1);
-	if (!bio)
-		return -ENOMEM;
-
+	bio = bio_alloc(dev->bdev, 1, op | REQ_SYNC | REQ_META | REQ_PRIO,
+			GFP_NOIO);
 	bio->bi_iter.bi_sector = dmz_blk2sect(block);
-	bio_set_dev(bio, dev->bdev);
-	bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO);
-	bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0);
+	__bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0);
 	ret = submit_bio_wait(bio);
 	bio_put(bio);
 
@@ -819,7 +800,7 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
 	ret = dmz_rdwr_block(dev, REQ_OP_WRITE, zmd->sb[set].block,
 			     mblk->page);
 	if (ret == 0)
-		ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
+		ret = blkdev_issue_flush(dev->bdev);
 
 	return ret;
 }
@@ -862,7 +843,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
 
 	/* Flush drive cache (this will also sync data) */
 	if (ret == 0)
-		ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
+		ret = blkdev_issue_flush(dev->bdev);
 
 	return ret;
 }
@@ -933,7 +914,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
 
 	/* If there are no dirty metadata blocks, just flush the device cache */
 	if (list_empty(&write_list)) {
-		ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
+		ret = blkdev_issue_flush(dev->bdev);
 		goto err;
 	}
 
@@ -1027,11 +1008,9 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb,
 	}
 
 	sb_block = le64_to_cpu(sb->sb_block);
-	if (sb_block != (u64)dsb->zone->id << zmd->zone_nr_blocks_shift ) {
-		dmz_dev_err(dev, "Invalid superblock position "
-			    "(is %llu expected %llu)",
-			    sb_block,
-			    (u64)dsb->zone->id << zmd->zone_nr_blocks_shift);
+	if (sb_block != (u64)dsb->zone->id << zmd->zone_nr_blocks_shift) {
+		dmz_dev_err(dev, "Invalid superblock position (is %llu expected %llu)",
+			    sb_block, (u64)dsb->zone->id << zmd->zone_nr_blocks_shift);
 		return -EINVAL;
 	}
 	if (zmd->sb_version > 1) {
@@ -1044,16 +1023,14 @@
 		} else if (uuid_is_null(&zmd->uuid)) {
 			uuid_copy(&zmd->uuid, &sb_uuid);
 		} else if (!uuid_equal(&zmd->uuid, &sb_uuid)) {
-			dmz_dev_err(dev, "mismatching DM-Zoned uuid, "
-				    "is %pUl expected %pUl",
+			dmz_dev_err(dev, "mismatching DM-Zoned uuid, is %pUl expected %pUl",
 				    &sb_uuid, &zmd->uuid);
 			return -ENXIO;
 		}
 		if (!strlen(zmd->label))
 			memcpy(zmd->label, sb->dmz_label, BDEVNAME_SIZE);
 		else if (memcmp(zmd->label, sb->dmz_label, BDEVNAME_SIZE)) {
-			dmz_dev_err(dev, "mismatching DM-Zoned label, "
-				    "is %s expected %s",
+			dmz_dev_err(dev, "mismatching DM-Zoned label, is %s expected %s",
 				    sb->dmz_label, zmd->label);
 			return -ENXIO;
 		}
@@ -1078,7 +1055,8 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb,
 	nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + zmd->zone_nr_blocks - 1)
 			>> zmd->zone_nr_blocks_shift;
 	if (!nr_meta_zones ||
-	    nr_meta_zones >= zmd->nr_rnd_zones) {
+	    (zmd->nr_devs <= 1 && nr_meta_zones >= zmd->nr_rnd_zones) ||
+	    (zmd->nr_devs > 1 && nr_meta_zones >= zmd->nr_cache_zones)) {
 		dmz_dev_err(dev, "Invalid number of metadata blocks");
 		return -ENXIO;
 	}
@@ -1114,8 +1092,8 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb,
  */
 static int dmz_read_sb(struct dmz_metadata *zmd, struct dmz_sb *sb, int set)
 {
-	dmz_zmd_debug(zmd, "read superblock set %d dev %s block %llu",
-		      set, sb->dev->name, sb->block);
+	dmz_zmd_debug(zmd, "read superblock set %d dev %pg block %llu",
+		      set, sb->dev->bdev, sb->block);
 
 	return dmz_rdwr_block(sb->dev, REQ_OP_READ, sb->block,
 			      sb->mblk->page);
@@ -1359,7 +1337,7 @@ static int dmz_load_sb(struct dmz_metadata *zmd)
 			if (ret == -EINVAL)
 				goto out_kfree;
 		}
- out_kfree:
+out_kfree:
 		kfree(sb);
 	}
 	return ret;
@@ -1389,6 +1367,13 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data)
 		return -ENXIO;
 	}
 
+	/*
+	 * Devices that have zones with a capacity smaller than the zone size
+	 * (e.g. NVMe zoned namespaces) are not supported.
+	 */
+	if (blkz->capacity != blkz->len)
+		return -ENXIO;
+
 	switch (blkz->type) {
 	case BLK_ZONE_TYPE_CONVENTIONAL:
 		set_bit(DMZ_RND, &zone->flags);
@@ -1436,7 +1421,7 @@ static int dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev)
 	int idx;
 	sector_t zone_offset = 0;
 
-	for(idx = 0; idx < dev->nr_zones; idx++) {
+	for (idx = 0; idx < dev->nr_zones; idx++) {
 		struct dm_zone *zone;
 
 		zone = dmz_insert(zmd, idx, dev);
@@ -1463,7 +1448,7 @@ static void dmz_drop_zones(struct dmz_metadata *zmd)
 {
 	int idx;
 
-	for(idx = 0; idx < zmd->nr_zones; idx++) {
+	for (idx = 0; idx < zmd->nr_zones; idx++) {
 		struct dm_zone *zone = xa_load(&zmd->zones, idx);
 
 		kfree(zone);
@@ -1665,10 +1650,13 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
 
 	if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
 		struct dmz_dev *dev = zone->dev;
+		unsigned int noio_flag;
 
+		noio_flag = memalloc_noio_save();
 		ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET,
 				       dmz_start_sect(zmd, zone),
-				       zmd->zone_nr_sectors, GFP_NOIO);
+				       zmd->zone_nr_sectors);
+		memalloc_noio_restore(noio_flag);
 		if (ret) {
 			dmz_dev_err(dev, "Reset zone %u failed %d",
 				    zone->id, ret);
@@ -1711,7 +1699,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
 		if (IS_ERR(dmap_mblk))
 			return PTR_ERR(dmap_mblk);
 		zmd->map_mblk[i] = dmap_mblk;
-		dmap = (struct dmz_map *) dmap_mblk->data;
+		dmap = dmap_mblk->data;
 		i++;
 		e = 0;
 	}
@@ -1842,7 +1830,7 @@ static void dmz_set_chunk_mapping(struct dmz_metadata *zmd, unsigned int chunk,
 				  unsigned int dzone_id, unsigned int bzone_id)
 {
 	struct dmz_mblock *dmap_mblk = zmd->map_mblk[chunk >> DMZ_MAP_ENTRIES_SHIFT];
-	struct dmz_map *dmap = (struct dmz_map *) dmap_mblk->data;
+	struct dmz_map *dmap = dmap_mblk->data;
 	int map_idx = chunk & DMZ_MAP_ENTRIES_MASK;
 
 	dmap[map_idx].dzone_id = cpu_to_le32(dzone_id);
@@ -1949,7 +1937,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd,
 						    unsigned int idx, bool idle)
 {
 	struct dm_zone *dzone = NULL;
-	struct dm_zone *zone, *last = NULL;
+	struct dm_zone *zone, *maxw_z = NULL;
 	struct list_head *zone_list;
 
 	/* If we have cache zones select from the cache zone list */
@@ -1961,18 +1949,37 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd,
 	} else
 		zone_list = &zmd->dev[idx].map_rnd_list;
+	/*
+	 * Find the buffer zone with the heaviest weight or the first (oldest)
+	 * data zone that can be reclaimed.
+	 */
 	list_for_each_entry(zone, zone_list, link) {
 		if (dmz_is_buf(zone)) {
 			dzone = zone->bzone;
-			if (dzone->dev->dev_idx != idx)
+			if (dmz_is_rnd(dzone) && dzone->dev->dev_idx != idx)
 				continue;
-			if (!last) {
-				last = dzone;
-				continue;
-			}
-			if (last->weight < dzone->weight)
+			if (!maxw_z || maxw_z->weight < dzone->weight)
+				maxw_z = dzone;
+		} else {
+			dzone = zone;
+			if (dmz_lock_zone_reclaim(dzone))
+				return dzone;
+		}
+	}
+
+	if (maxw_z && dmz_lock_zone_reclaim(maxw_z))
+		return maxw_z;
+
+	/*
+	 * If we come here, none of the zones inspected could be locked for
+	 * reclaim. Try again, being more aggressive, that is, find the
+	 * first zone that can be reclaimed regardless of its weight.
+	 */
+	list_for_each_entry(zone, zone_list, link) {
+		if (dmz_is_buf(zone)) {
+			dzone = zone->bzone;
+			if (dmz_is_rnd(dzone) && dzone->dev->dev_idx != idx)
 				continue;
-			dzone = last;
 		} else
 			dzone = zone;
 		if (dmz_lock_zone_reclaim(dzone))
 			return dzone;
@@ -2006,7 +2013,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd,
 struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd,
 					 unsigned int dev_idx, bool idle)
 {
-	struct dm_zone *zone;
+	struct dm_zone *zone = NULL;
 
 	/*
 	 * Search for a zone candidate to reclaim: 2 cases are possible.
@@ -2019,7 +2026,7 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd,
 	dmz_lock_map(zmd);
 	if (list_empty(&zmd->reserved_seq_zones_list))
 		zone = dmz_get_seq_zone_for_reclaim(zmd, dev_idx);
-	else
+	if (!zone)
 		zone = dmz_get_rnd_zone_for_reclaim(zmd, dev_idx, idle);
 	dmz_unlock_map(zmd);
 
@@ -2032,10 +2039,11 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd,
  * allocated and used to map the chunk.
  * The zone returned will be set to the active state.
  */
-struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chunk, int op)
+struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd,
+				      unsigned int chunk, enum req_op op)
 {
 	struct dmz_mblock *dmap_mblk = zmd->map_mblk[chunk >> DMZ_MAP_ENTRIES_SHIFT];
-	struct dmz_map *dmap = (struct dmz_map *) dmap_mblk->data;
+	struct dmz_map *dmap = dmap_mblk->data;
 	int dmap_idx = chunk & DMZ_MAP_ENTRIES_MASK;
 	unsigned int dzone_id;
 	struct dm_zone *dzone = NULL;
@@ -2197,8 +2205,15 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned int dev_idx,
 {
 	struct list_head *list;
 	struct dm_zone *zone;
-	int i = 0;
+	int i;
+
+	/* Schedule reclaim to ensure free zones are available */
+	if (!(flags & DMZ_ALLOC_RECLAIM)) {
+		for (i = 0; i < zmd->nr_devs; i++)
+			dmz_schedule_reclaim(zmd->dev[i].reclaim);
+	}
+	i = 0;
 
 again:
 	if (flags & DMZ_ALLOC_CACHE)
 		list = &zmd->unmap_cache_list;
@@ -2819,12 +2834,11 @@ static void dmz_print_dev(struct dmz_metadata *zmd, int num)
 {
 	struct dmz_dev *dev = &zmd->dev[num];
 
-	if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE)
+	if (!bdev_is_zoned(dev->bdev))
 		dmz_dev_info(dev, "Regular block device");
 	else
-		dmz_dev_info(dev, "Host-%s zoned block device",
-			     bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ?
- "aware" : "managed"); + dmz_dev_info(dev, "Host-managed zoned block device"); + if (zmd->sb_version > 1) { sector_t sector_offset = dev->zone_offset << zmd->zone_nr_sectors_shift; @@ -2919,17 +2933,23 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, */ zmd->min_nr_mblks = 2 + zmd->nr_map_blocks + zmd->zone_nr_bitmap_blocks * 16; zmd->max_nr_mblks = zmd->min_nr_mblks + 512; - zmd->mblk_shrinker.count_objects = dmz_mblock_shrinker_count; - zmd->mblk_shrinker.scan_objects = dmz_mblock_shrinker_scan; - zmd->mblk_shrinker.seeks = DEFAULT_SEEKS; /* Metadata cache shrinker */ - ret = register_shrinker(&zmd->mblk_shrinker); - if (ret) { - dmz_zmd_err(zmd, "Register metadata cache shrinker failed"); + zmd->mblk_shrinker = shrinker_alloc(0, "dm-zoned-meta:(%u:%u)", + MAJOR(dev->bdev->bd_dev), + MINOR(dev->bdev->bd_dev)); + if (!zmd->mblk_shrinker) { + ret = -ENOMEM; + dmz_zmd_err(zmd, "Allocate metadata cache shrinker failed"); goto err; } + zmd->mblk_shrinker->count_objects = dmz_mblock_shrinker_count; + zmd->mblk_shrinker->scan_objects = dmz_mblock_shrinker_scan; + zmd->mblk_shrinker->private_data = zmd; + + shrinker_register(zmd->mblk_shrinker); + dmz_zmd_info(zmd, "DM-Zoned metadata version %d", zmd->sb_version); for (i = 0; i < zmd->nr_devs; i++) dmz_print_dev(zmd, i); @@ -2976,52 +2996,7 @@ err: */ void dmz_dtr_metadata(struct dmz_metadata *zmd) { - unregister_shrinker(&zmd->mblk_shrinker); + shrinker_free(zmd->mblk_shrinker); dmz_cleanup_metadata(zmd); kfree(zmd); } - -/* - * Check zone information on resume. - */ -int dmz_resume_metadata(struct dmz_metadata *zmd) -{ - struct dm_zone *zone; - sector_t wp_block; - unsigned int i; - int ret; - - /* Check zones */ - for (i = 0; i < zmd->nr_zones; i++) { - zone = dmz_get(zmd, i); - if (!zone) { - dmz_zmd_err(zmd, "Unable to get zone %u", i); - return -EIO; - } - wp_block = zone->wp_block; - - ret = dmz_update_zone(zmd, zone); - if (ret) { - dmz_zmd_err(zmd, "Broken zone %u", i); - return ret; - } - - if (dmz_is_offline(zone)) { - dmz_zmd_warn(zmd, "Zone %u is offline", i); - continue; - } - - /* Check write pointer */ - if (!dmz_is_seq(zone)) - zone->wp_block = 0; - else if (zone->wp_block != wp_block) { - dmz_zmd_err(zmd, "Zone %u: Invalid wp (%llu / %llu)", - i, (u64)zone->wp_block, (u64)wp_block); - zone->wp_block = wp_block; - dmz_invalidate_blocks(zmd, zone, zone->wp_block, - zmd->zone_nr_blocks - zone->wp_block); - } - } - - return 0; -} |
