summary | refs | log | tree | commit | diff
path: root/drivers
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-05-13 13:03:54 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-05-13 13:03:54 -0700
commit: 0c9f4ac808b017a0013cee92a30de980550145d5 (patch)
tree: 94eedbb9ef4815df9dc8d1dd6424fc92a2fbcd7a /drivers
parent: 9961a785944601e32f185ea696347b22ffda634c (diff)
parent: a3166c51702bb00b8f8b84022090cbab8f37be1a (diff)
Merge tag 'for-6.10/block-20240511' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:

 - Add a partscan attribute in sysfs, fixing an issue with systemd
   relying on an internal interface that went away.

 - Attempt #2 at making long running discards interruptible. The
   previous attempt went into 6.9, but we ended up mostly reverting it
   as it had issues.

 - Remove old ida_simple API in bcache

 - Support for zoned write plugging, greatly improving the performance
   on zoned devices.

 - Remove the old throttle low interface, which has been experimental
   since 2017 and never made it beyond that and isn't being used.

 - Remove page->index debugging checks in brd, as it hasn't caught
   anything and prepares us for removing in struct page.

 - MD pull request from Song

 - Don't schedule block workers on isolated CPUs

* tag 'for-6.10/block-20240511' of git://git.kernel.dk/linux: (84 commits)
  blk-throttle: delay initialization until configuration
  blk-throttle: remove CONFIG_BLK_DEV_THROTTLING_LOW
  block: fix that util can be greater than 100%
  block: support to account io_ticks precisely
  block: add plug while submitting IO
  bcache: fix variable length array abuse in btree_iter
  bcache: Remove usage of the deprecated ida_simple_xx() API
  md: Revert "md: Fix overflow in is_mddev_idle"
  blk-lib: check for kill signal in ioctl BLKDISCARD
  block: add a bio_await_chain helper
  block: add a blk_alloc_discard_bio helper
  block: add a bio_chain_and_submit helper
  block: move discard checks into the ioctl handler
  block: remove the discard_granularity check in __blkdev_issue_discard
  block/ioctl: prefer different overflow check
  null_blk: Fix the WARNING: modpost: missing MODULE_DESCRIPTION()
  block: fix and simplify blkdevparts= cmdline parsing
  block: refine the EOF check in blkdev_iomap_begin
  block: add a partscan sysfs attribute for disks
  block: add a disk_has_partscan helper
  ...
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/block/brd.c | 40
-rw-r--r-- drivers/block/null_blk/main.c | 43
-rw-r--r-- drivers/block/null_blk/null_blk.h | 2
-rw-r--r-- drivers/block/null_blk/zoned.c | 358
-rw-r--r-- drivers/block/ublk_drv.c | 5
-rw-r--r-- drivers/block/virtio_blk.c | 2
-rw-r--r-- drivers/md/bcache/bset.c | 44
-rw-r--r-- drivers/md/bcache/bset.h | 28
-rw-r--r-- drivers/md/bcache/btree.c | 40
-rw-r--r-- drivers/md/bcache/super.c | 15
-rw-r--r-- drivers/md/bcache/sysfs.c | 2
-rw-r--r-- drivers/md/bcache/writeback.c | 10
-rw-r--r-- drivers/md/dm-bio-prison-v2.c | 3
-rw-r--r-- drivers/md/dm-cache-target.c | 12
-rw-r--r-- drivers/md/dm-clone-target.c | 14
-rw-r--r-- drivers/md/dm-core.h | 2
-rw-r--r-- drivers/md/dm-era-target.c | 3
-rw-r--r-- drivers/md/dm-mpath.c | 3
-rw-r--r-- drivers/md/dm-table.c | 3
-rw-r--r-- drivers/md/dm-thin.c | 12
-rw-r--r-- drivers/md/dm-vdo/data-vio.c | 3
-rw-r--r-- drivers/md/dm-vdo/flush.c | 3
-rw-r--r-- drivers/md/dm-zone.c | 501
-rw-r--r-- drivers/md/dm.c | 72
-rw-r--r-- drivers/md/dm.h | 2
-rw-r--r-- drivers/md/md-bitmap.c | 6
-rw-r--r-- drivers/md/md.c | 7
-rw-r--r-- drivers/md/md.h | 3
-rw-r--r-- drivers/md/raid5.c | 15
-rw-r--r-- drivers/nvme/host/core.c | 2
-rw-r--r-- drivers/nvme/target/zns.c | 10
-rw-r--r-- drivers/scsi/scsi_lib.c | 1
-rw-r--r-- drivers/scsi/sd.c | 8
-rw-r--r-- drivers/scsi/sd.h | 19
-rw-r--r-- drivers/scsi/sd_zbc.c | 335
35 files changed, 495 insertions, 1133 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index e322cef6596b..b900fe9e0030 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -29,10 +29,7 @@
/*
* Each block ramdisk device has a xarray brd_pages of pages that stores
- * the pages containing the block device's contents. A brd page's ->index is
- * its offset in PAGE_SIZE units. This is similar to, but in no way connected
- * with, the kernel's pagecache or buffer cache (which sit above our block
- * device).
+ * the pages containing the block device's contents.
*/
struct brd_device {
int brd_number;
@@ -51,15 +48,7 @@ struct brd_device {
*/
static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
{
- pgoff_t idx;
- struct page *page;
-
- idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
- page = xa_load(&brd->brd_pages, idx);
-
- BUG_ON(page && page->index != idx);
-
- return page;
+ return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
}
/*
@@ -67,8 +56,8 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
*/
static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
{
- pgoff_t idx;
- struct page *page, *cur;
+ pgoff_t idx = sector >> PAGE_SECTORS_SHIFT;
+ struct page *page;
int ret = 0;
page = brd_lookup_page(brd, sector);
@@ -80,23 +69,16 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
return -ENOMEM;
xa_lock(&brd->brd_pages);
-
- idx = sector >> PAGE_SECTORS_SHIFT;
- page->index = idx;
-
- cur = __xa_cmpxchg(&brd->brd_pages, idx, NULL, page, gfp);
-
- if (unlikely(cur)) {
- __free_page(page);
- ret = xa_err(cur);
- if (!ret && (cur->index != idx))
- ret = -EIO;
- } else {
+ ret = __xa_insert(&brd->brd_pages, idx, page, gfp);
+ if (!ret)
brd->brd_nr_pages++;
- }
-
xa_unlock(&brd->brd_pages);
+ if (ret < 0) {
+ __free_page(page);
+ if (ret == -EBUSY)
+ ret = 0;
+ }
return ret;
}
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index ed33cf7192d2..4005a8b685e8 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -225,6 +225,10 @@ static unsigned long g_cache_size;
module_param_named(cache_size, g_cache_size, ulong, 0444);
MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)");
+static bool g_fua = true;
+module_param_named(fua, g_fua, bool, 0444);
+MODULE_PARM_DESC(zoned, "Enable/disable FUA support when cache_size is used. Default: true");
+
static unsigned int g_mbps;
module_param_named(mbps, g_mbps, uint, 0444);
MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)");
@@ -253,6 +257,11 @@ static unsigned int g_zone_max_active;
module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
+static int g_zone_append_max_sectors = INT_MAX;
+module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444);
+MODULE_PARM_DESC(zone_append_max_sectors,
+ "Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");
+
static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
@@ -436,10 +445,12 @@ NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
+NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
+NULLB_DEVICE_ATTR(fua, bool, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -580,12 +591,14 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_nr_conv,
&nullb_device_attr_zone_max_open,
&nullb_device_attr_zone_max_active,
+ &nullb_device_attr_zone_append_max_sectors,
&nullb_device_attr_zone_readonly,
&nullb_device_attr_zone_offline,
&nullb_device_attr_virt_boundary,
&nullb_device_attr_no_sched,
&nullb_device_attr_shared_tags,
&nullb_device_attr_shared_tag_bitmap,
+ &nullb_device_attr_fua,
NULL,
};
@@ -664,14 +677,14 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
return snprintf(page, PAGE_SIZE,
- "badblocks,blocking,blocksize,cache_size,"
+ "badblocks,blocking,blocksize,cache_size,fua,"
"completion_nsec,discard,home_node,hw_queue_depth,"
"irqmode,max_sectors,mbps,memory_backed,no_sched,"
"poll_queues,power,queue_mode,shared_tag_bitmap,"
"shared_tags,size,submit_queues,use_per_node_hctx,"
"virt_boundary,zoned,zone_capacity,zone_max_active,"
"zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
- "zone_size\n");
+ "zone_size,zone_append_max_sectors\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -751,10 +764,13 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_nr_conv = g_zone_nr_conv;
dev->zone_max_open = g_zone_max_open;
dev->zone_max_active = g_zone_max_active;
+ dev->zone_append_max_sectors = g_zone_append_max_sectors;
dev->virt_boundary = g_virt_boundary;
dev->no_sched = g_no_sched;
dev->shared_tags = g_shared_tags;
dev->shared_tag_bitmap = g_shared_tag_bitmap;
+ dev->fua = g_fua;
+
return dev;
}
@@ -1151,7 +1167,7 @@ blk_status_t null_handle_discard(struct nullb_device *dev,
return BLK_STS_OK;
}
-static int null_handle_flush(struct nullb *nullb)
+static blk_status_t null_handle_flush(struct nullb *nullb)
{
int err;
@@ -1168,7 +1184,7 @@ static int null_handle_flush(struct nullb *nullb)
WARN_ON(!radix_tree_empty(&nullb->dev->cache));
spin_unlock_irq(&nullb->lock);
- return err;
+ return errno_to_blk_status(err);
}
static int null_transfer(struct nullb *nullb, struct page *page,
@@ -1206,7 +1222,7 @@ static int null_handle_rq(struct nullb_cmd *cmd)
{
struct request *rq = blk_mq_rq_from_pdu(cmd);
struct nullb *nullb = cmd->nq->dev->nullb;
- int err;
+ int err = 0;
unsigned int len;
sector_t sector = blk_rq_pos(rq);
struct req_iterator iter;
@@ -1218,15 +1234,13 @@ static int null_handle_rq(struct nullb_cmd *cmd)
err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
op_is_write(req_op(rq)), sector,
rq->cmd_flags & REQ_FUA);
- if (err) {
- spin_unlock_irq(&nullb->lock);
- return err;
- }
+ if (err)
+ break;
sector += len >> SECTOR_SHIFT;
}
spin_unlock_irq(&nullb->lock);
- return 0;
+ return errno_to_blk_status(err);
}
static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
@@ -1273,8 +1287,8 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
if (op == REQ_OP_DISCARD)
return null_handle_discard(dev, sector, nr_sectors);
- return errno_to_blk_status(null_handle_rq(cmd));
+ return null_handle_rq(cmd);
}
static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
@@ -1343,7 +1357,7 @@ static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
blk_status_t sts;
if (op == REQ_OP_FLUSH) {
- cmd->error = errno_to_blk_status(null_handle_flush(nullb));
+ cmd->error = null_handle_flush(nullb);
goto out;
}
@@ -1912,7 +1926,7 @@ static int null_add_dev(struct nullb_device *dev)
if (dev->cache_size > 0) {
set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
- blk_queue_write_cache(nullb->q, true, true);
+ blk_queue_write_cache(nullb->q, true, dev->fua);
}
nullb->q->queuedata = nullb;
@@ -2113,10 +2127,13 @@ static void __exit null_exit(void)
if (tag_set.ops)
blk_mq_free_tag_set(&tag_set);
+
+ mutex_destroy(&lock);
}
module_init(null_init);
module_exit(null_exit);
MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
+MODULE_DESCRIPTION("multi queue aware block test driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 477b97746823..3234e6c85eed 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -82,6 +82,7 @@ struct nullb_device {
unsigned int zone_nr_conv; /* number of conventional zones */
unsigned int zone_max_open; /* max number of open zones */
unsigned int zone_max_active; /* max number of active zones */
+ unsigned int zone_append_max_sectors; /* Max sectors per zone append command */
unsigned int submit_queues; /* number of submission queues */
unsigned int prev_submit_queues; /* number of submission queues before change */
unsigned int poll_queues; /* number of IOPOLL submission queues */
@@ -104,6 +105,7 @@ struct nullb_device {
bool no_sched; /* no IO scheduler for the device */
bool shared_tags; /* share tag set between devices for blk-mq */
bool shared_tag_bitmap; /* use hostwide shared tags */
+ bool fua; /* Support FUA */
};
struct nullb {
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 1689e2584104..5b5a63adacc1 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -9,6 +9,8 @@
#undef pr_fmt
#define pr_fmt(fmt) "null_blk: " fmt
+#define NULL_ZONE_INVALID_WP ((sector_t)-1)
+
static inline sector_t mb_to_sects(unsigned long mb)
{
return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
@@ -19,18 +21,6 @@ static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
return sect >> ilog2(dev->zone_size_sects);
}
-static inline void null_lock_zone_res(struct nullb_device *dev)
-{
- if (dev->need_zone_res_mgmt)
- spin_lock_irq(&dev->zone_res_lock);
-}
-
-static inline void null_unlock_zone_res(struct nullb_device *dev)
-{
- if (dev->need_zone_res_mgmt)
- spin_unlock_irq(&dev->zone_res_lock);
-}
-
static inline void null_init_zone_lock(struct nullb_device *dev,
struct nullb_zone *zone)
{
@@ -103,6 +93,11 @@ int null_init_zoned_dev(struct nullb_device *dev,
dev->zone_nr_conv);
}
+ dev->zone_append_max_sectors =
+ min(ALIGN_DOWN(dev->zone_append_max_sectors,
+ dev->blocksize >> SECTOR_SHIFT),
+ zone_capacity_sects);
+
/* Max active zones has to be < nbr of seq zones in order to be enforceable */
if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) {
dev->zone_max_active = 0;
@@ -154,7 +149,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
lim->zoned = true;
lim->chunk_sectors = dev->zone_size_sects;
- lim->max_zone_append_sectors = dev->zone_size_sects;
+ lim->max_zone_append_sectors = dev->zone_append_max_sectors;
lim->max_open_zones = dev->zone_max_open;
lim->max_active_zones = dev->zone_max_active;
return 0;
@@ -163,11 +158,16 @@ int null_init_zoned_dev(struct nullb_device *dev,
int null_register_zoned_dev(struct nullb *nullb)
{
struct request_queue *q = nullb->q;
+ struct gendisk *disk = nullb->disk;
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
- blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
- nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
- return blk_revalidate_disk_zones(nullb->disk, NULL);
+ disk->nr_zones = bdev_nr_zones(disk->part0);
+
+ pr_info("%s: using %s zone append\n",
+ disk->disk_name,
+ queue_emulates_zone_append(q) ? "emulated" : "native");
+
+ return blk_revalidate_disk_zones(disk);
}
void null_free_zoned_dev(struct nullb_device *dev)
@@ -241,35 +241,6 @@ size_t null_zone_valid_read_len(struct nullb *nullb,
return (zone->wp - sector) << SECTOR_SHIFT;
}
-static blk_status_t __null_close_zone(struct nullb_device *dev,
- struct nullb_zone *zone)
-{
- switch (zone->cond) {
- case BLK_ZONE_COND_CLOSED:
- /* close operation on closed is not an error */
- return BLK_STS_OK;
- case BLK_ZONE_COND_IMP_OPEN:
- dev->nr_zones_imp_open--;
- break;
- case BLK_ZONE_COND_EXP_OPEN:
- dev->nr_zones_exp_open--;
- break;
- case BLK_ZONE_COND_EMPTY:
- case BLK_ZONE_COND_FULL:
- default:
- return BLK_STS_IOERR;
- }
-
- if (zone->wp == zone->start) {
- zone->cond = BLK_ZONE_COND_EMPTY;
- } else {
- zone->cond = BLK_ZONE_COND_CLOSED;
- dev->nr_zones_closed++;
- }
-
- return BLK_STS_OK;
-}
-
static void null_close_imp_open_zone(struct nullb_device *dev)
{
struct nullb_zone *zone;
@@ -286,7 +257,13 @@ static void null_close_imp_open_zone(struct nullb_device *dev)
zno = dev->zone_nr_conv;
if (zone->cond == BLK_ZONE_COND_IMP_OPEN) {
- __null_close_zone(dev, zone);
+ dev->nr_zones_imp_open--;
+ if (zone->wp == zone->start) {
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ } else {
+ zone->cond = BLK_ZONE_COND_CLOSED;
+ dev->nr_zones_closed++;
+ }
dev->imp_close_zone_no = zno;
return;
}
@@ -374,73 +351,73 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
null_lock_zone(dev, zone);
- if (zone->cond == BLK_ZONE_COND_FULL ||
- zone->cond == BLK_ZONE_COND_READONLY ||
- zone->cond == BLK_ZONE_COND_OFFLINE) {
- /* Cannot write to the zone */
- ret = BLK_STS_IOERR;
- goto unlock;
- }
-
/*
- * Regular writes must be at the write pointer position.
- * Zone append writes are automatically issued at the write
- * pointer and the position returned using the request or BIO
- * sector.
+ * Regular writes must be at the write pointer position. Zone append
+ * writes are automatically issued at the write pointer and the position
+ * returned using the request sector. Note that we do not check the zone
+ * condition because for FULL, READONLY and OFFLINE zones, the sector
+ * check against the zone write pointer will always result in failing
+ * the command.
*/
if (append) {
+ if (WARN_ON_ONCE(!dev->zone_append_max_sectors) ||
+ zone->wp == NULL_ZONE_INVALID_WP) {
+ ret = BLK_STS_IOERR;
+ goto unlock_zone;
+ }
sector = zone->wp;
blk_mq_rq_from_pdu(cmd)->__sector = sector;
- } else if (sector != zone->wp) {
- ret = BLK_STS_IOERR;
- goto unlock;
}
- if (zone->wp + nr_sectors > zone->start + zone->capacity) {
+ if (sector != zone->wp ||
+ zone->wp + nr_sectors > zone->start + zone->capacity) {
ret = BLK_STS_IOERR;
- goto unlock;
+ goto unlock_zone;
}
if (zone->cond == BLK_ZONE_COND_CLOSED ||
zone->cond == BLK_ZONE_COND_EMPTY) {
- null_lock_zone_res(dev);
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
- ret = null_check_zone_resources(dev, zone);
- if (ret != BLK_STS_OK) {
- null_unlock_zone_res(dev);
- goto unlock;
- }
- if (zone->cond == BLK_ZONE_COND_CLOSED) {
- dev->nr_zones_closed--;
- dev->nr_zones_imp_open++;
- } else if (zone->cond == BLK_ZONE_COND_EMPTY) {
- dev->nr_zones_imp_open++;
- }
+ ret = null_check_zone_resources(dev, zone);
+ if (ret != BLK_STS_OK) {
+ spin_unlock(&dev->zone_res_lock);
+ goto unlock_zone;
+ }
+ if (zone->cond == BLK_ZONE_COND_CLOSED) {
+ dev->nr_zones_closed--;
+ dev->nr_zones_imp_open++;
+ } else if (zone->cond == BLK_ZONE_COND_EMPTY) {
+ dev->nr_zones_imp_open++;
+ }
- if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
- zone->cond = BLK_ZONE_COND_IMP_OPEN;
+ spin_unlock(&dev->zone_res_lock);
+ }
- null_unlock_zone_res(dev);
+ zone->cond = BLK_ZONE_COND_IMP_OPEN;
}
ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
if (ret != BLK_STS_OK)
- goto unlock;
+ goto unlock_zone;
zone->wp += nr_sectors;
if (zone->wp == zone->start + zone->capacity) {
- null_lock_zone_res(dev);
- if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
- dev->nr_zones_exp_open--;
- else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
- dev->nr_zones_imp_open--;
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
+ if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
+ dev->nr_zones_exp_open--;
+ else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
+ dev->nr_zones_imp_open--;
+ spin_unlock(&dev->zone_res_lock);
+ }
zone->cond = BLK_ZONE_COND_FULL;
- null_unlock_zone_res(dev);
}
ret = BLK_STS_OK;
-unlock:
+unlock_zone:
null_unlock_zone(dev, zone);
return ret;
@@ -454,54 +431,100 @@ static blk_status_t null_open_zone(struct nullb_device *dev,
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
- null_lock_zone_res(dev);
-
switch (zone->cond) {
case BLK_ZONE_COND_EXP_OPEN:
- /* open operation on exp open is not an error */
- goto unlock;
+ /* Open operation on exp open is not an error */
+ return BLK_STS_OK;
case BLK_ZONE_COND_EMPTY:
- ret = null_check_zone_resources(dev, zone);
- if (ret != BLK_STS_OK)
- goto unlock;
- break;
case BLK_ZONE_COND_IMP_OPEN:
- dev->nr_zones_imp_open--;
- break;
case BLK_ZONE_COND_CLOSED:
- ret = null_check_zone_resources(dev, zone);
- if (ret != BLK_STS_OK)
- goto unlock;
- dev->nr_zones_closed--;
break;
case BLK_ZONE_COND_FULL:
default:
- ret = BLK_STS_IOERR;
- goto unlock;
+ return BLK_STS_IOERR;
}
- zone->cond = BLK_ZONE_COND_EXP_OPEN;
- dev->nr_zones_exp_open++;
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
-unlock:
- null_unlock_zone_res(dev);
+ switch (zone->cond) {
+ case BLK_ZONE_COND_EMPTY:
+ ret = null_check_zone_resources(dev, zone);
+ if (ret != BLK_STS_OK) {
+ spin_unlock(&dev->zone_res_lock);
+ return ret;
+ }
+ break;
+ case BLK_ZONE_COND_IMP_OPEN:
+ dev->nr_zones_imp_open--;
+ break;
+ case BLK_ZONE_COND_CLOSED:
+ ret = null_check_zone_resources(dev, zone);
+ if (ret != BLK_STS_OK) {
+ spin_unlock(&dev->zone_res_lock);
+ return ret;
+ }
+ dev->nr_zones_closed--;
+ break;
+ default:
+ break;
+ }
- return ret;
+ dev->nr_zones_exp_open++;
+
+ spin_unlock(&dev->zone_res_lock);
+ }
+
+ zone->cond = BLK_ZONE_COND_EXP_OPEN;
+
+ return BLK_STS_OK;
}
static blk_status_t null_close_zone(struct nullb_device *dev,
struct nullb_zone *zone)
{
- blk_status_t ret;
-
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
- null_lock_zone_res(dev);
- ret = __null_close_zone(dev, zone);
- null_unlock_zone_res(dev);
+ switch (zone->cond) {
+ case BLK_ZONE_COND_CLOSED:
+ /* close operation on closed is not an error */
+ return BLK_STS_OK;
+ case BLK_ZONE_COND_IMP_OPEN:
+ case BLK_ZONE_COND_EXP_OPEN:
+ break;
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_FULL:
+ default:
+ return BLK_STS_IOERR;
+ }
+
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
- return ret;
+ switch (zone->cond) {
+ case BLK_ZONE_COND_IMP_OPEN:
+ dev->nr_zones_imp_open--;
+ break;
+ case BLK_ZONE_COND_EXP_OPEN:
+ dev->nr_zones_exp_open--;
+ break;
+ default:
+ break;
+ }
+
+ if (zone->wp > zone->start)
+ dev->nr_zones_closed++;
+
+ spin_unlock(&dev->zone_res_lock);
+ }
+
+ if (zone->wp == zone->start)
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ else
+ zone->cond = BLK_ZONE_COND_CLOSED;
+
+ return BLK_STS_OK;
}
static blk_status_t null_finish_zone(struct nullb_device *dev,
@@ -512,41 +535,47 @@ static blk_status_t null_finish_zone(struct nullb_device *dev,
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
- null_lock_zone_res(dev);
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
- switch (zone->cond) {
- case BLK_ZONE_COND_FULL:
- /* finish operation on full is not an error */
- goto unlock;
- case BLK_ZONE_COND_EMPTY:
- ret = null_check_zone_resources(dev, zone);
- if (ret != BLK_STS_OK)
- goto unlock;
- break;
- case BLK_ZONE_COND_IMP_OPEN:
- dev->nr_zones_imp_open--;
- break;
- case BLK_ZONE_COND_EXP_OPEN:
- dev->nr_zones_exp_open--;
- break;
- case BLK_ZONE_COND_CLOSED:
- ret = null_check_zone_resources(dev, zone);
- if (ret != BLK_STS_OK)
- goto unlock;
- dev->nr_zones_closed--;
- break;
- default:
- ret = BLK_STS_IOERR;
- goto unlock;
+ switch (zone->cond) {
+ case BLK_ZONE_COND_FULL:
+ /* Finish operation on full is not an error */
+ spin_unlock(&dev->zone_res_lock);
+ return BLK_STS_OK;
+ case BLK_ZONE_COND_EMPTY:
+ ret = null_check_zone_resources(dev, zone);
+ if (ret != BLK_STS_OK) {
+ spin_unlock(&dev->zone_res_lock);
+ return ret;
+ }
+ break;
+ case BLK_ZONE_COND_IMP_OPEN:
+ dev->nr_zones_imp_open--;
+ break;
+ case BLK_ZONE_COND_EXP_OPEN:
+ dev->nr_zones_exp_open--;
+ break;
+ case BLK_ZONE_COND_CLOSED:
+ ret = null_check_zone_resources(dev, zone);
+ if (ret != BLK_STS_OK) {
+ spin_unlock(&dev->zone_res_lock);
+ return ret;
+ }
+ dev->nr_zones_closed--;
+ break;
+ default:
+ spin_unlock(&dev->zone_res_lock);
+ return BLK_STS_IOERR;
+ }
+
+ spin_unlock(&dev->zone_res_lock);
}
zone->cond = BLK_ZONE_COND_FULL;
zone->wp = zone->start + zone->len;
-unlock:
- null_unlock_zone_res(dev);
-
- return ret;
+ return BLK_STS_OK;
}
static blk_status_t null_reset_zone(struct nullb_device *dev,
@@ -555,34 +584,33 @@ static blk_status_t null_reset_zone(struct nullb_device *dev,
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
- null_lock_zone_res(dev);
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
- switch (zone->cond) {
- case BLK_ZONE_COND_EMPTY:
- /* reset operation on empty is not an error */
- null_unlock_zone_res(dev);
- return BLK_STS_OK;
- case BLK_ZONE_COND_IMP_OPEN:
- dev->nr_zones_imp_open--;
- break;
- case BLK_ZONE_COND_EXP_OPEN:
- dev->nr_zones_exp_open--;
- break;
- case BLK_ZONE_COND_CLOSED:
- dev->nr_zones_closed--;
- break;
- case BLK_ZONE_COND_FULL:
- break;
- default:
- null_unlock_zone_res(dev);
- return BLK_STS_IOERR;
+ switch (zone->cond) {
+ case BLK_ZONE_COND_IMP_OPEN:
+ dev->nr_zones_imp_open--;
+ break;
+ case BLK_ZONE_COND_EXP_OPEN:
+ dev->nr_zones_exp_open--;
+ break;
+ case BLK_ZONE_COND_CLOSED:
+ dev->nr_zones_closed--;
+ break;
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_FULL:
+ break;
+ default:
+ spin_unlock(&dev->zone_res_lock);
+ return BLK_STS_IOERR;
+ }
+
+ spin_unlock(&dev->zone_res_lock);
}
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
- null_unlock_zone_res(dev);
-
if (dev->memory_backed)
return null_handle_discard(dev, zone->start, zone->len);
@@ -711,7 +739,7 @@ static void null_set_zone_cond(struct nullb_device *dev,
zone->cond != BLK_ZONE_COND_OFFLINE)
null_finish_zone(dev, zone);
zone->cond = cond;
- zone->wp = (sector_t)-1;
+ zone->wp = NULL_ZONE_INVALID_WP;
}
null_unlock_zone(dev, zone);
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 374e4efa8759..176657dce3e3 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -221,7 +221,7 @@ static int ublk_get_nr_zones(const struct ublk_device *ub)
static int ublk_revalidate_disk_zones(struct ublk_device *ub)
{
- return blk_revalidate_disk_zones(ub->ub_disk, NULL);
+ return blk_revalidate_disk_zones(ub->ub_disk);
}
static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
@@ -249,8 +249,7 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
{
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue);
- blk_queue_required_elevator_features(ub->ub_disk->queue,
- ELEVATOR_F_ZBD_SEQ_WRITE);
+
ub->ub_disk->nr_zones = ublk_get_nr_zones(ub);
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 42dea7601d87..c1af0a7d56c8 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1543,7 +1543,7 @@ static int virtblk_probe(struct virtio_device *vdev)
*/
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) {
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue);
- err = blk_revalidate_disk_zones(vblk->disk, NULL);
+ err = blk_revalidate_disk_zones(vblk->disk);
if (err)
goto out_cleanup_disk;
}
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 2bba4d6aaaa2..463eb13bd0b2 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -54,7 +54,7 @@ void bch_dump_bucket(struct btree_keys *b)
int __bch_count_data(struct btree_keys *b)
{
unsigned int ret = 0;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k;
if (b->ops->is_extents)
@@ -67,7 +67,7 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
{
va_list args;
struct bkey *k, *p = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
const char *err;
for_each_key(b, k, &iter) {
@@ -879,7 +879,7 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
struct bset *i = bset_tree_last(b)->data;
struct bkey *m, *prev = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey preceding_key_on_stack = ZERO_KEY;
struct bkey *preceding_key_p = &preceding_key_on_stack;
@@ -895,9 +895,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
else
preceding_key(k, &preceding_key_p);
- m = bch_btree_iter_init(b, &iter, preceding_key_p);
+ m = bch_btree_iter_stack_init(b, &iter, preceding_key_p);
- if (b->ops->insert_fixup(b, k, &iter, replace_key))
+ if (b->ops->insert_fixup(b, k, &iter.iter, replace_key))
return status;
status = BTREE_INSERT_STATUS_INSERT;
@@ -1100,33 +1100,33 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
btree_iter_cmp));
}
-static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
- struct bkey *search,
- struct bset_tree *start)
+static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
+ struct bkey *search,
+ struct bset_tree *start)
{
struct bkey *ret = NULL;
- iter->size = ARRAY_SIZE(iter->data);
- iter->used = 0;
+ iter->iter.size = ARRAY_SIZE(iter->stack_data);
+ iter->iter.used = 0;
#ifdef CONFIG_BCACHE_DEBUG
- iter->b = b;
+ iter->iter.b = b;
#endif
for (; start <= bset_tree_last(b); start++) {
ret = bch_bset_search(b, start, search);
- bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
+ bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data));
}
return ret;
}
-struct bkey *bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
+struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
struct bkey *search)
{
- return __bch_btree_iter_init(b, iter, search, b->set);
+ return __bch_btree_iter_stack_init(b, iter, search, b->set);
}
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
@@ -1293,10 +1293,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
struct bset_sort_state *state)
{
size_t order = b->page_order, keys = 0;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
int oldsize = bch_count_data(b);
- __bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
+ __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]);
if (start) {
unsigned int i;
@@ -1307,7 +1307,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
order = get_order(__set_bytes(b->set->data, keys));
}
- __btree_sort(b, &iter, start, order, false, state);
+ __btree_sort(b, &iter.iter, start, order, false, state);
EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
}
@@ -1323,11 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
struct bset_sort_state *state)
{
uint64_t start_time = local_clock();
- struct btree_iter iter;
+ struct btree_iter_stack iter;
- bch_btree_iter_init(b, &iter, NULL);
+ bch_btree_iter_stack_init(b, &iter, NULL);
- btree_mergesort(b, new->set->data, &iter, false, true);
+ btree_mergesort(b, new->set->data, &iter.iter, false, true);
bch_time_stats_update(&state->time, start_time);
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index d795c84246b0..011f6062c4c0 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -321,7 +321,14 @@ struct btree_iter {
#endif
struct btree_iter_set {
struct bkey *k, *end;
- } data[MAX_BSETS];
+ } data[];
+};
+
+/* Fixed-size btree_iter that can be allocated on the stack */
+
+struct btree_iter_stack {
+ struct btree_iter iter;
+ struct btree_iter_set stack_data[MAX_BSETS];
};
typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);
@@ -333,9 +340,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
struct bkey *end);
-struct bkey *bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
- struct bkey *search);
+struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
+ struct bkey *search);
struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
const struct bkey *search);
@@ -350,13 +357,14 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
return search ? __bch_bset_search(b, t, search) : t->data->start;
}
-#define for_each_key_filter(b, k, iter, filter) \
- for (bch_btree_iter_init((b), (iter), NULL); \
- ((k) = bch_btree_iter_next_filter((iter), (b), filter));)
+#define for_each_key_filter(b, k, stack_iter, filter) \
+ for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
+ ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \
+ filter));)
-#define for_each_key(b, k, iter) \
- for (bch_btree_iter_init((b), (iter), NULL); \
- ((k) = bch_btree_iter_next(iter));)
+#define for_each_key(b, k, stack_iter) \
+ for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
+ ((k) = bch_btree_iter_next(&((stack_iter)->iter)));)
/* Sorting */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 196cdacce38f..d011a7154d33 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1309,7 +1309,7 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
uint8_t stale = 0;
unsigned int keys = 0, good_keys = 0;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bset_tree *t;
gc->nodes++;
@@ -1570,7 +1570,7 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
static unsigned int btree_gc_count_keys(struct btree *b)
{
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
unsigned int ret = 0;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
@@ -1611,17 +1611,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
int ret = 0;
bool should_rewrite;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct gc_merge_info r[GC_MERGE_NODES];
struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
- bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
+ bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done);
for (i = r; i < r + ARRAY_SIZE(r); i++)
i->b = ERR_PTR(-EINTR);
while (1) {
- k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
+ k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
+ bch_ptr_bad);
if (k) {
r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
true, b);
@@ -1911,7 +1912,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
{
int ret = 0;
struct bkey *k, *p = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid)
bch_initial_mark_key(b->c, b->level, k);
@@ -1919,10 +1920,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
bch_initial_mark_key(b->c, b->level + 1, &b->key);
if (b->level) {
- bch_btree_iter_init(&b->keys, &iter, NULL);
+ bch_btree_iter_stack_init(&b->keys, &iter, NULL);
do {
- k = bch_btree_iter_next_filter(&iter, &b->keys,
+ k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
bch_ptr_bad);
if (k) {
btree_node_prefetch(b, k);
@@ -1950,7 +1951,7 @@ static int bch_btree_check_thread(void *arg)
struct btree_check_info *info = arg;
struct btree_check_state *check_state = info->state;
struct cache_set *c = check_state->c;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
@@ -1959,8 +1960,8 @@ static int bch_btree_check_thread(void *arg)
ret = 0;
/* root node keys are checked before thread created */
- bch_btree_iter_init(&c->root->keys, &iter, NULL);
- k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
BUG_ON(!k);
p = k;
@@ -1978,7 +1979,7 @@ static int bch_btree_check_thread(void *arg)
skip_nr = cur_idx - prev_idx;
while (skip_nr) {
- k = bch_btree_iter_next_filter(&iter,
+ k = bch_btree_iter_next_filter(&iter.iter,
&c->root->keys,
bch_ptr_bad);
if (k)
@@ -2051,7 +2052,7 @@ int bch_btree_check(struct cache_set *c)
int ret = 0;
int i;
struct bkey *k = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct btree_check_state check_state;
/* check and mark root node keys */
@@ -2547,11 +2548,11 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
if (b->level) {
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
- bch_btree_iter_init(&b->keys, &iter, from);
+ bch_btree_iter_stack_init(&b->keys, &iter, from);
- while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
+ while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
bch_ptr_bad))) {
ret = bcache_btree(map_nodes_recurse, k, b,
op, from, fn, flags);
@@ -2580,11 +2581,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
{
int ret = MAP_CONTINUE;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
- bch_btree_iter_init(&b->keys, &iter, from);
+ bch_btree_iter_stack_init(&b->keys, &iter, from);
- while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
+ while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
+ bch_ptr_bad))) {
ret = !b->level
? fn(op, b, k)
: bcache_btree(map_keys_recurse, k,
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 330bcd9ea4a9..cba09660148a 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -881,8 +881,8 @@ static void bcache_device_free(struct bcache_device *d)
bcache_device_detach(d);
if (disk) {
- ida_simple_remove(&bcache_device_idx,
- first_minor_to_idx(disk->first_minor));
+ ida_free(&bcache_device_idx,
+ first_minor_to_idx(disk->first_minor));
put_disk(disk);
}
@@ -940,8 +940,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
if (!d->full_dirty_stripes)
goto out_free_stripe_sectors_dirty;
- idx = ida_simple_get(&bcache_device_idx, 0,
- BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
+ idx = ida_alloc_max(&bcache_device_idx, BCACHE_DEVICE_IDX_MAX - 1,
+ GFP_KERNEL);
if (idx < 0)
goto out_free_full_dirty_stripes;
@@ -986,7 +986,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
out_bioset_exit:
bioset_exit(&d->bio_split);
out_ida_remove:
- ida_simple_remove(&bcache_device_idx, idx);
+ ida_free(&bcache_device_idx, idx);
out_free_full_dirty_stripes:
kvfree(d->full_dirty_stripes);
out_free_stripe_sectors_dirty:
@@ -1914,8 +1914,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
INIT_LIST_HEAD(&c->btree_cache_freed);
INIT_LIST_HEAD(&c->data_buckets);
- iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) *
- sizeof(struct btree_iter_set);
+ iter_size = sizeof(struct btree_iter) +
+ ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
+ sizeof(struct btree_iter_set);
c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
if (!c->devices)
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 6956beb55326..826b14cae4e5 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -660,7 +660,7 @@ static unsigned int bch_root_usage(struct cache_set *c)
unsigned int bytes = 0;
struct bkey *k;
struct btree *b;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
goto lock_root;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 8827a6f130ad..792e070ccf38 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -908,15 +908,15 @@ static int bch_dirty_init_thread(void *arg)
struct dirty_init_thrd_info *info = arg;
struct bch_dirty_init_state *state = info->state;
struct cache_set *c = state->c;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
k = p = NULL;
prev_idx = 0;
- bch_btree_iter_init(&c->root->keys, &iter, NULL);
- k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
BUG_ON(!k);
p = k;
@@ -930,7 +930,7 @@ static int bch_dirty_init_thread(void *arg)
skip_nr = cur_idx - prev_idx;
while (skip_nr) {
- k = bch_btree_iter_next_filter(&iter,
+ k = bch_btree_iter_next_filter(&iter.iter,
&c->root->keys,
bch_ptr_bad);
if (k)
@@ -979,7 +979,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
int i;
struct btree *b = NULL;
struct bkey *k = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct sectors_dirty_init op;
struct cache_set *c = d->c;
struct bch_dirty_init_state state;
diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c
index fd852981ef9c..cf433b0cf742 100644
--- a/drivers/md/dm-bio-prison-v2.c
+++ b/drivers/md/dm-bio-prison-v2.c
@@ -321,8 +321,7 @@ static bool __unlock(struct dm_bio_prison_v2 *prison,
{
BUG_ON(!cell->exclusive_lock);
- bio_list_merge(bios, &cell->bios);
- bio_list_init(&cell->bios);
+ bio_list_merge_init(bios, &cell->bios);
if (cell->shared_count) {
cell->exclusive_lock = false;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 911f73f7ebba..0fcbf8603846 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -115,8 +115,7 @@ static void __commit(struct work_struct *_ws)
*/
spin_lock_irq(&b->lock);
list_splice_init(&b->work_items, &work_items);
- bio_list_merge(&bios, &b->bios);
- bio_list_init(&b->bios);
+ bio_list_merge_init(&bios, &b->bios);
b->commit_scheduled = false;
spin_unlock_irq(&b->lock);
@@ -565,8 +564,7 @@ static void defer_bio(struct cache *cache, struct bio *bio)
static void defer_bios(struct cache *cache, struct bio_list *bios)
{
spin_lock_irq(&cache->lock);
- bio_list_merge(&cache->deferred_bios, bios);
- bio_list_init(bios);
+ bio_list_merge_init(&cache->deferred_bios, bios);
spin_unlock_irq(&cache->lock);
wake_deferred_bio_worker(cache);
@@ -1816,8 +1814,7 @@ static void process_deferred_bios(struct work_struct *ws)
bio_list_init(&bios);
spin_lock_irq(&cache->lock);
- bio_list_merge(&bios, &cache->deferred_bios);
- bio_list_init(&cache->deferred_bios);
+ bio_list_merge_init(&bios, &cache->deferred_bios);
spin_unlock_irq(&cache->lock);
while ((bio = bio_list_pop(&bios))) {
@@ -1847,8 +1844,7 @@ static void requeue_deferred_bios(struct cache *cache)
struct bio_list bios;
bio_list_init(&bios);
- bio_list_merge(&bios, &cache->deferred_bios);
- bio_list_init(&cache->deferred_bios);
+ bio_list_merge_init(&bios, &cache->deferred_bios);
while ((bio = bio_list_pop(&bios))) {
bio->bi_status = BLK_STS_DM_REQUEUE;
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index 94b2fc33f64b..3f68672ab7c9 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -1181,8 +1181,7 @@ static void process_deferred_discards(struct clone *clone)
struct bio_list discards = BIO_EMPTY_LIST;
spin_lock_irq(&clone->lock);
- bio_list_merge(&discards, &clone->deferred_discard_bios);
- bio_list_init(&clone->deferred_discard_bios);
+ bio_list_merge_init(&discards, &clone->deferred_discard_bios);
spin_unlock_irq(&clone->lock);
if (bio_list_empty(&discards))
@@ -1215,8 +1214,7 @@ static void process_deferred_bios(struct clone *clone)
struct bio_list bios = BIO_EMPTY_LIST;
spin_lock_irq(&clone->lock);
- bio_list_merge(&bios, &clone->deferred_bios);
- bio_list_init(&clone->deferred_bios);
+ bio_list_merge_init(&bios, &clone->deferred_bios);
spin_unlock_irq(&clone->lock);
if (bio_list_empty(&bios))
@@ -1237,11 +1235,9 @@ static void process_deferred_flush_bios(struct clone *clone)
* before issuing them or signaling their completion.
*/
spin_lock_irq(&clone->lock);
- bio_list_merge(&bios, &clone->deferred_flush_bios);
- bio_list_init(&clone->deferred_flush_bios);
-
- bio_list_merge(&bio_completions, &clone->deferred_flush_completions);
- bio_list_init(&clone->deferred_flush_completions);
+ bio_list_merge_init(&bios, &clone->deferred_flush_bios);
+ bio_list_merge_init(&bio_completions,
+ &clone->deferred_flush_completions);
spin_unlock_irq(&clone->lock);
if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index e6757a30dcca..08700bfc3e23 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -140,7 +140,7 @@ struct mapped_device {
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int nr_zones;
- unsigned int *zwp_offset;
+ void *zone_revalidate_map;
#endif
#ifdef CONFIG_IMA
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 6acfa5bf97a4..8f81e597858d 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1272,8 +1272,7 @@ static void process_deferred_bios(struct era *era)
bio_list_init(&marked_bios);
spin_lock(&era->deferred_lock);
- bio_list_merge(&deferred_bios, &era->deferred_bios);
- bio_list_init(&era->deferred_bios);
+ bio_list_merge_init(&deferred_bios, &era->deferred_bios);
spin_unlock(&era->deferred_lock);
if (bio_list_empty(&deferred_bios))
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 05d1328d1811..15b681b90153 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -704,8 +704,7 @@ static void process_queued_bios(struct work_struct *work)
return;
}
- bio_list_merge(&bios, &m->queued_bios);
- bio_list_init(&m->queued_bios);
+ bio_list_merge_init(&bios, &m->queued_bios);
spin_unlock_irqrestore(&m->lock, flags);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 41f1d731ae5a..2c6fbd87363f 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -2042,7 +2042,8 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
r = dm_set_zones_restrictions(t, q);
if (r)
return r;
- if (!static_key_enabled(&zoned_enabled.key))
+ if (blk_queue_is_zoned(q) &&
+ !static_key_enabled(&zoned_enabled.key))
static_branch_enable(&zoned_enabled);
}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 4793ad2aa1f7..f359984c8ef2 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -592,12 +592,6 @@ struct dm_thin_endio_hook {
struct dm_bio_prison_cell *cell;
};
-static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
-{
- bio_list_merge(bios, master);
- bio_list_init(master);
-}
-
static void error_bio_list(struct bio_list *bios, blk_status_t error)
{
struct bio *bio;
@@ -616,7 +610,7 @@ static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
bio_list_init(&bios);
spin_lock_irq(&tc->lock);
- __merge_bio_list(&bios, master);
+ bio_list_merge_init(&bios, master);
spin_unlock_irq(&tc->lock);
error_bio_list(&bios, error);
@@ -645,8 +639,8 @@ static void requeue_io(struct thin_c *tc)
bio_list_init(&bios);
spin_lock_irq(&tc->lock);
- __merge_bio_list(&bios, &tc->deferred_bio_list);
- __merge_bio_list(&bios, &tc->retry_on_resume_list);
+ bio_list_merge_init(&bios, &tc->deferred_bio_list);
+ bio_list_merge_init(&bios, &tc->retry_on_resume_list);
spin_unlock_irq(&tc->lock);
error_bio_list(&bios, BLK_STS_DM_REQUEUE);
diff --git a/drivers/md/dm-vdo/data-vio.c b/drivers/md/dm-vdo/data-vio.c
index 94f6f1ccfb7d..ab3ea8337809 100644
--- a/drivers/md/dm-vdo/data-vio.c
+++ b/drivers/md/dm-vdo/data-vio.c
@@ -604,8 +604,7 @@ static void assign_discard_permit(struct limiter *limiter)
static void get_waiters(struct limiter *limiter)
{
- bio_list_merge(&limiter->waiters, &limiter->new_waiters);
- bio_list_init(&limiter->new_waiters);
+ bio_list_merge_init(&limiter->waiters, &limiter->new_waiters);
}
static inline struct data_vio *get_available_data_vio(struct data_vio_pool *pool)
diff --git a/drivers/md/dm-vdo/flush.c b/drivers/md/dm-vdo/flush.c
index 57e87f0d7069..dd4fdee2ca0c 100644
--- a/drivers/md/dm-vdo/flush.c
+++ b/drivers/md/dm-vdo/flush.c
@@ -369,8 +369,7 @@ void vdo_dump_flusher(const struct flusher *flusher)
static void initialize_flush(struct vdo_flush *flush, struct vdo *vdo)
{
bio_list_init(&flush->bios);
- bio_list_merge(&flush->bios, &vdo->flusher->waiting_flush_bios);
- bio_list_init(&vdo->flusher->waiting_flush_bios);
+ bio_list_merge_init(&flush->bios, &vdo->flusher->waiting_flush_bios);
}
static void launch_flush(struct vdo_flush *flush)
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index eb9832b22b14..8e6bcb0d786a 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -60,16 +60,23 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
struct dm_table *map;
int srcu_idx, ret;
- if (dm_suspended_md(md))
- return -EAGAIN;
+ if (!md->zone_revalidate_map) {
+ /* Regular user context */
+ if (dm_suspended_md(md))
+ return -EAGAIN;
- map = dm_get_live_table(md, &srcu_idx);
- if (!map)
- return -EIO;
+ map = dm_get_live_table(md, &srcu_idx);
+ if (!map)
+ return -EIO;
+ } else {
+ /* Zone revalidation during __bind() */
+ map = md->zone_revalidate_map;
+ }
ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data);
- dm_put_live_table(md, srcu_idx);
+ if (!md->zone_revalidate_map)
+ dm_put_live_table(md, srcu_idx);
return ret;
}
@@ -138,80 +145,47 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
}
}
-void dm_cleanup_zoned_dev(struct mapped_device *md)
+/*
+ * Count conventional zones of a mapped zoned device. If the device
+ * only has conventional zones, do not expose it as zoned.
+ */
+static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
{
- if (md->disk) {
- bitmap_free(md->disk->conv_zones_bitmap);
- md->disk->conv_zones_bitmap = NULL;
- bitmap_free(md->disk->seq_zones_wlock);
- md->disk->seq_zones_wlock = NULL;
- }
+ unsigned int *nr_conv_zones = data;
- kvfree(md->zwp_offset);
- md->zwp_offset = NULL;
- md->nr_zones = 0;
-}
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ (*nr_conv_zones)++;
-static unsigned int dm_get_zone_wp_offset(struct blk_zone *zone)
-{
- switch (zone->cond) {
- case BLK_ZONE_COND_IMP_OPEN:
- case BLK_ZONE_COND_EXP_OPEN:
- case BLK_ZONE_COND_CLOSED:
- return zone->wp - zone->start;
- case BLK_ZONE_COND_FULL:
- return zone->len;
- case BLK_ZONE_COND_EMPTY:
- case BLK_ZONE_COND_NOT_WP:
- case BLK_ZONE_COND_OFFLINE:
- case BLK_ZONE_COND_READONLY:
- default:
- /*
- * Conventional, offline and read-only zones do not have a valid
- * write pointer. Use 0 as for an empty zone.
- */
- return 0;
- }
+ return 0;
}
-static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
+static int dm_check_zoned(struct mapped_device *md, struct dm_table *t)
{
- struct mapped_device *md = data;
struct gendisk *disk = md->disk;
+ unsigned int nr_conv_zones = 0;
+ int ret;
- switch (zone->type) {
- case BLK_ZONE_TYPE_CONVENTIONAL:
- if (!disk->conv_zones_bitmap) {
- disk->conv_zones_bitmap = bitmap_zalloc(disk->nr_zones,
- GFP_NOIO);
- if (!disk->conv_zones_bitmap)
- return -ENOMEM;
- }
- set_bit(idx, disk->conv_zones_bitmap);
- break;
- case BLK_ZONE_TYPE_SEQWRITE_REQ:
- case BLK_ZONE_TYPE_SEQWRITE_PREF:
- if (!disk->seq_zones_wlock) {
- disk->seq_zones_wlock = bitmap_zalloc(disk->nr_zones,
- GFP_NOIO);
- if (!disk->seq_zones_wlock)
- return -ENOMEM;
- }
- if (!md->zwp_offset) {
- md->zwp_offset =
- kvcalloc(disk->nr_zones, sizeof(unsigned int),
- GFP_KERNEL);
- if (!md->zwp_offset)
- return -ENOMEM;
- }
- md->zwp_offset[idx] = dm_get_zone_wp_offset(zone);
-
- break;
- default:
- DMERR("Invalid zone type 0x%x at sectors %llu",
- (int)zone->type, zone->start);
- return -ENODEV;
+ /* Count conventional zones */
+ md->zone_revalidate_map = t;
+ ret = dm_blk_report_zones(disk, 0, UINT_MAX,
+ dm_check_zoned_cb, &nr_conv_zones);
+ md->zone_revalidate_map = NULL;
+ if (ret < 0) {
+ DMERR("Check zoned failed %d", ret);
+ return ret;
+ }
+
+ /*
+ * If we only have conventional zones, expose the mapped device as
+ * a regular device.
+ */
+ if (nr_conv_zones >= ret) {
+ disk->queue->limits.max_open_zones = 0;
+ disk->queue->limits.max_active_zones = 0;
+ disk->queue->limits.zoned = false;
+ clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+ disk->nr_zones = 0;
}
return 0;
@@ -226,41 +200,32 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
{
struct gendisk *disk = md->disk;
- unsigned int noio_flag;
int ret;
- /*
- * Check if something changed. If yes, cleanup the current resources
- * and reallocate everything.
- */
+ /* Revalidate only if something changed. */
if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
- dm_cleanup_zoned_dev(md);
+ md->nr_zones = 0;
+
if (md->nr_zones)
return 0;
/*
- * Scan all zones to initialize everything. Ensure that all vmalloc
- * operations in this context are done as if GFP_NOIO was specified.
+ * Our table is not live yet. So the call to dm_get_live_table()
+ * in dm_blk_report_zones() will fail. Set a temporary pointer to
+ * our table for dm_blk_report_zones() to use directly.
*/
- noio_flag = memalloc_noio_save();
- ret = dm_blk_do_report_zones(md, t, 0, disk->nr_zones,
- dm_zone_revalidate_cb, md);
- memalloc_noio_restore(noio_flag);
- if (ret < 0)
- goto err;
- if (ret != disk->nr_zones) {
- ret = -EIO;
- goto err;
+ md->zone_revalidate_map = t;
+ ret = blk_revalidate_disk_zones(disk);
+ md->zone_revalidate_map = NULL;
+
+ if (ret) {
+ DMERR("Revalidate zones failed %d", ret);
+ return ret;
}
md->nr_zones = disk->nr_zones;
return 0;
-
-err:
- DMERR("Revalidate zones failed %d", ret);
- dm_cleanup_zoned_dev(md);
- return ret;
}
static int device_not_zone_append_capable(struct dm_target *ti,
@@ -289,294 +254,40 @@ static bool dm_table_supports_zone_append(struct dm_table *t)
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
{
struct mapped_device *md = t->md;
+ int ret;
/*
- * For a zoned target, the number of zones should be updated for the
- * correct value to be exposed in sysfs queue/nr_zones.
+ * Check if zone append is natively supported, and if not, set the
+ * mapped device queue as needing zone append emulation.
*/
WARN_ON_ONCE(queue_is_mq(q));
- md->disk->nr_zones = bdev_nr_zones(md->disk->part0);
-
- /* Check if zone append is natively supported */
if (dm_table_supports_zone_append(t)) {
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
- dm_cleanup_zoned_dev(md);
- return 0;
+ } else {
+ set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+ blk_queue_max_zone_append_sectors(q, 0);
}
- /*
- * Mark the mapped device as needing zone append emulation and
- * initialize the emulation resources once the capacity is set.
- */
- set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
if (!get_capacity(md->disk))
return 0;
- return dm_revalidate_zones(md, t);
-}
-
-static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
-{
- unsigned int *wp_offset = data;
-
- *wp_offset = dm_get_zone_wp_offset(zone);
-
- return 0;
-}
-
-static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
- unsigned int *wp_ofst)
-{
- sector_t sector = zno * bdev_zone_sectors(md->disk->part0);
- unsigned int noio_flag;
- struct dm_table *t;
- int srcu_idx, ret;
-
- t = dm_get_live_table(md, &srcu_idx);
- if (!t)
- return -EIO;
-
- /*
- * Ensure that all memory allocations in this context are done as if
- * GFP_NOIO was specified.
- */
- noio_flag = memalloc_noio_save();
- ret = dm_blk_do_report_zones(md, t, sector, 1,
- dm_update_zone_wp_offset_cb, wp_ofst);
- memalloc_noio_restore(noio_flag);
-
- dm_put_live_table(md, srcu_idx);
-
- if (ret != 1)
- return -EIO;
-
- return 0;
-}
-
-struct orig_bio_details {
- enum req_op op;
- unsigned int nr_sectors;
-};
-
-/*
- * First phase of BIO mapping for targets with zone append emulation:
- * check all BIO that change a zone writer pointer and change zone
- * append operations into regular write operations.
- */
-static bool dm_zone_map_bio_begin(struct mapped_device *md,
- unsigned int zno, struct bio *clone)
-{
- sector_t zsectors = bdev_zone_sectors(md->disk->part0);
- unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
-
- /*
- * If the target zone is in an error state, recover by inspecting the
- * zone to get its current write pointer position. Note that since the
- * target zone is already locked, a BIO issuing context should never
- * see the zone write in the DM_ZONE_UPDATING_WP_OFST state.
- */
- if (zwp_offset == DM_ZONE_INVALID_WP_OFST) {
- if (dm_update_zone_wp_offset(md, zno, &zwp_offset))
- return false;
- WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
- }
-
- switch (bio_op(clone)) {
- case REQ_OP_ZONE_RESET:
- case REQ_OP_ZONE_FINISH:
- return true;
- case REQ_OP_WRITE_ZEROES:
- case REQ_OP_WRITE:
- /* Writes must be aligned to the zone write pointer */
- if ((clone->bi_iter.bi_sector & (zsectors - 1)) != zwp_offset)
- return false;
- break;
- case REQ_OP_ZONE_APPEND:
- /*
- * Change zone append operations into a non-mergeable regular
- * writes directed at the current write pointer position of the
- * target zone.
- */
- clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
- (clone->bi_opf & (~REQ_OP_MASK));
- clone->bi_iter.bi_sector += zwp_offset;
- break;
- default:
- DMWARN_LIMIT("Invalid BIO operation");
- return false;
- }
-
- /* Cannot write to a full zone */
- if (zwp_offset >= zsectors)
- return false;
-
- return true;
-}
-
-/*
- * Second phase of BIO mapping for targets with zone append emulation:
- * update the zone write pointer offset array to account for the additional
- * data written to a zone. Note that at this point, the remapped clone BIO
- * may already have completed, so we do not touch it.
- */
-static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno,
- struct orig_bio_details *orig_bio_details,
- unsigned int nr_sectors)
-{
- unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
-
- /* The clone BIO may already have been completed and failed */
- if (zwp_offset == DM_ZONE_INVALID_WP_OFST)
- return BLK_STS_IOERR;
-
- /* Update the zone wp offset */
- switch (orig_bio_details->op) {
- case REQ_OP_ZONE_RESET:
- WRITE_ONCE(md->zwp_offset[zno], 0);
- return BLK_STS_OK;
- case REQ_OP_ZONE_FINISH:
- WRITE_ONCE(md->zwp_offset[zno],
- bdev_zone_sectors(md->disk->part0));
- return BLK_STS_OK;
- case REQ_OP_WRITE_ZEROES:
- case REQ_OP_WRITE:
- WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
- return BLK_STS_OK;
- case REQ_OP_ZONE_APPEND:
- /*
- * Check that the target did not truncate the write operation
- * emulating a zone append.
- */
- if (nr_sectors != orig_bio_details->nr_sectors) {
- DMWARN_LIMIT("Truncated write for zone append");
- return BLK_STS_IOERR;
- }
- WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
- return BLK_STS_OK;
- default:
- DMWARN_LIMIT("Invalid BIO operation");
- return BLK_STS_IOERR;
- }
-}
-
-static inline void dm_zone_lock(struct gendisk *disk, unsigned int zno,
- struct bio *clone)
-{
- if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)))
- return;
-
- wait_on_bit_lock_io(disk->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
- bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED);
-}
-
-static inline void dm_zone_unlock(struct gendisk *disk, unsigned int zno,
- struct bio *clone)
-{
- if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
- return;
-
- WARN_ON_ONCE(!test_bit(zno, disk->seq_zones_wlock));
- clear_bit_unlock(zno, disk->seq_zones_wlock);
- smp_mb__after_atomic();
- wake_up_bit(disk->seq_zones_wlock, zno);
-
- bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
-}
-
-static bool dm_need_zone_wp_tracking(struct bio *bio)
-{
/*
- * Special processing is not needed for operations that do not need the
- * zone write lock, that is, all operations that target conventional
- * zones and all operations that do not modify directly a sequential
- * zone write pointer.
+ * Check that the mapped device will indeed be zoned, that is, that it
+ * has sequential write required zones.
*/
- if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
- return false;
- switch (bio_op(bio)) {
- case REQ_OP_WRITE_ZEROES:
- case REQ_OP_WRITE:
- case REQ_OP_ZONE_RESET:
- case REQ_OP_ZONE_FINISH:
- case REQ_OP_ZONE_APPEND:
- return bio_zone_is_seq(bio);
- default:
- return false;
- }
-}
-
-/*
- * Special IO mapping for targets needing zone append emulation.
- */
-int dm_zone_map_bio(struct dm_target_io *tio)
-{
- struct dm_io *io = tio->io;
- struct dm_target *ti = tio->ti;
- struct mapped_device *md = io->md;
- struct bio *clone = &tio->clone;
- struct orig_bio_details orig_bio_details;
- unsigned int zno;
- blk_status_t sts;
- int r;
-
- /*
- * IOs that do not change a zone write pointer do not need
- * any additional special processing.
- */
- if (!dm_need_zone_wp_tracking(clone))
- return ti->type->map(ti, clone);
-
- /* Lock the target zone */
- zno = bio_zone_no(clone);
- dm_zone_lock(md->disk, zno, clone);
-
- orig_bio_details.nr_sectors = bio_sectors(clone);
- orig_bio_details.op = bio_op(clone);
+ ret = dm_check_zoned(md, t);
+ if (ret)
+ return ret;
+ if (!blk_queue_is_zoned(q))
+ return 0;
- /*
- * Check that the bio and the target zone write pointer offset are
- * both valid, and if the bio is a zone append, remap it to a write.
- */
- if (!dm_zone_map_bio_begin(md, zno, clone)) {
- dm_zone_unlock(md->disk, zno, clone);
- return DM_MAPIO_KILL;
+ if (!md->disk->nr_zones) {
+ DMINFO("%s using %s zone append",
+ md->disk->disk_name,
+ queue_emulates_zone_append(q) ? "emulated" : "native");
}
- /* Let the target do its work */
- r = ti->type->map(ti, clone);
- switch (r) {
- case DM_MAPIO_SUBMITTED:
- /*
- * The target submitted the clone BIO. The target zone will
- * be unlocked on completion of the clone.
- */
- sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
- *tio->len_ptr);
- break;
- case DM_MAPIO_REMAPPED:
- /*
- * The target only remapped the clone BIO. In case of error,
- * unlock the target zone here as the clone will not be
- * submitted.
- */
- sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
- *tio->len_ptr);
- if (sts != BLK_STS_OK)
- dm_zone_unlock(md->disk, zno, clone);
- break;
- case DM_MAPIO_REQUEUE:
- case DM_MAPIO_KILL:
- default:
- dm_zone_unlock(md->disk, zno, clone);
- sts = BLK_STS_IOERR;
- break;
- }
-
- if (sts != BLK_STS_OK)
- return DM_MAPIO_KILL;
-
- return r;
+ return dm_revalidate_zones(md, t);
}
/*
@@ -587,61 +298,17 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
struct mapped_device *md = io->md;
struct gendisk *disk = md->disk;
struct bio *orig_bio = io->orig_bio;
- unsigned int zwp_offset;
- unsigned int zno;
/*
- * For targets that do not emulate zone append, we only need to
- * handle native zone-append bios.
+ * Get the offset within the zone of the written sector
+ * and add that to the original bio sector position.
*/
- if (!dm_emulate_zone_append(md)) {
- /*
- * Get the offset within the zone of the written sector
- * and add that to the original bio sector position.
- */
- if (clone->bi_status == BLK_STS_OK &&
- bio_op(clone) == REQ_OP_ZONE_APPEND) {
- sector_t mask =
- (sector_t)bdev_zone_sectors(disk->part0) - 1;
-
- orig_bio->bi_iter.bi_sector +=
- clone->bi_iter.bi_sector & mask;
- }
-
- return;
- }
+ if (clone->bi_status == BLK_STS_OK &&
+ bio_op(clone) == REQ_OP_ZONE_APPEND) {
+ sector_t mask = bdev_zone_sectors(disk->part0) - 1;
- /*
- * For targets that do emulate zone append, if the clone BIO does not
- * own the target zone write lock, we have nothing to do.
- */
- if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
- return;
-
- zno = bio_zone_no(orig_bio);
-
- if (clone->bi_status != BLK_STS_OK) {
- /*
- * BIOs that modify a zone write pointer may leave the zone
- * in an unknown state in case of failure (e.g. the write
- * pointer was only partially advanced). In this case, set
- * the target zone write pointer as invalid unless it is
- * already being updated.
- */
- WRITE_ONCE(md->zwp_offset[zno], DM_ZONE_INVALID_WP_OFST);
- } else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
- /*
- * Get the written sector for zone append operation that were
- * emulated using regular write operations.
- */
- zwp_offset = READ_ONCE(md->zwp_offset[zno]);
- if (WARN_ON_ONCE(zwp_offset < bio_sectors(orig_bio)))
- WRITE_ONCE(md->zwp_offset[zno],
- DM_ZONE_INVALID_WP_OFST);
- else
- orig_bio->bi_iter.bi_sector +=
- zwp_offset - bio_sectors(orig_bio);
+ orig_bio->bi_iter.bi_sector += clone->bi_iter.bi_sector & mask;
}
- dm_zone_unlock(disk, zno, clone);
+ return;
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7d0746b37c8e..597dd7a25823 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1428,25 +1428,12 @@ static void __map_bio(struct bio *clone)
down(&md->swap_bios_semaphore);
}
- if (static_branch_unlikely(&zoned_enabled)) {
- /*
- * Check if the IO needs a special mapping due to zone append
- * emulation on zoned target. In this case, dm_zone_map_bio()
- * calls the target map operation.
- */
- if (unlikely(dm_emulate_zone_append(md)))
- r = dm_zone_map_bio(tio);
- else
- goto do_map;
- } else {
-do_map:
- if (likely(ti->type->map == linear_map))
- r = linear_map(ti, clone);
- else if (ti->type->map == stripe_map)
- r = stripe_map(ti, clone);
- else
- r = ti->type->map(ti, clone);
- }
+ if (likely(ti->type->map == linear_map))
+ r = linear_map(ti, clone);
+ else if (ti->type->map == stripe_map)
+ r = stripe_map(ti, clone);
+ else
+ r = ti->type->map(ti, clone);
switch (r) {
case DM_MAPIO_SUBMITTED:
@@ -1774,6 +1761,33 @@ static void init_clone_info(struct clone_info *ci, struct dm_io *io,
ci->sector_count = 0;
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
+ struct bio *bio)
+{
+ /*
+ * For mapped device that need zone append emulation, we must
+ * split any large BIO that straddles zone boundaries.
+ */
+ return dm_emulate_zone_append(md) && bio_straddles_zones(bio) &&
+ !bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING);
+}
+static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
+{
+ return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0);
+}
+#else
+static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
+ struct bio *bio)
+{
+ return false;
+}
+static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
+{
+ return false;
+}
+#endif
+
/*
* Entry point to split a bio into clones and submit them to the targets.
*/
@@ -1783,19 +1797,32 @@ static void dm_split_and_process_bio(struct mapped_device *md,
struct clone_info ci;
struct dm_io *io;
blk_status_t error = BLK_STS_OK;
- bool is_abnormal;
+ bool is_abnormal, need_split;
+
+ need_split = is_abnormal = is_abnormal_io(bio);
+ if (static_branch_unlikely(&zoned_enabled))
+ need_split = is_abnormal || dm_zone_bio_needs_split(md, bio);
- is_abnormal = is_abnormal_io(bio);
- if (unlikely(is_abnormal)) {
+ if (unlikely(need_split)) {
/*
* Use bio_split_to_limits() for abnormal IO (e.g. discard, etc)
* otherwise associated queue_limits won't be imposed.
+ * Also split the BIO for mapped devices needing zone append
+ * emulation to ensure that the BIO does not cross zone
+ * boundaries.
*/
bio = bio_split_to_limits(bio);
if (!bio)
return;
}
+ /*
+ * Use the block layer zone write plugging for mapped devices that
+ * need zone append emulation (e.g. dm-crypt).
+ */
+ if (static_branch_unlikely(&zoned_enabled) && dm_zone_plug_bio(md, bio))
+ return;
+
/* Only support nowait for normal IO */
if (unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) {
io = alloc_io(md, bio, GFP_NOWAIT);
@@ -2016,7 +2043,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
md->dax_dev = NULL;
}
- dm_cleanup_zoned_dev(md);
if (md->disk) {
spin_lock(&_minor_lock);
md->disk->private_data = NULL;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 7f1acbf6bd9e..e0c57f19839b 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -104,13 +104,11 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
void dm_zone_endio(struct dm_io *io, struct bio *clone);
#ifdef CONFIG_BLK_DEV_ZONED
-void dm_cleanup_zoned_dev(struct mapped_device *md);
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
int dm_zone_map_bio(struct dm_target_io *io);
#else
-static inline void dm_cleanup_zoned_dev(struct mapped_device *md) {}
#define dm_blk_report_zones NULL
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 059afc24c08b..0a2d37eb38ef 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -1424,7 +1424,7 @@ __acquires(bitmap->lock)
sector_t chunk = offset >> bitmap->chunkshift;
unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
- sector_t csize;
+ sector_t csize = ((sector_t)1) << bitmap->chunkshift;
int err;
if (page >= bitmap->pages) {
@@ -1433,6 +1433,7 @@ __acquires(bitmap->lock)
* End-of-device while looking for a whole page or
* user set a huge number to sysfs bitmap_set_bits.
*/
+ *blocks = csize - (offset & (csize - 1));
return NULL;
}
err = md_bitmap_checkpage(bitmap, page, create, 0);
@@ -1441,8 +1442,7 @@ __acquires(bitmap->lock)
bitmap->bp[page].map == NULL)
csize = ((sector_t)1) << (bitmap->chunkshift +
PAGE_COUNTER_SHIFT);
- else
- csize = ((sector_t)1) << bitmap->chunkshift;
+
*blocks = csize - (offset & (csize - 1));
if (err < 0)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index e575e74aabf5..aff9118ff697 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8087,7 +8087,8 @@ void md_wakeup_thread(struct md_thread __rcu *thread)
if (t) {
pr_debug("md: waking up MD thread %s.\n", t->tsk->comm);
set_bit(THREAD_WAKEUP, &t->flags);
- wake_up(&t->wqueue);
+ if (wq_has_sleeper(&t->wqueue))
+ wake_up(&t->wqueue);
}
rcu_read_unlock();
}
@@ -8582,6 +8583,10 @@ static int is_mddev_idle(struct mddev *mddev, int init)
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev) {
struct gendisk *disk = rdev->bdev->bd_disk;
+
+ if (!init && !blk_queue_io_stat(disk->queue))
+ continue;
+
curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
atomic_read(&disk->sync_io);
/* sync IO will cause sync_io to increase before the disk_stats
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 097d9dbd69b8..ca085ecad504 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -621,7 +621,8 @@ extern void mddev_unlock(struct mddev *mddev);
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
- atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
+ if (blk_queue_io_stat(bdev->bd_disk->queue))
+ atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
}
static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d874abfc1836..2bd1ce9b3922 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -36,7 +36,6 @@
*/
#include <linux/blkdev.h>
-#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
@@ -6734,6 +6733,9 @@ static void raid5d(struct md_thread *thread)
int batch_size, released;
unsigned int offset;
+ if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
+ break;
+
released = release_stripe_list(conf, conf->temp_inactive_list);
if (released)
clear_bit(R5_DID_ALLOC, &conf->cache_state);
@@ -6770,18 +6772,7 @@ static void raid5d(struct md_thread *thread)
spin_unlock_irq(&conf->device_lock);
md_check_recovery(mddev);
spin_lock_irq(&conf->device_lock);
-
- /*
- * Waiting on MD_SB_CHANGE_PENDING below may deadlock
- * seeing md_check_recovery() is needed to clear
- * the flag when using mdmon.
- */
- continue;
}
-
- wait_event_lock_irq(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
- conf->device_lock);
}
pr_debug("%d stripes handled\n", handled);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 095f59e7aa93..bf7615cb36ee 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2132,7 +2132,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
blk_mq_unfreeze_queue(ns->disk->queue);
if (blk_queue_is_zoned(ns->queue)) {
- ret = blk_revalidate_disk_zones(ns->disk, NULL);
+ ret = blk_revalidate_disk_zones(ns->disk);
if (ret && !nvme_first_scan(ns->disk))
goto out;
}
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 3148d9f1bde6..0021d06041c1 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -52,14 +52,10 @@ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
if (get_capacity(bd_disk) & (bdev_zone_sectors(ns->bdev) - 1))
return false;
/*
- * ZNS does not define a conventional zone type. If the underlying
- * device has a bitmap set indicating the existence of conventional
- * zones, reject the device. Otherwise, use report zones to detect if
- * the device has conventional zones.
+ * ZNS does not define a conventional zone type. Use report zones
+ * to detect if the device has conventional zones and reject it if
+ * it does.
*/
- if (ns->bdev->bd_disk->conv_zones_bitmap)
- return false;
-
ret = blkdev_report_zones(ns->bdev, 0, bdev_nr_zones(ns->bdev),
validate_conv_zones_cb, NULL);
if (ret < 0)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 5b3230ef51fe..967b6d62bb37 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1869,7 +1869,6 @@ out_put_budget:
case BLK_STS_OK:
break;
case BLK_STS_RESOURCE:
- case BLK_STS_ZONE_RESOURCE:
if (scsi_device_blocked(sdev))
ret = BLK_STS_DEV_RESOURCE;
break;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 65cdc8b77e35..64c5129044b3 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1260,12 +1260,6 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
}
}
- if (req_op(rq) == REQ_OP_ZONE_APPEND) {
- ret = sd_zbc_prepare_zone_append(cmd, &lba, nr_blocks);
- if (ret)
- goto fail;
- }
-
fua = rq->cmd_flags & REQ_FUA ? 0x8 : 0;
dix = scsi_prot_sg_count(cmd);
dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
@@ -1348,7 +1342,6 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
return sd_setup_flush_cmnd(cmd);
case REQ_OP_READ:
case REQ_OP_WRITE:
- case REQ_OP_ZONE_APPEND:
return sd_setup_read_write_cmnd(cmd);
case REQ_OP_ZONE_RESET:
return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER,
@@ -3981,7 +3974,6 @@ static void scsi_disk_release(struct device *dev)
struct scsi_disk *sdkp = to_scsi_disk(dev);
ida_free(&sd_index_ida, sdkp->index);
- sd_zbc_free_zone_info(sdkp);
put_device(&sdkp->device->sdev_gendev);
free_opal_dev(sdkp->opal_dev);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 5c4285a582b2..49dd600bfa48 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -104,12 +104,6 @@ struct scsi_disk {
* between zone starting LBAs is constant.
*/
u32 zone_starting_lba_gran;
- u32 *zones_wp_offset;
- spinlock_t zones_wp_offset_lock;
- u32 *rev_wp_offset;
- struct mutex rev_mutex;
- struct work_struct zone_wp_offset_work;
- char *zone_wp_update_buf;
#endif
atomic_t openers;
sector_t capacity; /* size in logical blocks */
@@ -245,7 +239,6 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
#ifdef CONFIG_BLK_DEV_ZONED
-void sd_zbc_free_zone_info(struct scsi_disk *sdkp);
int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]);
int sd_zbc_revalidate_zones(struct scsi_disk *sdkp);
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
@@ -255,13 +248,8 @@ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
-blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
- unsigned int nr_blocks);
-
#else /* CONFIG_BLK_DEV_ZONED */
-static inline void sd_zbc_free_zone_info(struct scsi_disk *sdkp) {}
-
static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
{
return 0;
@@ -285,13 +273,6 @@ static inline unsigned int sd_zbc_complete(struct scsi_cmnd *cmd,
return good_bytes;
}
-static inline blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd,
- sector_t *lba,
- unsigned int nr_blocks)
-{
- return BLK_STS_TARGET;
-}
-
#define sd_zbc_report_zones NULL
#endif /* CONFIG_BLK_DEV_ZONED */
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 26af5ab7d7c1..806036e48abe 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -23,36 +23,6 @@
#define CREATE_TRACE_POINTS
#include "sd_trace.h"
-/**
- * sd_zbc_get_zone_wp_offset - Get zone write pointer offset.
- * @zone: Zone for which to return the write pointer offset.
- *
- * Return: offset of the write pointer from the start of the zone.
- */
-static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
-{
- if (zone->type == ZBC_ZONE_TYPE_CONV)
- return 0;
-
- switch (zone->cond) {
- case BLK_ZONE_COND_IMP_OPEN:
- case BLK_ZONE_COND_EXP_OPEN:
- case BLK_ZONE_COND_CLOSED:
- return zone->wp - zone->start;
- case BLK_ZONE_COND_FULL:
- return zone->len;
- case BLK_ZONE_COND_EMPTY:
- case BLK_ZONE_COND_OFFLINE:
- case BLK_ZONE_COND_READONLY:
- default:
- /*
- * Offline and read-only zones do not have a valid
- * write pointer. Use 0 as for an empty zone.
- */
- return 0;
- }
-}
-
/* Whether or not a SCSI zone descriptor describes a gap zone. */
static bool sd_zbc_is_gap_zone(const u8 buf[64])
{
@@ -121,9 +91,6 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
if (ret)
return ret;
- if (sdkp->rev_wp_offset)
- sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone);
-
return 0;
}
@@ -347,123 +314,6 @@ static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
return BLK_STS_OK;
}
-#define SD_ZBC_INVALID_WP_OFST (~0u)
-#define SD_ZBC_UPDATING_WP_OFST (SD_ZBC_INVALID_WP_OFST - 1)
-
-static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
-{
- struct scsi_disk *sdkp = data;
-
- lockdep_assert_held(&sdkp->zones_wp_offset_lock);
-
- sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone);
-
- return 0;
-}
-
-/*
- * An attempt to append a zone triggered an invalid write pointer error.
- * Reread the write pointer of the zone(s) in which the append failed.
- */
-static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
-{
- struct scsi_disk *sdkp;
- unsigned long flags;
- sector_t zno;
- int ret;
-
- sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
-
- spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
- for (zno = 0; zno < sdkp->zone_info.nr_zones; zno++) {
- if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
- continue;
-
- spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
- ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
- SD_BUF_SIZE,
- zno * sdkp->zone_info.zone_blocks, true);
- spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
- if (!ret)
- sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
- zno, sd_zbc_update_wp_offset_cb,
- sdkp);
- }
- spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
-
- scsi_device_put(sdkp->device);
-}
-
-/**
- * sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command.
- * @cmd: the command to setup
- * @lba: the LBA to patch
- * @nr_blocks: the number of LBAs to be written
- *
- * Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND.
- * @sd_zbc_prepare_zone_append() handles the necessary zone wrote locking and
- * patching of the lba for an emulated ZONE_APPEND command.
- *
- * In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will
- * schedule a REPORT ZONES command and return BLK_STS_IOERR.
- */
-blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
- unsigned int nr_blocks)
-{
- struct request *rq = scsi_cmd_to_rq(cmd);
- struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
- unsigned int wp_offset, zno = blk_rq_zone_no(rq);
- unsigned long flags;
- blk_status_t ret;
-
- ret = sd_zbc_cmnd_checks(cmd);
- if (ret != BLK_STS_OK)
- return ret;
-
- if (!blk_rq_zone_is_seq(rq))
- return BLK_STS_IOERR;
-
- /* Unlock of the write lock will happen in sd_zbc_complete() */
- if (!blk_req_zone_write_trylock(rq))
- return BLK_STS_ZONE_RESOURCE;
-
- spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
- wp_offset = sdkp->zones_wp_offset[zno];
- switch (wp_offset) {
- case SD_ZBC_INVALID_WP_OFST:
- /*
- * We are about to schedule work to update a zone write pointer
- * offset, which will cause the zone append command to be
- * requeued. So make sure that the scsi device does not go away
- * while the work is being processed.
- */
- if (scsi_device_get(sdkp->device)) {
- ret = BLK_STS_IOERR;
- break;
- }
- sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST;
- schedule_work(&sdkp->zone_wp_offset_work);
- fallthrough;
- case SD_ZBC_UPDATING_WP_OFST:
- ret = BLK_STS_DEV_RESOURCE;
- break;
- default:
- wp_offset = sectors_to_logical(sdkp->device, wp_offset);
- if (wp_offset + nr_blocks > sdkp->zone_info.zone_blocks) {
- ret = BLK_STS_IOERR;
- break;
- }
-
- trace_scsi_prepare_zone_append(cmd, *lba, wp_offset);
- *lba += wp_offset;
- }
- spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
- if (ret)
- blk_req_zone_write_unlock(rq);
- return ret;
-}
-
/**
* sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
* can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
@@ -504,96 +354,6 @@ blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
return BLK_STS_OK;
}
-static bool sd_zbc_need_zone_wp_update(struct request *rq)
-{
- switch (req_op(rq)) {
- case REQ_OP_ZONE_APPEND:
- case REQ_OP_ZONE_FINISH:
- case REQ_OP_ZONE_RESET:
- case REQ_OP_ZONE_RESET_ALL:
- return true;
- case REQ_OP_WRITE:
- case REQ_OP_WRITE_ZEROES:
- return blk_rq_zone_is_seq(rq);
- default:
- return false;
- }
-}
-
-/**
- * sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion
- * @cmd: Completed command
- * @good_bytes: Command reply bytes
- *
- * Called from sd_zbc_complete() to handle the update of the cached zone write
- * pointer value in case an update is needed.
- */
-static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
- unsigned int good_bytes)
-{
- int result = cmd->result;
- struct request *rq = scsi_cmd_to_rq(cmd);
- struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
- unsigned int zno = blk_rq_zone_no(rq);
- enum req_op op = req_op(rq);
- unsigned long flags;
-
- /*
- * If we got an error for a command that needs updating the write
- * pointer offset cache, we must mark the zone wp offset entry as
- * invalid to force an update from disk the next time a zone append
- * command is issued.
- */
- spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
-
- if (result && op != REQ_OP_ZONE_RESET_ALL) {
- if (op == REQ_OP_ZONE_APPEND) {
- /* Force complete completion (no retry) */
- good_bytes = 0;
- scsi_set_resid(cmd, blk_rq_bytes(rq));
- }
-
- /*
- * Force an update of the zone write pointer offset on
- * the next zone append access.
- */
- if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
- sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST;
- goto unlock_wp_offset;
- }
-
- switch (op) {
- case REQ_OP_ZONE_APPEND:
- trace_scsi_zone_wp_update(cmd, rq->__sector,
- sdkp->zones_wp_offset[zno], good_bytes);
- rq->__sector += sdkp->zones_wp_offset[zno];
- fallthrough;
- case REQ_OP_WRITE_ZEROES:
- case REQ_OP_WRITE:
- if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp))
- sdkp->zones_wp_offset[zno] +=
- good_bytes >> SECTOR_SHIFT;
- break;
- case REQ_OP_ZONE_RESET:
- sdkp->zones_wp_offset[zno] = 0;
- break;
- case REQ_OP_ZONE_FINISH:
- sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp);
- break;
- case REQ_OP_ZONE_RESET_ALL:
- memset(sdkp->zones_wp_offset, 0,
- sdkp->zone_info.nr_zones * sizeof(unsigned int));
- break;
- default:
- break;
- }
-
-unlock_wp_offset:
- spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
-
- return good_bytes;
-}
-
/**
* sd_zbc_complete - ZBC command post processing.
* @cmd: Completed command
@@ -619,11 +379,7 @@ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
* so be quiet about the error.
*/
rq->rq_flags |= RQF_QUIET;
- } else if (sd_zbc_need_zone_wp_update(rq))
- good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes);
-
- if (req_op(rq) == REQ_OP_ZONE_APPEND)
- blk_req_zone_write_unlock(rq);
+ }
return good_bytes;
}
@@ -780,46 +536,6 @@ static void sd_zbc_print_zones(struct scsi_disk *sdkp)
sdkp->zone_info.zone_blocks);
}
-static int sd_zbc_init_disk(struct scsi_disk *sdkp)
-{
- sdkp->zones_wp_offset = NULL;
- spin_lock_init(&sdkp->zones_wp_offset_lock);
- sdkp->rev_wp_offset = NULL;
- mutex_init(&sdkp->rev_mutex);
- INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
- sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
- if (!sdkp->zone_wp_update_buf)
- return -ENOMEM;
-
- return 0;
-}
-
-void sd_zbc_free_zone_info(struct scsi_disk *sdkp)
-{
- if (!sdkp->zone_wp_update_buf)
- return;
-
- /* Serialize against revalidate zones */
- mutex_lock(&sdkp->rev_mutex);
-
- kvfree(sdkp->zones_wp_offset);
- sdkp->zones_wp_offset = NULL;
- kfree(sdkp->zone_wp_update_buf);
- sdkp->zone_wp_update_buf = NULL;
-
- sdkp->early_zone_info = (struct zoned_disk_info){ };
- sdkp->zone_info = (struct zoned_disk_info){ };
-
- mutex_unlock(&sdkp->rev_mutex);
-}
-
-static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
-{
- struct scsi_disk *sdkp = scsi_disk(disk);
-
- swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset);
-}
-
/*
* Call blk_revalidate_disk_zones() if any of the zoned disk properties have
* changed that make it necessary to call that function. Called by
@@ -831,18 +547,8 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
struct request_queue *q = disk->queue;
u32 zone_blocks = sdkp->early_zone_info.zone_blocks;
unsigned int nr_zones = sdkp->early_zone_info.nr_zones;
- int ret = 0;
unsigned int flags;
-
- /*
- * For all zoned disks, initialize zone append emulation data if not
- * already done.
- */
- if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) {
- ret = sd_zbc_init_disk(sdkp);
- if (ret)
- return ret;
- }
+ int ret;
/*
* There is nothing to do for regular disks, including host-aware disks
@@ -851,50 +557,32 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
if (!blk_queue_is_zoned(q))
return 0;
- /*
- * Make sure revalidate zones are serialized to ensure exclusive
- * updates of the scsi disk data.
- */
- mutex_lock(&sdkp->rev_mutex);
-
if (sdkp->zone_info.zone_blocks == zone_blocks &&
sdkp->zone_info.nr_zones == nr_zones &&
disk->nr_zones == nr_zones)
- goto unlock;
+ return 0;
- flags = memalloc_noio_save();
sdkp->zone_info.zone_blocks = zone_blocks;
sdkp->zone_info.nr_zones = nr_zones;
- sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL);
- if (!sdkp->rev_wp_offset) {
- ret = -ENOMEM;
- memalloc_noio_restore(flags);
- goto unlock;
- }
blk_queue_chunk_sectors(q,
logical_to_sectors(sdkp->device, zone_blocks));
- blk_queue_max_zone_append_sectors(q,
- q->limits.max_segments << PAGE_SECTORS_SHIFT);
- ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
+ /* Enable block layer zone append emulation */
+ blk_queue_max_zone_append_sectors(q, 0);
+ flags = memalloc_noio_save();
+ ret = blk_revalidate_disk_zones(disk);
memalloc_noio_restore(flags);
- kvfree(sdkp->rev_wp_offset);
- sdkp->rev_wp_offset = NULL;
-
if (ret) {
sdkp->zone_info = (struct zoned_disk_info){ };
sdkp->capacity = 0;
- goto unlock;
+ return ret;
}
sd_zbc_print_zones(sdkp);
-unlock:
- mutex_unlock(&sdkp->rev_mutex);
-
- return ret;
+ return 0;
}
/**
@@ -917,10 +605,8 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
if (!sd_is_zoned(sdkp)) {
/*
* Device managed or normal SCSI disk, no special handling
- * required. Nevertheless, free the disk zone information in
- * case the device type changed.
+ * required.
*/
- sd_zbc_free_zone_info(sdkp);
return 0;
}
@@ -941,7 +627,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
/* The drive satisfies the kernel restrictions: set it up */
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
- blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
if (sdkp->zones_max_open == U32_MAX)
disk_set_max_open_zones(disk, 0);
else