summaryrefslogtreecommitdiff
path: root/drivers/block/null_blk/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/null_blk/main.c')
-rw-r--r--drivers/block/null_blk/main.c1092
1 files changed, 492 insertions, 600 deletions
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 7d28e3aa406c..c7c0fb79a6bf 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -77,7 +77,7 @@ enum {
NULL_IRQ_TIMER = 2,
};
-static bool g_virt_boundary = false;
+static bool g_virt_boundary;
module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
@@ -115,6 +115,18 @@ module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
#endif
+/*
+ * Historic queue modes.
+ *
+ * These days nothing but NULL_Q_MQ is actually supported, but we keep it the
+ * enum for error reporting.
+ */
+enum {
+ NULL_Q_BIO = 0,
+ NULL_Q_RQ = 1,
+ NULL_Q_MQ = 2,
+};
+
static int g_queue_mode = NULL_Q_MQ;
static int null_param_store_val(const char *str, int *val, int min, int max)
@@ -165,8 +177,8 @@ static bool g_blocking;
module_param_named(blocking, g_blocking, bool, 0444);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
-static bool shared_tags;
-module_param(shared_tags, bool, 0444);
+static bool g_shared_tags;
+module_param_named(shared_tags, g_shared_tags, bool, 0444);
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
static bool g_shared_tag_bitmap;
@@ -211,7 +223,11 @@ MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed nu
static unsigned long g_cache_size;
module_param_named(cache_size, g_cache_size, ulong, 0444);
-MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)");
+MODULE_PARM_DESC(cache_size, "Cache size in MiB for memory-backed device. Default: 0 (none)");
+
+static bool g_fua = true;
+module_param_named(fua, g_fua, bool, 0444);
+MODULE_PARM_DESC(fua, "Enable/disable FUA support when cache_size is used. Default: true");
static unsigned int g_mbps;
module_param_named(mbps, g_mbps, uint, 0444);
@@ -241,6 +257,19 @@ static unsigned int g_zone_max_active;
module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
+static int g_zone_append_max_sectors = INT_MAX;
+module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444);
+MODULE_PARM_DESC(zone_append_max_sectors,
+ "Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");
+
+static bool g_zone_full;
+module_param_named(zone_full, g_zone_full, bool, S_IRUGO);
+MODULE_PARM_DESC(zone_full, "Initialize the sequential write required zones of a zoned device to be full. Default: false");
+
+static bool g_rotational;
+module_param_named(rotational, g_rotational, bool, S_IRUGO);
+MODULE_PARM_DESC(rotational, "Set the rotational feature for the device. Default: false");
+
static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
@@ -250,7 +279,7 @@ static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
static inline struct nullb_device *to_nullb_device(struct config_item *item)
{
- return item ? container_of(item, struct nullb_device, item) : NULL;
+ return item ? container_of(to_config_group(item), struct nullb_device, group) : NULL;
}
static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
@@ -392,13 +421,25 @@ static int nullb_update_nr_hw_queues(struct nullb_device *dev,
static int nullb_apply_submit_queues(struct nullb_device *dev,
unsigned int submit_queues)
{
- return nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues);
+ int ret;
+
+ mutex_lock(&lock);
+ ret = nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues);
+ mutex_unlock(&lock);
+
+ return ret;
}
static int nullb_apply_poll_queues(struct nullb_device *dev,
unsigned int poll_queues)
{
- return nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues);
+ int ret;
+
+ mutex_lock(&lock);
+ ret = nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues);
+ mutex_unlock(&lock);
+
+ return ret;
}
NULLB_DEVICE_ATTR(size, ulong, NULL);
@@ -424,9 +465,16 @@ NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
+NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
+NULLB_DEVICE_ATTR(zone_full, bool, NULL);
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
+NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
+NULLB_DEVICE_ATTR(fua, bool, NULL);
+NULLB_DEVICE_ATTR(rotational, bool, NULL);
+NULLB_DEVICE_ATTR(badblocks_once, bool, NULL);
+NULLB_DEVICE_ATTR(badblocks_partial_io, bool, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -444,28 +492,32 @@ static ssize_t nullb_device_power_store(struct config_item *item,
if (ret < 0)
return ret;
+ ret = count;
+ mutex_lock(&lock);
if (!dev->power && newp) {
if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
- return count;
+ goto out;
+
ret = null_add_dev(dev);
if (ret) {
clear_bit(NULLB_DEV_FL_UP, &dev->flags);
- return ret;
+ goto out;
}
set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
dev->power = newp;
+ ret = count;
} else if (dev->power && !newp) {
if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
- mutex_lock(&lock);
dev->power = newp;
null_del_dev(dev->nullb);
- mutex_unlock(&lock);
}
clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
}
- return count;
+out:
+ mutex_unlock(&lock);
+ return ret;
}
CONFIGFS_ATTR(nullb_device_, power);
@@ -509,14 +561,14 @@ static ssize_t nullb_device_badblocks_store(struct config_item *item,
goto out;
/* enable badblocks */
cmpxchg(&t_dev->badblocks.shift, -1, 0);
- if (buf[0] == '+')
- ret = badblocks_set(&t_dev->badblocks, start,
- end - start + 1, 1);
- else
- ret = badblocks_clear(&t_dev->badblocks, start,
- end - start + 1);
- if (ret == 0)
+ if (buf[0] == '+') {
+ if (badblocks_set(&t_dev->badblocks, start,
+ end - start + 1, 1))
+ ret = count;
+ } else if (badblocks_clear(&t_dev->badblocks, start,
+ end - start + 1)) {
ret = count;
+ }
out:
kfree(orig);
return ret;
@@ -542,36 +594,43 @@ static ssize_t nullb_device_zone_offline_store(struct config_item *item,
CONFIGFS_ATTR_WO(nullb_device_, zone_offline);
static struct configfs_attribute *nullb_device_attrs[] = {
- &nullb_device_attr_size,
+ &nullb_device_attr_badblocks,
+ &nullb_device_attr_badblocks_once,
+ &nullb_device_attr_badblocks_partial_io,
+ &nullb_device_attr_blocking,
+ &nullb_device_attr_blocksize,
+ &nullb_device_attr_cache_size,
&nullb_device_attr_completion_nsec,
- &nullb_device_attr_submit_queues,
- &nullb_device_attr_poll_queues,
+ &nullb_device_attr_discard,
+ &nullb_device_attr_fua,
&nullb_device_attr_home_node,
- &nullb_device_attr_queue_mode,
- &nullb_device_attr_blocksize,
- &nullb_device_attr_max_sectors,
- &nullb_device_attr_irqmode,
&nullb_device_attr_hw_queue_depth,
&nullb_device_attr_index,
- &nullb_device_attr_blocking,
- &nullb_device_attr_use_per_node_hctx,
- &nullb_device_attr_power,
- &nullb_device_attr_memory_backed,
- &nullb_device_attr_discard,
+ &nullb_device_attr_irqmode,
+ &nullb_device_attr_max_sectors,
&nullb_device_attr_mbps,
- &nullb_device_attr_cache_size,
- &nullb_device_attr_badblocks,
- &nullb_device_attr_zoned,
- &nullb_device_attr_zone_size,
+ &nullb_device_attr_memory_backed,
+ &nullb_device_attr_no_sched,
+ &nullb_device_attr_poll_queues,
+ &nullb_device_attr_power,
+ &nullb_device_attr_queue_mode,
+ &nullb_device_attr_rotational,
+ &nullb_device_attr_shared_tag_bitmap,
+ &nullb_device_attr_shared_tags,
+ &nullb_device_attr_size,
+ &nullb_device_attr_submit_queues,
+ &nullb_device_attr_use_per_node_hctx,
+ &nullb_device_attr_virt_boundary,
+ &nullb_device_attr_zone_append_max_sectors,
&nullb_device_attr_zone_capacity,
- &nullb_device_attr_zone_nr_conv,
- &nullb_device_attr_zone_max_open,
+ &nullb_device_attr_zone_full,
&nullb_device_attr_zone_max_active,
- &nullb_device_attr_zone_readonly,
+ &nullb_device_attr_zone_max_open,
+ &nullb_device_attr_zone_nr_conv,
&nullb_device_attr_zone_offline,
- &nullb_device_attr_virt_boundary,
- &nullb_device_attr_no_sched,
- &nullb_device_attr_shared_tag_bitmap,
+ &nullb_device_attr_zone_readonly,
+ &nullb_device_attr_zone_size,
+ &nullb_device_attr_zoned,
NULL,
};
@@ -593,8 +652,29 @@ static const struct config_item_type nullb_device_type = {
.ct_owner = THIS_MODULE,
};
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+
+static void nullb_add_fault_config(struct nullb_device *dev)
+{
+ fault_config_init(&dev->timeout_config, "timeout_inject");
+ fault_config_init(&dev->requeue_config, "requeue_inject");
+ fault_config_init(&dev->init_hctx_fault_config, "init_hctx_fault_inject");
+
+ configfs_add_default_group(&dev->timeout_config.group, &dev->group);
+ configfs_add_default_group(&dev->requeue_config.group, &dev->group);
+ configfs_add_default_group(&dev->init_hctx_fault_config.group, &dev->group);
+}
+
+#else
+
+static void nullb_add_fault_config(struct nullb_device *dev)
+{
+}
+
+#endif
+
static struct
-config_item *nullb_group_make_item(struct config_group *group, const char *name)
+config_group *nullb_group_make_group(struct config_group *group, const char *name)
{
struct nullb_device *dev;
@@ -605,9 +685,10 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name)
if (!dev)
return ERR_PTR(-ENOMEM);
- config_item_init_type_name(&dev->item, name, &nullb_device_type);
+ config_group_init_type_name(&dev->group, name, &nullb_device_type);
+ nullb_add_fault_config(dev);
- return &dev->item;
+ return &dev->group;
}
static void
@@ -627,14 +708,28 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
- return snprintf(page, PAGE_SIZE,
- "badblocks,blocking,blocksize,cache_size,"
- "completion_nsec,discard,home_node,hw_queue_depth,"
- "irqmode,max_sectors,mbps,memory_backed,no_sched,"
- "poll_queues,power,queue_mode,shared_tag_bitmap,size,"
- "submit_queues,use_per_node_hctx,virt_boundary,zoned,"
- "zone_capacity,zone_max_active,zone_max_open,"
- "zone_nr_conv,zone_offline,zone_readonly,zone_size\n");
+
+ struct configfs_attribute **entry;
+ char delimiter = ',';
+ size_t left = PAGE_SIZE;
+ size_t written = 0;
+ int ret;
+
+ for (entry = &nullb_device_attrs[0]; *entry && left > 0; entry++) {
+ if (!*(entry + 1))
+ delimiter = '\n';
+ ret = snprintf(page + written, left, "%s%c", (*entry)->ca_name,
+ delimiter);
+ if (ret >= left) {
+ WARN_ONCE(1, "Too many null_blk features to print\n");
+ memzero_explicit(page, PAGE_SIZE);
+ return -ENOBUFS;
+ }
+ left -= ret;
+ written += ret;
+ }
+
+ return written;
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -645,7 +740,7 @@ static struct configfs_attribute *nullb_group_attrs[] = {
};
static struct configfs_group_operations nullb_group_ops = {
- .make_item = nullb_group_make_item,
+ .make_group = nullb_group_make_group,
.drop_item = nullb_group_drop_item,
};
@@ -676,6 +771,13 @@ static struct nullb_device *null_alloc_dev(void)
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return NULL;
+
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+ dev->timeout_config.attr = null_timeout_attr;
+ dev->requeue_config.attr = null_requeue_attr;
+ dev->init_hctx_fault_config.attr = null_init_hctx_attr;
+#endif
+
INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
if (badblocks_init(&dev->badblocks, 0)) {
@@ -707,9 +809,15 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_nr_conv = g_zone_nr_conv;
dev->zone_max_open = g_zone_max_open;
dev->zone_max_active = g_zone_max_active;
+ dev->zone_append_max_sectors = g_zone_append_max_sectors;
+ dev->zone_full = g_zone_full;
dev->virt_boundary = g_virt_boundary;
dev->no_sched = g_no_sched;
+ dev->shared_tags = g_shared_tags;
dev->shared_tag_bitmap = g_shared_tag_bitmap;
+ dev->fua = g_fua;
+ dev->rotational = g_rotational;
+
return dev;
}
@@ -723,98 +831,11 @@ static void null_free_dev(struct nullb_device *dev)
kfree(dev);
}
-static void put_tag(struct nullb_queue *nq, unsigned int tag)
-{
- clear_bit_unlock(tag, nq->tag_map);
-
- if (waitqueue_active(&nq->wait))
- wake_up(&nq->wait);
-}
-
-static unsigned int get_tag(struct nullb_queue *nq)
-{
- unsigned int tag;
-
- do {
- tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
- if (tag >= nq->queue_depth)
- return -1U;
- } while (test_and_set_bit_lock(tag, nq->tag_map));
-
- return tag;
-}
-
-static void free_cmd(struct nullb_cmd *cmd)
-{
- put_tag(cmd->nq, cmd->tag);
-}
-
-static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
-
-static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
-{
- struct nullb_cmd *cmd;
- unsigned int tag;
-
- tag = get_tag(nq);
- if (tag != -1U) {
- cmd = &nq->cmds[tag];
- cmd->tag = tag;
- cmd->error = BLK_STS_OK;
- cmd->nq = nq;
- if (nq->dev->irqmode == NULL_IRQ_TIMER) {
- hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
- cmd->timer.function = null_cmd_timer_expired;
- }
- return cmd;
- }
-
- return NULL;
-}
-
-static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio)
-{
- struct nullb_cmd *cmd;
- DEFINE_WAIT(wait);
-
- do {
- /*
- * This avoids multiple return statements, multiple calls to
- * __alloc_cmd() and a fast path call to prepare_to_wait().
- */
- cmd = __alloc_cmd(nq);
- if (cmd) {
- cmd->bio = bio;
- return cmd;
- }
- prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
- io_schedule();
- finish_wait(&nq->wait, &wait);
- } while (1);
-}
-
-static void end_cmd(struct nullb_cmd *cmd)
-{
- int queue_mode = cmd->nq->dev->queue_mode;
-
- switch (queue_mode) {
- case NULL_Q_MQ:
- blk_mq_end_request(cmd->rq, cmd->error);
- return;
- case NULL_Q_BIO:
- cmd->bio->bi_status = cmd->error;
- bio_endio(cmd->bio);
- break;
- }
-
- free_cmd(cmd);
-}
-
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
- end_cmd(container_of(timer, struct nullb_cmd, timer));
+ struct nullb_cmd *cmd = container_of(timer, struct nullb_cmd, timer);
+ blk_mq_end_request(blk_mq_rq_from_pdu(cmd), cmd->error);
return HRTIMER_NORESTART;
}
@@ -827,7 +848,9 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
static void null_complete_rq(struct request *rq)
{
- end_cmd(blk_mq_rq_to_pdu(rq));
+ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+
+ blk_mq_end_request(rq, cmd->error);
}
static struct nullb_page *null_alloc_page(void)
@@ -900,7 +923,7 @@ static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
if (radix_tree_insert(root, idx, t_page)) {
null_free_page(t_page);
t_page = radix_tree_lookup(root, idx);
- WARN_ON(!t_page || t_page->page->index != idx);
+ WARN_ON(!t_page || t_page->page->private != idx);
} else if (is_cache)
nullb->dev->curr_cache += PAGE_SIZE;
@@ -923,7 +946,7 @@ static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
(void **)t_pages, pos, FREE_BATCH);
for (i = 0; i < nr_pages; i++) {
- pos = t_pages[i]->page->index;
+ pos = t_pages[i]->page->private;
ret = radix_tree_delete_item(root, pos, t_pages[i]);
WARN_ON(ret != t_pages[i]);
null_free_page(ret);
@@ -949,7 +972,7 @@ static struct nullb_page *__null_lookup_page(struct nullb *nullb,
root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
t_page = radix_tree_lookup(root, idx);
- WARN_ON(t_page && t_page->page->index != idx);
+ WARN_ON(t_page && t_page->page->private != idx);
if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
return t_page;
@@ -992,7 +1015,7 @@ static struct nullb_page *null_insert_page(struct nullb *nullb,
spin_lock_irq(&nullb->lock);
idx = sector >> PAGE_SECTORS_SHIFT;
- t_page->page->index = idx;
+ t_page->page->private = idx;
t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
radix_tree_preload_end();
@@ -1012,7 +1035,7 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
struct nullb_page *t_page, *ret;
void *dst, *src;
- idx = c_page->page->index;
+ idx = c_page->page->private;
t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
@@ -1030,8 +1053,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
if (!t_page)
return -ENOMEM;
- src = kmap_atomic(c_page->page);
- dst = kmap_atomic(t_page->page);
+ src = kmap_local_page(c_page->page);
+ dst = kmap_local_page(t_page->page);
for (i = 0; i < PAGE_SECTORS;
i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
@@ -1043,8 +1066,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
}
}
- kunmap_atomic(dst);
- kunmap_atomic(src);
+ kunmap_local(dst);
+ kunmap_local(src);
ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
null_free_page(ret);
@@ -1071,7 +1094,7 @@ again:
* avoid race, we don't allow page free
*/
for (i = 0; i < nr_pages; i++) {
- nullb->cache_flush_pos = c_pages[i]->page->index;
+ nullb->cache_flush_pos = c_pages[i]->page->private;
/*
* We found the page which is being flushed to disk by other
* threads
@@ -1106,31 +1129,28 @@ again:
return 0;
}
-static int copy_to_nullb(struct nullb *nullb, struct page *source,
- unsigned int off, sector_t sector, size_t n, bool is_fua)
+static blk_status_t copy_to_nullb(struct nullb *nullb, void *source,
+ loff_t pos, size_t n, bool is_fua)
{
size_t temp, count = 0;
- unsigned int offset;
struct nullb_page *t_page;
- void *dst, *src;
+ sector_t sector;
while (count < n) {
- temp = min_t(size_t, nullb->dev->blocksize, n - count);
+ temp = min3(nullb->dev->blocksize, n - count,
+ PAGE_SIZE - offset_in_page(pos));
+ sector = pos >> SECTOR_SHIFT;
if (null_cache_active(nullb) && !is_fua)
null_make_cache_space(nullb, PAGE_SIZE);
- offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
t_page = null_insert_page(nullb, sector,
!null_cache_active(nullb) || is_fua);
if (!t_page)
- return -ENOSPC;
+ return BLK_STS_NOSPC;
- src = kmap_atomic(source);
- dst = kmap_atomic(t_page->page);
- memcpy(dst + offset, src + off + count, temp);
- kunmap_atomic(dst);
- kunmap_atomic(src);
+ memcpy_to_page(t_page->page, offset_in_page(pos),
+ source + count, temp);
__set_bit(sector & SECTOR_MASK, t_page->bitmap);
@@ -1138,51 +1158,34 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source,
null_free_sector(nullb, sector, true);
count += temp;
- sector += temp >> SECTOR_SHIFT;
+ pos += temp;
}
- return 0;
+ return BLK_STS_OK;
}
-static int copy_from_nullb(struct nullb *nullb, struct page *dest,
- unsigned int off, sector_t sector, size_t n)
+static void copy_from_nullb(struct nullb *nullb, void *dest, loff_t pos,
+ size_t n)
{
size_t temp, count = 0;
- unsigned int offset;
struct nullb_page *t_page;
- void *dst, *src;
+ sector_t sector;
while (count < n) {
- temp = min_t(size_t, nullb->dev->blocksize, n - count);
+ temp = min3(nullb->dev->blocksize, n - count,
+ PAGE_SIZE - offset_in_page(pos));
+ sector = pos >> SECTOR_SHIFT;
- offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
t_page = null_lookup_page(nullb, sector, false,
!null_cache_active(nullb));
-
- dst = kmap_atomic(dest);
- if (!t_page) {
- memset(dst + off + count, 0, temp);
- goto next;
- }
- src = kmap_atomic(t_page->page);
- memcpy(dst + off + count, src + offset, temp);
- kunmap_atomic(src);
-next:
- kunmap_atomic(dst);
+ if (t_page)
+ memcpy_from_page(dest + count, t_page->page,
+ offset_in_page(pos), temp);
+ else
+ memset(dest + count, 0, temp);
count += temp;
- sector += temp >> SECTOR_SHIFT;
+ pos += temp;
}
- return 0;
-}
-
-static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
- unsigned int len, unsigned int off)
-{
- void *dst;
-
- dst = kmap_atomic(page);
- memset(dst + off, 0xFF, len);
- kunmap_atomic(dst);
}
blk_status_t null_handle_discard(struct nullb_device *dev,
@@ -1206,7 +1209,7 @@ blk_status_t null_handle_discard(struct nullb_device *dev,
return BLK_STS_OK;
}
-static int null_handle_flush(struct nullb *nullb)
+static blk_status_t null_handle_flush(struct nullb *nullb)
{
int err;
@@ -1223,107 +1226,80 @@ static int null_handle_flush(struct nullb *nullb)
WARN_ON(!radix_tree_empty(&nullb->dev->cache));
spin_unlock_irq(&nullb->lock);
- return err;
+ return errno_to_blk_status(err);
}
-static int null_transfer(struct nullb *nullb, struct page *page,
- unsigned int len, unsigned int off, bool is_write, sector_t sector,
+static blk_status_t null_transfer(struct nullb *nullb, struct page *page,
+ unsigned int len, unsigned int off, bool is_write, loff_t pos,
bool is_fua)
{
struct nullb_device *dev = nullb->dev;
+ blk_status_t err = BLK_STS_OK;
unsigned int valid_len = len;
- int err = 0;
+ void *p;
+ p = kmap_local_page(page) + off;
if (!is_write) {
- if (dev->zoned)
+ if (dev->zoned) {
valid_len = null_zone_valid_read_len(nullb,
- sector, len);
+ pos >> SECTOR_SHIFT, len);
+ if (valid_len && valid_len != len)
+ valid_len -= pos & (SECTOR_SIZE - 1);
+ }
if (valid_len) {
- err = copy_from_nullb(nullb, page, off,
- sector, valid_len);
+ copy_from_nullb(nullb, p, pos, valid_len);
off += valid_len;
len -= valid_len;
}
if (len)
- nullb_fill_pattern(nullb, page, len, off);
+ memset(p + valid_len, 0xff, len);
flush_dcache_page(page);
} else {
flush_dcache_page(page);
- err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
+ err = copy_to_nullb(nullb, p, pos, len, is_fua);
}
+ kunmap_local(p);
return err;
}
-static int null_handle_rq(struct nullb_cmd *cmd)
+/*
+ * Transfer data for the given request. The transfer size is capped with the
+ * nr_sectors argument.
+ */
+static blk_status_t null_handle_data_transfer(struct nullb_cmd *cmd,
+ sector_t nr_sectors)
{
- struct request *rq = cmd->rq;
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
struct nullb *nullb = cmd->nq->dev->nullb;
- int err;
+ blk_status_t err = BLK_STS_OK;
unsigned int len;
- sector_t sector = blk_rq_pos(rq);
+ loff_t pos = blk_rq_pos(rq) << SECTOR_SHIFT;
+ unsigned int max_bytes = nr_sectors << SECTOR_SHIFT;
+ unsigned int transferred_bytes = 0;
struct req_iterator iter;
struct bio_vec bvec;
spin_lock_irq(&nullb->lock);
rq_for_each_segment(bvec, rq, iter) {
len = bvec.bv_len;
+ if (transferred_bytes + len > max_bytes)
+ len = max_bytes - transferred_bytes;
err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
- op_is_write(req_op(rq)), sector,
+ op_is_write(req_op(rq)), pos,
rq->cmd_flags & REQ_FUA);
- if (err) {
- spin_unlock_irq(&nullb->lock);
- return err;
- }
- sector += len >> SECTOR_SHIFT;
- }
- spin_unlock_irq(&nullb->lock);
-
- return 0;
-}
-
-static int null_handle_bio(struct nullb_cmd *cmd)
-{
- struct bio *bio = cmd->bio;
- struct nullb *nullb = cmd->nq->dev->nullb;
- int err;
- unsigned int len;
- sector_t sector = bio->bi_iter.bi_sector;
- struct bio_vec bvec;
- struct bvec_iter iter;
-
- spin_lock_irq(&nullb->lock);
- bio_for_each_segment(bvec, bio, iter) {
- len = bvec.bv_len;
- err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
- op_is_write(bio_op(bio)), sector,
- bio->bi_opf & REQ_FUA);
- if (err) {
- spin_unlock_irq(&nullb->lock);
- return err;
- }
- sector += len >> SECTOR_SHIFT;
+ if (err)
+ break;
+ pos += len;
+ transferred_bytes += len;
+ if (transferred_bytes >= max_bytes)
+ break;
}
spin_unlock_irq(&nullb->lock);
- return 0;
-}
-
-static void null_stop_queue(struct nullb *nullb)
-{
- struct request_queue *q = nullb->q;
-
- if (nullb->dev->queue_mode == NULL_Q_MQ)
- blk_mq_stop_hw_queues(q);
-}
-
-static void null_restart_queue_async(struct nullb *nullb)
-{
- struct request_queue *q = nullb->q;
- if (nullb->dev->queue_mode == NULL_Q_MQ)
- blk_mq_start_stopped_hw_queues(q, true);
+ return err;
}
static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
@@ -1331,73 +1307,85 @@ static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
struct nullb_device *dev = cmd->nq->dev;
struct nullb *nullb = dev->nullb;
blk_status_t sts = BLK_STS_OK;
- struct request *rq = cmd->rq;
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
if (!hrtimer_active(&nullb->bw_timer))
hrtimer_restart(&nullb->bw_timer);
if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
- null_stop_queue(nullb);
+ blk_mq_stop_hw_queues(nullb->q);
/* race with timer */
if (atomic_long_read(&nullb->cur_bytes) > 0)
- null_restart_queue_async(nullb);
+ blk_mq_start_stopped_hw_queues(nullb->q, true);
/* requeue request */
sts = BLK_STS_DEV_RESOURCE;
}
return sts;
}
-static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
- sector_t sector,
- sector_t nr_sectors)
+/*
+ * Check if the command should fail for the badblocks. If so, return
+ * BLK_STS_IOERR and return number of partial I/O sectors to be written or read,
+ * which may be less than the requested number of sectors.
+ *
+ * @cmd: The command to handle.
+ * @sector: The start sector for I/O.
+ * @nr_sectors: Specifies number of sectors to write or read, and returns the
+ * number of sectors to be written or read.
+ */
+blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
+ unsigned int *nr_sectors)
{
struct badblocks *bb = &cmd->nq->dev->badblocks;
- sector_t first_bad;
- int bad_sectors;
+ struct nullb_device *dev = cmd->nq->dev;
+ unsigned int block_sectors = dev->blocksize >> SECTOR_SHIFT;
+ sector_t first_bad, bad_sectors;
+ unsigned int partial_io_sectors = 0;
- if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
- return BLK_STS_IOERR;
+ if (!badblocks_check(bb, sector, *nr_sectors, &first_bad, &bad_sectors))
+ return BLK_STS_OK;
- return BLK_STS_OK;
+ if (cmd->nq->dev->badblocks_once)
+ badblocks_clear(bb, first_bad, bad_sectors);
+
+ if (cmd->nq->dev->badblocks_partial_io) {
+ if (!IS_ALIGNED(first_bad, block_sectors))
+ first_bad = ALIGN_DOWN(first_bad, block_sectors);
+ if (sector < first_bad)
+ partial_io_sectors = first_bad - sector;
+ }
+ *nr_sectors = partial_io_sectors;
+
+ return BLK_STS_IOERR;
}
-static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
- enum req_op op,
- sector_t sector,
- sector_t nr_sectors)
+blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
+ sector_t sector, sector_t nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
- int err;
if (op == REQ_OP_DISCARD)
return null_handle_discard(dev, sector, nr_sectors);
- if (dev->queue_mode == NULL_Q_BIO)
- err = null_handle_bio(cmd);
- else
- err = null_handle_rq(cmd);
-
- return errno_to_blk_status(err);
+ return null_handle_data_transfer(cmd, nr_sectors);
}
static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
{
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
struct nullb_device *dev = cmd->nq->dev;
struct bio *bio;
- if (dev->memory_backed)
- return;
-
- if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
- zero_fill_bio(cmd->bio);
- } else if (req_op(cmd->rq) == REQ_OP_READ) {
- __rq_for_each_bio(bio, cmd->rq)
+ if (!dev->memory_backed && req_op(rq) == REQ_OP_READ) {
+ __rq_for_each_bio(bio, rq)
zero_fill_bio(bio);
}
}
static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
{
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
+
/*
* Since root privileges are required to configure the null_blk
* driver, it is fine that this driver does not initialize the
@@ -1411,21 +1399,10 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
/* Complete IO by inline, softirq or timer */
switch (cmd->nq->dev->irqmode) {
case NULL_IRQ_SOFTIRQ:
- switch (cmd->nq->dev->queue_mode) {
- case NULL_Q_MQ:
- if (likely(!blk_should_fake_timeout(cmd->rq->q)))
- blk_mq_complete_request(cmd->rq);
- break;
- case NULL_Q_BIO:
- /*
- * XXX: no proper submitting cpu information available.
- */
- end_cmd(cmd);
- break;
- }
+ blk_mq_complete_request(rq);
break;
case NULL_IRQ_NONE:
- end_cmd(cmd);
+ blk_mq_end_request(rq, cmd->error);
break;
case NULL_IRQ_TIMER:
null_cmd_end_timer(cmd);
@@ -1437,35 +1414,30 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
sector_t sector, unsigned int nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
+ blk_status_t badblocks_ret = BLK_STS_OK;
blk_status_t ret;
- if (dev->badblocks.shift != -1) {
- ret = null_handle_badblocks(cmd, sector, nr_sectors);
+ if (dev->badblocks.shift != -1)
+ badblocks_ret = null_handle_badblocks(cmd, sector, &nr_sectors);
+
+ if (dev->memory_backed && nr_sectors) {
+ ret = null_handle_memory_backed(cmd, op, sector, nr_sectors);
if (ret != BLK_STS_OK)
return ret;
}
- if (dev->memory_backed)
- return null_handle_memory_backed(cmd, op, sector, nr_sectors);
-
- return BLK_STS_OK;
+ return badblocks_ret;
}
-static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
- sector_t nr_sectors, enum req_op op)
+static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
+ sector_t nr_sectors, enum req_op op)
{
struct nullb_device *dev = cmd->nq->dev;
struct nullb *nullb = dev->nullb;
blk_status_t sts;
- if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
- sts = null_handle_throttled(cmd);
- if (sts != BLK_STS_OK)
- return sts;
- }
-
if (op == REQ_OP_FLUSH) {
- cmd->error = errno_to_blk_status(null_handle_flush(nullb));
+ cmd->error = null_handle_flush(nullb);
goto out;
}
@@ -1480,7 +1452,6 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
out:
nullb_complete_cmd(cmd);
- return BLK_STS_OK;
}
static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
@@ -1493,7 +1464,7 @@ static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
return HRTIMER_NORESTART;
atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
- null_restart_queue_async(nullb);
+ blk_mq_start_stopped_hw_queues(nullb->q, true);
hrtimer_forward_now(&nullb->bw_timer, timer_interval);
@@ -1504,50 +1475,53 @@ static void nullb_setup_bwtimer(struct nullb *nullb)
{
ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
- hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- nullb->bw_timer.function = nullb_bwtimer_fn;
+ hrtimer_setup(&nullb->bw_timer, nullb_bwtimer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
}
-static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
-{
- int index = 0;
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
- if (nullb->nr_queues != 1)
- index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
+static bool should_timeout_request(struct request *rq)
+{
+ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+ struct nullb_device *dev = cmd->nq->dev;
- return &nullb->queues[index];
+ return should_fail(&dev->timeout_config.attr, 1);
}
-static void null_submit_bio(struct bio *bio)
+static bool should_requeue_request(struct request *rq)
{
- sector_t sector = bio->bi_iter.bi_sector;
- sector_t nr_sectors = bio_sectors(bio);
- struct nullb *nullb = bio->bi_bdev->bd_disk->private_data;
- struct nullb_queue *nq = nullb_to_queue(nullb);
+ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+ struct nullb_device *dev = cmd->nq->dev;
- null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio));
+ return should_fail(&dev->requeue_config.attr, 1);
}
+static bool should_init_hctx_fail(struct nullb_device *dev)
+{
+ return should_fail(&dev->init_hctx_fault_config.attr, 1);
+}
+
+#else
+
static bool should_timeout_request(struct request *rq)
{
-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
- if (g_timeout_str[0])
- return should_fail(&null_timeout_attr, 1);
-#endif
return false;
}
static bool should_requeue_request(struct request *rq)
{
-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
- if (g_requeue_str[0])
- return should_fail(&null_requeue_attr, 1);
-#endif
return false;
}
+static bool should_init_hctx_fail(struct nullb_device *dev)
+{
+ return false;
+}
+
+#endif
+
static void null_map_queues(struct blk_mq_tag_set *set)
{
struct nullb *nullb = set->driver_data;
@@ -1606,9 +1580,12 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
struct nullb_queue *nq = hctx->driver_data;
LIST_HEAD(list);
int nr = 0;
+ struct request *rq;
spin_lock(&nq->poll_lock);
list_splice_init(&nq->poll_list, &list);
+ list_for_each_entry(rq, &list, queuelist)
+ blk_mq_set_request_complete(rq);
spin_unlock(&nq->poll_lock);
while (!list_empty(&list)) {
@@ -1620,9 +1597,9 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
cmd = blk_mq_rq_to_pdu(req);
cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req),
blk_rq_sectors(req));
- if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error,
- blk_mq_end_request_batch))
- end_cmd(cmd);
+ if (!blk_mq_add_to_batch(req, iob, cmd->error != BLK_STS_OK,
+ blk_mq_end_request_batch))
+ blk_mq_end_request(req, cmd->error);
nr++;
}
@@ -1634,16 +1611,21 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
- pr_info("rq %p timed out\n", rq);
-
if (hctx->type == HCTX_TYPE_POLL) {
struct nullb_queue *nq = hctx->driver_data;
spin_lock(&nq->poll_lock);
+ /* The request may have completed meanwhile. */
+ if (blk_mq_request_completed(rq)) {
+ spin_unlock(&nq->poll_lock);
+ return BLK_EH_DONE;
+ }
list_del_init(&rq->queuelist);
spin_unlock(&nq->poll_lock);
}
+ pr_info("rq %p timed out\n", rq);
+
/*
* If the device is marked as blocking (i.e. memory backed or zoned
* device), the submission path may be blocked waiting for resources
@@ -1658,28 +1640,27 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
}
static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
+ const struct blk_mq_queue_data *bd)
{
- struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+ struct request *rq = bd->rq;
+ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
struct nullb_queue *nq = hctx->driver_data;
- sector_t nr_sectors = blk_rq_sectors(bd->rq);
- sector_t sector = blk_rq_pos(bd->rq);
+ sector_t nr_sectors = blk_rq_sectors(rq);
+ sector_t sector = blk_rq_pos(rq);
const bool is_poll = hctx->type == HCTX_TYPE_POLL;
might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
if (!is_poll && nq->dev->irqmode == NULL_IRQ_TIMER) {
- hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- cmd->timer.function = null_cmd_timer_expired;
+ hrtimer_setup(&cmd->timer, null_cmd_timer_expired, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
}
- cmd->rq = bd->rq;
cmd->error = BLK_STS_OK;
cmd->nq = nq;
- cmd->fake_timeout = should_timeout_request(bd->rq);
-
- blk_mq_start_request(bd->rq);
+ cmd->fake_timeout = should_timeout_request(rq) ||
+ blk_should_fake_timeout(rq->q);
- if (should_requeue_request(bd->rq)) {
+ if (should_requeue_request(rq)) {
/*
* Alternate between hitting the core BUSY path, and the
* driver driven requeue path
@@ -1687,52 +1668,52 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
nq->requeue_selection++;
if (nq->requeue_selection & 1)
return BLK_STS_RESOURCE;
- else {
- blk_mq_requeue_request(bd->rq, true);
- return BLK_STS_OK;
- }
+ blk_mq_requeue_request(rq, true);
+ return BLK_STS_OK;
}
+ if (test_bit(NULLB_DEV_FL_THROTTLED, &nq->dev->flags)) {
+ blk_status_t sts = null_handle_throttled(cmd);
+
+ if (sts != BLK_STS_OK)
+ return sts;
+ }
+
+ blk_mq_start_request(rq);
+
if (is_poll) {
spin_lock(&nq->poll_lock);
- list_add_tail(&bd->rq->queuelist, &nq->poll_list);
+ list_add_tail(&rq->queuelist, &nq->poll_list);
spin_unlock(&nq->poll_lock);
return BLK_STS_OK;
}
if (cmd->fake_timeout)
return BLK_STS_OK;
- return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
-}
-
-static void cleanup_queue(struct nullb_queue *nq)
-{
- bitmap_free(nq->tag_map);
- kfree(nq->cmds);
+ null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
+ return BLK_STS_OK;
}
-static void cleanup_queues(struct nullb *nullb)
+static void null_queue_rqs(struct rq_list *rqlist)
{
- int i;
-
- for (i = 0; i < nullb->nr_queues; i++)
- cleanup_queue(&nullb->queues[i]);
+ struct rq_list requeue_list = {};
+ struct blk_mq_queue_data bd = { };
+ blk_status_t ret;
- kfree(nullb->queues);
-}
+ do {
+ struct request *rq = rq_list_pop(rqlist);
-static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
-{
- struct nullb_queue *nq = hctx->driver_data;
- struct nullb *nullb = nq->dev->nullb;
+ bd.rq = rq;
+ ret = null_queue_rq(rq->mq_hctx, &bd);
+ if (ret != BLK_STS_OK)
+ rq_list_add_tail(&requeue_list, rq);
+ } while (!rq_list_empty(rqlist));
- nullb->nr_queues--;
+ *rqlist = requeue_list;
}
static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
- init_waitqueue_head(&nq->wait);
- nq->queue_depth = nullb->queue_depth;
nq->dev = nullb->dev;
INIT_LIST_HEAD(&nq->poll_list);
spin_lock_init(&nq->poll_lock);
@@ -1744,27 +1725,24 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
struct nullb *nullb = hctx->queue->queuedata;
struct nullb_queue *nq;
-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
- if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1))
+ if (should_init_hctx_fail(nullb->dev))
return -EFAULT;
-#endif
nq = &nullb->queues[hctx_idx];
hctx->driver_data = nq;
null_init_queue(nullb, nq);
- nullb->nr_queues++;
return 0;
}
static const struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq,
+ .queue_rqs = null_queue_rqs,
.complete = null_complete_rq,
.timeout = null_timeout_rq,
.poll = null_poll,
.map_queues = null_map_queues,
.init_hctx = null_init_hctx,
- .exit_hctx = null_exit_hctx,
};
static void null_del_dev(struct nullb *nullb)
@@ -1776,7 +1754,7 @@ static void null_del_dev(struct nullb *nullb)
dev = nullb->dev;
- ida_simple_remove(&nullb_indexes, nullb->index);
+ ida_free(&nullb_indexes, nullb->index);
list_del_init(&nullb->list);
@@ -1785,21 +1763,20 @@ static void null_del_dev(struct nullb *nullb)
if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
hrtimer_cancel(&nullb->bw_timer);
atomic_long_set(&nullb->cur_bytes, LONG_MAX);
- null_restart_queue_async(nullb);
+ blk_mq_start_stopped_hw_queues(nullb->q, true);
}
put_disk(nullb->disk);
- if (dev->queue_mode == NULL_Q_MQ &&
- nullb->tag_set == &nullb->__tag_set)
+ if (nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
- cleanup_queues(nullb);
+ kfree(nullb->queues);
if (null_cache_active(nullb))
null_free_device_storage(nullb->dev, true);
kfree(nullb);
dev->nullb = NULL;
}
-static void null_config_discard(struct nullb *nullb)
+static void null_config_discard(struct nullb *nullb, struct queue_limits *lim)
{
if (nullb->dev->discard == false)
return;
@@ -1816,44 +1793,14 @@ static void null_config_discard(struct nullb *nullb)
return;
}
- nullb->q->limits.discard_granularity = nullb->dev->blocksize;
- blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
+ lim->max_hw_discard_sectors = UINT_MAX >> 9;
}
-static const struct block_device_operations null_bio_ops = {
- .owner = THIS_MODULE,
- .submit_bio = null_submit_bio,
- .report_zones = null_report_zones,
-};
-
-static const struct block_device_operations null_rq_ops = {
+static const struct block_device_operations null_ops = {
.owner = THIS_MODULE,
.report_zones = null_report_zones,
};
-static int setup_commands(struct nullb_queue *nq)
-{
- struct nullb_cmd *cmd;
- int i;
-
- nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
- if (!nq->cmds)
- return -ENOMEM;
-
- nq->tag_map = bitmap_zalloc(nq->queue_depth, GFP_KERNEL);
- if (!nq->tag_map) {
- kfree(nq->cmds);
- return -ENOMEM;
- }
-
- for (i = 0; i < nq->queue_depth; i++) {
- cmd = &nq->cmds[i];
- cmd->tag = -1U;
- }
-
- return 0;
-}
-
static int setup_queues(struct nullb *nullb)
{
int nqueues = nr_cpu_ids;
@@ -1866,109 +1813,78 @@ static int setup_queues(struct nullb *nullb)
if (!nullb->queues)
return -ENOMEM;
- nullb->queue_depth = nullb->dev->hw_queue_depth;
return 0;
}
-static int init_driver_queues(struct nullb *nullb)
+static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues)
{
- struct nullb_queue *nq;
- int i, ret = 0;
-
- for (i = 0; i < nullb->dev->submit_queues; i++) {
- nq = &nullb->queues[i];
-
- null_init_queue(nullb, nq);
-
- ret = setup_commands(nq);
- if (ret)
- return ret;
- nullb->nr_queues++;
+ set->ops = &null_mq_ops;
+ set->cmd_size = sizeof(struct nullb_cmd);
+ set->timeout = 5 * HZ;
+ set->nr_maps = 1;
+ if (poll_queues) {
+ set->nr_hw_queues += poll_queues;
+ set->nr_maps += 2;
}
- return 0;
+ return blk_mq_alloc_tag_set(set);
}
-static int null_gendisk_register(struct nullb *nullb)
+static int null_init_global_tag_set(void)
{
- sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
- struct gendisk *disk = nullb->disk;
-
- set_capacity(disk, size);
-
- disk->major = null_major;
- disk->first_minor = nullb->index;
- disk->minors = 1;
- if (queue_is_mq(nullb->q))
- disk->fops = &null_rq_ops;
- else
- disk->fops = &null_bio_ops;
- disk->private_data = nullb;
- strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+ int error;
- if (nullb->dev->zoned) {
- int ret = null_register_zoned_dev(nullb);
+ if (tag_set.ops)
+ return 0;
- if (ret)
- return ret;
- }
+ tag_set.nr_hw_queues = g_submit_queues;
+ tag_set.queue_depth = g_hw_queue_depth;
+ tag_set.numa_node = g_home_node;
+ if (g_no_sched)
+ tag_set.flags |= BLK_MQ_F_NO_SCHED_BY_DEFAULT;
+ if (g_shared_tag_bitmap)
+ tag_set.flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+ if (g_blocking)
+ tag_set.flags |= BLK_MQ_F_BLOCKING;
- return add_disk(disk);
+ error = null_init_tag_set(&tag_set, g_poll_queues);
+ if (error)
+ tag_set.ops = NULL;
+ return error;
}
-static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
+static int null_setup_tagset(struct nullb *nullb)
{
- unsigned int flags = BLK_MQ_F_SHOULD_MERGE;
- int hw_queues, numa_node;
- unsigned int queue_depth;
- int poll_queues;
-
- if (nullb) {
- hw_queues = nullb->dev->submit_queues;
- poll_queues = nullb->dev->poll_queues;
- queue_depth = nullb->dev->hw_queue_depth;
- numa_node = nullb->dev->home_node;
- if (nullb->dev->no_sched)
- flags |= BLK_MQ_F_NO_SCHED;
- if (nullb->dev->shared_tag_bitmap)
- flags |= BLK_MQ_F_TAG_HCTX_SHARED;
- if (nullb->dev->blocking)
- flags |= BLK_MQ_F_BLOCKING;
- } else {
- hw_queues = g_submit_queues;
- poll_queues = g_poll_queues;
- queue_depth = g_hw_queue_depth;
- numa_node = g_home_node;
- if (g_no_sched)
- flags |= BLK_MQ_F_NO_SCHED;
- if (g_shared_tag_bitmap)
- flags |= BLK_MQ_F_TAG_HCTX_SHARED;
- if (g_blocking)
- flags |= BLK_MQ_F_BLOCKING;
+ if (nullb->dev->shared_tags) {
+ nullb->tag_set = &tag_set;
+ return null_init_global_tag_set();
}
- set->ops = &null_mq_ops;
- set->cmd_size = sizeof(struct nullb_cmd);
- set->flags = flags;
- set->driver_data = nullb;
- set->nr_hw_queues = hw_queues;
- set->queue_depth = queue_depth;
- set->numa_node = numa_node;
- if (poll_queues) {
- set->nr_hw_queues += poll_queues;
- set->nr_maps = 3;
- } else {
- set->nr_maps = 1;
- }
-
- return blk_mq_alloc_tag_set(set);
+ nullb->tag_set = &nullb->__tag_set;
+ nullb->tag_set->driver_data = nullb;
+ nullb->tag_set->nr_hw_queues = nullb->dev->submit_queues;
+ nullb->tag_set->queue_depth = nullb->dev->hw_queue_depth;
+ nullb->tag_set->numa_node = nullb->dev->home_node;
+ if (nullb->dev->no_sched)
+ nullb->tag_set->flags |= BLK_MQ_F_NO_SCHED_BY_DEFAULT;
+ if (nullb->dev->shared_tag_bitmap)
+ nullb->tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+ if (nullb->dev->blocking)
+ nullb->tag_set->flags |= BLK_MQ_F_BLOCKING;
+ return null_init_tag_set(nullb->tag_set, nullb->dev->poll_queues);
}
static int null_validate_conf(struct nullb_device *dev)
{
- dev->blocksize = round_down(dev->blocksize, 512);
- dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
+ if (dev->queue_mode == NULL_Q_RQ) {
+ pr_err("legacy IO path is no longer available\n");
+ return -EINVAL;
+ }
+ if (dev->queue_mode == NULL_Q_BIO) {
+ pr_err("BIO-based IO path is no longer available, using blk-mq instead.\n");
+ dev->queue_mode = NULL_Q_MQ;
+ }
- if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
+ if (dev->use_per_node_hctx) {
if (dev->submit_queues != nr_online_nodes)
dev->submit_queues = nr_online_nodes;
} else if (dev->submit_queues > nr_cpu_ids)
@@ -1980,8 +1896,6 @@ static int null_validate_conf(struct nullb_device *dev)
if (dev->poll_queues > g_poll_queues)
dev->poll_queues = g_poll_queues;
dev->prev_poll_queues = dev->poll_queues;
-
- dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
/* Do memory allocation, so set blocking */
@@ -1992,9 +1906,6 @@ static int null_validate_conf(struct nullb_device *dev)
dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
dev->cache_size);
dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
- /* can not stop a queue */
- if (dev->queue_mode == NULL_Q_BIO)
- dev->mbps = 0;
if (dev->zoned &&
(!dev->zone_size || !is_power_of_2(dev->zone_size))) {
@@ -2034,6 +1945,13 @@ static bool null_setup_fault(void)
static int null_add_dev(struct nullb_device *dev)
{
+ struct queue_limits lim = {
+ .logical_block_size = dev->blocksize,
+ .physical_block_size = dev->blocksize,
+ .max_hw_sectors = dev->max_sectors,
+ .dma_alignment = 1,
+ };
+
struct nullb *nullb;
int rv;
@@ -2055,98 +1973,78 @@ static int null_add_dev(struct nullb_device *dev)
if (rv)
goto out_free_nullb;
- if (dev->queue_mode == NULL_Q_MQ) {
- if (shared_tags) {
- nullb->tag_set = &tag_set;
- rv = 0;
- } else {
- nullb->tag_set = &nullb->__tag_set;
- rv = null_init_tag_set(nullb, nullb->tag_set);
- }
+ rv = null_setup_tagset(nullb);
+ if (rv)
+ goto out_cleanup_queues;
+ if (dev->virt_boundary)
+ lim.virt_boundary_mask = PAGE_SIZE - 1;
+ null_config_discard(nullb, &lim);
+ if (dev->zoned) {
+ rv = null_init_zoned_dev(dev, &lim);
if (rv)
- goto out_cleanup_queues;
-
- if (!null_setup_fault())
goto out_cleanup_tags;
+ }
- nullb->tag_set->timeout = 5 * HZ;
- nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb);
- if (IS_ERR(nullb->disk)) {
- rv = PTR_ERR(nullb->disk);
- goto out_cleanup_tags;
- }
- nullb->q = nullb->disk->queue;
- } else if (dev->queue_mode == NULL_Q_BIO) {
- rv = -ENOMEM;
- nullb->disk = blk_alloc_disk(nullb->dev->home_node);
- if (!nullb->disk)
- goto out_cleanup_queues;
+ if (dev->cache_size > 0) {
+ set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ if (dev->fua)
+ lim.features |= BLK_FEAT_FUA;
+ }
- nullb->q = nullb->disk->queue;
- rv = init_driver_queues(nullb);
- if (rv)
- goto out_cleanup_disk;
+ if (dev->rotational)
+ lim.features |= BLK_FEAT_ROTATIONAL;
+
+ nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb);
+ if (IS_ERR(nullb->disk)) {
+ rv = PTR_ERR(nullb->disk);
+ goto out_cleanup_zone;
}
+ nullb->q = nullb->disk->queue;
if (dev->mbps) {
set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
nullb_setup_bwtimer(nullb);
}
- if (dev->cache_size > 0) {
- set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
- blk_queue_write_cache(nullb->q, true, true);
- }
-
- if (dev->zoned) {
- rv = null_init_zoned_dev(dev, nullb->q);
- if (rv)
- goto out_cleanup_disk;
- }
-
nullb->q->queuedata = nullb;
- blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
- mutex_lock(&lock);
- rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
- if (rv < 0) {
- mutex_unlock(&lock);
- goto out_cleanup_zone;
- }
+ rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
+ if (rv < 0)
+ goto out_cleanup_disk;
+
nullb->index = rv;
dev->index = rv;
- mutex_unlock(&lock);
- blk_queue_logical_block_size(nullb->q, dev->blocksize);
- blk_queue_physical_block_size(nullb->q, dev->blocksize);
- if (!dev->max_sectors)
- dev->max_sectors = queue_max_hw_sectors(nullb->q);
- dev->max_sectors = min_t(unsigned int, dev->max_sectors,
- BLK_DEF_MAX_SECTORS);
- blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);
-
- if (dev->virt_boundary)
- blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1);
-
- null_config_discard(nullb);
-
- if (config_item_name(&dev->item)) {
+ if (config_item_name(&dev->group.cg_item)) {
/* Use configfs dir name as the device name */
snprintf(nullb->disk_name, sizeof(nullb->disk_name),
- "%s", config_item_name(&dev->item));
+ "%s", config_item_name(&dev->group.cg_item));
} else {
sprintf(nullb->disk_name, "nullb%d", nullb->index);
}
- rv = null_gendisk_register(nullb);
+ set_capacity(nullb->disk,
+ ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT);
+ nullb->disk->major = null_major;
+ nullb->disk->first_minor = nullb->index;
+ nullb->disk->minors = 1;
+ nullb->disk->fops = &null_ops;
+ nullb->disk->private_data = nullb;
+ strscpy(nullb->disk->disk_name, nullb->disk_name);
+
+ if (nullb->dev->zoned) {
+ rv = null_register_zoned_dev(nullb);
+ if (rv)
+ goto out_ida_free;
+ }
+
+ rv = add_disk(nullb->disk);
if (rv)
goto out_ida_free;
- mutex_lock(&lock);
list_add_tail(&nullb->list, &nullb_list);
- mutex_unlock(&lock);
pr_info("disk %s created\n", nullb->disk_name);
@@ -2154,15 +2052,15 @@ static int null_add_dev(struct nullb_device *dev)
out_ida_free:
ida_free(&nullb_indexes, nullb->index);
-out_cleanup_zone:
- null_free_zoned_dev(dev);
out_cleanup_disk:
put_disk(nullb->disk);
+out_cleanup_zone:
+ null_free_zoned_dev(dev);
out_cleanup_tags:
- if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ if (nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
out_cleanup_queues:
- cleanup_queues(nullb);
+ kfree(nullb->queues);
out_free_nullb:
kfree(nullb);
dev->nullb = NULL;
@@ -2195,7 +2093,9 @@ static int null_create_dev(void)
if (!dev)
return -ENOMEM;
+ mutex_lock(&lock);
ret = null_add_dev(dev);
+ mutex_unlock(&lock);
if (ret) {
null_free_dev(dev);
return ret;
@@ -2209,6 +2109,7 @@ static void null_destroy_dev(struct nullb *nullb)
struct nullb_device *dev = nullb->dev;
null_del_dev(nullb);
+ null_free_device_storage(dev, false);
null_free_dev(dev);
}
@@ -2224,23 +2125,20 @@ static int __init null_init(void)
g_bs = PAGE_SIZE;
}
- if (g_max_sectors > BLK_DEF_MAX_SECTORS) {
- pr_warn("invalid max sectors\n");
- pr_warn("defaults max sectors to %u\n", BLK_DEF_MAX_SECTORS);
- g_max_sectors = BLK_DEF_MAX_SECTORS;
- }
-
if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
pr_err("invalid home_node value\n");
g_home_node = NUMA_NO_NODE;
}
+ if (!null_setup_fault())
+ return -EINVAL;
+
if (g_queue_mode == NULL_Q_RQ) {
pr_err("legacy IO path is no longer available\n");
return -EINVAL;
}
- if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
+ if (g_use_per_node_hctx) {
if (g_submit_queues != nr_online_nodes) {
pr_warn("submit_queues param is set to %u.\n",
nr_online_nodes);
@@ -2252,18 +2150,12 @@ static int __init null_init(void)
g_submit_queues = 1;
}
- if (g_queue_mode == NULL_Q_MQ && shared_tags) {
- ret = null_init_tag_set(NULL, &tag_set);
- if (ret)
- return ret;
- }
-
config_group_init(&nullb_subsys.su_group);
mutex_init(&nullb_subsys.su_mutex);
ret = configfs_register_subsystem(&nullb_subsys);
if (ret)
- goto err_tagset;
+ return ret;
mutex_init(&lock);
@@ -2290,9 +2182,6 @@ err_dev:
unregister_blkdev(null_major, "nullb");
err_conf:
configfs_unregister_subsystem(&nullb_subsys);
-err_tagset:
- if (g_queue_mode == NULL_Q_MQ && shared_tags)
- blk_mq_free_tag_set(&tag_set);
return ret;
}
@@ -2311,12 +2200,15 @@ static void __exit null_exit(void)
}
mutex_unlock(&lock);
- if (g_queue_mode == NULL_Q_MQ && shared_tags)
+ if (tag_set.ops)
blk_mq_free_tag_set(&tag_set);
+
+ mutex_destroy(&lock);
}
module_init(null_init);
module_exit(null_exit);
MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
+MODULE_DESCRIPTION("multi queue aware block test driver");
MODULE_LICENSE("GPL");