Diffstat (limited to 'drivers/block/null_blk/main.c')
 -rw-r--r--   drivers/block/null_blk/main.c | 1110
 1 file changed, 511 insertions(+), 599 deletions(-)
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 1f154f92f4c2..c7c0fb79a6bf 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -77,7 +77,7 @@ enum {
 	NULL_IRQ_TIMER	= 2,
 };
 
-static bool g_virt_boundary = false;
+static bool g_virt_boundary;
 module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
 MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
 
@@ -115,6 +115,18 @@ module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
 MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
 #endif
 
+/*
+ * Historic queue modes.
+ *
+ * These days nothing but NULL_Q_MQ is actually supported, but we keep it the
+ * enum for error reporting.
+ */
+enum {
+	NULL_Q_BIO	= 0,
+	NULL_Q_RQ	= 1,
+	NULL_Q_MQ	= 2,
+};
+
 static int g_queue_mode = NULL_Q_MQ;
 
 static int null_param_store_val(const char *str, int *val, int min, int max)
@@ -165,8 +177,8 @@ static bool g_blocking;
 module_param_named(blocking, g_blocking, bool, 0444);
 MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
 
-static bool shared_tags;
-module_param(shared_tags, bool, 0444);
+static bool g_shared_tags;
+module_param_named(shared_tags, g_shared_tags, bool, 0444);
 MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
 
 static bool g_shared_tag_bitmap;
@@ -211,7 +223,11 @@ MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed nu
 
 static unsigned long g_cache_size;
 module_param_named(cache_size, g_cache_size, ulong, 0444);
-MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)");
+MODULE_PARM_DESC(cache_size, "Cache size in MiB for memory-backed device. Default: 0 (none)");
+
+static bool g_fua = true;
+module_param_named(fua, g_fua, bool, 0444);
+MODULE_PARM_DESC(fua, "Enable/disable FUA support when cache_size is used. Default: true");
 
 static unsigned int g_mbps;
 module_param_named(mbps, g_mbps, uint, 0444);
@@ -241,6 +257,19 @@ static unsigned int g_zone_max_active;
 module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
 MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
 
+static int g_zone_append_max_sectors = INT_MAX;
+module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444);
+MODULE_PARM_DESC(zone_append_max_sectors,
+		 "Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");
+
+static bool g_zone_full;
+module_param_named(zone_full, g_zone_full, bool, S_IRUGO);
+MODULE_PARM_DESC(zone_full, "Initialize the sequential write required zones of a zoned device to be full. Default: false");
+
+static bool g_rotational;
+module_param_named(rotational, g_rotational, bool, S_IRUGO);
+MODULE_PARM_DESC(rotational, "Set the rotational feature for the device. Default: false");
+
 static struct nullb_device *null_alloc_dev(void);
 static void null_free_dev(struct nullb_device *dev);
 static void null_del_dev(struct nullb *nullb);
@@ -250,7 +279,7 @@ static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
 
 static inline struct nullb_device *to_nullb_device(struct config_item *item)
 {
-	return item ? container_of(item, struct nullb_device, item) : NULL;
+	return item ? container_of(to_config_group(item), struct nullb_device, group) : NULL;
 }
 
 static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
@@ -392,13 +421,25 @@ static int nullb_update_nr_hw_queues(struct nullb_device *dev,
 static int nullb_apply_submit_queues(struct nullb_device *dev,
 				     unsigned int submit_queues)
 {
-	return nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues);
+	int ret;
+
+	mutex_lock(&lock);
+	ret = nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues);
+	mutex_unlock(&lock);
+
+	return ret;
 }
 
 static int nullb_apply_poll_queues(struct nullb_device *dev,
 				   unsigned int poll_queues)
 {
-	return nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues);
+	int ret;
+
+	mutex_lock(&lock);
+	ret = nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues);
+	mutex_unlock(&lock);
+
+	return ret;
 }
 
 NULLB_DEVICE_ATTR(size, ulong, NULL);
@@ -424,9 +465,16 @@ NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
 NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
 NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
 NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
+NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
+NULLB_DEVICE_ATTR(zone_full, bool, NULL);
 NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
 NULLB_DEVICE_ATTR(no_sched, bool, NULL);
+NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
 NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
+NULLB_DEVICE_ATTR(fua, bool, NULL);
+NULLB_DEVICE_ATTR(rotational, bool, NULL);
+NULLB_DEVICE_ATTR(badblocks_once, bool, NULL);
+NULLB_DEVICE_ATTR(badblocks_partial_io, bool, NULL);
 
 static ssize_t nullb_device_power_show(struct config_item *item, char *page)
 {
@@ -444,28 +492,32 @@ static ssize_t nullb_device_power_store(struct config_item *item,
 	if (ret < 0)
 		return ret;
 
+	ret = count;
+	mutex_lock(&lock);
 	if (!dev->power && newp) {
 		if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
-			return count;
+			goto out;
+
 		ret = null_add_dev(dev);
 		if (ret) {
 			clear_bit(NULLB_DEV_FL_UP, &dev->flags);
-			return ret;
+			goto out;
 		}
 
 		set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
 		dev->power = newp;
+		ret = count;
 	} else if (dev->power && !newp) {
 		if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
-			mutex_lock(&lock);
 			dev->power = newp;
 			null_del_dev(dev->nullb);
-			mutex_unlock(&lock);
 		}
 		clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
 	}
 
-	return count;
+out:
+	mutex_unlock(&lock);
+	return ret;
 }
 
 CONFIGFS_ATTR(nullb_device_, power);
@@ -509,49 +561,76 @@ static ssize_t nullb_device_badblocks_store(struct config_item *item,
 		goto out;
 	/* enable badblocks */
 	cmpxchg(&t_dev->badblocks.shift, -1, 0);
-	if (buf[0] == '+')
-		ret = badblocks_set(&t_dev->badblocks, start,
-				    end - start + 1, 1);
-	else
-		ret = badblocks_clear(&t_dev->badblocks, start,
-				      end - start + 1);
-	if (ret == 0)
+	if (buf[0] == '+') {
+		if (badblocks_set(&t_dev->badblocks, start,
+				  end - start + 1, 1))
+			ret = count;
+	} else if (badblocks_clear(&t_dev->badblocks, start,
+				   end - start + 1)) {
 		ret = count;
+	}
 out:
 	kfree(orig);
 	return ret;
 }
 CONFIGFS_ATTR(nullb_device_, badblocks);
 
+static ssize_t nullb_device_zone_readonly_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nullb_device *dev = to_nullb_device(item);
+
+	return zone_cond_store(dev, page, count, BLK_ZONE_COND_READONLY);
+}
+CONFIGFS_ATTR_WO(nullb_device_, zone_readonly);
+
+static ssize_t nullb_device_zone_offline_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nullb_device *dev = to_nullb_device(item);
+
+	return zone_cond_store(dev, page, count, BLK_ZONE_COND_OFFLINE);
+}
+CONFIGFS_ATTR_WO(nullb_device_, zone_offline);
+
 static struct configfs_attribute *nullb_device_attrs[] = {
-	&nullb_device_attr_size,
+	&nullb_device_attr_badblocks,
+	&nullb_device_attr_badblocks_once,
+	&nullb_device_attr_badblocks_partial_io,
+	&nullb_device_attr_blocking,
+	&nullb_device_attr_blocksize,
+	&nullb_device_attr_cache_size,
 	&nullb_device_attr_completion_nsec,
-	&nullb_device_attr_submit_queues,
-	&nullb_device_attr_poll_queues,
+	&nullb_device_attr_discard,
+	&nullb_device_attr_fua,
 	&nullb_device_attr_home_node,
-	&nullb_device_attr_queue_mode,
-	&nullb_device_attr_blocksize,
-	&nullb_device_attr_max_sectors,
-	&nullb_device_attr_irqmode,
 	&nullb_device_attr_hw_queue_depth,
 	&nullb_device_attr_index,
-	&nullb_device_attr_blocking,
-	&nullb_device_attr_use_per_node_hctx,
-	&nullb_device_attr_power,
-	&nullb_device_attr_memory_backed,
-	&nullb_device_attr_discard,
+	&nullb_device_attr_irqmode,
+	&nullb_device_attr_max_sectors,
 	&nullb_device_attr_mbps,
-	&nullb_device_attr_cache_size,
-	&nullb_device_attr_badblocks,
-	&nullb_device_attr_zoned,
-	&nullb_device_attr_zone_size,
-	&nullb_device_attr_zone_capacity,
-	&nullb_device_attr_zone_nr_conv,
-	&nullb_device_attr_zone_max_open,
-	&nullb_device_attr_zone_max_active,
-	&nullb_device_attr_virt_boundary,
+	&nullb_device_attr_memory_backed,
 	&nullb_device_attr_no_sched,
+	&nullb_device_attr_poll_queues,
+	&nullb_device_attr_power,
+	&nullb_device_attr_queue_mode,
+	&nullb_device_attr_rotational,
 	&nullb_device_attr_shared_tag_bitmap,
+	&nullb_device_attr_shared_tags,
+	&nullb_device_attr_size,
+	&nullb_device_attr_submit_queues,
+	&nullb_device_attr_use_per_node_hctx,
+	&nullb_device_attr_virt_boundary,
+	&nullb_device_attr_zone_append_max_sectors,
+	&nullb_device_attr_zone_capacity,
+	&nullb_device_attr_zone_full,
+	&nullb_device_attr_zone_max_active,
+	&nullb_device_attr_zone_max_open,
+	&nullb_device_attr_zone_nr_conv,
+	&nullb_device_attr_zone_offline,
+	&nullb_device_attr_zone_readonly,
+	&nullb_device_attr_zone_size,
+	&nullb_device_attr_zoned,
 	NULL,
 };
@@ -573,8 +652,29 @@ static const struct config_item_type nullb_device_type = {
 	.ct_owner	= THIS_MODULE,
 };
 
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+
+static void nullb_add_fault_config(struct nullb_device *dev)
+{
+	fault_config_init(&dev->timeout_config, "timeout_inject");
+	fault_config_init(&dev->requeue_config, "requeue_inject");
+	fault_config_init(&dev->init_hctx_fault_config, "init_hctx_fault_inject");
+
+	configfs_add_default_group(&dev->timeout_config.group, &dev->group);
+	configfs_add_default_group(&dev->requeue_config.group, &dev->group);
+	configfs_add_default_group(&dev->init_hctx_fault_config.group, &dev->group);
+}
+
+#else
+
+static void nullb_add_fault_config(struct nullb_device *dev)
+{
+}
+
+#endif
+
 static struct
-config_item *nullb_group_make_item(struct config_group *group, const char *name)
+config_group *nullb_group_make_group(struct config_group *group, const char *name)
 {
 	struct nullb_device *dev;
 
@@ -585,9 +685,10 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name)
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
-	config_item_init_type_name(&dev->item, name, &nullb_device_type);
+	config_group_init_type_name(&dev->group, name, &nullb_device_type);
+	nullb_add_fault_config(dev);
 
-	return &dev->item;
+	return &dev->group;
 }
 
 static void
@@ -607,14 +708,28 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
 static ssize_t memb_group_features_show(struct config_item *item,
 		char *page)
 {
-	return snprintf(page, PAGE_SIZE,
-			"badblocks,blocking,blocksize,cache_size,"
-			"completion_nsec,discard,home_node,hw_queue_depth,"
-			"irqmode,max_sectors,mbps,memory_backed,no_sched,"
-			"poll_queues,power,queue_mode,shared_tag_bitmap,size,"
-			"submit_queues,use_per_node_hctx,virt_boundary,zoned,"
-			"zone_capacity,zone_max_active,zone_max_open,"
-			"zone_nr_conv,zone_size\n");
+
+	struct configfs_attribute **entry;
+	char delimiter = ',';
+	size_t left = PAGE_SIZE;
+	size_t written = 0;
+	int ret;
+
+	for (entry = &nullb_device_attrs[0]; *entry && left > 0; entry++) {
+		if (!*(entry + 1))
+			delimiter = '\n';
+		ret = snprintf(page + written, left, "%s%c", (*entry)->ca_name,
+			       delimiter);
+		if (ret >= left) {
+			WARN_ONCE(1, "Too many null_blk features to print\n");
+			memzero_explicit(page, PAGE_SIZE);
+			return -ENOBUFS;
+		}
+		left -= ret;
+		written += ret;
+	}
+
+	return written;
 }
 
 CONFIGFS_ATTR_RO(memb_group_, features);
@@ -625,7 +740,7 @@ static struct configfs_attribute *nullb_group_attrs[] = {
 };
 
 static struct configfs_group_operations nullb_group_ops = {
-	.make_item	= nullb_group_make_item,
+	.make_group	= nullb_group_make_group,
 	.drop_item	= nullb_group_drop_item,
 };
 
@@ -656,6 +771,13 @@ static struct nullb_device *null_alloc_dev(void)
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev)
 		return NULL;
+
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+	dev->timeout_config.attr = null_timeout_attr;
+	dev->requeue_config.attr = null_requeue_attr;
+	dev->init_hctx_fault_config.attr = null_init_hctx_attr;
+#endif
+
 	INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
 	INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
 	if (badblocks_init(&dev->badblocks, 0)) {
@@ -687,9 +809,15 @@
 	dev->zone_nr_conv = g_zone_nr_conv;
 	dev->zone_max_open = g_zone_max_open;
 	dev->zone_max_active = g_zone_max_active;
+	dev->zone_append_max_sectors = g_zone_append_max_sectors;
+	dev->zone_full = g_zone_full;
 	dev->virt_boundary = g_virt_boundary;
 	dev->no_sched = g_no_sched;
+	dev->shared_tags = g_shared_tags;
 	dev->shared_tag_bitmap = g_shared_tag_bitmap;
+	dev->fua = g_fua;
+	dev->rotational = g_rotational;
+
 	return dev;
 }
 
@@ -703,98 +831,11 @@ static void null_free_dev(struct nullb_device *dev)
 	kfree(dev);
 }
 
-static void put_tag(struct nullb_queue *nq, unsigned int tag)
-{
-	clear_bit_unlock(tag, nq->tag_map);
-
-	if (waitqueue_active(&nq->wait))
-		wake_up(&nq->wait);
-}
-
-static unsigned int get_tag(struct nullb_queue *nq)
-{
-	unsigned int tag;
-
-	do {
-		tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
-		if (tag >= nq->queue_depth)
-			return -1U;
-	} while (test_and_set_bit_lock(tag, nq->tag_map));
-
-	return tag;
-}
-
-static void free_cmd(struct nullb_cmd *cmd)
-{
-	put_tag(cmd->nq, cmd->tag);
-}
-
-static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
-
-static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
-{
-	struct nullb_cmd *cmd;
-	unsigned int tag;
-
-	tag = get_tag(nq);
-	if (tag != -1U) {
-		cmd = &nq->cmds[tag];
-		cmd->tag = tag;
-		cmd->error = BLK_STS_OK;
-		cmd->nq = nq;
-		if (nq->dev->irqmode == NULL_IRQ_TIMER) {
-			hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
-				     HRTIMER_MODE_REL);
-			cmd->timer.function = null_cmd_timer_expired;
-		}
-		return cmd;
-	}
-
-	return NULL;
-}
-
-static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio)
-{
-	struct nullb_cmd *cmd;
-	DEFINE_WAIT(wait);
-
-	do {
-		/*
-		 * This avoids multiple return statements, multiple calls to
-		 * __alloc_cmd() and a fast path call to prepare_to_wait().
-		 */
-		cmd = __alloc_cmd(nq);
-		if (cmd) {
-			cmd->bio = bio;
-			return cmd;
-		}
-		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
-		io_schedule();
-		finish_wait(&nq->wait, &wait);
-	} while (1);
-}
-
-static void end_cmd(struct nullb_cmd *cmd)
-{
-	int queue_mode = cmd->nq->dev->queue_mode;
-
-	switch (queue_mode) {
-	case NULL_Q_MQ:
-		blk_mq_end_request(cmd->rq, cmd->error);
-		return;
-	case NULL_Q_BIO:
-		cmd->bio->bi_status = cmd->error;
-		bio_endio(cmd->bio);
-		break;
-	}
-
-	free_cmd(cmd);
-}
-
 static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
 {
-	end_cmd(container_of(timer, struct nullb_cmd, timer));
+	struct nullb_cmd *cmd = container_of(timer, struct nullb_cmd, timer);
+	blk_mq_end_request(blk_mq_rq_from_pdu(cmd), cmd->error);
 
 	return HRTIMER_NORESTART;
 }
@@ -807,7 +848,9 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
 
 static void null_complete_rq(struct request *rq)
 {
-	end_cmd(blk_mq_rq_to_pdu(rq));
+	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+
+	blk_mq_end_request(rq, cmd->error);
 }
 
 static struct nullb_page *null_alloc_page(void)
@@ -880,7 +923,7 @@ static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
 	if (radix_tree_insert(root, idx, t_page)) {
 		null_free_page(t_page);
 		t_page = radix_tree_lookup(root, idx);
-		WARN_ON(!t_page || t_page->page->index != idx);
+		WARN_ON(!t_page || t_page->page->private != idx);
 	} else if (is_cache)
 		nullb->dev->curr_cache += PAGE_SIZE;
 
@@ -903,7 +946,7 @@ static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
 				(void **)t_pages, pos, FREE_BATCH);
 
 		for (i = 0; i < nr_pages; i++) {
-			pos = t_pages[i]->page->index;
+			pos = t_pages[i]->page->private;
 			ret = radix_tree_delete_item(root, pos, t_pages[i]);
 			WARN_ON(ret != t_pages[i]);
 			null_free_page(ret);
@@ -929,7 +972,7 @@ static struct nullb_page *__null_lookup_page(struct nullb *nullb,
 
 	root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
 	t_page = radix_tree_lookup(root, idx);
-	WARN_ON(t_page && t_page->page->index != idx);
+	WARN_ON(t_page && t_page->page->private != idx);
 
 	if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
 		return t_page;
@@ -972,7 +1015,7 @@ static struct nullb_page *null_insert_page(struct nullb *nullb,
 
 	spin_lock_irq(&nullb->lock);
 	idx = sector >> PAGE_SECTORS_SHIFT;
-	t_page->page->index = idx;
+	t_page->page->private = idx;
 	t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
 	radix_tree_preload_end();
 
@@ -992,7 +1035,7 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
 	struct nullb_page *t_page, *ret;
 	void *dst, *src;
 
-	idx = c_page->page->index;
+	idx = c_page->page->private;
 
 	t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
 
@@ -1010,8 +1053,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
 	if (!t_page)
 		return -ENOMEM;
 
-	src = kmap_atomic(c_page->page);
-	dst = kmap_atomic(t_page->page);
+	src = kmap_local_page(c_page->page);
+	dst = kmap_local_page(t_page->page);
 
 	for (i = 0; i < PAGE_SECTORS;
 			i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
@@ -1023,8 +1066,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
 		}
 	}
 
-	kunmap_atomic(dst);
-	kunmap_atomic(src);
+	kunmap_local(dst);
+	kunmap_local(src);
 
 	ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
 	null_free_page(ret);
@@ -1051,7 +1094,7 @@ again:
 	 * avoid race, we don't allow page free
 	 */
 	for (i = 0; i < nr_pages; i++) {
-		nullb->cache_flush_pos = c_pages[i]->page->index;
+		nullb->cache_flush_pos = c_pages[i]->page->private;
 		/*
 		 * We found the page which is being flushed to disk by other
 		 * threads
@@ -1086,31 +1129,28 @@ again:
 	return 0;
 }
 
-static int copy_to_nullb(struct nullb *nullb, struct page *source,
-	unsigned int off, sector_t sector, size_t n, bool is_fua)
+static blk_status_t copy_to_nullb(struct nullb *nullb, void *source,
+	loff_t pos, size_t n, bool is_fua)
 {
 	size_t temp, count = 0;
-	unsigned int offset;
 	struct nullb_page *t_page;
-	void *dst, *src;
+	sector_t sector;
 
 	while (count < n) {
-		temp = min_t(size_t, nullb->dev->blocksize, n - count);
+		temp = min3(nullb->dev->blocksize, n - count,
+			PAGE_SIZE - offset_in_page(pos));
+		sector = pos >> SECTOR_SHIFT;
 
 		if (null_cache_active(nullb) && !is_fua)
 			null_make_cache_space(nullb, PAGE_SIZE);
 
-		offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
 		t_page = null_insert_page(nullb, sector,
 			!null_cache_active(nullb) || is_fua);
 		if (!t_page)
-			return -ENOSPC;
+			return BLK_STS_NOSPC;
 
-		src = kmap_atomic(source);
-		dst = kmap_atomic(t_page->page);
-		memcpy(dst + offset, src + off + count, temp);
-		kunmap_atomic(dst);
-		kunmap_atomic(src);
+		memcpy_to_page(t_page->page, offset_in_page(pos),
+			source + count, temp);
 
 		__set_bit(sector & SECTOR_MASK, t_page->bitmap);
 
 		if (null_cache_active(nullb) && !is_fua)
@@ -1118,51 +1158,34 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source,
 			null_free_sector(nullb, sector, true);
 
 		count += temp;
-		sector += temp >> SECTOR_SHIFT;
+		pos += temp;
 	}
 
-	return 0;
+	return BLK_STS_OK;
 }
 
-static int copy_from_nullb(struct nullb *nullb, struct page *dest,
-	unsigned int off, sector_t sector, size_t n)
+static void copy_from_nullb(struct nullb *nullb, void *dest, loff_t pos,
+	size_t n)
 {
 	size_t temp, count = 0;
-	unsigned int offset;
 	struct nullb_page *t_page;
-	void *dst, *src;
+	sector_t sector;
 
 	while (count < n) {
-		temp = min_t(size_t, nullb->dev->blocksize, n - count);
+		temp = min3(nullb->dev->blocksize, n - count,
+			PAGE_SIZE - offset_in_page(pos));
+		sector = pos >> SECTOR_SHIFT;
 
-		offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
 		t_page = null_lookup_page(nullb, sector, false,
 			!null_cache_active(nullb));
-
-		dst = kmap_atomic(dest);
-		if (!t_page) {
-			memset(dst + off + count, 0, temp);
-			goto next;
-		}
-		src = kmap_atomic(t_page->page);
-		memcpy(dst + off + count, src + offset, temp);
-		kunmap_atomic(src);
-next:
-		kunmap_atomic(dst);
+		if (t_page)
+			memcpy_from_page(dest + count, t_page->page,
+				offset_in_page(pos), temp);
+		else
+			memset(dest + count, 0, temp);
 
 		count += temp;
-		sector += temp >> SECTOR_SHIFT;
+		pos += temp;
 	}
-	return 0;
-}
-
-static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
-	unsigned int len, unsigned int off)
-{
-	void *dst;
-
-	dst = kmap_atomic(page);
-	memset(dst + off, 0xFF, len);
-	kunmap_atomic(dst);
 }
 
 blk_status_t null_handle_discard(struct nullb_device *dev,
@@ -1186,7 +1209,7 @@ blk_status_t null_handle_discard(struct nullb_device *dev,
 	return BLK_STS_OK;
 }
 
-static int null_handle_flush(struct nullb *nullb)
+static blk_status_t null_handle_flush(struct nullb *nullb)
 {
 	int err;
 
@@ -1203,107 +1226,80 @@ static blk_status_t null_handle_flush(struct nullb *nullb)
 	WARN_ON(!radix_tree_empty(&nullb->dev->cache));
 	spin_unlock_irq(&nullb->lock);
-	return err;
+	return errno_to_blk_status(err);
 }
 
-static int null_transfer(struct nullb *nullb, struct page *page,
-	unsigned int len, unsigned int off, bool is_write, sector_t sector,
+static blk_status_t null_transfer(struct nullb *nullb, struct page *page,
+	unsigned int len, unsigned int off, bool is_write, loff_t pos,
 	bool is_fua)
 {
 	struct nullb_device *dev = nullb->dev;
+	blk_status_t err = BLK_STS_OK;
 	unsigned int valid_len = len;
-	int err = 0;
+	void *p;
 
+	p = kmap_local_page(page) + off;
 	if (!is_write) {
-		if (dev->zoned)
+		if (dev->zoned) {
 			valid_len = null_zone_valid_read_len(nullb,
-						sector, len);
+					pos >> SECTOR_SHIFT, len);
+			if (valid_len && valid_len != len)
+				valid_len -= pos & (SECTOR_SIZE - 1);
+		}
 
 		if (valid_len) {
-			err = copy_from_nullb(nullb, page, off,
-				sector, valid_len);
+			copy_from_nullb(nullb, p, pos, valid_len);
 			off += valid_len;
 			len -= valid_len;
 		}
 
 		if (len)
-			nullb_fill_pattern(nullb, page, len, off);
+			memset(p + valid_len, 0xff, len);
 		flush_dcache_page(page);
 	} else {
 		flush_dcache_page(page);
-		err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
+		err = copy_to_nullb(nullb, p, pos, len, is_fua);
 	}
+	kunmap_local(p);
 
 	return err;
 }
 
-static int null_handle_rq(struct nullb_cmd *cmd)
+/*
+ * Transfer data for the given request. The transfer size is capped with the
+ * nr_sectors argument.
+ */
+static blk_status_t null_handle_data_transfer(struct nullb_cmd *cmd,
+					      sector_t nr_sectors)
 {
-	struct request *rq = cmd->rq;
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
 	struct nullb *nullb = cmd->nq->dev->nullb;
-	int err;
+	blk_status_t err = BLK_STS_OK;
 	unsigned int len;
-	sector_t sector = blk_rq_pos(rq);
+	loff_t pos = blk_rq_pos(rq) << SECTOR_SHIFT;
+	unsigned int max_bytes = nr_sectors << SECTOR_SHIFT;
+	unsigned int transferred_bytes = 0;
 	struct req_iterator iter;
 	struct bio_vec bvec;
 
 	spin_lock_irq(&nullb->lock);
 	rq_for_each_segment(bvec, rq, iter) {
 		len = bvec.bv_len;
+		if (transferred_bytes + len > max_bytes)
+			len = max_bytes - transferred_bytes;
 		err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
-				     op_is_write(req_op(rq)), sector,
+				     op_is_write(req_op(rq)), pos,
 				     rq->cmd_flags & REQ_FUA);
-		if (err) {
-			spin_unlock_irq(&nullb->lock);
-			return err;
-		}
-		sector += len >> SECTOR_SHIFT;
-	}
-	spin_unlock_irq(&nullb->lock);
-
-	return 0;
-}
-
-static int null_handle_bio(struct nullb_cmd *cmd)
-{
-	struct bio *bio = cmd->bio;
-	struct nullb *nullb = cmd->nq->dev->nullb;
-	int err;
-	unsigned int len;
-	sector_t sector = bio->bi_iter.bi_sector;
-	struct bio_vec bvec;
-	struct bvec_iter iter;
-
-	spin_lock_irq(&nullb->lock);
-	bio_for_each_segment(bvec, bio, iter) {
-		len = bvec.bv_len;
-		err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
-				     op_is_write(bio_op(bio)), sector,
-				     bio->bi_opf & REQ_FUA);
-		if (err) {
-			spin_unlock_irq(&nullb->lock);
-			return err;
-		}
-		sector += len >> SECTOR_SHIFT;
+		if (err)
+			break;
+		pos += len;
+		transferred_bytes += len;
+		if (transferred_bytes >= max_bytes)
+			break;
 	}
 	spin_unlock_irq(&nullb->lock);
 
-	return 0;
-}
-
-static void null_stop_queue(struct nullb *nullb)
-{
-	struct request_queue *q = nullb->q;
-
-	if (nullb->dev->queue_mode == NULL_Q_MQ)
-		blk_mq_stop_hw_queues(q);
-}
-
-static void null_restart_queue_async(struct nullb *nullb)
-{
-	struct request_queue *q = nullb->q;
-
-	if (nullb->dev->queue_mode == NULL_Q_MQ)
-		blk_mq_start_stopped_hw_queues(q, true);
+	return err;
 }
 
 static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
@@ -1311,73 +1307,85 @@
 	struct nullb_device *dev = cmd->nq->dev;
 	struct nullb *nullb = dev->nullb;
 	blk_status_t sts = BLK_STS_OK;
-	struct request *rq = cmd->rq;
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
 
 	if (!hrtimer_active(&nullb->bw_timer))
 		hrtimer_restart(&nullb->bw_timer);
 
 	if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
-		null_stop_queue(nullb);
+		blk_mq_stop_hw_queues(nullb->q);
 		/* race with timer */
 		if (atomic_long_read(&nullb->cur_bytes) > 0)
-			null_restart_queue_async(nullb);
+			blk_mq_start_stopped_hw_queues(nullb->q, true);
 		/* requeue request */
 		sts = BLK_STS_DEV_RESOURCE;
 	}
 	return sts;
 }
 
-static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
-						 sector_t sector,
-						 sector_t nr_sectors)
+/*
+ * Check if the command should fail for the badblocks. If so, return
+ * BLK_STS_IOERR and return number of partial I/O sectors to be written or read,
+ * which may be less than the requested number of sectors.
+ *
+ * @cmd: The command to handle.
+ * @sector: The start sector for I/O.
+ * @nr_sectors: Specifies number of sectors to write or read, and returns the
+ *		number of sectors to be written or read.
+ */
+blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
+				   unsigned int *nr_sectors)
 {
 	struct badblocks *bb = &cmd->nq->dev->badblocks;
-	sector_t first_bad;
-	int bad_sectors;
+	struct nullb_device *dev = cmd->nq->dev;
+	unsigned int block_sectors = dev->blocksize >> SECTOR_SHIFT;
+	sector_t first_bad, bad_sectors;
+	unsigned int partial_io_sectors = 0;
 
-	if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
-		return BLK_STS_IOERR;
+	if (!badblocks_check(bb, sector, *nr_sectors, &first_bad, &bad_sectors))
+		return BLK_STS_OK;
 
-	return BLK_STS_OK;
+	if (cmd->nq->dev->badblocks_once)
+		badblocks_clear(bb, first_bad, bad_sectors);
+
+	if (cmd->nq->dev->badblocks_partial_io) {
+		if (!IS_ALIGNED(first_bad, block_sectors))
+			first_bad = ALIGN_DOWN(first_bad, block_sectors);
+		if (sector < first_bad)
+			partial_io_sectors = first_bad - sector;
+	}
+	*nr_sectors = partial_io_sectors;
+
+	return BLK_STS_IOERR;
 }
 
-static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
-						     enum req_op op,
-						     sector_t sector,
-						     sector_t nr_sectors)
+blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
+				       sector_t sector, sector_t nr_sectors)
 {
 	struct nullb_device *dev = cmd->nq->dev;
-	int err;
 
 	if (op == REQ_OP_DISCARD)
 		return null_handle_discard(dev, sector, nr_sectors);
 
-	if (dev->queue_mode == NULL_Q_BIO)
-		err = null_handle_bio(cmd);
-	else
-		err = null_handle_rq(cmd);
-
-	return errno_to_blk_status(err);
+	return null_handle_data_transfer(cmd, nr_sectors);
 }
 
 static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
 {
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
 	struct nullb_device *dev = cmd->nq->dev;
 	struct bio *bio;
 
-	if (dev->memory_backed)
-		return;
-
-	if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
-		zero_fill_bio(cmd->bio);
-	} else if (req_op(cmd->rq) == REQ_OP_READ) {
-		__rq_for_each_bio(bio, cmd->rq)
+	if (!dev->memory_backed && req_op(rq) == REQ_OP_READ) {
+		__rq_for_each_bio(bio, rq)
 			zero_fill_bio(bio);
 	}
 }
 
 static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
 {
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
+
 	/*
 	 * Since root privileges are required to configure the null_blk
 	 * driver, it is fine that this driver does not initialize the
@@ -1391,21 +1399,10 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
 	/* Complete IO by inline, softirq or timer */
 	switch (cmd->nq->dev->irqmode) {
 	case NULL_IRQ_SOFTIRQ:
-		switch (cmd->nq->dev->queue_mode) {
-		case NULL_Q_MQ:
-			if (likely(!blk_should_fake_timeout(cmd->rq->q)))
-				blk_mq_complete_request(cmd->rq);
-			break;
-		case NULL_Q_BIO:
-			/*
-			 * XXX: no proper submitting cpu information available.
-			 */
-			end_cmd(cmd);
-			break;
-		}
+		blk_mq_complete_request(rq);
 		break;
 	case NULL_IRQ_NONE:
-		end_cmd(cmd);
+		blk_mq_end_request(rq, cmd->error);
 		break;
 	case NULL_IRQ_TIMER:
 		null_cmd_end_timer(cmd);
@@ -1417,35 +1414,30 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
 			      sector_t sector, unsigned int nr_sectors)
 {
 	struct nullb_device *dev = cmd->nq->dev;
+	blk_status_t badblocks_ret = BLK_STS_OK;
 	blk_status_t ret;
 
-	if (dev->badblocks.shift != -1) {
-		ret = null_handle_badblocks(cmd, sector, nr_sectors);
+	if (dev->badblocks.shift != -1)
+		badblocks_ret = null_handle_badblocks(cmd, sector, &nr_sectors);
+
+	if (dev->memory_backed && nr_sectors) {
+		ret = null_handle_memory_backed(cmd, op, sector, nr_sectors);
 		if (ret != BLK_STS_OK)
 			return ret;
 	}
 
-	if (dev->memory_backed)
-		return null_handle_memory_backed(cmd, op, sector, nr_sectors);
-
-	return BLK_STS_OK;
+	return badblocks_ret;
 }
 
-static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
-				    sector_t nr_sectors, enum req_op op)
+static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
+			    sector_t nr_sectors, enum req_op op)
 {
 	struct nullb_device *dev = cmd->nq->dev;
 	struct nullb *nullb = dev->nullb;
 	blk_status_t sts;
 
-	if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
-		sts = null_handle_throttled(cmd);
-		if (sts != BLK_STS_OK)
-			return sts;
-	}
-
-	if (op == REQ_OP_FLUSH) {
-		cmd->error = errno_to_blk_status(null_handle_flush(nullb));
+	if (op == REQ_OP_FLUSH) {
+		cmd->error = null_handle_flush(nullb);
 		goto out;
 	}
@@ -1460,7 +1452,6 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
 
 out:
 	nullb_complete_cmd(cmd);
-	return BLK_STS_OK;
 }
 
 static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
@@ -1473,7 +1464,7 @@ static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
 		return HRTIMER_NORESTART;
 
 	atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
-	null_restart_queue_async(nullb);
+	blk_mq_start_stopped_hw_queues(nullb->q, true);
 
 	hrtimer_forward_now(&nullb->bw_timer, timer_interval);
 
@@ -1484,50 +1475,53 @@ static void nullb_setup_bwtimer(struct nullb *nullb)
 {
 	ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
 
-	hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	nullb->bw_timer.function = nullb_bwtimer_fn;
+	hrtimer_setup(&nullb->bw_timer, nullb_bwtimer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
 	hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
 }
 
-static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
-{
-	int index = 0;
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
 
-	if (nullb->nr_queues != 1)
-		index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
+static bool should_timeout_request(struct request *rq)
+{
+	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	struct nullb_device *dev = cmd->nq->dev;
 
-	return &nullb->queues[index];
+	return should_fail(&dev->timeout_config.attr, 1);
 }
 
-static void null_submit_bio(struct bio *bio)
+static bool should_requeue_request(struct request *rq)
 {
-	sector_t sector = bio->bi_iter.bi_sector;
-	sector_t nr_sectors = bio_sectors(bio);
-	struct nullb *nullb = bio->bi_bdev->bd_disk->private_data;
-	struct nullb_queue *nq = nullb_to_queue(nullb);
+	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	struct nullb_device *dev = cmd->nq->dev;
 
-	null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio));
+	return should_fail(&dev->requeue_config.attr, 1);
+}
+
+static bool should_init_hctx_fail(struct nullb_device *dev)
+{
+	return should_fail(&dev->init_hctx_fault_config.attr, 1);
 }
 
+#else
+
 static bool should_timeout_request(struct request *rq)
 {
-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
-	if (g_timeout_str[0])
-		return should_fail(&null_timeout_attr, 1);
-#endif
 	return false;
 }
 
 static bool should_requeue_request(struct request *rq)
 {
-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
-	if (g_requeue_str[0])
-		return should_fail(&null_requeue_attr, 1);
-#endif
 	return false;
 }
 
+static bool should_init_hctx_fail(struct nullb_device *dev)
+{
+	return false;
+}
+
+#endif
+
 static void null_map_queues(struct blk_mq_tag_set *set)
 {
 	struct nullb *nullb = set->driver_data;
@@ -1586,9 +1580,12 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 	struct nullb_queue *nq = hctx->driver_data;
 	LIST_HEAD(list);
 	int nr = 0;
+	struct request *rq;
 
 	spin_lock(&nq->poll_lock);
 	list_splice_init(&nq->poll_list, &list);
+	list_for_each_entry(rq, &list, queuelist)
+		blk_mq_set_request_complete(rq);
 	spin_unlock(&nq->poll_lock);
 
 	while (!list_empty(&list)) {
@@ -1600,9 +1597,9 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 		cmd = blk_mq_rq_to_pdu(req);
 		cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req),
 						blk_rq_sectors(req));
-		if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error,
-					blk_mq_end_request_batch))
-			end_cmd(cmd);
+		if (!blk_mq_add_to_batch(req, iob, cmd->error != BLK_STS_OK,
+					blk_mq_end_request_batch))
+			blk_mq_end_request(req, cmd->error);
 		nr++;
 	}
 
@@ -1614,16 +1611,21 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
 	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
-	pr_info("rq %p timed out\n", rq);
-
 	if (hctx->type == HCTX_TYPE_POLL) {
 		struct nullb_queue *nq = hctx->driver_data;
 
 		spin_lock(&nq->poll_lock);
+		/* The request may have completed meanwhile. */
+		if (blk_mq_request_completed(rq)) {
+			spin_unlock(&nq->poll_lock);
+			return BLK_EH_DONE;
+		}
 		list_del_init(&rq->queuelist);
 		spin_unlock(&nq->poll_lock);
 	}
 
+	pr_info("rq %p timed out\n", rq);
+
 	/*
 	 * If the device is marked as blocking (i.e. memory backed or zoned
 	 * device), the submission path may be blocked waiting for resources
@@ -1638,28 +1640,27 @@
 }
 
 static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
-			 const struct blk_mq_queue_data *bd)
+				  const struct blk_mq_queue_data *bd)
 {
-	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+	struct request *rq = bd->rq;
+	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
 	struct nullb_queue *nq = hctx->driver_data;
-	sector_t nr_sectors = blk_rq_sectors(bd->rq);
-	sector_t sector = blk_rq_pos(bd->rq);
+	sector_t nr_sectors = blk_rq_sectors(rq);
+	sector_t sector = blk_rq_pos(rq);
 	const bool is_poll = hctx->type == HCTX_TYPE_POLL;
 
 	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
 
 	if (!is_poll && nq->dev->irqmode == NULL_IRQ_TIMER) {
-		hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-		cmd->timer.function = null_cmd_timer_expired;
+		hrtimer_setup(&cmd->timer, null_cmd_timer_expired, CLOCK_MONOTONIC,
+			      HRTIMER_MODE_REL);
 	}
-	cmd->rq = bd->rq;
 	cmd->error = BLK_STS_OK;
 	cmd->nq = nq;
-	cmd->fake_timeout = should_timeout_request(bd->rq);
-
-	blk_mq_start_request(bd->rq);
+	cmd->fake_timeout = should_timeout_request(rq) ||
+		blk_should_fake_timeout(rq->q);
 
-	if (should_requeue_request(bd->rq)) {
+	if (should_requeue_request(rq)) {
 		/*
 		 * Alternate between hitting the core BUSY path, and the
 		 * driver driven requeue path
@@ -1667,52 +1668,52 @@
 		 */
 		nq->requeue_selection++;
 		if (nq->requeue_selection & 1)
			return BLK_STS_RESOURCE;
-		else {
-			blk_mq_requeue_request(bd->rq, true);
-			return BLK_STS_OK;
-		}
+		blk_mq_requeue_request(rq, true);
+		return BLK_STS_OK;
+	}
+
+	if (test_bit(NULLB_DEV_FL_THROTTLED, &nq->dev->flags)) {
+		blk_status_t sts = null_handle_throttled(cmd);
+
+		if (sts != BLK_STS_OK)
+			return sts;
 	}
 
+	blk_mq_start_request(rq);
+
 	if (is_poll) {
 		spin_lock(&nq->poll_lock);
-		list_add_tail(&bd->rq->queuelist, &nq->poll_list);
+		list_add_tail(&rq->queuelist, &nq->poll_list);
 		spin_unlock(&nq->poll_lock);
 		return BLK_STS_OK;
 	}
 	if (cmd->fake_timeout)
 		return BLK_STS_OK;
 
-	return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
-}
-
-static void cleanup_queue(struct nullb_queue *nq)
-{
-	bitmap_free(nq->tag_map);
-	kfree(nq->cmds);
+	null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
+	return BLK_STS_OK;
 }
 
-static void cleanup_queues(struct nullb *nullb)
+static void null_queue_rqs(struct rq_list *rqlist)
 {
-	int i;
-
-	for (i = 0; i < nullb->nr_queues; i++)
-		cleanup_queue(&nullb->queues[i]);
+	struct rq_list requeue_list = {};
+	struct blk_mq_queue_data bd = { };
+	blk_status_t ret;
 
-	kfree(nullb->queues);
-}
+	do {
+		struct request *rq = rq_list_pop(rqlist);
 
-static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
-{
-	struct nullb_queue *nq = hctx->driver_data;
-	struct nullb *nullb = nq->dev->nullb;
+		bd.rq = rq;
+		ret = null_queue_rq(rq->mq_hctx, &bd);
+		if (ret != BLK_STS_OK)
+			rq_list_add_tail(&requeue_list, rq);
+	} while (!rq_list_empty(rqlist));
 
-	nullb->nr_queues--;
+	*rqlist = requeue_list;
 }
 
 static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
 {
-	init_waitqueue_head(&nq->wait);
-	nq->queue_depth = nullb->queue_depth;
 	nq->dev = nullb->dev;
 	INIT_LIST_HEAD(&nq->poll_list);
 	spin_lock_init(&nq->poll_lock);
@@ -1724,27 +1725,24 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
 	struct nullb *nullb = hctx->queue->queuedata;
 	struct nullb_queue *nq;
 
-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
-	if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1))
+	if (should_init_hctx_fail(nullb->dev))
 		return -EFAULT;
-#endif
 
 	nq = &nullb->queues[hctx_idx];
 	hctx->driver_data = nq;
 	null_init_queue(nullb, nq);
-	nullb->nr_queues++;
 
 	return 0;
 }
 
 static const struct blk_mq_ops null_mq_ops = {
 	.queue_rq	= null_queue_rq,
+	.queue_rqs	= null_queue_rqs,
 	.complete	= null_complete_rq,
 	.timeout	= null_timeout_rq,
 	.poll		= null_poll,
 	.map_queues	= null_map_queues,
 	.init_hctx	= null_init_hctx,
-	.exit_hctx	= null_exit_hctx,
 };
 
 static void null_del_dev(struct nullb *nullb)
@@ -1756,7 +1754,7 @@ static void null_del_dev(struct nullb *nullb)
 
 	dev = nullb->dev;
 
-	ida_simple_remove(&nullb_indexes, nullb->index);
+	ida_free(&nullb_indexes, nullb->index);
 
 	list_del_init(&nullb->list);
 
@@ -1765,21 +1763,20 @@ static void null_del_dev(struct nullb *nullb)
 	if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
 		hrtimer_cancel(&nullb->bw_timer);
 		atomic_long_set(&nullb->cur_bytes, LONG_MAX);
-		null_restart_queue_async(nullb);
+		blk_mq_start_stopped_hw_queues(nullb->q, true);
 	}
 
 	put_disk(nullb->disk);
-	if (dev->queue_mode == NULL_Q_MQ &&
-	    nullb->tag_set == &nullb->__tag_set)
+	if (nullb->tag_set == &nullb->__tag_set)
 		blk_mq_free_tag_set(nullb->tag_set);
-	cleanup_queues(nullb);
+	kfree(nullb->queues);
 	if (null_cache_active(nullb))
 		null_free_device_storage(nullb->dev, true);
 	kfree(nullb);
 	dev->nullb = NULL;
 }
 
-static void null_config_discard(struct nullb *nullb)
+static void null_config_discard(struct nullb *nullb, struct queue_limits *lim)
 {
 	if (nullb->dev->discard == false)
 		return;
@@ -1796,44 +1793,14 @@ static void null_config_discard(struct nullb *nullb, struct queue_limits *lim)
 		return;
 	}
 
-	nullb->q->limits.discard_granularity = nullb->dev->blocksize;
-	blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
+	lim->max_hw_discard_sectors = UINT_MAX >> 9;
 }
 
-static const struct block_device_operations null_bio_ops = {
+static const struct block_device_operations null_ops = {
 	.owner		= THIS_MODULE,
-	.submit_bio	= null_submit_bio,
 	.report_zones	= null_report_zones,
 };
 
-static const struct block_device_operations null_rq_ops = {
-	.owner		= THIS_MODULE,
-	.report_zones	= null_report_zones,
-};
-
-static int setup_commands(struct nullb_queue *nq)
-{
-	struct nullb_cmd *cmd;
-	int i;
-
-	nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
-	if (!nq->cmds)
-		return -ENOMEM;
-
-	nq->tag_map = bitmap_zalloc(nq->queue_depth, GFP_KERNEL);
-	if (!nq->tag_map) {
-		kfree(nq->cmds);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < nq->queue_depth; i++) {
-		cmd = &nq->cmds[i];
-		cmd->tag = -1U;
-	}
-
-	return 0;
-}
-
 static int setup_queues(struct nullb *nullb)
 {
 	int nqueues = nr_cpu_ids;
@@ -1846,109 +1813,78 @@ static int setup_queues(struct nullb *nullb)
 	if (!nullb->queues)
 		return -ENOMEM;
 
-	nullb->queue_depth = nullb->dev->hw_queue_depth;
 	return 0;
 }
 
-static int init_driver_queues(struct nullb *nullb)
+static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues)
 {
-	struct nullb_queue *nq;
-	int i, ret = 0;
-
-	for (i = 0; i < nullb->dev->submit_queues; i++) {
-		nq = &nullb->queues[i];
-
-		null_init_queue(nullb, nq);
-
-		ret = setup_commands(nq);
-		if (ret)
-			return ret;
-		nullb->nr_queues++;
+	set->ops = &null_mq_ops;
+	set->cmd_size = sizeof(struct nullb_cmd);
+	set->timeout = 5 * HZ;
+	set->nr_maps = 1;
+	if (poll_queues) {
+		set->nr_hw_queues += poll_queues;
+		set->nr_maps += 2;
 	}
-	return 0;
+	return blk_mq_alloc_tag_set(set);
}
 
-static int null_gendisk_register(struct nullb *nullb)
+static int null_init_global_tag_set(void)
 {
-	sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
-	struct gendisk *disk = nullb->disk;
-
-	set_capacity(disk, size);
-
-	disk->major		= null_major;
-	disk->first_minor	= nullb->index;
-	disk->minors		= 1;
-	if (queue_is_mq(nullb->q))
-		disk->fops		= &null_rq_ops;
-	else
-		disk->fops		= &null_bio_ops;
-	disk->private_data	= nullb;
-	strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+	int error;
 
-	if (nullb->dev->zoned) {
-		int ret = null_register_zoned_dev(nullb);
+	if (tag_set.ops)
+		return 0;
 
-		if (ret)
-			return ret;
-	}
+	tag_set.nr_hw_queues = g_submit_queues;
+	tag_set.queue_depth = g_hw_queue_depth;
+	tag_set.numa_node = g_home_node;
+	if (g_no_sched)
+		tag_set.flags |= BLK_MQ_F_NO_SCHED_BY_DEFAULT;
+	if (g_shared_tag_bitmap)
+		tag_set.flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+	if (g_blocking)
+		tag_set.flags |= BLK_MQ_F_BLOCKING;
 
-	return add_disk(disk);
+	error = null_init_tag_set(&tag_set, g_poll_queues);
+	if (error)
+		tag_set.ops = NULL;
+	return error;
 }
 
-static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
+static int null_setup_tagset(struct nullb *nullb)
 {
-	unsigned int flags = BLK_MQ_F_SHOULD_MERGE;
-	int hw_queues, numa_node;
-	unsigned int queue_depth;
-	int poll_queues;
-
-	if (nullb) {
-		hw_queues = nullb->dev->submit_queues;
-		poll_queues = nullb->dev->poll_queues;
-		queue_depth = nullb->dev->hw_queue_depth;
-		numa_node = nullb->dev->home_node;
-		if (nullb->dev->no_sched)
-			flags |= BLK_MQ_F_NO_SCHED;
-		if (nullb->dev->shared_tag_bitmap)
-			flags |= BLK_MQ_F_TAG_HCTX_SHARED;
-		if (nullb->dev->blocking)
-			flags |= BLK_MQ_F_BLOCKING;
-	} else {
-		hw_queues = g_submit_queues;
-		poll_queues = g_poll_queues;
-		queue_depth = g_hw_queue_depth;
-		numa_node = g_home_node;
-		if (g_no_sched)
-			flags |= BLK_MQ_F_NO_SCHED;
-		if (g_shared_tag_bitmap)
-			flags |= BLK_MQ_F_TAG_HCTX_SHARED;
-		if (g_blocking)
-			flags |= BLK_MQ_F_BLOCKING;
+	if (nullb->dev->shared_tags) {
+		nullb->tag_set = &tag_set;
+		return null_init_global_tag_set();
 	}
 
-	set->ops = &null_mq_ops;
-	set->cmd_size	= sizeof(struct nullb_cmd);
-	set->flags = flags;
-	set->driver_data = nullb;
-	set->nr_hw_queues = hw_queues;
-	set->queue_depth = queue_depth;
-	set->numa_node = numa_node;
-	if (poll_queues) {
-		set->nr_hw_queues += poll_queues;
-		set->nr_maps = 3;
-	} else {
-		set->nr_maps = 1;
-	}
-
-	return blk_mq_alloc_tag_set(set);
+	nullb->tag_set = &nullb->__tag_set;
+	nullb->tag_set->driver_data = nullb;
+	nullb->tag_set->nr_hw_queues = nullb->dev->submit_queues;
+	nullb->tag_set->queue_depth = nullb->dev->hw_queue_depth;
+	nullb->tag_set->numa_node = nullb->dev->home_node;
+	if (nullb->dev->no_sched)
+		nullb->tag_set->flags |= BLK_MQ_F_NO_SCHED_BY_DEFAULT;
+	if (nullb->dev->shared_tag_bitmap)
+		nullb->tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+	if (nullb->dev->blocking)
+		nullb->tag_set->flags |= BLK_MQ_F_BLOCKING;
+	return null_init_tag_set(nullb->tag_set, nullb->dev->poll_queues);
 }
 
 static int null_validate_conf(struct nullb_device *dev)
 {
-	dev->blocksize = round_down(dev->blocksize, 512);
-	dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
+	if (dev->queue_mode == NULL_Q_RQ) {
+		pr_err("legacy IO path is no longer available\n");
+		return -EINVAL;
+	}
+	if (dev->queue_mode == NULL_Q_BIO) {
+		pr_err("BIO-based IO path is no longer available, using blk-mq instead.\n");
+		dev->queue_mode = NULL_Q_MQ;
+	}
 
-	if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
+	if (dev->use_per_node_hctx) {
 		if (dev->submit_queues != nr_online_nodes)
 			dev->submit_queues = nr_online_nodes;
 	} else if (dev->submit_queues > nr_cpu_ids)
@@ -1960,8 +1896,6 @@ static int null_validate_conf(struct nullb_device *dev)
 	if (dev->poll_queues > g_poll_queues)
 		dev->poll_queues = g_poll_queues;
 	dev->prev_poll_queues = dev->poll_queues;
-
-	dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
 	dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
 
 	/* Do memory allocation, so set blocking */
@@ -1972,9 +1906,6 @@ static int null_validate_conf(struct nullb_device *dev)
 	dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
 						dev->cache_size);
 	dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
-	/* can not stop a queue */
-	if (dev->queue_mode == NULL_Q_BIO)
-		dev->mbps = 0;
 
 	if (dev->zoned &&
 	    (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
@@ -2014,6 +1945,13 @@ static bool null_setup_fault(void)
 
 static int null_add_dev(struct nullb_device *dev)
 {
+	struct queue_limits lim = {
+		.logical_block_size	= dev->blocksize,
+		.physical_block_size	= dev->blocksize,
+		.max_hw_sectors		= dev->max_sectors,
+		.dma_alignment		= 1,
+	};
+
 	struct nullb *nullb;
 	int rv;
 
@@ -2035,98 +1973,78 @@ static int null_add_dev(struct nullb_device *dev)
 	if (rv)
 		goto out_free_nullb;
 
-	if (dev->queue_mode == NULL_Q_MQ) {
-		if (shared_tags) {
-			nullb->tag_set = &tag_set;
-			rv = 0;
-		} else {
-			nullb->tag_set = &nullb->__tag_set;
-			rv = null_init_tag_set(nullb, nullb->tag_set);
-		}
+	rv = null_setup_tagset(nullb);
+	if (rv)
+		goto out_cleanup_queues;
 
+	if (dev->virt_boundary)
+		lim.virt_boundary_mask = PAGE_SIZE - 1;
+	null_config_discard(nullb, &lim);
+	if (dev->zoned) {
+		rv = null_init_zoned_dev(dev, &lim);
 		if (rv)
-			goto out_cleanup_queues;
-
-		if (!null_setup_fault())
 			goto out_cleanup_tags;
+	}
 
-		nullb->tag_set->timeout = 5 * HZ;
-		nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb);
-		if (IS_ERR(nullb->disk)) {
-			rv = PTR_ERR(nullb->disk);
-			goto out_cleanup_tags;
-		}
-		nullb->q = nullb->disk->queue;
-	} else if (dev->queue_mode == NULL_Q_BIO) {
-		rv = -ENOMEM;
-		nullb->disk = blk_alloc_disk(nullb->dev->home_node);
-		if (!nullb->disk)
-			goto out_cleanup_queues;
+	if (dev->cache_size > 0) {
+		set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+		lim.features |= BLK_FEAT_WRITE_CACHE;
+		if (dev->fua)
+			lim.features |= BLK_FEAT_FUA;
+	}
 
-		nullb->q = nullb->disk->queue;
-		rv = init_driver_queues(nullb);
-		if (rv)
-			goto out_cleanup_disk;
+	if (dev->rotational)
+		lim.features |= BLK_FEAT_ROTATIONAL;
+
+	nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb);
+	if (IS_ERR(nullb->disk)) {
+		rv = PTR_ERR(nullb->disk);
+		goto out_cleanup_zone;
 	}
+	nullb->q = nullb->disk->queue;
 
 	if (dev->mbps) {
 		set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
 		nullb_setup_bwtimer(nullb);
 	}
 
-	if (dev->cache_size > 0) {
-		set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
-		blk_queue_write_cache(nullb->q, true, true);
-	}
-
-	if (dev->zoned) {
-		rv = null_init_zoned_dev(dev, nullb->q);
-		if (rv)
-			goto out_cleanup_disk;
-	}
-
 	nullb->q->queuedata = nullb;
-	blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
 
-	mutex_lock(&lock);
-	rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
-	if (rv < 0) {
-		mutex_unlock(&lock);
-		goto out_cleanup_zone;
-	}
+	rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
+	if (rv < 0)
+		goto out_cleanup_disk;
+
+	nullb->index = rv;
 	dev->index = rv;
-	mutex_unlock(&lock);
-
-	blk_queue_logical_block_size(nullb->q, dev->blocksize);
-	blk_queue_physical_block_size(nullb->q, dev->blocksize);
-	if (!dev->max_sectors)
-		dev->max_sectors = queue_max_hw_sectors(nullb->q);
-	dev->max_sectors = min_t(unsigned int, dev->max_sectors,
-				 BLK_DEF_MAX_SECTORS);
-	blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);
-
-	if (dev->virt_boundary)
-		blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1);
-
-	null_config_discard(nullb);
 
-	if (config_item_name(&dev->item)) {
+	if (config_item_name(&dev->group.cg_item)) {
 		/* Use configfs dir name as the device name */
 		snprintf(nullb->disk_name, sizeof(nullb->disk_name),
-			 "%s", config_item_name(&dev->item));
+			 "%s", config_item_name(&dev->group.cg_item));
 	} else {
 		sprintf(nullb->disk_name, "nullb%d", nullb->index);
 	}
 
-	rv = null_gendisk_register(nullb);
+	set_capacity(nullb->disk,
+		((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT);
+	nullb->disk->major = null_major;
+	nullb->disk->first_minor = nullb->index;
+	nullb->disk->minors = 1;
+	nullb->disk->fops = &null_ops;
+	nullb->disk->private_data = nullb;
+	strscpy(nullb->disk->disk_name, nullb->disk_name);
+
+	if (nullb->dev->zoned) {
+		rv = null_register_zoned_dev(nullb);
+		if (rv)
+			goto out_ida_free;
+	}
+
+	rv = add_disk(nullb->disk);
 	if (rv)
 		goto out_ida_free;
 
-	mutex_lock(&lock);
 	list_add_tail(&nullb->list, &nullb_list);
-	mutex_unlock(&lock);
 
 	pr_info("disk %s created\n", nullb->disk_name);
 
@@ -2134,15 +2052,15 @@
 
 out_ida_free:
 	ida_free(&nullb_indexes, nullb->index);
-out_cleanup_zone:
-	null_free_zoned_dev(dev);
 out_cleanup_disk:
 	put_disk(nullb->disk);
+out_cleanup_zone:
+	null_free_zoned_dev(dev);
 out_cleanup_tags:
-	if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+	if (nullb->tag_set == &nullb->__tag_set)
 		blk_mq_free_tag_set(nullb->tag_set);
 out_cleanup_queues:
-	cleanup_queues(nullb);
+	kfree(nullb->queues);
 out_free_nullb:
 	kfree(nullb);
 	dev->nullb = NULL;
@@ -2175,7 +2093,9 @@ static int null_create_dev(void)
 	if (!dev)
 		return -ENOMEM;
 
+	mutex_lock(&lock);
 	ret = null_add_dev(dev);
+	mutex_unlock(&lock);
 	if (ret) {
 		null_free_dev(dev);
 		return ret;
@@ -2189,6 +2109,7 @@ static void null_destroy_dev(struct nullb *nullb)
 	struct nullb_device *dev = nullb->dev;
 
 	null_del_dev(nullb);
+	null_free_device_storage(dev, false);
 	null_free_dev(dev);
 }
 
@@ -2204,23 +2125,20 @@ static int __init null_init(void)
 		g_bs = PAGE_SIZE;
 	}
 
-	if (g_max_sectors > BLK_DEF_MAX_SECTORS) {
-		pr_warn("invalid max sectors\n");
-		pr_warn("defaults max sectors to %u\n", BLK_DEF_MAX_SECTORS);
-		g_max_sectors = BLK_DEF_MAX_SECTORS;
-	}
-
	if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
 		pr_err("invalid home_node value\n");
 		g_home_node = NUMA_NO_NODE;
 	}
 
+	if (!null_setup_fault())
+		return -EINVAL;
+
 	if (g_queue_mode == NULL_Q_RQ) {
 		pr_err("legacy IO path is no longer available\n");
 		return -EINVAL;
 	}
 
-	if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
+	if (g_use_per_node_hctx) {
 		if (g_submit_queues != nr_online_nodes) {
 			pr_warn("submit_queues param is set to %u.\n",
				nr_online_nodes);
@@ -2232,18 +2150,12 @@ static int __init null_init(void)
 		g_submit_queues = 1;
 	}
 
-	if (g_queue_mode == NULL_Q_MQ && shared_tags) {
-		ret = null_init_tag_set(NULL, &tag_set);
-		if (ret)
-			return ret;
-	}
-
 	config_group_init(&nullb_subsys.su_group);
 	mutex_init(&nullb_subsys.su_mutex);
 
 	ret = configfs_register_subsystem(&nullb_subsys);
 	if (ret)
-		goto err_tagset;
+		return ret;
 
 	mutex_init(&lock);
 
@@ -2270,9 +2182,6 @@ err_dev:
 	unregister_blkdev(null_major, "nullb");
 err_conf:
 	configfs_unregister_subsystem(&nullb_subsys);
-err_tagset:
-	if (g_queue_mode == NULL_Q_MQ && shared_tags)
-		blk_mq_free_tag_set(&tag_set);
 	return ret;
 }
 
@@ -2291,12 +2200,15 @@ static void __exit null_exit(void)
 	}
 	mutex_unlock(&lock);
 
-	if (g_queue_mode == NULL_Q_MQ && shared_tags)
+	if (tag_set.ops)
 		blk_mq_free_tag_set(&tag_set);
+
+	mutex_destroy(&lock);
 }
 
 module_init(null_init);
 module_exit(null_exit);
 
 MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
+MODULE_DESCRIPTION("multi queue aware block test driver");
 MODULE_LICENSE("GPL");
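
Two of the rewrites in this diff lend themselves to standalone illustration. The new memb_group_features_show() stops hard-coding the features string and instead walks the NULL-terminated nullb_device_attrs[] table, joining names with ',' and ending the list with '\n'. The following is a minimal userspace sketch of that same pattern, not part of the commit: the attribute names and BUF_SIZE are hypothetical stand-ins, and plain snprintf() replaces the kernel's configfs types, WARN_ONCE() and memzero_explicit().

#include <stdio.h>
#include <string.h>

#define BUF_SIZE 4096	/* stand-in for PAGE_SIZE */

/* NULL-terminated attribute table, mirroring nullb_device_attrs[]. */
static const char *attrs[] = { "badblocks", "blocking", "blocksize", NULL };

/*
 * Join the attribute names with ',' and terminate the list with '\n',
 * as the rewritten memb_group_features_show() does. Returns the number
 * of bytes written, or -1 when the buffer would overflow.
 */
static int build_features(char *page, size_t size)
{
	size_t left = size, written = 0;
	char delimiter = ',';
	int ret;

	for (const char **entry = attrs; *entry && left > 0; entry++) {
		if (!*(entry + 1))		/* last entry: end the line */
			delimiter = '\n';
		ret = snprintf(page + written, left, "%s%c", *entry, delimiter);
		if (ret < 0 || (size_t)ret >= left)
			return -1;		/* out of space */
		left -= ret;
		written += ret;
	}
	return (int)written;
}

int main(void)
{
	char page[BUF_SIZE];
	int n = build_features(page, sizeof(page));

	if (n > 0)
		fwrite(page, 1, n, stdout);	/* "badblocks,blocking,blocksize\n" */
	return 0;
}

The payoff of this table-driven approach is visible in the diff itself: attributes such as fua, rotational and zone_offline appear in the features list automatically once they are added to nullb_device_attrs[].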
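Likewise, the reworked null_handle_badblocks() still fails the command with BLK_STS_IOERR, but when badblocks_partial_io is set it also reports how many leading sectors can be transferred before the first bad block, after rounding the bad sector down to a device-block boundary. Below is a self-contained sketch of just that arithmetic; the helper name is hypothetical, the kernel's badblocks_check() lookup is not modeled, and ALIGN_DOWN is emulated with a modulo (the kernel macro assumes a power-of-two alignment).

#include <assert.h>
#include <stdio.h>

typedef unsigned long long sector_t;

#define ALIGN_DOWN(x, a)	((x) - ((x) % (a)))

/*
 * Given an I/O starting at 'sector' and the first bad sector reported by
 * a badblocks lookup, compute how many sectors may still be partially
 * served, mirroring the badblocks_partial_io branch above.
 */
static unsigned int partial_io_sectors(sector_t sector, sector_t first_bad,
				       unsigned int block_sectors)
{
	if (first_bad % block_sectors)
		first_bad = ALIGN_DOWN(first_bad, block_sectors);
	return sector < first_bad ? (unsigned int)(first_bad - sector) : 0;
}

int main(void)
{
	/* 4 KiB blocks on 512 B sectors -> 8 sectors per block. */
	unsigned int bs = 8;

	/* Bad sector 21 rounds down to block boundary 16, so a request
	 * starting at sector 10 can still transfer 6 sectors. */
	assert(partial_io_sectors(10, 21, bs) == 6);

	/* A request starting inside the affected block transfers nothing. */
	assert(partial_io_sectors(18, 21, bs) == 0);

	printf("ok\n");
	return 0;
}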
