From 558ab300c8f2e8843cbd2f30b358815b01b790e1 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 3 Aug 2017 00:26:39 +0800 Subject: null_blk: simplify logic for use_per_node_hctx Make sure submit_queues equals nr_online_nodes. Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 85c24cace973..72e2bc5017d4 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -733,9 +733,6 @@ static int null_add_dev(void) spin_lock_init(&nullb->lock); - if (queue_mode == NULL_Q_MQ && use_per_node_hctx) - submit_queues = nr_online_nodes; - rv = setup_queues(nullb); if (rv) goto out_free_nullb; @@ -845,8 +842,8 @@ static int __init null_init(void) } if (queue_mode == NULL_Q_MQ && use_per_node_hctx) { - if (submit_queues < nr_online_nodes) { - pr_warn("null_blk: submit_queues param is set to %u.", + if (submit_queues != nr_online_nodes) { + pr_warn("null_blk: submit_queues param is set to %u.\n", nr_online_nodes); submit_queues = nr_online_nodes; } -- cgit From d424681cc9ebaaeac2b6af842d2f497ba7ccf349 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 3 Aug 2017 00:27:37 +0800 Subject: null_blk: make sure submit_queues > 0 Set submit_queues to 1 by default, and make sure its value is > 0. 
Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 72e2bc5017d4..f1d0ca020999 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -65,7 +65,7 @@ enum { NULL_Q_MQ = 2, }; -static int submit_queues; +static int submit_queues = 1; module_param(submit_queues, int, S_IRUGO); MODULE_PARM_DESC(submit_queues, "Number of submission queues"); @@ -849,7 +849,7 @@ static int __init null_init(void) } } else if (submit_queues > nr_cpu_ids) submit_queues = nr_cpu_ids; - else if (!submit_queues) + else if (submit_queues <= 0) submit_queues = 1; if (queue_mode == NULL_Q_MQ && shared_tags) { -- cgit From 2984c8684f962c2936b7175ec5df44e9d607cea9 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 14 Aug 2017 15:04:52 -0700 Subject: nullb: factor disk parameters When we switch to the configfs interface, each disk can have a different configuration. To prepare for the change, we move most disk settings to a separate data structure. The existing module parameter interface is kept. The 'nr_devices' and 'shared_tags' parameters don't make sense as per-disk settings, so they remain global settings. 
Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 234 +++++++++++++++++++++++++++++++---------------- 1 file changed, 153 insertions(+), 81 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index f1d0ca020999..73938cde11f3 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -25,11 +25,29 @@ struct nullb_queue { unsigned long *tag_map; wait_queue_head_t wait; unsigned int queue_depth; + struct nullb_device *dev; struct nullb_cmd *cmds; }; +struct nullb_device { + struct nullb *nullb; + + unsigned long size; /* device size in MB */ + unsigned long completion_nsec; /* time in ns to complete a request */ + unsigned int submit_queues; /* number of submission queues */ + unsigned int home_node; /* home node for the device */ + unsigned int queue_mode; /* block interface */ + unsigned int blocksize; /* block size */ + unsigned int irqmode; /* IRQ completion handler */ + unsigned int hw_queue_depth; /* queue depth */ + bool use_lightnvm; /* register as a LightNVM device */ + bool blocking; /* blocking blk-mq device */ + bool use_per_node_hctx; /* use per-node allocation for hardware context */ +}; + struct nullb { + struct nullb_device *dev; struct list_head list; unsigned int index; struct request_queue *q; @@ -65,15 +83,15 @@ enum { NULL_Q_MQ = 2, }; -static int submit_queues = 1; -module_param(submit_queues, int, S_IRUGO); +static int g_submit_queues = 1; +module_param_named(submit_queues, g_submit_queues, int, S_IRUGO); MODULE_PARM_DESC(submit_queues, "Number of submission queues"); -static int home_node = NUMA_NO_NODE; -module_param(home_node, int, S_IRUGO); +static int g_home_node = NUMA_NO_NODE; +module_param_named(home_node, g_home_node, int, S_IRUGO); MODULE_PARM_DESC(home_node, "Home node for the device"); -static int queue_mode = NULL_Q_MQ; +static int g_queue_mode = NULL_Q_MQ; static int null_param_store_val(const char *str, int *val, int 
min, int max) { @@ -92,7 +110,7 @@ static int null_param_store_val(const char *str, int *val, int min, int max) static int null_set_queue_mode(const char *str, const struct kernel_param *kp) { - return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ); + return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ); } static const struct kernel_param_ops null_queue_mode_param_ops = { @@ -100,38 +118,38 @@ static const struct kernel_param_ops null_queue_mode_param_ops = { .get = param_get_int, }; -device_param_cb(queue_mode, &null_queue_mode_param_ops, &queue_mode, S_IRUGO); +device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, S_IRUGO); MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); -static int gb = 250; -module_param(gb, int, S_IRUGO); +static int g_gb = 250; +module_param_named(gb, g_gb, int, S_IRUGO); MODULE_PARM_DESC(gb, "Size in GB"); -static int bs = 512; -module_param(bs, int, S_IRUGO); +static int g_bs = 512; +module_param_named(bs, g_bs, int, S_IRUGO); MODULE_PARM_DESC(bs, "Block size (in bytes)"); static int nr_devices = 1; module_param(nr_devices, int, S_IRUGO); MODULE_PARM_DESC(nr_devices, "Number of devices to register"); -static bool use_lightnvm; -module_param(use_lightnvm, bool, S_IRUGO); +static bool g_use_lightnvm; +module_param_named(use_lightnvm, g_use_lightnvm, bool, S_IRUGO); MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device"); -static bool blocking; -module_param(blocking, bool, S_IRUGO); +static bool g_blocking; +module_param_named(blocking, g_blocking, bool, S_IRUGO); MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); static bool shared_tags; module_param(shared_tags, bool, S_IRUGO); MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq"); -static int irqmode = NULL_IRQ_SOFTIRQ; +static int g_irqmode = NULL_IRQ_SOFTIRQ; static int null_set_irqmode(const char *str, const struct kernel_param *kp) { - return 
null_param_store_val(str, &irqmode, NULL_IRQ_NONE, + return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE, NULL_IRQ_TIMER); } @@ -140,21 +158,47 @@ static const struct kernel_param_ops null_irqmode_param_ops = { .get = param_get_int, }; -device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO); +device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, S_IRUGO); MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); -static unsigned long completion_nsec = 10000; -module_param(completion_nsec, ulong, S_IRUGO); +static unsigned long g_completion_nsec = 10000; +module_param_named(completion_nsec, g_completion_nsec, ulong, S_IRUGO); MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); -static int hw_queue_depth = 64; -module_param(hw_queue_depth, int, S_IRUGO); +static int g_hw_queue_depth = 64; +module_param_named(hw_queue_depth, g_hw_queue_depth, int, S_IRUGO); MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); -static bool use_per_node_hctx = false; -module_param(use_per_node_hctx, bool, S_IRUGO); +static bool g_use_per_node_hctx; +module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO); MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. 
Default: false"); +static struct nullb_device *null_alloc_dev(void) +{ + struct nullb_device *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return NULL; + dev->size = g_gb * 1024; + dev->completion_nsec = g_completion_nsec; + dev->submit_queues = g_submit_queues; + dev->home_node = g_home_node; + dev->queue_mode = g_queue_mode; + dev->blocksize = g_bs; + dev->irqmode = g_irqmode; + dev->hw_queue_depth = g_hw_queue_depth; + dev->use_lightnvm = g_use_lightnvm; + dev->blocking = g_blocking; + dev->use_per_node_hctx = g_use_per_node_hctx; + return dev; +} + +static void null_free_dev(struct nullb_device *dev) +{ + kfree(dev); +} + static void put_tag(struct nullb_queue *nq, unsigned int tag) { clear_bit_unlock(tag, nq->tag_map); @@ -193,7 +237,7 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) cmd = &nq->cmds[tag]; cmd->tag = tag; cmd->nq = nq; - if (irqmode == NULL_IRQ_TIMER) { + if (nq->dev->irqmode == NULL_IRQ_TIMER) { hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cmd->timer.function = null_cmd_timer_expired; @@ -229,6 +273,7 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) static void end_cmd(struct nullb_cmd *cmd) { struct request_queue *q = NULL; + int queue_mode = cmd->nq->dev->queue_mode; if (cmd->rq) q = cmd->rq->q; @@ -267,14 +312,16 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) static void null_cmd_end_timer(struct nullb_cmd *cmd) { - ktime_t kt = completion_nsec; + ktime_t kt = cmd->nq->dev->completion_nsec; hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL); } static void null_softirq_done_fn(struct request *rq) { - if (queue_mode == NULL_Q_MQ) + struct nullb *nullb = rq->q->queuedata; + + if (nullb->dev->queue_mode == NULL_Q_MQ) end_cmd(blk_mq_rq_to_pdu(rq)); else end_cmd(rq->special); @@ -283,9 +330,9 @@ static void null_softirq_done_fn(struct request *rq) static inline void null_handle_cmd(struct nullb_cmd *cmd) { /* Complete IO by inline, softirq or 
timer */ - switch (irqmode) { + switch (cmd->nq->dev->irqmode) { case NULL_IRQ_SOFTIRQ: - switch (queue_mode) { + switch (cmd->nq->dev->queue_mode) { case NULL_Q_MQ: blk_mq_complete_request(cmd->rq); break; @@ -366,15 +413,16 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + struct nullb_queue *nq = hctx->driver_data; might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); - if (irqmode == NULL_IRQ_TIMER) { + if (nq->dev->irqmode == NULL_IRQ_TIMER) { hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cmd->timer.function = null_cmd_timer_expired; } cmd->rq = bd->rq; - cmd->nq = hctx->driver_data; + cmd->nq = nq; blk_mq_start_request(bd->rq); @@ -438,7 +486,8 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) { - sector_t size = gb * 1024 * 1024 * 1024ULL; + struct nullb *nullb = dev->q->queuedata; + sector_t size = (sector_t)nullb->dev->size * 1024 * 1024ULL; sector_t blksize; struct nvm_id_group *grp; @@ -460,7 +509,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) id->ppaf.ch_offset = 56; id->ppaf.ch_len = 8; - sector_div(size, bs); /* convert size to pages */ + sector_div(size, nullb->dev->blocksize); /* convert size to pages */ size >>= 8; /* concert size to pgs pr blk */ grp = &id->grp; grp->mtype = 0; @@ -474,8 +523,8 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) grp->num_blk = blksize; grp->num_pln = 1; - grp->fpg_sz = bs; - grp->csecs = bs; + grp->fpg_sz = nullb->dev->blocksize; + grp->csecs = nullb->dev->blocksize; grp->trdt = 25000; grp->trdm = 25000; grp->tprt = 500000; @@ -483,7 +532,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) grp->tbet = 1500000; grp->tbem = 1500000; grp->mpos = 0x010101; /* single plane rwe */ - grp->cpar = hw_queue_depth; + grp->cpar = nullb->dev->hw_queue_depth; return 0; } @@ 
-568,19 +617,23 @@ static void null_nvm_unregister(struct nullb *nullb) {} static void null_del_dev(struct nullb *nullb) { + struct nullb_device *dev = nullb->dev; + list_del_init(&nullb->list); - if (use_lightnvm) + if (dev->use_lightnvm) null_nvm_unregister(nullb); else del_gendisk(nullb->disk); blk_cleanup_queue(nullb->q); - if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) + if (dev->queue_mode == NULL_Q_MQ && + nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); - if (!use_lightnvm) + if (!dev->use_lightnvm) put_disk(nullb->disk); cleanup_queues(nullb); kfree(nullb); + dev->nullb = NULL; } static int null_open(struct block_device *bdev, fmode_t mode) @@ -605,6 +658,7 @@ static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) init_waitqueue_head(&nq->wait); nq->queue_depth = nullb->queue_depth; + nq->dev = nullb->dev; } static void null_init_queues(struct nullb *nullb) @@ -652,13 +706,13 @@ static int setup_commands(struct nullb_queue *nq) static int setup_queues(struct nullb *nullb) { - nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue), - GFP_KERNEL); + nullb->queues = kzalloc(nullb->dev->submit_queues * + sizeof(struct nullb_queue), GFP_KERNEL); if (!nullb->queues) return -ENOMEM; nullb->nr_queues = 0; - nullb->queue_depth = hw_queue_depth; + nullb->queue_depth = nullb->dev->hw_queue_depth; return 0; } @@ -668,7 +722,7 @@ static int init_driver_queues(struct nullb *nullb) struct nullb_queue *nq; int i, ret = 0; - for (i = 0; i < submit_queues; i++) { + for (i = 0; i < nullb->dev->submit_queues; i++) { nq = &nullb->queues[i]; null_init_queue(nullb, nq); @@ -686,10 +740,10 @@ static int null_gendisk_register(struct nullb *nullb) struct gendisk *disk; sector_t size; - disk = nullb->disk = alloc_disk_node(1, home_node); + disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node); if (!disk) return -ENOMEM; - size = gb * 1024 * 1024 * 1024ULL; + size = (sector_t)nullb->dev->size * 1024 
* 1024ULL; set_capacity(disk, size >> 9); disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; @@ -704,32 +758,36 @@ static int null_gendisk_register(struct nullb *nullb) return 0; } -static int null_init_tag_set(struct blk_mq_tag_set *set) +static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) { set->ops = &null_mq_ops; - set->nr_hw_queues = submit_queues; - set->queue_depth = hw_queue_depth; - set->numa_node = home_node; + set->nr_hw_queues = nullb ? nullb->dev->submit_queues : + g_submit_queues; + set->queue_depth = nullb ? nullb->dev->hw_queue_depth : + g_hw_queue_depth; + set->numa_node = nullb ? nullb->dev->home_node : g_home_node; set->cmd_size = sizeof(struct nullb_cmd); set->flags = BLK_MQ_F_SHOULD_MERGE; set->driver_data = NULL; - if (blocking) + if (nullb->dev->blocking) set->flags |= BLK_MQ_F_BLOCKING; return blk_mq_alloc_tag_set(set); } -static int null_add_dev(void) +static int null_add_dev(struct nullb_device *dev) { struct nullb *nullb; int rv; - nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); + nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node); if (!nullb) { rv = -ENOMEM; goto out; } + nullb->dev = dev; + dev->nullb = nullb; spin_lock_init(&nullb->lock); @@ -737,13 +795,13 @@ static int null_add_dev(void) if (rv) goto out_free_nullb; - if (queue_mode == NULL_Q_MQ) { + if (dev->queue_mode == NULL_Q_MQ) { if (shared_tags) { nullb->tag_set = &tag_set; rv = 0; } else { nullb->tag_set = &nullb->__tag_set; - rv = null_init_tag_set(nullb->tag_set); + rv = null_init_tag_set(nullb, nullb->tag_set); } if (rv) @@ -755,8 +813,8 @@ static int null_add_dev(void) goto out_cleanup_tags; } null_init_queues(nullb); - } else if (queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); + } else if (dev->queue_mode == NULL_Q_BIO) { + nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node); if (!nullb->q) { rv = -ENOMEM; goto out_cleanup_queues; @@ -766,7 +824,8 @@ 
static int null_add_dev(void) if (rv) goto out_cleanup_blk_queue; } else { - nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); + nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, + dev->home_node); if (!nullb->q) { rv = -ENOMEM; goto out_cleanup_queues; @@ -786,12 +845,12 @@ static int null_add_dev(void) nullb->index = nullb_indexes++; mutex_unlock(&lock); - blk_queue_logical_block_size(nullb->q, bs); - blk_queue_physical_block_size(nullb->q, bs); + blk_queue_logical_block_size(nullb->q, dev->blocksize); + blk_queue_physical_block_size(nullb->q, dev->blocksize); sprintf(nullb->disk_name, "nullb%d", nullb->index); - if (use_lightnvm) + if (dev->use_lightnvm) rv = null_nvm_register(nullb); else rv = null_gendisk_register(nullb); @@ -807,13 +866,14 @@ static int null_add_dev(void) out_cleanup_blk_queue: blk_cleanup_queue(nullb->q); out_cleanup_tags: - if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) + if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); out_cleanup_queues: cleanup_queues(nullb); out_free_nullb: kfree(nullb); out: + null_free_dev(dev); return rv; } @@ -822,38 +882,39 @@ static int __init null_init(void) int ret = 0; unsigned int i; struct nullb *nullb; + struct nullb_device *dev; - if (bs > PAGE_SIZE) { + if (g_bs > PAGE_SIZE) { pr_warn("null_blk: invalid block size\n"); pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); - bs = PAGE_SIZE; + g_bs = PAGE_SIZE; } - if (use_lightnvm && bs != 4096) { + if (g_use_lightnvm && g_bs != 4096) { pr_warn("null_blk: LightNVM only supports 4k block size\n"); pr_warn("null_blk: defaults block size to 4k\n"); - bs = 4096; + g_bs = 4096; } - if (use_lightnvm && queue_mode != NULL_Q_MQ) { + if (g_use_lightnvm && g_queue_mode != NULL_Q_MQ) { pr_warn("null_blk: LightNVM only supported for blk-mq\n"); pr_warn("null_blk: defaults queue mode to blk-mq\n"); - queue_mode = NULL_Q_MQ; + g_queue_mode = 
NULL_Q_MQ; } - if (queue_mode == NULL_Q_MQ && use_per_node_hctx) { - if (submit_queues != nr_online_nodes) { + if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { + if (g_submit_queues != nr_online_nodes) { pr_warn("null_blk: submit_queues param is set to %u.\n", nr_online_nodes); - submit_queues = nr_online_nodes; + g_submit_queues = nr_online_nodes; } - } else if (submit_queues > nr_cpu_ids) - submit_queues = nr_cpu_ids; - else if (submit_queues <= 0) - submit_queues = 1; + } else if (g_submit_queues > nr_cpu_ids) + g_submit_queues = nr_cpu_ids; + else if (g_submit_queues <= 0) + g_submit_queues = 1; - if (queue_mode == NULL_Q_MQ && shared_tags) { - ret = null_init_tag_set(&tag_set); + if (g_queue_mode == NULL_Q_MQ && shared_tags) { + ret = null_init_tag_set(NULL, &tag_set); if (ret) return ret; } @@ -866,7 +927,7 @@ static int __init null_init(void) goto err_tagset; } - if (use_lightnvm) { + if (g_use_lightnvm) { ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64), 0, 0, NULL); if (!ppa_cache) { @@ -877,9 +938,14 @@ static int __init null_init(void) } for (i = 0; i < nr_devices; i++) { - ret = null_add_dev(); - if (ret) + dev = null_alloc_dev(); + if (!dev) + goto err_dev; + ret = null_add_dev(dev); + if (ret) { + null_free_dev(dev); goto err_dev; + } } pr_info("null: module loaded\n"); @@ -888,13 +954,15 @@ static int __init null_init(void) err_dev: while (!list_empty(&nullb_list)) { nullb = list_entry(nullb_list.next, struct nullb, list); + dev = nullb->dev; null_del_dev(nullb); + null_free_dev(dev); } kmem_cache_destroy(ppa_cache); err_ppa: unregister_blkdev(null_major, "nullb"); err_tagset: - if (queue_mode == NULL_Q_MQ && shared_tags) + if (g_queue_mode == NULL_Q_MQ && shared_tags) blk_mq_free_tag_set(&tag_set); return ret; } @@ -907,12 +975,16 @@ static void __exit null_exit(void) mutex_lock(&lock); while (!list_empty(&nullb_list)) { + struct nullb_device *dev; + nullb = list_entry(nullb_list.next, struct nullb, list); + dev = nullb->dev; 
null_del_dev(nullb); + null_free_dev(dev); } mutex_unlock(&lock); - if (queue_mode == NULL_Q_MQ && shared_tags) + if (g_queue_mode == NULL_Q_MQ && shared_tags) blk_mq_free_tag_set(&tag_set); kmem_cache_destroy(ppa_cache); -- cgit From 3bf2bd20734e3e6ffda53719a9c10fb3ee9c5ffa Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 14 Aug 2017 15:04:53 -0700 Subject: nullb: add configfs interface Add configfs interface for nullb. configfs interface is more flexible and easy to configure in a per-disk basis. Configuration is something like this: mount -t configfs none /mnt Checking which features the driver supports: cat /mnt/nullb/features The 'features' attribute is for future extension. We probably will add new features into the driver, userspace can check this attribute to find the supported features. Create/remove a device: mkdir/rmdir /mnt/nullb/a Then configure the device by setting attributes under /mnt/nullb/a, most of nullb supported module parameters are converted to attributes: size; /* device size in MB */ completion_nsec; /* time in ns to complete a request */ submit_queues; /* number of submission queues */ home_node; /* home node for the device */ queue_mode; /* block interface */ blocksize; /* block size */ irqmode; /* IRQ completion handler */ hw_queue_depth; /* queue depth */ use_lightnvm; /* register as a LightNVM device */ blocking; /* blocking blk-mq device */ use_per_node_hctx; /* use per-node allocation for hardware context */ Note, creating a device doesn't create a disk immediately. Creating a disk is done in two phases: create a device and then power on the device. Next patch will introduce device power on. 
Based on original patch from Kyungchan Koh Signed-off-by: Kyungchan Koh Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 210 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 209 insertions(+), 1 deletion(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 73938cde11f3..c782492c0099 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1,3 +1,7 @@ +/* + * Add configfs and memory store: Kyungchan Koh and + * Shaohua Li + */ #include #include @@ -9,6 +13,7 @@ #include #include #include +#include struct nullb_cmd { struct list_head list; @@ -30,8 +35,21 @@ struct nullb_queue { struct nullb_cmd *cmds; }; +/* + * Status flags for nullb_device. + * + * CONFIGURED: Device has been configured and turned on. Cannot reconfigure. + * UP: Device is currently on and visible in userspace. + */ +enum nullb_device_flags { + NULLB_DEV_FL_CONFIGURED = 0, + NULLB_DEV_FL_UP = 1, +}; + struct nullb_device { struct nullb *nullb; + struct config_item item; + unsigned long flags; /* device flags */ unsigned long size; /* device size in MB */ unsigned long completion_nsec; /* time in ns to complete a request */ @@ -173,6 +191,185 @@ static bool g_use_per_node_hctx; module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO); MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); +static struct nullb_device *null_alloc_dev(void); +static void null_free_dev(struct nullb_device *dev); + +static inline struct nullb_device *to_nullb_device(struct config_item *item) +{ + return item ? 
container_of(item, struct nullb_device, item) : NULL; +} + +static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", val); +} + +static inline ssize_t nullb_device_ulong_attr_show(unsigned long val, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%lu\n", val); +} + +static inline ssize_t nullb_device_bool_attr_show(bool val, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", val); +} + +static ssize_t nullb_device_uint_attr_store(unsigned int *val, + const char *page, size_t count) +{ + unsigned int tmp; + int result; + + result = kstrtouint(page, 0, &tmp); + if (result) + return result; + + *val = tmp; + return count; +} + +static ssize_t nullb_device_ulong_attr_store(unsigned long *val, + const char *page, size_t count) +{ + int result; + unsigned long tmp; + + result = kstrtoul(page, 0, &tmp); + if (result) + return result; + + *val = tmp; + return count; +} + +static ssize_t nullb_device_bool_attr_store(bool *val, const char *page, + size_t count) +{ + bool tmp; + int result; + + result = kstrtobool(page, &tmp); + if (result) + return result; + + *val = tmp; + return count; +} + +/* The following macro should only be used with TYPE = {uint, ulong, bool}. 
*/ +#define NULLB_DEVICE_ATTR(NAME, TYPE) \ +static ssize_t \ +nullb_device_##NAME##_show(struct config_item *item, char *page) \ +{ \ + return nullb_device_##TYPE##_attr_show( \ + to_nullb_device(item)->NAME, page); \ +} \ +static ssize_t \ +nullb_device_##NAME##_store(struct config_item *item, const char *page, \ + size_t count) \ +{ \ + if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags)) \ + return -EBUSY; \ + return nullb_device_##TYPE##_attr_store( \ + &to_nullb_device(item)->NAME, page, count); \ +} \ +CONFIGFS_ATTR(nullb_device_, NAME); + +NULLB_DEVICE_ATTR(size, ulong); +NULLB_DEVICE_ATTR(completion_nsec, ulong); +NULLB_DEVICE_ATTR(submit_queues, uint); +NULLB_DEVICE_ATTR(home_node, uint); +NULLB_DEVICE_ATTR(queue_mode, uint); +NULLB_DEVICE_ATTR(blocksize, uint); +NULLB_DEVICE_ATTR(irqmode, uint); +NULLB_DEVICE_ATTR(hw_queue_depth, uint); +NULLB_DEVICE_ATTR(use_lightnvm, bool); +NULLB_DEVICE_ATTR(blocking, bool); +NULLB_DEVICE_ATTR(use_per_node_hctx, bool); + +static struct configfs_attribute *nullb_device_attrs[] = { + &nullb_device_attr_size, + &nullb_device_attr_completion_nsec, + &nullb_device_attr_submit_queues, + &nullb_device_attr_home_node, + &nullb_device_attr_queue_mode, + &nullb_device_attr_blocksize, + &nullb_device_attr_irqmode, + &nullb_device_attr_hw_queue_depth, + &nullb_device_attr_use_lightnvm, + &nullb_device_attr_blocking, + &nullb_device_attr_use_per_node_hctx, + NULL, +}; + +static void nullb_device_release(struct config_item *item) +{ + null_free_dev(to_nullb_device(item)); +} + +static struct configfs_item_operations nullb_device_ops = { + .release = nullb_device_release, +}; + +static struct config_item_type nullb_device_type = { + .ct_item_ops = &nullb_device_ops, + .ct_attrs = nullb_device_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct +config_item *nullb_group_make_item(struct config_group *group, const char *name) +{ + struct nullb_device *dev; + + dev = null_alloc_dev(); + if (!dev) + return 
ERR_PTR(-ENOMEM); + + config_item_init_type_name(&dev->item, name, &nullb_device_type); + + return &dev->item; +} + +static void +nullb_group_drop_item(struct config_group *group, struct config_item *item) +{ + config_item_put(item); +} + +static ssize_t memb_group_features_show(struct config_item *item, char *page) +{ + return snprintf(page, PAGE_SIZE, "\n"); +} + +CONFIGFS_ATTR_RO(memb_group_, features); + +static struct configfs_attribute *nullb_group_attrs[] = { + &memb_group_attr_features, + NULL, +}; + +static struct configfs_group_operations nullb_group_ops = { + .make_item = nullb_group_make_item, + .drop_item = nullb_group_drop_item, +}; + +static struct config_item_type nullb_group_type = { + .ct_group_ops = &nullb_group_ops, + .ct_attrs = nullb_group_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem nullb_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "nullb", + .ci_type = &nullb_group_type, + }, + }, +}; + static struct nullb_device *null_alloc_dev(void) { struct nullb_device *dev; @@ -919,12 +1116,19 @@ static int __init null_init(void) return ret; } + config_group_init(&nullb_subsys.su_group); + mutex_init(&nullb_subsys.su_mutex); + + ret = configfs_register_subsystem(&nullb_subsys); + if (ret) + goto err_tagset; + mutex_init(&lock); null_major = register_blkdev(0, "nullb"); if (null_major < 0) { ret = null_major; - goto err_tagset; + goto err_conf; } if (g_use_lightnvm) { @@ -961,6 +1165,8 @@ err_dev: kmem_cache_destroy(ppa_cache); err_ppa: unregister_blkdev(null_major, "nullb"); +err_conf: + configfs_unregister_subsystem(&nullb_subsys); err_tagset: if (g_queue_mode == NULL_Q_MQ && shared_tags) blk_mq_free_tag_set(&tag_set); @@ -971,6 +1177,8 @@ static void __exit null_exit(void) { struct nullb *nullb; + configfs_unregister_subsystem(&nullb_subsys); + unregister_blkdev(null_major, "nullb"); mutex_lock(&lock); -- cgit From cedcafad8277b3a07e90bf2f68fff5c6b28a183e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 
14 Aug 2017 15:04:54 -0700 Subject: nullb: add interface to power on disk The device created in nullb configfs interface isn't power on by default. After user configures the device, user can do 'echo 1 > xxx/nullb/device_name/power' to power on the device, which will create a disk. the xxx/nullb/device_name/index is the disk index, so if the index is 2, the new created disk should be named as /dev/nullb2. Note, the 'index' is only valid after disk is power on. 'echo 0 > xxx/nullb/device_name/power' will remove the disk. Note, this doesn't remove the device. To remove the device, user should do 'rmdir xxx/nullb/device_name'. Removing the device will remove the disk too. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index c782492c0099..cf14c46d3462 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -59,9 +59,11 @@ struct nullb_device { unsigned int blocksize; /* block size */ unsigned int irqmode; /* IRQ completion handler */ unsigned int hw_queue_depth; /* queue depth */ + unsigned int index; /* index of the disk, only valid with a disk */ bool use_lightnvm; /* register as a LightNVM device */ bool blocking; /* blocking blk-mq device */ bool use_per_node_hctx; /* use per-node allocation for hardware context */ + bool power; /* power on/off the device */ }; struct nullb { @@ -193,6 +195,8 @@ MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware contex static struct nullb_device *null_alloc_dev(void); static void null_free_dev(struct nullb_device *dev); +static void null_del_dev(struct nullb *nullb); +static int null_add_dev(struct nullb_device *dev); static inline struct nullb_device *to_nullb_device(struct config_item *item) { @@ -284,10 +288,50 @@ NULLB_DEVICE_ATTR(queue_mode, uint); 
NULLB_DEVICE_ATTR(blocksize, uint); NULLB_DEVICE_ATTR(irqmode, uint); NULLB_DEVICE_ATTR(hw_queue_depth, uint); +NULLB_DEVICE_ATTR(index, uint); NULLB_DEVICE_ATTR(use_lightnvm, bool); NULLB_DEVICE_ATTR(blocking, bool); NULLB_DEVICE_ATTR(use_per_node_hctx, bool); +static ssize_t nullb_device_power_show(struct config_item *item, char *page) +{ + return nullb_device_bool_attr_show(to_nullb_device(item)->power, page); +} + +static ssize_t nullb_device_power_store(struct config_item *item, + const char *page, size_t count) +{ + struct nullb_device *dev = to_nullb_device(item); + bool newp = false; + ssize_t ret; + + ret = nullb_device_bool_attr_store(&newp, page, count); + if (ret < 0) + return ret; + + if (!dev->power && newp) { + if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags)) + return count; + if (null_add_dev(dev)) { + clear_bit(NULLB_DEV_FL_UP, &dev->flags); + return -ENOMEM; + } + + set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); + dev->power = newp; + } else if (to_nullb_device(item)->power && !newp) { + mutex_lock(&lock); + dev->power = newp; + null_del_dev(dev->nullb); + mutex_unlock(&lock); + clear_bit(NULLB_DEV_FL_UP, &dev->flags); + } + + return count; +} + +CONFIGFS_ATTR(nullb_device_, power); + static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_size, &nullb_device_attr_completion_nsec, @@ -297,9 +341,11 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_blocksize, &nullb_device_attr_irqmode, &nullb_device_attr_hw_queue_depth, + &nullb_device_attr_index, &nullb_device_attr_use_lightnvm, &nullb_device_attr_blocking, &nullb_device_attr_use_per_node_hctx, + &nullb_device_attr_power, NULL, }; @@ -335,6 +381,15 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name) static void nullb_group_drop_item(struct config_group *group, struct config_item *item) { + struct nullb_device *dev = to_nullb_device(item); + + if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) { + 
mutex_lock(&lock); + dev->power = false; + null_del_dev(dev->nullb); + mutex_unlock(&lock); + } + config_item_put(item); } @@ -973,11 +1028,35 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) return blk_mq_alloc_tag_set(set); } +static void null_validate_conf(struct nullb_device *dev) +{ + dev->blocksize = round_down(dev->blocksize, 512); + dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); + if (dev->use_lightnvm && dev->blocksize != 4096) + dev->blocksize = 4096; + + if (dev->use_lightnvm && dev->queue_mode != NULL_Q_MQ) + dev->queue_mode = NULL_Q_MQ; + + if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) { + if (dev->submit_queues != nr_online_nodes) + dev->submit_queues = nr_online_nodes; + } else if (dev->submit_queues > nr_cpu_ids) + dev->submit_queues = nr_cpu_ids; + else if (dev->submit_queues == 0) + dev->submit_queues = 1; + + dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); + dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER); +} + static int null_add_dev(struct nullb_device *dev) { struct nullb *nullb; int rv; + null_validate_conf(dev); + nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node); if (!nullb) { rv = -ENOMEM; @@ -1040,6 +1119,7 @@ static int null_add_dev(struct nullb_device *dev) mutex_lock(&lock); nullb->index = nullb_indexes++; + dev->index = nullb->index; mutex_unlock(&lock); blk_queue_logical_block_size(nullb->q, dev->blocksize); -- cgit From 94bc02e30fb8d04429ecf91820abbea0eb5c4ee1 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 14 Aug 2017 15:04:55 -0700 Subject: nullb: use ida to manage index We now dynamically create disks. Manage the disk index with an IDA to avoid bumping the index up too much. 
Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index cf14c46d3462..2f66627d8c4b 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -87,7 +87,7 @@ struct nullb { static LIST_HEAD(nullb_list); static struct mutex lock; static int null_major; -static int nullb_indexes; +static DEFINE_IDA(nullb_indexes); static struct kmem_cache *ppa_cache; static struct blk_mq_tag_set tag_set; @@ -871,6 +871,8 @@ static void null_del_dev(struct nullb *nullb) { struct nullb_device *dev = nullb->dev; + ida_simple_remove(&nullb_indexes, nullb->index); + list_del_init(&nullb->list); if (dev->use_lightnvm) @@ -1118,7 +1120,7 @@ static int null_add_dev(struct nullb_device *dev) queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q); mutex_lock(&lock); - nullb->index = nullb_indexes++; + nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); dev->index = nullb->index; mutex_unlock(&lock); -- cgit From 5bcd0e0c79b513261172d1348f93d1bf00dffbdf Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 14 Aug 2017 15:04:56 -0700 Subject: nullb: support memory backed store This adds a memory-backed store to nullb. The user sets the 'memory_backed' attribute to enable it. By default, a nullb disk doesn't use the memory-backed store. 
Based on original patch from Kyungchan Koh Signed-off-by: Kyungchan Koh Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 339 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 330 insertions(+), 9 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 2f66627d8c4b..45e0b565f448 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -15,6 +15,14 @@ #include #include +#define SECTOR_SHIFT 9 +#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) +#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) +#define SECTOR_SIZE (1 << SECTOR_SHIFT) +#define SECTOR_MASK (PAGE_SECTORS - 1) + +#define FREE_BATCH 16 + struct nullb_cmd { struct list_head list; struct llist_node ll_list; @@ -24,6 +32,7 @@ struct nullb_cmd { unsigned int tag; struct nullb_queue *nq; struct hrtimer timer; + blk_status_t error; }; struct nullb_queue { @@ -46,9 +55,23 @@ enum nullb_device_flags { NULLB_DEV_FL_UP = 1, }; +/* + * nullb_page is a page in memory for nullb devices. + * + * @page: The page holding the data. + * @bitmap: The bitmap represents which sector in the page has data. + * Each bit represents one block size. 
For example, sector 8 + * will use the 7th bit + */ +struct nullb_page { + struct page *page; + unsigned long bitmap; +}; + struct nullb_device { struct nullb *nullb; struct config_item item; + struct radix_tree_root data; /* data stored in the disk */ unsigned long flags; /* device flags */ unsigned long size; /* device size in MB */ @@ -64,6 +87,7 @@ struct nullb_device { bool blocking; /* blocking blk-mq device */ bool use_per_node_hctx; /* use per-node allocation for hardware context */ bool power; /* power on/off the device */ + bool memory_backed; /* if data is stored in memory */ }; struct nullb { @@ -197,6 +221,7 @@ static struct nullb_device *null_alloc_dev(void); static void null_free_dev(struct nullb_device *dev); static void null_del_dev(struct nullb *nullb); static int null_add_dev(struct nullb_device *dev); +static void null_free_device_storage(struct nullb_device *dev); static inline struct nullb_device *to_nullb_device(struct config_item *item) { @@ -292,6 +317,7 @@ NULLB_DEVICE_ATTR(index, uint); NULLB_DEVICE_ATTR(use_lightnvm, bool); NULLB_DEVICE_ATTR(blocking, bool); NULLB_DEVICE_ATTR(use_per_node_hctx, bool); +NULLB_DEVICE_ATTR(memory_backed, bool); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -346,12 +372,16 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_blocking, &nullb_device_attr_use_per_node_hctx, &nullb_device_attr_power, + &nullb_device_attr_memory_backed, NULL, }; static void nullb_device_release(struct config_item *item) { - null_free_dev(to_nullb_device(item)); + struct nullb_device *dev = to_nullb_device(item); + + null_free_device_storage(dev); + null_free_dev(dev); } static struct configfs_item_operations nullb_device_ops = { @@ -395,7 +425,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { - return snprintf(page, PAGE_SIZE, "\n"); + return snprintf(page, 
PAGE_SIZE, "memory_backed\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -432,6 +462,7 @@ static struct nullb_device *null_alloc_dev(void) dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; + INIT_RADIX_TREE(&dev->data, GFP_ATOMIC); dev->size = g_gb * 1024; dev->completion_nsec = g_completion_nsec; dev->submit_queues = g_submit_queues; @@ -532,13 +563,14 @@ static void end_cmd(struct nullb_cmd *cmd) switch (queue_mode) { case NULL_Q_MQ: - blk_mq_end_request(cmd->rq, BLK_STS_OK); + blk_mq_end_request(cmd->rq, cmd->error); return; case NULL_Q_RQ: INIT_LIST_HEAD(&cmd->rq->queuelist); - blk_end_request_all(cmd->rq, BLK_STS_OK); + blk_end_request_all(cmd->rq, cmd->error); break; case NULL_Q_BIO: + cmd->bio->bi_status = cmd->error; bio_endio(cmd->bio); break; } @@ -579,12 +611,297 @@ static void null_softirq_done_fn(struct request *rq) end_cmd(rq->special); } -static inline void null_handle_cmd(struct nullb_cmd *cmd) +static struct nullb_page *null_alloc_page(gfp_t gfp_flags) +{ + struct nullb_page *t_page; + + t_page = kmalloc(sizeof(struct nullb_page), gfp_flags); + if (!t_page) + goto out; + + t_page->page = alloc_pages(gfp_flags, 0); + if (!t_page->page) + goto out_freepage; + + t_page->bitmap = 0; + return t_page; +out_freepage: + kfree(t_page); +out: + return NULL; +} + +static void null_free_page(struct nullb_page *t_page) +{ + __free_page(t_page->page); + kfree(t_page); +} + +static void null_free_sector(struct nullb *nullb, sector_t sector) +{ + unsigned int sector_bit; + u64 idx; + struct nullb_page *t_page, *ret; + struct radix_tree_root *root; + + root = &nullb->dev->data; + idx = sector >> PAGE_SECTORS_SHIFT; + sector_bit = (sector & SECTOR_MASK); + + t_page = radix_tree_lookup(root, idx); + if (t_page) { + __clear_bit(sector_bit, &t_page->bitmap); + + if (!t_page->bitmap) { + ret = radix_tree_delete_item(root, idx, t_page); + WARN_ON(ret != t_page); + null_free_page(ret); + } + } +} + +static struct nullb_page *null_radix_tree_insert(struct 
nullb *nullb, u64 idx, + struct nullb_page *t_page) { + struct radix_tree_root *root; + + root = &nullb->dev->data; + + if (radix_tree_insert(root, idx, t_page)) { + null_free_page(t_page); + t_page = radix_tree_lookup(root, idx); + WARN_ON(!t_page || t_page->page->index != idx); + } + + return t_page; +} + +static void null_free_device_storage(struct nullb_device *dev) +{ + unsigned long pos = 0; + int nr_pages; + struct nullb_page *ret, *t_pages[FREE_BATCH]; + struct radix_tree_root *root; + + root = &dev->data; + + do { + int i; + + nr_pages = radix_tree_gang_lookup(root, + (void **)t_pages, pos, FREE_BATCH); + + for (i = 0; i < nr_pages; i++) { + pos = t_pages[i]->page->index; + ret = radix_tree_delete_item(root, pos, t_pages[i]); + WARN_ON(ret != t_pages[i]); + null_free_page(ret); + } + + pos++; + } while (nr_pages == FREE_BATCH); +} + +static struct nullb_page *null_lookup_page(struct nullb *nullb, + sector_t sector, bool for_write) +{ + unsigned int sector_bit; + u64 idx; + struct nullb_page *t_page; + + idx = sector >> PAGE_SECTORS_SHIFT; + sector_bit = (sector & SECTOR_MASK); + + t_page = radix_tree_lookup(&nullb->dev->data, idx); + WARN_ON(t_page && t_page->page->index != idx); + + if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap))) + return t_page; + + return NULL; +} + +static struct nullb_page *null_insert_page(struct nullb *nullb, + sector_t sector) +{ + u64 idx; + struct nullb_page *t_page; + + t_page = null_lookup_page(nullb, sector, true); + if (t_page) + return t_page; + + spin_unlock_irq(&nullb->lock); + + t_page = null_alloc_page(GFP_NOIO); + if (!t_page) + goto out_lock; + + if (radix_tree_preload(GFP_NOIO)) + goto out_freepage; + + spin_lock_irq(&nullb->lock); + idx = sector >> PAGE_SECTORS_SHIFT; + t_page->page->index = idx; + t_page = null_radix_tree_insert(nullb, idx, t_page); + radix_tree_preload_end(); + + return t_page; +out_freepage: + null_free_page(t_page); +out_lock: + spin_lock_irq(&nullb->lock); + return 
null_lookup_page(nullb, sector, true); +} + +static int copy_to_nullb(struct nullb *nullb, struct page *source, + unsigned int off, sector_t sector, size_t n) +{ + size_t temp, count = 0; + unsigned int offset; + struct nullb_page *t_page; + void *dst, *src; + + while (count < n) { + temp = min_t(size_t, nullb->dev->blocksize, n - count); + + offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; + t_page = null_insert_page(nullb, sector); + if (!t_page) + return -ENOSPC; + + src = kmap_atomic(source); + dst = kmap_atomic(t_page->page); + memcpy(dst + offset, src + off + count, temp); + kunmap_atomic(dst); + kunmap_atomic(src); + + __set_bit(sector & SECTOR_MASK, &t_page->bitmap); + + count += temp; + sector += temp >> SECTOR_SHIFT; + } + return 0; +} + +static int copy_from_nullb(struct nullb *nullb, struct page *dest, + unsigned int off, sector_t sector, size_t n) +{ + size_t temp, count = 0; + unsigned int offset; + struct nullb_page *t_page; + void *dst, *src; + + while (count < n) { + temp = min_t(size_t, nullb->dev->blocksize, n - count); + + offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; + t_page = null_lookup_page(nullb, sector, false); + + dst = kmap_atomic(dest); + if (!t_page) { + memset(dst + off + count, 0, temp); + goto next; + } + src = kmap_atomic(t_page->page); + memcpy(dst + off + count, src + offset, temp); + kunmap_atomic(src); +next: + kunmap_atomic(dst); + + count += temp; + sector += temp >> SECTOR_SHIFT; + } + return 0; +} + +static int null_transfer(struct nullb *nullb, struct page *page, + unsigned int len, unsigned int off, bool is_write, sector_t sector) +{ + int err = 0; + + if (!is_write) { + err = copy_from_nullb(nullb, page, off, sector, len); + flush_dcache_page(page); + } else { + flush_dcache_page(page); + err = copy_to_nullb(nullb, page, off, sector, len); + } + + return err; +} + +static int null_handle_rq(struct nullb_cmd *cmd) +{ + struct request *rq = cmd->rq; + struct nullb *nullb = cmd->nq->dev->nullb; + int err; + unsigned int 
len; + sector_t sector; + struct req_iterator iter; + struct bio_vec bvec; + + sector = blk_rq_pos(rq); + + spin_lock_irq(&nullb->lock); + rq_for_each_segment(bvec, rq, iter) { + len = bvec.bv_len; + err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, + op_is_write(req_op(rq)), sector); + if (err) { + spin_unlock_irq(&nullb->lock); + return err; + } + sector += len >> SECTOR_SHIFT; + } + spin_unlock_irq(&nullb->lock); + + return 0; +} + +static int null_handle_bio(struct nullb_cmd *cmd) +{ + struct bio *bio = cmd->bio; + struct nullb *nullb = cmd->nq->dev->nullb; + int err; + unsigned int len; + sector_t sector; + struct bio_vec bvec; + struct bvec_iter iter; + + sector = bio->bi_iter.bi_sector; + + spin_lock_irq(&nullb->lock); + bio_for_each_segment(bvec, bio, iter) { + len = bvec.bv_len; + err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, + op_is_write(bio_op(bio)), sector); + if (err) { + spin_unlock_irq(&nullb->lock); + return err; + } + sector += len >> SECTOR_SHIFT; + } + spin_unlock_irq(&nullb->lock); + return 0; +} + +static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) +{ + struct nullb_device *dev = cmd->nq->dev; + int err = 0; + + if (dev->memory_backed) { + if (dev->queue_mode == NULL_Q_BIO) + err = null_handle_bio(cmd); + else + err = null_handle_rq(cmd); + } + cmd->error = errno_to_blk_status(err); /* Complete IO by inline, softirq or timer */ - switch (cmd->nq->dev->irqmode) { + switch (dev->irqmode) { case NULL_IRQ_SOFTIRQ: - switch (cmd->nq->dev->queue_mode) { + switch (dev->queue_mode) { case NULL_Q_MQ: blk_mq_complete_request(cmd->rq); break; @@ -606,6 +923,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd) null_cmd_end_timer(cmd); break; } + return BLK_STS_OK; } static struct nullb_queue *nullb_to_queue(struct nullb *nullb) @@ -678,8 +996,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); - null_handle_cmd(cmd); - return BLK_STS_OK; + return 
null_handle_cmd(cmd); } static const struct blk_mq_ops null_mq_ops = { @@ -1050,6 +1367,10 @@ static void null_validate_conf(struct nullb_device *dev) dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER); + + /* Do memory allocation, so set blocking */ + if (dev->memory_backed) + dev->blocking = true; } static int null_add_dev(struct nullb_device *dev) -- cgit From 306eb6b4ad4f2d51c989b9e3d3a9271c44408431 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 14 Aug 2017 15:04:57 -0700 Subject: nullb: support discard Discard makes sense for a memory-backed disk. It is also useful for testing whether the upper layer supports discard correctly. The user sets the 'discard' attribute to enable/disable discard support. Based on original patch from Kyungchan Koh Signed-off-by: Kyungchan Koh Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 45e0b565f448..1f3cf257f2e7 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -88,6 +88,7 @@ struct nullb_device { bool use_per_node_hctx; /* use per-node allocation for hardware context */ bool power; /* power on/off the device */ bool memory_backed; /* if data is stored in memory */ + bool discard; /* if support discard */ }; struct nullb { @@ -318,6 +319,7 @@ NULLB_DEVICE_ATTR(use_lightnvm, bool); NULLB_DEVICE_ATTR(blocking, bool); NULLB_DEVICE_ATTR(use_per_node_hctx, bool); NULLB_DEVICE_ATTR(memory_backed, bool); +NULLB_DEVICE_ATTR(discard, bool); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -373,6 +375,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_use_per_node_hctx, &nullb_device_attr_power, &nullb_device_attr_memory_backed, + 
&nullb_device_attr_discard, NULL, }; @@ -425,7 +428,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { - return snprintf(page, PAGE_SIZE, "memory_backed\n"); + return snprintf(page, PAGE_SIZE, "memory_backed,discard\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -815,6 +818,20 @@ next: return 0; } +static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) +{ + size_t temp; + + spin_lock_irq(&nullb->lock); + while (n > 0) { + temp = min_t(size_t, n, nullb->dev->blocksize); + null_free_sector(nullb, sector); + sector += temp >> SECTOR_SHIFT; + n -= temp; + } + spin_unlock_irq(&nullb->lock); +} + static int null_transfer(struct nullb *nullb, struct page *page, unsigned int len, unsigned int off, bool is_write, sector_t sector) { @@ -843,6 +860,11 @@ static int null_handle_rq(struct nullb_cmd *cmd) sector = blk_rq_pos(rq); + if (req_op(rq) == REQ_OP_DISCARD) { + null_handle_discard(nullb, sector, blk_rq_bytes(rq)); + return 0; + } + spin_lock_irq(&nullb->lock); rq_for_each_segment(bvec, rq, iter) { len = bvec.bv_len; @@ -871,6 +893,12 @@ static int null_handle_bio(struct nullb_cmd *cmd) sector = bio->bi_iter.bi_sector; + if (bio_op(bio) == REQ_OP_DISCARD) { + null_handle_discard(nullb, sector, + bio_sectors(bio) << SECTOR_SHIFT); + return 0; + } + spin_lock_irq(&nullb->lock); bio_for_each_segment(bvec, bio, iter) { len = bvec.bv_len; @@ -1207,6 +1235,16 @@ static void null_del_dev(struct nullb *nullb) dev->nullb = NULL; } +static void null_config_discard(struct nullb *nullb) +{ + if (nullb->dev->discard == false) + return; + nullb->q->limits.discard_granularity = nullb->dev->blocksize; + nullb->q->limits.discard_alignment = nullb->dev->blocksize; + blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nullb->q); +} + static int null_open(struct block_device *bdev, fmode_t mode) { return 
0; @@ -1448,6 +1486,8 @@ static int null_add_dev(struct nullb_device *dev) blk_queue_logical_block_size(nullb->q, dev->blocksize); blk_queue_physical_block_size(nullb->q, dev->blocksize); + null_config_discard(nullb); + sprintf(nullb->disk_name, "nullb%d", nullb->index); if (dev->use_lightnvm) -- cgit From eff2c4f108735ddfce37a912a133938d96d70356 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 14 Aug 2017 15:04:58 -0700 Subject: nullb: bandwidth control In testing, we usually want a controllable disk speed. For example, in a RAID array, we'd like some disks to be fast and some to be slow. MD RAID actually has a feature for this. To test the feature, we'd like to make the disk run at a specific speed. Block throttling could probably be used for this purpose, but it requires cgroup setup. Here we just implement a simple throttling mechanism in the driver. There is slight fluctuation in the mechanism, but it's good enough for testing. To configure the bandwidth cap, the user sets the 'mbps' attribute. mbps is MB/s. Based on original patch from Kyungchan Koh Signed-off-by: Kyungchan Koh Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 2 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 1f3cf257f2e7..7e6332e836e6 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -23,6 +23,14 @@ #define FREE_BATCH 16 +#define TICKS_PER_SEC 50ULL +#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC) + +static inline u64 mb_per_tick(int mbps) +{ + return (1 << 20) / TICKS_PER_SEC * ((u64) mbps); +} + struct nullb_cmd { struct list_head list; struct llist_node ll_list; @@ -49,10 +57,12 @@ struct nullb_queue { * * CONFIGURED: Device has been configured and turned on. Cannot reconfigure. * UP: Device is currently on and visible in userspace. + * THROTTLED: Device is being throttled. 
*/ enum nullb_device_flags { NULLB_DEV_FL_CONFIGURED = 0, NULLB_DEV_FL_UP = 1, + NULLB_DEV_FL_THROTTLED = 2, }; /* @@ -83,6 +93,7 @@ struct nullb_device { unsigned int irqmode; /* IRQ completion handler */ unsigned int hw_queue_depth; /* queue depth */ unsigned int index; /* index of the disk, only valid with a disk */ + unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */ bool use_lightnvm; /* register as a LightNVM device */ bool blocking; /* blocking blk-mq device */ bool use_per_node_hctx; /* use per-node allocation for hardware context */ @@ -100,8 +111,9 @@ struct nullb { struct nvm_dev *ndev; struct blk_mq_tag_set *tag_set; struct blk_mq_tag_set __tag_set; - struct hrtimer timer; unsigned int queue_depth; + atomic_long_t cur_bytes; + struct hrtimer bw_timer; spinlock_t lock; struct nullb_queue *queues; @@ -320,6 +332,7 @@ NULLB_DEVICE_ATTR(blocking, bool); NULLB_DEVICE_ATTR(use_per_node_hctx, bool); NULLB_DEVICE_ATTR(memory_backed, bool); NULLB_DEVICE_ATTR(discard, bool); +NULLB_DEVICE_ATTR(mbps, uint); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -376,6 +389,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_power, &nullb_device_attr_memory_backed, &nullb_device_attr_discard, + &nullb_device_attr_mbps, NULL, }; @@ -428,7 +442,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { - return snprintf(page, PAGE_SIZE, "memory_backed,discard\n"); + return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -914,11 +928,65 @@ static int null_handle_bio(struct nullb_cmd *cmd) return 0; } +static void null_stop_queue(struct nullb *nullb) +{ + struct request_queue *q = nullb->q; + + if (nullb->dev->queue_mode == NULL_Q_MQ) + blk_mq_stop_hw_queues(q); + else { + spin_lock_irq(q->queue_lock); + blk_stop_queue(q); + 
spin_unlock_irq(q->queue_lock); + } +} + +static void null_restart_queue_async(struct nullb *nullb) +{ + struct request_queue *q = nullb->q; + unsigned long flags; + + if (nullb->dev->queue_mode == NULL_Q_MQ) + blk_mq_start_stopped_hw_queues(q, true); + else { + spin_lock_irqsave(q->queue_lock, flags); + blk_start_queue_async(q); + spin_unlock_irqrestore(q->queue_lock, flags); + } +} + static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) { struct nullb_device *dev = cmd->nq->dev; + struct nullb *nullb = dev->nullb; int err = 0; + if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) { + struct request *rq = cmd->rq; + + if (!hrtimer_active(&nullb->bw_timer)) + hrtimer_restart(&nullb->bw_timer); + + if (atomic_long_sub_return(blk_rq_bytes(rq), + &nullb->cur_bytes) < 0) { + null_stop_queue(nullb); + /* race with timer */ + if (atomic_long_read(&nullb->cur_bytes) > 0) + null_restart_queue_async(nullb); + if (dev->queue_mode == NULL_Q_RQ) { + struct request_queue *q = nullb->q; + + spin_lock_irq(q->queue_lock); + rq->rq_flags |= RQF_DONTPREP; + blk_requeue_request(q, rq); + spin_unlock_irq(q->queue_lock); + return BLK_STS_OK; + } else + /* requeue request */ + return BLK_STS_RESOURCE; + } + } + if (dev->memory_backed) { if (dev->queue_mode == NULL_Q_BIO) err = null_handle_bio(cmd); @@ -954,6 +1022,33 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) return BLK_STS_OK; } +static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer) +{ + struct nullb *nullb = container_of(timer, struct nullb, bw_timer); + ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL); + unsigned int mbps = nullb->dev->mbps; + + if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps)) + return HRTIMER_NORESTART; + + atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps)); + null_restart_queue_async(nullb); + + hrtimer_forward_now(&nullb->bw_timer, timer_interval); + + return HRTIMER_RESTART; +} + +static void nullb_setup_bwtimer(struct nullb *nullb) +{ + ktime_t 
timer_interval = ktime_set(0, TIMER_INTERVAL); + + hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + nullb->bw_timer.function = nullb_bwtimer_fn; + atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps)); + hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL); +} + static struct nullb_queue *nullb_to_queue(struct nullb *nullb) { int index = 0; @@ -1224,6 +1319,13 @@ static void null_del_dev(struct nullb *nullb) null_nvm_unregister(nullb); else del_gendisk(nullb->disk); + + if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) { + hrtimer_cancel(&nullb->bw_timer); + atomic_long_set(&nullb->cur_bytes, LONG_MAX); + null_restart_queue_async(nullb); + } + blk_cleanup_queue(nullb->q); if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) @@ -1409,6 +1511,11 @@ static void null_validate_conf(struct nullb_device *dev) /* Do memory allocation, so set blocking */ if (dev->memory_backed) dev->blocking = true; + + dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps); + /* can not stop a queue */ + if (dev->queue_mode == NULL_Q_BIO) + dev->mbps = 0; } static int null_add_dev(struct nullb_device *dev) @@ -1474,6 +1581,11 @@ static int null_add_dev(struct nullb_device *dev) goto out_cleanup_blk_queue; } + if (dev->mbps) { + set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags); + nullb_setup_bwtimer(nullb); + } + nullb->q->queuedata = nullb; queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q); -- cgit From deb78b419dfda333318a6ed1fe8e8c6245dd0d43 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 14 Aug 2017 15:04:59 -0700 Subject: nullb: emulate cache Software must flush disk cache to guarantee data safety. To check if software correctly does disk cache flush, we must know the behavior of disk. But physical disk behavior is uncontrollable. Even software doesn't do the flush, the disk probably does the flush. This patch tries to emulate a cache in the test disk. 
All writes go to a cache first; when the cache is full, we flush some data to disk storage. A flush request will flush all data in the cache to disk storage. A FUA write will write to the memory store directly and revalidate data in cache. If there is a power failure (by writing to the power attribute, 'echo 0 > disk_name/power'), we discard all data in the cache, but preserve the data in disk storage. Later we can power on the disk again as usual (write 1 to the 'power' attribute), then we can check data integrity and verify that software does everything correctly. A new attribute 'cache_size' (in MB) is added to configure the cache size. Based on original patch from Kyungchan Koh Signed-off-by: Kyungchan Koh Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 261 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 231 insertions(+), 30 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 7e6332e836e6..e138a670a2a4 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -58,11 +58,13 @@ struct nullb_queue { * CONFIGURED: Device has been configured and turned on. Cannot reconfigure. * UP: Device is currently on and visible in userspace. * THROTTLED: Device is being throttled. + * CACHE: Device is using a write-back cache. */ enum nullb_device_flags { NULLB_DEV_FL_CONFIGURED = 0, NULLB_DEV_FL_UP = 1, NULLB_DEV_FL_THROTTLED = 2, + NULLB_DEV_FL_CACHE = 3, }; /* @@ -72,20 +74,29 @@ enum nullb_device_flags { * @bitmap: The bitmap represents which sector in the page has data. * Each bit represents one block size. For example, sector 8 * will use the 7th bit + * The highest 2 bits of bitmap are for special purpose. LOCK means the cache + * page is being flushing to storage. FREE means the cache page is freed and + * should be skipped from flushing to storage. 
Please see + * null_make_cache_space */ struct nullb_page { struct page *page; unsigned long bitmap; }; +#define NULLB_PAGE_LOCK (sizeof(unsigned long) * 8 - 1) +#define NULLB_PAGE_FREE (sizeof(unsigned long) * 8 - 2) struct nullb_device { struct nullb *nullb; struct config_item item; struct radix_tree_root data; /* data stored in the disk */ + struct radix_tree_root cache; /* disk cache data */ unsigned long flags; /* device flags */ + unsigned int curr_cache; unsigned long size; /* device size in MB */ unsigned long completion_nsec; /* time in ns to complete a request */ + unsigned long cache_size; /* disk cache size in MB */ unsigned int submit_queues; /* number of submission queues */ unsigned int home_node; /* home node for the device */ unsigned int queue_mode; /* block interface */ @@ -114,6 +125,7 @@ struct nullb { unsigned int queue_depth; atomic_long_t cur_bytes; struct hrtimer bw_timer; + unsigned long cache_flush_pos; spinlock_t lock; struct nullb_queue *queues; @@ -234,7 +246,7 @@ static struct nullb_device *null_alloc_dev(void); static void null_free_dev(struct nullb_device *dev); static void null_del_dev(struct nullb *nullb); static int null_add_dev(struct nullb_device *dev); -static void null_free_device_storage(struct nullb_device *dev); +static void null_free_device_storage(struct nullb_device *dev, bool is_cache); static inline struct nullb_device *to_nullb_device(struct config_item *item) { @@ -333,6 +345,7 @@ NULLB_DEVICE_ATTR(use_per_node_hctx, bool); NULLB_DEVICE_ATTR(memory_backed, bool); NULLB_DEVICE_ATTR(discard, bool); NULLB_DEVICE_ATTR(mbps, uint); +NULLB_DEVICE_ATTR(cache_size, ulong); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -390,6 +403,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_memory_backed, &nullb_device_attr_discard, &nullb_device_attr_mbps, + &nullb_device_attr_cache_size, NULL, }; @@ -397,7 +411,7 @@ static void nullb_device_release(struct 
config_item *item) { struct nullb_device *dev = to_nullb_device(item); - null_free_device_storage(dev); + null_free_device_storage(dev, false); null_free_dev(dev); } @@ -442,7 +456,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { - return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth\n"); + return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -472,6 +486,11 @@ static struct configfs_subsystem nullb_subsys = { }, }; +static inline int null_cache_active(struct nullb *nullb) +{ + return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); +} + static struct nullb_device *null_alloc_dev(void) { struct nullb_device *dev; @@ -480,6 +499,7 @@ static struct nullb_device *null_alloc_dev(void) if (!dev) return NULL; INIT_RADIX_TREE(&dev->data, GFP_ATOMIC); + INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC); dev->size = g_gb * 1024; dev->completion_nsec = g_completion_nsec; dev->submit_queues = g_submit_queues; @@ -650,18 +670,22 @@ out: static void null_free_page(struct nullb_page *t_page) { + __set_bit(NULLB_PAGE_FREE, &t_page->bitmap); + if (test_bit(NULLB_PAGE_LOCK, &t_page->bitmap)) + return; __free_page(t_page->page); kfree(t_page); } -static void null_free_sector(struct nullb *nullb, sector_t sector) +static void null_free_sector(struct nullb *nullb, sector_t sector, + bool is_cache) { unsigned int sector_bit; u64 idx; struct nullb_page *t_page, *ret; struct radix_tree_root *root; - root = &nullb->dev->data; + root = is_cache ? 
&nullb->dev->cache : &nullb->dev->data; idx = sector >> PAGE_SECTORS_SHIFT; sector_bit = (sector & SECTOR_MASK); @@ -673,34 +697,37 @@ static void null_free_sector(struct nullb *nullb, sector_t sector) ret = radix_tree_delete_item(root, idx, t_page); WARN_ON(ret != t_page); null_free_page(ret); + if (is_cache) + nullb->dev->curr_cache -= PAGE_SIZE; } } } static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx, - struct nullb_page *t_page) + struct nullb_page *t_page, bool is_cache) { struct radix_tree_root *root; - root = &nullb->dev->data; + root = is_cache ? &nullb->dev->cache : &nullb->dev->data; if (radix_tree_insert(root, idx, t_page)) { null_free_page(t_page); t_page = radix_tree_lookup(root, idx); WARN_ON(!t_page || t_page->page->index != idx); - } + } else if (is_cache) + nullb->dev->curr_cache += PAGE_SIZE; return t_page; } -static void null_free_device_storage(struct nullb_device *dev) +static void null_free_device_storage(struct nullb_device *dev, bool is_cache) { unsigned long pos = 0; int nr_pages; struct nullb_page *ret, *t_pages[FREE_BATCH]; struct radix_tree_root *root; - root = &dev->data; + root = is_cache ? &dev->cache : &dev->data; do { int i; @@ -717,19 +744,24 @@ static void null_free_device_storage(struct nullb_device *dev) pos++; } while (nr_pages == FREE_BATCH); + + if (is_cache) + dev->curr_cache = 0; } -static struct nullb_page *null_lookup_page(struct nullb *nullb, - sector_t sector, bool for_write) +static struct nullb_page *__null_lookup_page(struct nullb *nullb, + sector_t sector, bool for_write, bool is_cache) { unsigned int sector_bit; u64 idx; struct nullb_page *t_page; + struct radix_tree_root *root; idx = sector >> PAGE_SECTORS_SHIFT; sector_bit = (sector & SECTOR_MASK); - t_page = radix_tree_lookup(&nullb->dev->data, idx); + root = is_cache ? 
&nullb->dev->cache : &nullb->dev->data; + t_page = radix_tree_lookup(root, idx); WARN_ON(t_page && t_page->page->index != idx); if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap))) @@ -738,13 +770,25 @@ static struct nullb_page *null_lookup_page(struct nullb *nullb, return NULL; } +static struct nullb_page *null_lookup_page(struct nullb *nullb, + sector_t sector, bool for_write, bool ignore_cache) +{ + struct nullb_page *page = NULL; + + if (!ignore_cache) + page = __null_lookup_page(nullb, sector, for_write, true); + if (page) + return page; + return __null_lookup_page(nullb, sector, for_write, false); +} + static struct nullb_page *null_insert_page(struct nullb *nullb, - sector_t sector) + sector_t sector, bool ignore_cache) { u64 idx; struct nullb_page *t_page; - t_page = null_lookup_page(nullb, sector, true); + t_page = null_lookup_page(nullb, sector, true, ignore_cache); if (t_page) return t_page; @@ -760,7 +804,7 @@ static struct nullb_page *null_insert_page(struct nullb *nullb, spin_lock_irq(&nullb->lock); idx = sector >> PAGE_SECTORS_SHIFT; t_page->page->index = idx; - t_page = null_radix_tree_insert(nullb, idx, t_page); + t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache); radix_tree_preload_end(); return t_page; @@ -768,11 +812,113 @@ out_freepage: null_free_page(t_page); out_lock: spin_lock_irq(&nullb->lock); - return null_lookup_page(nullb, sector, true); + return null_lookup_page(nullb, sector, true, ignore_cache); +} + +static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) +{ + int i; + unsigned int offset; + u64 idx; + struct nullb_page *t_page, *ret; + void *dst, *src; + + idx = c_page->page->index; + + t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true); + + __clear_bit(NULLB_PAGE_LOCK, &c_page->bitmap); + if (test_bit(NULLB_PAGE_FREE, &c_page->bitmap)) { + null_free_page(c_page); + if (t_page && t_page->bitmap == 0) { + ret = radix_tree_delete_item(&nullb->dev->data, + idx, 
t_page); + null_free_page(t_page); + } + return 0; + } + + if (!t_page) + return -ENOMEM; + + src = kmap_atomic(c_page->page); + dst = kmap_atomic(t_page->page); + + for (i = 0; i < PAGE_SECTORS; + i += (nullb->dev->blocksize >> SECTOR_SHIFT)) { + if (test_bit(i, &c_page->bitmap)) { + offset = (i << SECTOR_SHIFT); + memcpy(dst + offset, src + offset, + nullb->dev->blocksize); + __set_bit(i, &t_page->bitmap); + } + } + + kunmap_atomic(dst); + kunmap_atomic(src); + + ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page); + null_free_page(ret); + nullb->dev->curr_cache -= PAGE_SIZE; + + return 0; +} + +static int null_make_cache_space(struct nullb *nullb, unsigned long n) +{ + int i, err, nr_pages; + struct nullb_page *c_pages[FREE_BATCH]; + unsigned long flushed = 0, one_round; + +again: + if ((nullb->dev->cache_size * 1024 * 1024) > + nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0) + return 0; + + nr_pages = radix_tree_gang_lookup(&nullb->dev->cache, + (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH); + /* + * nullb_flush_cache_page could unlock before using the c_pages. 
To + * avoid race, we don't allow page free + */ + for (i = 0; i < nr_pages; i++) { + nullb->cache_flush_pos = c_pages[i]->page->index; + /* + * We found the page which is being flushed to disk by other + * threads + */ + if (test_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap)) + c_pages[i] = NULL; + else + __set_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap); + } + + one_round = 0; + for (i = 0; i < nr_pages; i++) { + if (c_pages[i] == NULL) + continue; + err = null_flush_cache_page(nullb, c_pages[i]); + if (err) + return err; + one_round++; + } + flushed += one_round << PAGE_SHIFT; + + if (n > flushed) { + if (nr_pages == 0) + nullb->cache_flush_pos = 0; + if (one_round == 0) { + /* give other threads a chance */ + spin_unlock_irq(&nullb->lock); + spin_lock_irq(&nullb->lock); + } + goto again; + } + return 0; } static int copy_to_nullb(struct nullb *nullb, struct page *source, - unsigned int off, sector_t sector, size_t n) + unsigned int off, sector_t sector, size_t n, bool is_fua) { size_t temp, count = 0; unsigned int offset; @@ -782,8 +928,12 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source, while (count < n) { temp = min_t(size_t, nullb->dev->blocksize, n - count); + if (null_cache_active(nullb) && !is_fua) + null_make_cache_space(nullb, PAGE_SIZE); + offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; - t_page = null_insert_page(nullb, sector); + t_page = null_insert_page(nullb, sector, + !null_cache_active(nullb) || is_fua); if (!t_page) return -ENOSPC; @@ -795,6 +945,9 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source, __set_bit(sector & SECTOR_MASK, &t_page->bitmap); + if (is_fua) + null_free_sector(nullb, sector, true); + count += temp; sector += temp >> SECTOR_SHIFT; } @@ -813,7 +966,8 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest, temp = min_t(size_t, nullb->dev->blocksize, n - count); offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; - t_page = null_lookup_page(nullb, sector, false); + t_page = 
null_lookup_page(nullb, sector, false, + !null_cache_active(nullb)); dst = kmap_atomic(dest); if (!t_page) { @@ -839,15 +993,38 @@ static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) spin_lock_irq(&nullb->lock); while (n > 0) { temp = min_t(size_t, n, nullb->dev->blocksize); - null_free_sector(nullb, sector); + null_free_sector(nullb, sector, false); + if (null_cache_active(nullb)) + null_free_sector(nullb, sector, true); sector += temp >> SECTOR_SHIFT; n -= temp; } spin_unlock_irq(&nullb->lock); } +static int null_handle_flush(struct nullb *nullb) +{ + int err; + + if (!null_cache_active(nullb)) + return 0; + + spin_lock_irq(&nullb->lock); + while (true) { + err = null_make_cache_space(nullb, + nullb->dev->cache_size * 1024 * 1024); + if (err || nullb->dev->curr_cache == 0) + break; + } + + WARN_ON(!radix_tree_empty(&nullb->dev->cache)); + spin_unlock_irq(&nullb->lock); + return err; +} + static int null_transfer(struct nullb *nullb, struct page *page, - unsigned int len, unsigned int off, bool is_write, sector_t sector) + unsigned int len, unsigned int off, bool is_write, sector_t sector, + bool is_fua) { int err = 0; @@ -856,7 +1033,7 @@ static int null_transfer(struct nullb *nullb, struct page *page, flush_dcache_page(page); } else { flush_dcache_page(page); - err = copy_to_nullb(nullb, page, off, sector, len); + err = copy_to_nullb(nullb, page, off, sector, len, is_fua); } return err; @@ -883,7 +1060,8 @@ static int null_handle_rq(struct nullb_cmd *cmd) rq_for_each_segment(bvec, rq, iter) { len = bvec.bv_len; err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, - op_is_write(req_op(rq)), sector); + op_is_write(req_op(rq)), sector, + req_op(rq) & REQ_FUA); if (err) { spin_unlock_irq(&nullb->lock); return err; @@ -917,7 +1095,8 @@ static int null_handle_bio(struct nullb_cmd *cmd) bio_for_each_segment(bvec, bio, iter) { len = bvec.bv_len; err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, - op_is_write(bio_op(bio)), 
sector); + op_is_write(bio_op(bio)), sector, + bio_op(bio) & REQ_FUA); if (err) { spin_unlock_irq(&nullb->lock); return err; @@ -988,10 +1167,17 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) } if (dev->memory_backed) { - if (dev->queue_mode == NULL_Q_BIO) - err = null_handle_bio(cmd); - else - err = null_handle_rq(cmd); + if (dev->queue_mode == NULL_Q_BIO) { + if (bio_op(cmd->bio) == REQ_OP_FLUSH) + err = null_handle_flush(nullb); + else + err = null_handle_bio(cmd); + } else { + if (req_op(cmd->rq) == REQ_OP_FLUSH) + err = null_handle_flush(nullb); + else + err = null_handle_rq(cmd); + } } cmd->error = errno_to_blk_status(err); /* Complete IO by inline, softirq or timer */ @@ -1333,6 +1519,8 @@ static void null_del_dev(struct nullb *nullb) if (!dev->use_lightnvm) put_disk(nullb->disk); cleanup_queues(nullb); + if (null_cache_active(nullb)) + null_free_device_storage(nullb->dev, true); kfree(nullb); dev->nullb = NULL; } @@ -1511,7 +1699,10 @@ static void null_validate_conf(struct nullb_device *dev) /* Do memory allocation, so set blocking */ if (dev->memory_backed) dev->blocking = true; - + else /* cache is meaningless */ + dev->cache_size = 0; + dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024, + dev->cache_size); dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps); /* can not stop a queue */ if (dev->queue_mode == NULL_Q_BIO) @@ -1586,6 +1777,12 @@ static int null_add_dev(struct nullb_device *dev) nullb_setup_bwtimer(nullb); } + if (dev->cache_size > 0) { + set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); + blk_queue_write_cache(nullb->q, true, true); + blk_queue_flush_queueable(nullb->q, true); + } + nullb->q->queuedata = nullb; queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q); @@ -1636,6 +1833,10 @@ static int __init null_init(void) struct nullb *nullb; struct nullb_device *dev; + /* check for nullb_page.bitmap */ + if (sizeof(unsigned long) * 8 - 2 < (PAGE_SIZE >> 
SECTOR_SHIFT)) + return -EINVAL; + if (g_bs > PAGE_SIZE) { pr_warn("null_blk: invalid block size\n"); pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); -- cgit From 2f54a613c9421ddd5897f861145ed0b8615a2ec4 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 14 Aug 2017 15:05:00 -0700 Subject: nullb: badblocks support Sometimes a disk has broken tracks and the data there is inaccessible, but data in other parts can be accessed in the normal way. MD RAID supports such disks. But we don't have a good way to test it, because we can't control which part of a physical disk is bad. For a virtual disk, this can be easily controlled. This patch adds a new 'badblocks' attribute. Configure it in this way: echo "+1-100" > xxx/badblocks, this will mark sectors [1-100] as bad blocks. echo "-20-30" > xxx/badblocks, this will make sectors [20-30] good again. If bad blocks are accessed, the nullb disk will return an IO error. Other parts of the disk can be accessed in the normal way. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index e138a670a2a4..2032360abee6 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -14,6 +14,7 @@ #include #include #include +#include #define SECTOR_SHIFT 9 #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) @@ -93,6 +94,7 @@ struct nullb_device { struct radix_tree_root cache; /* disk cache data */ unsigned long flags; /* device flags */ unsigned int curr_cache; + struct badblocks badblocks; unsigned long size; /* device size in MB */ unsigned long completion_nsec; /* time in ns to complete a request */ @@ -386,6 +388,59 @@ static ssize_t nullb_device_power_store(struct config_item *item, CONFIGFS_ATTR(nullb_device_, power); +static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
+{ + struct nullb_device *t_dev = to_nullb_device(item); + + return badblocks_show(&t_dev->badblocks, page, 0); +} + +static ssize_t nullb_device_badblocks_store(struct config_item *item, + const char *page, size_t count) +{ + struct nullb_device *t_dev = to_nullb_device(item); + char *orig, *buf, *tmp; + u64 start, end; + int ret; + + orig = kstrndup(page, count, GFP_KERNEL); + if (!orig) + return -ENOMEM; + + buf = strstrip(orig); + + ret = -EINVAL; + if (buf[0] != '+' && buf[0] != '-') + goto out; + tmp = strchr(&buf[1], '-'); + if (!tmp) + goto out; + *tmp = '\0'; + ret = kstrtoull(buf + 1, 0, &start); + if (ret) + goto out; + ret = kstrtoull(tmp + 1, 0, &end); + if (ret) + goto out; + ret = -EINVAL; + if (start > end) + goto out; + /* enable badblocks */ + cmpxchg(&t_dev->badblocks.shift, -1, 0); + if (buf[0] == '+') + ret = badblocks_set(&t_dev->badblocks, start, + end - start + 1, 1); + else + ret = badblocks_clear(&t_dev->badblocks, start, + end - start + 1); + if (ret == 0) + ret = count; +out: + kfree(orig); + return ret; +} +CONFIGFS_ATTR(nullb_device_, badblocks); + static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_size, &nullb_device_attr_completion_nsec, @@ -404,6 +459,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_discard, &nullb_device_attr_mbps, &nullb_device_attr_cache_size, + &nullb_device_attr_badblocks, NULL, }; @@ -411,6 +467,7 @@ static void nullb_device_release(struct config_item *item) { struct nullb_device *dev = to_nullb_device(item); + badblocks_exit(&dev->badblocks); null_free_device_storage(dev, false); null_free_dev(dev); } @@ -456,7 +513,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { - return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache\n"); + return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks\n"); } 
CONFIGFS_ATTR_RO(memb_group_, features); @@ -500,6 +557,11 @@ static struct nullb_device *null_alloc_dev(void) return NULL; INIT_RADIX_TREE(&dev->data, GFP_ATOMIC); INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC); + if (badblocks_init(&dev->badblocks, 0)) { + kfree(dev); + return NULL; + } + dev->size = g_gb * 1024; dev->completion_nsec = g_completion_nsec; dev->submit_queues = g_submit_queues; @@ -1166,6 +1228,30 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) } } + if (nullb->dev->badblocks.shift != -1) { + int bad_sectors; + sector_t sector, size, first_bad; + bool is_flush = true; + + if (dev->queue_mode == NULL_Q_BIO && + bio_op(cmd->bio) != REQ_OP_FLUSH) { + is_flush = false; + sector = cmd->bio->bi_iter.bi_sector; + size = bio_sectors(cmd->bio); + } + if (dev->queue_mode != NULL_Q_BIO && + req_op(cmd->rq) != REQ_OP_FLUSH) { + is_flush = false; + sector = blk_rq_pos(cmd->rq); + size = blk_rq_sectors(cmd->rq); + } + if (!is_flush && badblocks_check(&nullb->dev->badblocks, sector, + size, &first_bad, &bad_sectors)) { + cmd->error = BLK_STS_IOERR; + goto out; + } + } + if (dev->memory_backed) { if (dev->queue_mode == NULL_Q_BIO) { if (bio_op(cmd->bio) == REQ_OP_FLUSH) @@ -1180,6 +1266,7 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) } } cmd->error = errno_to_blk_status(err); +out: /* Complete IO by inline, softirq or timer */ switch (dev->irqmode) { case NULL_IRQ_SOFTIRQ: -- cgit From 231b3db18d4be74e8b199916911c2c16db1790de Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 25 Aug 2017 12:53:15 -0600 Subject: null_blk: update email address Update to a working one, the fusionio address hasn't been valid in 4 years.
Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 2032360abee6..70b17db8c21f 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -2044,5 +2044,5 @@ static void __exit null_exit(void) module_init(null_init); module_exit(null_exit); -MODULE_AUTHOR("Jens Axboe "); +MODULE_AUTHOR("Jens Axboe "); MODULE_LICENSE("GPL"); -- cgit From 0d06a42f794bec6061e170fa9468d878051bc8b1 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 25 Aug 2017 13:46:25 -0700 Subject: block/nullb: fix NULL dereference Dan reported this: The patch 2984c8684f96: "nullb: factor disk parameters" from Aug 14, 2017, leads to the following Smatch complaint: drivers/block/null_blk.c:1759 null_init_tag_set() error: we previously assumed 'nullb' could be null (see line 1750) 1755 set->cmd_size = sizeof(struct nullb_cmd); 1756 set->flags = BLK_MQ_F_SHOULD_MERGE; 1757 set->driver_data = NULL; 1758 1759 if (nullb->dev->blocking) ^^^^^^^^^^^^^^^^^^^^ And an unchecked dereference. nullb could be NULL here. 
Reported-by: Dan Carpenter Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 70b17db8c21f..647213525549 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1756,7 +1756,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) set->flags = BLK_MQ_F_SHOULD_MERGE; set->driver_data = NULL; - if (nullb->dev->blocking) + if ((nullb && nullb->dev->blocking) || g_blocking) set->flags |= BLK_MQ_F_BLOCKING; return blk_mq_alloc_tag_set(set); -- cgit From 060fd198a3e113047da456f15889579067e6b75f Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 28 Aug 2017 13:49:31 -0700 Subject: block/nullb: delete unnecessary memory free Commit 2984c86 ("nullb: factor disk parameters") has a typo. The nullb_device allocation/free is done outside of null_add_dev, but the commit accidentally frees the nullb_device in the error code path. Reported-by: Dan Carpenter Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 647213525549..3b5cabe374d2 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1909,7 +1909,6 @@ out_cleanup_queues: out_free_nullb: kfree(nullb); out: - null_free_dev(dev); return rv; } -- cgit From b3c3051220f3e2a576ba8008c4a87b7d4c8a35e8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 28 Aug 2017 15:06:31 -0600 Subject: null_blk: use available 'dev' in nullb_device_power_store() We already have this pointer, so there is no need to call to_nullb_device() again.
Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 3b5cabe374d2..bd922868a861 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -375,7 +375,7 @@ static ssize_t nullb_device_power_store(struct config_item *item, set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); dev->power = newp; - } else if (to_nullb_device(item)->power && !newp) { + } else if (dev->power && !newp) { mutex_lock(&lock); dev->power = newp; null_del_dev(dev->nullb); -- cgit