From 382fee1a8b623e2546a3e15e80517389e0e0673e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 30 Jul 2020 11:51:00 -0700 Subject: nvmet: fix a memory leak We forgot to free new_model_number Fixes: 013b7ebe5a0d ("nvmet: make ctrl model configurable") Reviewed-by: Chaitanya Kulkarni Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 74b2b61c773b..37e1d7784e17 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1136,6 +1136,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, up_write(&nvmet_config_sem); kfree_rcu(new_model, rcuhead); + kfree(new_model_number); return count; } -- cgit From 0ceeab96ba598c578f0e87cb25b79349cd35b8f1 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 29 Jul 2020 13:10:09 -0600 Subject: nvmet-passthru: Reject commands with non-sgl flags set Any command with a non-SGL flag set (like fuse flags) should be rejected. Fixes: c1fef73f793b ("nvmet: add passthru code to process commands") Signed-off-by: Logan Gunthorpe Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/target/passthru.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 89d91dc999a6..f69c3ac82e58 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -326,6 +326,10 @@ static u16 nvmet_setup_passthru_command(struct nvmet_req *req) u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req) { + /* Reject any commands with non-sgl flags set (ie. fused commands) */ + if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL) + return NVME_SC_INVALID_FIELD; + switch (req->cmd->common.opcode) { case nvme_cmd_resv_register: case nvme_cmd_resv_report: @@ -396,6 +400,10 @@ static u16 nvmet_passthru_get_set_features(struct nvmet_req *req) u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req) { + /* Reject any commands with non-sgl flags set (ie. fused commands) */ + if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL) + return NVME_SC_INVALID_FIELD; + /* * Passthru all vendor specific commands */ -- cgit From f34448cd0dc697723fb5f4118f8431d9233b370d Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 2 Aug 2020 19:15:45 +0800 Subject: nvme-fc: Fix wrong return value in __nvme_fc_init_request() On an error exit path, a negative error code should be returned instead of a positive return value. Fixes: e399441de9115 ("nvme-fabrics: Add host support for FC transport") Cc: James Smart Signed-off-by: Tianjia Zhang Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index eae43bb444e0..07aacfe46368 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2078,7 +2078,7 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) { dev_err(ctrl->dev, "FCP Op failed - cmdiu dma mapping failed.\n"); - ret = EFAULT; + ret = -EFAULT; goto out_on_error; } @@ -2088,7 +2088,7 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) { dev_err(ctrl->dev, "FCP Op failed - rspiu dma mapping failed.\n"); - ret = EFAULT; + ret = -EFAULT; } atomic_set(&op->state, FCPOP_STATE_IDLE); -- cgit From 93eb0381e13d249a18ed4aae203291ff977e7ffb Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 6 Aug 2020 15:19:31 +0200 Subject: nvme: multipath: round-robin: fix single non-optimized path case If there's only one usable, non-optimized path, nvme_round_robin_path() returns NULL, which is wrong. Fix it by falling back to "old", like in the single optimized path case. Also, if the active path isn't changed, there's no need to re-assign the pointer. Fixes: 3f6e3246db0e ("nvme-multipath: fix logic for non-optimized paths") Signed-off-by: Martin Wilck Signed-off-by: Martin George Reviewed-by: Keith Busch Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/multipath.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 3ded54d2c9c6..a64dfff0d0ce 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -255,12 +255,17 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, fallback = ns; } - /* No optimized path found, re-check the current path */ + /* + * The loop above skips the current path for round-robin semantics. + * Fall back to the current path if either: + * - no other optimized path found and current is optimized, + * - no other usable path found and current is usable. + */ if (!nvme_path_is_disabled(old) && - old->ana_state == NVME_ANA_OPTIMIZED) { - found = old; - goto out; - } + (old->ana_state == NVME_ANA_OPTIMIZED || + (!fallback && old->ana_state == NVME_ANA_NONOPTIMIZED))) + return old; + if (!fallback) return NULL; found = fallback; -- cgit From e398863b75af24103cee69cc8ee8bf4bc9c8913e Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 6 Aug 2020 15:19:32 +0200 Subject: nvme: multipath: round-robin: eliminate "fallback" variable If we find an optimized path, we quit the loop immediately. Thus we can use just one variable for the next path, slighly simplifying the code. Signed-off-by: Martin Wilck Reviewed-by: Keith Busch Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/multipath.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a64dfff0d0ce..760f625cefc8 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -233,7 +233,7 @@ static struct nvme_ns *nvme_next_ns(struct nvme_ns_head *head, static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, int node, struct nvme_ns *old) { - struct nvme_ns *ns, *found, *fallback = NULL; + struct nvme_ns *ns, *found = NULL; if (list_is_singular(&head->list)) { if (nvme_path_is_disabled(old)) @@ -252,7 +252,7 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, goto out; } if (ns->ana_state == NVME_ANA_NONOPTIMIZED) - fallback = ns; + found = ns; } /* @@ -263,12 +263,11 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, */ if (!nvme_path_is_disabled(old) && (old->ana_state == NVME_ANA_OPTIMIZED || - (!fallback && old->ana_state == NVME_ANA_NONOPTIMIZED))) + (!found && old->ana_state == NVME_ANA_NONOPTIMIZED))) return old; - if (!fallback) + if (!found) return NULL; - found = fallback; out: rcu_assign_pointer(head->current_path[node], found); return found; -- cgit From 4db69a3d7cfe31e0205bfcb6de331aa9a124cb10 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 6 Aug 2020 13:02:23 -0700 Subject: nvmet: add ns tear down label for pt-cmd handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the current implementation before submitting the passthru cmd we may come across error e.g. getting ns from passthru controller, allocating a request from passthru controller, etc. For all the failure cases it only uses single goto label fail_out. In the target code, we follow the pattern to have a separate label for each error out the case when setting up multiple things before the actual action. This patch follows the same pattern and renames generic fail_out label to out_put_ns and updates the error out cases in the nvmet_passthru_execute_cmd() where it is needed. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Logan Gunthorpe Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/target/passthru.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index f69c3ac82e58..750d7b009b34 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -230,7 +230,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) if (unlikely(!ns)) { pr_err("failed to get passthru ns nsid:%u\n", nsid); status = NVME_SC_INVALID_NS | NVME_SC_DNR; - goto fail_out; + goto out; } q = ns->queue; @@ -240,14 +240,14 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) if (IS_ERR(rq)) { rq = NULL; status = NVME_SC_INTERNAL; - goto fail_out; + goto out_put_ns; } if (req->sg_cnt) { ret = nvmet_passthru_map_sg(req, rq); if (unlikely(ret)) { status = NVME_SC_INTERNAL; - goto fail_out; + goto out_put_ns; } } @@ -274,9 +274,10 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) return; -fail_out: +out_put_ns: if (ns) nvme_put_ns(ns); +out: nvmet_req_complete(req, status); blk_put_request(rq); } -- cgit From a2138fd49467d0b909b9f19936974f9a3c4e3e1a Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 6 Aug 2020 15:48:58 -0700 Subject: nvmet: fix oops in pt cmd execution In the existing NVMeOF Passthru core command handling on failure of nvme_alloc_request() it errors out with rq value set to NULL. In the error handling path it calls blk_put_request() without checking if rq is set to NULL or not which produces following Oops:- [ 1457.346861] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 1457.347838] #PF: supervisor read access in kernel mode [ 1457.348464] #PF: error_code(0x0000) - not-present page [ 1457.349085] PGD 0 P4D 0 [ 1457.349402] Oops: 0000 [#1] SMP NOPTI [ 1457.349851] CPU: 18 PID: 10782 Comm: kworker/18:2 Tainted: G OE 5.8.0-rc4nvme-5.9+ #35 [ 1457.350951] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e3214 [ 1457.352347] Workqueue: events nvme_loop_execute_work [nvme_loop] [ 1457.353062] RIP: 0010:blk_mq_free_request+0xe/0x110 [ 1457.353651] Code: 3f ff ff ff 83 f8 01 75 0d 4c 89 e7 e8 1b db ff ff e9 2d ff ff ff 0f 0b eb ef 66 8 [ 1457.355975] RSP: 0018:ffffc900035b7de0 EFLAGS: 00010282 [ 1457.356636] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000002 [ 1457.357526] RDX: ffffffffa060bd05 RSI: 0000000000000000 RDI: 0000000000000000 [ 1457.358416] RBP: 0000000000000037 R08: 0000000000000000 R09: 0000000000000000 [ 1457.359317] R10: 0000000000000000 R11: 000000000000006d R12: 0000000000000000 [ 1457.360424] R13: ffff8887ffa68600 R14: 0000000000000000 R15: ffff8888150564c8 [ 1457.361322] FS: 0000000000000000(0000) GS:ffff888814600000(0000) knlGS:0000000000000000 [ 1457.362337] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1457.363058] CR2: 0000000000000000 CR3: 000000081c0ac000 CR4: 00000000003406e0 [ 1457.363973] Call Trace: [ 1457.364296] nvmet_passthru_execute_cmd+0x150/0x2c0 [nvmet] [ 1457.364990] process_one_work+0x24e/0x5a0 [ 1457.365493] ? __schedule+0x353/0x840 [ 1457.365957] worker_thread+0x3c/0x380 [ 1457.366426] ? process_one_work+0x5a0/0x5a0 [ 1457.366948] kthread+0x135/0x150 [ 1457.367362] ? kthread_create_on_node+0x60/0x60 [ 1457.367934] ret_from_fork+0x22/0x30 [ 1457.368388] Modules linked in: nvme_loop(OE) nvmet(OE) nvme_fabrics(OE) null_blk nvme(OE) nvme_corer [ 1457.368414] ata_piix crc32c_intel virtio_pci libata virtio_ring serio_raw t10_pi virtio floppy dm_] [ 1457.380849] CR2: 0000000000000000 [ 1457.381288] ---[ end trace c6cab61bfd1f68fd ]--- [ 1457.381861] RIP: 0010:blk_mq_free_request+0xe/0x110 [ 1457.382469] Code: 3f ff ff ff 83 f8 01 75 0d 4c 89 e7 e8 1b db ff ff e9 2d ff ff ff 0f 0b eb ef 66 8 [ 1457.384749] RSP: 0018:ffffc900035b7de0 EFLAGS: 00010282 [ 1457.385393] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000002 [ 1457.386264] RDX: ffffffffa060bd05 RSI: 0000000000000000 RDI: 0000000000000000 [ 1457.387142] RBP: 0000000000000037 R08: 0000000000000000 R09: 0000000000000000 [ 1457.388029] R10: 0000000000000000 R11: 000000000000006d R12: 0000000000000000 [ 1457.388914] R13: ffff8887ffa68600 R14: 0000000000000000 R15: ffff8888150564c8 [ 1457.389798] FS: 0000000000000000(0000) GS:ffff888814600000(0000) knlGS:0000000000000000 [ 1457.390796] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1457.391508] CR2: 0000000000000000 CR3: 000000081c0ac000 CR4: 00000000003406e0 [ 1457.392525] Kernel panic - not syncing: Fatal exception [ 1457.394138] Kernel Offset: disabled [ 1457.394677] ---[ end Kernel panic - not syncing: Fatal exception ]--- We fix this Oops by adding a new goto label out_put_req and reordering the blk_put_request call to avoid calling blk_put_request() with rq value is set to NULL. Here we also update the rest of the code accordingly. Fixes: 06b7164dfdc0 ("nvmet: add passthru code to process commands") Signed-off-by: Chaitanya Kulkarni Reviewed-by: Logan Gunthorpe Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/target/passthru.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 750d7b009b34..ec223bf56254 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -238,7 +238,6 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY); if (IS_ERR(rq)) { - rq = NULL; status = NVME_SC_INTERNAL; goto out_put_ns; } @@ -247,7 +246,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) ret = nvmet_passthru_map_sg(req, rq); if (unlikely(ret)) { status = NVME_SC_INTERNAL; - goto out_put_ns; + goto out_put_req; } } @@ -274,12 +273,13 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) return; +out_put_req: + blk_put_request(rq); out_put_ns: if (ns) nvme_put_ns(ns); out: nvmet_req_complete(req, status); - blk_put_request(rq); } /* -- cgit From 7ee51cf60a90c2017b8d5b6e936cb52490aac7bd Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 6 Aug 2020 15:56:27 -0700 Subject: nvmet: call blk_mq_free_request() directly Instead of calling blk_put_request() which calls blk_mq_free_request(), call blk_mq_free_request() directly for NVMeOF passthru. This is to mainly avoid an extra function call in the completion path nvmet_passthru_req_done(). Signed-off-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Reviewed-by: Logan Gunthorpe Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/target/passthru.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index ec223bf56254..8bd7f656e240 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -165,7 +165,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) req->cqe->result = nvme_req(rq)->result; nvmet_req_complete(req, status); - blk_put_request(rq); + blk_mq_free_request(rq); } static void nvmet_passthru_req_done(struct request *rq, @@ -175,7 +175,7 @@ static void nvmet_passthru_req_done(struct request *rq, req->cqe->result = nvme_req(rq)->result; nvmet_req_complete(req, nvme_req(rq)->status); - blk_put_request(rq); + blk_mq_free_request(rq); } static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) @@ -274,7 +274,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) return; out_put_req: - blk_put_request(rq); + blk_mq_free_request(rq); out_put_ns: if (ns) nvme_put_ns(ns); -- cgit From ecbcdf0c81265f7f780b588eed77fa933fd79f68 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 12 Aug 2020 17:24:44 -0600 Subject: nvme: Use spin_lock_irq() when taking the ctrl->lock When locking the ctrl->lock spinlock IRQs need to be disabled to avoid a dead lock. The new spin_lock() calls recently added produce the following lockdep warning when running the blktest nvme/003: ================================ WARNING: inconsistent lock state -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. ksoftirqd/2/22 [HC0[0]:SC1[1]:HE0:SE0] takes: ffff888276a8c4c0 (&ctrl->lock){+.?.}-{2:2}, at: nvme_keep_alive_end_io+0x50/0xc0 {SOFTIRQ-ON-W} state was registered at: lock_acquire+0x164/0x500 _raw_spin_lock+0x28/0x40 nvme_get_effects_log+0x37/0x1c0 nvme_init_identify+0x9e4/0x14f0 nvme_reset_work+0xadd/0x2360 process_one_work+0x66b/0xb70 worker_thread+0x6e/0x6c0 kthread+0x1e7/0x210 ret_from_fork+0x22/0x30 irq event stamp: 1449221 hardirqs last enabled at (1449220): [] ktime_get+0xf9/0x140 hardirqs last disabled at (1449221): [] _raw_spin_lock_irqsave+0x25/0x60 softirqs last enabled at (1449210): [] __do_softirq+0x447/0x595 softirqs last disabled at (1449215): [] run_ksoftirqd+0x35/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&ctrl->lock); lock(&ctrl->lock); *** DEADLOCK *** no locks held by ksoftirqd/2/22. stack backtrace: CPU: 2 PID: 22 Comm: ksoftirqd/2 Not tainted 5.8.0-rc4-eid-vmlocalyes-dbg-00157-g7236657c6b3a #1450 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-1 04/01/2014 Call Trace: dump_stack+0xc8/0x11a print_usage_bug.cold.63+0x235/0x23e mark_lock+0xa9c/0xcf0 __lock_acquire+0xd9a/0x2b50 lock_acquire+0x164/0x500 _raw_spin_lock_irqsave+0x40/0x60 nvme_keep_alive_end_io+0x50/0xc0 blk_mq_end_request+0x158/0x210 nvme_complete_rq+0x146/0x500 nvme_loop_complete_rq+0x26/0x30 [nvme_loop] blk_done_softirq+0x187/0x1e0 __do_softirq+0x118/0x595 run_ksoftirqd+0x35/0x50 smpboot_thread_fn+0x1d3/0x310 kthread+0x1e7/0x210 ret_from_fork+0x22/0x30 Fixes: be93e87e7802 ("nvme: support for multiple Command Sets Supported and Effects log pages") Signed-off-by: Logan Gunthorpe Reviewed-by: Keith Busch Tested-by: Chaitanya Kulkarni Reviewed-by: Chaitanya Kulkarni Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 88cff309d8e4..466c591c05e9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2965,14 +2965,14 @@ static struct nvme_cel *nvme_find_cel(struct nvme_ctrl *ctrl, u8 csi) { struct nvme_cel *cel, *ret = NULL; - spin_lock(&ctrl->lock); + spin_lock_irq(&ctrl->lock); list_for_each_entry(cel, &ctrl->cels, entry) { if (cel->csi == csi) { ret = cel; break; } } - spin_unlock(&ctrl->lock); + spin_unlock_irq(&ctrl->lock); return ret; } @@ -2999,9 +2999,9 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, cel->csi = csi; - spin_lock(&ctrl->lock); + spin_lock_irq(&ctrl->lock); list_add_tail(&cel->entry, &ctrl->cels); - spin_unlock(&ctrl->lock); + spin_unlock_irq(&ctrl->lock); out: *log = &cel->log; return 0; -- cgit From 7442ddcedc344b6fa073692f165dffdd1889e780 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 14 Aug 2020 23:34:25 +0800 Subject: nvme-pci: Use u32 for nvme_dev.q_depth and nvme_queue.q_depth Recently nvme_dev.q_depth was changed from an int to u16 type. This falls over for the queue depth calculation in nvme_pci_enable(), where NVME_CAP_MQES(dev->ctrl.cap) + 1 may overflow as a u16, as NVME_CAP_MQES() is a 16b number also. That happens for me, and this is the result: root@ubuntu:/home/john# [148.272996] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 Mem abort info: ESR = 0x96000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=00000a27bf3c9000 [0000000000000010] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT SMP Modules linked in: nvme nvme_core CPU: 56 PID: 256 Comm: kworker/u195:0 Not tainted 5.8.0-next-20200812 #27 Hardware name: Huawei D06 /D06, BIOS Hisilicon D06 UEFI RC0 - V1.16.01 03/15/2019 Workqueue: nvme-reset-wq nvme_reset_work [nvme] pstate: 80c00009 (Nzcv daif +PAN +UAO BTYPE=--) pc : __sg_alloc_table_from_pages+0xec/0x238 lr : __sg_alloc_table_from_pages+0xc8/0x238 sp : ffff800013ccbad0 x29: ffff800013ccbad0 x28: ffff0a27b3d380a8 x27: 0000000000000000 x26: 0000000000002dc2 x25: 0000000000000dc0 x24: 0000000000000000 x23: 0000000000000000 x22: ffff800013ccbbe8 x21: 0000000000000010 x20: 0000000000000000 x19: 00000000fffff000 x18: ffffffffffffffff x17: 00000000000000c0 x16: fffffe289eaf6380 x15: ffff800011b59948 x14: ffff002bc8fe98f8 x13: ff00000000000000 x12: ffff8000114ca000 x11: 0000000000000000 x10: ffffffffffffffff x9 : ffffffffffffffc0 x8 : ffff0a27b5f9b6a0 x7 : 0000000000000000 x6 : 0000000000000001 x5 : ffff0a27b5f9b680 x4 : 0000000000000000 x3 : ffff0a27b5f9b680 x2 : 0000000000000000 x1 : 0000000000000001 x0 : 0000000000000000 Call trace: __sg_alloc_table_from_pages+0xec/0x238 sg_alloc_table_from_pages+0x18/0x28 iommu_dma_alloc+0x474/0x678 dma_alloc_attrs+0xd8/0xf0 nvme_alloc_queue+0x114/0x160 [nvme] nvme_reset_work+0xb34/0x14b4 [nvme] process_one_work+0x1e8/0x360 worker_thread+0x44/0x478 kthread+0x150/0x158 ret_from_fork+0x10/0x34 Code: f94002c3 6b01017f 540007c2 11000486 (f8645aa5) ---[ end trace 89bb2b72d59bf925 ]--- Fix by making onto a u32. Also use u32 for nvme_dev.q_depth, as we assign this value from nvme_dev.q_depth, and nvme_dev.q_depth will possibly hold 65536 - this avoids the same crash as above. Fixes: 61f3b8963097 ("nvme-pci: use unsigned for io queue depth") Signed-off-by: John Garry Reviewed-by: Keith Busch Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ba725ae47305..1cb09a187ae7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -120,7 +120,7 @@ struct nvme_dev { unsigned max_qid; unsigned io_queues[HCTX_MAX_TYPES]; unsigned int num_vecs; - u16 q_depth; + u32 q_depth; int io_sqes; u32 db_stride; void __iomem *bar; @@ -157,13 +157,13 @@ struct nvme_dev { static int io_queue_depth_set(const char *val, const struct kernel_param *kp) { int ret; - u16 n; + u32 n; - ret = kstrtou16(val, 10, &n); + ret = kstrtou32(val, 10, &n); if (ret != 0 || n < 2) return -EINVAL; - return param_set_ushort(val, kp); + return param_set_uint(val, kp); } static inline unsigned int sq_idx(unsigned int qid, u32 stride) @@ -195,7 +195,7 @@ struct nvme_queue { dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; u32 __iomem *q_db; - u16 q_depth; + u32 q_depth; u16 cq_vector; u16 sq_tail; u16 cq_head; @@ -2320,7 +2320,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP); - dev->q_depth = min_t(u16, NVME_CAP_MQES(dev->ctrl.cap) + 1, + dev->q_depth = min_t(u32, NVME_CAP_MQES(dev->ctrl.cap) + 1, io_queue_depth); dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */ dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap); -- cgit From c61b82c7b71343c6aca6bb6cc3ff44fb123fb5d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Aug 2020 19:51:59 +0200 Subject: nvme-pci: fix PRP pool size All operations are based on the controller, not the host page size. Switch the dma pool to use the controller page size as well to avoid massive overallocations on large page size systems. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1cb09a187ae7..e001a4ae65b0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2460,7 +2460,8 @@ static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown) static int nvme_setup_prp_pools(struct nvme_dev *dev) { dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, - PAGE_SIZE, PAGE_SIZE, 0); + NVME_CTRL_PAGE_SIZE, + NVME_CTRL_PAGE_SIZE, 0); if (!dev->prp_page_pool) return -ENOMEM; -- cgit From c41ad98bebb8f4f0335b3c50dbb7583a6149dce4 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 7 Aug 2020 09:32:35 -0700 Subject: nvme: skip noiob for zoned devices Zoned block devices reuse the chunk_sectors queue limit to define zone boundaries. If a such a device happens to also report an optimal boundary, do not use that to define the chunk_sectors as that may intermittently interfere with io splitting and zone size queries. Signed-off-by: Keith Busch Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 466c591c05e9..6c0d175f2ffa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2075,7 +2075,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) } } - if (iob) + if (iob && !blk_queue_is_zoned(ns->queue)) blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); nvme_update_disk_info(disk, ns, id); #ifdef CONFIG_NVME_MULTIPATH -- cgit From 2eb81a3364eada43985efc0641490b73af78d0fa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Aug 2020 09:11:29 +0200 Subject: nvme: rename and document nvme_end_request nvme_end_request is a bit misnamed, as it wraps around the blk_mq_complete_* API. It's semantics also are non-trivial, so give it a more descriptive name and add a comment explaining the semantics. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Mike Snitzer Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/nvme.h | 8 +++++++- drivers/nvme/host/pci.c | 2 +- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/host/tcp.c | 4 ++-- drivers/nvme/target/loop.c | 2 +- 6 files changed, 13 insertions(+), 7 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 07aacfe46368..a7f474ddfff7 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2035,7 +2035,7 @@ done: } __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); - if (!nvme_end_request(rq, status, result)) + if (!nvme_try_complete_req(rq, status, result)) nvme_fc_complete_rq(rq); check_error: diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ebb8c3ed3885..510f7dbeba98 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -523,7 +523,13 @@ static inline u32 nvme_bytes_to_numd(size_t len) return (len >> 2) - 1; } -static inline bool nvme_end_request(struct request *req, __le16 status, +/* + * Fill in the status and result information from the CQE, and then figure out + * if blk-mq will need to use IPI magic to complete the request, and if yes do + * so. If not let the caller complete the request without an indirect function + * call. + */ +static inline bool nvme_try_complete_req(struct request *req, __le16 status, union nvme_result result) { struct nvme_request *rq = nvme_req(req); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e001a4ae65b0..b673fa4cf5ea 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -961,7 +961,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); - if (!nvme_end_request(req, cqe->status, cqe->result)) + if (!nvme_try_complete_req(req, cqe->status, cqe->result)) nvme_pci_complete_rq(req); } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 44c76ffbb264..4c5660201009 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1189,7 +1189,7 @@ static void nvme_rdma_end_request(struct nvme_rdma_request *req) if (!refcount_dec_and_test(&req->ref)) return; - if (!nvme_end_request(rq, req->status, req->result)) + if (!nvme_try_complete_req(rq, req->status, req->result)) nvme_rdma_complete_rq(rq); } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 62fbaecdc960..3be4749dbf11 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -481,7 +481,7 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, return -EINVAL; } - if (!nvme_end_request(rq, cqe->status, cqe->result)) + if (!nvme_try_complete_req(rq, cqe->status, cqe->result)) nvme_complete_rq(rq); queue->nr_cqe++; @@ -672,7 +672,7 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status) { union nvme_result res = {}; - if (!nvme_end_request(rq, cpu_to_le16(status << 1), res)) + if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res)) nvme_complete_rq(rq); } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 4884ef1e46a2..0d6008cf66a2 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -115,7 +115,7 @@ static void nvme_loop_queue_response(struct nvmet_req *req) return; } - if (!nvme_end_request(rq, cqe->status, cqe->result)) + if (!nvme_try_complete_req(rq, cqe->status, cqe->result)) nvme_loop_complete_rq(rq); } } -- cgit From 5ddaabe8ed713f148e3d67e99b86d99427aceb5c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Aug 2020 09:11:30 +0200 Subject: nvme: refactor command completion Lift all the code to decide the dispostition of a completed command from nvme_complete_rq and nvme_failover_req into a new helper, which returns an emum of the potential actions. nvme_complete_rq then just switches on those and calls the proper helper for the action. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 75 +++++++++++++++++++++++++++---------------- drivers/nvme/host/multipath.c | 47 ++++++++------------------- drivers/nvme/host/nvme.h | 31 ++++++++++++++++-- 3 files changed, 89 insertions(+), 64 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6c0d175f2ffa..9e75f6f62471 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -241,17 +241,6 @@ static blk_status_t nvme_error_status(u16 status) } } -static inline bool nvme_req_needs_retry(struct request *req) -{ - if (blk_noretry_request(req)) - return false; - if (nvme_req(req)->status & NVME_SC_DNR) - return false; - if (nvme_req(req)->retries >= nvme_max_retries) - return false; - return true; -} - static void nvme_retry_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; @@ -268,34 +257,66 @@ static void nvme_retry_req(struct request *req) blk_mq_delay_kick_requeue_list(req->q, delay); } -void nvme_complete_rq(struct request *req) +enum nvme_disposition { + COMPLETE, + RETRY, + FAILOVER, +}; + +static inline enum nvme_disposition nvme_decide_disposition(struct request *req) { - blk_status_t status = nvme_error_status(nvme_req(req)->status); + if (likely(nvme_req(req)->status == 0)) + return COMPLETE; - trace_nvme_complete_rq(req); + if (blk_noretry_request(req) || + (nvme_req(req)->status & NVME_SC_DNR) || + nvme_req(req)->retries >= nvme_max_retries) + return COMPLETE; - nvme_cleanup_cmd(req); + if (req->cmd_flags & REQ_NVME_MPATH) { + if (nvme_is_path_error(nvme_req(req)->status)) + return FAILOVER; + } - if (nvme_req(req)->ctrl->kas) - nvme_req(req)->ctrl->comp_seen = true; + if (blk_queue_dying(req->q)) + return COMPLETE; - if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { - if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req)) - return; + return RETRY; +} - if (!blk_queue_dying(req->q)) { - nvme_retry_req(req); - return; - } - } else if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && - req_op(req) == REQ_OP_ZONE_APPEND) { +static inline void nvme_end_req(struct request *req) +{ + blk_status_t status = nvme_error_status(nvme_req(req)->status); + + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + req_op(req) == REQ_OP_ZONE_APPEND) req->__sector = nvme_lba_to_sect(req->q->queuedata, le64_to_cpu(nvme_req(req)->result.u64)); - } nvme_trace_bio_complete(req, status); blk_mq_end_request(req, status); } + +void nvme_complete_rq(struct request *req) +{ + trace_nvme_complete_rq(req); + nvme_cleanup_cmd(req); + + if (nvme_req(req)->ctrl->kas) + nvme_req(req)->ctrl->comp_seen = true; + + switch (nvme_decide_disposition(req)) { + case COMPLETE: + nvme_end_req(req); + return; + case RETRY: + nvme_retry_req(req); + return; + case FAILOVER: + nvme_failover_req(req); + return; + } +} EXPORT_SYMBOL_GPL(nvme_complete_rq); bool nvme_cancel_request(struct request *req, void *data, bool reserved) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 760f625cefc8..d4ba736c6c89 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -65,51 +65,30 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, } } -bool nvme_failover_req(struct request *req) +void nvme_failover_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; - u16 status = nvme_req(req)->status; + u16 status = nvme_req(req)->status & 0x7ff; unsigned long flags; - switch (status & 0x7ff) { - case NVME_SC_ANA_TRANSITION: - case NVME_SC_ANA_INACCESSIBLE: - case NVME_SC_ANA_PERSISTENT_LOSS: - /* - * If we got back an ANA error we know the controller is alive, - * but not ready to serve this namespaces. The spec suggests - * we should update our general state here, but due to the fact - * that the admin and I/O queues are not serialized that is - * fundamentally racy. So instead just clear the current path, - * mark the the path as pending and kick of a re-read of the ANA - * log page ASAP. - */ - nvme_mpath_clear_current_path(ns); - if (ns->ctrl->ana_log_buf) { - set_bit(NVME_NS_ANA_PENDING, &ns->flags); - queue_work(nvme_wq, &ns->ctrl->ana_work); - } - break; - case NVME_SC_HOST_PATH_ERROR: - case NVME_SC_HOST_ABORTED_CMD: - /* - * Temporary transport disruption in talking to the controller. - * Try to send on a new path. - */ - nvme_mpath_clear_current_path(ns); - break; - default: - /* This was a non-ANA error so follow the normal error path. */ - return false; + nvme_mpath_clear_current_path(ns); + + /* + * If we got back an ANA error, we know the controller is alive but not + * ready to serve this namespace. Kick of a re-read of the ANA + * information page, and just try any other available path for now. + */ + if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) { + set_bit(NVME_NS_ANA_PENDING, &ns->flags); + queue_work(nvme_wq, &ns->ctrl->ana_work); } spin_lock_irqsave(&ns->head->requeue_lock, flags); blk_steal_bios(&ns->head->requeue_list, req); spin_unlock_irqrestore(&ns->head->requeue_lock, flags); - blk_mq_end_request(req, 0); + blk_mq_end_request(req, 0); kblockd_schedule_work(&ns->head->requeue_work); - return true; } void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 510f7dbeba98..4ff6fd289082 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -523,6 +523,32 @@ static inline u32 nvme_bytes_to_numd(size_t len) return (len >> 2) - 1; } +static inline bool nvme_is_ana_error(u16 status) +{ + switch (status & 0x7ff) { + case NVME_SC_ANA_TRANSITION: + case NVME_SC_ANA_INACCESSIBLE: + case NVME_SC_ANA_PERSISTENT_LOSS: + return true; + default: + return false; + } +} + +static inline bool nvme_is_path_error(u16 status) +{ + switch (status & 0x7ff) { + case NVME_SC_HOST_PATH_ERROR: + case NVME_SC_HOST_ABORTED_CMD: + case NVME_SC_ANA_TRANSITION: + case NVME_SC_ANA_INACCESSIBLE: + case NVME_SC_ANA_PERSISTENT_LOSS: + return true; + default: + return false; + } +} + /* * Fill in the status and result information from the CQE, and then figure out * if blk-mq will need to use IPI magic to complete the request, and if yes do @@ -635,7 +661,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, struct nvme_ctrl *ctrl, int *flags); -bool nvme_failover_req(struct request *req); +void nvme_failover_req(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); @@ -694,9 +720,8 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); } -static inline bool nvme_failover_req(struct request *req) +static inline void nvme_failover_req(struct request *req) { - return false; } static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) { -- cgit From 1e41f3bd26f79463c07aebf062a0a77f1fd39b2b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Aug 2020 09:11:31 +0200 Subject: nvme: just check the status code type in nvme_is_path_error Check the SCT sub-field for a path related status instead of enumerating invididual status code. As of NVMe 1.4 this adds "Internal Path Error" and "Controller Pathing Error" to the list, but it also future proofs for additional status codes added to the category. Suggested-by: Chao Leng Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/nvme.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 4ff6fd289082..e9cf29449dd1 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -537,16 +537,8 @@ static inline bool nvme_is_ana_error(u16 status) static inline bool nvme_is_path_error(u16 status) { - switch (status & 0x7ff) { - case NVME_SC_HOST_PATH_ERROR: - case NVME_SC_HOST_ABORTED_CMD: - case NVME_SC_ANA_TRANSITION: - case NVME_SC_ANA_INACCESSIBLE: - case NVME_SC_ANA_PERSISTENT_LOSS: - return true; - default: - return false; - } + /* check for a status code type of 'path related status' */ + return (status & 0x700) == 0x300; } /* -- cgit From 5eac5f3342b20825260d3800e7f5f74f12bac931 Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Tue, 18 Aug 2020 09:11:32 +0200 Subject: nvme: redirect commands on dying queue If a command send through nvme-multipath failed on a dying queue, resend it on another path. Signed-off-by: Chao Leng [hch: rebased on top of the completion refactoring] Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Mike Snitzer Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9e75f6f62471..c9826ecf80e2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -274,13 +274,14 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) return COMPLETE; if (req->cmd_flags & REQ_NVME_MPATH) { - if (nvme_is_path_error(nvme_req(req)->status)) + if (nvme_is_path_error(nvme_req(req)->status) || + blk_queue_dying(req->q)) return FAILOVER; + } else { + if (blk_queue_dying(req->q)) + return COMPLETE; } - if (blk_queue_dying(req->q)) - return COMPLETE; - return RETRY; } -- cgit From 0d3b6a8d213a30387b5104b2fb25376d18636f23 Mon Sep 17 00:00:00 2001 From: Amit Engel Date: Wed, 19 Aug 2020 11:31:11 +0300 Subject: nvmet: Disable keep-alive timer when kato is cleared to 0h Based on nvme spec, when keep alive timeout is set to zero the keep-alive timer should be disabled. Signed-off-by: Amit Engel Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/target/core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b92f45f5cd5b..c052c922853a 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -397,6 +397,9 @@ static void nvmet_keep_alive_timer(struct work_struct *work) static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) { + if (unlikely(ctrl->kato == 0)) + return; + pr_debug("ctrl %d start keep-alive timer for %d secs\n", ctrl->cntlid, ctrl->kato); @@ -406,6 +409,9 @@ static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) { + if (unlikely(ctrl->kato == 0)) + return; + pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid); cancel_delayed_work_sync(&ctrl->ka_work); -- cgit