From ecbcdf0c81265f7f780b588eed77fa933fd79f68 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 12 Aug 2020 17:24:44 -0600 Subject: nvme: Use spin_lock_irq() when taking the ctrl->lock When locking the ctrl->lock spinlock IRQs need to be disabled to avoid a deadlock. The new spin_lock() calls recently added produce the following lockdep warning when running the blktest nvme/003: ================================ WARNING: inconsistent lock state -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. ksoftirqd/2/22 [HC0[0]:SC1[1]:HE0:SE0] takes: ffff888276a8c4c0 (&ctrl->lock){+.?.}-{2:2}, at: nvme_keep_alive_end_io+0x50/0xc0 {SOFTIRQ-ON-W} state was registered at: lock_acquire+0x164/0x500 _raw_spin_lock+0x28/0x40 nvme_get_effects_log+0x37/0x1c0 nvme_init_identify+0x9e4/0x14f0 nvme_reset_work+0xadd/0x2360 process_one_work+0x66b/0xb70 worker_thread+0x6e/0x6c0 kthread+0x1e7/0x210 ret_from_fork+0x22/0x30 irq event stamp: 1449221 hardirqs last enabled at (1449220): [] ktime_get+0xf9/0x140 hardirqs last disabled at (1449221): [] _raw_spin_lock_irqsave+0x25/0x60 softirqs last enabled at (1449210): [] __do_softirq+0x447/0x595 softirqs last disabled at (1449215): [] run_ksoftirqd+0x35/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&ctrl->lock); <Interrupt> lock(&ctrl->lock); *** DEADLOCK *** no locks held by ksoftirqd/2/22. 
stack backtrace: CPU: 2 PID: 22 Comm: ksoftirqd/2 Not tainted 5.8.0-rc4-eid-vmlocalyes-dbg-00157-g7236657c6b3a #1450 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-1 04/01/2014 Call Trace: dump_stack+0xc8/0x11a print_usage_bug.cold.63+0x235/0x23e mark_lock+0xa9c/0xcf0 __lock_acquire+0xd9a/0x2b50 lock_acquire+0x164/0x500 _raw_spin_lock_irqsave+0x40/0x60 nvme_keep_alive_end_io+0x50/0xc0 blk_mq_end_request+0x158/0x210 nvme_complete_rq+0x146/0x500 nvme_loop_complete_rq+0x26/0x30 [nvme_loop] blk_done_softirq+0x187/0x1e0 __do_softirq+0x118/0x595 run_ksoftirqd+0x35/0x50 smpboot_thread_fn+0x1d3/0x310 kthread+0x1e7/0x210 ret_from_fork+0x22/0x30 Fixes: be93e87e7802 ("nvme: support for multiple Command Sets Supported and Effects log pages") Signed-off-by: Logan Gunthorpe Reviewed-by: Keith Busch Tested-by: Chaitanya Kulkarni Reviewed-by: Chaitanya Kulkarni Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 88cff309d8e4..466c591c05e9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2965,14 +2965,14 @@ static struct nvme_cel *nvme_find_cel(struct nvme_ctrl *ctrl, u8 csi) { struct nvme_cel *cel, *ret = NULL; - spin_lock(&ctrl->lock); + spin_lock_irq(&ctrl->lock); list_for_each_entry(cel, &ctrl->cels, entry) { if (cel->csi == csi) { ret = cel; break; } } - spin_unlock(&ctrl->lock); + spin_unlock_irq(&ctrl->lock); return ret; } @@ -2999,9 +2999,9 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, cel->csi = csi; - spin_lock(&ctrl->lock); + spin_lock_irq(&ctrl->lock); list_add_tail(&cel->entry, &ctrl->cels); - spin_unlock(&ctrl->lock); + spin_unlock_irq(&ctrl->lock); out: *log = &cel->log; return 0; -- cgit From c41ad98bebb8f4f0335b3c50dbb7583a6149dce4 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 7 Aug 2020 09:32:35 
-0700 Subject: nvme: skip noiob for zoned devices Zoned block devices reuse the chunk_sectors queue limit to define zone boundaries. If such a device happens to also report an optimal boundary, do not use that to define the chunk_sectors as that may intermittently interfere with io splitting and zone size queries. Signed-off-by: Keith Busch Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 466c591c05e9..6c0d175f2ffa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2075,7 +2075,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) } } - if (iob) + if (iob && !blk_queue_is_zoned(ns->queue)) blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); nvme_update_disk_info(disk, ns, id); #ifdef CONFIG_NVME_MULTIPATH -- cgit From 5ddaabe8ed713f148e3d67e99b86d99427aceb5c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Aug 2020 09:11:30 +0200 Subject: nvme: refactor command completion Lift all the code to decide the disposition of a completed command from nvme_complete_rq and nvme_failover_req into a new helper, which returns an enum of the potential actions. nvme_complete_rq then just switches on those and calls the proper helper for the action. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 75 +++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 27 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6c0d175f2ffa..9e75f6f62471 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -241,17 +241,6 @@ static blk_status_t nvme_error_status(u16 status) } } -static inline bool nvme_req_needs_retry(struct request *req) -{ - if (blk_noretry_request(req)) - return false; - if (nvme_req(req)->status & NVME_SC_DNR) - return false; - if (nvme_req(req)->retries >= nvme_max_retries) - return false; - return true; -} - static void nvme_retry_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; @@ -268,34 +257,66 @@ static void nvme_retry_req(struct request *req) blk_mq_delay_kick_requeue_list(req->q, delay); } -void nvme_complete_rq(struct request *req) +enum nvme_disposition { + COMPLETE, + RETRY, + FAILOVER, +}; + +static inline enum nvme_disposition nvme_decide_disposition(struct request *req) { - blk_status_t status = nvme_error_status(nvme_req(req)->status); + if (likely(nvme_req(req)->status == 0)) + return COMPLETE; - trace_nvme_complete_rq(req); + if (blk_noretry_request(req) || + (nvme_req(req)->status & NVME_SC_DNR) || + nvme_req(req)->retries >= nvme_max_retries) + return COMPLETE; - nvme_cleanup_cmd(req); + if (req->cmd_flags & REQ_NVME_MPATH) { + if (nvme_is_path_error(nvme_req(req)->status)) + return FAILOVER; + } - if (nvme_req(req)->ctrl->kas) - nvme_req(req)->ctrl->comp_seen = true; + if (blk_queue_dying(req->q)) + return COMPLETE; - if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { - if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req)) - return; + return RETRY; +} - if (!blk_queue_dying(req->q)) { - nvme_retry_req(req); - return; - } - } else if 
(IS_ENABLED(CONFIG_BLK_DEV_ZONED) && - req_op(req) == REQ_OP_ZONE_APPEND) { +static inline void nvme_end_req(struct request *req) +{ + blk_status_t status = nvme_error_status(nvme_req(req)->status); + + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + req_op(req) == REQ_OP_ZONE_APPEND) req->__sector = nvme_lba_to_sect(req->q->queuedata, le64_to_cpu(nvme_req(req)->result.u64)); - } nvme_trace_bio_complete(req, status); blk_mq_end_request(req, status); } + +void nvme_complete_rq(struct request *req) +{ + trace_nvme_complete_rq(req); + nvme_cleanup_cmd(req); + + if (nvme_req(req)->ctrl->kas) + nvme_req(req)->ctrl->comp_seen = true; + + switch (nvme_decide_disposition(req)) { + case COMPLETE: + nvme_end_req(req); + return; + case RETRY: + nvme_retry_req(req); + return; + case FAILOVER: + nvme_failover_req(req); + return; + } +} EXPORT_SYMBOL_GPL(nvme_complete_rq); bool nvme_cancel_request(struct request *req, void *data, bool reserved) -- cgit From 5eac5f3342b20825260d3800e7f5f74f12bac931 Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Tue, 18 Aug 2020 09:11:32 +0200 Subject: nvme: redirect commands on dying queue If a command sent through nvme-multipath failed on a dying queue, resend it on another path. 
Signed-off-by: Chao Leng [hch: rebased on top of the completion refactoring] Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Mike Snitzer Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9e75f6f62471..c9826ecf80e2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -274,13 +274,14 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) return COMPLETE; if (req->cmd_flags & REQ_NVME_MPATH) { - if (nvme_is_path_error(nvme_req(req)->status)) + if (nvme_is_path_error(nvme_req(req)->status) || + blk_queue_dying(req->q)) return FAILOVER; + } else { + if (blk_queue_dying(req->q)) + return COMPLETE; } - if (blk_queue_dying(req->q)) - return COMPLETE; - return RETRY; } -- cgit From df561f6688fef775baa341a0f5d960becd248b11 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 23 Aug 2020 17:36:59 -0500 Subject: treewide: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. Also, remove unnecessary fall-through markings when it is the case. [1] https://www.kernel.org/doc/html/v5.7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. 
Silva --- drivers/nvme/host/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 88cff309d8e4..96ee5a476c4d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -330,7 +330,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_RESETTING: case NVME_CTRL_CONNECTING: changed = true; - /* FALLTHRU */ + fallthrough; default: break; } @@ -340,7 +340,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_NEW: case NVME_CTRL_LIVE: changed = true; - /* FALLTHRU */ + fallthrough; default: break; } @@ -350,7 +350,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_NEW: case NVME_CTRL_RESETTING: changed = true; - /* FALLTHRU */ + fallthrough; default: break; } @@ -361,7 +361,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_RESETTING: case NVME_CTRL_CONNECTING: changed = true; - /* FALLTHRU */ + fallthrough; default: break; } @@ -371,7 +371,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_DELETING: case NVME_CTRL_DEAD: changed = true; - /* FALLTHRU */ + fallthrough; default: break; } @@ -380,7 +380,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, switch (old_state) { case NVME_CTRL_DELETING: changed = true; - /* FALLTHRU */ + fallthrough; default: break; } -- cgit From 7cf0d7c0f3c3b0203aaf81c1bc884924d8fdb9bd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 30 Jul 2020 13:24:45 -0700 Subject: nvme: have nvme_wait_freeze_timeout return if it timed out Users can detect if the wait has completed or not and take appropriate actions based on this information (e.g. whether to continue initialization or rather fail and schedule another initialization attempt). 
Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c9826ecf80e2..537dcd900cb5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4534,7 +4534,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_unfreeze); -void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) +int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) { struct nvme_ns *ns; @@ -4545,6 +4545,7 @@ void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) break; } up_read(&ctrl->namespaces_rwsem); + return timeout; } EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout); -- cgit From 7cd49f7576b0c61d6c4a2114cda08cc4d5ce0028 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 24 Aug 2020 15:47:25 -0700 Subject: nvme: Fix NULL dereference for pci nvme controllers PCIe controllers do not have fabric opts, verify they exist before showing ctrl_loss_tmo or reconnect_delay attributes. 
Fixes: 764075fdcb2f ("nvme: expose reconnect_delay and ctrl_loss_tmo via sysfs") Reported-by: Tobias Markus Reviewed-by: Keith Busch Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 537dcd900cb5..e406c3cf55bc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3676,6 +3676,10 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, return 0; if (a == &dev_attr_hostid.attr && !ctrl->opts) return 0; + if (a == &dev_attr_ctrl_loss_tmo.attr && !ctrl->opts) + return 0; + if (a == &dev_attr_reconnect_delay.attr && !ctrl->opts) + return 0; return a->mode; } -- cgit From 192f6c29bb28bfd0a17e6ad331d09f1ec84143d0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 26 Aug 2020 10:53:04 -0700 Subject: nvme: fix controller instance leak If the driver has to unbind from the controller for an early failure before the subsystem has been set up, there won't be a subsystem holding the controller's instance, so the controller needs to free its own instance in this case. 
Fixes: 733e4b69d508d ("nvme: Assign subsys instance from first ctrl") Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e406c3cf55bc..d6186208abf9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4394,7 +4394,7 @@ static void nvme_free_ctrl(struct device *dev) struct nvme_subsystem *subsys = ctrl->subsys; struct nvme_cel *cel, *next; - if (subsys && ctrl->instance != subsys->instance) + if (!subsys || ctrl->instance != subsys->instance) ida_simple_remove(&nvme_instance_ida, ctrl->instance); list_for_each_entry_safe(cel, next, &ctrl->cels, entry) { -- cgit From e83d776f9f98b4af18d67f05f9d1f3042dbe62c7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 27 Aug 2020 10:38:57 -0700 Subject: nvme: only use power of two io boundaries The kernel requires a power of two for boundaries because that's the only way it can efficiently split commands that cross them. A controller, however, may report a non-power of two boundary. The driver had been rounding the controller's value to one the kernel can use, but splitting on the wrong boundary provides no benefit on the device side, and incurs additional submission overhead from non-optimal splits. Don't provide any boundary hint if the controller's value can't be used and log a warning when first scanning a disk's unreported IO boundary. Since the chunk sector logic has grown, move it to a separate function. Cc: Martin K. Petersen Signed-off-by: Keith Busch Reviewed-by: Martin K. 
Petersen Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 47 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d6186208abf9..5702a3843746 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2026,13 +2026,49 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_unfreeze_queue(disk->queue); } +static inline bool nvme_first_scan(struct gendisk *disk) +{ + /* nvme_alloc_ns() scans the disk prior to adding it */ + return !(disk->flags & GENHD_FL_UP); +} + +static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + u32 iob; + + if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && + is_power_of_2(ctrl->max_hw_sectors)) + iob = ctrl->max_hw_sectors; + else + iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); + + if (!iob) + return; + + if (!is_power_of_2(iob)) { + if (nvme_first_scan(ns->disk)) + pr_warn("%s: ignoring unaligned IO boundary:%u\n", + ns->disk->disk_name, iob); + return; + } + + if (blk_queue_is_zoned(ns->disk->queue)) { + if (nvme_first_scan(ns->disk)) + pr_warn("%s: ignoring zoned namespace IO boundary\n", + ns->disk->disk_name); + return; + } + + blk_queue_chunk_sectors(ns->queue, iob); +} + static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; struct nvme_ns *ns = disk->private_data; struct nvme_ctrl *ctrl = ns->ctrl; int ret; - u32 iob; /* * If identify namespace failed, use default 512 byte block size so @@ -2060,12 +2096,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) return -ENODEV; } - if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && - is_power_of_2(ctrl->max_hw_sectors)) - iob = ctrl->max_hw_sectors; - else - iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); - ns->features = 0; 
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); /* the PI implementation requires metadata equal t10 pi tuple size */ @@ -2097,8 +2127,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) } } - if (iob && !blk_queue_is_zoned(ns->queue)) - blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); + nvme_set_chunk_sectors(ns, id); nvme_update_disk_info(disk, ns, id); #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { -- cgit From b63de8400a6e1001b5732286cf6f5ec27799b7b4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 28 Aug 2020 12:01:50 -0700 Subject: nvme: Revert: Fix controller creation races with teardown flow The indicated patch introduced a barrier in the sysfs_delete attribute for the controller that rejects the request if the controller isn't created. "Created" is defined as at least 1 call to nvme_start_ctrl(). This is problematic in error-injection testing. If an error occurs on the initial attempt to create an association and the controller enters reconnect(s) attempts, the admin cannot delete the controller until either there is a successful association created or ctrl_loss_tmo times out. Where this issue is particularly hurtful is when the "admin" is the nvme-cli, it is performing a connection to a discovery controller, and it is initiated via auto-connect scripts. With the FC transport, if the first connection attempt fails, the controller enters a normal reconnect state but returns control to the cli thread that created the controller. In this scenario, the cli attempts to read the discovery log via ioctl, which fails, causing the cli to see it as an empty log and then proceeds to delete the discovery controller. The delete is rejected and the controller is left live. If the discovery controller reconnect then succeeds, there is no action to delete it, and it sits live doing nothing. 
Cc: # v5.7+ Fixes: ce1518139e69 ("nvme: Fix controller creation races with teardown flow") Signed-off-by: James Smart CC: Israel Rukshin CC: Max Gurtovoy CC: Christoph Hellwig CC: Keith Busch CC: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5702a3843746..8b75f6ca0b61 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3525,10 +3525,6 @@ static ssize_t nvme_sysfs_delete(struct device *dev, { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - /* Can't delete non-created controllers */ - if (!ctrl->created) - return -EBUSY; - if (device_remove_file_self(dev, attr)) nvme_delete_ctrl_sync(ctrl); return count; @@ -4403,7 +4399,6 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) nvme_queue_scan(ctrl); nvme_start_queues(ctrl); } - ctrl->created = true; } EXPORT_SYMBOL_GPL(nvme_start_ctrl); -- cgit From 52a3974feb1a3eec25d8836d37a508b67b0a9cd0 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 15 Sep 2020 20:53:25 -0700 Subject: nvme-core: get/put ctrl and transport module in nvme_dev_open/release() Get and put the reference to the ctrl in the nvme_dev_open() and nvme_dev_release() before and after module get/put for ctrl in char device file operations. 
Introduce char_dev release function, get/put the controller and module which allows us to fix the potential Oops which can be easily reproduced with a passthru ctrl (although the problem also exists with pure user access): Entering kdb (current=0xffff8887f8290000, pid 3128) on processor 30 Oops: (null) due to oops @ 0xffffffffa01019ad CPU: 30 PID: 3128 Comm: bash Tainted: G W OE 5.8.0-rc4nvme-5.9+ #35 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.4 RIP: 0010:nvme_free_ctrl+0x234/0x285 [nvme_core] Code: 57 10 a0 e8 73 bf 02 e1 ba 3d 11 00 00 48 c7 c6 98 33 10 a0 48 c7 c7 1d 57 10 a0 e8 5b bf 02 e1 8 RSP: 0018:ffffc90001d63de0 EFLAGS: 00010246 RAX: ffffffffa05c0440 RBX: ffff8888119e45a0 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff8888177e9550 RDI: ffff8888119e43b0 RBP: ffff8887d4768000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: ffffc90001d63c90 R12: ffff8888119e43b0 R13: ffff8888119e5108 R14: dead000000000100 R15: ffff8888119e5108 FS: 00007f1ef27b0740(0000) GS:ffff888817600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa05c0470 CR3: 00000007f6bee000 CR4: 00000000003406e0 Call Trace: device_release+0x27/0x80 kobject_put+0x98/0x170 nvmet_passthru_ctrl_disable+0x4a/0x70 [nvmet] nvmet_passthru_enable_store+0x4c/0x90 [nvmet] configfs_write_file+0xe6/0x150 vfs_write+0xba/0x1e0 ksys_write+0x5f/0xe0 do_syscall_64+0x52/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f1ef1eb2840 Code: Bad RIP value. 
RSP: 002b:00007fffdbff0eb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f1ef1eb2840 RDX: 0000000000000002 RSI: 00007f1ef27d2000 RDI: 0000000000000001 RBP: 00007f1ef27d2000 R08: 000000000000000a R09: 00007f1ef27b0740 R10: 0000000000000001 R11: 0000000000000246 R12: 00007f1ef2186400 R13: 0000000000000002 R14: 0000000000000001 R15: 0000000000000000 With this patch fix we take the module ref count in nvme_dev_open() and release that ref count in newly introduced nvme_dev_release(). Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8b75f6ca0b61..c013eb52fdc8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3261,10 +3261,24 @@ static int nvme_dev_open(struct inode *inode, struct file *file) return -EWOULDBLOCK; } + nvme_get_ctrl(ctrl); + if (!try_module_get(ctrl->ops->module)) + return -EINVAL; + file->private_data = ctrl; return 0; } +static int nvme_dev_release(struct inode *inode, struct file *file) +{ + struct nvme_ctrl *ctrl = + container_of(inode->i_cdev, struct nvme_ctrl, cdev); + + module_put(ctrl->ops->module); + nvme_put_ctrl(ctrl); + return 0; +} + static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) { struct nvme_ns *ns; @@ -3327,6 +3341,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, static const struct file_operations nvme_dev_fops = { .owner = THIS_MODULE, .open = nvme_dev_open, + .release = nvme_dev_release, .unlocked_ioctl = nvme_dev_ioctl, .compat_ioctl = compat_ptr_ioctl, }; -- cgit From 59e330f8ff7ada7aa64fa422f6adf22a45152a7e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 17 Sep 2020 08:50:25 -0700 Subject: nvme: return errors for hwmon init Initializing the nvme hwmon retrieves a log from the controller. 
If the controller is broken, we need to return the appropriate error so that subsequent initialization doesn't attempt to continue. Reported-by: Tong Zhang Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c013eb52fdc8..4cea14c18a6d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3236,8 +3236,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; - if (!ctrl->identified) - nvme_hwmon_init(ctrl); + if (!ctrl->identified) { + ret = nvme_hwmon_init(ctrl); + if (ret < 0) + return ret; + } ctrl->identified = true; -- cgit From 46d2613eae51d527ecaf0e8248a9bfcc0b92aa7e Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 22 Sep 2020 12:49:38 -0700 Subject: nvme-core: don't use NVME_NSID_ALL for command effects and supported log In the function nvme_get_effects_log() it uses NVME_NSID_ALL which has namespace scope. The command effect log page is controller specific. Replace NVME_NSID_ALL with 0x00 which specifies the controller scope instead of namespace scope. 
Fixes: 84fef62d135b ("nvme: check admin passthru command effects") Link: https://bugzilla.kernel.org/show_bug.cgi?id=209287 Reported-by: Huai-Cheng Kuo Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4cea14c18a6d..53c93836c7c6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3041,7 +3041,7 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, if (!cel) return -ENOMEM; - ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0, csi, + ret = nvme_get_log(ctrl, 0x00, NVME_LOG_CMD_EFFECTS, 0, csi, &cel->log, sizeof(cel->log), 0); if (ret) { kfree(cel); -- cgit From 4bab69093044ca81f394bd0780be1b71c5a4d308 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 6 Oct 2020 16:36:47 -0700 Subject: nvme-core: put ctrl ref when module ref get fail When try_module_get() fails in the nvme_dev_open() it returns without releasing the ctrl reference which was taken earlier. Put the ctrl reference which is taken before calling the try_module_get() in the error return code path. 
Fixes: 52a3974feb1a "nvme-core: get/put ctrl and transport module in nvme_dev_open/release()" Signed-off-by: Chaitanya Kulkarni Reviewed-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 53c93836c7c6..ca516d68f14f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3265,8 +3265,10 @@ static int nvme_dev_open(struct inode *inode, struct file *file) } nvme_get_ctrl(ctrl); - if (!try_module_get(ctrl->ops->module)) + if (!try_module_get(ctrl->ops->module)) { + nvme_put_ctrl(ctrl); return -EINVAL; + } file->private_data = ctrl; return 0; -- cgit