From 2b3f056f72e56fa07df69b4705e0b46a6c08e77c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Oct 2022 15:57:17 +0200 Subject: blk-mq: move the call to blk_put_queue out of blk_mq_destroy_queue The fact that blk_mq_destroy_queue also drops a queue reference leads to various places having to grab an extra reference. Move the call to blk_put_queue into the callers to allow removing the extra references. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Link: https://lore.kernel.org/r/20221018135720.670094-2-hch@lst.de [axboe: fix fabrics_q vs admin_q conflict in nvme core.c] Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 31e577b01257..7d37c81fbe70 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1749,6 +1749,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) */ nvme_start_admin_queue(&dev->ctrl); blk_mq_destroy_queue(dev->ctrl.admin_q); + blk_put_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); } } -- cgit From 7dcebef90d35de13a326f765dd787538880566f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Oct 2022 15:57:19 +0200 Subject: nvme-pci: remove an extra queue reference Now that blk_mq_destroy_queue does not release the queue reference, there is no need for a second admin queue reference to be held by the nvme_dev. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Link: https://lore.kernel.org/r/20221018135720.670094-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 7d37c81fbe70..5f1d71ac0086 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1749,7 +1749,6 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) */ nvme_start_admin_queue(&dev->ctrl); blk_mq_destroy_queue(dev->ctrl.admin_q); - blk_put_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); } } @@ -1778,11 +1777,6 @@ static int nvme_pci_alloc_admin_tag_set(struct nvme_dev *dev) dev->ctrl.admin_q = NULL; return -ENOMEM; } - if (!blk_get_queue(dev->ctrl.admin_q)) { - nvme_dev_remove_admin(dev); - dev->ctrl.admin_q = NULL; - return -ENODEV; - } return 0; } -- cgit From 0ffc7e98bfaa45380b800deeb9b65ce0371c652d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2022 16:00:38 +0100 Subject: nvme-pci: refactor the tagset handling in nvme_reset_work The code to create, update or delete a tagset and namespaces in nvme_reset_work is a bit convoluted. Refactor it with a two high-level conditionals for first probe vs reset and I/O queues vs no I/O queues to make the code flow more clear. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20221101150050.3510-3-hch@lst.de [axboe: fix whitespace issue] Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5f1d71ac0086..6ab7be07b727 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2898,24 +2898,36 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; - /* - * Keep the controller around but remove all namespaces if we don't have - * any working I/O queue. - */ - if (dev->online_queues < 2) { - dev_warn(dev->ctrl.device, "IO queues not created\n"); - nvme_kill_queues(&dev->ctrl); - nvme_remove_namespaces(&dev->ctrl); - nvme_free_tagset(dev); + if (dev->ctrl.tagset) { + /* + * This is a controller reset and we already have a tagset. + * Freeze and update the number of I/O queues as thos might have + * changed. If there are no I/O queues left after this reset, + * keep the controller around but remove all namespaces. + */ + if (dev->online_queues > 1) { + nvme_start_queues(&dev->ctrl); + nvme_wait_freeze(&dev->ctrl); + nvme_pci_update_nr_queues(dev); + nvme_dbbuf_set(dev); + nvme_unfreeze(&dev->ctrl); + } else { + dev_warn(dev->ctrl.device, "IO queues lost\n"); + nvme_kill_queues(&dev->ctrl); + nvme_remove_namespaces(&dev->ctrl); + nvme_free_tagset(dev); + } } else { - nvme_start_queues(&dev->ctrl); - nvme_wait_freeze(&dev->ctrl); - if (!dev->ctrl.tagset) + /* + * First probe. Still allow the controller to show up even if + * there are no namespaces. + */ + if (dev->online_queues > 1) { nvme_pci_alloc_tag_set(dev); - else - nvme_pci_update_nr_queues(dev); - nvme_dbbuf_set(dev); - nvme_unfreeze(&dev->ctrl); + nvme_dbbuf_set(dev); + } else { + dev_warn(dev->ctrl.device, "IO queues not created\n"); + } } /* -- cgit From cd50f9b24726e9e195a0682c8d8d952396d57aef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2022 16:00:43 +0100 Subject: nvme: split nvme_kill_queues nvme_kill_queues does two things: 1) mark the gendisk of all namespaces dead 2) unquiesce all I/O queues These used to be be intertwined due to block layer issues, but aren't any more. So move the unquiscing of the I/O queues into the callers, and rename the rest of the function to the now more descriptive nvme_mark_namespaces_dead. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20221101150050.3510-8-hch@lst.de Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6ab7be07b727..f72143945ab5 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2788,7 +2788,8 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev) nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); nvme_get_ctrl(&dev->ctrl); nvme_dev_disable(dev, false); - nvme_kill_queues(&dev->ctrl); + nvme_mark_namespaces_dead(&dev->ctrl); + nvme_start_queues(&dev->ctrl); if (!queue_work(nvme_wq, &dev->remove_work)) nvme_put_ctrl(&dev->ctrl); } @@ -2913,7 +2914,8 @@ static void nvme_reset_work(struct work_struct *work) nvme_unfreeze(&dev->ctrl); } else { dev_warn(dev->ctrl.device, "IO queues lost\n"); - nvme_kill_queues(&dev->ctrl); + nvme_mark_namespaces_dead(&dev->ctrl); + nvme_start_queues(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); nvme_free_tagset(dev); } -- cgit From bad3e021ae2bb5ac9d650c9a04788efe753367f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2022 16:00:44 +0100 Subject: nvme-pci: don't unquiesce the I/O queues in nvme_remove_dead_ctrl nvme_remove_dead_ctrl schedules nvme_remove to be called, which will call nvme_dev_disable and unquiesce the I/O queues. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20221101150050.3510-9-hch@lst.de Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f72143945ab5..208c387f1558 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2789,7 +2789,6 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev) nvme_get_ctrl(&dev->ctrl); nvme_dev_disable(dev, false); nvme_mark_namespaces_dead(&dev->ctrl); - nvme_start_queues(&dev->ctrl); if (!queue_work(nvme_wq, &dev->remove_work)) nvme_put_ctrl(&dev->ctrl); } -- cgit From 94cc781f69f49f665383dd87aef973b7896153d0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2022 15:48:27 +0100 Subject: nvme: move OPAL setup from PCIe to core Nothing about the TCG Opal support is PCIe transport specific, so move it to the core code. For this nvme_init_ctrl_finish grows a new was_suspended argument that allows the transport driver to tell the OPAL code if the controller came out of a suspend cycle. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: James Smart Tested-by Gerd Bayer --- drivers/nvme/host/pci.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 208c387f1558..e4f084e12b96 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2772,7 +2772,6 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) nvme_free_tagset(dev); if (dev->ctrl.admin_q) blk_put_queue(dev->ctrl.admin_q); - free_opal_dev(dev->ctrl.opal_dev); mempool_destroy(dev->iod_mempool); put_device(dev->dev); kfree(dev->queues); @@ -2866,21 +2865,10 @@ static void nvme_reset_work(struct work_struct *work) */ dev->ctrl.max_integrity_segments = 1; - result = nvme_init_ctrl_finish(&dev->ctrl); + result = nvme_init_ctrl_finish(&dev->ctrl, was_suspend); if (result) goto out; - if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) { - if (!dev->ctrl.opal_dev) - dev->ctrl.opal_dev = - init_opal_dev(&dev->ctrl, &nvme_sec_submit); - else if (was_suspend) - opal_unlock_from_suspend(dev->ctrl.opal_dev); - } else { - free_opal_dev(dev->ctrl.opal_dev); - dev->ctrl.opal_dev = NULL; - } - if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) { result = nvme_dbbuf_dma_alloc(dev); if (result) -- cgit From 86adbf0cdb9ec6533234696c3e243184d4d0d040 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Oct 2022 02:34:13 -0700 Subject: nvme: simplify transport specific device attribute handling Allow the transport driver to override the attribute groups for the control device, so that the PCIe driver doesn't manually have to add a group after device creation and keep track of it. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Tested-by Gerd Bayer --- drivers/nvme/host/pci.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e4f084e12b96..c8f6ce5eee1c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -158,8 +158,6 @@ struct nvme_dev { unsigned int nr_allocated_queues; unsigned int nr_write_queues; unsigned int nr_poll_queues; - - bool attrs_added; }; static int io_queue_depth_set(const char *val, const struct kernel_param *kp) @@ -2234,11 +2232,17 @@ static struct attribute *nvme_pci_attrs[] = { NULL, }; -static const struct attribute_group nvme_pci_attr_group = { +static const struct attribute_group nvme_pci_dev_attrs_group = { .attrs = nvme_pci_attrs, .is_visible = nvme_pci_attrs_are_visible, }; +static const struct attribute_group *nvme_pci_dev_attr_groups[] = { + &nvme_dev_attrs_group, + &nvme_pci_dev_attrs_group, + NULL, +}; + /* * nirqs is the number of interrupts available for write and read * queues. The core already reserved an interrupt for the admin queue. @@ -2930,10 +2934,6 @@ static void nvme_reset_work(struct work_struct *work) goto out; } - if (!dev->attrs_added && !sysfs_create_group(&dev->ctrl.device->kobj, - &nvme_pci_attr_group)) - dev->attrs_added = true; - nvme_start_ctrl(&dev->ctrl); return; @@ -3006,6 +3006,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .name = "pcie", .module = THIS_MODULE, .flags = NVME_F_METADATA_SUPPORTED, + .dev_attr_groups = nvme_pci_dev_attr_groups, .reg_read32 = nvme_pci_reg_read32, .reg_write32 = nvme_pci_reg_write32, .reg_read64 = nvme_pci_reg_read64, @@ -3204,13 +3205,6 @@ static void nvme_shutdown(struct pci_dev *pdev) nvme_disable_prepare_reset(dev, true); } -static void nvme_remove_attrs(struct nvme_dev *dev) -{ - if (dev->attrs_added) - sysfs_remove_group(&dev->ctrl.device->kobj, - &nvme_pci_attr_group); -} - /* * The driver's remove may be called on a device in a partially initialized * state. This function must not have any dependencies on the device state in @@ -3232,7 +3226,6 @@ static void nvme_remove(struct pci_dev *pdev) nvme_stop_ctrl(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); nvme_dev_disable(dev, true); - nvme_remove_attrs(dev); nvme_free_host_mem(dev); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); -- cgit From 96ef1be53663a9343dffcf106e2f1b59da4b8799 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2022 09:10:21 +0100 Subject: nvme-pci: put the admin queue in nvme_dev_remove_admin Once the controller is shutdown no one can access the admin queue. Tear it down in nvme_dev_remove_admin, which matches the flow in the other drivers. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Tested-by Gerd Bayer --- drivers/nvme/host/pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c8f6ce5eee1c..f526ad578088 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1747,6 +1747,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) */ nvme_start_admin_queue(&dev->ctrl); blk_mq_destroy_queue(dev->ctrl.admin_q); + blk_put_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); } } @@ -2774,8 +2775,6 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) nvme_dbbuf_dma_free(dev); nvme_free_tagset(dev); - if (dev->ctrl.admin_q) - blk_put_queue(dev->ctrl.admin_q); mempool_destroy(dev->iod_mempool); put_device(dev->dev); kfree(dev->queues); -- cgit From c11b7716d6c96e82d2b404c4520237d968357a0d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2022 09:11:13 +0100 Subject: nvme-pci: move more teardown work to nvme_remove nvme_dbbuf_dma_free frees dma coherent memory, so it must not be called after ->remove has returned. Fortunately there is no way to use it after shutdown as no more I/O is possible so it can be moved. Similarly the iod_mempool can't be used for a device kept alive after shutdown, so move it next to freeing the PRP pools. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Tested-by Gerd Bayer --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f526ad578088..b638f43f2df2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2773,9 +2773,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); - nvme_dbbuf_dma_free(dev); nvme_free_tagset(dev); - mempool_destroy(dev->iod_mempool); put_device(dev->dev); kfree(dev->queues); kfree(dev); @@ -3227,7 +3225,9 @@ static void nvme_remove(struct pci_dev *pdev) nvme_dev_disable(dev, true); nvme_free_host_mem(dev); nvme_dev_remove_admin(dev); + nvme_dbbuf_dma_free(dev); nvme_free_queues(dev, 0); + mempool_destroy(dev->iod_mempool); nvme_release_prp_pools(dev); nvme_dev_unmap(dev); nvme_uninit_ctrl(&dev->ctrl); -- cgit From 081a7d958ce4b65f9aab6e70e65b0b2e0b92297c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2022 09:41:41 +0100 Subject: nvme-pci: factor the iod mempool creation into a helper Add a helper to create the iod mempool. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Tested-by Gerd Bayer --- drivers/nvme/host/pci.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b638f43f2df2..f7dab65bf504 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -390,14 +390,6 @@ static int nvme_pci_npages_sgl(void) PAGE_SIZE); } -static size_t nvme_pci_iod_alloc_size(void) -{ - size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl()); - - return sizeof(__le64 *) * npages + - sizeof(struct scatterlist) * NVME_MAX_SEGS; -} - static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { @@ -2762,6 +2754,22 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) dma_pool_destroy(dev->prp_small_pool); } +static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev) +{ + size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl()); + size_t alloc_size = sizeof(__le64 *) * npages + + sizeof(struct scatterlist) * NVME_MAX_SEGS; + + WARN_ON_ONCE(alloc_size > PAGE_SIZE); + dev->iod_mempool = mempool_create_node(1, + mempool_kmalloc, mempool_kfree, + (void *)alloc_size, GFP_KERNEL, + dev_to_node(dev->dev)); + if (!dev->iod_mempool) + return -ENOMEM; + return 0; +} + static void nvme_free_tagset(struct nvme_dev *dev) { if (dev->tagset.tags) @@ -3087,7 +3095,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) int node, result = -ENOMEM; struct nvme_dev *dev; unsigned long quirks = id->driver_data; - size_t alloc_size; node = dev_to_node(&pdev->dev); if (node == NUMA_NO_NODE) @@ -3132,21 +3139,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) quirks |= NVME_QUIRK_SIMPLE_SUSPEND; } - /* - * Double check that our mempool alloc size will cover the biggest - * command we support. - */ - alloc_size = nvme_pci_iod_alloc_size(); - WARN_ON_ONCE(alloc_size > PAGE_SIZE); - - dev->iod_mempool = mempool_create_node(1, mempool_kmalloc, - mempool_kfree, - (void *) alloc_size, - GFP_KERNEL, node); - if (!dev->iod_mempool) { - result = -ENOMEM; + result = nvme_pci_alloc_iod_mempool(dev); + if (result) goto release_pools; - } result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); -- cgit From 2e87570be9d2746e7c4e7ab1cc18fd3ca7de2768 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2022 09:44:00 +0100 Subject: nvme-pci: factor out a nvme_pci_alloc_dev helper Add a helper that allocates the nvme_dev structure up to the point where we can call nvme_init_ctrl. This pairs with the free_ctrl method and can thus be used to cleanup the teardown path and make it more symmetric. Note that this now calls nvme_init_ctrl a lot earlier during probing, which also means the per-controller character device shows up earlier. Due to the controller state no commnds can be send on it, but it might make sense to delay the cdev registration until nvme_init_ctrl_finish. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Tested-by Gerd Bayer --- drivers/nvme/host/pci.c | 81 ++++++++++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 35 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f7dab65bf504..03c83cd724ec 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2777,6 +2777,7 @@ static void nvme_free_tagset(struct nvme_dev *dev) dev->ctrl.tagset = NULL; } +/* pairs with nvme_pci_alloc_dev */ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); @@ -3090,19 +3091,23 @@ static void nvme_async_probe(void *data, async_cookie_t cookie) nvme_put_ctrl(&dev->ctrl); } -static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, + const struct pci_device_id *id) { - int node, result = -ENOMEM; - struct nvme_dev *dev; unsigned long quirks = id->driver_data; + int node = dev_to_node(&pdev->dev); + struct nvme_dev *dev; + int ret = -ENOMEM; - node = dev_to_node(&pdev->dev); if (node == NUMA_NO_NODE) set_dev_node(&pdev->dev, first_memory_node); dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); if (!dev) - return -ENOMEM; + return NULL; + INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); + INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); + mutex_init(&dev->shutdown_lock); dev->nr_write_queues = write_queues; dev->nr_poll_queues = poll_queues; @@ -3110,25 +3115,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev->queues = kcalloc_node(dev->nr_allocated_queues, sizeof(struct nvme_queue), GFP_KERNEL, node); if (!dev->queues) - goto free; + goto out_free_dev; dev->dev = get_device(&pdev->dev); - pci_set_drvdata(pdev, dev); - - result = nvme_dev_map(dev); - if (result) - goto put_pci; - - INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); - INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); - mutex_init(&dev->shutdown_lock); - - result = nvme_setup_prp_pools(dev); - if (result) - goto unmap; quirks |= check_vendor_combination_bug(pdev); - if (!noacpi && acpi_storage_d3(&pdev->dev)) { /* * Some systems use a bios work around to ask for D3 on @@ -3138,34 +3129,54 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) "platform quirk: setting simple suspend\n"); quirks |= NVME_QUIRK_SIMPLE_SUSPEND; } + ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, + quirks); + if (ret) + goto out_put_device; + return dev; - result = nvme_pci_alloc_iod_mempool(dev); +out_put_device: + put_device(dev->dev); + kfree(dev->queues); +out_free_dev: + kfree(dev); + return ERR_PTR(ret); +} + +static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct nvme_dev *dev; + int result = -ENOMEM; + + dev = nvme_pci_alloc_dev(pdev, id); + if (!dev) + return -ENOMEM; + + result = nvme_dev_map(dev); if (result) - goto release_pools; + goto out_uninit_ctrl; - result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, - quirks); + result = nvme_setup_prp_pools(dev); + if (result) + goto out_dev_unmap; + + result = nvme_pci_alloc_iod_mempool(dev); if (result) - goto release_mempool; + goto out_release_prp_pools; dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); + pci_set_drvdata(pdev, dev); nvme_reset_ctrl(&dev->ctrl); async_schedule(nvme_async_probe, dev); - return 0; - release_mempool: - mempool_destroy(dev->iod_mempool); - release_pools: +out_release_prp_pools: nvme_release_prp_pools(dev); - unmap: +out_dev_unmap: nvme_dev_unmap(dev); - put_pci: - put_device(dev->dev); - free: - kfree(dev->queues); - kfree(dev); +out_uninit_ctrl: + nvme_uninit_ctrl(&dev->ctrl); return result; } -- cgit From 3f30a79c2e2c7d5ad14dfc372adb248fc239c6c1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2022 09:48:43 +0100 Subject: nvme-pci: set constant paramters in nvme_pci_alloc_ctrl Move setting of low-level constant parameters from nvme_reset_work to nvme_pci_alloc_ctrl. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Tested-by Gerd Bayer --- drivers/nvme/host/pci.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 03c83cd724ec..9dcb35f14800 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2841,21 +2841,6 @@ static void nvme_reset_work(struct work_struct *work) nvme_start_admin_queue(&dev->ctrl); } - dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); - - /* - * Limit the max command size to prevent iod->sg allocations going - * over a single page. - */ - dev->ctrl.max_hw_sectors = min_t(u32, - NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9); - dev->ctrl.max_segments = NVME_MAX_SEGS; - - /* - * Don't limit the IOMMU merged segment size. - */ - dma_set_max_seg_size(dev->dev, 0xffffffff); - mutex_unlock(&dev->shutdown_lock); /* @@ -2869,12 +2854,6 @@ static void nvme_reset_work(struct work_struct *work) goto out; } - /* - * We do not support an SGL for metadata (yet), so we are limited to a - * single integrity segment for the separate metadata pointer. - */ - dev->ctrl.max_integrity_segments = 1; - result = nvme_init_ctrl_finish(&dev->ctrl, was_suspend); if (result) goto out; @@ -3133,6 +3112,23 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, quirks); if (ret) goto out_put_device; + + dma_set_min_align_mask(&pdev->dev, NVME_CTRL_PAGE_SIZE - 1); + dma_set_max_seg_size(&pdev->dev, 0xffffffff); + + /* + * Limit the max command size to prevent iod->sg allocations going + * over a single page. + */ + dev->ctrl.max_hw_sectors = min_t(u32, + NVME_MAX_KB_SZ << 1, dma_max_mapping_size(&pdev->dev) >> 9); + dev->ctrl.max_segments = NVME_MAX_SEGS; + + /* + * There is no support for SGLs for metadata (yet), so we are limited to + * a single integrity segment for the separate metadata pointer. + */ + dev->ctrl.max_integrity_segments = 1; return dev; out_put_device: -- cgit From a6ee7f19ebfd158ffb3a6ebacf20ae43549bed05 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Oct 2022 03:28:16 -0700 Subject: nvme-pci: call nvme_pci_configure_admin_queue from nvme_pci_enable nvme_pci_configure_admin_queue is called right after nvme_pci_enable, and it's work is undone by nvme_dev_disable. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Tested-by Gerd Bayer --- drivers/nvme/host/pci.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9dcb35f14800..c2e3a87237da 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2639,7 +2639,8 @@ static int nvme_pci_enable(struct nvme_dev *dev) pci_enable_pcie_error_reporting(pdev); pci_save_state(pdev); - return 0; + + return nvme_pci_configure_admin_queue(dev); disable: pci_disable_device(pdev); @@ -2829,10 +2830,6 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out_unlock; - result = nvme_pci_configure_admin_queue(dev); - if (result) - goto out_unlock; - if (!dev->ctrl.admin_q) { result = nvme_pci_alloc_admin_tag_set(dev); if (result) -- cgit From 65a54646420e1409760c2b9f0e1a5e5feca1364e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2022 10:56:49 +0100 Subject: nvme-pci: simplify nvme_dbbuf_dma_alloc Move the OACS check and the error checking into nvme_dbbuf_dma_alloc so that an upcoming second caller doesn't have to duplicate this boilerplate code. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c2e3a87237da..4da339690ec6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -239,10 +239,13 @@ static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) return dev->nr_allocated_queues * 8 * dev->db_stride; } -static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) +static void nvme_dbbuf_dma_alloc(struct nvme_dev *dev) { unsigned int mem_size = nvme_dbbuf_size(dev); + if (!(dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP)) + return; + if (dev->dbbuf_dbs) { /* * Clear the dbbuf memory so the driver doesn't observe stale @@ -250,25 +253,27 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) */ memset(dev->dbbuf_dbs, 0, mem_size); memset(dev->dbbuf_eis, 0, mem_size); - return 0; + return; } dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size, &dev->dbbuf_dbs_dma_addr, GFP_KERNEL); if (!dev->dbbuf_dbs) - return -ENOMEM; + goto fail; dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size, &dev->dbbuf_eis_dma_addr, GFP_KERNEL); - if (!dev->dbbuf_eis) { - dma_free_coherent(dev->dev, mem_size, - dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); - dev->dbbuf_dbs = NULL; - return -ENOMEM; - } + if (!dev->dbbuf_eis) + goto fail_free_dbbuf_dbs; + return; - return 0; +fail_free_dbbuf_dbs: + dma_free_coherent(dev->dev, mem_size, dev->dbbuf_dbs, + dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; +fail: + dev_warn(dev->dev, "unable to allocate dma for dbbuf\n"); } static void nvme_dbbuf_dma_free(struct nvme_dev *dev) @@ -2855,12 +2860,7 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; - if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) { - result = nvme_dbbuf_dma_alloc(dev); - if (result) - dev_warn(dev->dev, - "unable to allocate dma for dbbuf\n"); - } + nvme_dbbuf_dma_alloc(dev); if (dev->ctrl.hmpre) { result = nvme_setup_host_mem(dev); -- cgit From acb71e53bb47a08731aa77497cd3f1871cba59c3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 Nov 2022 12:05:58 +0100 Subject: nvme-pci: move the HMPRE check into nvme_setup_host_mem Check that a HMB is wanted into the allocation helper instead of the caller. This makes life simpler for an upcoming second caller. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4da339690ec6..57fb88396fd8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2102,6 +2102,9 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) u32 enable_bits = NVME_HOST_MEM_ENABLE; int ret; + if (!dev->ctrl.hmpre) + return 0; + preferred = min(preferred, max); if (min > max) { dev_warn(dev->ctrl.device, @@ -2862,11 +2865,9 @@ static void nvme_reset_work(struct work_struct *work) nvme_dbbuf_dma_alloc(dev); - if (dev->ctrl.hmpre) { - result = nvme_setup_host_mem(dev); - if (result < 0) - goto out; - } + result = nvme_setup_host_mem(dev); + if (result < 0) + goto out; result = nvme_setup_io_queues(dev); if (result) -- cgit From eac3ef262941f62fe01bc357911fea4847183333 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 Nov 2022 12:07:04 +0100 Subject: nvme-pci: split the initial probe from the rest path nvme_reset_work is a little fragile as it needs to handle both resetting a live controller and initializing one during probe. Split out the initial probe and open code it in nvme_probe and leave nvme_reset_work to just do the live controller reset. This fixes a recently introduced bug where nvme_dev_disable causes a NULL pointer dereferences in blk_mq_quiesce_tagset because the tagset pointer is not set when the reset state is entered directly from the new state. The separate probe code can skip the reset state and probe directly and fixes this. To make sure the system isn't single threaded on enabling nvme controllers, set the PROBE_PREFER_ASYNCHRONOUS flag in the device_driver structure so that the driver core probes in parallel. Fixes: 98d81f0df70c ("nvme: use blk_mq_[un]quiesce_tagset") Reported-by: Gerd Bayer Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Tested-by Gerd Bayer --- drivers/nvme/host/pci.c | 133 +++++++++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 53 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 57fb88396fd8..2521c00a0af6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2837,15 +2837,7 @@ static void nvme_reset_work(struct work_struct *work) result = nvme_pci_enable(dev); if (result) goto out_unlock; - - if (!dev->ctrl.admin_q) { - result = nvme_pci_alloc_admin_tag_set(dev); - if (result) - goto out_unlock; - } else { - nvme_start_admin_queue(&dev->ctrl); - } - + nvme_start_admin_queue(&dev->ctrl); mutex_unlock(&dev->shutdown_lock); /* @@ -2873,37 +2865,23 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; - if (dev->ctrl.tagset) { - /* - * This is a controller reset and we already have a tagset. - * Freeze and update the number of I/O queues as thos might have - * changed. If there are no I/O queues left after this reset, - * keep the controller around but remove all namespaces. - */ - if (dev->online_queues > 1) { - nvme_start_queues(&dev->ctrl); - nvme_wait_freeze(&dev->ctrl); - nvme_pci_update_nr_queues(dev); - nvme_dbbuf_set(dev); - nvme_unfreeze(&dev->ctrl); - } else { - dev_warn(dev->ctrl.device, "IO queues lost\n"); - nvme_mark_namespaces_dead(&dev->ctrl); - nvme_start_queues(&dev->ctrl); - nvme_remove_namespaces(&dev->ctrl); - nvme_free_tagset(dev); - } + /* + * Freeze and update the number of I/O queues as thos might have + * changed. If there are no I/O queues left after this reset, keep the + * controller around but remove all namespaces. + */ + if (dev->online_queues > 1) { + nvme_start_queues(&dev->ctrl); + nvme_wait_freeze(&dev->ctrl); + nvme_pci_update_nr_queues(dev); + nvme_dbbuf_set(dev); + nvme_unfreeze(&dev->ctrl); } else { - /* - * First probe. Still allow the controller to show up even if - * there are no namespaces. - */ - if (dev->online_queues > 1) { - nvme_pci_alloc_tag_set(dev); - nvme_dbbuf_set(dev); - } else { - dev_warn(dev->ctrl.device, "IO queues not created\n"); - } + dev_warn(dev->ctrl.device, "IO queues lost\n"); + nvme_mark_namespaces_dead(&dev->ctrl); + nvme_start_queues(&dev->ctrl); + nvme_remove_namespaces(&dev->ctrl); + nvme_free_tagset(dev); } /* @@ -3059,15 +3037,6 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) return 0; } -static void nvme_async_probe(void *data, async_cookie_t cookie) -{ - struct nvme_dev *dev = data; - - flush_work(&dev->ctrl.reset_work); - flush_work(&dev->ctrl.scan_work); - nvme_put_ctrl(&dev->ctrl); -} - static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -3159,12 +3128,69 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto out_release_prp_pools; dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); + + result = nvme_pci_enable(dev); + if (result) + goto out_release_iod_mempool; + + result = nvme_pci_alloc_admin_tag_set(dev); + if (result) + goto out_disable; + + /* + * Mark the controller as connecting before sending admin commands to + * allow the timeout handler to do the right thing. + */ + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { + dev_warn(dev->ctrl.device, + "failed to mark controller CONNECTING\n"); + result = -EBUSY; + goto out_disable; + } + + result = nvme_init_ctrl_finish(&dev->ctrl, false); + if (result) + goto out_disable; + + nvme_dbbuf_dma_alloc(dev); + + result = nvme_setup_host_mem(dev); + if (result < 0) + goto out_disable; + + result = nvme_setup_io_queues(dev); + if (result) + goto out_disable; + + if (dev->online_queues > 1) { + nvme_pci_alloc_tag_set(dev); + nvme_dbbuf_set(dev); + } else { + dev_warn(dev->ctrl.device, "IO queues not created\n"); + } + + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { + dev_warn(dev->ctrl.device, + "failed to mark controller live state\n"); + result = -ENODEV; + goto out_disable; + } + pci_set_drvdata(pdev, dev); - nvme_reset_ctrl(&dev->ctrl); - async_schedule(nvme_async_probe, dev); + nvme_start_ctrl(&dev->ctrl); + nvme_put_ctrl(&dev->ctrl); return 0; +out_disable: + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); + nvme_dev_disable(dev, true); + nvme_free_host_mem(dev); + nvme_dev_remove_admin(dev); + nvme_dbbuf_dma_free(dev); + nvme_free_queues(dev, 0); +out_release_iod_mempool: + mempool_destroy(dev->iod_mempool); out_release_prp_pools: nvme_release_prp_pools(dev); out_dev_unmap: @@ -3560,11 +3586,12 @@ static struct pci_driver nvme_driver = { .probe = nvme_probe, .remove = nvme_remove, .shutdown = nvme_shutdown, -#ifdef CONFIG_PM_SLEEP .driver = { - .pm = &nvme_dev_pm_ops, - }, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, +#ifdef CONFIG_PM_SLEEP + .pm = &nvme_dev_pm_ops, #endif + }, .sriov_configure = pci_sriov_configure_simple, .err_handler = &nvme_err_handler, }; -- cgit From c7c16c5b196772aba1d99c4c0b505fe99a6fbba8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2022 15:42:57 +0100 Subject: nvme-pci: don't unbind the driver on reset failure Unbind a device driver when a reset fails is very unusual behavior. Just shut the controller down and leave it in dead state if we fail to reset it. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 40 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 30 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2521c00a0af6..d394498d96de 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -130,7 +130,6 @@ struct nvme_dev { u32 db_stride; void __iomem *bar; unsigned long bar_mapped_size; - struct work_struct remove_work; struct mutex shutdown_lock; bool subsystem; u64 cmb_size; @@ -2797,20 +2796,6 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) kfree(dev); } -static void nvme_remove_dead_ctrl(struct nvme_dev *dev) -{ - /* - * Set state to deleting now to avoid blocking nvme_wait_reset(), which - * may be holding this pci_dev's device lock. - */ - nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); - nvme_get_ctrl(&dev->ctrl); - nvme_dev_disable(dev, false); - nvme_mark_namespaces_dead(&dev->ctrl); - if (!queue_work(nvme_wq, &dev->remove_work)) - nvme_put_ctrl(&dev->ctrl); -} - static void nvme_reset_work(struct work_struct *work) { struct nvme_dev *dev = @@ -2901,20 +2886,16 @@ static void nvme_reset_work(struct work_struct *work) out_unlock: mutex_unlock(&dev->shutdown_lock); out: - if (result) - dev_warn(dev->ctrl.device, - "Removing after probe failure status: %d\n", result); - nvme_remove_dead_ctrl(dev); -} - -static void nvme_remove_dead_ctrl_work(struct work_struct *work) -{ - struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); - struct pci_dev *pdev = to_pci_dev(dev->dev); - - if (pci_get_drvdata(pdev)) - device_release_driver(&pdev->dev); - nvme_put_ctrl(&dev->ctrl); + /* + * Set state to deleting now to avoid blocking nvme_wait_reset(), which + * may be holding this pci_dev's device lock. + */ + dev_warn(dev->ctrl.device, "Disabling device after reset failure: %d\n", + result); + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); + nvme_dev_disable(dev, true); + nvme_mark_namespaces_dead(&dev->ctrl); + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); } static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) @@ -3052,7 +3033,6 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, if (!dev) return NULL; INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); - INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); mutex_init(&dev->shutdown_lock); dev->nr_write_queues = write_queues; -- cgit From 9f27bd701d18f012646a06bc6c1b35d81f30359b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Nov 2022 11:22:14 +0100 Subject: nvme: rename the queue quiescing helpers Naming the nvme helpers that wrap the block quiesce functionality _start/_stop is rather confusing. Switch to using the quiesce naming used by the block layer instead. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d394498d96de..bd5fcdc9211c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1481,7 +1481,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) nvmeq->dev->online_queues--; if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) - nvme_stop_admin_queue(&nvmeq->dev->ctrl); + nvme_quiesce_admin_queue(&nvmeq->dev->ctrl); if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags)) pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); return 0; @@ -1741,7 +1741,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) * user requests may be waiting on a stopped queue. Start the * queue to flush these to completion. */ - nvme_start_admin_queue(&dev->ctrl); + nvme_unquiesce_admin_queue(&dev->ctrl); blk_mq_destroy_queue(dev->ctrl.admin_q); blk_put_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); @@ -2703,7 +2703,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (!dead && shutdown && freeze) nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); - nvme_stop_queues(&dev->ctrl); + nvme_quiesce_io_queues(&dev->ctrl); if (!dead && dev->ctrl.queue_count > 0) { nvme_disable_io_queues(dev); @@ -2723,9 +2723,9 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) * deadlocking blk-mq hot-cpu notifier. */ if (shutdown) { - nvme_start_queues(&dev->ctrl); + nvme_unquiesce_io_queues(&dev->ctrl); if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) - nvme_start_admin_queue(&dev->ctrl); + nvme_unquiesce_admin_queue(&dev->ctrl); } mutex_unlock(&dev->shutdown_lock); } @@ -2822,7 +2822,7 @@ static void nvme_reset_work(struct work_struct *work) result = nvme_pci_enable(dev); if (result) goto out_unlock; - nvme_start_admin_queue(&dev->ctrl); + nvme_unquiesce_admin_queue(&dev->ctrl); mutex_unlock(&dev->shutdown_lock); /* @@ -2856,7 +2856,7 @@ static void nvme_reset_work(struct work_struct *work) * controller around but remove all namespaces. */ if (dev->online_queues > 1) { - nvme_start_queues(&dev->ctrl); + nvme_unquiesce_io_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); nvme_pci_update_nr_queues(dev); nvme_dbbuf_set(dev); @@ -2864,7 +2864,7 @@ static void nvme_reset_work(struct work_struct *work) } else { dev_warn(dev->ctrl.device, "IO queues lost\n"); nvme_mark_namespaces_dead(&dev->ctrl); - nvme_start_queues(&dev->ctrl); + nvme_unquiesce_io_queues(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); nvme_free_tagset(dev); } -- cgit From 99722c8aa8b994db15ef60965c33f6a8e399b36c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 24 Nov 2022 22:28:12 +0100 Subject: nvme: use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file () Signed-off-by: Christophe JAILLET Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bd5fcdc9211c..e0da4a6719a7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -2184,7 +2185,7 @@ static ssize_t hmb_store(struct device *dev, struct device_attribute *attr, bool new; int ret; - if (strtobool(buf, &new) < 0) + if (kstrtobool(buf, &new) < 0) return -EINVAL; if (new == ndev->hmb) -- cgit From 6887fc6495f2dfd55e088c982e983815278ee453 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 3 Oct 2022 12:43:43 +0300 Subject: nvme: introduce nvme_start_request In preparation for nvme-multipath IO stats accounting, we want the accounting to happen in a centralized place. The request completion is already centralized, but we need a common helper to request I/O start. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Hannes Reinecke --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e0da4a6719a7..ac734c8f6640 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -907,7 +907,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req) goto out_unmap_data; } - blk_mq_start_request(req); + nvme_start_request(req); return BLK_STS_OK; out_unmap_data: nvme_unmap_data(dev, req); -- cgit From 285b6e9b571714270fa503d0e32f244d15b9f85f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2022 11:20:12 +0100 Subject: nvme: merge nvme_shutdown_ctrl into nvme_disable_ctrl Many of the callers decide which one to use based on a bool argument and there is at least some code to be shared, so merge these two. Also move a comment specific to a single callsite to that callsite. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Hector Martin --- drivers/nvme/host/pci.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ac734c8f6640..84226dce9b3b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1500,11 +1500,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = &dev->queues[0]; - if (shutdown) - nvme_shutdown_ctrl(&dev->ctrl); - else - nvme_disable_ctrl(&dev->ctrl); - + nvme_disable_ctrl(&dev->ctrl, shutdown); nvme_poll_irqdisable(nvmeq); } @@ -1819,7 +1815,14 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO)) writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS); - result = nvme_disable_ctrl(&dev->ctrl); + /* + * If the device has been passed off to us in an enabled state, just + * clear the enabled bit. The spec says we should set the 'shutdown + * notification bits', but doing so may cause the device to complete + * commands to the admin queue ... and we don't know what memory that + * might be pointing at! + */ + result = nvme_disable_ctrl(&dev->ctrl, false); if (result < 0) return result; -- cgit From 47d42d229a181e52b32fa9c80aaa36cf8b6b46fc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 08:59:55 +0100 Subject: nvme-pci: remove nvme_disable_admin_queue nvme_disable_admin_queue has only a single caller, and just calls two other funtions, so remove it to clean up the remove path a little more. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Eric Curtin Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 84226dce9b3b..c6a02210d22b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1496,14 +1496,6 @@ static void nvme_suspend_io_queues(struct nvme_dev *dev) nvme_suspend_queue(&dev->queues[i]); } -static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) -{ - struct nvme_queue *nvmeq = &dev->queues[0]; - - nvme_disable_ctrl(&dev->ctrl, shutdown); - nvme_poll_irqdisable(nvmeq); -} - /* * Called only on a device that has been disabled and after all other threads * that can check this device's completion queues have synced, except @@ -2711,7 +2703,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (!dead && dev->ctrl.queue_count > 0) { nvme_disable_io_queues(dev); - nvme_disable_admin_queue(dev, shutdown); + nvme_disable_ctrl(&dev->ctrl, shutdown); + nvme_poll_irqdisable(&dev->queues[0]); } nvme_suspend_io_queues(dev); nvme_suspend_queue(&dev->queues[0]); -- cgit From c80767f770ed722264688f65c89e420971eb897a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 09:01:30 +0100 Subject: nvme-pci: remove nvme_pci_disable nvme_pci_disable has a single caller, fold it into that. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Eric Curtin Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c6a02210d22b..c3d9b23699d3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2657,18 +2657,6 @@ static void nvme_dev_unmap(struct nvme_dev *dev) pci_release_mem_regions(to_pci_dev(dev->dev)); } -static void nvme_pci_disable(struct nvme_dev *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev->dev); - - pci_free_irq_vectors(pdev); - - if (pci_is_enabled(pdev)) { - pci_disable_pcie_error_reporting(pdev); - pci_disable_device(pdev); - } -} - static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { bool dead = true, freeze = false; @@ -2708,7 +2696,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) } nvme_suspend_io_queues(dev); nvme_suspend_queue(&dev->queues[0]); - nvme_pci_disable(dev); + pci_free_irq_vectors(pdev); + if (pci_is_enabled(pdev)) { + pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); + } nvme_reap_pending_cqes(dev); nvme_cancel_tagset(&dev->ctrl); -- cgit From 10981f23a1b865290a5e21ad43a0f93710271cc6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Nov 2022 10:51:05 +0100 Subject: nvme-pci: cleanup nvme_suspend_queue Remove the unused returne value, pass a dev + qid instead of the queue as that is better for the callers as well as the function itself, and remove the entirely pointless kerneldoc comment. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c3d9b23699d3..2c8dd0f7ef46 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1468,14 +1468,12 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) } } -/** - * nvme_suspend_queue - put queue into suspended state - * @nvmeq: queue to suspend - */ -static int nvme_suspend_queue(struct nvme_queue *nvmeq) +static void nvme_suspend_queue(struct nvme_dev *dev, unsigned int qid) { + struct nvme_queue *nvmeq = &dev->queues[qid]; + if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags)) - return 1; + return; /* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */ mb(); @@ -1484,8 +1482,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) nvme_quiesce_admin_queue(&nvmeq->dev->ctrl); if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags)) - pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); - return 0; + pci_free_irq(to_pci_dev(dev->dev), nvmeq->cq_vector, nvmeq); } static void nvme_suspend_io_queues(struct nvme_dev *dev) @@ -1493,7 +1490,7 @@ static void nvme_suspend_io_queues(struct nvme_dev *dev) int i; for (i = dev->ctrl.queue_count - 1; i > 0; i--) - nvme_suspend_queue(&dev->queues[i]); + nvme_suspend_queue(dev, i); } /* @@ -2695,7 +2692,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_poll_irqdisable(&dev->queues[0]); } nvme_suspend_io_queues(dev); - nvme_suspend_queue(&dev->queues[0]); + nvme_suspend_queue(dev, 0); pci_free_irq_vectors(pdev); if (pci_is_enabled(pdev)) { pci_disable_pcie_error_reporting(pdev); -- cgit From 7d879c90ae6cf58f4a4caad8562d700b95bc1dbe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 09:07:48 +0100 Subject: nvme-pci: rename nvme_disable_io_queues This function really deletes the I/O queues, so rename it to match the functionality. Also move the main wrapper right next to the actual underlying implementation. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2c8dd0f7ef46..1b527377e351 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -109,7 +109,7 @@ struct nvme_dev; struct nvme_queue; static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); -static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode); +static void nvme_delete_io_queues(struct nvme_dev *dev); /* * Represents an NVM Express device. Each nvme_dev is a PCI function. @@ -2310,12 +2310,6 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); } -static void nvme_disable_io_queues(struct nvme_dev *dev) -{ - if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq)) - __nvme_disable_io_queues(dev, nvme_admin_delete_cq); -} - static unsigned int nvme_max_io_queues(struct nvme_dev *dev) { /* @@ -2423,7 +2417,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if (dev->online_queues - 1 < dev->max_qid) { nr_io_queues = dev->online_queues - 1; - nvme_disable_io_queues(dev); + nvme_delete_io_queues(dev); result = nvme_setup_io_queues_trylock(dev); if (result) return result; @@ -2487,7 +2481,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) return 0; } -static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) +static bool __nvme_delete_io_queues(struct nvme_dev *dev, u8 opcode) { int nr_queues = dev->online_queues - 1, sent = 0; unsigned long timeout; @@ -2515,6 +2509,12 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) return true; } +static void nvme_delete_io_queues(struct nvme_dev *dev) +{ + if (__nvme_delete_io_queues(dev, nvme_admin_delete_sq)) + __nvme_delete_io_queues(dev, nvme_admin_delete_cq); +} + static void nvme_pci_alloc_tag_set(struct nvme_dev *dev) { struct blk_mq_tag_set * set = &dev->tagset; @@ -2687,7 +2687,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_quiesce_io_queues(&dev->ctrl); if (!dead && dev->ctrl.queue_count > 0) { - nvme_disable_io_queues(dev); + nvme_delete_io_queues(dev); nvme_disable_ctrl(&dev->ctrl, shutdown); nvme_poll_irqdisable(&dev->queues[0]); } -- cgit From 8cb9f10b7151e308824cdcf3168010d673e7888c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Nov 2022 08:48:13 +0100 Subject: nvme-pci: return early on ctrl state mismatch in nvme_reset_work The only way nvme_reset_work could be called when not in resetting state is if a reset and remove happen near the same time. This should not happen, but if it did we don't want the reset work to disable the controller because the remove is already doing that. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1b527377e351..02940b4f42b1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2792,8 +2792,7 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.state != NVME_CTRL_RESETTING) { dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n", dev->ctrl.state); - result = -ENODEV; - goto out; + return; } /* -- cgit From 68e81eba6763cd834aa8ff9ac0cd6174fa69fa39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 09:09:48 +0100 Subject: nvme-pci: split out a nvme_pci_ctrl_is_dead helper Clean up nvme_dev_disable by splitting the logic to detect if a controller is dead into a separate helper. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 02940b4f42b1..d613b4292c0f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2654,36 +2654,39 @@ static void nvme_dev_unmap(struct nvme_dev *dev) pci_release_mem_regions(to_pci_dev(dev->dev)); } -static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) +static bool nvme_pci_ctrl_is_dead(struct nvme_dev *dev) { - bool dead = true, freeze = false; struct pci_dev *pdev = to_pci_dev(dev->dev); + u32 csts; - mutex_lock(&dev->shutdown_lock); - if (pci_is_enabled(pdev)) { - u32 csts; + if (!pci_is_enabled(pdev) || !pci_device_is_present(pdev)) + return true; + if (pdev->error_state != pci_channel_io_normal) + return true; - if (pci_device_is_present(pdev)) - csts = readl(dev->bar + NVME_REG_CSTS); - else - csts = ~0; + csts = readl(dev->bar + NVME_REG_CSTS); + return (csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY); +} + +static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + bool dead; - if (dev->ctrl.state == NVME_CTRL_LIVE || - dev->ctrl.state == NVME_CTRL_RESETTING) { - freeze = true; + mutex_lock(&dev->shutdown_lock); + dead = nvme_pci_ctrl_is_dead(dev); + if (dev->ctrl.state == NVME_CTRL_LIVE || + dev->ctrl.state == NVME_CTRL_RESETTING) { + if (pci_is_enabled(pdev)) nvme_start_freeze(&dev->ctrl); - } - dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) || - pdev->error_state != pci_channel_io_normal); + /* + * Give the controller a chance to complete all entered requests + * if doing a safe shutdown. + */ + if (!dead && shutdown) + nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); } - /* - * Give the controller a chance to complete all entered requests if - * doing a safe shutdown. - */ - if (!dead && shutdown && freeze) - nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); - nvme_quiesce_io_queues(&dev->ctrl); if (!dead && dev->ctrl.queue_count > 0) { -- cgit From 0da7feaa5913090a2b97177ed3d7fa07b2d1d99c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Nov 2022 17:26:25 +0100 Subject: nvme-pci: use the tagset alloc/free helpers Use the common helpers to allocate and free the tagsets. To make this work the generic nvme_ctrl now needs to be stored in the hctx private data instead of the nvme_dev. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 89 ++++++++++--------------------------------------- 1 file changed, 18 insertions(+), 71 deletions(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d613b4292c0f..f005e8569266 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -398,7 +398,7 @@ static int nvme_pci_npages_sgl(void) static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_dev *dev = data; + struct nvme_dev *dev = to_nvme_dev(data); struct nvme_queue *nvmeq = &dev->queues[0]; WARN_ON(hctx_idx != 0); @@ -411,7 +411,7 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_dev *dev = data; + struct nvme_dev *dev = to_nvme_dev(data); struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); @@ -423,7 +423,7 @@ static int nvme_pci_init_request(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_dev *dev = set->driver_data; + struct nvme_dev *dev = to_nvme_dev(set->driver_data); struct nvme_iod *iod = blk_mq_rq_to_pdu(req); nvme_req(req)->ctrl = &dev->ctrl; @@ -442,7 +442,7 @@ static int queue_irq_offset(struct nvme_dev *dev) static void nvme_pci_map_queues(struct blk_mq_tag_set *set) { - struct nvme_dev *dev = set->driver_data; + struct nvme_dev *dev = to_nvme_dev(set->driver_data); int i, qoff, offset; offset = queue_irq_offset(dev); @@ -1728,39 +1728,10 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) * queue to flush these to completion. */ nvme_unquiesce_admin_queue(&dev->ctrl); - blk_mq_destroy_queue(dev->ctrl.admin_q); - blk_put_queue(dev->ctrl.admin_q); - blk_mq_free_tag_set(&dev->admin_tagset); + nvme_remove_admin_tag_set(&dev->ctrl); } } -static int nvme_pci_alloc_admin_tag_set(struct nvme_dev *dev) -{ - struct blk_mq_tag_set *set = &dev->admin_tagset; - - set->ops = &nvme_mq_admin_ops; - set->nr_hw_queues = 1; - - set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; - set->timeout = NVME_ADMIN_TIMEOUT; - set->numa_node = dev->ctrl.numa_node; - set->cmd_size = sizeof(struct nvme_iod); - set->flags = BLK_MQ_F_NO_SCHED; - set->driver_data = dev; - - if (blk_mq_alloc_tag_set(set)) - return -ENOMEM; - dev->ctrl.admin_tagset = set; - - dev->ctrl.admin_q = blk_mq_init_queue(set); - if (IS_ERR(dev->ctrl.admin_q)) { - blk_mq_free_tag_set(set); - dev->ctrl.admin_q = NULL; - return -ENOMEM; - } - return 0; -} - static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) { return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride); @@ -2515,40 +2486,13 @@ static void nvme_delete_io_queues(struct nvme_dev *dev) __nvme_delete_io_queues(dev, nvme_admin_delete_cq); } -static void nvme_pci_alloc_tag_set(struct nvme_dev *dev) +static unsigned int nvme_pci_nr_maps(struct nvme_dev *dev) { - struct blk_mq_tag_set * set = &dev->tagset; - int ret; - - set->ops = &nvme_mq_ops; - set->nr_hw_queues = dev->online_queues - 1; - set->nr_maps = 1; - if (dev->io_queues[HCTX_TYPE_READ]) - set->nr_maps = 2; if (dev->io_queues[HCTX_TYPE_POLL]) - set->nr_maps = 3; - set->timeout = NVME_IO_TIMEOUT; - set->numa_node = dev->ctrl.numa_node; - set->queue_depth = min_t(unsigned, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; - set->cmd_size = sizeof(struct nvme_iod); - set->flags = BLK_MQ_F_SHOULD_MERGE; - set->driver_data = dev; - - /* - * Some Apple controllers requires tags to be unique - * across admin and IO queue, so reserve the first 32 - * tags of the IO queue. - */ - if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) - set->reserved_tags = NVME_AQ_DEPTH; - - ret = blk_mq_alloc_tag_set(set); - if (ret) { - dev_warn(dev->ctrl.device, - "IO queues tagset allocation failed %d\n", ret); - return; - } - dev->ctrl.tagset = set; + return 3; + if (dev->io_queues[HCTX_TYPE_READ]) + return 2; + return 1; } static void nvme_pci_update_nr_queues(struct nvme_dev *dev) @@ -2770,7 +2714,7 @@ static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev) static void nvme_free_tagset(struct nvme_dev *dev) { if (dev->tagset.tags) - blk_mq_free_tag_set(&dev->tagset); + nvme_remove_io_tag_set(&dev->ctrl); dev->ctrl.tagset = NULL; } @@ -3101,7 +3045,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto out_release_iod_mempool; - result = nvme_pci_alloc_admin_tag_set(dev); + result = nvme_alloc_admin_tag_set(&dev->ctrl, &dev->admin_tagset, + &nvme_mq_admin_ops, sizeof(struct nvme_iod)); if (result) goto out_disable; @@ -3131,12 +3076,14 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto out_disable; if (dev->online_queues > 1) { - nvme_pci_alloc_tag_set(dev); + nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops, + nvme_pci_nr_maps(dev), sizeof(struct nvme_iod)); nvme_dbbuf_set(dev); - } else { - dev_warn(dev->ctrl.device, "IO queues not created\n"); } + if (!dev->ctrl.tagset) + dev_warn(dev->ctrl.device, "IO queues not created\n"); + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { dev_warn(dev->ctrl.device, "failed to mark controller live state\n"); -- cgit