From 608cc4b14aeadcf3e4dc325fc211b7052e74b50c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Sep 2017 11:45:24 +0200 Subject: nvme: fix lightnvm check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nvme_nvm_ns_supported assumes every device is a pci_dev, which leads to reading an incorrect field, or possible even a dereference of unallocated memory for fabrics controllers. Fix this by introducing a quirk for lighnvm capable devices instead. Signed-off-by: Christoph Hellwig Reviewed-by: Matias Bjørling Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/core.c | 9 +++++---- drivers/nvme/host/lightnvm.c | 26 -------------------------- drivers/nvme/host/nvme.h | 10 +++++----- drivers/nvme/host/pci.c | 4 ++++ 4 files changed, 14 insertions(+), 35 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 277a7a02cba5..8040fc14fd15 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2377,10 +2377,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid); - if (nvme_nvm_ns_supported(ns, id) && - nvme_nvm_register(ns, disk_name, node)) { - dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__); - goto out_free_id; + if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { + if (nvme_nvm_register(ns, disk_name, node)) { + dev_warn(ctrl->device, "LightNVM init failure\n"); + goto out_free_id; + } } disk = alloc_disk_node(0, node); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index c1a28569e843..1f79e3f141e6 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -955,29 +955,3 @@ void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, &nvm_dev_attr_group); } - -/* move to shared place when used in multiple places. */ -#define PCI_VENDOR_ID_CNEX 0x1d1d -#define PCI_DEVICE_ID_CNEX_WL 0x2807 -#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f - -int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) -{ - struct nvme_ctrl *ctrl = ns->ctrl; - /* XXX: this is poking into PCI structures from generic code! */ - struct pci_dev *pdev = to_pci_dev(ctrl->dev); - - /* QEMU NVMe simulator - PCI ID + Vendor specific bit */ - if (pdev->vendor == PCI_VENDOR_ID_CNEX && - pdev->device == PCI_DEVICE_ID_CNEX_QEMU && - id->vs[0] == 0x1) - return 1; - - /* CNEX Labs - PCI ID + Vendor specific bit */ - if (pdev->vendor == PCI_VENDOR_ID_CNEX && - pdev->device == PCI_DEVICE_ID_CNEX_WL && - id->vs[0] == 0x1) - return 1; - - return 0; -} diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a19a587d60ed..b8ba7c85e61b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -75,6 +75,11 @@ enum nvme_quirks { * The deepest sleep state should not be used. */ NVME_QUIRK_NO_DEEPEST_PS = (1 << 5), + + /* + * Supports the LighNVM command set if indicated in vs[1]. + */ + NVME_QUIRK_LIGHTNVM = (1 << 6), }; /* @@ -320,7 +325,6 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); #ifdef CONFIG_NVM -int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); int nvme_nvm_register_sysfs(struct nvme_ns *ns); @@ -339,10 +343,6 @@ static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns) return 0; } static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {}; -static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) -{ - return 0; -} static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 198245faba6b..6ba955ceaec8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2497,6 +2497,10 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ + .driver_data = NVME_QUIRK_LIGHTNVM, }, + { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ + .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, -- cgit From 92dc689563170b90ba844b8a2eb95e8a5eda2e83 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Sep 2017 12:08:43 -0400 Subject: nvme-pci: fix host memory buffer allocation fallback nvme_alloc_host_mem currently contains two loops that are interwinded, and the outer retry loop turns out to be broken. Fix this by untangling the two. Based on a report an initial patch from Akinobu Mita. Signed-off-by: Christoph Hellwig Reported-by: Akinobu Mita Tested-by: Akinobu Mita Reviewed-by: Keith Busch Cc: stable@vger.kernel.org --- drivers/nvme/host/pci.c | 48 ++++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6ba955ceaec8..82e23c867e42 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1612,18 +1612,16 @@ static void nvme_free_host_mem(struct nvme_dev *dev) dev->host_mem_descs = NULL; } -static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) +static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, + u32 chunk_size) { struct nvme_host_mem_buf_desc *descs; - u32 chunk_size, max_entries, len; + u32 max_entries, len; dma_addr_t descs_dma; int i = 0; void **bufs; u64 size = 0, tmp; - /* start big and work our way down */ - chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER); -retry: tmp = (preferred + chunk_size - 1); do_div(tmp, chunk_size); max_entries = tmp; @@ -1650,15 +1648,9 @@ retry: i++; } - if (!size || (min && size < min)) { - dev_warn(dev->ctrl.device, - "failed to allocate host memory buffer.\n"); + if (!size) goto out_free_bufs; - } - dev_info(dev->ctrl.device, - "allocated %lld MiB host memory buffer.\n", - size >> ilog2(SZ_1M)); dev->nr_host_mem_descs = i; dev->host_mem_size = size; dev->host_mem_descs = descs; @@ -1679,15 +1671,28 @@ out_free_descs: dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs, descs_dma); out: - /* try a smaller chunk size if we failed early */ - if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) { - chunk_size /= 2; - goto retry; - } dev->host_mem_descs = NULL; return -ENOMEM; } +static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) +{ + u32 chunk_size; + + /* start big and work our way down */ + for (chunk_size = min_t(u64, preferred, PAGE_SIZE << MAX_ORDER); + chunk_size >= PAGE_SIZE * 2; + chunk_size /= 2) { + if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) { + if (!min || dev->host_mem_size >= min) + return 0; + nvme_free_host_mem(dev); + } + } + + return -ENOMEM; +} + static void nvme_setup_host_mem(struct nvme_dev *dev) { u64 max = (u64)max_host_mem_size_mb * SZ_1M; @@ -1715,8 +1720,15 @@ static void nvme_setup_host_mem(struct nvme_dev *dev) } if (!dev->host_mem_descs) { - if (nvme_alloc_host_mem(dev, min, preferred)) + if (nvme_alloc_host_mem(dev, min, preferred)) { + dev_warn(dev->ctrl.device, + "failed to allocate host memory buffer.\n"); return; + } + + dev_info(dev->ctrl.device, + "allocated %lld MiB host memory buffer.\n", + dev->host_mem_size >> ilog2(SZ_1M)); } if (nvme_set_host_mem(dev, enable_bits)) -- cgit From 30f92d62e5b41a94de2d0bbd677a6ea2fcfed74f Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 6 Sep 2017 12:15:31 +0200 Subject: nvme-pci: use appropriate initial chunk size for HMB allocation The initial chunk size for host memory buffer allocation is currently PAGE_SIZE << MAX_ORDER. MAX_ORDER order allocation is usually failed without CONFIG_DMA_CMA. So the HMB allocation is retried with chunk size PAGE_SIZE << (MAX_ORDER - 1) in general, but there is no problem if the retry allocation works correctly. Signed-off-by: Akinobu Mita [hch: rebased] Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Cc: stable@vger.kernel.org --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 82e23c867e42..a370f702fceb 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1680,7 +1680,7 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) u32 chunk_size; /* start big and work our way down */ - for (chunk_size = min_t(u64, preferred, PAGE_SIZE << MAX_ORDER); + for (chunk_size = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES); chunk_size >= PAGE_SIZE * 2; chunk_size /= 2) { if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) { -- cgit From 9620cfba97a8b88ae91f0e275e8ff110b578bb6e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Sep 2017 12:19:57 +0200 Subject: nvme-pci: propagate (some) errors from host memory buffer setup We want to catch command execution errors when resetting the device, so propagate errors from the Set Features when setting up the host memory buffer. We keep ignoring memory allocation failures, as the spec clearly says that the controller must work without a host memory buffer. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Cc: stable@vger.kernel.org --- drivers/nvme/host/pci.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a370f702fceb..5ed12fbfaad6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1693,12 +1693,13 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) return -ENOMEM; } -static void nvme_setup_host_mem(struct nvme_dev *dev) +static int nvme_setup_host_mem(struct nvme_dev *dev) { u64 max = (u64)max_host_mem_size_mb * SZ_1M; u64 preferred = (u64)dev->ctrl.hmpre * 4096; u64 min = (u64)dev->ctrl.hmmin * 4096; u32 enable_bits = NVME_HOST_MEM_ENABLE; + int ret = 0; preferred = min(preferred, max); if (min > max) { @@ -1706,7 +1707,7 @@ static void nvme_setup_host_mem(struct nvme_dev *dev) "min host memory (%lld MiB) above limit (%d MiB).\n", min >> ilog2(SZ_1M), max_host_mem_size_mb); nvme_free_host_mem(dev); - return; + return 0; } /* @@ -1723,7 +1724,7 @@ static void nvme_setup_host_mem(struct nvme_dev *dev) if (nvme_alloc_host_mem(dev, min, preferred)) { dev_warn(dev->ctrl.device, "failed to allocate host memory buffer.\n"); - return; + return 0; /* controller must work without HMB */ } dev_info(dev->ctrl.device, @@ -1731,8 +1732,10 @@ static void nvme_setup_host_mem(struct nvme_dev *dev) dev->host_mem_size >> ilog2(SZ_1M)); } - if (nvme_set_host_mem(dev, enable_bits)) + ret = nvme_set_host_mem(dev, enable_bits); + if (ret) nvme_free_host_mem(dev); + return ret; } static int nvme_setup_io_queues(struct nvme_dev *dev) @@ -2176,8 +2179,11 @@ static void nvme_reset_work(struct work_struct *work) "unable to allocate dma for dbbuf\n"); } - if (dev->ctrl.hmpre) - nvme_setup_host_mem(dev); + if (dev->ctrl.hmpre) { + result = nvme_setup_host_mem(dev); + if (result < 0) + goto out; + } result = nvme_setup_io_queues(dev); if (result) -- cgit From 044a9df1a7cbb89f48fcc0e9e39997989342966b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Sep 2017 12:09:28 -0400 Subject: nvme-pci: implement the HMB entry number and size limitations Adds support for the new Host Memory Buffer Minimum Descriptor Entry Size and Host Memory Maximum Descriptors Entries field that were added in TP 4002 HMB Enhancements. These allow the controller to advertise limits for the usual number of segments in the host memory buffer, as well as a minimum usable per-segment size. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/core.c | 2 ++ drivers/nvme/host/nvme.h | 3 +++ drivers/nvme/host/pci.c | 6 +++++- 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8040fc14fd15..acc816b67582 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1897,6 +1897,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->cntlid = le16_to_cpu(id->cntlid); ctrl->hmpre = le32_to_cpu(id->hmpre); ctrl->hmmin = le32_to_cpu(id->hmmin); + ctrl->hmminds = le32_to_cpu(id->hmminds); + ctrl->hmmaxd = le16_to_cpu(id->hmmaxd); } kfree(id); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b8ba7c85e61b..d3f3c4447515 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -181,8 +181,11 @@ struct nvme_ctrl { u64 ps_max_latency_us; bool apst_enabled; + /* PCIe only: */ u32 hmpre; u32 hmmin; + u32 hmminds; + u16 hmmaxd; /* Fabrics only */ u16 sqsize; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5ed12fbfaad6..4a2121335f48 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1625,6 +1625,10 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, tmp = (preferred + chunk_size - 1); do_div(tmp, chunk_size); max_entries = tmp; + + if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries) + max_entries = dev->ctrl.hmmaxd; + descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs), &descs_dma, GFP_KERNEL); if (!descs) @@ -1681,7 +1685,7 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) /* start big and work our way down */ for (chunk_size = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES); - chunk_size >= PAGE_SIZE * 2; + chunk_size >= max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2); chunk_size /= 2) { if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) { if (!min || dev->host_mem_size >= min) -- cgit