summaryrefslogtreecommitdiff
path: root/drivers/nvme
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-29 11:51:49 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-29 11:51:49 -0800
commit0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0 (patch)
treecefccd67dc1f27bb45830f6b8065dd4a1c05e83b /drivers/nvme
parent9697e9da84299d0d715d515dd2cc48f1eceb277d (diff)
parent796baeeef85a40b3495a907fb7425086e7010102 (diff)
Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "This is the main pull request for block IO related changes for the 4.16 kernel. Nothing major in this pull request, but a good amount of improvements and fixes all over the map. This contains: - BFQ improvements, fixes, and cleanups from Angelo, Chiara, and Paolo. - Support for SMR zones for deadline and mq-deadline from Damien and Christoph. - Set of fixes for bcache by way of Michael Lyle, including fixes from himself, Kent, Rui, Tang, and Coly. - Series from Matias for lightnvm with fixes from Hans Holmberg, Javier, and Matias. Mostly centered around pblk, and the removing rrpc 1.2 in preparation for supporting 2.0. - A couple of NVMe pull requests from Christoph. Nothing major in here, just fixes and cleanups, and support for command tracing from Johannes. - Support for blk-throttle for tracking reads and writes separately. From Joseph Qi. A few cleanups/fixes also for blk-throttle from Weiping. - Series from Mike Snitzer that enables dm to register its queue more logically, something that's alwways been problematic on dm since it's a stacked device. - Series from Ming cleaning up some of the bio accessor use, in preparation for supporting multipage bvecs. - Various fixes from Ming closing up holes around queue mapping and quiescing. - BSD partition fix from Richard Narron, fixing a problem where we can't mount newer (10/11) FreeBSD partitions. - Series from Tejun reworking blk-mq timeout handling. The previous scheme relied on atomic bits, but it had races where we would think a request had timed out if it to reused at the wrong time. - null_blk now supports faking timeouts, to enable us to better exercise and test that functionality separately. From me. - Kill the separate atomic poll bit in the request struct. After this, we don't use the atomic bits on blk-mq anymore at all. From me. - sgl_alloc/free helpers from Bart. - Heavily contended tag case scalability improvement from me. - Various little fixes and cleanups from Arnd, Bart, Corentin, Douglas, Eryu, Goldwyn, and myself" * 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits) block: remove smart1,2.h nvme: add tracepoint for nvme_complete_rq nvme: add tracepoint for nvme_setup_cmd nvme-pci: introduce RECONNECTING state to mark initializing procedure nvme-rdma: remove redundant boolean for inline_data nvme: don't free uuid pointer before printing it nvme-pci: Suspend queues after deleting them bsg: use pr_debug instead of hand crafted macros blk-mq-debugfs: don't allow write on attributes with seq_operations set nvme-pci: Fix queue double allocations block: Set BIO_TRACE_COMPLETION on new bio during split blk-throttle: use queue_is_rq_based block: Remove kblockd_schedule_delayed_work{,_on}() blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly() lib/scatterlist: Fix chaining support in sgl_alloc_order() blk-throttle: track read and write request individually block: add bdev_read_only() checks to common helpers block: fail op_is_write() requests to read-only partitions blk-throttle: export io_serviced_recursive, io_service_bytes_recursive ...
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/Makefile4
-rw-r--r--drivers/nvme/host/core.c134
-rw-r--r--drivers/nvme/host/fabrics.c22
-rw-r--r--drivers/nvme/host/fabrics.h2
-rw-r--r--drivers/nvme/host/fc.c7
-rw-r--r--drivers/nvme/host/lightnvm.c185
-rw-r--r--drivers/nvme/host/multipath.c44
-rw-r--r--drivers/nvme/host/nvme.h9
-rw-r--r--drivers/nvme/host/pci.c216
-rw-r--r--drivers/nvme/host/rdma.c6
-rw-r--r--drivers/nvme/host/trace.c130
-rw-r--r--drivers/nvme/host/trace.h165
-rw-r--r--drivers/nvme/target/Kconfig2
-rw-r--r--drivers/nvme/target/core.c14
-rw-r--r--drivers/nvme/target/fabrics-cmd.c2
-rw-r--r--drivers/nvme/target/fc.c60
-rw-r--r--drivers/nvme/target/fcloop.c244
-rw-r--r--drivers/nvme/target/loop.c3
-rw-r--r--drivers/nvme/target/rdma.c83
19 files changed, 833 insertions, 499 deletions
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index a25fd43650ad..441e67e3a9d7 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y += -I$(src)
+
obj-$(CONFIG_NVME_CORE) += nvme-core.o
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o
@@ -6,6 +9,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o
obj-$(CONFIG_NVME_FC) += nvme-fc.o
nvme-core-y := core.o
+nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 839650e0926a..e8104871cbbf 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -29,6 +29,9 @@
#include <linux/pm_qos.h>
#include <asm/unaligned.h>
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
#include "nvme.h"
#include "fabrics.h"
@@ -65,9 +68,26 @@ static bool streams;
module_param(streams, bool, 0644);
MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
+/*
+ * nvme_wq - hosts nvme related works that are not reset or delete
+ * nvme_reset_wq - hosts nvme reset works
+ * nvme_delete_wq - hosts nvme delete works
+ *
+ * nvme_wq will host works such are scan, aen handling, fw activation,
+ * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq
+ * runs reset works which also flush works hosted on nvme_wq for
+ * serialization purposes. nvme_delete_wq host controller deletion
+ * works which flush reset works for serialization.
+ */
struct workqueue_struct *nvme_wq;
EXPORT_SYMBOL_GPL(nvme_wq);
+struct workqueue_struct *nvme_reset_wq;
+EXPORT_SYMBOL_GPL(nvme_reset_wq);
+
+struct workqueue_struct *nvme_delete_wq;
+EXPORT_SYMBOL_GPL(nvme_delete_wq);
+
static DEFINE_IDA(nvme_subsystems_ida);
static LIST_HEAD(nvme_subsystems);
static DEFINE_MUTEX(nvme_subsystems_lock);
@@ -89,13 +109,13 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
return -EBUSY;
- if (!queue_work(nvme_wq, &ctrl->reset_work))
+ if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
return -EBUSY;
return 0;
}
EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
-static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
{
int ret;
@@ -104,6 +124,7 @@ static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
flush_work(&ctrl->reset_work);
return ret;
}
+EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
static void nvme_delete_ctrl_work(struct work_struct *work)
{
@@ -122,7 +143,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
return -EBUSY;
- if (!queue_work(nvme_wq, &ctrl->delete_work))
+ if (!queue_work(nvme_delete_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
}
@@ -157,13 +178,20 @@ static blk_status_t nvme_error_status(struct request *req)
return BLK_STS_OK;
case NVME_SC_CAP_EXCEEDED:
return BLK_STS_NOSPC;
+ case NVME_SC_LBA_RANGE:
+ return BLK_STS_TARGET;
+ case NVME_SC_BAD_ATTRIBUTES:
case NVME_SC_ONCS_NOT_SUPPORTED:
+ case NVME_SC_INVALID_OPCODE:
+ case NVME_SC_INVALID_FIELD:
+ case NVME_SC_INVALID_NS:
return BLK_STS_NOTSUPP;
case NVME_SC_WRITE_FAULT:
case NVME_SC_READ_ERROR:
case NVME_SC_UNWRITTEN_BLOCK:
case NVME_SC_ACCESS_DENIED:
case NVME_SC_READ_ONLY:
+ case NVME_SC_COMPARE_FAILED:
return BLK_STS_MEDIUM;
case NVME_SC_GUARD_CHECK:
case NVME_SC_APPTAG_CHECK:
@@ -190,8 +218,12 @@ static inline bool nvme_req_needs_retry(struct request *req)
void nvme_complete_rq(struct request *req)
{
- if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
- if (nvme_req_needs_failover(req)) {
+ blk_status_t status = nvme_error_status(req);
+
+ trace_nvme_complete_rq(req);
+
+ if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
+ if (nvme_req_needs_failover(req, status)) {
nvme_failover_req(req);
return;
}
@@ -202,8 +234,7 @@ void nvme_complete_rq(struct request *req)
return;
}
}
-
- blk_mq_end_request(req, nvme_error_status(req));
+ blk_mq_end_request(req, status);
}
EXPORT_SYMBOL_GPL(nvme_complete_rq);
@@ -232,6 +263,15 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
old_state = ctrl->state;
switch (new_state) {
+ case NVME_CTRL_ADMIN_ONLY:
+ switch (old_state) {
+ case NVME_CTRL_RECONNECTING:
+ changed = true;
+ /* FALLTHRU */
+ default:
+ break;
+ }
+ break;
case NVME_CTRL_LIVE:
switch (old_state) {
case NVME_CTRL_NEW:
@@ -247,6 +287,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
+ case NVME_CTRL_ADMIN_ONLY:
changed = true;
/* FALLTHRU */
default:
@@ -266,6 +307,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
case NVME_CTRL_DELETING:
switch (old_state) {
case NVME_CTRL_LIVE:
+ case NVME_CTRL_ADMIN_ONLY:
case NVME_CTRL_RESETTING:
case NVME_CTRL_RECONNECTING:
changed = true;
@@ -591,6 +633,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
}
cmd->common.command_id = req->tag;
+ if (ns)
+ trace_nvme_setup_nvm_cmd(req->q->id, cmd);
+ else
+ trace_nvme_setup_admin_cmd(cmd);
return ret;
}
EXPORT_SYMBOL_GPL(nvme_setup_cmd);
@@ -1217,16 +1263,27 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
#ifdef CONFIG_NVME_MULTIPATH
/* should never be called due to GENHD_FL_HIDDEN */
if (WARN_ON_ONCE(ns->head->disk))
- return -ENXIO;
+ goto fail;
#endif
if (!kref_get_unless_zero(&ns->kref))
- return -ENXIO;
+ goto fail;
+ if (!try_module_get(ns->ctrl->ops->module))
+ goto fail_put_ns;
+
return 0;
+
+fail_put_ns:
+ nvme_put_ns(ns);
+fail:
+ return -ENXIO;
}
static void nvme_release(struct gendisk *disk, fmode_t mode)
{
- nvme_put_ns(disk->private_data);
+ struct nvme_ns *ns = disk->private_data;
+
+ module_put(ns->ctrl->ops->module);
+ nvme_put_ns(ns);
}
static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -2052,6 +2109,22 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = {
NULL,
};
+static int nvme_active_ctrls(struct nvme_subsystem *subsys)
+{
+ int count = 0;
+ struct nvme_ctrl *ctrl;
+
+ mutex_lock(&subsys->lock);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ if (ctrl->state != NVME_CTRL_DELETING &&
+ ctrl->state != NVME_CTRL_DEAD)
+ count++;
+ }
+ mutex_unlock(&subsys->lock);
+
+ return count;
+}
+
static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
{
struct nvme_subsystem *subsys, *found;
@@ -2090,7 +2163,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
* Verify that the subsystem actually supports multiple
* controllers, else bail out.
*/
- if (!(id->cmic & (1 << 1))) {
+ if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
dev_err(ctrl->device,
"ignoring ctrl due to duplicate subnqn (%s).\n",
found->subnqn);
@@ -2257,7 +2330,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
shutdown_timeout, 60);
if (ctrl->shutdown_timeout != shutdown_timeout)
- dev_warn(ctrl->device,
+ dev_info(ctrl->device,
"Shutdown timeout set to %u seconds\n",
ctrl->shutdown_timeout);
} else
@@ -2341,8 +2414,14 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
struct nvme_ctrl *ctrl =
container_of(inode->i_cdev, struct nvme_ctrl, cdev);
- if (ctrl->state != NVME_CTRL_LIVE)
+ switch (ctrl->state) {
+ case NVME_CTRL_LIVE:
+ case NVME_CTRL_ADMIN_ONLY:
+ break;
+ default:
return -EWOULDBLOCK;
+ }
+
file->private_data = ctrl;
return 0;
}
@@ -2606,6 +2685,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
+ [NVME_CTRL_ADMIN_ONLY] = "only-admin",
[NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_RECONNECTING]= "reconnecting",
[NVME_CTRL_DELETING] = "deleting",
@@ -3079,6 +3159,8 @@ static void nvme_scan_work(struct work_struct *work)
if (ctrl->state != NVME_CTRL_LIVE)
return;
+ WARN_ON_ONCE(!ctrl->tagset);
+
if (nvme_identify_ctrl(ctrl, &id))
return;
@@ -3099,8 +3181,7 @@ static void nvme_scan_work(struct work_struct *work)
void nvme_queue_scan(struct nvme_ctrl *ctrl)
{
/*
- * Do not queue new scan work when a controller is reset during
- * removal.
+ * Only new queue scan work when admin and IO queues are both alive
*/
if (ctrl->state == NVME_CTRL_LIVE)
queue_work(nvme_wq, &ctrl->scan_work);
@@ -3477,16 +3558,26 @@ EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
int __init nvme_core_init(void)
{
- int result;
+ int result = -ENOMEM;
nvme_wq = alloc_workqueue("nvme-wq",
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
if (!nvme_wq)
- return -ENOMEM;
+ goto out;
+
+ nvme_reset_wq = alloc_workqueue("nvme-reset-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvme_reset_wq)
+ goto destroy_wq;
+
+ nvme_delete_wq = alloc_workqueue("nvme-delete-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvme_delete_wq)
+ goto destroy_reset_wq;
result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme");
if (result < 0)
- goto destroy_wq;
+ goto destroy_delete_wq;
nvme_class = class_create(THIS_MODULE, "nvme");
if (IS_ERR(nvme_class)) {
@@ -3505,8 +3596,13 @@ destroy_class:
class_destroy(nvme_class);
unregister_chrdev:
unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+destroy_delete_wq:
+ destroy_workqueue(nvme_delete_wq);
+destroy_reset_wq:
+ destroy_workqueue(nvme_reset_wq);
destroy_wq:
destroy_workqueue(nvme_wq);
+out:
return result;
}
@@ -3516,6 +3612,8 @@ void nvme_core_exit(void)
class_destroy(nvme_subsys_class);
class_destroy(nvme_class);
unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+ destroy_workqueue(nvme_delete_wq);
+ destroy_workqueue(nvme_reset_wq);
destroy_workqueue(nvme_wq);
}
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 894c2ccb3891..5dd4ceefed8f 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -493,7 +493,7 @@ EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
*/
int nvmf_register_transport(struct nvmf_transport_ops *ops)
{
- if (!ops->create_ctrl)
+ if (!ops->create_ctrl || !ops->module)
return -EINVAL;
down_write(&nvmf_transports_rwsem);
@@ -739,11 +739,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
- if (uuid_parse(p, &hostid)) {
+ ret = uuid_parse(p, &hostid);
+ if (ret) {
pr_err("Invalid hostid %s\n", p);
ret = -EINVAL;
+ kfree(p);
goto out;
}
+ kfree(p);
break;
case NVMF_OPT_DUP_CONNECT:
opts->duplicate_connect = true;
@@ -869,32 +872,41 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
goto out_unlock;
}
+ if (!try_module_get(ops->module)) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
ret = nvmf_check_required_opts(opts, ops->required_opts);
if (ret)
- goto out_unlock;
+ goto out_module_put;
ret = nvmf_check_allowed_opts(opts, NVMF_ALLOWED_OPTS |
ops->allowed_opts | ops->required_opts);
if (ret)
- goto out_unlock;
+ goto out_module_put;
ctrl = ops->create_ctrl(dev, opts);
if (IS_ERR(ctrl)) {
ret = PTR_ERR(ctrl);
- goto out_unlock;
+ goto out_module_put;
}
if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) {
dev_warn(ctrl->device,
"controller returned incorrect NQN: \"%s\".\n",
ctrl->subsys->subnqn);
+ module_put(ops->module);
up_read(&nvmf_transports_rwsem);
nvme_delete_ctrl_sync(ctrl);
return ERR_PTR(-EINVAL);
}
+ module_put(ops->module);
up_read(&nvmf_transports_rwsem);
return ctrl;
+out_module_put:
+ module_put(ops->module);
out_unlock:
up_read(&nvmf_transports_rwsem);
out_free_opts:
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 9ba614953607..25b19f722f5b 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -108,6 +108,7 @@ struct nvmf_ctrl_options {
* fabric implementation of NVMe fabrics.
* @entry: Used by the fabrics library to add the new
* registration entry to its linked-list internal tree.
+ * @module: Transport module reference
* @name: Name of the NVMe fabric driver implementation.
* @required_opts: sysfs command-line options that must be specified
* when adding a new NVMe controller.
@@ -126,6 +127,7 @@ struct nvmf_ctrl_options {
*/
struct nvmf_transport_ops {
struct list_head entry;
+ struct module *module;
const char *name;
int required_opts;
int allowed_opts;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 794e66e4aa20..99bf51c7e513 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2921,6 +2921,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
nvme_fc_free_queue(&ctrl->queues[0]);
+ /* re-enable the admin_q so anything new can fast fail */
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+
nvme_fc_ctlr_inactive_on_rport(ctrl);
}
@@ -2935,6 +2938,9 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
* waiting for io to terminate
*/
nvme_fc_delete_association(ctrl);
+
+ /* resume the io queues so that things will fast fail */
+ nvme_start_queues(nctrl);
}
static void
@@ -3380,6 +3386,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
static struct nvmf_transport_ops nvme_fc_transport = {
.name = "fc",
+ .module = THIS_MODULE,
.required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
.allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
.create_ctrl = nvme_fc_create_ctrl,
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index ba3d7f3349e5..50ef71ee3d86 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -31,27 +31,10 @@
enum nvme_nvm_admin_opcode {
nvme_nvm_admin_identity = 0xe2,
- nvme_nvm_admin_get_l2p_tbl = 0xea,
nvme_nvm_admin_get_bb_tbl = 0xf2,
nvme_nvm_admin_set_bb_tbl = 0xf1,
};
-struct nvme_nvm_hb_rw {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __u64 rsvd2;
- __le64 metadata;
- __le64 prp1;
- __le64 prp2;
- __le64 spba;
- __le16 length;
- __le16 control;
- __le32 dsmgmt;
- __le64 slba;
-};
-
struct nvme_nvm_ph_rw {
__u8 opcode;
__u8 flags;
@@ -80,19 +63,6 @@ struct nvme_nvm_identity {
__u32 rsvd11[5];
};
-struct nvme_nvm_l2ptbl {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __le32 cdw2[4];
- __le64 prp1;
- __le64 prp2;
- __le64 slba;
- __le32 nlb;
- __le16 cdw14[6];
-};
-
struct nvme_nvm_getbbtbl {
__u8 opcode;
__u8 flags;
@@ -139,9 +109,7 @@ struct nvme_nvm_command {
union {
struct nvme_common_command common;
struct nvme_nvm_identity identity;
- struct nvme_nvm_hb_rw hb_rw;
struct nvme_nvm_ph_rw ph_rw;
- struct nvme_nvm_l2ptbl l2p;
struct nvme_nvm_getbbtbl get_bb;
struct nvme_nvm_setbbtbl set_bb;
struct nvme_nvm_erase_blk erase;
@@ -167,7 +135,7 @@ struct nvme_nvm_id_group {
__u8 num_lun;
__u8 num_pln;
__u8 rsvd1;
- __le16 num_blk;
+ __le16 num_chk;
__le16 num_pg;
__le16 fpg_sz;
__le16 csecs;
@@ -234,11 +202,9 @@ struct nvme_nvm_bb_tbl {
static inline void _nvme_nvm_check_size(void)
{
BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
@@ -249,51 +215,58 @@ static inline void _nvme_nvm_check_size(void)
static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
{
struct nvme_nvm_id_group *src;
- struct nvm_id_group *dst;
+ struct nvm_id_group *grp;
+ int sec_per_pg, sec_per_pl, pg_per_blk;
if (nvme_nvm_id->cgrps != 1)
return -EINVAL;
src = &nvme_nvm_id->groups[0];
- dst = &nvm_id->grp;
-
- dst->mtype = src->mtype;
- dst->fmtype = src->fmtype;
- dst->num_ch = src->num_ch;
- dst->num_lun = src->num_lun;
- dst->num_pln = src->num_pln;
-
- dst->num_pg = le16_to_cpu(src->num_pg);
- dst->num_blk = le16_to_cpu(src->num_blk);
- dst->fpg_sz = le16_to_cpu(src->fpg_sz);
- dst->csecs = le16_to_cpu(src->csecs);
- dst->sos = le16_to_cpu(src->sos);
-
- dst->trdt = le32_to_cpu(src->trdt);
- dst->trdm = le32_to_cpu(src->trdm);
- dst->tprt = le32_to_cpu(src->tprt);
- dst->tprm = le32_to_cpu(src->tprm);
- dst->tbet = le32_to_cpu(src->tbet);
- dst->tbem = le32_to_cpu(src->tbem);
- dst->mpos = le32_to_cpu(src->mpos);
- dst->mccap = le32_to_cpu(src->mccap);
-
- dst->cpar = le16_to_cpu(src->cpar);
-
- if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
- memcpy(dst->lptbl.id, src->lptbl.id, 8);
- dst->lptbl.mlc.num_pairs =
- le16_to_cpu(src->lptbl.mlc.num_pairs);
-
- if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
- pr_err("nvm: number of MLC pairs not supported\n");
- return -EINVAL;
- }
+ grp = &nvm_id->grp;
+
+ grp->mtype = src->mtype;
+ grp->fmtype = src->fmtype;
+
+ grp->num_ch = src->num_ch;
+ grp->num_lun = src->num_lun;
+
+ grp->num_chk = le16_to_cpu(src->num_chk);
+ grp->csecs = le16_to_cpu(src->csecs);
+ grp->sos = le16_to_cpu(src->sos);
+
+ pg_per_blk = le16_to_cpu(src->num_pg);
+ sec_per_pg = le16_to_cpu(src->fpg_sz) / grp->csecs;
+ sec_per_pl = sec_per_pg * src->num_pln;
+ grp->clba = sec_per_pl * pg_per_blk;
+ grp->ws_per_chk = pg_per_blk;
- memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
- dst->lptbl.mlc.num_pairs);
+ grp->mpos = le32_to_cpu(src->mpos);
+ grp->cpar = le16_to_cpu(src->cpar);
+ grp->mccap = le32_to_cpu(src->mccap);
+
+ grp->ws_opt = grp->ws_min = sec_per_pg;
+ grp->ws_seq = NVM_IO_SNGL_ACCESS;
+
+ if (grp->mpos & 0x020202) {
+ grp->ws_seq = NVM_IO_DUAL_ACCESS;
+ grp->ws_opt <<= 1;
+ } else if (grp->mpos & 0x040404) {
+ grp->ws_seq = NVM_IO_QUAD_ACCESS;
+ grp->ws_opt <<= 2;
}
+ grp->trdt = le32_to_cpu(src->trdt);
+ grp->trdm = le32_to_cpu(src->trdm);
+ grp->tprt = le32_to_cpu(src->tprt);
+ grp->tprm = le32_to_cpu(src->tprm);
+ grp->tbet = le32_to_cpu(src->tbet);
+ grp->tbem = le32_to_cpu(src->tbem);
+
+ /* 1.2 compatibility */
+ grp->num_pln = src->num_pln;
+ grp->num_pg = le16_to_cpu(src->num_pg);
+ grp->fpg_sz = le16_to_cpu(src->fpg_sz);
+
return 0;
}
@@ -332,62 +305,6 @@ out:
return ret;
}
-static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
- nvm_l2p_update_fn *update_l2p, void *priv)
-{
- struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_nvm_command c = {};
- u32 len = queue_max_hw_sectors(ns->ctrl->admin_q) << 9;
- u32 nlb_pr_rq = len / sizeof(u64);
- u64 cmd_slba = slba;
- void *entries;
- int ret = 0;
-
- c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl;
- c.l2p.nsid = cpu_to_le32(ns->head->ns_id);
- entries = kmalloc(len, GFP_KERNEL);
- if (!entries)
- return -ENOMEM;
-
- while (nlb) {
- u32 cmd_nlb = min(nlb_pr_rq, nlb);
- u64 elba = slba + cmd_nlb;
-
- c.l2p.slba = cpu_to_le64(cmd_slba);
- c.l2p.nlb = cpu_to_le32(cmd_nlb);
-
- ret = nvme_submit_sync_cmd(ns->ctrl->admin_q,
- (struct nvme_command *)&c, entries, len);
- if (ret) {
- dev_err(ns->ctrl->device,
- "L2P table transfer failed (%d)\n", ret);
- ret = -EIO;
- goto out;
- }
-
- if (unlikely(elba > nvmdev->total_secs)) {
- pr_err("nvm: L2P data from device is out of bounds!\n");
- ret = -EINVAL;
- goto out;
- }
-
- /* Transform physical address to target address space */
- nvm_part_to_tgt(nvmdev, entries, cmd_nlb);
-
- if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
- ret = -EINTR;
- goto out;
- }
-
- cmd_slba += cmd_nlb;
- nlb -= cmd_nlb;
- }
-
-out:
- kfree(entries);
- return ret;
-}
-
static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
u8 *blks)
{
@@ -397,7 +314,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_nvm_command c = {};
struct nvme_nvm_bb_tbl *bb_tbl;
- int nr_blks = geo->blks_per_lun * geo->plane_mode;
+ int nr_blks = geo->nr_chks * geo->plane_mode;
int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
int ret = 0;
@@ -438,7 +355,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
goto out;
}
- memcpy(blks, bb_tbl->blk, geo->blks_per_lun * geo->plane_mode);
+ memcpy(blks, bb_tbl->blk, geo->nr_chks * geo->plane_mode);
out:
kfree(bb_tbl);
return ret;
@@ -474,10 +391,6 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list);
c->ph_rw.control = cpu_to_le16(rqd->flags);
c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1);
-
- if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD)
- c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns,
- rqd->bio->bi_iter.bi_sector));
}
static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
@@ -597,8 +510,6 @@ static void nvme_nvm_dev_dma_free(void *pool, void *addr,
static struct nvm_dev_ops nvme_nvm_dev_ops = {
.identity = nvme_nvm_identity,
- .get_l2p_tbl = nvme_nvm_get_l2p_tbl,
-
.get_bb_tbl = nvme_nvm_get_bb_tbl,
.set_bb_tbl = nvme_nvm_set_bb_tbl,
@@ -883,7 +794,7 @@ static ssize_t nvm_dev_attr_show(struct device *dev,
} else if (strcmp(attr->name, "num_planes") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln);
} else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_blk);
+ return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_chk);
} else if (strcmp(attr->name, "num_pages") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg);
} else if (strcmp(attr->name, "page_size") == 0) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 1218a9fca846..3b211d9e58b8 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -33,51 +33,11 @@ void nvme_failover_req(struct request *req)
kblockd_schedule_work(&ns->head->requeue_work);
}
-bool nvme_req_needs_failover(struct request *req)
+bool nvme_req_needs_failover(struct request *req, blk_status_t error)
{
if (!(req->cmd_flags & REQ_NVME_MPATH))
return false;
-
- switch (nvme_req(req)->status & 0x7ff) {
- /*
- * Generic command status:
- */
- case NVME_SC_INVALID_OPCODE:
- case NVME_SC_INVALID_FIELD:
- case NVME_SC_INVALID_NS:
- case NVME_SC_LBA_RANGE:
- case NVME_SC_CAP_EXCEEDED:
- case NVME_SC_RESERVATION_CONFLICT:
- return false;
-
- /*
- * I/O command set specific error. Unfortunately these values are
- * reused for fabrics commands, but those should never get here.
- */
- case NVME_SC_BAD_ATTRIBUTES:
- case NVME_SC_INVALID_PI:
- case NVME_SC_READ_ONLY:
- case NVME_SC_ONCS_NOT_SUPPORTED:
- WARN_ON_ONCE(nvme_req(req)->cmd->common.opcode ==
- nvme_fabrics_command);
- return false;
-
- /*
- * Media and Data Integrity Errors:
- */
- case NVME_SC_WRITE_FAULT:
- case NVME_SC_READ_ERROR:
- case NVME_SC_GUARD_CHECK:
- case NVME_SC_APPTAG_CHECK:
- case NVME_SC_REFTAG_CHECK:
- case NVME_SC_COMPARE_FAILED:
- case NVME_SC_ACCESS_DENIED:
- case NVME_SC_UNWRITTEN_BLOCK:
- return false;
- }
-
- /* Everything else could be a path failure, so should be retried */
- return true;
+ return blk_path_error(error);
}
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a00eabd06427..8e4550fa08f8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -32,6 +32,8 @@ extern unsigned int admin_timeout;
#define NVME_KATO_GRACE 10
extern struct workqueue_struct *nvme_wq;
+extern struct workqueue_struct *nvme_reset_wq;
+extern struct workqueue_struct *nvme_delete_wq;
enum {
NVME_NS_LBA = 0,
@@ -119,6 +121,7 @@ static inline struct nvme_request *nvme_req(struct request *req)
enum nvme_ctrl_state {
NVME_CTRL_NEW,
NVME_CTRL_LIVE,
+ NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */
NVME_CTRL_RESETTING,
NVME_CTRL_RECONNECTING,
NVME_CTRL_DELETING,
@@ -393,6 +396,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
+int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
@@ -401,7 +405,7 @@ extern const struct block_device_operations nvme_ns_head_ops;
#ifdef CONFIG_NVME_MULTIPATH
void nvme_failover_req(struct request *req);
-bool nvme_req_needs_failover(struct request *req);
+bool nvme_req_needs_failover(struct request *req, blk_status_t error);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns_head *head);
@@ -430,7 +434,8 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
static inline void nvme_failover_req(struct request *req)
{
}
-static inline bool nvme_req_needs_failover(struct request *req)
+static inline bool nvme_req_needs_failover(struct request *req,
+ blk_status_t error)
{
return false;
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4276ebfff22b..6fe7af00a1f4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -75,7 +75,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
* Represents an NVM Express device. Each nvme_dev is a PCI function.
*/
struct nvme_dev {
- struct nvme_queue **queues;
+ struct nvme_queue *queues;
struct blk_mq_tag_set tagset;
struct blk_mq_tag_set admin_tagset;
u32 __iomem *dbs;
@@ -365,7 +365,7 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
struct nvme_dev *dev = data;
- struct nvme_queue *nvmeq = dev->queues[0];
+ struct nvme_queue *nvmeq = &dev->queues[0];
WARN_ON(hctx_idx != 0);
WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);
@@ -387,7 +387,7 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
struct nvme_dev *dev = data;
- struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
+ struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1];
if (!nvmeq->tags)
nvmeq->tags = &dev->tagset.tags[hctx_idx];
@@ -403,7 +403,7 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
struct nvme_dev *dev = set->driver_data;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
- struct nvme_queue *nvmeq = dev->queues[queue_idx];
+ struct nvme_queue *nvmeq = &dev->queues[queue_idx];
BUG_ON(!nvmeq);
iod->nvmeq = nvmeq;
@@ -1044,7 +1044,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
{
struct nvme_dev *dev = to_nvme_dev(ctrl);
- struct nvme_queue *nvmeq = dev->queues[0];
+ struct nvme_queue *nvmeq = &dev->queues[0];
struct nvme_command c;
memset(&c, 0, sizeof(c));
@@ -1138,9 +1138,14 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
*/
bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
- /* If there is a reset ongoing, we shouldn't reset again. */
- if (dev->ctrl.state == NVME_CTRL_RESETTING)
+ /* If there is a reset/reinit ongoing, we shouldn't reset again. */
+ switch (dev->ctrl.state) {
+ case NVME_CTRL_RESETTING:
+ case NVME_CTRL_RECONNECTING:
return false;
+ default:
+ break;
+ }
/* We shouldn't reset unless the controller is on fatal error state
* _or_ if we lost the communication with it.
@@ -1280,7 +1285,6 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
if (nvmeq->sq_cmds)
dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
- kfree(nvmeq);
}
static void nvme_free_queues(struct nvme_dev *dev, int lowest)
@@ -1288,10 +1292,8 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
int i;
for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
- struct nvme_queue *nvmeq = dev->queues[i];
dev->ctrl.queue_count--;
- dev->queues[i] = NULL;
- nvme_free_queue(nvmeq);
+ nvme_free_queue(&dev->queues[i]);
}
}
@@ -1323,12 +1325,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
- struct nvme_queue *nvmeq = dev->queues[0];
-
- if (!nvmeq)
- return;
- if (nvme_suspend_queue(nvmeq))
- return;
+ struct nvme_queue *nvmeq = &dev->queues[0];
if (shutdown)
nvme_shutdown_ctrl(&dev->ctrl);
@@ -1367,7 +1364,7 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
- if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
+ if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
dev->ctrl.page_size);
nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
@@ -1382,13 +1379,13 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
return 0;
}
-static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
- int depth, int node)
+static int nvme_alloc_queue(struct nvme_dev *dev, int qid,
+ int depth, int node)
{
- struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL,
- node);
- if (!nvmeq)
- return NULL;
+ struct nvme_queue *nvmeq = &dev->queues[qid];
+
+ if (dev->ctrl.queue_count > qid)
+ return 0;
nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
&nvmeq->cq_dma_addr, GFP_KERNEL);
@@ -1407,17 +1404,15 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->q_depth = depth;
nvmeq->qid = qid;
nvmeq->cq_vector = -1;
- dev->queues[qid] = nvmeq;
dev->ctrl.queue_count++;
- return nvmeq;
+ return 0;
free_cqdma:
dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes,
nvmeq->cq_dma_addr);
free_nvmeq:
- kfree(nvmeq);
- return NULL;
+ return -ENOMEM;
}
static int queue_request_irq(struct nvme_queue *nvmeq)
@@ -1590,14 +1585,12 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
if (result < 0)
return result;
- nvmeq = dev->queues[0];
- if (!nvmeq) {
- nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
- dev_to_node(dev->dev));
- if (!nvmeq)
- return -ENOMEM;
- }
+ result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
+ dev_to_node(dev->dev));
+ if (result)
+ return result;
+ nvmeq = &dev->queues[0];
aqa = nvmeq->q_depth - 1;
aqa |= aqa << 16;
@@ -1627,7 +1620,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
/* vector == qid - 1, match nvme_create_queue */
- if (!nvme_alloc_queue(dev, i, dev->q_depth,
+ if (nvme_alloc_queue(dev, i, dev->q_depth,
pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
ret = -ENOMEM;
break;
@@ -1636,15 +1629,15 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
max = min(dev->max_qid, dev->ctrl.queue_count - 1);
for (i = dev->online_queues; i <= max; i++) {
- ret = nvme_create_queue(dev->queues[i], i);
+ ret = nvme_create_queue(&dev->queues[i], i);
if (ret)
break;
}
/*
* Ignore failing Create SQ/CQ commands, we can continue with less
- * than the desired aount of queues, and even a controller without
- * I/O queues an still be used to issue admin commands. This might
+ * than the desired amount of queues, and even a controller without
+ * I/O queues can still be used to issue admin commands. This might
* be useful to upgrade a buggy firmware for example.
*/
return ret >= 0 ? 0 : ret;
@@ -1661,30 +1654,40 @@ static ssize_t nvme_cmb_show(struct device *dev,
}
static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL);
-static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
+static u64 nvme_cmb_size_unit(struct nvme_dev *dev)
{
- u64 szu, size, offset;
+ u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK;
+
+ return 1ULL << (12 + 4 * szu);
+}
+
+static u32 nvme_cmb_size(struct nvme_dev *dev)
+{
+ return (dev->cmbsz >> NVME_CMBSZ_SZ_SHIFT) & NVME_CMBSZ_SZ_MASK;
+}
+
+static void nvme_map_cmb(struct nvme_dev *dev)
+{
+ u64 size, offset;
resource_size_t bar_size;
struct pci_dev *pdev = to_pci_dev(dev->dev);
- void __iomem *cmb;
int bar;
dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
- if (!(NVME_CMB_SZ(dev->cmbsz)))
- return NULL;
+ if (!dev->cmbsz)
+ return;
dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
if (!use_cmb_sqes)
- return NULL;
+ return;
- szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
- size = szu * NVME_CMB_SZ(dev->cmbsz);
- offset = szu * NVME_CMB_OFST(dev->cmbloc);
+ size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev);
+ offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc);
bar = NVME_CMB_BIR(dev->cmbloc);
bar_size = pci_resource_len(pdev, bar);
if (offset > bar_size)
- return NULL;
+ return;
/*
* Controllers may support a CMB size larger than their BAR,
@@ -1694,13 +1697,16 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
if (size > bar_size - offset)
size = bar_size - offset;
- cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
- if (!cmb)
- return NULL;
-
+ dev->cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
+ if (!dev->cmb)
+ return;
dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset;
dev->cmb_size = size;
- return cmb;
+
+ if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
+ &dev_attr_cmb.attr, NULL))
+ dev_warn(dev->ctrl.device,
+ "failed to add sysfs attribute for CMB\n");
}
static inline void nvme_release_cmb(struct nvme_dev *dev)
@@ -1768,7 +1774,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
dma_addr_t descs_dma;
int i = 0;
void **bufs;
- u64 size = 0, tmp;
+ u64 size, tmp;
tmp = (preferred + chunk_size - 1);
do_div(tmp, chunk_size);
@@ -1851,7 +1857,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
u64 preferred = (u64)dev->ctrl.hmpre * 4096;
u64 min = (u64)dev->ctrl.hmmin * 4096;
u32 enable_bits = NVME_HOST_MEM_ENABLE;
- int ret = 0;
+ int ret;
preferred = min(preferred, max);
if (min > max) {
@@ -1892,7 +1898,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
- struct nvme_queue *adminq = dev->queues[0];
+ struct nvme_queue *adminq = &dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev);
int result, nr_io_queues;
unsigned long size;
@@ -1905,7 +1911,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (nr_io_queues == 0)
return 0;
- if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) {
+ if (dev->cmb && (dev->cmbsz & NVME_CMBSZ_SQS)) {
result = nvme_cmb_qdepth(dev, nr_io_queues,
sizeof(struct nvme_command));
if (result > 0)
@@ -2005,9 +2011,9 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
return 0;
}
-static void nvme_disable_io_queues(struct nvme_dev *dev, int queues)
+static void nvme_disable_io_queues(struct nvme_dev *dev)
{
- int pass;
+ int pass, queues = dev->online_queues - 1;
unsigned long timeout;
u8 opcode = nvme_admin_delete_sq;
@@ -2018,7 +2024,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev, int queues)
retry:
timeout = ADMIN_TIMEOUT;
for (; i > 0; i--, sent++)
- if (nvme_delete_queue(dev->queues[i], opcode))
+ if (nvme_delete_queue(&dev->queues[i], opcode))
break;
while (sent--) {
@@ -2033,13 +2039,12 @@ static void nvme_disable_io_queues(struct nvme_dev *dev, int queues)
}
/*
- * Return: error value if an error occurred setting up the queues or calling
- * Identify Device. 0 if these succeeded, even if adding some of the
- * namespaces failed. At the moment, these failures are silent. TBD which
- * failures should be reported.
+ * return error value only when tagset allocation failed
*/
static int nvme_dev_add(struct nvme_dev *dev)
{
+ int ret;
+
if (!dev->ctrl.tagset) {
dev->tagset.ops = &nvme_mq_ops;
dev->tagset.nr_hw_queues = dev->online_queues - 1;
@@ -2055,8 +2060,12 @@ static int nvme_dev_add(struct nvme_dev *dev)
dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
dev->tagset.driver_data = dev;
- if (blk_mq_alloc_tag_set(&dev->tagset))
- return 0;
+ ret = blk_mq_alloc_tag_set(&dev->tagset);
+ if (ret) {
+ dev_warn(dev->ctrl.device,
+ "IO queues tagset allocation failed %d\n", ret);
+ return ret;
+ }
dev->ctrl.tagset = &dev->tagset;
nvme_dbbuf_set(dev);
@@ -2122,22 +2131,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
"set queue depth=%u\n", dev->q_depth);
}
- /*
- * CMBs can currently only exist on >=1.2 PCIe devices. We only
- * populate sysfs if a CMB is implemented. Since nvme_dev_attrs_group
- * has no name we can pass NULL as final argument to
- * sysfs_add_file_to_group.
- */
-
- if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) {
- dev->cmb = nvme_map_cmb(dev);
- if (dev->cmb) {
- if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
- &dev_attr_cmb.attr, NULL))
- dev_warn(dev->ctrl.device,
- "failed to add sysfs attribute for CMB\n");
- }
- }
+ nvme_map_cmb(dev);
pci_enable_pcie_error_reporting(pdev);
pci_save_state(pdev);
@@ -2170,7 +2164,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
- int i, queues;
+ int i;
bool dead = true;
struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -2205,21 +2199,13 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
}
nvme_stop_queues(&dev->ctrl);
- queues = dev->online_queues - 1;
- for (i = dev->ctrl.queue_count - 1; i > 0; i--)
- nvme_suspend_queue(dev->queues[i]);
-
- if (dead) {
- /* A device might become IO incapable very soon during
- * probe, before the admin queue is configured. Thus,
- * queue_count can be 0 here.
- */
- if (dev->ctrl.queue_count)
- nvme_suspend_queue(dev->queues[0]);
- } else {
- nvme_disable_io_queues(dev, queues);
+ if (!dead) {
+ nvme_disable_io_queues(dev);
nvme_disable_admin_queue(dev, shutdown);
}
+ for (i = dev->ctrl.queue_count - 1; i >= 0; i--)
+ nvme_suspend_queue(&dev->queues[i]);
+
nvme_pci_disable(dev);
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
@@ -2289,6 +2275,7 @@ static void nvme_reset_work(struct work_struct *work)
container_of(work, struct nvme_dev, ctrl.reset_work);
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result = -ENODEV;
+ enum nvme_ctrl_state new_state = NVME_CTRL_LIVE;
if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
goto out;
@@ -2300,6 +2287,16 @@ static void nvme_reset_work(struct work_struct *work)
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
nvme_dev_disable(dev, false);
+ /*
+ * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
+ * initializing procedure here.
+ */
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RECONNECTING)) {
+ dev_warn(dev->ctrl.device,
+ "failed to mark controller RECONNECTING\n");
+ goto out;
+ }
+
result = nvme_pci_enable(dev);
if (result)
goto out;
@@ -2352,15 +2349,23 @@ static void nvme_reset_work(struct work_struct *work)
dev_warn(dev->ctrl.device, "IO queues not created\n");
nvme_kill_queues(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
+ new_state = NVME_CTRL_ADMIN_ONLY;
} else {
nvme_start_queues(&dev->ctrl);
nvme_wait_freeze(&dev->ctrl);
- nvme_dev_add(dev);
+ /* hit this only when allocate tagset fails */
+ if (nvme_dev_add(dev))
+ new_state = NVME_CTRL_ADMIN_ONLY;
nvme_unfreeze(&dev->ctrl);
}
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
- dev_warn(dev->ctrl.device, "failed to mark controller live\n");
+ /*
+ * If only admin queue live, keep it to do further investigation or
+ * recovery.
+ */
+ if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
+ dev_warn(dev->ctrl.device,
+ "failed to mark controller state %d\n", new_state);
goto out;
}
@@ -2468,8 +2473,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
if (!dev)
return -ENOMEM;
- dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(void *),
- GFP_KERNEL, node);
+
+ dev->queues = kcalloc_node(num_possible_cpus() + 1,
+ sizeof(struct nvme_queue), GFP_KERNEL, node);
if (!dev->queues)
goto free;
@@ -2496,10 +2502,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result)
goto release_pools;
- nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
- queue_work(nvme_wq, &dev->ctrl.reset_work);
+ nvme_reset_ctrl(&dev->ctrl);
+
return 0;
release_pools:
@@ -2523,7 +2529,7 @@ static void nvme_reset_prepare(struct pci_dev *pdev)
static void nvme_reset_done(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_reset_ctrl(&dev->ctrl);
+ nvme_reset_ctrl_sync(&dev->ctrl);
}
static void nvme_shutdown(struct pci_dev *pdev)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2a0bba7f50cf..2bc059f7d73c 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -66,7 +66,6 @@ struct nvme_rdma_request {
struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
u32 num_sge;
int nents;
- bool inline_data;
struct ib_reg_wr reg_wr;
struct ib_cqe reg_cqe;
struct nvme_rdma_queue *queue;
@@ -1092,7 +1091,6 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl));
sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
- req->inline_data = true;
req->num_sge++;
return 0;
}
@@ -1164,7 +1162,6 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
int count, ret;
req->num_sge = 1;
- req->inline_data = false;
refcount_set(&req->ref, 2); /* send and recv completions */
c->common.flags |= NVME_CMD_SGL_METABUF;
@@ -2018,6 +2015,7 @@ out_free_ctrl:
static struct nvmf_transport_ops nvme_rdma_transport = {
.name = "rdma",
+ .module = THIS_MODULE,
.required_opts = NVMF_OPT_TRADDR,
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO,
@@ -2040,7 +2038,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
}
mutex_unlock(&nvme_rdma_ctrl_mutex);
- flush_workqueue(nvme_wq);
+ flush_workqueue(nvme_delete_wq);
}
static struct ib_client nvme_rdma_ib_client = {
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
new file mode 100644
index 000000000000..41944bbef835
--- /dev/null
+++ b/drivers/nvme/host/trace.c
@@ -0,0 +1,130 @@
+/*
+ * NVM Express device driver tracepoints
+ * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <asm/unaligned.h>
+#include "trace.h"
+
+static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u16 sqid = get_unaligned_le16(cdw10);
+ u16 qsize = get_unaligned_le16(cdw10 + 2);
+ u16 sq_flags = get_unaligned_le16(cdw10 + 4);
+ u16 cqid = get_unaligned_le16(cdw10 + 6);
+
+
+ trace_seq_printf(p, "sqid=%u, qsize=%u, sq_flags=0x%x, cqid=%u",
+ sqid, qsize, sq_flags, cqid);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvme_trace_create_cq(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u16 cqid = get_unaligned_le16(cdw10);
+ u16 qsize = get_unaligned_le16(cdw10 + 2);
+ u16 cq_flags = get_unaligned_le16(cdw10 + 4);
+ u16 irq_vector = get_unaligned_le16(cdw10 + 6);
+
+ trace_seq_printf(p, "cqid=%u, qsize=%u, cq_flags=0x%x, irq_vector=%u",
+ cqid, qsize, cq_flags, irq_vector);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvme_trace_admin_identify(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 cns = cdw10[0];
+ u16 ctrlid = get_unaligned_le16(cdw10 + 2);
+
+ trace_seq_printf(p, "cns=%u, ctrlid=%u", cns, ctrlid);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+
+
+static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u64 slba = get_unaligned_le64(cdw10);
+ u16 length = get_unaligned_le16(cdw10 + 8);
+ u16 control = get_unaligned_le16(cdw10 + 10);
+ u32 dsmgmt = get_unaligned_le32(cdw10 + 12);
+ u32 reftag = get_unaligned_le32(cdw10 + 16);
+
+ trace_seq_printf(p,
+ "slba=%llu, len=%u, ctrl=0x%x, dsmgmt=%u, reftag=%u",
+ slba, length, control, dsmgmt, reftag);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvme_trace_dsm(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "nr=%u, attributes=%u",
+ get_unaligned_le32(cdw10),
+ get_unaligned_le32(cdw10 + 4));
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvme_trace_common(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "cdw10=%*ph", 24, cdw10);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
+ u8 opcode, u8 *cdw10)
+{
+ switch (opcode) {
+ case nvme_admin_create_sq:
+ return nvme_trace_create_sq(p, cdw10);
+ case nvme_admin_create_cq:
+ return nvme_trace_create_cq(p, cdw10);
+ case nvme_admin_identify:
+ return nvme_trace_admin_identify(p, cdw10);
+ default:
+ return nvme_trace_common(p, cdw10);
+ }
+}
+
+const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p,
+ u8 opcode, u8 *cdw10)
+{
+ switch (opcode) {
+ case nvme_cmd_read:
+ case nvme_cmd_write:
+ case nvme_cmd_write_zeroes:
+ return nvme_trace_read_write(p, cdw10);
+ case nvme_cmd_dsm:
+ return nvme_trace_dsm(p, cdw10);
+ default:
+ return nvme_trace_common(p, cdw10);
+ }
+}
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
new file mode 100644
index 000000000000..ea91fccd1bc0
--- /dev/null
+++ b/drivers/nvme/host/trace.h
@@ -0,0 +1,165 @@
+/*
+ * NVM Express device driver tracepoints
+ * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nvme
+
+#if !defined(_TRACE_NVME_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NVME_H
+
+#include <linux/nvme.h>
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "nvme.h"
+
+#define nvme_admin_opcode_name(opcode) { opcode, #opcode }
+#define show_admin_opcode_name(val) \
+ __print_symbolic(val, \
+ nvme_admin_opcode_name(nvme_admin_delete_sq), \
+ nvme_admin_opcode_name(nvme_admin_create_sq), \
+ nvme_admin_opcode_name(nvme_admin_get_log_page), \
+ nvme_admin_opcode_name(nvme_admin_delete_cq), \
+ nvme_admin_opcode_name(nvme_admin_create_cq), \
+ nvme_admin_opcode_name(nvme_admin_identify), \
+ nvme_admin_opcode_name(nvme_admin_abort_cmd), \
+ nvme_admin_opcode_name(nvme_admin_set_features), \
+ nvme_admin_opcode_name(nvme_admin_get_features), \
+ nvme_admin_opcode_name(nvme_admin_async_event), \
+ nvme_admin_opcode_name(nvme_admin_ns_mgmt), \
+ nvme_admin_opcode_name(nvme_admin_activate_fw), \
+ nvme_admin_opcode_name(nvme_admin_download_fw), \
+ nvme_admin_opcode_name(nvme_admin_ns_attach), \
+ nvme_admin_opcode_name(nvme_admin_keep_alive), \
+ nvme_admin_opcode_name(nvme_admin_directive_send), \
+ nvme_admin_opcode_name(nvme_admin_directive_recv), \
+ nvme_admin_opcode_name(nvme_admin_dbbuf), \
+ nvme_admin_opcode_name(nvme_admin_format_nvm), \
+ nvme_admin_opcode_name(nvme_admin_security_send), \
+ nvme_admin_opcode_name(nvme_admin_security_recv), \
+ nvme_admin_opcode_name(nvme_admin_sanitize_nvm))
+
+const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode,
+ u8 *cdw10);
+#define __parse_nvme_admin_cmd(opcode, cdw10) \
+ nvme_trace_parse_admin_cmd(p, opcode, cdw10)
+
+#define nvme_opcode_name(opcode) { opcode, #opcode }
+#define show_opcode_name(val) \
+ __print_symbolic(val, \
+ nvme_opcode_name(nvme_cmd_flush), \
+ nvme_opcode_name(nvme_cmd_write), \
+ nvme_opcode_name(nvme_cmd_read), \
+ nvme_opcode_name(nvme_cmd_write_uncor), \
+ nvme_opcode_name(nvme_cmd_compare), \
+ nvme_opcode_name(nvme_cmd_write_zeroes), \
+ nvme_opcode_name(nvme_cmd_dsm), \
+ nvme_opcode_name(nvme_cmd_resv_register), \
+ nvme_opcode_name(nvme_cmd_resv_report), \
+ nvme_opcode_name(nvme_cmd_resv_acquire), \
+ nvme_opcode_name(nvme_cmd_resv_release))
+
+const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode,
+ u8 *cdw10);
+#define __parse_nvme_cmd(opcode, cdw10) \
+ nvme_trace_parse_nvm_cmd(p, opcode, cdw10)
+
+TRACE_EVENT(nvme_setup_admin_cmd,
+ TP_PROTO(struct nvme_command *cmd),
+ TP_ARGS(cmd),
+ TP_STRUCT__entry(
+ __field(u8, opcode)
+ __field(u8, flags)
+ __field(u16, cid)
+ __field(u64, metadata)
+ __array(u8, cdw10, 24)
+ ),
+ TP_fast_assign(
+ __entry->opcode = cmd->common.opcode;
+ __entry->flags = cmd->common.flags;
+ __entry->cid = cmd->common.command_id;
+ __entry->metadata = le64_to_cpu(cmd->common.metadata);
+ memcpy(__entry->cdw10, cmd->common.cdw10,
+ sizeof(__entry->cdw10));
+ ),
+ TP_printk(" cmdid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
+ __entry->cid, __entry->flags, __entry->metadata,
+ show_admin_opcode_name(__entry->opcode),
+ __parse_nvme_admin_cmd(__entry->opcode, __entry->cdw10))
+);
+
+
+TRACE_EVENT(nvme_setup_nvm_cmd,
+ TP_PROTO(int qid, struct nvme_command *cmd),
+ TP_ARGS(qid, cmd),
+ TP_STRUCT__entry(
+ __field(int, qid)
+ __field(u8, opcode)
+ __field(u8, flags)
+ __field(u16, cid)
+ __field(u32, nsid)
+ __field(u64, metadata)
+ __array(u8, cdw10, 24)
+ ),
+ TP_fast_assign(
+ __entry->qid = qid;
+ __entry->opcode = cmd->common.opcode;
+ __entry->flags = cmd->common.flags;
+ __entry->cid = cmd->common.command_id;
+ __entry->nsid = le32_to_cpu(cmd->common.nsid);
+ __entry->metadata = le64_to_cpu(cmd->common.metadata);
+ memcpy(__entry->cdw10, cmd->common.cdw10,
+ sizeof(__entry->cdw10));
+ ),
+ TP_printk("qid=%d, nsid=%u, cmdid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
+ __entry->qid, __entry->nsid, __entry->cid,
+ __entry->flags, __entry->metadata,
+ show_opcode_name(__entry->opcode),
+ __parse_nvme_cmd(__entry->opcode, __entry->cdw10))
+);
+
+TRACE_EVENT(nvme_complete_rq,
+ TP_PROTO(struct request *req),
+ TP_ARGS(req),
+ TP_STRUCT__entry(
+ __field(int, qid)
+ __field(int, cid)
+ __field(u64, result)
+ __field(u8, retries)
+ __field(u8, flags)
+ __field(u16, status)
+ ),
+ TP_fast_assign(
+ __entry->qid = req->q->id;
+ __entry->cid = req->tag;
+ __entry->result = le64_to_cpu(nvme_req(req)->result.u64);
+ __entry->retries = nvme_req(req)->retries;
+ __entry->flags = nvme_req(req)->flags;
+ __entry->status = nvme_req(req)->status;
+ ),
+ TP_printk("cmdid=%u, qid=%d, res=%llu, retries=%u, flags=0x%x, status=%u",
+ __entry->cid, __entry->qid, __entry->result,
+ __entry->retries, __entry->flags, __entry->status)
+
+);
+
+#endif /* _TRACE_NVME_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 03e4ab65fe77..5f4f8b16685f 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -29,6 +29,7 @@ config NVME_TARGET_RDMA
tristate "NVMe over Fabrics RDMA target support"
depends on INFINIBAND
depends on NVME_TARGET
+ select SGL_ALLOC
help
This enables the NVMe RDMA target support, which allows exporting NVMe
devices over RDMA.
@@ -39,6 +40,7 @@ config NVME_TARGET_FC
tristate "NVMe over Fabrics FC target driver"
depends on NVME_TARGET
depends on HAS_DMA
+ select SGL_ALLOC
help
This enables the NVMe FC target support, which allows exporting NVMe
devices over FC.
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b54748ad5f48..0bd737117a80 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -512,6 +512,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->sg_cnt = 0;
req->transfer_len = 0;
req->rsp->status = 0;
+ req->ns = NULL;
/* no support for fused commands yet */
if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
@@ -557,6 +558,8 @@ EXPORT_SYMBOL_GPL(nvmet_req_init);
void nvmet_req_uninit(struct nvmet_req *req)
{
percpu_ref_put(&req->sq->ref);
+ if (req->ns)
+ nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);
@@ -830,7 +833,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
/* Don't accept keep-alive timeout for discovery controllers */
if (kato) {
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
- goto out_free_sqs;
+ goto out_remove_ida;
}
/*
@@ -860,6 +863,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
*ctrlp = ctrl;
return 0;
+out_remove_ida:
+ ida_simple_remove(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
kfree(ctrl->sqs);
out_free_cqs:
@@ -877,21 +882,22 @@ static void nvmet_ctrl_free(struct kref *ref)
struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
struct nvmet_subsys *subsys = ctrl->subsys;
- nvmet_stop_keep_alive_timer(ctrl);
-
mutex_lock(&subsys->lock);
list_del(&ctrl->subsys_entry);
mutex_unlock(&subsys->lock);
+ nvmet_stop_keep_alive_timer(ctrl);
+
flush_work(&ctrl->async_event_work);
cancel_work_sync(&ctrl->fatal_err_work);
ida_simple_remove(&cntlid_ida, ctrl->cntlid);
- nvmet_subsys_put(subsys);
kfree(ctrl->sqs);
kfree(ctrl->cqs);
kfree(ctrl);
+
+ nvmet_subsys_put(subsys);
}
void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index db3bf6b8bf9e..19e9e42ae943 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -225,7 +225,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
goto out_ctrl_put;
}
- pr_info("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
+ pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
out:
kfree(d);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 5fd86039e353..9b39a6cb1935 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1697,31 +1697,12 @@ static int
nvmet_fc_alloc_tgt_pgs(struct nvmet_fc_fcp_iod *fod)
{
struct scatterlist *sg;
- struct page *page;
unsigned int nent;
- u32 page_len, length;
- int i = 0;
- length = fod->req.transfer_len;
- nent = DIV_ROUND_UP(length, PAGE_SIZE);
- sg = kmalloc_array(nent, sizeof(struct scatterlist), GFP_KERNEL);
+ sg = sgl_alloc(fod->req.transfer_len, GFP_KERNEL, &nent);
if (!sg)
goto out;
- sg_init_table(sg, nent);
-
- while (length) {
- page_len = min_t(u32, length, PAGE_SIZE);
-
- page = alloc_page(GFP_KERNEL);
- if (!page)
- goto out_free_pages;
-
- sg_set_page(&sg[i], page, page_len, 0);
- length -= page_len;
- i++;
- }
-
fod->data_sg = sg;
fod->data_sg_cnt = nent;
fod->data_sg_cnt = fc_dma_map_sg(fod->tgtport->dev, sg, nent,
@@ -1731,14 +1712,6 @@ nvmet_fc_alloc_tgt_pgs(struct nvmet_fc_fcp_iod *fod)
return 0;
-out_free_pages:
- while (i > 0) {
- i--;
- __free_page(sg_page(&sg[i]));
- }
- kfree(sg);
- fod->data_sg = NULL;
- fod->data_sg_cnt = 0;
out:
return NVME_SC_INTERNAL;
}
@@ -1746,18 +1719,13 @@ out:
static void
nvmet_fc_free_tgt_pgs(struct nvmet_fc_fcp_iod *fod)
{
- struct scatterlist *sg;
- int count;
-
if (!fod->data_sg || !fod->data_sg_cnt)
return;
fc_dma_unmap_sg(fod->tgtport->dev, fod->data_sg, fod->data_sg_cnt,
((fod->io_dir == NVMET_FCP_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE));
- for_each_sg(fod->data_sg, sg, fod->data_sg_cnt, count)
- __free_page(sg_page(sg));
- kfree(fod->data_sg);
+ sgl_free(fod->data_sg);
fod->data_sg = NULL;
fod->data_sg_cnt = 0;
}
@@ -2522,14 +2490,8 @@ nvmet_fc_add_port(struct nvmet_port *port)
list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) {
if ((tgtport->fc_target_port.node_name == traddr.nn) &&
(tgtport->fc_target_port.port_name == traddr.pn)) {
- /* a FC port can only be 1 nvmet port id */
- if (!tgtport->port) {
- tgtport->port = port;
- port->priv = tgtport;
- nvmet_fc_tgtport_get(tgtport);
- ret = 0;
- } else
- ret = -EALREADY;
+ tgtport->port = port;
+ ret = 0;
break;
}
}
@@ -2540,19 +2502,7 @@ nvmet_fc_add_port(struct nvmet_port *port)
static void
nvmet_fc_remove_port(struct nvmet_port *port)
{
- struct nvmet_fc_tgtport *tgtport = port->priv;
- unsigned long flags;
- bool matched = false;
-
- spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
- if (tgtport->port == port) {
- matched = true;
- tgtport->port = NULL;
- }
- spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
-
- if (matched)
- nvmet_fc_tgtport_put(tgtport);
+ /* nothing to do */
}
static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 6a018a0bd6ce..34712def81b1 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -204,6 +204,10 @@ struct fcloop_lport {
struct completion unreg_done;
};
+struct fcloop_lport_priv {
+ struct fcloop_lport *lport;
+};
+
struct fcloop_rport {
struct nvme_fc_remote_port *remoteport;
struct nvmet_fc_target_port *targetport;
@@ -238,21 +242,32 @@ struct fcloop_lsreq {
int status;
};
+enum {
+ INI_IO_START = 0,
+ INI_IO_ACTIVE = 1,
+ INI_IO_ABORTED = 2,
+ INI_IO_COMPLETED = 3,
+};
+
struct fcloop_fcpreq {
struct fcloop_tport *tport;
struct nvmefc_fcp_req *fcpreq;
spinlock_t reqlock;
u16 status;
+ u32 inistate;
bool active;
bool aborted;
- struct work_struct work;
+ struct kref ref;
+ struct work_struct fcp_rcv_work;
+ struct work_struct abort_rcv_work;
+ struct work_struct tio_done_work;
struct nvmefc_tgt_fcp_req tgt_fcp_req;
};
struct fcloop_ini_fcpreq {
struct nvmefc_fcp_req *fcpreq;
struct fcloop_fcpreq *tfcp_req;
- struct work_struct iniwork;
+ spinlock_t inilock;
};
static inline struct fcloop_lsreq *
@@ -343,17 +358,122 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport,
return 0;
}
-/*
- * FCP IO operation done by initiator abort.
- * call back up initiator "done" flows.
- */
static void
-fcloop_tgt_fcprqst_ini_done_work(struct work_struct *work)
+fcloop_tfcp_req_free(struct kref *ref)
{
- struct fcloop_ini_fcpreq *inireq =
- container_of(work, struct fcloop_ini_fcpreq, iniwork);
+ struct fcloop_fcpreq *tfcp_req =
+ container_of(ref, struct fcloop_fcpreq, ref);
+
+ kfree(tfcp_req);
+}
+
+static void
+fcloop_tfcp_req_put(struct fcloop_fcpreq *tfcp_req)
+{
+ kref_put(&tfcp_req->ref, fcloop_tfcp_req_free);
+}
+
+static int
+fcloop_tfcp_req_get(struct fcloop_fcpreq *tfcp_req)
+{
+ return kref_get_unless_zero(&tfcp_req->ref);
+}
+
+static void
+fcloop_call_host_done(struct nvmefc_fcp_req *fcpreq,
+ struct fcloop_fcpreq *tfcp_req, int status)
+{
+ struct fcloop_ini_fcpreq *inireq = NULL;
+
+ if (fcpreq) {
+ inireq = fcpreq->private;
+ spin_lock(&inireq->inilock);
+ inireq->tfcp_req = NULL;
+ spin_unlock(&inireq->inilock);
+
+ fcpreq->status = status;
+ fcpreq->done(fcpreq);
+ }
+
+ /* release original io reference on tgt struct */
+ fcloop_tfcp_req_put(tfcp_req);
+}
+
+static void
+fcloop_fcp_recv_work(struct work_struct *work)
+{
+ struct fcloop_fcpreq *tfcp_req =
+ container_of(work, struct fcloop_fcpreq, fcp_rcv_work);
+ struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq;
+ int ret = 0;
+ bool aborted = false;
+
+ spin_lock(&tfcp_req->reqlock);
+ switch (tfcp_req->inistate) {
+ case INI_IO_START:
+ tfcp_req->inistate = INI_IO_ACTIVE;
+ break;
+ case INI_IO_ABORTED:
+ aborted = true;
+ break;
+ default:
+ spin_unlock(&tfcp_req->reqlock);
+ WARN_ON(1);
+ return;
+ }
+ spin_unlock(&tfcp_req->reqlock);
+
+ if (unlikely(aborted))
+ ret = -ECANCELED;
+ else
+ ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport,
+ &tfcp_req->tgt_fcp_req,
+ fcpreq->cmdaddr, fcpreq->cmdlen);
+ if (ret)
+ fcloop_call_host_done(fcpreq, tfcp_req, ret);
+
+ return;
+}
+
+static void
+fcloop_fcp_abort_recv_work(struct work_struct *work)
+{
+ struct fcloop_fcpreq *tfcp_req =
+ container_of(work, struct fcloop_fcpreq, abort_rcv_work);
+ struct nvmefc_fcp_req *fcpreq;
+ bool completed = false;
+
+ spin_lock(&tfcp_req->reqlock);
+ fcpreq = tfcp_req->fcpreq;
+ switch (tfcp_req->inistate) {
+ case INI_IO_ABORTED:
+ break;
+ case INI_IO_COMPLETED:
+ completed = true;
+ break;
+ default:
+ spin_unlock(&tfcp_req->reqlock);
+ WARN_ON(1);
+ return;
+ }
+ spin_unlock(&tfcp_req->reqlock);
+
+ if (unlikely(completed)) {
+ /* remove reference taken in original abort downcall */
+ fcloop_tfcp_req_put(tfcp_req);
+ return;
+ }
- inireq->fcpreq->done(inireq->fcpreq);
+ if (tfcp_req->tport->targetport)
+ nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport,
+ &tfcp_req->tgt_fcp_req);
+
+ spin_lock(&tfcp_req->reqlock);
+ tfcp_req->fcpreq = NULL;
+ spin_unlock(&tfcp_req->reqlock);
+
+ fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED);
+ /* call_host_done releases reference for abort downcall */
}
/*
@@ -364,20 +484,15 @@ static void
fcloop_tgt_fcprqst_done_work(struct work_struct *work)
{
struct fcloop_fcpreq *tfcp_req =
- container_of(work, struct fcloop_fcpreq, work);
- struct fcloop_tport *tport = tfcp_req->tport;
+ container_of(work, struct fcloop_fcpreq, tio_done_work);
struct nvmefc_fcp_req *fcpreq;
spin_lock(&tfcp_req->reqlock);
fcpreq = tfcp_req->fcpreq;
+ tfcp_req->inistate = INI_IO_COMPLETED;
spin_unlock(&tfcp_req->reqlock);
- if (tport->remoteport && fcpreq) {
- fcpreq->status = tfcp_req->status;
- fcpreq->done(fcpreq);
- }
-
- kfree(tfcp_req);
+ fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status);
}
@@ -390,7 +505,6 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport,
struct fcloop_rport *rport = remoteport->private;
struct fcloop_ini_fcpreq *inireq = fcpreq->private;
struct fcloop_fcpreq *tfcp_req;
- int ret = 0;
if (!rport->targetport)
return -ECONNREFUSED;
@@ -401,16 +515,20 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport,
inireq->fcpreq = fcpreq;
inireq->tfcp_req = tfcp_req;
- INIT_WORK(&inireq->iniwork, fcloop_tgt_fcprqst_ini_done_work);
+ spin_lock_init(&inireq->inilock);
+
tfcp_req->fcpreq = fcpreq;
tfcp_req->tport = rport->targetport->private;
+ tfcp_req->inistate = INI_IO_START;
spin_lock_init(&tfcp_req->reqlock);
- INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work);
+ INIT_WORK(&tfcp_req->fcp_rcv_work, fcloop_fcp_recv_work);
+ INIT_WORK(&tfcp_req->abort_rcv_work, fcloop_fcp_abort_recv_work);
+ INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work);
+ kref_init(&tfcp_req->ref);
- ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req,
- fcpreq->cmdaddr, fcpreq->cmdlen);
+ schedule_work(&tfcp_req->fcp_rcv_work);
- return ret;
+ return 0;
}
static void
@@ -589,7 +707,7 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport,
{
struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
- schedule_work(&tfcp_req->work);
+ schedule_work(&tfcp_req->tio_done_work);
}
static void
@@ -605,27 +723,47 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
void *hw_queue_handle,
struct nvmefc_fcp_req *fcpreq)
{
- struct fcloop_rport *rport = remoteport->private;
struct fcloop_ini_fcpreq *inireq = fcpreq->private;
- struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req;
+ struct fcloop_fcpreq *tfcp_req;
+ bool abortio = true;
+
+ spin_lock(&inireq->inilock);
+ tfcp_req = inireq->tfcp_req;
+ if (tfcp_req)
+ fcloop_tfcp_req_get(tfcp_req);
+ spin_unlock(&inireq->inilock);
if (!tfcp_req)
/* abort has already been called */
return;
- if (rport->targetport)
- nvmet_fc_rcv_fcp_abort(rport->targetport,
- &tfcp_req->tgt_fcp_req);
-
/* break initiator/target relationship for io */
spin_lock(&tfcp_req->reqlock);
- inireq->tfcp_req = NULL;
- tfcp_req->fcpreq = NULL;
+ switch (tfcp_req->inistate) {
+ case INI_IO_START:
+ case INI_IO_ACTIVE:
+ tfcp_req->inistate = INI_IO_ABORTED;
+ break;
+ case INI_IO_COMPLETED:
+ abortio = false;
+ break;
+ default:
+ spin_unlock(&tfcp_req->reqlock);
+ WARN_ON(1);
+ return;
+ }
spin_unlock(&tfcp_req->reqlock);
- /* post the aborted io completion */
- fcpreq->status = -ECANCELED;
- schedule_work(&inireq->iniwork);
+ if (abortio)
+ /* leave the reference while the work item is scheduled */
+ WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work));
+ else {
+ /*
+ * as the io has already had the done callback made,
+ * nothing more to do. So release the reference taken above
+ */
+ fcloop_tfcp_req_put(tfcp_req);
+ }
}
static void
@@ -657,7 +795,8 @@ fcloop_nport_get(struct fcloop_nport *nport)
static void
fcloop_localport_delete(struct nvme_fc_local_port *localport)
{
- struct fcloop_lport *lport = localport->private;
+ struct fcloop_lport_priv *lport_priv = localport->private;
+ struct fcloop_lport *lport = lport_priv->lport;
/* release any threads waiting for the unreg to complete */
complete(&lport->unreg_done);
@@ -697,7 +836,7 @@ static struct nvme_fc_port_template fctemplate = {
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
.dma_boundary = FCLOOP_DMABOUND_4G,
/* sizes of additional private data for data structures */
- .local_priv_sz = sizeof(struct fcloop_lport),
+ .local_priv_sz = sizeof(struct fcloop_lport_priv),
.remote_priv_sz = sizeof(struct fcloop_rport),
.lsrqst_priv_sz = sizeof(struct fcloop_lsreq),
.fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq),
@@ -714,8 +853,7 @@ static struct nvmet_fc_target_template tgttemplate = {
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
.dma_boundary = FCLOOP_DMABOUND_4G,
/* optional features */
- .target_features = NVMET_FCTGTFEAT_CMD_IN_ISR |
- NVMET_FCTGTFEAT_OPDONE_IN_ISR,
+ .target_features = 0,
/* sizes of additional private data for data structures */
.target_priv_sz = sizeof(struct fcloop_tport),
};
@@ -728,11 +866,17 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr,
struct fcloop_ctrl_options *opts;
struct nvme_fc_local_port *localport;
struct fcloop_lport *lport;
- int ret;
+ struct fcloop_lport_priv *lport_priv;
+ unsigned long flags;
+ int ret = -ENOMEM;
+
+ lport = kzalloc(sizeof(*lport), GFP_KERNEL);
+ if (!lport)
+ return -ENOMEM;
opts = kzalloc(sizeof(*opts), GFP_KERNEL);
if (!opts)
- return -ENOMEM;
+ goto out_free_lport;
ret = fcloop_parse_options(opts, buf);
if (ret)
@@ -752,23 +896,25 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr,
ret = nvme_fc_register_localport(&pinfo, &fctemplate, NULL, &localport);
if (!ret) {
- unsigned long flags;
-
/* success */
- lport = localport->private;
+ lport_priv = localport->private;
+ lport_priv->lport = lport;
+
lport->localport = localport;
INIT_LIST_HEAD(&lport->lport_list);
spin_lock_irqsave(&fcloop_lock, flags);
list_add_tail(&lport->lport_list, &fcloop_lports);
spin_unlock_irqrestore(&fcloop_lock, flags);
-
- /* mark all of the input buffer consumed */
- ret = count;
}
out_free_opts:
kfree(opts);
+out_free_lport:
+ /* free only if we're going to fail */
+ if (ret)
+ kfree(lport);
+
return ret ? ret : count;
}
@@ -790,6 +936,8 @@ __wait_localport_unreg(struct fcloop_lport *lport)
wait_for_completion(&lport->unreg_done);
+ kfree(lport);
+
return ret;
}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 1e21b286f299..7991ec3a17db 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -686,6 +686,7 @@ static struct nvmet_fabrics_ops nvme_loop_ops = {
static struct nvmf_transport_ops nvme_loop_transport = {
.name = "loop",
+ .module = THIS_MODULE,
.create_ctrl = nvme_loop_create_ctrl,
};
@@ -716,7 +717,7 @@ static void __exit nvme_loop_cleanup_module(void)
nvme_delete_ctrl(&ctrl->ctrl);
mutex_unlock(&nvme_loop_ctrl_mutex);
- flush_workqueue(nvme_wq);
+ flush_workqueue(nvme_delete_wq);
}
module_init(nvme_loop_init_module);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 49912909c298..978e169c11bf 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -185,59 +185,6 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
}
-static void nvmet_rdma_free_sgl(struct scatterlist *sgl, unsigned int nents)
-{
- struct scatterlist *sg;
- int count;
-
- if (!sgl || !nents)
- return;
-
- for_each_sg(sgl, sg, nents, count)
- __free_page(sg_page(sg));
- kfree(sgl);
-}
-
-static int nvmet_rdma_alloc_sgl(struct scatterlist **sgl, unsigned int *nents,
- u32 length)
-{
- struct scatterlist *sg;
- struct page *page;
- unsigned int nent;
- int i = 0;
-
- nent = DIV_ROUND_UP(length, PAGE_SIZE);
- sg = kmalloc_array(nent, sizeof(struct scatterlist), GFP_KERNEL);
- if (!sg)
- goto out;
-
- sg_init_table(sg, nent);
-
- while (length) {
- u32 page_len = min_t(u32, length, PAGE_SIZE);
-
- page = alloc_page(GFP_KERNEL);
- if (!page)
- goto out_free_pages;
-
- sg_set_page(&sg[i], page, page_len, 0);
- length -= page_len;
- i++;
- }
- *sgl = sg;
- *nents = nent;
- return 0;
-
-out_free_pages:
- while (i > 0) {
- i--;
- __free_page(sg_page(&sg[i]));
- }
- kfree(sg);
-out:
- return NVME_SC_INTERNAL;
-}
-
static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *c, bool admin)
{
@@ -484,7 +431,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
}
if (rsp->req.sg != &rsp->cmd->inline_sg)
- nvmet_rdma_free_sgl(rsp->req.sg, rsp->req.sg_cnt);
+ sgl_free(rsp->req.sg);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
nvmet_rdma_process_wr_wait_list(queue);
@@ -621,16 +568,14 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
u32 len = get_unaligned_le24(sgl->length);
u32 key = get_unaligned_le32(sgl->key);
int ret;
- u16 status;
/* no data command? */
if (!len)
return 0;
- status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
- len);
- if (status)
- return status;
+ rsp->req.sg = sgl_alloc(len, GFP_KERNEL, &rsp->req.sg_cnt);
+ if (!rsp->req.sg)
+ return NVME_SC_INTERNAL;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
@@ -976,7 +921,7 @@ static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
{
- pr_info("freeing queue %d\n", queue->idx);
+ pr_debug("freeing queue %d\n", queue->idx);
nvmet_sq_destroy(&queue->nvme_sq);
@@ -1558,25 +1503,9 @@ err_ib_client:
static void __exit nvmet_rdma_exit(void)
{
- struct nvmet_rdma_queue *queue;
-
nvmet_unregister_transport(&nvmet_rdma_ops);
-
- flush_scheduled_work();
-
- mutex_lock(&nvmet_rdma_queue_mutex);
- while ((queue = list_first_entry_or_null(&nvmet_rdma_queue_list,
- struct nvmet_rdma_queue, queue_list))) {
- list_del_init(&queue->queue_list);
-
- mutex_unlock(&nvmet_rdma_queue_mutex);
- __nvmet_rdma_queue_disconnect(queue);
- mutex_lock(&nvmet_rdma_queue_mutex);
- }
- mutex_unlock(&nvmet_rdma_queue_mutex);
-
- flush_scheduled_work();
ib_unregister_client(&nvmet_rdma_ib_client);
+ WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list));
ida_destroy(&nvmet_rdma_queue_ida);
}