summaryrefslogtreecommitdiff
path: root/drivers/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/core.c59
-rw-r--r--drivers/nvme/host/fabrics.c7
-rw-r--r--drivers/nvme/host/fabrics.h9
-rw-r--r--drivers/nvme/host/fc.c184
-rw-r--r--drivers/nvme/host/multipath.c43
-rw-r--r--drivers/nvme/host/nvme.h11
-rw-r--r--drivers/nvme/host/pci.c59
-rw-r--r--drivers/nvme/host/rdma.c20
-rw-r--r--drivers/nvme/target/core.c9
-rw-r--r--drivers/nvme/target/io-cmd.c7
-rw-r--r--drivers/nvme/target/loop.c4
11 files changed, 163 insertions, 249 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f431c32774f3..7aeca5db7916 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -120,8 +120,12 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
int ret;
ret = nvme_reset_ctrl(ctrl);
- if (!ret)
+ if (!ret) {
flush_work(&ctrl->reset_work);
+ if (ctrl->state != NVME_CTRL_LIVE)
+ ret = -ENETRESET;
+ }
+
return ret;
}
EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
@@ -265,7 +269,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (new_state) {
case NVME_CTRL_ADMIN_ONLY:
switch (old_state) {
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
changed = true;
/* FALLTHRU */
default:
@@ -276,7 +280,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_RESETTING:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
changed = true;
/* FALLTHRU */
default:
@@ -294,9 +298,9 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
break;
}
break;
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
switch (old_state) {
- case NVME_CTRL_LIVE:
+ case NVME_CTRL_NEW:
case NVME_CTRL_RESETTING:
changed = true;
/* FALLTHRU */
@@ -309,7 +313,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
case NVME_CTRL_LIVE:
case NVME_CTRL_ADMIN_ONLY:
case NVME_CTRL_RESETTING:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
changed = true;
/* FALLTHRU */
default:
@@ -518,9 +522,11 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
- range[n].cattr = cpu_to_le32(0);
- range[n].nlb = cpu_to_le32(nlb);
- range[n].slba = cpu_to_le64(slba);
+ if (n < segments) {
+ range[n].cattr = cpu_to_le32(0);
+ range[n].nlb = cpu_to_le32(nlb);
+ range[n].slba = cpu_to_le64(slba);
+ }
n++;
}
@@ -794,13 +800,9 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
static int nvme_keep_alive(struct nvme_ctrl *ctrl)
{
- struct nvme_command c;
struct request *rq;
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_keep_alive;
-
- rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
+ rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED,
NVME_QID_ANY);
if (IS_ERR(rq))
return PTR_ERR(rq);
@@ -832,6 +834,8 @@ void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
return;
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+ memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
+ ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}
EXPORT_SYMBOL_GPL(nvme_start_keep_alive);
@@ -1117,14 +1121,19 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
static void nvme_update_formats(struct nvme_ctrl *ctrl)
{
- struct nvme_ns *ns;
+ struct nvme_ns *ns, *next;
+ LIST_HEAD(rm_list);
mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
- if (ns->disk && nvme_revalidate_disk(ns->disk))
- nvme_ns_remove(ns);
+ if (ns->disk && nvme_revalidate_disk(ns->disk)) {
+ list_move_tail(&ns->list, &rm_list);
+ }
}
mutex_unlock(&ctrl->namespaces_mutex);
+
+ list_for_each_entry_safe(ns, next, &rm_list, list)
+ nvme_ns_remove(ns);
}
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -2687,7 +2696,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
[NVME_CTRL_LIVE] = "live",
[NVME_CTRL_ADMIN_ONLY] = "only-admin",
[NVME_CTRL_RESETTING] = "resetting",
- [NVME_CTRL_RECONNECTING]= "reconnecting",
+ [NVME_CTRL_CONNECTING] = "connecting",
[NVME_CTRL_DELETING] = "deleting",
[NVME_CTRL_DEAD] = "dead",
};
@@ -2835,7 +2844,7 @@ out:
}
static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
- struct nvme_id_ns *id, bool *new)
+ struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
bool is_shared = id->nmic & (1 << 0);
@@ -2851,8 +2860,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
ret = PTR_ERR(head);
goto out_unlock;
}
-
- *new = true;
} else {
struct nvme_ns_ids ids;
@@ -2864,8 +2871,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
ret = -EINVAL;
goto out_unlock;
}
-
- *new = false;
}
list_add_tail(&ns->siblings, &head->list);
@@ -2936,7 +2941,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
struct nvme_id_ns *id;
char disk_name[DISK_NAME_LEN];
int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
- bool new = true;
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
@@ -2962,7 +2966,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
if (id->ncap == 0)
goto out_free_id;
- if (nvme_init_ns_head(ns, nsid, id, &new))
+ if (nvme_init_ns_head(ns, nsid, id))
goto out_free_id;
nvme_setup_streams_ns(ctrl, ns);
@@ -3028,9 +3032,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
ns->disk->disk_name);
- if (new)
- nvme_mpath_add_disk(ns->head);
- nvme_mpath_add_disk_links(ns);
+ nvme_mpath_add_disk(ns->head);
return;
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
@@ -3050,7 +3052,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
return;
if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
- nvme_mpath_remove_disk_links(ns);
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_id_attr_group);
if (ns->ndev)
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 5dd4ceefed8f..8f0f34d06d46 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -493,7 +493,7 @@ EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
*/
int nvmf_register_transport(struct nvmf_transport_ops *ops)
{
- if (!ops->create_ctrl || !ops->module)
+ if (!ops->create_ctrl)
return -EINVAL;
down_write(&nvmf_transports_rwsem);
@@ -650,6 +650,11 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -EINVAL;
goto out;
}
+ if (opts->discovery_nqn) {
+ pr_debug("Ignoring nr_io_queues value for discovery controller\n");
+ break;
+ }
+
opts->nr_io_queues = min_t(unsigned int,
num_online_cpus(), token);
break;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 25b19f722f5b..a3145d90c1d2 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -171,13 +171,14 @@ static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl,
cmd->common.opcode != nvme_fabrics_command ||
cmd->fabrics.fctype != nvme_fabrics_type_connect) {
/*
- * Reconnecting state means transport disruption, which can take
- * a long time and even might fail permanently, fail fast to
- * give upper layers a chance to failover.
+ * Connecting state means transport disruption or initial
+ * establishment, which can take a long time and even might
+ * fail permanently, fail fast to give upper layers a chance
+ * to failover.
* Deleting state means that the ctrl will never accept commands
* again, fail it permanently.
*/
- if (ctrl->state == NVME_CTRL_RECONNECTING ||
+ if (ctrl->state == NVME_CTRL_CONNECTING ||
ctrl->state == NVME_CTRL_DELETING) {
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index b856d7c919d2..1dc1387b7134 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -55,9 +55,7 @@ struct nvme_fc_queue {
enum nvme_fcop_flags {
FCOP_FLAGS_TERMIO = (1 << 0),
- FCOP_FLAGS_RELEASED = (1 << 1),
- FCOP_FLAGS_COMPLETE = (1 << 2),
- FCOP_FLAGS_AEN = (1 << 3),
+ FCOP_FLAGS_AEN = (1 << 1),
};
struct nvmefc_ls_req_op {
@@ -532,7 +530,7 @@ nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
{
switch (ctrl->ctrl.state) {
case NVME_CTRL_NEW:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
/*
* As all reconnects were suppressed, schedule a
* connect.
@@ -777,7 +775,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
}
break;
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
/*
* The association has already been terminated and the
* controller is attempting reconnects. No need to do anything
@@ -1208,7 +1206,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));
assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
- assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
+ assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1);
/* Linux supports only Dynamic controllers */
assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
@@ -1323,7 +1321,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum);
- conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize);
+ conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1);
lsop->queue = queue;
lsreq->rqstaddr = conn_rqst;
@@ -1470,7 +1468,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
/* *********************** NVME Ctrl Routines **************************** */
-static void __nvme_fc_final_op_cleanup(struct request *rq);
static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
static int
@@ -1512,13 +1509,19 @@ nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq,
static int
__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
{
- int state;
+ unsigned long flags;
+ int opstate;
+
+ spin_lock_irqsave(&ctrl->lock, flags);
+ opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
+ if (opstate != FCPOP_STATE_ACTIVE)
+ atomic_set(&op->state, opstate);
+ else if (ctrl->flags & FCCTRL_TERMIO)
+ ctrl->iocnt++;
+ spin_unlock_irqrestore(&ctrl->lock, flags);
- state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
- if (state != FCPOP_STATE_ACTIVE) {
- atomic_set(&op->state, state);
+ if (opstate != FCPOP_STATE_ACTIVE)
return -ECANCELED;
- }
ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
&ctrl->rport->remoteport,
@@ -1532,60 +1535,26 @@ static void
nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
{
struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
- unsigned long flags;
- int i, ret;
-
- for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
- if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
- continue;
-
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO) {
- ctrl->iocnt++;
- aen_op->flags |= FCOP_FLAGS_TERMIO;
- }
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- ret = __nvme_fc_abort_op(ctrl, aen_op);
- if (ret) {
- /*
- * if __nvme_fc_abort_op failed the io wasn't
- * active. Thus this call path is running in
- * parallel to the io complete. Treat as non-error.
- */
+ int i;
- /* back out the flags/counters */
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO)
- ctrl->iocnt--;
- aen_op->flags &= ~FCOP_FLAGS_TERMIO;
- spin_unlock_irqrestore(&ctrl->lock, flags);
- return;
- }
- }
+ for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
+ __nvme_fc_abort_op(ctrl, aen_op);
}
-static inline int
+static inline void
__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
- struct nvme_fc_fcp_op *op)
+ struct nvme_fc_fcp_op *op, int opstate)
{
unsigned long flags;
- bool complete_rq = false;
- spin_lock_irqsave(&ctrl->lock, flags);
- if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
+ if (opstate == FCPOP_STATE_ABORTED) {
+ spin_lock_irqsave(&ctrl->lock, flags);
if (ctrl->flags & FCCTRL_TERMIO) {
if (!--ctrl->iocnt)
wake_up(&ctrl->ioabort_wait);
}
+ spin_unlock_irqrestore(&ctrl->lock, flags);
}
- if (op->flags & FCOP_FLAGS_RELEASED)
- complete_rq = true;
- else
- op->flags |= FCOP_FLAGS_COMPLETE;
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- return complete_rq;
}
static void
@@ -1601,6 +1570,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
union nvme_result result;
bool terminate_assoc = true;
+ int opstate;
/*
* WARNING:
@@ -1639,11 +1609,12 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
* association to be terminated.
*/
+ opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
+
fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
sizeof(op->rsp_iu), DMA_FROM_DEVICE);
- if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
- op->flags & FCOP_FLAGS_TERMIO)
+ if (opstate == FCPOP_STATE_ABORTED)
status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
else if (freq->status)
status = cpu_to_le16(NVME_SC_INTERNAL << 1);
@@ -1708,7 +1679,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
done:
if (op->flags & FCOP_FLAGS_AEN) {
nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
- __nvme_fc_fcpop_chk_teardowns(ctrl, op);
+ __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
atomic_set(&op->state, FCPOP_STATE_IDLE);
op->flags = FCOP_FLAGS_AEN; /* clear other flags */
nvme_fc_ctrl_put(ctrl);
@@ -1722,13 +1693,11 @@ done:
if (status &&
(blk_queue_dying(rq->q) ||
ctrl->ctrl.state == NVME_CTRL_NEW ||
- ctrl->ctrl.state == NVME_CTRL_RECONNECTING))
+ ctrl->ctrl.state == NVME_CTRL_CONNECTING))
status |= cpu_to_le16(NVME_SC_DNR << 1);
- if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
- __nvme_fc_final_op_cleanup(rq);
- else
- nvme_end_request(rq, status, result);
+ __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+ nvme_end_request(rq, status, result);
check_error:
if (terminate_assoc)
@@ -2415,46 +2384,16 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg)
}
static void
-__nvme_fc_final_op_cleanup(struct request *rq)
+nvme_fc_complete_rq(struct request *rq)
{
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
struct nvme_fc_ctrl *ctrl = op->ctrl;
atomic_set(&op->state, FCPOP_STATE_IDLE);
- op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
- FCOP_FLAGS_COMPLETE);
nvme_fc_unmap_data(ctrl, rq, op);
nvme_complete_rq(rq);
nvme_fc_ctrl_put(ctrl);
-
-}
-
-static void
-nvme_fc_complete_rq(struct request *rq)
-{
- struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
- struct nvme_fc_ctrl *ctrl = op->ctrl;
- unsigned long flags;
- bool completed = false;
-
- /*
- * the core layer, on controller resets after calling
- * nvme_shutdown_ctrl(), calls complete_rq without our
- * calling blk_mq_complete_request(), thus there may still
- * be live i/o outstanding with the LLDD. Means transport has
- * to track complete calls vs fcpio_done calls to know what
- * path to take on completes and dones.
- */
- spin_lock_irqsave(&ctrl->lock, flags);
- if (op->flags & FCOP_FLAGS_COMPLETE)
- completed = true;
- else
- op->flags |= FCOP_FLAGS_RELEASED;
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- if (completed)
- __nvme_fc_final_op_cleanup(rq);
}
/*
@@ -2476,35 +2415,11 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
struct nvme_ctrl *nctrl = data;
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
- unsigned long flags;
- int status;
if (!blk_mq_request_started(req))
return;
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO) {
- ctrl->iocnt++;
- op->flags |= FCOP_FLAGS_TERMIO;
- }
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- status = __nvme_fc_abort_op(ctrl, op);
- if (status) {
- /*
- * if __nvme_fc_abort_op failed the io wasn't
- * active. Thus this call path is running in
- * parallel to the io complete. Treat as non-error.
- */
-
- /* back out the flags/counters */
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO)
- ctrl->iocnt--;
- op->flags &= ~FCOP_FLAGS_TERMIO;
- spin_unlock_irqrestore(&ctrl->lock, flags);
- return;
- }
+ __nvme_fc_abort_op(ctrl, op);
}
@@ -2566,11 +2481,11 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
goto out_free_tag_set;
}
- ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+ ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret)
goto out_cleanup_blk_queue;
- ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+ ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret)
goto out_delete_hw_queues;
@@ -2617,11 +2532,11 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
if (ret)
goto out_free_io_queues;
- ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+ ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret)
goto out_free_io_queues;
- ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+ ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret)
goto out_delete_hw_queues;
@@ -2717,13 +2632,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
nvme_fc_init_queue(ctrl, 0);
ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
- NVME_AQ_BLK_MQ_DEPTH);
+ NVME_AQ_DEPTH);
if (ret)
goto out_free_queue;
ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
- NVME_AQ_BLK_MQ_DEPTH,
- (NVME_AQ_BLK_MQ_DEPTH / 4));
+ NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4));
if (ret)
goto out_delete_hw_queue;
@@ -2751,7 +2665,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
}
ctrl->ctrl.sqsize =
- min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize);
+ min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
if (ret)
@@ -2784,6 +2698,14 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
opts->queue_size = ctrl->ctrl.maxcmd;
}
+ if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
+ /* warn if sqsize is lower than queue_size */
+ dev_warn(ctrl->ctrl.device,
+ "queue_size %zu > ctrl sqsize %u, clamping down\n",
+ opts->queue_size, ctrl->ctrl.sqsize + 1);
+ opts->queue_size = ctrl->ctrl.sqsize + 1;
+ }
+
ret = nvme_fc_init_aen_ops(ctrl);
if (ret)
goto out_term_aen_ops;
@@ -2943,7 +2865,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
bool recon = true;
- if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
+ if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
return;
if (portptr->port_state == FC_OBJSTATE_ONLINE)
@@ -2991,10 +2913,10 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
/* will block will waiting for io to terminate */
nvme_fc_delete_association(ctrl);
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: error_recovery: Couldn't change state "
- "to RECONNECTING\n", ctrl->cnum);
+ "to CONNECTING\n", ctrl->cnum);
return;
}
@@ -3195,7 +3117,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
* transport errors (frame drop, LS failure) inherently must kill
* the association. The transport is coded so that any command used
* to create the association (prior to a LIVE state transition
- * while NEW or RECONNECTING) will fail if it completes in error or
+ * while NEW or CONNECTING) will fail if it completes in error or
* times out.
*
* As such: as the connect request was mostly likely due to a
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 3b211d9e58b8..060f69e03427 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -198,30 +198,16 @@ void nvme_mpath_add_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
- device_add_disk(&head->subsys->dev, head->disk);
- if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
- &nvme_ns_id_attr_group))
- pr_warn("%s: failed to create sysfs group for identification\n",
- head->disk->disk_name);
-}
-
-void nvme_mpath_add_disk_links(struct nvme_ns *ns)
-{
- struct kobject *slave_disk_kobj, *holder_disk_kobj;
-
- if (!ns->head->disk)
- return;
-
- slave_disk_kobj = &disk_to_dev(ns->disk)->kobj;
- if (sysfs_create_link(ns->head->disk->slave_dir, slave_disk_kobj,
- kobject_name(slave_disk_kobj)))
- return;
- holder_disk_kobj = &disk_to_dev(ns->head->disk)->kobj;
- if (sysfs_create_link(ns->disk->part0.holder_dir, holder_disk_kobj,
- kobject_name(holder_disk_kobj)))
- sysfs_remove_link(ns->head->disk->slave_dir,
- kobject_name(slave_disk_kobj));
+ mutex_lock(&head->subsys->lock);
+ if (!(head->disk->flags & GENHD_FL_UP)) {
+ device_add_disk(&head->subsys->dev, head->disk);
+ if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
+ &nvme_ns_id_attr_group))
+ pr_warn("%s: failed to create sysfs group for identification\n",
+ head->disk->disk_name);
+ }
+ mutex_unlock(&head->subsys->lock);
}
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -238,14 +224,3 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
blk_cleanup_queue(head->disk->queue);
put_disk(head->disk);
}
-
-void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
-{
- if (!ns->head->disk)
- return;
-
- sysfs_remove_link(ns->disk->part0.holder_dir,
- kobject_name(&disk_to_dev(ns->head->disk)->kobj));
- sysfs_remove_link(ns->head->disk->slave_dir,
- kobject_name(&disk_to_dev(ns->disk)->kobj));
-}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8e4550fa08f8..d733b14ede9d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -123,7 +123,7 @@ enum nvme_ctrl_state {
NVME_CTRL_LIVE,
NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */
NVME_CTRL_RESETTING,
- NVME_CTRL_RECONNECTING,
+ NVME_CTRL_CONNECTING,
NVME_CTRL_DELETING,
NVME_CTRL_DEAD,
};
@@ -183,6 +183,7 @@ struct nvme_ctrl {
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
+ struct nvme_command ka_cmd;
struct work_struct fw_act_work;
/* Power saving configuration */
@@ -409,9 +410,7 @@ bool nvme_req_needs_failover(struct request *req, blk_status_t error);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns_head *head);
-void nvme_mpath_add_disk_links(struct nvme_ns *ns);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
-void nvme_mpath_remove_disk_links(struct nvme_ns *ns);
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
@@ -453,12 +452,6 @@ static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
}
-static inline void nvme_mpath_add_disk_links(struct nvme_ns *ns)
-{
-}
-static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
-{
-}
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6fe7af00a1f4..b6f43b738f03 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1141,7 +1141,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
/* If there is a reset/reinit ongoing, we shouldn't reset again. */
switch (dev->ctrl.state) {
case NVME_CTRL_RESETTING:
- case NVME_CTRL_RECONNECTING:
+ case NVME_CTRL_CONNECTING:
return false;
default:
break;
@@ -1153,12 +1153,6 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
if (!(csts & NVME_CSTS_CFS) && !nssro)
return false;
- /* If PCI error recovery process is happening, we cannot reset or
- * the recovery mechanism will surely fail.
- */
- if (pci_channel_offline(to_pci_dev(dev->dev)))
- return false;
-
return true;
}
@@ -1189,6 +1183,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct nvme_command cmd;
u32 csts = readl(dev->bar + NVME_REG_CSTS);
+ /* If PCI error recovery process is happening, we cannot reset or
+ * the recovery mechanism will surely fail.
+ */
+ mb();
+ if (pci_channel_offline(to_pci_dev(dev->dev)))
+ return BLK_EH_RESET_TIMER;
+
/*
* Reset immediately if the controller is failed
*/
@@ -1215,13 +1216,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
* cancellation error. All outstanding requests are completed on
* shutdown, so we return BLK_EH_HANDLED.
*/
- if (dev->ctrl.state == NVME_CTRL_RESETTING) {
+ switch (dev->ctrl.state) {
+ case NVME_CTRL_CONNECTING:
+ case NVME_CTRL_RESETTING:
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_HANDLED;
+ default:
+ break;
}
/*
@@ -1364,18 +1369,14 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
- if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
- unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
- dev->ctrl.page_size);
- nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
- nvmeq->sq_cmds_io = dev->cmb + offset;
- } else {
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
- &nvmeq->sq_dma_addr, GFP_KERNEL);
- if (!nvmeq->sq_cmds)
- return -ENOMEM;
- }
+ /* CMB SQEs will be mapped before creation */
+ if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS))
+ return 0;
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr, GFP_KERNEL);
+ if (!nvmeq->sq_cmds)
+ return -ENOMEM;
return 0;
}
@@ -1449,10 +1450,17 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
struct nvme_dev *dev = nvmeq->dev;
int result;
+ if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+ unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
+ dev->ctrl.page_size);
+ nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
+ nvmeq->sq_cmds_io = dev->cmb + offset;
+ }
+
nvmeq->cq_vector = qid - 1;
result = adapter_alloc_cq(dev, qid, nvmeq);
if (result < 0)
- return result;
+ goto release_vector;
result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0)
@@ -1466,9 +1474,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
return result;
release_sq:
+ dev->online_queues--;
adapter_delete_sq(dev, qid);
release_cq:
adapter_delete_cq(dev, qid);
+ release_vector:
+ nvmeq->cq_vector = -1;
return result;
}
@@ -1903,7 +1914,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
int result, nr_io_queues;
unsigned long size;
- nr_io_queues = num_present_cpus();
+ nr_io_queues = num_possible_cpus();
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
if (result < 0)
return result;
@@ -2288,12 +2299,12 @@ static void nvme_reset_work(struct work_struct *work)
nvme_dev_disable(dev, false);
/*
- * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
+ * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
* initializing procedure here.
*/
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
dev_warn(dev->ctrl.device,
- "failed to mark controller RECONNECTING\n");
+ "failed to mark controller CONNECTING\n");
goto out;
}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2bc059f7d73c..4d84a73ee12d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -887,7 +887,7 @@ free_ctrl:
static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
{
/* If we are resetting/deleting then do nothing */
- if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+ if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_LIVE);
return;
@@ -973,7 +973,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_start_queues(&ctrl->ctrl);
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure should never happen */
WARN_ON_ONCE(1);
return;
@@ -1051,7 +1051,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
- if (!blk_rq_bytes(rq))
+ if (!blk_rq_payload_bytes(rq))
return;
if (req->mr) {
@@ -1166,7 +1166,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
c->common.flags |= NVME_CMD_SGL_METABUF;
- if (!blk_rq_bytes(rq))
+ if (!blk_rq_payload_bytes(rq))
return nvme_rdma_set_sg_null(c);
req->sg_table.sgl = req->first_sgl;
@@ -1756,7 +1756,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure should never happen */
WARN_ON_ONCE(1);
return;
@@ -1784,11 +1784,8 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
return;
out_fail:
- dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_rdma_shutdown_ctrl(ctrl, true);
- nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
+ ++ctrl->ctrl.nr_reconnects;
+ nvme_rdma_reconnect_or_remove(ctrl);
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1942,6 +1939,9 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (!ctrl->queues)
goto out_uninit_ctrl;
+ changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
+ WARN_ON_ONCE(!changed);
+
ret = nvme_rdma_configure_admin_queue(ctrl, true);
if (ret)
goto out_kfree_queues;
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 0bd737117a80..a78029e4e5f4 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -520,9 +520,12 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
goto fail;
}
- /* either variant of SGLs is fine, as we don't support metadata */
- if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
- (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
+ /*
+ * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
+ * contains an address of a single contiguous physical buffer that is
+ * byte aligned.
+ */
+ if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
goto fail;
}
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 0a4372a016f2..28bbdff4a88b 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -105,10 +105,13 @@ static void nvmet_execute_flush(struct nvmet_req *req)
static u16 nvmet_discard_range(struct nvmet_ns *ns,
struct nvme_dsm_range *range, struct bio **bio)
{
- if (__blkdev_issue_discard(ns->bdev,
+ int ret;
+
+ ret = __blkdev_issue_discard(ns->bdev,
le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
- GFP_KERNEL, 0, bio))
+ GFP_KERNEL, 0, bio);
+ if (ret && ret != -EOPNOTSUPP)
return NVME_SC_INTERNAL | NVME_SC_DNR;
return 0;
}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 7991ec3a17db..861d1509b22b 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -184,7 +184,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK;
}
- if (blk_rq_bytes(req)) {
+ if (blk_rq_payload_bytes(req)) {
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
@@ -193,7 +193,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
iod->req.sg = iod->sg_table.sgl;
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
- iod->req.transfer_len = blk_rq_bytes(req);
+ iod->req.transfer_len = blk_rq_payload_bytes(req);
}
blk_mq_start_request(req);