summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r--drivers/nvme/host/core.c226
1 files changed, 181 insertions, 45 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 108f60b46804..fa7ba09dca77 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -102,10 +102,13 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
*/
if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
return;
- revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
/* Forcibly unquiesce queues to avoid blocking dispatch */
blk_mq_unquiesce_queue(ns->queue);
+ /*
+ * Revalidate after unblocking dispatchers that may be holding bd_butex
+ */
+ revalidate_disk(ns->disk);
}
static void nvme_queue_scan(struct nvme_ctrl *ctrl)
@@ -113,10 +116,26 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
/*
* Only new queue scan work when admin and IO queues are both alive
*/
- if (ctrl->state == NVME_CTRL_LIVE)
+ if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
queue_work(nvme_wq, &ctrl->scan_work);
}
+/*
+ * Use this function to proceed with scheduling reset_work for a controller
+ * that had previously been set to the resetting state. This is intended for
+ * code paths that can't be interrupted by other reset attempts. A hot removal
+ * may prevent this from succeeding.
+ */
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
+{
+ if (ctrl->state != NVME_CTRL_RESETTING)
+ return -EBUSY;
+ if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
+ return -EBUSY;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
+
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -134,8 +153,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
ret = nvme_reset_ctrl(ctrl);
if (!ret) {
flush_work(&ctrl->reset_work);
- if (ctrl->state != NVME_CTRL_LIVE &&
- ctrl->state != NVME_CTRL_ADMIN_ONLY)
+ if (ctrl->state != NVME_CTRL_LIVE)
ret = -ENETRESET;
}
@@ -312,15 +330,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
old_state = ctrl->state;
switch (new_state) {
- case NVME_CTRL_ADMIN_ONLY:
- switch (old_state) {
- case NVME_CTRL_CONNECTING:
- changed = true;
- /* FALLTHRU */
- default:
- break;
- }
- break;
case NVME_CTRL_LIVE:
switch (old_state) {
case NVME_CTRL_NEW:
@@ -336,7 +345,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
changed = true;
/* FALLTHRU */
default:
@@ -356,7 +364,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
case NVME_CTRL_DELETING:
switch (old_state) {
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
changed = true;
@@ -378,8 +385,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
break;
}
- if (changed)
+ if (changed) {
ctrl->state = new_state;
+ wake_up_all(&ctrl->state_wq);
+ }
spin_unlock_irqrestore(&ctrl->lock, flags);
if (changed && ctrl->state == NVME_CTRL_LIVE)
@@ -388,6 +397,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
}
EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
+/*
+ * Returns true for sink states that can't ever transition back to live.
+ */
+static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
+{
+ switch (ctrl->state) {
+ case NVME_CTRL_NEW:
+ case NVME_CTRL_LIVE:
+ case NVME_CTRL_RESETTING:
+ case NVME_CTRL_CONNECTING:
+ return false;
+ case NVME_CTRL_DELETING:
+ case NVME_CTRL_DEAD:
+ return true;
+ default:
+ WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
+ return true;
+ }
+}
+
+/*
+ * Waits for the controller state to be resetting, or returns false if it is
+ * not possible to ever transition to that state.
+ */
+bool nvme_wait_reset(struct nvme_ctrl *ctrl)
+{
+ wait_event(ctrl->state_wq,
+ nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
+ nvme_state_terminal(ctrl));
+ return ctrl->state == NVME_CTRL_RESETTING;
+}
+EXPORT_SYMBOL_GPL(nvme_wait_reset);
+
static void nvme_free_ns_head(struct kref *ref)
{
struct nvme_ns_head *head =
@@ -847,7 +889,7 @@ out:
static int nvme_submit_user_cmd(struct request_queue *q,
struct nvme_command *cmd, void __user *ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
- u32 meta_seed, u32 *result, unsigned timeout)
+ u32 meta_seed, u64 *result, unsigned timeout)
{
bool write = nvme_is_write(cmd);
struct nvme_ns *ns = q->queuedata;
@@ -888,7 +930,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
else
ret = nvme_req(req)->status;
if (result)
- *result = le32_to_cpu(nvme_req(req)->result.u32);
+ *result = le64_to_cpu(nvme_req(req)->result.u64);
if (meta && !ret && !write) {
if (copy_to_user(meta_buffer, meta, meta_len))
ret = -EFAULT;
@@ -1303,8 +1345,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
if (ns->disk && nvme_revalidate_disk(ns->disk))
nvme_set_queue_dying(ns);
up_read(&ctrl->namespaces_rwsem);
-
- nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
}
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -1320,6 +1360,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
nvme_unfreeze(ctrl);
nvme_mpath_unfreeze(ctrl->subsys);
mutex_unlock(&ctrl->subsys->lock);
+ nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
mutex_unlock(&ctrl->scan_lock);
}
if (effects & NVME_CMD_EFFECTS_CCC)
@@ -1335,6 +1376,54 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_command c;
unsigned timeout = 0;
u32 effects;
+ u64 result;
+ int status;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+ return -EFAULT;
+ if (cmd.flags)
+ return -EINVAL;
+
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = cmd.opcode;
+ c.common.flags = cmd.flags;
+ c.common.nsid = cpu_to_le32(cmd.nsid);
+ c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+ c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+ c.common.cdw10 = cpu_to_le32(cmd.cdw10);
+ c.common.cdw11 = cpu_to_le32(cmd.cdw11);
+ c.common.cdw12 = cpu_to_le32(cmd.cdw12);
+ c.common.cdw13 = cpu_to_le32(cmd.cdw13);
+ c.common.cdw14 = cpu_to_le32(cmd.cdw14);
+ c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+
+ if (cmd.timeout_ms)
+ timeout = msecs_to_jiffies(cmd.timeout_ms);
+
+ effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
+ status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+ (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
+ (void __user *)(uintptr_t)cmd.metadata,
+ cmd.metadata_len, 0, &result, timeout);
+ nvme_passthru_end(ctrl, effects);
+
+ if (status >= 0) {
+ if (put_user(result, &ucmd->result))
+ return -EFAULT;
+ }
+
+ return status;
+}
+
+static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ struct nvme_passthru_cmd64 __user *ucmd)
+{
+ struct nvme_passthru_cmd64 cmd;
+ struct nvme_command c;
+ unsigned timeout = 0;
+ u32 effects;
int status;
if (!capable(CAP_SYS_ADMIN))
@@ -1405,6 +1494,41 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
srcu_read_unlock(&head->srcu, idx);
}
+static bool is_ctrl_ioctl(unsigned int cmd)
+{
+ if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
+ return true;
+ if (is_sed_ioctl(cmd))
+ return true;
+ return false;
+}
+
+static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
+ void __user *argp,
+ struct nvme_ns_head *head,
+ int srcu_idx)
+{
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ int ret;
+
+ nvme_get_ctrl(ns->ctrl);
+ nvme_put_ns_from_disk(head, srcu_idx);
+
+ switch (cmd) {
+ case NVME_IOCTL_ADMIN_CMD:
+ ret = nvme_user_cmd(ctrl, NULL, argp);
+ break;
+ case NVME_IOCTL_ADMIN64_CMD:
+ ret = nvme_user_cmd64(ctrl, NULL, argp);
+ break;
+ default:
+ ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
+ break;
+ }
+ nvme_put_ctrl(ctrl);
+ return ret;
+}
+
static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
@@ -1422,20 +1546,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
* seperately and drop the ns SRCU reference early. This avoids a
* deadlock when deleting namespaces using the passthrough interface.
*/
- if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
- struct nvme_ctrl *ctrl = ns->ctrl;
-
- nvme_get_ctrl(ns->ctrl);
- nvme_put_ns_from_disk(head, srcu_idx);
-
- if (cmd == NVME_IOCTL_ADMIN_CMD)
- ret = nvme_user_cmd(ctrl, NULL, argp);
- else
- ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
-
- nvme_put_ctrl(ctrl);
- return ret;
- }
+ if (is_ctrl_ioctl(cmd))
+ return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
switch (cmd) {
case NVME_IOCTL_ID:
@@ -1448,6 +1560,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
case NVME_IOCTL_SUBMIT_IO:
ret = nvme_submit_io(ns, argp);
break;
+ case NVME_IOCTL_IO64_CMD:
+ ret = nvme_user_cmd64(ns->ctrl, ns, argp);
+ break;
default:
if (ns->ndev)
ret = nvme_nvm_ioctl(ns, cmd, arg);
@@ -2289,6 +2404,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = {
.vid = 0x14a4,
.fr = "22301111",
.quirks = NVME_QUIRK_SIMPLE_SUSPEND,
+ },
+ {
+ /*
+ * This Kingston E8FK11.T firmware version has no interrupt
+ * after resume with actions related to suspend to idle
+ * https://bugzilla.kernel.org/show_bug.cgi?id=204887
+ */
+ .vid = 0x2646,
+ .fr = "E8FK11.T",
+ .quirks = NVME_QUIRK_SIMPLE_SUSPEND,
}
};
@@ -2540,8 +2665,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
list_add_tail(&subsys->entry, &nvme_subsystems);
}
- if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
- dev_name(ctrl->device))) {
+ ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
+ dev_name(ctrl->device));
+ if (ret) {
dev_err(ctrl->device,
"failed to create sysfs link from subsystem.\n");
goto out_put_subsystem;
@@ -2786,7 +2912,6 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
switch (ctrl->state) {
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
break;
default:
return -EWOULDBLOCK;
@@ -2838,6 +2963,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
return nvme_user_cmd(ctrl, NULL, argp);
+ case NVME_IOCTL_ADMIN64_CMD:
+ return nvme_user_cmd64(ctrl, NULL, argp);
case NVME_IOCTL_IO_CMD:
return nvme_dev_user_cmd(ctrl, argp);
case NVME_IOCTL_RESET:
@@ -3045,6 +3172,8 @@ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
nvme_show_int_function(cntlid);
nvme_show_int_function(numa_node);
+nvme_show_int_function(queue_count);
+nvme_show_int_function(sqsize);
static ssize_t nvme_sysfs_delete(struct device *dev,
struct device_attribute *attr, const char *buf,
@@ -3076,7 +3205,6 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
- [NVME_CTRL_ADMIN_ONLY] = "only-admin",
[NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_CONNECTING] = "connecting",
[NVME_CTRL_DELETING] = "deleting",
@@ -3125,6 +3253,8 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_address.attr,
&dev_attr_state.attr,
&dev_attr_numa_node.attr,
+ &dev_attr_queue_count.attr,
+ &dev_attr_sqsize.attr,
NULL
};
@@ -3585,11 +3715,10 @@ static void nvme_scan_work(struct work_struct *work)
struct nvme_id_ctrl *id;
unsigned nn;
- if (ctrl->state != NVME_CTRL_LIVE)
+ /* No tagset on a live ctrl means IO queues could not created */
+ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
return;
- WARN_ON_ONCE(!ctrl->tagset);
-
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
dev_info(ctrl->device, "rescanning namespaces.\n");
nvme_clear_changed_ns_log(ctrl);
@@ -3750,13 +3879,13 @@ static void nvme_fw_act_work(struct work_struct *work)
if (time_after(jiffies, fw_act_timeout)) {
dev_warn(ctrl->device,
"Fw activation timeout, reset controller\n");
- nvme_reset_ctrl(ctrl);
- break;
+ nvme_try_sched_reset(ctrl);
+ return;
}
msleep(100);
}
- if (ctrl->state != NVME_CTRL_LIVE)
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
return;
nvme_start_queues(ctrl);
@@ -3776,7 +3905,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
- queue_work(nvme_wq, &ctrl->fw_act_work);
+ /*
+ * We are (ab)using the RESETTING state to prevent subsequent
+ * recovery actions from interfering with the controller's
+ * firmware activation.
+ */
+ if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+ queue_work(nvme_wq, &ctrl->fw_act_work);
break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
@@ -3899,6 +4034,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
+ init_waitqueue_head(&ctrl->state_wq);
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));