Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r--  drivers/nvme/host/core.c | 319
1 file changed, 161 insertions(+), 158 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7e3893d06bab..e26b085a007a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -384,6 +384,8 @@ static inline void nvme_end_req(struct request *req)
nvme_log_error(req);
nvme_end_req_zoned(req);
nvme_trace_bio_complete(req);
+ if (req->cmd_flags & REQ_NVME_MPATH)
+ nvme_mpath_end_request(req);
blk_mq_end_request(req, status);
}
@@ -851,8 +853,11 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
cmnd->write_zeroes.length =
cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC))
+ cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC);
+
if (nvme_ns_has_pi(ns)) {
- cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
+ cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT);
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE1:
@@ -1118,11 +1123,12 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
nvme_unfreeze(ctrl);
nvme_mpath_unfreeze(ctrl->subsys);
mutex_unlock(&ctrl->subsys->lock);
- nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
mutex_unlock(&ctrl->scan_lock);
}
- if (effects & NVME_CMD_EFFECTS_CCC)
- nvme_init_ctrl_finish(ctrl);
+ if (effects & NVME_CMD_EFFECTS_CCC) {
+ dev_info(ctrl->device,
+"controller capabilities changed, reset may be required to take effect.\n");
+ }
if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
nvme_queue_scan(ctrl);
flush_work(&ctrl->scan_work);
@@ -2003,6 +2009,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
}
}
+ /*
+ * Only set the DEAC bit if the device guarantees that reads from
+ * deallocated data return zeroes. While the DEAC bit does not
+ * require that, it must be a no-op if reads from deallocated data
+ * do not return zeroes.
+ */
+ if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
+ ns->features |= NVME_NS_DEAC;
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
set_bit(NVME_NS_READY, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2179,7 +2193,7 @@ const struct pr_ops nvme_pr_ops = {
};
#ifdef CONFIG_BLK_SED_OPAL
-int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
+static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
bool send)
{
struct nvme_ctrl *ctrl = data;
@@ -2196,7 +2210,23 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
NVME_QID_ANY, 1, 0);
}
-EXPORT_SYMBOL_GPL(nvme_sec_submit);
+
+static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended)
+{
+ if (ctrl->oacs & NVME_CTRL_OACS_SEC_SUPP) {
+ if (!ctrl->opal_dev)
+ ctrl->opal_dev = init_opal_dev(ctrl, &nvme_sec_submit);
+ else if (was_suspended)
+ opal_unlock_from_suspend(ctrl->opal_dev);
+ } else {
+ free_opal_dev(ctrl->opal_dev);
+ ctrl->opal_dev = NULL;
+ }
+}
+#else
+static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended)
+{
+}
#endif /* CONFIG_BLK_SED_OPAL */
#ifdef CONFIG_BLK_DEV_ZONED
@@ -2221,16 +2251,17 @@ static const struct block_device_operations nvme_bdev_ops = {
.pr_ops = &nvme_pr_ops,
};
-static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
+static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 mask, u32 val,
+ u32 timeout, const char *op)
{
- unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
- u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
+ unsigned long timeout_jiffies = jiffies + timeout * HZ;
+ u32 csts;
int ret;
while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
if (csts == ~0)
return -ENODEV;
- if ((csts & NVME_CSTS_RDY) == bit)
+ if ((csts & mask) == val)
break;
usleep_range(1000, 2000);
@@ -2239,7 +2270,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
if (time_after(jiffies, timeout_jiffies)) {
dev_err(ctrl->device,
"Device not ready; aborting %s, CSTS=0x%x\n",
- enabled ? "initialisation" : "reset", csts);
+ op, csts);
return -ENODEV;
}
}
@@ -2247,27 +2278,29 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
return ret;
}
-/*
- * If the device has been passed off to us in an enabled state, just clear
- * the enabled bit. The spec says we should set the 'shutdown notification
- * bits', but doing so may cause the device to complete commands to the
- * admin queue ... and we don't know what memory that might be pointing at!
- */
-int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
{
int ret;
ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
- ctrl->ctrl_config &= ~NVME_CC_ENABLE;
+ if (shutdown)
+ ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
+ else
+ ctrl->ctrl_config &= ~NVME_CC_ENABLE;
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
if (ret)
return ret;
+ if (shutdown) {
+ return nvme_wait_ready(ctrl, NVME_CSTS_SHST_MASK,
+ NVME_CSTS_SHST_CMPLT,
+ ctrl->shutdown_timeout, "shutdown");
+ }
if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
msleep(NVME_QUIRK_DELAY_AMOUNT);
-
- return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
+ return nvme_wait_ready(ctrl, NVME_CSTS_RDY, 0,
+ (NVME_CAP_TIMEOUT(ctrl->cap) + 1) / 2, "reset");
}
EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
@@ -2332,41 +2365,11 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
if (ret)
return ret;
- return nvme_wait_ready(ctrl, timeout, true);
+ return nvme_wait_ready(ctrl, NVME_CSTS_RDY, NVME_CSTS_RDY,
+ (timeout + 1) / 2, "initialisation");
}
EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
-int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
-{
- unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ);
- u32 csts;
- int ret;
-
- ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
- ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
-
- ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
- if (ret)
- return ret;
-
- while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
- if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
- break;
-
- msleep(100);
- if (fatal_signal_pending(current))
- return -EINTR;
- if (time_after(jiffies, timeout)) {
- dev_err(ctrl->device,
- "Device shutdown incomplete; abort shutdown\n");
- return -ENODEV;
- }
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
-
static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
{
__le64 ts;
@@ -3049,7 +3052,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id)
- return 0;
+ return -ENOMEM;
c.identify.opcode = nvme_admin_identify;
c.identify.cns = NVME_ID_CNS_CS_CTRL;
@@ -3229,7 +3232,7 @@ out_free:
* register in our nvme_ctrl structure. This should be called as soon as
* the admin queue is fully up and running.
*/
-int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
+int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended)
{
int ret;
@@ -3260,6 +3263,8 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
if (ret < 0)
return ret;
+ nvme_configure_opal(ctrl, was_suspended);
+
if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) {
/*
* Do not return errors unless we are in a controller reset,
@@ -3745,15 +3750,19 @@ static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev,
memcpy(dhchap_secret, buf, count);
nvme_auth_stop(ctrl);
if (strcmp(dhchap_secret, opts->dhchap_secret)) {
+ struct nvme_dhchap_key *key, *host_key;
int ret;
- ret = nvme_auth_generate_key(dhchap_secret, &ctrl->host_key);
+ ret = nvme_auth_generate_key(dhchap_secret, &key);
if (ret)
return ret;
kfree(opts->dhchap_secret);
opts->dhchap_secret = dhchap_secret;
- /* Key has changed; re-authentication with new key */
- nvme_auth_reset(ctrl);
+ host_key = ctrl->host_key;
+ mutex_lock(&ctrl->dhchap_auth_mutex);
+ ctrl->host_key = key;
+ mutex_unlock(&ctrl->dhchap_auth_mutex);
+ nvme_auth_free_key(host_key);
}
/* Start re-authentication */
dev_info(ctrl->device, "re-authenticating controller\n");
@@ -3795,15 +3804,19 @@ static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev,
memcpy(dhchap_secret, buf, count);
nvme_auth_stop(ctrl);
if (strcmp(dhchap_secret, opts->dhchap_ctrl_secret)) {
+ struct nvme_dhchap_key *key, *ctrl_key;
int ret;
- ret = nvme_auth_generate_key(dhchap_secret, &ctrl->ctrl_key);
+ ret = nvme_auth_generate_key(dhchap_secret, &key);
if (ret)
return ret;
kfree(opts->dhchap_ctrl_secret);
opts->dhchap_ctrl_secret = dhchap_secret;
- /* Key has changed; re-authentication with new key */
- nvme_auth_reset(ctrl);
+ ctrl_key = ctrl->ctrl_key;
+ mutex_lock(&ctrl->dhchap_auth_mutex);
+ ctrl->ctrl_key = key;
+ mutex_unlock(&ctrl->dhchap_auth_mutex);
+ nvme_auth_free_key(ctrl_key);
}
/* Start re-authentication */
dev_info(ctrl->device, "re-authenticating controller\n");
@@ -3875,10 +3888,11 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
return a->mode;
}
-static const struct attribute_group nvme_dev_attrs_group = {
+const struct attribute_group nvme_dev_attrs_group = {
.attrs = nvme_dev_attrs,
.is_visible = nvme_dev_attrs_are_visible,
};
+EXPORT_SYMBOL_GPL(nvme_dev_attrs_group);
static const struct attribute_group *nvme_dev_attr_groups[] = {
&nvme_dev_attrs_group,
@@ -4333,10 +4347,6 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
{
int ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
- if (test_bit(NVME_NS_DEAD, &ns->flags))
- goto out;
-
- ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
if (!nvme_ns_ids_equal(&ns->head->ids, &info->ids)) {
dev_err(ns->ctrl->device,
"identifiers changed for nsid %d\n", ns->head->ns_id);
@@ -4407,7 +4417,7 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
down_write(&ctrl->namespaces_rwsem);
list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
- if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags))
+ if (ns->head->ns_id > nsid)
list_move_tail(&ns->list, &rm_list);
}
up_write(&ctrl->namespaces_rwsem);
@@ -4424,9 +4434,6 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
u32 prev = 0;
int ret = 0, i;
- if (nvme_ctrl_limited_cns(ctrl))
- return -EOPNOTSUPP;
-
ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
if (!ns_list)
return -ENOMEM;
@@ -4534,8 +4541,18 @@ static void nvme_scan_work(struct work_struct *work)
}
mutex_lock(&ctrl->scan_lock);
- if (nvme_scan_ns_list(ctrl) != 0)
+ if (nvme_ctrl_limited_cns(ctrl)) {
nvme_scan_ns_sequential(ctrl);
+ } else {
+ /*
+ * Fall back to sequential scan if DNR is set to handle broken
+ * devices which should support Identify NS List (as per the VS
+ * they report) but don't actually support it.
+ */
+ ret = nvme_scan_ns_list(ctrl);
+ if (ret > 0 && ret & NVME_SC_DNR)
+ nvme_scan_ns_sequential(ctrl);
+ }
mutex_unlock(&ctrl->scan_lock);
}
@@ -4565,8 +4582,10 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
* removing the namespaces' disks; fail all the queues now to avoid
* potentially having to clean up the failed sync later.
*/
- if (ctrl->state == NVME_CTRL_DEAD)
- nvme_kill_queues(ctrl);
+ if (ctrl->state == NVME_CTRL_DEAD) {
+ nvme_mark_namespaces_dead(ctrl);
+ nvme_unquiesce_io_queues(ctrl);
+ }
/* this is a no-op when called from the controller reset handler */
nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
@@ -4692,7 +4711,7 @@ static void nvme_fw_act_work(struct work_struct *work)
fw_act_timeout = jiffies +
msecs_to_jiffies(admin_timeout * 1000);
- nvme_stop_queues(ctrl);
+ nvme_quiesce_io_queues(ctrl);
while (nvme_ctrl_pp_status(ctrl)) {
if (time_after(jiffies, fw_act_timeout)) {
dev_warn(ctrl->device,
@@ -4706,7 +4725,7 @@ static void nvme_fw_act_work(struct work_struct *work)
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
return;
- nvme_start_queues(ctrl);
+ nvme_unquiesce_io_queues(ctrl);
/* read FW slot information to clear the AER */
nvme_get_fw_slot_info(ctrl);
@@ -4811,8 +4830,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
EXPORT_SYMBOL_GPL(nvme_complete_async_event);
int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
- const struct blk_mq_ops *ops, unsigned int flags,
- unsigned int cmd_size)
+ const struct blk_mq_ops *ops, unsigned int cmd_size)
{
int ret;
@@ -4822,7 +4840,9 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
if (ctrl->ops->flags & NVME_F_FABRICS)
set->reserved_tags = NVMF_RESERVED_TAGS;
set->numa_node = ctrl->numa_node;
- set->flags = flags;
+ set->flags = BLK_MQ_F_NO_SCHED;
+ if (ctrl->ops->flags & NVME_F_BLOCKING)
+ set->flags |= BLK_MQ_F_BLOCKING;
set->cmd_size = cmd_size;
set->driver_data = ctrl;
set->nr_hw_queues = 1;
@@ -4850,6 +4870,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
out_cleanup_admin_q:
blk_mq_destroy_queue(ctrl->admin_q);
+ blk_put_queue(ctrl->admin_q);
out_free_tagset:
blk_mq_free_tag_set(ctrl->admin_tagset);
return ret;
@@ -4859,14 +4880,17 @@ EXPORT_SYMBOL_GPL(nvme_alloc_admin_tag_set);
void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl)
{
blk_mq_destroy_queue(ctrl->admin_q);
- if (ctrl->ops->flags & NVME_F_FABRICS)
+ blk_put_queue(ctrl->admin_q);
+ if (ctrl->ops->flags & NVME_F_FABRICS) {
blk_mq_destroy_queue(ctrl->fabrics_q);
+ blk_put_queue(ctrl->fabrics_q);
+ }
blk_mq_free_tag_set(ctrl->admin_tagset);
}
EXPORT_SYMBOL_GPL(nvme_remove_admin_tag_set);
int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
- const struct blk_mq_ops *ops, unsigned int flags,
+ const struct blk_mq_ops *ops, unsigned int nr_maps,
unsigned int cmd_size)
{
int ret;
@@ -4874,15 +4898,23 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
memset(set, 0, sizeof(*set));
set->ops = ops;
set->queue_depth = ctrl->sqsize + 1;
- set->reserved_tags = NVMF_RESERVED_TAGS;
+ /*
+ * Some Apple controllers requires tags to be unique across admin and
+ * the (only) I/O queue, so reserve the first 32 tags of the I/O queue.
+ */
+ if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS)
+ set->reserved_tags = NVME_AQ_DEPTH;
+ else if (ctrl->ops->flags & NVME_F_FABRICS)
+ set->reserved_tags = NVMF_RESERVED_TAGS;
set->numa_node = ctrl->numa_node;
- set->flags = flags;
+ set->flags = BLK_MQ_F_SHOULD_MERGE;
+ if (ctrl->ops->flags & NVME_F_BLOCKING)
+ set->flags |= BLK_MQ_F_BLOCKING;
set->cmd_size = cmd_size,
set->driver_data = ctrl;
set->nr_hw_queues = ctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
- if (ops->map_queues)
- set->nr_maps = ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
+ set->nr_maps = nr_maps;
ret = blk_mq_alloc_tag_set(set);
if (ret)
return ret;
@@ -4893,6 +4925,8 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
ret = PTR_ERR(ctrl->connect_q);
goto out_free_tag_set;
}
+ blk_queue_flag_set(QUEUE_FLAG_SKIP_TAGSET_QUIESCE,
+ ctrl->connect_q);
}
ctrl->tagset = set;
@@ -4906,8 +4940,10 @@ EXPORT_SYMBOL_GPL(nvme_alloc_io_tag_set);
void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl)
{
- if (ctrl->ops->flags & NVME_F_FABRICS)
+ if (ctrl->ops->flags & NVME_F_FABRICS) {
blk_mq_destroy_queue(ctrl->connect_q);
+ blk_put_queue(ctrl->connect_q);
+ }
blk_mq_free_tag_set(ctrl->tagset);
}
EXPORT_SYMBOL_GPL(nvme_remove_io_tag_set);
@@ -4943,7 +4979,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
if (ctrl->queue_count > 1) {
nvme_queue_scan(ctrl);
- nvme_start_queues(ctrl);
+ nvme_unquiesce_io_queues(ctrl);
nvme_mpath_update(ctrl);
}
@@ -4988,6 +5024,7 @@ static void nvme_free_ctrl(struct device *dev)
nvme_auth_stop(ctrl);
nvme_auth_free(ctrl);
__free_page(ctrl->discard_page);
+ free_opal_dev(ctrl->opal_dev);
if (subsys) {
mutex_lock(&nvme_subsystems_lock);
@@ -5053,7 +5090,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->instance);
ctrl->device->class = nvme_class;
ctrl->device->parent = ctrl->dev;
- ctrl->device->groups = nvme_dev_attr_groups;
+ if (ops->dev_attr_groups)
+ ctrl->device->groups = ops->dev_attr_groups;
+ else
+ ctrl->device->groups = nvme_dev_attr_groups;
ctrl->device->release = nvme_free_ctrl;
dev_set_drvdata(ctrl->device, ctrl);
ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
@@ -5077,9 +5117,13 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
nvme_mpath_init_ctrl(ctrl);
- nvme_auth_init_ctrl(ctrl);
+ ret = nvme_auth_init_ctrl(ctrl);
+ if (ret)
+ goto out_free_cdev;
return 0;
+out_free_cdev:
+ cdev_device_del(&ctrl->cdev, ctrl->device);
out_free_name:
nvme_put_ctrl(ctrl);
kfree_const(ctrl->device->kobj.name);
@@ -5092,62 +5136,17 @@ out:
}
EXPORT_SYMBOL_GPL(nvme_init_ctrl);
-static void nvme_start_ns_queue(struct nvme_ns *ns)
-{
- if (test_and_clear_bit(NVME_NS_STOPPED, &ns->flags))
- blk_mq_unquiesce_queue(ns->queue);
-}
-
-static void nvme_stop_ns_queue(struct nvme_ns *ns)
-{
- if (!test_and_set_bit(NVME_NS_STOPPED, &ns->flags))
- blk_mq_quiesce_queue(ns->queue);
- else
- blk_mq_wait_quiesce_done(ns->queue);
-}
-
-/*
- * Prepare a queue for teardown.
- *
- * This must forcibly unquiesce queues to avoid blocking dispatch, and only set
- * the capacity to 0 after that to avoid blocking dispatchers that may be
- * holding bd_butex. This will end buffered writers dirtying pages that can't
- * be synced.
- */
-static void nvme_set_queue_dying(struct nvme_ns *ns)
-{
- if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
- return;
-
- blk_mark_disk_dead(ns->disk);
- nvme_start_ns_queue(ns);
-
- set_capacity_and_notify(ns->disk, 0);
-}
-
-/**
- * nvme_kill_queues(): Ends all namespace queues
- * @ctrl: the dead controller that needs to end
- *
- * Call this function when the driver determines it is unable to get the
- * controller in a state capable of servicing IO.
- */
-void nvme_kill_queues(struct nvme_ctrl *ctrl)
+/* let I/O to all namespaces fail in preparation for surprise removal */
+void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
down_read(&ctrl->namespaces_rwsem);
-
- /* Forcibly unquiesce queues to avoid blocking dispatch */
- if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
- nvme_start_admin_queue(ctrl);
-
list_for_each_entry(ns, &ctrl->namespaces, list)
- nvme_set_queue_dying(ns);
-
+ blk_mark_disk_dead(ns->disk);
up_read(&ctrl->namespaces_rwsem);
}
-EXPORT_SYMBOL_GPL(nvme_kill_queues);
+EXPORT_SYMBOL_GPL(nvme_mark_namespaces_dead);
void nvme_unfreeze(struct nvme_ctrl *ctrl)
{
@@ -5197,43 +5196,41 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_start_freeze);
-void nvme_stop_queues(struct nvme_ctrl *ctrl)
+void nvme_quiesce_io_queues(struct nvme_ctrl *ctrl)
{
- struct nvme_ns *ns;
-
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
- nvme_stop_ns_queue(ns);
- up_read(&ctrl->namespaces_rwsem);
+ if (!ctrl->tagset)
+ return;
+ if (!test_and_set_bit(NVME_CTRL_STOPPED, &ctrl->flags))
+ blk_mq_quiesce_tagset(ctrl->tagset);
+ else
+ blk_mq_wait_quiesce_done(ctrl->tagset);
}
-EXPORT_SYMBOL_GPL(nvme_stop_queues);
+EXPORT_SYMBOL_GPL(nvme_quiesce_io_queues);
-void nvme_start_queues(struct nvme_ctrl *ctrl)
+void nvme_unquiesce_io_queues(struct nvme_ctrl *ctrl)
{
- struct nvme_ns *ns;
-
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
- nvme_start_ns_queue(ns);
- up_read(&ctrl->namespaces_rwsem);
+ if (!ctrl->tagset)
+ return;
+ if (test_and_clear_bit(NVME_CTRL_STOPPED, &ctrl->flags))
+ blk_mq_unquiesce_tagset(ctrl->tagset);
}
-EXPORT_SYMBOL_GPL(nvme_start_queues);
+EXPORT_SYMBOL_GPL(nvme_unquiesce_io_queues);
-void nvme_stop_admin_queue(struct nvme_ctrl *ctrl)
+void nvme_quiesce_admin_queue(struct nvme_ctrl *ctrl)
{
if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
blk_mq_quiesce_queue(ctrl->admin_q);
else
- blk_mq_wait_quiesce_done(ctrl->admin_q);
+ blk_mq_wait_quiesce_done(ctrl->admin_q->tag_set);
}
-EXPORT_SYMBOL_GPL(nvme_stop_admin_queue);
+EXPORT_SYMBOL_GPL(nvme_quiesce_admin_queue);
-void nvme_start_admin_queue(struct nvme_ctrl *ctrl)
+void nvme_unquiesce_admin_queue(struct nvme_ctrl *ctrl)
{
if (test_and_clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
blk_mq_unquiesce_queue(ctrl->admin_q);
}
-EXPORT_SYMBOL_GPL(nvme_start_admin_queue);
+EXPORT_SYMBOL_GPL(nvme_unquiesce_admin_queue);
void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
{
@@ -5344,8 +5341,13 @@ static int __init nvme_core_init(void)
goto unregister_generic_ns;
}
+ result = nvme_init_auth();
+ if (result)
+ goto destroy_ns_chr;
return 0;
+destroy_ns_chr:
+ class_destroy(nvme_ns_chr_class);
unregister_generic_ns:
unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
destroy_subsys_class:
@@ -5366,6 +5368,7 @@ out:
static void __exit nvme_core_exit(void)
{
+ nvme_exit_auth();
class_destroy(nvme_ns_chr_class);
class_destroy(nvme_subsys_class);
class_destroy(nvme_class);