summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-09-17 16:57:47 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-09-17 16:57:47 -0700
commit7ad67ca5534ee7c958559c4ad610f05c4578e361 (patch)
treedc6b6a8a6b70b5f25b07bcdc06d8e77e705f6822 /drivers/nvme/host/core.c
parent5260c2b863ef1152445ce93476c95d8c8a727eef (diff)
parent9c7eddf1b080f98fed1aadb74fe784f29bf77a08 (diff)
Merge tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: - Two NVMe pull requests: - ana log parse fix from Anton - nvme quirks support for Apple devices from Ben - fix missing bio completion tracing for multipath stack devices from Hannes and Mikhail - IP TOS settings for nvme rdma and tcp transports from Israel - rq_dma_dir cleanups from Israel - tracing for Get LBA Status command from Minwoo - Some nvme-tcp cleanups from Minwoo, Potnuri and Myself - Some consolidation between the fabrics transports for handling the CAP register - reset race with ns scanning fix for fabrics (move fabrics commands to a dedicated request queue with a different lifetime from the admin request queue)." - controller reset and namespace scan races fixes - nvme discovery log change uevent support - naming improvements from Keith - multiple discovery controllers reject fix from James - some regular cleanups from various people - Series fixing (and re-fixing) null_blk debug printing and nr_devices checks (André) - A few pull requests from Song, with fixes from Andy, Guoqing, Guilherme, Neil, Nigel, and Yufen. - REQ_OP_ZONE_RESET_ALL support (Chaitanya) - Bio merge handling unification (Christoph) - Pick default elevator correctly for devices with special needs (Damien) - Block stats fixes (Hou) - Timeout and support devices nbd fixes (Mike) - Series fixing races around elevator switching and device add/remove (Ming) - sed-opal cleanups (Revanth) - Per device weight support for BFQ (Fam) - Support for blk-iocost, a new model that can properly account cost of IO workloads. (Tejun) - blk-cgroup writeback fixes (Tejun) - paride queue init fixes (zhengbin) - blk_set_runtime_active() cleanup (Stanley) - Block segment mapping optimizations (Bart) - lightnvm fixes (Hans/Minwoo/YueHaibing) - Various little fixes and cleanups * tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block: (186 commits) null_blk: format pr_* logs with pr_fmt null_blk: match the type of parameter nr_devices null_blk: do not fail the module load with zero devices block: also check RQF_STATS in blk_mq_need_time_stamp() block: make rq sector size accessible for block stats bfq: Fix bfq linkage error raid5: use bio_end_sector in r5_next_bio raid5: remove STRIPE_OPS_REQ_PENDING md: add feature flag MD_FEATURE_RAID0_LAYOUT md/raid0: avoid RAID0 data corruption due to layout confusion. raid5: don't set STRIPE_HANDLE to stripe which is in batch list raid5: don't increment read_errors on EILSEQ return nvmet: fix a wrong error status returned in error log page nvme: send discovery log page change events to userspace nvme: add uevent variables for controller devices nvme: enable aen regardless of the presence of I/O queues nvme-fabrics: allow discovery subsystems accept a kato nvmet: Use PTR_ERR_OR_ZERO() in nvmet_init_discovery() nvme: Remove redundant assignment of cq vector nvme: Assign subsys instance from first ctrl ...
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r--drivers/nvme/host/core.c201
1 files changed, 140 insertions, 61 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d3d6b7bd6903..1ede1763a5ee 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -22,12 +22,12 @@
#include <linux/pm_qos.h>
#include <asm/unaligned.h>
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
#include "nvme.h"
#include "fabrics.h"
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
#define NVME_MINORS (1U << MINORBITS)
unsigned int admin_timeout = 60;
@@ -81,7 +81,6 @@ EXPORT_SYMBOL_GPL(nvme_reset_wq);
struct workqueue_struct *nvme_delete_wq;
EXPORT_SYMBOL_GPL(nvme_delete_wq);
-static DEFINE_IDA(nvme_subsystems_ida);
static LIST_HEAD(nvme_subsystems);
static DEFINE_MUTEX(nvme_subsystems_lock);
@@ -197,9 +196,9 @@ static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
}
-static blk_status_t nvme_error_status(struct request *req)
+static blk_status_t nvme_error_status(u16 status)
{
- switch (nvme_req(req)->status & 0x7ff) {
+ switch (status & 0x7ff) {
case NVME_SC_SUCCESS:
return BLK_STS_OK;
case NVME_SC_CAP_EXCEEDED:
@@ -226,6 +225,8 @@ static blk_status_t nvme_error_status(struct request *req)
return BLK_STS_PROTECTION;
case NVME_SC_RESERVATION_CONFLICT:
return BLK_STS_NEXUS;
+ case NVME_SC_HOST_PATH_ERROR:
+ return BLK_STS_TRANSPORT;
default:
return BLK_STS_IOERR;
}
@@ -260,7 +261,7 @@ static void nvme_retry_req(struct request *req)
void nvme_complete_rq(struct request *req)
{
- blk_status_t status = nvme_error_status(req);
+ blk_status_t status = nvme_error_status(nvme_req(req)->status);
trace_nvme_complete_rq(req);
@@ -279,6 +280,8 @@ void nvme_complete_rq(struct request *req)
return;
}
}
+
+ nvme_trace_bio_complete(req, status);
blk_mq_end_request(req, status);
}
EXPORT_SYMBOL_GPL(nvme_complete_rq);
@@ -288,8 +291,12 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
"Cancelling I/O %d", req->tag);
- nvme_req(req)->status = NVME_SC_ABORT_REQ;
- blk_mq_complete_request_sync(req);
+ /* don't abort one completed request */
+ if (blk_mq_request_completed(req))
+ return true;
+
+ nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR;
+ blk_mq_complete_request(req);
return true;
}
EXPORT_SYMBOL_GPL(nvme_cancel_request);
@@ -1088,10 +1095,9 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
NVME_IDENTIFY_DATA_SIZE);
}
-static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
- unsigned nsid)
+static int nvme_identify_ns(struct nvme_ctrl *ctrl,
+ unsigned nsid, struct nvme_id_ns **id)
{
- struct nvme_id_ns *id;
struct nvme_command c = { };
int error;
@@ -1100,18 +1106,17 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
c.identify.nsid = cpu_to_le32(nsid);
c.identify.cns = NVME_ID_CNS_NS;
- id = kmalloc(sizeof(*id), GFP_KERNEL);
- if (!id)
- return NULL;
+ *id = kmalloc(sizeof(**id), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
- error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
+ error = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
if (error) {
dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
- kfree(id);
- return NULL;
+ kfree(*id);
}
- return id;
+ return error;
}
static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
@@ -1180,7 +1185,8 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
EXPORT_SYMBOL_GPL(nvme_set_queue_count);
#define NVME_AEN_SUPPORTED \
- (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE)
+ (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | \
+ NVME_AEN_CFG_ANA_CHANGE | NVME_AEN_CFG_DISC_CHANGE)
static void nvme_enable_aen(struct nvme_ctrl *ctrl)
{
@@ -1195,6 +1201,8 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl)
if (status)
dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
supported_aens);
+
+ queue_work(nvme_wq, &ctrl->async_event_work);
}
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
@@ -1594,9 +1602,11 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors);
}
-static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
+static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
struct nvme_id_ns *id, struct nvme_ns_ids *ids)
{
+ int ret = 0;
+
memset(ids, 0, sizeof(*ids));
if (ctrl->vs >= NVME_VS(1, 1, 0))
@@ -1607,10 +1617,12 @@ static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
/* Don't treat error as fatal we potentially
* already have a NGUID or EUI-64
*/
- if (nvme_identify_ns_descs(ctrl, nsid, ids))
+ ret = nvme_identify_ns_descs(ctrl, nsid, ids);
+ if (ret)
dev_warn(ctrl->device,
- "%s: Identify Descriptors failed\n", __func__);
+ "Identify Descriptors failed (%d)\n", ret);
}
+ return ret;
}
static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
@@ -1738,25 +1750,37 @@ static int nvme_revalidate_disk(struct gendisk *disk)
return -ENODEV;
}
- id = nvme_identify_ns(ctrl, ns->head->ns_id);
- if (!id)
- return -ENODEV;
+ ret = nvme_identify_ns(ctrl, ns->head->ns_id, &id);
+ if (ret)
+ goto out;
if (id->ncap == 0) {
ret = -ENODEV;
- goto out;
+ goto free_id;
}
__nvme_revalidate_disk(disk, id);
- nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids);
+ ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids);
+ if (ret)
+ goto free_id;
+
if (!nvme_ns_ids_equal(&ns->head->ids, &ids)) {
dev_err(ctrl->device,
"identifiers changed for nsid %d\n", ns->head->ns_id);
ret = -ENODEV;
}
-out:
+free_id:
kfree(id);
+out:
+ /*
+ * Only fail the function if we got a fatal error back from the
+ * device, otherwise ignore the error and just move on.
+ */
+ if (ret == -ENOMEM || (ret > 0 && !(ret & NVME_SC_DNR)))
+ ret = 0;
+ else if (ret > 0)
+ ret = blk_status_to_errno(nvme_error_status(ret));
return ret;
}
@@ -1952,7 +1976,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
* bits', but doing so may cause the device to complete commands to the
* admin queue ... and we don't know what memory that might be pointing at!
*/
-int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
{
int ret;
@@ -1966,20 +1990,27 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
msleep(NVME_QUIRK_DELAY_AMOUNT);
- return nvme_wait_ready(ctrl, cap, false);
+ return nvme_wait_ready(ctrl, ctrl->cap, false);
}
EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
-int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
{
/*
* Default to a 4K page size, with the intention to update this
* path in the future to accomodate architectures with differing
* kernel and IO page sizes.
*/
- unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
+ unsigned dev_page_min, page_shift = 12;
int ret;
+ ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
+ if (ret) {
+ dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
+ return ret;
+ }
+ dev_page_min = NVME_CAP_MPSMIN(ctrl->cap) + 12;
+
if (page_shift < dev_page_min) {
dev_err(ctrl->device,
"Minimum device page size %u too large for host (%u)\n",
@@ -1998,7 +2029,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
if (ret)
return ret;
- return nvme_wait_ready(ctrl, cap, true);
+ return nvme_wait_ready(ctrl, ctrl->cap, true);
}
EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
@@ -2332,7 +2363,8 @@ static void nvme_release_subsystem(struct device *dev)
struct nvme_subsystem *subsys =
container_of(dev, struct nvme_subsystem, dev);
- ida_simple_remove(&nvme_subsystems_ida, subsys->instance);
+ if (subsys->instance >= 0)
+ ida_simple_remove(&nvme_instance_ida, subsys->instance);
kfree(subsys);
}
@@ -2361,6 +2393,17 @@ static struct nvme_subsystem *__nvme_find_get_subsystem(const char *subsysnqn)
lockdep_assert_held(&nvme_subsystems_lock);
+ /*
+ * Fail matches for discovery subsystems. This results
+ * in each discovery controller bound to a unique subsystem.
+ * This avoids issues with validating controller values
+ * that can only be true when there is a single unique subsystem.
+ * There may be multiple and completely independent entities
+ * that provide discovery controllers.
+ */
+ if (!strcmp(subsysnqn, NVME_DISC_SUBSYS_NAME))
+ return NULL;
+
list_for_each_entry(subsys, &nvme_subsystems, entry) {
if (strcmp(subsys->subnqn, subsysnqn))
continue;
@@ -2461,12 +2504,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
if (!subsys)
return -ENOMEM;
- ret = ida_simple_get(&nvme_subsystems_ida, 0, 0, GFP_KERNEL);
- if (ret < 0) {
- kfree(subsys);
- return ret;
- }
- subsys->instance = ret;
+
+ subsys->instance = -1;
mutex_init(&subsys->lock);
kref_init(&subsys->ref);
INIT_LIST_HEAD(&subsys->ctrls);
@@ -2485,7 +2524,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
subsys->dev.class = nvme_subsys_class;
subsys->dev.release = nvme_release_subsystem;
subsys->dev.groups = nvme_subsys_attrs_groups;
- dev_set_name(&subsys->dev, "nvme-subsys%d", subsys->instance);
+ dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance);
device_initialize(&subsys->dev);
mutex_lock(&nvme_subsystems_lock);
@@ -2517,6 +2556,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
goto out_put_subsystem;
}
+ if (!found)
+ subsys->instance = ctrl->instance;
ctrl->subsys = subsys;
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
mutex_unlock(&nvme_subsystems_lock);
@@ -2574,7 +2615,6 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
int nvme_init_identify(struct nvme_ctrl *ctrl)
{
struct nvme_id_ctrl *id;
- u64 cap;
int ret, page_shift;
u32 max_hw_sectors;
bool prev_apst_enabled;
@@ -2584,16 +2624,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
return ret;
}
-
- ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
- if (ret) {
- dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
- return ret;
- }
- page_shift = NVME_CAP_MPSMIN(cap) + 12;
+ page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
+ ctrl->sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
if (ctrl->vs >= NVME_VS(1, 1, 0))
- ctrl->subsystem = NVME_CAP_NSSRC(cap);
+ ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap);
ret = nvme_identify_ctrl(ctrl, &id);
if (ret) {
@@ -3184,7 +3219,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
head->ns_id = nsid;
kref_init(&head->ref);
- nvme_report_ns_ids(ctrl, nsid, id, &head->ids);
+ ret = nvme_report_ns_ids(ctrl, nsid, id, &head->ids);
+ if (ret)
+ goto out_cleanup_srcu;
ret = __nvme_check_ids(ctrl->subsys, head);
if (ret) {
@@ -3209,6 +3246,8 @@ out_ida_remove:
out_free_head:
kfree(head);
out:
+ if (ret > 0)
+ ret = blk_status_to_errno(nvme_error_status(ret));
return ERR_PTR(ret);
}
@@ -3232,7 +3271,10 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
} else {
struct nvme_ns_ids ids;
- nvme_report_ns_ids(ctrl, nsid, id, &ids);
+ ret = nvme_report_ns_ids(ctrl, nsid, id, &ids);
+ if (ret)
+ goto out_unlock;
+
if (!nvme_ns_ids_equal(&head->ids, &ids)) {
dev_err(ctrl->device,
"IDs don't match for shared namespace %d\n",
@@ -3247,6 +3289,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
out_unlock:
mutex_unlock(&ctrl->subsys->lock);
+ if (ret > 0)
+ ret = blk_status_to_errno(nvme_error_status(ret));
return ret;
}
@@ -3338,11 +3382,9 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
nvme_set_queue_limits(ctrl, ns->queue);
- id = nvme_identify_ns(ctrl, nsid);
- if (!id) {
- ret = -EIO;
+ ret = nvme_identify_ns(ctrl, nsid, &id);
+ if (ret)
goto out_free_queue;
- }
if (id->ncap == 0) {
ret = -EINVAL;
@@ -3404,6 +3446,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
blk_cleanup_queue(ns->queue);
out_free_ns:
kfree(ns);
+ if (ret > 0)
+ ret = blk_status_to_errno(nvme_error_status(ret));
return ret;
}
@@ -3617,6 +3661,33 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
+static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct nvme_ctrl *ctrl =
+ container_of(dev, struct nvme_ctrl, ctrl_device);
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+ int ret;
+
+ ret = add_uevent_var(env, "NVME_TRTYPE=%s", ctrl->ops->name);
+ if (ret)
+ return ret;
+
+ if (opts) {
+ ret = add_uevent_var(env, "NVME_TRADDR=%s", opts->traddr);
+ if (ret)
+ return ret;
+
+ ret = add_uevent_var(env, "NVME_TRSVCID=%s",
+ opts->trsvcid ?: "none");
+ if (ret)
+ return ret;
+
+ ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
+ opts->host_traddr ?: "none");
+ }
+ return ret;
+}
+
static void nvme_aen_uevent(struct nvme_ctrl *ctrl)
{
char *envp[2] = { NULL, NULL };
@@ -3723,6 +3794,9 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
queue_work(nvme_wq, &ctrl->ana_work);
break;
#endif
+ case NVME_AER_NOTICE_DISC_CHANGED:
+ ctrl->aen_result = result;
+ break;
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
}
@@ -3769,10 +3843,10 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
if (ctrl->kato)
nvme_start_keep_alive(ctrl);
+ nvme_enable_aen(ctrl);
+
if (ctrl->queue_count > 1) {
nvme_queue_scan(ctrl);
- nvme_enable_aen(ctrl);
- queue_work(nvme_wq, &ctrl->async_event_work);
nvme_start_queues(ctrl);
}
}
@@ -3792,7 +3866,9 @@ static void nvme_free_ctrl(struct device *dev)
container_of(dev, struct nvme_ctrl, ctrl_device);
struct nvme_subsystem *subsys = ctrl->subsys;
- ida_simple_remove(&nvme_instance_ida, ctrl->instance);
+ if (subsys && ctrl->instance != subsys->instance)
+ ida_simple_remove(&nvme_instance_ida, ctrl->instance);
+
kfree(ctrl->effects);
nvme_mpath_uninit(ctrl);
__free_page(ctrl->discard_page);
@@ -3992,6 +4068,9 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl)
list_for_each_entry(ns, &ctrl->namespaces, list)
blk_sync_queue(ns->queue);
up_read(&ctrl->namespaces_rwsem);
+
+ if (ctrl->admin_q)
+ blk_sync_queue(ctrl->admin_q);
}
EXPORT_SYMBOL_GPL(nvme_sync_queues);
@@ -4050,6 +4129,7 @@ static int __init nvme_core_init(void)
result = PTR_ERR(nvme_class);
goto unregister_chrdev;
}
+ nvme_class->dev_uevent = nvme_class_uevent;
nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem");
if (IS_ERR(nvme_subsys_class)) {
@@ -4074,7 +4154,6 @@ out:
static void __exit nvme_core_exit(void)
{
- ida_destroy(&nvme_subsystems_ida);
class_destroy(nvme_subsys_class);
class_destroy(nvme_class);
unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);