summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r--drivers/nvme/host/core.c571
1 files changed, 438 insertions, 133 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1a8d32a4a5c3..7bf228df6001 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -38,6 +38,8 @@ struct nvme_ns_info {
u32 nsid;
__le32 anagrpid;
u8 pi_offset;
+ u16 endgid;
+ u64 runs;
bool is_shared;
bool is_readonly;
bool is_ready;
@@ -150,6 +152,8 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
unsigned nsid);
static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
struct nvme_command *cmd);
+static int nvme_get_log_lsi(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page,
+ u8 lsp, u8 csi, void *log, size_t size, u64 offset, u16 lsi);
void nvme_queue_scan(struct nvme_ctrl *ctrl)
{
@@ -286,7 +290,6 @@ static blk_status_t nvme_error_status(u16 status)
case NVME_SC_NS_NOT_READY:
return BLK_STS_TARGET;
case NVME_SC_BAD_ATTRIBUTES:
- case NVME_SC_ONCS_NOT_SUPPORTED:
case NVME_SC_INVALID_OPCODE:
case NVME_SC_INVALID_FIELD:
case NVME_SC_INVALID_NS:
@@ -378,12 +381,12 @@ static void nvme_log_err_passthru(struct request *req)
nr->status & NVME_SC_MASK, /* Status Code */
nr->status & NVME_STATUS_MORE ? "MORE " : "",
nr->status & NVME_STATUS_DNR ? "DNR " : "",
- nr->cmd->common.cdw10,
- nr->cmd->common.cdw11,
- nr->cmd->common.cdw12,
- nr->cmd->common.cdw13,
- nr->cmd->common.cdw14,
- nr->cmd->common.cdw14);
+ le32_to_cpu(nr->cmd->common.cdw10),
+ le32_to_cpu(nr->cmd->common.cdw11),
+ le32_to_cpu(nr->cmd->common.cdw12),
+ le32_to_cpu(nr->cmd->common.cdw13),
+ le32_to_cpu(nr->cmd->common.cdw14),
+ le32_to_cpu(nr->cmd->common.cdw15));
}
enum nvme_disposition {
@@ -431,6 +434,12 @@ static inline void nvme_end_req_zoned(struct request *req)
static inline void __nvme_end_req(struct request *req)
{
+ if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) {
+ if (blk_rq_is_passthrough(req))
+ nvme_log_err_passthru(req);
+ else
+ nvme_log_error(req);
+ }
nvme_end_req_zoned(req);
nvme_trace_bio_complete(req);
if (req->cmd_flags & REQ_NVME_MPATH)
@@ -441,12 +450,6 @@ void nvme_end_req(struct request *req)
{
blk_status_t status = nvme_error_status(nvme_req(req)->status);
- if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) {
- if (blk_rq_is_passthrough(req))
- nvme_log_err_passthru(req);
- else
- nvme_log_error(req);
- }
__nvme_end_req(req);
blk_mq_end_request(req, status);
}
@@ -564,8 +567,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (new_state) {
case NVME_CTRL_LIVE:
switch (old_state) {
- case NVME_CTRL_NEW:
- case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
changed = true;
fallthrough;
@@ -666,10 +667,11 @@ static void nvme_free_ns_head(struct kref *ref)
struct nvme_ns_head *head =
container_of(ref, struct nvme_ns_head, ref);
- nvme_mpath_remove_disk(head);
+ nvme_mpath_put_disk(head);
ida_free(&head->subsys->ns_ida, head->instance);
cleanup_srcu_struct(&head->srcu);
nvme_put_subsystem(head->subsys);
+ kfree(head->plids);
kfree(head);
}
@@ -702,7 +704,7 @@ void nvme_put_ns(struct nvme_ns *ns)
{
kref_put(&ns->kref, nvme_free_ns);
}
-EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU);
+EXPORT_SYMBOL_NS_GPL(nvme_put_ns, "NVME_TARGET_PASSTHRU");
static inline void nvme_clear_nvme_request(struct request *req)
{
@@ -762,6 +764,10 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
+
+ if (!(rq->rq_flags & RQF_DONTPREP))
+ nvme_clear_nvme_request(rq);
+
return nvme_host_path_error(rq);
}
EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);
@@ -885,12 +891,27 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
return BLK_STS_OK;
}
+static void nvme_set_app_tag(struct request *req, struct nvme_command *cmnd)
+{
+ cmnd->rw.lbat = cpu_to_le16(bio_integrity(req->bio)->app_tag);
+ cmnd->rw.lbatm = cpu_to_le16(0xffff);
+}
+
static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd,
struct request *req)
{
u32 upper, lower;
u64 ref48;
+ /* only type1 and type 2 PI formats have a reftag */
+ switch (ns->head->pi_type) {
+ case NVME_NS_DPS_PI_TYPE1:
+ case NVME_NS_DPS_PI_TYPE2:
+ break;
+ default:
+ return;
+ }
+
/* both rw and write zeroes share the same reftag format */
switch (ns->head->guard_type) {
case NVME_NVM_NS_16B_GUARD:
@@ -930,13 +951,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
if (nvme_ns_has_pi(ns->head)) {
cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT);
-
- switch (ns->head->pi_type) {
- case NVME_NS_DPS_PI_TYPE1:
- case NVME_NS_DPS_PI_TYPE2:
- nvme_set_ref_tag(ns, cmnd, req);
- break;
- }
+ nvme_set_ref_tag(ns, cmnd, req);
}
return BLK_STS_OK;
@@ -987,6 +1002,18 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (req->cmd_flags & REQ_RAHEAD)
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+ if (op == nvme_cmd_write && ns->head->nr_plids) {
+ u16 write_stream = req->bio->bi_write_stream;
+
+ if (WARN_ON_ONCE(write_stream > ns->head->nr_plids))
+ return BLK_STS_INVAL;
+
+ if (write_stream) {
+ dsmgmt |= ns->head->plids[write_stream - 1] << 16;
+ control |= NVME_RW_DTYPE_DPLCMT;
+ }
+ }
+
if (req->cmd_flags & REQ_ATOMIC && !nvme_valid_atomic_write(req))
return BLK_STS_INVAL;
@@ -1006,7 +1033,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (ns->head->ms) {
/*
- * If formated with metadata, the block layer always provides a
+ * If formatted with metadata, the block layer always provides a
* metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled. Else
* we enable the PRACT bit for protection information or set the
* namespace capacity to zero to prevent any I/O.
@@ -1015,20 +1042,20 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (WARN_ON_ONCE(!nvme_ns_has_pi(ns->head)))
return BLK_STS_NOTSUPP;
control |= NVME_RW_PRINFO_PRACT;
+ nvme_set_ref_tag(ns, cmnd, req);
}
- switch (ns->head->pi_type) {
- case NVME_NS_DPS_PI_TYPE3:
+ if (bio_integrity_flagged(req->bio, BIP_CHECK_GUARD))
control |= NVME_RW_PRINFO_PRCHK_GUARD;
- break;
- case NVME_NS_DPS_PI_TYPE1:
- case NVME_NS_DPS_PI_TYPE2:
- control |= NVME_RW_PRINFO_PRCHK_GUARD |
- NVME_RW_PRINFO_PRCHK_REF;
+ if (bio_integrity_flagged(req->bio, BIP_CHECK_REFTAG)) {
+ control |= NVME_RW_PRINFO_PRCHK_REF;
if (op == nvme_cmd_zone_append)
control |= NVME_RW_APPEND_PIREMAP;
nvme_set_ref_tag(ns, cmnd, req);
- break;
+ }
+ if (bio_integrity_flagged(req->bio, BIP_CHECK_APPTAG)) {
+ control |= NVME_RW_PRINFO_PRCHK_APP;
+ nvme_set_app_tag(req, cmnd);
}
}
@@ -1123,7 +1150,7 @@ int nvme_execute_rq(struct request *rq, bool at_head)
return nvme_req(rq)->status;
return blk_status_to_errno(status);
}
-EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, NVME_TARGET_PASSTHRU);
+EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, "NVME_TARGET_PASSTHRU");
/*
* Returns 0 on success. If the result is negative, it's a Linux error code;
@@ -1154,7 +1181,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
req->cmd_flags &= ~REQ_FAILFAST_DRIVER;
if (buffer && bufflen) {
- ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
+ ret = blk_rq_map_kern(req, buffer, bufflen, GFP_KERNEL);
if (ret)
goto out;
}
@@ -1203,7 +1230,7 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
return effects;
}
-EXPORT_SYMBOL_NS_GPL(nvme_command_effects, NVME_TARGET_PASSTHRU);
+EXPORT_SYMBOL_NS_GPL(nvme_command_effects, "NVME_TARGET_PASSTHRU");
u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
{
@@ -1223,7 +1250,7 @@ u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
}
return effects;
}
-EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU);
+EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, "NVME_TARGET_PASSTHRU");
void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
struct nvme_command *cmd, int status)
@@ -1268,7 +1295,7 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
break;
}
}
-EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU);
+EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, "NVME_TARGET_PASSTHRU");
/*
* Recommended frequency for KATO commands per NVMe 1.4 section 7.12.1:
@@ -1305,9 +1332,10 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
}
-static void nvme_keep_alive_finish(struct request *rq,
- blk_status_t status, struct nvme_ctrl *ctrl)
+static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
+ blk_status_t status)
{
+ struct nvme_ctrl *ctrl = rq->end_io_data;
unsigned long rtt = jiffies - (rq->deadline - rq->timeout);
unsigned long delay = nvme_keep_alive_work_period(ctrl);
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
@@ -1324,17 +1352,20 @@ static void nvme_keep_alive_finish(struct request *rq,
delay = 0;
}
+ blk_mq_free_request(rq);
+
if (status) {
dev_err(ctrl->device,
"failed nvme_keep_alive_end_io error=%d\n",
status);
- return;
+ return RQ_END_IO_NONE;
}
ctrl->ka_last_check_time = jiffies;
ctrl->comp_seen = false;
if (state == NVME_CTRL_LIVE || state == NVME_CTRL_CONNECTING)
queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
+ return RQ_END_IO_NONE;
}
static void nvme_keep_alive_work(struct work_struct *work)
@@ -1343,7 +1374,6 @@ static void nvme_keep_alive_work(struct work_struct *work)
struct nvme_ctrl, ka_work);
bool comp_seen = ctrl->comp_seen;
struct request *rq;
- blk_status_t status;
ctrl->ka_last_check_time = jiffies;
@@ -1366,9 +1396,9 @@ static void nvme_keep_alive_work(struct work_struct *work)
nvme_init_request(rq, &ctrl->ka_cmd);
rq->timeout = ctrl->kato * HZ;
- status = blk_execute_rq(rq, false);
- nvme_keep_alive_finish(rq, status, ctrl);
- blk_mq_free_request(rq);
+ rq->end_io = nvme_keep_alive_end_io;
+ rq->end_io_data = ctrl;
+ blk_execute_rq_nowait(rq, false);
}
static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
@@ -1603,6 +1633,7 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
info->is_ready = true;
+ info->endgid = le16_to_cpu(id->endgid);
if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
dev_info(ctrl->device,
"Ignoring bogus Namespace Identifiers\n");
@@ -1643,6 +1674,7 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
info->is_ready = id->nstat & NVME_NSTAT_NRDY;
info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL;
info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT;
+ info->endgid = le16_to_cpu(id->endgid);
}
kfree(id);
return ret;
@@ -1668,7 +1700,7 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen,
- u32 *result)
+ void *result)
{
return nvme_features(dev, nvme_admin_set_features, fid, dword11, buffer,
buflen, result);
@@ -1677,7 +1709,7 @@ EXPORT_SYMBOL_GPL(nvme_set_features);
int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen,
- u32 *result)
+ void *result)
{
return nvme_features(dev, nvme_admin_get_features, fid, dword11, buffer,
buflen, result);
@@ -1692,7 +1724,13 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0,
&result);
- if (status < 0)
+
+ /*
+ * It's either a kernel error or the host observed a connection
+ * lost. In either case it's not possible communicate with the
+ * controller and thus enter the error code path.
+ */
+ if (status < 0 || status == NVME_SC_HOST_PATH_ERROR)
return status;
/*
@@ -1769,12 +1807,12 @@ static void nvme_release(struct gendisk *disk)
nvme_ns_release(disk->private_data);
}
-int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+int nvme_getgeo(struct gendisk *disk, struct hd_geometry *geo)
{
/* some standard values */
geo->heads = 1 << 6;
geo->sectors = 1 << 5;
- geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
+ geo->cylinders = get_capacity(disk) >> 11;
return 0;
}
@@ -1836,8 +1874,11 @@ static bool nvme_init_integrity(struct nvme_ns_head *head,
break;
}
- bi->tuple_size = head->ms;
- bi->pi_offset = info->pi_offset;
+ bi->metadata_size = head->ms;
+ if (bi->csum_type) {
+ bi->pi_tuple_size = head->pi_size;
+ bi->pi_offset = info->pi_offset;
+ }
return true;
}
@@ -1985,20 +2026,41 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
}
-static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns,
- struct nvme_id_ns *id, struct queue_limits *lim,
- u32 bs, u32 atomic_bs)
+static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
+ struct nvme_id_ns *id, struct queue_limits *lim, u32 bs)
{
- unsigned int boundary = 0;
+ u32 atomic_bs, boundary = 0;
- if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) {
- if (le16_to_cpu(id->nabspf))
+ /*
+ * We do not support an offset for the atomic boundaries.
+ */
+ if (id->nabo)
+ return bs;
+
+ if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) {
+ /*
+ * Use the per-namespace atomic write unit when available.
+ */
+ atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
+ if (id->nabspf)
boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
+ } else {
+ /*
+ * Use the controller wide atomic write unit. This sucks
+ * because the limit is defined in terms of logical blocks while
+ * namespaces can have different formats, and because there is
+ * no clear language in the specification prohibiting different
+ * values for different controllers in the subsystem.
+ */
+ atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
}
+
lim->atomic_write_hw_max = atomic_bs;
lim->atomic_write_hw_boundary = boundary;
lim->atomic_write_hw_unit_min = bs;
lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs);
+ lim->features |= BLK_FEAT_ATOMIC_WRITES;
+ return atomic_bs;
}
static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
@@ -2007,13 +2069,13 @@ static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
}
static void nvme_set_ctrl_limits(struct nvme_ctrl *ctrl,
- struct queue_limits *lim)
+ struct queue_limits *lim, bool is_admin)
{
lim->max_hw_sectors = ctrl->max_hw_sectors;
lim->max_segments = min_t(u32, USHRT_MAX,
min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments));
lim->max_integrity_segments = ctrl->max_integrity_segments;
- lim->virt_boundary_mask = NVME_CTRL_PAGE_SIZE - 1;
+ lim->virt_boundary_mask = ctrl->ops->get_virt_boundary(ctrl, is_admin);
lim->max_segment_size = UINT_MAX;
lim->dma_alignment = 3;
}
@@ -2031,25 +2093,13 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
* or smaller than a sector size yet, so catch this early and don't
* allow block I/O.
*/
- if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) {
+ if (blk_validate_block_size(bs)) {
bs = (1 << 9);
valid = false;
}
- atomic_bs = phys_bs = bs;
- if (id->nabo == 0) {
- /*
- * Bit 1 indicates whether NAWUPF is defined for this namespace
- * and whether it should be used instead of AWUPF. If NAWUPF ==
- * 0 then AWUPF must be used instead.
- */
- if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
- atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
- else
- atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
-
- nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs);
- }
+ phys_bs = bs;
+ atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs);
if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
/* NPWG = Namespace Preferred Write Granularity */
@@ -2068,7 +2118,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
lim->physical_block_size = min(phys_bs, atomic_bs);
lim->io_min = phys_bs;
lim->io_opt = io_opt;
- if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
+ if ((ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) &&
+ (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM))
lim->max_write_zeroes_sectors = UINT_MAX;
else
lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors;
@@ -2122,14 +2173,16 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
struct nvme_ns_info *info)
{
struct queue_limits lim;
+ unsigned int memflags;
int ret;
- blk_mq_freeze_queue(ns->disk->queue);
lim = queue_limits_start_update(ns->disk->queue);
- nvme_set_ctrl_limits(ns->ctrl, &lim);
+ nvme_set_ctrl_limits(ns->ctrl, &lim, false);
+
+ memflags = blk_mq_freeze_queue(ns->disk->queue);
ret = queue_limits_commit_update(ns->disk->queue, &lim);
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
- blk_mq_unfreeze_queue(ns->disk->queue);
+ blk_mq_unfreeze_queue(ns->disk->queue, memflags);
/* Hide the block-interface for these devices */
if (!ret)
@@ -2137,6 +2190,148 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
return ret;
}
+static int nvme_query_fdp_granularity(struct nvme_ctrl *ctrl,
+ struct nvme_ns_info *info, u8 fdp_idx)
+{
+ struct nvme_fdp_config_log hdr, *h;
+ struct nvme_fdp_config_desc *desc;
+ size_t size = sizeof(hdr);
+ void *log, *end;
+ int i, n, ret;
+
+ ret = nvme_get_log_lsi(ctrl, 0, NVME_LOG_FDP_CONFIGS, 0,
+ NVME_CSI_NVM, &hdr, size, 0, info->endgid);
+ if (ret) {
+ dev_warn(ctrl->device,
+ "FDP configs log header status:0x%x endgid:%d\n", ret,
+ info->endgid);
+ return ret;
+ }
+
+ size = le32_to_cpu(hdr.sze);
+ if (size > PAGE_SIZE * MAX_ORDER_NR_PAGES) {
+ dev_warn(ctrl->device, "FDP config size too large:%zu\n",
+ size);
+ return 0;
+ }
+
+ h = kvmalloc(size, GFP_KERNEL);
+ if (!h)
+ return -ENOMEM;
+
+ ret = nvme_get_log_lsi(ctrl, 0, NVME_LOG_FDP_CONFIGS, 0,
+ NVME_CSI_NVM, h, size, 0, info->endgid);
+ if (ret) {
+ dev_warn(ctrl->device,
+ "FDP configs log status:0x%x endgid:%d\n", ret,
+ info->endgid);
+ goto out;
+ }
+
+ n = le16_to_cpu(h->numfdpc) + 1;
+ if (fdp_idx > n) {
+ dev_warn(ctrl->device, "FDP index:%d out of range:%d\n",
+ fdp_idx, n);
+ /* Proceed without registering FDP streams */
+ ret = 0;
+ goto out;
+ }
+
+ log = h + 1;
+ desc = log;
+ end = log + size - sizeof(*h);
+ for (i = 0; i < fdp_idx; i++) {
+ log += le16_to_cpu(desc->dsze);
+ desc = log;
+ if (log >= end) {
+ dev_warn(ctrl->device,
+ "FDP invalid config descriptor list\n");
+ ret = 0;
+ goto out;
+ }
+ }
+
+ if (le32_to_cpu(desc->nrg) > 1) {
+ dev_warn(ctrl->device, "FDP NRG > 1 not supported\n");
+ ret = 0;
+ goto out;
+ }
+
+ info->runs = le64_to_cpu(desc->runs);
+out:
+ kvfree(h);
+ return ret;
+}
+
+static int nvme_query_fdp_info(struct nvme_ns *ns, struct nvme_ns_info *info)
+{
+ struct nvme_ns_head *head = ns->head;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ struct nvme_fdp_ruh_status *ruhs;
+ struct nvme_fdp_config fdp;
+ struct nvme_command c = {};
+ size_t size;
+ int i, ret;
+
+ /*
+ * The FDP configuration is static for the lifetime of the namespace,
+ * so return immediately if we've already registered this namespace's
+ * streams.
+ */
+ if (head->nr_plids)
+ return 0;
+
+ ret = nvme_get_features(ctrl, NVME_FEAT_FDP, info->endgid, NULL, 0,
+ &fdp);
+ if (ret) {
+ dev_warn(ctrl->device, "FDP get feature status:0x%x\n", ret);
+ return ret;
+ }
+
+ if (!(fdp.flags & FDPCFG_FDPE))
+ return 0;
+
+ ret = nvme_query_fdp_granularity(ctrl, info, fdp.fdpcidx);
+ if (!info->runs)
+ return ret;
+
+ size = struct_size(ruhs, ruhsd, S8_MAX - 1);
+ ruhs = kzalloc(size, GFP_KERNEL);
+ if (!ruhs)
+ return -ENOMEM;
+
+ c.imr.opcode = nvme_cmd_io_mgmt_recv;
+ c.imr.nsid = cpu_to_le32(head->ns_id);
+ c.imr.mo = NVME_IO_MGMT_RECV_MO_RUHS;
+ c.imr.numd = cpu_to_le32(nvme_bytes_to_numd(size));
+ ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size);
+ if (ret) {
+ dev_warn(ctrl->device, "FDP io-mgmt status:0x%x\n", ret);
+ goto free;
+ }
+
+ head->nr_plids = le16_to_cpu(ruhs->nruhsd);
+ if (!head->nr_plids)
+ goto free;
+
+ head->plids = kcalloc(head->nr_plids, sizeof(*head->plids),
+ GFP_KERNEL);
+ if (!head->plids) {
+ dev_warn(ctrl->device,
+ "failed to allocate %u FDP placement IDs\n",
+ head->nr_plids);
+ head->nr_plids = 0;
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ for (i = 0; i < head->nr_plids; i++)
+ head->plids[i] = le16_to_cpu(ruhs->ruhsd[i].pid);
+free:
+ kfree(ruhs);
+ return ret;
+}
+
static int nvme_update_ns_info_block(struct nvme_ns *ns,
struct nvme_ns_info *info)
{
@@ -2144,6 +2339,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
struct nvme_id_ns_nvm *nvm = NULL;
struct nvme_zone_info zi = {};
struct nvme_id_ns *id;
+ unsigned int memflags;
sector_t capacity;
unsigned lbaf;
int ret;
@@ -2173,17 +2369,24 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
goto out;
}
- blk_mq_freeze_queue(ns->disk->queue);
+ if (ns->ctrl->ctratt & NVME_CTRL_ATTR_FDPS) {
+ ret = nvme_query_fdp_info(ns, info);
+ if (ret < 0)
+ goto out;
+ }
+
+ lim = queue_limits_start_update(ns->disk->queue);
+
+ memflags = blk_mq_freeze_queue(ns->disk->queue);
ns->head->lba_shift = id->lbaf[lbaf].ds;
ns->head->nuse = le64_to_cpu(id->nuse);
capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
-
- lim = queue_limits_start_update(ns->disk->queue);
- nvme_set_ctrl_limits(ns->ctrl, &lim);
+ nvme_set_ctrl_limits(ns->ctrl, &lim, false);
nvme_configure_metadata(ns->ctrl, ns->head, id, nvm, info);
nvme_set_chunk_sectors(ns, id, &lim);
if (!nvme_update_disk_info(ns, id, &lim))
capacity = 0;
+
nvme_config_discard(ns, &lim);
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
ns->head->ids.csi == NVME_CSI_ZNS)
@@ -2206,13 +2409,11 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (!nvme_init_integrity(ns->head, &lim, info))
capacity = 0;
- ret = queue_limits_commit_update(ns->disk->queue, &lim);
- if (ret) {
- blk_mq_unfreeze_queue(ns->disk->queue);
- goto out;
- }
-
- set_capacity_and_notify(ns->disk, capacity);
+ lim.max_write_streams = ns->head->nr_plids;
+ if (lim.max_write_streams)
+ lim.write_stream_granularity = min(info->runs, U32_MAX);
+ else
+ lim.write_stream_granularity = 0;
/*
* Only set the DEAC bit if the device guarantees that reads from
@@ -2220,11 +2421,21 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
* require that, it must be a no-op if reads from deallocated data
* do not return zeroes.
*/
- if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
+ if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3))) {
ns->head->features |= NVME_NS_DEAC;
+ lim.max_hw_wzeroes_unmap_sectors = lim.max_write_zeroes_sectors;
+ }
+
+ ret = queue_limits_commit_update(ns->disk->queue, &lim);
+ if (ret) {
+ blk_mq_unfreeze_queue(ns->disk->queue, memflags);
+ goto out;
+ }
+
+ set_capacity_and_notify(ns->disk, capacity);
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
set_bit(NVME_NS_READY, &ns->flags);
- blk_mq_unfreeze_queue(ns->disk->queue);
+ blk_mq_unfreeze_queue(ns->disk->queue, memflags);
if (blk_queue_is_zoned(ns->queue)) {
ret = blk_revalidate_disk_zones(ns->disk);
@@ -2280,8 +2491,10 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
if (!ret && nvme_ns_head_multipath(ns->head)) {
struct queue_limits *ns_lim = &ns->disk->queue->limits;
struct queue_limits lim;
+ unsigned int memflags;
- blk_mq_freeze_queue(ns->head->disk->queue);
+ lim = queue_limits_start_update(ns->head->disk->queue);
+ memflags = blk_mq_freeze_queue(ns->head->disk->queue);
/*
* queue_limits mixes values that are the hardware limitations
* for bio splitting with what is the device configuration.
@@ -2297,7 +2510,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
* the splitting limits in to make sure we still obey possibly
* lower limitations of other controllers.
*/
- lim = queue_limits_start_update(ns->head->disk->queue);
lim.logical_block_size = ns_lim->logical_block_size;
lim.physical_block_size = ns_lim->physical_block_size;
lim.io_min = ns_lim->io_min;
@@ -2308,13 +2520,15 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
ns->head->disk->flags |= GENHD_FL_HIDDEN;
else
nvme_init_integrity(ns->head, &lim, info);
+ lim.max_write_streams = ns_lim->max_write_streams;
+ lim.write_stream_granularity = ns_lim->write_stream_granularity;
ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
nvme_mpath_revalidate_paths(ns);
- blk_mq_unfreeze_queue(ns->head->disk->queue);
+ blk_mq_unfreeze_queue(ns->head->disk->queue, memflags);
}
return ret;
@@ -2385,10 +2599,9 @@ static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended)
#ifdef CONFIG_BLK_DEV_ZONED
static int nvme_report_zones(struct gendisk *disk, sector_t sector,
- unsigned int nr_zones, report_zones_cb cb, void *data)
+ unsigned int nr_zones, struct blk_report_zones_args *args)
{
- return nvme_ns_report_zones(disk->private_data, sector, nr_zones, cb,
- data);
+ return nvme_ns_report_zones(disk->private_data, sector, nr_zones, args);
}
#else
#define nvme_report_zones NULL
@@ -2948,6 +3161,16 @@ static inline bool nvme_discovery_ctrl(struct nvme_ctrl *ctrl)
return ctrl->opts && ctrl->opts->discovery_nqn;
}
+static inline bool nvme_admin_ctrl(struct nvme_ctrl *ctrl)
+{
+ return ctrl->cntrltype == NVME_CTRL_ADMIN;
+}
+
+static inline bool nvme_is_io_ctrl(struct nvme_ctrl *ctrl)
+{
+ return !nvme_discovery_ctrl(ctrl) && !nvme_admin_ctrl(ctrl);
+}
+
static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
{
@@ -2998,6 +3221,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
memcpy(subsys->model, id->mn, sizeof(subsys->model));
subsys->vendor_id = le16_to_cpu(id->vid);
subsys->cmic = id->cmic;
+ subsys->awupf = le16_to_cpu(id->awupf);
/* Versions prior to 1.4 don't necessarily report a valid type */
if (id->cntrltype == NVME_CTRL_DISC ||
@@ -3013,7 +3237,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
kfree(subsys);
return -EINVAL;
}
- subsys->awupf = le16_to_cpu(id->awupf);
nvme_mpath_default_iopolicy(subsys);
subsys->dev.class = &nvme_subsys_class;
@@ -3066,8 +3289,8 @@ out_unlock:
return ret;
}
-int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
- void *log, size_t size, u64 offset)
+static int nvme_get_log_lsi(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page,
+ u8 lsp, u8 csi, void *log, size_t size, u64 offset, u16 lsi)
{
struct nvme_command c = { };
u32 dwlen = nvme_bytes_to_numd(size);
@@ -3081,14 +3304,22 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
c.get_log_page.lpou = cpu_to_le32(upper_32_bits(offset));
c.get_log_page.csi = csi;
+ c.get_log_page.lsi = cpu_to_le16(lsi);
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
}
+int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
+ void *log, size_t size, u64 offset)
+{
+ return nvme_get_log_lsi(ctrl, nsid, log_page, lsp, csi, log, size,
+ offset, 0);
+}
+
static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
struct nvme_effects_log **log)
{
- struct nvme_effects_log *cel = xa_load(&ctrl->cels, csi);
+ struct nvme_effects_log *old, *cel = xa_load(&ctrl->cels, csi);
int ret;
if (cel)
@@ -3105,7 +3336,11 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
return ret;
}
- xa_store(&ctrl->cels, csi, cel, GFP_KERNEL);
+ old = xa_store(&ctrl->cels, csi, cel, GFP_KERNEL);
+ if (xa_is_err(old)) {
+ kfree(cel);
+ return xa_err(old);
+ }
out:
*log = cel;
return 0;
@@ -3138,7 +3373,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
else
ctrl->max_zeroes_sectors = 0;
- if (ctrl->subsys->subtype != NVME_NQN_NVME ||
+ if (!nvme_is_io_ctrl(ctrl) ||
!nvme_id_cns_ok(ctrl, NVME_ID_CNS_CS_CTRL) ||
test_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags))
return 0;
@@ -3167,6 +3402,25 @@ free_data:
return ret;
}
+static int nvme_init_effects_log(struct nvme_ctrl *ctrl,
+ u8 csi, struct nvme_effects_log **log)
+{
+ struct nvme_effects_log *effects, *old;
+
+ effects = kzalloc(sizeof(*effects), GFP_KERNEL);
+ if (!effects)
+ return -ENOMEM;
+
+ old = xa_store(&ctrl->cels, csi, effects, GFP_KERNEL);
+ if (xa_is_err(old)) {
+ kfree(effects);
+ return xa_err(old);
+ }
+
+ *log = effects;
+ return 0;
+}
+
static void nvme_init_known_nvm_effects(struct nvme_ctrl *ctrl)
{
struct nvme_effects_log *log = ctrl->effects;
@@ -3213,10 +3467,9 @@ static int nvme_init_effects(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
}
if (!ctrl->effects) {
- ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL);
- if (!ctrl->effects)
- return -ENOMEM;
- xa_store(&ctrl->cels, NVME_CSI_NVM, ctrl->effects, GFP_KERNEL);
+ ret = nvme_init_effects_log(ctrl, NVME_CSI_NVM, &ctrl->effects);
+ if (ret < 0)
+ return ret;
}
nvme_init_known_nvm_effects(ctrl);
@@ -3242,14 +3495,14 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct
return -EINVAL;
}
- if (!nvme_discovery_ctrl(ctrl) && ctrl->ioccsz < 4) {
+ if (nvme_is_io_ctrl(ctrl) && ctrl->ioccsz < 4) {
dev_err(ctrl->device,
"I/O queue command capsule supported size %d < 4\n",
ctrl->ioccsz);
return -EINVAL;
}
- if (!nvme_discovery_ctrl(ctrl) && ctrl->iorcsz < 1) {
+ if (nvme_is_io_ctrl(ctrl) && ctrl->iorcsz < 1) {
dev_err(ctrl->device,
"I/O queue response capsule supported size %d < 1\n",
ctrl->iorcsz);
@@ -3257,8 +3510,9 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct
}
if (!ctrl->maxcmd) {
- dev_err(ctrl->device, "Maximum outstanding commands is 0\n");
- return -EINVAL;
+ dev_warn(ctrl->device,
+ "Firmware bug: maximum outstanding commands is 0\n");
+ ctrl->maxcmd = ctrl->sqsize + 1;
}
return 0;
@@ -3334,7 +3588,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
lim = queue_limits_start_update(ctrl->admin_q);
- nvme_set_ctrl_limits(ctrl, &lim);
+ nvme_set_ctrl_limits(ctrl, &lim, true);
ret = queue_limits_commit_update(ctrl->admin_q, &lim);
if (ret)
goto out_free;
@@ -3400,7 +3654,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
-
out_free:
kfree(id);
return ret;
@@ -3430,6 +3683,17 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended)
if (ret)
return ret;
+ if (nvme_admin_ctrl(ctrl)) {
+ /*
+ * An admin controller has one admin queue, but no I/O queues.
+ * Override queue_count so it only creates an admin queue.
+ */
+ dev_dbg(ctrl->device,
+ "Subsystem %s is an administrative controller",
+ ctrl->subsys->subnqn);
+ ctrl->queue_count = 1;
+ }
+
ret = nvme_configure_apst(ctrl);
if (ret < 0)
return ret;
@@ -3519,7 +3783,7 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl,
*/
if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h))
continue;
- if (!list_empty(&h->list) && nvme_tryget_ns_head(h))
+ if (nvme_tryget_ns_head(h))
return h;
}
@@ -3763,7 +4027,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
}
} else {
ret = -EINVAL;
- if (!info->is_shared || !head->shared) {
+ if ((!info->is_shared || !head->shared) &&
+ !list_empty(&head->list)) {
dev_err(ctrl->device,
"Duplicate unshared namespace %d\n",
info->nsid);
@@ -3781,13 +4046,17 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
"Found shared namespace %d, but multipathing not supported.\n",
info->nsid);
dev_warn_once(ctrl->device,
- "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n");
+ "Shared namespace support requires core_nvme.multipath=Y.\n");
}
}
list_add_tail_rcu(&ns->siblings, &head->list);
ns->head = head;
mutex_unlock(&ctrl->subsys->lock);
+
+#ifdef CONFIG_NVME_MULTIPATH
+ cancel_delayed_work(&head->remove_work);
+#endif
return 0;
out_put_ns_head:
@@ -3817,7 +4086,7 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
srcu_read_unlock(&ctrl->srcu, srcu_idx);
return ret;
}
-EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
+EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, "NVME_TARGET_PASSTHRU");
/*
* Add the namespace to the controller list while keeping the list ordered.
@@ -3832,7 +4101,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
return;
}
}
- list_add(&ns->list, &ns->ctrl->namespaces);
+ list_add_rcu(&ns->list, &ns->ctrl->namespaces);
}
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
@@ -3841,6 +4110,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
struct nvme_ns *ns;
struct gendisk *disk;
int node = ctrl->numa_node;
+ bool last_path = false;
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
@@ -3933,9 +4203,22 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
- if (list_empty(&ns->head->list))
+ if (list_empty(&ns->head->list)) {
list_del_init(&ns->head->entry);
+ /*
+ * If multipath is not configured, we still create a namespace
+ * head (nshead), but head->disk is not initialized in that
+ * case. As a result, only a single reference to nshead is held
+ * (via kref_init()) when it is created. Therefore, ensure that
+ * we do not release the reference to nshead twice if head->disk
+ * is not present.
+ */
+ if (ns->head->disk)
+ last_path = true;
+ }
mutex_unlock(&ctrl->subsys->lock);
+ if (last_path)
+ nvme_put_ns_head(ns->head);
nvme_put_ns_head(ns->head);
out_cleanup_disk:
put_disk(disk);
@@ -3967,7 +4250,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
mutex_lock(&ns->ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
if (list_empty(&ns->head->list)) {
- list_del_init(&ns->head->entry);
+ if (!nvme_mpath_queue_if_no_path(ns->head))
+ list_del_init(&ns->head->entry);
last_path = true;
}
mutex_unlock(&ns->ctrl->subsys->lock);
@@ -3977,6 +4261,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)
if (!nvme_ns_head_multipath(ns->head))
nvme_cdev_del(&ns->cdev, &ns->cdev_device);
+
+ nvme_mpath_remove_sysfs_link(ns);
+
del_gendisk(ns->disk);
mutex_lock(&ns->ctrl->namespaces_lock);
@@ -3985,7 +4272,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
synchronize_srcu(&ns->ctrl->srcu);
if (last_path)
- nvme_mpath_shutdown_disk(ns->head);
+ nvme_mpath_remove_disk(ns->head);
nvme_put_ns(ns);
}
@@ -4037,7 +4324,7 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
}
/*
- * If available try to use the Command Set Idependent Identify Namespace
+ * If available try to use the Command Set Independent Identify Namespace
* data structure to find all the generic information that is needed to
* set up a namespace. If not fall back to the legacy version.
*/
@@ -4251,6 +4538,15 @@ static void nvme_scan_work(struct work_struct *work)
nvme_scan_ns_sequential(ctrl);
}
mutex_unlock(&ctrl->scan_lock);
+
+ /* Requeue if we have missed AENs */
+ if (test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events))
+ nvme_queue_scan(ctrl);
+#ifdef CONFIG_NVME_MULTIPATH
+ else if (ctrl->ana_log_buf)
+ /* Re-read the ANA log page to not miss updates */
+ queue_work(nvme_wq, &ctrl->ana_work);
+#endif
}
/*
@@ -4425,11 +4721,9 @@ static void nvme_fw_act_work(struct work_struct *work)
nvme_auth_stop(ctrl);
if (ctrl->mtfa)
- fw_act_timeout = jiffies +
- msecs_to_jiffies(ctrl->mtfa * 100);
+ fw_act_timeout = jiffies + msecs_to_jiffies(ctrl->mtfa * 100);
else
- fw_act_timeout = jiffies +
- msecs_to_jiffies(admin_timeout * 1000);
+ fw_act_timeout = jiffies + secs_to_jiffies(admin_timeout);
nvme_quiesce_io_queues(ctrl);
while (nvme_ctrl_pp_status(ctrl)) {
@@ -4442,7 +4736,8 @@ static void nvme_fw_act_work(struct work_struct *work)
msleep(100);
}
- if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING) ||
+ !nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
return;
nvme_unquiesce_io_queues(ctrl);
@@ -4559,7 +4854,6 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
/* Reserved for fabric connect and keep alive */
set->reserved_tags = 2;
set->numa_node = ctrl->numa_node;
- set->flags = BLK_MQ_F_NO_SCHED;
if (ctrl->ops->flags & NVME_F_BLOCKING)
set->flags |= BLK_MQ_F_BLOCKING;
set->cmd_size = cmd_size;
@@ -4600,8 +4894,12 @@ EXPORT_SYMBOL_GPL(nvme_alloc_admin_tag_set);
void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl)
{
+ /*
+ * As we're about to destroy the queue and free tagset
+ * we can not have keep-alive work running.
+ */
+ nvme_stop_keep_alive(ctrl);
blk_mq_destroy_queue(ctrl->admin_q);
- blk_put_queue(ctrl->admin_q);
if (ctrl->ops->flags & NVME_F_FABRICS) {
blk_mq_destroy_queue(ctrl->fabrics_q);
blk_put_queue(ctrl->fabrics_q);
@@ -4629,7 +4927,6 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
/* Reserved for fabric connect */
set->reserved_tags = 1;
set->numa_node = ctrl->numa_node;
- set->flags = BLK_MQ_F_SHOULD_MERGE;
if (ctrl->ops->flags & NVME_F_BLOCKING)
set->flags |= BLK_MQ_F_BLOCKING;
set->cmd_size = cmd_size;
@@ -4696,8 +4993,14 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
* checking that they started once before, hence are reconnecting back.
*/
if (test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) &&
- nvme_discovery_ctrl(ctrl))
+ nvme_discovery_ctrl(ctrl)) {
+ if (!ctrl->kato) {
+ nvme_stop_keep_alive(ctrl);
+ ctrl->kato = NVME_DEFAULT_KATO;
+ nvme_start_keep_alive(ctrl);
+ }
nvme_change_uevent(ctrl, "NVME_EVENT=rediscover");
+ }
if (ctrl->queue_count > 1) {
nvme_queue_scan(ctrl);
@@ -4740,6 +5043,8 @@ static void nvme_free_ctrl(struct device *dev)
container_of(dev, struct nvme_ctrl, ctrl_device);
struct nvme_subsystem *subsys = ctrl->subsys;
+ if (ctrl->admin_q)
+ blk_put_queue(ctrl->admin_q);
if (!subsys || ctrl->instance != subsys->instance)
ida_free(&nvme_instance_ida, ctrl->instance);
nvme_free_cels(ctrl);
@@ -5022,7 +5327,7 @@ struct nvme_ctrl *nvme_ctrl_from_file(struct file *file)
return NULL;
return file->private_data;
}
-EXPORT_SYMBOL_NS_GPL(nvme_ctrl_from_file, NVME_TARGET_PASSTHRU);
+EXPORT_SYMBOL_NS_GPL(nvme_ctrl_from_file, "NVME_TARGET_PASSTHRU");
/*
* Check we didn't inadvertently grow the command structure sizes: