summary | refs | log | tree | commit | diff
path: root/drivers/nvme/host
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2025-03-26 18:08:55 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2025-03-26 18:08:55 -0700
commit: 9b960d8cd6f712cb2c03e2bdd4d5ca058238037f (patch)
tree: a1381af6c79626c0a28679f804477b43b7c91565 /drivers/nvme/host
parent: 91928e0d3cc29789f4483bffee5f36218f23942b (diff)
parent: 3c9f0c9326b625bf008962d58996f89a3bba1e12 (diff)
Merge tag 'for-6.15/block-20250322' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:

 - Fixes for integrity handling

 - NVMe pull request via Keith:
      - Secure concatenation for TCP transport (Hannes)
      - Multipath sysfs visibility (Nilay)
      - Various cleanups (Qasim, Baruch, Wang, Chen, Mike, Damien, Li)
      - Correct use of 64-bit BARs for pci-epf target (Niklas)
      - Socket fix for selinux when used in containers (Peijie)

 - MD pull request via Yu:
      - fix recovery can preempt resync (Li Nan)
      - fix md-bitmap IO limit (Su Yue)
      - fix raid10 discard with REQ_NOWAIT (Xiao Ni)
      - fix raid1 memory leak (Zheng Qixing)
      - fix mddev uaf (Yu Kuai)
      - fix raid1,raid10 IO flags (Yu Kuai)
      - some refactor and cleanup (Yu Kuai)

 - Series cleaning up and fixing bugs in the bad block handling code

 - Improve support for write failure simulation in null_blk

 - Various lock ordering fixes

 - Fixes for locking for debugfs attributes

 - Various ublk related fixes and improvements

 - Cleanups for blk-rq-qos wait handling

 - blk-throttle fixes

 - Fixes for loop dio and sync handling

 - Fixes and cleanups for the auto-PI code

 - Block side support for hardware encryption keys in blk-crypto

 - Various cleanups and fixes

* tag 'for-6.15/block-20250322' of git://git.kernel.dk/linux: (105 commits)
  nvmet: replace max(a, min(b, c)) by clamp(val, lo, hi)
  nvme-tcp: fix selinux denied when calling sock_sendmsg
  nvmet: pci-epf: Always configure BAR0 as 64-bit
  nvmet: Remove duplicate uuid_copy
  nvme: zns: Simplify nvme_zone_parse_entry()
  nvmet: pci-epf: Remove redundant 'flush_workqueue()' calls
  nvmet-fc: Remove unused functions
  nvme-pci: remove stale comment
  nvme-fc: Utilise min3() to simplify queue count calculation
  nvme-multipath: Add visibility for queue-depth io-policy
  nvme-multipath: Add visibility for numa io-policy
  nvme-multipath: Add visibility for round-robin io-policy
  nvmet: add tls_concat and tls_key debugfs entries
  nvmet-tcp: support secure channel concatenation
  nvmet: Add 'sq' argument to alloc_ctrl_args
  nvme-fabrics: reset admin connection for secure concatenation
  nvme-tcp: request secure channel concatenation
  nvme-keyring: add nvme_tls_psk_refresh()
  nvme: add nvme_auth_derive_tls_psk()
  nvme: add nvme_auth_generate_digest()
  ...
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- drivers/nvme/host/Kconfig | 2
-rw-r--r-- drivers/nvme/host/apple.c | 2
-rw-r--r-- drivers/nvme/host/auth.c | 115
-rw-r--r-- drivers/nvme/host/core.c | 3
-rw-r--r-- drivers/nvme/host/fabrics.c | 34
-rw-r--r-- drivers/nvme/host/fabrics.h | 3
-rw-r--r-- drivers/nvme/host/fc.c | 6
-rw-r--r-- drivers/nvme/host/multipath.c | 138
-rw-r--r-- drivers/nvme/host/nvme.h | 22
-rw-r--r-- drivers/nvme/host/pci.c | 5
-rw-r--r-- drivers/nvme/host/rdma.c | 3
-rw-r--r-- drivers/nvme/host/sysfs.c | 24
-rw-r--r-- drivers/nvme/host/tcp.c | 67
-rw-r--r-- drivers/nvme/host/zns.c | 10
14 files changed, 397 insertions, 37 deletions
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 486afe598184..10e453b2436e 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -109,7 +109,7 @@ config NVME_HOST_AUTH
bool "NVMe over Fabrics In-Band Authentication in host side"
depends on NVME_CORE
select NVME_AUTH
- select NVME_KEYRING if NVME_TCP_TLS
+ select NVME_KEYRING
help
This provides support for NVMe over Fabrics In-Band Authentication in
host side.
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index 8971aca41e63..57e863ecde58 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -525,7 +525,7 @@ static blk_status_t apple_nvme_map_data(struct apple_nvme *anv,
if (!iod->sg)
return BLK_STS_RESOURCE;
sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
- iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
+ iod->nents = blk_rq_map_sg(req, iod->sg);
if (!iod->nents)
goto out_free_sg;
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index 5ea0e21709da..6115fef74c1e 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -12,6 +12,7 @@
#include "nvme.h"
#include "fabrics.h"
#include <linux/nvme-auth.h>
+#include <linux/nvme-keyring.h>
#define CHAP_BUF_SIZE 4096
static struct kmem_cache *nvme_chap_buf_cache;
@@ -131,7 +132,13 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl,
data->auth_type = NVME_AUTH_COMMON_MESSAGES;
data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
data->t_id = cpu_to_le16(chap->transaction);
- data->sc_c = 0; /* No secure channel concatenation */
+ if (ctrl->opts->concat && chap->qid == 0) {
+ if (ctrl->opts->tls_key)
+ data->sc_c = NVME_AUTH_SECP_REPLACETLSPSK;
+ else
+ data->sc_c = NVME_AUTH_SECP_NEWTLSPSK;
+ } else
+ data->sc_c = NVME_AUTH_SECP_NOSC;
data->napd = 1;
data->auth_protocol[0].dhchap.authid = NVME_AUTH_DHCHAP_AUTH_ID;
data->auth_protocol[0].dhchap.halen = 3;
@@ -311,8 +318,9 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl,
data->hl = chap->hash_len;
data->dhvlen = cpu_to_le16(chap->host_key_len);
memcpy(data->rval, chap->response, chap->hash_len);
- if (ctrl->ctrl_key) {
+ if (ctrl->ctrl_key)
chap->bi_directional = true;
+ if (ctrl->ctrl_key || ctrl->opts->concat) {
get_random_bytes(chap->c2, chap->hash_len);
data->cvalid = 1;
memcpy(data->rval + chap->hash_len, chap->c2,
@@ -322,7 +330,10 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl,
} else {
memset(chap->c2, 0, chap->hash_len);
}
- chap->s2 = nvme_auth_get_seqnum();
+ if (ctrl->opts->concat)
+ chap->s2 = 0;
+ else
+ chap->s2 = nvme_auth_get_seqnum();
data->seqnum = cpu_to_le32(chap->s2);
if (chap->host_key_len) {
dev_dbg(ctrl->device, "%s: qid %d host public key %*ph\n",
@@ -677,6 +688,92 @@ static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap)
crypto_free_kpp(chap->dh_tfm);
}
+void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl)
+{
+ dev_dbg(ctrl->device, "Wipe generated TLS PSK %08x\n",
+ key_serial(ctrl->opts->tls_key));
+ key_revoke(ctrl->opts->tls_key);
+ key_put(ctrl->opts->tls_key);
+ ctrl->opts->tls_key = NULL;
+}
+EXPORT_SYMBOL_GPL(nvme_auth_revoke_tls_key);
+
+static int nvme_auth_secure_concat(struct nvme_ctrl *ctrl,
+ struct nvme_dhchap_queue_context *chap)
+{
+ u8 *psk, *digest, *tls_psk;
+ struct key *tls_key;
+ size_t psk_len;
+ int ret = 0;
+
+ if (!chap->sess_key) {
+ dev_warn(ctrl->device,
+ "%s: qid %d no session key negotiated\n",
+ __func__, chap->qid);
+ return -ENOKEY;
+ }
+
+ if (chap->qid) {
+ dev_warn(ctrl->device,
+ "qid %d: secure concatenation not supported on I/O queues\n",
+ chap->qid);
+ return -EINVAL;
+ }
+ ret = nvme_auth_generate_psk(chap->hash_id, chap->sess_key,
+ chap->sess_key_len,
+ chap->c1, chap->c2,
+ chap->hash_len, &psk, &psk_len);
+ if (ret) {
+ dev_warn(ctrl->device,
+ "%s: qid %d failed to generate PSK, error %d\n",
+ __func__, chap->qid, ret);
+ return ret;
+ }
+ dev_dbg(ctrl->device,
+ "%s: generated psk %*ph\n", __func__, (int)psk_len, psk);
+
+ ret = nvme_auth_generate_digest(chap->hash_id, psk, psk_len,
+ ctrl->opts->subsysnqn,
+ ctrl->opts->host->nqn, &digest);
+ if (ret) {
+ dev_warn(ctrl->device,
+ "%s: qid %d failed to generate digest, error %d\n",
+ __func__, chap->qid, ret);
+ goto out_free_psk;
+ };
+ dev_dbg(ctrl->device, "%s: generated digest %s\n",
+ __func__, digest);
+ ret = nvme_auth_derive_tls_psk(chap->hash_id, psk, psk_len,
+ digest, &tls_psk);
+ if (ret) {
+ dev_warn(ctrl->device,
+ "%s: qid %d failed to derive TLS psk, error %d\n",
+ __func__, chap->qid, ret);
+ goto out_free_digest;
+ };
+
+ tls_key = nvme_tls_psk_refresh(ctrl->opts->keyring,
+ ctrl->opts->host->nqn,
+ ctrl->opts->subsysnqn, chap->hash_id,
+ tls_psk, psk_len, digest);
+ if (IS_ERR(tls_key)) {
+ ret = PTR_ERR(tls_key);
+ dev_warn(ctrl->device,
+ "%s: qid %d failed to insert generated key, error %d\n",
+ __func__, chap->qid, ret);
+ tls_key = NULL;
+ }
+ kfree_sensitive(tls_psk);
+ if (ctrl->opts->tls_key)
+ nvme_auth_revoke_tls_key(ctrl);
+ ctrl->opts->tls_key = tls_key;
+out_free_digest:
+ kfree_sensitive(digest);
+out_free_psk:
+ kfree_sensitive(psk);
+ return ret;
+}
+
static void nvme_queue_auth_work(struct work_struct *work)
{
struct nvme_dhchap_queue_context *chap =
@@ -833,6 +930,13 @@ static void nvme_queue_auth_work(struct work_struct *work)
}
if (!ret) {
chap->error = 0;
+ if (ctrl->opts->concat &&
+ (ret = nvme_auth_secure_concat(ctrl, chap))) {
+ dev_warn(ctrl->device,
+ "%s: qid %d failed to enable secure concatenation\n",
+ __func__, chap->qid);
+ chap->error = ret;
+ }
return;
}
@@ -912,6 +1016,11 @@ static void nvme_ctrl_auth_work(struct work_struct *work)
"qid 0: authentication failed\n");
return;
}
+ /*
+ * Only run authentication on the admin queue for secure concatenation.
+ */
+ if (ctrl->opts->concat)
+ return;
for (q = 1; q < ctrl->queue_count; q++) {
ret = nvme_auth_negotiate(ctrl, q);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8359d0aa0e44..777db89fdaa7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4018,6 +4018,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)
if (!nvme_ns_head_multipath(ns->head))
nvme_cdev_del(&ns->cdev, &ns->cdev_device);
+
+ nvme_mpath_remove_sysfs_link(ns);
+
del_gendisk(ns->disk);
mutex_lock(&ns->ctrl->namespaces_lock);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 432efcbf9e2f..93e9041b9657 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -472,8 +472,9 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
result = le32_to_cpu(res.u32);
ctrl->cntlid = result & 0xFFFF;
if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) {
- /* Secure concatenation is not implemented */
- if (result & NVME_CONNECT_AUTHREQ_ASCR) {
+ /* Check for secure concatenation */
+ if ((result & NVME_CONNECT_AUTHREQ_ASCR) &&
+ !ctrl->opts->concat) {
dev_warn(ctrl->device,
"qid 0: secure concatenation is not supported\n");
ret = -EOPNOTSUPP;
@@ -550,7 +551,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
/* Secure concatenation is not implemented */
if (result & NVME_CONNECT_AUTHREQ_ASCR) {
dev_warn(ctrl->device,
- "qid 0: secure concatenation is not supported\n");
+ "qid %d: secure concatenation is not supported\n", qid);
ret = -EOPNOTSUPP;
goto out_free_data;
}
@@ -706,6 +707,7 @@ static const match_table_t opt_tokens = {
#endif
#ifdef CONFIG_NVME_TCP_TLS
{ NVMF_OPT_TLS, "tls" },
+ { NVMF_OPT_CONCAT, "concat" },
#endif
{ NVMF_OPT_ERR, NULL }
};
@@ -735,6 +737,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->tls = false;
opts->tls_key = NULL;
opts->keyring = NULL;
+ opts->concat = false;
options = o = kstrdup(buf, GFP_KERNEL);
if (!options)
@@ -1053,6 +1056,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
opts->tls = true;
break;
+ case NVMF_OPT_CONCAT:
+ if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) {
+ pr_err("TLS is not supported\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->concat = true;
+ break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
@@ -1079,6 +1090,23 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n",
opts->fast_io_fail_tmo, ctrl_loss_tmo);
}
+ if (opts->concat) {
+ if (opts->tls) {
+ pr_err("Secure concatenation over TLS is not supported\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (opts->tls_key) {
+ pr_err("Cannot specify a TLS key for secure concatenation\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!opts->dhchap_secret) {
+ pr_err("Need to enable DH-CHAP for secure concatenation\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
opts->host = nvmf_host_add(hostnqn, &hostid);
if (IS_ERR(opts->host)) {
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 21d75dc4a3a0..9cf5b020adba 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -66,6 +66,7 @@ enum {
NVMF_OPT_TLS = 1 << 25,
NVMF_OPT_KEYRING = 1 << 26,
NVMF_OPT_TLS_KEY = 1 << 27,
+ NVMF_OPT_CONCAT = 1 << 28,
};
/**
@@ -101,6 +102,7 @@ enum {
* @keyring: Keyring to use for key lookups
* @tls_key: TLS key for encrypted connections (TCP)
* @tls: Start TLS encrypted connections (TCP)
+ * @concat: Enabled Secure channel concatenation (TCP)
* @disable_sqflow: disable controller sq flow control
* @hdr_digest: generate/verify header digest (TCP)
* @data_digest: generate/verify data digest (TCP)
@@ -130,6 +132,7 @@ struct nvmf_ctrl_options {
struct key *keyring;
struct key *tls_key;
bool tls;
+ bool concat;
bool disable_sqflow;
bool hdr_digest;
bool data_digest;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index b9929a5a7f4e..2257c3c96dd2 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2571,7 +2571,7 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
if (ret)
return -ENOMEM;
- op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
+ op->nents = blk_rq_map_sg(rq, freq->sg_table.sgl);
WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
op->nents, rq_dma_dir(rq));
@@ -2858,7 +2858,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
unsigned int nr_io_queues;
int ret;
- nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+ nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(),
ctrl->lport->ops->max_hw_queues);
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret) {
@@ -2912,7 +2912,7 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
unsigned int nr_io_queues;
int ret;
- nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+ nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(),
ctrl->lport->ops->max_hw_queues);
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 2a7635565083..6b12ca80aa27 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -686,6 +686,8 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
kblockd_schedule_work(&head->partition_scan_work);
}
+ nvme_mpath_add_sysfs_link(ns->head);
+
mutex_lock(&head->lock);
if (nvme_path_is_optimized(ns)) {
int node, srcu_idx;
@@ -768,6 +770,25 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
if (nvme_state_is_live(ns->ana_state) &&
nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
nvme_mpath_set_live(ns);
+ else {
+ /*
+ * Add sysfs link from multipath head gendisk node to path
+ * device gendisk node.
+ * If path's ana state is live (i.e. state is either optimized
+ * or non-optimized) while we alloc the ns then sysfs link would
+ * be created from nvme_mpath_set_live(). In that case we would
+ * not fallthrough this code path. However for the path's ana
+ * state other than live, we call nvme_mpath_set_live() only
+ * after ana state transitioned to the live state. But we still
+ * want to create the sysfs link from head node to a path device
+ * irrespective of the path's ana state.
+ * If we reach through here then it means that path's ana state
+ * is not live but still create the sysfs link to this path from
+ * head node if head node of the path has already come alive.
+ */
+ if (test_bit(NVME_NSHEAD_DISK_LIVE, &ns->head->flags))
+ nvme_mpath_add_sysfs_link(ns->head);
+ }
}
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
@@ -955,6 +976,45 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
}
DEVICE_ATTR_RO(ana_state);
+static ssize_t queue_depth_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+
+ if (ns->head->subsys->iopolicy != NVME_IOPOLICY_QD)
+ return 0;
+
+ return sysfs_emit(buf, "%d\n", atomic_read(&ns->ctrl->nr_active));
+}
+DEVICE_ATTR_RO(queue_depth);
+
+static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int node, srcu_idx;
+ nodemask_t numa_nodes;
+ struct nvme_ns *current_ns;
+ struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+ struct nvme_ns_head *head = ns->head;
+
+ if (head->subsys->iopolicy != NVME_IOPOLICY_NUMA)
+ return 0;
+
+ nodes_clear(numa_nodes);
+
+ srcu_idx = srcu_read_lock(&head->srcu);
+ for_each_node(node) {
+ current_ns = srcu_dereference(head->current_path[node],
+ &head->srcu);
+ if (ns == current_ns)
+ node_set(node, numa_nodes);
+ }
+ srcu_read_unlock(&head->srcu, srcu_idx);
+
+ return sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&numa_nodes));
+}
+DEVICE_ATTR_RO(numa_nodes);
+
static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
struct nvme_ana_group_desc *desc, void *data)
{
@@ -967,6 +1027,84 @@ static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
return -ENXIO; /* just break out of the loop */
}
+void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
+{
+ struct device *target;
+ int rc, srcu_idx;
+ struct nvme_ns *ns;
+ struct kobject *kobj;
+
+ /*
+ * Ensure head disk node is already added otherwise we may get invalid
+ * kobj for head disk node
+ */
+ if (!test_bit(GD_ADDED, &head->disk->state))
+ return;
+
+ kobj = &disk_to_dev(head->disk)->kobj;
+
+ /*
+ * loop through each ns chained through the head->list and create the
+ * sysfs link from head node to the ns path node
+ */
+ srcu_idx = srcu_read_lock(&head->srcu);
+
+ list_for_each_entry_rcu(ns, &head->list, siblings) {
+ /*
+ * Avoid creating link if it already exists for the given path.
+ * When path ana state transitions from optimized to non-
+ * optimized or vice-versa, the nvme_mpath_set_live() is
+ * invoked which in turn calls this function. Now if the sysfs
+ * link already exists for the given path and we attempt to re-
+ * create the link then sysfs code would warn about it loudly.
+ * So we evaluate NVME_NS_SYSFS_ATTR_LINK flag here to ensure
+ * that we're not creating duplicate link.
+ * The test_and_set_bit() is used because it is protecting
+ * against multiple nvme paths being simultaneously added.
+ */
+ if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
+ continue;
+
+ /*
+ * Ensure that ns path disk node is already added otherwise we
+ * may get invalid kobj name for target
+ */
+ if (!test_bit(GD_ADDED, &ns->disk->state))
+ continue;
+
+ target = disk_to_dev(ns->disk);
+ /*
+ * Create sysfs link from head gendisk kobject @kobj to the
+ * ns path gendisk kobject @target->kobj.
+ */
+ rc = sysfs_add_link_to_group(kobj, nvme_ns_mpath_attr_group.name,
+ &target->kobj, dev_name(target));
+ if (unlikely(rc)) {
+ dev_err(disk_to_dev(ns->head->disk),
+ "failed to create link to %s\n",
+ dev_name(target));
+ clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags);
+ }
+ }
+
+ srcu_read_unlock(&head->srcu, srcu_idx);
+}
+
+void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns)
+{
+ struct device *target;
+ struct kobject *kobj;
+
+ if (!test_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
+ return;
+
+ target = disk_to_dev(ns->disk);
+ kobj = &disk_to_dev(ns->head->disk)->kobj;
+ sysfs_remove_link_from_group(kobj, nvme_ns_mpath_attr_group.name,
+ dev_name(target));
+ clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags);
+}
+
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
{
if (nvme_ctrl_use_ana(ns->ctrl)) {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 7be92d07430e..51e078642127 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -534,10 +534,11 @@ struct nvme_ns {
struct nvme_ns_head *head;
unsigned long flags;
-#define NVME_NS_REMOVING 0
-#define NVME_NS_ANA_PENDING 2
-#define NVME_NS_FORCE_RO 3
-#define NVME_NS_READY 4
+#define NVME_NS_REMOVING 0
+#define NVME_NS_ANA_PENDING 2
+#define NVME_NS_FORCE_RO 3
+#define NVME_NS_READY 4
+#define NVME_NS_SYSFS_ATTR_LINK 5
struct cdev cdev;
struct device cdev_device;
@@ -933,6 +934,7 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
extern const struct attribute_group *nvme_ns_attr_groups[];
+extern const struct attribute_group nvme_ns_mpath_attr_group;
extern const struct pr_ops nvme_pr_ops;
extern const struct block_device_operations nvme_ns_head_ops;
extern const struct attribute_group nvme_dev_attrs_group;
@@ -955,6 +957,8 @@ void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
void nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
+void nvme_mpath_add_sysfs_link(struct nvme_ns_head *ns);
+void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns);
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
@@ -980,6 +984,8 @@ static inline void nvme_trace_bio_complete(struct request *req)
extern bool multipath;
extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
+extern struct device_attribute dev_attr_queue_depth;
+extern struct device_attribute dev_attr_numa_nodes;
extern struct device_attribute subsys_attr_iopolicy;
static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
@@ -1009,6 +1015,12 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
}
+static inline void nvme_mpath_add_sysfs_link(struct nvme_ns *ns)
+{
+}
+static inline void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns)
+{
+}
static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
return false;
@@ -1147,6 +1159,7 @@ void nvme_auth_stop(struct nvme_ctrl *ctrl);
int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid);
int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid);
void nvme_auth_free(struct nvme_ctrl *ctrl);
+void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl);
#else
static inline int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
{
@@ -1169,6 +1182,7 @@ static inline int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid)
return -EPROTONOSUPPORT;
}
static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {};
+static inline void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl) {};
#endif
u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3ad7f197c808..2883d17ee1eb 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -812,7 +812,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
if (!iod->sgt.sgl)
return BLK_STS_RESOURCE;
sg_init_table(iod->sgt.sgl, blk_rq_nr_phys_segments(req));
- iod->sgt.orig_nents = blk_rq_map_sg(req->q, req, iod->sgt.sgl);
+ iod->sgt.orig_nents = blk_rq_map_sg(req, iod->sgt.sgl);
if (!iod->sgt.orig_nents)
goto out_free_sg;
@@ -953,9 +953,6 @@ out_free_cmd:
return ret;
}
-/*
- * NOTE: ns is NULL when called on the admin queue.
- */
static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 86a2891d9bcc..b5a0295b5bf4 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1476,8 +1476,7 @@ static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq,
if (ret)
return -ENOMEM;
- req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
- req->data_sgl.sg_table.sgl);
+ req->data_sgl.nents = blk_rq_map_sg(rq, req->data_sgl.sg_table.sgl);
*count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
req->data_sgl.nents, rq_dma_dir(rq));
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 3a41b9ab0f13..6d31226f7a4f 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -258,6 +258,8 @@ static struct attribute *nvme_ns_attrs[] = {
#ifdef CONFIG_NVME_MULTIPATH
&dev_attr_ana_grpid.attr,
&dev_attr_ana_state.attr,
+ &dev_attr_queue_depth.attr,
+ &dev_attr_numa_nodes.attr,
#endif
&dev_attr_io_passthru_err_log_enabled.attr,
NULL,
@@ -290,6 +292,10 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
return 0;
}
+ if (a == &dev_attr_queue_depth.attr || a == &dev_attr_numa_nodes.attr) {
+ if (nvme_disk_is_ns_head(dev_to_disk(dev)))
+ return 0;
+ }
#endif
return a->mode;
}
@@ -299,8 +305,22 @@ static const struct attribute_group nvme_ns_attr_group = {
.is_visible = nvme_ns_attrs_are_visible,
};
+#ifdef CONFIG_NVME_MULTIPATH
+static struct attribute *nvme_ns_mpath_attrs[] = {
+ NULL,
+};
+
+const struct attribute_group nvme_ns_mpath_attr_group = {
+ .name = "multipath",
+ .attrs = nvme_ns_mpath_attrs,
+};
+#endif
+
const struct attribute_group *nvme_ns_attr_groups[] = {
&nvme_ns_attr_group,
+#ifdef CONFIG_NVME_MULTIPATH
+ &nvme_ns_mpath_attr_group,
+#endif
NULL,
};
@@ -780,10 +800,10 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
return 0;
if (a == &dev_attr_tls_key.attr &&
- !ctrl->opts->tls)
+ !ctrl->opts->tls && !ctrl->opts->concat)
return 0;
if (a == &dev_attr_tls_configured_key.attr &&
- !ctrl->opts->tls_key)
+ (!ctrl->opts->tls_key || ctrl->opts->concat))
return 0;
if (a == &dev_attr_tls_keyring.attr &&
!ctrl->opts->keyring)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 327f3f2f5399..26c459f0198d 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -8,7 +8,6 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
-#include <linux/key.h>
#include <linux/nvme-tcp.h>
#include <linux/nvme-keyring.h>
#include <net/sock.h>
@@ -249,7 +248,7 @@ static inline bool nvme_tcp_tls_configured(struct nvme_ctrl *ctrl)
if (!IS_ENABLED(CONFIG_NVME_TCP_TLS))
return 0;
- return ctrl->opts->tls;
+ return ctrl->opts->tls || ctrl->opts->concat;
}
static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue)
@@ -1790,7 +1789,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
queue->cmnd_capsule_len = sizeof(struct nvme_command) +
NVME_TCP_ADMIN_CCSZ;
- ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
+ ret = sock_create_kern(current->nsproxy->net_ns,
+ ctrl->addr.ss_family, SOCK_STREAM,
IPPROTO_TCP, &queue->sock);
if (ret) {
dev_err(nctrl->device,
@@ -2060,7 +2060,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
if (nvme_tcp_tls_configured(ctrl)) {
if (ctrl->opts->tls_key)
pskid = key_serial(ctrl->opts->tls_key);
- else {
+ else if (ctrl->opts->tls) {
pskid = nvme_tls_psk_default(ctrl->opts->keyring,
ctrl->opts->host->nqn,
ctrl->opts->subsysnqn);
@@ -2090,9 +2090,25 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
{
int i, ret;
- if (nvme_tcp_tls_configured(ctrl) && !ctrl->tls_pskid) {
- dev_err(ctrl->device, "no PSK negotiated\n");
- return -ENOKEY;
+ if (nvme_tcp_tls_configured(ctrl)) {
+ if (ctrl->opts->concat) {
+ /*
+ * The generated PSK is stored in the
+ * fabric options
+ */
+ if (!ctrl->opts->tls_key) {
+ dev_err(ctrl->device, "no PSK generated\n");
+ return -ENOKEY;
+ }
+ if (ctrl->tls_pskid &&
+ ctrl->tls_pskid != key_serial(ctrl->opts->tls_key)) {
+ dev_err(ctrl->device, "Stale PSK id %08x\n", ctrl->tls_pskid);
+ ctrl->tls_pskid = 0;
+ }
+ } else if (!ctrl->tls_pskid) {
+ dev_err(ctrl->device, "no PSK negotiated\n");
+ return -ENOKEY;
+ }
}
for (i = 1; i < ctrl->queue_count; i++) {
@@ -2310,6 +2326,27 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl,
}
}
+/*
+ * The TLS key is set by secure concatenation after negotiation has been
+ * completed on the admin queue. We need to revoke the key when:
+ * - concatenation is enabled (otherwise it's a static key set by the user)
+ * and
+ * - the generated key is present in ctrl->tls_key (otherwise there's nothing
+ * to revoke)
+ * and
+ * - a valid PSK key ID has been set in ctrl->tls_pskid (otherwise TLS
+ * negotiation has not run).
+ *
+ * We cannot always revoke the key as nvme_tcp_alloc_admin_queue() is called
+ * twice during secure concatenation, once on a 'normal' connection to run the
+ * DH-HMAC-CHAP negotiation (which generates the key, so it _must not_ be set),
+ * and once after the negotiation (which uses the key, so it _must_ be set).
+ */
+static bool nvme_tcp_key_revoke_needed(struct nvme_ctrl *ctrl)
+{
+ return ctrl->opts->concat && ctrl->opts->tls_key && ctrl->tls_pskid;
+}
+
static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
{
struct nvmf_ctrl_options *opts = ctrl->opts;
@@ -2319,6 +2356,16 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
if (ret)
return ret;
+ if (ctrl->opts && ctrl->opts->concat && !ctrl->tls_pskid) {
+ /* See comments for nvme_tcp_key_revoke_needed() */
+ dev_dbg(ctrl->device, "restart admin queue for secure concatenation\n");
+ nvme_stop_keep_alive(ctrl);
+ nvme_tcp_teardown_admin_queue(ctrl, false);
+ ret = nvme_tcp_configure_admin_queue(ctrl, false);
+ if (ret)
+ return ret;
+ }
+
if (ctrl->icdoff) {
ret = -EOPNOTSUPP;
dev_err(ctrl->device, "icdoff is not supported!\n");
@@ -2415,6 +2462,8 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
struct nvme_tcp_ctrl, err_work);
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+ if (nvme_tcp_key_revoke_needed(ctrl))
+ nvme_auth_revoke_tls_key(ctrl);
nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
nvme_tcp_teardown_io_queues(ctrl, false);
@@ -2455,6 +2504,8 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
container_of(work, struct nvme_ctrl, reset_work);
int ret;
+ if (nvme_tcp_key_revoke_needed(ctrl))
+ nvme_auth_revoke_tls_key(ctrl);
nvme_stop_ctrl(ctrl);
nvme_tcp_teardown_ctrl(ctrl, false);
@@ -2951,7 +3002,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS |
- NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY,
+ NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY | NVMF_OPT_CONCAT,
.create_ctrl = nvme_tcp_create_ctrl,
};
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 382949e18c6a..cce4c5b55aa9 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -146,17 +146,16 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
return NULL;
}
-static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl,
- struct nvme_ns_head *head,
+static int nvme_zone_parse_entry(struct nvme_ns *ns,
struct nvme_zone_descriptor *entry,
unsigned int idx, report_zones_cb cb,
void *data)
{
+ struct nvme_ns_head *head = ns->head;
struct blk_zone zone = { };
if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
- dev_err(ctrl->device, "invalid zone type %#x\n",
- entry->zt);
+ dev_err(ns->ctrl->device, "invalid zone type %#x\n", entry->zt);
return -EINVAL;
}
@@ -213,8 +212,7 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
break;
for (i = 0; i < nz && zone_idx < nr_zones; i++) {
- ret = nvme_zone_parse_entry(ns->ctrl, ns->head,
- &report->entries[i],
+ ret = nvme_zone_parse_entry(ns, &report->entries[i],
zone_idx, cb, data);
if (ret)
goto out_free;