From 75618ac6e98faee6ed1f17ae64875cc2d7784204 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Thu, 13 Mar 2025 09:23:18 +0530 Subject: block: remove unused parameter 'q' parameter in __blk_rq_map_sg() request_queue param is no longer used by blk_rq_map_sg and __blk_rq_map_sg. Remove it. Signed-off-by: Anuj Gupta Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250313035322.243239-1-anuj20.g@samsung.com Signed-off-by: Jens Axboe --- drivers/nvme/host/apple.c | 2 +- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/pci.c | 2 +- drivers/nvme/host/rdma.c | 3 +-- 4 files changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 1de11b722f04..fe2f9b143c9f 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -525,7 +525,7 @@ static blk_status_t apple_nvme_map_data(struct apple_nvme *anv, if (!iod->sg) return BLK_STS_RESOURCE; sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); - iod->nents = blk_rq_map_sg(req->q, req, iod->sg); + iod->nents = blk_rq_map_sg(req, iod->sg); if (!iod->nents) goto out_free_sg; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index f4f1866fbd5b..7de29dae8e74 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2620,7 +2620,7 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, if (ret) return -ENOMEM; - op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl); + op->nents = blk_rq_map_sg(rq, freq->sg_table.sgl); WARN_ON(op->nents > blk_rq_nr_phys_segments(rq)); freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, rq_dma_dir(rq)); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9197a5b173fd..a65978b6cdd8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -812,7 +812,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, if (!iod->sgt.sgl) return BLK_STS_RESOURCE; sg_init_table(iod->sgt.sgl, blk_rq_nr_phys_segments(req)); - iod->sgt.orig_nents = blk_rq_map_sg(req->q, req, iod->sgt.sgl); + iod->sgt.orig_nents = blk_rq_map_sg(req, iod->sgt.sgl); if (!iod->sgt.orig_nents) goto out_free_sg; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 86a2891d9bcc..b5a0295b5bf4 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1476,8 +1476,7 @@ static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq, if (ret) return -ENOMEM; - req->data_sgl.nents = blk_rq_map_sg(rq->q, rq, - req->data_sgl.sg_table.sgl); + req->data_sgl.nents = blk_rq_map_sg(rq, req->data_sgl.sg_table.sgl); *count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, rq_dma_dir(rq)); -- cgit From 62eb89323cb08f1d6a3b41b84972ff4f373a1960 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:13 +0100 Subject: nvme-keyring: add nvme_tls_psk_refresh() Add a function to refresh a generated PSK in the specified keyring. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 841238f38fdd..b50972257e49 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include -- cgit From e88a7595b57f2a04f1be796419444b4a14a55d18 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:14 +0100 Subject: nvme-tcp: request secure channel concatenation Add a fabrics option 'concat' to request secure channel concatenation as specified the NVME Base Specification v2.1, section 8.3.4.3: Secure Channel Concatenation. When secure channel concatenation is enabled a 'generated PSK' is inserted into the keyring such that it's available after reset. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/Kconfig | 2 +- drivers/nvme/host/auth.c | 115 ++++++++++++++++++++++++++++++++++++++++++-- drivers/nvme/host/fabrics.c | 34 +++++++++++-- drivers/nvme/host/fabrics.h | 3 ++ drivers/nvme/host/nvme.h | 2 + drivers/nvme/host/sysfs.c | 4 +- drivers/nvme/host/tcp.c | 53 +++++++++++++++++--- 7 files changed, 198 insertions(+), 15 deletions(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 486afe598184..10e453b2436e 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -109,7 +109,7 @@ config NVME_HOST_AUTH bool "NVMe over Fabrics In-Band Authentication in host side" depends on NVME_CORE select NVME_AUTH - select NVME_KEYRING if NVME_TCP_TLS + select NVME_KEYRING help This provides support for NVMe over Fabrics In-Band Authentication in host side. diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 5ea0e21709da..6115fef74c1e 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -12,6 +12,7 @@ #include "nvme.h" #include "fabrics.h" #include +#include #define CHAP_BUF_SIZE 4096 static struct kmem_cache *nvme_chap_buf_cache; @@ -131,7 +132,13 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl, data->auth_type = NVME_AUTH_COMMON_MESSAGES; data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; data->t_id = cpu_to_le16(chap->transaction); - data->sc_c = 0; /* No secure channel concatenation */ + if (ctrl->opts->concat && chap->qid == 0) { + if (ctrl->opts->tls_key) + data->sc_c = NVME_AUTH_SECP_REPLACETLSPSK; + else + data->sc_c = NVME_AUTH_SECP_NEWTLSPSK; + } else + data->sc_c = NVME_AUTH_SECP_NOSC; data->napd = 1; data->auth_protocol[0].dhchap.authid = NVME_AUTH_DHCHAP_AUTH_ID; data->auth_protocol[0].dhchap.halen = 3; @@ -311,8 +318,9 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, data->hl = chap->hash_len; data->dhvlen = cpu_to_le16(chap->host_key_len); memcpy(data->rval, chap->response, chap->hash_len); - if (ctrl->ctrl_key) { + if (ctrl->ctrl_key) chap->bi_directional = true; + if (ctrl->ctrl_key || ctrl->opts->concat) { get_random_bytes(chap->c2, chap->hash_len); data->cvalid = 1; memcpy(data->rval + chap->hash_len, chap->c2, @@ -322,7 +330,10 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, } else { memset(chap->c2, 0, chap->hash_len); } - chap->s2 = nvme_auth_get_seqnum(); + if (ctrl->opts->concat) + chap->s2 = 0; + else + chap->s2 = nvme_auth_get_seqnum(); data->seqnum = cpu_to_le32(chap->s2); if (chap->host_key_len) { dev_dbg(ctrl->device, "%s: qid %d host public key %*ph\n", @@ -677,6 +688,92 @@ static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap) crypto_free_kpp(chap->dh_tfm); } +void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl) +{ + dev_dbg(ctrl->device, "Wipe generated TLS PSK %08x\n", + key_serial(ctrl->opts->tls_key)); + key_revoke(ctrl->opts->tls_key); + key_put(ctrl->opts->tls_key); + ctrl->opts->tls_key = NULL; +} +EXPORT_SYMBOL_GPL(nvme_auth_revoke_tls_key); + +static int nvme_auth_secure_concat(struct nvme_ctrl *ctrl, + struct nvme_dhchap_queue_context *chap) +{ + u8 *psk, *digest, *tls_psk; + struct key *tls_key; + size_t psk_len; + int ret = 0; + + if (!chap->sess_key) { + dev_warn(ctrl->device, + "%s: qid %d no session key negotiated\n", + __func__, chap->qid); + return -ENOKEY; + } + + if (chap->qid) { + dev_warn(ctrl->device, + "qid %d: secure concatenation not supported on I/O queues\n", + chap->qid); + return -EINVAL; + } + ret = nvme_auth_generate_psk(chap->hash_id, chap->sess_key, + chap->sess_key_len, + chap->c1, chap->c2, + chap->hash_len, &psk, &psk_len); + if (ret) { + dev_warn(ctrl->device, + "%s: qid %d failed to generate PSK, error %d\n", + __func__, chap->qid, ret); + return ret; + } + dev_dbg(ctrl->device, + "%s: generated psk %*ph\n", __func__, (int)psk_len, psk); + + ret = nvme_auth_generate_digest(chap->hash_id, psk, psk_len, + ctrl->opts->subsysnqn, + ctrl->opts->host->nqn, &digest); + if (ret) { + dev_warn(ctrl->device, + "%s: qid %d failed to generate digest, error %d\n", + __func__, chap->qid, ret); + goto out_free_psk; + }; + dev_dbg(ctrl->device, "%s: generated digest %s\n", + __func__, digest); + ret = nvme_auth_derive_tls_psk(chap->hash_id, psk, psk_len, + digest, &tls_psk); + if (ret) { + dev_warn(ctrl->device, + "%s: qid %d failed to derive TLS psk, error %d\n", + __func__, chap->qid, ret); + goto out_free_digest; + }; + + tls_key = nvme_tls_psk_refresh(ctrl->opts->keyring, + ctrl->opts->host->nqn, + ctrl->opts->subsysnqn, chap->hash_id, + tls_psk, psk_len, digest); + if (IS_ERR(tls_key)) { + ret = PTR_ERR(tls_key); + dev_warn(ctrl->device, + "%s: qid %d failed to insert generated key, error %d\n", + __func__, chap->qid, ret); + tls_key = NULL; + } + kfree_sensitive(tls_psk); + if (ctrl->opts->tls_key) + nvme_auth_revoke_tls_key(ctrl); + ctrl->opts->tls_key = tls_key; +out_free_digest: + kfree_sensitive(digest); +out_free_psk: + kfree_sensitive(psk); + return ret; +} + static void nvme_queue_auth_work(struct work_struct *work) { struct nvme_dhchap_queue_context *chap = @@ -833,6 +930,13 @@ static void nvme_queue_auth_work(struct work_struct *work) } if (!ret) { chap->error = 0; + if (ctrl->opts->concat && + (ret = nvme_auth_secure_concat(ctrl, chap))) { + dev_warn(ctrl->device, + "%s: qid %d failed to enable secure concatenation\n", + __func__, chap->qid); + chap->error = ret; + } return; } @@ -912,6 +1016,11 @@ static void nvme_ctrl_auth_work(struct work_struct *work) "qid 0: authentication failed\n"); return; } + /* + * Only run authentication on the admin queue for secure concatenation. + */ + if (ctrl->opts->concat) + return; for (q = 1; q < ctrl->queue_count; q++) { ret = nvme_auth_negotiate(ctrl, q); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 432efcbf9e2f..93e9041b9657 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -472,8 +472,9 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) result = le32_to_cpu(res.u32); ctrl->cntlid = result & 0xFFFF; if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) { - /* Secure concatenation is not implemented */ - if (result & NVME_CONNECT_AUTHREQ_ASCR) { + /* Check for secure concatenation */ + if ((result & NVME_CONNECT_AUTHREQ_ASCR) && + !ctrl->opts->concat) { dev_warn(ctrl->device, "qid 0: secure concatenation is not supported\n"); ret = -EOPNOTSUPP; @@ -550,7 +551,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) /* Secure concatenation is not implemented */ if (result & NVME_CONNECT_AUTHREQ_ASCR) { dev_warn(ctrl->device, - "qid 0: secure concatenation is not supported\n"); + "qid %d: secure concatenation is not supported\n", qid); ret = -EOPNOTSUPP; goto out_free_data; } @@ -706,6 +707,7 @@ static const match_table_t opt_tokens = { #endif #ifdef CONFIG_NVME_TCP_TLS { NVMF_OPT_TLS, "tls" }, + { NVMF_OPT_CONCAT, "concat" }, #endif { NVMF_OPT_ERR, NULL } }; @@ -735,6 +737,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->tls = false; opts->tls_key = NULL; opts->keyring = NULL; + opts->concat = false; options = o = kstrdup(buf, GFP_KERNEL); if (!options) @@ -1053,6 +1056,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->tls = true; break; + case NVMF_OPT_CONCAT: + if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) { + pr_err("TLS is not supported\n"); + ret = -EINVAL; + goto out; + } + opts->concat = true; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); @@ -1079,6 +1090,23 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n", opts->fast_io_fail_tmo, ctrl_loss_tmo); } + if (opts->concat) { + if (opts->tls) { + pr_err("Secure concatenation over TLS is not supported\n"); + ret = -EINVAL; + goto out; + } + if (opts->tls_key) { + pr_err("Cannot specify a TLS key for secure concatenation\n"); + ret = -EINVAL; + goto out; + } + if (!opts->dhchap_secret) { + pr_err("Need to enable DH-CHAP for secure concatenation\n"); + ret = -EINVAL; + goto out; + } + } opts->host = nvmf_host_add(hostnqn, &hostid); if (IS_ERR(opts->host)) { diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 21d75dc4a3a0..9cf5b020adba 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -66,6 +66,7 @@ enum { NVMF_OPT_TLS = 1 << 25, NVMF_OPT_KEYRING = 1 << 26, NVMF_OPT_TLS_KEY = 1 << 27, + NVMF_OPT_CONCAT = 1 << 28, }; /** @@ -101,6 +102,7 @@ enum { * @keyring: Keyring to use for key lookups * @tls_key: TLS key for encrypted connections (TCP) * @tls: Start TLS encrypted connections (TCP) + * @concat: Enabled Secure channel concatenation (TCP) * @disable_sqflow: disable controller sq flow control * @hdr_digest: generate/verify header digest (TCP) * @data_digest: generate/verify data digest (TCP) @@ -130,6 +132,7 @@ struct nvmf_ctrl_options { struct key *keyring; struct key *tls_key; bool tls; + bool concat; bool disable_sqflow; bool hdr_digest; bool data_digest; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7be92d07430e..daa4c91e4848 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1147,6 +1147,7 @@ void nvme_auth_stop(struct nvme_ctrl *ctrl); int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid); int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid); void nvme_auth_free(struct nvme_ctrl *ctrl); +void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl); #else static inline int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) { @@ -1169,6 +1170,7 @@ static inline int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid) return -EPROTONOSUPPORT; } static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {}; +static inline void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl) {}; #endif u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 3a41b9ab0f13..52683943be15 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -780,10 +780,10 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj, return 0; if (a == &dev_attr_tls_key.attr && - !ctrl->opts->tls) + !ctrl->opts->tls && !ctrl->opts->concat) return 0; if (a == &dev_attr_tls_configured_key.attr && - !ctrl->opts->tls_key) + (!ctrl->opts->tls_key || ctrl->opts->concat)) return 0; if (a == &dev_attr_tls_keyring.attr && !ctrl->opts->keyring) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b50972257e49..196d36318853 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -235,7 +235,7 @@ static inline bool nvme_tcp_tls_configured(struct nvme_ctrl *ctrl) if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) return 0; - return ctrl->opts->tls; + return ctrl->opts->tls || ctrl->opts->concat; } static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue) @@ -1987,7 +1987,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) if (nvme_tcp_tls_configured(ctrl)) { if (ctrl->opts->tls_key) pskid = key_serial(ctrl->opts->tls_key); - else { + else if (ctrl->opts->tls) { pskid = nvme_tls_psk_default(ctrl->opts->keyring, ctrl->opts->host->nqn, ctrl->opts->subsysnqn); @@ -2017,9 +2017,25 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) { int i, ret; - if (nvme_tcp_tls_configured(ctrl) && !ctrl->tls_pskid) { - dev_err(ctrl->device, "no PSK negotiated\n"); - return -ENOKEY; + if (nvme_tcp_tls_configured(ctrl)) { + if (ctrl->opts->concat) { + /* + * The generated PSK is stored in the + * fabric options + */ + if (!ctrl->opts->tls_key) { + dev_err(ctrl->device, "no PSK generated\n"); + return -ENOKEY; + } + if (ctrl->tls_pskid && + ctrl->tls_pskid != key_serial(ctrl->opts->tls_key)) { + dev_err(ctrl->device, "Stale PSK id %08x\n", ctrl->tls_pskid); + ctrl->tls_pskid = 0; + } + } else if (!ctrl->tls_pskid) { + dev_err(ctrl->device, "no PSK negotiated\n"); + return -ENOKEY; + } } for (i = 1; i < ctrl->queue_count; i++) { @@ -2237,6 +2253,27 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, } } +/* + * The TLS key is set by secure concatenation after negotiation has been + * completed on the admin queue. We need to revoke the key when: + * - concatenation is enabled (otherwise it's a static key set by the user) + * and + * - the generated key is present in ctrl->tls_key (otherwise there's nothing + * to revoke) + * and + * - a valid PSK key ID has been set in ctrl->tls_pskid (otherwise TLS + * negotiation has not run). + * + * We cannot always revoke the key as nvme_tcp_alloc_admin_queue() is called + * twice during secure concatenation, once on a 'normal' connection to run the + * DH-HMAC-CHAP negotiation (which generates the key, so it _must not_ be set), + * and once after the negotiation (which uses the key, so it _must_ be set). + */ +static bool nvme_tcp_key_revoke_needed(struct nvme_ctrl *ctrl) +{ + return ctrl->opts->concat && ctrl->opts->tls_key && ctrl->tls_pskid; +} + static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) { struct nvmf_ctrl_options *opts = ctrl->opts; @@ -2342,6 +2379,8 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) struct nvme_tcp_ctrl, err_work); struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; + if (nvme_tcp_key_revoke_needed(ctrl)) + nvme_auth_revoke_tls_key(ctrl); nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); nvme_tcp_teardown_io_queues(ctrl, false); @@ -2382,6 +2421,8 @@ static void nvme_reset_ctrl_work(struct work_struct *work) container_of(work, struct nvme_ctrl, reset_work); int ret; + if (nvme_tcp_key_revoke_needed(ctrl)) + nvme_auth_revoke_tls_key(ctrl); nvme_stop_ctrl(ctrl); nvme_tcp_teardown_ctrl(ctrl, false); @@ -2877,7 +2918,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = { NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES | NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS | - NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY, + NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY | NVMF_OPT_CONCAT, .create_ctrl = nvme_tcp_create_ctrl, }; -- cgit From 104d0e2f622233477ef7e57e59e8a4c3bb062c82 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:15 +0100 Subject: nvme-fabrics: reset admin connection for secure concatenation When secure concatenation is requested the connection needs to be reset to enable TLS encryption on the new cnnection. That implies that the original connection used for the DH-CHAP negotiation really shouldn't be used, and we should reset as soon as the DH-CHAP negotiation has succeeded on the admin queue. Based on an idea from Sagi. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 196d36318853..feb2d7e17c4a 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2283,6 +2283,16 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) if (ret) return ret; + if (ctrl->opts && ctrl->opts->concat && !ctrl->tls_pskid) { + /* See comments for nvme_tcp_key_revoke_needed() */ + dev_dbg(ctrl->device, "restart admin queue for secure concatenation\n"); + nvme_stop_keep_alive(ctrl); + nvme_tcp_teardown_admin_queue(ctrl, false); + ret = nvme_tcp_configure_admin_queue(ctrl, false); + if (ret) + return ret; + } + if (ctrl->icdoff) { ret = -EOPNOTSUPP; dev_err(ctrl->device, "icdoff is not supported!\n"); -- cgit From 4dbd2b2ebe4cc5f101881e2c091a70ccd38db7ee Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 12 Jan 2025 18:11:44 +0530 Subject: nvme-multipath: Add visibility for round-robin io-policy This patch helps add nvme native multipath visibility for round-robin io-policy. It creates a "multipath" sysfs directory under head gendisk device node directory and then from "multipath" directory it adds a link to each namespace path device the head node refers. For instance, if we have a shared namespace accessible from two different controllers/paths then we create a soft link to each path device from head disk node as shown below: $ ls -l /sys/block/nvme1n1/multipath/ nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1 nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1 In the above example, nvme1n1 is head gendisk node created for a shared namespace and the namespace is accessible from nvme1c1n1 and nvme1c3n1 paths. For round-robin I/O policy, we could easily infer from the above output that I/O workload targeted to nvme1n1 would toggle across paths nvme1c1n1 and nvme1c3n1. Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 3 ++ drivers/nvme/host/multipath.c | 99 +++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 18 ++++++-- drivers/nvme/host/sysfs.c | 14 ++++++ 4 files changed, 130 insertions(+), 4 deletions(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 818d4e49aab5..870314c52107 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4020,6 +4020,9 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); + + nvme_mpath_remove_sysfs_link(ns); + del_gendisk(ns->disk); mutex_lock(&ns->ctrl->namespaces_lock); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 2a7635565083..7c89c8da46d6 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -686,6 +686,8 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) kblockd_schedule_work(&head->partition_scan_work); } + nvme_mpath_add_sysfs_link(ns->head); + mutex_lock(&head->lock); if (nvme_path_is_optimized(ns)) { int node, srcu_idx; @@ -768,6 +770,25 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, if (nvme_state_is_live(ns->ana_state) && nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); + else { + /* + * Add sysfs link from multipath head gendisk node to path + * device gendisk node. + * If path's ana state is live (i.e. state is either optimized + * or non-optimized) while we alloc the ns then sysfs link would + * be created from nvme_mpath_set_live(). In that case we would + * not fallthrough this code path. However for the path's ana + * state other than live, we call nvme_mpath_set_live() only + * after ana state transitioned to the live state. But we still + * want to create the sysfs link from head node to a path device + * irrespctive of the path's ana state. + * If we reach through here then it means that path's ana state + * is not live but still create the sysfs link to this path from + * head node if head node of the path has already come alive. + */ + if (test_bit(NVME_NSHEAD_DISK_LIVE, &ns->head->flags)) + nvme_mpath_add_sysfs_link(ns->head); + } } static int nvme_update_ana_state(struct nvme_ctrl *ctrl, @@ -967,6 +988,84 @@ static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, return -ENXIO; /* just break out of the loop */ } +void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head) +{ + struct device *target; + int rc, srcu_idx; + struct nvme_ns *ns; + struct kobject *kobj; + + /* + * Ensure head disk node is already added otherwise we may get invalid + * kobj for head disk node + */ + if (!test_bit(GD_ADDED, &head->disk->state)) + return; + + kobj = &disk_to_dev(head->disk)->kobj; + + /* + * loop through each ns chained through the head->list and create the + * sysfs link from head node to the ns path node + */ + srcu_idx = srcu_read_lock(&head->srcu); + + list_for_each_entry_rcu(ns, &head->list, siblings) { + /* + * Avoid creating link if it already exists for the given path. + * When path ana state transitions from optimized to non- + * optimized or vice-versa, the nvme_mpath_set_live() is + * invoked which in truns call this function. Now if the sysfs + * link already exists for the given path and we attempt to re- + * create the link then sysfs code would warn about it loudly. + * So we evaluate NVME_NS_SYSFS_ATTR_LINK flag here to ensure + * that we're not creating duplicate link. + * The test_and_set_bit() is used because it is protecting + * against multiple nvme paths being simultaneously added. + */ + if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) + continue; + + /* + * Ensure that ns path disk node is already added otherwise we + * may get invalid kobj name for target + */ + if (!test_bit(GD_ADDED, &ns->disk->state)) + continue; + + target = disk_to_dev(ns->disk); + /* + * Create sysfs link from head gendisk kobject @kobj to the + * ns path gendisk kobject @target->kobj. + */ + rc = sysfs_add_link_to_group(kobj, nvme_ns_mpath_attr_group.name, + &target->kobj, dev_name(target)); + if (unlikely(rc)) { + dev_err(disk_to_dev(ns->head->disk), + "failed to create link to %s\n", + dev_name(target)); + clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags); + } + } + + srcu_read_unlock(&head->srcu, srcu_idx); +} + +void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns) +{ + struct device *target; + struct kobject *kobj; + + if (!test_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) + return; + + target = disk_to_dev(ns->disk); + kobj = &disk_to_dev(ns->head->disk)->kobj; + sysfs_remove_link_from_group(kobj, nvme_ns_mpath_attr_group.name, + dev_name(target)); + clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags); +} + void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) { if (nvme_ctrl_use_ana(ns->ctrl)) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index daa4c91e4848..1eeb782947db 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -534,10 +534,11 @@ struct nvme_ns { struct nvme_ns_head *head; unsigned long flags; -#define NVME_NS_REMOVING 0 -#define NVME_NS_ANA_PENDING 2 -#define NVME_NS_FORCE_RO 3 -#define NVME_NS_READY 4 +#define NVME_NS_REMOVING 0 +#define NVME_NS_ANA_PENDING 2 +#define NVME_NS_FORCE_RO 3 +#define NVME_NS_READY 4 +#define NVME_NS_SYSFS_ATTR_LINK 5 struct cdev cdev; struct device cdev_device; @@ -933,6 +934,7 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo); int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); extern const struct attribute_group *nvme_ns_attr_groups[]; +extern const struct attribute_group nvme_ns_mpath_attr_group; extern const struct pr_ops nvme_pr_ops; extern const struct block_device_operations nvme_ns_head_ops; extern const struct attribute_group nvme_dev_attrs_group; @@ -955,6 +957,8 @@ void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys); void nvme_failover_req(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); +void nvme_mpath_add_sysfs_link(struct nvme_ns_head *ns); +void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns); void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid); void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); @@ -1009,6 +1013,12 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) { } +static inline void nvme_mpath_add_sysfs_link(struct nvme_ns *ns) +{ +} +static inline void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns) +{ +} static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns) { return false; diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 52683943be15..477c123a4b86 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -299,8 +299,22 @@ static const struct attribute_group nvme_ns_attr_group = { .is_visible = nvme_ns_attrs_are_visible, }; +#ifdef CONFIG_NVME_MULTIPATH +static struct attribute *nvme_ns_mpath_attrs[] = { + NULL, +}; + +const struct attribute_group nvme_ns_mpath_attr_group = { + .name = "multipath", + .attrs = nvme_ns_mpath_attrs, +}; +#endif + const struct attribute_group *nvme_ns_attr_groups[] = { &nvme_ns_attr_group, +#ifdef CONFIG_NVME_MULTIPATH + &nvme_ns_mpath_attr_group, +#endif NULL, }; -- cgit From 6546cc4a56618fda55e76c92ff7aea31d8f9fb88 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 12 Jan 2025 18:11:45 +0530 Subject: nvme-multipath: Add visibility for numa io-policy This patch helps add nvme native multipath visibility for numa io-policy. It adds a new attribute file named "numa_nodes" under namespace gendisk device path node which prints the list of numa nodes preferred by the given namespace path. The numa nodes value is comma delimited list of nodes or A-B range of nodes. For instance, if we have a shared namespace accessible from two different controllers/paths then accessing head node of the shared namespace would show the following output: $ ls -l /sys/block/nvme1n1/multipath/ nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1 nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1 In the above example, nvme1n1 is head gendisk node created for a shared namespace and this namespace is accessible from nvme1c1n1 and nvme1c3n1 paths. For numa io-policy we can then refer the "numa_nodes" attribute file created under each namespace path: $ cat /sys/block/nvme1n1/multipath/nvme1c1n1/numa_nodes 0-1 $ cat /sys/block/nvme1n1/multipath/nvme1c3n1/numa_nodes 2-3 >From the above output, we infer that I/O workload targeted at nvme1n1 and running on numa nodes 0 and 1 would prefer using path nvme1c1n1. Similarly, I/O workload running on numa nodes 2 and 3 would prefer using path nvme1c3n1. Reading "numa_nodes" file when configured io-policy is anything but numa would show no output. Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 27 +++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/sysfs.c | 5 +++++ 3 files changed, 33 insertions(+) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 7c89c8da46d6..b47e01942ca4 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -976,6 +976,33 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, } DEVICE_ATTR_RO(ana_state); +static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int node, srcu_idx; + nodemask_t numa_nodes; + struct nvme_ns *current_ns; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + struct nvme_ns_head *head = ns->head; + + if (head->subsys->iopolicy != NVME_IOPOLICY_NUMA) + return 0; + + nodes_clear(numa_nodes); + + srcu_idx = srcu_read_lock(&head->srcu); + for_each_node(node) { + current_ns = srcu_dereference(head->current_path[node], + &head->srcu); + if (ns == current_ns) + node_set(node, numa_nodes); + } + srcu_read_unlock(&head->srcu, srcu_idx); + + return sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&numa_nodes)); +} +DEVICE_ATTR_RO(numa_nodes); + static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *desc, void *data) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1eeb782947db..ff4bd0c2e401 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -984,6 +984,7 @@ static inline void nvme_trace_bio_complete(struct request *req) extern bool multipath; extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; +extern struct device_attribute dev_attr_numa_nodes; extern struct device_attribute subsys_attr_iopolicy; static inline bool nvme_disk_is_ns_head(struct gendisk *disk) diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 477c123a4b86..96eaecda0ab9 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -258,6 +258,7 @@ static struct attribute *nvme_ns_attrs[] = { #ifdef CONFIG_NVME_MULTIPATH &dev_attr_ana_grpid.attr, &dev_attr_ana_state.attr, + &dev_attr_numa_nodes.attr, #endif &dev_attr_io_passthru_err_log_enabled.attr, NULL, @@ -290,6 +291,10 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) return 0; } + if (a == &dev_attr_numa_nodes.attr) { + if (nvme_disk_is_ns_head(dev_to_disk(dev))) + return 0; + } #endif return a->mode; } -- cgit From 7cbafa3ff0187cdfa922aa7eb3d578a93999b3a9 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 12 Jan 2025 18:11:46 +0530 Subject: nvme-multipath: Add visibility for queue-depth io-policy This patch helps add nvme native multipath visibility for queue-depth io-policy. It adds a new attribute file named "queue_depth" under namespace device path node which would print the number of active/ in-flight I/O requests currently queued for the given path. For instance, if we have a shared namespace accessible from two different controllers/paths then accessing head block node of the shared namespace would show the following output: $ ls -l /sys/block/nvme1n1/multipath/ nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1 nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1 In the above example, nvme1n1 is head gendisk node created for a shared namespace and the namespace is accessible from nvme1c1n1 and nvme1c3n1 paths. For queue-depth io-policy we can then refer the "queue_depth" attribute file created under each namespace path: $ cat /sys/block/nvme1n1/multipath/nvme1c1n1/queue_depth 518 $cat /sys/block/nvme1n1/multipath/nvme1c3n1/queue_depth 504 >From the above output, we can infer that I/O workload targeted at nvme1n1 uses two paths nvme1c1n1 and nvme1c3n1 and the current queue depth of each path is 518 and 504 respectively. Reading "queue_depth" file when configured io-policy is anything but queue-depth would show no output. Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 12 ++++++++++++ drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/sysfs.c | 3 ++- 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index b47e01942ca4..6b12ca80aa27 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -976,6 +976,18 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, } DEVICE_ATTR_RO(ana_state); +static ssize_t queue_depth_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + if (ns->head->subsys->iopolicy != NVME_IOPOLICY_QD) + return 0; + + return sysfs_emit(buf, "%d\n", atomic_read(&ns->ctrl->nr_active)); +} +DEVICE_ATTR_RO(queue_depth); + static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ff4bd0c2e401..51e078642127 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -984,6 +984,7 @@ static inline void nvme_trace_bio_complete(struct request *req) extern bool multipath; extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; +extern struct device_attribute dev_attr_queue_depth; extern struct device_attribute dev_attr_numa_nodes; extern struct device_attribute subsys_attr_iopolicy; diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 96eaecda0ab9..6d31226f7a4f 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -258,6 +258,7 @@ static struct attribute *nvme_ns_attrs[] = { #ifdef CONFIG_NVME_MULTIPATH &dev_attr_ana_grpid.attr, &dev_attr_ana_state.attr, + &dev_attr_queue_depth.attr, &dev_attr_numa_nodes.attr, #endif &dev_attr_io_passthru_err_log_enabled.attr, @@ -291,7 +292,7 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) return 0; } - if (a == &dev_attr_numa_nodes.attr) { + if (a == &dev_attr_queue_depth.attr || a == &dev_attr_numa_nodes.attr) { if (nvme_disk_is_ns_head(dev_to_disk(dev))) return 0; } -- cgit From 978540050a85a2b7fc77b60a1cfaec110682e9c3 Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Thu, 13 Feb 2025 22:16:22 +0000 Subject: nvme-fc: Utilise min3() to simplify queue count calculation Refactor nvme_fc_create_io_queues() and nvme_fc_recreate_io_queues() to use the min3() macro to find the minimum between 3 values instead of multiple min()'s. This shortens the code and makes it easier to read. Signed-off-by: Qasim Ijaz Reviewed-by: James Smart Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7de29dae8e74..38fa2d049dcf 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2907,7 +2907,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) unsigned int nr_io_queues; int ret; - nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(), ctrl->lport->ops->max_hw_queues); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { @@ -2961,7 +2961,7 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) unsigned int nr_io_queues; int ret; - nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(), ctrl->lport->ops->max_hw_queues); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { -- cgit From f1b47aed535c0509e8a101490a5600ecd0641050 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 6 Mar 2025 10:53:31 +0200 Subject: nvme-pci: remove stale comment The ns variable has been removed in commit 62451a2b2e7e ("nvme: separate command prep and issue"). Drop reference to ns in comment. Fixes: 62451a2b2e7e ("nvme: separate command prep and issue") Signed-off-by: Baruch Siach Reviewed-by: Anuj Gupta Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a65978b6cdd8..08779f220577 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -953,9 +953,6 @@ out_free_cmd: return ret; } -/* - * NOTE: ns is NULL when called on the admin queue. - */ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { -- cgit From 945e82633ecfd54015d2447a9ba05941665db2ee Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 13 Mar 2025 14:25:20 +0900 Subject: nvme: zns: Simplify nvme_zone_parse_entry() Instead of passing a pointer to a struct nvme_ctrl and a pointer to a struct nvme_ns_head as the first two arguments of nvme_zone_parse_entry(), pass only a pointer to a struct nvme_ns as both the controller structure and ns head structure can be infered from the namespace structure. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/zns.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 382949e18c6a..cce4c5b55aa9 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -146,17 +146,16 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns, return NULL; } -static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl, - struct nvme_ns_head *head, +static int nvme_zone_parse_entry(struct nvme_ns *ns, struct nvme_zone_descriptor *entry, unsigned int idx, report_zones_cb cb, void *data) { + struct nvme_ns_head *head = ns->head; struct blk_zone zone = { }; if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) { - dev_err(ctrl->device, "invalid zone type %#x\n", - entry->zt); + dev_err(ns->ctrl->device, "invalid zone type %#x\n", entry->zt); return -EINVAL; } @@ -213,8 +212,7 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, break; for (i = 0; i < nz && zone_idx < nr_zones; i++) { - ret = nvme_zone_parse_entry(ns->ctrl, ns->head, - &report->entries[i], + ret = nvme_zone_parse_entry(ns, &report->entries[i], zone_idx, cb, data); if (ret) goto out_free; -- cgit From 1be52169c3488ef98582ed553ab35cefa3978817 Mon Sep 17 00:00:00 2001 From: Peijie Shao Date: Thu, 20 Mar 2025 14:35:23 +0800 Subject: nvme-tcp: fix selinux denied when calling sock_sendmsg In a SELinux enabled kernel, socket_create() initializes the security label of the socket using the security label of the calling process, this typically works well. However, in a containerized environment like Kubernetes, problem arises when a privileged container(domain spc_t) connects to an NVMe target and mounts the NVMe as persistent storage for unprivileged containers(domain container_t). This is because the container_t domain cannot access resources labeled with spc_t, resulting in socket_sendmsg returning -EACCES. The solution is to use socket_create_kern() instead of socket_create(), which labels the socket context to kernel_t. Access control will then be handled by the VFS layer rather than the socket itself. Signed-off-by: Peijie Shao Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index feb2d7e17c4a..542ffc921a3f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1717,7 +1717,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, queue->cmnd_capsule_len = sizeof(struct nvme_command) + NVME_TCP_ADMIN_CCSZ; - ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM, + ret = sock_create_kern(current->nsproxy->net_ns, + ctrl->addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &queue->sock); if (ret) { dev_err(nctrl->device, -- cgit