diff options
Diffstat (limited to 'drivers/nvme/host/tcp.c')
-rw-r--r-- | drivers/nvme/host/tcp.c | 606 |
1 files changed, 419 insertions, 187 deletions
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index d058d990532b..d924008c3949 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -8,7 +8,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/err.h> -#include <linux/key.h> +#include <linux/crc32.h> #include <linux/nvme-tcp.h> #include <linux/nvme-keyring.h> #include <net/sock.h> @@ -17,7 +17,6 @@ #include <net/tls_prot.h> #include <net/handshake.h> #include <linux/blk-mq.h> -#include <crypto/hash.h> #include <net/busy_poll.h> #include <trace/events/sock.h> @@ -37,6 +36,14 @@ module_param(so_priority, int, 0644); MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); /* + * Use the unbound workqueue for nvme_tcp_wq, then we can set the cpu affinity + * from sysfs. + */ +static bool wq_unbound; +module_param(wq_unbound, bool, 0644); +MODULE_PARM_DESC(wq_unbound, "Use unbound workqueue for nvme-tcp IO context (default false)"); + +/* * TLS handshake timeout */ static int tls_handshake_timeout = 10; @@ -46,6 +53,8 @@ MODULE_PARM_DESC(tls_handshake_timeout, "nvme TLS handshake timeout in seconds (default 10)"); #endif +static atomic_t nvme_tcp_cpu_queues[NR_CPUS]; + #ifdef CONFIG_DEBUG_LOCK_ALLOC /* lockdep can detect a circular dependency of the form * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock @@ -119,6 +128,7 @@ enum nvme_tcp_queue_flags { NVME_TCP_Q_ALLOCATED = 0, NVME_TCP_Q_LIVE = 1, NVME_TCP_Q_POLLING = 2, + NVME_TCP_Q_IO_CPU_SET = 3, }; enum nvme_tcp_recv_state { @@ -157,8 +167,9 @@ struct nvme_tcp_queue { bool hdr_digest; bool data_digest; - struct ahash_request *rcv_hash; - struct ahash_request *snd_hash; + bool tls_enabled; + u32 rcv_crc; + u32 snd_crc; __le32 exp_ddgst; __le32 recv_ddgst; struct completion tls_complete; @@ -205,12 +216,39 @@ static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue) return queue - queue->ctrl->queues; } -static inline bool nvme_tcp_tls(struct nvme_ctrl *ctrl) +static inline bool nvme_tcp_recv_pdu_supported(enum nvme_tcp_pdu_type type) +{ + switch (type) { + case nvme_tcp_c2h_term: + case nvme_tcp_c2h_data: + case nvme_tcp_r2t: + case nvme_tcp_rsp: + return true; + default: + return false; + } +} + +/* + * Check if the queue is TLS encrypted + */ +static inline bool nvme_tcp_queue_tls(struct nvme_tcp_queue *queue) +{ + if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) + return 0; + + return queue->tls_enabled; +} + +/* + * Check if TLS is configured for the controller. + */ +static inline bool nvme_tcp_tls_configured(struct nvme_ctrl *ctrl) { if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) return 0; - return ctrl->opts->tls; + return ctrl->opts->tls || ctrl->opts->concat; } static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue) @@ -352,14 +390,20 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) } while (ret > 0); } -static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) +static inline bool nvme_tcp_queue_has_pending(struct nvme_tcp_queue *queue) { return !list_empty(&queue->send_list) || !llist_empty(&queue->req_list); } +static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) +{ + return !nvme_tcp_queue_tls(queue) && + nvme_tcp_queue_has_pending(queue); +} + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, - bool sync, bool last) + bool last) { struct nvme_tcp_queue *queue = req->queue; bool empty; @@ -373,12 +417,12 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, * are on the same cpu, so we don't introduce contention. */ if (queue->io_cpu == raw_smp_processor_id() && - sync && empty && mutex_trylock(&queue->send_mutex)) { + empty && mutex_trylock(&queue->send_mutex)) { nvme_tcp_send_all(queue); mutex_unlock(&queue->send_mutex); } - if (last && nvme_tcp_queue_more(queue)) + if (last && nvme_tcp_queue_has_pending(queue)) queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } @@ -408,36 +452,43 @@ nvme_tcp_fetch_request(struct nvme_tcp_queue *queue) return NULL; } - list_del(&req->entry); + list_del_init(&req->entry); + init_llist_node(&req->lentry); return req; } -static inline void nvme_tcp_ddgst_final(struct ahash_request *hash, - __le32 *dgst) +#define NVME_TCP_CRC_SEED (~0) + +static inline void nvme_tcp_ddgst_update(u32 *crcp, + struct page *page, size_t off, size_t len) { - ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0); - crypto_ahash_final(hash); + page += off / PAGE_SIZE; + off %= PAGE_SIZE; + while (len) { + const void *vaddr = kmap_local_page(page); + size_t n = min(len, (size_t)PAGE_SIZE - off); + + *crcp = crc32c(*crcp, vaddr + off, n); + kunmap_local(vaddr); + page++; + off = 0; + len -= n; + } } -static inline void nvme_tcp_ddgst_update(struct ahash_request *hash, - struct page *page, off_t off, size_t len) +static inline __le32 nvme_tcp_ddgst_final(u32 crc) { - struct scatterlist sg; - - sg_init_table(&sg, 1); - sg_set_page(&sg, page, len, off); - ahash_request_set_crypt(hash, &sg, NULL, len); - crypto_ahash_update(hash); + return cpu_to_le32(~crc); } -static inline void nvme_tcp_hdgst(struct ahash_request *hash, - void *pdu, size_t len) +static inline __le32 nvme_tcp_hdgst(const void *pdu, size_t len) { - struct scatterlist sg; + return cpu_to_le32(~crc32c(NVME_TCP_CRC_SEED, pdu, len)); +} - sg_init_one(&sg, pdu, len); - ahash_request_set_crypt(hash, &sg, pdu + len, len); - crypto_ahash_digest(hash); +static inline void nvme_tcp_set_hdgst(void *pdu, size_t len) +{ + *(__le32 *)(pdu + len) = nvme_tcp_hdgst(pdu, len); } static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue, @@ -455,8 +506,7 @@ static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue, } recv_digest = *(__le32 *)(pdu + hdr->hlen); - nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len); - exp_digest = *(__le32 *)(pdu + hdr->hlen); + exp_digest = nvme_tcp_hdgst(pdu, pdu_len); if (recv_digest != exp_digest) { dev_err(queue->ctrl->ctrl.device, "header digest error: recv %#x expected %#x\n", @@ -482,7 +532,7 @@ static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu) nvme_tcp_queue_id(queue)); return -EPROTO; } - crypto_ahash_init(queue->rcv_hash); + queue->rcv_crc = NVME_TCP_CRC_SEED; return 0; } @@ -516,6 +566,8 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set, req->queue = queue; nvme_req(rq)->ctrl = &ctrl->ctrl; nvme_req(rq)->cmd = &pdu->cmd; + init_llist_node(&req->lentry); + INIT_LIST_HEAD(&req->entry); return 0; } @@ -720,17 +772,61 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, return -EPROTO; } + if (llist_on_list(&req->lentry) || + !list_empty(&req->entry)) { + dev_err(queue->ctrl->ctrl.device, + "req %d unexpected r2t while processing request\n", + rq->tag); + return -EPROTO; + } + req->pdu_len = 0; req->h2cdata_left = r2t_length; req->h2cdata_offset = r2t_offset; req->ttag = pdu->ttag; nvme_tcp_setup_h2c_data_pdu(req); - nvme_tcp_queue_request(req, false, true); + + llist_add(&req->lentry, &queue->req_list); + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); return 0; } +static void nvme_tcp_handle_c2h_term(struct nvme_tcp_queue *queue, + struct nvme_tcp_term_pdu *pdu) +{ + u16 fes; + const char *msg; + u32 plen = le32_to_cpu(pdu->hdr.plen); + + static const char * const msg_table[] = { + [NVME_TCP_FES_INVALID_PDU_HDR] = "Invalid PDU Header Field", + [NVME_TCP_FES_PDU_SEQ_ERR] = "PDU Sequence Error", + [NVME_TCP_FES_HDR_DIGEST_ERR] = "Header Digest Error", + [NVME_TCP_FES_DATA_OUT_OF_RANGE] = "Data Transfer Out Of Range", + [NVME_TCP_FES_DATA_LIMIT_EXCEEDED] = "Data Transfer Limit Exceeded", + [NVME_TCP_FES_UNSUPPORTED_PARAM] = "Unsupported Parameter", + }; + + if (plen < NVME_TCP_MIN_C2HTERM_PLEN || + plen > NVME_TCP_MAX_C2HTERM_PLEN) { + dev_err(queue->ctrl->ctrl.device, + "Received a malformed C2HTermReq PDU (plen = %u)\n", + plen); + return; + } + + fes = le16_to_cpu(pdu->fes); + if (fes && fes < ARRAY_SIZE(msg_table)) + msg = msg_table[fes]; + else + msg = "Unknown"; + + dev_err(queue->ctrl->ctrl.device, + "Received C2HTermReq (FES = %s)\n", msg); +} + static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, unsigned int *offset, size_t *len) { @@ -752,6 +848,25 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, return 0; hdr = queue->pdu; + if (unlikely(hdr->hlen != sizeof(struct nvme_tcp_rsp_pdu))) { + if (!nvme_tcp_recv_pdu_supported(hdr->type)) + goto unsupported_pdu; + + dev_err(queue->ctrl->ctrl.device, + "pdu type %d has unexpected header length (%d)\n", + hdr->type, hdr->hlen); + return -EPROTO; + } + + if (unlikely(hdr->type == nvme_tcp_c2h_term)) { + /* + * C2HTermReq never includes Header or Data digests. + * Skip the checks. + */ + nvme_tcp_handle_c2h_term(queue, (void *)queue->pdu); + return -EINVAL; + } + if (queue->hdr_digest) { ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen); if (unlikely(ret)) @@ -775,10 +890,13 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, nvme_tcp_init_recv_ctx(queue); return nvme_tcp_handle_r2t(queue, (void *)queue->pdu); default: - dev_err(queue->ctrl->ctrl.device, - "unsupported pdu type (%d)\n", hdr->type); - return -EINVAL; + goto unsupported_pdu; } + +unsupported_pdu: + dev_err(queue->ctrl->ctrl.device, + "unsupported pdu type (%d)\n", hdr->type); + return -EINVAL; } static inline void nvme_tcp_end_request(struct request *rq, u16 status) @@ -826,8 +944,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, iov_iter_count(&req->iter)); if (queue->data_digest) - ret = skb_copy_and_hash_datagram_iter(skb, *offset, - &req->iter, recv_len, queue->rcv_hash); + ret = skb_copy_and_crc32c_datagram_iter(skb, *offset, + &req->iter, recv_len, &queue->rcv_crc); else ret = skb_copy_datagram_iter(skb, *offset, &req->iter, recv_len); @@ -845,7 +963,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, if (!queue->data_remaining) { if (queue->data_digest) { - nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst); + queue->exp_ddgst = nvme_tcp_ddgst_final(queue->rcv_crc); queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH; } else { if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { @@ -1037,7 +1155,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) else msg.msg_flags |= MSG_MORE; - if (!sendpage_ok(page)) + if (!sendpages_ok(page, len, offset)) msg.msg_flags &= ~MSG_SPLICE_PAGES; bvec_set_page(&bvec, page, len, offset); @@ -1047,7 +1165,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) return ret; if (queue->data_digest) - nvme_tcp_ddgst_update(queue->snd_hash, page, + nvme_tcp_ddgst_update(&queue->snd_crc, page, offset, ret); /* @@ -1061,8 +1179,8 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) /* fully successful last send in current PDU */ if (last && ret == len) { if (queue->data_digest) { - nvme_tcp_ddgst_final(queue->snd_hash, - &req->ddgst); + req->ddgst = + nvme_tcp_ddgst_final(queue->snd_crc); req->state = NVME_TCP_SEND_DDGST; req->offset = 0; } else { @@ -1094,7 +1212,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) msg.msg_flags |= MSG_EOR; if (queue->hdr_digest && !req->offset) - nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + nvme_tcp_set_hdgst(pdu, sizeof(*pdu)); bvec_set_virt(&bvec, (void *)pdu + req->offset, len); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); @@ -1107,7 +1225,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) if (inline_data) { req->state = NVME_TCP_SEND_DATA; if (queue->data_digest) - crypto_ahash_init(queue->snd_hash); + queue->snd_crc = NVME_TCP_CRC_SEED; } else { nvme_tcp_done_send_req(queue); } @@ -1129,7 +1247,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) int ret; if (queue->hdr_digest && !req->offset) - nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + nvme_tcp_set_hdgst(pdu, sizeof(*pdu)); if (!req->h2cdata_left) msg.msg_flags |= MSG_SPLICE_PAGES; @@ -1144,7 +1262,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) if (!len) { req->state = NVME_TCP_SEND_DATA; if (queue->data_digest) - crypto_ahash_init(queue->snd_hash); + queue->snd_crc = NVME_TCP_CRC_SEED; return 1; } req->offset += ret; @@ -1248,7 +1366,7 @@ static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue) queue->nr_cqe = 0; consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb); release_sock(sk); - return consumed; + return consumed == -EAGAIN ? 0 : consumed; } static void nvme_tcp_io_work(struct work_struct *w) @@ -1276,6 +1394,11 @@ static void nvme_tcp_io_work(struct work_struct *w) else if (unlikely(result < 0)) return; + /* did we get some space after spending time in recv? */ + if (nvme_tcp_queue_has_pending(queue) && + sk_stream_is_writeable(queue->sock->sk)) + pending = true; + if (!pending || !queue->rd_enabled) return; @@ -1284,41 +1407,6 @@ static void nvme_tcp_io_work(struct work_struct *w) queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } -static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash); - - ahash_request_free(queue->rcv_hash); - ahash_request_free(queue->snd_hash); - crypto_free_ahash(tfm); -} - -static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue) -{ - struct crypto_ahash *tfm; - - tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!queue->snd_hash) - goto free_tfm; - ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL); - - queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!queue->rcv_hash) - goto free_snd_hash; - ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL); - - return 0; -free_snd_hash: - ahash_request_free(queue->snd_hash); -free_tfm: - crypto_free_ahash(tfm); - return -ENOMEM; -} - static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl) { struct nvme_tcp_request *async = &ctrl->async_req; @@ -1344,7 +1432,6 @@ static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl) static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) { - struct page *page; struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; unsigned int noreclaim_flag; @@ -1352,14 +1439,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) return; - if (queue->hdr_digest || queue->data_digest) - nvme_tcp_free_crypto(queue); - - if (queue->pf_cache.va) { - page = virt_to_head_page(queue->pf_cache.va); - __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); - queue->pf_cache.va = NULL; - } + page_frag_cache_drain(&queue->pf_cache); noreclaim_flag = memalloc_noreclaim_save(); /* ->sock will be released by fput() */ @@ -1418,19 +1498,22 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) memset(&msg, 0, sizeof(msg)); iov.iov_base = icresp; iov.iov_len = sizeof(*icresp); - if (nvme_tcp_tls(&queue->ctrl->ctrl)) { + if (nvme_tcp_queue_tls(queue)) { msg.msg_control = cbuf; msg.msg_controllen = sizeof(cbuf); } + msg.msg_flags = MSG_WAITALL; ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); + if (ret >= 0 && ret < sizeof(*icresp)) + ret = -ECONNRESET; if (ret < 0) { pr_warn("queue %d: failed to receive icresp, error %d\n", nvme_tcp_queue_id(queue), ret); goto free_icresp; } ret = -ENOTCONN; - if (nvme_tcp_tls(&queue->ctrl->ctrl)) { + if (nvme_tcp_queue_tls(queue)) { ctype = tls_get_record_type(queue->sock->sk, (struct cmsghdr *)cbuf); if (ctype != TLS_RECORD_TYPE_DATA) { @@ -1538,20 +1621,56 @@ static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue) ctrl->io_queues[HCTX_TYPE_POLL]; } +/* + * Track the number of queues assigned to each cpu using a global per-cpu + * counter and select the least used cpu from the mq_map. Our goal is to spread + * different controllers I/O threads across different cpu cores. + * + * Note that the accounting is not 100% perfect, but we don't need to be, we're + * simply putting our best effort to select the best candidate cpu core that we + * find at any given point. + */ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) { struct nvme_tcp_ctrl *ctrl = queue->ctrl; - int qid = nvme_tcp_queue_id(queue); - int n = 0; + struct blk_mq_tag_set *set = &ctrl->tag_set; + int qid = nvme_tcp_queue_id(queue) - 1; + unsigned int *mq_map = NULL; + int cpu, min_queues = INT_MAX, io_cpu; + + if (wq_unbound) + goto out; if (nvme_tcp_default_queue(queue)) - n = qid - 1; + mq_map = set->map[HCTX_TYPE_DEFAULT].mq_map; else if (nvme_tcp_read_queue(queue)) - n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1; + mq_map = set->map[HCTX_TYPE_READ].mq_map; else if (nvme_tcp_poll_queue(queue)) - n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - - ctrl->io_queues[HCTX_TYPE_READ] - 1; - queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); + mq_map = set->map[HCTX_TYPE_POLL].mq_map; + + if (WARN_ON(!mq_map)) + goto out; + + /* Search for the least used cpu from the mq_map */ + io_cpu = WORK_CPU_UNBOUND; + for_each_online_cpu(cpu) { + int num_queues = atomic_read(&nvme_tcp_cpu_queues[cpu]); + + if (mq_map[cpu] != qid) + continue; + if (num_queues < min_queues) { + io_cpu = cpu; + min_queues = num_queues; + } + } + if (io_cpu != WORK_CPU_UNBOUND) { + queue->io_cpu = io_cpu; + atomic_inc(&nvme_tcp_cpu_queues[io_cpu]); + set_bit(NVME_TCP_Q_IO_CPU_SET, &queue->flags); + } +out: + dev_dbg(ctrl->ctrl.device, "queue %d: using cpu %d\n", + qid, queue->io_cpu); } static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid) @@ -1569,13 +1688,16 @@ static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid) goto out_complete; } - tls_key = key_lookup(pskid); + tls_key = nvme_tls_key_lookup(pskid); if (IS_ERR(tls_key)) { dev_warn(ctrl->ctrl.device, "queue %d: Invalid key %x\n", qid, pskid); queue->tls_err = -ENOKEY; } else { - ctrl->ctrl.tls_key = tls_key; + queue->tls_enabled = true; + if (qid == 0) + ctrl->ctrl.tls_pskid = key_serial(tls_key); + key_put(tls_key); queue->tls_err = 0; } @@ -1652,7 +1774,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, queue->cmnd_capsule_len = sizeof(struct nvme_command) + NVME_TCP_ADMIN_CCSZ; - ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM, + ret = sock_create_kern(current->nsproxy->net_ns, + ctrl->addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &queue->sock); if (ret) { dev_err(nctrl->device, @@ -1665,6 +1788,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, ret = PTR_ERR(sock_file); goto err_destroy_mutex; } + + sk_net_refcnt_upgrade(queue->sock->sk); nvme_tcp_reclassify_socket(queue->sock); /* Single syn retry */ @@ -1692,7 +1817,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, queue->sock->sk->sk_allocation = GFP_ATOMIC; queue->sock->sk->sk_use_task_frag = false; - nvme_tcp_set_queue_io_cpu(queue); + queue->io_cpu = WORK_CPU_UNBOUND; queue->request = NULL; queue->data_remaining = 0; queue->ddgst_remaining = 0; @@ -1727,21 +1852,13 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, queue->hdr_digest = nctrl->opts->hdr_digest; queue->data_digest = nctrl->opts->data_digest; - if (queue->hdr_digest || queue->data_digest) { - ret = nvme_tcp_alloc_crypto(queue); - if (ret) { - dev_err(nctrl->device, - "failed to allocate queue %d crypto\n", qid); - goto err_sock; - } - } rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) + nvme_tcp_hdgst_len(queue); queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL); if (!queue->pdu) { ret = -ENOMEM; - goto err_crypto; + goto err_sock; } dev_dbg(nctrl->device, "connecting queue %d\n", @@ -1756,7 +1873,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, } /* If PSKs are configured try to start TLS */ - if (IS_ENABLED(CONFIG_NVME_TCP_TLS) && pskid) { + if (nvme_tcp_tls_configured(nctrl) && pskid) { ret = nvme_tcp_start_tls(nctrl, queue, pskid); if (ret) goto err_init_connect; @@ -1774,9 +1891,6 @@ err_init_connect: kernel_sock_shutdown(queue->sock, SHUT_RDWR); err_rcv_pdu: kfree(queue->pdu); -err_crypto: - if (queue->hdr_digest || queue->data_digest) - nvme_tcp_free_crypto(queue); err_sock: /* ->sock will be released by fput() */ fput(queue->sock->file); @@ -1806,7 +1920,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) cancel_work_sync(&queue->io_work); } -static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1814,12 +1928,42 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) return; + if (test_and_clear_bit(NVME_TCP_Q_IO_CPU_SET, &queue->flags)) + atomic_dec(&nvme_tcp_cpu_queues[queue->io_cpu]); + mutex_lock(&queue->queue_lock); if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) __nvme_tcp_stop_queue(queue); + /* Stopping the queue will disable TLS */ + queue->tls_enabled = false; mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + int timeout = 100; + + while (timeout > 0) { + if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) || + !sk_wmem_alloc_get(queue->sock->sk)) + return; + msleep(2); + timeout -= 2; + } + dev_warn(nctrl->device, + "qid %d: timeout draining sock wmem allocation expired\n", + qid); +} + +static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +{ + nvme_tcp_stop_queue_nowait(nctrl, qid); + nvme_tcp_wait_queue(nctrl, qid); +} + + static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) { write_lock_bh(&queue->sock->sk->sk_callback_lock); @@ -1846,9 +1990,10 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) nvme_tcp_init_recv_ctx(queue); nvme_tcp_setup_sock_ops(queue); - if (idx) + if (idx) { + nvme_tcp_set_queue_io_cpu(queue); ret = nvmf_connect_io_queue(nctrl, idx); - else + } else ret = nvmf_connect_admin_queue(nctrl); if (!ret) { @@ -1886,7 +2031,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) int i; for (i = 1; i < ctrl->queue_count; i++) - nvme_tcp_stop_queue(ctrl, i); + nvme_tcp_stop_queue_nowait(ctrl, i); + for (i = 1; i < ctrl->queue_count; i++) + nvme_tcp_wait_queue(ctrl, i); } static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, @@ -1913,16 +2060,17 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) int ret; key_serial_t pskid = 0; - if (nvme_tcp_tls(ctrl)) { + if (nvme_tcp_tls_configured(ctrl)) { if (ctrl->opts->tls_key) pskid = key_serial(ctrl->opts->tls_key); - else + else if (ctrl->opts->tls) { pskid = nvme_tls_psk_default(ctrl->opts->keyring, ctrl->opts->host->nqn, ctrl->opts->subsysnqn); - if (!pskid) { - dev_err(ctrl->device, "no valid PSK found\n"); - return -ENOKEY; + if (!pskid) { + dev_err(ctrl->device, "no valid PSK found\n"); + return -ENOKEY; + } } } @@ -1945,13 +2093,30 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) { int i, ret; - if (nvme_tcp_tls(ctrl) && !ctrl->tls_key) { - dev_err(ctrl->device, "no PSK negotiated\n"); - return -ENOKEY; + if (nvme_tcp_tls_configured(ctrl)) { + if (ctrl->opts->concat) { + /* + * The generated PSK is stored in the + * fabric options + */ + if (!ctrl->opts->tls_key) { + dev_err(ctrl->device, "no PSK generated\n"); + return -ENOKEY; + } + if (ctrl->tls_pskid && + ctrl->tls_pskid != key_serial(ctrl->opts->tls_key)) { + dev_err(ctrl->device, "Stale PSK id %08x\n", ctrl->tls_pskid); + ctrl->tls_pskid = 0; + } + } else if (!ctrl->tls_pskid) { + dev_err(ctrl->device, "no PSK negotiated\n"); + return -ENOKEY; + } } + for (i = 1; i < ctrl->queue_count; i++) { ret = nvme_tcp_alloc_queue(ctrl, i, - key_serial(ctrl->tls_key)); + ctrl->tls_pskid); if (ret) goto out_free_queues; } @@ -1990,14 +2155,6 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) return __nvme_tcp_alloc_io_queues(ctrl); } -static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) -{ - nvme_tcp_stop_io_queues(ctrl); - if (remove) - nvme_remove_io_tag_set(ctrl); - nvme_tcp_free_io_queues(ctrl); -} - static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) { int ret, nr_queues; @@ -2067,14 +2224,6 @@ out_free_io_queues: return ret; } -static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) -{ - nvme_tcp_stop_queue(ctrl, 0); - if (remove) - nvme_remove_admin_tag_set(ctrl); - nvme_tcp_free_admin_queue(ctrl); -} - static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) { int error; @@ -2129,9 +2278,16 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); nvme_cancel_admin_tagset(ctrl); - if (remove) + if (remove) { nvme_unquiesce_admin_queue(ctrl); - nvme_tcp_destroy_admin_queue(ctrl, remove); + nvme_remove_admin_tag_set(ctrl); + } + nvme_tcp_free_admin_queue(ctrl); + if (ctrl->tls_pskid) { + dev_dbg(ctrl->device, "Wipe negotiated TLS_PSK %08x\n", + ctrl->tls_pskid); + ctrl->tls_pskid = 0; + } } static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, @@ -2139,17 +2295,19 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, { if (ctrl->queue_count <= 1) return; - nvme_quiesce_admin_queue(ctrl); nvme_quiesce_io_queues(ctrl); nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); nvme_cancel_tagset(ctrl); - if (remove) + if (remove) { nvme_unquiesce_io_queues(ctrl); - nvme_tcp_destroy_io_queues(ctrl, remove); + nvme_remove_io_tag_set(ctrl); + } + nvme_tcp_free_io_queues(ctrl); } -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, + int status) { enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); @@ -2159,17 +2317,39 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) return; } - if (nvmf_should_reconnect(ctrl)) { + if (nvmf_should_reconnect(ctrl, status)) { dev_info(ctrl->device, "Reconnecting in %d seconds...\n", ctrl->opts->reconnect_delay); queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work, ctrl->opts->reconnect_delay * HZ); } else { - dev_info(ctrl->device, "Removing controller...\n"); + dev_info(ctrl->device, "Removing controller (%d)...\n", + status); nvme_delete_ctrl(ctrl); } } +/* + * The TLS key is set by secure concatenation after negotiation has been + * completed on the admin queue. We need to revoke the key when: + * - concatenation is enabled (otherwise it's a static key set by the user) + * and + * - the generated key is present in ctrl->tls_key (otherwise there's nothing + * to revoke) + * and + * - a valid PSK key ID has been set in ctrl->tls_pskid (otherwise TLS + * negotiation has not run). + * + * We cannot always revoke the key as nvme_tcp_alloc_admin_queue() is called + * twice during secure concatenation, once on a 'normal' connection to run the + * DH-HMAC-CHAP negotiation (which generates the key, so it _must not_ be set), + * and once after the negotiation (which uses the key, so it _must_ be set). + */ +static bool nvme_tcp_key_revoke_needed(struct nvme_ctrl *ctrl) +{ + return ctrl->opts->concat && ctrl->opts->tls_key && ctrl->tls_pskid; +} + static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) { struct nvmf_ctrl_options *opts = ctrl->opts; @@ -2179,6 +2359,16 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) if (ret) return ret; + if (ctrl->opts->concat && !ctrl->tls_pskid) { + /* See comments for nvme_tcp_key_revoke_needed() */ + dev_dbg(ctrl->device, "restart admin queue for secure concatenation\n"); + nvme_stop_keep_alive(ctrl); + nvme_tcp_teardown_admin_queue(ctrl, false); + ret = nvme_tcp_configure_admin_queue(ctrl, false); + if (ret) + goto destroy_admin; + } + if (ctrl->icdoff) { ret = -EOPNOTSUPP; dev_err(ctrl->device, "icdoff is not supported!\n"); @@ -2233,11 +2423,13 @@ destroy_io: nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); nvme_cancel_tagset(ctrl); - nvme_tcp_destroy_io_queues(ctrl, new); + if (new) + nvme_remove_io_tag_set(ctrl); + nvme_tcp_free_io_queues(ctrl); } destroy_admin: nvme_stop_keep_alive(ctrl); - nvme_tcp_teardown_admin_queue(ctrl, false); + nvme_tcp_teardown_admin_queue(ctrl, new); return ret; } @@ -2246,23 +2438,25 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work), struct nvme_tcp_ctrl, connect_work); struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; + int ret; ++ctrl->nr_reconnects; - if (nvme_tcp_setup_ctrl(ctrl, false)) + ret = nvme_tcp_setup_ctrl(ctrl, false); + if (ret) goto requeue; - dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n", - ctrl->nr_reconnects); + dev_info(ctrl->device, "Successfully reconnected (attempt %d/%d)\n", + ctrl->nr_reconnects, ctrl->opts->max_reconnects); ctrl->nr_reconnects = 0; return; requeue: - dev_info(ctrl->device, "Failed reconnect attempt %d\n", - ctrl->nr_reconnects); - nvme_tcp_reconnect_or_remove(ctrl); + dev_info(ctrl->device, "Failed reconnect attempt %d/%d\n", + ctrl->nr_reconnects, ctrl->opts->max_reconnects); + nvme_tcp_reconnect_or_remove(ctrl, ret); } static void nvme_tcp_error_recovery_work(struct work_struct *work) @@ -2271,6 +2465,8 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) struct nvme_tcp_ctrl, err_work); struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; + if (nvme_tcp_key_revoke_needed(ctrl)) + nvme_auth_revoke_tls_key(ctrl); nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); nvme_tcp_teardown_io_queues(ctrl, false); @@ -2289,7 +2485,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) return; } - nvme_tcp_reconnect_or_remove(ctrl); + nvme_tcp_reconnect_or_remove(ctrl, 0); } static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) @@ -2309,7 +2505,10 @@ static void nvme_reset_ctrl_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, reset_work); + int ret; + if (nvme_tcp_key_revoke_needed(ctrl)) + nvme_auth_revoke_tls_key(ctrl); nvme_stop_ctrl(ctrl); nvme_tcp_teardown_ctrl(ctrl, false); @@ -2322,14 +2521,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work) return; } - if (nvme_tcp_setup_ctrl(ctrl, false)) + ret = nvme_tcp_setup_ctrl(ctrl, false); + if (ret) goto out_fail; return; out_fail: ++ctrl->nr_reconnects; - nvme_tcp_reconnect_or_remove(ctrl); + nvme_tcp_reconnect_or_remove(ctrl, ret); } static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) @@ -2410,8 +2610,10 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) ctrl->async_req.offset = 0; ctrl->async_req.curr_bio = NULL; ctrl->async_req.data_len = 0; + init_llist_node(&ctrl->async_req.lentry); + INIT_LIST_HEAD(&ctrl->async_req.entry); - nvme_tcp_queue_request(&ctrl->async_req, true, true); + nvme_tcp_queue_request(&ctrl->async_req, true); } static void nvme_tcp_complete_timed_out(struct request *rq) @@ -2428,13 +2630,13 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); - u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype; + struct nvme_command *cmd = &pdu->cmd; int qid = nvme_tcp_queue_id(req->queue); dev_warn(ctrl->device, "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n", - rq->tag, nvme_cid(rq), pdu->hdr.type, opc, - nvme_opcode_str(qid, opc, fctype), qid); + rq->tag, nvme_cid(rq), pdu->hdr.type, cmd->common.opcode, + nvme_fabrics_opcode_str(qid, cmd), qid); if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) { /* @@ -2563,7 +2765,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, nvme_start_request(rq); - nvme_tcp_queue_request(req, true, bd->last); + nvme_tcp_queue_request(req, bd->last); return BLK_STS_OK; } @@ -2579,6 +2781,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_tcp_queue *queue = hctx->driver_data; struct sock *sk = queue->sock->sk; + int ret; if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) return 0; @@ -2586,9 +2789,9 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) set_bit(NVME_TCP_Q_POLLING, &queue->flags); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) sk_busy_loop(sk, true); - nvme_tcp_try_recv(queue); + ret = nvme_tcp_try_recv(queue); clear_bit(NVME_TCP_Q_POLLING, &queue->flags); - return queue->nr_cqe; + return ret < 0 ? ret : queue->nr_cqe; } static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) @@ -2599,10 +2802,11 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) len = nvmf_get_address(ctrl, buf, size); + if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) + return len; + mutex_lock(&queue->queue_lock); - if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) - goto done; ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr); if (ret > 0) { if (len > 0) @@ -2610,7 +2814,7 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n", (len) ? "," : "", &src_addr); } -done: + mutex_unlock(&queue->queue_lock); return len; @@ -2644,6 +2848,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, + .subsystem_reset = nvmf_subsystem_reset, .free_ctrl = nvme_tcp_free_ctrl, .submit_async_event = nvme_tcp_submit_async_event, .delete_ctrl = nvme_tcp_delete_ctrl, @@ -2668,7 +2873,7 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts) return found; } -static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, +static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { struct nvme_tcp_ctrl *ctrl; @@ -2743,6 +2948,28 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, if (ret) goto out_kfree_queues; + return ctrl; +out_kfree_queues: + kfree(ctrl->queues); +out_free_ctrl: + kfree(ctrl); + return ERR_PTR(ret); +} + +static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, + struct nvmf_ctrl_options *opts) +{ + struct nvme_tcp_ctrl *ctrl; + int ret; + + ctrl = nvme_tcp_alloc_ctrl(dev, opts); + if (IS_ERR(ctrl)) + return ERR_CAST(ctrl); + + ret = nvme_add_ctrl(&ctrl->ctrl); + if (ret) + goto out_put_ctrl; + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { WARN_ON_ONCE(1); ret = -EINTR; @@ -2753,8 +2980,8 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, if (ret) goto out_uninit_ctrl; - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", - nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp, hostnqn: %s\n", + nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn); mutex_lock(&nvme_tcp_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); @@ -2764,15 +2991,11 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); +out_put_ctrl: nvme_put_ctrl(&ctrl->ctrl); if (ret > 0) ret = -EIO; return ERR_PTR(ret); -out_kfree_queues: - kfree(ctrl->queues); -out_free_ctrl: - kfree(ctrl); - return ERR_PTR(ret); } static struct nvmf_transport_ops nvme_tcp_transport = { @@ -2784,12 +3007,15 @@ static struct nvmf_transport_ops nvme_tcp_transport = { NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES | NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS | - NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY, + NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY | NVMF_OPT_CONCAT, .create_ctrl = nvme_tcp_create_ctrl, }; static int __init nvme_tcp_init_module(void) { + unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS; + int cpu; + BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8); BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72); BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24); @@ -2799,11 +3025,16 @@ static int __init nvme_tcp_init_module(void) BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128); BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24); - nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", - WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (wq_unbound) + wq_flags |= WQ_UNBOUND; + + nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0); if (!nvme_tcp_wq) return -ENOMEM; + for_each_possible_cpu(cpu) + atomic_set(&nvme_tcp_cpu_queues[cpu], 0); + nvmf_register_transport(&nvme_tcp_transport); return 0; } @@ -2826,4 +3057,5 @@ static void __exit nvme_tcp_cleanup_module(void) module_init(nvme_tcp_init_module); module_exit(nvme_tcp_cleanup_module); +MODULE_DESCRIPTION("NVMe host TCP transport driver"); MODULE_LICENSE("GPL v2"); |