diff options
Diffstat (limited to 'drivers/nvme/target/tcp.c')
| -rw-r--r-- | drivers/nvme/target/tcp.c | 1102 |
1 files changed, 801 insertions, 301 deletions
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index ad0df786fe93..15416ff0eac4 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -7,17 +7,80 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/crc32c.h> #include <linux/err.h> #include <linux/nvme-tcp.h> +#include <linux/nvme-keyring.h> #include <net/sock.h> #include <net/tcp.h> +#include <net/tls.h> +#include <net/tls_prot.h> +#include <net/handshake.h> #include <linux/inet.h> #include <linux/llist.h> -#include <crypto/hash.h> +#include <trace/events/sock.h> #include "nvmet.h" #define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE) +#define NVMET_TCP_MAXH2CDATA 0x400000 /* 16M arbitrary limit */ +#define NVMET_TCP_BACKLOG 128 + +static int param_store_val(const char *str, int *val, int min, int max) +{ + int ret, new_val; + + ret = kstrtoint(str, 10, &new_val); + if (ret) + return -EINVAL; + + if (new_val < min || new_val > max) + return -EINVAL; + + *val = new_val; + return 0; +} + +static int set_params(const char *str, const struct kernel_param *kp) +{ + return param_store_val(str, kp->arg, 0, INT_MAX); +} + +static const struct kernel_param_ops set_param_ops = { + .set = set_params, + .get = param_get_int, +}; + +/* Define the socket priority to use for connections were it is desirable + * that the NIC consider performing optimized packet processing or filtering. + * A non-zero value being sufficient to indicate general consideration of any + * possible optimization. Making it a module param allows for alternative + * values that may be unique for some NIC implementations. + */ +static int so_priority; +device_param_cb(so_priority, &set_param_ops, &so_priority, 0644); +MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority: Default 0"); + +/* Define a time period (in usecs) that io_work() shall sample an activated + * queue before determining it to be idle. This optional module behavior + * can enable NIC solutions that support socket optimized packet processing + * using advanced interrupt moderation techniques. + */ +static int idle_poll_period_usecs; +device_param_cb(idle_poll_period_usecs, &set_param_ops, + &idle_poll_period_usecs, 0644); +MODULE_PARM_DESC(idle_poll_period_usecs, + "nvmet tcp io_work poll till idle time period in usecs: Default 0"); + +#ifdef CONFIG_NVME_TARGET_TCP_TLS +/* + * TLS handshake timeout + */ +static int tls_handshake_timeout = 10; +module_param(tls_handshake_timeout, int, 0644); +MODULE_PARM_DESC(tls_handshake_timeout, + "nvme TLS handshake timeout in seconds (default 10)"); +#endif #define NVMET_TCP_RECV_BUDGET 8 #define NVMET_TCP_SEND_BUDGET 8 @@ -57,9 +120,9 @@ struct nvmet_tcp_cmd { u32 pdu_len; u32 pdu_recv; int sg_idx; - int nr_mapped; + char recv_cbuf[CMSG_LEN(sizeof(char))]; struct msghdr recv_msg; - struct kvec *iov; + struct bio_vec *iov; u32 flags; struct list_head entry; @@ -76,17 +139,19 @@ struct nvmet_tcp_cmd { enum nvmet_tcp_queue_state { NVMET_TCP_Q_CONNECTING, + NVMET_TCP_Q_TLS_HANDSHAKE, NVMET_TCP_Q_LIVE, NVMET_TCP_Q_DISCONNECTING, + NVMET_TCP_Q_FAILED, }; struct nvmet_tcp_queue { struct socket *sock; struct nvmet_tcp_port *port; struct work_struct io_work; - int cpu; struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; + struct kref kref; /* send state */ struct nvmet_tcp_cmd *cmds; @@ -107,8 +172,12 @@ struct nvmet_tcp_queue { /* digest state */ bool hdr_digest; bool data_digest; - struct ahash_request *snd_hash; - struct ahash_request *rcv_hash; + + /* TLS state */ + key_serial_t tls_pskid; + struct delayed_work tls_handshake_tmo_work; + + unsigned long poll_end; spinlock_t state_lock; enum nvmet_tcp_queue_state state; @@ -134,7 +203,6 @@ struct nvmet_tcp_port { struct work_struct accept_work; struct nvmet_port *nport; struct sockaddr_storage addr; - int last_cpu; void (*data_ready)(struct sock *); }; @@ -143,13 +211,18 @@ static LIST_HEAD(nvmet_tcp_queue_list); static DEFINE_MUTEX(nvmet_tcp_queue_mutex); static struct workqueue_struct *nvmet_tcp_wq; -static struct nvmet_fabrics_ops nvmet_tcp_ops; +static const struct nvmet_fabrics_ops nvmet_tcp_ops; static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); -static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); +static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd); static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, struct nvmet_tcp_cmd *cmd) { + if (unlikely(!queue->nr_cmds)) { + /* We didn't allocate cmds yet, send 0xffff */ + return USHRT_MAX; + } + return cmd - queue->cmds; } @@ -161,14 +234,14 @@ static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd) static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd) { - return nvmet_tcp_has_data_in(cmd) && !cmd->req.rsp->status; + return nvmet_tcp_has_data_in(cmd) && !cmd->req.cqe->status; } static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd) { return !nvme_is_write(cmd->req.cmd) && cmd->req.transfer_len > 0 && - !cmd->req.rsp->status; + !cmd->req.cqe->status; } static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd) @@ -204,6 +277,11 @@ static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd) list_add_tail(&cmd->entry, &cmd->queue->free_list); } +static inline int queue_cpu(struct nvmet_tcp_queue *queue) +{ + return queue->sock->sk->sk_incoming_cpu; +} + static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue) { return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0; @@ -214,14 +292,9 @@ static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue) return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0; } -static inline void nvmet_tcp_hdgst(struct ahash_request *hash, - void *pdu, size_t len) +static inline void nvmet_tcp_hdgst(void *pdu, size_t len) { - struct scatterlist sg; - - sg_init_one(&sg, pdu, len); - ahash_request_set_crypt(hash, &sg, pdu + len, len); - crypto_ahash_digest(hash); + put_unaligned_le32(~crc32c(~0, pdu, len), pdu + len); } static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue, @@ -238,7 +311,7 @@ static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue, } recv_digest = *(__le32 *)(pdu + hdr->hlen); - nvmet_tcp_hdgst(queue->rcv_hash, pdu, len); + nvmet_tcp_hdgst(pdu, len); exp_digest = *(__le32 *)(pdu + hdr->hlen); if (recv_digest != exp_digest) { pr_err("queue %d: header digest error: recv %#x expected %#x\n", @@ -267,43 +340,43 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu) return 0; } -static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd) +/* If cmd buffers are NULL, no operation is performed */ +static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) { - struct scatterlist *sg; - int i; - - sg = &cmd->req.sg[cmd->sg_idx]; - - for (i = 0; i < cmd->nr_mapped; i++) - kunmap(sg_page(&sg[i])); + kfree(cmd->iov); + sgl_free(cmd->req.sg); + cmd->iov = NULL; + cmd->req.sg = NULL; } -static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) +static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) { - struct kvec *iov = cmd->iov; + struct bio_vec *iov = cmd->iov; struct scatterlist *sg; u32 length, offset, sg_offset; + int nr_pages; length = cmd->pdu_len; - cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE); + nr_pages = DIV_ROUND_UP(length, PAGE_SIZE); offset = cmd->rbytes_done; - cmd->sg_idx = DIV_ROUND_UP(offset, PAGE_SIZE); + cmd->sg_idx = offset / PAGE_SIZE; sg_offset = offset % PAGE_SIZE; sg = &cmd->req.sg[cmd->sg_idx]; while (length) { u32 iov_len = min_t(u32, length, sg->length - sg_offset); - iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset; - iov->iov_len = iov_len; + bvec_set_page(iov, sg_page(sg), iov_len, + sg->offset + sg_offset); length -= iov_len; sg = sg_next(sg); iov++; + sg_offset = 0; } - iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, - cmd->nr_mapped, cmd->pdu_len); + iov_iter_bvec(&cmd->recv_msg.msg_iter, ITER_DEST, cmd->iov, + nr_pages, cmd->pdu_len); } static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) @@ -315,21 +388,30 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) kernel_sock_shutdown(queue->sock, SHUT_RDWR); } +static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) +{ + queue->rcv_state = NVMET_TCP_RECV_ERR; + if (status == -EPIPE || status == -ECONNRESET) + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + else + nvmet_tcp_fatal_error(queue); +} + static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) { struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl; u32 len = le32_to_cpu(sgl->length); - if (!cmd->req.data_len) + if (!len) return 0; if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET)) { if (!nvme_is_write(cmd->req.cmd)) - return NVME_SC_INVALID_FIELD | NVME_SC_DNR; + return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; if (len > cmd->req.port->inline_data_size) - return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR; + return NVME_SC_SGL_INVALID_OFFSET | NVME_STATUS_DNR; cmd->pdu_len = len; } cmd->req.transfer_len += len; @@ -348,16 +430,28 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) return 0; err: - sgl_free(cmd->req.sg); + nvmet_tcp_free_cmd_buffers(cmd); return NVME_SC_INTERNAL; } -static void nvmet_tcp_ddgst(struct ahash_request *hash, - struct nvmet_tcp_cmd *cmd) +static void nvmet_tcp_calc_ddgst(struct nvmet_tcp_cmd *cmd) { - ahash_request_set_crypt(hash, cmd->req.sg, - (void *)&cmd->exp_ddgst, cmd->req.transfer_len); - crypto_ahash_digest(hash); + size_t total_len = cmd->req.transfer_len; + struct scatterlist *sg = cmd->req.sg; + u32 crc = ~0; + + while (total_len) { + size_t len = min_t(size_t, total_len, sg->length); + + /* + * Note that the scatterlist does not contain any highmem pages, + * as it was allocated by sgl_alloc() with GFP_KERNEL. + */ + crc = crc32c(crc, sg_virt(sg), len); + total_len -= len; + sg = sg_next(sg); + } + cmd->exp_ddgst = cpu_to_le32(~crc); } static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) @@ -371,31 +465,31 @@ static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) cmd->state = NVMET_TCP_SEND_DATA_PDU; pdu->hdr.type = nvme_tcp_c2h_data; - pdu->hdr.flags = NVME_TCP_F_DATA_LAST; + pdu->hdr.flags = NVME_TCP_F_DATA_LAST | (queue->nvme_sq.sqhd_disabled ? + NVME_TCP_F_DATA_SUCCESS : 0); pdu->hdr.hlen = sizeof(*pdu); pdu->hdr.pdo = pdu->hdr.hlen + hdgst; pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst + cmd->req.transfer_len + ddgst); - pdu->command_id = cmd->req.rsp->command_id; + pdu->command_id = cmd->req.cqe->command_id; pdu->data_length = cpu_to_le32(cmd->req.transfer_len); pdu->data_offset = cpu_to_le32(cmd->wbytes_done); if (queue->data_digest) { pdu->hdr.flags |= NVME_TCP_F_DDGST; - nvmet_tcp_ddgst(queue->snd_hash, cmd); + nvmet_tcp_calc_ddgst(cmd); } if (cmd->queue->hdr_digest) { pdu->hdr.flags |= NVME_TCP_F_HDGST; - nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + nvmet_tcp_hdgst(pdu, sizeof(*pdu)); } } static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd) { struct nvme_tcp_r2t_pdu *pdu = cmd->r2t_pdu; - struct nvmet_tcp_queue *queue = cmd->queue; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); cmd->offset = 0; @@ -413,14 +507,13 @@ static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd) pdu->r2t_offset = cpu_to_le32(cmd->rbytes_done); if (cmd->queue->hdr_digest) { pdu->hdr.flags |= NVME_TCP_F_HDGST; - nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + nvmet_tcp_hdgst(pdu, sizeof(*pdu)); } } static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd) { struct nvme_tcp_rsp_pdu *pdu = cmd->rsp_pdu; - struct nvmet_tcp_queue *queue = cmd->queue; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); cmd->offset = 0; @@ -433,24 +526,18 @@ static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd) pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); if (cmd->queue->hdr_digest) { pdu->hdr.flags |= NVME_TCP_F_HDGST; - nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + nvmet_tcp_hdgst(pdu, sizeof(*pdu)); } } static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue) { struct llist_node *node; + struct nvmet_tcp_cmd *cmd; - node = llist_del_all(&queue->resp_list); - if (!node) - return; - - while (node) { - struct nvmet_tcp_cmd *cmd = llist_entry(node, - struct nvmet_tcp_cmd, lentry); - + for (node = llist_del_all(&queue->resp_list); node; node = node->next) { + cmd = llist_entry(node, struct nvmet_tcp_cmd, lentry); list_add(&cmd->entry, &queue->resp_send_list); - node = node->next; queue->send_list_len++; } } @@ -486,20 +573,55 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req) struct nvmet_tcp_cmd *cmd = container_of(req, struct nvmet_tcp_cmd, req); struct nvmet_tcp_queue *queue = cmd->queue; + enum nvmet_tcp_recv_state queue_state; + struct nvmet_tcp_cmd *queue_cmd; + struct nvme_sgl_desc *sgl; + u32 len; + + /* Pairs with store_release in nvmet_prepare_receive_pdu() */ + queue_state = smp_load_acquire(&queue->rcv_state); + queue_cmd = READ_ONCE(queue->cmd); + + if (unlikely(cmd == queue_cmd)) { + sgl = &cmd->req.cmd->common.dptr.sgl; + len = le32_to_cpu(sgl->length); + + /* + * Wait for inline data before processing the response. + * Avoid using helpers, this might happen before + * nvmet_req_init is completed. + */ + if (queue_state == NVMET_TCP_RECV_PDU && + len && len <= cmd->req.port->inline_data_size && + nvme_is_write(cmd->req.cmd)) + return; + } llist_add(&cmd->lentry, &queue->resp_list); - queue_work_on(cmd->queue->cpu, nvmet_tcp_wq, &cmd->queue->io_work); + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work); +} + +static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd) +{ + if (unlikely(cmd->flags & NVMET_TCP_F_INIT_FAILED)) + nvmet_tcp_queue_response(&cmd->req); + else + cmd->req.execute(&cmd->req); } static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) { + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_MORE | MSG_SPLICE_PAGES, + }; + struct bio_vec bvec; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); int left = sizeof(*cmd->data_pdu) - cmd->offset + hdgst; int ret; - ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu), - offset_in_page(cmd->data_pdu) + cmd->offset, - left, MSG_DONTWAIT | MSG_MORE); + bvec_set_virt(&bvec, (void *)cmd->data_pdu + cmd->offset, left); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left); + ret = sock_sendmsg(cmd->queue->sock, &msg); if (ret <= 0) return ret; @@ -514,17 +636,27 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) return 1; } -static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) +static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { struct nvmet_tcp_queue *queue = cmd->queue; int ret; while (cmd->cur_sg) { + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, + }; struct page *page = sg_page(cmd->cur_sg); + struct bio_vec bvec; u32 left = cmd->cur_sg->length - cmd->offset; - ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset, - left, MSG_DONTWAIT | MSG_MORE); + if ((!last_in_batch && cmd->queue->send_list_len) || + cmd->wbytes_done + left < cmd->req.transfer_len || + queue->data_digest || !queue->nvme_sq.sqhd_disabled) + msg.msg_flags |= MSG_MORE; + + bvec_set_page(&bvec, page, left, cmd->offset); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left); + ret = sock_sendmsg(cmd->queue->sock, &msg); if (ret <= 0) return ret; @@ -542,8 +674,17 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) cmd->state = NVMET_TCP_SEND_DDGST; cmd->offset = 0; } else { - nvmet_setup_response_pdu(cmd); + if (queue->nvme_sq.sqhd_disabled) { + cmd->queue->snd_cmd = NULL; + nvmet_tcp_put_cmd(cmd); + } else { + nvmet_setup_response_pdu(cmd); + } } + + if (queue->nvme_sq.sqhd_disabled) + nvmet_tcp_free_cmd_buffers(cmd); + return 1; } @@ -551,18 +692,20 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, }; + struct bio_vec bvec; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); int left = sizeof(*cmd->rsp_pdu) - cmd->offset + hdgst; - int flags = MSG_DONTWAIT; int ret; if (!last_in_batch && cmd->queue->send_list_len) - flags |= MSG_MORE; + msg.msg_flags |= MSG_MORE; else - flags |= MSG_EOR; + msg.msg_flags |= MSG_EOR; - ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->rsp_pdu), - offset_in_page(cmd->rsp_pdu) + cmd->offset, left, flags); + bvec_set_virt(&bvec, (void *)cmd->rsp_pdu + cmd->offset, left); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left); + ret = sock_sendmsg(cmd->queue->sock, &msg); if (ret <= 0) return ret; cmd->offset += ret; @@ -571,8 +714,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, if (left) return -EAGAIN; - kfree(cmd->iov); - sgl_free(cmd->req.sg); + nvmet_tcp_free_cmd_buffers(cmd); cmd->queue->snd_cmd = NULL; nvmet_tcp_put_cmd(cmd); return 1; @@ -580,18 +722,20 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, }; + struct bio_vec bvec; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); int left = sizeof(*cmd->r2t_pdu) - cmd->offset + hdgst; - int flags = MSG_DONTWAIT; int ret; if (!last_in_batch && cmd->queue->send_list_len) - flags |= MSG_MORE; + msg.msg_flags |= MSG_MORE; else - flags |= MSG_EOR; + msg.msg_flags |= MSG_EOR; - ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->r2t_pdu), - offset_in_page(cmd->r2t_pdu) + cmd->offset, left, flags); + bvec_set_virt(&bvec, (void *)cmd->r2t_pdu + cmd->offset, left); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left); + ret = sock_sendmsg(cmd->queue->sock, &msg); if (ret <= 0) return ret; cmd->offset += ret; @@ -604,22 +748,38 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) return 1; } -static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) +static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { struct nvmet_tcp_queue *queue = cmd->queue; + int left = NVME_TCP_DIGEST_LENGTH - cmd->offset; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { - .iov_base = &cmd->exp_ddgst + cmd->offset, - .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset + .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset, + .iov_len = left }; int ret; + if (!last_in_batch && cmd->queue->send_list_len) + msg.msg_flags |= MSG_MORE; + else + msg.msg_flags |= MSG_EOR; + ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (unlikely(ret <= 0)) return ret; cmd->offset += ret; - nvmet_setup_response_pdu(cmd); + left -= ret; + + if (left) + return -EAGAIN; + + if (queue->nvme_sq.sqhd_disabled) { + cmd->queue->snd_cmd = NULL; + nvmet_tcp_put_cmd(cmd); + } else { + nvmet_setup_response_pdu(cmd); + } return 1; } @@ -642,13 +802,13 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue, } if (cmd->state == NVMET_TCP_SEND_DATA) { - ret = nvmet_try_send_data(cmd); + ret = nvmet_try_send_data(cmd, last_in_batch); if (ret <= 0) goto done_send; } if (cmd->state == NVMET_TCP_SEND_DDGST) { - ret = nvmet_try_send_ddgst(cmd); + ret = nvmet_try_send_ddgst(cmd, last_in_batch); if (ret <= 0) goto done_send; } @@ -679,11 +839,15 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue, for (i = 0; i < budget; i++) { ret = nvmet_tcp_try_send_one(queue, i == budget - 1); - if (ret <= 0) + if (unlikely(ret < 0)) { + nvmet_tcp_socket_error(queue, ret); + goto done; + } else if (ret == 0) { break; + } (*sends)++; } - +done: return ret; } @@ -691,46 +855,11 @@ static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue) { queue->offset = 0; queue->left = sizeof(struct nvme_tcp_hdr); - queue->cmd = NULL; - queue->rcv_state = NVMET_TCP_RECV_PDU; -} - -static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash); - - ahash_request_free(queue->rcv_hash); - ahash_request_free(queue->snd_hash); - crypto_free_ahash(tfm); -} - -static int nvmet_tcp_alloc_crypto(struct nvmet_tcp_queue *queue) -{ - struct crypto_ahash *tfm; - - tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!queue->snd_hash) - goto free_tfm; - ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL); - - queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!queue->rcv_hash) - goto free_snd_hash; - ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL); - - return 0; -free_snd_hash: - ahash_request_free(queue->snd_hash); -free_tfm: - crypto_free_ahash(tfm); - return -ENOMEM; + WRITE_ONCE(queue->cmd, NULL); + /* Ensure rcv_state is visible only after queue->cmd is set */ + smp_store_release(&queue->rcv_state, NVMET_TCP_RECV_PDU); } - static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) { struct nvme_tcp_icreq_pdu *icreq = &queue->pdu.icreq; @@ -743,6 +872,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) pr_err("bad nvme-tcp pdu length (%d)\n", le32_to_cpu(icreq->hdr.plen)); nvmet_tcp_fatal_error(queue); + return -EPROTO; } if (icreq->pfv != NVME_TCP_PFV_1_0) { @@ -756,19 +886,8 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) return -EPROTO; } - if (icreq->maxr2t != 0) { - pr_err("queue %d: unsupported maxr2t %d\n", queue->idx, - le32_to_cpu(icreq->maxr2t) + 1); - return -EPROTO; - } - queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE); queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE); - if (queue->hdr_digest || queue->data_digest) { - ret = nvmet_tcp_alloc_crypto(queue); - if (ret) - return ret; - } memset(icresp, 0, sizeof(*icresp)); icresp->hdr.type = nvme_tcp_icresp; @@ -776,7 +895,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) icresp->hdr.pdo = 0; icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen); icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); - icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */ + icresp->maxdata = cpu_to_le32(NVMET_TCP_MAXH2CDATA); icresp->cpda = 0; if (queue->hdr_digest) icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE; @@ -786,28 +905,31 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) iov.iov_base = icresp; iov.iov_len = sizeof(*icresp); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); - if (ret < 0) - goto free_crypto; + if (ret < 0) { + queue->state = NVMET_TCP_Q_FAILED; + return ret; /* queue removal will cleanup */ + } queue->state = NVMET_TCP_Q_LIVE; nvmet_prepare_receive_pdu(queue); return 0; -free_crypto: - if (queue->hdr_digest || queue->data_digest) - nvmet_tcp_free_crypto(queue); - return ret; } static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, struct nvmet_tcp_cmd *cmd, struct nvmet_req *req) { + size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length); int ret; - /* recover the expected data transfer length */ - req->data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length); - - if (!nvme_is_write(cmd->req.cmd) || - req->data_len > cmd->req.port->inline_data_size) { + /* + * This command has not been processed yet, hence we are trying to + * figure out if there is still pending data left to receive. If + * we don't, we can simply prepare for the next pdu and bail out, + * otherwise we will need to prepare a buffer and receive the + * stale data before continuing forward. + */ + if (!nvme_is_write(cmd->req.cmd) || !data_len || + data_len > cmd->req.port->inline_data_size) { nvmet_prepare_receive_pdu(queue); return; } @@ -820,7 +942,7 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, } queue->rcv_state = NVMET_TCP_RECV_DATA; - nvmet_tcp_map_pdu_iovec(cmd); + nvmet_tcp_build_pdu_iovec(cmd); cmd->flags |= NVMET_TCP_F_INIT_FAILED; } @@ -828,26 +950,49 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) { struct nvme_tcp_data_pdu *data = &queue->pdu.data; struct nvmet_tcp_cmd *cmd; + unsigned int exp_data_len; - cmd = &queue->cmds[data->ttag]; + if (likely(queue->nr_cmds)) { + if (unlikely(data->ttag >= queue->nr_cmds)) { + pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n", + queue->idx, data->ttag, queue->nr_cmds); + goto err_proto; + } + cmd = &queue->cmds[data->ttag]; + } else { + cmd = &queue->connect; + } if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) { pr_err("ttag %u unexpected data offset %u (expected %u)\n", data->ttag, le32_to_cpu(data->data_offset), cmd->rbytes_done); - /* FIXME: use path and transport errors */ - nvmet_req_complete(&cmd->req, - NVME_SC_INVALID_FIELD | NVME_SC_DNR); - return -EPROTO; + goto err_proto; } + exp_data_len = le32_to_cpu(data->hdr.plen) - + nvmet_tcp_hdgst_len(queue) - + nvmet_tcp_ddgst_len(queue) - + sizeof(*data); + cmd->pdu_len = le32_to_cpu(data->data_length); + if (unlikely(cmd->pdu_len != exp_data_len || + cmd->pdu_len == 0 || + cmd->pdu_len > NVMET_TCP_MAXH2CDATA)) { + pr_err("H2CData PDU len %u is invalid\n", cmd->pdu_len); + goto err_proto; + } cmd->pdu_recv = 0; - nvmet_tcp_map_pdu_iovec(cmd); + nvmet_tcp_build_pdu_iovec(cmd); queue->cmd = cmd; queue->rcv_state = NVMET_TCP_RECV_DATA; return 0; + +err_proto: + /* FIXME: use proper transport errors */ + nvmet_tcp_fatal_error(queue); + return -EPROTO; } static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) @@ -867,6 +1012,13 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) return nvmet_tcp_handle_icreq(queue); } + if (unlikely(hdr->type == nvme_tcp_icreq)) { + pr_err("queue %d: received icreq pdu in state %d\n", + queue->idx, queue->state); + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } + if (hdr->type == nvme_tcp_h2c_data) { ret = nvmet_tcp_handle_h2c_data_pdu(queue); if (unlikely(ret)) @@ -887,15 +1039,15 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) req = &queue->cmd->req; memcpy(req->cmd, nvme_cmd, sizeof(*nvme_cmd)); - if (unlikely(!nvmet_req_init(req, &queue->nvme_cq, - &queue->nvme_sq, &nvmet_tcp_ops))) { - pr_err("failed cmd %p id %d opcode %d, data_len: %d\n", + if (unlikely(!nvmet_req_init(req, &queue->nvme_sq, &nvmet_tcp_ops))) { + pr_err("failed cmd %p id %d opcode %d, data_len: %d, status: %04x\n", req->cmd, req->cmd->common.command_id, req->cmd->common.opcode, - le32_to_cpu(req->cmd->common.dptr.sgl.length)); + le32_to_cpu(req->cmd->common.dptr.sgl.length), + le16_to_cpu(req->cqe->status)); nvmet_tcp_handle_req_failure(queue, queue->cmd, req); - return -EAGAIN; + return 0; } ret = nvmet_tcp_map_data(queue->cmd); @@ -912,7 +1064,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) if (nvmet_tcp_need_data_in(queue->cmd)) { if (nvmet_tcp_has_inline_data(queue->cmd)) { queue->rcv_state = NVMET_TCP_RECV_DATA; - nvmet_tcp_map_pdu_iovec(queue->cmd); + nvmet_tcp_build_pdu_iovec(queue->cmd); return 0; } /* send back R2T */ @@ -920,7 +1072,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) goto out; } - nvmet_req_execute(&queue->cmd->req); + queue->cmd->req.execute(&queue->cmd->req); out: nvmet_prepare_receive_pdu(queue); return ret; @@ -954,20 +1106,65 @@ static inline bool nvmet_tcp_pdu_valid(u8 type) return false; } +static int nvmet_tcp_tls_record_ok(struct nvmet_tcp_queue *queue, + struct msghdr *msg, char *cbuf) +{ + struct cmsghdr *cmsg = (struct cmsghdr *)cbuf; + u8 ctype, level, description; + int ret = 0; + + ctype = tls_get_record_type(queue->sock->sk, cmsg); + switch (ctype) { + case 0: + break; + case TLS_RECORD_TYPE_DATA: + break; + case TLS_RECORD_TYPE_ALERT: + tls_alert_recv(queue->sock->sk, msg, &level, &description); + if (level == TLS_ALERT_LEVEL_FATAL) { + pr_err("queue %d: TLS Alert desc %u\n", + queue->idx, description); + ret = -ENOTCONN; + } else { + pr_warn("queue %d: TLS Alert desc %u\n", + queue->idx, description); + ret = -EAGAIN; + } + break; + default: + /* discard this record type */ + pr_err("queue %d: TLS record %d unhandled\n", + queue->idx, ctype); + ret = -EAGAIN; + break; + } + return ret; +} + static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue) { struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr; - int len; + int len, ret; struct kvec iov; + char cbuf[CMSG_LEN(sizeof(char))] = {}; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; recv: iov.iov_base = (void *)&queue->pdu + queue->offset; iov.iov_len = queue->left; + if (queue->tls_pskid) { + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + } len = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); if (unlikely(len < 0)) return len; + if (queue->tls_pskid) { + ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); + if (ret < 0) + return ret; + } queue->offset += len; queue->left -= len; @@ -993,7 +1190,7 @@ recv: } if (queue->hdr_digest && - nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) { + nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) { nvmet_tcp_fatal_error(queue); /* fatal */ return -EPROTO; } @@ -1011,7 +1208,7 @@ static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd) { struct nvmet_tcp_queue *queue = cmd->queue; - nvmet_tcp_ddgst(queue->rcv_hash, cmd); + nvmet_tcp_calc_ddgst(cmd); queue->offset = 0; queue->left = NVME_TCP_DIGEST_LENGTH; queue->rcv_state = NVMET_TCP_RECV_DDGST; @@ -1020,29 +1217,32 @@ static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd) static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) { struct nvmet_tcp_cmd *cmd = queue->cmd; - int ret; + int len, ret; while (msg_data_left(&cmd->recv_msg)) { - ret = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg, + len = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg, cmd->recv_msg.msg_flags); - if (ret <= 0) - return ret; + if (len <= 0) + return len; + if (queue->tls_pskid) { + ret = nvmet_tcp_tls_record_ok(cmd->queue, + &cmd->recv_msg, cmd->recv_cbuf); + if (ret < 0) + return ret; + } - cmd->pdu_recv += ret; - cmd->rbytes_done += ret; + cmd->pdu_recv += len; + cmd->rbytes_done += len; } - nvmet_tcp_unmap_pdu_iovec(cmd); - - if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) && - cmd->rbytes_done == cmd->req.transfer_len) { - if (queue->data_digest) { - nvmet_tcp_prep_recv_ddgst(cmd); - return 0; - } - nvmet_req_execute(&cmd->req); + if (queue->data_digest) { + nvmet_tcp_prep_recv_ddgst(cmd); + return 0; } + if (cmd->rbytes_done == cmd->req.transfer_len) + nvmet_tcp_execute_request(cmd); + nvmet_prepare_receive_pdu(queue); return 0; } @@ -1050,20 +1250,30 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) { struct nvmet_tcp_cmd *cmd = queue->cmd; - int ret; + int ret, len; + char cbuf[CMSG_LEN(sizeof(char))] = {}; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { .iov_base = (void *)&cmd->recv_ddgst + queue->offset, .iov_len = queue->left }; - ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, + if (queue->tls_pskid) { + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + } + len = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); - if (unlikely(ret < 0)) - return ret; + if (unlikely(len < 0)) + return len; + if (queue->tls_pskid) { + ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); + if (ret < 0) + return ret; + } - queue->offset += ret; - queue->left -= ret; + queue->offset += len; + queue->left -= len; if (queue->left) return -EAGAIN; @@ -1072,15 +1282,16 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) queue->idx, cmd->req.cmd->common.command_id, queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst), le32_to_cpu(cmd->exp_ddgst)); - nvmet_tcp_finish_cmd(cmd); + nvmet_req_uninit(&cmd->req); + nvmet_tcp_free_cmd_buffers(cmd); nvmet_tcp_fatal_error(queue); ret = -EPROTO; goto out; } - if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) && - cmd->rbytes_done == cmd->req.transfer_len) - nvmet_req_execute(&cmd->req); + if (cmd->rbytes_done == cmd->req.transfer_len) + nvmet_tcp_execute_request(cmd); + ret = 0; out: nvmet_prepare_receive_pdu(queue); @@ -1128,22 +1339,56 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue, for (i = 0; i < budget; i++) { ret = nvmet_tcp_try_recv_one(queue); - if (ret <= 0) + if (unlikely(ret < 0)) { + nvmet_tcp_socket_error(queue, ret); + goto done; + } else if (ret == 0) { break; + } (*recvs)++; } - +done: return ret; } +static void nvmet_tcp_release_queue(struct kref *kref) +{ + struct nvmet_tcp_queue *queue = + container_of(kref, struct nvmet_tcp_queue, kref); + + WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING); + queue_work(nvmet_wq, &queue->release_work); +} + static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue) { - spin_lock(&queue->state_lock); + spin_lock_bh(&queue->state_lock); + if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) { + /* Socket closed during handshake */ + tls_handshake_cancel(queue->sock->sk); + } if (queue->state != NVMET_TCP_Q_DISCONNECTING) { queue->state = NVMET_TCP_Q_DISCONNECTING; - schedule_work(&queue->release_work); + kref_put(&queue->kref, nvmet_tcp_release_queue); } - spin_unlock(&queue->state_lock); + spin_unlock_bh(&queue->state_lock); +} + +static inline void nvmet_tcp_arm_queue_deadline(struct nvmet_tcp_queue *queue) +{ + queue->poll_end = jiffies + usecs_to_jiffies(idle_poll_period_usecs); +} + +static bool nvmet_tcp_check_queue_deadline(struct nvmet_tcp_queue *queue, + int ops) +{ + if (!idle_poll_period_usecs) + return false; + + if (ops) + nvmet_tcp_arm_queue_deadline(queue); + + return !time_after(jiffies, queue->poll_end); } static void nvmet_tcp_io_work(struct work_struct *w) @@ -1157,35 +1402,25 @@ static void nvmet_tcp_io_work(struct work_struct *w) pending = false; ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops); - if (ret > 0) { + if (ret > 0) pending = true; - } else if (ret < 0) { - if (ret == -EPIPE || ret == -ECONNRESET) - kernel_sock_shutdown(queue->sock, SHUT_RDWR); - else - nvmet_tcp_fatal_error(queue); + else if (ret < 0) return; - } ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops); - if (ret > 0) { - /* transmitted message/data */ + if (ret > 0) pending = true; - } else if (ret < 0) { - if (ret == -EPIPE || ret == -ECONNRESET) - kernel_sock_shutdown(queue->sock, SHUT_RDWR); - else - nvmet_tcp_fatal_error(queue); + else if (ret < 0) return; - } } while (pending && ops < NVMET_TCP_IO_WORK_BUDGET); /* - * We exahusted our budget, requeue our selves + * Requeue the worker if idle deadline period is in progress or any + * ops activity was recorded during the do-while loop above. */ - if (pending) - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); + if (nvmet_tcp_check_queue_deadline(queue, ops) || pending) + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); } static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue, @@ -1206,7 +1441,7 @@ static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue, sizeof(*c->rsp_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); if (!c->rsp_pdu) goto out_free_cmd; - c->req.rsp = &c->rsp_pdu->cqe; + c->req.cqe = &c->rsp_pdu->cqe; c->data_pdu = page_frag_alloc(&queue->pf_cache, sizeof(*c->data_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); @@ -1218,6 +1453,10 @@ static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue, if (!c->r2t_pdu) goto out_free_data; + if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) { + c->recv_msg.msg_control = c->recv_cbuf; + c->recv_msg.msg_controllen = sizeof(c->recv_cbuf); + } c->recv_msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; list_add_tail(&c->entry, &queue->free_list); @@ -1245,7 +1484,7 @@ static int nvmet_tcp_alloc_cmds(struct nvmet_tcp_queue *queue) struct nvmet_tcp_cmd *cmds; int i, ret = -EINVAL, nr_cmds = queue->nr_cmds; - cmds = kcalloc(nr_cmds, sizeof(struct nvmet_tcp_cmd), GFP_KERNEL); + cmds = kvcalloc(nr_cmds, sizeof(struct nvmet_tcp_cmd), GFP_KERNEL); if (!cmds) goto out; @@ -1261,7 +1500,7 @@ static int nvmet_tcp_alloc_cmds(struct nvmet_tcp_queue *queue) out_free: while (--i >= 0) nvmet_tcp_free_cmd(cmds + i); - kfree(cmds); + kvfree(cmds); out: return ret; } @@ -1275,13 +1514,16 @@ static void nvmet_tcp_free_cmds(struct nvmet_tcp_queue *queue) nvmet_tcp_free_cmd(cmds + i); nvmet_tcp_free_cmd(&queue->connect); - kfree(cmds); + kvfree(cmds); } static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) { struct socket *sock = queue->sock; + if (!queue->state_change) + return; + write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_data_ready = queue->data_ready; sock->sk->sk_state_change = queue->state_change; @@ -1290,13 +1532,6 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) write_unlock_bh(&sock->sk->sk_callback_lock); } -static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) -{ - nvmet_req_uninit(&cmd->req); - nvmet_tcp_unmap_pdu_iovec(cmd); - sgl_free(cmd->req.sg); -} - static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) { struct nvmet_tcp_cmd *cmd = queue->cmds; @@ -1304,15 +1539,25 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) for (i = 0; i < queue->nr_cmds; i++, cmd++) { if (nvmet_tcp_need_data_in(cmd)) - nvmet_tcp_finish_cmd(cmd); + nvmet_req_uninit(&cmd->req); } if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) { /* failed in connect */ - nvmet_tcp_finish_cmd(&queue->connect); + nvmet_req_uninit(&queue->connect.req); } } +static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue) +{ + struct nvmet_tcp_cmd *cmd = queue->cmds; + int i; + + for (i = 0; i < queue->nr_cmds; i++, cmd++) + nvmet_tcp_free_cmd_buffers(cmd); + nvmet_tcp_free_cmd_buffers(&queue->connect); +} + static void nvmet_tcp_release_queue_work(struct work_struct *w) { struct nvmet_tcp_queue *queue = @@ -1323,17 +1568,22 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) mutex_unlock(&nvmet_tcp_queue_mutex); nvmet_tcp_restore_socket_callbacks(queue); - flush_work(&queue->io_work); + cancel_delayed_work_sync(&queue->tls_handshake_tmo_work); + cancel_work_sync(&queue->io_work); + /* stop accepting incoming data */ + queue->rcv_state = NVMET_TCP_RECV_ERR; + nvmet_sq_put_tls_key(&queue->nvme_sq); nvmet_tcp_uninit_data_in_cmds(queue); nvmet_sq_destroy(&queue->nvme_sq); + nvmet_cq_put(&queue->nvme_cq); cancel_work_sync(&queue->io_work); - sock_release(queue->sock); + nvmet_tcp_free_cmd_data_in_buffers(queue); + /* ->sock will be released by fput() */ + fput(queue->sock->file); nvmet_tcp_free_cmds(queue); - if (queue->hdr_digest || queue->data_digest) - nvmet_tcp_free_crypto(queue); - ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); - + ida_free(&nvmet_tcp_queue_ida, queue->idx); + page_frag_cache_drain(&queue->pf_cache); kfree(queue); } @@ -1341,10 +1591,17 @@ static void nvmet_tcp_data_ready(struct sock *sk) { struct nvmet_tcp_queue *queue; + trace_sk_data_ready(sk); + read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; - if (likely(queue)) - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); + if (likely(queue)) { + if (queue->data_ready) + queue->data_ready(sk); + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, + &queue->io_work); + } read_unlock_bh(&sk->sk_callback_lock); } @@ -1364,7 +1621,7 @@ static void nvmet_tcp_write_space(struct sock *sk) if (sk_stream_is_writeable(sk)) { clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); } out: read_unlock_bh(&sk->sk_callback_lock); @@ -1374,17 +1631,19 @@ static void nvmet_tcp_state_change(struct sock *sk) { struct nvmet_tcp_queue *queue; - write_lock_bh(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; if (!queue) goto done; switch (sk->sk_state) { + case TCP_FIN_WAIT2: + case TCP_LAST_ACK: + break; case TCP_FIN_WAIT1: case TCP_CLOSE_WAIT: case TCP_CLOSE: /* FALLTHRU */ - sk->sk_user_data = NULL; nvmet_tcp_schedule_release_queue(queue); break; default: @@ -1392,13 +1651,13 @@ static void nvmet_tcp_state_change(struct sock *sk) queue->idx, sk->sk_state); } done: - write_unlock_bh(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) { struct socket *sock = queue->sock; - struct linger sol = { .l_onoff = 1, .l_linger = 0 }; + struct inet_sock *inet = inet_sk(sock->sk); int ret; ret = kernel_getsockname(sock, @@ -1416,87 +1675,297 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) * close. This is done to prevent stale data from being sent should * the network connection be restored before TCP times out. */ - ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&sol, sizeof(sol)); - if (ret) - return ret; + sock_no_linger(sock->sk); + + if (so_priority > 0) + sock_set_priority(sock->sk, so_priority); + /* Set socket type of service */ + if (inet->rcv_tos > 0) + ip_sock_set_tos(sock->sk, inet->rcv_tos); + + ret = 0; write_lock_bh(&sock->sk->sk_callback_lock); - sock->sk->sk_user_data = queue; - queue->data_ready = sock->sk->sk_data_ready; - sock->sk->sk_data_ready = nvmet_tcp_data_ready; - queue->state_change = sock->sk->sk_state_change; - sock->sk->sk_state_change = nvmet_tcp_state_change; - queue->write_space = sock->sk->sk_write_space; - sock->sk->sk_write_space = nvmet_tcp_write_space; + if (sock->sk->sk_state != TCP_ESTABLISHED) { + /* + * If the socket is already closing, don't even start + * consuming it + */ + ret = -ENOTCONN; + } else { + sock->sk->sk_user_data = queue; + queue->data_ready = sock->sk->sk_data_ready; + sock->sk->sk_data_ready = nvmet_tcp_data_ready; + queue->state_change = sock->sk->sk_state_change; + sock->sk->sk_state_change = nvmet_tcp_state_change; + queue->write_space = sock->sk->sk_write_space; + sock->sk->sk_write_space = nvmet_tcp_write_space; + if (idle_poll_period_usecs) + nvmet_tcp_arm_queue_deadline(queue); + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); + } write_unlock_bh(&sock->sk->sk_callback_lock); + return ret; +} + +#ifdef CONFIG_NVME_TARGET_TCP_TLS +static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue) +{ + struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr; + int len, ret; + struct kvec iov = { + .iov_base = (u8 *)&queue->pdu + queue->offset, + .iov_len = sizeof(struct nvme_tcp_hdr), + }; + char cbuf[CMSG_LEN(sizeof(char))] = {}; + struct msghdr msg = { + .msg_control = cbuf, + .msg_controllen = sizeof(cbuf), + .msg_flags = MSG_PEEK, + }; + + if (nvmet_port_secure_channel_required(queue->port->nport)) + return 0; + + len = kernel_recvmsg(queue->sock, &msg, &iov, 1, + iov.iov_len, msg.msg_flags); + if (unlikely(len < 0)) { + pr_debug("queue %d: peek error %d\n", + queue->idx, len); + return len; + } + + ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); + if (ret < 0) + return ret; + + if (len < sizeof(struct nvme_tcp_hdr)) { + pr_debug("queue %d: short read, %d bytes missing\n", + queue->idx, (int)iov.iov_len - len); + return -EAGAIN; + } + pr_debug("queue %d: hdr type %d hlen %d plen %d size %d\n", + queue->idx, hdr->type, hdr->hlen, hdr->plen, + (int)sizeof(struct nvme_tcp_icreq_pdu)); + if (hdr->type == nvme_tcp_icreq && + hdr->hlen == sizeof(struct nvme_tcp_icreq_pdu) && + hdr->plen == cpu_to_le32(sizeof(struct nvme_tcp_icreq_pdu))) { + pr_debug("queue %d: icreq detected\n", + queue->idx); + return len; + } return 0; } -static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, +static int nvmet_tcp_tls_key_lookup(struct nvmet_tcp_queue *queue, + key_serial_t peerid) +{ + struct key *tls_key = nvme_tls_key_lookup(peerid); + int status = 0; + + if (IS_ERR(tls_key)) { + pr_warn("%s: queue %d failed to lookup key %x\n", + __func__, queue->idx, peerid); + spin_lock_bh(&queue->state_lock); + queue->state = NVMET_TCP_Q_FAILED; + spin_unlock_bh(&queue->state_lock); + status = PTR_ERR(tls_key); + } else { + pr_debug("%s: queue %d using TLS PSK %x\n", + __func__, queue->idx, peerid); + queue->nvme_sq.tls_key = tls_key; + } + return status; +} + +static void nvmet_tcp_tls_handshake_done(void *data, int status, + key_serial_t peerid) +{ + struct nvmet_tcp_queue *queue = data; + + pr_debug("queue %d: TLS handshake done, key %x, status %d\n", + queue->idx, peerid, status); + spin_lock_bh(&queue->state_lock); + if (WARN_ON(queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)) { + spin_unlock_bh(&queue->state_lock); + return; + } + if (!status) { + queue->tls_pskid = peerid; + queue->state = NVMET_TCP_Q_CONNECTING; + } else + queue->state = NVMET_TCP_Q_FAILED; + spin_unlock_bh(&queue->state_lock); + + cancel_delayed_work_sync(&queue->tls_handshake_tmo_work); + + if (!status) + status = nvmet_tcp_tls_key_lookup(queue, peerid); + + if (status) + nvmet_tcp_schedule_release_queue(queue); + else + nvmet_tcp_set_queue_sock(queue); + kref_put(&queue->kref, nvmet_tcp_release_queue); +} + +static void nvmet_tcp_tls_handshake_timeout(struct work_struct *w) +{ + struct nvmet_tcp_queue *queue = container_of(to_delayed_work(w), + struct nvmet_tcp_queue, tls_handshake_tmo_work); + + pr_warn("queue %d: TLS handshake timeout\n", queue->idx); + /* + * If tls_handshake_cancel() fails we've lost the race with + * nvmet_tcp_tls_handshake_done() */ + if (!tls_handshake_cancel(queue->sock->sk)) + return; + spin_lock_bh(&queue->state_lock); + if (WARN_ON(queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)) { + spin_unlock_bh(&queue->state_lock); + return; + } + queue->state = NVMET_TCP_Q_FAILED; + spin_unlock_bh(&queue->state_lock); + nvmet_tcp_schedule_release_queue(queue); + kref_put(&queue->kref, nvmet_tcp_release_queue); +} + +static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue) +{ + int ret = -EOPNOTSUPP; + struct tls_handshake_args args; + + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { + pr_warn("cannot start TLS in state %d\n", queue->state); + return -EINVAL; + } + + kref_get(&queue->kref); + pr_debug("queue %d: TLS ServerHello\n", queue->idx); + memset(&args, 0, sizeof(args)); + args.ta_sock = queue->sock; + args.ta_done = nvmet_tcp_tls_handshake_done; + args.ta_data = queue; + args.ta_keyring = key_serial(queue->port->nport->keyring); + args.ta_timeout_ms = tls_handshake_timeout * 1000; + + ret = tls_server_hello_psk(&args, GFP_KERNEL); + if (ret) { + kref_put(&queue->kref, nvmet_tcp_release_queue); + pr_err("failed to start TLS, err=%d\n", ret); + } else { + queue_delayed_work(nvmet_wq, &queue->tls_handshake_tmo_work, + tls_handshake_timeout * HZ); + } + return ret; +} +#else +static void nvmet_tcp_tls_handshake_timeout(struct work_struct *w) {} +#endif + +static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, struct socket *newsock) { struct nvmet_tcp_queue *queue; + struct file *sock_file = NULL; int ret; queue = kzalloc(sizeof(*queue), GFP_KERNEL); - if (!queue) - return -ENOMEM; + if (!queue) { + ret = -ENOMEM; + goto out_release; + } INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work); INIT_WORK(&queue->io_work, nvmet_tcp_io_work); + kref_init(&queue->kref); queue->sock = newsock; queue->port = port; queue->nr_cmds = 0; spin_lock_init(&queue->state_lock); - queue->state = NVMET_TCP_Q_CONNECTING; + if (queue->port->nport->disc_addr.tsas.tcp.sectype == + NVMF_TCP_SECTYPE_TLS13) + queue->state = NVMET_TCP_Q_TLS_HANDSHAKE; + else + queue->state = NVMET_TCP_Q_CONNECTING; INIT_LIST_HEAD(&queue->free_list); init_llist_head(&queue->resp_list); INIT_LIST_HEAD(&queue->resp_send_list); - queue->idx = ida_simple_get(&nvmet_tcp_queue_ida, 0, 0, GFP_KERNEL); + sock_file = sock_alloc_file(queue->sock, O_CLOEXEC, NULL); + if (IS_ERR(sock_file)) { + ret = PTR_ERR(sock_file); + goto out_free_queue; + } + + queue->idx = ida_alloc(&nvmet_tcp_queue_ida, GFP_KERNEL); if (queue->idx < 0) { ret = queue->idx; - goto out_free_queue; + goto out_sock; } ret = nvmet_tcp_alloc_cmd(queue, &queue->connect); if (ret) goto out_ida_remove; - ret = nvmet_sq_init(&queue->nvme_sq); + nvmet_cq_init(&queue->nvme_cq); + ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq); if (ret) goto out_free_connect; - port->last_cpu = cpumask_next_wrap(port->last_cpu, - cpu_online_mask, -1, false); - queue->cpu = port->last_cpu; nvmet_prepare_receive_pdu(queue); mutex_lock(&nvmet_tcp_queue_mutex); list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list); mutex_unlock(&nvmet_tcp_queue_mutex); + INIT_DELAYED_WORK(&queue->tls_handshake_tmo_work, + nvmet_tcp_tls_handshake_timeout); +#ifdef CONFIG_NVME_TARGET_TCP_TLS + if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) { + struct sock *sk = queue->sock->sk; + + /* Restore the default callbacks before starting upcall */ + write_lock_bh(&sk->sk_callback_lock); + sk->sk_user_data = NULL; + sk->sk_data_ready = port->data_ready; + write_unlock_bh(&sk->sk_callback_lock); + if (!nvmet_tcp_try_peek_pdu(queue)) { + if (!nvmet_tcp_tls_handshake(queue)) + return; + /* TLS handshake failed, terminate the connection */ + goto out_destroy_sq; + } + /* Not a TLS connection, continue with normal processing */ + queue->state = NVMET_TCP_Q_CONNECTING; + } +#endif + ret = nvmet_tcp_set_queue_sock(queue); if (ret) goto out_destroy_sq; - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); - - return 0; + return; out_destroy_sq: mutex_lock(&nvmet_tcp_queue_mutex); list_del_init(&queue->queue_list); mutex_unlock(&nvmet_tcp_queue_mutex); nvmet_sq_destroy(&queue->nvme_sq); out_free_connect: + nvmet_cq_put(&queue->nvme_cq); nvmet_tcp_free_cmd(&queue->connect); out_ida_remove: - ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); + ida_free(&nvmet_tcp_queue_ida, queue->idx); +out_sock: + fput(queue->sock->file); out_free_queue: kfree(queue); - return ret; +out_release: + pr_err("failed to allocate queue, error %d\n", ret); + if (!sock_file) + sock_release(newsock); } static void nvmet_tcp_accept_work(struct work_struct *w) @@ -1513,11 +1982,7 @@ static void nvmet_tcp_accept_work(struct work_struct *w) pr_warn("failed to accept err=%d\n", ret); return; } - ret = nvmet_tcp_alloc_queue(port, newsock); - if (ret) { - pr_err("failed to allocate queue\n"); - sock_release(newsock); - } + nvmet_tcp_alloc_queue(port, newsock); } } @@ -1525,13 +1990,15 @@ static void nvmet_tcp_listen_data_ready(struct sock *sk) { struct nvmet_tcp_port *port; + trace_sk_data_ready(sk); + read_lock_bh(&sk->sk_callback_lock); port = sk->sk_user_data; if (!port) goto out; if (sk->sk_state == TCP_LISTEN) - schedule_work(&port->accept_work); + queue_work(nvmet_wq, &port->accept_work); out: read_unlock_bh(&sk->sk_callback_lock); } @@ -1540,7 +2007,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) { struct nvmet_tcp_port *port; __kernel_sa_family_t af; - int opt, ret; + int ret; port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) @@ -1569,7 +2036,6 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) } port->nport = nport; - port->last_cpu = -1; INIT_WORK(&port->accept_work, nvmet_tcp_accept_work); if (port->nport->inline_data_size < 0) port->nport->inline_data_size = NVMET_TCP_DEF_INLINE_DATA_SIZE; @@ -1584,30 +2050,19 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) port->sock->sk->sk_user_data = port; port->data_ready = port->sock->sk->sk_data_ready; port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready; + sock_set_reuseaddr(port->sock->sk); + tcp_sock_set_nodelay(port->sock->sk); + if (so_priority > 0) + sock_set_priority(port->sock->sk, so_priority); - opt = 1; - ret = kernel_setsockopt(port->sock, IPPROTO_TCP, - TCP_NODELAY, (char *)&opt, sizeof(opt)); - if (ret) { - pr_err("failed to set TCP_NODELAY sock opt %d\n", ret); - goto err_sock; - } - - ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR, - (char *)&opt, sizeof(opt)); - if (ret) { - pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret); - goto err_sock; - } - - ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr, + ret = kernel_bind(port->sock, (struct sockaddr_unsized *)&port->addr, sizeof(port->addr)); if (ret) { pr_err("failed to bind port socket %d\n", ret); goto err_sock; } - ret = kernel_listen(port->sock, 128); + ret = kernel_listen(port->sock, NVMET_TCP_BACKLOG); if (ret) { pr_err("failed to listen %d on port sock\n", ret); goto err_sock; @@ -1626,6 +2081,17 @@ err_port: return ret; } +static void nvmet_tcp_destroy_port_queues(struct nvmet_tcp_port *port) +{ + struct nvmet_tcp_queue *queue; + + mutex_lock(&nvmet_tcp_queue_mutex); + list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list) + if (queue->port == port) + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + mutex_unlock(&nvmet_tcp_queue_mutex); +} + static void nvmet_tcp_remove_port(struct nvmet_port *nport) { struct nvmet_tcp_port *port = nport->priv; @@ -1635,6 +2101,11 @@ static void nvmet_tcp_remove_port(struct nvmet_port *nport) port->sock->sk->sk_user_data = NULL; write_unlock_bh(&port->sock->sk->sk_callback_lock); cancel_work_sync(&port->accept_work); + /* + * Destroy the remaining queues, which are not belong to any + * controller yet. + */ + nvmet_tcp_destroy_port_queues(port); sock_release(port->sock); kfree(port); @@ -1657,13 +2128,26 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq) container_of(sq, struct nvmet_tcp_queue, nvme_sq); if (sq->qid == 0) { - /* Let inflight controller teardown complete */ - flush_scheduled_work(); + struct nvmet_tcp_queue *q; + int pending = 0; + + /* Check for pending controller teardown */ + mutex_lock(&nvmet_tcp_queue_mutex); + list_for_each_entry(q, &nvmet_tcp_queue_list, queue_list) { + if (q->nvme_sq.ctrl == sq->ctrl && + q->state == NVMET_TCP_Q_DISCONNECTING) + pending++; + } + mutex_unlock(&nvmet_tcp_queue_mutex); + if (pending > NVMET_TCP_BACKLOG) + return NVME_SC_CONNECT_CTRL_BUSY; } queue->nr_cmds = sq->size * 2; - if (nvmet_tcp_alloc_cmds(queue)) + if (nvmet_tcp_alloc_cmds(queue)) { + queue->nr_cmds = 0; return NVME_SC_INTERNAL; + } return 0; } @@ -1672,7 +2156,7 @@ static void nvmet_tcp_disc_port_addr(struct nvmet_req *req, { struct nvmet_tcp_port *port = nport->priv; - if (inet_addr_is_any((struct sockaddr *)&port->addr)) { + if (inet_addr_is_any(&port->addr)) { struct nvmet_tcp_cmd *cmd = container_of(req, struct nvmet_tcp_cmd, req); struct nvmet_tcp_queue *queue = cmd->queue; @@ -1683,24 +2167,38 @@ static void nvmet_tcp_disc_port_addr(struct nvmet_req *req, } } -static struct nvmet_fabrics_ops nvmet_tcp_ops = { +static ssize_t nvmet_tcp_host_port_addr(struct nvmet_ctrl *ctrl, + char *traddr, size_t traddr_len) +{ + struct nvmet_sq *sq = ctrl->sqs[0]; + struct nvmet_tcp_queue *queue = + container_of(sq, struct nvmet_tcp_queue, nvme_sq); + + if (queue->sockaddr_peer.ss_family == AF_UNSPEC) + return -EINVAL; + return snprintf(traddr, traddr_len, "%pISc", + (struct sockaddr *)&queue->sockaddr_peer); +} + +static const struct nvmet_fabrics_ops nvmet_tcp_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_TCP, .msdbd = 1, - .has_keyed_sgls = 0, .add_port = nvmet_tcp_add_port, .remove_port = nvmet_tcp_remove_port, .queue_response = nvmet_tcp_queue_response, .delete_ctrl = nvmet_tcp_delete_ctrl, .install_queue = nvmet_tcp_install_queue, .disc_traddr = nvmet_tcp_disc_port_addr, + .host_traddr = nvmet_tcp_host_port_addr, }; static int __init nvmet_tcp_init(void) { int ret; - nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq", WQ_HIGHPRI, 0); + nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); if (!nvmet_tcp_wq) return -ENOMEM; @@ -1720,18 +2218,20 @@ static void __exit nvmet_tcp_exit(void) nvmet_unregister_transport(&nvmet_tcp_ops); - flush_scheduled_work(); + flush_workqueue(nvmet_wq); mutex_lock(&nvmet_tcp_queue_mutex); list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list) kernel_sock_shutdown(queue->sock, SHUT_RDWR); mutex_unlock(&nvmet_tcp_queue_mutex); - flush_scheduled_work(); + flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); + ida_destroy(&nvmet_tcp_queue_ida); } module_init(nvmet_tcp_init); module_exit(nvmet_tcp_exit); +MODULE_DESCRIPTION("NVMe target TCP transport driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */ |
