From 5bfaba275ae6486700194cad962574e3eb7ae60d Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Wed, 31 Aug 2022 00:05:33 +0200 Subject: nvmet-tcp: don't map pages which can't come from HIGHMEM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kmap() is being deprecated in favor of kmap_local_page().[1] There are two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap’s pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. The pages which will be mapped are allocated in nvmet_tcp_map_data(), using the GFP_KERNEL flag. This assures that they cannot come from HIGHMEM. This imply that a straight page_address() can replace the kmap() of sg_page(sg) in nvmet_tcp_map_pdu_iovec(). As a side effect, we might also delete the field "nr_mapped" from struct "nvmet_tcp_cmd" because, after removing the kmap() calls, there would be no longer any need of it. In addition, there is no reason to use a kvec for the command receive data buffers iovec, use a bio_vec instead and let iov_iter handle the buffer mapping and data copy. Test with blktests on a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. [1] "[PATCH] checkpatch: Add kmap and kmap_atomic to the deprecated list" https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com/ Cc: Chaitanya Kulkarni Cc: Keith Busch Suggested-by: Ira Weiny Signed-off-by: Fabio M. De Francesco Suggested-by: Christoph Hellwig Suggested-by: Al Viro [sagi: added bio_vec plus minor naming changes] Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 44 +++++++++++++------------------------------- 1 file changed, 13 insertions(+), 31 deletions(-) (limited to 'drivers/nvme/target/tcp.c') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index dc3b4dc8fe08..c07de4f4f719 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -77,9 +77,8 @@ struct nvmet_tcp_cmd { u32 pdu_len; u32 pdu_recv; int sg_idx; - int nr_mapped; struct msghdr recv_msg; - struct kvec *iov; + struct bio_vec *iov; u32 flags; struct list_head entry; @@ -167,7 +166,6 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops; static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd); -static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd); static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, struct nvmet_tcp_cmd *cmd) @@ -301,35 +299,21 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu) static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) { - WARN_ON(unlikely(cmd->nr_mapped > 0)); - kfree(cmd->iov); sgl_free(cmd->req.sg); cmd->iov = NULL; cmd->req.sg = NULL; } -static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd) -{ - struct scatterlist *sg; - int i; - - sg = &cmd->req.sg[cmd->sg_idx]; - - for (i = 0; i < cmd->nr_mapped; i++) - kunmap(sg_page(&sg[i])); - - cmd->nr_mapped = 0; -} - -static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) +static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) { - struct kvec *iov = cmd->iov; + struct bio_vec *iov = cmd->iov; struct scatterlist *sg; u32 length, offset, sg_offset; + int nr_pages; length = cmd->pdu_len; - cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE); + nr_pages = DIV_ROUND_UP(length, PAGE_SIZE); offset = cmd->rbytes_done; cmd->sg_idx = offset / PAGE_SIZE; sg_offset = offset % PAGE_SIZE; @@ -338,8 +322,9 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) while (length) { u32 iov_len = min_t(u32, length, sg->length - sg_offset); - iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset; - iov->iov_len = iov_len; + iov->bv_page = sg_page(sg); + iov->bv_len = sg->length; + iov->bv_offset = sg->offset + sg_offset; length -= iov_len; sg = sg_next(sg); @@ -347,8 +332,8 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) sg_offset = 0; } - iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, - cmd->nr_mapped, cmd->pdu_len); + iov_iter_bvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, + nr_pages, cmd->pdu_len); } static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) @@ -926,7 +911,7 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, } queue->rcv_state = NVMET_TCP_RECV_DATA; - nvmet_tcp_map_pdu_iovec(cmd); + nvmet_tcp_build_pdu_iovec(cmd); cmd->flags |= NVMET_TCP_F_INIT_FAILED; } @@ -952,7 +937,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) cmd->pdu_len = le32_to_cpu(data->data_length); cmd->pdu_recv = 0; - nvmet_tcp_map_pdu_iovec(cmd); + nvmet_tcp_build_pdu_iovec(cmd); queue->cmd = cmd; queue->rcv_state = NVMET_TCP_RECV_DATA; @@ -1021,7 +1006,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) if (nvmet_tcp_need_data_in(queue->cmd)) { if (nvmet_tcp_has_inline_data(queue->cmd)) { queue->rcv_state = NVMET_TCP_RECV_DATA; - nvmet_tcp_map_pdu_iovec(queue->cmd); + nvmet_tcp_build_pdu_iovec(queue->cmd); return 0; } /* send back R2T */ @@ -1141,7 +1126,6 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) cmd->rbytes_done += ret; } - nvmet_tcp_unmap_pdu_iovec(cmd); if (queue->data_digest) { nvmet_tcp_prep_recv_ddgst(cmd); return 0; @@ -1411,7 +1395,6 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) { nvmet_req_uninit(&cmd->req); - nvmet_tcp_unmap_pdu_iovec(cmd); nvmet_tcp_free_cmd_buffers(cmd); } @@ -1424,7 +1407,6 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) if (nvmet_tcp_need_data_in(cmd)) nvmet_req_uninit(&cmd->req); - nvmet_tcp_unmap_pdu_iovec(cmd); nvmet_tcp_free_cmd_buffers(cmd); } -- cgit From db94f240280c1da8ba1e679c422312676549570d Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 20 Sep 2022 21:16:17 +0800 Subject: nvmet-tcp: fix NULL pointer dereference during release nvmet-tcp frees CMD buffers in nvmet_tcp_uninit_data_in_cmds(), and waits the inflight IO requests in nvmet_sq_destroy(). During wait the inflight IO requests, the callback nvmet_tcp_queue_response() is called from backend after IO complete, this leads a typical Use-After-Free issue like this: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 107f80067 P4D 107f80067 PUD 10789e067 PMD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 123 Comm: kworker/1:1H Kdump: loaded Tainted: G E 6.0.0-rc2.bm.1-amd64 #15 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: nvmet_tcp_wq nvmet_tcp_io_work [nvmet_tcp] RIP: 0010:shash_ahash_digest+0x2b/0x110 Code: 1f 44 00 00 41 57 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 48 83 ec 08 44 8b 67 30 45 85 e4 74 1c 48 8b 57 38 b8 00 10 00 00 <44> 8b 7a 08 44 29 f8 39 42 0c 0f 46 42 0c 41 39 c4 76 43 48 8b 03 RSP: 0018:ffffc9000051bdd8 EFLAGS: 00010206 RAX: 0000000000001000 RBX: ffff888100ab5470 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff888100ab5470 RDI: ffff888100ab5420 RBP: ffff888100ab5420 R08: ffff8881024d08c8 R09: ffff888103e1b4b8 R10: 8080808080808080 R11: 0000000000000000 R12: 0000000000001000 R13: 0000000000000000 R14: ffff88813412bd4c R15: ffff8881024d0800 FS: 0000000000000000(0000) GS:ffff88883fa40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000104b48000 CR4: 0000000000350ee0 Call Trace: nvmet_tcp_io_work+0xa52/0xb52 [nvmet_tcp] ? __switch_to+0x106/0x420 process_one_work+0x1ae/0x380 ? process_one_work+0x380/0x380 worker_thread+0x30/0x360 ? process_one_work+0x380/0x380 kthread+0xe6/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Separate nvmet_tcp_uninit_data_in_cmds() into two steps: uninit data in cmds <- new step 1 nvmet_sq_destroy(); cancel_work_sync(&queue->io_work); free CMD buffers <- new step 2 Signed-off-by: zhenwei pi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'drivers/nvme/target/tcp.c') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c07de4f4f719..70baeab6af30 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1406,14 +1406,26 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) for (i = 0; i < queue->nr_cmds; i++, cmd++) { if (nvmet_tcp_need_data_in(cmd)) nvmet_req_uninit(&cmd->req); - - nvmet_tcp_free_cmd_buffers(cmd); } if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) { /* failed in connect */ - nvmet_tcp_finish_cmd(&queue->connect); + nvmet_req_uninit(&queue->connect.req); + } +} + +static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue) +{ + struct nvmet_tcp_cmd *cmd = queue->cmds; + int i; + + for (i = 0; i < queue->nr_cmds; i++, cmd++) { + if (nvmet_tcp_need_data_in(cmd)) + nvmet_tcp_free_cmd_buffers(cmd); } + + if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) + nvmet_tcp_free_cmd_buffers(&queue->connect); } static void nvmet_tcp_release_queue_work(struct work_struct *w) @@ -1434,6 +1446,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_tcp_uninit_data_in_cmds(queue); nvmet_sq_destroy(&queue->nvme_sq); cancel_work_sync(&queue->io_work); + nvmet_tcp_free_cmd_data_in_buffers(queue); sock_release(queue->sock); nvmet_tcp_free_cmds(queue); if (queue->hdr_digest || queue->data_digest) -- cgit From f614b937d850193588f161ff854a4e19940a5e43 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 21 Sep 2022 00:04:44 +0530 Subject: nvmet-tcp: handle ICReq PDU received in NVMET_TCP_Q_LIVE state As per NVMe/TCP transport specification ICReq PDU is the first PDU received by the controller and controller should receive only one ICReq PDU. If controller receives more than one ICReq PDU then this can be considered as fatal error. nvmet-tcp driver does not check for ICReq PDU opcode if queue state is NVMET_TCP_Q_LIVE. In LIVE state ICReq PDU is treated as CapsuleCmd PDU, this can result in abnormal behavior. Add a check for ICReq PDU in nvmet_tcp_done_recv_pdu() to fix this issue. Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/nvme/target/tcp.c') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 70baeab6af30..1762e2e90585 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -961,6 +961,13 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) return nvmet_tcp_handle_icreq(queue); } + if (unlikely(hdr->type == nvme_tcp_icreq)) { + pr_err("queue %d: received icreq pdu in state %d\n", + queue->idx, queue->state); + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } + if (hdr->type == nvme_tcp_h2c_data) { ret = nvmet_tcp_handle_h2c_data_pdu(queue); if (unlikely(ret)) -- cgit From b6a545ffa2c192b1e6da4a7924edac5ba9f4ea2b Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 21 Sep 2022 00:06:49 +0530 Subject: nvmet-tcp: add bounds check on Transfer Tag ttag is used as an index to get cmd in nvmet_tcp_handle_h2c_data_pdu(), add a bounds check to avoid out-of-bounds access. Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/target/tcp.c') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 1762e2e90585..98e17ea184d6 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -920,10 +920,17 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) struct nvme_tcp_data_pdu *data = &queue->pdu.data; struct nvmet_tcp_cmd *cmd; - if (likely(queue->nr_cmds)) + if (likely(queue->nr_cmds)) { + if (unlikely(data->ttag >= queue->nr_cmds)) { + pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n", + queue->idx, data->ttag, queue->nr_cmds); + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } cmd = &queue->cmds[data->ttag]; - else + } else { cmd = &queue->connect; + } if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) { pr_err("ttag %u unexpected data offset %u (expected %u)\n", -- cgit From 0700542a823ba3d3aa9c699a255aecc23dbbcaff Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Thu, 22 Sep 2022 15:06:16 +0800 Subject: nvmet-tcp: remove nvmet_tcp_finish_cmd There is only a single call-site of nvmet_tcp_finish_cmd(), this becomes redundant. Remove nvmet_tcp_finish_cmd() and use the original function body instead. Suggested-by: Sagi Grimberg Signed-off-by: zhenwei pi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/nvme/target/tcp.c') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 98e17ea184d6..63d72b9cb28d 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -164,7 +164,6 @@ static DEFINE_MUTEX(nvmet_tcp_queue_mutex); static struct workqueue_struct *nvmet_tcp_wq; static const struct nvmet_fabrics_ops nvmet_tcp_ops; static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); -static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd); static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, @@ -1177,7 +1176,8 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) queue->idx, cmd->req.cmd->common.command_id, queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst), le32_to_cpu(cmd->exp_ddgst)); - nvmet_tcp_finish_cmd(cmd); + nvmet_req_uninit(&cmd->req); + nvmet_tcp_free_cmd_buffers(cmd); nvmet_tcp_fatal_error(queue); ret = -EPROTO; goto out; @@ -1406,12 +1406,6 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) write_unlock_bh(&sock->sk->sk_callback_lock); } -static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) -{ - nvmet_req_uninit(&cmd->req); - nvmet_tcp_free_cmd_buffers(cmd); -} - static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) { struct nvmet_tcp_cmd *cmd = queue->cmds; -- cgit