From 063604265f967e90901996a1b173fe6df582d350 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 15 Aug 2022 13:42:00 +0100 Subject: io_uring/net: use right helpers for async recycle We have a helper that checks for whether a request contains anything in ->async_data or not, namely req_has_async_data(). It's better to use it as it might have some extra considerations. Fixes: 43e0bbbd0b0e3 ("io_uring: add netmsg cache") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/b7414da4e7c3c32c31fc02dfd1355af4ccf4ca5f.1660566179.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index 6d71748e2c5a..2129562bfd9f 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -116,7 +116,7 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr *hdr = req->async_data; - if (!hdr || issue_flags & IO_URING_F_UNLOCKED) + if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED) return; /* Let normal cleanup path reap it if we fail adding to the cache */ -- cgit From 86dc8f23bb1b68262ca5db890ec7177b2d074640 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 15 Aug 2022 13:42:01 +0100 Subject: io_uring/net: improve zc addr import error handling We may account memory to a memcg of a request that didn't even got to the network layer. It's not a bug as it'll be routinely cleaned up on flush, but it might be confusing for the userspace. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/b8aae61f4c3ddc4da97c1da876bb73871f352d50.1660566179.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index 2129562bfd9f..f7cbd716817f 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -977,6 +977,14 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) msg.msg_controllen = 0; msg.msg_namelen = 0; + if (zc->addr) { + ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address); + if (unlikely(ret < 0)) + return ret; + msg.msg_name = (struct sockaddr *)&address; + msg.msg_namelen = zc->addr_len; + } + if (zc->flags & IORING_RECVSEND_FIXED_BUF) { ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu, (u64)(uintptr_t)zc->buf, zc->len); @@ -992,14 +1000,6 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) return ret; } - if (zc->addr) { - ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address); - if (unlikely(ret < 0)) - return ret; - msg.msg_name = (struct sockaddr *)&address; - msg.msg_namelen = zc->addr_len; - } - msg_flags = zc->msg_flags | MSG_ZEROCOPY; if (issue_flags & IO_URING_F_NONBLOCK) msg_flags |= MSG_DONTWAIT; -- cgit From 3f743e9bbb8fe20f4c477e4bf6341c4187a4a264 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 18 Aug 2022 12:38:34 +0100 Subject: io_uring/net: use right helpers for async_data There is another spot where we check ->async_data directly instead of using req_has_async_data(), which is the way to do it, fix it up. Fixes: 43e0bbbd0b0e3 ("io_uring: add netmsg cache") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/42f33b9a81dd6ae65dda92f0372b0ff82d548517.1660822636.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index f7cbd716817f..f8cdf1dc3863 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -152,9 +152,9 @@ static int io_setup_async_msg(struct io_kiocb *req, struct io_async_msghdr *kmsg, unsigned int issue_flags) { - struct io_async_msghdr *async_msg = req->async_data; + struct io_async_msghdr *async_msg; - if (async_msg) + if (req_has_async_data(req)) return -EAGAIN; async_msg = io_recvmsg_alloc_async(req, issue_flags); if (!async_msg) { -- cgit From 5a848b7c9e5e4d94390fbc391ccb81d40f3ccfb5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:39 +0100 Subject: io_uring/net: fix zc send link failing Failed requests should be marked with req_set_fail(), so links and cqe skipping work correctly, which is missing in io_sendzc(). Note, io_sendzc() return IOU_OK on failure, so the core code won't do the cleanup for us. Fixes: 06a5464be84e4 ("io_uring: wire send zc request type") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/e47d46fda9db30154ce66a549bb0d3380b780520.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 1 + 1 file changed, 1 insertion(+) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index f8cdf1dc3863..d6310c655a0f 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1023,6 +1023,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) } if (ret == -ERESTARTSYS) ret = -EINTR; + req_set_fail(req); } else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) { io_notif_slot_flush_submit(notif_slot, 0); } -- cgit From 986e263def32eec89153babf469859d837507d34 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:40 +0100 Subject: io_uring/net: fix indentation Fix up indentation before we get complaints from tooling. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/bd5754e3764215ccd7fb04cd636ea9167aaa275d.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index d6310c655a0f..3adcb09ae264 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -989,7 +989,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu, (u64)(uintptr_t)zc->buf, zc->len); if (unlikely(ret)) - return ret; + return ret; } else { ret = import_single_range(WRITE, zc->buf, zc->len, &iov, &msg.msg_iter); -- cgit From 581711c46612c1fd7f98960f9ad53f04fdb89853 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:43 +0100 Subject: io_uring/net: save address for sendzc async execution We usually copy all bits that a request needs from the userspace for async execution, so the userspace can keep them on the stack. However, send zerocopy violates this pattern for addresses and may reloads it e.g. from io-wq. Save the address if any in ->async_data as usual. Reported-by: Stefan Metzmacher Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/d7512d7aa9abcd36e9afe1a4d292a24cb2d157e5.1661342812.git.asml.silence@gmail.com [axboe: fold in incremental fix] Signed-off-by: Jens Axboe --- io_uring/net.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 7 deletions(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index 3adcb09ae264..0af8a02df580 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -182,6 +182,37 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req, &iomsg->free_iov); } +int io_sendzc_prep_async(struct io_kiocb *req) +{ + struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); + struct io_async_msghdr *io; + int ret; + + if (!zc->addr || req_has_async_data(req)) + return 0; + if (io_alloc_async_data(req)) + return -ENOMEM; + + io = req->async_data; + ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr); + return ret; +} + +static int io_setup_async_addr(struct io_kiocb *req, + struct sockaddr_storage *addr, + unsigned int issue_flags) +{ + struct io_async_msghdr *io; + + if (!addr || req_has_async_data(req)) + return -EAGAIN; + if (io_alloc_async_data(req)) + return -ENOMEM; + io = req->async_data; + memcpy(&io->addr, addr, sizeof(io->addr)); + return -EAGAIN; +} + int io_sendmsg_prep_async(struct io_kiocb *req) { int ret; @@ -944,7 +975,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) { - struct sockaddr_storage address; + struct sockaddr_storage __address, *addr = NULL; struct io_ring_ctx *ctx = req->ctx; struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); struct io_notif_slot *notif_slot; @@ -978,10 +1009,17 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) msg.msg_namelen = 0; if (zc->addr) { - ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address); - if (unlikely(ret < 0)) - return ret; - msg.msg_name = (struct sockaddr *)&address; + if (req_has_async_data(req)) { + struct io_async_msghdr *io = req->async_data; + + msg.msg_name = addr = &io->addr; + } else { + ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address); + if (unlikely(ret < 0)) + return ret; + msg.msg_name = (struct sockaddr *)&__address; + addr = &__address; + } msg.msg_namelen = zc->addr_len; } @@ -1013,13 +1051,14 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(ret < min_ret)) { if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) - return -EAGAIN; + return io_setup_async_addr(req, addr, issue_flags); + if (ret > 0 && io_net_retry(sock, msg.msg_flags)) { zc->len -= ret; zc->buf += ret; zc->done_io += ret; req->flags |= REQ_F_PARTIAL_IO; - return -EAGAIN; + return io_setup_async_addr(req, addr, issue_flags); } if (ret == -ERESTARTSYS) ret = -EINTR; -- cgit From dfb58b1796d19c8405a38eb457f97669440c59d4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 26 Aug 2022 17:15:47 +0100 Subject: io_uring/net: fix overexcessive retries Length parameter of io_sg_from_iter() can be smaller than the iterator's size, as it's with TCP, so when we set from->count at the end of the function we truncate the iterator forcing TCP to return preliminary with a short send. It affects zerocopy sends with large payload sizes and leads to retries and possible request failures. Fixes: 3ff1a0d395c00 ("io_uring: enable managed frags with register buffers") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0bc0d5179c665b4ef5c328377c84c7a1f298467e.1661530037.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index 0af8a02df580..7a5468cc905e 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -956,7 +956,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, shinfo->nr_frags = frag; from->bvec += bi.bi_idx; from->nr_segs -= bi.bi_idx; - from->count = bi.bi_size; + from->count -= copied; from->iov_offset = bi.bi_bvec_done; skb->data_len += copied; -- cgit From 57f332246afa5929bdf2e7a5facddedb43549be4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:03 +0100 Subject: io_uring/notif: remove notif registration We're going to remove the userspace exposed zerocopy notification API, remove notification registration. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6ff00b97be99869c386958a990593c9c31cf105b.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index 7a5468cc905e..aac6997b7d88 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -889,7 +889,7 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) zc->flags = READ_ONCE(sqe->ioprio); if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | - IORING_RECVSEND_FIXED_BUF | IORING_RECVSEND_NOTIF_FLUSH)) + IORING_RECVSEND_FIXED_BUF)) return -EINVAL; if (zc->flags & IORING_RECVSEND_FIXED_BUF) { unsigned idx = READ_ONCE(sqe->buf_index); @@ -1063,8 +1063,6 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); - } else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) { - io_notif_slot_flush_submit(notif_slot, 0); } if (ret >= 0) -- cgit From b48c312be05e83b55a4d58bf61f80b4a3288fb7e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:04 +0100 Subject: io_uring/net: simplify zerocopy send user API Following user feedback, this patch simplifies zerocopy send API. One of the main complaints is that the current API is difficult with the userspace managing notification slots, and then send retries with error handling make it even worse. Instead of keeping notification slots change it to the per-request notifications model, which posts both completion and notification CQEs for each request when any data has been sent, and only one CQE if it fails. All notification CQEs will have IORING_CQE_F_NOTIF set and IORING_CQE_F_MORE in completion CQEs indicates whether to wait a notification or not. IOSQE_CQE_SKIP_SUCCESS is disallowed with zerocopy sends for now. This is less flexible, but greatly simplifies the user API and also the kernel implementation. We reuse notif helpers in this patch, but in the future there won't be need for keeping two requests. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/95287640ab98fc9417370afb16e310677c63e6ce.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 53 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 20 deletions(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index aac6997b7d88..7047c1342541 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -65,12 +65,12 @@ struct io_sendzc { struct file *file; void __user *buf; size_t len; - u16 slot_idx; unsigned msg_flags; unsigned flags; unsigned addr_len; void __user *addr; size_t done_io; + struct io_kiocb *notif; }; #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) @@ -879,12 +879,26 @@ out_free: return ret; } +void io_sendzc_cleanup(struct io_kiocb *req) +{ + struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); + + zc->notif->flags |= REQ_F_CQE_SKIP; + io_notif_flush(zc->notif); + zc->notif = NULL; +} + int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); struct io_ring_ctx *ctx = req->ctx; + struct io_kiocb *notif; - if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)) + if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) || + READ_ONCE(sqe->__pad3[0])) + return -EINVAL; + /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */ + if (req->flags & REQ_F_CQE_SKIP) return -EINVAL; zc->flags = READ_ONCE(sqe->ioprio); @@ -900,11 +914,17 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->imu = READ_ONCE(ctx->user_bufs[idx]); io_req_set_rsrc_node(req, ctx, 0); } + notif = zc->notif = io_alloc_notif(ctx); + if (!notif) + return -ENOMEM; + notif->cqe.user_data = req->cqe.user_data; + notif->cqe.res = 0; + notif->cqe.flags = IORING_CQE_F_NOTIF; + req->flags |= REQ_F_NEED_CLEANUP; zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); zc->len = READ_ONCE(sqe->len); zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; - zc->slot_idx = READ_ONCE(sqe->notification_idx); if (zc->msg_flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; @@ -976,33 +996,20 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) { struct sockaddr_storage __address, *addr = NULL; - struct io_ring_ctx *ctx = req->ctx; struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); - struct io_notif_slot *notif_slot; - struct io_kiocb *notif; struct msghdr msg; struct iovec iov; struct socket *sock; - unsigned msg_flags; + unsigned msg_flags, cflags; int ret, min_ret = 0; if (!(req->flags & REQ_F_POLLED) && (zc->flags & IORING_RECVSEND_POLL_FIRST)) return -EAGAIN; - - if (issue_flags & IO_URING_F_UNLOCKED) - return -EAGAIN; sock = sock_from_file(req->file); if (unlikely(!sock)) return -ENOTSOCK; - notif_slot = io_get_notif_slot(ctx, zc->slot_idx); - if (!notif_slot) - return -EINVAL; - notif = io_get_notif(ctx, notif_slot); - if (!notif) - return -ENOMEM; - msg.msg_name = NULL; msg.msg_control = NULL; msg.msg_controllen = 0; @@ -1033,7 +1040,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) &msg.msg_iter); if (unlikely(ret)) return ret; - ret = io_notif_account_mem(notif, zc->len); + ret = io_notif_account_mem(zc->notif, zc->len); if (unlikely(ret)) return ret; } @@ -1045,7 +1052,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) min_ret = iov_iter_count(&msg.msg_iter); msg.msg_flags = msg_flags; - msg.msg_ubuf = &io_notif_to_data(notif)->uarg; + msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg; msg.sg_from_iter = io_sg_from_iter; ret = sock_sendmsg(sock, &msg); @@ -1060,6 +1067,8 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) req->flags |= REQ_F_PARTIAL_IO; return io_setup_async_addr(req, addr, issue_flags); } + if (ret < 0 && !zc->done_io) + zc->notif->flags |= REQ_F_CQE_SKIP; if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); @@ -1069,7 +1078,11 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) ret += zc->done_io; else if (zc->done_io) ret = zc->done_io; - io_req_set_res(req, ret, 0); + + io_notif_flush(zc->notif); + req->flags &= ~REQ_F_NEED_CLEANUP; + cflags = ret >= 0 ? IORING_CQE_F_MORE : 0; + io_req_set_res(req, ret, cflags); return IOU_OK; } -- cgit