From e307e6698165ca6508ed42c69cb1be76c8eb6a3c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 27 Oct 2022 20:34:45 +0200 Subject: io_uring/net: introduce IORING_SEND_ZC_REPORT_USAGE flag It might be useful for applications to detect if a zero copy transfer with SEND[MSG]_ZC was actually possible or not. The application can fallback to plain SEND[MSG] in order to avoid the overhead of two cqes per request. Or it can generate a log message that could indicate to an administrator that no zero copy was possible and could explain degraded performance. Cc: stable@vger.kernel.org # 6.1 Link: https://lore.kernel.org/io-uring/fb6a7599-8a9b-15e5-9b64-6cd9d01c6ff4@gmail.com/T/#m2b0d9df94ce43b0e69e6c089bdff0ce6babbdfaa Signed-off-by: Stefan Metzmacher Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/8945b01756d902f5d5b0667f20b957ad3f742e5e.1666895626.git.metze@samba.org Signed-off-by: Jens Axboe --- io_uring/net.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index ab83da7e80f0..03825b043afe 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -937,7 +937,8 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) zc->flags = READ_ONCE(sqe->ioprio); if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | - IORING_RECVSEND_FIXED_BUF)) + IORING_RECVSEND_FIXED_BUF | + IORING_SEND_ZC_REPORT_USAGE)) return -EINVAL; notif = zc->notif = io_alloc_notif(ctx); if (!notif) @@ -955,6 +956,9 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->imu = READ_ONCE(ctx->user_bufs[idx]); io_req_set_rsrc_node(notif, ctx, 0); } + if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) { + io_notif_to_data(notif)->zc_report = true; + } if (req->opcode == IORING_OP_SEND_ZC) { if (READ_ONCE(sqe->__pad3[0])) -- cgit From 40725d1b960f19a11a1ebd1ab537844ebf39347c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 4 Nov 2022 10:59:45 +0000 Subject: io_uring: move zc reporting from the hot path Add custom tw and notif callbacks on top of usual bits also handling zc reporting. That moves it from the hot path. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/40de4a6409042478e1f35adc4912e23226cb1b5c.1667557923.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index 03825b043afe..9e3da845f906 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -923,6 +923,9 @@ void io_send_zc_cleanup(struct io_kiocb *req) } } +#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF) +#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE) + int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); @@ -935,11 +938,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->flags & REQ_F_CQE_SKIP) return -EINVAL; - zc->flags = READ_ONCE(sqe->ioprio); - if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | - IORING_RECVSEND_FIXED_BUF | - IORING_SEND_ZC_REPORT_USAGE)) - return -EINVAL; notif = zc->notif = io_alloc_notif(ctx); if (!notif) return -ENOMEM; @@ -947,6 +945,17 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) notif->cqe.res = 0; notif->cqe.flags = IORING_CQE_F_NOTIF; req->flags |= REQ_F_NEED_CLEANUP; + + zc->flags = READ_ONCE(sqe->ioprio); + if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) { + if (zc->flags & ~IO_ZC_FLAGS_VALID) + return -EINVAL; + if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) { + io_notif_set_extended(notif); + io_notif_to_data(notif)->zc_report = true; + } + } + if (zc->flags & IORING_RECVSEND_FIXED_BUF) { unsigned idx = READ_ONCE(sqe->buf_index); @@ -956,9 +965,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->imu = READ_ONCE(ctx->user_bufs[idx]); io_req_set_rsrc_node(notif, ctx, 0); } - if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) { - io_notif_to_data(notif)->zc_report = true; - } if (req->opcode == IORING_OP_SEND_ZC) { if (READ_ONCE(sqe->__pad3[0])) -- cgit From 42385b02baad0df55474b7f36dc13e0d4ffd0cc0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 4 Nov 2022 10:59:46 +0000 Subject: io_uring/net: move mm accounting to a slower path We can also move mm accounting to the extended callbacks. It removes a few cycles from the hot path including skipping one function call and setting io_req_task_complete as a callback directly. For user backed I/O it shouldn't make any difference taking into considering atomic mm accounting and page pinning. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1062f270273ad11c1b7b45ec59a6a317533d5e64.1667557923.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index 9e3da845f906..966019fcbe8c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1097,6 +1097,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) return ret; msg.sg_from_iter = io_sg_from_iter; } else { + io_notif_set_extended(zc->notif); ret = import_single_range(WRITE, zc->buf, zc->len, &iov, &msg.msg_iter); if (unlikely(ret)) @@ -1158,6 +1159,8 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) unsigned flags; int ret, min_ret = 0; + io_notif_set_extended(sr->notif); + sock = sock_from_file(req->file); if (unlikely(!sock)) return -ENOTSOCK; -- cgit From df730ec21f7ba395b1b22e7f93a3a85b1d1b7882 Mon Sep 17 00:00:00 2001 From: Xinghui Li Date: Wed, 2 Nov 2022 16:25:03 +0800 Subject: io_uring: fix two assignments in if conditions Fixes two errors: "ERROR: do not use assignment in if condition 130: FILE: io_uring/net.c:130: + if (!(issue_flags & IO_URING_F_UNLOCKED) && ERROR: do not use assignment in if condition 599: FILE: io_uring/poll.c:599: + } else if (!(issue_flags & IO_URING_F_UNLOCKED) &&" reported by checkpatch.pl in net.c and poll.c . Signed-off-by: Xinghui Li Reported-by: kernel test robot Link: https://lore.kernel.org/r/20221102082503.32236-1-korantwork@gmail.com [axboe: style tweaks] Signed-off-by: Jens Axboe --- io_uring/net.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index 966019fcbe8c..f32e0daeedd8 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -125,13 +125,15 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req, struct io_cache_entry *entry; struct io_async_msghdr *hdr; - if (!(issue_flags & IO_URING_F_UNLOCKED) && - (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) { - hdr = container_of(entry, struct io_async_msghdr, cache); - hdr->free_iov = NULL; - req->flags |= REQ_F_ASYNC_DATA; - req->async_data = hdr; - return hdr; + if (!(issue_flags & IO_URING_F_UNLOCKED)) { + entry = io_alloc_cache_get(&ctx->netmsg_cache); + if (entry) { + hdr = container_of(entry, struct io_async_msghdr, cache); + hdr->free_iov = NULL; + req->flags |= REQ_F_ASYNC_DATA; + req->async_data = hdr; + return hdr; + } } if (!io_alloc_async_data(req)) { -- cgit From 515e26961295bee9da5e26916c27739dca6c10e1 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Mon, 21 Nov 2022 07:43:42 -0700 Subject: io_uring: revert "io_uring fix multishot accept ordering" This is no longer needed after commit aa1df3a360a0 ("io_uring: fix CQE reordering"), since all reordering is now taken care of. This reverts commit cbd25748545c ("io_uring: fix multishot accept ordering"). Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20221107125236.260132-2-dylany@meta.com Signed-off-by: Jens Axboe --- io_uring/net.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index f32e0daeedd8..f7b2b068464d 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1322,12 +1322,12 @@ retry: return IOU_OK; } - if (ret >= 0 && - io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false)) + if (ret < 0) + return ret; + if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, true)) goto retry; - io_req_set_res(req, ret, 0); - return (issue_flags & IO_URING_F_MULTISHOT) ? IOU_STOP_MULTISHOT : IOU_OK; + return -ECANCELED; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -- cgit From e2ad599d1ed38fe743106f10d58a0cbfc00b51e2 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Mon, 7 Nov 2022 04:52:35 -0800 Subject: io_uring: allow multishot recv CQEs to overflow With commit aa1df3a360a0 ("io_uring: fix CQE reordering"), there are stronger guarantees for overflow ordering. Specifically ensuring that userspace will not receive out of order receive CQEs. Therefore this is not needed any more for recv/recvmsg. Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20221107125236.260132-4-dylany@meta.com Signed-off-by: Jens Axboe --- io_uring/net.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index f7b2b068464d..0de6f78ad978 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -602,15 +602,11 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, if (!mshot_finished) { if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret, - cflags | IORING_CQE_F_MORE, false)) { + cflags | IORING_CQE_F_MORE, true)) { io_recv_prep_retry(req); return false; } - /* - * Otherwise stop multishot but use the current result. - * Probably will end up going into overflow, but this means - * we cannot trust the ordering anymore - */ + /* Otherwise stop multishot but use the current result. */ } io_req_set_res(req, *ret, cflags); -- cgit From 9b8c54755a2b16d4f23c0ea184b75e2edf77d906 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Thu, 24 Nov 2022 01:35:55 -0800 Subject: io_uring: add io_aux_cqe which allows deferred completion Use the just introduced deferred post cqe completion state when possible in io_aux_cqe. If not possible fallback to io_post_aux_cqe. This introduces a complication because of allow_overflow. For deferred completions we cannot know without locking the completion_lock if it will overflow (and even if we locked it, another post could sneak in and cause this cqe to be in overflow). However since overflow protection is mostly a best effort defence in depth to prevent infinite loops of CQEs for poll, just checking the overflow bit is going to be good enough and will result in at most 16 (array size of deferred cqes) overflows. Suggested-by: Pavel Begunkov Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20221124093559.3780686-6-dylany@meta.com Signed-off-by: Jens Axboe --- io_uring/net.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'io_uring/net.c') diff --git a/io_uring/net.c b/io_uring/net.c index 0de6f78ad978..90342dcb6b1d 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -601,8 +601,8 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, } if (!mshot_finished) { - if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret, - cflags | IORING_CQE_F_MORE, true)) { + if (io_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER, + req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE, true)) { io_recv_prep_retry(req); return false; } @@ -1320,7 +1320,8 @@ retry: if (ret < 0) return ret; - if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, true)) + if (io_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER, + req->cqe.user_data, ret, IORING_CQE_F_MORE, true)) goto retry; return -ECANCELED; -- cgit