From 52ea806ad983490b3132a9e526e11a10dc2fd10c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Dec 2022 07:05:09 -0700 Subject: io_uring: finish waiting before flushing overflow entries If we have overflow entries being generated after we've done the initial flush in io_cqring_wait(), then we could be flushing them in the main wait loop as well. If that's done after having added ourselves to the cq_wait waitqueue, then the task state can be != TASK_RUNNING when we enter the overflow flush. Check for the need to overflow flush, and finish our wait cycle first if we have to do so. Reported-and-tested-by: syzbot+cf6ea1d6bb30a4ce10b2@syzkaller.appspotmail.com Link: https://lore.kernel.org/io-uring/000000000000cb143a05f04eee15@google.com/ Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'io_uring/io_uring.c') diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ff2bbac1a10f..ac5d39eeb3d1 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -677,16 +677,20 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx) io_cq_unlock_post(ctx); } +static void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx) +{ + /* iopoll syncs against uring_lock, not completion_lock */ + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_lock(&ctx->uring_lock); + __io_cqring_overflow_flush(ctx); + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_unlock(&ctx->uring_lock); +} + static void io_cqring_overflow_flush(struct io_ring_ctx *ctx) { - if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { - /* iopoll syncs against uring_lock, not completion_lock */ - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_lock(&ctx->uring_lock); - __io_cqring_overflow_flush(ctx); - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_unlock(&ctx->uring_lock); - } + if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) + io_cqring_do_overflow_flush(ctx); } void __io_put_task(struct task_struct *task, int nr) @@ -2549,7 +2553,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, trace_io_uring_cqring_wait(ctx, min_events); do { - io_cqring_overflow_flush(ctx); + if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { + finish_wait(&ctx->cq_wait, &iowq.wq); + io_cqring_do_overflow_flush(ctx); + } prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, TASK_INTERRUPTIBLE); ret = io_cqring_wait_schedule(ctx, &iowq, timeout); -- cgit From 343190841a1f22b96996d9f8cfab902a4d1bfd0e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Dec 2022 06:37:08 -0700 Subject: io_uring: check for valid register opcode earlier We only check the register opcode value inside the restricted ring section, move it into the main io_uring_register() function instead and check it up front. Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'io_uring/io_uring.c') diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ac5d39eeb3d1..58ac13b69dc8 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -4020,8 +4020,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, return -EEXIST; if (ctx->restricted) { - if (opcode >= IORING_REGISTER_LAST) - return -EINVAL; opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); if (!test_bit(opcode, ctx->restrictions.register_op)) return -EACCES; @@ -4177,6 +4175,9 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, long ret = -EBADF; struct fd f; + if (opcode >= IORING_REGISTER_LAST) + return -EINVAL; + f = fdget(fd); if (!f.file) return -EBADF; -- cgit