 include/trace/events/io_uring.h | 12
 io_uring/io-wq.c                |  5
 io_uring/kbuf.c                 | 16
 io_uring/poll.c                 | 52
 io_uring/rsrc.c                 | 47
 5 files changed, 67 insertions(+), 65 deletions(-)
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 45d15460b495..34b31a855ea4 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -133,15 +133,15 @@ TRACE_EVENT(io_uring_file_get,
  * io_uring_queue_async_work - called before submitting a new async work
  *
  * @req:	pointer to a submitted request
- * @rw:		type of workqueue, hashed or normal
+ * @hashed:	whether async work is hashed
  *
  * Allows to trace asynchronous work submission.
  */
 TRACE_EVENT(io_uring_queue_async_work,
 
-	TP_PROTO(struct io_kiocb *req, int rw),
+	TP_PROTO(struct io_kiocb *req, bool hashed),
 
-	TP_ARGS(req, rw),
+	TP_ARGS(req, hashed),
 
 	TP_STRUCT__entry (
 		__field(  void *,			ctx	)
@@ -150,7 +150,7 @@ TRACE_EVENT(io_uring_queue_async_work,
 		__field(  u8,				opcode	)
 		__field(  unsigned long long,		flags	)
 		__field(  struct io_wq_work *,		work	)
-		__field(  int,				rw	)
+		__field(  bool,				hashed	)
 
 		__string( op_str, io_uring_get_opcode(req->opcode) )
 	),
@@ -162,7 +162,7 @@ TRACE_EVENT(io_uring_queue_async_work,
 		__entry->flags		= (__force unsigned long long) req->flags;
 		__entry->opcode		= req->opcode;
 		__entry->work		= &req->work;
-		__entry->rw		= rw;
+		__entry->hashed		= hashed;
 
 		__assign_str(op_str);
 	),
@@ -170,7 +170,7 @@ TRACE_EVENT(io_uring_queue_async_work,
 	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%llx, %s queue, work %p",
 		__entry->ctx, __entry->req, __entry->user_data,
 		__get_str(op_str), __entry->flags,
-		__entry->rw ? "hashed" : "normal", __entry->work)
+		__entry->hashed ? "hashed" : "normal", __entry->work)
 );
 
 /**
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 1d03b2fc4b25..cd13d8aac3d2 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -805,11 +805,12 @@ static inline bool io_should_retry_thread(struct io_worker *worker, long err)
 	 */
 	if (fatal_signal_pending(current))
 		return false;
-	if (worker->init_retries++ >= WORKER_INIT_LIMIT)
-		return false;
+	worker->init_retries++;
 
 	switch (err) {
 	case -EAGAIN:
+		return worker->init_retries <= WORKER_INIT_LIMIT;
+	/* Analogous to a fork() syscall, always retry on a restartable error */
 	case -ERESTARTSYS:
 	case -ERESTARTNOINTR:
 	case -ERESTARTNOHAND:
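The io-wq hunk narrows the retry limit to the -EAGAIN case only: a transient failure to clone a worker gives up after WORKER_INIT_LIMIT attempts, while signal-restart errors now retry indefinitely, the same way a fork() caller would simply re-issue the syscall. Below is a standalone sketch of that policy, not the kernel function itself; the WORKER_INIT_LIMIT value and the local definitions of the kernel-internal restart errnos are assumptions for illustration.

#include <errno.h>
#include <stdbool.h>

#define WORKER_INIT_LIMIT	3	/* assumed to match io-wq.c */

/* kernel-internal errnos, not exposed through <errno.h> */
#define ERESTARTSYS	512
#define ERESTARTNOINTR	513
#define ERESTARTNOHAND	514

/* Sketch of the post-patch policy in io_should_retry_thread(). */
static bool should_retry_thread(int *init_retries, long err)
{
	(*init_retries)++;

	switch (err) {
	case -EAGAIN:
		/* transient resource shortage: bounded retries */
		return *init_retries <= WORKER_INIT_LIMIT;
	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
		/* interrupted by a signal: always retry, like fork() */
		return true;
	}
	return false;
}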
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 8a329556f8df..796d131107dd 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -44,11 +44,11 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
 		buf_len -= this_len;
 		/* Stop looping for invalid buffer length of 0 */
 		if (buf_len || !this_len) {
-			buf->addr += this_len;
-			buf->len = buf_len;
+			WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len);
+			WRITE_ONCE(buf->len, buf_len);
 			return false;
 		}
-		buf->len = 0;
+		WRITE_ONCE(buf->len, 0);
 		bl->head++;
 		len -= this_len;
 	}
@@ -198,9 +198,9 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 		if (*len == 0 || *len > buf_len)
 			*len = buf_len;
 		req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
-		req->buf_index = buf->bid;
+		req->buf_index = READ_ONCE(buf->bid);
 		sel.buf_list = bl;
-		sel.addr = u64_to_user_ptr(buf->addr);
+		sel.addr = u64_to_user_ptr(READ_ONCE(buf->addr));
 
 		if (io_should_commit(req, issue_flags)) {
 			io_kbuf_commit(req, sel.buf_list, *len, 1);
@@ -280,7 +280,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 	if (!arg->max_len)
 		arg->max_len = INT_MAX;
 
-	req->buf_index = buf->bid;
+	req->buf_index = READ_ONCE(buf->bid);
 	do {
 		u32 len = READ_ONCE(buf->len);
 
@@ -291,11 +291,11 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 				arg->partial_map = 1;
 				if (iov != arg->iovs)
 					break;
-				buf->len = len;
+				WRITE_ONCE(buf->len, len);
 			}
 		}
 
-		iov->iov_base = u64_to_user_ptr(buf->addr);
+		iov->iov_base = u64_to_user_ptr(READ_ONCE(buf->addr));
 		iov->iov_len = len;
 		iov++;
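The kbuf.c changes wrap every access to the buffer-ring entries in READ_ONCE()/WRITE_ONCE() because the ring is mapped into userspace and can be modified concurrently: a field must be snapshotted once and the snapshot reused, or a double read can observe two different values. A minimal kernel-style sketch of the hazard follows; the struct and function names are hypothetical stand-ins, not the real io_uring_buf layout.

/* simplified stand-in for a buffer-ring entry shared with userspace */
struct shared_buf {
	u64	addr;
	u32	len;
	u16	bid;
};

/*
 * BROKEN: buf->len is read twice.  Userspace can rewrite it between
 * the bounds check and the use, so the returned length may exceed max.
 */
static u32 clamp_len_racy(struct shared_buf *buf, u32 max)
{
	if (buf->len > max)	/* first read */
		return max;
	return buf->len;	/* second read may observe a larger value */
}

/*
 * FIXED: snapshot the field exactly once with READ_ONCE() and only use
 * the snapshot; publish updates with WRITE_ONCE() so stores cannot be
 * torn or reordered by the compiler.
 */
static u32 clamp_len_safe(struct shared_buf *buf, u32 max)
{
	u32 len = READ_ONCE(buf->len);

	return min(len, max);
}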
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 8aa4e3a31e73..aac4b3b881fb 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -138,14 +138,32 @@ static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
 	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
 }
 
+static void io_poll_remove_waitq(struct io_poll *poll)
+{
+	/*
+	 * If the waitqueue is being freed early but someone already holds
+	 * ownership over it, we have to tear down the request as best we can.
+	 * That means immediately removing the request from its waitqueue and
+	 * preventing all further accesses to the waitqueue via the request.
+	 */
+	list_del_init(&poll->wait.entry);
+
+	/*
+	 * Careful: this *must* be the last step, since as soon as req->head is
+	 * NULL'ed out, the request can be completed and freed, since
+	 * io_poll_remove_entry() will no longer need to take the waitqueue
+	 * lock.
+	 */
+	smp_store_release(&poll->head, NULL);
+}
+
 static inline void io_poll_remove_entry(struct io_poll *poll)
 {
 	struct wait_queue_head *head = smp_load_acquire(&poll->head);
 
 	if (head) {
 		spin_lock_irq(&head->lock);
-		list_del_init(&poll->wait.entry);
-		poll->head = NULL;
+		io_poll_remove_waitq(poll);
 		spin_unlock_irq(&head->lock);
 	}
 }
@@ -368,23 +386,7 @@ static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
 	io_poll_mark_cancelled(req);
 	/* we have to kick tw in case it's not already */
 	io_poll_execute(req, 0);
-
-	/*
-	 * If the waitqueue is being freed early but someone is already
-	 * holds ownership over it, we have to tear down the request as
-	 * best we can. That means immediately removing the request from
-	 * its waitqueue and preventing all further accesses to the
-	 * waitqueue via the request.
-	 */
-	list_del_init(&poll->wait.entry);
-
-	/*
-	 * Careful: this *must* be the last step, since as soon
-	 * as req->head is NULL'ed out, the request can be
-	 * completed and freed, since aio_poll_complete_work()
-	 * will no longer need to take the waitqueue lock.
-	 */
-	smp_store_release(&poll->head, NULL);
+	io_poll_remove_waitq(poll);
 
 	return 1;
 }
@@ -413,8 +415,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 
 		/* optional, saves extra locking for removal in tw handler */
 		if (mask && poll->events & EPOLLONESHOT) {
-			list_del_init(&poll->wait.entry);
-			poll->head = NULL;
+			io_poll_remove_waitq(poll);
 			if (wqe_is_double(wait))
 				req->flags &= ~REQ_F_DOUBLE_POLL;
 			else
@@ -937,12 +938,17 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
 
 		ret2 = io_poll_add(preq, issue_flags & ~IO_URING_F_UNLOCKED);
 		/* successfully updated, don't complete poll request */
-		if (!ret2 || ret2 == -EIOCBQUEUED)
+		if (ret2 == IOU_ISSUE_SKIP_COMPLETE)
 			goto out;
+		/* request completed as part of the update, complete it */
+		else if (ret2 == IOU_COMPLETE)
+			goto complete;
 	}
 
-	req_set_fail(preq);
 	io_req_set_res(preq, -ECANCELED, 0);
+complete:
+	if (preq->cqe.res < 0)
+		req_set_fail(preq);
 	preq->io_task_work.func = io_req_task_complete;
 	io_req_task_work_add(preq);
 out:
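Factoring the teardown into io_poll_remove_waitq() preserves the ordering contract the two open-coded copies relied on: the smp_store_release() of poll->head pairs with the smp_load_acquire() in io_poll_remove_entry(), so the wait-entry unlink is guaranteed visible before any CPU can observe head as NULL and skip taking the waitqueue lock. A generic C11 sketch of that release/acquire pairing is below; all names in it are hypothetical.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct queue;				/* stands in for wait_queue_head */

struct waiter {
	_Atomic(struct queue *)	head;	/* plays the role of poll->head */
	int			linked;	/* stands in for wait.entry linkage */
};

/* Teardown side: unlink first, then publish "detached" with release. */
static void waiter_detach(struct waiter *w)
{
	w->linked = 0;			/* like list_del_init() */
	/* must be last: after this the waiter may be freed by the other side */
	atomic_store_explicit(&w->head, NULL, memory_order_release);
}

/* Removal side: the acquire load pairs with the release store above. */
static bool waiter_try_remove(struct waiter *w)
{
	struct queue *head = atomic_load_explicit(&w->head,
						  memory_order_acquire);

	if (!head)
		return false;	/* already detached; the unlink is visible too */
	/* ... lock *head, call waiter_detach(w), unlock ... */
	return true;
}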
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 3765a50329a8..a63474b331bf 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1186,12 +1186,16 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 		return -EBUSY;
 
 	nbufs = src_ctx->buf_table.nr;
+	if (!nbufs)
+		return -ENXIO;
 	if (!arg->nr)
 		arg->nr = nbufs;
 	else if (arg->nr > nbufs)
 		return -EINVAL;
 	else if (arg->nr > IORING_MAX_REG_BUFFERS)
 		return -EINVAL;
+	if (check_add_overflow(arg->nr, arg->src_off, &off) || off > nbufs)
+		return -EOVERFLOW;
 	if (check_add_overflow(arg->nr, arg->dst_off, &nbufs))
 		return -EOVERFLOW;
 	if (nbufs > IORING_MAX_REG_BUFFERS)
@@ -1201,31 +1205,16 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 	if (ret)
 		return ret;
 
-	/* Fill entries in data from dst that won't overlap with src */
+	/* Copy original dst nodes from before the cloned range */
 	for (i = 0; i < min(arg->dst_off, ctx->buf_table.nr); i++) {
-		struct io_rsrc_node *src_node = ctx->buf_table.nodes[i];
+		struct io_rsrc_node *node = ctx->buf_table.nodes[i];
 
-		if (src_node) {
-			data.nodes[i] = src_node;
-			src_node->refs++;
+		if (node) {
+			data.nodes[i] = node;
+			node->refs++;
 		}
 	}
 
-	ret = -ENXIO;
-	nbufs = src_ctx->buf_table.nr;
-	if (!nbufs)
-		goto out_free;
-	ret = -EINVAL;
-	if (!arg->nr)
-		arg->nr = nbufs;
-	else if (arg->nr > nbufs)
-		goto out_free;
-	ret = -EOVERFLOW;
-	if (check_add_overflow(arg->nr, arg->src_off, &off))
-		goto out_free;
-	if (off > nbufs)
-		goto out_free;
-
 	off = arg->dst_off;
 	i = arg->src_off;
 	nr = arg->nr;
@@ -1238,8 +1227,8 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 		} else {
 			dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
 			if (!dst_node) {
-				ret = -ENOMEM;
-				goto out_free;
+				io_rsrc_data_free(ctx, &data);
+				return -ENOMEM;
 			}
 
 			refcount_inc(&src_node->buf->refs);
@@ -1249,6 +1238,16 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 		i++;
 	}
 
+	/* Copy original dst nodes from after the cloned range */
+	for (i = nbufs; i < ctx->buf_table.nr; i++) {
+		struct io_rsrc_node *node = ctx->buf_table.nodes[i];
+
+		if (node) {
+			data.nodes[i] = node;
+			node->refs++;
+		}
+	}
+
 	/*
	 * If asked for replace, put the old table. data->nodes[] holds both
	 * old and new nodes at this point.
@@ -1265,10 +1264,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 	WARN_ON_ONCE(ctx->buf_table.nr);
 	ctx->buf_table = data;
 	return 0;
-
-out_free:
-	io_rsrc_data_free(ctx, &data);
-	return ret;
 }
 
 /*
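In rsrc.c, the source-range validation moves above io_rsrc_data_alloc(), so the function can no longer fail after the destination table exists; that is what lets the out_free label disappear, with the one remaining allocation failure freeing the table inline. The hoisted check rejects arg->nr + arg->src_off wrapping around before bounding it against the source table size. Below is a userspace sketch of the same validation built on the compiler builtin that the kernel's check_add_overflow() wraps; the function name is hypothetical.

#include <errno.h>

/*
 * Validate that [src_off, src_off + nr) fits inside a table of nbufs
 * entries, rejecting integer wraparound before the range check so a
 * huge src_off cannot slip past the bound.
 */
static int validate_clone_range(unsigned int src_off, unsigned int nr,
				unsigned int nbufs)
{
	unsigned int end;

	if (__builtin_add_overflow(nr, src_off, &end))
		return -EOVERFLOW;	/* nr + src_off wrapped */
	if (end > nbufs)
		return -EOVERFLOW;	/* range runs past the source table */
	return 0;
}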
