-rw-r--r--  include/trace/events/io_uring.h  | 12
-rw-r--r--  io_uring/io-wq.c                 |  5
-rw-r--r--  io_uring/kbuf.c                  | 16
-rw-r--r--  io_uring/poll.c                  | 52
-rw-r--r--  io_uring/rsrc.c                  | 47
5 files changed, 67 insertions(+), 65 deletions(-)
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 45d15460b495..34b31a855ea4 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -133,15 +133,15 @@ TRACE_EVENT(io_uring_file_get,
* io_uring_queue_async_work - called before submitting a new async work
*
* @req: pointer to a submitted request
- * @rw: type of workqueue, hashed or normal
+ * @hashed: whether async work is hashed
*
* Allows to trace asynchronous work submission.
*/
TRACE_EVENT(io_uring_queue_async_work,
- TP_PROTO(struct io_kiocb *req, int rw),
+ TP_PROTO(struct io_kiocb *req, bool hashed),

- TP_ARGS(req, rw),
+ TP_ARGS(req, hashed),
TP_STRUCT__entry (
__field( void *, ctx )
@@ -150,7 +150,7 @@ TRACE_EVENT(io_uring_queue_async_work,
__field( u8, opcode )
__field( unsigned long long, flags )
__field( struct io_wq_work *, work )
- __field( int, rw )
+ __field( bool, hashed )
__string( op_str, io_uring_get_opcode(req->opcode) )
),
@@ -162,7 +162,7 @@ TRACE_EVENT(io_uring_queue_async_work,
__entry->flags = (__force unsigned long long) req->flags;
__entry->opcode = req->opcode;
__entry->work = &req->work;
- __entry->rw = rw;
+ __entry->hashed = hashed;
__assign_str(op_str);
),
@@ -170,7 +170,7 @@ TRACE_EVENT(io_uring_queue_async_work,
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%llx, %s queue, work %p",
__entry->ctx, __entry->req, __entry->user_data,
__get_str(op_str), __entry->flags,
- __entry->rw ? "hashed" : "normal", __entry->work)
+ __entry->hashed ? "hashed" : "normal", __entry->work)
);
/**
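With the prototype switched to a bool, call sites pass the hash state
directly instead of encoding it in an int. A minimal sketch of an
adapted caller, assuming the flag is derived from io_wq_is_hashed(),
which tests IO_WQ_WORK_HASHED on the work item:

	/* hashed work serializes on its key; normal work runs freely */
	trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work));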
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 1d03b2fc4b25..cd13d8aac3d2 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -805,11 +805,12 @@ static inline bool io_should_retry_thread(struct io_worker *worker, long err)
*/
if (fatal_signal_pending(current))
return false;
- if (worker->init_retries++ >= WORKER_INIT_LIMIT)
- return false;
+ worker->init_retries++;
switch (err) {
case -EAGAIN:
+ return worker->init_retries <= WORKER_INIT_LIMIT;
+ /* Analogous to a fork() syscall, always retry on a restartable error */
case -ERESTARTSYS:
case -ERESTARTNOINTR:
case -ERESTARTNOHAND:
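The rewrite bounds only -EAGAIN by WORKER_INIT_LIMIT; per the new
comment, restartable errors now retry without limit. A hedged sketch of
the full control flow implied by the hunk (the unconditional return for
the -ERESTART* cases is assumed from the truncated context):

	worker->init_retries++;
	switch (err) {
	case -EAGAIN:
		/* transient failure: give up after WORKER_INIT_LIMIT tries */
		return worker->init_retries <= WORKER_INIT_LIMIT;
	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
		/* interrupted, like a fork() would be: always retry */
		return true;
	}
	return false;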
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 8a329556f8df..796d131107dd 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -44,11 +44,11 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
buf_len -= this_len;
/* Stop looping for invalid buffer length of 0 */
if (buf_len || !this_len) {
- buf->addr += this_len;
- buf->len = buf_len;
+ WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len);
+ WRITE_ONCE(buf->len, buf_len);
return false;
}
- buf->len = 0;
+ WRITE_ONCE(buf->len, 0);
bl->head++;
len -= this_len;
}
@@ -198,9 +198,9 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
if (*len == 0 || *len > buf_len)
*len = buf_len;
req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
- req->buf_index = buf->bid;
+ req->buf_index = READ_ONCE(buf->bid);
sel.buf_list = bl;
- sel.addr = u64_to_user_ptr(buf->addr);
+ sel.addr = u64_to_user_ptr(READ_ONCE(buf->addr));
if (io_should_commit(req, issue_flags)) {
io_kbuf_commit(req, sel.buf_list, *len, 1);
@@ -280,7 +280,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
if (!arg->max_len)
arg->max_len = INT_MAX;
- req->buf_index = buf->bid;
+ req->buf_index = READ_ONCE(buf->bid);
do {
u32 len = READ_ONCE(buf->len);
@@ -291,11 +291,11 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
arg->partial_map = 1;
if (iov != arg->iovs)
break;
- buf->len = len;
+ WRITE_ONCE(buf->len, len);
}
}
- iov->iov_base = u64_to_user_ptr(buf->addr);
+ iov->iov_base = u64_to_user_ptr(READ_ONCE(buf->addr));
iov->iov_len = len;
iov++;
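The buffer ring entries are mapped into userspace and can be modified
concurrently, so each field must be read or written with a single,
untorn access; READ_ONCE()/WRITE_ONCE() guarantee that and stop the
compiler from reloading a field userspace may be racing on. A minimal
sketch of the pattern against the UAPI entry layout:

	#include <linux/types.h>

	struct io_uring_buf {
		__u64	addr;
		__u32	len;
		__u16	bid;
		__u16	resv;
	};

	/* consume this_len bytes from a shared entry, one access per field */
	static void consume_partial(struct io_uring_buf *buf, __u32 this_len)
	{
		__u64 addr = READ_ONCE(buf->addr);
		__u32 len = READ_ONCE(buf->len);

		WRITE_ONCE(buf->addr, addr + this_len);
		WRITE_ONCE(buf->len, len - this_len);
	}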
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 8aa4e3a31e73..aac4b3b881fb 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -138,14 +138,32 @@ static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
init_waitqueue_func_entry(&poll->wait, io_poll_wake);
}
+static void io_poll_remove_waitq(struct io_poll *poll)
+{
+ /*
+ * If the waitqueue is being freed early but someone already holds
+ * ownership over it, we have to tear down the request as best we can.
+ * That means immediately removing the request from its waitqueue and
+ * preventing all further accesses to the waitqueue via the request.
+ */
+ list_del_init(&poll->wait.entry);
+
+ /*
+ * Careful: this *must* be the last step, since as soon as req->head is
+ * NULL'ed out, the request can be completed and freed, since
+ * io_poll_remove_entry() will no longer need to take the waitqueue
+ * lock.
+ */
+ smp_store_release(&poll->head, NULL);
+}
+
static inline void io_poll_remove_entry(struct io_poll *poll)
{
struct wait_queue_head *head = smp_load_acquire(&poll->head);
if (head) {
spin_lock_irq(&head->lock);
- list_del_init(&poll->wait.entry);
- poll->head = NULL;
+ io_poll_remove_waitq(poll);
spin_unlock_irq(&head->lock);
}
}
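The smp_store_release() in the new helper pairs with the
smp_load_acquire() in io_poll_remove_entry() above: head is NULL'ed
only after the entry is unlinked, so a reader that observes NULL knows
the unlink is already visible and can skip the waitqueue lock. The
pairing, sketched:

	/* teardown side (waker, holding head->lock or ownership) */
	list_del_init(&poll->wait.entry);
	smp_store_release(&poll->head, NULL);	/* publish after unlink */

	/* reader side */
	struct wait_queue_head *head = smp_load_acquire(&poll->head);
	if (!head)
		return;	/* unlink already visible; no lock needed */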
@@ -368,23 +386,7 @@ static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
io_poll_mark_cancelled(req);
/* we have to kick tw in case it's not already */
io_poll_execute(req, 0);
-
- /*
- * If the waitqueue is being freed early but someone is already
- * holds ownership over it, we have to tear down the request as
- * best we can. That means immediately removing the request from
- * its waitqueue and preventing all further accesses to the
- * waitqueue via the request.
- */
- list_del_init(&poll->wait.entry);
-
- /*
- * Careful: this *must* be the last step, since as soon
- * as req->head is NULL'ed out, the request can be
- * completed and freed, since aio_poll_complete_work()
- * will no longer need to take the waitqueue lock.
- */
- smp_store_release(&poll->head, NULL);
+ io_poll_remove_waitq(poll);
return 1;
}
@@ -413,8 +415,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
/* optional, saves extra locking for removal in tw handler */
if (mask && poll->events & EPOLLONESHOT) {
- list_del_init(&poll->wait.entry);
- poll->head = NULL;
+ io_poll_remove_waitq(poll);
if (wqe_is_double(wait))
req->flags &= ~REQ_F_DOUBLE_POLL;
else
@@ -937,12 +938,17 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
ret2 = io_poll_add(preq, issue_flags & ~IO_URING_F_UNLOCKED);
/* successfully updated, don't complete poll request */
- if (!ret2 || ret2 == -EIOCBQUEUED)
+ if (ret2 == IOU_ISSUE_SKIP_COMPLETE)
goto out;
+ /* request completed as part of the update, complete it */
+ else if (ret2 == IOU_COMPLETE)
+ goto complete;
}
- req_set_fail(preq);
io_req_set_res(preq, -ECANCELED, 0);
+complete:
+ if (preq->cqe.res < 0)
+ req_set_fail(preq);
preq->io_task_work.func = io_req_task_complete;
io_req_task_work_add(preq);
out:
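The update path now keys off issue-result codes rather than raw errno
values: IOU_ISSUE_SKIP_COMPLETE means the poll was re-armed and will
complete later, while IOU_COMPLETE means the result is already in
preq->cqe.res. A sketch of the resulting flow, assuming these are the
only two codes io_poll_add() returns here:

	if (ret2 == IOU_ISSUE_SKIP_COMPLETE)
		goto out;	/* re-armed; completion arrives later */
	else if (ret2 == IOU_COMPLETE)
		goto complete;	/* reuse preq->cqe.res, fail if negative */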
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 3765a50329a8..a63474b331bf 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1186,12 +1186,16 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
return -EBUSY;
nbufs = src_ctx->buf_table.nr;
+ if (!nbufs)
+ return -ENXIO;
if (!arg->nr)
arg->nr = nbufs;
else if (arg->nr > nbufs)
return -EINVAL;
else if (arg->nr > IORING_MAX_REG_BUFFERS)
return -EINVAL;
+ if (check_add_overflow(arg->nr, arg->src_off, &off) || off > nbufs)
+ return -EOVERFLOW;
if (check_add_overflow(arg->nr, arg->dst_off, &nbufs))
return -EOVERFLOW;
if (nbufs > IORING_MAX_REG_BUFFERS)
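check_add_overflow() stores the sum in its third argument and returns
true if the addition wrapped, so both wraparound and an out-of-range
source window are rejected before any nodes are touched. A small
illustration of the wrap case the new check closes:

	/* e.g. arg->nr = 2, arg->src_off = UINT_MAX - 1: the sum wraps
	 * to 0 and would sail past a naive "off > nbufs" test;
	 * check_add_overflow() reports the wrap instead. */
	if (check_add_overflow(arg->nr, arg->src_off, &off) || off > nbufs)
		return -EOVERFLOW;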
@@ -1201,31 +1205,16 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
if (ret)
return ret;
- /* Fill entries in data from dst that won't overlap with src */
+ /* Copy original dst nodes from before the cloned range */
for (i = 0; i < min(arg->dst_off, ctx->buf_table.nr); i++) {
- struct io_rsrc_node *src_node = ctx->buf_table.nodes[i];
+ struct io_rsrc_node *node = ctx->buf_table.nodes[i];
- if (src_node) {
- data.nodes[i] = src_node;
- src_node->refs++;
+ if (node) {
+ data.nodes[i] = node;
+ node->refs++;
}
}
- ret = -ENXIO;
- nbufs = src_ctx->buf_table.nr;
- if (!nbufs)
- goto out_free;
- ret = -EINVAL;
- if (!arg->nr)
- arg->nr = nbufs;
- else if (arg->nr > nbufs)
- goto out_free;
- ret = -EOVERFLOW;
- if (check_add_overflow(arg->nr, arg->src_off, &off))
- goto out_free;
- if (off > nbufs)
- goto out_free;
-
off = arg->dst_off;
i = arg->src_off;
nr = arg->nr;
@@ -1238,8 +1227,8 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
} else {
dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!dst_node) {
- ret = -ENOMEM;
- goto out_free;
+ io_rsrc_data_free(ctx, &data);
+ return -ENOMEM;
}
refcount_inc(&src_node->buf->refs);
@@ -1249,6 +1238,16 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
i++;
}
+ /* Copy original dst nodes from after the cloned range */
+ for (i = nbufs; i < ctx->buf_table.nr; i++) {
+ struct io_rsrc_node *node = ctx->buf_table.nodes[i];
+
+ if (node) {
+ data.nodes[i] = node;
+ node->refs++;
+ }
+ }
+
/*
* If asked for replace, put the old table. data->nodes[] holds both
* old and new nodes at this point.
@@ -1265,10 +1264,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
WARN_ON_ONCE(ctx->buf_table.nr);
ctx->buf_table = data;
return 0;
-
-out_free:
- io_rsrc_data_free(ctx, &data);
- return ret;
}
/*
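With every validation hoisted ahead of io_rsrc_data_alloc(), the only
failure left inside the copy loop is node allocation, which now frees
the partial table and returns inline, letting the out_free label go.
The resulting shape of the function, sketched:

	/* 1. validate: source table non-empty, nr and offsets in range */
	/* 2. allocate the destination table */
	/* 3. copy dst nodes below dst_off, clone the source range,
	 *    then copy dst nodes above the cloned range */
	dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
	if (!dst_node) {
		io_rsrc_data_free(ctx, &data);	/* drops refs taken so far */
		return -ENOMEM;
	}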