summary | refs | log | tree | commit | diff
path: root/io_uring/msg_ring.c
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring/msg_ring.c')
-rw-r--r-- io_uring/msg_ring.c 217
1 files changed, 138 insertions, 79 deletions
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 15602a136821..7063ea7964e7 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -13,6 +13,10 @@
#include "filetable.h"
#include "msg_ring.h"
+/* All valid masks for MSG_RING */
+#define IORING_MSG_RING_MASK (IORING_MSG_RING_CQE_SKIP | \
+ IORING_MSG_RING_FLAGS_PASS)
+
struct io_msg {
struct file *file;
struct file *src_file;
@@ -21,7 +25,10 @@ struct io_msg {
u32 len;
u32 cmd;
u32 src_fd;
- u32 dst_fd;
+ union {
+ u32 dst_fd;
+ u32 cqe_flags;
+ };
u32 flags;
};
@@ -30,8 +37,8 @@ static void io_double_unlock_ctx(struct io_ring_ctx *octx)
mutex_unlock(&octx->uring_lock);
}
-static int io_double_lock_ctx(struct io_ring_ctx *octx,
- unsigned int issue_flags)
+static int io_lock_external_ctx(struct io_ring_ctx *octx,
+ unsigned int issue_flags)
{
/*
* To ensure proper ordering between the two ctxs, we can only
@@ -60,102 +67,111 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
- if (!target_ctx->task_complete)
- return false;
- return current != target_ctx->submitter_task;
+ return target_ctx->task_complete;
}
-static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
+static void io_msg_tw_complete(struct io_tw_req tw_req, io_tw_token_t tw)
{
- struct io_ring_ctx *ctx = req->file->private_data;
- struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
- struct task_struct *task = READ_ONCE(ctx->submitter_task);
+ struct io_kiocb *req = tw_req.req;
+ struct io_ring_ctx *ctx = req->ctx;
- if (unlikely(!task))
- return -EOWNERDEAD;
+ io_add_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, req->cqe.flags);
+ kfree_rcu(req, rcu_head);
+ percpu_ref_put(&ctx->refs);
+}
- init_task_work(&msg->tw, func);
- if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
+static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ int res, u32 cflags, u64 user_data)
+{
+ if (!READ_ONCE(ctx->submitter_task)) {
+ kfree_rcu(req, rcu_head);
return -EOWNERDEAD;
-
- return IOU_ISSUE_SKIP_COMPLETE;
+ }
+ req->opcode = IORING_OP_NOP;
+ req->cqe.user_data = user_data;
+ io_req_set_res(req, res, cflags);
+ percpu_ref_get(&ctx->refs);
+ req->ctx = ctx;
+ req->tctx = NULL;
+ req->io_task_work.func = io_msg_tw_complete;
+ io_req_task_work_add_remote(req, IOU_F_TWQ_LAZY_WAKE);
+ return 0;
}
-static void io_msg_tw_complete(struct callback_head *head)
+static int io_msg_data_remote(struct io_ring_ctx *target_ctx,
+ struct io_msg *msg)
{
- struct io_msg *msg = container_of(head, struct io_msg, tw);
- struct io_kiocb *req = cmd_to_io_kiocb(msg);
- struct io_ring_ctx *target_ctx = req->file->private_data;
- int ret = 0;
-
- if (current->flags & PF_EXITING) {
- ret = -EOWNERDEAD;
- } else {
- /*
- * If the target ring is using IOPOLL mode, then we need to be
- * holding the uring_lock for posting completions. Other ring
- * types rely on the regular completion locking, which is
- * handled while posting.
- */
- if (target_ctx->flags & IORING_SETUP_IOPOLL)
- mutex_lock(&target_ctx->uring_lock);
- if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
- ret = -EOVERFLOW;
- if (target_ctx->flags & IORING_SETUP_IOPOLL)
- mutex_unlock(&target_ctx->uring_lock);
- }
+ struct io_kiocb *target;
+ u32 flags = 0;
- if (ret < 0)
- req_set_fail(req);
- io_req_queue_tw_complete(req, ret);
+ target = kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO) ;
+ if (unlikely(!target))
+ return -ENOMEM;
+
+ if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
+ flags = msg->cqe_flags;
+
+ return io_msg_remote_post(target_ctx, target, msg->len, flags,
+ msg->user_data);
}
-static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
+static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,
+ struct io_msg *msg, unsigned int issue_flags)
{
- struct io_ring_ctx *target_ctx = req->file->private_data;
- struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+ u32 flags = 0;
int ret;
- if (msg->src_fd || msg->dst_fd || msg->flags)
+ if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
+ return -EINVAL;
+ if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
return -EINVAL;
if (target_ctx->flags & IORING_SETUP_R_DISABLED)
return -EBADFD;
if (io_msg_need_remote(target_ctx))
- return io_msg_exec_remote(req, io_msg_tw_complete);
+ return io_msg_data_remote(target_ctx, msg);
+
+ if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
+ flags = msg->cqe_flags;
ret = -EOVERFLOW;
if (target_ctx->flags & IORING_SETUP_IOPOLL) {
- if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
+ if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
return -EAGAIN;
- if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
- ret = 0;
- io_double_unlock_ctx(target_ctx);
- } else {
- if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
- ret = 0;
}
+ if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
+ ret = 0;
+ if (target_ctx->flags & IORING_SETUP_IOPOLL)
+ io_double_unlock_ctx(target_ctx);
return ret;
}
-static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
+static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_ring_ctx *target_ctx = req->file->private_data;
+ struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+
+ return __io_msg_ring_data(target_ctx, msg, issue_flags);
+}
+
+static int io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
struct io_ring_ctx *ctx = req->ctx;
- struct file *file = NULL;
- unsigned long file_ptr;
- int idx = msg->src_fd;
+ struct io_rsrc_node *node;
+ int ret = -EBADF;
io_ring_submit_lock(ctx, issue_flags);
- if (likely(idx < ctx->nr_user_files)) {
- idx = array_index_nospec(idx, ctx->nr_user_files);
- file_ptr = io_fixed_file_slot(&ctx->file_table, idx)->file_ptr;
- file = (struct file *) (file_ptr & FFS_MASK);
- if (file)
- get_file(file);
+ node = io_rsrc_node_lookup(&ctx->file_table.data, msg->src_fd);
+ if (node) {
+ msg->src_file = io_slot_file(node);
+ if (msg->src_file)
+ get_file(msg->src_file);
+ req->flags |= REQ_F_NEED_CLEANUP;
+ ret = 0;
}
io_ring_submit_unlock(ctx, issue_flags);
- return file;
+ return ret;
}
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
@@ -165,7 +181,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
struct file *src_file = msg->src_file;
int ret;
- if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
+ if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
return -EAGAIN;
ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
@@ -183,7 +199,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
* completes with -EOVERFLOW, then the sender must ensure that a
* later IORING_OP_MSG_RING delivers the message.
*/
- if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
ret = -EOVERFLOW;
out_unlock:
io_double_unlock_ctx(target_ctx);
@@ -203,34 +219,47 @@ static void io_msg_tw_fd_complete(struct callback_head *head)
io_req_queue_tw_complete(req, ret);
}
+static int io_msg_fd_remote(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->file->private_data;
+ struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+ struct task_struct *task = READ_ONCE(ctx->submitter_task);
+
+ if (unlikely(!task))
+ return -EOWNERDEAD;
+
+ init_task_work(&msg->tw, io_msg_tw_fd_complete);
+ if (task_work_add(task, &msg->tw, TWA_SIGNAL))
+ return -EOWNERDEAD;
+
+ return IOU_ISSUE_SKIP_COMPLETE;
+}
+
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *target_ctx = req->file->private_data;
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
struct io_ring_ctx *ctx = req->ctx;
- struct file *src_file = msg->src_file;
+ if (msg->len)
+ return -EINVAL;
if (target_ctx == ctx)
return -EINVAL;
if (target_ctx->flags & IORING_SETUP_R_DISABLED)
return -EBADFD;
- if (!src_file) {
- src_file = io_msg_grab_file(req, issue_flags);
- if (!src_file)
- return -EBADF;
- msg->src_file = src_file;
- req->flags |= REQ_F_NEED_CLEANUP;
+ if (!msg->src_file) {
+ int ret = io_msg_grab_file(req, issue_flags);
+ if (unlikely(ret))
+ return ret;
}
if (io_msg_need_remote(target_ctx))
- return io_msg_exec_remote(req, io_msg_tw_fd_complete);
+ return io_msg_fd_remote(req);
return io_msg_install_complete(req, issue_flags);
}
-int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int __io_msg_ring_prep(struct io_msg *msg, const struct io_uring_sqe *sqe)
{
- struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
-
if (unlikely(sqe->buf_index || sqe->personality))
return -EINVAL;
@@ -241,12 +270,17 @@ int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
msg->src_fd = READ_ONCE(sqe->addr3);
msg->dst_fd = READ_ONCE(sqe->file_index);
msg->flags = READ_ONCE(sqe->msg_ring_flags);
- if (msg->flags & ~IORING_MSG_RING_CQE_SKIP)
+ if (msg->flags & ~IORING_MSG_RING_MASK)
return -EINVAL;
return 0;
}
+int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ return __io_msg_ring_prep(io_kiocb_to_cmd(req, struct io_msg), sqe);
+}
+
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
@@ -275,5 +309,30 @@ done:
req_set_fail(req);
}
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return IOU_COMPLETE;
+}
+
+int io_uring_sync_msg_ring(struct io_uring_sqe *sqe)
+{
+ struct io_msg io_msg = { };
+ int ret;
+
+ ret = __io_msg_ring_prep(&io_msg, sqe);
+ if (unlikely(ret))
+ return ret;
+
+ /*
+ * Only data sending supported, not IORING_MSG_SEND_FD as that one
+ * doesn't make sense without a source ring to send files from.
+ */
+ if (io_msg.cmd != IORING_MSG_DATA)
+ return -EINVAL;
+
+ CLASS(fd, f)(sqe->fd);
+ if (fd_empty(f))
+ return -EBADF;
+ if (!io_is_uring_fops(fd_file(f)))
+ return -EBADFD;
+ return __io_msg_ring_data(fd_file(f)->private_data,
+ &io_msg, IO_URING_F_UNLOCKED);
}