-rw-r--r--  include/linux/io_uring/cmd.h      2
-rw-r--r--  include/linux/io_uring_types.h    3
-rw-r--r--  io_uring/Makefile                 2
-rw-r--r--  io_uring/alloc_cache.c           44
-rw-r--r--  io_uring/alloc_cache.h           69
-rw-r--r--  io_uring/filetable.c              2
-rw-r--r--  io_uring/futex.c                  4
-rw-r--r--  io_uring/io_uring.c              12
-rw-r--r--  io_uring/io_uring.h              21
-rw-r--r--  io_uring/msg_ring.c               4
-rw-r--r--  io_uring/net.c                  134
-rw-r--r--  io_uring/net.h                   20
-rw-r--r--  io_uring/poll.c                   6
-rw-r--r--  io_uring/register.c               8
-rw-r--r--  io_uring/rsrc.c                  88
-rw-r--r--  io_uring/rsrc.h                   5
-rw-r--r--  io_uring/rw.c                    41
-rw-r--r--  io_uring/rw.h                    27
-rw-r--r--  io_uring/timeout.c                2
-rw-r--r--  io_uring/uring_cmd.c             19
-rw-r--r--  io_uring/waitid.c                 2
21 files changed, 272 insertions, 243 deletions
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index a3ce553413de..abd0c8bd950b 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -19,8 +19,8 @@ struct io_uring_cmd {
};
struct io_uring_cmd_data {
- struct io_uring_sqe sqes[2];
void *op_data;
+ struct io_uring_sqe sqes[2];
};
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 623d8e798a11..3def525a1da3 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -222,7 +222,8 @@ struct io_alloc_cache {
void **entries;
unsigned int nr_cached;
unsigned int max_cached;
- size_t elem_size;
+ unsigned int elem_size;
+ unsigned int init_clear;
};
struct io_ring_ctx {
diff --git a/io_uring/Makefile b/io_uring/Makefile
index 53167bef37d7..d695b60dba4f 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
sync.o msg_ring.o advise.o openclose.o \
epoll.o statx.o timeout.o fdinfo.o \
cancel.o waitid.o register.o \
- truncate.o memmap.o
+ truncate.o memmap.o alloc_cache.o
obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
diff --git a/io_uring/alloc_cache.c b/io_uring/alloc_cache.c
new file mode 100644
index 000000000000..58423888b736
--- /dev/null
+++ b/io_uring/alloc_cache.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "alloc_cache.h"
+
+void io_alloc_cache_free(struct io_alloc_cache *cache,
+ void (*free)(const void *))
+{
+ void *entry;
+
+ if (!cache->entries)
+ return;
+
+ while ((entry = io_alloc_cache_get(cache)) != NULL)
+ free(entry);
+
+ kvfree(cache->entries);
+ cache->entries = NULL;
+}
+
+/* returns false if the cache was initialized properly */
+bool io_alloc_cache_init(struct io_alloc_cache *cache,
+ unsigned max_nr, unsigned int size,
+ unsigned int init_bytes)
+{
+ cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL);
+ if (!cache->entries)
+ return true;
+
+ cache->nr_cached = 0;
+ cache->max_cached = max_nr;
+ cache->elem_size = size;
+ cache->init_clear = init_bytes;
+ return false;
+}
+
+void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp)
+{
+ void *obj;
+
+ obj = kmalloc(cache->elem_size, gfp);
+ if (obj && cache->init_clear)
+ memset(obj, 0, cache->init_clear);
+ return obj;
+}
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index a3a8cfec32ce..0dd17d8ba93a 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -1,11 +1,30 @@
#ifndef IOU_ALLOC_CACHE_H
#define IOU_ALLOC_CACHE_H
+#include <linux/io_uring_types.h>
+
/*
* Don't allow the cache to grow beyond this size.
*/
#define IO_ALLOC_CACHE_MAX 128
+void io_alloc_cache_free(struct io_alloc_cache *cache,
+ void (*free)(const void *));
+bool io_alloc_cache_init(struct io_alloc_cache *cache,
+ unsigned max_nr, unsigned int size,
+ unsigned int init_bytes);
+
+void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp);
+
+static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr)
+{
+ if (IS_ENABLED(CONFIG_KASAN)) {
+ kfree(*iov);
+ *iov = NULL;
+ *nr = 0;
+ }
+}
+
static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
void *entry)
{
@@ -23,52 +42,30 @@ static inline void *io_alloc_cache_get(struct io_alloc_cache *cache)
if (cache->nr_cached) {
void *entry = cache->entries[--cache->nr_cached];
+ /*
+ * If KASAN is enabled, always clear the initial bytes that
+ * must be zeroed post alloc, in case any of them overlap
+ * with KASAN storage.
+ */
+#if defined(CONFIG_KASAN)
kasan_mempool_unpoison_object(entry, cache->elem_size);
+ if (cache->init_clear)
+ memset(entry, 0, cache->init_clear);
+#endif
return entry;
}
return NULL;
}
-static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp,
- void (*init_once)(void *obj))
+static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp)
{
- if (unlikely(!cache->nr_cached)) {
- void *obj = kmalloc(cache->elem_size, gfp);
+ void *obj;
- if (obj && init_once)
- init_once(obj);
+ obj = io_alloc_cache_get(cache);
+ if (obj)
return obj;
- }
- return io_alloc_cache_get(cache);
+ return io_cache_alloc_new(cache, gfp);
}
-/* returns false if the cache was initialized properly */
-static inline bool io_alloc_cache_init(struct io_alloc_cache *cache,
- unsigned max_nr, size_t size)
-{
- cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL);
- if (cache->entries) {
- cache->nr_cached = 0;
- cache->max_cached = max_nr;
- cache->elem_size = size;
- return false;
- }
- return true;
-}
-
-static inline void io_alloc_cache_free(struct io_alloc_cache *cache,
- void (*free)(const void *))
-{
- void *entry;
-
- if (!cache->entries)
- return;
-
- while ((entry = io_alloc_cache_get(cache)) != NULL)
- free(entry);
-
- kvfree(cache->entries);
- cache->entries = NULL;
-}
#endif
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index a21660e3145a..dd8eeec97acf 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
if (slot_index >= ctx->file_table.data.nr)
return -EINVAL;
- node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
+ node = io_rsrc_node_alloc(IORING_RSRC_FILE);
if (!node)
return -ENOMEM;
diff --git a/io_uring/futex.c b/io_uring/futex.c
index 30139cc150f2..3159a2b7eeca 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -36,7 +36,7 @@ struct io_futex_data {
bool io_futex_cache_init(struct io_ring_ctx *ctx)
{
return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX,
- sizeof(struct io_futex_data));
+ sizeof(struct io_futex_data), 0);
}
void io_futex_cache_free(struct io_ring_ctx *ctx)
@@ -320,7 +320,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
}
io_ring_submit_lock(ctx, issue_flags);
- ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT, NULL);
+ ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT);
if (!ifd) {
ret = -ENOMEM;
goto done_unlock;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5a0f8a5041d6..ceacf6230e34 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -315,16 +315,18 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->cq_overflow_list);
INIT_LIST_HEAD(&ctx->io_buffers_cache);
ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX,
- sizeof(struct async_poll));
+ sizeof(struct async_poll), 0);
ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX,
- sizeof(struct io_async_msghdr));
+ sizeof(struct io_async_msghdr),
+ offsetof(struct io_async_msghdr, clear));
ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX,
- sizeof(struct io_async_rw));
+ sizeof(struct io_async_rw),
+ offsetof(struct io_async_rw, clear));
ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX,
- sizeof(struct io_uring_cmd_data));
+ sizeof(struct io_uring_cmd_data), 0);
spin_lock_init(&ctx->msg_lock);
ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
- sizeof(struct io_kiocb));
+ sizeof(struct io_kiocb), 0);
ret |= io_futex_cache_init(ctx);
if (ret)
goto free_ref;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index f65e3f3ede51..ab619e63ef39 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -226,21 +226,16 @@ static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags)
}
static inline void *io_uring_alloc_async_data(struct io_alloc_cache *cache,
- struct io_kiocb *req,
- void (*init_once)(void *obj))
+ struct io_kiocb *req)
{
- req->async_data = io_cache_alloc(cache, GFP_KERNEL, init_once);
- if (req->async_data)
- req->flags |= REQ_F_ASYNC_DATA;
- return req->async_data;
-}
+ if (cache) {
+ req->async_data = io_cache_alloc(cache, GFP_KERNEL);
+ } else {
+ const struct io_issue_def *def = &io_issue_defs[req->opcode];
-static inline void *io_uring_alloc_async_data_nocache(struct io_kiocb *req)
-{
- const struct io_issue_def *def = &io_issue_defs[req->opcode];
-
- WARN_ON_ONCE(!def->async_size);
- req->async_data = kmalloc(def->async_size, GFP_KERNEL);
+ WARN_ON_ONCE(!def->async_size);
+ req->async_data = kmalloc(def->async_size, GFP_KERNEL);
+ }
if (req->async_data)
req->flags |= REQ_F_ASYNC_DATA;
return req->async_data;
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index bd3cd78d2dba..7e6f68e911f1 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -89,8 +89,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts)
static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
int res, u32 cflags, u64 user_data)
{
- req->tctx = READ_ONCE(ctx->submitter_task->io_uring);
- if (!req->tctx) {
+ if (!READ_ONCE(ctx->submitter_task)) {
kmem_cache_free(req_cachep, req);
return -EOWNERDEAD;
}
@@ -98,6 +97,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
io_req_set_res(req, res, cflags);
percpu_ref_get(&ctx->refs);
req->ctx = ctx;
+ req->tctx = NULL;
req->io_task_work.func = io_msg_tw_complete;
io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE);
return 0;
diff --git a/io_uring/net.c b/io_uring/net.c
index 85f55fbc25c9..17852a6616ff 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -137,7 +137,6 @@ static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_async_msghdr *hdr = req->async_data;
- struct iovec *iov;
/* can't recycle, ensure we free the iovec if we have one */
if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
@@ -146,44 +145,30 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
}
/* Let normal cleanup path reap it if we fail adding to the cache */
- iov = hdr->free_iov;
+ io_alloc_cache_kasan(&hdr->free_iov, &hdr->free_iov_nr);
if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
- if (iov)
- kasan_mempool_poison_object(iov);
req->async_data = NULL;
req->flags &= ~REQ_F_ASYNC_DATA;
}
}
-static void io_msg_async_data_init(void *obj)
-{
- struct io_async_msghdr *hdr = (struct io_async_msghdr *)obj;
-
- hdr->free_iov = NULL;
- hdr->free_iov_nr = 0;
-}
-
static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_async_msghdr *hdr;
- hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req,
- io_msg_async_data_init);
+ hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
if (!hdr)
return NULL;
/* If the async data was cached, we might have an iov cached inside. */
- if (hdr->free_iov) {
- kasan_mempool_unpoison_object(hdr->free_iov,
- hdr->free_iov_nr * sizeof(struct iovec));
+ if (hdr->free_iov)
req->flags |= REQ_F_NEED_CLEANUP;
- }
return hdr;
}
/* assign new iovec to kmsg, if we need to */
-static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
+static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
struct iovec *iov)
{
if (iov) {
@@ -193,7 +178,6 @@ static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
kfree(kmsg->free_iov);
kmsg->free_iov = iov;
}
- return 0;
}
static inline void io_mshot_prep_retry(struct io_kiocb *req,
@@ -255,7 +239,8 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req,
if (unlikely(ret < 0))
return ret;
- return io_net_vec_assign(req, iomsg, iov);
+ io_net_vec_assign(req, iomsg, iov);
+ return 0;
}
#endif
@@ -295,11 +280,12 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
ret = -EINVAL;
goto ua_end;
} else {
+ struct iovec __user *uiov = msg->msg_iov;
+
/* we only need the length for provided buffers */
- if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
+ if (!access_ok(&uiov->iov_len, sizeof(uiov->iov_len)))
goto ua_end;
- unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
- ua_end);
+ unsafe_get_user(iov->iov_len, &uiov->iov_len, ua_end);
sr->len = iov->iov_len;
}
ret = 0;
@@ -314,7 +300,8 @@ ua_end:
if (unlikely(ret < 0))
return ret;
- return io_net_vec_assign(req, iomsg, iov);
+ io_net_vec_assign(req, iomsg, iov);
+ return 0;
}
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
@@ -579,6 +566,54 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
return IOU_OK;
}
+static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
+ struct io_async_msghdr *kmsg)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+
+ int ret;
+ struct buf_sel_arg arg = {
+ .iovs = &kmsg->fast_iov,
+ .max_len = min_not_zero(sr->len, INT_MAX),
+ .nr_iovs = 1,
+ };
+
+ if (kmsg->free_iov) {
+ arg.nr_iovs = kmsg->free_iov_nr;
+ arg.iovs = kmsg->free_iov;
+ arg.mode = KBUF_MODE_FREE;
+ }
+
+ if (!(sr->flags & IORING_RECVSEND_BUNDLE))
+ arg.nr_iovs = 1;
+ else
+ arg.mode |= KBUF_MODE_EXPAND;
+
+ ret = io_buffers_select(req, &arg, issue_flags);
+ if (unlikely(ret < 0))
+ return ret;
+
+ if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
+ kmsg->free_iov_nr = ret;
+ kmsg->free_iov = arg.iovs;
+ req->flags |= REQ_F_NEED_CLEANUP;
+ }
+ sr->len = arg.out_len;
+
+ if (ret == 1) {
+ sr->buf = arg.iovs[0].iov_base;
+ ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
+ &kmsg->msg.msg_iter);
+ if (unlikely(ret))
+ return ret;
+ } else {
+ iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
+ arg.iovs, ret, arg.out_len);
+ }
+
+ return 0;
+}
+
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
@@ -602,44 +637,9 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
retry_bundle:
if (io_do_buffer_select(req)) {
- struct buf_sel_arg arg = {
- .iovs = &kmsg->fast_iov,
- .max_len = min_not_zero(sr->len, INT_MAX),
- .nr_iovs = 1,
- };
-
- if (kmsg->free_iov) {
- arg.nr_iovs = kmsg->free_iov_nr;
- arg.iovs = kmsg->free_iov;
- arg.mode = KBUF_MODE_FREE;
- }
-
- if (!(sr->flags & IORING_RECVSEND_BUNDLE))
- arg.nr_iovs = 1;
- else
- arg.mode |= KBUF_MODE_EXPAND;
-
- ret = io_buffers_select(req, &arg, issue_flags);
- if (unlikely(ret < 0))
+ ret = io_send_select_buffer(req, issue_flags, kmsg);
+ if (ret)
return ret;
-
- if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
- kmsg->free_iov_nr = ret;
- kmsg->free_iov = arg.iovs;
- req->flags |= REQ_F_NEED_CLEANUP;
- }
- sr->len = arg.out_len;
-
- if (ret == 1) {
- sr->buf = arg.iovs[0].iov_base;
- ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
- &kmsg->msg.msg_iter);
- if (unlikely(ret))
- return ret;
- } else {
- iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
- arg.iovs, ret, arg.out_len);
- }
}
/*
@@ -1710,6 +1710,11 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
int ret;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ if (unlikely(req->flags & REQ_F_FAIL)) {
+ ret = -ECONNRESET;
+ goto out;
+ }
+
file_flags = force_nonblock ? O_NONBLOCK : 0;
ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
@@ -1813,11 +1818,8 @@ void io_netmsg_cache_free(const void *entry)
{
struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
- if (kmsg->free_iov) {
- kasan_mempool_unpoison_object(kmsg->free_iov,
- kmsg->free_iov_nr * sizeof(struct iovec));
+ if (kmsg->free_iov)
io_netmsg_iovec_free(kmsg);
- }
kfree(kmsg);
}
#endif
diff --git a/io_uring/net.h b/io_uring/net.h
index 52bfee05f06a..b804c2b36e60 100644
--- a/io_uring/net.h
+++ b/io_uring/net.h
@@ -5,16 +5,20 @@
struct io_async_msghdr {
#if defined(CONFIG_NET)
- struct iovec fast_iov;
- /* points to an allocated iov, if NULL we use fast_iov instead */
struct iovec *free_iov;
+ /* points to an allocated iov, if NULL we use fast_iov instead */
int free_iov_nr;
- int namelen;
- __kernel_size_t controllen;
- __kernel_size_t payloadlen;
- struct sockaddr __user *uaddr;
- struct msghdr msg;
- struct sockaddr_storage addr;
+ struct_group(clear,
+ int namelen;
+ struct iovec fast_iov;
+ __kernel_size_t controllen;
+ __kernel_size_t payloadlen;
+ struct sockaddr __user *uaddr;
+ struct msghdr msg;
+ struct sockaddr_storage addr;
+ );
+#else
+ struct_group(clear);
#endif
};
diff --git a/io_uring/poll.c b/io_uring/poll.c
index cc01c40b43d3..bb1c0cd4f809 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -273,6 +273,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
return IOU_POLL_REISSUE;
}
}
+ if (unlikely(req->cqe.res & EPOLLERR))
+ req_set_fail(req);
if (req->apoll_events & EPOLLONESHOT)
return IOU_POLL_DONE;
@@ -315,8 +317,10 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
ret = io_poll_check_events(req, ts);
if (ret == IOU_POLL_NO_ACTION) {
+ io_kbuf_recycle(req, 0);
return;
} else if (ret == IOU_POLL_REQUEUE) {
+ io_kbuf_recycle(req, 0);
__io_poll_execute(req, 0);
return;
}
@@ -650,7 +654,7 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
kfree(apoll->double_poll);
} else {
if (!(issue_flags & IO_URING_F_UNLOCKED))
- apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC, NULL);
+ apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC);
else
apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
if (!apoll)
diff --git a/io_uring/register.c b/io_uring/register.c
index 05025047d1da..9a4d2fbce4ae 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -552,7 +552,7 @@ overflow:
ctx->cqe_cached = ctx->cqe_sentinel = NULL;
WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped));
- WRITE_ONCE(n.rings->sq_flags, READ_ONCE(o.rings->sq_flags));
+ atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags));
WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags));
WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow));
@@ -853,6 +853,8 @@ struct file *io_uring_register_get_file(unsigned int fd, bool registered)
return ERR_PTR(-EINVAL);
fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
file = tctx->registered_rings[fd];
+ if (file)
+ get_file(file);
} else {
file = fget(fd);
}
@@ -919,7 +921,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
ctx->buf_table.nr, ret);
mutex_unlock(&ctx->uring_lock);
- if (!use_registered_ring)
- fput(file);
+
+ fput(file);
return ret;
}
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index e32ac5853391..af39b69eb4fd 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -118,7 +118,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
}
}
-struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type)
+struct io_rsrc_node *io_rsrc_node_alloc(int type)
{
struct io_rsrc_node *node;
@@ -203,7 +203,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
err = -EBADF;
break;
}
- node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
+ node = io_rsrc_node_alloc(IORING_RSRC_FILE);
if (!node) {
err = -ENOMEM;
fput(file);
@@ -444,8 +444,6 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
{
- lockdep_assert_held(&ctx->uring_lock);
-
if (node->tag)
io_post_aux_cqe(ctx, node->tag, 0, 0);
@@ -525,7 +523,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
goto fail;
}
ret = -ENOMEM;
- node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
+ node = io_rsrc_node_alloc(IORING_RSRC_FILE);
if (!node) {
fput(file);
goto fail;
@@ -730,7 +728,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
if (!iov->iov_base)
return NULL;
- node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
+ node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
if (!node)
return ERR_PTR(-ENOMEM);
node->buf = NULL;
@@ -921,6 +919,16 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
return 0;
}
+/* Lock two rings at once. The rings must be different! */
+static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2)
+{
+ if (ctx1 > ctx2)
+ swap(ctx1, ctx2);
+ mutex_lock(&ctx1->uring_lock);
+ mutex_lock_nested(&ctx2->uring_lock, SINGLE_DEPTH_NESTING);
+}
+
+/* Both rings are locked by the caller. */
static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
struct io_uring_clone_buffers *arg)
{
@@ -928,6 +936,9 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
int i, ret, off, nr;
unsigned int nbufs;
+ lockdep_assert_held(&ctx->uring_lock);
+ lockdep_assert_held(&src_ctx->uring_lock);
+
/*
* Accounting state is shared between the two rings; that only works if
* both rings are accounted towards the same counters.
@@ -942,7 +953,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE))
return -EBUSY;
- nbufs = READ_ONCE(src_ctx->buf_table.nr);
+ nbufs = src_ctx->buf_table.nr;
if (!arg->nr)
arg->nr = nbufs;
else if (arg->nr > nbufs)
@@ -966,27 +977,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
}
}
- /*
- * Drop our own lock here. We'll setup the data we need and reference
- * the source buffers, then re-grab, check, and assign at the end.
- */
- mutex_unlock(&ctx->uring_lock);
-
- mutex_lock(&src_ctx->uring_lock);
ret = -ENXIO;
nbufs = src_ctx->buf_table.nr;
if (!nbufs)
- goto out_unlock;
+ goto out_free;
ret = -EINVAL;
if (!arg->nr)
arg->nr = nbufs;
else if (arg->nr > nbufs)
- goto out_unlock;
+ goto out_free;
ret = -EOVERFLOW;
if (check_add_overflow(arg->nr, arg->src_off, &off))
- goto out_unlock;
+ goto out_free;
if (off > nbufs)
- goto out_unlock;
+ goto out_free;
off = arg->dst_off;
i = arg->src_off;
@@ -998,10 +1002,10 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
if (!src_node) {
dst_node = NULL;
} else {
- dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
+ dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
if (!dst_node) {
ret = -ENOMEM;
- goto out_unlock;
+ goto out_free;
}
refcount_inc(&src_node->buf->refs);
@@ -1011,10 +1015,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
i++;
}
- /* Have a ref on the bufs now, drop src lock and re-grab our own lock */
- mutex_unlock(&src_ctx->uring_lock);
- mutex_lock(&ctx->uring_lock);
-
/*
* If asked for replace, put the old table. data->nodes[] holds both
* old and new nodes at this point.
@@ -1023,24 +1023,17 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
io_rsrc_data_free(ctx, &ctx->buf_table);
/*
- * ctx->buf_table should be empty now - either the contents are being
- * replaced and we just freed the table, or someone raced setting up
- * a buffer table while the clone was happening. If not empty, fall
- * through to failure handling.
+ * ctx->buf_table must be empty now - either the contents are being
+ * replaced and we just freed the table, or the contents are being
+ * copied to a ring that does not have buffers yet (checked at function
+ * entry).
*/
- if (!ctx->buf_table.nr) {
- ctx->buf_table = data;
- return 0;
- }
+ WARN_ON_ONCE(ctx->buf_table.nr);
+ ctx->buf_table = data;
+ return 0;
- mutex_unlock(&ctx->uring_lock);
- mutex_lock(&src_ctx->uring_lock);
- /* someone raced setting up buffers, dump ours */
- ret = -EBUSY;
-out_unlock:
+out_free:
io_rsrc_data_free(ctx, &data);
- mutex_unlock(&src_ctx->uring_lock);
- mutex_lock(&ctx->uring_lock);
return ret;
}
@@ -1054,6 +1047,7 @@ out_unlock:
int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_uring_clone_buffers buf;
+ struct io_ring_ctx *src_ctx;
bool registered_src;
struct file *file;
int ret;
@@ -1071,8 +1065,18 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
file = io_uring_register_get_file(buf.src_fd, registered_src);
if (IS_ERR(file))
return PTR_ERR(file);
- ret = io_clone_buffers(ctx, file->private_data, &buf);
- if (!registered_src)
- fput(file);
+
+ src_ctx = file->private_data;
+ if (src_ctx != ctx) {
+ mutex_unlock(&ctx->uring_lock);
+ lock_two_rings(ctx, src_ctx);
+ }
+
+ ret = io_clone_buffers(ctx, src_ctx, &buf);
+
+ if (src_ctx != ctx)
+ mutex_unlock(&src_ctx->uring_lock);
+
+ fput(file);
return ret;
}
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index c8b093584461..190f7ee45de9 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -2,6 +2,8 @@
#ifndef IOU_RSRC_H
#define IOU_RSRC_H
+#include <linux/lockdep.h>
+
#define IO_NODE_ALLOC_CACHE_MAX 32
#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3)
@@ -43,7 +45,7 @@ struct io_imu_folio_data {
unsigned int nr_folios;
};
-struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type);
+struct io_rsrc_node *io_rsrc_node_alloc(int type);
void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node);
void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data);
int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);
@@ -80,6 +82,7 @@ static inline struct io_rsrc_node *io_rsrc_node_lookup(struct io_rsrc_data *data
static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
{
+ lockdep_assert_held(&ctx->uring_lock);
if (node && !--node->refs)
io_free_rsrc_node(ctx, node);
}
diff --git a/io_uring/rw.c b/io_uring/rw.c
index a9a2733be842..7aa1e4c9f64a 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -146,28 +146,15 @@ static inline int io_import_iovec(int rw, struct io_kiocb *req,
return 0;
}
-static void io_rw_iovec_free(struct io_async_rw *rw)
-{
- if (rw->free_iovec) {
- kfree(rw->free_iovec);
- rw->free_iov_nr = 0;
- rw->free_iovec = NULL;
- }
-}
-
static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_async_rw *rw = req->async_data;
- struct iovec *iov;
- if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
- io_rw_iovec_free(rw);
+ if (unlikely(issue_flags & IO_URING_F_UNLOCKED))
return;
- }
- iov = rw->free_iovec;
+
+ io_alloc_cache_kasan(&rw->free_iovec, &rw->free_iov_nr);
if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) {
- if (iov)
- kasan_mempool_poison_object(iov);
req->async_data = NULL;
req->flags &= ~REQ_F_ASYNC_DATA;
}
@@ -208,27 +195,16 @@ static void io_req_rw_cleanup(struct io_kiocb *req, unsigned int issue_flags)
}
}
-static void io_rw_async_data_init(void *obj)
-{
- struct io_async_rw *rw = (struct io_async_rw *)obj;
-
- rw->free_iovec = NULL;
- rw->bytes_done = 0;
-}
-
static int io_rw_alloc_async(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_async_rw *rw;
- rw = io_uring_alloc_async_data(&ctx->rw_cache, req, io_rw_async_data_init);
+ rw = io_uring_alloc_async_data(&ctx->rw_cache, req);
if (!rw)
return -ENOMEM;
- if (rw->free_iovec) {
- kasan_mempool_unpoison_object(rw->free_iovec,
- rw->free_iov_nr * sizeof(struct iovec));
+ if (rw->free_iovec)
req->flags |= REQ_F_NEED_CLEANUP;
- }
rw->bytes_done = 0;
return 0;
}
@@ -1323,10 +1299,7 @@ void io_rw_cache_free(const void *entry)
{
struct io_async_rw *rw = (struct io_async_rw *) entry;
- if (rw->free_iovec) {
- kasan_mempool_unpoison_object(rw->free_iovec,
- rw->free_iov_nr * sizeof(struct iovec));
- io_rw_iovec_free(rw);
- }
+ if (rw->free_iovec)
+ kfree(rw->free_iovec);
kfree(rw);
}
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 2d7656bd268d..eaa59bd64870 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -9,19 +9,24 @@ struct io_meta_state {
struct io_async_rw {
size_t bytes_done;
- struct iov_iter iter;
- struct iov_iter_state iter_state;
- struct iovec fast_iov;
struct iovec *free_iovec;
- int free_iov_nr;
- /* wpq is for buffered io, while meta fields are used with direct io */
- union {
- struct wait_page_queue wpq;
- struct {
- struct uio_meta meta;
- struct io_meta_state meta_state;
+ struct_group(clear,
+ struct iov_iter iter;
+ struct iov_iter_state iter_state;
+ struct iovec fast_iov;
+ int free_iov_nr;
+ /*
+ * wpq is for buffered io, while meta fields are used with
+ * direct io
+ */
+ union {
+ struct wait_page_queue wpq;
+ struct {
+ struct uio_meta meta;
+ struct io_meta_state meta_state;
+ };
};
- };
+ );
};
int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 2bd7e0a317bb..48fc8cf70784 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -544,7 +544,7 @@ static int __io_timeout_prep(struct io_kiocb *req,
if (WARN_ON_ONCE(req_has_async_data(req)))
return -EFAULT;
- data = io_uring_alloc_async_data_nocache(req);
+ data = io_uring_alloc_async_data(NULL, req);
if (!data)
return -ENOMEM;
data->req = req;
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index fc94c465a985..1f6a82128b47 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -168,23 +168,16 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2,
}
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
-static void io_uring_cmd_init_once(void *obj)
-{
- struct io_uring_cmd_data *data = obj;
-
- data->op_data = NULL;
-}
-
static int io_uring_cmd_prep_setup(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
struct io_uring_cmd_data *cache;
- cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req,
- io_uring_cmd_init_once);
+ cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req);
if (!cache)
return -ENOMEM;
+ cache->op_data = NULL;
if (!(req->flags & REQ_F_FORCE_ASYNC)) {
/* defer memcpy until we need it */
@@ -192,8 +185,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
return 0;
}
- memcpy(req->async_data, sqe, uring_sqe_size(req->ctx));
- ioucmd->sqe = req->async_data;
+ memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx));
+ ioucmd->sqe = cache->sqes;
return 0;
}
@@ -260,7 +253,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
struct io_uring_cmd_data *cache = req->async_data;
if (ioucmd->sqe != (void *) cache)
- memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx));
+ memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx));
return -EAGAIN;
} else if (ret == -EIOCBQUEUED) {
return -EIOCBQUEUED;
@@ -350,7 +343,7 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
if (!prot || !prot->ioctl)
return -EOPNOTSUPP;
- switch (cmd->sqe->cmd_op) {
+ switch (cmd->cmd_op) {
case SOCKET_URING_OP_SIOCINQ:
ret = prot->ioctl(sk, SIOCINQ, &arg);
if (ret)
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
index 6778c0ee76c4..853e97a7b0ec 100644
--- a/io_uring/waitid.c
+++ b/io_uring/waitid.c
@@ -303,7 +303,7 @@ int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
struct io_waitid_async *iwa;
int ret;
- iwa = io_uring_alloc_async_data_nocache(req);
+ iwa = io_uring_alloc_async_data(NULL, req);
if (!iwa)
return -ENOMEM;