summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2023-03-10 08:31:29 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2023-03-10 08:31:29 -0800
commit f331c5de7960d69fc767d2dc08f5f5859ce70061 (patch)
tree 52cc2d35b1de98234bd7ad1ca53180dc7e15c3a5
parent 49be4fb28109b86a8ffe117415c306389a394cb2 (diff)
parent fa780334a8c392d959ae05eb19f2410b3a1e6cb0 (diff)
Merge tag 'io_uring-6.3-2023-03-09' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Stop setting PF_NO_SETAFFINITY on io-wq workers.

   This has been reported in the past as it confuses some applications,
   as some of their threads will fail with -1/EINVAL if attempted
   affinitized. Most recent report was on cpusets, where enabling that
   with io-wq workers active will fail.

   Just deal with the mask changing by checking when a worker times
   out, and then exit if we have no work pending.

 - Fix an issue with passthrough support where we don't properly check
   if the file type has pollable uring_cmd support.

 - Fix a reported W=1 warning on a variable being set and unused. Add a
   special helper for iterating these lists that doesn't save the
   previous list element, if that iterator never ends up using it.

* tag 'io_uring-6.3-2023-03-09' of git://git.kernel.dk/linux:
  io_uring: silence variable ‘prev’ set but not used warning
  io_uring/uring_cmd: ensure that device supports IOPOLL
  io_uring/io-wq: stop setting PF_NO_SETAFFINITY on io-wq workers
-rw-r--r-- io_uring/io-wq.c 16
-rw-r--r-- io_uring/io_uring.c 4
-rw-r--r-- io_uring/slist.h 5
-rw-r--r-- io_uring/uring_cmd.c 4
4 files changed, 20 insertions, 9 deletions
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 411bb2d1acd4..f81c0a7136a5 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -616,7 +616,7 @@ static int io_wqe_worker(void *data)
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
- bool last_timeout = false;
+ bool exit_mask = false, last_timeout = false;
char buf[TASK_COMM_LEN];
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
@@ -632,8 +632,11 @@ static int io_wqe_worker(void *data)
io_worker_handle_work(worker);
raw_spin_lock(&wqe->lock);
- /* timed out, exit unless we're the last worker */
- if (last_timeout && acct->nr_workers > 1) {
+ /*
+ * Last sleep timed out. Exit if we're not the last worker,
+ * or if someone modified our affinity.
+ */
+ if (last_timeout && (exit_mask || acct->nr_workers > 1)) {
acct->nr_workers--;
raw_spin_unlock(&wqe->lock);
__set_current_state(TASK_RUNNING);
@@ -652,7 +655,11 @@ static int io_wqe_worker(void *data)
continue;
break;
}
- last_timeout = !ret;
+ if (!ret) {
+ last_timeout = true;
+ exit_mask = !cpumask_test_cpu(raw_smp_processor_id(),
+ wqe->cpu_mask);
+ }
}
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
@@ -704,7 +711,6 @@ static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
tsk->worker_private = worker;
worker->task = tsk;
set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
- tsk->flags |= PF_NO_SETAFFINITY;
raw_spin_lock(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index fd1cc35a1c00..722624b6d0dc 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1499,14 +1499,14 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
- struct io_wq_work_node *node, *prev;
struct io_submit_state *state = &ctx->submit_state;
+ struct io_wq_work_node *node;
__io_cq_lock(ctx);
/* must come first to preserve CQE ordering in failure cases */
if (state->cqes_count)
__io_flush_post_cqes(ctx);
- wq_list_for_each(node, prev, &state->compl_reqs) {
+ __wq_list_for_each(node, &state->compl_reqs) {
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
diff --git a/io_uring/slist.h b/io_uring/slist.h
index 7c198a40d5f1..0eb194817242 100644
--- a/io_uring/slist.h
+++ b/io_uring/slist.h
@@ -3,6 +3,9 @@
#include <linux/io_uring_types.h>
+#define __wq_list_for_each(pos, head) \
+ for (pos = (head)->first; pos; pos = (pos)->next)
+
#define wq_list_for_each(pos, prv, head) \
for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
@@ -113,4 +116,4 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
return container_of(work->list.next, struct io_wq_work, list);
}
-#endif // INTERNAL_IO_SLIST_H \ No newline at end of file
+#endif // INTERNAL_IO_SLIST_H
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 446a189b78b0..2e4c483075d3 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -108,7 +108,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
struct file *file = req->file;
int ret;
- if (!req->file->f_op->uring_cmd)
+ if (!file->f_op->uring_cmd)
return -EOPNOTSUPP;
ret = security_uring_cmd(ioucmd);
@@ -120,6 +120,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
if (ctx->flags & IORING_SETUP_CQE32)
issue_flags |= IO_URING_F_CQE32;
if (ctx->flags & IORING_SETUP_IOPOLL) {
+ if (!file->f_op->uring_cmd_iopoll)
+ return -EOPNOTSUPP;
issue_flags |= IO_URING_F_IOPOLL;
req->iopoll_completed = 0;
WRITE_ONCE(ioucmd->cookie, NULL);