Diffstat (limited to 'io_uring/cancel.c')
-rw-r--r--  io_uring/cancel.c  656
1 file changed, 656 insertions, 0 deletions
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
new file mode 100644
index 000000000000..ca12ac10c0ae
--- /dev/null
+++ b/io_uring/cancel.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/nospec.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "filetable.h"
+#include "io_uring.h"
+#include "tctx.h"
+#include "sqpoll.h"
+#include "uring_cmd.h"
+#include "poll.h"
+#include "timeout.h"
+#include "waitid.h"
+#include "futex.h"
+#include "cancel.h"
+
+struct io_cancel {
+ struct file *file;
+ u64 addr;
+ u32 flags;
+ s32 fd;
+ u8 opcode;
+};
+
+#define CANCEL_FLAGS (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
+ IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
+ IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)
+
+/*
+ * Returns true if the request matches the criteria outlined by 'cd'.
+ */
+bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
+{
+ bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;
+
+ if (req->ctx != cd->ctx)
+ return false;
+
+ if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
+ match_user_data = true;
+
+ if (cd->flags & IORING_ASYNC_CANCEL_ANY)
+ goto check_seq;
+ if (cd->flags & IORING_ASYNC_CANCEL_FD) {
+ if (req->file != cd->file)
+ return false;
+ }
+ if (cd->flags & IORING_ASYNC_CANCEL_OP) {
+ if (req->opcode != cd->opcode)
+ return false;
+ }
+ if (match_user_data && req->cqe.user_data != cd->data)
+ return false;
+ if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
+check_seq:
+ if (io_cancel_match_sequence(req, cd->seq))
+ return false;
+ }
+
+ return true;
+}
+
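+/* io-wq match callback: unwrap the work item and test it against 'cd'. */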
+static bool io_cancel_cb(struct io_wq_work *work, void *data)
+{
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ struct io_cancel_data *cd = data;
+
+ return io_cancel_req_match(req, cd);
+}
+
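+/*
+ * Ask the task's io-wq to cancel matching work: 0 means a queued request
+ * was canceled, -EALREADY means a match is already running, and -ENOENT
+ * means nothing matched.
+ */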
+static int io_async_cancel_one(struct io_uring_task *tctx,
+ struct io_cancel_data *cd)
+{
+ enum io_wq_cancel cancel_ret;
+ int ret = 0;
+ bool all;
+
+ if (!tctx || !tctx->io_wq)
+ return -ENOENT;
+
+ all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
+ cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
+ switch (cancel_ret) {
+ case IO_WQ_CANCEL_OK:
+ ret = 0;
+ break;
+ case IO_WQ_CANCEL_RUNNING:
+ ret = -EALREADY;
+ break;
+ case IO_WQ_CANCEL_NOTFOUND:
+ ret = -ENOENT;
+ break;
+ }
+
+ return ret;
+}
+
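+/*
+ * Try each place a matching request may be parked: the io-wq first, then
+ * armed poll requests, waitid, futex and, unless matching by fd, timeouts.
+ */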
+int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
+ unsigned issue_flags)
+{
+ struct io_ring_ctx *ctx = cd->ctx;
+ int ret;
+
+ WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring);
+
+ ret = io_async_cancel_one(tctx, cd);
+	/*
+	 * Fall through even for -EALREADY, as we may have a poll request
+	 * armed that needs unarming.
+	 */
+ if (!ret)
+ return 0;
+
+ ret = io_poll_cancel(ctx, cd, issue_flags);
+ if (ret != -ENOENT)
+ return ret;
+
+ ret = io_waitid_cancel(ctx, cd, issue_flags);
+ if (ret != -ENOENT)
+ return ret;
+
+ ret = io_futex_cancel(ctx, cd, issue_flags);
+ if (ret != -ENOENT)
+ return ret;
+
+ spin_lock(&ctx->completion_lock);
+ if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
+ ret = io_timeout_cancel(ctx, cd);
+ spin_unlock(&ctx->completion_lock);
+ return ret;
+}
+
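+/* Parse the cancel SQE and reject unknown or conflicting flag combinations. */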
+int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
+
+ if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
+ return -EINVAL;
+ if (sqe->off || sqe->splice_fd_in)
+ return -EINVAL;
+
+ cancel->addr = READ_ONCE(sqe->addr);
+ cancel->flags = READ_ONCE(sqe->cancel_flags);
+ if (cancel->flags & ~CANCEL_FLAGS)
+ return -EINVAL;
+ if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
+ if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
+ return -EINVAL;
+ cancel->fd = READ_ONCE(sqe->fd);
+ }
+ if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
+ if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
+ return -EINVAL;
+ cancel->opcode = READ_ONCE(sqe->len);
+ }
+
+ return 0;
+}
+
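+/*
+ * Core async cancel loop: exhaust matches on the issuing task first, then
+ * fall back to walking the io-wq of every task attached to the ring.
+ */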
+static int __io_async_cancel(struct io_cancel_data *cd,
+ struct io_uring_task *tctx,
+ unsigned int issue_flags)
+{
+ bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
+ struct io_ring_ctx *ctx = cd->ctx;
+ struct io_tctx_node *node;
+ int ret, nr = 0;
+
+ do {
+ ret = io_try_cancel(tctx, cd, issue_flags);
+ if (ret == -ENOENT)
+ break;
+ if (!all)
+ return ret;
+ nr++;
+ } while (1);
+
+ /* slow path, try all io-wq's */
+ io_ring_submit_lock(ctx, issue_flags);
+ ret = -ENOENT;
+ list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+ ret = io_async_cancel_one(node->task->io_uring, cd);
+ if (ret != -ENOENT) {
+ if (!all)
+ break;
+ nr++;
+ }
+ }
+ io_ring_submit_unlock(ctx, issue_flags);
+ return all ? nr : ret;
+}
+
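+/*
+ * IORING_OP_ASYNC_CANCEL issue path: resolve the target file if we're
+ * matching by fd, then run the cancelation and post the result.
+ */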
+int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
+ struct io_cancel_data cd = {
+ .ctx = req->ctx,
+ .data = cancel->addr,
+ .flags = cancel->flags,
+ .opcode = cancel->opcode,
+ .seq = atomic_inc_return(&req->ctx->cancel_seq),
+ };
+ struct io_uring_task *tctx = req->tctx;
+ int ret;
+
+ if (cd.flags & IORING_ASYNC_CANCEL_FD) {
+ if (req->flags & REQ_F_FIXED_FILE ||
+ cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
+ req->flags |= REQ_F_FIXED_FILE;
+ req->file = io_file_get_fixed(req, cancel->fd,
+ issue_flags);
+ } else {
+ req->file = io_file_get_normal(req, cancel->fd);
+ }
+ if (!req->file) {
+ ret = -EBADF;
+ goto done;
+ }
+ cd.file = req->file;
+ }
+
+ ret = __io_async_cancel(&cd, tctx, issue_flags);
+done:
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_set_res(req, ret, 0);
+ return IOU_COMPLETE;
+}
+
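+/* One synchronous cancelation pass against the given task context. */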
+static int __io_sync_cancel(struct io_uring_task *tctx,
+ struct io_cancel_data *cd, int fd)
+{
+ struct io_ring_ctx *ctx = cd->ctx;
+
+	/* the fixed file must be looked up each time, since we drop the uring_lock */
+ if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
+ (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
+ struct io_rsrc_node *node;
+
+ node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
+ if (unlikely(!node))
+ return -EBADF;
+ cd->file = io_slot_file(node);
+ if (!cd->file)
+ return -EBADF;
+ }
+
+ return __io_async_cancel(cd, tctx, 0);
+}
+
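+/*
+ * Synchronous cancel, invoked via io_uring_register(): retry until the match
+ * is no longer running, sleeping on cq_wait between attempts and honoring an
+ * optional timeout and pending signals.
+ */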
+int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
+ __must_hold(&ctx->uring_lock)
+{
+ struct io_cancel_data cd = {
+ .ctx = ctx,
+ .seq = atomic_inc_return(&ctx->cancel_seq),
+ };
+ ktime_t timeout = KTIME_MAX;
+ struct io_uring_sync_cancel_reg sc;
+ struct file *file = NULL;
+ DEFINE_WAIT(wait);
+ int ret, i;
+
+ if (copy_from_user(&sc, arg, sizeof(sc)))
+ return -EFAULT;
+ if (sc.flags & ~CANCEL_FLAGS)
+ return -EINVAL;
+ for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
+ if (sc.pad[i])
+ return -EINVAL;
+ for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
+ if (sc.pad2[i])
+ return -EINVAL;
+
+ cd.data = sc.addr;
+ cd.flags = sc.flags;
+ cd.opcode = sc.opcode;
+
+ /* we can grab a normal file descriptor upfront */
+ if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
+ !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
+ file = fget(sc.fd);
+ if (!file)
+ return -EBADF;
+ cd.file = file;
+ }
+
+ ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);
+
+ /* found something, done! */
+ if (ret != -EALREADY)
+ goto out;
+
+ if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) {
+ struct timespec64 ts = {
+ .tv_sec = sc.timeout.tv_sec,
+ .tv_nsec = sc.timeout.tv_nsec
+ };
+
+ timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
+ }
+
+	/*
+	 * Keep looking until we get -ENOENT. We'll get woken every time a
+	 * request completes and will retry the cancelation.
+	 */
+ do {
+ cd.seq = atomic_inc_return(&ctx->cancel_seq);
+
+ prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);
+
+ ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);
+
+ mutex_unlock(&ctx->uring_lock);
+ if (ret != -EALREADY)
+ break;
+
+ ret = io_run_task_work_sig(ctx);
+ if (ret < 0)
+ break;
+ ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS);
+ if (!ret) {
+ ret = -ETIME;
+ break;
+ }
+ mutex_lock(&ctx->uring_lock);
+ } while (1);
+
+ finish_wait(&ctx->cq_wait, &wait);
+ mutex_lock(&ctx->uring_lock);
+
+ if (ret == -ENOENT || ret > 0)
+ ret = 0;
+out:
+ if (file)
+ fput(file);
+ return ret;
+}
+
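+/*
+ * Remove and cancel every entry on 'list' that matches the task criteria;
+ * returns true if anything was canceled.
+ */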
+bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
+ struct hlist_head *list, bool cancel_all,
+ bool (*cancel)(struct io_kiocb *))
+{
+ struct hlist_node *tmp;
+ struct io_kiocb *req;
+ bool found = false;
+
+ lockdep_assert_held(&ctx->uring_lock);
+
+ hlist_for_each_entry_safe(req, tmp, list, hash_node) {
+ if (!io_match_task_safe(req, tctx, cancel_all))
+ continue;
+ hlist_del_init(&req->hash_node);
+ if (cancel(req))
+ found = true;
+ }
+
+ return found;
+}
+
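+/*
+ * Cancel entries on 'list' matching 'cd'; returns the number canceled, or
+ * -ENOENT if nothing matched.
+ */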
+int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+ unsigned int issue_flags, struct hlist_head *list,
+ bool (*cancel)(struct io_kiocb *))
+{
+ struct hlist_node *tmp;
+ struct io_kiocb *req;
+ int nr = 0;
+
+ io_ring_submit_lock(ctx, issue_flags);
+ hlist_for_each_entry_safe(req, tmp, list, hash_node) {
+ if (!io_cancel_req_match(req, cd))
+ continue;
+ if (cancel(req))
+ nr++;
+ if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
+ break;
+ }
+ io_ring_submit_unlock(ctx, issue_flags);
+ return nr ?: -ENOENT;
+}
+
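+/* True if any request in the link chain is marked inflight. */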
+static bool io_match_linked(struct io_kiocb *head)
+{
+ struct io_kiocb *req;
+
+ io_for_each_link(req, head) {
+ if (req->flags & REQ_F_INFLIGHT)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Like io_match_task(), but protected against racing with linked timeouts.
+ * The caller must not hold timeout_lock.
+ */
+bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
+ bool cancel_all)
+{
+ bool matched;
+
+ if (tctx && head->tctx != tctx)
+ return false;
+ if (cancel_all)
+ return true;
+
+ if (head->flags & REQ_F_LINK_TIMEOUT) {
+ struct io_ring_ctx *ctx = head->ctx;
+
+ /* protect against races with linked timeouts */
+ raw_spin_lock_irq(&ctx->timeout_lock);
+ matched = io_match_linked(head);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
+ } else {
+ matched = io_match_linked(head);
+ }
+ return matched;
+}
+
+void __io_uring_cancel(bool cancel_all)
+{
+ io_uring_unreg_ringfd();
+ io_uring_cancel_generic(cancel_all, NULL);
+}
+
+struct io_task_cancel {
+ struct io_uring_task *tctx;
+ bool all;
+};
+
+static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
+{
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ struct io_task_cancel *cancel = data;
+
+ return io_match_task_safe(req, cancel->tctx, cancel->all);
+}
+
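+/*
+ * Flush drain-deferred requests: everything queued up to the newest entry
+ * matching the task is failed with -ECANCELED.
+ */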
+static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
+ struct io_uring_task *tctx,
+ bool cancel_all)
+{
+ struct io_defer_entry *de;
+ LIST_HEAD(list);
+
+ list_for_each_entry_reverse(de, &ctx->defer_list, list) {
+ if (io_match_task_safe(de->req, tctx, cancel_all)) {
+ list_cut_position(&list, &ctx->defer_list, &de->list);
+ break;
+ }
+ }
+ if (list_empty(&list))
+ return false;
+
+ while (!list_empty(&list)) {
+ de = list_first_entry(&list, struct io_defer_entry, list);
+ list_del_init(&de->list);
+ ctx->nr_drained -= io_linked_nr(de->req);
+ io_req_task_queue_fail(de->req, -ECANCELED);
+ kfree(de);
+ }
+ return true;
+}
+
+__cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
+{
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+
+ return req->ctx == data;
+}
+
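+/* Cancel io-wq work targeting this ctx across all tasks attached to it. */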
+static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
+{
+ struct io_tctx_node *node;
+ enum io_wq_cancel cret;
+ bool ret = false;
+
+ mutex_lock(&ctx->uring_lock);
+ list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+ struct io_uring_task *tctx = node->task->io_uring;
+
+		/*
+		 * io_wq will stay alive while we hold uring_lock, because it's
+		 * killed after the ctx nodes, which requires taking the lock.
+		 */
+ if (!tctx || !tctx->io_wq)
+ continue;
+ cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
+ ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
+ }
+ mutex_unlock(&ctx->uring_lock);
+
+ return ret;
+}
+
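+/*
+ * One cancelation sweep over a ctx: io-wq, iopoll, local task_work, drained
+ * requests, poll, waitid, futex, uring_cmd and timeouts. Returns true if any
+ * progress was made, so the caller knows to loop again.
+ */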
+__cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
+ struct io_uring_task *tctx,
+ bool cancel_all, bool is_sqpoll_thread)
+{
+ struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
+ enum io_wq_cancel cret;
+ bool ret = false;
+
+	/* set it so io_req_local_work_add() will wake us up */
+ if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
+ atomic_set(&ctx->cq_wait_nr, 1);
+ smp_mb();
+ }
+
+ /* failed during ring init, it couldn't have issued any requests */
+ if (!ctx->rings)
+ return false;
+
+ if (!tctx) {
+ ret |= io_uring_try_cancel_iowq(ctx);
+ } else if (tctx->io_wq) {
+ /*
+ * Cancels requests of all rings, not only @ctx, but
+ * it's fine as the task is in exit/exec.
+ */
+ cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
+ &cancel, true);
+ ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
+ }
+
+ /* SQPOLL thread does its own polling */
+ if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
+ is_sqpoll_thread) {
+ while (!wq_list_empty(&ctx->iopoll_list)) {
+ io_iopoll_try_reap_events(ctx);
+ ret = true;
+ cond_resched();
+ }
+ }
+
+ if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
+ io_allowed_defer_tw_run(ctx))
+ ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
+ mutex_lock(&ctx->uring_lock);
+ ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
+ ret |= io_poll_remove_all(ctx, tctx, cancel_all);
+ ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
+ ret |= io_futex_remove_all(ctx, tctx, cancel_all);
+ ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
+ mutex_unlock(&ctx->uring_lock);
+ ret |= io_kill_timeouts(ctx, tctx, cancel_all);
+ if (tctx)
+ ret |= io_run_task_work() > 0;
+ else
+ ret |= flush_delayed_work(&ctx->fallback_work);
+ return ret;
+}
+
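+/* Inflight request count for the task, optionally only the tracked subset. */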
+static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
+{
+ if (tracked)
+ return atomic_read(&tctx->inflight_tracked);
+ return percpu_counter_sum(&tctx->inflight);
+}
+
+/*
+ * Find any io_uring ctx that this task has registered or done IO on, and
+ * cancel its requests. @sqd must be non-NULL iff this is an SQPOLL thread
+ * cancelation.
+ */
+__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
+{
+ struct io_uring_task *tctx = current->io_uring;
+ struct io_ring_ctx *ctx;
+ struct io_tctx_node *node;
+ unsigned long index;
+ s64 inflight;
+ DEFINE_WAIT(wait);
+
+ WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);
+
+ if (!current->io_uring)
+ return;
+ if (tctx->io_wq)
+ io_wq_exit_start(tctx->io_wq);
+
+ atomic_inc(&tctx->in_cancel);
+ do {
+ bool loop = false;
+
+ io_uring_drop_tctx_refs(current);
+ if (!tctx_inflight(tctx, !cancel_all))
+ break;
+
+ /* read completions before cancelations */
+ inflight = tctx_inflight(tctx, false);
+ if (!inflight)
+ break;
+
+ if (!sqd) {
+ xa_for_each(&tctx->xa, index, node) {
+ /* sqpoll task will cancel all its requests */
+ if (node->ctx->sq_data)
+ continue;
+ loop |= io_uring_try_cancel_requests(node->ctx,
+ current->io_uring,
+ cancel_all,
+ false);
+ }
+ } else {
+ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+ loop |= io_uring_try_cancel_requests(ctx,
+ current->io_uring,
+ cancel_all,
+ true);
+ }
+
+ if (loop) {
+ cond_resched();
+ continue;
+ }
+
+ prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
+ io_run_task_work();
+ io_uring_drop_tctx_refs(current);
+ xa_for_each(&tctx->xa, index, node) {
+ if (io_local_work_pending(node->ctx)) {
+ WARN_ON_ONCE(node->ctx->submitter_task &&
+ node->ctx->submitter_task != current);
+ goto end_wait;
+ }
+ }
+ /*
+ * If we've seen completions, retry without waiting. This
+ * avoids a race where a completion comes in before we did
+ * prepare_to_wait().
+ */
+ if (inflight == tctx_inflight(tctx, !cancel_all))
+ schedule();
+end_wait:
+ finish_wait(&tctx->wait, &wait);
+ } while (1);
+
+ io_uring_clean_tctx(tctx);
+ if (cancel_all) {
+ /*
+ * We shouldn't run task_works after cancel, so just leave
+ * ->in_cancel set for normal exit.
+ */
+ atomic_dec(&tctx->in_cancel);
+		/* for exec, all of current's requests should be gone; kill the tctx */
+ __io_uring_free(current);
+ }
+}