diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-20 20:27:33 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-20 20:27:33 -0800 |
commit | a312e1706ce6c124f04ec85ddece240f3bb2a696 (patch) | |
tree | ba749bac4f345fc48c7c79d9a9c5713fe87af300 /io_uring/rw.c | |
parent | 1cbfb828e05171ca2dd77b5988d068e6872480fe (diff) | |
parent | 561e3a0c40dc7e3ab7b0b3647a2b89eca16215d9 (diff) |
Merge tag 'for-6.14/io_uring-20250119' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe:
"Not a lot in terms of features this time around, mostly just cleanups
and code consolidation:
- Support for PI meta data read/write via io_uring, with NVMe and
SCSI covered
- Cleanup the per-op structure caching, making it consistent across
various command types
- Consolidate the various user mapped features into a concept called
regions, making the various users of that consistent
- Various cleanups and fixes"
* tag 'for-6.14/io_uring-20250119' of git://git.kernel.dk/linux: (56 commits)
io_uring/fdinfo: fix io_uring_show_fdinfo() misuse of ->d_iname
io_uring: reuse io_should_terminate_tw() for cmds
io_uring: Factor out a function to parse restrictions
io_uring/rsrc: require cloned buffers to share accounting contexts
io_uring: simplify the SQPOLL thread check when cancelling requests
io_uring: expose read/write attribute capability
io_uring/rw: don't gate retry on completion context
io_uring/rw: handle -EAGAIN retry at IO completion time
io_uring/rw: use io_rw_recycle() from cleanup path
io_uring/rsrc: simplify the bvec iter count calculation
io_uring: ensure io_queue_deferred() is out-of-line
io_uring/rw: always clear ->bytes_done on io_async_rw setup
io_uring/rw: use NULL for rw->free_iovec assigment
io_uring/rw: don't mask in f_iocb_flags
io_uring/msg_ring: Drop custom destructor
io_uring: Move old async data allocation helper to header
io_uring/rw: Allocate async data through helper
io_uring/net: Allocate msghdr async data through helper
io_uring/uring_cmd: Allocate async data through generic helper
io_uring/poll: Allocate apoll with generic alloc_cache helper
...
Diffstat (limited to 'io_uring/rw.c')
-rw-r--r-- | io_uring/rw.c | 212 |
1 files changed, 123 insertions, 89 deletions
diff --git a/io_uring/rw.c b/io_uring/rw.c index 29bb3010f9c0..a9a2733be842 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -202,45 +202,40 @@ static void io_req_rw_cleanup(struct io_kiocb *req, unsigned int issue_flags) * mean that the underlying data can be gone at any time. But that * should be fixed seperately, and then this check could be killed. */ - if (!(req->flags & REQ_F_REFCOUNT)) { + if (!(req->flags & (REQ_F_REISSUE | REQ_F_REFCOUNT))) { req->flags &= ~REQ_F_NEED_CLEANUP; io_rw_recycle(req, issue_flags); } } +static void io_rw_async_data_init(void *obj) +{ + struct io_async_rw *rw = (struct io_async_rw *)obj; + + rw->free_iovec = NULL; + rw->bytes_done = 0; +} + static int io_rw_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_async_rw *rw; - rw = io_alloc_cache_get(&ctx->rw_cache); - if (rw) { - if (rw->free_iovec) { - kasan_mempool_unpoison_object(rw->free_iovec, - rw->free_iov_nr * sizeof(struct iovec)); - req->flags |= REQ_F_NEED_CLEANUP; - } - req->flags |= REQ_F_ASYNC_DATA; - req->async_data = rw; - goto done; - } - - if (!io_alloc_async_data(req)) { - rw = req->async_data; - rw->free_iovec = NULL; - rw->free_iov_nr = 0; -done: - rw->bytes_done = 0; - return 0; + rw = io_uring_alloc_async_data(&ctx->rw_cache, req, io_rw_async_data_init); + if (!rw) + return -ENOMEM; + if (rw->free_iovec) { + kasan_mempool_unpoison_object(rw->free_iovec, + rw->free_iov_nr * sizeof(struct iovec)); + req->flags |= REQ_F_NEED_CLEANUP; } - - return -ENOMEM; + rw->bytes_done = 0; + return 0; } static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import) { struct io_async_rw *rw; - int ret; if (io_rw_alloc_async(req)) return -ENOMEM; @@ -249,12 +244,48 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import) return 0; rw = req->async_data; - ret = io_import_iovec(ddir, req, rw, 0); + return io_import_iovec(ddir, req, rw, 0); +} + +static inline void io_meta_save_state(struct io_async_rw *io) +{ + io->meta_state.seed = io->meta.seed; + iov_iter_save_state(&io->meta.iter, &io->meta_state.iter_meta); +} + +static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb) +{ + if (kiocb->ki_flags & IOCB_HAS_METADATA) { + io->meta.seed = io->meta_state.seed; + iov_iter_restore(&io->meta.iter, &io->meta_state.iter_meta); + } +} + +static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir, + u64 attr_ptr, u64 attr_type_mask) +{ + struct io_uring_attr_pi pi_attr; + struct io_async_rw *io; + int ret; + + if (copy_from_user(&pi_attr, u64_to_user_ptr(attr_ptr), + sizeof(pi_attr))) + return -EFAULT; + + if (pi_attr.rsvd) + return -EINVAL; + + io = req->async_data; + io->meta.flags = pi_attr.flags; + io->meta.app_tag = pi_attr.app_tag; + io->meta.seed = pi_attr.seed; + ret = import_ubuf(ddir, u64_to_user_ptr(pi_attr.addr), + pi_attr.len, &io->meta.iter); if (unlikely(ret < 0)) return ret; - - iov_iter_save_state(&rw->iter, &rw->iter_state); - return 0; + req->flags |= REQ_F_HAS_METADATA; + io_meta_save_state(io); + return ret; } static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, @@ -262,6 +293,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); unsigned ioprio; + u64 attr_type_mask; int ret; rw->kiocb.ki_pos = READ_ONCE(sqe->off); @@ -279,11 +311,28 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, rw->kiocb.ki_ioprio = get_current_ioprio(); } rw->kiocb.dio_complete = NULL; + rw->kiocb.ki_flags = 0; rw->addr = READ_ONCE(sqe->addr); rw->len = READ_ONCE(sqe->len); rw->flags = READ_ONCE(sqe->rw_flags); - return io_prep_rw_setup(req, ddir, do_import); + ret = io_prep_rw_setup(req, ddir, do_import); + + if (unlikely(ret)) + return ret; + + attr_type_mask = READ_ONCE(sqe->attr_type_mask); + if (attr_type_mask) { + u64 attr_ptr; + + /* only PI attribute is supported currently */ + if (attr_type_mask != IORING_RW_ATTR_FLAG_PI) + return -EINVAL; + + attr_ptr = READ_ONCE(sqe->attr_ptr); + ret = io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask); + } + return ret; } int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe) @@ -385,7 +434,8 @@ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) void io_readv_writev_cleanup(struct io_kiocb *req) { - io_rw_iovec_free(req->async_data); + lockdep_assert_held(&req->ctx->uring_lock); + io_rw_recycle(req, 0); } static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) @@ -405,17 +455,12 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) return NULL; } -#ifdef CONFIG_BLOCK -static void io_resubmit_prep(struct io_kiocb *req) -{ - struct io_async_rw *io = req->async_data; - - iov_iter_restore(&io->iter, &io->iter_state); -} - static bool io_rw_should_reissue(struct io_kiocb *req) { +#ifdef CONFIG_BLOCK + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); umode_t mode = file_inode(req->file)->i_mode; + struct io_async_rw *io = req->async_data; struct io_ring_ctx *ctx = req->ctx; if (!S_ISBLK(mode) && !S_ISREG(mode)) @@ -430,23 +475,14 @@ static bool io_rw_should_reissue(struct io_kiocb *req) */ if (percpu_ref_is_dying(&ctx->refs)) return false; - /* - * Play it safe and assume not safe to re-import and reissue if we're - * not in the original thread group (or in task context). - */ - if (!same_thread_group(req->tctx->task, current) || !in_task()) - return false; + + io_meta_restore(io, &rw->kiocb); + iov_iter_restore(&io->iter, &io->iter_state); return true; -} #else -static void io_resubmit_prep(struct io_kiocb *req) -{ -} -static bool io_rw_should_reissue(struct io_kiocb *req) -{ return false; -} #endif +} static void io_req_end_write(struct io_kiocb *req) { @@ -473,22 +509,16 @@ static void io_req_io_end(struct io_kiocb *req) } } -static bool __io_complete_rw_common(struct io_kiocb *req, long res) +static void __io_complete_rw_common(struct io_kiocb *req, long res) { - if (unlikely(res != req->cqe.res)) { - if (res == -EAGAIN && io_rw_should_reissue(req)) { - /* - * Reissue will start accounting again, finish the - * current cycle. - */ - io_req_io_end(req); - req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE; - return true; - } + if (res == req->cqe.res) + return; + if (res == -EAGAIN && io_rw_should_reissue(req)) { + req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE; + } else { req_set_fail(req); req->cqe.res = res; } - return false; } static inline int io_fixup_rw_res(struct io_kiocb *req, long res) @@ -531,8 +561,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res) struct io_kiocb *req = cmd_to_io_kiocb(rw); if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) { - if (__io_complete_rw_common(req, res)) - return; + __io_complete_rw_common(req, res); io_req_set_res(req, io_fixup_rw_res(req, res), 0); } req->io_task_work.func = io_req_rw_complete; @@ -594,26 +623,19 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret, if (ret >= 0 && req->flags & REQ_F_CUR_POS) req->file->f_pos = rw->kiocb.ki_pos; if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) { - if (!__io_complete_rw_common(req, ret)) { - /* - * Safe to call io_end from here as we're inline - * from the submission path. - */ - io_req_io_end(req); - io_req_set_res(req, final_ret, - io_put_kbuf(req, ret, issue_flags)); - io_req_rw_cleanup(req, issue_flags); - return IOU_OK; - } + __io_complete_rw_common(req, ret); + /* + * Safe to call io_end from here as we're inline + * from the submission path. + */ + io_req_io_end(req); + io_req_set_res(req, final_ret, io_put_kbuf(req, ret, issue_flags)); + io_req_rw_cleanup(req, issue_flags); + return IOU_OK; } else { io_rw_done(&rw->kiocb, ret); } - if (req->flags & REQ_F_REISSUE) { - req->flags &= ~REQ_F_REISSUE; - io_resubmit_prep(req); - return -EAGAIN; - } return IOU_ISSUE_SKIP_COMPLETE; } @@ -736,8 +758,11 @@ static bool io_rw_should_retry(struct io_kiocb *req) struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct kiocb *kiocb = &rw->kiocb; - /* never retry for NOWAIT, we just complete with -EAGAIN */ - if (req->flags & REQ_F_NOWAIT) + /* + * Never retry for NOWAIT or a request with metadata, we just complete + * with -EAGAIN. + */ + if (req->flags & (REQ_F_NOWAIT | REQ_F_HAS_METADATA)) return false; /* Only for buffered IO */ @@ -828,6 +853,19 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) kiocb->ki_complete = io_complete_rw; } + if (req->flags & REQ_F_HAS_METADATA) { + struct io_async_rw *io = req->async_data; + + /* + * We have a union of meta fields with wpq used for buffered-io + * in io_async_rw, so fail it here. + */ + if (!(req->file->f_flags & O_DIRECT)) + return -EOPNOTSUPP; + kiocb->ki_flags |= IOCB_HAS_METADATA; + kiocb->private = &io->meta; + } + return 0; } @@ -876,8 +914,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags) if (ret == -EOPNOTSUPP && force_nonblock) ret = -EAGAIN; - if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) { - req->flags &= ~REQ_F_REISSUE; + if (ret == -EAGAIN) { /* If we can poll, just do that. */ if (io_file_can_poll(req)) return -EAGAIN; @@ -902,6 +939,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags) * manually if we need to. */ iov_iter_restore(&io->iter, &io->iter_state); + io_meta_restore(io, kiocb); do { /* @@ -1087,11 +1125,6 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) else ret2 = -EINVAL; - if (req->flags & REQ_F_REISSUE) { - req->flags &= ~REQ_F_REISSUE; - ret2 = -EAGAIN; - } - /* * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just * retry them without IOCB_NOWAIT. @@ -1127,6 +1160,7 @@ done: } else { ret_eagain: iov_iter_restore(&io->iter, &io->iter_state); + io_meta_restore(io, kiocb); if (kiocb->ki_flags & IOCB_WRITE) io_req_end_write(req); return -EAGAIN; |