From fcb14cb1bdacec5b4374fe161e83fb8208164a85 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 May 2022 14:59:25 -0400 Subject: new iov_iter flavour - ITER_UBUF Equivalent of single-segment iovec. Initialized by iov_iter_ubuf(), checked for by iter_is_ubuf(), otherwise behaves like ITER_IOVEC ones. We are going to expose the things like ->write_iter() et.al. to those in subsequent commits. New predicate (user_backed_iter()) that is true for ITER_IOVEC and ITER_UBUF; places like direct-IO handling should use that for checking that pages we modify after getting them from iov_iter_get_pages() would need to be dirtied. DO NOT assume that replacing iter_is_iovec() with user_backed_iter() will solve all problems - there's code that uses iter_is_iovec() to decide how to poke around in iov_iter guts and for that the predicate replacement obviously won't suffice. Signed-off-by: Al Viro --- block/fops.c | 6 ++-- fs/ceph/file.c | 2 +- fs/cifs/file.c | 2 +- fs/direct-io.c | 2 +- fs/fuse/dev.c | 4 +-- fs/fuse/file.c | 2 +- fs/gfs2/file.c | 2 +- fs/iomap/direct-io.c | 2 +- fs/nfs/direct.c | 2 +- include/linux/uio.h | 26 ++++++++++++++++ lib/iov_iter.c | 87 +++++++++++++++++++++++++++++++++++++++++----------- mm/shmem.c | 2 +- 12 files changed, 108 insertions(+), 31 deletions(-) diff --git a/block/fops.c b/block/fops.c index a564cd81340c..b90742595317 100644 --- a/block/fops.c +++ b/block/fops.c @@ -75,7 +75,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, if (iov_iter_rw(iter) == READ) { bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ); - if (iter_is_iovec(iter)) + if (user_backed_iter(iter)) should_dirty = true; } else { bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); @@ -204,7 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, } dio->size = 0; - if (is_read && iter_is_iovec(iter)) + if (is_read && user_backed_iter(iter)) dio->flags |= DIO_SHOULD_DIRTY; blk_start_plug(&plug); @@ -335,7 +335,7 @@ static 
ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, dio->size = bio->bi_iter.bi_size; if (is_read) { - if (iter_is_iovec(iter)) { + if (user_backed_iter(iter)) { dio->flags |= DIO_SHOULD_DIRTY; bio_set_pages_dirty(bio); } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index da59e836a06e..c535de5852bf 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1262,7 +1262,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); loff_t pos = iocb->ki_pos; bool write = iov_iter_rw(iter) == WRITE; - bool should_dirty = !write && iter_is_iovec(iter); + bool should_dirty = !write && user_backed_iter(iter); if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e64cda7a7610..e1e05b253daa 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4004,7 +4004,7 @@ static ssize_t __cifs_readv( if (!is_sync_kiocb(iocb)) ctx->iocb = iocb; - if (iter_is_iovec(to)) + if (user_backed_iter(to)) ctx->should_dirty = true; if (direct) { diff --git a/fs/direct-io.c b/fs/direct-io.c index df5e2d048799..c7fc01c2d509 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1251,7 +1251,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, spin_lock_init(&dio->bio_lock); dio->refcount = 1; - dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ; + dio->should_dirty = user_backed_iter(iter) && iov_iter_rw(iter) == READ; sdio.iter = iter; sdio.final_block_in_request = end >> blkbits; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 0e537e580dc1..8d657c2cd6f7 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1356,7 +1356,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to) if (!fud) return -EPERM; - if (!iter_is_iovec(to)) + if (!user_backed_iter(to)) return -EINVAL; fuse_copy_init(&cs, 1, to); @@ -1949,7 +1949,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) if (!fud) return -EPERM; - if 
(!iter_is_iovec(from)) + if (!user_backed_iter(from)) return -EINVAL; fuse_copy_init(&cs, 0, from); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 00fa861aeead..c982e3afe3b4 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1465,7 +1465,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, inode_unlock(inode); } - io->should_dirty = !write && iter_is_iovec(iter); + io->should_dirty = !write && user_backed_iter(iter); while (count) { ssize_t nres; fl_owner_t owner = current->files; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 2cceb193dcd8..48e6cc74fdc1 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -780,7 +780,7 @@ static inline bool should_fault_in_pages(struct iov_iter *i, if (!count) return false; - if (!iter_is_iovec(i)) + if (!user_backed_iter(i)) return false; size = PAGE_SIZE; diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index c75d33d5c3ce..4eb559a16c9e 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -533,7 +533,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, iomi.flags |= IOMAP_NOWAIT; } - if (iter_is_iovec(iter)) + if (user_backed_iter(iter)) dio->flags |= IOMAP_DIO_DIRTY; } else { iomi.flags |= IOMAP_WRITE; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4eb2a8380a28..022e1ce63e62 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -478,7 +478,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - if (iter_is_iovec(iter)) + if (user_backed_iter(iter)) dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; if (!swap) diff --git a/include/linux/uio.h b/include/linux/uio.h index 9a2dc496d535..85bef84fd294 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -26,6 +26,7 @@ enum iter_type { ITER_PIPE, ITER_XARRAY, ITER_DISCARD, + ITER_UBUF, }; struct iov_iter_state { @@ -38,6 +39,7 @@ struct iov_iter { u8 iter_type; bool nofault; bool data_source; + bool user_backed; size_t iov_offset; size_t count; union { @@ 
-46,6 +48,7 @@ struct iov_iter { const struct bio_vec *bvec; struct xarray *xarray; struct pipe_inode_info *pipe; + void __user *ubuf; }; union { unsigned long nr_segs; @@ -70,6 +73,11 @@ static inline void iov_iter_save_state(struct iov_iter *iter, state->nr_segs = iter->nr_segs; } +static inline bool iter_is_ubuf(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_UBUF; +} + static inline bool iter_is_iovec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_IOVEC; @@ -105,6 +113,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) return i->data_source ? WRITE : READ; } +static inline bool user_backed_iter(const struct iov_iter *i) +{ + return i->user_backed; +} + /* * Total number of bytes covered by an iovec. * @@ -322,4 +335,17 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec, int import_single_range(int type, void __user *buf, size_t len, struct iovec *iov, struct iov_iter *i); +static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, + void __user *buf, size_t count) +{ + WARN_ON(direction & ~(READ | WRITE)); + *i = (struct iov_iter) { + .iter_type = ITER_UBUF, + .user_backed = true, + .data_source = direction, + .ubuf = buf, + .count = count + }; +} + #endif diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 0e0be334dbee..b3493d20536e 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -16,6 +16,16 @@ #define PIPE_PARANOIA /* for now */ +/* covers ubuf and kbuf alike */ +#define iterate_buf(i, n, base, len, off, __p, STEP) { \ + size_t __maybe_unused off = 0; \ + len = n; \ + base = __p + i->iov_offset; \ + len -= (STEP); \ + i->iov_offset += len; \ + n = len; \ +} + /* covers iovec and kvec alike */ #define iterate_iovec(i, n, base, len, off, __p, STEP) { \ size_t off = 0; \ @@ -110,7 +120,12 @@ __out: \ if (unlikely(i->count < n)) \ n = i->count; \ if (likely(n)) { \ - if (likely(iter_is_iovec(i))) { \ + if (likely(iter_is_ubuf(i))) { \ + void __user *base; \ + size_t len; \ + 
iterate_buf(i, n, base, len, off, \ + i->ubuf, (I)) \ + } else if (likely(iter_is_iovec(i))) { \ const struct iovec *iov = i->iov; \ void __user *base; \ size_t len; \ @@ -275,7 +290,11 @@ out: */ size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size) { - if (iter_is_iovec(i)) { + if (iter_is_ubuf(i)) { + size_t n = min(size, iov_iter_count(i)); + n -= fault_in_readable(i->ubuf + i->iov_offset, n); + return size - n; + } else if (iter_is_iovec(i)) { size_t count = min(size, iov_iter_count(i)); const struct iovec *p; size_t skip; @@ -314,7 +333,11 @@ EXPORT_SYMBOL(fault_in_iov_iter_readable); */ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size) { - if (iter_is_iovec(i)) { + if (iter_is_ubuf(i)) { + size_t n = min(size, iov_iter_count(i)); + n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n); + return size - n; + } else if (iter_is_iovec(i)) { size_t count = min(size, iov_iter_count(i)); const struct iovec *p; size_t skip; @@ -345,6 +368,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, *i = (struct iov_iter) { .iter_type = ITER_IOVEC, .nofault = false, + .user_backed = true, .data_source = direction, .iov = iov, .nr_segs = nr_segs, @@ -494,7 +518,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(iov_iter_is_pipe(i))) return copy_pipe_to_iter(addr, bytes, i); - if (iter_is_iovec(i)) + if (user_backed_iter(i)) might_fault(); iterate_and_advance(i, bytes, base, len, off, copyout(base, addr + off, len), @@ -583,7 +607,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(iov_iter_is_pipe(i))) return copy_mc_pipe_to_iter(addr, bytes, i); - if (iter_is_iovec(i)) + if (user_backed_iter(i)) might_fault(); __iterate_and_advance(i, bytes, base, len, off, copyout_mc(base, addr + off, len), @@ -601,7 +625,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) WARN_ON(1); return 0; } - if (iter_is_iovec(i)) + if 
(user_backed_iter(i)) might_fault(); iterate_and_advance(i, bytes, base, len, off, copyin(addr + off, base, len), @@ -894,16 +918,16 @@ void iov_iter_advance(struct iov_iter *i, size_t size) { if (unlikely(i->count < size)) size = i->count; - if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { + if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) { + i->iov_offset += size; + i->count -= size; + } else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { /* iovec and kvec have identical layouts */ iov_iter_iovec_advance(i, size); } else if (iov_iter_is_bvec(i)) { iov_iter_bvec_advance(i, size); } else if (iov_iter_is_pipe(i)) { pipe_advance(i, size); - } else if (unlikely(iov_iter_is_xarray(i))) { - i->iov_offset += size; - i->count -= size; } else if (iov_iter_is_discard(i)) { i->count -= size; } @@ -950,7 +974,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) return; } unroll -= i->iov_offset; - if (iov_iter_is_xarray(i)) { + if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) { BUG(); /* We should never go beyond the start of the specified * range since we might then be straying into pages that * aren't pinned. 
@@ -1158,6 +1182,14 @@ static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { + if (likely(iter_is_ubuf(i))) { + if (i->count & len_mask) + return false; + if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask) + return false; + return true; + } + if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_iter_aligned_iovec(i, addr_mask, len_mask); @@ -1233,6 +1265,13 @@ static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i) unsigned long iov_iter_alignment(const struct iov_iter *i) { + if (likely(iter_is_ubuf(i))) { + size_t size = i->count; + if (size) + return ((unsigned long)i->ubuf + i->iov_offset) | size; + return 0; + } + /* iovec and kvec have identical layouts */ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_iter_alignment_iovec(i); @@ -1263,6 +1302,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) size_t size = i->count; unsigned k; + if (iter_is_ubuf(i)) + return 0; + if (WARN_ON(!iter_is_iovec(i))) return ~0U; @@ -1385,12 +1427,15 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); } -/* must be done on non-empty ITER_IOVEC one */ +/* must be done on non-empty ITER_UBUF or ITER_IOVEC one */ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) { size_t skip; long k; + if (iter_is_ubuf(i)) + return (unsigned long)i->ubuf + i->iov_offset; + for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { size_t len = i->iov[k].iov_len - skip; @@ -1432,7 +1477,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (maxsize > MAX_RW_COUNT) maxsize = MAX_RW_COUNT; - if (likely(iter_is_iovec(i))) { + if (likely(user_backed_iter(i))) { unsigned int gup_flags = 0; unsigned long addr; @@ -1559,7 +1604,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (maxsize > MAX_RW_COUNT) maxsize = 
MAX_RW_COUNT; - if (likely(iter_is_iovec(i))) { + if (likely(user_backed_iter(i))) { unsigned int gup_flags = 0; unsigned long addr; @@ -1715,6 +1760,11 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) { if (unlikely(!i->count)) return 0; + if (likely(iter_is_ubuf(i))) { + unsigned offs = offset_in_page(i->ubuf + i->iov_offset); + int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE); + return min(npages, maxpages); + } /* iovec and kvec have identical layouts */ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_npages(i, maxpages); @@ -1749,17 +1799,16 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) WARN_ON(1); return NULL; } - if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new))) - return NULL; if (iov_iter_is_bvec(new)) return new->bvec = kmemdup(new->bvec, new->nr_segs * sizeof(struct bio_vec), flags); - else + else if (iov_iter_is_kvec(new) || iter_is_iovec(new)) /* iovec and kvec have identical layout */ return new->iov = kmemdup(new->iov, new->nr_segs * sizeof(struct iovec), flags); + return NULL; } EXPORT_SYMBOL(dup_iter); @@ -1953,10 +2002,12 @@ EXPORT_SYMBOL(import_single_range); void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) { if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) && - !iov_iter_is_kvec(i)) + !iov_iter_is_kvec(i) && !iter_is_ubuf(i)) return; i->iov_offset = state->iov_offset; i->count = state->count; + if (iter_is_ubuf(i)) + return; /* * For the *vec iters, nr_segs + iov is constant - if we increment * the vec, then we also decrement the nr_segs count. 
Hence we don't diff --git a/mm/shmem.c b/mm/shmem.c index e5e43b990fdc..e3a7e171bbd1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2602,7 +2602,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ret = copy_page_to_iter(page, offset, nr, to); put_page(page); - } else if (iter_is_iovec(to)) { + } else if (user_backed_iter(to)) { /* * Copy to user tends to be so well optimized, but * clear_user() not so much, that it is noticeably -- cgit From 3e20a751aff0e099cff496511fef8cdf655b3360 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 May 2022 16:55:40 -0400 Subject: switch new_sync_{read,write}() to ITER_UBUF Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- fs/read_write.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index ea59dd0095c2..1a261dcf1778 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -378,14 +378,13 @@ EXPORT_SYMBOL(rw_verify_area); static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { - struct iovec iov = { .iov_base = buf, .iov_len = len }; struct kiocb kiocb; struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = (ppos ? *ppos : 0); - iov_iter_init(&iter, READ, &iov, 1, len); + iov_iter_ubuf(&iter, READ, buf, len); ret = call_read_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); @@ -481,14 +480,13 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; struct kiocb kiocb; struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = (ppos ? 
*ppos : 0); - iov_iter_init(&iter, WRITE, &iov, 1, len); + iov_iter_ubuf(&iter, WRITE, (void __user *)buf, len); ret = call_write_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); -- cgit From 0d9649341363047be60a9ec7378d8985bdd0abba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jun 2022 16:07:49 -0400 Subject: splice: stop abusing iov_iter_advance() to flush a pipe Use pipe_discard_from() explicitly in generic_file_splice_read(); don't bother with rather non-obvious use of iov_iter_advance() in there. Reviewed-by: Jeff Layton Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- fs/splice.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 93a2c9bf6249..877290500050 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -301,11 +301,9 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, { struct iov_iter to; struct kiocb kiocb; - unsigned int i_head; int ret; iov_iter_pipe(&to, READ, pipe, len); - i_head = to.head; init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; ret = call_read_iter(in, &kiocb, &to); @@ -313,9 +311,8 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, *ppos = kiocb.ki_pos; file_accessed(in); } else if (ret < 0) { - to.head = i_head; - to.iov_offset = 0; - iov_iter_advance(&to, 0); /* to free what was emitted */ + /* free what was emitted */ + pipe_discard_from(pipe, to.start_head); /* * callers of ->splice_read() expect -EAGAIN on * "can't put anything in there", rather than -EFAULT. -- cgit From 2dcedb2a549a4d7430538213b1b28ef7271bc0aa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jun 2022 10:24:37 -0400 Subject: ITER_PIPE: helper for getting pipe buffer by index pipe_buffer instances of a pipe are organized as a ring buffer, with power-of-2 size. Indices are kept *not* reduced modulo ring size, so the buffer referred to by index N is pipe->bufs[N & (pipe->ring_size - 1)].
Ring size can change over the lifetime of a pipe, but not while the pipe is locked. So for any iov_iter primitives it's a constant. Original conversion of pipes to this layout went overboard trying to microoptimize that - calculating pipe->ring_size - 1, storing it in a local variable and using it throughout the function. In some cases it might be warranted, but most of the time it only obfuscates what's going on in there. Introduce a helper (pipe_buf(pipe, N)) that would encapsulate that and use it in the obvious cases. More will follow... Reviewed-by: Jeff Layton Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- lib/iov_iter.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index b3493d20536e..048026d5aa0d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -183,13 +183,18 @@ static int copyin(void *to, const void __user *from, size_t n) return n; } +static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, + unsigned int slot) +{ + return &pipe->bufs[slot & (pipe->ring_size - 1)]; +} + #ifdef PIPE_PARANOIA static bool sanity(const struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; unsigned int p_head = pipe->head; unsigned int p_tail = pipe->tail; - unsigned int p_mask = pipe->ring_size - 1; unsigned int p_occupancy = pipe_occupancy(p_head, p_tail); unsigned int i_head = i->head; unsigned int idx; @@ -201,7 +206,7 @@ static bool sanity(const struct iov_iter *i) if (unlikely(i_head != p_head - 1)) goto Bad; // must be at the last buffer... - p = &pipe->bufs[i_head & p_mask]; + p = pipe_buf(pipe, i_head); if (unlikely(p->offset + p->len != i->iov_offset)) goto Bad; // ...
at the end of segment } else { @@ -386,11 +391,10 @@ static inline bool allocated(struct pipe_buffer *buf) static inline void data_start(const struct iov_iter *i, unsigned int *iter_headp, size_t *offp) { - unsigned int p_mask = i->pipe->ring_size - 1; unsigned int iter_head = i->head; size_t off = i->iov_offset; - if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) || + if (off && (!allocated(pipe_buf(i->pipe, iter_head)) || off == PAGE_SIZE)) { iter_head++; off = 0; @@ -1280,10 +1284,9 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) return iov_iter_alignment_bvec(i); if (iov_iter_is_pipe(i)) { - unsigned int p_mask = i->pipe->ring_size - 1; size_t size = i->count; - if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask])) + if (size && i->iov_offset && allocated(pipe_buf(i->pipe, i->head))) return size | i->iov_offset; return size; } -- cgit From 47b7fcae419dc940e3fb8e58088a5b80ad813bbf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 13 Jun 2022 14:30:15 -0400 Subject: ITER_PIPE: helpers for adding pipe buffers There are only two kinds of pipe_buffer in the area used by ITER_PIPE. 1) anonymous - copy_to_iter() et al. end up creating those and copying data there. They have zero ->offset, and their ->ops points to default_pipe_buf_ops. 2) zero-copy ones - those come from copy_page_to_iter(), and page comes from caller. ->offset is also caller-supplied - it might be non-zero. ->ops points to page_cache_pipe_buf_ops. Move creation and insertion of those into helpers - push_anon(pipe, size) and push_page(pipe, page, offset, size) resp., separating them from the "could we avoid creating a new buffer by merging with the current head?" logic.
Acked-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 88 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 048026d5aa0d..a5c436e564e8 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -231,15 +231,39 @@ Bad: #define sanity(i) true #endif +static struct page *push_anon(struct pipe_inode_info *pipe, unsigned size) +{ + struct page *page = alloc_page(GFP_USER); + if (page) { + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); + *buf = (struct pipe_buffer) { + .ops = &default_pipe_buf_ops, + .page = page, + .offset = 0, + .len = size + }; + } + return page; +} + +static void push_page(struct pipe_inode_info *pipe, struct page *page, + unsigned int offset, unsigned int size) +{ + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); + *buf = (struct pipe_buffer) { + .ops = &page_cache_pipe_buf_ops, + .page = page, + .offset = offset, + .len = size + }; + get_page(page); +} + static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; - struct pipe_buffer *buf; - unsigned int p_tail = pipe->tail; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head = i->head; - size_t off; + unsigned int head = pipe->head; if (unlikely(bytes > i->count)) bytes = i->count; @@ -250,32 +274,21 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by if (!sanity(i)) return 0; - off = i->iov_offset; - buf = &pipe->bufs[i_head & p_mask]; - if (off) { - if (offset == off && buf->page == page) { - /* merge with the last one */ + if (offset && i->iov_offset == offset) { // could we merge it? 
+ struct pipe_buffer *buf = pipe_buf(pipe, head - 1); + if (buf->page == page) { buf->len += bytes; i->iov_offset += bytes; - goto out; + i->count -= bytes; + return bytes; } - i_head++; - buf = &pipe->bufs[i_head & p_mask]; } - if (pipe_full(i_head, p_tail, pipe->max_usage)) + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return 0; - buf->ops = &page_cache_pipe_buf_ops; - buf->flags = 0; - get_page(page); - buf->page = page; - buf->offset = offset; - buf->len = bytes; - - pipe->head = i_head + 1; + push_page(pipe, page, offset, bytes); i->iov_offset = offset + bytes; - i->head = i_head; -out: + i->head = head; i->count -= bytes; return bytes; } @@ -407,8 +420,6 @@ static size_t push_pipe(struct iov_iter *i, size_t size, int *iter_headp, size_t *offp) { struct pipe_inode_info *pipe = i->pipe; - unsigned int p_tail = pipe->tail; - unsigned int p_mask = pipe->ring_size - 1; unsigned int iter_head; size_t off; ssize_t left; @@ -423,30 +434,23 @@ static size_t push_pipe(struct iov_iter *i, size_t size, *iter_headp = iter_head; *offp = off; if (off) { + struct pipe_buffer *buf = pipe_buf(pipe, iter_head); + left -= PAGE_SIZE - off; if (left <= 0) { - pipe->bufs[iter_head & p_mask].len += size; + buf->len += size; return size; } - pipe->bufs[iter_head & p_mask].len = PAGE_SIZE; - iter_head++; + buf->len = PAGE_SIZE; } - while (!pipe_full(iter_head, p_tail, pipe->max_usage)) { - struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask]; - struct page *page = alloc_page(GFP_USER); + while (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + struct page *page = push_anon(pipe, + min_t(ssize_t, left, PAGE_SIZE)); if (!page) break; - buf->ops = &default_pipe_buf_ops; - buf->flags = 0; - buf->page = page; - buf->offset = 0; - buf->len = min_t(ssize_t, left, PAGE_SIZE); - left -= buf->len; - iter_head++; - pipe->head = iter_head; - - if (left == 0) + left -= PAGE_SIZE; + if (left <= 0) return size; } return size - left; -- cgit From 
8fad7767edcfd3f93e0d9985cb2dc1db270b8719 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jun 2022 13:53:53 -0400 Subject: ITER_PIPE: allocate buffers as we go in copy-to-pipe primitives New helper: append_pipe(). Extends the last buffer if possible, allocates a new one otherwise. Returns page and offset in it on success, NULL on failure. iov_iter is advanced past the data we've got. Use that instead of push_pipe() in copy-to-pipe primitives; they get simpler that way. Handling of short copy (in "mc" one) is done simply by iov_iter_revert() - iov_iter is in consistent state after that one, so we can use that. [Fix for braino caught by Liu Xinpeng folded in] [another braino fix, this time in copy_pipe_to_iter() and pipe_zero(); caught by testcase from Hugh Dickins] Signed-off-by: Al Viro --- lib/iov_iter.c | 171 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 98 insertions(+), 73 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index a5c436e564e8..e22c272cb420 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -259,6 +259,45 @@ static void push_page(struct pipe_inode_info *pipe, struct page *page, get_page(page); } +static inline bool allocated(struct pipe_buffer *buf) +{ + return buf->ops == &default_pipe_buf_ops; +} + +static struct page *append_pipe(struct iov_iter *i, size_t size, + unsigned int *off) +{ + struct pipe_inode_info *pipe = i->pipe; + size_t offset = i->iov_offset; + struct pipe_buffer *buf; + struct page *page; + + if (offset && offset < PAGE_SIZE) { + // some space in the last buffer; can we add to it? 
+ buf = pipe_buf(pipe, pipe->head - 1); + if (allocated(buf)) { + size = min_t(size_t, size, PAGE_SIZE - offset); + buf->len += size; + i->iov_offset += size; + i->count -= size; + *off = offset; + return buf->page; + } + } + // OK, we need a new buffer + *off = 0; + size = min_t(size_t, size, PAGE_SIZE); + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + return NULL; + page = push_anon(pipe, size); + if (!page) + return NULL; + i->head = pipe->head - 1; + i->iov_offset = size; + i->count -= size; + return page; +} + static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { @@ -396,11 +435,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, } EXPORT_SYMBOL(iov_iter_init); -static inline bool allocated(struct pipe_buffer *buf) -{ - return buf->ops == &default_pipe_buf_ops; -} - static inline void data_start(const struct iov_iter *i, unsigned int *iter_headp, size_t *offp) { @@ -459,28 +493,24 @@ static size_t push_pipe(struct iov_iter *i, size_t size, static size_t copy_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head; - size_t n, off; + unsigned int off, chunk; - if (!sanity(i)) + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) return 0; - bytes = n = push_pipe(i, bytes, &i_head, &off); - if (unlikely(!n)) + if (!sanity(i)) return 0; - do { - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); - i->head = i_head; - i->iov_offset = off + chunk; - n -= chunk; + + for (size_t n = bytes; n; n -= chunk) { + struct page *page = append_pipe(i, n, &off); + chunk = min_t(size_t, n, PAGE_SIZE - off); + if (!page) + return bytes - n; + memcpy_to_page(page, off, addr, chunk); addr += chunk; - off = 0; - i_head++; - } while (n); - i->count -= bytes; + } return bytes; } @@ -494,31 
+524,32 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len, static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, struct iov_iter *i, __wsum *sump) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; __wsum sum = *sump; size_t off = 0; - unsigned int i_head; - size_t r; + unsigned int chunk, r; + + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) + return 0; if (!sanity(i)) return 0; - bytes = push_pipe(i, bytes, &i_head, &r); while (bytes) { - size_t chunk = min_t(size_t, bytes, PAGE_SIZE - r); - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); + struct page *page = append_pipe(i, bytes, &r); + char *p; + + if (!page) + break; + chunk = min_t(size_t, bytes, PAGE_SIZE - r); + p = kmap_local_page(page); sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off); kunmap_local(p); - i->head = i_head; - i->iov_offset = r + chunk; - bytes -= chunk; off += chunk; - r = 0; - i_head++; + bytes -= chunk; } *sump = sum; - i->count -= off; return off; } @@ -550,39 +581,36 @@ static int copyout_mc(void __user *to, const void *from, size_t n) static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head; - unsigned int valid = pipe->head; - size_t n, off, xfer = 0; + size_t xfer = 0; + unsigned int off, chunk; + + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) + return 0; if (!sanity(i)) return 0; - n = push_pipe(i, bytes, &i_head, &off); - while (n) { - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); + while (bytes) { + struct page *page = append_pipe(i, bytes, &off); unsigned long rem; + char *p; + + if (!page) + break; + chunk = min_t(size_t, bytes, PAGE_SIZE - off); + p = kmap_local_page(page); rem = copy_mc_to_kernel(p + off, addr + 
xfer, chunk); chunk -= rem; kunmap_local(p); - if (chunk) { - i->head = i_head; - i->iov_offset = off + chunk; - xfer += chunk; - valid = i_head + 1; - } + xfer += chunk; + bytes -= chunk; if (rem) { - pipe->bufs[i_head & p_mask].len -= rem; - pipe_discard_from(pipe, valid); + iov_iter_revert(i, rem); break; } - n -= chunk; - off = 0; - i_head++; } - i->count -= xfer; return xfer; } @@ -769,30 +797,27 @@ EXPORT_SYMBOL(copy_page_from_iter); static size_t pipe_zero(size_t bytes, struct iov_iter *i) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head; - size_t n, off; + unsigned int chunk, off; - if (!sanity(i)) + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) return 0; - bytes = n = push_pipe(i, bytes, &i_head, &off); - if (unlikely(!n)) + if (!sanity(i)) return 0; - do { - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); + for (size_t n = bytes; n; n -= chunk) { + struct page *page = append_pipe(i, n, &off); + char *p; + + if (!page) + return bytes - n; + chunk = min_t(size_t, n, PAGE_SIZE - off); + p = kmap_local_page(page); memset(p + off, 0, chunk); kunmap_local(p); - i->head = i_head; - i->iov_offset = off + chunk; - n -= chunk; - off = 0; - i_head++; - } while (n); - i->count -= bytes; + } return bytes; } -- cgit From e3b42964f84c028f352c11269661d47f6ca4ab2e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 11 Jun 2022 02:52:03 -0400 Subject: ITER_PIPE: fold push_pipe() into __pipe_get_pages() Expand the only remaining call of push_pipe() (in __pipe_get_pages()), combine it with the page-collecting loop there. Note that the only reason it's not a loop doing append_pipe() is that append_pipe() is advancing, while iov_iter_get_pages() is not. As soon as it switches to saner semantics, this thing will switch to using append_pipe(). 
Signed-off-by: Al Viro --- lib/iov_iter.c | 80 ++++++++++++++++++---------------------------------------- 1 file changed, 25 insertions(+), 55 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e22c272cb420..bf600b4fe980 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -450,46 +450,6 @@ static inline void data_start(const struct iov_iter *i, *offp = off; } -static size_t push_pipe(struct iov_iter *i, size_t size, - int *iter_headp, size_t *offp) -{ - struct pipe_inode_info *pipe = i->pipe; - unsigned int iter_head; - size_t off; - ssize_t left; - - if (unlikely(size > i->count)) - size = i->count; - if (unlikely(!size)) - return 0; - - left = size; - data_start(i, &iter_head, &off); - *iter_headp = iter_head; - *offp = off; - if (off) { - struct pipe_buffer *buf = pipe_buf(pipe, iter_head); - - left -= PAGE_SIZE - off; - if (left <= 0) { - buf->len += size; - return size; - } - buf->len = PAGE_SIZE; - } - while (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { - struct page *page = push_anon(pipe, - min_t(ssize_t, left, PAGE_SIZE)); - if (!page) - break; - - left -= PAGE_SIZE; - if (left <= 0) - return size; - } - return size - left; -} - static size_t copy_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { @@ -1359,23 +1319,33 @@ static inline ssize_t __pipe_get_pages(struct iov_iter *i, size_t maxsize, struct page **pages, int iter_head, - size_t *start) + size_t off) { struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - ssize_t n = push_pipe(i, maxsize, &iter_head, start); - if (!n) - return -EFAULT; + ssize_t left = maxsize; - maxsize = n; - n += *start; - while (n > 0) { - get_page(*pages++ = pipe->bufs[iter_head & p_mask].page); - iter_head++; - n -= PAGE_SIZE; - } + if (off) { + struct pipe_buffer *buf = pipe_buf(pipe, iter_head); - return maxsize; + get_page(*pages++ = buf->page); + left -= PAGE_SIZE - off; + if (left <= 0) { + buf->len += maxsize; + return maxsize; + } + buf->len = 
PAGE_SIZE; + } + while (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + struct page *page = push_anon(pipe, + min_t(ssize_t, left, PAGE_SIZE)); + if (!page) + break; + get_page(*pages++ = page); + left -= PAGE_SIZE; + if (left <= 0) + return maxsize; + } + return maxsize - left ? : -EFAULT; } static ssize_t pipe_get_pages(struct iov_iter *i, @@ -1393,7 +1363,7 @@ static ssize_t pipe_get_pages(struct iov_iter *i, npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); capacity = min(npages, maxpages) * PAGE_SIZE - *start; - return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start); + return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, *start); } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, @@ -1575,7 +1545,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, p = get_pages_array(npages); if (!p) return -ENOMEM; - n = __pipe_get_pages(i, maxsize, p, iter_head, start); + n = __pipe_get_pages(i, maxsize, p, iter_head, *start); if (n > 0) *pages = p; else -- cgit From ca591967543ab1af7e6e68bd505ef7869d3f2175 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 16 Jun 2022 14:26:23 -0400 Subject: ITER_PIPE: lose iter_head argument of __pipe_get_pages() it's only used to get to the partial buffer we can add to, and that's always the last one, i.e. pipe->head - 1. 
Signed-off-by: Al Viro --- lib/iov_iter.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index bf600b4fe980..95c56d42505b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1318,14 +1318,13 @@ EXPORT_SYMBOL(iov_iter_gap_alignment); static inline ssize_t __pipe_get_pages(struct iov_iter *i, size_t maxsize, struct page **pages, - int iter_head, size_t off) { struct pipe_inode_info *pipe = i->pipe; ssize_t left = maxsize; if (off) { - struct pipe_buffer *buf = pipe_buf(pipe, iter_head); + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head - 1); get_page(*pages++ = buf->page); left -= PAGE_SIZE - off; @@ -1363,7 +1362,7 @@ static ssize_t pipe_get_pages(struct iov_iter *i, npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); capacity = min(npages, maxpages) * PAGE_SIZE - *start; - return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, *start); + return __pipe_get_pages(i, min(maxsize, capacity), pages, *start); } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, @@ -1545,7 +1544,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, p = get_pages_array(npages); if (!p) return -ENOMEM; - n = __pipe_get_pages(i, maxsize, p, iter_head, *start); + n = __pipe_get_pages(i, maxsize, p, *start); if (n > 0) *pages = p; else -- cgit From 2c855de93314e9573f31044976ffd89cb70a2dbd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Jun 2022 16:03:25 -0400 Subject: ITER_PIPE: clean pipe_advance() up instead of setting ->iov_offset for new position and calling pipe_truncate() to adjust ->len of the last buffer and discard everything after it, adjust ->len at the same time we set ->iov_offset and use pipe_discard_from() to deal with buffers past that. 
Signed-off-by: Al Viro --- lib/iov_iter.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 95c56d42505b..402d49688a16 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -845,27 +845,27 @@ static inline void pipe_truncate(struct iov_iter *i) static void pipe_advance(struct iov_iter *i, size_t size) { struct pipe_inode_info *pipe = i->pipe; - if (size) { - struct pipe_buffer *buf; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head = i->head; - size_t off = i->iov_offset, left = size; + unsigned int off = i->iov_offset; + if (!off && !size) { + pipe_discard_from(pipe, i->start_head); // discard everything + return; + } + i->count -= size; + while (1) { + struct pipe_buffer *buf = pipe_buf(pipe, i->head); if (off) /* make it relative to the beginning of buffer */ - left += off - pipe->bufs[i_head & p_mask].offset; - while (1) { - buf = &pipe->bufs[i_head & p_mask]; - if (left <= buf->len) - break; - left -= buf->len; - i_head++; + size += off - buf->offset; + if (size <= buf->len) { + buf->len = size; + i->iov_offset = buf->offset + size; + break; } - i->head = i_head; - i->iov_offset = buf->offset + left; + size -= buf->len; + i->head++; + off = 0; } - i->count -= size; - /* ... and discard everything past that point */ - pipe_truncate(i); + pipe_discard_from(pipe, i->head + 1); // discard everything past this one } static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) -- cgit From 92acdc4f37207c556baee0ea28ce0823d22b9812 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jun 2022 17:54:35 -0400 Subject: ITER_PIPE: clean iov_iter_revert() Fold pipe_truncate() into it, clean up. We can release buffers in the same loop where we walk backwards to the iterator beginning looking for the place where the new position will be. 
Signed-off-by: Al Viro --- lib/iov_iter.c | 60 ++++++++++++++-------------------------------------------- 1 file changed, 14 insertions(+), 46 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 402d49688a16..c2e08004a1eb 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -816,32 +816,6 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt } EXPORT_SYMBOL(copy_page_from_iter_atomic); -static inline void pipe_truncate(struct iov_iter *i) -{ - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_tail = pipe->tail; - unsigned int p_head = pipe->head; - unsigned int p_mask = pipe->ring_size - 1; - - if (!pipe_empty(p_head, p_tail)) { - struct pipe_buffer *buf; - unsigned int i_head = i->head; - size_t off = i->iov_offset; - - if (off) { - buf = &pipe->bufs[i_head & p_mask]; - buf->len = off - buf->offset; - i_head++; - } - while (p_head != i_head) { - p_head--; - pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]); - } - - pipe->head = p_head; - } -} - static void pipe_advance(struct iov_iter *i, size_t size) { struct pipe_inode_info *pipe = i->pipe; @@ -936,28 +910,22 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) i->count += unroll; if (unlikely(iov_iter_is_pipe(i))) { struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head = i->head; - size_t off = i->iov_offset; - while (1) { - struct pipe_buffer *b = &pipe->bufs[i_head & p_mask]; - size_t n = off - b->offset; - if (unroll < n) { - off -= unroll; - break; - } - unroll -= n; - if (!unroll && i_head == i->start_head) { - off = 0; - break; + unsigned int head = pipe->head; + + while (head > i->start_head) { + struct pipe_buffer *b = pipe_buf(pipe, --head); + if (unroll < b->len) { + b->len -= unroll; + i->iov_offset = b->offset + b->len; + i->head = head; + return; } - i_head--; - b = &pipe->bufs[i_head & p_mask]; - off = b->offset + b->len; + unroll -= b->len; + pipe_buf_release(pipe, b); + pipe->head--; } - 
i->iov_offset = off; - i->head = i_head; - pipe_truncate(i); + i->iov_offset = 0; + i->head = head; return; } if (unlikely(iov_iter_is_discard(i))) -- cgit From 10f525a8cd7a525e9fc73288bb35428c9cad5e63 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Jun 2022 02:02:51 -0400 Subject: ITER_PIPE: cache the type of last buffer We often need to find whether the last buffer is anon or not, and currently it's rather clumsy: check if ->iov_offset is non-zero (i.e. that pipe is not empty) if so, get the corresponding pipe_buffer and check its ->ops if it's &default_pipe_buf_ops, we have an anon buffer. Let's replace the use of ->iov_offset (which is nowhere near similar to its role for other flavours) with signed field (->last_offset), with the following rules: empty, no buffers occupied: 0 anon, with bytes up to N-1 filled: N zero-copy, with bytes up to N-1 filled: -N That way abs(i->last_offset) is equal to what used to be in i->iov_offset and empty vs. anon vs. zero-copy can be distinguished by the sign of i->last_offset. Checks for "should we extend the last buffer or should we start a new one?" become easier to follow that way. Note that most of the operations can only be done in a sane state - i.e. when the pipe has nothing past the current position of iterator. About the only thing that could be done outside of that state is iov_iter_advance(), which transitions to the sane state by truncating the pipe. There are only two cases where we leave the sane state: 1) iov_iter_get_pages()/iov_iter_get_pages_alloc(). Will be dealt with later, when we make get_pages advancing - the callers are actually happier that way. 2) iov_iter copied, then something is put into the copy. Since they share the underlying pipe, the original gets behind. When we decide that we are done with the copy (original is not usable until then) we advance the original. 
direct_io used to be done that way; nowadays it operates on the original and we do iov_iter_revert() to discard the excessive data. At the moment there's nothing in the kernel that could do that to ITER_PIPE iterators, so this reason for insane state is theoretical right now. Signed-off-by: Al Viro --- include/linux/uio.h | 5 +++- lib/iov_iter.c | 77 ++++++++++++++++++++++++++--------------------------- 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 85bef84fd294..e7fc29b5ad19 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -40,7 +40,10 @@ struct iov_iter { bool nofault; bool data_source; bool user_backed; - size_t iov_offset; + union { + size_t iov_offset; + int last_offset; + }; size_t count; union { const struct iovec *iov; diff --git a/lib/iov_iter.c b/lib/iov_iter.c index c2e08004a1eb..8834f3f61220 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -199,7 +199,7 @@ static bool sanity(const struct iov_iter *i) unsigned int i_head = i->head; unsigned int idx; - if (i->iov_offset) { + if (i->last_offset) { struct pipe_buffer *p; if (unlikely(p_occupancy == 0)) goto Bad; // pipe must be non-empty @@ -207,7 +207,7 @@ static bool sanity(const struct iov_iter *i) goto Bad; // must be at the last buffer... p = pipe_buf(pipe, i_head); - if (unlikely(p->offset + p->len != i->iov_offset)) + if (unlikely(p->offset + p->len != abs(i->last_offset))) goto Bad; // ... 
at the end of segment } else { if (i_head != p_head) @@ -215,7 +215,7 @@ static bool sanity(const struct iov_iter *i) } return true; Bad: - printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset); + printk(KERN_ERR "idx = %d, offset = %d\n", i_head, i->last_offset); printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n", p_head, p_tail, pipe->ring_size); for (idx = 0; idx < pipe->ring_size; idx++) @@ -259,30 +259,31 @@ static void push_page(struct pipe_inode_info *pipe, struct page *page, get_page(page); } -static inline bool allocated(struct pipe_buffer *buf) +static inline int last_offset(const struct pipe_buffer *buf) { - return buf->ops == &default_pipe_buf_ops; + if (buf->ops == &default_pipe_buf_ops) + return buf->len; // buf->offset is 0 for those + else + return -(buf->offset + buf->len); } static struct page *append_pipe(struct iov_iter *i, size_t size, unsigned int *off) { struct pipe_inode_info *pipe = i->pipe; - size_t offset = i->iov_offset; + int offset = i->last_offset; struct pipe_buffer *buf; struct page *page; - if (offset && offset < PAGE_SIZE) { - // some space in the last buffer; can we add to it? 
+ if (offset > 0 && offset < PAGE_SIZE) { + // some space in the last buffer; add to it buf = pipe_buf(pipe, pipe->head - 1); - if (allocated(buf)) { - size = min_t(size_t, size, PAGE_SIZE - offset); - buf->len += size; - i->iov_offset += size; - i->count -= size; - *off = offset; - return buf->page; - } + size = min_t(size_t, size, PAGE_SIZE - offset); + buf->len += size; + i->last_offset += size; + i->count -= size; + *off = offset; + return buf->page; } // OK, we need a new buffer *off = 0; @@ -293,7 +294,7 @@ static struct page *append_pipe(struct iov_iter *i, size_t size, if (!page) return NULL; i->head = pipe->head - 1; - i->iov_offset = size; + i->last_offset = size; i->count -= size; return page; } @@ -313,11 +314,11 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by if (!sanity(i)) return 0; - if (offset && i->iov_offset == offset) { // could we merge it? + if (offset && i->last_offset == -offset) { // could we merge it? struct pipe_buffer *buf = pipe_buf(pipe, head - 1); if (buf->page == page) { buf->len += bytes; - i->iov_offset += bytes; + i->last_offset -= bytes; i->count -= bytes; return bytes; } @@ -326,7 +327,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by return 0; push_page(pipe, page, offset, bytes); - i->iov_offset = offset + bytes; + i->last_offset = -(offset + bytes); i->head = head; i->count -= bytes; return bytes; @@ -438,16 +439,15 @@ EXPORT_SYMBOL(iov_iter_init); static inline void data_start(const struct iov_iter *i, unsigned int *iter_headp, size_t *offp) { - unsigned int iter_head = i->head; - size_t off = i->iov_offset; + int off = i->last_offset; - if (off && (!allocated(pipe_buf(i->pipe, iter_head)) || - off == PAGE_SIZE)) { - iter_head++; - off = 0; + if (off > 0 && off < PAGE_SIZE) { // anon and not full + *iter_headp = i->pipe->head - 1; + *offp = off; + } else { + *iter_headp = i->pipe->head; + *offp = 0; } - *iter_headp = iter_head; - *offp = off; } static 
size_t copy_pipe_to_iter(const void *addr, size_t bytes, @@ -819,7 +819,7 @@ EXPORT_SYMBOL(copy_page_from_iter_atomic); static void pipe_advance(struct iov_iter *i, size_t size) { struct pipe_inode_info *pipe = i->pipe; - unsigned int off = i->iov_offset; + int off = i->last_offset; if (!off && !size) { pipe_discard_from(pipe, i->start_head); // discard everything @@ -829,10 +829,10 @@ static void pipe_advance(struct iov_iter *i, size_t size) while (1) { struct pipe_buffer *buf = pipe_buf(pipe, i->head); if (off) /* make it relative to the beginning of buffer */ - size += off - buf->offset; + size += abs(off) - buf->offset; if (size <= buf->len) { buf->len = size; - i->iov_offset = buf->offset + size; + i->last_offset = last_offset(buf); break; } size -= buf->len; @@ -916,7 +916,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) struct pipe_buffer *b = pipe_buf(pipe, --head); if (unroll < b->len) { b->len -= unroll; - i->iov_offset = b->offset + b->len; + i->last_offset = last_offset(b); i->head = head; return; } @@ -924,7 +924,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) pipe_buf_release(pipe, b); pipe->head--; } - i->iov_offset = 0; + i->last_offset = 0; i->head = head; return; } @@ -1027,7 +1027,7 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, .pipe = pipe, .head = pipe->head, .start_head = pipe->head, - .iov_offset = 0, + .last_offset = 0, .count = count }; } @@ -1158,13 +1158,12 @@ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, return iov_iter_aligned_bvec(i, addr_mask, len_mask); if (iov_iter_is_pipe(i)) { - unsigned int p_mask = i->pipe->ring_size - 1; size_t size = i->count; if (size & len_mask) return false; - if (size && allocated(&i->pipe->bufs[i->head & p_mask])) { - if (i->iov_offset & addr_mask) + if (size && i->last_offset > 0) { + if (i->last_offset & addr_mask) return false; } @@ -1243,8 +1242,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) if (iov_iter_is_pipe(i)) 
{ size_t size = i->count; - if (size && i->iov_offset && allocated(pipe_buf(i->pipe, i->head))) - return size | i->iov_offset; + if (size && i->last_offset > 0) + return size | i->last_offset; return size; } -- cgit From 12d426ab64a1c75f1b2ee5c33e933a4c16004049 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Jun 2022 09:44:38 -0400 Subject: ITER_PIPE: fold data_start() and pipe_space_for_user() together All their callers are next to each other; all of them want the total amount of pages and, possibly, the offset in the partial final buffer. Combine into a new helper (pipe_npages()), fix the bogosity in pipe_space_for_user(), while we are at it. Signed-off-by: Al Viro --- include/linux/pipe_fs_i.h | 20 -------------------- lib/iov_iter.c | 44 +++++++++++++++++++------------------------- 2 files changed, 19 insertions(+), 45 deletions(-) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 4ea496924106..6cb65df3e3ba 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -156,26 +156,6 @@ static inline bool pipe_full(unsigned int head, unsigned int tail, return pipe_occupancy(head, tail) >= limit; } -/** - * pipe_space_for_user - Return number of slots available to userspace - * @head: The pipe ring head pointer - * @tail: The pipe ring tail pointer - * @pipe: The pipe info structure - */ -static inline unsigned int pipe_space_for_user(unsigned int head, unsigned int tail, - struct pipe_inode_info *pipe) -{ - unsigned int p_occupancy, p_space; - - p_occupancy = pipe_occupancy(head, tail); - if (p_occupancy >= pipe->max_usage) - return 0; - p_space = pipe->ring_size - p_occupancy; - if (p_space > pipe->max_usage) - p_space = pipe->max_usage; - return p_space; -} - /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8834f3f61220..12dda1013bea 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -436,18 +436,20 @@ void 
iov_iter_init(struct iov_iter *i, unsigned int direction, } EXPORT_SYMBOL(iov_iter_init); -static inline void data_start(const struct iov_iter *i, - unsigned int *iter_headp, size_t *offp) +// returns the offset in partial buffer (if any) +static inline unsigned int pipe_npages(const struct iov_iter *i, int *npages) { + struct pipe_inode_info *pipe = i->pipe; + int used = pipe->head - pipe->tail; int off = i->last_offset; + *npages = max((int)pipe->max_usage - used, 0); + if (off > 0 && off < PAGE_SIZE) { // anon and not full - *iter_headp = i->pipe->head - 1; - *offp = off; - } else { - *iter_headp = i->pipe->head; - *offp = 0; + (*npages)++; + return off; } + return 0; } static size_t copy_pipe_to_iter(const void *addr, size_t bytes, @@ -1318,18 +1320,16 @@ static ssize_t pipe_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) { - unsigned int iter_head, npages; + unsigned int npages, off; size_t capacity; if (!sanity(i)) return -EFAULT; - data_start(i, &iter_head, start); - /* Amount of free space: some of this one + all after this one */ - npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); - capacity = min(npages, maxpages) * PAGE_SIZE - *start; + *start = off = pipe_npages(i, &npages); + capacity = min(npages, maxpages) * PAGE_SIZE - off; - return __pipe_get_pages(i, min(maxsize, capacity), pages, *start); + return __pipe_get_pages(i, min(maxsize, capacity), pages, off); } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, @@ -1494,24 +1494,22 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, size_t *start) { struct page **p; - unsigned int iter_head, npages; + unsigned int npages, off; ssize_t n; if (!sanity(i)) return -EFAULT; - data_start(i, &iter_head, start); - /* Amount of free space: some of this one + all after this one */ - npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); - n = npages * PAGE_SIZE - *start; + *start = off = 
pipe_npages(i, &npages); + n = npages * PAGE_SIZE - off; if (maxsize > n) maxsize = n; else - npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); + npages = DIV_ROUND_UP(maxsize + off, PAGE_SIZE); p = get_pages_array(npages); if (!p) return -ENOMEM; - n = __pipe_get_pages(i, maxsize, p, *start); + n = __pipe_get_pages(i, maxsize, p, off); if (n > 0) *pages = p; else @@ -1739,16 +1737,12 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) if (iov_iter_is_bvec(i)) return bvec_npages(i, maxpages); if (iov_iter_is_pipe(i)) { - unsigned int iter_head; int npages; - size_t off; if (!sanity(i)) return 0; - data_start(i, &iter_head, &off); - /* some of this one + all after this one */ - npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); + pipe_npages(i, &npages); return min(npages, maxpages); } if (iov_iter_is_xarray(i)) { -- cgit From 91329559eb07c9b12c7ce80e893ad39579c40aa2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2022 20:38:20 -0400 Subject: iov_iter_get_pages_alloc(): lift freeing pages array on failure exits into wrapper Incidentally, ITER_XARRAY did *not* free the sucker in case when iter_xarray_populate_pages() returned 0... 
Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 12dda1013bea..e14749711e34 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1506,15 +1506,10 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, maxsize = n; else npages = DIV_ROUND_UP(maxsize + off, PAGE_SIZE); - p = get_pages_array(npages); + *pages = p = get_pages_array(npages); if (!p) return -ENOMEM; - n = __pipe_get_pages(i, maxsize, p, off); - if (n > 0) - *pages = p; - else - kvfree(p); - return n; + return __pipe_get_pages(i, maxsize, p, off); } static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, @@ -1544,10 +1539,9 @@ static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, count++; } - p = get_pages_array(count); + *pages = p = get_pages_array(count); if (!p) return -ENOMEM; - *pages = p; nr = iter_xarray_populate_pages(p, i->xarray, index, count); if (nr == 0) @@ -1556,7 +1550,7 @@ static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); } -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, +static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start) { @@ -1583,16 +1577,12 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, *start = addr % PAGE_SIZE; addr &= PAGE_MASK; n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); - p = get_pages_array(n); + *pages = p = get_pages_array(n); if (!p) return -ENOMEM; res = get_user_pages_fast(addr, n, gup_flags, p); - if (unlikely(res <= 0)) { - kvfree(p); - *pages = NULL; + if (unlikely(res <= 0)) return res; - } - *pages = p; return min_t(size_t, maxsize, res * PAGE_SIZE - *start); } if (iov_iter_is_bvec(i)) { @@ -1613,6 +1603,22 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, return iter_xarray_get_pages_alloc(i, pages, maxsize, start); return -EFAULT; } 
+ +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + ssize_t len; + + *pages = NULL; + + len = __iov_iter_get_pages_alloc(i, pages, maxsize, start); + if (len <= 0) { + kvfree(*pages); + *pages = NULL; + } + return len; +} EXPORT_SYMBOL(iov_iter_get_pages_alloc); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, -- cgit From c81ce28df500b04444ef97dc82a7b0299ce717e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 15:15:14 -0400 Subject: iov_iter_get_pages(): sanity-check arguments zero maxpages is bogus, but best treated as "just return 0"; NULL pages, OTOH, should be treated as a hard bug. get rid of now completely useless checks in xarray_get_pages{,_alloc}(). Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e14749711e34..7d3158d1f8ea 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1368,9 +1368,6 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, size_t size = maxsize; loff_t pos; - if (!size || !maxpages) - return 0; - pos = i->xarray_start + i->iov_offset; index = pos >> PAGE_SHIFT; offset = pos & ~PAGE_MASK; @@ -1440,10 +1437,11 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (maxsize > i->count) maxsize = i->count; - if (!maxsize) + if (!maxsize || !maxpages) return 0; if (maxsize > MAX_RW_COUNT) maxsize = MAX_RW_COUNT; + BUG_ON(!pages); if (likely(user_backed_iter(i))) { unsigned int gup_flags = 0; @@ -1522,9 +1520,6 @@ static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, size_t size = maxsize; loff_t pos; - if (!size) - return 0; - pos = i->xarray_start + i->iov_offset; index = pos >> PAGE_SHIFT; offset = pos & ~PAGE_MASK; -- cgit From acbdeb8320b0a470bef1b6c0105d8c2bbc4c4ba0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 13:35:35 -0400 Subject: unify pipe_get_pages() and 
pipe_get_pages_alloc() The differences between those two are * pipe_get_pages() gets a non-NULL struct page ** value pointing to preallocated array + array size. * pipe_get_pages_alloc() gets an address of struct page ** variable that contains NULL, allocates the array and (on success) stores its address in that variable. Not hard to combine - always pass struct page ***, have the previous pipe_get_pages_alloc() caller pass ~0U as cap for array size. Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 49 +++++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 7d3158d1f8ea..916c628f80a0 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1284,6 +1284,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_gap_alignment); +static struct page **get_pages_array(size_t n) +{ + return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); +} + static inline ssize_t __pipe_get_pages(struct iov_iter *i, size_t maxsize, struct page **pages, @@ -1317,10 +1322,11 @@ static inline ssize_t __pipe_get_pages(struct iov_iter *i, } static ssize_t pipe_get_pages(struct iov_iter *i, - struct page **pages, size_t maxsize, unsigned maxpages, + struct page ***pages, size_t maxsize, unsigned maxpages, size_t *start) { unsigned int npages, off; + struct page **p; size_t capacity; if (!sanity(i)) @@ -1328,8 +1334,15 @@ static ssize_t pipe_get_pages(struct iov_iter *i, *start = off = pipe_npages(i, &npages); capacity = min(npages, maxpages) * PAGE_SIZE - off; + maxsize = min(maxsize, capacity); + p = *pages; + if (!p) { + *pages = p = get_pages_array(DIV_ROUND_UP(maxsize + off, PAGE_SIZE)); + if (!p) + return -ENOMEM; + } - return __pipe_get_pages(i, min(maxsize, capacity), pages, off); + return __pipe_get_pages(i, maxsize, p, off); } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, @@ -1475,41 +1488,13 @@ 
ssize_t iov_iter_get_pages(struct iov_iter *i, return min_t(size_t, maxsize, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) - return pipe_get_pages(i, pages, maxsize, maxpages, start); + return pipe_get_pages(i, &pages, maxsize, maxpages, start); if (iov_iter_is_xarray(i)) return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); return -EFAULT; } EXPORT_SYMBOL(iov_iter_get_pages); -static struct page **get_pages_array(size_t n) -{ - return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); -} - -static ssize_t pipe_get_pages_alloc(struct iov_iter *i, - struct page ***pages, size_t maxsize, - size_t *start) -{ - struct page **p; - unsigned int npages, off; - ssize_t n; - - if (!sanity(i)) - return -EFAULT; - - *start = off = pipe_npages(i, &npages); - n = npages * PAGE_SIZE - off; - if (maxsize > n) - maxsize = n; - else - npages = DIV_ROUND_UP(maxsize + off, PAGE_SIZE); - *pages = p = get_pages_array(npages); - if (!p) - return -ENOMEM; - return __pipe_get_pages(i, maxsize, p, off); -} - static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *_start_offset) @@ -1593,7 +1578,7 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, return min_t(size_t, maxsize, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) - return pipe_get_pages_alloc(i, pages, maxsize, start); + return pipe_get_pages(i, pages, maxsize, ~0U, start); if (iov_iter_is_xarray(i)) return iter_xarray_get_pages_alloc(i, pages, maxsize, start); return -EFAULT; -- cgit From 68fe506f3731ecf7881de9512cc5f4c14fd13f3a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 13:48:03 -0400 Subject: unify xarray_get_pages() and xarray_get_pages_alloc() same as for pipes Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 49 ++++++++++--------------------------------------- 1 file changed, 10 insertions(+), 39 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 916c628f80a0..6904a1cc36f2 100644 
--- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1373,7 +1373,7 @@ static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa } static ssize_t iter_xarray_get_pages(struct iov_iter *i, - struct page **pages, size_t maxsize, + struct page ***pages, size_t maxsize, unsigned maxpages, size_t *_start_offset) { unsigned nr, offset; @@ -1398,7 +1398,13 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, if (count > maxpages) count = maxpages; - nr = iter_xarray_populate_pages(pages, i->xarray, index, count); + if (!*pages) { + *pages = get_pages_array(count); + if (!*pages) + return -ENOMEM; + } + + nr = iter_xarray_populate_pages(*pages, i->xarray, index, count); if (nr == 0) return 0; @@ -1490,46 +1496,11 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (iov_iter_is_pipe(i)) return pipe_get_pages(i, &pages, maxsize, maxpages, start); if (iov_iter_is_xarray(i)) - return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); + return iter_xarray_get_pages(i, &pages, maxsize, maxpages, start); return -EFAULT; } EXPORT_SYMBOL(iov_iter_get_pages); -static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, - struct page ***pages, size_t maxsize, - size_t *_start_offset) -{ - struct page **p; - unsigned nr, offset; - pgoff_t index, count; - size_t size = maxsize; - loff_t pos; - - pos = i->xarray_start + i->iov_offset; - index = pos >> PAGE_SHIFT; - offset = pos & ~PAGE_MASK; - *_start_offset = offset; - - count = 1; - if (size > PAGE_SIZE - offset) { - size -= PAGE_SIZE - offset; - count += size >> PAGE_SHIFT; - size &= ~PAGE_MASK; - if (size) - count++; - } - - *pages = p = get_pages_array(count); - if (!p) - return -ENOMEM; - - nr = iter_xarray_populate_pages(p, i->xarray, index, count); - if (nr == 0) - return 0; - - return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); -} - static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start) @@ -1580,7 +1551,7 @@ static ssize_t 
__iov_iter_get_pages_alloc(struct iov_iter *i, if (iov_iter_is_pipe(i)) return pipe_get_pages(i, pages, maxsize, ~0U, start); if (iov_iter_is_xarray(i)) - return iter_xarray_get_pages_alloc(i, pages, maxsize, start); + return iter_xarray_get_pages(i, pages, maxsize, ~0U, start); return -EFAULT; } -- cgit From 451c0ba9475ebdce36249c5c769efa5d580d1d83 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 13:54:15 -0400 Subject: unify the rest of iov_iter_get_pages()/iov_iter_get_pages_alloc() guts same as for pipes and xarrays; after that iov_iter_get_pages() becomes a wrapper for __iov_iter_get_pages_alloc(). Signed-off-by: Al Viro --- lib/iov_iter.c | 86 ++++++++++++++++++---------------------------------------- 1 file changed, 27 insertions(+), 59 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 6904a1cc36f2..8c6cdc1cf832 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1448,19 +1448,18 @@ static struct page *first_bvec_segment(const struct iov_iter *i, return page; } -ssize_t iov_iter_get_pages(struct iov_iter *i, - struct page **pages, size_t maxsize, unsigned maxpages, - size_t *start) +static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + unsigned int maxpages, size_t *start) { int n, res; if (maxsize > i->count) maxsize = i->count; - if (!maxsize || !maxpages) + if (!maxsize) return 0; if (maxsize > MAX_RW_COUNT) maxsize = MAX_RW_COUNT; - BUG_ON(!pages); if (likely(user_backed_iter(i))) { unsigned int gup_flags = 0; @@ -1477,83 +1476,52 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); if (n > maxpages) n = maxpages; - res = get_user_pages_fast(addr, n, gup_flags, pages); + if (!*pages) { + *pages = get_pages_array(n); + if (!*pages) + return -ENOMEM; + } + res = get_user_pages_fast(addr, n, gup_flags, *pages); if (unlikely(res <= 0)) return res; return min_t(size_t, maxsize, res * PAGE_SIZE - *start); } if (iov_iter_is_bvec(i)) { + 
struct page **p; struct page *page; page = first_bvec_segment(i, &maxsize, start); n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); if (n > maxpages) n = maxpages; + p = *pages; + if (!p) { + *pages = p = get_pages_array(n); + if (!p) + return -ENOMEM; + } for (int k = 0; k < n; k++) - get_page(*pages++ = page++); + get_page(*p++ = page++); return min_t(size_t, maxsize, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) - return pipe_get_pages(i, &pages, maxsize, maxpages, start); + return pipe_get_pages(i, pages, maxsize, maxpages, start); if (iov_iter_is_xarray(i)) - return iter_xarray_get_pages(i, &pages, maxsize, maxpages, start); + return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); return -EFAULT; } -EXPORT_SYMBOL(iov_iter_get_pages); -static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, - struct page ***pages, size_t maxsize, +ssize_t iov_iter_get_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) { - struct page **p; - int n, res; - - if (maxsize > i->count) - maxsize = i->count; - if (!maxsize) + if (!maxpages) return 0; - if (maxsize > MAX_RW_COUNT) - maxsize = MAX_RW_COUNT; - - if (likely(user_backed_iter(i))) { - unsigned int gup_flags = 0; - unsigned long addr; - - if (iov_iter_rw(i) != WRITE) - gup_flags |= FOLL_WRITE; - if (i->nofault) - gup_flags |= FOLL_NOFAULT; - - addr = first_iovec_segment(i, &maxsize); - *start = addr % PAGE_SIZE; - addr &= PAGE_MASK; - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); - *pages = p = get_pages_array(n); - if (!p) - return -ENOMEM; - res = get_user_pages_fast(addr, n, gup_flags, p); - if (unlikely(res <= 0)) - return res; - return min_t(size_t, maxsize, res * PAGE_SIZE - *start); - } - if (iov_iter_is_bvec(i)) { - struct page *page; + BUG_ON(!pages); - page = first_bvec_segment(i, &maxsize, start); - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); - *pages = p = get_pages_array(n); - if (!p) - return -ENOMEM; - for (int k = 0; k < n; k++) - 
get_page(*p++ = page++); - return min_t(size_t, maxsize, n * PAGE_SIZE - *start); - } - if (iov_iter_is_pipe(i)) - return pipe_get_pages(i, pages, maxsize, ~0U, start); - if (iov_iter_is_xarray(i)) - return iter_xarray_get_pages(i, pages, maxsize, ~0U, start); - return -EFAULT; + return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start); } +EXPORT_SYMBOL(iov_iter_get_pages); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, @@ -1563,7 +1531,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, *pages = NULL; - len = __iov_iter_get_pages_alloc(i, pages, maxsize, start); + len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start); if (len <= 0) { kvfree(*pages); *pages = NULL; -- cgit From 0aa4fc32f54028f6fbb35bf71df4b0d86ff1662b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2022 20:30:35 -0400 Subject: ITER_XARRAY: don't open-code DIV_ROUND_UP() Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8c6cdc1cf832..c78129e709f2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1386,15 +1386,7 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, offset = pos & ~PAGE_MASK; *_start_offset = offset; - count = 1; - if (size > PAGE_SIZE - offset) { - size -= PAGE_SIZE - offset; - count += size >> PAGE_SHIFT; - size &= ~PAGE_MASK; - if (size) - count++; - } - + count = DIV_ROUND_UP(size + offset, PAGE_SIZE); if (count > maxpages) count = maxpages; -- cgit From 8520008417c581c4c22e39597f92b9814ae34c31 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 14:30:39 -0400 Subject: fold __pipe_get_pages() into pipe_get_pages() ... and don't mangle maxsize there - turn the loop into counting one instead. Easier to see that we won't run out of array that way. 
Note that special treatment of the partial buffer in that thing is an artifact of the non-advancing semantics of iov_iter_get_pages() - if not for that, it would be append_pipe(), same as the body of the loop that follows it. IOW, once we make iov_iter_get_pages() advancing, the whole thing will turn into: calculate how many pages we want; allocate an array (if needed); call append_pipe() that many times. Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 75 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index c78129e709f2..a9446efac70d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1289,60 +1289,61 @@ static struct page **get_pages_array(size_t n) return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); } -static inline ssize_t __pipe_get_pages(struct iov_iter *i, - size_t maxsize, - struct page **pages, - size_t off) -{ - struct pipe_inode_info *pipe = i->pipe; - ssize_t left = maxsize; - - if (off) { - struct pipe_buffer *buf = pipe_buf(pipe, pipe->head - 1); - - get_page(*pages++ = buf->page); - left -= PAGE_SIZE - off; - if (left <= 0) { - buf->len += maxsize; - return maxsize; - } - buf->len = PAGE_SIZE; - } - while (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { - struct page *page = push_anon(pipe, - min_t(ssize_t, left, PAGE_SIZE)); - if (!page) - break; - get_page(*pages++ = page); - left -= PAGE_SIZE; - if (left <= 0) - return maxsize; - } - return maxsize - left ?
: -EFAULT; -} - static ssize_t pipe_get_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned maxpages, size_t *start) { + struct pipe_inode_info *pipe = i->pipe; unsigned int npages, off; struct page **p; - size_t capacity; + ssize_t left; + int count; if (!sanity(i)) return -EFAULT; *start = off = pipe_npages(i, &npages); - capacity = min(npages, maxpages) * PAGE_SIZE - off; - maxsize = min(maxsize, capacity); + count = DIV_ROUND_UP(maxsize + off, PAGE_SIZE); + if (count > npages) + count = npages; + if (count > maxpages) + count = maxpages; p = *pages; if (!p) { - *pages = p = get_pages_array(DIV_ROUND_UP(maxsize + off, PAGE_SIZE)); + *pages = p = get_pages_array(count); if (!p) return -ENOMEM; } - return __pipe_get_pages(i, maxsize, p, off); + left = maxsize; + npages = 0; + if (off) { + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head - 1); + + get_page(*p++ = buf->page); + left -= PAGE_SIZE - off; + if (left <= 0) { + buf->len += maxsize; + return maxsize; + } + buf->len = PAGE_SIZE; + npages = 1; + } + for ( ; npages < count; npages++) { + struct page *page; + unsigned int size = min_t(ssize_t, left, PAGE_SIZE); + + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + break; + page = push_anon(pipe, size); + if (!page) + break; + get_page(*p++ = page); + left -= size; + } + if (!npages) + return -EFAULT; + return maxsize - left; } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, -- cgit From 3cf42da327f26eb4461864dd64812345b37f4fd9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 14:45:41 -0400 Subject: iov_iter: saner helper for page array allocation All call sites of get_pages_array() are essentially identical now. Replace with common helper... Returns number of slots available in resulting array or 0 on OOM; it's up to the caller to make sure it doesn't ask for a zero-entry array (i.e. neither maxpages nor size are allowed to be zero).
Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 77 ++++++++++++++++++++++++---------------------------------- 1 file changed, 32 insertions(+), 45 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index a9446efac70d..f003a20d8683 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1284,9 +1284,20 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_gap_alignment); -static struct page **get_pages_array(size_t n) +static int want_pages_array(struct page ***res, size_t size, + size_t start, unsigned int maxpages) { - return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); + unsigned int count = DIV_ROUND_UP(size + start, PAGE_SIZE); + + if (count > maxpages) + count = maxpages; + WARN_ON(!count); // caller should've prevented that + if (!*res) { + *res = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL); + if (!*res) + return 0; + } + return count; } static ssize_t pipe_get_pages(struct iov_iter *i, @@ -1294,27 +1305,20 @@ static ssize_t pipe_get_pages(struct iov_iter *i, size_t *start) { struct pipe_inode_info *pipe = i->pipe; - unsigned int npages, off; + unsigned int npages, off, count; struct page **p; ssize_t left; - int count; if (!sanity(i)) return -EFAULT; *start = off = pipe_npages(i, &npages); - count = DIV_ROUND_UP(maxsize + off, PAGE_SIZE); - if (count > npages) - count = npages; - if (count > maxpages) - count = maxpages; + if (!npages) + return -EFAULT; + count = want_pages_array(pages, maxsize, off, min(npages, maxpages)); + if (!count) + return -ENOMEM; p = *pages; - if (!p) { - *pages = p = get_pages_array(count); - if (!p) - return -ENOMEM; - } - left = maxsize; npages = 0; if (off) { @@ -1377,9 +1381,8 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned maxpages, size_t *_start_offset) { - unsigned nr, offset; - pgoff_t index, count; - size_t size = maxsize; + unsigned nr, offset, count; + pgoff_t index; loff_t pos; 
pos = i->xarray_start + i->iov_offset; @@ -1387,16 +1390,9 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, offset = pos & ~PAGE_MASK; *_start_offset = offset; - count = DIV_ROUND_UP(size + offset, PAGE_SIZE); - if (count > maxpages) - count = maxpages; - - if (!*pages) { - *pages = get_pages_array(count); - if (!*pages) - return -ENOMEM; - } - + count = want_pages_array(pages, maxsize, offset, maxpages); + if (!count) + return -ENOMEM; nr = iter_xarray_populate_pages(*pages, i->xarray, index, count); if (nr == 0) return 0; @@ -1445,7 +1441,7 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, size_t *start) { - int n, res; + unsigned int n; if (maxsize > i->count) maxsize = i->count; @@ -1457,6 +1453,7 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, if (likely(user_backed_iter(i))) { unsigned int gup_flags = 0; unsigned long addr; + int res; if (iov_iter_rw(i) != WRITE) gup_flags |= FOLL_WRITE; @@ -1466,14 +1463,9 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, addr = first_iovec_segment(i, &maxsize); *start = addr % PAGE_SIZE; addr &= PAGE_MASK; - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); - if (n > maxpages) - n = maxpages; - if (!*pages) { - *pages = get_pages_array(n); - if (!*pages) - return -ENOMEM; - } + n = want_pages_array(pages, maxsize, *start, maxpages); + if (!n) + return -ENOMEM; res = get_user_pages_fast(addr, n, gup_flags, *pages); if (unlikely(res <= 0)) return res; @@ -1484,15 +1476,10 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page *page; page = first_bvec_segment(i, &maxsize, start); - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); - if (n > maxpages) - n = maxpages; + n = want_pages_array(pages, maxsize, *start, maxpages); + if (!n) + return -ENOMEM; p = *pages; - if (!p) { - *pages = p = get_pages_array(n); - if (!p) - return -ENOMEM; - } for (int k = 0; k < n; k++) get_page(*p++ = page++); 
return min_t(size_t, maxsize, n * PAGE_SIZE - *start); -- cgit From 1ef255e257173f4bc44317ef2076e7e0de688fdf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Jun 2022 10:28:36 -0400 Subject: iov_iter: advancing variants of iov_iter_get_pages{,_alloc}() Most of the users immediately follow successful iov_iter_get_pages() with advancing by the amount it had returned. Provide inline wrappers doing that, convert trivial open-coded uses of those. BTW, iov_iter_get_pages() never returns more than it had been asked to; such checks in cifs ought to be removed someday... Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- drivers/vhost/scsi.c | 4 +--- fs/ceph/file.c | 3 +-- fs/cifs/file.c | 6 ++---- fs/cifs/misc.c | 3 +-- fs/direct-io.c | 3 +-- fs/fuse/dev.c | 3 +-- fs/fuse/file.c | 3 +-- fs/nfs/direct.c | 6 ++---- include/linux/uio.h | 20 ++++++++++++++++++++ net/core/datagram.c | 3 +-- net/core/skmsg.c | 3 +-- net/rds/message.c | 3 +-- net/tls/tls_sw.c | 4 +--- 13 files changed, 34 insertions(+), 30 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index ffd9e6c2ffc1..9b65509424dc 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -643,14 +643,12 @@ vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, size_t offset; unsigned int npages = 0; - bytes = iov_iter_get_pages(iter, pages, LONG_MAX, + bytes = iov_iter_get_pages2(iter, pages, LONG_MAX, VHOST_SCSI_PREALLOC_UPAGES, &offset); /* No pages were pinned */ if (bytes <= 0) return bytes < 0 ? 
bytes : -EFAULT; - iov_iter_advance(iter, bytes); - while (bytes) { unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes); sg_set_page(sg++, pages[npages++], n, offset); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index c535de5852bf..8fab5db16c73 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -95,12 +95,11 @@ static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize, size_t start; int idx = 0; - bytes = iov_iter_get_pages(iter, pages, maxsize - size, + bytes = iov_iter_get_pages2(iter, pages, maxsize - size, ITER_GET_BVECS_PAGES, &start); if (bytes < 0) return size ?: bytes; - iov_iter_advance(iter, bytes); size += bytes; for ( ; bytes; idx++, bvec_idx++) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e1e05b253daa..3ba013e2987f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3022,7 +3022,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, if (ctx->direct_io) { ssize_t result; - result = iov_iter_get_pages_alloc( + result = iov_iter_get_pages_alloc2( from, &pagevec, cur_len, &start); if (result < 0) { cifs_dbg(VFS, @@ -3036,7 +3036,6 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, break; } cur_len = (size_t)result; - iov_iter_advance(from, cur_len); nr_pages = (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE; @@ -3758,7 +3757,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, if (ctx->direct_io) { ssize_t result; - result = iov_iter_get_pages_alloc( + result = iov_iter_get_pages_alloc2( &direct_iov, &pagevec, cur_len, &start); if (result < 0) { @@ -3774,7 +3773,6 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, break; } cur_len = (size_t)result; - iov_iter_advance(&direct_iov, cur_len); rdata = cifs_readdata_direct_alloc( pagevec, cifs_uncached_readv_complete); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 0e84e6fcf8ab..f833953bab61 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1029,7 +1029,7 @@ 
setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) saved_len = count; while (count && npages < max_pages) { - rc = iov_iter_get_pages(iter, pages, count, max_pages, &start); + rc = iov_iter_get_pages2(iter, pages, count, max_pages, &start); if (rc < 0) { cifs_dbg(VFS, "Couldn't get user pages (rc=%zd)\n", rc); break; @@ -1041,7 +1041,6 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) break; } - iov_iter_advance(iter, rc); count -= rc; rc += start; cur_npages = DIV_ROUND_UP(rc, PAGE_SIZE); diff --git a/fs/direct-io.c b/fs/direct-io.c index c7fc01c2d509..f669163d5860 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -169,7 +169,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) const enum req_op dio_op = dio->opf & REQ_OP_MASK; ssize_t ret; - ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES, + ret = iov_iter_get_pages2(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES, &sdio->from); if (ret < 0 && sdio->blocks_available && dio_op == REQ_OP_WRITE) { @@ -191,7 +191,6 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) } if (ret >= 0) { - iov_iter_advance(sdio->iter, ret); ret += sdio->from; sdio->head = 0; sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8d657c2cd6f7..51897427a534 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -730,14 +730,13 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) } } else { size_t off; - err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off); + err = iov_iter_get_pages2(cs->iter, &page, PAGE_SIZE, 1, &off); if (err < 0) return err; BUG_ON(!err); cs->len = err; cs->offset = off; cs->pg = page; - iov_iter_advance(cs->iter, err); } return lock_request(cs->req); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c982e3afe3b4..69e19fc0afc1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1401,14 +1401,13 @@ static int fuse_get_user_pages(struct 
fuse_args_pages *ap, struct iov_iter *ii, while (nbytes < *nbytesp && ap->num_pages < max_pages) { unsigned npages; size_t start; - ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages], + ret = iov_iter_get_pages2(ii, &ap->pages[ap->num_pages], *nbytesp - nbytes, max_pages - ap->num_pages, &start); if (ret < 0) break; - iov_iter_advance(ii, ret); nbytes += ret; ret += start; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 022e1ce63e62..c275c83f0aef 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -364,13 +364,12 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, size_t pgbase; unsigned npages, i; - result = iov_iter_get_pages_alloc(iter, &pagevec, + result = iov_iter_get_pages_alloc2(iter, &pagevec, rsize, &pgbase); if (result < 0) break; bytes = result; - iov_iter_advance(iter, bytes); npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; for (i = 0; i < npages; i++) { struct nfs_page *req; @@ -812,13 +811,12 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t pgbase; unsigned npages, i; - result = iov_iter_get_pages_alloc(iter, &pagevec, + result = iov_iter_get_pages_alloc2(iter, &pagevec, wsize, &pgbase); if (result < 0) break; bytes = result; - iov_iter_advance(iter, bytes); npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; for (i = 0; i < npages; i++) { struct nfs_page *req; diff --git a/include/linux/uio.h b/include/linux/uio.h index e7fc29b5ad19..b70d28693400 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -351,4 +351,24 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, }; } +static inline ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, + size_t maxsize, unsigned maxpages, size_t *start) +{ + ssize_t res = iov_iter_get_pages(i, pages, maxsize, maxpages, start); + + if (res >= 0) + iov_iter_advance(i, res); + return res; +} + +static inline ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, 
+ size_t maxsize, size_t *start) +{ + ssize_t res = iov_iter_get_pages_alloc(i, pages, maxsize, start); + + if (res >= 0) + iov_iter_advance(i, res); + return res; +} + #endif diff --git a/net/core/datagram.c b/net/core/datagram.c index f3988ef8e9af..7255531f63ae 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -632,12 +632,11 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, if (frag == MAX_SKB_FRAGS) return -EMSGSIZE; - copied = iov_iter_get_pages(from, pages, length, + copied = iov_iter_get_pages2(from, pages, length, MAX_SKB_FRAGS - frag, &start); if (copied < 0) return -EFAULT; - iov_iter_advance(from, copied); length -= copied; truesize = PAGE_ALIGN(copied + start); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 81627892bdd4..cf3c24c8610d 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -324,14 +324,13 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from, goto out; } - copied = iov_iter_get_pages(from, pages, bytes, maxpages, + copied = iov_iter_get_pages2(from, pages, bytes, maxpages, &offset); if (copied <= 0) { ret = -EFAULT; goto out; } - iov_iter_advance(from, copied); bytes -= copied; msg->sg.size += copied; diff --git a/net/rds/message.c b/net/rds/message.c index 799034e0f513..d74be4e3f3fa 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -391,7 +391,7 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * size_t start; ssize_t copied; - copied = iov_iter_get_pages(from, &pages, PAGE_SIZE, + copied = iov_iter_get_pages2(from, &pages, PAGE_SIZE, 1, &start); if (copied < 0) { struct mmpin *mmp; @@ -405,7 +405,6 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * goto err; } total_copied += copied; - iov_iter_advance(from, copied); length -= copied; sg_set_page(sg, pages, copied, start); rm->data.op_nents++; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 17db8c8811fa..f76119f62f1b 100644 --- a/net/tls/tls_sw.c +++ 
b/net/tls/tls_sw.c @@ -1352,7 +1352,7 @@ static int tls_setup_from_iter(struct iov_iter *from, rc = -EFAULT; goto out; } - copied = iov_iter_get_pages(from, pages, + copied = iov_iter_get_pages2(from, pages, length, maxpages, &offset); if (copied <= 0) { @@ -1360,8 +1360,6 @@ static int tls_setup_from_iter(struct iov_iter *from, goto out; } - iov_iter_advance(from, copied); - length -= copied; size += copied; while (copied) { -- cgit From 480cb846c27bda4e14d98a45a9f50c250f38f266 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Jun 2022 10:37:57 -0400 Subject: block: convert to advancing variants of iov_iter_get_pages{,_alloc}() ... doing revert if we end up not using some pages Signed-off-by: Al Viro --- block/bio.c | 25 ++++++++++++++----------- block/blk-map.c | 7 ++++--- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/block/bio.c b/block/bio.c index d6eb90d9b20b..3d3a2678fea2 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1200,7 +1200,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) struct page **pages = (struct page **)bv; ssize_t size, left; unsigned len, i = 0; - size_t offset; + size_t offset, trim; int ret = 0; /* @@ -1218,16 +1218,19 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) * result to ensure the bio's total size is correct. The remainder of * the iov data will be picked up in the next bio iteration. */ - size = iov_iter_get_pages(iter, pages, UINT_MAX - bio->bi_iter.bi_size, + size = iov_iter_get_pages2(iter, pages, UINT_MAX - bio->bi_iter.bi_size, nr_pages, &offset); - if (size > 0) { - nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE); - size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev)); - } else - nr_pages = 0; - - if (unlikely(size <= 0)) { - ret = size ? size : -EFAULT; + if (unlikely(size <= 0)) + return size ? 
size : -EFAULT; + + nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE); + + trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1); + iov_iter_revert(iter, trim); + + size -= trim; + if (unlikely(!size)) { + ret = -EFAULT; goto out; } @@ -1246,7 +1249,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) offset = 0; } - iov_iter_advance(iter, size - left); + iov_iter_revert(iter, left); out: while (i < nr_pages) put_page(pages[i++]); diff --git a/block/blk-map.c b/block/blk-map.c index df8b066cd548..7196a6b64c80 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -254,7 +254,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, size_t offs, added = 0; int npages; - bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); + bytes = iov_iter_get_pages_alloc2(iter, &pages, LONG_MAX, &offs); if (unlikely(bytes <= 0)) { ret = bytes ? bytes : -EFAULT; goto out_unmap; @@ -284,7 +284,6 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, bytes -= n; offs = 0; } - iov_iter_advance(iter, added); } /* * release the pages we didn't map into the bio, if any @@ -293,8 +292,10 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, put_page(pages[j++]); kvfree(pages); /* couldn't stuff something into bio? */ - if (bytes) + if (bytes) { + iov_iter_revert(iter, bytes); break; + } } ret = blk_rq_append_bio(rq, bio); -- cgit From 7d690c157c58d22de9ad71ef5c4e1f43cd8ad0e7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Jun 2022 11:07:52 -0400 Subject: iter_to_pipe(): switch to advancing variant of iov_iter_get_pages() ... and untangle the cleanup on failure to add into pipe. 
Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- fs/splice.c | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 877290500050..0878b852b355 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1158,39 +1158,40 @@ static int iter_to_pipe(struct iov_iter *from, }; size_t total = 0; int ret = 0; - bool failed = false; - while (iov_iter_count(from) && !failed) { + while (iov_iter_count(from)) { struct page *pages[16]; - ssize_t copied; + ssize_t left; size_t start; - int n; + int i, n; - copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start); - if (copied <= 0) { - ret = copied; + left = iov_iter_get_pages2(from, pages, ~0UL, 16, &start); + if (left <= 0) { + ret = left; break; } - for (n = 0; copied; n++, start = 0) { - int size = min_t(int, copied, PAGE_SIZE - start); - if (!failed) { - buf.page = pages[n]; - buf.offset = start; - buf.len = size; - ret = add_to_pipe(pipe, &buf); - if (unlikely(ret < 0)) { - failed = true; - } else { - iov_iter_advance(from, ret); - total += ret; - } - } else { - put_page(pages[n]); + n = DIV_ROUND_UP(left + start, PAGE_SIZE); + for (i = 0; i < n; i++) { + int size = min_t(int, left, PAGE_SIZE - start); + + buf.page = pages[i]; + buf.offset = start; + buf.len = size; + ret = add_to_pipe(pipe, &buf); + if (unlikely(ret < 0)) { + iov_iter_revert(from, left); + // this one got dropped by add_to_pipe() + while (++i < n) + put_page(pages[i]); + goto out; } - copied -= size; + total += ret; + left -= size; + start = 0; } } +out: return total ? total : ret; } -- cgit From dc5801f60b269a73fcce789856c99d1845f75827 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Jun 2022 11:14:04 -0400 Subject: af_alg_make_sg(): switch to advancing variant of iov_iter_get_pages() ... 
and adjust the callers Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- crypto/af_alg.c | 3 +-- crypto/algif_hash.c | 5 +++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index c8289b7a85ba..e893c0f6c879 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -404,7 +404,7 @@ int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len) ssize_t n; int npages, i; - n = iov_iter_get_pages(iter, sgl->pages, len, ALG_MAX_PAGES, &off); + n = iov_iter_get_pages2(iter, sgl->pages, len, ALG_MAX_PAGES, &off); if (n < 0) return n; @@ -1191,7 +1191,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, len += err; atomic_add(err, &ctx->rcvused); rsgl->sg_num_bytes = err; - iov_iter_advance(&msg->msg_iter, err); } *outlen = len; diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 50f7b22f1b48..1d017ec5c63c 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -102,11 +102,12 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg, err = crypto_wait_req(crypto_ahash_update(&ctx->req), &ctx->wait); af_alg_free_sg(&ctx->sgl); - if (err) + if (err) { + iov_iter_revert(&msg->msg_iter, len); goto unlock; + } copied += len; - iov_iter_advance(&msg->msg_iter, len); } err = 0; -- cgit From 7f02464739da05a51cadb997a00a301f734e9c87 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2022 11:42:02 -0400 Subject: 9p: convert to advancing variant of iov_iter_get_pages_alloc() that one is somewhat clumsier than usual and needs serious testing. 
Signed-off-by: Al Viro --- net/9p/client.c | 39 +++++++++++++++++++++++---------------- net/9p/protocol.c | 3 +-- net/9p/trans_virtio.c | 3 ++- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index d403085b9ef5..cb4324211561 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1491,7 +1491,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int count = iov_iter_count(to); - int rsize, non_zc = 0; + int rsize, received, non_zc = 0; char *dataptr; *err = 0; @@ -1520,36 +1520,40 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, } if (IS_ERR(req)) { *err = PTR_ERR(req); + if (!non_zc) + iov_iter_revert(to, count - iov_iter_count(to)); return 0; } *err = p9pdu_readf(&req->rc, clnt->proto_version, - "D", &count, &dataptr); + "D", &received, &dataptr); if (*err) { + if (!non_zc) + iov_iter_revert(to, count - iov_iter_count(to)); trace_9p_protocol_dump(clnt, &req->rc); p9_tag_remove(clnt, req); return 0; } - if (rsize < count) { - pr_err("bogus RREAD count (%d > %d)\n", count, rsize); - count = rsize; + if (rsize < received) { + pr_err("bogus RREAD count (%d > %d)\n", received, rsize); + received = rsize; } p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); if (non_zc) { - int n = copy_to_iter(dataptr, count, to); + int n = copy_to_iter(dataptr, received, to); - if (n != count) { + if (n != received) { *err = -EFAULT; p9_tag_remove(clnt, req); return n; } } else { - iov_iter_advance(to, count); + iov_iter_revert(to, count - received - iov_iter_count(to)); } p9_tag_remove(clnt, req); - return count; + return received; } EXPORT_SYMBOL(p9_client_read_once); @@ -1567,6 +1571,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) while (iov_iter_count(from)) { int count = iov_iter_count(from); int rsize = fid->iounit; + int written; if (!rsize || rsize > clnt->msize - P9_IOHDRSZ) rsize 
= clnt->msize - P9_IOHDRSZ; @@ -1584,27 +1589,29 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) offset, rsize, from); } if (IS_ERR(req)) { + iov_iter_revert(from, count - iov_iter_count(from)); *err = PTR_ERR(req); break; } - *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count); + *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written); if (*err) { + iov_iter_revert(from, count - iov_iter_count(from)); trace_9p_protocol_dump(clnt, &req->rc); p9_tag_remove(clnt, req); break; } - if (rsize < count) { - pr_err("bogus RWRITE count (%d > %d)\n", count, rsize); - count = rsize; + if (rsize < written) { + pr_err("bogus RWRITE count (%d > %d)\n", written, rsize); + written = rsize; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); p9_tag_remove(clnt, req); - iov_iter_advance(from, count); - total += count; - offset += count; + iov_iter_revert(from, count - written - iov_iter_count(from)); + total += written; + offset += written; } return total; } diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 3754c33e2974..83694c631989 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -63,9 +63,8 @@ static size_t pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size) { size_t len = min(pdu->capacity - pdu->size, size); - struct iov_iter i = *from; - if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, &i)) + if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, from)) len = 0; pdu->size += len; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 2a210c2f8e40..1977d33475fe 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -331,7 +331,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, if (err == -ERESTARTSYS) return err; } - n = iov_iter_get_pages_alloc(data, pages, count, offs); + n = iov_iter_get_pages_alloc2(data, pages, count, offs); if (n < 0) return n; *need_drop = 1; @@ -373,6 +373,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, 
(*pages)[index] = kmap_to_page(p); p += PAGE_SIZE; } + iov_iter_advance(data, len); return len; } } -- cgit From b53589927d73e28c62d3cd92ed4e1a0ea3c830ca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2022 11:43:27 -0400 Subject: ceph: switch the last caller of iov_iter_get_pages_alloc() here nothing even looks at the iov_iter after the call, so we couldn't care less whether it advances or not. Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- fs/ceph/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index d6e5916138e4..2c3a9b5b4b74 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -329,7 +329,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len); iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len); - err = iov_iter_get_pages_alloc(&iter, &pages, len, &page_off); + err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off); if (err < 0) { dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err); goto out; -- cgit From eba2d3d798295dc43cae8fade102f9d083a2a741 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2022 13:05:12 -0400 Subject: get rid of non-advancing variants mechanical change; will be further massaged in subsequent commits Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/uio.h | 24 ++---------------------- lib/iov_iter.c | 27 ++++++++++++++++++--------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index b70d28693400..5896af36199c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -247,9 +247,9 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t 
start, size_t count); -ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, +ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, +ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); @@ -351,24 +351,4 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, }; } -static inline ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, - size_t maxsize, unsigned maxpages, size_t *start) -{ - ssize_t res = iov_iter_get_pages(i, pages, maxsize, maxpages, start); - - if (res >= 0) - iov_iter_advance(i, res); - return res; -} - -static inline ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, - size_t maxsize, size_t *start) -{ - ssize_t res = iov_iter_get_pages_alloc(i, pages, maxsize, start); - - if (res >= 0) - iov_iter_advance(i, res); - return res; -} - #endif diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f003a20d8683..c48c83602aae 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1328,6 +1328,7 @@ static ssize_t pipe_get_pages(struct iov_iter *i, left -= PAGE_SIZE - off; if (left <= 0) { buf->len += maxsize; + iov_iter_advance(i, maxsize); return maxsize; } buf->len = PAGE_SIZE; @@ -1347,7 +1348,9 @@ static ssize_t pipe_get_pages(struct iov_iter *i, } if (!npages) return -EFAULT; - return maxsize - left; + maxsize -= left; + iov_iter_advance(i, maxsize); + return maxsize; } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, @@ -1397,7 +1400,9 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, if (nr == 0) return 0; - return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); + maxsize = min_t(size_t, nr * PAGE_SIZE - 
offset, maxsize); + iov_iter_advance(i, maxsize); + return maxsize; } /* must be done on non-empty ITER_UBUF or ITER_IOVEC one */ @@ -1469,7 +1474,9 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, res = get_user_pages_fast(addr, n, gup_flags, *pages); if (unlikely(res <= 0)) return res; - return min_t(size_t, maxsize, res * PAGE_SIZE - *start); + maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - *start); + iov_iter_advance(i, maxsize); + return maxsize; } if (iov_iter_is_bvec(i)) { struct page **p; @@ -1481,8 +1488,10 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, return -ENOMEM; p = *pages; for (int k = 0; k < n; k++) - get_page(*p++ = page++); - return min_t(size_t, maxsize, n * PAGE_SIZE - *start); + get_page(p[k] = page + k); + maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start); + iov_iter_advance(i, maxsize); + return maxsize; } if (iov_iter_is_pipe(i)) return pipe_get_pages(i, pages, maxsize, maxpages, start); @@ -1491,7 +1500,7 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, return -EFAULT; } -ssize_t iov_iter_get_pages(struct iov_iter *i, +ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) { @@ -1501,9 +1510,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start); } -EXPORT_SYMBOL(iov_iter_get_pages); +EXPORT_SYMBOL(iov_iter_get_pages2); -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, +ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start) { @@ -1518,7 +1527,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, } return len; } -EXPORT_SYMBOL(iov_iter_get_pages_alloc); +EXPORT_SYMBOL(iov_iter_get_pages_alloc2); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) -- cgit From 746de1f86fcd33464acac047f111eea877f2f7a0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 
Jun 2022 16:38:53 -0400 Subject: pipe_get_pages(): switch to append_pipe() now that we are advancing the iterator, there's no need to treat the first page separately - just call append_pipe() in a loop. Signed-off-by: Al Viro --- lib/iov_iter.c | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index c48c83602aae..415d51bbc727 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1304,10 +1304,9 @@ static ssize_t pipe_get_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned maxpages, size_t *start) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int npages, off, count; + unsigned int npages, count, off, chunk; struct page **p; - ssize_t left; + size_t left; if (!sanity(i)) return -EFAULT; @@ -1319,38 +1318,16 @@ static ssize_t pipe_get_pages(struct iov_iter *i, if (!count) return -ENOMEM; p = *pages; - left = maxsize; - npages = 0; - if (off) { - struct pipe_buffer *buf = pipe_buf(pipe, pipe->head - 1); - - get_page(*p++ = buf->page); - left -= PAGE_SIZE - off; - if (left <= 0) { - buf->len += maxsize; - iov_iter_advance(i, maxsize); - return maxsize; - } - buf->len = PAGE_SIZE; - npages = 1; - } - for ( ; npages < count; npages++) { - struct page *page; - unsigned int size = min_t(ssize_t, left, PAGE_SIZE); - - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) - break; - page = push_anon(pipe, size); + for (npages = 0, left = maxsize ; npages < count; npages++, left -= chunk) { + struct page *page = append_pipe(i, left, &off); if (!page) break; + chunk = min_t(size_t, left, PAGE_SIZE - off); get_page(*p++ = page); - left -= size; } if (!npages) return -EFAULT; - maxsize -= left; - iov_iter_advance(i, maxsize); - return maxsize; + return maxsize - left; } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, -- cgit From 310d9d5a5009a93377200b98daa2d84aa2bd8160 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 11 Jun 2022 
04:04:33 -0400 Subject: expand those iov_iter_advance()... Signed-off-by: Al Viro --- lib/iov_iter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 415d51bbc727..46ec07886d7b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1378,7 +1378,8 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, return 0; maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); - iov_iter_advance(i, maxsize); + i->iov_offset += maxsize; + i->count -= maxsize; return maxsize; } @@ -1467,7 +1468,13 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, for (int k = 0; k < n; k++) get_page(p[k] = page + k); maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start); - iov_iter_advance(i, maxsize); + i->count -= maxsize; + i->iov_offset += maxsize; + if (i->iov_offset == i->bvec->bv_len) { + i->iov_offset = 0; + i->bvec++; + i->nr_segs--; + } return maxsize; } if (iov_iter_is_pipe(i)) -- cgit From f0f6b614f83dbae99d283b7b12ab5dd2e04df979 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 23 Jun 2022 17:21:37 -0400 Subject: copy_page_to_iter(): don't split high-order page in case of ITER_PIPE ... just shove it into one pipe_buffer. 
Signed-off-by: Al Viro --- lib/iov_iter.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 46ec07886d7b..4e3696d349a4 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -706,30 +706,21 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) return false; } -static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) -{ - if (unlikely(iov_iter_is_pipe(i))) { - return copy_page_to_iter_pipe(page, offset, bytes, i); - } else { - void *kaddr = kmap_local_page(page); - size_t wanted = _copy_to_iter(kaddr + offset, bytes, i); - kunmap_local(kaddr); - return wanted; - } -} - size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t res = 0; if (unlikely(!page_copy_sane(page, offset, bytes))) return 0; + if (unlikely(iov_iter_is_pipe(i))) + return copy_page_to_iter_pipe(page, offset, bytes, i); page += offset / PAGE_SIZE; // first subpage offset %= PAGE_SIZE; while (1) { - size_t n = __copy_page_to_iter(page, offset, - min(bytes, (size_t)PAGE_SIZE - offset), i); + void *kaddr = kmap_local_page(page); + size_t n = min(bytes, (size_t)PAGE_SIZE - offset); + n = _copy_to_iter(kaddr + offset, n, i); + kunmap_local(kaddr); res += n; bytes -= n; if (!bytes || !n) -- cgit From c7d57ab1632d29e256e3ae68f925751142330d88 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 23 Jun 2022 17:24:09 -0400 Subject: hugetlbfs: copy_page_to_iter() can deal with compound pages ... 
since April 2021 Signed-off-by: Al Viro --- fs/hugetlbfs/inode.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 20336cb3c040..40e124a24efa 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -284,35 +284,6 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, } #endif -static size_t -hugetlbfs_read_actor(struct page *page, unsigned long offset, - struct iov_iter *to, unsigned long size) -{ - size_t copied = 0; - int i, chunksize; - - /* Find which 4k chunk and offset with in that chunk */ - i = offset >> PAGE_SHIFT; - offset = offset & ~PAGE_MASK; - - while (size) { - size_t n; - chunksize = PAGE_SIZE; - if (offset) - chunksize -= offset; - if (chunksize > size) - chunksize = size; - n = copy_page_to_iter(&page[i], offset, chunksize, to); - copied += n; - if (n != chunksize) - return copied; - offset = 0; - size -= chunksize; - i++; - } - return copied; -} - /* * Support for read() - Find the page attached to f_mapping and copy out the * data. Its *very* similar to generic_file_buffered_read(), we can't use that @@ -363,7 +334,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) /* * We have the page, copy it to user space buffer. */ - copied = hugetlbfs_read_actor(page, offset, to, nr); + copied = copy_page_to_iter(page, offset, nr, to); put_page(page); } offset += copied; -- cgit From c03f05f183cd15f4259684ab658fbc3d23797d99 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Jul 2022 12:54:53 -0400 Subject: fix copy_page_from_iter() for compound destinations had been broken for ITER_BVEC et.al. since ever (OK, v3.17 when ITER_BVEC had first appeared)... 
Signed-off-by: Al Viro --- lib/iov_iter.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 4e3696d349a4..4b7fce72e3e5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -738,13 +738,27 @@ EXPORT_SYMBOL(copy_page_to_iter); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { - if (page_copy_sane(page, offset, bytes)) { + size_t res = 0; + if (!page_copy_sane(page, offset, bytes)) + return 0; + page += offset / PAGE_SIZE; // first subpage + offset %= PAGE_SIZE; + while (1) { void *kaddr = kmap_local_page(page); - size_t wanted = _copy_from_iter(kaddr + offset, bytes, i); + size_t n = min(bytes, (size_t)PAGE_SIZE - offset); + n = _copy_from_iter(kaddr + offset, n, i); kunmap_local(kaddr); - return wanted; + res += n; + bytes -= n; + if (!bytes || !n) + break; + offset += n; + if (offset == PAGE_SIZE) { + page++; + offset = 0; + } } - return 0; + return res; } EXPORT_SYMBOL(copy_page_from_iter); -- cgit