diff options
Diffstat (limited to 'fs/splice.c')
| -rw-r--r-- | fs/splice.c | 1146 |
1 files changed, 676 insertions, 470 deletions
diff --git a/fs/splice.c b/fs/splice.c index d7c8a7c4db07..d338fe56b50b 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -30,15 +30,32 @@ #include <linux/export.h> #include <linux/syscalls.h> #include <linux/uio.h> +#include <linux/fsnotify.h> #include <linux/security.h> #include <linux/gfp.h> +#include <linux/net.h> #include <linux/socket.h> -#include <linux/compat.h> #include <linux/sched/signal.h> #include "internal.h" /* + * Splice doesn't support FMODE_NOWAIT. Since pipes may set this flag to + * indicate they support non-blocking reads or writes, we must clear it + * here if set to avoid blocking other users of this pipe if splice is + * being done on it. + */ +static noinline void pipe_clear_nowait(struct file *file) +{ + fmode_t fmode = READ_ONCE(file->f_mode); + + do { + if (!(fmode & FMODE_NOWAIT)) + break; + } while (!try_cmpxchg(&file->f_mode, &fmode, fmode & ~FMODE_NOWAIT)); +} + +/* * Attempt to steal a page from a pipe buffer. This should perhaps go into * a vm helper function, it's already simplified quite a bit by the * addition of remove_mapping(). If success is returned, the caller may @@ -47,45 +64,44 @@ static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - struct page *page = buf->page; + struct folio *folio = page_folio(buf->page); struct address_space *mapping; - lock_page(page); + folio_lock(folio); - mapping = page_mapping(page); + mapping = folio_mapping(folio); if (mapping) { - WARN_ON(!PageUptodate(page)); + WARN_ON(!folio_test_uptodate(folio)); /* * At least for ext2 with nobh option, we need to wait on - * writeback completing on this page, since we'll remove it + * writeback completing on this folio, since we'll remove it * from the pagecache. Otherwise truncate wont wait on the - * page, allowing the disk blocks to be reused by someone else + * folio, allowing the disk blocks to be reused by someone else * before we actually wrote our data to them. fs corruption * ensues. */ - wait_on_page_writeback(page); + folio_wait_writeback(folio); - if (page_has_private(page) && - !try_to_release_page(page, GFP_KERNEL)) + if (!filemap_release_folio(folio, GFP_KERNEL)) goto out_unlock; /* * If we succeeded in removing the mapping, set LRU flag * and return good. */ - if (remove_mapping(mapping, page)) { + if (remove_mapping(mapping, folio)) { buf->flags |= PIPE_BUF_FLAG_LRU; return true; } } /* - * Raced with truncate or failed to remove page from current + * Raced with truncate or failed to remove folio from current * address space, unlock and return failure. */ out_unlock: - unlock_page(page); + folio_unlock(folio); return false; } @@ -103,17 +119,17 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - struct page *page = buf->page; + struct folio *folio = page_folio(buf->page); int err; - if (!PageUptodate(page)) { - lock_page(page); + if (!folio_test_uptodate(folio)) { + folio_lock(folio); /* - * Page got truncated/unhashed. This will cause a 0-byte + * Folio got truncated/unhashed. This will cause a 0-byte * splice, if this is the first page. */ - if (!page->mapping) { + if (!folio->mapping) { err = -ENODATA; goto error; } @@ -121,20 +137,18 @@ static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, /* * Uh oh, read-error from disk. */ - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { err = -EIO; goto error; } - /* - * Page is ok afterall, we are done. - */ - unlock_page(page); + /* Folio is ok after all, we are done */ + folio_unlock(folio); } return 0; error: - unlock_page(page); + folio_unlock(folio); return err; } @@ -186,8 +200,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, unsigned int spd_pages = spd->nr_pages; unsigned int tail = pipe->tail; unsigned int head = pipe->head; - unsigned int mask = pipe->ring_size - 1; - int ret = 0, page_nr = 0; + ssize_t ret = 0; + int page_nr = 0; if (!spd_pages) return 0; @@ -199,7 +213,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, } while (!pipe_full(head, tail, pipe->max_usage)) { - struct pipe_buffer *buf = &pipe->bufs[head & mask]; + struct pipe_buffer *buf = pipe_buf(pipe, head); buf->page = spd->pages[page_nr]; buf->offset = spd->partial[page_nr].offset; @@ -232,7 +246,6 @@ ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { unsigned int head = pipe->head; unsigned int tail = pipe->tail; - unsigned int mask = pipe->ring_size - 1; int ret; if (unlikely(!pipe->readers)) { @@ -241,7 +254,7 @@ ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) } else if (pipe_full(head, tail, pipe->max_usage)) { ret = -EAGAIN; } else { - pipe->bufs[head & mask] = *buf; + *pipe_buf(pipe, head) = *buf; pipe->head = head + 1; return buf->len; } @@ -284,50 +297,107 @@ void splice_shrink_spd(struct splice_pipe_desc *spd) } /** - * generic_file_splice_read - splice data from file to a pipe - * @in: file to splice from - * @ppos: position in @in - * @pipe: pipe to splice to - * @len: number of bytes to splice - * @flags: splice modifier flags + * copy_splice_read - Copy data from a file and splice the copy into a pipe + * @in: The file to read from + * @ppos: Pointer to the file position to read from + * @pipe: The pipe to splice into + * @len: The amount to splice + * @flags: The SPLICE_F_* flags * - * Description: - * Will read pages from given file and fill them into a pipe. Can be - * used as long as it has more or less sane ->read_iter(). + * This function allocates a bunch of pages sufficient to hold the requested + * amount of data (but limited by the remaining pipe capacity), passes it to + * the file's ->read_iter() to read into and then splices the used pages into + * the pipe. * + * Return: On success, the number of bytes read will be returned and *@ppos + * will be updated if appropriate; 0 will be returned if there is no more data + * to be read; -EAGAIN will be returned if the pipe had no space, and some + * other negative error code will be returned on error. A short read may occur + * if the pipe has insufficient space, we reach the end of the data or we hit a + * hole. */ -ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) +ssize_t copy_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) { struct iov_iter to; + struct bio_vec *bv; struct kiocb kiocb; - unsigned int i_head; - int ret; + struct page **pages; + ssize_t ret; + size_t used, npages, chunk, remain, keep = 0; + int i; + + /* Work out how much data we can actually add into the pipe */ + used = pipe_buf_usage(pipe); + npages = max_t(ssize_t, pipe->max_usage - used, 0); + len = min_t(size_t, len, npages * PAGE_SIZE); + npages = DIV_ROUND_UP(len, PAGE_SIZE); + + bv = kzalloc(array_size(npages, sizeof(bv[0])) + + array_size(npages, sizeof(struct page *)), GFP_KERNEL); + if (!bv) + return -ENOMEM; + + pages = (struct page **)(bv + npages); + npages = alloc_pages_bulk(GFP_USER, npages, pages); + if (!npages) { + kfree(bv); + return -ENOMEM; + } + + remain = len = min_t(size_t, len, npages * PAGE_SIZE); - iov_iter_pipe(&to, READ, pipe, len); - i_head = to.head; + for (i = 0; i < npages; i++) { + chunk = min_t(size_t, PAGE_SIZE, remain); + bv[i].bv_page = pages[i]; + bv[i].bv_offset = 0; + bv[i].bv_len = chunk; + remain -= chunk; + } + + /* Do the I/O */ + iov_iter_bvec(&to, ITER_DEST, bv, npages, len); init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; - ret = call_read_iter(in, &kiocb, &to); + ret = in->f_op->read_iter(&kiocb, &to); + if (ret > 0) { + keep = DIV_ROUND_UP(ret, PAGE_SIZE); *ppos = kiocb.ki_pos; - file_accessed(in); - } else if (ret < 0) { - to.head = i_head; - to.iov_offset = 0; - iov_iter_advance(&to, 0); /* to free what was emitted */ - /* - * callers of ->splice_read() expect -EAGAIN on - * "can't put anything in there", rather than -EFAULT. - */ - if (ret == -EFAULT) - ret = -EAGAIN; } + /* + * Callers of ->splice_read() expect -EAGAIN on "can't put anything in + * there", rather than -EFAULT. + */ + if (ret == -EFAULT) + ret = -EAGAIN; + + /* Free any pages that didn't get touched at all. */ + if (keep < npages) + release_pages(pages + keep, npages - keep); + + /* Push the remaining pages into the pipe. */ + remain = ret; + for (i = 0; i < keep; i++) { + struct pipe_buffer *buf = pipe_head_buf(pipe); + + chunk = min_t(size_t, remain, PAGE_SIZE); + *buf = (struct pipe_buffer) { + .ops = &default_pipe_buf_ops, + .page = bv[i].bv_page, + .offset = 0, + .len = chunk, + }; + pipe->head++; + remain -= chunk; + } + + kfree(bv); return ret; } -EXPORT_SYMBOL(generic_file_splice_read); +EXPORT_SYMBOL(copy_splice_read); const struct pipe_buf_operations default_pipe_buf_ops = { .release = generic_pipe_buf_release, @@ -342,113 +412,6 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = { }; EXPORT_SYMBOL(nosteal_pipe_buf_ops); -static ssize_t kernel_readv(struct file *file, const struct kvec *vec, - unsigned long vlen, loff_t offset) -{ - mm_segment_t old_fs; - loff_t pos = offset; - ssize_t res; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - /* The cast to a user pointer is valid due to the set_fs() */ - res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0); - set_fs(old_fs); - - return res; -} - -static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) -{ - struct kvec *vec, __vec[PIPE_DEF_BUFFERS]; - struct iov_iter to; - struct page **pages; - unsigned int nr_pages; - unsigned int mask; - size_t offset, base, copied = 0; - ssize_t res; - int i; - - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) - return -EAGAIN; - - /* - * Try to keep page boundaries matching to source pagecache ones - - * it probably won't be much help, but... - */ - offset = *ppos & ~PAGE_MASK; - - iov_iter_pipe(&to, READ, pipe, len + offset); - - res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base); - if (res <= 0) - return -ENOMEM; - - nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE); - - vec = __vec; - if (nr_pages > PIPE_DEF_BUFFERS) { - vec = kmalloc_array(nr_pages, sizeof(struct kvec), GFP_KERNEL); - if (unlikely(!vec)) { - res = -ENOMEM; - goto out; - } - } - - mask = pipe->ring_size - 1; - pipe->bufs[to.head & mask].offset = offset; - pipe->bufs[to.head & mask].len -= offset; - - for (i = 0; i < nr_pages; i++) { - size_t this_len = min_t(size_t, len, PAGE_SIZE - offset); - vec[i].iov_base = page_address(pages[i]) + offset; - vec[i].iov_len = this_len; - len -= this_len; - offset = 0; - } - - res = kernel_readv(in, vec, nr_pages, *ppos); - if (res > 0) { - copied = res; - *ppos += res; - } - - if (vec != __vec) - kfree(vec); -out: - for (i = 0; i < nr_pages; i++) - put_page(pages[i]); - kvfree(pages); - iov_iter_advance(&to, copied); /* truncates and discards */ - return res; -} - -/* - * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' - * using sendpage(). Return the number of bytes sent. - */ -static int pipe_to_sendpage(struct pipe_inode_info *pipe, - struct pipe_buffer *buf, struct splice_desc *sd) -{ - struct file *file = sd->u.file; - loff_t pos = sd->pos; - int more; - - if (!likely(file->f_op->sendpage)) - return -EINVAL; - - more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; - - if (sd->len < sd->total_len && - pipe_occupancy(pipe->head, pipe->tail) > 1) - more |= MSG_SENDPAGE_NOTLAST; - - return file->f_op->sendpage(file, buf->page, buf->offset, - sd->len, &pos, more); -} - static void wakeup_pipe_writers(struct pipe_inode_info *pipe) { smp_mb(); @@ -482,11 +445,10 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des { unsigned int head = pipe->head; unsigned int tail = pipe->tail; - unsigned int mask = pipe->ring_size - 1; int ret; while (!pipe_empty(head, tail)) { - struct pipe_buffer *buf = &pipe->bufs[tail & mask]; + struct pipe_buffer *buf = pipe_buf(pipe, tail); sd->len = buf->len; if (sd->len > sd->total_len) @@ -526,6 +488,21 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des return 1; } +/* We know we have a pipe buffer, but maybe it's empty? */ +static inline bool eat_empty_buffer(struct pipe_inode_info *pipe) +{ + unsigned int tail = pipe->tail; + struct pipe_buffer *buf = pipe_buf(pipe, tail); + + if (unlikely(!buf->len)) { + pipe_buf_release(pipe, buf); + pipe->tail = tail+1; + return true; + } + + return false; +} + /** * splice_from_pipe_next - wait for some data to splice from * @pipe: pipe to splice from @@ -545,7 +522,8 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des if (signal_pending(current)) return -ERESTARTSYS; - while (pipe_empty(pipe->head, pipe->tail)) { +repeat: + while (pipe_is_empty(pipe)) { if (!pipe->writers) return 0; @@ -563,9 +541,12 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des sd->need_wakeup = false; } - pipe_wait(pipe); + pipe_wait_readable(pipe); } + if (eat_empty_buffer(pipe)) + goto repeat; + return 1; } @@ -609,7 +590,7 @@ static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_des * Description: * This function does little more than loop over the pipe and call * @actor to do the actual moving of a single struct pipe_buffer to - * the desired destination. See pipe_to_file, pipe_to_sendpage, or + * the desired destination. See pipe_to_file, pipe_to_sendmsg, or * pipe_to_user. * */ @@ -689,10 +670,13 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, .u.file = out, }; int nbufs = pipe->max_usage; - struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec), - GFP_KERNEL); + struct bio_vec *array; ssize_t ret; + if (!out->f_op->write_iter) + return -EINVAL; + + array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL); if (unlikely(!array)) return -ENOMEM; @@ -700,8 +684,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, splice_from_pipe_begin(&sd); while (sd.total_len) { + struct kiocb kiocb; struct iov_iter from; - unsigned int head, tail, mask; + unsigned int head, tail; size_t left; int n; @@ -722,16 +707,17 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, head = pipe->head; tail = pipe->tail; - mask = pipe->ring_size - 1; /* build the vector */ left = sd.total_len; - for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++, n++) { - struct pipe_buffer *buf = &pipe->bufs[tail & mask]; + for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++) { + struct pipe_buffer *buf = pipe_buf(pipe, tail); size_t this_len = buf->len; - if (this_len > left) - this_len = left; + /* zero-length bvecs are not supported, skip them */ + if (!this_len) + continue; + this_len = min(this_len, left); ret = pipe_buf_confirm(pipe, buf); if (unlikely(ret)) { @@ -740,16 +726,22 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, goto done; } - array[n].bv_page = buf->page; - array[n].bv_len = this_len; - array[n].bv_offset = buf->offset; + bvec_set_page(&array[n], buf->page, this_len, + buf->offset); left -= this_len; + n++; } - iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left); - ret = vfs_iter_write(out, &from, &sd.pos, 0); + iov_iter_bvec(&from, ITER_SOURCE, array, n, sd.total_len - left); + init_sync_kiocb(&kiocb, out); + kiocb.ki_pos = sd.pos; + ret = out->f_op->write_iter(&kiocb, &from); + sd.pos = kiocb.ki_pos; if (ret <= 0) break; + WARN_ONCE(ret > sd.total_len - left, + "Splice Exceeded! ret=%zd tot=%zu left=%zu\n", + ret, sd.total_len, left); sd.num_spliced += ret; sd.total_len -= ret; @@ -758,7 +750,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, /* dismiss the fully eaten buffers, adjust the partial one */ tail = pipe->tail; while (ret) { - struct pipe_buffer *buf = &pipe->bufs[tail & mask]; + struct pipe_buffer *buf = pipe_buf(pipe, tail); if (ret >= buf->len) { ret -= buf->len; buf->len = 0; @@ -788,35 +780,9 @@ done: EXPORT_SYMBOL(iter_file_splice_write); -static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) -{ - int ret; - void *data; - loff_t tmp = sd->pos; - - data = kmap(buf->page); - ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); - kunmap(buf->page); - - return ret; -} - -static ssize_t default_file_splice_write(struct pipe_inode_info *pipe, - struct file *out, loff_t *ppos, - size_t len, unsigned int flags) -{ - ssize_t ret; - - ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf); - if (ret > 0) - *ppos += ret; - - return ret; -} - +#ifdef CONFIG_NET /** - * generic_splice_sendpage - splice data from a pipe to a socket + * splice_to_socket - splice data from a pipe to a socket * @pipe: pipe to splice from * @out: socket to write to * @ppos: position in @out @@ -828,48 +794,222 @@ static ssize_t default_file_splice_write(struct pipe_inode_info *pipe, * is involved. * */ -ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) +ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) { - return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); + struct socket *sock = sock_from_file(out); + struct bio_vec bvec[16]; + struct msghdr msg = {}; + ssize_t ret = 0; + size_t spliced = 0; + bool need_wakeup = false; + + pipe_lock(pipe); + + while (len > 0) { + unsigned int head, tail, bc = 0; + size_t remain = len; + + /* + * Check for signal early to make process killable when there + * are always buffers available + */ + ret = -ERESTARTSYS; + if (signal_pending(current)) + break; + + while (pipe_is_empty(pipe)) { + ret = 0; + if (!pipe->writers) + goto out; + + if (spliced) + goto out; + + ret = -EAGAIN; + if (flags & SPLICE_F_NONBLOCK) + goto out; + + ret = -ERESTARTSYS; + if (signal_pending(current)) + goto out; + + if (need_wakeup) { + wakeup_pipe_writers(pipe); + need_wakeup = false; + } + + pipe_wait_readable(pipe); + } + + head = pipe->head; + tail = pipe->tail; + + while (!pipe_empty(head, tail)) { + struct pipe_buffer *buf = pipe_buf(pipe, tail); + size_t seg; + + if (!buf->len) { + tail++; + continue; + } + + seg = min_t(size_t, remain, buf->len); + + ret = pipe_buf_confirm(pipe, buf); + if (unlikely(ret)) { + if (ret == -ENODATA) + ret = 0; + break; + } + + bvec_set_page(&bvec[bc++], buf->page, seg, buf->offset); + remain -= seg; + if (remain == 0 || bc >= ARRAY_SIZE(bvec)) + break; + tail++; + } + + if (!bc) + break; + + msg.msg_flags = MSG_SPLICE_PAGES; + if (flags & SPLICE_F_MORE) + msg.msg_flags |= MSG_MORE; + if (remain && pipe_occupancy(pipe->head, tail) > 0) + msg.msg_flags |= MSG_MORE; + if (out->f_flags & O_NONBLOCK) + msg.msg_flags |= MSG_DONTWAIT; + + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bc, + len - remain); + ret = sock_sendmsg(sock, &msg); + if (ret <= 0) + break; + + spliced += ret; + len -= ret; + tail = pipe->tail; + while (ret > 0) { + struct pipe_buffer *buf = pipe_buf(pipe, tail); + size_t seg = min_t(size_t, ret, buf->len); + + buf->offset += seg; + buf->len -= seg; + ret -= seg; + + if (!buf->len) { + pipe_buf_release(pipe, buf); + tail++; + } + } + + if (tail != pipe->tail) { + pipe->tail = tail; + if (pipe->files) + need_wakeup = true; + } + } + +out: + pipe_unlock(pipe); + if (need_wakeup) + wakeup_pipe_writers(pipe); + return spliced ?: ret; } +#endif -EXPORT_SYMBOL(generic_splice_sendpage); +static int warn_unsupported(struct file *file, const char *op) +{ + pr_debug_ratelimited( + "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n", + op, file, current->pid, current->comm); + return -EINVAL; +} /* * Attempt to initiate a splice from pipe to file. */ -static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) +static ssize_t do_splice_from(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) { - if (out->f_op->splice_write) - return out->f_op->splice_write(pipe, out, ppos, len, flags); - return default_file_splice_write(pipe, out, ppos, len, flags); + if (unlikely(!out->f_op->splice_write)) + return warn_unsupported(out, "write"); + return out->f_op->splice_write(pipe, out, ppos, len, flags); } /* - * Attempt to initiate a splice from a file to a pipe. + * Indicate to the caller that there was a premature EOF when reading from the + * source and the caller didn't indicate they would be sending more data after + * this. */ -static long do_splice_to(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) +static void do_splice_eof(struct splice_desc *sd) { - int ret; + if (sd->splice_eof) + sd->splice_eof(sd); +} + +/* + * Callers already called rw_verify_area() on the entire range. + * No need to call it for sub ranges. + */ +static ssize_t do_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + unsigned int p_space; if (unlikely(!(in->f_mode & FMODE_READ))) return -EBADF; + if (!len) + return 0; - ret = rw_verify_area(READ, in, ppos, len); - if (unlikely(ret < 0)) - return ret; + /* Don't try to read more the pipe has space for. */ + p_space = pipe->max_usage - pipe_buf_usage(pipe); + len = min_t(size_t, len, p_space << PAGE_SHIFT); if (unlikely(len > MAX_RW_COUNT)) len = MAX_RW_COUNT; - if (in->f_op->splice_read) - return in->f_op->splice_read(in, ppos, pipe, len, flags); - return default_file_splice_read(in, ppos, pipe, len, flags); + if (unlikely(!in->f_op->splice_read)) + return warn_unsupported(in, "read"); + /* + * O_DIRECT and DAX don't deal with the pagecache, so we allocate a + * buffer, copy into it and splice that into the pipe. + */ + if ((in->f_flags & O_DIRECT) || IS_DAX(in->f_mapping->host)) + return copy_splice_read(in, ppos, pipe, len, flags); + return in->f_op->splice_read(in, ppos, pipe, len, flags); +} + +/** + * vfs_splice_read - Read data from a file and splice it into a pipe + * @in: File to splice from + * @ppos: Input file offset + * @pipe: Pipe to splice to + * @len: Number of bytes to splice + * @flags: Splice modifier flags (SPLICE_F_*) + * + * Splice the requested amount of data from the input file to the pipe. This + * is synchronous as the caller must hold the pipe lock across the entire + * operation. + * + * If successful, it returns the amount of data spliced, 0 if it hit the EOF or + * a hole and a negative error code otherwise. + */ +ssize_t vfs_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + ssize_t ret; + + ret = rw_verify_area(READ, in, ppos, len); + if (unlikely(ret < 0)) + return ret; + + return do_splice_read(in, ppos, pipe, len, flags); } +EXPORT_SYMBOL_GPL(vfs_splice_read); /** * splice_direct_to_actor - splices data directly between two non-pipes @@ -888,18 +1028,16 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, splice_direct_actor *actor) { struct pipe_inode_info *pipe; - long ret, bytes; - umode_t i_mode; + ssize_t ret, bytes; size_t len; int i, flags, more; /* - * We require the input being a regular file, as we don't want to - * randomly drop data for eg socket -> socket splicing. Use the - * piped splicing for that! + * We require the input to be seekable, as we don't want to randomly + * drop data for eg socket -> socket splicing. Use the piped splicing + * for that! */ - i_mode = file_inode(in)->i_mode; - if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) + if (unlikely(!(in->f_mode & FMODE_LSEEK))) return -EINVAL; /* @@ -925,44 +1063,40 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, /* * Do the splice. */ - ret = 0; bytes = 0; len = sd->total_len; + + /* Don't block on output, we have to drain the direct pipe. */ flags = sd->flags; + sd->flags &= ~SPLICE_F_NONBLOCK; /* - * Don't block on output, we have to drain the direct pipe. + * We signal MORE until we've read sufficient data to fulfill the + * request and we keep signalling it if the caller set it. */ - sd->flags &= ~SPLICE_F_NONBLOCK; more = sd->flags & SPLICE_F_MORE; + sd->flags |= SPLICE_F_MORE; - WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail)); + WARN_ON_ONCE(!pipe_is_empty(pipe)); while (len) { - unsigned int p_space; size_t read_len; loff_t pos = sd->pos, prev_pos = pos; - /* Don't try to read more the pipe has space for. */ - p_space = pipe->max_usage - - pipe_occupancy(pipe->head, pipe->tail); - read_len = min_t(size_t, len, p_space << PAGE_SHIFT); - ret = do_splice_to(in, &pos, pipe, read_len, flags); + ret = do_splice_read(in, &pos, pipe, len, flags); if (unlikely(ret <= 0)) - goto out_release; + goto read_failure; read_len = ret; sd->total_len = read_len; /* - * If more data is pending, set SPLICE_F_MORE - * If this is the last data and SPLICE_F_MORE was not set - * initially, clears it. + * If we now have sufficient data to fulfill the request then + * we clear SPLICE_F_MORE if it was not set initially. */ - if (read_len < len) - sd->flags |= SPLICE_F_MORE; - else if (!more) + if (read_len >= len && !more) sd->flags &= ~SPLICE_F_MORE; + /* * NOTE: nonblocking mode only applies to the input. We * must not do the output in nonblocking mode as then we @@ -989,6 +1123,15 @@ done: file_accessed(in); return bytes; +read_failure: + /* + * If the user did *not* set SPLICE_F_MORE *and* we didn't hit that + * "use all of len" case that cleared SPLICE_F_MORE, *and* we did a + * "->splice_in()" that returned EOF (ie zero) *and* we have sent at + * least 1 byte *then* we will also do the ->splice_eof() call. + */ + if (ret == 0 && !more && len > 0 && bytes) + do_splice_eof(sd); out_release: /* * If we did an incomplete transfer we must release @@ -1012,29 +1155,34 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) { struct file *file = sd->u.file; + long ret; - return do_splice_from(pipe, file, sd->opos, sd->total_len, - sd->flags); + file_start_write(file); + ret = do_splice_from(pipe, file, sd->opos, sd->total_len, sd->flags); + file_end_write(file); + return ret; } -/** - * do_splice_direct - splices data directly between two files - * @in: file to splice from - * @ppos: input file offset - * @out: file to splice to - * @opos: output file offset - * @len: number of bytes to splice - * @flags: splice modifier flags - * - * Description: - * For use by do_sendfile(). splice can easily emulate sendfile, but - * doing it in the application would incur an extra system call - * (splice in + splice out, as compared to just sendfile()). So this helper - * can splice directly through a process-private pipe. - * - */ -long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, - loff_t *opos, size_t len, unsigned int flags) +static int splice_file_range_actor(struct pipe_inode_info *pipe, + struct splice_desc *sd) +{ + struct file *file = sd->u.file; + + return do_splice_from(pipe, file, sd->opos, sd->total_len, sd->flags); +} + +static void direct_file_splice_eof(struct splice_desc *sd) +{ + struct file *file = sd->u.file; + + if (file->f_op->splice_eof) + file->f_op->splice_eof(file); +} + +static ssize_t do_splice_direct_actor(struct file *in, loff_t *ppos, + struct file *out, loff_t *opos, + size_t len, unsigned int flags, + splice_direct_actor *actor) { struct splice_desc sd = { .len = len, @@ -1042,9 +1190,10 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, .flags = flags, .pos = *ppos, .u.file = out, + .splice_eof = direct_file_splice_eof, .opos = opos, }; - long ret; + ssize_t ret; if (unlikely(!(out->f_mode & FMODE_WRITE))) return -EBADF; @@ -1052,18 +1201,63 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; - ret = rw_verify_area(WRITE, out, opos, len); - if (unlikely(ret < 0)) - return ret; - - ret = splice_direct_to_actor(in, &sd, direct_splice_actor); + ret = splice_direct_to_actor(in, &sd, actor); if (ret > 0) *ppos = sd.pos; return ret; } +/** + * do_splice_direct - splices data directly between two files + * @in: file to splice from + * @ppos: input file offset + * @out: file to splice to + * @opos: output file offset + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Description: + * For use by do_sendfile(). splice can easily emulate sendfile, but + * doing it in the application would incur an extra system call + * (splice in + splice out, as compared to just sendfile()). So this helper + * can splice directly through a process-private pipe. + * + * Callers already called rw_verify_area() on the entire range. + */ +ssize_t do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len, unsigned int flags) +{ + return do_splice_direct_actor(in, ppos, out, opos, len, flags, + direct_splice_actor); +} EXPORT_SYMBOL(do_splice_direct); +/** + * splice_file_range - splices data between two files for copy_file_range() + * @in: file to splice from + * @ppos: input file offset + * @out: file to splice to + * @opos: output file offset + * @len: number of bytes to splice + * + * Description: + * For use by ->copy_file_range() methods. + * Like do_splice_direct(), but vfs_copy_file_range() already holds + * start_file_write() on @out file. + * + * Callers already called rw_verify_area() on the entire range. + */ +ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len) +{ + lockdep_assert(file_write_started(out)); + + return do_splice_direct_actor(in, ppos, out, opos, + min_t(size_t, len, MAX_RW_COUNT), + 0, splice_file_range_actor); +} +EXPORT_SYMBOL(splice_file_range); + static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) { for (;;) { @@ -1071,13 +1265,13 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) send_sig(SIGPIPE, current, 0); return -EPIPE; } - if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (!pipe_is_full(pipe)) return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; if (signal_pending(current)) return -ERESTARTSYS; - pipe_wait(pipe); + pipe_wait_writable(pipe); } } @@ -1085,17 +1279,33 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags); +ssize_t splice_file_to_pipe(struct file *in, + struct pipe_inode_info *opipe, + loff_t *offset, + size_t len, unsigned int flags) +{ + ssize_t ret; + + pipe_lock(opipe); + ret = wait_for_space(opipe, flags); + if (!ret) + ret = do_splice_read(in, offset, opipe, len, flags); + pipe_unlock(opipe); + if (ret > 0) + wakeup_pipe_readers(opipe); + return ret; +} + /* * Determine where to splice to/from. */ -long do_splice(struct file *in, loff_t __user *off_in, - struct file *out, loff_t __user *off_out, - size_t len, unsigned int flags) +ssize_t do_splice(struct file *in, loff_t *off_in, struct file *out, + loff_t *off_out, size_t len, unsigned int flags) { struct pipe_inode_info *ipipe; struct pipe_inode_info *opipe; loff_t offset; - long ret; + ssize_t ret; if (unlikely(!(in->f_mode & FMODE_READ) || !(out->f_mode & FMODE_WRITE))) @@ -1115,17 +1325,14 @@ long do_splice(struct file *in, loff_t __user *off_in, if ((in->f_flags | out->f_flags) & O_NONBLOCK) flags |= SPLICE_F_NONBLOCK; - return splice_pipe_to_pipe(ipipe, opipe, len, flags); - } - - if (ipipe) { + ret = splice_pipe_to_pipe(ipipe, opipe, len, flags); + } else if (ipipe) { if (off_in) return -ESPIPE; if (off_out) { if (!(out->f_mode & FMODE_PWRITE)) return -EINVAL; - if (copy_from_user(&offset, off_out, sizeof(loff_t))) - return -EFAULT; + offset = *off_out; } else { offset = out->f_pos; } @@ -1146,95 +1353,139 @@ long do_splice(struct file *in, loff_t __user *off_in, if (!off_out) out->f_pos = offset; - else if (copy_to_user(off_out, &offset, sizeof(loff_t))) - ret = -EFAULT; - - return ret; - } - - if (opipe) { + else + *off_out = offset; + } else if (opipe) { if (off_out) return -ESPIPE; if (off_in) { if (!(in->f_mode & FMODE_PREAD)) return -EINVAL; - if (copy_from_user(&offset, off_in, sizeof(loff_t))) - return -EFAULT; + offset = *off_in; } else { offset = in->f_pos; } + ret = rw_verify_area(READ, in, &offset, len); + if (unlikely(ret < 0)) + return ret; + if (out->f_flags & O_NONBLOCK) flags |= SPLICE_F_NONBLOCK; - pipe_lock(opipe); - ret = wait_for_space(opipe, flags); - if (!ret) { - unsigned int p_space; - - /* Don't try to read more the pipe has space for. */ - p_space = opipe->max_usage - pipe_occupancy(opipe->head, opipe->tail); - len = min_t(size_t, len, p_space << PAGE_SHIFT); + ret = splice_file_to_pipe(in, opipe, &offset, len, flags); - ret = do_splice_to(in, &offset, opipe, len, flags); - } - pipe_unlock(opipe); - if (ret > 0) - wakeup_pipe_readers(opipe); if (!off_in) in->f_pos = offset; - else if (copy_to_user(off_in, &offset, sizeof(loff_t))) - ret = -EFAULT; + else + *off_in = offset; + } else { + ret = -EINVAL; + } - return ret; + if (ret > 0) { + /* + * Generate modify out before access in: + * do_splice_from() may've already sent modify out, + * and this ensures the events get merged. + */ + fsnotify_modify(out); + fsnotify_access(in); } - return -EINVAL; + return ret; +} + +static ssize_t __do_splice(struct file *in, loff_t __user *off_in, + struct file *out, loff_t __user *off_out, + size_t len, unsigned int flags) +{ + struct pipe_inode_info *ipipe; + struct pipe_inode_info *opipe; + loff_t offset, *__off_in = NULL, *__off_out = NULL; + ssize_t ret; + + ipipe = get_pipe_info(in, true); + opipe = get_pipe_info(out, true); + + if (ipipe) { + if (off_in) + return -ESPIPE; + pipe_clear_nowait(in); + } + if (opipe) { + if (off_out) + return -ESPIPE; + pipe_clear_nowait(out); + } + + if (off_out) { + if (copy_from_user(&offset, off_out, sizeof(loff_t))) + return -EFAULT; + __off_out = &offset; + } + if (off_in) { + if (copy_from_user(&offset, off_in, sizeof(loff_t))) + return -EFAULT; + __off_in = &offset; + } + + ret = do_splice(in, __off_in, out, __off_out, len, flags); + if (ret < 0) + return ret; + + if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t))) + return -EFAULT; + if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t))) + return -EFAULT; + + return ret; } -static int iter_to_pipe(struct iov_iter *from, - struct pipe_inode_info *pipe, - unsigned flags) +static ssize_t iter_to_pipe(struct iov_iter *from, + struct pipe_inode_info *pipe, + unsigned int flags) { struct pipe_buffer buf = { .ops = &user_page_pipe_buf_ops, .flags = flags }; size_t total = 0; - int ret = 0; - bool failed = false; + ssize_t ret = 0; - while (iov_iter_count(from) && !failed) { + while (iov_iter_count(from)) { struct page *pages[16]; - ssize_t copied; + ssize_t left; size_t start; - int n; + int i, n; - copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start); - if (copied <= 0) { - ret = copied; + left = iov_iter_get_pages2(from, pages, ~0UL, 16, &start); + if (left <= 0) { + ret = left; break; } - for (n = 0; copied; n++, start = 0) { - int size = min_t(int, copied, PAGE_SIZE - start); - if (!failed) { - buf.page = pages[n]; - buf.offset = start; - buf.len = size; - ret = add_to_pipe(pipe, &buf); - if (unlikely(ret < 0)) { - failed = true; - } else { - iov_iter_advance(from, ret); - total += ret; - } - } else { - put_page(pages[n]); + n = DIV_ROUND_UP(left + start, PAGE_SIZE); + for (i = 0; i < n; i++) { + int size = min_t(int, left, PAGE_SIZE - start); + + buf.page = pages[i]; + buf.offset = start; + buf.len = size; + ret = add_to_pipe(pipe, &buf); + if (unlikely(ret < 0)) { + iov_iter_revert(from, left); + // this one got dropped by add_to_pipe() + while (++i < n) + put_page(pages[i]); + goto out; } - copied -= size; + total += ret; + left -= size; + start = 0; } } +out: return total ? total : ret; } @@ -1247,10 +1498,10 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, /* * For lack of a better implementation, implement vmsplice() to userspace - * as a simple copy of the pipes pages to the user iov. + * as a simple copy of the pipe's pages to the user iov. */ -static long vmsplice_to_user(struct file *file, struct iov_iter *iter, - unsigned int flags) +static ssize_t vmsplice_to_user(struct file *file, struct iov_iter *iter, + unsigned int flags) { struct pipe_inode_info *pipe = get_pipe_info(file, true); struct splice_desc sd = { @@ -1258,17 +1509,22 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter, .flags = flags, .u.data = iter }; - long ret = 0; + ssize_t ret = 0; if (!pipe) return -EBADF; + pipe_clear_nowait(file); + if (sd.total_len) { pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, pipe_to_user); pipe_unlock(pipe); } + if (ret > 0) + fsnotify_access(file); + return ret; } @@ -1277,11 +1533,11 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter, * as splice-from-memory, where the regular splice is splice-from-file (or * to file). In both cases the output is a pipe, naturally. */ -static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, - unsigned int flags) +static ssize_t vmsplice_to_pipe(struct file *file, struct iov_iter *iter, + unsigned int flags) { struct pipe_inode_info *pipe; - long ret = 0; + ssize_t ret = 0; unsigned buf_flag = 0; if (flags & SPLICE_F_GIFT) @@ -1291,29 +1547,18 @@ static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, if (!pipe) return -EBADF; + pipe_clear_nowait(file); + pipe_lock(pipe); ret = wait_for_space(pipe, flags); if (!ret) ret = iter_to_pipe(iter, pipe, buf_flag); pipe_unlock(pipe); - if (ret > 0) + if (ret > 0) { wakeup_pipe_readers(pipe); - return ret; -} - -static int vmsplice_type(struct fd f, int *type) -{ - if (!f.file) - return -EBADF; - if (f.file->f_mode & FMODE_WRITE) { - *type = WRITE; - } else if (f.file->f_mode & FMODE_READ) { - *type = READ; - } else { - fdput(f); - return -EBADF; + fsnotify_modify(file); } - return 0; + return ret; } /* @@ -1332,20 +1577,6 @@ static int vmsplice_type(struct fd f, int *type) * Currently we punt and implement it as a normal copy, see pipe_to_user(). * */ -static long do_vmsplice(struct file *f, struct iov_iter *iter, unsigned int flags) -{ - if (unlikely(flags & ~SPLICE_F_ALL)) - return -EINVAL; - - if (!iov_iter_count(iter)) - return 0; - - if (iov_iter_rw(iter) == WRITE) - return vmsplice_to_pipe(f, iter, flags); - else - return vmsplice_to_user(f, iter, flags); -} - SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, unsigned long, nr_segs, unsigned int, flags) { @@ -1353,76 +1584,57 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, struct iovec *iov = iovstack; struct iov_iter iter; ssize_t error; - struct fd f; int type; - f = fdget(fd); - error = vmsplice_type(f, &type); - if (error) - return error; + if (unlikely(flags & ~SPLICE_F_ALL)) + return -EINVAL; + + CLASS(fd, f)(fd); + if (fd_empty(f)) + return -EBADF; + if (fd_file(f)->f_mode & FMODE_WRITE) + type = ITER_SOURCE; + else if (fd_file(f)->f_mode & FMODE_READ) + type = ITER_DEST; + else + return -EBADF; error = import_iovec(type, uiov, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); - if (error >= 0) { - error = do_vmsplice(f.file, &iter, flags); - kfree(iov); - } - fdput(f); - return error; -} - -#ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32, - unsigned int, nr_segs, unsigned int, flags) -{ - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter iter; - ssize_t error; - struct fd f; - int type; - - f = fdget(fd); - error = vmsplice_type(f, &type); - if (error) + if (error < 0) return error; - error = compat_import_iovec(type, iov32, nr_segs, - ARRAY_SIZE(iovstack), &iov, &iter); - if (error >= 0) { - error = do_vmsplice(f.file, &iter, flags); - kfree(iov); - } - fdput(f); + if (!iov_iter_count(&iter)) + error = 0; + else if (type == ITER_SOURCE) + error = vmsplice_to_pipe(fd_file(f), &iter, flags); + else + error = vmsplice_to_user(fd_file(f), &iter, flags); + + kfree(iov); return error; } -#endif SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags) { - struct fd in, out; - long error; - if (unlikely(!len)) return 0; if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; - error = -EBADF; - in = fdget(fd_in); - if (in.file) { - out = fdget(fd_out); - if (out.file) { - error = do_splice(in.file, off_in, out.file, off_out, - len, flags); - fdput(out); - } - fdput(in); - } - return error; + CLASS(fd, in)(fd_in); + if (fd_empty(in)) + return -EBADF; + + CLASS(fd, out)(fd_out); + if (fd_empty(out)) + return -EBADF; + + return __do_splice(fd_file(in), off_in, fd_file(out), off_out, + len, flags); } /* @@ -1437,13 +1649,13 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Check the pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ - if (!pipe_empty(pipe->head, pipe->tail)) + if (!pipe_is_empty(pipe)) return 0; ret = 0; pipe_lock(pipe); - while (pipe_empty(pipe->head, pipe->tail)) { + while (pipe_is_empty(pipe)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -1454,7 +1666,7 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) ret = -EAGAIN; break; } - pipe_wait(pipe); + pipe_wait_readable(pipe); } pipe_unlock(pipe); @@ -1473,13 +1685,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Check pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ - if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (!pipe_is_full(pipe)) return 0; ret = 0; pipe_lock(pipe); - while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + while (pipe_is_full(pipe)) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; @@ -1493,7 +1705,7 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) ret = -ERESTARTSYS; break; } - pipe_wait(pipe); + pipe_wait_writable(pipe); } pipe_unlock(pipe); @@ -1510,7 +1722,6 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_buffer *ibuf, *obuf; unsigned int i_head, o_head; unsigned int i_tail, o_tail; - unsigned int i_mask, o_mask; int ret = 0; bool input_wakeup = false; @@ -1532,9 +1743,7 @@ retry: pipe_double_lock(ipipe, opipe); i_tail = ipipe->tail; - i_mask = ipipe->ring_size - 1; o_head = opipe->head; - o_mask = opipe->ring_size - 1; do { size_t o_len; @@ -1577,8 +1786,8 @@ retry: goto retry; } - ibuf = &ipipe->bufs[i_tail & i_mask]; - obuf = &opipe->bufs[o_head & o_mask]; + ibuf = pipe_buf(ipipe, i_tail); + obuf = pipe_buf(opipe, o_head); if (len >= ibuf->len) { /* @@ -1640,15 +1849,14 @@ retry: /* * Link contents of ipipe to opipe. */ -static int link_pipe(struct pipe_inode_info *ipipe, - struct pipe_inode_info *opipe, - size_t len, unsigned int flags) +static ssize_t link_pipe(struct pipe_inode_info *ipipe, + struct pipe_inode_info *opipe, + size_t len, unsigned int flags) { struct pipe_buffer *ibuf, *obuf; unsigned int i_head, o_head; unsigned int i_tail, o_tail; - unsigned int i_mask, o_mask; - int ret = 0; + ssize_t ret = 0; /* * Potential ABBA deadlock, work around it by ordering lock @@ -1658,9 +1866,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, pipe_double_lock(ipipe, opipe); i_tail = ipipe->tail; - i_mask = ipipe->ring_size - 1; o_head = opipe->head; - o_mask = opipe->ring_size - 1; do { if (!opipe->readers) { @@ -1681,8 +1887,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, pipe_full(o_head, o_tail, opipe->max_usage)) break; - ibuf = &ipipe->bufs[i_tail & i_mask]; - obuf = &opipe->bufs[o_head & o_mask]; + ibuf = pipe_buf(ipipe, i_tail); + obuf = pipe_buf(opipe, o_head); /* * Get a reference to this pipe buffer, @@ -1731,11 +1937,12 @@ static int link_pipe(struct pipe_inode_info *ipipe, * The 'flags' used are the SPLICE_F_* variants, currently the only * applicable one is SPLICE_F_NONBLOCK. */ -long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) +ssize_t do_tee(struct file *in, struct file *out, size_t len, + unsigned int flags) { struct pipe_inode_info *ipipe = get_pipe_info(in, true); struct pipe_inode_info *opipe = get_pipe_info(out, true); - int ret = -EINVAL; + ssize_t ret = -EINVAL; if (unlikely(!(in->f_mode & FMODE_READ) || !(out->f_mode & FMODE_WRITE))) @@ -1761,30 +1968,29 @@ long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) } } + if (ret > 0) { + fsnotify_access(in); + fsnotify_modify(out); + } + return ret; } SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { - struct fd in, out; - int error; - if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; if (unlikely(!len)) return 0; - error = -EBADF; - in = fdget(fdin); - if (in.file) { - out = fdget(fdout); - if (out.file) { - error = do_tee(in.file, out.file, len, flags); - fdput(out); - } - fdput(in); - } + CLASS(fd, in)(fdin); + if (fd_empty(in)) + return -EBADF; - return error; + CLASS(fd, out)(fdout); + if (fd_empty(out)) + return -EBADF; + + return do_tee(fd_file(in), fd_file(out), len, flags); } |
