From 4d03e3cc59828c82ee89ea6e27a2f3cdf95aaadf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:33 +0200 Subject: fs: don't allow kernel reads and writes without iter ops Don't allow calling ->read or ->write with set_fs as a preparation for killing off set_fs. All the instances that we use kernel_read/write on are using the iter ops already. If a file has both the regular ->read/->write methods and the iter variants, those could have different semantics for messed up enough drivers. Also fail kernel access to them in that case. Signed-off-by: Christoph Hellwig Reviewed-by: Kees Cook Signed-off-by: Al Viro --- fs/read_write.c | 67 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 25 deletions(-) (limited to 'fs/read_write.c') diff --git a/fs/read_write.c b/fs/read_write.c index 5db58b8c78d0..702c4301d9eb 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -419,27 +419,41 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo return ret; } +static int warn_unsupported(struct file *file, const char *op) +{ + pr_warn_ratelimited( + "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n", + op, file, current->pid, current->comm); + return -EINVAL; +} + ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { - mm_segment_t old_fs = get_fs(); + struct kvec iov = { + .iov_base = buf, + .iov_len = min_t(size_t, count, MAX_RW_COUNT), + }; + struct kiocb kiocb; + struct iov_iter iter; ssize_t ret; if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) return -EINVAL; if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; + /* + * Also fail if ->read_iter and ->read are both wired up as that + * implies very convoluted semantics. 
+ */ + if (unlikely(!file->f_op->read_iter || file->f_op->read)) + return warn_unsupported(file, "read"); - if (count > MAX_RW_COUNT) - count = MAX_RW_COUNT; - set_fs(KERNEL_DS); - if (file->f_op->read) - ret = file->f_op->read(file, (void __user *)buf, count, pos); - else if (file->f_op->read_iter) - ret = new_sync_read(file, (void __user *)buf, count, pos); - else - ret = -EINVAL; - set_fs(old_fs); + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *pos; + iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len); + ret = file->f_op->read_iter(&kiocb, &iter); if (ret > 0) { + *pos = kiocb.ki_pos; fsnotify_access(file); add_rchar(current, ret); } @@ -510,28 +524,31 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t /* caller is responsible for file_start_write/file_end_write */ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) { - mm_segment_t old_fs; - const char __user *p; + struct kvec iov = { + .iov_base = (void *)buf, + .iov_len = min_t(size_t, count, MAX_RW_COUNT), + }; + struct kiocb kiocb; + struct iov_iter iter; ssize_t ret; if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; + /* + * Also fail if ->write_iter and ->write are both wired up as that + * implies very convoluted semantics. 
+ */ + if (unlikely(!file->f_op->write_iter || file->f_op->write)) + return warn_unsupported(file, "write"); - old_fs = get_fs(); - set_fs(KERNEL_DS); - p = (__force const char __user *)buf; - if (count > MAX_RW_COUNT) - count = MAX_RW_COUNT; - if (file->f_op->write) - ret = file->f_op->write(file, p, count, pos); - else if (file->f_op->write_iter) - ret = new_sync_write(file, p, count, pos); - else - ret = -EINVAL; - set_fs(old_fs); + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *pos; + iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); + ret = file->f_op->write_iter(&kiocb, &iter); if (ret > 0) { + *pos = kiocb.ki_pos; fsnotify_modify(file); add_wchar(current, ret); } -- cgit From 36e2c7421f02a22f71c9283e55fdb672a9eb58e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:34 +0200 Subject: fs: don't allow splice read/write without explicit ops default_file_splice_write is the last piece of generic code that uses set_fs to make the uaccess routines operate on kernel pointers. It implements a "fallback loop" for splicing from files that do not actually provide a proper splice_read method. The usual file systems and other high bandwidth instances all provide a ->splice_read, so this just removes support for various device drivers and procfs/debugfs files. If splice support for any of those turns out to be important it can be added back by switching them to the iter ops and using generic_file_splice_read. 
Signed-off-by: Christoph Hellwig Reviewed-by: Kees Cook Signed-off-by: Al Viro --- fs/read_write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/read_write.c') diff --git a/fs/read_write.c b/fs/read_write.c index 702c4301d9eb..8c61f67453e3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1077,7 +1077,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, } EXPORT_SYMBOL(vfs_iter_write); -ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, +static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; -- cgit From 4c207ef48269377236cd38979197c5e1631c8c16 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 3 Oct 2020 03:55:22 +0100 Subject: fs: Allow a NULL pos pointer to __kernel_write Linus prefers that callers be allowed to pass in a NULL pointer for ppos like new_sync_write(). Reviewed-by: Christoph Hellwig Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Al Viro --- fs/read_write.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/read_write.c') diff --git a/fs/read_write.c b/fs/read_write.c index 8c61f67453e3..516eb51af70e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -544,11 +544,12 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t return warn_unsupported(file, "write"); init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *pos; + kiocb.ki_pos = pos ? 
*pos : 0; iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); ret = file->f_op->write_iter(&kiocb, &iter); if (ret > 0) { - *pos = kiocb.ki_pos; + if (pos) + *pos = kiocb.ki_pos; fsnotify_modify(file); add_wchar(current, ret); } -- cgit From 7b84b665c874f60d84547635341e418f20cbbab2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 3 Oct 2020 03:55:23 +0100 Subject: fs: Allow a NULL pos pointer to __kernel_read Match the behaviour of new_sync_read() and __kernel_write(). Reviewed-by: Christoph Hellwig Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Al Viro --- fs/read_write.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/read_write.c') diff --git a/fs/read_write.c b/fs/read_write.c index 516eb51af70e..498cc00f3c08 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -449,11 +449,12 @@ ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) return warn_unsupported(file, "read"); init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *pos; + kiocb.ki_pos = pos ? *pos : 0; iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len); ret = file->f_op->read_iter(&kiocb, &iter); if (ret > 0) { - *pos = kiocb.ki_pos; + if (pos) + *pos = kiocb.ki_pos; fsnotify_access(file); add_rchar(current, ret); } -- cgit