From cc44c17baf7f3f833d36b2f2a1edb1cc0b6f2cc4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 11 Jul 2020 00:12:07 -0400 Subject: csum_partial_copy_nocheck(): drop the last argument It's always 0. Note that we theoretically could use ~0U as well - result will be the same modulo 0xffff, _if_ the damn thing did the right thing for any value of initial sum; later we'll make use of that when convenient. However, unlike csum_and_copy_..._user(), there are instances that did not work for arbitrary initial sums; c6x is one such. Signed-off-by: Al Viro --- lib/iov_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/iov_iter.c') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 5e40786c8f12..84f3a9156684 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -581,7 +581,7 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes, static __wsum csum_and_memcpy(void *to, const void *from, size_t len, __wsum sum, size_t off) { - __wsum next = csum_partial_copy_nocheck(from, to, len, 0); + __wsum next = csum_partial_copy_nocheck(from, to, len); return csum_block_add(sum, next, off); } -- cgit From 99a2c96d52d312b11a943372964226fa134de3b1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 11 Jul 2020 23:51:04 -0400 Subject: csum_and_copy_..._user(): pass 0xffffffff instead of 0 as initial sum Preparation for the change of calling conventions; right now all callers pass 0 as initial sum. Passing 0xffffffff instead yields the values comparable mod 0xffff and guarantees that 0 will not be returned on success. Signed-off-by: Al Viro --- lib/iov_iter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/iov_iter.c') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 84f3a9156684..437fbc14c972 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1452,7 +1452,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, int err = 0; next = csum_and_copy_from_user(v.iov_base, (to += v.iov_len) - v.iov_len, - v.iov_len, 0, &err); + v.iov_len, ~0U, &err); if (!err) { sum = csum_block_add(sum, next, off); off += v.iov_len; @@ -1494,7 +1494,7 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, int err = 0; next = csum_and_copy_from_user(v.iov_base, (to += v.iov_len) - v.iov_len, - v.iov_len, 0, &err); + v.iov_len, ~0U, &err); if (err) return false; sum = csum_block_add(sum, next, off); @@ -1540,7 +1540,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, int err = 0; next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, v.iov_base, - v.iov_len, 0, &err); + v.iov_len, ~0U, &err); if (!err) { sum = csum_block_add(sum, next, off); off += v.iov_len; -- cgit From c693cc4676a055c4126e487b30b0a96ea7ec9936 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 11 Jul 2020 00:27:49 -0400 Subject: saner calling conventions for csum_and_copy_..._user() All callers of these primitives will * discard anything we might've copied in case of error * ignore the csum value in case of error * always pass 0xffffffff as the initial sum, so the resulting csum value (in case of success, that is) will never be 0. That suggest the following calling conventions: * don't pass err_ptr - just return 0 on error. * don't bother with zeroing destination, etc. in case of error * don't pass the initial sum - just use 0xffffffff. This commit does the minimal conversion in the instances of csum_and_copy_...(); the changes of actual asm code behind them are done later in the series. Note that this asm code is often shared with csum_partial_copy_nocheck(); the difference is that csum_partial_copy_nocheck() passes 0 for initial sum while csum_and_copy_..._user() pass 0xffffffff. Fortunately, we are free to pass 0xffffffff in all cases and subsequent patches will use that freedom without any special comments. A part that could be split off: parisc and uml/i386 claimed to have csum_and_copy_to_user() instances of their own, but those were identical to the generic one, so we simply drop them. Not sure if it's worth a separate commit... Signed-off-by: Al Viro --- lib/iov_iter.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'lib/iov_iter.c') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 437fbc14c972..ccb247faf79b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1449,15 +1449,14 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, return 0; } iterate_and_advance(i, bytes, v, ({ - int err = 0; next = csum_and_copy_from_user(v.iov_base, (to += v.iov_len) - v.iov_len, - v.iov_len, ~0U, &err); - if (!err) { + v.iov_len); + if (next) { sum = csum_block_add(sum, next, off); off += v.iov_len; } - err ? v.iov_len : 0; + next ? 0 : v.iov_len; }), ({ char *p = kmap_atomic(v.bv_page); sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, @@ -1491,11 +1490,10 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, if (unlikely(i->count < bytes)) return false; iterate_all_kinds(i, bytes, v, ({ - int err = 0; next = csum_and_copy_from_user(v.iov_base, (to += v.iov_len) - v.iov_len, - v.iov_len, ~0U, &err); - if (err) + v.iov_len); + if (!next) return false; sum = csum_block_add(sum, next, off); off += v.iov_len; @@ -1537,15 +1535,14 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, return 0; } iterate_and_advance(i, bytes, v, ({ - int err = 0; next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, v.iov_base, - v.iov_len, ~0U, &err); - if (!err) { + v.iov_len); + if (next) { sum = csum_block_add(sum, next, off); off += v.iov_len; } - err ? v.iov_len : 0; + next ? 0 : v.iov_len; }), ({ char *p = kmap_atomic(v.bv_page); sum = csum_and_memcpy(p + v.bv_offset, -- cgit From fb041b598997d63c0f7d7305dfae70046bf66fe1 Mon Sep 17 00:00:00 2001 From: David Laight Date: Fri, 25 Sep 2020 06:51:39 +0200 Subject: iov_iter: move rw_copy_check_uvector() into lib/iov_iter.c This lets the compiler inline it into import_iovec() generating much better code. Signed-off-by: David Laight Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- lib/iov_iter.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) (limited to 'lib/iov_iter.c') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 5e40786c8f12..ccea9db3f72b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1650,6 +1650,109 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) } EXPORT_SYMBOL(dup_iter); +/** + * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace + * into the kernel and check that it is valid. + * + * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE. + * @uvector: Pointer to the userspace array. + * @nr_segs: Number of elements in userspace array. + * @fast_segs: Number of elements in @fast_pointer. + * @fast_pointer: Pointer to (usually small on-stack) kernel array. + * @ret_pointer: (output parameter) Pointer to a variable that will point to + * either @fast_pointer, a newly allocated kernel array, or NULL, + * depending on which array was used. + * + * This function copies an array of &struct iovec of @nr_segs from + * userspace into the kernel and checks that each element is valid (e.g. + * it does not point to a kernel address or cause overflow by being too + * large, etc.). + * + * As an optimization, the caller may provide a pointer to a small + * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long + * (the size of this array, or 0 if unused, should be given in @fast_segs). + * + * @ret_pointer will always point to the array that was used, so the + * caller must take care not to call kfree() on it e.g. in case the + * @fast_pointer array was used and it was allocated on the stack. + * + * Return: The total number of bytes covered by the iovec array on success + * or a negative error code on error. + */ +ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_pointer, struct iovec **ret_pointer) +{ + unsigned long seg; + ssize_t ret; + struct iovec *iov = fast_pointer; + + /* + * SuS says "The readv() function *may* fail if the iovcnt argument + * was less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... + */ + if (nr_segs == 0) { + ret = 0; + goto out; + } + + /* + * First get the "struct iovec" from user memory and + * verify all the pointers + */ + if (nr_segs > UIO_MAXIOV) { + ret = -EINVAL; + goto out; + } + if (nr_segs > fast_segs) { + iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); + if (iov == NULL) { + ret = -ENOMEM; + goto out; + } + } + if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { + ret = -EFAULT; + goto out; + } + + /* + * According to the Single Unix Specification we should return EINVAL + * if an element length is < 0 when cast to ssize_t or if the + * total length would overflow the ssize_t return value of the + * system call. + * + * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the + * overflow case. + */ + ret = 0; + for (seg = 0; seg < nr_segs; seg++) { + void __user *buf = iov[seg].iov_base; + ssize_t len = (ssize_t)iov[seg].iov_len; + + /* see if we we're about to use an invalid len or if + * it's about to overflow ssize_t */ + if (len < 0) { + ret = -EINVAL; + goto out; + } + if (type >= 0 + && unlikely(!access_ok(buf, len))) { + ret = -EFAULT; + goto out; + } + if (len > MAX_RW_COUNT - ret) { + len = MAX_RW_COUNT - ret; + iov[seg].iov_len = len; + } + ret += len; + } +out: + *ret_pointer = iov; + return ret; +} + /** * import_iovec() - Copy an array of &struct iovec from userspace * into the kernel, check that it is valid, and initialize a new @@ -1695,6 +1798,79 @@ EXPORT_SYMBOL(import_iovec); #ifdef CONFIG_COMPAT #include +ssize_t compat_rw_copy_check_uvector(int type, + const struct compat_iovec __user *uvector, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_pointer, struct iovec **ret_pointer) +{ + compat_ssize_t tot_len; + struct iovec *iov = *ret_pointer = fast_pointer; + ssize_t ret = 0; + int seg; + + /* + * SuS says "The readv() function *may* fail if the iovcnt argument + * was less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... + */ + if (nr_segs == 0) + goto out; + + ret = -EINVAL; + if (nr_segs > UIO_MAXIOV) + goto out; + if (nr_segs > fast_segs) { + ret = -ENOMEM; + iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); + if (iov == NULL) + goto out; + } + *ret_pointer = iov; + + ret = -EFAULT; + if (!access_ok(uvector, nr_segs*sizeof(*uvector))) + goto out; + + /* + * Single unix specification: + * We should -EINVAL if an element length is not >= 0 and fitting an + * ssize_t. + * + * In Linux, the total length is limited to MAX_RW_COUNT, there is + * no overflow possibility. + */ + tot_len = 0; + ret = -EINVAL; + for (seg = 0; seg < nr_segs; seg++) { + compat_uptr_t buf; + compat_ssize_t len; + + if (__get_user(len, &uvector->iov_len) || + __get_user(buf, &uvector->iov_base)) { + ret = -EFAULT; + goto out; + } + if (len < 0) /* size_t not fitting in compat_ssize_t .. */ + goto out; + if (type >= 0 && + !access_ok(compat_ptr(buf), len)) { + ret = -EFAULT; + goto out; + } + if (len > MAX_RW_COUNT - tot_len) + len = MAX_RW_COUNT - tot_len; + tot_len += len; + iov->iov_base = compat_ptr(buf); + iov->iov_len = (compat_size_t) len; + uvector++; + iov++; + } + ret = tot_len; + +out: + return ret; +} + ssize_t compat_import_iovec(int type, const struct compat_iovec __user * uvector, unsigned nr_segs, unsigned fast_segs, -- cgit From bfdc59701d6d100c99c3b987bcffd1c204e393c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:40 +0200 Subject: iov_iter: refactor rw_copy_check_uvector and import_iovec Split rw_copy_check_uvector into two new helpers with more sensible calling conventions: - iovec_from_user copies a iovec from userspace either into the provided stack buffer if it fits, or allocates a new buffer for it. Returns the actually used iovec. It also verifies that iov_len does fit a signed type, and handles compat iovecs if the compat flag is set. - __import_iovec consolidates the native and compat versions of import_iovec. It calls iovec_from_user, then validates each iovec actually points to user addresses, and ensures the total length doesn't overflow. This has two major implications: - the access_process_vm case loses the total lenght checking, which wasn't required anyway, given that each call receives two iovecs for the local and remote side of the operation, and it verifies the total length on the local side already. - instead of a single loop there now are two loops over the iovecs. Given that the iovecs are cache hot this doesn't make a major difference Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- lib/iov_iter.c | 300 ++++++++++++++++++++++----------------------------------- 1 file changed, 114 insertions(+), 186 deletions(-) (limited to 'lib/iov_iter.c') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index ccea9db3f72b..e6cb51da3424 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -1650,107 +1651,133 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) } EXPORT_SYMBOL(dup_iter); -/** - * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace - * into the kernel and check that it is valid. - * - * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE. - * @uvector: Pointer to the userspace array. - * @nr_segs: Number of elements in userspace array. - * @fast_segs: Number of elements in @fast_pointer. - * @fast_pointer: Pointer to (usually small on-stack) kernel array. - * @ret_pointer: (output parameter) Pointer to a variable that will point to - * either @fast_pointer, a newly allocated kernel array, or NULL, - * depending on which array was used. - * - * This function copies an array of &struct iovec of @nr_segs from - * userspace into the kernel and checks that each element is valid (e.g. - * it does not point to a kernel address or cause overflow by being too - * large, etc.). - * - * As an optimization, the caller may provide a pointer to a small - * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long - * (the size of this array, or 0 if unused, should be given in @fast_segs). - * - * @ret_pointer will always point to the array that was used, so the - * caller must take care not to call kfree() on it e.g. in case the - * @fast_pointer array was used and it was allocated on the stack. - * - * Return: The total number of bytes covered by the iovec array on success - * or a negative error code on error. - */ -ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector, - unsigned long nr_segs, unsigned long fast_segs, - struct iovec *fast_pointer, struct iovec **ret_pointer) +static int copy_compat_iovec_from_user(struct iovec *iov, + const struct iovec __user *uvec, unsigned long nr_segs) +{ + const struct compat_iovec __user *uiov = + (const struct compat_iovec __user *)uvec; + int ret = -EFAULT, i; + + if (!user_access_begin(uvec, nr_segs * sizeof(*uvec))) + return -EFAULT; + + for (i = 0; i < nr_segs; i++) { + compat_uptr_t buf; + compat_ssize_t len; + + unsafe_get_user(len, &uiov[i].iov_len, uaccess_end); + unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end); + + /* check for compat_size_t not fitting in compat_ssize_t .. */ + if (len < 0) { + ret = -EINVAL; + goto uaccess_end; + } + iov[i].iov_base = compat_ptr(buf); + iov[i].iov_len = len; + } + + ret = 0; +uaccess_end: + user_access_end(); + return ret; +} + +static int copy_iovec_from_user(struct iovec *iov, + const struct iovec __user *uvec, unsigned long nr_segs) { unsigned long seg; - ssize_t ret; - struct iovec *iov = fast_pointer; - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... - */ - if (nr_segs == 0) { - ret = 0; - goto out; + if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec))) + return -EFAULT; + for (seg = 0; seg < nr_segs; seg++) { + if ((ssize_t)iov[seg].iov_len < 0) + return -EINVAL; } + return 0; +} + +struct iovec *iovec_from_user(const struct iovec __user *uvec, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_iov, bool compat) +{ + struct iovec *iov = fast_iov; + int ret; + /* - * First get the "struct iovec" from user memory and - * verify all the pointers + * SuS says "The readv() function *may* fail if the iovcnt argument was + * less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... */ - if (nr_segs > UIO_MAXIOV) { - ret = -EINVAL; - goto out; - } + if (nr_segs == 0) + return iov; + if (nr_segs > UIO_MAXIOV) + return ERR_PTR(-EINVAL); if (nr_segs > fast_segs) { iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); - if (iov == NULL) { - ret = -ENOMEM; - goto out; - } + if (!iov) + return ERR_PTR(-ENOMEM); } - if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { - ret = -EFAULT; - goto out; + + if (compat) + ret = copy_compat_iovec_from_user(iov, uvec, nr_segs); + else + ret = copy_iovec_from_user(iov, uvec, nr_segs); + if (ret) { + if (iov != fast_iov) + kfree(iov); + return ERR_PTR(ret); + } + + return iov; +} + +ssize_t __import_iovec(int type, const struct iovec __user *uvec, + unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, + struct iov_iter *i, bool compat) +{ + ssize_t total_len = 0; + unsigned long seg; + struct iovec *iov; + + iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat); + if (IS_ERR(iov)) { + *iovp = NULL; + return PTR_ERR(iov); } /* - * According to the Single Unix Specification we should return EINVAL - * if an element length is < 0 when cast to ssize_t or if the - * total length would overflow the ssize_t return value of the - * system call. + * According to the Single Unix Specification we should return EINVAL if + * an element length is < 0 when cast to ssize_t or if the total length + * would overflow the ssize_t return value of the system call. * * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the * overflow case. */ - ret = 0; for (seg = 0; seg < nr_segs; seg++) { - void __user *buf = iov[seg].iov_base; ssize_t len = (ssize_t)iov[seg].iov_len; - /* see if we we're about to use an invalid len or if - * it's about to overflow ssize_t */ - if (len < 0) { - ret = -EINVAL; - goto out; + if (!access_ok(iov[seg].iov_base, len)) { + if (iov != *iovp) + kfree(iov); + *iovp = NULL; + return -EFAULT; } - if (type >= 0 - && unlikely(!access_ok(buf, len))) { - ret = -EFAULT; - goto out; - } - if (len > MAX_RW_COUNT - ret) { - len = MAX_RW_COUNT - ret; + + if (len > MAX_RW_COUNT - total_len) { + len = MAX_RW_COUNT - total_len; iov[seg].iov_len = len; } - ret += len; + total_len += len; } -out: - *ret_pointer = iov; - return ret; + + iov_iter_init(i, type, iov, nr_segs, total_len); + if (iov == *iovp) + *iovp = NULL; + else + *iovp = iov; + return total_len; } /** @@ -1759,10 +1786,10 @@ out: * &struct iov_iter iterator to access it. * * @type: One of %READ or %WRITE. - * @uvector: Pointer to the userspace array. + * @uvec: Pointer to the userspace array. * @nr_segs: Number of elements in userspace array. * @fast_segs: Number of elements in @iov. - * @iov: (input and output parameter) Pointer to pointer to (usually small + * @iovp: (input and output parameter) Pointer to pointer to (usually small * on-stack) kernel array. * @i: Pointer to iterator that will be initialized on success. * @@ -1775,120 +1802,21 @@ out: * * Return: Negative error code on error, bytes imported on success */ -ssize_t import_iovec(int type, const struct iovec __user * uvector, +ssize_t import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, - struct iovec **iov, struct iov_iter *i) + struct iovec **iovp, struct iov_iter *i) { - ssize_t n; - struct iovec *p; - n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, - *iov, &p); - if (n < 0) { - if (p != *iov) - kfree(p); - *iov = NULL; - return n; - } - iov_iter_init(i, type, p, nr_segs, n); - *iov = p == *iov ? NULL : p; - return n; + return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, false); } EXPORT_SYMBOL(import_iovec); #ifdef CONFIG_COMPAT -#include - -ssize_t compat_rw_copy_check_uvector(int type, - const struct compat_iovec __user *uvector, - unsigned long nr_segs, unsigned long fast_segs, - struct iovec *fast_pointer, struct iovec **ret_pointer) -{ - compat_ssize_t tot_len; - struct iovec *iov = *ret_pointer = fast_pointer; - ssize_t ret = 0; - int seg; - - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... - */ - if (nr_segs == 0) - goto out; - - ret = -EINVAL; - if (nr_segs > UIO_MAXIOV) - goto out; - if (nr_segs > fast_segs) { - ret = -ENOMEM; - iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); - if (iov == NULL) - goto out; - } - *ret_pointer = iov; - - ret = -EFAULT; - if (!access_ok(uvector, nr_segs*sizeof(*uvector))) - goto out; - - /* - * Single unix specification: - * We should -EINVAL if an element length is not >= 0 and fitting an - * ssize_t. - * - * In Linux, the total length is limited to MAX_RW_COUNT, there is - * no overflow possibility. - */ - tot_len = 0; - ret = -EINVAL; - for (seg = 0; seg < nr_segs; seg++) { - compat_uptr_t buf; - compat_ssize_t len; - - if (__get_user(len, &uvector->iov_len) || - __get_user(buf, &uvector->iov_base)) { - ret = -EFAULT; - goto out; - } - if (len < 0) /* size_t not fitting in compat_ssize_t .. */ - goto out; - if (type >= 0 && - !access_ok(compat_ptr(buf), len)) { - ret = -EFAULT; - goto out; - } - if (len > MAX_RW_COUNT - tot_len) - len = MAX_RW_COUNT - tot_len; - tot_len += len; - iov->iov_base = compat_ptr(buf); - iov->iov_len = (compat_size_t) len; - uvector++; - iov++; - } - ret = tot_len; - -out: - return ret; -} - -ssize_t compat_import_iovec(int type, - const struct compat_iovec __user * uvector, - unsigned nr_segs, unsigned fast_segs, - struct iovec **iov, struct iov_iter *i) +ssize_t compat_import_iovec(int type, const struct compat_iovec __user *uvec, + unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, + struct iov_iter *i) { - ssize_t n; - struct iovec *p; - n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, - *iov, &p); - if (n < 0) { - if (p != *iov) - kfree(p); - *iov = NULL; - return n; - } - iov_iter_init(i, type, p, nr_segs, n); - *iov = p == *iov ? NULL : p; - return n; + return __import_iovec(type, (const struct iovec __user *)uvec, nr_segs, + fast_segs, iovp, i, true); } EXPORT_SYMBOL(compat_import_iovec); #endif -- cgit From 89cd35c58bc2e36bfdc23dde67a429b08cf4ae03 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:41 +0200 Subject: iov_iter: transparently handle compat iovecs in import_iovec Use in compat_syscall to import either native or the compat iovecs, and remove the now superflous compat_import_iovec. This removes the need for special compat logic in most callers, and the remaining ones can still be simplified by using __import_iovec with a bool compat parameter. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- lib/iov_iter.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'lib/iov_iter.c') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e6cb51da3424..2e452ce7388b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1806,21 +1806,11 @@ ssize_t import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i) { - return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, false); + return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, + in_compat_syscall()); } EXPORT_SYMBOL(import_iovec); -#ifdef CONFIG_COMPAT -ssize_t compat_import_iovec(int type, const struct compat_iovec __user *uvec, - unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, - struct iov_iter *i) -{ - return __import_iovec(type, (const struct iovec __user *)uvec, nr_segs, - fast_segs, iovp, i, true); -} -EXPORT_SYMBOL(compat_import_iovec); -#endif - int import_single_range(int rw, void __user *buf, size_t len, struct iovec *iov, struct iov_iter *i) { -- cgit From ec6347bb43395cb92126788a1a5b25302543f815 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Oct 2020 20:40:16 -0700 Subject: x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}() In reaction to a proposal to introduce a memcpy_mcsafe_fast() implementation Linus points out that memcpy_mcsafe() is poorly named relative to communicating the scope of the interface. Specifically what addresses are valid to pass as source, destination, and what faults / exceptions are handled. Of particular concern is that even though x86 might be able to handle the semantics of copy_mc_to_user() with its common copy_user_generic() implementation other archs likely need / want an explicit path for this case: On Fri, May 1, 2020 at 11:28 AM Linus Torvalds wrote: > > On Thu, Apr 30, 2020 at 6:21 PM Dan Williams wrote: > > > > However now I see that copy_user_generic() works for the wrong reason. > > It works because the exception on the source address due to poison > > looks no different than a write fault on the user address to the > > caller, it's still just a short copy. So it makes copy_to_user() work > > for the wrong reason relative to the name. > > Right. > > And it won't work that way on other architectures. On x86, we have a > generic function that can take faults on either side, and we use it > for both cases (and for the "in_user" case too), but that's an > artifact of the architecture oddity. > > In fact, it's probably wrong even on x86 - because it can hide bugs - > but writing those things is painful enough that everybody prefers > having just one function. Replace a single top-level memcpy_mcsafe() with either copy_mc_to_user(), or copy_mc_to_kernel(). Introduce an x86 copy_mc_fragile() name as the rename for the low-level x86 implementation formerly named memcpy_mcsafe(). It is used as the slow / careful backend that is supplanted by a fast copy_mc_generic() in a follow-on patch. One side-effect of this reorganization is that separating copy_mc_64.S to its own file means that perf no longer needs to track dependencies for its memcpy_64.S benchmarks. [ bp: Massage a bit. ] Signed-off-by: Dan Williams Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Michael Ellerman Cc: Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com --- lib/iov_iter.c | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) (limited to 'lib/iov_iter.c') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 5e40786c8f12..d13304a034f5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -637,30 +637,30 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(_copy_to_iter); -#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE -static int copyout_mcsafe(void __user *to, const void *from, size_t n) +#ifdef CONFIG_ARCH_HAS_COPY_MC +static int copyout_mc(void __user *to, const void *from, size_t n) { if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); - n = copy_to_user_mcsafe((__force void *) to, from, n); + n = copy_mc_to_user((__force void *) to, from, n); } return n; } -static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, +static unsigned long copy_mc_to_page(struct page *page, size_t offset, const char *from, size_t len) { unsigned long ret; char *to; to = kmap_atomic(page); - ret = memcpy_mcsafe(to + offset, from, len); + ret = copy_mc_to_kernel(to + offset, from, len); kunmap_atomic(to); return ret; } -static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, +static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; @@ -678,7 +678,7 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, size_t chunk = min_t(size_t, n, PAGE_SIZE - off); unsigned long rem; - rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page, + rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); i->head = i_head; i->iov_offset = off + chunk - rem; @@ -695,18 +695,17 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, } /** - * _copy_to_iter_mcsafe - copy to user with source-read error exception handling + * _copy_mc_to_iter - copy to iter with source memory error exception handling * @addr: source kernel address * @bytes: total transfer length * @iter: destination iterator * - * The pmem driver arranges for filesystem-dax to use this facility via - * dax_copy_to_iter() for protecting read/write to persistent memory. - * Unless / until an architecture can guarantee identical performance - * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a - * performance regression to switch more users to the mcsafe version. + * The pmem driver deploys this for the dax operation + * (dax_copy_to_iter()) for dax reads (bypass page-cache and the + * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes + * successfully copied. * - * Otherwise, the main differences between this and typical _copy_to_iter(). + * The main differences between this and typical _copy_to_iter(). * * * Typical tail/residue handling after a fault retries the copy * byte-by-byte until the fault happens again. Re-triggering machine @@ -717,23 +716,22 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. * Compare to copy_to_iter() where only ITER_IOVEC attempts might return * a short copy. - * - * See MCSAFE_TEST for self-test. */ -size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) +size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; unsigned long rem, curr_addr, s_addr = (unsigned long) addr; if (unlikely(iov_iter_is_pipe(i))) - return copy_pipe_to_iter_mcsafe(addr, bytes, i); + return copy_mc_pipe_to_iter(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); iterate_and_advance(i, bytes, v, - copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), + copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len, + v.iov_len), ({ - rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, - (from += v.bv_len) - v.bv_len, v.bv_len); + rem = copy_mc_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len); if (rem) { curr_addr = (unsigned long) from; bytes = curr_addr - s_addr - rem; @@ -741,8 +739,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) } }), ({ - rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, - v.iov_len); + rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len) + - v.iov_len, v.iov_len); if (rem) { curr_addr = (unsigned long) from; bytes = curr_addr - s_addr - rem; @@ -753,8 +751,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) return bytes; } -EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); -#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */ +EXPORT_SYMBOL_GPL(_copy_mc_to_iter); +#endif /* CONFIG_ARCH_HAS_COPY_MC */ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { -- cgit From 4d0e9df5e43dba52d38b251e3b909df8fa1110be Mon Sep 17 00:00:00 2001 From: Albert van der Linde Date: Thu, 15 Oct 2020 20:13:50 -0700 Subject: lib, uaccess: add failure injection to usercopy functions To test fault-tolerance of user memory access functions, introduce fault injection to usercopy functions. If a failure is expected return either -EFAULT or the total amount of bytes that were not copied. Signed-off-by: Albert van der Linde Signed-off-by: Andrew Morton Reviewed-by: Akinobu Mita Reviewed-by: Alexander Potapenko Cc: Al Viro Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Marco Elver Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20200831171733.955393-3-alinde@google.com Signed-off-by: Linus Torvalds --- lib/iov_iter.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/iov_iter.c') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 14cae2584db4..1635111c5bd2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -140,6 +141,8 @@ static int copyout(void __user *to, const void *from, size_t n) { + if (should_fail_usercopy()) + return n; if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); @@ -149,6 +152,8 @@ static int copyout(void __user *to, const void *from, size_t n) static int copyin(void *to, const void __user *from, size_t n) { + if (should_fail_usercopy()) + return n; if (access_ok(from, n)) { instrument_copy_from_user(to, from, n); n = raw_copy_from_user(to, from, n); -- cgit